Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'probes-v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull probes updates from Masami Hiramatsu:

- tracing/probes: Add support for the new pseudo-types %pd and %pD, which
dump the dentry name from a 'struct dentry *' and the file name from a 'struct file *'

- uprobes performance optimizations:
- Speed up the BPF uprobe event by delaying the fetching of the
uprobe event arguments that are not used in BPF
- Avoid locking by speculatively checking whether uprobe event is
valid
- Reduce lock contention by using a read/write lock instead of a
spinlock for uprobes_tree (rbtree) access. This improved the BPF uprobe
benchmark results by 43% on average

- rethook: Remove non-fatal warning messages when tracing the stack from
BPF, and skip the rcu_is_watching() validation in rethook unless CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING or CONFIG_KPROBE_EVENTS_ON_NOTRACE is enabled

- objpool: Optimize objpool (which is used by kretprobes and fprobe as
the rethook backend storage) by inlining objpool_push() and objpool_pop(), and by
caching num_possible_cpus() instead of nr_cpu_ids, which is already a constant value

- fprobe: Add entry/exit callback types (code cleanup)

- kprobes: Check whether ftrace was killed before running kprobes that use ftrace

* tag 'probes-v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
kprobe/ftrace: bail out if ftrace was killed
selftests/ftrace: Fix required features for VFS type test case
objpool: cache nr_possible_cpus() and avoid caching nr_cpu_ids
objpool: enable inlining objpool_push() and objpool_pop() operations
rethook: honor CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING in rethook_try_get()
ftrace: make extra rcu_is_watching() validation check optional
uprobes: reduce contention on uprobes_tree access
rethook: Remove warning messages printed for finding return address of a frame.
fprobe: Add entry/exit callbacks types
selftests/ftrace: add fprobe test cases for VFS type "%pd" and "%pD"
selftests/ftrace: add kprobe test cases for VFS type "%pd" and "%pD"
Documentation: tracing: add new type '%pd' and '%pD' for kprobe
tracing/probes: support '%pD' type for print struct file's name
tracing/probes: support '%pd' type for print struct dentry's name
uprobes: add speculative lockless system-wide uprobe filter check
uprobes: prepare uprobe args buffer lazily
uprobes: encapsulate preparation of uprobe args buffer

+406 -176
+6 -2
Documentation/trace/kprobetrace.rst
··· 58 58 NAME=FETCHARG : Set NAME as the argument name of FETCHARG. 59 59 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types 60 60 (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types 61 - (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" 62 - and bitfield are supported. 61 + (x8/x16/x32/x64), VFS layer common type(%pd/%pD), "char", 62 + "string", "ustring", "symbol", "symstr" and bitfield are 63 + supported. 63 64 64 65 (\*1) only for the probe on function entry (offs == 0). Note, this argument access 65 66 is best effort, because depending on the argument type, it may be passed on ··· 122 121 With 'symstr' type, you can filter the event with wildcard pattern of the 123 122 symbols, and you don't need to solve symbol name by yourself. 124 123 For $comm, the default type is "string"; any other type is invalid. 124 + 125 + VFS layer common type(%pd/%pD) is a special type, which fetches dentry's or 126 + file's name from struct dentry's address or struct file's address. 125 127 126 128 .. _user_mem_access: 127 129
+3
arch/csky/kernel/probes/ftrace.c
··· 12 12 struct kprobe_ctlblk *kcb; 13 13 struct pt_regs *regs; 14 14 15 + if (unlikely(kprobe_ftrace_disabled)) 16 + return; 17 + 15 18 bit = ftrace_test_recursion_trylock(ip, parent_ip); 16 19 if (bit < 0) 17 20 return;
+3
arch/loongarch/kernel/ftrace_dyn.c
··· 287 287 struct kprobe *p; 288 288 struct kprobe_ctlblk *kcb; 289 289 290 + if (unlikely(kprobe_ftrace_disabled)) 291 + return; 292 + 290 293 bit = ftrace_test_recursion_trylock(ip, parent_ip); 291 294 if (bit < 0) 292 295 return;
+3
arch/parisc/kernel/ftrace.c
··· 206 206 struct kprobe *p; 207 207 int bit; 208 208 209 + if (unlikely(kprobe_ftrace_disabled)) 210 + return; 211 + 209 212 bit = ftrace_test_recursion_trylock(ip, parent_ip); 210 213 if (bit < 0) 211 214 return;
+3
arch/powerpc/kernel/kprobes-ftrace.c
··· 21 21 struct pt_regs *regs; 22 22 int bit; 23 23 24 + if (unlikely(kprobe_ftrace_disabled)) 25 + return; 26 + 24 27 bit = ftrace_test_recursion_trylock(nip, parent_nip); 25 28 if (bit < 0) 26 29 return;
+3
arch/riscv/kernel/probes/ftrace.c
··· 11 11 struct kprobe_ctlblk *kcb; 12 12 int bit; 13 13 14 + if (unlikely(kprobe_ftrace_disabled)) 15 + return; 16 + 14 17 bit = ftrace_test_recursion_trylock(ip, parent_ip); 15 18 if (bit < 0) 16 19 return;
+3
arch/s390/kernel/ftrace.c
··· 296 296 struct kprobe *p; 297 297 int bit; 298 298 299 + if (unlikely(kprobe_ftrace_disabled)) 300 + return; 301 + 299 302 bit = ftrace_test_recursion_trylock(ip, parent_ip); 300 303 if (bit < 0) 301 304 return;
+3
arch/x86/kernel/kprobes/ftrace.c
··· 21 21 struct kprobe_ctlblk *kcb; 22 22 int bit; 23 23 24 + if (unlikely(kprobe_ftrace_disabled)) 25 + return; 26 + 24 27 bit = ftrace_test_recursion_trylock(ip, parent_ip); 25 28 if (bit < 0) 26 29 return;
+12 -6
include/linux/fprobe.h
··· 7 7 #include <linux/ftrace.h> 8 8 #include <linux/rethook.h> 9 9 10 + struct fprobe; 11 + 12 + typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip, 13 + unsigned long ret_ip, struct pt_regs *regs, 14 + void *entry_data); 15 + 16 + typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip, 17 + unsigned long ret_ip, struct pt_regs *regs, 18 + void *entry_data); 19 + 10 20 /** 11 21 * struct fprobe - ftrace based probe. 12 22 * @ops: The ftrace_ops. ··· 44 34 size_t entry_data_size; 45 35 int nr_maxactive; 46 36 47 - int (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, 48 - unsigned long ret_ip, struct pt_regs *regs, 49 - void *entry_data); 50 - void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, 51 - unsigned long ret_ip, struct pt_regs *regs, 52 - void *entry_data); 37 + fprobe_entry_cb entry_handler; 38 + fprobe_exit_cb exit_handler; 53 39 }; 54 40 55 41 /* This fprobe is soft-disabled. */
+7
include/linux/kprobes.h
··· 378 378 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, 379 379 struct ftrace_ops *ops, struct ftrace_regs *fregs); 380 380 extern int arch_prepare_kprobe_ftrace(struct kprobe *p); 381 + /* Set when ftrace has been killed: kprobes on ftrace must be disabled for safety */ 382 + extern bool kprobe_ftrace_disabled __read_mostly; 383 + extern void kprobe_ftrace_kill(void); 381 384 #else 382 385 static inline int arch_prepare_kprobe_ftrace(struct kprobe *p) 383 386 { 384 387 return -EINVAL; 385 388 } 389 + static inline void kprobe_ftrace_kill(void) {} 386 390 #endif /* CONFIG_KPROBES_ON_FTRACE */ 387 391 388 392 /* Get the kprobe at this addr (if any) - called with preemption disabled */ ··· 497 493 { 498 494 } 499 495 static inline void kprobe_free_init_mem(void) 496 + { 497 + } 498 + static inline void kprobe_ftrace_kill(void) 500 499 { 501 500 } 502 501 static inline int disable_kprobe(struct kprobe *kp)
+101 -4
include/linux/objpool.h
··· 5 5 6 6 #include <linux/types.h> 7 7 #include <linux/refcount.h> 8 + #include <linux/atomic.h> 9 + #include <linux/cpumask.h> 10 + #include <linux/irqflags.h> 11 + #include <linux/smp.h> 8 12 9 13 /* 10 14 * objpool: ring-array based lockless MPMC queue ··· 73 69 * struct objpool_head - object pooling metadata 74 70 * @obj_size: object size, aligned to sizeof(void *) 75 71 * @nr_objs: total objs (to be pre-allocated with objpool) 76 - * @nr_cpus: local copy of nr_cpu_ids 72 + * @nr_possible_cpus: cached value of num_possible_cpus() 77 73 * @capacity: max objs can be managed by one objpool_slot 78 74 * @gfp: gfp flags for kmalloc & vmalloc 79 75 * @ref: refcount of objpool ··· 85 81 struct objpool_head { 86 82 int obj_size; 87 83 int nr_objs; 88 - int nr_cpus; 84 + int nr_possible_cpus; 89 85 int capacity; 90 86 gfp_t gfp; 91 87 refcount_t ref; ··· 122 118 gfp_t gfp, void *context, objpool_init_obj_cb objinit, 123 119 objpool_fini_cb release); 124 120 121 + /* try to retrieve object from slot */ 122 + static inline void *__objpool_try_get_slot(struct objpool_head *pool, int cpu) 123 + { 124 + struct objpool_slot *slot = pool->cpu_slots[cpu]; 125 + /* load head snapshot, other cpus may change it */ 126 + uint32_t head = smp_load_acquire(&slot->head); 127 + 128 + while (head != READ_ONCE(slot->last)) { 129 + void *obj; 130 + 131 + /* 132 + * data visibility of 'last' and 'head' could be out of 133 + * order since memory updating of 'last' and 'head' are 134 + * performed in push() and pop() independently 135 + * 136 + * before any retrieving attempts, pop() must guarantee 137 + * 'last' is behind 'head', that is to say, there must 138 + * be available objects in slot, which could be ensured 139 + * by condition 'last != head && last - head <= nr_objs' 140 + * that is equivalent to 'last - head - 1 < nr_objs' as 141 + * 'last' and 'head' are both unsigned int32 142 + */ 143 + if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) { 144 + head = 
READ_ONCE(slot->head); 145 + continue; 146 + } 147 + 148 + /* obj must be retrieved before moving forward head */ 149 + obj = READ_ONCE(slot->entries[head & slot->mask]); 150 + 151 + /* move head forward to mark it's consumption */ 152 + if (try_cmpxchg_release(&slot->head, &head, head + 1)) 153 + return obj; 154 + } 155 + 156 + return NULL; 157 + } 158 + 125 159 /** 126 160 * objpool_pop() - allocate an object from objpool 127 161 * @pool: object pool 128 162 * 129 163 * return value: object ptr or NULL if failed 130 164 */ 131 - void *objpool_pop(struct objpool_head *pool); 165 + static inline void *objpool_pop(struct objpool_head *pool) 166 + { 167 + void *obj = NULL; 168 + unsigned long flags; 169 + int i, cpu; 170 + 171 + /* disable local irq to avoid preemption & interruption */ 172 + raw_local_irq_save(flags); 173 + 174 + cpu = raw_smp_processor_id(); 175 + for (i = 0; i < pool->nr_possible_cpus; i++) { 176 + obj = __objpool_try_get_slot(pool, cpu); 177 + if (obj) 178 + break; 179 + cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1); 180 + } 181 + raw_local_irq_restore(flags); 182 + 183 + return obj; 184 + } 185 + 186 + /* adding object to slot, abort if the slot was already full */ 187 + static inline int 188 + __objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu) 189 + { 190 + struct objpool_slot *slot = pool->cpu_slots[cpu]; 191 + uint32_t head, tail; 192 + 193 + /* loading tail and head as a local snapshot, tail first */ 194 + tail = READ_ONCE(slot->tail); 195 + 196 + do { 197 + head = READ_ONCE(slot->head); 198 + /* fault caught: something must be wrong */ 199 + WARN_ON_ONCE(tail - head > pool->nr_objs); 200 + } while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1)); 201 + 202 + /* now the tail position is reserved for the given obj */ 203 + WRITE_ONCE(slot->entries[tail & slot->mask], obj); 204 + /* update sequence to make this obj available for pop() */ 205 + smp_store_release(&slot->last, tail + 1); 206 + 207 + return 0; 208 + 
} 132 209 133 210 /** 134 211 * objpool_push() - reclaim the object and return back to objpool ··· 219 134 * return: 0 or error code (it fails only when user tries to push 220 135 * the same object multiple times or wrong "objects" into objpool) 221 136 */ 222 - int objpool_push(void *obj, struct objpool_head *pool); 137 + static inline int objpool_push(void *obj, struct objpool_head *pool) 138 + { 139 + unsigned long flags; 140 + int rc; 141 + 142 + /* disable local irq to avoid preemption & interruption */ 143 + raw_local_irq_save(flags); 144 + rc = __objpool_try_add_slot(obj, pool, raw_smp_processor_id()); 145 + raw_local_irq_restore(flags); 146 + 147 + return rc; 148 + } 149 + 223 150 224 151 /** 225 152 * objpool_drop() - discard the object and deref objpool
+1 -1
include/linux/trace_recursion.h
··· 135 135 # define do_ftrace_record_recursion(ip, pip) do { } while (0) 136 136 #endif 137 137 138 - #ifdef CONFIG_ARCH_WANTS_NO_INSTR 138 + #ifdef CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING 139 139 # define trace_warn_on_no_rcu(ip) \ 140 140 ({ \ 141 141 bool __ret = !rcu_is_watching(); \
+11 -11
kernel/events/uprobes.c
··· 39 39 */ 40 40 #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) 41 41 42 - static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ 42 + static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ 43 43 44 44 #define UPROBES_HASH_SZ 13 45 45 /* serialize uprobe->pending_list */ ··· 669 669 { 670 670 struct uprobe *uprobe; 671 671 672 - spin_lock(&uprobes_treelock); 672 + read_lock(&uprobes_treelock); 673 673 uprobe = __find_uprobe(inode, offset); 674 - spin_unlock(&uprobes_treelock); 674 + read_unlock(&uprobes_treelock); 675 675 676 676 return uprobe; 677 677 } ··· 701 701 { 702 702 struct uprobe *u; 703 703 704 - spin_lock(&uprobes_treelock); 704 + write_lock(&uprobes_treelock); 705 705 u = __insert_uprobe(uprobe); 706 - spin_unlock(&uprobes_treelock); 706 + write_unlock(&uprobes_treelock); 707 707 708 708 return u; 709 709 } ··· 935 935 if (WARN_ON(!uprobe_is_active(uprobe))) 936 936 return; 937 937 938 - spin_lock(&uprobes_treelock); 938 + write_lock(&uprobes_treelock); 939 939 rb_erase(&uprobe->rb_node, &uprobes_tree); 940 - spin_unlock(&uprobes_treelock); 940 + write_unlock(&uprobes_treelock); 941 941 RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ 942 942 put_uprobe(uprobe); 943 943 } ··· 1298 1298 min = vaddr_to_offset(vma, start); 1299 1299 max = min + (end - start) - 1; 1300 1300 1301 - spin_lock(&uprobes_treelock); 1301 + read_lock(&uprobes_treelock); 1302 1302 n = find_node_in_range(inode, min, max); 1303 1303 if (n) { 1304 1304 for (t = n; t; t = rb_prev(t)) { ··· 1316 1316 get_uprobe(u); 1317 1317 } 1318 1318 } 1319 - spin_unlock(&uprobes_treelock); 1319 + read_unlock(&uprobes_treelock); 1320 1320 } 1321 1321 1322 1322 /* @vma contains reference counter, not the probed instruction. 
*/ ··· 1407 1407 min = vaddr_to_offset(vma, start); 1408 1408 max = min + (end - start) - 1; 1409 1409 1410 - spin_lock(&uprobes_treelock); 1410 + read_lock(&uprobes_treelock); 1411 1411 n = find_node_in_range(inode, min, max); 1412 - spin_unlock(&uprobes_treelock); 1412 + read_unlock(&uprobes_treelock); 1413 1413 1414 1414 return !!n; 1415 1415 }
+6
kernel/kprobes.c
··· 1067 1067 1068 1068 static int kprobe_ipmodify_enabled; 1069 1069 static int kprobe_ftrace_enabled; 1070 + bool kprobe_ftrace_disabled; 1070 1071 1071 1072 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, 1072 1073 int *cnt) ··· 1135 1134 return __disarm_kprobe_ftrace(p, 1136 1135 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops, 1137 1136 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); 1137 + } 1138 + 1139 + void kprobe_ftrace_kill() 1140 + { 1141 + kprobe_ftrace_disabled = true; 1138 1142 } 1139 1143 #else /* !CONFIG_KPROBES_ON_FTRACE */ 1140 1144 static inline int arm_kprobe_ftrace(struct kprobe *p)
+13
kernel/trace/Kconfig
··· 974 974 This file can be reset, but the limit can not change in 975 975 size at runtime. 976 976 977 + config FTRACE_VALIDATE_RCU_IS_WATCHING 978 + bool "Validate RCU is on during ftrace execution" 979 + depends on FUNCTION_TRACER 980 + depends on ARCH_WANTS_NO_INSTR 981 + help 982 + All callbacks that attach to the function tracing have some sort of 983 + protection against recursion. This option is only to verify that 984 + ftrace (and other users of ftrace_test_recursion_trylock()) are not 985 + called outside of RCU, as if they are, it can cause a race. But it 986 + also has a noticeable overhead when enabled. 987 + 988 + If unsure, say N 989 + 977 990 config RING_BUFFER_RECORD_RECURSION 978 991 bool "Record functions that recurse in the ring buffer" 979 992 depends on FTRACE_RECORD_RECURSION
+1
kernel/trace/ftrace.c
··· 7894 7894 ftrace_disabled = 1; 7895 7895 ftrace_enabled = 0; 7896 7896 ftrace_trace_function = ftrace_stub; 7897 + kprobe_ftrace_kill(); 7897 7898 } 7898 7899 7899 7900 /**
+3 -1
kernel/trace/rethook.c
··· 166 166 if (unlikely(!handler)) 167 167 return NULL; 168 168 169 + #if defined(CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING) || defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE) 169 170 /* 170 171 * This expects the caller will set up a rethook on a function entry. 171 172 * When the function returns, the rethook will eventually be reclaimed ··· 175 174 */ 176 175 if (unlikely(!rcu_is_watching())) 177 176 return NULL; 177 + #endif 178 178 179 179 return (struct rethook_node *)objpool_pop(&rh->pool); 180 180 } ··· 250 248 if (WARN_ON_ONCE(!cur)) 251 249 return 0; 252 250 253 - if (WARN_ON_ONCE(tsk != current && task_is_running(tsk))) 251 + if (tsk != current && task_is_running(tsk)) 254 252 return 0; 255 253 256 254 do {
+1 -1
kernel/trace/trace.c
··· 5540 5540 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 5541 5541 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 5542 5542 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 5543 - "\t symstr, <type>\\[<array-size>\\]\n" 5543 + "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 5544 5544 #ifdef CONFIG_HIST_TRIGGERS 5545 5545 "\t field: <stype> <name>;\n" 5546 5546 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
+6
kernel/trace/trace_fprobe.c
··· 994 994 char gbuf[MAX_EVENT_NAME_LEN]; 995 995 char sbuf[KSYM_NAME_LEN]; 996 996 char abuf[MAX_BTF_ARGS_LEN]; 997 + char *dbuf = NULL; 997 998 bool is_tracepoint = false; 998 999 struct tracepoint *tpoint = NULL; 999 1000 struct traceprobe_parse_context ctx = { ··· 1105 1104 argv = new_argv; 1106 1105 } 1107 1106 1107 + ret = traceprobe_expand_dentry_args(argc, argv, &dbuf); 1108 + if (ret) 1109 + goto out; 1110 + 1108 1111 /* setup a probe */ 1109 1112 tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive, 1110 1113 argc, is_return); ··· 1159 1154 trace_probe_log_clear(); 1160 1155 kfree(new_argv); 1161 1156 kfree(symbol); 1157 + kfree(dbuf); 1162 1158 return ret; 1163 1159 1164 1160 parse_error:
+6
kernel/trace/trace_kprobe.c
··· 800 800 char buf[MAX_EVENT_NAME_LEN]; 801 801 char gbuf[MAX_EVENT_NAME_LEN]; 802 802 char abuf[MAX_BTF_ARGS_LEN]; 803 + char *dbuf = NULL; 803 804 struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL }; 804 805 805 806 switch (argv[0][0]) { ··· 952 951 argv = new_argv; 953 952 } 954 953 954 + ret = traceprobe_expand_dentry_args(argc, argv, &dbuf); 955 + if (ret) 956 + goto out; 957 + 955 958 /* setup a probe */ 956 959 tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, 957 960 argc, is_return); ··· 1002 997 trace_probe_log_clear(); 1003 998 kfree(new_argv); 1004 999 kfree(symbol); 1000 + kfree(dbuf); 1005 1001 return ret; 1006 1002 1007 1003 parse_error:
+63
kernel/trace/trace_probe.c
··· 12 12 #define pr_fmt(fmt) "trace_probe: " fmt 13 13 14 14 #include <linux/bpf.h> 15 + #include <linux/fs.h> 15 16 #include "trace_btf.h" 16 17 17 18 #include "trace_probe.h" ··· 1736 1735 error: 1737 1736 kfree(new_argv); 1738 1737 return ERR_PTR(ret); 1738 + } 1739 + 1740 + /* @buf: *buf must be equal to NULL. Caller must to free *buf */ 1741 + int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf) 1742 + { 1743 + int i, used, ret; 1744 + const int bufsize = MAX_DENTRY_ARGS_LEN; 1745 + char *tmpbuf = NULL; 1746 + 1747 + if (*buf) 1748 + return -EINVAL; 1749 + 1750 + used = 0; 1751 + for (i = 0; i < argc; i++) { 1752 + char *tmp; 1753 + char *equal; 1754 + size_t arg_len; 1755 + 1756 + if (!glob_match("*:%p[dD]", argv[i])) 1757 + continue; 1758 + 1759 + if (!tmpbuf) { 1760 + tmpbuf = kmalloc(bufsize, GFP_KERNEL); 1761 + if (!tmpbuf) 1762 + return -ENOMEM; 1763 + } 1764 + 1765 + tmp = kstrdup(argv[i], GFP_KERNEL); 1766 + if (!tmp) 1767 + goto nomem; 1768 + 1769 + equal = strchr(tmp, '='); 1770 + if (equal) 1771 + *equal = '\0'; 1772 + arg_len = strlen(argv[i]); 1773 + tmp[arg_len - 4] = '\0'; 1774 + if (argv[i][arg_len - 1] == 'd') 1775 + ret = snprintf(tmpbuf + used, bufsize - used, 1776 + "%s%s+0x0(+0x%zx(%s)):string", 1777 + equal ? tmp : "", equal ? "=" : "", 1778 + offsetof(struct dentry, d_name.name), 1779 + equal ? equal + 1 : tmp); 1780 + else 1781 + ret = snprintf(tmpbuf + used, bufsize - used, 1782 + "%s%s+0x0(+0x%zx(+0x%zx(%s))):string", 1783 + equal ? tmp : "", equal ? "=" : "", 1784 + offsetof(struct dentry, d_name.name), 1785 + offsetof(struct file, f_path.dentry), 1786 + equal ? 
equal + 1 : tmp); 1787 + 1788 + kfree(tmp); 1789 + if (ret >= bufsize - used) 1790 + goto nomem; 1791 + argv[i] = tmpbuf + used; 1792 + used += ret + 1; 1793 + } 1794 + 1795 + *buf = tmpbuf; 1796 + return 0; 1797 + nomem: 1798 + kfree(tmpbuf); 1799 + return -ENOMEM; 1739 1800 } 1740 1801 1741 1802 void traceprobe_finish_parse(struct traceprobe_parse_context *ctx)
+2
kernel/trace/trace_probe.h
··· 34 34 #define MAX_ARRAY_LEN 64 35 35 #define MAX_ARG_NAME_LEN 32 36 36 #define MAX_BTF_ARGS_LEN 128 37 + #define MAX_DENTRY_ARGS_LEN 256 37 38 #define MAX_STRING_SIZE PATH_MAX 38 39 #define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) 39 40 ··· 429 428 const char **traceprobe_expand_meta_args(int argc, const char *argv[], 430 429 int *new_argc, char *buf, int bufsize, 431 430 struct traceprobe_parse_context *ctx); 431 + extern int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf); 432 432 433 433 extern int traceprobe_update_arg(struct probe_arg *arg); 434 434 extern void traceprobe_free_probe_arg(struct probe_arg *arg);
+59 -44
kernel/trace/trace_uprobe.c
··· 854 854 struct uprobe_cpu_buffer { 855 855 struct mutex mutex; 856 856 void *buf; 857 + int dsize; 857 858 }; 858 859 static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; 859 860 static int uprobe_buffer_refcnt; ··· 941 940 942 941 static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) 943 942 { 943 + if (!ucb) 944 + return; 944 945 mutex_unlock(&ucb->mutex); 946 + } 947 + 948 + static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, 949 + struct pt_regs *regs, 950 + struct uprobe_cpu_buffer **ucbp) 951 + { 952 + struct uprobe_cpu_buffer *ucb; 953 + int dsize, esize; 954 + 955 + if (*ucbp) 956 + return *ucbp; 957 + 958 + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); 959 + dsize = __get_data_size(&tu->tp, regs, NULL); 960 + 961 + ucb = uprobe_buffer_get(); 962 + ucb->dsize = tu->tp.size + dsize; 963 + 964 + store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); 965 + 966 + *ucbp = ucb; 967 + return ucb; 945 968 } 946 969 947 970 static void __uprobe_trace_func(struct trace_uprobe *tu, 948 971 unsigned long func, struct pt_regs *regs, 949 - struct uprobe_cpu_buffer *ucb, int dsize, 972 + struct uprobe_cpu_buffer **ucbp, 950 973 struct trace_event_file *trace_file) 951 974 { 952 975 struct uprobe_trace_entry_head *entry; 953 976 struct trace_event_buffer fbuffer; 977 + struct uprobe_cpu_buffer *ucb; 954 978 void *data; 955 979 int size, esize; 956 980 struct trace_event_call *call = trace_probe_event_call(&tu->tp); 957 981 958 982 WARN_ON(call != trace_file->event_call); 959 983 960 - if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) 984 + ucb = prepare_uprobe_buffer(tu, regs, ucbp); 985 + if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) 961 986 return; 962 987 963 988 if (trace_trigger_soft_disabled(trace_file)) 964 989 return; 965 990 966 991 esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); 967 - size = esize + tu->tp.size + dsize; 992 + size = esize + ucb->dsize; 968 993 entry = trace_event_buffer_reserve(&fbuffer, 
trace_file, size); 969 994 if (!entry) 970 995 return; ··· 1004 977 data = DATAOF_TRACE_ENTRY(entry, false); 1005 978 } 1006 979 1007 - memcpy(data, ucb->buf, tu->tp.size + dsize); 980 + memcpy(data, ucb->buf, ucb->dsize); 1008 981 1009 982 trace_event_buffer_commit(&fbuffer); 1010 983 } 1011 984 1012 985 /* uprobe handler */ 1013 986 static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, 1014 - struct uprobe_cpu_buffer *ucb, int dsize) 987 + struct uprobe_cpu_buffer **ucbp) 1015 988 { 1016 989 struct event_file_link *link; 1017 990 ··· 1020 993 1021 994 rcu_read_lock(); 1022 995 trace_probe_for_each_link_rcu(link, &tu->tp) 1023 - __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file); 996 + __uprobe_trace_func(tu, 0, regs, ucbp, link->file); 1024 997 rcu_read_unlock(); 1025 998 1026 999 return 0; ··· 1028 1001 1029 1002 static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, 1030 1003 struct pt_regs *regs, 1031 - struct uprobe_cpu_buffer *ucb, int dsize) 1004 + struct uprobe_cpu_buffer **ucbp) 1032 1005 { 1033 1006 struct event_file_link *link; 1034 1007 1035 1008 rcu_read_lock(); 1036 1009 trace_probe_for_each_link_rcu(link, &tu->tp) 1037 - __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file); 1010 + __uprobe_trace_func(tu, func, regs, ucbp, link->file); 1038 1011 rcu_read_unlock(); 1039 1012 } 1040 1013 ··· 1226 1199 { 1227 1200 struct perf_event *event; 1228 1201 1229 - if (filter->nr_systemwide) 1230 - return true; 1231 - 1232 1202 list_for_each_entry(event, &filter->perf_events, hw.tp_list) { 1233 1203 if (event->hw.target->mm == mm) 1234 1204 return true; ··· 1350 1326 tu = container_of(uc, struct trace_uprobe, consumer); 1351 1327 filter = tu->tp.event->filter; 1352 1328 1329 + /* 1330 + * speculative short-circuiting check to avoid unnecessarily taking 1331 + * filter->rwlock below, if the uprobe has system-wide consumer 1332 + */ 1333 + if (READ_ONCE(filter->nr_systemwide)) 1334 + return true; 1335 + 
1353 1336 read_lock(&filter->rwlock); 1354 1337 ret = __uprobe_perf_filter(filter, mm); 1355 1338 read_unlock(&filter->rwlock); ··· 1366 1335 1367 1336 static void __uprobe_perf_func(struct trace_uprobe *tu, 1368 1337 unsigned long func, struct pt_regs *regs, 1369 - struct uprobe_cpu_buffer *ucb, int dsize) 1338 + struct uprobe_cpu_buffer **ucbp) 1370 1339 { 1371 1340 struct trace_event_call *call = trace_probe_event_call(&tu->tp); 1372 1341 struct uprobe_trace_entry_head *entry; 1342 + struct uprobe_cpu_buffer *ucb; 1373 1343 struct hlist_head *head; 1374 1344 void *data; 1375 1345 int size, esize; ··· 1388 1356 1389 1357 esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); 1390 1358 1391 - size = esize + tu->tp.size + dsize; 1359 + ucb = prepare_uprobe_buffer(tu, regs, ucbp); 1360 + size = esize + ucb->dsize; 1392 1361 size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); 1393 1362 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) 1394 1363 return; ··· 1412 1379 data = DATAOF_TRACE_ENTRY(entry, false); 1413 1380 } 1414 1381 1415 - memcpy(data, ucb->buf, tu->tp.size + dsize); 1382 + memcpy(data, ucb->buf, ucb->dsize); 1416 1383 1417 - if (size - esize > tu->tp.size + dsize) { 1418 - int len = tu->tp.size + dsize; 1419 - 1420 - memset(data + len, 0, size - esize - len); 1421 - } 1384 + if (size - esize > ucb->dsize) 1385 + memset(data + ucb->dsize, 0, size - esize - ucb->dsize); 1422 1386 1423 1387 perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, 1424 1388 head, NULL); ··· 1425 1395 1426 1396 /* uprobe profile handler */ 1427 1397 static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, 1428 - struct uprobe_cpu_buffer *ucb, int dsize) 1398 + struct uprobe_cpu_buffer **ucbp) 1429 1399 { 1430 1400 if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) 1431 1401 return UPROBE_HANDLER_REMOVE; 1432 1402 1433 1403 if (!is_ret_probe(tu)) 1434 - __uprobe_perf_func(tu, 0, regs, ucb, dsize); 1404 + 
__uprobe_perf_func(tu, 0, regs, ucbp); 1435 1405 return 0; 1436 1406 } 1437 1407 1438 1408 static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, 1439 1409 struct pt_regs *regs, 1440 - struct uprobe_cpu_buffer *ucb, int dsize) 1410 + struct uprobe_cpu_buffer **ucbp) 1441 1411 { 1442 - __uprobe_perf_func(tu, func, regs, ucb, dsize); 1412 + __uprobe_perf_func(tu, func, regs, ucbp); 1443 1413 } 1444 1414 1445 1415 int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, ··· 1504 1474 { 1505 1475 struct trace_uprobe *tu; 1506 1476 struct uprobe_dispatch_data udd; 1507 - struct uprobe_cpu_buffer *ucb; 1508 - int dsize, esize; 1477 + struct uprobe_cpu_buffer *ucb = NULL; 1509 1478 int ret = 0; 1510 - 1511 1479 1512 1480 tu = container_of(con, struct trace_uprobe, consumer); 1513 1481 tu->nhit++; ··· 1518 1490 if (WARN_ON_ONCE(!uprobe_cpu_buffer)) 1519 1491 return 0; 1520 1492 1521 - dsize = __get_data_size(&tu->tp, regs, NULL); 1522 - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); 1523 - 1524 - ucb = uprobe_buffer_get(); 1525 - store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); 1526 - 1527 1493 if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) 1528 - ret |= uprobe_trace_func(tu, regs, ucb, dsize); 1494 + ret |= uprobe_trace_func(tu, regs, &ucb); 1529 1495 1530 1496 #ifdef CONFIG_PERF_EVENTS 1531 1497 if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) 1532 - ret |= uprobe_perf_func(tu, regs, ucb, dsize); 1498 + ret |= uprobe_perf_func(tu, regs, &ucb); 1533 1499 #endif 1534 1500 uprobe_buffer_put(ucb); 1535 1501 return ret; ··· 1534 1512 { 1535 1513 struct trace_uprobe *tu; 1536 1514 struct uprobe_dispatch_data udd; 1537 - struct uprobe_cpu_buffer *ucb; 1538 - int dsize, esize; 1515 + struct uprobe_cpu_buffer *ucb = NULL; 1539 1516 1540 1517 tu = container_of(con, struct trace_uprobe, consumer); 1541 1518 ··· 1546 1525 if (WARN_ON_ONCE(!uprobe_cpu_buffer)) 1547 1526 return 0; 1548 1527 1549 - dsize = __get_data_size(&tu->tp, 
regs, NULL); 1550 - esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); 1551 - 1552 - ucb = uprobe_buffer_get(); 1553 - store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); 1554 - 1555 1528 if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) 1556 - uretprobe_trace_func(tu, func, regs, ucb, dsize); 1529 + uretprobe_trace_func(tu, func, regs, &ucb); 1557 1530 1558 1531 #ifdef CONFIG_PERF_EVENTS 1559 1532 if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) 1560 - uretprobe_perf_func(tu, func, regs, ucb, dsize); 1533 + uretprobe_perf_func(tu, func, regs, &ucb); 1561 1534 #endif 1562 1535 uprobe_buffer_put(ucb); 1563 1536 return 0;
+6 -106
lib/objpool.c
··· 50 50 { 51 51 int i, cpu_count = 0; 52 52 53 - for (i = 0; i < pool->nr_cpus; i++) { 53 + for (i = 0; i < nr_cpu_ids; i++) { 54 54 55 55 struct objpool_slot *slot; 56 56 int nodes, size, rc; ··· 60 60 continue; 61 61 62 62 /* compute how many objects to be allocated with this slot */ 63 - nodes = nr_objs / num_possible_cpus(); 64 - if (cpu_count < (nr_objs % num_possible_cpus())) 63 + nodes = nr_objs / pool->nr_possible_cpus; 64 + if (cpu_count < (nr_objs % pool->nr_possible_cpus)) 65 65 nodes++; 66 66 cpu_count++; 67 67 ··· 103 103 if (!pool->cpu_slots) 104 104 return; 105 105 106 - for (i = 0; i < pool->nr_cpus; i++) 106 + for (i = 0; i < nr_cpu_ids; i++) 107 107 kvfree(pool->cpu_slots[i]); 108 108 kfree(pool->cpu_slots); 109 109 } ··· 130 130 131 131 /* initialize objpool pool */ 132 132 memset(pool, 0, sizeof(struct objpool_head)); 133 - pool->nr_cpus = nr_cpu_ids; 133 + pool->nr_possible_cpus = num_possible_cpus(); 134 134 pool->obj_size = object_size; 135 135 pool->capacity = capacity; 136 136 pool->gfp = gfp & ~__GFP_ZERO; 137 137 pool->context = context; 138 138 pool->release = release; 139 - slot_size = pool->nr_cpus * sizeof(struct objpool_slot); 139 + slot_size = nr_cpu_ids * sizeof(struct objpool_slot); 140 140 pool->cpu_slots = kzalloc(slot_size, pool->gfp); 141 141 if (!pool->cpu_slots) 142 142 return -ENOMEM; ··· 151 151 return rc; 152 152 } 153 153 EXPORT_SYMBOL_GPL(objpool_init); 154 - 155 - /* adding object to slot, abort if the slot was already full */ 156 - static inline int 157 - objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu) 158 - { 159 - struct objpool_slot *slot = pool->cpu_slots[cpu]; 160 - uint32_t head, tail; 161 - 162 - /* loading tail and head as a local snapshot, tail first */ 163 - tail = READ_ONCE(slot->tail); 164 - 165 - do { 166 - head = READ_ONCE(slot->head); 167 - /* fault caught: something must be wrong */ 168 - WARN_ON_ONCE(tail - head > pool->nr_objs); 169 - } while (!try_cmpxchg_acquire(&slot->tail, 
&tail, tail + 1)); 170 - 171 - /* now the tail position is reserved for the given obj */ 172 - WRITE_ONCE(slot->entries[tail & slot->mask], obj); 173 - /* update sequence to make this obj available for pop() */ 174 - smp_store_release(&slot->last, tail + 1); 175 - 176 - return 0; 177 - } 178 - 179 - /* reclaim an object to object pool */ 180 - int objpool_push(void *obj, struct objpool_head *pool) 181 - { 182 - unsigned long flags; 183 - int rc; 184 - 185 - /* disable local irq to avoid preemption & interruption */ 186 - raw_local_irq_save(flags); 187 - rc = objpool_try_add_slot(obj, pool, raw_smp_processor_id()); 188 - raw_local_irq_restore(flags); 189 - 190 - return rc; 191 - } 192 - EXPORT_SYMBOL_GPL(objpool_push); 193 - 194 - /* try to retrieve object from slot */ 195 - static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu) 196 - { 197 - struct objpool_slot *slot = pool->cpu_slots[cpu]; 198 - /* load head snapshot, other cpus may change it */ 199 - uint32_t head = smp_load_acquire(&slot->head); 200 - 201 - while (head != READ_ONCE(slot->last)) { 202 - void *obj; 203 - 204 - /* 205 - * data visibility of 'last' and 'head' could be out of 206 - * order since memory updating of 'last' and 'head' are 207 - * performed in push() and pop() independently 208 - * 209 - * before any retrieving attempts, pop() must guarantee 210 - * 'last' is behind 'head', that is to say, there must 211 - * be available objects in slot, which could be ensured 212 - * by condition 'last != head && last - head <= nr_objs' 213 - * that is equivalent to 'last - head - 1 < nr_objs' as 214 - * 'last' and 'head' are both unsigned int32 215 - */ 216 - if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) { 217 - head = READ_ONCE(slot->head); 218 - continue; 219 - } 220 - 221 - /* obj must be retrieved before moving forward head */ 222 - obj = READ_ONCE(slot->entries[head & slot->mask]); 223 - 224 - /* move head forward to mark it's consumption */ 225 - if 
(try_cmpxchg_release(&slot->head, &head, head + 1)) 226 - return obj; 227 - } 228 - 229 - return NULL; 230 - } 231 - 232 - /* allocate an object from object pool */ 233 - void *objpool_pop(struct objpool_head *pool) 234 - { 235 - void *obj = NULL; 236 - unsigned long flags; 237 - int i, cpu; 238 - 239 - /* disable local irq to avoid preemption & interruption */ 240 - raw_local_irq_save(flags); 241 - 242 - cpu = raw_smp_processor_id(); 243 - for (i = 0; i < num_possible_cpus(); i++) { 244 - obj = objpool_try_get_slot(pool, cpu); 245 - if (obj) 246 - break; 247 - cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1); 248 - } 249 - raw_local_irq_restore(flags); 250 - 251 - return obj; 252 - } 253 - EXPORT_SYMBOL_GPL(objpool_pop); 254 154 255 155 /* release whole objpool forcely */ 256 156 void objpool_free(struct objpool_head *pool)
+41
tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + # description: Fprobe event VFS type argument 4 + # requires: dynamic_events "%pd/%pD":README "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README 5 + 6 + 7 + : "Test argument %pd with name for fprobe" 8 + echo 'f:testprobe dput name=$arg1:%pd' > dynamic_events 9 + echo 1 > events/fprobes/testprobe/enable 10 + grep -q "1" events/fprobes/testprobe/enable 11 + echo 0 > events/fprobes/testprobe/enable 12 + grep "dput" trace | grep -q "enable" 13 + echo "" > dynamic_events 14 + echo "" > trace 15 + 16 + : "Test argument %pd without name for fprobe" 17 + echo 'f:testprobe dput $arg1:%pd' > dynamic_events 18 + echo 1 > events/fprobes/testprobe/enable 19 + grep -q "1" events/fprobes/testprobe/enable 20 + echo 0 > events/fprobes/testprobe/enable 21 + grep "dput" trace | grep -q "enable" 22 + echo "" > dynamic_events 23 + echo "" > trace 24 + 25 + : "Test argument %pD with name for fprobe" 26 + echo 'f:testprobe vfs_read name=$arg1:%pD' > dynamic_events 27 + echo 1 > events/fprobes/testprobe/enable 28 + grep -q "1" events/fprobes/testprobe/enable 29 + echo 0 > events/fprobes/testprobe/enable 30 + grep "vfs_read" trace | grep -q "enable" 31 + echo "" > dynamic_events 32 + echo "" > trace 33 + 34 + : "Test argument %pD without name for fprobe" 35 + echo 'f:testprobe vfs_read $arg1:%pD' > dynamic_events 36 + echo 1 > events/fprobes/testprobe/enable 37 + grep -q "1" events/fprobes/testprobe/enable 38 + echo 0 > events/fprobes/testprobe/enable 39 + grep "vfs_read" trace | grep -q "enable" 40 + echo "" > dynamic_events 41 + echo "" > trace
+40
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + # description: Kprobe event VFS type argument 4 + # requires: kprobe_events "%pd/%pD":README 5 + 6 + : "Test argument %pd with name" 7 + echo 'p:testprobe dput name=$arg1:%pd' > kprobe_events 8 + echo 1 > events/kprobes/testprobe/enable 9 + grep -q "1" events/kprobes/testprobe/enable 10 + echo 0 > events/kprobes/testprobe/enable 11 + grep "dput" trace | grep -q "enable" 12 + echo "" > kprobe_events 13 + echo "" > trace 14 + 15 + : "Test argument %pd without name" 16 + echo 'p:testprobe dput $arg1:%pd' > kprobe_events 17 + echo 1 > events/kprobes/testprobe/enable 18 + grep -q "1" events/kprobes/testprobe/enable 19 + echo 0 > events/kprobes/testprobe/enable 20 + grep "dput" trace | grep -q "enable" 21 + echo "" > kprobe_events 22 + echo "" > trace 23 + 24 + : "Test argument %pD with name" 25 + echo 'p:testprobe vfs_read name=$arg1:%pD' > kprobe_events 26 + echo 1 > events/kprobes/testprobe/enable 27 + grep -q "1" events/kprobes/testprobe/enable 28 + echo 0 > events/kprobes/testprobe/enable 29 + grep "vfs_read" trace | grep -q "enable" 30 + echo "" > kprobe_events 31 + echo "" > trace 32 + 33 + : "Test argument %pD without name" 34 + echo 'p:testprobe vfs_read $arg1:%pD' > kprobe_events 35 + echo 1 > events/kprobes/testprobe/enable 36 + grep -q "1" events/kprobes/testprobe/enable 37 + echo 0 > events/kprobes/testprobe/enable 38 + grep "vfs_read" trace | grep -q "enable" 39 + echo "" > kprobe_events 40 + echo "" > trace