Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fprobe: Rewrite fprobe on function-graph tracer

Rewrite the fprobe implementation on top of the function-graph tracer.
Major API changes are:
- 'nr_maxactive' field is deprecated.
- This depends on CONFIG_DYNAMIC_FTRACE_WITH_ARGS or
!CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and
CONFIG_HAVE_FUNCTION_GRAPH_FREGS, so it currently works only
on x86_64.
- Currently the entry size is limited to 15 * sizeof(long).
- If there are too many fprobe exit handlers set on the same
function, it will fail to probe.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Heiko Carstens <hca@linux.ibm.com> # s390
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Naveen N Rao <naveen@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: https://lore.kernel.org/173519003970.391279.14406792285453830996.stgit@devnote2
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Masami Hiramatsu (Google) and committed by
Steven Rostedt (Google)
4346ba16 7495e179

+545 -252
+6
arch/arm64/include/asm/ftrace.h
··· 135 135 return arch_ftrace_regs(fregs)->fp; 136 136 } 137 137 138 + static __always_inline unsigned long 139 + ftrace_regs_get_return_address(const struct ftrace_regs *fregs) 140 + { 141 + return arch_ftrace_regs(fregs)->lr; 142 + } 143 + 138 144 static __always_inline struct pt_regs * 139 145 ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs) 140 146 {
+6
arch/loongarch/include/asm/ftrace.h
··· 61 61 #define ftrace_regs_get_frame_pointer(fregs) \ 62 62 (arch_ftrace_regs(fregs)->regs.regs[22]) 63 63 64 + static __always_inline unsigned long 65 + ftrace_regs_get_return_address(struct ftrace_regs *fregs) 66 + { 67 + return *(unsigned long *)(arch_ftrace_regs(fregs)->regs.regs[1]); 68 + } 69 + 64 70 #define ftrace_graph_func ftrace_graph_func 65 71 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, 66 72 struct ftrace_ops *op, struct ftrace_regs *fregs);
+6
arch/powerpc/include/asm/ftrace.h
··· 57 57 regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); 58 58 } 59 59 60 + static __always_inline unsigned long 61 + ftrace_regs_get_return_address(struct ftrace_regs *fregs) 62 + { 63 + return arch_ftrace_regs(fregs)->regs.link; 64 + } 65 + 60 66 struct ftrace_ops; 61 67 62 68 #define ftrace_graph_func ftrace_graph_func
+5
arch/riscv/include/asm/ftrace.h
··· 186 186 return arch_ftrace_regs(fregs)->a0; 187 187 } 188 188 189 + static __always_inline unsigned long ftrace_regs_get_return_address(const struct ftrace_regs *fregs) 190 + { 191 + return arch_ftrace_regs(fregs)->ra; 192 + } 193 + 189 194 static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, 190 195 unsigned long ret) 191 196 {
+6
arch/s390/include/asm/ftrace.h
··· 77 77 return ftrace_regs_get_stack_pointer(fregs); 78 78 } 79 79 80 + static __always_inline unsigned long 81 + ftrace_regs_get_return_address(const struct ftrace_regs *fregs) 82 + { 83 + return arch_ftrace_regs(fregs)->regs.gprs[14]; 84 + } 85 + 80 86 #define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ 81 87 (_regs)->psw.mask = 0; \ 82 88 (_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr; \
+6
arch/x86/include/asm/ftrace.h
··· 58 58 do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) 59 59 60 60 61 + static __always_inline unsigned long 62 + ftrace_regs_get_return_address(struct ftrace_regs *fregs) 63 + { 64 + return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs); 65 + } 66 + 61 67 struct ftrace_ops; 62 68 #define ftrace_graph_func ftrace_graph_func 63 69 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+43 -15
include/linux/fprobe.h
··· 5 5 6 6 #include <linux/compiler.h> 7 7 #include <linux/ftrace.h> 8 - #include <linux/rethook.h> 8 + #include <linux/rcupdate.h> 9 + #include <linux/refcount.h> 10 + #include <linux/slab.h> 9 11 10 12 struct fprobe; 11 - 12 13 typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip, 13 14 unsigned long ret_ip, struct ftrace_regs *regs, 14 15 void *entry_data); ··· 19 18 void *entry_data); 20 19 21 20 /** 21 + * struct fprobe_hlist_node - address based hash list node for fprobe. 22 + * 23 + * @hlist: The hlist node for address search hash table. 24 + * @addr: One of the probing address of @fp. 25 + * @fp: The fprobe which owns this. 26 + */ 27 + struct fprobe_hlist_node { 28 + struct hlist_node hlist; 29 + unsigned long addr; 30 + struct fprobe *fp; 31 + }; 32 + 33 + /** 34 + * struct fprobe_hlist - hash list nodes for fprobe. 35 + * 36 + * @hlist: The hlist node for existence checking hash table. 37 + * @rcu: rcu_head for RCU deferred release. 38 + * @fp: The fprobe which owns this fprobe_hlist. 39 + * @size: The size of @array. 40 + * @array: The fprobe_hlist_node for each address to probe. 41 + */ 42 + struct fprobe_hlist { 43 + struct hlist_node hlist; 44 + struct rcu_head rcu; 45 + struct fprobe *fp; 46 + int size; 47 + struct fprobe_hlist_node array[] __counted_by(size); 48 + }; 49 + 50 + /** 22 51 * struct fprobe - ftrace based probe. 23 - * @ops: The ftrace_ops. 52 + * 24 53 * @nmissed: The counter for missing events. 25 54 * @flags: The status flag. 26 - * @rethook: The rethook data structure. (internal data) 27 55 * @entry_data_size: The private data storage size. 28 - * @nr_maxactive: The max number of active functions. 56 + * @nr_maxactive: The max number of active functions. (*deprecated) 29 57 * @entry_handler: The callback function for function entry. 30 58 * @exit_handler: The callback function for function exit. 59 + * @hlist_array: The fprobe_hlist for fprobe search from IP hash table. 
31 60 */ 32 61 struct fprobe { 33 - #ifdef CONFIG_FUNCTION_TRACER 34 - /* 35 - * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too. 36 - * But user of fprobe may keep embedding the struct fprobe on their own 37 - * code. To avoid build error, this will keep the fprobe data structure 38 - * defined here, but remove ftrace_ops data structure. 39 - */ 40 - struct ftrace_ops ops; 41 - #endif 42 62 unsigned long nmissed; 43 63 unsigned int flags; 44 - struct rethook *rethook; 45 64 size_t entry_data_size; 46 65 int nr_maxactive; 47 66 48 67 fprobe_entry_cb entry_handler; 49 68 fprobe_exit_cb exit_handler; 69 + 70 + struct fprobe_hlist *hlist_array; 50 71 }; 51 72 52 73 /* This fprobe is soft-disabled. */ ··· 143 120 if (fp) 144 121 fp->flags &= ~FPROBE_FL_DISABLED; 145 122 } 123 + 124 + /* The entry data size is 4 bits (=16) * sizeof(long) in maximum */ 125 + #define FPROBE_DATA_SIZE_BITS 4 126 + #define MAX_FPROBE_DATA_SIZE_WORD ((1L << FPROBE_DATA_SIZE_BITS) - 1) 127 + #define MAX_FPROBE_DATA_SIZE (MAX_FPROBE_DATA_SIZE_WORD * sizeof(long)) 146 128 147 129 #endif
+3 -5
kernel/trace/Kconfig
··· 307 307 308 308 config FPROBE 309 309 bool "Kernel Function Probe (fprobe)" 310 - depends on FUNCTION_TRACER 311 - depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS 312 - depends on HAVE_FTRACE_REGS_HAVING_PT_REGS || !HAVE_DYNAMIC_FTRACE_WITH_ARGS 313 - depends on HAVE_RETHOOK 314 - select RETHOOK 310 + depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC 311 + depends on DYNAMIC_FTRACE_WITH_ARGS 312 + select FUNCTION_GRAPH_TRACER 315 313 default n 316 314 help 317 315 This option enables kernel function probe (fprobe) based on ftrace.
+464 -187
kernel/trace/fprobe.c
··· 8 8 #include <linux/fprobe.h> 9 9 #include <linux/kallsyms.h> 10 10 #include <linux/kprobes.h> 11 - #include <linux/rethook.h> 11 + #include <linux/list.h> 12 + #include <linux/mutex.h> 12 13 #include <linux/slab.h> 13 14 #include <linux/sort.h> 14 15 15 16 #include "trace.h" 16 17 17 - struct fprobe_rethook_node { 18 - struct rethook_node node; 19 - unsigned long entry_ip; 20 - unsigned long entry_parent_ip; 21 - char data[]; 22 - }; 18 + #define FPROBE_IP_HASH_BITS 8 19 + #define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS) 23 20 24 - static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip, 25 - struct ftrace_ops *ops, struct ftrace_regs *fregs) 21 + #define FPROBE_HASH_BITS 6 22 + #define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS) 23 + 24 + #define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2)) 25 + 26 + /* 27 + * fprobe_table: hold 'fprobe_hlist::hlist' for checking the fprobe still 28 + * exists. The key is the address of fprobe instance. 29 + * fprobe_ip_table: hold 'fprobe_hlist::array[*]' for searching the fprobe 30 + * instance related to the funciton address. The key is the ftrace IP 31 + * address. 32 + * 33 + * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp 34 + * are set NULL and delete those from both hash tables (by hlist_del_rcu). 35 + * After an RCU grace period, the fprobe_hlist itself will be released. 36 + * 37 + * fprobe_table and fprobe_ip_table can be accessed from either 38 + * - Normal hlist traversal and RCU add/del under 'fprobe_mutex' is held. 39 + * - RCU hlist traversal under disabling preempt 40 + */ 41 + static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE]; 42 + static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE]; 43 + static DEFINE_MUTEX(fprobe_mutex); 44 + 45 + /* 46 + * Find first fprobe in the hlist. 
It will be iterated twice in the entry 47 + * probe, once for correcting the total required size, the second time is 48 + * calling back the user handlers. 49 + * Thus the hlist in the fprobe_table must be sorted and new probe needs to 50 + * be added *before* the first fprobe. 51 + */ 52 + static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip) 26 53 { 27 - struct fprobe_rethook_node *fpr; 28 - struct rethook_node *rh = NULL; 29 - struct fprobe *fp; 30 - void *entry_data = NULL; 31 - int ret = 0; 54 + struct fprobe_hlist_node *node; 55 + struct hlist_head *head; 32 56 33 - fp = container_of(ops, struct fprobe, ops); 34 - 35 - if (fp->exit_handler) { 36 - rh = rethook_try_get(fp->rethook); 37 - if (!rh) { 38 - fp->nmissed++; 39 - return; 40 - } 41 - fpr = container_of(rh, struct fprobe_rethook_node, node); 42 - fpr->entry_ip = ip; 43 - fpr->entry_parent_ip = parent_ip; 44 - if (fp->entry_data_size) 45 - entry_data = fpr->data; 57 + head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; 58 + hlist_for_each_entry_rcu(node, head, hlist, 59 + lockdep_is_held(&fprobe_mutex)) { 60 + if (node->addr == ip) 61 + return node; 46 62 } 63 + return NULL; 64 + } 65 + NOKPROBE_SYMBOL(find_first_fprobe_node); 47 66 48 - if (fp->entry_handler) 49 - ret = fp->entry_handler(fp, ip, parent_ip, fregs, entry_data); 67 + /* Node insertion and deletion requires the fprobe_mutex */ 68 + static void insert_fprobe_node(struct fprobe_hlist_node *node) 69 + { 70 + unsigned long ip = node->addr; 71 + struct fprobe_hlist_node *next; 72 + struct hlist_head *head; 50 73 51 - /* If entry_handler returns !0, nmissed is not counted. 
*/ 52 - if (rh) { 53 - if (ret) 54 - rethook_recycle(rh); 55 - else 56 - rethook_hook(rh, ftrace_get_regs(fregs), true); 74 + lockdep_assert_held(&fprobe_mutex); 75 + 76 + next = find_first_fprobe_node(ip); 77 + if (next) { 78 + hlist_add_before_rcu(&node->hlist, &next->hlist); 79 + return; 57 80 } 81 + head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; 82 + hlist_add_head_rcu(&node->hlist, head); 58 83 } 59 84 60 - static void fprobe_handler(unsigned long ip, unsigned long parent_ip, 61 - struct ftrace_ops *ops, struct ftrace_regs *fregs) 85 + /* Return true if there are synonims */ 86 + static bool delete_fprobe_node(struct fprobe_hlist_node *node) 62 87 { 63 - struct fprobe *fp; 64 - int bit; 88 + lockdep_assert_held(&fprobe_mutex); 65 89 66 - fp = container_of(ops, struct fprobe, ops); 67 - if (fprobe_disabled(fp)) 68 - return; 69 - 70 - /* recursion detection has to go before any traceable function and 71 - * all functions before this point should be marked as notrace 72 - */ 73 - bit = ftrace_test_recursion_trylock(ip, parent_ip); 74 - if (bit < 0) { 75 - fp->nmissed++; 76 - return; 77 - } 78 - __fprobe_handler(ip, parent_ip, ops, fregs); 79 - ftrace_test_recursion_unlock(bit); 80 - 90 + WRITE_ONCE(node->fp, NULL); 91 + hlist_del_rcu(&node->hlist); 92 + return !!find_first_fprobe_node(node->addr); 81 93 } 82 - NOKPROBE_SYMBOL(fprobe_handler); 83 94 84 - static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip, 85 - struct ftrace_ops *ops, struct ftrace_regs *fregs) 95 + /* Check existence of the fprobe */ 96 + static bool is_fprobe_still_exist(struct fprobe *fp) 86 97 { 87 - struct fprobe *fp; 88 - int bit; 98 + struct hlist_head *head; 99 + struct fprobe_hlist *fph; 89 100 90 - fp = container_of(ops, struct fprobe, ops); 91 - if (fprobe_disabled(fp)) 92 - return; 93 - 94 - /* recursion detection has to go before any traceable function and 95 - * all functions called before this point should be marked as notrace 96 - */ 97 
- bit = ftrace_test_recursion_trylock(ip, parent_ip); 98 - if (bit < 0) { 99 - fp->nmissed++; 100 - return; 101 + head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)]; 102 + hlist_for_each_entry_rcu(fph, head, hlist, 103 + lockdep_is_held(&fprobe_mutex)) { 104 + if (fph->fp == fp) 105 + return true; 101 106 } 107 + return false; 108 + } 109 + NOKPROBE_SYMBOL(is_fprobe_still_exist); 102 110 111 + static int add_fprobe_hash(struct fprobe *fp) 112 + { 113 + struct fprobe_hlist *fph = fp->hlist_array; 114 + struct hlist_head *head; 115 + 116 + lockdep_assert_held(&fprobe_mutex); 117 + 118 + if (WARN_ON_ONCE(!fph)) 119 + return -EINVAL; 120 + 121 + if (is_fprobe_still_exist(fp)) 122 + return -EEXIST; 123 + 124 + head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)]; 125 + hlist_add_head_rcu(&fp->hlist_array->hlist, head); 126 + return 0; 127 + } 128 + 129 + static int del_fprobe_hash(struct fprobe *fp) 130 + { 131 + struct fprobe_hlist *fph = fp->hlist_array; 132 + 133 + lockdep_assert_held(&fprobe_mutex); 134 + 135 + if (WARN_ON_ONCE(!fph)) 136 + return -EINVAL; 137 + 138 + if (!is_fprobe_still_exist(fp)) 139 + return -ENOENT; 140 + 141 + fph->fp = NULL; 142 + hlist_del_rcu(&fph->hlist); 143 + return 0; 144 + } 145 + 146 + /* Generic fprobe_header */ 147 + struct __fprobe_header { 148 + struct fprobe *fp; 149 + unsigned long size_words; 150 + } __packed; 151 + 152 + #define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header)) 153 + 154 + static inline bool write_fprobe_header(unsigned long *stack, 155 + struct fprobe *fp, unsigned int size_words) 156 + { 157 + struct __fprobe_header *fph = (struct __fprobe_header *)stack; 158 + 159 + if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD)) 160 + return false; 161 + 162 + fph->fp = fp; 163 + fph->size_words = size_words; 164 + return true; 165 + } 166 + 167 + static inline void read_fprobe_header(unsigned long *stack, 168 + struct fprobe **fp, unsigned int *size_words) 169 + { 170 + struct 
__fprobe_header *fph = (struct __fprobe_header *)stack; 171 + 172 + *fp = fph->fp; 173 + *size_words = fph->size_words; 174 + } 175 + 176 + /* 177 + * fprobe shadow stack management: 178 + * Since fprobe shares a single fgraph_ops, it needs to share the stack entry 179 + * among the probes on the same function exit. Note that a new probe can be 180 + * registered before a target function is returning, we can not use the hash 181 + * table to find the corresponding probes. Thus the probe address is stored on 182 + * the shadow stack with its entry data size. 183 + * 184 + */ 185 + static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip, 186 + struct fprobe *fp, struct ftrace_regs *fregs, 187 + void *data) 188 + { 189 + if (!fp->entry_handler) 190 + return 0; 191 + 192 + return fp->entry_handler(fp, ip, parent_ip, fregs, data); 193 + } 194 + 195 + static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip, 196 + struct fprobe *fp, struct ftrace_regs *fregs, 197 + void *data) 198 + { 199 + int ret; 103 200 /* 104 201 * This user handler is shared with other kprobes and is not expected to be 105 202 * called recursively. 
So if any other kprobe handler is running, this will ··· 205 108 */ 206 109 if (unlikely(kprobe_running())) { 207 110 fp->nmissed++; 208 - goto recursion_unlock; 111 + return 0; 209 112 } 210 113 211 114 kprobe_busy_begin(); 212 - __fprobe_handler(ip, parent_ip, ops, fregs); 115 + ret = __fprobe_handler(ip, parent_ip, fp, fregs, data); 213 116 kprobe_busy_end(); 214 - 215 - recursion_unlock: 216 - ftrace_test_recursion_unlock(bit); 117 + return ret; 217 118 } 218 119 219 - static void fprobe_exit_handler(struct rethook_node *rh, void *data, 220 - unsigned long ret_ip, struct pt_regs *regs) 120 + static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, 121 + struct ftrace_regs *fregs) 221 122 { 222 - struct fprobe *fp = (struct fprobe *)data; 223 - struct fprobe_rethook_node *fpr; 224 - struct ftrace_regs *fregs = (struct ftrace_regs *)regs; 225 - int bit; 123 + struct fprobe_hlist_node *node, *first; 124 + unsigned long *fgraph_data = NULL; 125 + unsigned long func = trace->func; 126 + unsigned long ret_ip; 127 + int reserved_words; 128 + struct fprobe *fp; 129 + int used, ret; 226 130 227 - if (!fp || fprobe_disabled(fp)) 228 - return; 131 + if (WARN_ON_ONCE(!fregs)) 132 + return 0; 229 133 230 - fpr = container_of(rh, struct fprobe_rethook_node, node); 134 + first = node = find_first_fprobe_node(func); 135 + if (unlikely(!first)) 136 + return 0; 137 + 138 + reserved_words = 0; 139 + hlist_for_each_entry_from_rcu(node, hlist) { 140 + if (node->addr != func) 141 + break; 142 + fp = READ_ONCE(node->fp); 143 + if (!fp || !fp->exit_handler) 144 + continue; 145 + /* 146 + * Since fprobe can be enabled until the next loop, we ignore the 147 + * fprobe's disabled flag in this loop. 
148 + */ 149 + reserved_words += 150 + FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size); 151 + } 152 + node = first; 153 + if (reserved_words) { 154 + fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long)); 155 + if (unlikely(!fgraph_data)) { 156 + hlist_for_each_entry_from_rcu(node, hlist) { 157 + if (node->addr != func) 158 + break; 159 + fp = READ_ONCE(node->fp); 160 + if (fp && !fprobe_disabled(fp)) 161 + fp->nmissed++; 162 + } 163 + return 0; 164 + } 165 + } 231 166 232 167 /* 233 - * we need to assure no calls to traceable functions in-between the 234 - * end of fprobe_handler and the beginning of fprobe_exit_handler. 168 + * TODO: recursion detection has been done in the fgraph. Thus we need 169 + * to add a callback to increment missed counter. 235 170 */ 236 - bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip); 237 - if (bit < 0) { 238 - fp->nmissed++; 171 + ret_ip = ftrace_regs_get_return_address(fregs); 172 + used = 0; 173 + hlist_for_each_entry_from_rcu(node, hlist) { 174 + int data_size; 175 + void *data; 176 + 177 + if (node->addr != func) 178 + break; 179 + fp = READ_ONCE(node->fp); 180 + if (!fp || fprobe_disabled(fp)) 181 + continue; 182 + 183 + data_size = fp->entry_data_size; 184 + if (data_size && fp->exit_handler) 185 + data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG; 186 + else 187 + data = NULL; 188 + 189 + if (fprobe_shared_with_kprobes(fp)) 190 + ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data); 191 + else 192 + ret = __fprobe_handler(func, ret_ip, fp, fregs, data); 193 + 194 + /* If entry_handler returns !0, nmissed is not counted but skips exit_handler. 
*/ 195 + if (!ret && fp->exit_handler) { 196 + int size_words = SIZE_IN_LONG(data_size); 197 + 198 + if (write_fprobe_header(&fgraph_data[used], fp, size_words)) 199 + used += FPROBE_HEADER_SIZE_IN_LONG + size_words; 200 + } 201 + } 202 + if (used < reserved_words) 203 + memset(fgraph_data + used, 0, reserved_words - used); 204 + 205 + /* If any exit_handler is set, data must be used. */ 206 + return used != 0; 207 + } 208 + NOKPROBE_SYMBOL(fprobe_entry); 209 + 210 + static void fprobe_return(struct ftrace_graph_ret *trace, 211 + struct fgraph_ops *gops, 212 + struct ftrace_regs *fregs) 213 + { 214 + unsigned long *fgraph_data = NULL; 215 + unsigned long ret_ip; 216 + struct fprobe *fp; 217 + int size, curr; 218 + int size_words; 219 + 220 + fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size); 221 + if (WARN_ON_ONCE(!fgraph_data)) 222 + return; 223 + size_words = SIZE_IN_LONG(size); 224 + ret_ip = ftrace_regs_get_instruction_pointer(fregs); 225 + 226 + preempt_disable(); 227 + 228 + curr = 0; 229 + while (size_words > curr) { 230 + read_fprobe_header(&fgraph_data[curr], &fp, &size); 231 + if (!fp) 232 + break; 233 + curr += FPROBE_HEADER_SIZE_IN_LONG; 234 + if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) { 235 + if (WARN_ON_ONCE(curr + size > size_words)) 236 + break; 237 + fp->exit_handler(fp, trace->func, ret_ip, fregs, 238 + size ? fgraph_data + curr : NULL); 239 + } 240 + curr += size; 241 + } 242 + preempt_enable(); 243 + } 244 + NOKPROBE_SYMBOL(fprobe_return); 245 + 246 + static struct fgraph_ops fprobe_graph_ops = { 247 + .entryfunc = fprobe_entry, 248 + .retfunc = fprobe_return, 249 + }; 250 + static int fprobe_graph_active; 251 + 252 + /* Add @addrs to the ftrace filter and register fgraph if needed. 
*/ 253 + static int fprobe_graph_add_ips(unsigned long *addrs, int num) 254 + { 255 + int ret; 256 + 257 + lockdep_assert_held(&fprobe_mutex); 258 + 259 + ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0); 260 + if (ret) 261 + return ret; 262 + 263 + if (!fprobe_graph_active) { 264 + ret = register_ftrace_graph(&fprobe_graph_ops); 265 + if (WARN_ON_ONCE(ret)) { 266 + ftrace_free_filter(&fprobe_graph_ops.ops); 267 + return ret; 268 + } 269 + } 270 + fprobe_graph_active++; 271 + return 0; 272 + } 273 + 274 + /* Remove @addrs from the ftrace filter and unregister fgraph if possible. */ 275 + static void fprobe_graph_remove_ips(unsigned long *addrs, int num) 276 + { 277 + lockdep_assert_held(&fprobe_mutex); 278 + 279 + fprobe_graph_active--; 280 + if (!fprobe_graph_active) { 281 + /* Q: should we unregister it ? */ 282 + unregister_ftrace_graph(&fprobe_graph_ops); 239 283 return; 240 284 } 241 285 242 - fp->exit_handler(fp, fpr->entry_ip, ret_ip, fregs, 243 - fp->entry_data_size ? 
(void *)fpr->data : NULL); 244 - ftrace_test_recursion_unlock(bit); 286 + ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); 245 287 } 246 - NOKPROBE_SYMBOL(fprobe_exit_handler); 247 288 248 289 static int symbols_cmp(const void *a, const void *b) 249 290 { ··· 411 176 return ERR_PTR(-ENOENT); 412 177 } 413 178 414 - static void fprobe_init(struct fprobe *fp) 415 - { 416 - fp->nmissed = 0; 417 - if (fprobe_shared_with_kprobes(fp)) 418 - fp->ops.func = fprobe_kprobe_handler; 419 - else 420 - fp->ops.func = fprobe_handler; 179 + struct filter_match_data { 180 + const char *filter; 181 + const char *notfilter; 182 + size_t index; 183 + size_t size; 184 + unsigned long *addrs; 185 + }; 421 186 422 - fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS; 187 + static int filter_match_callback(void *data, const char *name, unsigned long addr) 188 + { 189 + struct filter_match_data *match = data; 190 + 191 + if (!glob_match(match->filter, name) || 192 + (match->notfilter && glob_match(match->notfilter, name))) 193 + return 0; 194 + 195 + if (!ftrace_location(addr)) 196 + return 0; 197 + 198 + if (match->addrs) 199 + match->addrs[match->index] = addr; 200 + 201 + match->index++; 202 + return match->index == match->size; 423 203 } 424 204 425 - static int fprobe_init_rethook(struct fprobe *fp, int num) 205 + /* 206 + * Make IP list from the filter/no-filter glob patterns. 207 + * Return the number of matched symbols, or -ENOENT. 
208 + */ 209 + static int ip_list_from_filter(const char *filter, const char *notfilter, 210 + unsigned long *addrs, size_t size) 426 211 { 427 - int size; 212 + struct filter_match_data match = { .filter = filter, .notfilter = notfilter, 213 + .index = 0, .size = size, .addrs = addrs}; 214 + int ret; 428 215 429 - if (!fp->exit_handler) { 430 - fp->rethook = NULL; 431 - return 0; 432 - } 216 + ret = kallsyms_on_each_symbol(filter_match_callback, &match); 217 + if (ret < 0) 218 + return ret; 219 + ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); 220 + if (ret < 0) 221 + return ret; 433 222 434 - /* Initialize rethook if needed */ 435 - if (fp->nr_maxactive) 436 - num = fp->nr_maxactive; 437 - else 438 - num *= num_possible_cpus() * 2; 439 - if (num <= 0) 440 - return -EINVAL; 441 - 442 - size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size; 443 - 444 - /* Initialize rethook */ 445 - fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num); 446 - if (IS_ERR(fp->rethook)) 447 - return PTR_ERR(fp->rethook); 448 - 449 - return 0; 223 + return match.index ?: -ENOENT; 450 224 } 451 225 452 226 static void fprobe_fail_cleanup(struct fprobe *fp) 453 227 { 454 - if (!IS_ERR_OR_NULL(fp->rethook)) { 455 - /* Don't need to cleanup rethook->handler because this is not used. */ 456 - rethook_free(fp->rethook); 457 - fp->rethook = NULL; 458 - } 459 - ftrace_free_filter(&fp->ops); 228 + kfree(fp->hlist_array); 229 + fp->hlist_array = NULL; 460 230 } 231 + 232 + /* Initialize the fprobe data structure. 
*/ 233 + static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num) 234 + { 235 + struct fprobe_hlist *hlist_array; 236 + unsigned long addr; 237 + int size, i; 238 + 239 + if (!fp || !addrs || num <= 0) 240 + return -EINVAL; 241 + 242 + size = ALIGN(fp->entry_data_size, sizeof(long)); 243 + if (size > MAX_FPROBE_DATA_SIZE) 244 + return -E2BIG; 245 + fp->entry_data_size = size; 246 + 247 + hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL); 248 + if (!hlist_array) 249 + return -ENOMEM; 250 + 251 + fp->nmissed = 0; 252 + 253 + hlist_array->size = num; 254 + fp->hlist_array = hlist_array; 255 + hlist_array->fp = fp; 256 + for (i = 0; i < num; i++) { 257 + hlist_array->array[i].fp = fp; 258 + addr = ftrace_location(addrs[i]); 259 + if (!addr) { 260 + fprobe_fail_cleanup(fp); 261 + return -ENOENT; 262 + } 263 + hlist_array->array[i].addr = addr; 264 + } 265 + return 0; 266 + } 267 + 268 + #define FPROBE_IPS_MAX INT_MAX 461 269 462 270 /** 463 271 * register_fprobe() - Register fprobe to ftrace by pattern. 
··· 515 237 */ 516 238 int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) 517 239 { 518 - struct ftrace_hash *hash; 519 - unsigned char *str; 520 - int ret, len; 240 + unsigned long *addrs; 241 + int ret; 521 242 522 243 if (!fp || !filter) 523 244 return -EINVAL; 524 245 525 - fprobe_init(fp); 526 - 527 - len = strlen(filter); 528 - str = kstrdup(filter, GFP_KERNEL); 529 - ret = ftrace_set_filter(&fp->ops, str, len, 0); 530 - kfree(str); 531 - if (ret) 246 + ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX); 247 + if (ret < 0) 532 248 return ret; 533 249 534 - if (notfilter) { 535 - len = strlen(notfilter); 536 - str = kstrdup(notfilter, GFP_KERNEL); 537 - ret = ftrace_set_notrace(&fp->ops, str, len, 0); 538 - kfree(str); 539 - if (ret) 540 - goto out; 541 - } 250 + addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL); 251 + if (!addrs) 252 + return -ENOMEM; 253 + ret = ip_list_from_filter(filter, notfilter, addrs, ret); 254 + if (ret > 0) 255 + ret = register_fprobe_ips(fp, addrs, ret); 542 256 543 - /* TODO: 544 - * correctly calculate the total number of filtered symbols 545 - * from both filter and notfilter. 546 - */ 547 - hash = rcu_access_pointer(fp->ops.local_hash.filter_hash); 548 - if (WARN_ON_ONCE(!hash)) 549 - goto out; 550 - 551 - ret = fprobe_init_rethook(fp, (int)hash->count); 552 - if (!ret) 553 - ret = register_ftrace_function(&fp->ops); 554 - 555 - out: 556 - if (ret) 557 - fprobe_fail_cleanup(fp); 257 + kfree(addrs); 558 258 return ret; 559 259 } 560 260 EXPORT_SYMBOL_GPL(register_fprobe); ··· 540 284 /** 541 285 * register_fprobe_ips() - Register fprobe to ftrace by address. 542 286 * @fp: A fprobe data structure to be registered. 543 - * @addrs: An array of target ftrace location addresses. 287 + * @addrs: An array of target function address. 544 288 * @num: The number of entries of @addrs. 545 289 * 546 290 * Register @fp to ftrace for enabling the probe on the address given by @addrs. 
··· 552 296 */ 553 297 int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num) 554 298 { 555 - int ret; 299 + struct fprobe_hlist *hlist_array; 300 + int ret, i; 556 301 557 - if (!fp || !addrs || num <= 0) 558 - return -EINVAL; 559 - 560 - fprobe_init(fp); 561 - 562 - ret = ftrace_set_filter_ips(&fp->ops, addrs, num, 0, 0); 302 + ret = fprobe_init(fp, addrs, num); 563 303 if (ret) 564 304 return ret; 565 305 566 - ret = fprobe_init_rethook(fp, num); 567 - if (!ret) 568 - ret = register_ftrace_function(&fp->ops); 306 + mutex_lock(&fprobe_mutex); 307 + 308 + hlist_array = fp->hlist_array; 309 + ret = fprobe_graph_add_ips(addrs, num); 310 + if (!ret) { 311 + add_fprobe_hash(fp); 312 + for (i = 0; i < hlist_array->size; i++) 313 + insert_fprobe_node(&hlist_array->array[i]); 314 + } 315 + mutex_unlock(&fprobe_mutex); 569 316 570 317 if (ret) 571 318 fprobe_fail_cleanup(fp); 319 + 572 320 return ret; 573 321 } 574 322 EXPORT_SYMBOL_GPL(register_fprobe_ips); ··· 610 350 611 351 bool fprobe_is_registered(struct fprobe *fp) 612 352 { 613 - if (!fp || (fp->ops.saved_func != fprobe_handler && 614 - fp->ops.saved_func != fprobe_kprobe_handler)) 353 + if (!fp || !fp->hlist_array) 615 354 return false; 616 355 return true; 617 356 } 618 357 619 358 /** 620 - * unregister_fprobe() - Unregister fprobe from ftrace 359 + * unregister_fprobe() - Unregister fprobe. 621 360 * @fp: A fprobe data structure to be unregistered. 622 361 * 623 362 * Unregister fprobe (and remove ftrace hooks from the function entries). 
··· 625 366 */ 626 367 int unregister_fprobe(struct fprobe *fp) 627 368 { 628 - int ret; 369 + struct fprobe_hlist *hlist_array; 370 + unsigned long *addrs = NULL; 371 + int ret = 0, i, count; 629 372 630 - if (!fprobe_is_registered(fp)) 631 - return -EINVAL; 373 + mutex_lock(&fprobe_mutex); 374 + if (!fp || !is_fprobe_still_exist(fp)) { 375 + ret = -EINVAL; 376 + goto out; 377 + } 632 378 633 - if (!IS_ERR_OR_NULL(fp->rethook)) 634 - rethook_stop(fp->rethook); 379 + hlist_array = fp->hlist_array; 380 + addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL); 381 + if (!addrs) { 382 + ret = -ENOMEM; /* TODO: Fallback to one-by-one loop */ 383 + goto out; 384 + } 635 385 636 - ret = unregister_ftrace_function(&fp->ops); 637 - if (ret < 0) 638 - return ret; 386 + /* Remove non-synonim ips from table and hash */ 387 + count = 0; 388 + for (i = 0; i < hlist_array->size; i++) { 389 + if (!delete_fprobe_node(&hlist_array->array[i])) 390 + addrs[count++] = hlist_array->array[i].addr; 391 + } 392 + del_fprobe_hash(fp); 639 393 640 - if (!IS_ERR_OR_NULL(fp->rethook)) 641 - rethook_free(fp->rethook); 394 + if (count) 395 + fprobe_graph_remove_ips(addrs, count); 642 396 643 - ftrace_free_filter(&fp->ops); 397 + kfree_rcu(hlist_array, rcu); 398 + fp->hlist_array = NULL; 644 399 400 + out: 401 + mutex_unlock(&fprobe_mutex); 402 + 403 + kfree(addrs); 645 404 return ret; 646 405 } 647 406 EXPORT_SYMBOL_GPL(unregister_fprobe);
-45
lib/test_fprobe.c
··· 17 17 /* Use indirect calls to avoid inlining the target functions */ 18 18 static u32 (*target)(u32 value); 19 19 static u32 (*target2)(u32 value); 20 - static u32 (*target_nest)(u32 value, u32 (*nest)(u32)); 21 20 static unsigned long target_ip; 22 21 static unsigned long target2_ip; 23 - static unsigned long target_nest_ip; 24 22 static int entry_return_value; 25 23 26 24 static noinline u32 fprobe_selftest_target(u32 value) ··· 29 31 static noinline u32 fprobe_selftest_target2(u32 value) 30 32 { 31 33 return (value / div_factor) + 1; 32 - } 33 - 34 - static noinline u32 fprobe_selftest_nest_target(u32 value, u32 (*nest)(u32)) 35 - { 36 - return nest(value + 2); 37 34 } 38 35 39 36 static notrace int fp_entry_handler(struct fprobe *fp, unsigned long ip, ··· 70 77 KUNIT_EXPECT_EQ(current_test, *(u32 *)data, entry_val); 71 78 } else 72 79 KUNIT_EXPECT_NULL(current_test, data); 73 - } 74 - 75 - static notrace int nest_entry_handler(struct fprobe *fp, unsigned long ip, 76 - unsigned long ret_ip, 77 - struct ftrace_regs *fregs, void *data) 78 - { 79 - KUNIT_EXPECT_FALSE(current_test, preemptible()); 80 - return 0; 81 - } 82 - 83 - static notrace void nest_exit_handler(struct fprobe *fp, unsigned long ip, 84 - unsigned long ret_ip, 85 - struct ftrace_regs *fregs, void *data) 86 - { 87 - KUNIT_EXPECT_FALSE(current_test, preemptible()); 88 - KUNIT_EXPECT_EQ(current_test, ip, target_nest_ip); 89 80 } 90 81 91 82 /* Test entry only (no rethook) */ ··· 168 191 KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp)); 169 192 } 170 193 171 - /* Test nr_maxactive */ 172 - static void test_fprobe_nest(struct kunit *test) 173 - { 174 - static const char *syms[] = {"fprobe_selftest_target", "fprobe_selftest_nest_target"}; 175 - struct fprobe fp = { 176 - .entry_handler = nest_entry_handler, 177 - .exit_handler = nest_exit_handler, 178 - .nr_maxactive = 1, 179 - }; 180 - 181 - current_test = test; 182 - KUNIT_EXPECT_EQ(test, 0, register_fprobe_syms(&fp, syms, 2)); 183 - 184 - 
target_nest(rand1, target); 185 - KUNIT_EXPECT_EQ(test, 1, fp.nmissed); 186 - 187 - KUNIT_EXPECT_EQ(test, 0, unregister_fprobe(&fp)); 188 - } 189 - 190 194 static void test_fprobe_skip(struct kunit *test) 191 195 { 192 196 struct fprobe fp = { ··· 205 247 rand1 = get_random_u32_above(div_factor); 206 248 target = fprobe_selftest_target; 207 249 target2 = fprobe_selftest_target2; 208 - target_nest = fprobe_selftest_nest_target; 209 250 target_ip = get_ftrace_location(target); 210 251 target2_ip = get_ftrace_location(target2); 211 - target_nest_ip = get_ftrace_location(target_nest); 212 252 213 253 return 0; 214 254 } ··· 216 260 KUNIT_CASE(test_fprobe), 217 261 KUNIT_CASE(test_fprobe_syms), 218 262 KUNIT_CASE(test_fprobe_data), 219 - KUNIT_CASE(test_fprobe_nest), 220 263 KUNIT_CASE(test_fprobe_skip), 221 264 {} 222 265 };