#ifndef __BPF_EXPERIMENTAL__
#define __BPF_EXPERIMENTAL__

#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))

extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;

/* Convenience macro to wrap over bpf_obj_new_impl */
#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))

extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym;

/* Convenience macro to wrap over bpf_percpu_obj_new_impl */
#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL))
struct bpf_iter_task_vma;

extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
				 struct task_struct *task,
				 __u64 addr) __ksym;
extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
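
/* Example (a sketch, not part of this header): walk the VMAs of a trusted
 * task with the open-coded iterator; 'task' is assumed to come from e.g.
 * bpf_get_current_task_btf():
 *
 *	struct bpf_iter_task_vma vma_it;
 *	struct vm_area_struct *vma;
 *
 *	bpf_iter_task_vma_new(&vma_it, task, 0);
 *	while ((vma = bpf_iter_task_vma_next(&vma_it)))
 *		bpf_printk("vma %lx-%lx", vma->vm_start, vma->vm_end);
 *	bpf_iter_task_vma_destroy(&vma_it);
 */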

/* Description
 *	Throw a BPF exception from the program, immediately terminating its
 *	execution and unwinding the stack. The supplied 'cookie' parameter
 *	will be the return value of the program when an exception is thrown,
 *	and the default exception callback is used. Otherwise, if an exception
 *	callback is set using the '__exception_cb(callback)' declaration tag
 *	on the main program, the 'cookie' parameter will be the callback's only
 *	input argument.
 *
 *	Thus, in case of the default exception callback, 'cookie' is subjected
 *	to constraints on the program's return value (as with R0 on exit).
 *	Otherwise, the return value of the marked exception callback will be
 *	subjected to the same checks.
 *
 *	Note that throwing an exception with lingering resources (locks,
 *	references, etc.) will lead to a verification error.
 *
 *	Note that callbacks *cannot* call this helper.
 * Returns
 *	Never.
 * Throws
 *	An exception with the specified 'cookie' value.
 */
extern void bpf_throw(u64 cookie) __ksym;
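
/* Example (a sketch): abort on a condition the rest of the program cannot
 * handle. With the default exception callback the cookie becomes the
 * program's return value, so it must satisfy the usual exit-value
 * constraints of the program type:
 *
 *	if (!validate_input(ctx))	// validate_input() is a placeholder
 *		bpf_throw(0);
 */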

/* Description
 *	Acquire a reference on the exe_file member field belonging to the
 *	mm_struct that is nested within the supplied task_struct. The supplied
 *	task_struct must be trusted/referenced.
 * Returns
 *	A referenced file pointer pointing to the exe_file member field of the
 *	mm_struct nested in the supplied task_struct, or NULL.
 */
extern struct file *bpf_get_task_exe_file(struct task_struct *task) __ksym;

/* Description
 *	Release a reference on the supplied file. The supplied file must be
 *	acquired.
 */
extern void bpf_put_file(struct file *file) __ksym;

/* Description
 *	Resolve a pathname for the supplied path and store it in the supplied
 *	buffer. The supplied path must be trusted/referenced.
 * Returns
 *	A positive integer corresponding to the length of the resolved pathname,
 *	including the NULL termination character, stored in the supplied
 *	buffer. On error, a negative integer is returned.
 */
extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
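
/* Example (a sketch, assuming a program type where these kfuncs are
 * allowed, e.g. an LSM program): resolve the path of the current task's
 * executable into a stack buffer:
 *
 *	struct task_struct *task = bpf_get_current_task_btf();
 *	struct file *exe_file;
 *	char buf[256];
 *	int len;
 *
 *	exe_file = bpf_get_task_exe_file(task);
 *	if (!exe_file)
 *		return 0;
 *	len = bpf_path_d_path(&exe_file->f_path, buf, sizeof(buf));
 *	if (len > 0)
 *		bpf_printk("exe: %s", buf);
 *	bpf_put_file(exe_file);
 */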

/* This macro must be used to mark the exception callback corresponding to the
 * main program. For example:
 *
 * int exception_cb(u64 cookie) {
 *	return cookie;
 * }
 *
 * SEC("tc")
 * __exception_cb(exception_cb)
 * int main_prog(struct __sk_buff *ctx) {
 *	...
 *	return TC_ACT_OK;
 * }
 *
 * Here, the exception callback for the main program will be 'exception_cb'.
 * Note that this attribute can only be used once, and multiple exception
 * callbacks specified for the main program will lead to a verification error.
 */
#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name)))

#define __bpf_assert_signed(x) _Generic((x), \
	unsigned long: 0, \
	unsigned long long: 0, \
	signed long: 1, \
	signed long long: 1 \
)

#define __bpf_assert_check(LHS, op, RHS) \
	_Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
	_Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n"); \
	_Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert"); \
	_Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression")

#define __bpf_assert(LHS, op, cons, RHS, VAL) \
	({ \
		(void)bpf_throw; \
		asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw" \
			      : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : ); \
	})

#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign) \
	({ \
		__bpf_assert_check(LHS, op, RHS); \
		if (__bpf_assert_signed(LHS) && !(supp_sign)) \
			__bpf_assert(LHS, "s" #op, cons, RHS, VAL); \
		else \
			__bpf_assert(LHS, #op, cons, RHS, VAL); \
	})

#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign) \
	({ \
		if (sizeof(typeof(RHS)) == 8) { \
			const typeof(RHS) rhs_var = (RHS); \
			__bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign); \
		} else { \
			__bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign); \
		} \
	})

#define __cmp_cannot_be_signed(x) \
	__builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
	__builtin_strcmp(#x, "&") == 0

#define __is_signed_type(type) (((type)(-1)) < (type)1)

#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \
	({ \
		__label__ l_true; \
		bool ret = DEFAULT; \
		asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \
				  :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
		ret = !DEFAULT; \
l_true: \
		ret; \
	})

/* C type conversions coupled with a comparison operator are tricky.
 * Make sure the BPF program is compiled with -Wsign-compare; then
 * '__lhs OP __rhs' below will catch the mistake.
 * Be aware that we check only __lhs to figure out the sign of the compare.
 */
#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \
	({ \
		typeof(LHS) __lhs = (LHS); \
		typeof(RHS) __rhs = (RHS); \
		bool ret; \
		_Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
		(void)(__lhs OP __rhs); \
		if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \
			if (sizeof(__rhs) == 8) \
				/* "i" will truncate a 64-bit constant into s32, \
				 * so we have to use an extra register via "r". \
				 */ \
				ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \
			else \
				ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \
		} else { \
			if (sizeof(__rhs) == 8) \
				ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \
			else \
				ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \
		} \
		ret; \
	})

#ifndef bpf_cmp_unlikely
#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
#endif

#ifndef bpf_cmp_likely
#define bpf_cmp_likely(LHS, OP, RHS) \
	({ \
		bool ret = 0; \
		if (__builtin_strcmp(#OP, "==") == 0) \
			ret = _bpf_cmp(LHS, !=, RHS, false); \
		else if (__builtin_strcmp(#OP, "!=") == 0) \
			ret = _bpf_cmp(LHS, ==, RHS, false); \
		else if (__builtin_strcmp(#OP, "<=") == 0) \
			ret = _bpf_cmp(LHS, >, RHS, false); \
		else if (__builtin_strcmp(#OP, "<") == 0) \
			ret = _bpf_cmp(LHS, >=, RHS, false); \
		else if (__builtin_strcmp(#OP, ">") == 0) \
			ret = _bpf_cmp(LHS, <=, RHS, false); \
		else if (__builtin_strcmp(#OP, ">=") == 0) \
			ret = _bpf_cmp(LHS, <, RHS, false); \
		else \
			asm volatile("r0 " #OP " invalid compare"); \
		ret; \
	})
#endif
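
/* Example (a sketch): bpf_cmp_unlikely() emits the comparison as a single
 * conditional jump with the fall-through on the likely path, which also
 * keeps the verifier's bounds on 'len' precise:
 *
 *	if (bpf_cmp_unlikely(len, >, 100))
 *		return 0;
 *	// the verifier now knows len <= 100
 */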

/*
 * Note that cond_break can only be portably used in the body of a breakable
 * construct, whereas can_loop can be used anywhere.
 */
#ifdef __BPF_FEATURE_MAY_GOTO
#define can_loop \
	({ __label__ l_break, l_continue; \
	bool ret = true; \
	asm volatile goto("may_goto %l[l_break]" \
			  :::: l_break); \
	goto l_continue; \
	l_break: ret = false; \
	l_continue:; \
	ret; \
	})

#define __cond_break(expr) \
	({ __label__ l_break, l_continue; \
	asm volatile goto("may_goto %l[l_break]" \
			  :::: l_break); \
	goto l_continue; \
	l_break: expr; \
	l_continue:; \
	})
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define can_loop \
	({ __label__ l_break, l_continue; \
	bool ret = true; \
	asm volatile goto("1:.byte 0xe5; \
			  .byte 0; \
			  .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
			  .short 0" \
			  :::: l_break); \
	goto l_continue; \
	l_break: ret = false; \
	l_continue:; \
	ret; \
	})

#define __cond_break(expr) \
	({ __label__ l_break, l_continue; \
	asm volatile goto("1:.byte 0xe5; \
			  .byte 0; \
			  .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
			  .short 0" \
			  :::: l_break); \
	goto l_continue; \
	l_break: expr; \
	l_continue:; \
	})
#else
#define can_loop \
	({ __label__ l_break, l_continue; \
	bool ret = true; \
	asm volatile goto("1:.byte 0xe5; \
			  .byte 0; \
			  .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
			  .short 0" \
			  :::: l_break); \
	goto l_continue; \
	l_break: ret = false; \
	l_continue:; \
	ret; \
	})

#define __cond_break(expr) \
	({ __label__ l_break, l_continue; \
	asm volatile goto("1:.byte 0xe5; \
			  .byte 0; \
			  .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
			  .short 0" \
			  :::: l_break); \
	goto l_continue; \
	l_break: expr; \
	l_continue:; \
	})
#endif
#endif

#define cond_break __cond_break(break)
#define cond_break_label(label) __cond_break(goto label)
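
/* Example (a sketch): bound an otherwise open-ended loop so the verifier
 * can prove termination; can_loop works as a plain condition, while
 * cond_break must sit inside a breakable construct:
 *
 *	while (can_loop) {
 *		if (do_step() < 0)	// do_step() is a placeholder
 *			break;
 *	}
 *
 *	for (;;) {
 *		cond_break;
 *		...
 *	}
 */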

#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
	asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
#endif

/* emit instruction:
 * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
 */
#ifndef bpf_addr_space_cast
#define bpf_addr_space_cast(var, dst_as, src_as)\
	asm volatile(".byte 0xBF; \
		     .ifc %[reg], r0; \
		     .byte 0x00; \
		     .endif; \
		     .ifc %[reg], r1; \
		     .byte 0x11; \
		     .endif; \
		     .ifc %[reg], r2; \
		     .byte 0x22; \
		     .endif; \
		     .ifc %[reg], r3; \
		     .byte 0x33; \
		     .endif; \
		     .ifc %[reg], r4; \
		     .byte 0x44; \
		     .endif; \
		     .ifc %[reg], r5; \
		     .byte 0x55; \
		     .endif; \
		     .ifc %[reg], r6; \
		     .byte 0x66; \
		     .endif; \
		     .ifc %[reg], r7; \
		     .byte 0x77; \
		     .endif; \
		     .ifc %[reg], r8; \
		     .byte 0x88; \
		     .endif; \
		     .ifc %[reg], r9; \
		     .byte 0x99; \
		     .endif; \
		     .short %[off]; \
		     .long %[as]" \
		     : [reg]"+r"(var) \
		     : [off]"i"(BPF_ADDR_SPACE_CAST) \
		     , [as]"i"((dst_as << 16) | src_as));
#endif
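
/* Example (a sketch, following the convention used by arena programs of
 * casting pointers from address space 1 back to address space 0 before
 * dereferencing them):
 *
 *	p = ...;			// pointer obtained from an arena map
 *	bpf_addr_space_cast(p, 0, 1);
 *	*(u64 *)p = 42;
 */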

void bpf_preempt_disable(void) __weak __ksym;
void bpf_preempt_enable(void) __weak __ksym;

typedef struct {
} __bpf_preempt_t;

static inline __bpf_preempt_t __bpf_preempt_constructor(void)
{
	__bpf_preempt_t ret = {};

	bpf_preempt_disable();
	return ret;
}
static inline void __bpf_preempt_destructor(__bpf_preempt_t *t)
{
	bpf_preempt_enable();
}
#define bpf_guard_preempt() \
	__bpf_preempt_t ___bpf_apply(preempt, __COUNTER__) \
	__attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) = \
	__bpf_preempt_constructor()
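
/* Example (a sketch): the guard disables preemption for the rest of the
 * enclosing scope and re-enables it automatically via the cleanup
 * attribute:
 *
 *	{
 *		bpf_guard_preempt();
 *		...			// preemption disabled here
 *	}				// bpf_preempt_enable() runs here
 */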

/* Description
 *	Assert that a conditional expression is true.
 * Returns
 *	Void.
 * Throws
 *	An exception with the value zero when the assertion fails.
 */
#define bpf_assert(cond) if (!(cond)) bpf_throw(0);

/* Description
 *	Assert that a conditional expression is true.
 * Returns
 *	Void.
 * Throws
 *	An exception with the specified value when the assertion fails.
 */
#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value);
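
/* Example (a sketch): both asserts terminate the program through
 * bpf_throw() when the condition is false; the second supplies a non-zero
 * cookie:
 *
 *	bpf_assert(idx < 64);
 *	bpf_assert_with(ptr != NULL, 1);
 */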

/* Description
 *	Assert that LHS is in the range [BEG, END] (inclusive of both). This
 *	statement updates the known bounds of LHS during verification. Note
 *	that both BEG and END must be constant values, and must fit within the
 *	data type of LHS.
 * Returns
 *	Void.
 * Throws
 *	An exception with the value zero when the assertion fails.
 */
#define bpf_assert_range(LHS, BEG, END) \
	({ \
		_Static_assert(BEG <= END, "BEG must be <= END"); \
		barrier_var(LHS); \
		__bpf_assert_op(LHS, >=, BEG, 0, false); \
		__bpf_assert_op(LHS, <=, END, 0, false); \
	})

/* Description
 *	Assert that LHS is in the range [BEG, END] (inclusive of both). This
 *	statement updates the known bounds of LHS during verification. Note
 *	that both BEG and END must be constant values, and must fit within the
 *	data type of LHS.
 * Returns
 *	Void.
 * Throws
 *	An exception with the specified value when the assertion fails.
 */
#define bpf_assert_range_with(LHS, BEG, END, value) \
	({ \
		_Static_assert(BEG <= END, "BEG must be <= END"); \
		barrier_var(LHS); \
		__bpf_assert_op(LHS, >=, BEG, value, false); \
		__bpf_assert_op(LHS, <=, END, value, false); \
	})
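
/* Example (a sketch): after the assert, the verifier knows 'idx' lies in
 * [0, 15], so the array access below verifies without extra checks; note
 * that 'idx' must be an 8-byte integer:
 *
 *	u64 idx = ...;
 *	bpf_assert_range(idx, 0, 15);
 *	val = arr[idx];			// 'arr' has at least 16 elements
 */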

struct bpf_iter_css_task;
struct cgroup_subsys_state;
extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
				 struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym;
extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;

struct bpf_iter_task;
extern int bpf_iter_task_new(struct bpf_iter_task *it,
			     struct task_struct *task, unsigned int flags) __weak __ksym;
extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
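
/* Example (a sketch): walk all processes; the NULL task plus flags value 0
 * (BPF_TASK_ITER_ALL_PROCS in current kernels, which is an assumption
 * here) iterates system-wide, and the walk must run under
 * bpf_rcu_read_lock():
 *
 *	struct bpf_iter_task task_it;
 *	struct task_struct *cur;
 *
 *	bpf_rcu_read_lock();
 *	bpf_iter_task_new(&task_it, NULL, 0);
 *	while ((cur = bpf_iter_task_next(&task_it)))
 *		bpf_printk("pid %d", cur->pid);
 *	bpf_iter_task_destroy(&task_it);
 *	bpf_rcu_read_unlock();
 */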

struct bpf_iter_css;
extern int bpf_iter_css_new(struct bpf_iter_css *it,
			    struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;

extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
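
/* Example (a sketch; 'array', 'key', and the map value layout with an
 * embedded 'struct bpf_wq w' field are assumptions, and the callback is
 * assumed to be registered with bpf_wq_set_callback(), which upstream
 * declares elsewhere in this header):
 *
 *	struct elem *val = bpf_map_lookup_elem(&array, &key);
 *
 *	if (!val)
 *		return 0;
 *	if (bpf_wq_init(&val->w, &array, 0))
 *		return 0;
 *	if (bpf_wq_start(&val->w, 0))
 *		return 0;
 */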

struct bpf_iter_kmem_cache;
extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
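
/* Example (a sketch): enumerate the slab caches on the system:
 *
 *	struct bpf_iter_kmem_cache kit;
 *	struct kmem_cache *s;
 *
 *	bpf_iter_kmem_cache_new(&kit);
 *	while ((s = bpf_iter_kmem_cache_next(&kit)))
 *		bpf_printk("cache %s", s->name);
 *	bpf_iter_kmem_cache_destroy(&kit);
 */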

struct bpf_iter_dmabuf;
extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;

extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
				 struct bpf_dynptr *value_p) __weak __ksym;
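
/* Example (a sketch; "user.test" is a placeholder attribute name): read a
 * cgroup xattr into a stack buffer through a dynptr:
 *
 *	char val[64];
 *	struct bpf_dynptr ptr;
 *
 *	bpf_dynptr_from_mem(val, sizeof(val), 0, &ptr);
 *	if (bpf_cgroup_read_xattr(cgrp, "user.test", &ptr) >= 0)
 *		bpf_printk("xattr: %s", val);
 */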

#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8
#define HARDIRQ_BITS	4
#define NMI_BITS	4

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
#define NMI_SHIFT	(HARDIRQ_SHIFT + HARDIRQ_BITS)

#define __IRQ_MASK(x)	((1UL << (x))-1)

#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK	(__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)

extern bool CONFIG_PREEMPT_RT __kconfig __weak;
#ifdef bpf_target_x86
extern const int __preempt_count __ksym __weak;

struct pcpu_hot___local {
	int preempt_count;
} __attribute__((preserve_access_index));

extern struct pcpu_hot___local pcpu_hot __ksym __weak;
#endif

struct task_struct___preempt_rt {
	int softirq_disable_cnt;
} __attribute__((preserve_access_index));

#ifdef bpf_target_s390
extern struct lowcore *bpf_get_lowcore(void) __weak __ksym;
#endif

static inline int get_preempt_count(void)
{
#if defined(bpf_target_x86)
	/* By default, read the per-CPU __preempt_count. */
	if (bpf_ksym_exists(&__preempt_count))
		return *(int *) bpf_this_cpu_ptr(&__preempt_count);

	/*
	 * If __preempt_count does not exist, try to read preempt_count from
	 * struct pcpu_hot. From v6.2 to v6.14, i.e. in the commit range
	 * [64701838bf057, 46e8fff6d45fe), preempt_count was maintained in
	 * struct pcpu_hot.
	 */
	if (bpf_core_field_exists(pcpu_hot.preempt_count))
		return ((struct pcpu_hot___local *)
			bpf_this_cpu_ptr(&pcpu_hot))->preempt_count;
#elif defined(bpf_target_arm64)
	return bpf_get_current_task_btf()->thread_info.preempt.count;
#elif defined(bpf_target_powerpc)
	return bpf_get_current_task_btf()->thread_info.preempt_count;
#elif defined(bpf_target_s390)
	return bpf_get_lowcore()->preempt_count;
#endif
	return 0;
}

/* Description
 *	Report whether the program is running in interrupt context. Only works
 *	on the following archs:
 *	* x86
 *	* arm64
 *	* powerpc64
 *	* s390x
 */
static inline int bpf_in_interrupt(void)
{
	struct task_struct___preempt_rt *tsk;
	int pcnt;

	pcnt = get_preempt_count();
	if (!CONFIG_PREEMPT_RT)
		return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);

	tsk = (void *) bpf_get_current_task_btf();
	return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
	       (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
}

/* Description
 *	Report whether the program is running in NMI context. Only works on
 *	the following archs:
 *	* x86
 *	* arm64
 *	* powerpc64
 *	* s390x
 */
static inline int bpf_in_nmi(void)
{
	return get_preempt_count() & NMI_MASK;
}

/* Description
 *	Report whether the program is running in hard IRQ context. Only works
 *	on the following archs:
 *	* x86
 *	* arm64
 *	* powerpc64
 *	* s390x
 */
static inline int bpf_in_hardirq(void)
{
	return get_preempt_count() & HARDIRQ_MASK;
}

/* Description
 *	Report whether the program is running in softirq context. Only works
 *	on the following archs:
 *	* x86
 *	* arm64
 *	* powerpc64
 *	* s390x
 */
static inline int bpf_in_serving_softirq(void)
{
	struct task_struct___preempt_rt *tsk;
	int pcnt;

	pcnt = get_preempt_count();
	if (!CONFIG_PREEMPT_RT)
		return (pcnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;

	tsk = (void *) bpf_get_current_task_btf();
	return (tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET;
}

/* Description
 *	Report whether the program is running in task context. Only works on
 *	the following archs:
 *	* x86
 *	* arm64
 *	* powerpc64
 *	* s390x
 */
static inline int bpf_in_task(void)
{
	struct task_struct___preempt_rt *tsk;
	int pcnt;

	pcnt = get_preempt_count();
	if (!CONFIG_PREEMPT_RT)
		return !(pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));

	tsk = (void *) bpf_get_current_task_btf();
	return !((pcnt & (NMI_MASK | HARDIRQ_MASK)) |
		 ((tsk->softirq_disable_cnt & SOFTIRQ_MASK) & SOFTIRQ_OFFSET));
}
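
/* Example (a sketch): a probe that may fire from IRQ or NMI context can
 * use these helpers to bail out early:
 *
 *	if (!bpf_in_task())
 *		return 0;
 *	// task-context-only logic follows
 */
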
#endif /* __BPF_EXPERIMENTAL__ */