Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Implement accurate raw_tp context access via BTF

libbpf analyzes the BPF C program, searches the in-kernel BTF for the given
type name, and stores the resulting btf_id into expected_attach_type.
The kernel verifier expects this btf_id to point to something like:
typedef void (*btf_trace_kfree_skb)(void *, struct sk_buff *skb, void *loc);
which represents signature of raw_tracepoint "kfree_skb".

Then btf_ctx_access() matches the ctx+0 access in the BPF program with the
'skb' argument and the ctx+8 access with the 'loc' argument of the
"kfree_skb" tracepoint. In the first case it passes the btf_id of the type
behind 'struct sk_buff *' back to the verifier core; in the second case the
'void *' argument is treated like a scalar.

Then the verifier tracks PTR_TO_BTF_ID as any other pointer type.
Like PTR_TO_SOCKET points to 'struct bpf_sock',
PTR_TO_TCP_SOCK points to 'struct bpf_tcp_sock', and so on.
PTR_TO_BTF_ID points to in-kernel structs.
If 1234 is the btf_id of 'struct sk_buff' in vmlinux's BTF,
then PTR_TO_BTF_ID#1234 points to one of the in-kernel skbs.

When PTR_TO_BTF_ID#1234 is dereferenced (like r2 = *(u64 *)(r1 + 32)),
btf_struct_access() checks which field of 'struct sk_buff' is
at offset 32, checks that the size of the access matches the type definition
of the field, and continues to track the dereferenced type.
If that field is a pointer to 'struct net_device', r2's type
will be PTR_TO_BTF_ID#456, where 456 is the btf_id of 'struct net_device'
in vmlinux's BTF.

Such verifier analysis prevents "cheating" in a BPF C program.
The program cannot cast an arbitrary pointer to 'struct sk_buff *'
and access it. The C compiler would allow the type cast, of course,
but the verifier will notice the type mismatch based on the BPF assembly
and the in-kernel BTF.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20191016032505.2089704-7-ast@kernel.org

authored by

Alexei Starovoitov and committed by
Daniel Borkmann
9e15db66 f75a697e

+296 -5
+16 -1
include/linux/bpf.h
··· 16 16 #include <linux/u64_stats_sync.h> 17 17 18 18 struct bpf_verifier_env; 19 + struct bpf_verifier_log; 19 20 struct perf_event; 20 21 struct bpf_prog; 21 22 struct bpf_map; ··· 282 281 PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ 283 282 PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ 284 283 PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ 284 + PTR_TO_BTF_ID, /* reg points to kernel struct */ 285 285 }; 286 286 287 287 /* The information passed from prog-specific *_is_valid_access ··· 290 288 */ 291 289 struct bpf_insn_access_aux { 292 290 enum bpf_reg_type reg_type; 293 - int ctx_field_size; 291 + union { 292 + int ctx_field_size; 293 + u32 btf_id; 294 + }; 295 + struct bpf_verifier_log *log; /* for verbose logs */ 294 296 }; 295 297 296 298 static inline void ··· 489 483 490 484 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); 491 485 int bpf_prog_calc_tag(struct bpf_prog *fp); 486 + const char *kernel_type_name(u32 btf_type_id); 492 487 493 488 const struct bpf_func_proto *bpf_get_trace_printk_proto(void); 494 489 ··· 755 748 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, 756 749 const union bpf_attr *kattr, 757 750 union bpf_attr __user *uattr); 751 + bool btf_ctx_access(int off, int size, enum bpf_access_type type, 752 + const struct bpf_prog *prog, 753 + struct bpf_insn_access_aux *info); 754 + int btf_struct_access(struct bpf_verifier_log *log, 755 + const struct btf_type *t, int off, int size, 756 + enum bpf_access_type atype, 757 + u32 *next_btf_id); 758 + 758 759 #else /* !CONFIG_BPF_SYSCALL */ 759 760 static inline struct bpf_prog *bpf_prog_get(u32 ufd) 760 761 {
+4
include/linux/bpf_verifier.h
··· 52 52 */ 53 53 struct bpf_map *map_ptr; 54 54 55 + u32 btf_id; /* for PTR_TO_BTF_ID */ 56 + 55 57 /* Max size from any of the above. */ 56 58 unsigned long raw; 57 59 }; ··· 401 399 const char *fmt, va_list args); 402 400 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, 403 401 const char *fmt, ...); 402 + __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, 403 + const char *fmt, ...); 404 404 405 405 static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) 406 406 {
+190
kernel/bpf/btf.c
··· 3436 3436 return ERR_PTR(err); 3437 3437 } 3438 3438 3439 + extern struct btf *btf_vmlinux; 3440 + 3441 + bool btf_ctx_access(int off, int size, enum bpf_access_type type, 3442 + const struct bpf_prog *prog, 3443 + struct bpf_insn_access_aux *info) 3444 + { 3445 + struct bpf_verifier_log *log = info->log; 3446 + u32 btf_id = prog->aux->attach_btf_id; 3447 + const struct btf_param *args; 3448 + const struct btf_type *t; 3449 + const char prefix[] = "btf_trace_"; 3450 + const char *tname; 3451 + u32 nr_args, arg; 3452 + 3453 + if (!btf_id) 3454 + return true; 3455 + 3456 + if (IS_ERR(btf_vmlinux)) { 3457 + bpf_log(log, "btf_vmlinux is malformed\n"); 3458 + return false; 3459 + } 3460 + 3461 + t = btf_type_by_id(btf_vmlinux, btf_id); 3462 + if (!t || BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { 3463 + bpf_log(log, "btf_id is invalid\n"); 3464 + return false; 3465 + } 3466 + 3467 + tname = __btf_name_by_offset(btf_vmlinux, t->name_off); 3468 + if (strncmp(prefix, tname, sizeof(prefix) - 1)) { 3469 + bpf_log(log, "btf_id points to wrong type name %s\n", tname); 3470 + return false; 3471 + } 3472 + tname += sizeof(prefix) - 1; 3473 + 3474 + t = btf_type_by_id(btf_vmlinux, t->type); 3475 + if (!btf_type_is_ptr(t)) 3476 + return false; 3477 + t = btf_type_by_id(btf_vmlinux, t->type); 3478 + if (!btf_type_is_func_proto(t)) 3479 + return false; 3480 + 3481 + if (off % 8) { 3482 + bpf_log(log, "raw_tp '%s' offset %d is not multiple of 8\n", 3483 + tname, off); 3484 + return false; 3485 + } 3486 + arg = off / 8; 3487 + args = (const struct btf_param *)(t + 1); 3488 + /* skip first 'void *__data' argument in btf_trace_##name typedef */ 3489 + args++; 3490 + nr_args = btf_type_vlen(t) - 1; 3491 + if (arg >= nr_args) { 3492 + bpf_log(log, "raw_tp '%s' doesn't have %d-th argument\n", 3493 + tname, arg); 3494 + return false; 3495 + } 3496 + 3497 + t = btf_type_by_id(btf_vmlinux, args[arg].type); 3498 + /* skip modifiers */ 3499 + while (btf_type_is_modifier(t)) 3500 + t = 
btf_type_by_id(btf_vmlinux, t->type); 3501 + if (btf_type_is_int(t)) 3502 + /* accessing a scalar */ 3503 + return true; 3504 + if (!btf_type_is_ptr(t)) { 3505 + bpf_log(log, 3506 + "raw_tp '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", 3507 + tname, arg, 3508 + __btf_name_by_offset(btf_vmlinux, t->name_off), 3509 + btf_kind_str[BTF_INFO_KIND(t->info)]); 3510 + return false; 3511 + } 3512 + if (t->type == 0) 3513 + /* This is a pointer to void. 3514 + * It is the same as scalar from the verifier safety pov. 3515 + * No further pointer walking is allowed. 3516 + */ 3517 + return true; 3518 + 3519 + /* this is a pointer to another type */ 3520 + info->reg_type = PTR_TO_BTF_ID; 3521 + info->btf_id = t->type; 3522 + 3523 + t = btf_type_by_id(btf_vmlinux, t->type); 3524 + /* skip modifiers */ 3525 + while (btf_type_is_modifier(t)) 3526 + t = btf_type_by_id(btf_vmlinux, t->type); 3527 + if (!btf_type_is_struct(t)) { 3528 + bpf_log(log, 3529 + "raw_tp '%s' arg%d type %s is not a struct\n", 3530 + tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]); 3531 + return false; 3532 + } 3533 + bpf_log(log, "raw_tp '%s' arg%d has btf_id %d type %s '%s'\n", 3534 + tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], 3535 + __btf_name_by_offset(btf_vmlinux, t->name_off)); 3536 + return true; 3537 + } 3538 + 3539 + int btf_struct_access(struct bpf_verifier_log *log, 3540 + const struct btf_type *t, int off, int size, 3541 + enum bpf_access_type atype, 3542 + u32 *next_btf_id) 3543 + { 3544 + const struct btf_member *member; 3545 + const struct btf_type *mtype; 3546 + const char *tname, *mname; 3547 + int i, moff = 0, msize; 3548 + 3549 + again: 3550 + tname = __btf_name_by_offset(btf_vmlinux, t->name_off); 3551 + if (!btf_type_is_struct(t)) { 3552 + bpf_log(log, "Type '%s' is not a struct", tname); 3553 + return -EINVAL; 3554 + } 3555 + 3556 + for_each_member(i, t, member) { 3557 + /* offset of the field in bits */ 3558 + moff = btf_member_bit_offset(t, 
member); 3559 + 3560 + if (btf_member_bitfield_size(t, member)) 3561 + /* bitfields are not supported yet */ 3562 + continue; 3563 + 3564 + if (off + size <= moff / 8) 3565 + /* won't find anything, field is already too far */ 3566 + break; 3567 + 3568 + /* type of the field */ 3569 + mtype = btf_type_by_id(btf_vmlinux, member->type); 3570 + mname = __btf_name_by_offset(btf_vmlinux, member->name_off); 3571 + 3572 + /* skip modifiers */ 3573 + while (btf_type_is_modifier(mtype)) 3574 + mtype = btf_type_by_id(btf_vmlinux, mtype->type); 3575 + 3576 + if (btf_type_is_array(mtype)) 3577 + /* array deref is not supported yet */ 3578 + continue; 3579 + 3580 + if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) { 3581 + bpf_log(log, "field %s doesn't have size\n", mname); 3582 + return -EFAULT; 3583 + } 3584 + if (btf_type_is_ptr(mtype)) 3585 + msize = 8; 3586 + else 3587 + msize = mtype->size; 3588 + if (off >= moff / 8 + msize) 3589 + /* no overlap with member, keep iterating */ 3590 + continue; 3591 + /* the 'off' we're looking for is either equal to start 3592 + * of this field or inside of this struct 3593 + */ 3594 + if (btf_type_is_struct(mtype)) { 3595 + /* our field must be inside that union or struct */ 3596 + t = mtype; 3597 + 3598 + /* adjust offset we're looking for */ 3599 + off -= moff / 8; 3600 + goto again; 3601 + } 3602 + if (msize != size) { 3603 + /* field access size doesn't match */ 3604 + bpf_log(log, 3605 + "cannot access %d bytes in struct %s field %s that has size %d\n", 3606 + size, tname, mname, msize); 3607 + return -EACCES; 3608 + } 3609 + 3610 + if (btf_type_is_ptr(mtype)) { 3611 + const struct btf_type *stype; 3612 + 3613 + stype = btf_type_by_id(btf_vmlinux, mtype->type); 3614 + /* skip modifiers */ 3615 + while (btf_type_is_modifier(stype)) 3616 + stype = btf_type_by_id(btf_vmlinux, stype->type); 3617 + if (btf_type_is_struct(stype)) { 3618 + *next_btf_id = mtype->type; 3619 + return PTR_TO_BTF_ID; 3620 + } 3621 + } 3622 + /* all 
other fields are treated as scalars */ 3623 + return SCALAR_VALUE; 3624 + } 3625 + bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); 3626 + return -EINVAL; 3627 + } 3628 + 3439 3629 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, 3440 3630 struct seq_file *m) 3441 3631 {
+85 -3
kernel/bpf/verifier.c
··· 286 286 va_end(args); 287 287 } 288 288 289 + __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, 290 + const char *fmt, ...) 291 + { 292 + va_list args; 293 + 294 + if (!bpf_verifier_log_needed(log)) 295 + return; 296 + 297 + va_start(args, fmt); 298 + bpf_verifier_vlog(log, fmt, args); 299 + va_end(args); 300 + } 301 + 289 302 static const char *ltrim(const char *s) 290 303 { 291 304 while (isspace(*s)) ··· 419 406 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", 420 407 [PTR_TO_TP_BUFFER] = "tp_buffer", 421 408 [PTR_TO_XDP_SOCK] = "xdp_sock", 409 + [PTR_TO_BTF_ID] = "ptr_", 422 410 }; 423 411 424 412 static char slot_type_char[] = { ··· 450 436 return cur->frame[reg->frameno]; 451 437 } 452 438 439 + const char *kernel_type_name(u32 id) 440 + { 441 + return btf_name_by_offset(btf_vmlinux, 442 + btf_type_by_id(btf_vmlinux, id)->name_off); 443 + } 444 + 453 445 static void print_verifier_state(struct bpf_verifier_env *env, 454 446 const struct bpf_func_state *state) 455 447 { ··· 480 460 /* reg->off should be 0 for SCALAR_VALUE */ 481 461 verbose(env, "%lld", reg->var_off.value + reg->off); 482 462 } else { 463 + if (t == PTR_TO_BTF_ID) 464 + verbose(env, "%s", kernel_type_name(reg->btf_id)); 483 465 verbose(env, "(id=%d", reg->id); 484 466 if (reg_type_may_be_refcounted_or_null(t)) 485 467 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); ··· 2359 2337 2360 2338 /* check access to 'struct bpf_context' fields. 
Supports fixed offsets only */ 2361 2339 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, 2362 - enum bpf_access_type t, enum bpf_reg_type *reg_type) 2340 + enum bpf_access_type t, enum bpf_reg_type *reg_type, 2341 + u32 *btf_id) 2363 2342 { 2364 2343 struct bpf_insn_access_aux info = { 2365 2344 .reg_type = *reg_type, 2345 + .log = &env->log, 2366 2346 }; 2367 2347 2368 2348 if (env->ops->is_valid_access && ··· 2378 2354 */ 2379 2355 *reg_type = info.reg_type; 2380 2356 2381 - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; 2357 + if (*reg_type == PTR_TO_BTF_ID) 2358 + *btf_id = info.btf_id; 2359 + else 2360 + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; 2382 2361 /* remember the offset of last byte accessed in ctx */ 2383 2362 if (env->prog->aux->max_ctx_offset < off + size) 2384 2363 env->prog->aux->max_ctx_offset = off + size; ··· 2807 2780 return 0; 2808 2781 } 2809 2782 2783 + static int check_ptr_to_btf_access(struct bpf_verifier_env *env, 2784 + struct bpf_reg_state *regs, 2785 + int regno, int off, int size, 2786 + enum bpf_access_type atype, 2787 + int value_regno) 2788 + { 2789 + struct bpf_reg_state *reg = regs + regno; 2790 + const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id); 2791 + const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off); 2792 + u32 btf_id; 2793 + int ret; 2794 + 2795 + if (atype != BPF_READ) { 2796 + verbose(env, "only read is supported\n"); 2797 + return -EACCES; 2798 + } 2799 + 2800 + if (off < 0) { 2801 + verbose(env, 2802 + "R%d is ptr_%s invalid negative access: off=%d\n", 2803 + regno, tname, off); 2804 + return -EACCES; 2805 + } 2806 + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2807 + char tn_buf[48]; 2808 + 2809 + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2810 + verbose(env, 2811 + "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", 2812 + regno, tname, off, tn_buf); 2813 + return 
-EACCES; 2814 + } 2815 + 2816 + ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id); 2817 + if (ret < 0) 2818 + return ret; 2819 + 2820 + if (ret == SCALAR_VALUE) { 2821 + mark_reg_unknown(env, regs, value_regno); 2822 + return 0; 2823 + } 2824 + mark_reg_known_zero(env, regs, value_regno); 2825 + regs[value_regno].type = PTR_TO_BTF_ID; 2826 + regs[value_regno].btf_id = btf_id; 2827 + return 0; 2828 + } 2829 + 2810 2830 /* check whether memory at (regno + off) is accessible for t = (read | write) 2811 2831 * if t==write, value_regno is a register which value is stored into memory 2812 2832 * if t==read, value_regno is a register which will receive the value from memory ··· 2914 2840 } 2915 2841 } else if (reg->type == PTR_TO_CTX) { 2916 2842 enum bpf_reg_type reg_type = SCALAR_VALUE; 2843 + u32 btf_id = 0; 2917 2844 2918 2845 if (t == BPF_WRITE && value_regno >= 0 && 2919 2846 is_pointer_value(env, value_regno)) { ··· 2926 2851 if (err < 0) 2927 2852 return err; 2928 2853 2929 - err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); 2854 + err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id); 2855 + if (err) 2856 + verbose_linfo(env, insn_idx, "; "); 2930 2857 if (!err && t == BPF_READ && value_regno >= 0) { 2931 2858 /* ctx access returns either a scalar, or a 2932 2859 * PTR_TO_PACKET[_META,_END]. In the latter ··· 2947 2870 * a sub-register. 
2948 2871 */ 2949 2872 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 2873 + if (reg_type == PTR_TO_BTF_ID) 2874 + regs[value_regno].btf_id = btf_id; 2950 2875 } 2951 2876 regs[value_regno].type = reg_type; 2952 2877 } ··· 3008 2929 err = check_tp_buffer_access(env, reg, regno, off, size); 3009 2930 if (!err && t == BPF_READ && value_regno >= 0) 3010 2931 mark_reg_unknown(env, regs, value_regno); 2932 + } else if (reg->type == PTR_TO_BTF_ID) { 2933 + err = check_ptr_to_btf_access(env, regs, regno, off, size, t, 2934 + value_regno); 3011 2935 } else { 3012 2936 verbose(env, "R%d invalid mem access '%s'\n", regno, 3013 2937 reg_type_str[reg->type]);
+1 -1
kernel/trace/bpf_trace.c
··· 1074 1074 return false; 1075 1075 if (off % size != 0) 1076 1076 return false; 1077 - return true; 1077 + return btf_ctx_access(off, size, type, prog, info); 1078 1078 } 1079 1079 1080 1080 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {