Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

+16 -7

Documentation/networking/filter.rst

··· 320 320 ret #-1 321 321 drop: ret #0 322 322 323 - **(Accelerated) VLAN w/ id 10**:: 324 - 325 - ld vlan_tci 326 - jneq #10, drop 327 - ret #-1 328 - drop: ret #0 329 - 330 323 **icmp random packet sampling, 1 in 4**:: 331 324 332 325 ldh [12] ··· 350 357 jeq #35, good /* __NR_nanosleep */ 351 358 bad: ret #0 /* SECCOMP_RET_KILL_THREAD */ 352 359 good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ 360 + 361 + Examples for low-level BPF extension: 362 + 363 + **Packet for interface index 13**:: 364 + 365 + ld ifidx 366 + jneq #13, drop 367 + ret #-1 368 + drop: ret #0 369 + 370 + **(Accelerated) VLAN w/ id 10**:: 371 + 372 + ld vlan_tci 373 + jneq #10, drop 374 + ret #-1 375 + drop: ret #0 353 376 354 377 The above example code can be placed into a file (here called "foo"), and 355 378 then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf

-54

include/linux/bpf-cgroup.h

··· 27 27 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; 28 28 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) 29 29 30 - #define BPF_CGROUP_STORAGE_NEST_MAX 8 31 - 32 - struct bpf_cgroup_storage_info { 33 - struct task_struct *task; 34 - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; 35 - }; 36 - 37 - /* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks 38 - * to use bpf cgroup storage simultaneously. 39 - */ 40 - DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 41 - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 42 - 43 30 #define for_each_cgroup_storage_type(stype) \ 44 31 for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) 45 32 ··· 157 170 return BPF_CGROUP_STORAGE_PERCPU; 158 171 159 172 return BPF_CGROUP_STORAGE_SHARED; 160 - } 161 - 162 - static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage 163 - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) 164 - { 165 - enum bpf_cgroup_storage_type stype; 166 - int i, err = 0; 167 - 168 - preempt_disable(); 169 - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 170 - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) 171 - continue; 172 - 173 - this_cpu_write(bpf_cgroup_storage_info[i].task, current); 174 - for_each_cgroup_storage_type(stype) 175 - this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], 176 - storage[stype]); 177 - goto out; 178 - } 179 - err = -EBUSY; 180 - WARN_ON_ONCE(1); 181 - 182 - out: 183 - preempt_enable(); 184 - return err; 185 - } 186 - 187 - static inline void bpf_cgroup_storage_unset(void) 188 - { 189 - int i; 190 - 191 - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 192 - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 193 - continue; 194 - 195 - this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); 196 - return; 197 - } 198 173 } 199 174 200 175 struct bpf_cgroup_storage * ··· 436 487 return -EINVAL; 437 
488 } 438 489 439 - static inline int bpf_cgroup_storage_set( 440 - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } 441 - static inline void bpf_cgroup_storage_unset(void) {} 442 490 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, 443 491 struct bpf_map *map) { return 0; } 444 492 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(

+42 -20

include/linux/bpf.h

··· 1142 1142 struct bpf_prog *include_prog, 1143 1143 struct bpf_prog_array **new_array); 1144 1144 1145 + struct bpf_run_ctx {}; 1146 + 1147 + struct bpf_cg_run_ctx { 1148 + struct bpf_run_ctx run_ctx; 1149 + struct bpf_prog_array_item *prog_item; 1150 + }; 1151 + 1145 1152 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */ 1146 1153 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0) 1147 1154 /* BPF program asks to set CN on the packet. */ 1148 1155 #define BPF_RET_SET_CN (1 << 0) 1149 1156 1150 - /* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, 1151 - * if bpf_cgroup_storage_set() failed, the rest of programs 1152 - * will not execute. This should be a really rare scenario 1153 - * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of 1154 - * preemptions all between bpf_cgroup_storage_set() and 1155 - * bpf_cgroup_storage_unset() on the same cpu. 1156 - */ 1157 1157 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ 1158 1158 ({ \ 1159 1159 struct bpf_prog_array_item *_item; \ 1160 1160 struct bpf_prog *_prog; \ 1161 1161 struct bpf_prog_array *_array; \ 1162 + struct bpf_run_ctx *old_run_ctx; \ 1163 + struct bpf_cg_run_ctx run_ctx; \ 1162 1164 u32 _ret = 1; \ 1163 1165 u32 func_ret; \ 1164 1166 migrate_disable(); \ 1165 1167 rcu_read_lock(); \ 1166 1168 _array = rcu_dereference(array); \ 1167 1169 _item = &_array->items[0]; \ 1170 + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \ 1168 1171 while ((_prog = READ_ONCE(_item->prog))) { \ 1169 - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ 1170 - break; \ 1172 + run_ctx.prog_item = _item; \ 1171 1173 func_ret = func(_prog, ctx); \ 1172 1174 _ret &= (func_ret & 1); \ 1173 - *(ret_flags) |= (func_ret >> 1); \ 1174 - bpf_cgroup_storage_unset(); \ 1175 + *(ret_flags) |= (func_ret >> 1); \ 1175 1176 _item++; \ 1176 1177 } \ 1178 + bpf_reset_run_ctx(old_run_ctx); \ 1177 1179 rcu_read_unlock(); \ 1178 1180 migrate_enable(); \ 1179 1181 _ret; \ ··· 1186 1184 struct 
bpf_prog_array_item *_item; \ 1187 1185 struct bpf_prog *_prog; \ 1188 1186 struct bpf_prog_array *_array; \ 1187 + struct bpf_run_ctx *old_run_ctx; \ 1188 + struct bpf_cg_run_ctx run_ctx; \ 1189 1189 u32 _ret = 1; \ 1190 1190 migrate_disable(); \ 1191 1191 rcu_read_lock(); \ ··· 1195 1191 if (unlikely(check_non_null && !_array))\ 1196 1192 goto _out; \ 1197 1193 _item = &_array->items[0]; \ 1198 - while ((_prog = READ_ONCE(_item->prog))) { \ 1199 - if (!set_cg_storage) { \ 1200 - _ret &= func(_prog, ctx); \ 1201 - } else { \ 1202 - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ 1203 - break; \ 1204 - _ret &= func(_prog, ctx); \ 1205 - bpf_cgroup_storage_unset(); \ 1206 - } \ 1194 + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\ 1195 + while ((_prog = READ_ONCE(_item->prog))) { \ 1196 + run_ctx.prog_item = _item; \ 1197 + _ret &= func(_prog, ctx); \ 1207 1198 _item++; \ 1208 1199 } \ 1200 + bpf_reset_run_ctx(old_run_ctx); \ 1209 1201 _out: \ 1210 1202 rcu_read_unlock(); \ 1211 1203 migrate_enable(); \ ··· 1282 1282 else 1283 1283 __this_cpu_dec(bpf_prog_active); 1284 1284 migrate_enable(); 1285 + } 1286 + 1287 + static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) 1288 + { 1289 + struct bpf_run_ctx *old_ctx; 1290 + 1291 + old_ctx = current->bpf_ctx; 1292 + current->bpf_ctx = new_ctx; 1293 + return old_ctx; 1294 + } 1295 + 1296 + static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) 1297 + { 1298 + current->bpf_ctx = old_ctx; 1285 1299 } 1286 1300 1287 1301 extern const struct file_operations bpf_map_fops; ··· 1442 1428 struct seq_file *seq); 1443 1429 typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, 1444 1430 struct bpf_link_info *info); 1431 + typedef const struct bpf_func_proto * 1432 + (*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id, 1433 + const struct bpf_prog *prog); 1445 1434 1446 1435 enum bpf_iter_feature { 1447 1436 BPF_ITER_RESCHED = BIT(0), ··· 1457 1440 
bpf_iter_detach_target_t detach_target; 1458 1441 bpf_iter_show_fdinfo_t show_fdinfo; 1459 1442 bpf_iter_fill_link_info_t fill_link_info; 1443 + bpf_iter_get_func_proto_t get_func_proto; 1460 1444 u32 ctx_arg_info_size; 1461 1445 u32 feature; 1462 1446 struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; ··· 1480 1462 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); 1481 1463 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); 1482 1464 bool bpf_iter_prog_supported(struct bpf_prog *prog); 1465 + const struct bpf_func_proto * 1466 + bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); 1483 1467 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); 1484 1468 int bpf_iter_new_fd(struct bpf_link *link); 1485 1469 bool bpf_link_is_iter(struct bpf_link *link); ··· 2056 2036 extern const struct bpf_func_proto bpf_task_storage_delete_proto; 2057 2037 extern const struct bpf_func_proto bpf_for_each_map_elem_proto; 2058 2038 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; 2039 + extern const struct bpf_func_proto bpf_sk_setsockopt_proto; 2040 + extern const struct bpf_func_proto bpf_sk_getsockopt_proto; 2059 2041 2060 2042 const struct bpf_func_proto *bpf_tracing_func_proto( 2061 2043 enum bpf_func_id func_id, const struct bpf_prog *prog);

+3

include/linux/sched.h

··· 42 42 struct bio_list; 43 43 struct blk_plug; 44 44 struct bpf_local_storage; 45 + struct bpf_run_ctx; 45 46 struct capture_control; 46 47 struct cfs_rq; 47 48 struct fs_struct; ··· 1380 1379 #ifdef CONFIG_BPF_SYSCALL 1381 1380 /* Used by BPF task local storage */ 1382 1381 struct bpf_local_storage __rcu *bpf_storage; 1382 + /* Used for BPF run context */ 1383 + struct bpf_run_ctx *bpf_ctx; 1383 1384 #endif 1384 1385 1385 1386 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK

+6

include/net/inet_hashtables.h

··· 160 160 ____cacheline_aligned_in_smp; 161 161 }; 162 162 163 + #define inet_lhash2_for_each_icsk_continue(__icsk) \ 164 + hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) 165 + 166 + #define inet_lhash2_for_each_icsk(__icsk, list) \ 167 + hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) 168 + 163 169 #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ 164 170 hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) 165 171

-1

include/net/tcp.h

··· 1958 1958 struct seq_net_private p; 1959 1959 enum tcp_seq_states state; 1960 1960 struct sock *syn_wait_sk; 1961 - struct tcp_seq_afinfo *bpf_seq_afinfo; 1962 1961 int bucket, offset, sbucket, num; 1963 1962 loff_t last_pos; 1964 1963 };

+22

kernel/bpf/bpf_iter.c

··· 360 360 return supported; 361 361 } 362 362 363 + const struct bpf_func_proto * 364 + bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 365 + { 366 + const struct bpf_iter_target_info *tinfo; 367 + const struct bpf_func_proto *fn = NULL; 368 + 369 + mutex_lock(&targets_mutex); 370 + list_for_each_entry(tinfo, &targets, list) { 371 + if (tinfo->btf_id == prog->aux->attach_btf_id) { 372 + const struct bpf_iter_reg *reg_info; 373 + 374 + reg_info = tinfo->reg_info; 375 + if (reg_info->get_func_proto) 376 + fn = reg_info->get_func_proto(func_id, prog); 377 + break; 378 + } 379 + } 380 + mutex_unlock(&targets_mutex); 381 + 382 + return fn; 383 + } 384 + 363 385 static void bpf_iter_link_release(struct bpf_link *link) 364 386 { 365 387 struct bpf_iter_link *iter_link =

+5

kernel/bpf/btf.c

··· 4825 4825 const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; 4826 4826 4827 4827 if (ctx_arg_info->offset == off) { 4828 + if (!ctx_arg_info->btf_id) { 4829 + bpf_log(log,"invalid btf_id for context argument offset %u\n", off); 4830 + return false; 4831 + } 4832 + 4828 4833 info->reg_type = ctx_arg_info->reg_type; 4829 4834 info->btf = btf_vmlinux; 4830 4835 info->btf_id = ctx_arg_info->btf_id;

+5 -11

kernel/bpf/helpers.c

··· 393 393 }; 394 394 395 395 #ifdef CONFIG_CGROUP_BPF 396 - DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 397 - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 398 396 399 397 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) 400 398 { ··· 401 403 * verifier checks that its value is correct. 402 404 */ 403 405 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 404 - struct bpf_cgroup_storage *storage = NULL; 406 + struct bpf_cgroup_storage *storage; 407 + struct bpf_cg_run_ctx *ctx; 405 408 void *ptr; 406 - int i; 407 409 408 - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 409 - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 410 - continue; 411 - 412 - storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); 413 - break; 414 - } 410 + /* get current cgroup storage from BPF run context */ 411 + ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 412 + storage = ctx->prog_item->cgroup_storage[stype]; 415 413 416 414 if (stype == BPF_CGROUP_STORAGE_SHARED) 417 415 ptr = &READ_ONCE(storage->buf)->data[0];

+11 -5

kernel/bpf/local_storage.c

··· 1 1 //SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf-cgroup.h> 3 3 #include <linux/bpf.h> 4 + #include <linux/bpf_local_storage.h> 4 5 #include <linux/btf.h> 5 6 #include <linux/bug.h> 6 7 #include <linux/filter.h> ··· 11 10 #include <uapi/linux/btf.h> 12 11 13 12 #ifdef CONFIG_CGROUP_BPF 14 - 15 - DEFINE_PER_CPU(struct bpf_cgroup_storage_info, 16 - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 17 13 18 14 #include "../cgroup/cgroup-internal.h" 19 15 ··· 284 286 285 287 static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) 286 288 { 289 + __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE; 287 290 int numa_node = bpf_map_attr_numa_node(attr); 288 291 struct bpf_cgroup_storage_map *map; 292 + 293 + /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percpu 294 + * is the same as other local storages. 295 + */ 296 + if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 297 + max_value_size = min_t(__u32, max_value_size, 298 + PCPU_MIN_UNIT_SIZE); 289 299 290 300 if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) && 291 301 attr->key_size != sizeof(__u64)) ··· 302 296 if (attr->value_size == 0) 303 297 return ERR_PTR(-EINVAL); 304 298 305 - if (attr->value_size > PAGE_SIZE) 299 + if (attr->value_size > max_value_size) 306 300 return ERR_PTR(-E2BIG); 307 301 308 302 if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK || ··· 415 409 static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key, 416 410 struct seq_file *m) 417 411 { 418 - enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 412 + enum bpf_cgroup_storage_type stype; 419 413 struct bpf_cgroup_storage *storage; 420 414 int cpu; 421 415

+1

kernel/fork.c

··· 2083 2083 #endif 2084 2084 #ifdef CONFIG_BPF_SYSCALL 2085 2085 RCU_INIT_POINTER(p->bpf_storage, NULL); 2086 + p->bpf_ctx = NULL; 2086 2087 #endif 2087 2088 2088 2089 /* Perform scheduler related setup. Assign this task to a CPU. */

+7 -2

kernel/trace/bpf_trace.c

··· 965 965 { 966 966 struct kprobe *kp = kprobe_running(); 967 967 968 - return kp ? (u64) kp->addr : 0; 968 + return kp ? (uintptr_t)kp->addr : 0; 969 969 } 970 970 971 971 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { ··· 1461 1461 const struct bpf_func_proto * 1462 1462 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1463 1463 { 1464 + const struct bpf_func_proto *fn; 1465 + 1464 1466 switch (func_id) { 1465 1467 #ifdef CONFIG_NET 1466 1468 case BPF_FUNC_skb_output: ··· 1503 1501 case BPF_FUNC_d_path: 1504 1502 return &bpf_d_path_proto; 1505 1503 default: 1506 - return raw_tp_prog_func_proto(func_id, prog); 1504 + fn = raw_tp_prog_func_proto(func_id, prog); 1505 + if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) 1506 + fn = bpf_iter_get_func_proto(func_id, prog); 1507 + return fn; 1507 1508 } 1508 1509 } 1509 1510

+10 -3

lib/test_bpf.c

··· 4286 4286 .u.insns_int = { 4287 4287 BPF_LD_IMM64(R0, 0), 4288 4288 BPF_LD_IMM64(R1, 0xffffffffffffffffLL), 4289 - BPF_STX_MEM(BPF_W, R10, R1, -40), 4290 - BPF_LDX_MEM(BPF_W, R0, R10, -40), 4289 + BPF_STX_MEM(BPF_DW, R10, R1, -40), 4290 + BPF_LDX_MEM(BPF_DW, R0, R10, -40), 4291 4291 BPF_EXIT_INSN(), 4292 4292 }, 4293 4293 INTERNAL, ··· 6659 6659 u64 duration; 6660 6660 u32 ret; 6661 6661 6662 - if (test->test[i].data_size == 0 && 6662 + /* 6663 + * NOTE: Several sub-tests may be present, in which case 6664 + * a zero {data_size, result} tuple indicates the end of 6665 + * the sub-test array. The first test is always run, 6666 + * even if both data_size and result happen to be zero. 6667 + */ 6668 + if (i > 0 && 6669 + test->test[i].data_size == 0 && 6663 6670 test->test[i].result == 0) 6664 6671 break; 6665 6672

+11 -12

net/bpf/test_run.c

··· 88 88 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, 89 89 u32 *retval, u32 *time, bool xdp) 90 90 { 91 - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; 91 + struct bpf_prog_array_item item = {.prog = prog}; 92 + struct bpf_run_ctx *old_ctx; 93 + struct bpf_cg_run_ctx run_ctx; 92 94 struct bpf_test_timer t = { NO_MIGRATE }; 93 95 enum bpf_cgroup_storage_type stype; 94 96 int ret; 95 97 96 98 for_each_cgroup_storage_type(stype) { 97 - storage[stype] = bpf_cgroup_storage_alloc(prog, stype); 98 - if (IS_ERR(storage[stype])) { 99 - storage[stype] = NULL; 99 + item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype); 100 + if (IS_ERR(item.cgroup_storage[stype])) { 101 + item.cgroup_storage[stype] = NULL; 100 102 for_each_cgroup_storage_type(stype) 101 - bpf_cgroup_storage_free(storage[stype]); 103 + bpf_cgroup_storage_free(item.cgroup_storage[stype]); 102 104 return -ENOMEM; 103 105 } 104 106 } ··· 109 107 repeat = 1; 110 108 111 109 bpf_test_timer_enter(&t); 110 + old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 112 111 do { 113 - ret = bpf_cgroup_storage_set(storage); 114 - if (ret) 115 - break; 116 - 112 + run_ctx.prog_item = &item; 117 113 if (xdp) 118 114 *retval = bpf_prog_run_xdp(prog, ctx); 119 115 else 120 116 *retval = BPF_PROG_RUN(prog, ctx); 121 - 122 - bpf_cgroup_storage_unset(); 123 117 } while (bpf_test_timer_continue(&t, repeat, &ret, time)); 118 + bpf_reset_run_ctx(old_ctx); 124 119 bpf_test_timer_leave(&t); 125 120 126 121 for_each_cgroup_storage_type(stype) 127 - bpf_cgroup_storage_free(storage[stype]); 122 + bpf_cgroup_storage_free(item.cgroup_storage[stype]); 128 123 129 124 return ret; 130 125 }

+34

net/core/filter.c

··· 5016 5016 return -EINVAL; 5017 5017 } 5018 5018 5019 + BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, 5020 + int, optname, char *, optval, int, optlen) 5021 + { 5022 + return _bpf_setsockopt(sk, level, optname, optval, optlen); 5023 + } 5024 + 5025 + const struct bpf_func_proto bpf_sk_setsockopt_proto = { 5026 + .func = bpf_sk_setsockopt, 5027 + .gpl_only = false, 5028 + .ret_type = RET_INTEGER, 5029 + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5030 + .arg2_type = ARG_ANYTHING, 5031 + .arg3_type = ARG_ANYTHING, 5032 + .arg4_type = ARG_PTR_TO_MEM, 5033 + .arg5_type = ARG_CONST_SIZE, 5034 + }; 5035 + 5036 + BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level, 5037 + int, optname, char *, optval, int, optlen) 5038 + { 5039 + return _bpf_getsockopt(sk, level, optname, optval, optlen); 5040 + } 5041 + 5042 + const struct bpf_func_proto bpf_sk_getsockopt_proto = { 5043 + .func = bpf_sk_getsockopt, 5044 + .gpl_only = false, 5045 + .ret_type = RET_INTEGER, 5046 + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5047 + .arg2_type = ARG_ANYTHING, 5048 + .arg3_type = ARG_ANYTHING, 5049 + .arg4_type = ARG_PTR_TO_UNINIT_MEM, 5050 + .arg5_type = ARG_CONST_SIZE, 5051 + }; 5052 + 5019 5053 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, 5020 5054 int, level, int, optname, char *, optval, int, optlen) 5021 5055 {

+324 -86

net/ipv4/tcp_ipv4.c

··· 2277 2277 #ifdef CONFIG_PROC_FS 2278 2278 /* Proc filesystem TCP sock list dumping. */ 2279 2279 2280 - /* 2281 - * Get next listener socket follow cur. If cur is NULL, get first socket 2282 - * starting from bucket given in st->bucket; when st->bucket is zero the 2283 - * very first socket in the hash table is returned. 2280 + static unsigned short seq_file_family(const struct seq_file *seq); 2281 + 2282 + static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) 2283 + { 2284 + unsigned short family = seq_file_family(seq); 2285 + 2286 + /* AF_UNSPEC is used as a match all */ 2287 + return ((family == AF_UNSPEC || family == sk->sk_family) && 2288 + net_eq(sock_net(sk), seq_file_net(seq))); 2289 + } 2290 + 2291 + /* Find a non empty bucket (starting from st->bucket) 2292 + * and return the first sk from it. 2293 + */ 2294 + static void *listening_get_first(struct seq_file *seq) 2295 + { 2296 + struct tcp_iter_state *st = seq->private; 2297 + 2298 + st->offset = 0; 2299 + for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { 2300 + struct inet_listen_hashbucket *ilb2; 2301 + struct inet_connection_sock *icsk; 2302 + struct sock *sk; 2303 + 2304 + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; 2305 + if (hlist_empty(&ilb2->head)) 2306 + continue; 2307 + 2308 + spin_lock(&ilb2->lock); 2309 + inet_lhash2_for_each_icsk(icsk, &ilb2->head) { 2310 + sk = (struct sock *)icsk; 2311 + if (seq_sk_match(seq, sk)) 2312 + return sk; 2313 + } 2314 + spin_unlock(&ilb2->lock); 2315 + } 2316 + 2317 + return NULL; 2318 + } 2319 + 2320 + /* Find the next sk of "cur" within the same bucket (i.e. st->bucket). 2321 + * If "cur" is the last one in the st->bucket, 2322 + * call listening_get_first() to return the first sk of the next 2323 + * non empty bucket. 
2284 2324 */ 2285 2325 static void *listening_get_next(struct seq_file *seq, void *cur) 2286 2326 { 2287 - struct tcp_seq_afinfo *afinfo; 2288 2327 struct tcp_iter_state *st = seq->private; 2289 - struct net *net = seq_file_net(seq); 2290 - struct inet_listen_hashbucket *ilb; 2291 - struct hlist_nulls_node *node; 2328 + struct inet_listen_hashbucket *ilb2; 2329 + struct inet_connection_sock *icsk; 2292 2330 struct sock *sk = cur; 2293 2331 2294 - if (st->bpf_seq_afinfo) 2295 - afinfo = st->bpf_seq_afinfo; 2296 - else 2297 - afinfo = PDE_DATA(file_inode(seq->file)); 2298 - 2299 - if (!sk) { 2300 - get_head: 2301 - ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2302 - spin_lock(&ilb->lock); 2303 - sk = sk_nulls_head(&ilb->nulls_head); 2304 - st->offset = 0; 2305 - goto get_sk; 2306 - } 2307 - ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2308 2332 ++st->num; 2309 2333 ++st->offset; 2310 2334 2311 - sk = sk_nulls_next(sk); 2312 - get_sk: 2313 - sk_nulls_for_each_from(sk, node) { 2314 - if (!net_eq(sock_net(sk), net)) 2315 - continue; 2316 - if (afinfo->family == AF_UNSPEC || 2317 - sk->sk_family == afinfo->family) 2335 + icsk = inet_csk(sk); 2336 + inet_lhash2_for_each_icsk_continue(icsk) { 2337 + sk = (struct sock *)icsk; 2338 + if (seq_sk_match(seq, sk)) 2318 2339 return sk; 2319 2340 } 2320 - spin_unlock(&ilb->lock); 2321 - st->offset = 0; 2322 - if (++st->bucket < INET_LHTABLE_SIZE) 2323 - goto get_head; 2324 - return NULL; 2341 + 2342 + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; 2343 + spin_unlock(&ilb2->lock); 2344 + ++st->bucket; 2345 + return listening_get_first(seq); 2325 2346 } 2326 2347 2327 2348 static void *listening_get_idx(struct seq_file *seq, loff_t *pos) ··· 2352 2331 2353 2332 st->bucket = 0; 2354 2333 st->offset = 0; 2355 - rc = listening_get_next(seq, NULL); 2334 + rc = listening_get_first(seq); 2356 2335 2357 2336 while (rc && *pos) { 2358 2337 rc = listening_get_next(seq, rc); ··· 2372 2351 */ 2373 2352 static void *established_get_first(struct 
seq_file *seq) 2374 2353 { 2375 - struct tcp_seq_afinfo *afinfo; 2376 2354 struct tcp_iter_state *st = seq->private; 2377 - struct net *net = seq_file_net(seq); 2378 - void *rc = NULL; 2379 - 2380 - if (st->bpf_seq_afinfo) 2381 - afinfo = st->bpf_seq_afinfo; 2382 - else 2383 - afinfo = PDE_DATA(file_inode(seq->file)); 2384 2355 2385 2356 st->offset = 0; 2386 2357 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { ··· 2386 2373 2387 2374 spin_lock_bh(lock); 2388 2375 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2389 - if ((afinfo->family != AF_UNSPEC && 2390 - sk->sk_family != afinfo->family) || 2391 - !net_eq(sock_net(sk), net)) { 2392 - continue; 2393 - } 2394 - rc = sk; 2395 - goto out; 2376 + if (seq_sk_match(seq, sk)) 2377 + return sk; 2396 2378 } 2397 2379 spin_unlock_bh(lock); 2398 2380 } 2399 - out: 2400 - return rc; 2381 + 2382 + return NULL; 2401 2383 } 2402 2384 2403 2385 static void *established_get_next(struct seq_file *seq, void *cur) 2404 2386 { 2405 - struct tcp_seq_afinfo *afinfo; 2406 2387 struct sock *sk = cur; 2407 2388 struct hlist_nulls_node *node; 2408 2389 struct tcp_iter_state *st = seq->private; 2409 - struct net *net = seq_file_net(seq); 2410 - 2411 - if (st->bpf_seq_afinfo) 2412 - afinfo = st->bpf_seq_afinfo; 2413 - else 2414 - afinfo = PDE_DATA(file_inode(seq->file)); 2415 2390 2416 2391 ++st->num; 2417 2392 ++st->offset; ··· 2407 2406 sk = sk_nulls_next(sk); 2408 2407 2409 2408 sk_nulls_for_each_from(sk, node) { 2410 - if ((afinfo->family == AF_UNSPEC || 2411 - sk->sk_family == afinfo->family) && 2412 - net_eq(sock_net(sk), net)) 2409 + if (seq_sk_match(seq, sk)) 2413 2410 return sk; 2414 2411 } 2415 2412 ··· 2450 2451 static void *tcp_seek_last_pos(struct seq_file *seq) 2451 2452 { 2452 2453 struct tcp_iter_state *st = seq->private; 2454 + int bucket = st->bucket; 2453 2455 int offset = st->offset; 2454 2456 int orig_num = st->num; 2455 2457 void *rc = NULL; 2456 2458 2457 2459 switch (st->state) { 
2458 2460 case TCP_SEQ_STATE_LISTENING: 2459 - if (st->bucket >= INET_LHTABLE_SIZE) 2461 + if (st->bucket > tcp_hashinfo.lhash2_mask) 2460 2462 break; 2461 2463 st->state = TCP_SEQ_STATE_LISTENING; 2462 - rc = listening_get_next(seq, NULL); 2463 - while (offset-- && rc) 2464 + rc = listening_get_first(seq); 2465 + while (offset-- && rc && bucket == st->bucket) 2464 2466 rc = listening_get_next(seq, rc); 2465 2467 if (rc) 2466 2468 break; ··· 2472 2472 if (st->bucket > tcp_hashinfo.ehash_mask) 2473 2473 break; 2474 2474 rc = established_get_first(seq); 2475 - while (offset-- && rc) 2475 + while (offset-- && rc && bucket == st->bucket) 2476 2476 rc = established_get_next(seq, rc); 2477 2477 } 2478 2478 ··· 2542 2542 switch (st->state) { 2543 2543 case TCP_SEQ_STATE_LISTENING: 2544 2544 if (v != SEQ_START_TOKEN) 2545 - spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); 2545 + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); 2546 2546 break; 2547 2547 case TCP_SEQ_STATE_ESTABLISHED: 2548 2548 if (v) ··· 2687 2687 } 2688 2688 2689 2689 #ifdef CONFIG_BPF_SYSCALL 2690 + struct bpf_tcp_iter_state { 2691 + struct tcp_iter_state state; 2692 + unsigned int cur_sk; 2693 + unsigned int end_sk; 2694 + unsigned int max_sk; 2695 + struct sock **batch; 2696 + bool st_bucket_done; 2697 + }; 2698 + 2690 2699 struct bpf_iter__tcp { 2691 2700 __bpf_md_ptr(struct bpf_iter_meta *, meta); 2692 2701 __bpf_md_ptr(struct sock_common *, sk_common); ··· 2714 2705 return bpf_iter_run_prog(prog, &ctx); 2715 2706 } 2716 2707 2708 + static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) 2709 + { 2710 + while (iter->cur_sk < iter->end_sk) 2711 + sock_put(iter->batch[iter->cur_sk++]); 2712 + } 2713 + 2714 + static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, 2715 + unsigned int new_batch_sz) 2716 + { 2717 + struct sock **new_batch; 2718 + 2719 + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 2720 + GFP_USER | __GFP_NOWARN); 2721 + if (!new_batch) 
2722 + return -ENOMEM; 2723 + 2724 + bpf_iter_tcp_put_batch(iter); 2725 + kvfree(iter->batch); 2726 + iter->batch = new_batch; 2727 + iter->max_sk = new_batch_sz; 2728 + 2729 + return 0; 2730 + } 2731 + 2732 + static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, 2733 + struct sock *start_sk) 2734 + { 2735 + struct bpf_tcp_iter_state *iter = seq->private; 2736 + struct tcp_iter_state *st = &iter->state; 2737 + struct inet_connection_sock *icsk; 2738 + unsigned int expected = 1; 2739 + struct sock *sk; 2740 + 2741 + sock_hold(start_sk); 2742 + iter->batch[iter->end_sk++] = start_sk; 2743 + 2744 + icsk = inet_csk(start_sk); 2745 + inet_lhash2_for_each_icsk_continue(icsk) { 2746 + sk = (struct sock *)icsk; 2747 + if (seq_sk_match(seq, sk)) { 2748 + if (iter->end_sk < iter->max_sk) { 2749 + sock_hold(sk); 2750 + iter->batch[iter->end_sk++] = sk; 2751 + } 2752 + expected++; 2753 + } 2754 + } 2755 + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); 2756 + 2757 + return expected; 2758 + } 2759 + 2760 + static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq, 2761 + struct sock *start_sk) 2762 + { 2763 + struct bpf_tcp_iter_state *iter = seq->private; 2764 + struct tcp_iter_state *st = &iter->state; 2765 + struct hlist_nulls_node *node; 2766 + unsigned int expected = 1; 2767 + struct sock *sk; 2768 + 2769 + sock_hold(start_sk); 2770 + iter->batch[iter->end_sk++] = start_sk; 2771 + 2772 + sk = sk_nulls_next(start_sk); 2773 + sk_nulls_for_each_from(sk, node) { 2774 + if (seq_sk_match(seq, sk)) { 2775 + if (iter->end_sk < iter->max_sk) { 2776 + sock_hold(sk); 2777 + iter->batch[iter->end_sk++] = sk; 2778 + } 2779 + expected++; 2780 + } 2781 + } 2782 + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2783 + 2784 + return expected; 2785 + } 2786 + 2787 + static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) 2788 + { 2789 + struct bpf_tcp_iter_state *iter = seq->private; 2790 + struct tcp_iter_state *st = &iter->state; 2791 + 
unsigned int expected; 2792 + bool resized = false; 2793 + struct sock *sk; 2794 + 2795 + /* The st->bucket is done. Directly advance to the next 2796 + * bucket instead of having the tcp_seek_last_pos() to skip 2797 + * one by one in the current bucket and eventually find out 2798 + * it has to advance to the next bucket. 2799 + */ 2800 + if (iter->st_bucket_done) { 2801 + st->offset = 0; 2802 + st->bucket++; 2803 + if (st->state == TCP_SEQ_STATE_LISTENING && 2804 + st->bucket > tcp_hashinfo.lhash2_mask) { 2805 + st->state = TCP_SEQ_STATE_ESTABLISHED; 2806 + st->bucket = 0; 2807 + } 2808 + } 2809 + 2810 + again: 2811 + /* Get a new batch */ 2812 + iter->cur_sk = 0; 2813 + iter->end_sk = 0; 2814 + iter->st_bucket_done = false; 2815 + 2816 + sk = tcp_seek_last_pos(seq); 2817 + if (!sk) 2818 + return NULL; /* Done */ 2819 + 2820 + if (st->state == TCP_SEQ_STATE_LISTENING) 2821 + expected = bpf_iter_tcp_listening_batch(seq, sk); 2822 + else 2823 + expected = bpf_iter_tcp_established_batch(seq, sk); 2824 + 2825 + if (iter->end_sk == expected) { 2826 + iter->st_bucket_done = true; 2827 + return sk; 2828 + } 2829 + 2830 + if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) { 2831 + resized = true; 2832 + goto again; 2833 + } 2834 + 2835 + return sk; 2836 + } 2837 + 2838 + static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos) 2839 + { 2840 + /* bpf iter does not support lseek, so it always 2841 + * continue from where it was stop()-ped. 2842 + */ 2843 + if (*pos) 2844 + return bpf_iter_tcp_batch(seq); 2845 + 2846 + return SEQ_START_TOKEN; 2847 + } 2848 + 2849 + static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2850 + { 2851 + struct bpf_tcp_iter_state *iter = seq->private; 2852 + struct tcp_iter_state *st = &iter->state; 2853 + struct sock *sk; 2854 + 2855 + /* Whenever seq_next() is called, the iter->cur_sk is 2856 + * done with seq_show(), so advance to the next sk in 2857 + * the batch. 
2858 + */ 2859 + if (iter->cur_sk < iter->end_sk) { 2860 + /* Keeping st->num consistent in tcp_iter_state. 2861 + * bpf_iter_tcp does not use st->num. 2862 + * meta.seq_num is used instead. 2863 + */ 2864 + st->num++; 2865 + /* Move st->offset to the next sk in the bucket such that 2866 + * the future start() will resume at st->offset in 2867 + * st->bucket. See tcp_seek_last_pos(). 2868 + */ 2869 + st->offset++; 2870 + sock_put(iter->batch[iter->cur_sk++]); 2871 + } 2872 + 2873 + if (iter->cur_sk < iter->end_sk) 2874 + sk = iter->batch[iter->cur_sk]; 2875 + else 2876 + sk = bpf_iter_tcp_batch(seq); 2877 + 2878 + ++*pos; 2879 + /* Keeping st->last_pos consistent in tcp_iter_state. 2880 + * bpf iter does not do lseek, so st->last_pos always equals to *pos. 2881 + */ 2882 + st->last_pos = *pos; 2883 + return sk; 2884 + } 2885 + 2717 2886 static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) 2718 2887 { 2719 2888 struct bpf_iter_meta meta; 2720 2889 struct bpf_prog *prog; 2721 2890 struct sock *sk = v; 2891 + bool slow; 2722 2892 uid_t uid; 2893 + int ret; 2723 2894 2724 2895 if (v == SEQ_START_TOKEN) 2725 2896 return 0; 2897 + 2898 + if (sk_fullsock(sk)) 2899 + slow = lock_sock_fast(sk); 2900 + 2901 + if (unlikely(sk_unhashed(sk))) { 2902 + ret = SEQ_SKIP; 2903 + goto unlock; 2904 + } 2726 2905 2727 2906 if (sk->sk_state == TCP_TIME_WAIT) { 2728 2907 uid = 0; ··· 2925 2728 2926 2729 meta.seq = seq; 2927 2730 prog = bpf_iter_get_info(&meta, false); 2928 - return tcp_prog_seq_show(prog, &meta, v, uid); 2731 + ret = tcp_prog_seq_show(prog, &meta, v, uid); 2732 + 2733 + unlock: 2734 + if (sk_fullsock(sk)) 2735 + unlock_sock_fast(sk, slow); 2736 + return ret; 2737 + 2929 2738 } 2930 2739 2931 2740 static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) 2932 2741 { 2742 + struct bpf_tcp_iter_state *iter = seq->private; 2933 2743 struct bpf_iter_meta meta; 2934 2744 struct bpf_prog *prog; 2935 2745 ··· 2947 2743 (void)tcp_prog_seq_show(prog, &meta, v, 
0); 2948 2744 } 2949 2745 2950 - tcp_seq_stop(seq, v); 2746 + if (iter->cur_sk < iter->end_sk) { 2747 + bpf_iter_tcp_put_batch(iter); 2748 + iter->st_bucket_done = false; 2749 + } 2951 2750 } 2952 2751 2953 2752 static const struct seq_operations bpf_iter_tcp_seq_ops = { 2954 2753 .show = bpf_iter_tcp_seq_show, 2955 - .start = tcp_seq_start, 2956 - .next = tcp_seq_next, 2754 + .start = bpf_iter_tcp_seq_start, 2755 + .next = bpf_iter_tcp_seq_next, 2957 2756 .stop = bpf_iter_tcp_seq_stop, 2958 2757 }; 2959 2758 #endif 2759 + static unsigned short seq_file_family(const struct seq_file *seq) 2760 + { 2761 + const struct tcp_seq_afinfo *afinfo; 2762 + 2763 + #ifdef CONFIG_BPF_SYSCALL 2764 + /* Iterated from bpf_iter. Let the bpf prog to filter instead. */ 2765 + if (seq->op == &bpf_iter_tcp_seq_ops) 2766 + return AF_UNSPEC; 2767 + #endif 2768 + 2769 + /* Iterated from proc fs */ 2770 + afinfo = PDE_DATA(file_inode(seq->file)); 2771 + return afinfo->family; 2772 + } 2960 2773 2961 2774 static const struct seq_operations tcp4_seq_ops = { 2962 2775 .show = tcp4_seq_show, ··· 3223 3002 DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, 3224 3003 struct sock_common *sk_common, uid_t uid) 3225 3004 3005 + #define INIT_BATCH_SZ 16 3006 + 3226 3007 static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) 3227 3008 { 3228 - struct tcp_iter_state *st = priv_data; 3229 - struct tcp_seq_afinfo *afinfo; 3230 - int ret; 3009 + struct bpf_tcp_iter_state *iter = priv_data; 3010 + int err; 3231 3011 3232 - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); 3233 - if (!afinfo) 3234 - return -ENOMEM; 3012 + err = bpf_iter_init_seq_net(priv_data, aux); 3013 + if (err) 3014 + return err; 3235 3015 3236 - afinfo->family = AF_UNSPEC; 3237 - st->bpf_seq_afinfo = afinfo; 3238 - ret = bpf_iter_init_seq_net(priv_data, aux); 3239 - if (ret) 3240 - kfree(afinfo); 3241 - return ret; 3016 + err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ); 3017 + if (err) { 3018 + 
bpf_iter_fini_seq_net(priv_data); 3019 + return err; 3020 + } 3021 + 3022 + return 0; 3242 3023 } 3243 3024 3244 3025 static void bpf_iter_fini_tcp(void *priv_data) 3245 3026 { 3246 - struct tcp_iter_state *st = priv_data; 3027 + struct bpf_tcp_iter_state *iter = priv_data; 3247 3028 3248 - kfree(st->bpf_seq_afinfo); 3249 3029 bpf_iter_fini_seq_net(priv_data); 3030 + kvfree(iter->batch); 3250 3031 } 3251 3032 3252 3033 static const struct bpf_iter_seq_info tcp_seq_info = { 3253 3034 .seq_ops = &bpf_iter_tcp_seq_ops, 3254 3035 .init_seq_private = bpf_iter_init_tcp, 3255 3036 .fini_seq_private = bpf_iter_fini_tcp, 3256 - .seq_priv_size = sizeof(struct tcp_iter_state), 3037 + .seq_priv_size = sizeof(struct bpf_tcp_iter_state), 3257 3038 }; 3039 + 3040 + static const struct bpf_func_proto * 3041 + bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id, 3042 + const struct bpf_prog *prog) 3043 + { 3044 + switch (func_id) { 3045 + case BPF_FUNC_setsockopt: 3046 + return &bpf_sk_setsockopt_proto; 3047 + case BPF_FUNC_getsockopt: 3048 + return &bpf_sk_getsockopt_proto; 3049 + default: 3050 + return NULL; 3051 + } 3052 + } 3258 3053 3259 3054 static struct bpf_iter_reg tcp_reg_info = { 3260 3055 .target = "tcp", ··· 3279 3042 { offsetof(struct bpf_iter__tcp, sk_common), 3280 3043 PTR_TO_BTF_ID_OR_NULL }, 3281 3044 }, 3045 + .get_func_proto = bpf_iter_tcp_get_func_proto, 3282 3046 .seq_info = &tcp_seq_info, 3283 3047 }; 3284 3048

+8 -8

net/unix/unix_bpf.c

··· 44 44 { 45 45 struct unix_sock *u = unix_sk(sk); 46 46 struct sk_psock *psock; 47 - int copied, ret; 47 + int copied; 48 48 49 49 psock = sk_psock_get(sk); 50 50 if (unlikely(!psock)) ··· 53 53 mutex_lock(&u->iolock); 54 54 if (!skb_queue_empty(&sk->sk_receive_queue) && 55 55 sk_psock_queue_empty(psock)) { 56 - ret = __unix_dgram_recvmsg(sk, msg, len, flags); 57 - goto out; 56 + mutex_unlock(&u->iolock); 57 + sk_psock_put(sk, psock); 58 + return __unix_dgram_recvmsg(sk, msg, len, flags); 58 59 } 59 60 60 61 msg_bytes_ready: ··· 69 68 if (data) { 70 69 if (!sk_psock_queue_empty(psock)) 71 70 goto msg_bytes_ready; 72 - ret = __unix_dgram_recvmsg(sk, msg, len, flags); 73 - goto out; 71 + mutex_unlock(&u->iolock); 72 + sk_psock_put(sk, psock); 73 + return __unix_dgram_recvmsg(sk, msg, len, flags); 74 74 } 75 75 copied = -EAGAIN; 76 76 } 77 - ret = copied; 78 - out: 79 77 mutex_unlock(&u->iolock); 80 78 sk_psock_put(sk, psock); 81 - return ret; 79 + return copied; 82 80 } 83 81 84 82 static struct proto *unix_prot_saved __read_mostly;

+2

samples/bpf/.gitignore

··· 45 45 xdp_redirect 46 46 xdp_redirect_cpu 47 47 xdp_redirect_map 48 + xdp_redirect_map_multi 48 49 xdp_router_ipv4 49 50 xdp_rxq_info 50 51 xdp_sample_pkts 51 52 xdp_tx_iptunnel 52 53 xdpsock 54 + xdpsock_ctrl_proc 53 55 xsk_fwd 54 56 testfile.img 55 57 hbm_out.log

+1

samples/bpf/test_override_return.sh

··· 1 1 #!/bin/bash 2 2 3 + rm -r tmpmnt 3 4 rm -f testfile.img 4 5 dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 5 6 DEVICE=$(losetup --show -f testfile.img)

+5

samples/bpf/tracex7_user.c

··· 14 14 int ret = 0; 15 15 FILE *f; 16 16 17 + if (!argv[1]) { 18 + fprintf(stderr, "ERROR: Run with the btrfs device argument!\n"); 19 + return 0; 20 + } 21 + 17 22 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 18 23 obj = bpf_object__open_file(filename, NULL); 19 24 if (libbpf_get_error(obj)) {

+47 -1

tools/bpf/bpftool/Documentation/bpftool-btf.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **btf** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | 16 + { **-B** | **--base-btf** } } 16 17 17 18 *COMMANDS* := { **dump** | **help** } 18 19 ··· 73 72 OPTIONS 74 73 ======= 75 74 .. include:: common_options.rst 75 + 76 + -B, --base-btf *FILE* 77 + Pass a base BTF object. Base BTF objects are typically used 78 + with BTF objects for kernel modules. To avoid duplicating 79 + all kernel symbols required by modules, BTF objects for 80 + modules are "split", they are built incrementally on top of 81 + the kernel (vmlinux) BTF object. So the base BTF reference 82 + should usually point to the kernel BTF. 83 + 84 + When the main BTF object to process (for example, the 85 + module BTF to dump) is passed as a *FILE*, bpftool attempts 86 + to autodetect the path for the base object, and passing 87 + this option is optional. When the main BTF object is passed 88 + through other handles, this option becomes necessary. 
76 89 77 90 EXAMPLES 78 91 ======== ··· 232 217 **# bpftool btf dump prog tag b88e0a09b1d9759d** 233 218 234 219 **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name** 220 + 221 + | 222 + | **# bpftool btf dump file /sys/kernel/btf/i2c_smbus** 223 + | (or) 224 + | **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')** 225 + | **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux** 226 + 227 + :: 228 + 229 + [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2 230 + 'alert' type_id=393 bits_offset=0 231 + 'ara' type_id=56050 bits_offset=256 232 + [104849] STRUCT 'alert_data' size=12 vlen=3 233 + 'addr' type_id=16 bits_offset=0 234 + 'type' type_id=56053 bits_offset=32 235 + 'data' type_id=7 bits_offset=64 236 + [104850] PTR '(anon)' type_id=104848 237 + [104851] PTR '(anon)' type_id=104849 238 + [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static 239 + [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static 240 + [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1 241 + 'ara' type_id=56050 242 + [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static 243 + [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static 244 + [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2 245 + 'ara' type_id=56050 246 + 'id' type_id=56056 247 + [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static 248 + [104859] FUNC 'smbalert_work' type_id=9695 linkage=static 249 + [104860] FUNC 'smbus_alert' type_id=71367 linkage=static 250 + [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static

+2 -1

tools/bpf/bpftool/Documentation/bpftool-cgroup.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **cgroup** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | 16 + { **-f** | **--bpffs** } } 16 17 17 18 *COMMANDS* := 18 19 { **show** | **list** | **tree** | **attach** | **detach** | **help** }

+1 -1

tools/bpf/bpftool/Documentation/bpftool-feature.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **feature** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } 16 16 17 17 *COMMANDS* := { **probe** | **help** } 18 18

+8 -1

tools/bpf/bpftool/Documentation/bpftool-gen.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **gen** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | 16 + { **-L** | **--use-loader** } } 16 17 17 18 *COMMAND* := { **object** | **skeleton** | **help** } 18 19 ··· 152 151 OPTIONS 153 152 ======= 154 153 .. include:: common_options.rst 154 + 155 + -L, --use-loader 156 + For skeletons, generate a "light" skeleton (also known as "loader" 157 + skeleton). A light skeleton contains a loader eBPF program. It does 158 + not use the majority of the libbpf infrastructure, and does not need 159 + libelf. 155 160 156 161 EXAMPLES 157 162 ========

+2

tools/bpf/bpftool/Documentation/bpftool-iter.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **iter** *COMMAND* 14 14 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } 16 + 15 17 *COMMANDS* := { **pin** | **help** } 16 18 17 19 ITER COMMANDS

+2 -1

tools/bpf/bpftool/Documentation/bpftool-link.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **link** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | 16 + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } 16 17 17 18 *COMMANDS* := { **show** | **list** | **pin** | **help** } 18 19

+2 -1

tools/bpf/bpftool/Documentation/bpftool-map.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **map** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | 16 + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } 16 17 17 18 *COMMANDS* := 18 19 { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**

+1 -1

tools/bpf/bpftool/Documentation/bpftool-net.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **net** *COMMAND* 14 14 15 - *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } 16 16 17 17 *COMMANDS* := 18 18 { **show** | **list** | **attach** | **detach** | **help** }

+1 -1

tools/bpf/bpftool/Documentation/bpftool-perf.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **perf** *COMMAND* 14 14 15 - *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } 16 16 17 17 *COMMANDS* := 18 18 { **show** | **list** | **help** }

+33 -3

tools/bpf/bpftool/Documentation/bpftool-prog.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **prog** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | 16 + { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | 17 + { **-L** | **--use-loader** } } 16 18 17 19 *COMMANDS* := 18 20 { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** ··· 50 48 | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** 51 49 | } 52 50 | *ATTACH_TYPE* := { 53 - | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** 51 + | **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** 54 52 | } 55 53 | *METRICs* := { 56 - | **cycles** | **instructions** | **l1d_loads** | **llc_misses** 54 + | **cycles** | **instructions** | **l1d_loads** | **llc_misses** | 55 + | **itlb_misses** | **dtlb_misses** 57 56 | } 58 57 59 58 ··· 226 223 Do not automatically attempt to mount any virtual file system 227 224 (such as tracefs or BPF virtual file system) when necessary. 228 225 226 + -L, --use-loader 227 + Load program as a "loader" program. This is useful to debug 228 + the generation of such programs. When this option is in 229 + use, bpftool attempts to load the programs from the object 230 + file into the kernel, but does not pin them (therefore, the 231 + *PATH* must not be provided). 232 + 233 + When combined with the **-d**\ \|\ **--debug** option, 234 + additional debug messages are generated, and the execution 235 + of the loader program will use the **bpf_trace_printk**\ () 236 + helper to log each step of loading BTF, creating the maps, 237 + and loading the programs (see **bpftool prog tracelog** as 238 + a way to dump those messages). 
239 + 229 240 EXAMPLES 230 241 ======== 231 242 **# bpftool prog show** ··· 343 326 40176203 cycles (83.05%) 344 327 42518139 instructions # 1.06 insns per cycle (83.39%) 345 328 123 llc_misses # 2.89 LLC misses per million insns (83.15%) 329 + 330 + | 331 + | Output below is for the trace logs. 332 + | Run in separate terminals: 333 + | **# bpftool prog tracelog** 334 + | **# bpftool prog load -L -d file.o** 335 + 336 + :: 337 + 338 + bpftool-620059 [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5 339 + bpftool-620059 [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6 340 + bpftool-620059 [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7 341 + bpftool-620059 [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0

+1 -1

tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst

··· 12 12 13 13 **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* 14 14 15 - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } 16 16 17 17 *COMMANDS* := 18 18 { **show** | **list** | **dump** | **register** | **unregister** | **help** }

+6 -6

tools/bpf/bpftool/Documentation/bpftool.rst

··· 18 18 19 19 *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } 20 20 21 - *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } 22 - | { **-j** | **--json** } [{ **-p** | **--pretty** }] } 21 + *OPTIONS* := { { **-V** | **--version** } | 22 + { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } 23 23 24 24 *MAP-COMMANDS* := 25 - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** 26 - | **delete** | **pin** | **event_pipe** | **help** } 25 + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | 26 + **delete** | **pin** | **event_pipe** | **help** } 27 27 28 - *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** 29 - | **load** | **attach** | **detach** | **help** } 28 + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | 29 + **load** | **attach** | **detach** | **help** } 30 30 31 31 *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } 32 32

+37 -29

tools/bpf/bpftool/bash-completion/bpftool

··· 260 260 261 261 # Deal with options 262 262 if [[ ${words[cword]} == -* ]]; then 263 - local c='--version --json --pretty --bpffs --mapcompat --debug' 263 + local c='--version --json --pretty --bpffs --mapcompat --debug \ 264 + --use-loader --base-btf' 264 265 COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) 265 266 return 0 266 267 fi ··· 279 278 _sysfs_get_netdevs 280 279 return 0 281 280 ;; 282 - file|pinned) 281 + file|pinned|-B|--base-btf) 283 282 _filedir 284 283 return 0 285 284 ;; ··· 292 291 # Remove all options so completions don't have to deal with them. 293 292 local i 294 293 for (( i=1; i < ${#words[@]}; )); do 295 - if [[ ${words[i]::1} == - ]]; then 294 + if [[ ${words[i]::1} == - ]] && 295 + [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then 296 296 words=( "${words[@]:0:i}" "${words[@]:i+1}" ) 297 297 [[ $i -le $cword ]] && cword=$(( cword - 1 )) 298 298 else ··· 345 343 346 344 local PROG_TYPE='id pinned tag name' 347 345 local MAP_TYPE='id pinned name' 348 - local METRIC_TYPE='cycles instructions l1d_loads llc_misses' 346 + local METRIC_TYPE='cycles instructions l1d_loads llc_misses \ 347 + itlb_misses dtlb_misses' 349 348 case $command in 350 349 show|list) 351 350 [[ $prev != "$command" ]] && return 0 ··· 407 404 return 0 408 405 ;; 409 406 5) 410 - COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \ 411 - stream_parser flow_dissector' -- "$cur" ) ) 407 + local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \ 408 + skb_verdict stream_verdict stream_parser \ 409 + flow_dissector' 410 + COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) ) 412 411 return 0 413 412 ;; 414 413 6) ··· 469 464 470 465 case $prev in 471 466 type) 472 - COMPREPLY=( $( compgen -W "socket kprobe \ 467 + local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \ 473 468 kretprobe classifier flow_dissector \ 474 469 action tracepoint raw_tracepoint \ 475 470 xdp perf_event cgroup/skb cgroup/sock \ ··· 484 479 cgroup/post_bind4 cgroup/post_bind6 \ 485 480 
cgroup/sysctl cgroup/getsockopt \ 486 481 cgroup/setsockopt cgroup/sock_release struct_ops \ 487 - fentry fexit freplace sk_lookup" -- \ 488 - "$cur" ) ) 482 + fentry fexit freplace sk_lookup' 483 + COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) ) 489 484 return 0 490 485 ;; 491 486 id) ··· 703 698 return 0 704 699 ;; 705 700 type) 706 - COMPREPLY=( $( compgen -W 'hash array prog_array \ 707 - perf_event_array percpu_hash percpu_array \ 708 - stack_trace cgroup_array lru_hash \ 701 + local BPFTOOL_MAP_CREATE_TYPES='hash array \ 702 + prog_array perf_event_array percpu_hash \ 703 + percpu_array stack_trace cgroup_array lru_hash \ 709 704 lru_percpu_hash lpm_trie array_of_maps \ 710 705 hash_of_maps devmap devmap_hash sockmap cpumap \ 711 706 xskmap sockhash cgroup_storage reuseport_sockarray \ 712 707 percpu_cgroup_storage queue stack sk_storage \ 713 - struct_ops inode_storage task_storage' -- \ 714 - "$cur" ) ) 708 + struct_ops inode_storage task_storage ringbuf' 709 + COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) 715 710 return 0 716 711 ;; 717 712 key|value|flags|entries) ··· 1022 1017 return 0 1023 1018 ;; 1024 1019 attach|detach) 1025 - local ATTACH_TYPES='ingress egress sock_create sock_ops \ 1026 - device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ 1020 + local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \ 1021 + sock_create sock_ops device \ 1022 + bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ 1027 1023 getpeername4 getpeername6 getsockname4 getsockname6 \ 1028 1024 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ 1029 1025 setsockopt sock_release' 1030 1026 local ATTACH_FLAGS='multi override' 1031 1027 local PROG_TYPE='id pinned tag name' 1032 - case $prev in 1033 - $command) 1034 - _filedir 1035 - return 0 1036 - ;; 1037 - ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ 1038 - post_bind4|post_bind6|connect4|connect6|getpeername4|\ 1039 - 
getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ 1040 - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) 1028 + # Check for $prev = $command first 1029 + if [ $prev = $command ]; then 1030 + _filedir 1031 + return 0 1032 + # Then check for attach type. This is done outside of the 1033 + # "case $prev in" to avoid writing the whole list of attach 1034 + # types again as pattern to match (where we cannot reuse 1035 + # our variable). 1036 + elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then 1041 1037 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ 1042 1038 "$cur" ) ) 1043 1039 return 0 1044 - ;; 1040 + fi 1041 + # case/esac for the other cases 1042 + case $prev in 1045 1043 id) 1046 1044 _bpftool_get_prog_ids 1047 1045 return 0 1048 1046 ;; 1049 1047 *) 1050 - if ! _bpftool_search_list "$ATTACH_TYPES"; then 1051 - COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \ 1052 - "$cur" ) ) 1048 + if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then 1049 + COMPREPLY=( $( compgen -W \ 1050 + "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) ) 1053 1051 elif [[ "$command" == "attach" ]]; then 1054 1052 # We have an attach type on the command line, 1055 1053 # but it is not the previous word, or

+4 -7

tools/bpf/bpftool/btf.c

··· 580 580 } 581 581 582 582 if (!btf) { 583 - err = btf__get_from_id(btf_id, &btf); 583 + btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); 584 + err = libbpf_get_error(btf); 584 585 if (err) { 585 586 p_err("get btf by id (%u): %s", btf_id, strerror(err)); 586 - goto done; 587 - } 588 - if (!btf) { 589 - err = -ENOENT; 590 - p_err("can't find btf with ID (%u)", btf_id); 591 587 goto done; 592 588 } 593 589 } ··· 981 985 " FORMAT := { raw | c }\n" 982 986 " " HELP_SPEC_MAP "\n" 983 987 " " HELP_SPEC_PROGRAM "\n" 984 - " " HELP_SPEC_OPTIONS "\n" 988 + " " HELP_SPEC_OPTIONS " |\n" 989 + " {-B|--base-btf} }\n" 985 990 "", 986 991 bin_name, "btf"); 987 992

+4 -2

tools/bpf/bpftool/btf_dumper.c

··· 64 64 } 65 65 info = &prog_info->info; 66 66 67 - if (!info->btf_id || !info->nr_func_info || 68 - btf__get_from_id(info->btf_id, &prog_btf)) 67 + if (!info->btf_id || !info->nr_func_info) 68 + goto print; 69 + prog_btf = btf__load_from_kernel_by_id(info->btf_id); 70 + if (libbpf_get_error(prog_btf)) 69 71 goto print; 70 72 finfo = u64_to_ptr(info->func_info); 71 73 func_type = btf__type_by_id(prog_btf, finfo->type_id);

+2 -1

tools/bpf/bpftool/cgroup.c

··· 501 501 HELP_SPEC_ATTACH_TYPES "\n" 502 502 " " HELP_SPEC_ATTACH_FLAGS "\n" 503 503 " " HELP_SPEC_PROGRAM "\n" 504 - " " HELP_SPEC_OPTIONS "\n" 504 + " " HELP_SPEC_OPTIONS " |\n" 505 + " {-f|--bpffs} }\n" 505 506 "", 506 507 bin_name, argv[-2]); 507 508

+6

tools/bpf/bpftool/common.c

··· 67 67 [BPF_MODIFY_RETURN] = "mod_ret", 68 68 [BPF_LSM_MAC] = "lsm_mac", 69 69 [BPF_SK_LOOKUP] = "sk_lookup", 70 + [BPF_TRACE_ITER] = "trace_iter", 71 + [BPF_XDP_DEVMAP] = "xdp_devmap", 72 + [BPF_XDP_CPUMAP] = "xdp_cpumap", 73 + [BPF_XDP] = "xdp", 74 + [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", 75 + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", 70 76 }; 71 77 72 78 void p_err(const char *fmt, ...)

+1

tools/bpf/bpftool/feature.c

··· 1005 1005 " %1$s %2$s help\n" 1006 1006 "\n" 1007 1007 " COMPONENT := { kernel | dev NAME }\n" 1008 + " " HELP_SPEC_OPTIONS " }\n" 1008 1009 "", 1009 1010 bin_name, argv[-2]); 1010 1011

+2 -1

tools/bpf/bpftool/gen.c

··· 1026 1026 " %1$s %2$s skeleton FILE [name OBJECT_NAME]\n" 1027 1027 " %1$s %2$s help\n" 1028 1028 "\n" 1029 - " " HELP_SPEC_OPTIONS "\n" 1029 + " " HELP_SPEC_OPTIONS " |\n" 1030 + " {-L|--use-loader} }\n" 1030 1031 "", 1031 1032 bin_name, "gen"); 1032 1033

+2

tools/bpf/bpftool/iter.c

··· 97 97 fprintf(stderr, 98 98 "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n" 99 99 " %1$s %2$s help\n" 100 + "\n" 100 101 " " HELP_SPEC_MAP "\n" 102 + " " HELP_SPEC_OPTIONS " }\n" 101 103 "", 102 104 bin_name, "iter"); 103 105

+2 -1

tools/bpf/bpftool/link.c

··· 401 401 " %1$s %2$s help\n" 402 402 "\n" 403 403 " " HELP_SPEC_LINK "\n" 404 - " " HELP_SPEC_OPTIONS "\n" 404 + " " HELP_SPEC_OPTIONS " |\n" 405 + " {-f|--bpffs} | {-n|--nomount} }\n" 405 406 "", 406 407 bin_name, argv[-2]); 407 408

+2 -1

tools/bpf/bpftool/main.c

··· 64 64 " %s version\n" 65 65 "\n" 66 66 " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n" 67 - " " HELP_SPEC_OPTIONS "\n" 67 + " " HELP_SPEC_OPTIONS " |\n" 68 + " {-V|--version} }\n" 68 69 "", 69 70 bin_name, bin_name, bin_name); 70 71

+1 -2

tools/bpf/bpftool/main.h

··· 57 57 #define HELP_SPEC_PROGRAM \ 58 58 "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" 59 59 #define HELP_SPEC_OPTIONS \ 60 - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ 61 - "\t {-m|--mapcompat} | {-n|--nomount} }" 60 + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" 62 61 #define HELP_SPEC_MAP \ 63 62 "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" 64 63 #define HELP_SPEC_LINK \

+10 -9

tools/bpf/bpftool/map.c

··· 807 807 } else if (info->btf_value_type_id) { 808 808 int err; 809 809 810 - err = btf__get_from_id(info->btf_id, &btf); 811 - if (err || !btf) { 810 + btf = btf__load_from_kernel_by_id(info->btf_id); 811 + err = libbpf_get_error(btf); 812 + if (err) { 812 813 p_err("failed to get btf"); 813 - btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH); 814 + btf = ERR_PTR(err); 814 815 } 815 816 } 816 817 ··· 1040 1039 void *value) 1041 1040 { 1042 1041 json_writer_t *btf_wtr; 1043 - struct btf *btf = NULL; 1044 - int err; 1042 + struct btf *btf; 1045 1043 1046 - err = btf__get_from_id(info->btf_id, &btf); 1047 - if (err) { 1044 + btf = btf__load_from_kernel_by_id(info->btf_id); 1045 + if (libbpf_get_error(btf)) { 1048 1046 p_err("failed to get btf"); 1049 1047 return; 1050 1048 } ··· 1466 1466 " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" 1467 1467 " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" 1468 1468 " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" 1469 - " task_storage }\n" 1470 - " " HELP_SPEC_OPTIONS "\n" 1469 + " task_storage }\n" 1470 + " " HELP_SPEC_OPTIONS " |\n" 1471 + " {-f|--bpffs} | {-n|--nomount} }\n" 1471 1472 "", 1472 1473 bin_name, argv[-2]); 1473 1474

+1

tools/bpf/bpftool/net.c

··· 729 729 "\n" 730 730 " " HELP_SPEC_PROGRAM "\n" 731 731 " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n" 732 + " " HELP_SPEC_OPTIONS " }\n" 732 733 "\n" 733 734 "Note: Only xdp and tc attachments are supported now.\n" 734 735 " For progs attached to cgroups, use \"bpftool cgroup\"\n"

+4 -1

tools/bpf/bpftool/perf.c

··· 231 231 static int do_help(int argc, char **argv) 232 232 { 233 233 fprintf(stderr, 234 - "Usage: %1$s %2$s { show | list | help }\n" 234 + "Usage: %1$s %2$s { show | list }\n" 235 + " %1$s %2$s help }\n" 236 + "\n" 237 + " " HELP_SPEC_OPTIONS " }\n" 235 238 "", 236 239 bin_name, argv[-2]); 237 240

+25 -12

tools/bpf/bpftool/prog.c

··· 249 249 struct bpf_map_info map_info; 250 250 struct btf_var_secinfo *vsi; 251 251 bool printed_header = false; 252 - struct btf *btf = NULL; 253 252 unsigned int i, vlen; 254 253 void *value = NULL; 255 254 const char *name; 255 + struct btf *btf; 256 256 int err; 257 257 258 258 if (!num_maps) ··· 263 263 if (!value) 264 264 return; 265 265 266 - err = btf__get_from_id(map_info.btf_id, &btf); 267 - if (err || !btf) 266 + btf = btf__load_from_kernel_by_id(map_info.btf_id); 267 + if (libbpf_get_error(btf)) 268 268 goto out_free; 269 269 270 270 t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); ··· 646 646 member_len = info->xlated_prog_len; 647 647 } 648 648 649 - if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { 650 - p_err("failed to get btf"); 651 - return -1; 649 + if (info->btf_id) { 650 + btf = btf__load_from_kernel_by_id(info->btf_id); 651 + if (libbpf_get_error(btf)) { 652 + p_err("failed to get btf"); 653 + return -1; 654 + } 652 655 } 653 656 654 657 func_info = u64_to_ptr(info->func_info); ··· 783 780 linum); 784 781 kernel_syms_destroy(&dd); 785 782 } 783 + 784 + btf__free(btf); 786 785 787 786 return 0; 788 787 } ··· 2007 2002 struct bpf_prog_info_linear *info_linear; 2008 2003 struct bpf_func_info *func_info; 2009 2004 const struct btf_type *t; 2005 + struct btf *btf = NULL; 2010 2006 char *name = NULL; 2011 - struct btf *btf; 2012 2007 2013 2008 info_linear = bpf_program__get_prog_info_linear( 2014 2009 tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); ··· 2017 2012 return NULL; 2018 2013 } 2019 2014 2020 - if (info_linear->info.btf_id == 0 || 2021 - btf__get_from_id(info_linear->info.btf_id, &btf)) { 2015 + if (info_linear->info.btf_id == 0) { 2022 2016 p_err("prog FD %d doesn't have valid btf", tgt_fd); 2017 + goto out; 2018 + } 2019 + 2020 + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); 2021 + if (libbpf_get_error(btf)) { 2022 + p_err("failed to load btf for prog FD %d", tgt_fd); 2023 2023 goto out; 2024 2024 } 2025 
2025 ··· 2037 2027 } 2038 2028 name = strdup(btf__name_by_offset(btf, t->name_off)); 2039 2029 out: 2030 + btf__free(btf); 2040 2031 free(info_linear); 2041 2032 return name; 2042 2033 } ··· 2256 2245 " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" 2257 2246 " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" 2258 2247 " struct_ops | fentry | fexit | freplace | sk_lookup }\n" 2259 - " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" 2260 - " flow_dissector }\n" 2248 + " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n" 2249 + " stream_parser | flow_dissector }\n" 2261 2250 " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" 2262 - " " HELP_SPEC_OPTIONS "\n" 2251 + " " HELP_SPEC_OPTIONS " |\n" 2252 + " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" 2253 + " {-L|--use-loader} }\n" 2263 2254 "", 2264 2255 bin_name, argv[-2]); 2265 2256

+1 -1

tools/bpf/bpftool/struct_ops.c

··· 572 572 " %1$s %2$s unregister STRUCT_OPS_MAP\n" 573 573 " %1$s %2$s help\n" 574 574 "\n" 575 - " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n" 576 575 " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n" 576 + " " HELP_SPEC_OPTIONS " }\n" 577 577 "", 578 578 bin_name, argv[-2]); 579 579

+7 -6

tools/bpf/resolve_btfids/main.c

··· 291 291 sh->sh_addralign = expected; 292 292 293 293 if (gelf_update_shdr(scn, sh) == 0) { 294 - printf("FAILED cannot update section header: %s\n", 294 + pr_err("FAILED cannot update section header: %s\n", 295 295 elf_errmsg(-1)); 296 296 return -1; 297 297 } ··· 317 317 318 318 elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL); 319 319 if (!elf) { 320 + close(fd); 320 321 pr_err("FAILED cannot create ELF descriptor: %s\n", 321 322 elf_errmsg(-1)); 322 323 return -1; ··· 485 484 err = libbpf_get_error(btf); 486 485 if (err) { 487 486 pr_err("FAILED: load BTF from %s: %s\n", 488 - obj->path, strerror(-err)); 487 + obj->btf ?: obj->path, strerror(-err)); 489 488 return -1; 490 489 } 491 490 ··· 556 555 int i; 557 556 558 557 if (!id->id) { 559 - pr_err("FAILED unresolved symbol %s\n", id->name); 560 - return -EINVAL; 558 + pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); 561 559 } 562 560 563 561 for (i = 0; i < id->addr_cnt; i++) { ··· 734 734 735 735 err = 0; 736 736 out: 737 - if (obj.efile.elf) 737 + if (obj.efile.elf) { 738 738 elf_end(obj.efile.elf); 739 - close(obj.efile.fd); 739 + close(obj.efile.fd); 740 + } 740 741 return err; 741 742 }

+1 -1

tools/lib/bpf/Build

··· 1 1 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ 2 2 netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ 3 - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o 3 + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o

+36 -11

tools/lib/bpf/btf.c

··· 1180 1180 1181 1181 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); 1182 1182 1183 - int btf__load(struct btf *btf) 1183 + int btf__load_into_kernel(struct btf *btf) 1184 1184 { 1185 1185 __u32 log_buf_size = 0, raw_size; 1186 1186 char *log_buf = NULL; ··· 1228 1228 free(log_buf); 1229 1229 return libbpf_err(err); 1230 1230 } 1231 + int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); 1231 1232 1232 1233 int btf__fd(const struct btf *btf) 1233 1234 { ··· 1383 1382 return btf; 1384 1383 } 1385 1384 1385 + struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) 1386 + { 1387 + struct btf *btf; 1388 + int btf_fd; 1389 + 1390 + btf_fd = bpf_btf_get_fd_by_id(id); 1391 + if (btf_fd < 0) 1392 + return libbpf_err_ptr(-errno); 1393 + 1394 + btf = btf_get_from_fd(btf_fd, base_btf); 1395 + close(btf_fd); 1396 + 1397 + return libbpf_ptr(btf); 1398 + } 1399 + 1400 + struct btf *btf__load_from_kernel_by_id(__u32 id) 1401 + { 1402 + return btf__load_from_kernel_by_id_split(id, NULL); 1403 + } 1404 + 1386 1405 int btf__get_from_id(__u32 id, struct btf **btf) 1387 1406 { 1388 1407 struct btf *res; 1389 - int err, btf_fd; 1408 + int err; 1390 1409 1391 1410 *btf = NULL; 1392 - btf_fd = bpf_btf_get_fd_by_id(id); 1393 - if (btf_fd < 0) 1394 - return libbpf_err(-errno); 1395 - 1396 - res = btf_get_from_fd(btf_fd, NULL); 1411 + res = btf__load_from_kernel_by_id(id); 1397 1412 err = libbpf_get_error(res); 1398 - 1399 - close(btf_fd); 1400 1413 1401 1414 if (err) 1402 1415 return libbpf_err(err); ··· 4036 4021 */ 4037 4022 if (d->hypot_adjust_canon) 4038 4023 continue; 4039 - 4024 + 4040 4025 if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) 4041 4026 d->map[t_id] = c_id; 4042 4027 ··· 4409 4394 * Probe few well-known locations for vmlinux kernel image and try to load BTF 4410 4395 * data out of it to use for target BTF. 
4411 4396 */ 4412 - struct btf *libbpf_find_kernel_btf(void) 4397 + struct btf *btf__load_vmlinux_btf(void) 4413 4398 { 4414 4399 struct { 4415 4400 const char *path_fmt; ··· 4453 4438 4454 4439 pr_warn("failed to find valid kernel BTF\n"); 4455 4440 return libbpf_err_ptr(-ESRCH); 4441 + } 4442 + 4443 + struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf"))); 4444 + 4445 + struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf) 4446 + { 4447 + char path[80]; 4448 + 4449 + snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name); 4450 + return btf__parse_split(path, vmlinux_btf); 4456 4451 } 4457 4452 4458 4453 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)

+28 -3

tools/lib/bpf/btf.h

··· 44 44 LIBBPF_API struct btf *btf__parse_raw(const char *path); 45 45 LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); 46 46 47 + LIBBPF_API struct btf *btf__load_vmlinux_btf(void); 48 + LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); 49 + LIBBPF_API struct btf *libbpf_find_kernel_btf(void); 50 + 51 + LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); 52 + LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); 53 + LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); 54 + 47 55 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); 48 56 LIBBPF_API int btf__load(struct btf *btf); 57 + LIBBPF_API int btf__load_into_kernel(struct btf *btf); 49 58 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, 50 59 const char *type_name); 51 60 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, ··· 75 66 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); 76 67 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); 77 68 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); 78 - LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); 79 69 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, 80 70 __u32 expected_key_size, 81 71 __u32 expected_value_size, ··· 96 88 void **line_info, __u32 *cnt); 97 89 LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); 98 90 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); 99 - 100 - LIBBPF_API struct btf *libbpf_find_kernel_btf(void); 101 91 102 92 LIBBPF_API int btf__find_str(struct btf *btf, const char *s); 103 93 LIBBPF_API int btf__add_str(struct btf *btf, const char *s); ··· 189 183 LIBBPF_API int 190 184 btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, 191 185 const struct 
btf_dump_emit_type_decl_opts *opts); 186 + 187 + 188 + struct btf_dump_type_data_opts { 189 + /* size of this struct, for forward/backward compatibility */ 190 + size_t sz; 191 + const char *indent_str; 192 + int indent_level; 193 + /* below match "show" flags for bpf_show_snprintf() */ 194 + bool compact; /* no newlines/indentation */ 195 + bool skip_names; /* skip member/type names */ 196 + bool emit_zeroes; /* show 0-valued fields */ 197 + size_t :0; 198 + }; 199 + #define btf_dump_type_data_opts__last_field emit_zeroes 200 + 201 + LIBBPF_API int 202 + btf_dump__dump_type_data(struct btf_dump *d, __u32 id, 203 + const void *data, size_t data_sz, 204 + const struct btf_dump_type_data_opts *opts); 192 205 193 206 /* 194 207 * A set of helpers for easier BTF types handling

+866 -5

tools/lib/bpf/btf_dump.c

··· 10 10 #include <stddef.h> 11 11 #include <stdlib.h> 12 12 #include <string.h> 13 + #include <ctype.h> 14 + #include <endian.h> 13 15 #include <errno.h> 14 16 #include <linux/err.h> 15 17 #include <linux/btf.h> ··· 55 53 __u8 referenced: 1; 56 54 }; 57 55 56 + /* indent string length; one indent string is added for each indent level */ 57 + #define BTF_DATA_INDENT_STR_LEN 32 58 + 59 + /* 60 + * Common internal data for BTF type data dump operations. 61 + */ 62 + struct btf_dump_data { 63 + const void *data_end; /* end of valid data to show */ 64 + bool compact; 65 + bool skip_names; 66 + bool emit_zeroes; 67 + __u8 indent_lvl; /* base indent level */ 68 + char indent_str[BTF_DATA_INDENT_STR_LEN]; 69 + /* below are used during iteration */ 70 + int depth; 71 + bool is_array_member; 72 + bool is_array_terminated; 73 + bool is_array_char; 74 + }; 75 + 58 76 struct btf_dump { 59 77 const struct btf *btf; 60 78 const struct btf_ext *btf_ext; ··· 82 60 struct btf_dump_opts opts; 83 61 int ptr_sz; 84 62 bool strip_mods; 63 + bool skip_anon_defs; 85 64 int last_id; 86 65 87 66 /* per-type auxiliary state */ ··· 112 89 * name occurrences 113 90 */ 114 91 struct hashmap *ident_names; 92 + /* 93 + * data for typed display; allocated if needed. 94 + */ 95 + struct btf_dump_data *typed_dump; 115 96 }; 116 97 117 98 static size_t str_hash_fn(const void *key, void *ctx) ··· 792 765 break; 793 766 case BTF_KIND_FUNC_PROTO: { 794 767 const struct btf_param *p = btf_params(t); 795 - __u16 vlen = btf_vlen(t); 768 + __u16 n = btf_vlen(t); 796 769 int i; 797 770 798 771 btf_dump_emit_type(d, t->type, cont_id); 799 - for (i = 0; i < vlen; i++, p++) 772 + for (i = 0; i < n; i++, p++) 800 773 btf_dump_emit_type(d, p->type, cont_id); 801 774 802 775 break; ··· 879 852 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, 880 853 const struct btf_type *t) 881 854 { 882 - btf_dump_printf(d, "%s %s", 855 + btf_dump_printf(d, "%s%s%s", 883 856 btf_is_struct(t) ? 
"struct" : "union", 857 + t->name_off ? " " : "", 884 858 btf_dump_type_name(d, id)); 885 859 } 886 860 ··· 1287 1259 case BTF_KIND_UNION: 1288 1260 btf_dump_emit_mods(d, decls); 1289 1261 /* inline anonymous struct/union */ 1290 - if (t->name_off == 0) 1262 + if (t->name_off == 0 && !d->skip_anon_defs) 1291 1263 btf_dump_emit_struct_def(d, id, t, lvl); 1292 1264 else 1293 1265 btf_dump_emit_struct_fwd(d, id, t); ··· 1295 1267 case BTF_KIND_ENUM: 1296 1268 btf_dump_emit_mods(d, decls); 1297 1269 /* inline anonymous enum */ 1298 - if (t->name_off == 0) 1270 + if (t->name_off == 0 && !d->skip_anon_defs) 1299 1271 btf_dump_emit_enum_def(d, id, t, lvl); 1300 1272 else 1301 1273 btf_dump_emit_enum_fwd(d, id, t); ··· 1420 1392 btf_dump_emit_name(d, fname, last_was_ptr); 1421 1393 } 1422 1394 1395 + /* show type name as (type_name) */ 1396 + static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, 1397 + bool top_level) 1398 + { 1399 + const struct btf_type *t; 1400 + 1401 + /* for array members, we don't bother emitting type name for each 1402 + * member to avoid the redundancy of 1403 + * .name = (char[4])[(char)'f',(char)'o',(char)'o',] 1404 + */ 1405 + if (d->typed_dump->is_array_member) 1406 + return; 1407 + 1408 + /* avoid type name specification for variable/section; it will be done 1409 + * for the associated variable value(s). 
1410 + */ 1411 + t = btf__type_by_id(d->btf, id); 1412 + if (btf_is_var(t) || btf_is_datasec(t)) 1413 + return; 1414 + 1415 + if (top_level) 1416 + btf_dump_printf(d, "("); 1417 + 1418 + d->skip_anon_defs = true; 1419 + d->strip_mods = true; 1420 + btf_dump_emit_type_decl(d, id, "", 0); 1421 + d->strip_mods = false; 1422 + d->skip_anon_defs = false; 1423 + 1424 + if (top_level) 1425 + btf_dump_printf(d, ")"); 1426 + } 1427 + 1423 1428 /* return number of duplicates (occurrences) of a given name */ 1424 1429 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, 1425 1430 const char *orig_name) ··· 1502 1441 static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) 1503 1442 { 1504 1443 return btf_dump_resolve_name(d, id, d->ident_names); 1444 + } 1445 + 1446 + static int btf_dump_dump_type_data(struct btf_dump *d, 1447 + const char *fname, 1448 + const struct btf_type *t, 1449 + __u32 id, 1450 + const void *data, 1451 + __u8 bits_offset, 1452 + __u8 bit_sz); 1453 + 1454 + static const char *btf_dump_data_newline(struct btf_dump *d) 1455 + { 1456 + return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n"; 1457 + } 1458 + 1459 + static const char *btf_dump_data_delim(struct btf_dump *d) 1460 + { 1461 + return d->typed_dump->depth == 0 ? "" : ","; 1462 + } 1463 + 1464 + static void btf_dump_data_pfx(struct btf_dump *d) 1465 + { 1466 + int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth; 1467 + 1468 + if (d->typed_dump->compact) 1469 + return; 1470 + 1471 + for (i = 0; i < lvl; i++) 1472 + btf_dump_printf(d, "%s", d->typed_dump->indent_str); 1473 + } 1474 + 1475 + /* A macro is used here as btf_type_value[s]() appends format specifiers 1476 + * to the format specifier passed in; these do the work of appending 1477 + * delimiters etc while the caller simply has to specify the type values 1478 + * in the format specifier + value(s). 1479 + */ 1480 + #define btf_dump_type_values(d, fmt, ...) 
\ 1481 + btf_dump_printf(d, fmt "%s%s", \ 1482 + ##__VA_ARGS__, \ 1483 + btf_dump_data_delim(d), \ 1484 + btf_dump_data_newline(d)) 1485 + 1486 + static int btf_dump_unsupported_data(struct btf_dump *d, 1487 + const struct btf_type *t, 1488 + __u32 id) 1489 + { 1490 + btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t)); 1491 + return -ENOTSUP; 1492 + } 1493 + 1494 + static int btf_dump_get_bitfield_value(struct btf_dump *d, 1495 + const struct btf_type *t, 1496 + const void *data, 1497 + __u8 bits_offset, 1498 + __u8 bit_sz, 1499 + __u64 *value) 1500 + { 1501 + __u16 left_shift_bits, right_shift_bits; 1502 + __u8 nr_copy_bits, nr_copy_bytes; 1503 + const __u8 *bytes = data; 1504 + int sz = t->size; 1505 + __u64 num = 0; 1506 + int i; 1507 + 1508 + /* Maximum supported bitfield size is 64 bits */ 1509 + if (sz > 8) { 1510 + pr_warn("unexpected bitfield size %d\n", sz); 1511 + return -EINVAL; 1512 + } 1513 + 1514 + /* Bitfield value retrieval is done in two steps; first relevant bytes are 1515 + * stored in num, then we left/right shift num to eliminate irrelevant bits. 
1516 + */ 1517 + nr_copy_bits = bit_sz + bits_offset; 1518 + nr_copy_bytes = t->size; 1519 + #if __BYTE_ORDER == __LITTLE_ENDIAN 1520 + for (i = nr_copy_bytes - 1; i >= 0; i--) 1521 + num = num * 256 + bytes[i]; 1522 + #elif __BYTE_ORDER == __BIG_ENDIAN 1523 + for (i = 0; i < nr_copy_bytes; i++) 1524 + num = num * 256 + bytes[i]; 1525 + #else 1526 + # error "Unrecognized __BYTE_ORDER__" 1527 + #endif 1528 + left_shift_bits = 64 - nr_copy_bits; 1529 + right_shift_bits = 64 - bit_sz; 1530 + 1531 + *value = (num << left_shift_bits) >> right_shift_bits; 1532 + 1533 + return 0; 1534 + } 1535 + 1536 + static int btf_dump_bitfield_check_zero(struct btf_dump *d, 1537 + const struct btf_type *t, 1538 + const void *data, 1539 + __u8 bits_offset, 1540 + __u8 bit_sz) 1541 + { 1542 + __u64 check_num; 1543 + int err; 1544 + 1545 + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num); 1546 + if (err) 1547 + return err; 1548 + if (check_num == 0) 1549 + return -ENODATA; 1550 + return 0; 1551 + } 1552 + 1553 + static int btf_dump_bitfield_data(struct btf_dump *d, 1554 + const struct btf_type *t, 1555 + const void *data, 1556 + __u8 bits_offset, 1557 + __u8 bit_sz) 1558 + { 1559 + __u64 print_num; 1560 + int err; 1561 + 1562 + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num); 1563 + if (err) 1564 + return err; 1565 + 1566 + btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num); 1567 + 1568 + return 0; 1569 + } 1570 + 1571 + /* ints, floats and ptrs */ 1572 + static int btf_dump_base_type_check_zero(struct btf_dump *d, 1573 + const struct btf_type *t, 1574 + __u32 id, 1575 + const void *data) 1576 + { 1577 + static __u8 bytecmp[16] = {}; 1578 + int nr_bytes; 1579 + 1580 + /* For pointer types, pointer size is not defined on a per-type basis. 1581 + * On dump creation however, we store the pointer size. 
1582 + */ 1583 + if (btf_kind(t) == BTF_KIND_PTR) 1584 + nr_bytes = d->ptr_sz; 1585 + else 1586 + nr_bytes = t->size; 1587 + 1588 + if (nr_bytes < 1 || nr_bytes > 16) { 1589 + pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id); 1590 + return -EINVAL; 1591 + } 1592 + 1593 + if (memcmp(data, bytecmp, nr_bytes) == 0) 1594 + return -ENODATA; 1595 + return 0; 1596 + } 1597 + 1598 + static bool ptr_is_aligned(const void *data, int data_sz) 1599 + { 1600 + return ((uintptr_t)data) % data_sz == 0; 1601 + } 1602 + 1603 + static int btf_dump_int_data(struct btf_dump *d, 1604 + const struct btf_type *t, 1605 + __u32 type_id, 1606 + const void *data, 1607 + __u8 bits_offset) 1608 + { 1609 + __u8 encoding = btf_int_encoding(t); 1610 + bool sign = encoding & BTF_INT_SIGNED; 1611 + int sz = t->size; 1612 + 1613 + if (sz == 0) { 1614 + pr_warn("unexpected size %d for id [%u]\n", sz, type_id); 1615 + return -EINVAL; 1616 + } 1617 + 1618 + /* handle packed int data - accesses of integers not aligned on 1619 + * int boundaries can cause problems on some platforms. 1620 + */ 1621 + if (!ptr_is_aligned(data, sz)) 1622 + return btf_dump_bitfield_data(d, t, data, 0, 0); 1623 + 1624 + switch (sz) { 1625 + case 16: { 1626 + const __u64 *ints = data; 1627 + __u64 lsi, msi; 1628 + 1629 + /* avoid use of __int128 as some 32-bit platforms do not 1630 + * support it. 
1631 + */ 1632 + #if __BYTE_ORDER == __LITTLE_ENDIAN 1633 + lsi = ints[0]; 1634 + msi = ints[1]; 1635 + #elif __BYTE_ORDER == __BIG_ENDIAN 1636 + lsi = ints[1]; 1637 + msi = ints[0]; 1638 + #else 1639 + # error "Unrecognized __BYTE_ORDER__" 1640 + #endif 1641 + if (msi == 0) 1642 + btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi); 1643 + else 1644 + btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi, 1645 + (unsigned long long)lsi); 1646 + break; 1647 + } 1648 + case 8: 1649 + if (sign) 1650 + btf_dump_type_values(d, "%lld", *(long long *)data); 1651 + else 1652 + btf_dump_type_values(d, "%llu", *(unsigned long long *)data); 1653 + break; 1654 + case 4: 1655 + if (sign) 1656 + btf_dump_type_values(d, "%d", *(__s32 *)data); 1657 + else 1658 + btf_dump_type_values(d, "%u", *(__u32 *)data); 1659 + break; 1660 + case 2: 1661 + if (sign) 1662 + btf_dump_type_values(d, "%d", *(__s16 *)data); 1663 + else 1664 + btf_dump_type_values(d, "%u", *(__u16 *)data); 1665 + break; 1666 + case 1: 1667 + if (d->typed_dump->is_array_char) { 1668 + /* check for null terminator */ 1669 + if (d->typed_dump->is_array_terminated) 1670 + break; 1671 + if (*(char *)data == '\0') { 1672 + d->typed_dump->is_array_terminated = true; 1673 + break; 1674 + } 1675 + if (isprint(*(char *)data)) { 1676 + btf_dump_type_values(d, "'%c'", *(char *)data); 1677 + break; 1678 + } 1679 + } 1680 + if (sign) 1681 + btf_dump_type_values(d, "%d", *(__s8 *)data); 1682 + else 1683 + btf_dump_type_values(d, "%u", *(__u8 *)data); 1684 + break; 1685 + default: 1686 + pr_warn("unexpected sz %d for id [%u]\n", sz, type_id); 1687 + return -EINVAL; 1688 + } 1689 + return 0; 1690 + } 1691 + 1692 + union float_data { 1693 + long double ld; 1694 + double d; 1695 + float f; 1696 + }; 1697 + 1698 + static int btf_dump_float_data(struct btf_dump *d, 1699 + const struct btf_type *t, 1700 + __u32 type_id, 1701 + const void *data) 1702 + { 1703 + const union float_data *flp = data; 1704 + union float_data 
fl; 1705 + int sz = t->size; 1706 + 1707 + /* handle unaligned data; copy to local union */ 1708 + if (!ptr_is_aligned(data, sz)) { 1709 + memcpy(&fl, data, sz); 1710 + flp = &fl; 1711 + } 1712 + 1713 + switch (sz) { 1714 + case 16: 1715 + btf_dump_type_values(d, "%Lf", flp->ld); 1716 + break; 1717 + case 8: 1718 + btf_dump_type_values(d, "%lf", flp->d); 1719 + break; 1720 + case 4: 1721 + btf_dump_type_values(d, "%f", flp->f); 1722 + break; 1723 + default: 1724 + pr_warn("unexpected size %d for id [%u]\n", sz, type_id); 1725 + return -EINVAL; 1726 + } 1727 + return 0; 1728 + } 1729 + 1730 + static int btf_dump_var_data(struct btf_dump *d, 1731 + const struct btf_type *v, 1732 + __u32 id, 1733 + const void *data) 1734 + { 1735 + enum btf_func_linkage linkage = btf_var(v)->linkage; 1736 + const struct btf_type *t; 1737 + const char *l; 1738 + __u32 type_id; 1739 + 1740 + switch (linkage) { 1741 + case BTF_FUNC_STATIC: 1742 + l = "static "; 1743 + break; 1744 + case BTF_FUNC_EXTERN: 1745 + l = "extern "; 1746 + break; 1747 + case BTF_FUNC_GLOBAL: 1748 + default: 1749 + l = ""; 1750 + break; 1751 + } 1752 + 1753 + /* format of output here is [linkage] [type] [varname] = (type)value, 1754 + * for example "static int cpu_profile_flip = (int)1" 1755 + */ 1756 + btf_dump_printf(d, "%s", l); 1757 + type_id = v->type; 1758 + t = btf__type_by_id(d->btf, type_id); 1759 + btf_dump_emit_type_cast(d, type_id, false); 1760 + btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off)); 1761 + return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); 1762 + } 1763 + 1764 + static int btf_dump_array_data(struct btf_dump *d, 1765 + const struct btf_type *t, 1766 + __u32 id, 1767 + const void *data) 1768 + { 1769 + const struct btf_array *array = btf_array(t); 1770 + const struct btf_type *elem_type; 1771 + __u32 i, elem_size = 0, elem_type_id; 1772 + bool is_array_member; 1773 + 1774 + elem_type_id = array->type; 1775 + elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, 
NULL); 1776 + elem_size = btf__resolve_size(d->btf, elem_type_id); 1777 + if (elem_size <= 0) { 1778 + pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); 1779 + return -EINVAL; 1780 + } 1781 + 1782 + if (btf_is_int(elem_type)) { 1783 + /* 1784 + * BTF_INT_CHAR encoding never seems to be set for 1785 + * char arrays, so if size is 1 and element is 1786 + * printable as a char, we'll do that. 1787 + */ 1788 + if (elem_size == 1) 1789 + d->typed_dump->is_array_char = true; 1790 + } 1791 + 1792 + /* note that we increment depth before calling btf_dump_print() below; 1793 + * this is intentional. btf_dump_data_newline() will not print a 1794 + * newline for depth 0 (since this leaves us with trailing newlines 1795 + * at the end of typed display), so depth is incremented first. 1796 + * For similar reasons, we decrement depth before showing the closing 1797 + * parenthesis. 1798 + */ 1799 + d->typed_dump->depth++; 1800 + btf_dump_printf(d, "[%s", btf_dump_data_newline(d)); 1801 + 1802 + /* may be a multidimensional array, so store current "is array member" 1803 + * status so we can restore it correctly later. 
1804 + */ 1805 + is_array_member = d->typed_dump->is_array_member; 1806 + d->typed_dump->is_array_member = true; 1807 + for (i = 0; i < array->nelems; i++, data += elem_size) { 1808 + if (d->typed_dump->is_array_terminated) 1809 + break; 1810 + btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); 1811 + } 1812 + d->typed_dump->is_array_member = is_array_member; 1813 + d->typed_dump->depth--; 1814 + btf_dump_data_pfx(d); 1815 + btf_dump_type_values(d, "]"); 1816 + 1817 + return 0; 1818 + } 1819 + 1820 + static int btf_dump_struct_data(struct btf_dump *d, 1821 + const struct btf_type *t, 1822 + __u32 id, 1823 + const void *data) 1824 + { 1825 + const struct btf_member *m = btf_members(t); 1826 + __u16 n = btf_vlen(t); 1827 + int i, err; 1828 + 1829 + /* note that we increment depth before calling btf_dump_print() below; 1830 + * this is intentional. btf_dump_data_newline() will not print a 1831 + * newline for depth 0 (since this leaves us with trailing newlines 1832 + * at the end of typed display), so depth is incremented first. 1833 + * For similar reasons, we decrement depth before showing the closing 1834 + * parenthesis. 
1835 + */ 1836 + d->typed_dump->depth++; 1837 + btf_dump_printf(d, "{%s", btf_dump_data_newline(d)); 1838 + 1839 + for (i = 0; i < n; i++, m++) { 1840 + const struct btf_type *mtype; 1841 + const char *mname; 1842 + __u32 moffset; 1843 + __u8 bit_sz; 1844 + 1845 + mtype = btf__type_by_id(d->btf, m->type); 1846 + mname = btf_name_of(d, m->name_off); 1847 + moffset = btf_member_bit_offset(t, i); 1848 + 1849 + bit_sz = btf_member_bitfield_size(t, i); 1850 + err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8, 1851 + moffset % 8, bit_sz); 1852 + if (err < 0) 1853 + return err; 1854 + } 1855 + d->typed_dump->depth--; 1856 + btf_dump_data_pfx(d); 1857 + btf_dump_type_values(d, "}"); 1858 + return err; 1859 + } 1860 + 1861 + union ptr_data { 1862 + unsigned int p; 1863 + unsigned long long lp; 1864 + }; 1865 + 1866 + static int btf_dump_ptr_data(struct btf_dump *d, 1867 + const struct btf_type *t, 1868 + __u32 id, 1869 + const void *data) 1870 + { 1871 + if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { 1872 + btf_dump_type_values(d, "%p", *(void **)data); 1873 + } else { 1874 + union ptr_data pt; 1875 + 1876 + memcpy(&pt, data, d->ptr_sz); 1877 + if (d->ptr_sz == 4) 1878 + btf_dump_type_values(d, "0x%x", pt.p); 1879 + else 1880 + btf_dump_type_values(d, "0x%llx", pt.lp); 1881 + } 1882 + return 0; 1883 + } 1884 + 1885 + static int btf_dump_get_enum_value(struct btf_dump *d, 1886 + const struct btf_type *t, 1887 + const void *data, 1888 + __u32 id, 1889 + __s64 *value) 1890 + { 1891 + int sz = t->size; 1892 + 1893 + /* handle unaligned enum value */ 1894 + if (!ptr_is_aligned(data, sz)) { 1895 + __u64 val; 1896 + int err; 1897 + 1898 + err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val); 1899 + if (err) 1900 + return err; 1901 + *value = (__s64)val; 1902 + return 0; 1903 + } 1904 + 1905 + switch (t->size) { 1906 + case 8: 1907 + *value = *(__s64 *)data; 1908 + return 0; 1909 + case 4: 1910 + *value = *(__s32 *)data; 1911 + 
return 0; 1912 + case 2: 1913 + *value = *(__s16 *)data; 1914 + return 0; 1915 + case 1: 1916 + *value = *(__s8 *)data; 1917 + return 0; 1918 + default: 1919 + pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); 1920 + return -EINVAL; 1921 + } 1922 + } 1923 + 1924 + static int btf_dump_enum_data(struct btf_dump *d, 1925 + const struct btf_type *t, 1926 + __u32 id, 1927 + const void *data) 1928 + { 1929 + const struct btf_enum *e; 1930 + __s64 value; 1931 + int i, err; 1932 + 1933 + err = btf_dump_get_enum_value(d, t, data, id, &value); 1934 + if (err) 1935 + return err; 1936 + 1937 + for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { 1938 + if (value != e->val) 1939 + continue; 1940 + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); 1941 + return 0; 1942 + } 1943 + 1944 + btf_dump_type_values(d, "%d", value); 1945 + return 0; 1946 + } 1947 + 1948 + static int btf_dump_datasec_data(struct btf_dump *d, 1949 + const struct btf_type *t, 1950 + __u32 id, 1951 + const void *data) 1952 + { 1953 + const struct btf_var_secinfo *vsi; 1954 + const struct btf_type *var; 1955 + __u32 i; 1956 + int err; 1957 + 1958 + btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off)); 1959 + 1960 + for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) { 1961 + var = btf__type_by_id(d->btf, vsi->type); 1962 + err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0); 1963 + if (err < 0) 1964 + return err; 1965 + btf_dump_printf(d, ";"); 1966 + } 1967 + return 0; 1968 + } 1969 + 1970 + /* return size of type, or if base type overflows, return -E2BIG. 
*/ 1971 + static int btf_dump_type_data_check_overflow(struct btf_dump *d, 1972 + const struct btf_type *t, 1973 + __u32 id, 1974 + const void *data, 1975 + __u8 bits_offset) 1976 + { 1977 + __s64 size = btf__resolve_size(d->btf, id); 1978 + 1979 + if (size < 0 || size >= INT_MAX) { 1980 + pr_warn("unexpected size [%zu] for id [%u]\n", 1981 + (size_t)size, id); 1982 + return -EINVAL; 1983 + } 1984 + 1985 + /* Only do overflow checking for base types; we do not want to 1986 + * avoid showing part of a struct, union or array, even if we 1987 + * do not have enough data to show the full object. By 1988 + * restricting overflow checking to base types we can ensure 1989 + * that partial display succeeds, while avoiding overflowing 1990 + * and using bogus data for display. 1991 + */ 1992 + t = skip_mods_and_typedefs(d->btf, id, NULL); 1993 + if (!t) { 1994 + pr_warn("unexpected error skipping mods/typedefs for id [%u]\n", 1995 + id); 1996 + return -EINVAL; 1997 + } 1998 + 1999 + switch (btf_kind(t)) { 2000 + case BTF_KIND_INT: 2001 + case BTF_KIND_FLOAT: 2002 + case BTF_KIND_PTR: 2003 + case BTF_KIND_ENUM: 2004 + if (data + bits_offset / 8 + size > d->typed_dump->data_end) 2005 + return -E2BIG; 2006 + break; 2007 + default: 2008 + break; 2009 + } 2010 + return (int)size; 2011 + } 2012 + 2013 + static int btf_dump_type_data_check_zero(struct btf_dump *d, 2014 + const struct btf_type *t, 2015 + __u32 id, 2016 + const void *data, 2017 + __u8 bits_offset, 2018 + __u8 bit_sz) 2019 + { 2020 + __s64 value; 2021 + int i, err; 2022 + 2023 + /* toplevel exceptions; we show zero values if 2024 + * - we ask for them (emit_zeros) 2025 + * - if we are at top-level so we see "struct empty { }" 2026 + * - or if we are an array member and the array is non-empty and 2027 + * not a char array; we don't want to be in a situation where we 2028 + * have an integer array 0, 1, 0, 1 and only show non-zero values. 
2029 + * If the array contains zeroes only, or is a char array starting 2030 + * with a '\0', the array-level check_zero() will prevent showing it; 2031 + * we are concerned with determining zero value at the array member 2032 + * level here. 2033 + */ 2034 + if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 || 2035 + (d->typed_dump->is_array_member && 2036 + !d->typed_dump->is_array_char)) 2037 + return 0; 2038 + 2039 + t = skip_mods_and_typedefs(d->btf, id, NULL); 2040 + 2041 + switch (btf_kind(t)) { 2042 + case BTF_KIND_INT: 2043 + if (bit_sz) 2044 + return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz); 2045 + return btf_dump_base_type_check_zero(d, t, id, data); 2046 + case BTF_KIND_FLOAT: 2047 + case BTF_KIND_PTR: 2048 + return btf_dump_base_type_check_zero(d, t, id, data); 2049 + case BTF_KIND_ARRAY: { 2050 + const struct btf_array *array = btf_array(t); 2051 + const struct btf_type *elem_type; 2052 + __u32 elem_type_id, elem_size; 2053 + bool ischar; 2054 + 2055 + elem_type_id = array->type; 2056 + elem_size = btf__resolve_size(d->btf, elem_type_id); 2057 + elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); 2058 + 2059 + ischar = btf_is_int(elem_type) && elem_size == 1; 2060 + 2061 + /* check all elements; if _any_ element is nonzero, all 2062 + * of array is displayed. We make an exception however 2063 + * for char arrays where the first element is 0; these 2064 + * are considered zeroed also, even if later elements are 2065 + * non-zero because the string is terminated. 
2066 + */ 2067 + for (i = 0; i < array->nelems; i++) { 2068 + if (i == 0 && ischar && *(char *)data == 0) 2069 + return -ENODATA; 2070 + err = btf_dump_type_data_check_zero(d, elem_type, 2071 + elem_type_id, 2072 + data + 2073 + (i * elem_size), 2074 + bits_offset, 0); 2075 + if (err != -ENODATA) 2076 + return err; 2077 + } 2078 + return -ENODATA; 2079 + } 2080 + case BTF_KIND_STRUCT: 2081 + case BTF_KIND_UNION: { 2082 + const struct btf_member *m = btf_members(t); 2083 + __u16 n = btf_vlen(t); 2084 + 2085 + /* if any struct/union member is non-zero, the struct/union 2086 + * is considered non-zero and dumped. 2087 + */ 2088 + for (i = 0; i < n; i++, m++) { 2089 + const struct btf_type *mtype; 2090 + __u32 moffset; 2091 + 2092 + mtype = btf__type_by_id(d->btf, m->type); 2093 + moffset = btf_member_bit_offset(t, i); 2094 + 2095 + /* btf_int_bits() does not store member bitfield size; 2096 + * bitfield size needs to be stored here so int display 2097 + * of member can retrieve it. 2098 + */ 2099 + bit_sz = btf_member_bitfield_size(t, i); 2100 + err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8, 2101 + moffset % 8, bit_sz); 2102 + if (err != ENODATA) 2103 + return err; 2104 + } 2105 + return -ENODATA; 2106 + } 2107 + case BTF_KIND_ENUM: 2108 + err = btf_dump_get_enum_value(d, t, data, id, &value); 2109 + if (err) 2110 + return err; 2111 + if (value == 0) 2112 + return -ENODATA; 2113 + return 0; 2114 + default: 2115 + return 0; 2116 + } 2117 + } 2118 + 2119 + /* returns size of data dumped, or error. 
*/ 2120 + static int btf_dump_dump_type_data(struct btf_dump *d, 2121 + const char *fname, 2122 + const struct btf_type *t, 2123 + __u32 id, 2124 + const void *data, 2125 + __u8 bits_offset, 2126 + __u8 bit_sz) 2127 + { 2128 + int size, err; 2129 + 2130 + size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); 2131 + if (size < 0) 2132 + return size; 2133 + err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); 2134 + if (err) { 2135 + /* zeroed data is expected and not an error, so simply skip 2136 + * dumping such data. Record other errors however. 2137 + */ 2138 + if (err == -ENODATA) 2139 + return size; 2140 + return err; 2141 + } 2142 + btf_dump_data_pfx(d); 2143 + 2144 + if (!d->typed_dump->skip_names) { 2145 + if (fname && strlen(fname) > 0) 2146 + btf_dump_printf(d, ".%s = ", fname); 2147 + btf_dump_emit_type_cast(d, id, true); 2148 + } 2149 + 2150 + t = skip_mods_and_typedefs(d->btf, id, NULL); 2151 + 2152 + switch (btf_kind(t)) { 2153 + case BTF_KIND_UNKN: 2154 + case BTF_KIND_FWD: 2155 + case BTF_KIND_FUNC: 2156 + case BTF_KIND_FUNC_PROTO: 2157 + err = btf_dump_unsupported_data(d, t, id); 2158 + break; 2159 + case BTF_KIND_INT: 2160 + if (bit_sz) 2161 + err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz); 2162 + else 2163 + err = btf_dump_int_data(d, t, id, data, bits_offset); 2164 + break; 2165 + case BTF_KIND_FLOAT: 2166 + err = btf_dump_float_data(d, t, id, data); 2167 + break; 2168 + case BTF_KIND_PTR: 2169 + err = btf_dump_ptr_data(d, t, id, data); 2170 + break; 2171 + case BTF_KIND_ARRAY: 2172 + err = btf_dump_array_data(d, t, id, data); 2173 + break; 2174 + case BTF_KIND_STRUCT: 2175 + case BTF_KIND_UNION: 2176 + err = btf_dump_struct_data(d, t, id, data); 2177 + break; 2178 + case BTF_KIND_ENUM: 2179 + /* handle bitfield and int enum values */ 2180 + if (bit_sz) { 2181 + __u64 print_num; 2182 + __s64 enum_val; 2183 + 2184 + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, 2185 + 
&print_num); 2186 + if (err) 2187 + break; 2188 + enum_val = (__s64)print_num; 2189 + err = btf_dump_enum_data(d, t, id, &enum_val); 2190 + } else 2191 + err = btf_dump_enum_data(d, t, id, data); 2192 + break; 2193 + case BTF_KIND_VAR: 2194 + err = btf_dump_var_data(d, t, id, data); 2195 + break; 2196 + case BTF_KIND_DATASEC: 2197 + err = btf_dump_datasec_data(d, t, id, data); 2198 + break; 2199 + default: 2200 + pr_warn("unexpected kind [%u] for id [%u]\n", 2201 + BTF_INFO_KIND(t->info), id); 2202 + return -EINVAL; 2203 + } 2204 + if (err < 0) 2205 + return err; 2206 + return size; 2207 + } 2208 + 2209 + int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, 2210 + const void *data, size_t data_sz, 2211 + const struct btf_dump_type_data_opts *opts) 2212 + { 2213 + struct btf_dump_data typed_dump = {}; 2214 + const struct btf_type *t; 2215 + int ret; 2216 + 2217 + if (!OPTS_VALID(opts, btf_dump_type_data_opts)) 2218 + return libbpf_err(-EINVAL); 2219 + 2220 + t = btf__type_by_id(d->btf, id); 2221 + if (!t) 2222 + return libbpf_err(-ENOENT); 2223 + 2224 + d->typed_dump = &typed_dump; 2225 + d->typed_dump->data_end = data + data_sz; 2226 + d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0); 2227 + 2228 + /* default indent string is a tab */ 2229 + if (!opts->indent_str) 2230 + d->typed_dump->indent_str[0] = '\t'; 2231 + else 2232 + strncat(d->typed_dump->indent_str, opts->indent_str, 2233 + sizeof(d->typed_dump->indent_str) - 1); 2234 + 2235 + d->typed_dump->compact = OPTS_GET(opts, compact, false); 2236 + d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); 2237 + d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); 2238 + 2239 + ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); 2240 + 2241 + d->typed_dump = NULL; 2242 + 2243 + return libbpf_err(ret); 1505 2244 }

+115 -1348

tools/lib/bpf/libbpf.c

··· 498 498 * it at load time. 499 499 */ 500 500 struct btf *btf_vmlinux; 501 + /* Path to the custom BTF to be used for BPF CO-RE relocations as an 502 + * override for vmlinux BTF. 503 + */ 504 + char *btf_custom_path; 501 505 /* vmlinux BTF override for CO-RE relocations */ 502 506 struct btf *btf_vmlinux_override; 503 507 /* Lazily initialized kernel module BTFs */ ··· 593 589 insn->src_reg == BPF_PSEUDO_CALL && 594 590 insn->dst_reg == 0 && 595 591 insn->off == 0; 596 - } 597 - 598 - static bool is_ldimm64_insn(struct bpf_insn *insn) 599 - { 600 - return insn->code == (BPF_LD | BPF_IMM | BPF_DW); 601 592 } 602 593 603 594 static bool is_call_insn(const struct bpf_insn *insn) ··· 2644 2645 struct bpf_program *prog; 2645 2646 int i; 2646 2647 2647 - /* CO-RE relocations need kernel BTF */ 2648 - if (obj->btf_ext && obj->btf_ext->core_relo_info.len) 2648 + /* CO-RE relocations need kernel BTF, only when btf_custom_path 2649 + * is not specified 2650 + */ 2651 + if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 2649 2652 return true; 2650 2653 2651 2654 /* Support for typed ksyms needs kernel BTF */ ··· 2680 2679 if (!force && !obj_needs_vmlinux_btf(obj)) 2681 2680 return 0; 2682 2681 2683 - obj->btf_vmlinux = libbpf_find_kernel_btf(); 2682 + obj->btf_vmlinux = btf__load_vmlinux_btf(); 2684 2683 err = libbpf_get_error(obj->btf_vmlinux); 2685 2684 if (err) { 2686 2685 pr_warn("Error loading vmlinux BTF: %d\n", err); ··· 2769 2768 */ 2770 2769 btf__set_fd(kern_btf, 0); 2771 2770 } else { 2772 - err = btf__load(kern_btf); 2771 + err = btf__load_into_kernel(kern_btf); 2773 2772 } 2774 2773 if (sanitize) { 2775 2774 if (!err) { ··· 4522 4521 { 4523 4522 struct bpf_create_map_attr create_attr; 4524 4523 struct bpf_map_def *def = &map->def; 4524 + int err = 0; 4525 4525 4526 4526 memset(&create_attr, 0, sizeof(create_attr)); 4527 4527 ··· 4565 4563 4566 4564 if (bpf_map_type__is_map_in_map(def->type)) { 4567 4565 if (map->inner_map) { 4568 - 
int err; 4569 - 4570 4566 err = bpf_object__create_map(obj, map->inner_map, true); 4571 4567 if (err) { 4572 4568 pr_warn("map '%s': failed to create inner map: %d\n", ··· 4589 4589 if (map->fd < 0 && (create_attr.btf_key_type_id || 4590 4590 create_attr.btf_value_type_id)) { 4591 4591 char *cp, errmsg[STRERR_BUFSIZE]; 4592 - int err = -errno; 4593 4592 4593 + err = -errno; 4594 4594 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 4595 4595 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", 4596 4596 map->name, cp, err); ··· 4602 4602 map->fd = bpf_create_map_xattr(&create_attr); 4603 4603 } 4604 4604 4605 - if (map->fd < 0) 4606 - return -errno; 4605 + err = map->fd < 0 ? -errno : 0; 4607 4606 4608 4607 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { 4609 4608 if (obj->gen_loader) ··· 4611 4612 zfree(&map->inner_map); 4612 4613 } 4613 4614 4614 - return 0; 4615 + return err; 4615 4616 } 4616 4617 4617 4618 static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) ··· 4657 4658 char *cp, errmsg[STRERR_BUFSIZE]; 4658 4659 unsigned int i, j; 4659 4660 int err; 4661 + bool retried; 4660 4662 4661 4663 for (i = 0; i < obj->nr_maps; i++) { 4662 4664 map = &obj->maps[i]; 4663 4665 4666 + retried = false; 4667 + retry: 4664 4668 if (map->pin_path) { 4665 4669 err = bpf_object__reuse_map(map); 4666 4670 if (err) { 4667 4671 pr_warn("map '%s': error reusing pinned map\n", 4668 4672 map->name); 4673 + goto err_out; 4674 + } 4675 + if (retried && map->fd < 0) { 4676 + pr_warn("map '%s': cannot find pinned map\n", 4677 + map->name); 4678 + err = -ENOENT; 4669 4679 goto err_out; 4670 4680 } 4671 4681 } ··· 4710 4702 if (map->pin_path && !map->pinned) { 4711 4703 err = bpf_map__pin(map, NULL); 4712 4704 if (err) { 4705 + zclose(map->fd); 4706 + if (!retried && err == -EEXIST) { 4707 + retried = true; 4708 + goto retry; 4709 + } 4713 4710 pr_warn("map '%s': failed to auto-pin at '%s': %d\n", 4714 4711 map->name, 
map->pin_path, err); 4715 - zclose(map->fd); 4716 4712 goto err_out; 4717 4713 } 4718 4714 } ··· 4733 4721 return err; 4734 4722 } 4735 4723 4736 - #define BPF_CORE_SPEC_MAX_LEN 64 4737 - 4738 - /* represents BPF CO-RE field or array element accessor */ 4739 - struct bpf_core_accessor { 4740 - __u32 type_id; /* struct/union type or array element type */ 4741 - __u32 idx; /* field index or array index */ 4742 - const char *name; /* field name or NULL for array accessor */ 4743 - }; 4744 - 4745 - struct bpf_core_spec { 4746 - const struct btf *btf; 4747 - /* high-level spec: named fields and array indices only */ 4748 - struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; 4749 - /* original unresolved (no skip_mods_or_typedefs) root type ID */ 4750 - __u32 root_type_id; 4751 - /* CO-RE relocation kind */ 4752 - enum bpf_core_relo_kind relo_kind; 4753 - /* high-level spec length */ 4754 - int len; 4755 - /* raw, low-level spec: 1-to-1 with accessor spec string */ 4756 - int raw_spec[BPF_CORE_SPEC_MAX_LEN]; 4757 - /* raw spec length */ 4758 - int raw_len; 4759 - /* field bit offset represented by spec */ 4760 - __u32 bit_offset; 4761 - }; 4762 - 4763 - static bool str_is_empty(const char *s) 4764 - { 4765 - return !s || !s[0]; 4766 - } 4767 - 4768 - static bool is_flex_arr(const struct btf *btf, 4769 - const struct bpf_core_accessor *acc, 4770 - const struct btf_array *arr) 4771 - { 4772 - const struct btf_type *t; 4773 - 4774 - /* not a flexible array, if not inside a struct or has non-zero size */ 4775 - if (!acc->name || arr->nelems > 0) 4776 - return false; 4777 - 4778 - /* has to be the last member of enclosing struct */ 4779 - t = btf__type_by_id(btf, acc->type_id); 4780 - return acc->idx == btf_vlen(t) - 1; 4781 - } 4782 - 4783 - static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) 4784 - { 4785 - switch (kind) { 4786 - case BPF_FIELD_BYTE_OFFSET: return "byte_off"; 4787 - case BPF_FIELD_BYTE_SIZE: return "byte_sz"; 4788 - case BPF_FIELD_EXISTS: 
return "field_exists"; 4789 - case BPF_FIELD_SIGNED: return "signed"; 4790 - case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; 4791 - case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; 4792 - case BPF_TYPE_ID_LOCAL: return "local_type_id"; 4793 - case BPF_TYPE_ID_TARGET: return "target_type_id"; 4794 - case BPF_TYPE_EXISTS: return "type_exists"; 4795 - case BPF_TYPE_SIZE: return "type_size"; 4796 - case BPF_ENUMVAL_EXISTS: return "enumval_exists"; 4797 - case BPF_ENUMVAL_VALUE: return "enumval_value"; 4798 - default: return "unknown"; 4799 - } 4800 - } 4801 - 4802 - static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) 4803 - { 4804 - switch (kind) { 4805 - case BPF_FIELD_BYTE_OFFSET: 4806 - case BPF_FIELD_BYTE_SIZE: 4807 - case BPF_FIELD_EXISTS: 4808 - case BPF_FIELD_SIGNED: 4809 - case BPF_FIELD_LSHIFT_U64: 4810 - case BPF_FIELD_RSHIFT_U64: 4811 - return true; 4812 - default: 4813 - return false; 4814 - } 4815 - } 4816 - 4817 - static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) 4818 - { 4819 - switch (kind) { 4820 - case BPF_TYPE_ID_LOCAL: 4821 - case BPF_TYPE_ID_TARGET: 4822 - case BPF_TYPE_EXISTS: 4823 - case BPF_TYPE_SIZE: 4824 - return true; 4825 - default: 4826 - return false; 4827 - } 4828 - } 4829 - 4830 - static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) 4831 - { 4832 - switch (kind) { 4833 - case BPF_ENUMVAL_EXISTS: 4834 - case BPF_ENUMVAL_VALUE: 4835 - return true; 4836 - default: 4837 - return false; 4838 - } 4839 - } 4840 - 4841 - /* 4842 - * Turn bpf_core_relo into a low- and high-level spec representation, 4843 - * validating correctness along the way, as well as calculating resulting 4844 - * field bit offset, specified by accessor string. Low-level spec captures 4845 - * every single level of nestedness, including traversing anonymous 4846 - * struct/union members. High-level one only captures semantically meaningful 4847 - * "turning points": named fields and array indicies. 
4848 - * E.g., for this case: 4849 - * 4850 - * struct sample { 4851 - * int __unimportant; 4852 - * struct { 4853 - * int __1; 4854 - * int __2; 4855 - * int a[7]; 4856 - * }; 4857 - * }; 4858 - * 4859 - * struct sample *s = ...; 4860 - * 4861 - * int x = &s->a[3]; // access string = '0:1:2:3' 4862 - * 4863 - * Low-level spec has 1:1 mapping with each element of access string (it's 4864 - * just a parsed access string representation): [0, 1, 2, 3]. 4865 - * 4866 - * High-level spec will capture only 3 points: 4867 - * - intial zero-index access by pointer (&s->... is the same as &s[0]...); 4868 - * - field 'a' access (corresponds to '2' in low-level spec); 4869 - * - array element #3 access (corresponds to '3' in low-level spec). 4870 - * 4871 - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, 4872 - * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their 4873 - * spec and raw_spec are kept empty. 4874 - * 4875 - * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access 4876 - * string to specify enumerator's value index that need to be relocated. 
4877 - */ 4878 - static int bpf_core_parse_spec(const struct btf *btf, 4879 - __u32 type_id, 4880 - const char *spec_str, 4881 - enum bpf_core_relo_kind relo_kind, 4882 - struct bpf_core_spec *spec) 4883 - { 4884 - int access_idx, parsed_len, i; 4885 - struct bpf_core_accessor *acc; 4886 - const struct btf_type *t; 4887 - const char *name; 4888 - __u32 id; 4889 - __s64 sz; 4890 - 4891 - if (str_is_empty(spec_str) || *spec_str == ':') 4892 - return -EINVAL; 4893 - 4894 - memset(spec, 0, sizeof(*spec)); 4895 - spec->btf = btf; 4896 - spec->root_type_id = type_id; 4897 - spec->relo_kind = relo_kind; 4898 - 4899 - /* type-based relocations don't have a field access string */ 4900 - if (core_relo_is_type_based(relo_kind)) { 4901 - if (strcmp(spec_str, "0")) 4902 - return -EINVAL; 4903 - return 0; 4904 - } 4905 - 4906 - /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ 4907 - while (*spec_str) { 4908 - if (*spec_str == ':') 4909 - ++spec_str; 4910 - if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) 4911 - return -EINVAL; 4912 - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) 4913 - return -E2BIG; 4914 - spec_str += parsed_len; 4915 - spec->raw_spec[spec->raw_len++] = access_idx; 4916 - } 4917 - 4918 - if (spec->raw_len == 0) 4919 - return -EINVAL; 4920 - 4921 - t = skip_mods_and_typedefs(btf, type_id, &id); 4922 - if (!t) 4923 - return -EINVAL; 4924 - 4925 - access_idx = spec->raw_spec[0]; 4926 - acc = &spec->spec[0]; 4927 - acc->type_id = id; 4928 - acc->idx = access_idx; 4929 - spec->len++; 4930 - 4931 - if (core_relo_is_enumval_based(relo_kind)) { 4932 - if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) 4933 - return -EINVAL; 4934 - 4935 - /* record enumerator name in a first accessor */ 4936 - acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); 4937 - return 0; 4938 - } 4939 - 4940 - if (!core_relo_is_field_based(relo_kind)) 4941 - return -EINVAL; 4942 - 4943 - sz = btf__resolve_size(btf, id); 4944 - 
if (sz < 0) 4945 - return sz; 4946 - spec->bit_offset = access_idx * sz * 8; 4947 - 4948 - for (i = 1; i < spec->raw_len; i++) { 4949 - t = skip_mods_and_typedefs(btf, id, &id); 4950 - if (!t) 4951 - return -EINVAL; 4952 - 4953 - access_idx = spec->raw_spec[i]; 4954 - acc = &spec->spec[spec->len]; 4955 - 4956 - if (btf_is_composite(t)) { 4957 - const struct btf_member *m; 4958 - __u32 bit_offset; 4959 - 4960 - if (access_idx >= btf_vlen(t)) 4961 - return -EINVAL; 4962 - 4963 - bit_offset = btf_member_bit_offset(t, access_idx); 4964 - spec->bit_offset += bit_offset; 4965 - 4966 - m = btf_members(t) + access_idx; 4967 - if (m->name_off) { 4968 - name = btf__name_by_offset(btf, m->name_off); 4969 - if (str_is_empty(name)) 4970 - return -EINVAL; 4971 - 4972 - acc->type_id = id; 4973 - acc->idx = access_idx; 4974 - acc->name = name; 4975 - spec->len++; 4976 - } 4977 - 4978 - id = m->type; 4979 - } else if (btf_is_array(t)) { 4980 - const struct btf_array *a = btf_array(t); 4981 - bool flex; 4982 - 4983 - t = skip_mods_and_typedefs(btf, a->type, &id); 4984 - if (!t) 4985 - return -EINVAL; 4986 - 4987 - flex = is_flex_arr(btf, acc - 1, a); 4988 - if (!flex && access_idx >= a->nelems) 4989 - return -EINVAL; 4990 - 4991 - spec->spec[spec->len].type_id = id; 4992 - spec->spec[spec->len].idx = access_idx; 4993 - spec->len++; 4994 - 4995 - sz = btf__resolve_size(btf, id); 4996 - if (sz < 0) 4997 - return sz; 4998 - spec->bit_offset += access_idx * sz * 8; 4999 - } else { 5000 - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", 5001 - type_id, spec_str, i, id, btf_kind_str(t)); 5002 - return -EINVAL; 5003 - } 5004 - } 5005 - 5006 - return 0; 5007 - } 5008 - 5009 4724 static bool bpf_core_is_flavor_sep(const char *s) 5010 4725 { 5011 4726 /* check X___Y name pattern, where X and Y are not underscores */ ··· 4745 5006 * before last triple underscore. 
Struct name part after last triple 4746 5007 * underscore is ignored by BPF CO-RE relocation during relocation matching. 4747 5008 */ 4748 - static size_t bpf_core_essential_name_len(const char *name) 5009 + size_t bpf_core_essential_name_len(const char *name) 4749 5010 { 4750 5011 size_t n = strlen(name); 4751 5012 int i; ··· 4757 5018 return n; 4758 5019 } 4759 5020 4760 - struct core_cand 4761 - { 4762 - const struct btf *btf; 4763 - const struct btf_type *t; 4764 - const char *name; 4765 - __u32 id; 4766 - }; 4767 - 4768 - /* dynamically sized list of type IDs and its associated struct btf */ 4769 - struct core_cand_list { 4770 - struct core_cand *cands; 4771 - int len; 4772 - }; 4773 - 4774 - static void bpf_core_free_cands(struct core_cand_list *cands) 5021 + static void bpf_core_free_cands(struct bpf_core_cand_list *cands) 4775 5022 { 4776 5023 free(cands->cands); 4777 5024 free(cands); 4778 5025 } 4779 5026 4780 - static int bpf_core_add_cands(struct core_cand *local_cand, 5027 + static int bpf_core_add_cands(struct bpf_core_cand *local_cand, 4781 5028 size_t local_essent_len, 4782 5029 const struct btf *targ_btf, 4783 5030 const char *targ_btf_name, 4784 5031 int targ_start_id, 4785 - struct core_cand_list *cands) 5032 + struct bpf_core_cand_list *cands) 4786 5033 { 4787 - struct core_cand *new_cands, *cand; 5034 + struct bpf_core_cand *new_cands, *cand; 4788 5035 const struct btf_type *t; 4789 5036 const char *targ_name; 4790 5037 size_t targ_essent_len; ··· 4906 5181 return 0; 4907 5182 } 4908 5183 4909 - static struct core_cand_list * 5184 + static struct bpf_core_cand_list * 4910 5185 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) 4911 5186 { 4912 - struct core_cand local_cand = {}; 4913 - struct core_cand_list *cands; 5187 + struct bpf_core_cand local_cand = {}; 5188 + struct bpf_core_cand_list *cands; 4914 5189 const struct btf *main_btf; 4915 5190 size_t local_essent_len; 4916 5191 int err, i; ··· 4964 
5239 return ERR_PTR(err); 4965 5240 } 4966 5241 4967 - /* Check two types for compatibility for the purpose of field access 4968 - * relocation. const/volatile/restrict and typedefs are skipped to ensure we 4969 - * are relocating semantically compatible entities: 4970 - * - any two STRUCTs/UNIONs are compatible and can be mixed; 4971 - * - any two FWDs are compatible, if their names match (modulo flavor suffix); 4972 - * - any two PTRs are always compatible; 4973 - * - for ENUMs, names should be the same (ignoring flavor suffix) or at 4974 - * least one of enums should be anonymous; 4975 - * - for ENUMs, check sizes, names are ignored; 4976 - * - for INT, size and signedness are ignored; 4977 - * - any two FLOATs are always compatible; 4978 - * - for ARRAY, dimensionality is ignored, element types are checked for 4979 - * compatibility recursively; 4980 - * - everything else shouldn't be ever a target of relocation. 4981 - * These rules are not set in stone and probably will be adjusted as we get 4982 - * more experience with using BPF CO-RE relocations. 
4983 - */ 4984 - static int bpf_core_fields_are_compat(const struct btf *local_btf, 4985 - __u32 local_id, 4986 - const struct btf *targ_btf, 4987 - __u32 targ_id) 4988 - { 4989 - const struct btf_type *local_type, *targ_type; 4990 - 4991 - recur: 4992 - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); 4993 - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); 4994 - if (!local_type || !targ_type) 4995 - return -EINVAL; 4996 - 4997 - if (btf_is_composite(local_type) && btf_is_composite(targ_type)) 4998 - return 1; 4999 - if (btf_kind(local_type) != btf_kind(targ_type)) 5000 - return 0; 5001 - 5002 - switch (btf_kind(local_type)) { 5003 - case BTF_KIND_PTR: 5004 - case BTF_KIND_FLOAT: 5005 - return 1; 5006 - case BTF_KIND_FWD: 5007 - case BTF_KIND_ENUM: { 5008 - const char *local_name, *targ_name; 5009 - size_t local_len, targ_len; 5010 - 5011 - local_name = btf__name_by_offset(local_btf, 5012 - local_type->name_off); 5013 - targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); 5014 - local_len = bpf_core_essential_name_len(local_name); 5015 - targ_len = bpf_core_essential_name_len(targ_name); 5016 - /* one of them is anonymous or both w/ same flavor-less names */ 5017 - return local_len == 0 || targ_len == 0 || 5018 - (local_len == targ_len && 5019 - strncmp(local_name, targ_name, local_len) == 0); 5020 - } 5021 - case BTF_KIND_INT: 5022 - /* just reject deprecated bitfield-like integers; all other 5023 - * integers are by default compatible between each other 5024 - */ 5025 - return btf_int_offset(local_type) == 0 && 5026 - btf_int_offset(targ_type) == 0; 5027 - case BTF_KIND_ARRAY: 5028 - local_id = btf_array(local_type)->type; 5029 - targ_id = btf_array(targ_type)->type; 5030 - goto recur; 5031 - default: 5032 - pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", 5033 - btf_kind(local_type), local_id, targ_id); 5034 - return 0; 5035 - } 5036 - } 5037 - 5038 - /* 5039 - * Given single high-level named 
field accessor in local type, find 5040 - * corresponding high-level accessor for a target type. Along the way, 5041 - * maintain low-level spec for target as well. Also keep updating target 5042 - * bit offset. 5043 - * 5044 - * Searching is performed through recursive exhaustive enumeration of all 5045 - * fields of a struct/union. If there are any anonymous (embedded) 5046 - * structs/unions, they are recursively searched as well. If field with 5047 - * desired name is found, check compatibility between local and target types, 5048 - * before returning result. 5049 - * 5050 - * 1 is returned, if field is found. 5051 - * 0 is returned if no compatible field is found. 5052 - * <0 is returned on error. 5053 - */ 5054 - static int bpf_core_match_member(const struct btf *local_btf, 5055 - const struct bpf_core_accessor *local_acc, 5056 - const struct btf *targ_btf, 5057 - __u32 targ_id, 5058 - struct bpf_core_spec *spec, 5059 - __u32 *next_targ_id) 5060 - { 5061 - const struct btf_type *local_type, *targ_type; 5062 - const struct btf_member *local_member, *m; 5063 - const char *local_name, *targ_name; 5064 - __u32 local_id; 5065 - int i, n, found; 5066 - 5067 - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); 5068 - if (!targ_type) 5069 - return -EINVAL; 5070 - if (!btf_is_composite(targ_type)) 5071 - return 0; 5072 - 5073 - local_id = local_acc->type_id; 5074 - local_type = btf__type_by_id(local_btf, local_id); 5075 - local_member = btf_members(local_type) + local_acc->idx; 5076 - local_name = btf__name_by_offset(local_btf, local_member->name_off); 5077 - 5078 - n = btf_vlen(targ_type); 5079 - m = btf_members(targ_type); 5080 - for (i = 0; i < n; i++, m++) { 5081 - __u32 bit_offset; 5082 - 5083 - bit_offset = btf_member_bit_offset(targ_type, i); 5084 - 5085 - /* too deep struct/union/array nesting */ 5086 - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) 5087 - return -E2BIG; 5088 - 5089 - /* speculate this member will be the good one */ 5090 - 
spec->bit_offset += bit_offset; 5091 - spec->raw_spec[spec->raw_len++] = i; 5092 - 5093 - targ_name = btf__name_by_offset(targ_btf, m->name_off); 5094 - if (str_is_empty(targ_name)) { 5095 - /* embedded struct/union, we need to go deeper */ 5096 - found = bpf_core_match_member(local_btf, local_acc, 5097 - targ_btf, m->type, 5098 - spec, next_targ_id); 5099 - if (found) /* either found or error */ 5100 - return found; 5101 - } else if (strcmp(local_name, targ_name) == 0) { 5102 - /* matching named field */ 5103 - struct bpf_core_accessor *targ_acc; 5104 - 5105 - targ_acc = &spec->spec[spec->len++]; 5106 - targ_acc->type_id = targ_id; 5107 - targ_acc->idx = i; 5108 - targ_acc->name = targ_name; 5109 - 5110 - *next_targ_id = m->type; 5111 - found = bpf_core_fields_are_compat(local_btf, 5112 - local_member->type, 5113 - targ_btf, m->type); 5114 - if (!found) 5115 - spec->len--; /* pop accessor */ 5116 - return found; 5117 - } 5118 - /* member turned out not to be what we looked for */ 5119 - spec->bit_offset -= bit_offset; 5120 - spec->raw_len--; 5121 - } 5122 - 5123 - return 0; 5124 - } 5125 - 5126 5242 /* Check local and target types for compatibility. This check is used for 5127 5243 * type-based CO-RE relocations and follow slightly different rules than 5128 5244 * field-based relocations. This function assumes that root types were already ··· 4983 5417 * These rules are not set in stone and probably will be adjusted as we get 4984 5418 * more experience with using BPF CO-RE relocations. 
4985 5419 */ 4986 - static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 4987 - const struct btf *targ_btf, __u32 targ_id) 5420 + int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 5421 + const struct btf *targ_btf, __u32 targ_id) 4988 5422 { 4989 5423 const struct btf_type *local_type, *targ_type; 4990 5424 int depth = 32; /* max recursion depth */ ··· 5058 5492 } 5059 5493 } 5060 5494 5061 - /* 5062 - * Try to match local spec to a target type and, if successful, produce full 5063 - * target spec (high-level, low-level + bit offset). 5064 - */ 5065 - static int bpf_core_spec_match(struct bpf_core_spec *local_spec, 5066 - const struct btf *targ_btf, __u32 targ_id, 5067 - struct bpf_core_spec *targ_spec) 5068 - { 5069 - const struct btf_type *targ_type; 5070 - const struct bpf_core_accessor *local_acc; 5071 - struct bpf_core_accessor *targ_acc; 5072 - int i, sz, matched; 5073 - 5074 - memset(targ_spec, 0, sizeof(*targ_spec)); 5075 - targ_spec->btf = targ_btf; 5076 - targ_spec->root_type_id = targ_id; 5077 - targ_spec->relo_kind = local_spec->relo_kind; 5078 - 5079 - if (core_relo_is_type_based(local_spec->relo_kind)) { 5080 - return bpf_core_types_are_compat(local_spec->btf, 5081 - local_spec->root_type_id, 5082 - targ_btf, targ_id); 5083 - } 5084 - 5085 - local_acc = &local_spec->spec[0]; 5086 - targ_acc = &targ_spec->spec[0]; 5087 - 5088 - if (core_relo_is_enumval_based(local_spec->relo_kind)) { 5089 - size_t local_essent_len, targ_essent_len; 5090 - const struct btf_enum *e; 5091 - const char *targ_name; 5092 - 5093 - /* has to resolve to an enum */ 5094 - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); 5095 - if (!btf_is_enum(targ_type)) 5096 - return 0; 5097 - 5098 - local_essent_len = bpf_core_essential_name_len(local_acc->name); 5099 - 5100 - for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { 5101 - targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); 
5102 - targ_essent_len = bpf_core_essential_name_len(targ_name); 5103 - if (targ_essent_len != local_essent_len) 5104 - continue; 5105 - if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { 5106 - targ_acc->type_id = targ_id; 5107 - targ_acc->idx = i; 5108 - targ_acc->name = targ_name; 5109 - targ_spec->len++; 5110 - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; 5111 - targ_spec->raw_len++; 5112 - return 1; 5113 - } 5114 - } 5115 - return 0; 5116 - } 5117 - 5118 - if (!core_relo_is_field_based(local_spec->relo_kind)) 5119 - return -EINVAL; 5120 - 5121 - for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { 5122 - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, 5123 - &targ_id); 5124 - if (!targ_type) 5125 - return -EINVAL; 5126 - 5127 - if (local_acc->name) { 5128 - matched = bpf_core_match_member(local_spec->btf, 5129 - local_acc, 5130 - targ_btf, targ_id, 5131 - targ_spec, &targ_id); 5132 - if (matched <= 0) 5133 - return matched; 5134 - } else { 5135 - /* for i=0, targ_id is already treated as array element 5136 - * type (because it's the original struct), for others 5137 - * we should find array element type first 5138 - */ 5139 - if (i > 0) { 5140 - const struct btf_array *a; 5141 - bool flex; 5142 - 5143 - if (!btf_is_array(targ_type)) 5144 - return 0; 5145 - 5146 - a = btf_array(targ_type); 5147 - flex = is_flex_arr(targ_btf, targ_acc - 1, a); 5148 - if (!flex && local_acc->idx >= a->nelems) 5149 - return 0; 5150 - if (!skip_mods_and_typedefs(targ_btf, a->type, 5151 - &targ_id)) 5152 - return -EINVAL; 5153 - } 5154 - 5155 - /* too deep struct/union/array nesting */ 5156 - if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) 5157 - return -E2BIG; 5158 - 5159 - targ_acc->type_id = targ_id; 5160 - targ_acc->idx = local_acc->idx; 5161 - targ_acc->name = NULL; 5162 - targ_spec->len++; 5163 - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; 5164 - targ_spec->raw_len++; 5165 - 5166 - sz = 
btf__resolve_size(targ_btf, targ_id); 5167 - if (sz < 0) 5168 - return sz; 5169 - targ_spec->bit_offset += local_acc->idx * sz * 8; 5170 - } 5171 - } 5172 - 5173 - return 1; 5174 - } 5175 - 5176 - static int bpf_core_calc_field_relo(const struct bpf_program *prog, 5177 - const struct bpf_core_relo *relo, 5178 - const struct bpf_core_spec *spec, 5179 - __u32 *val, __u32 *field_sz, __u32 *type_id, 5180 - bool *validate) 5181 - { 5182 - const struct bpf_core_accessor *acc; 5183 - const struct btf_type *t; 5184 - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; 5185 - const struct btf_member *m; 5186 - const struct btf_type *mt; 5187 - bool bitfield; 5188 - __s64 sz; 5189 - 5190 - *field_sz = 0; 5191 - 5192 - if (relo->kind == BPF_FIELD_EXISTS) { 5193 - *val = spec ? 1 : 0; 5194 - return 0; 5195 - } 5196 - 5197 - if (!spec) 5198 - return -EUCLEAN; /* request instruction poisoning */ 5199 - 5200 - acc = &spec->spec[spec->len - 1]; 5201 - t = btf__type_by_id(spec->btf, acc->type_id); 5202 - 5203 - /* a[n] accessor needs special handling */ 5204 - if (!acc->name) { 5205 - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { 5206 - *val = spec->bit_offset / 8; 5207 - /* remember field size for load/store mem size */ 5208 - sz = btf__resolve_size(spec->btf, acc->type_id); 5209 - if (sz < 0) 5210 - return -EINVAL; 5211 - *field_sz = sz; 5212 - *type_id = acc->type_id; 5213 - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { 5214 - sz = btf__resolve_size(spec->btf, acc->type_id); 5215 - if (sz < 0) 5216 - return -EINVAL; 5217 - *val = sz; 5218 - } else { 5219 - pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", 5220 - prog->name, relo->kind, relo->insn_off / 8); 5221 - return -EINVAL; 5222 - } 5223 - if (validate) 5224 - *validate = true; 5225 - return 0; 5226 - } 5227 - 5228 - m = btf_members(t) + acc->idx; 5229 - mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); 5230 - bit_off = spec->bit_offset; 5231 - bit_sz = 
btf_member_bitfield_size(t, acc->idx); 5232 - 5233 - bitfield = bit_sz > 0; 5234 - if (bitfield) { 5235 - byte_sz = mt->size; 5236 - byte_off = bit_off / 8 / byte_sz * byte_sz; 5237 - /* figure out smallest int size necessary for bitfield load */ 5238 - while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { 5239 - if (byte_sz >= 8) { 5240 - /* bitfield can't be read with 64-bit read */ 5241 - pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", 5242 - prog->name, relo->kind, relo->insn_off / 8); 5243 - return -E2BIG; 5244 - } 5245 - byte_sz *= 2; 5246 - byte_off = bit_off / 8 / byte_sz * byte_sz; 5247 - } 5248 - } else { 5249 - sz = btf__resolve_size(spec->btf, field_type_id); 5250 - if (sz < 0) 5251 - return -EINVAL; 5252 - byte_sz = sz; 5253 - byte_off = spec->bit_offset / 8; 5254 - bit_sz = byte_sz * 8; 5255 - } 5256 - 5257 - /* for bitfields, all the relocatable aspects are ambiguous and we 5258 - * might disagree with compiler, so turn off validation of expected 5259 - * value, except for signedness 5260 - */ 5261 - if (validate) 5262 - *validate = !bitfield; 5263 - 5264 - switch (relo->kind) { 5265 - case BPF_FIELD_BYTE_OFFSET: 5266 - *val = byte_off; 5267 - if (!bitfield) { 5268 - *field_sz = byte_sz; 5269 - *type_id = field_type_id; 5270 - } 5271 - break; 5272 - case BPF_FIELD_BYTE_SIZE: 5273 - *val = byte_sz; 5274 - break; 5275 - case BPF_FIELD_SIGNED: 5276 - /* enums will be assumed unsigned */ 5277 - *val = btf_is_enum(mt) || 5278 - (btf_int_encoding(mt) & BTF_INT_SIGNED); 5279 - if (validate) 5280 - *validate = true; /* signedness is never ambiguous */ 5281 - break; 5282 - case BPF_FIELD_LSHIFT_U64: 5283 - #if __BYTE_ORDER == __LITTLE_ENDIAN 5284 - *val = 64 - (bit_off + bit_sz - byte_off * 8); 5285 - #else 5286 - *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); 5287 - #endif 5288 - break; 5289 - case BPF_FIELD_RSHIFT_U64: 5290 - *val = 64 - bit_sz; 5291 - if (validate) 5292 - *validate = true; /* right shift is never 
ambiguous */ 5293 - break; 5294 - case BPF_FIELD_EXISTS: 5295 - default: 5296 - return -EOPNOTSUPP; 5297 - } 5298 - 5299 - return 0; 5300 - } 5301 - 5302 - static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, 5303 - const struct bpf_core_spec *spec, 5304 - __u32 *val) 5305 - { 5306 - __s64 sz; 5307 - 5308 - /* type-based relos return zero when target type is not found */ 5309 - if (!spec) { 5310 - *val = 0; 5311 - return 0; 5312 - } 5313 - 5314 - switch (relo->kind) { 5315 - case BPF_TYPE_ID_TARGET: 5316 - *val = spec->root_type_id; 5317 - break; 5318 - case BPF_TYPE_EXISTS: 5319 - *val = 1; 5320 - break; 5321 - case BPF_TYPE_SIZE: 5322 - sz = btf__resolve_size(spec->btf, spec->root_type_id); 5323 - if (sz < 0) 5324 - return -EINVAL; 5325 - *val = sz; 5326 - break; 5327 - case BPF_TYPE_ID_LOCAL: 5328 - /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ 5329 - default: 5330 - return -EOPNOTSUPP; 5331 - } 5332 - 5333 - return 0; 5334 - } 5335 - 5336 - static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, 5337 - const struct bpf_core_spec *spec, 5338 - __u32 *val) 5339 - { 5340 - const struct btf_type *t; 5341 - const struct btf_enum *e; 5342 - 5343 - switch (relo->kind) { 5344 - case BPF_ENUMVAL_EXISTS: 5345 - *val = spec ? 
1 : 0; 5346 - break; 5347 - case BPF_ENUMVAL_VALUE: 5348 - if (!spec) 5349 - return -EUCLEAN; /* request instruction poisoning */ 5350 - t = btf__type_by_id(spec->btf, spec->spec[0].type_id); 5351 - e = btf_enum(t) + spec->spec[0].idx; 5352 - *val = e->val; 5353 - break; 5354 - default: 5355 - return -EOPNOTSUPP; 5356 - } 5357 - 5358 - return 0; 5359 - } 5360 - 5361 - struct bpf_core_relo_res 5362 - { 5363 - /* expected value in the instruction, unless validate == false */ 5364 - __u32 orig_val; 5365 - /* new value that needs to be patched up to */ 5366 - __u32 new_val; 5367 - /* relocation unsuccessful, poison instruction, but don't fail load */ 5368 - bool poison; 5369 - /* some relocations can't be validated against orig_val */ 5370 - bool validate; 5371 - /* for field byte offset relocations or the forms: 5372 - * *(T *)(rX + <off>) = rY 5373 - * rX = *(T *)(rY + <off>), 5374 - * we remember original and resolved field size to adjust direct 5375 - * memory loads of pointers and integers; this is necessary for 32-bit 5376 - * host kernel architectures, but also allows to automatically 5377 - * relocate fields that were resized from, e.g., u32 to u64, etc. 5378 - */ 5379 - bool fail_memsz_adjust; 5380 - __u32 orig_sz; 5381 - __u32 orig_type_id; 5382 - __u32 new_sz; 5383 - __u32 new_type_id; 5384 - }; 5385 - 5386 - /* Calculate original and target relocation values, given local and target 5387 - * specs and relocation kind. These values are calculated for each candidate. 5388 - * If there are multiple candidates, resulting values should all be consistent 5389 - * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. 5390 - * If instruction has to be poisoned, *poison will be set to true. 
5391 - */ 5392 - static int bpf_core_calc_relo(const struct bpf_program *prog, 5393 - const struct bpf_core_relo *relo, 5394 - int relo_idx, 5395 - const struct bpf_core_spec *local_spec, 5396 - const struct bpf_core_spec *targ_spec, 5397 - struct bpf_core_relo_res *res) 5398 - { 5399 - int err = -EOPNOTSUPP; 5400 - 5401 - res->orig_val = 0; 5402 - res->new_val = 0; 5403 - res->poison = false; 5404 - res->validate = true; 5405 - res->fail_memsz_adjust = false; 5406 - res->orig_sz = res->new_sz = 0; 5407 - res->orig_type_id = res->new_type_id = 0; 5408 - 5409 - if (core_relo_is_field_based(relo->kind)) { 5410 - err = bpf_core_calc_field_relo(prog, relo, local_spec, 5411 - &res->orig_val, &res->orig_sz, 5412 - &res->orig_type_id, &res->validate); 5413 - err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, 5414 - &res->new_val, &res->new_sz, 5415 - &res->new_type_id, NULL); 5416 - if (err) 5417 - goto done; 5418 - /* Validate if it's safe to adjust load/store memory size. 5419 - * Adjustments are performed only if original and new memory 5420 - * sizes differ. 5421 - */ 5422 - res->fail_memsz_adjust = false; 5423 - if (res->orig_sz != res->new_sz) { 5424 - const struct btf_type *orig_t, *new_t; 5425 - 5426 - orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); 5427 - new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); 5428 - 5429 - /* There are two use cases in which it's safe to 5430 - * adjust load/store's mem size: 5431 - * - reading a 32-bit kernel pointer, while on BPF 5432 - * size pointers are always 64-bit; in this case 5433 - * it's safe to "downsize" instruction size due to 5434 - * pointer being treated as unsigned integer with 5435 - * zero-extended upper 32-bits; 5436 - * - reading unsigned integers, again due to 5437 - * zero-extension is preserving the value correctly. 
5438 - * 5439 - * In all other cases it's incorrect to attempt to 5440 - * load/store field because read value will be 5441 - * incorrect, so we poison relocated instruction. 5442 - */ 5443 - if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) 5444 - goto done; 5445 - if (btf_is_int(orig_t) && btf_is_int(new_t) && 5446 - btf_int_encoding(orig_t) != BTF_INT_SIGNED && 5447 - btf_int_encoding(new_t) != BTF_INT_SIGNED) 5448 - goto done; 5449 - 5450 - /* mark as invalid mem size adjustment, but this will 5451 - * only be checked for LDX/STX/ST insns 5452 - */ 5453 - res->fail_memsz_adjust = true; 5454 - } 5455 - } else if (core_relo_is_type_based(relo->kind)) { 5456 - err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); 5457 - err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); 5458 - } else if (core_relo_is_enumval_based(relo->kind)) { 5459 - err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); 5460 - err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); 5461 - } 5462 - 5463 - done: 5464 - if (err == -EUCLEAN) { 5465 - /* EUCLEAN is used to signal instruction poisoning request */ 5466 - res->poison = true; 5467 - err = 0; 5468 - } else if (err == -EOPNOTSUPP) { 5469 - /* EOPNOTSUPP means unknown/unsupported relocation */ 5470 - pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", 5471 - prog->name, relo_idx, core_relo_kind_str(relo->kind), 5472 - relo->kind, relo->insn_off / 8); 5473 - } 5474 - 5475 - return err; 5476 - } 5477 - 5478 - /* 5479 - * Turn instruction for which CO_RE relocation failed into invalid one with 5480 - * distinct signature. 
5481 - */ 5482 - static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, 5483 - int insn_idx, struct bpf_insn *insn) 5484 - { 5485 - pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", 5486 - prog->name, relo_idx, insn_idx); 5487 - insn->code = BPF_JMP | BPF_CALL; 5488 - insn->dst_reg = 0; 5489 - insn->src_reg = 0; 5490 - insn->off = 0; 5491 - /* if this instruction is reachable (not a dead code), 5492 - * verifier will complain with the following message: 5493 - * invalid func unknown#195896080 5494 - */ 5495 - insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ 5496 - } 5497 - 5498 - static int insn_bpf_size_to_bytes(struct bpf_insn *insn) 5499 - { 5500 - switch (BPF_SIZE(insn->code)) { 5501 - case BPF_DW: return 8; 5502 - case BPF_W: return 4; 5503 - case BPF_H: return 2; 5504 - case BPF_B: return 1; 5505 - default: return -1; 5506 - } 5507 - } 5508 - 5509 - static int insn_bytes_to_bpf_size(__u32 sz) 5510 - { 5511 - switch (sz) { 5512 - case 8: return BPF_DW; 5513 - case 4: return BPF_W; 5514 - case 2: return BPF_H; 5515 - case 1: return BPF_B; 5516 - default: return -1; 5517 - } 5518 - } 5519 - 5520 - /* 5521 - * Patch relocatable BPF instruction. 5522 - * 5523 - * Patched value is determined by relocation kind and target specification. 5524 - * For existence relocations target spec will be NULL if field/type is not found. 5525 - * Expected insn->imm value is determined using relocation kind and local 5526 - * spec, and is checked before patching instruction. If actual insn->imm value 5527 - * is wrong, bail out with error. 5528 - * 5529 - * Currently supported classes of BPF instruction are: 5530 - * 1. rX = <imm> (assignment with immediate operand); 5531 - * 2. rX += <imm> (arithmetic operations with immediate operand); 5532 - * 3. rX = <imm64> (load with 64-bit immediate value); 5533 - * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; 5534 - * 5. 
*(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; 5535 - * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. 5536 - */ 5537 - static int bpf_core_patch_insn(struct bpf_program *prog, 5538 - const struct bpf_core_relo *relo, 5539 - int relo_idx, 5540 - const struct bpf_core_relo_res *res) 5541 - { 5542 - __u32 orig_val, new_val; 5543 - struct bpf_insn *insn; 5544 - int insn_idx; 5545 - __u8 class; 5546 - 5547 - if (relo->insn_off % BPF_INSN_SZ) 5548 - return -EINVAL; 5549 - insn_idx = relo->insn_off / BPF_INSN_SZ; 5550 - /* adjust insn_idx from section frame of reference to the local 5551 - * program's frame of reference; (sub-)program code is not yet 5552 - * relocated, so it's enough to just subtract in-section offset 5553 - */ 5554 - insn_idx = insn_idx - prog->sec_insn_off; 5555 - insn = &prog->insns[insn_idx]; 5556 - class = BPF_CLASS(insn->code); 5557 - 5558 - if (res->poison) { 5559 - poison: 5560 - /* poison second part of ldimm64 to avoid confusing error from 5561 - * verifier about "unknown opcode 00" 5562 - */ 5563 - if (is_ldimm64_insn(insn)) 5564 - bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); 5565 - bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); 5566 - return 0; 5567 - } 5568 - 5569 - orig_val = res->orig_val; 5570 - new_val = res->new_val; 5571 - 5572 - switch (class) { 5573 - case BPF_ALU: 5574 - case BPF_ALU64: 5575 - if (BPF_SRC(insn->code) != BPF_K) 5576 - return -EINVAL; 5577 - if (res->validate && insn->imm != orig_val) { 5578 - pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", 5579 - prog->name, relo_idx, 5580 - insn_idx, insn->imm, orig_val, new_val); 5581 - return -EINVAL; 5582 - } 5583 - orig_val = insn->imm; 5584 - insn->imm = new_val; 5585 - pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", 5586 - prog->name, relo_idx, insn_idx, 5587 - orig_val, new_val); 5588 - break; 5589 - case BPF_LDX: 5590 - case BPF_ST: 5591 
- case BPF_STX: 5592 - if (res->validate && insn->off != orig_val) { 5593 - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", 5594 - prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val); 5595 - return -EINVAL; 5596 - } 5597 - if (new_val > SHRT_MAX) { 5598 - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", 5599 - prog->name, relo_idx, insn_idx, new_val); 5600 - return -ERANGE; 5601 - } 5602 - if (res->fail_memsz_adjust) { 5603 - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " 5604 - "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", 5605 - prog->name, relo_idx, insn_idx); 5606 - goto poison; 5607 - } 5608 - 5609 - orig_val = insn->off; 5610 - insn->off = new_val; 5611 - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", 5612 - prog->name, relo_idx, insn_idx, orig_val, new_val); 5613 - 5614 - if (res->new_sz != res->orig_sz) { 5615 - int insn_bytes_sz, insn_bpf_sz; 5616 - 5617 - insn_bytes_sz = insn_bpf_size_to_bytes(insn); 5618 - if (insn_bytes_sz != res->orig_sz) { 5619 - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", 5620 - prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); 5621 - return -EINVAL; 5622 - } 5623 - 5624 - insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); 5625 - if (insn_bpf_sz < 0) { 5626 - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", 5627 - prog->name, relo_idx, insn_idx, res->new_sz); 5628 - return -EINVAL; 5629 - } 5630 - 5631 - insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); 5632 - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", 5633 - prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); 5634 - } 5635 - break; 5636 - case BPF_LD: { 5637 - __u64 imm; 5638 - 5639 - if (!is_ldimm64_insn(insn) || 5640 - 
insn[0].src_reg != 0 || insn[0].off != 0 || 5641 - insn_idx + 1 >= prog->insns_cnt || 5642 - insn[1].code != 0 || insn[1].dst_reg != 0 || 5643 - insn[1].src_reg != 0 || insn[1].off != 0) { 5644 - pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", 5645 - prog->name, relo_idx, insn_idx); 5646 - return -EINVAL; 5647 - } 5648 - 5649 - imm = insn[0].imm + ((__u64)insn[1].imm << 32); 5650 - if (res->validate && imm != orig_val) { 5651 - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", 5652 - prog->name, relo_idx, 5653 - insn_idx, (unsigned long long)imm, 5654 - orig_val, new_val); 5655 - return -EINVAL; 5656 - } 5657 - 5658 - insn[0].imm = new_val; 5659 - insn[1].imm = 0; /* currently only 32-bit values are supported */ 5660 - pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", 5661 - prog->name, relo_idx, insn_idx, 5662 - (unsigned long long)imm, new_val); 5663 - break; 5664 - } 5665 - default: 5666 - pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", 5667 - prog->name, relo_idx, insn_idx, insn->code, 5668 - insn->src_reg, insn->dst_reg, insn->off, insn->imm); 5669 - return -EINVAL; 5670 - } 5671 - 5672 - return 0; 5673 - } 5674 - 5675 - /* Output spec definition in the format: 5676 - * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, 5677 - * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b 5678 - */ 5679 - static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) 5680 - { 5681 - const struct btf_type *t; 5682 - const struct btf_enum *e; 5683 - const char *s; 5684 - __u32 type_id; 5685 - int i; 5686 - 5687 - type_id = spec->root_type_id; 5688 - t = btf__type_by_id(spec->btf, type_id); 5689 - s = btf__name_by_offset(spec->btf, t->name_off); 5690 - 5691 - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); 5692 - 5693 - if (core_relo_is_type_based(spec->relo_kind)) 5694 - return; 5695 - 5696 - if (core_relo_is_enumval_based(spec->relo_kind)) { 5697 - t = skip_mods_and_typedefs(spec->btf, type_id, NULL); 5698 - e = btf_enum(t) + spec->raw_spec[0]; 5699 - s = btf__name_by_offset(spec->btf, e->name_off); 5700 - 5701 - libbpf_print(level, "::%s = %u", s, e->val); 5702 - return; 5703 - } 5704 - 5705 - if (core_relo_is_field_based(spec->relo_kind)) { 5706 - for (i = 0; i < spec->len; i++) { 5707 - if (spec->spec[i].name) 5708 - libbpf_print(level, ".%s", spec->spec[i].name); 5709 - else if (i > 0 || spec->spec[i].idx > 0) 5710 - libbpf_print(level, "[%u]", spec->spec[i].idx); 5711 - } 5712 - 5713 - libbpf_print(level, " ("); 5714 - for (i = 0; i < spec->raw_len; i++) 5715 - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); 5716 - 5717 - if (spec->bit_offset % 8) 5718 - libbpf_print(level, " @ offset %u.%u)", 5719 - spec->bit_offset / 8, spec->bit_offset % 8); 5720 - else 5721 - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); 5722 - return; 5723 - } 5724 - } 5725 - 5726 5495 static size_t bpf_core_hash_fn(const void *key, void *ctx) 5727 5496 { 5728 5497 return (size_t)key; ··· 5073 6172 return (void *)(uintptr_t)x; 5074 6173 } 5075 6174 5076 - /* 5077 - * CO-RE relocate single instruction. 5078 - * 5079 - * The outline and important points of the algorithm: 5080 - * 1. For given local type, find corresponding candidate target types. 5081 - * Candidate type is a type with the same "essential" name, ignoring 5082 - * everything after last triple underscore (___). E.g., `sample`, 5083 - * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates 5084 - * for each other. 
Names with triple underscore are referred to as 5085 - * "flavors" and are useful, among other things, to allow to 5086 - * specify/support incompatible variations of the same kernel struct, which 5087 - * might differ between different kernel versions and/or build 5088 - * configurations. 5089 - * 5090 - * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C 5091 - * converter, when deduplicated BTF of a kernel still contains more than 5092 - * one different types with the same name. In that case, ___2, ___3, etc 5093 - * are appended starting from second name conflict. But start flavors are 5094 - * also useful to be defined "locally", in BPF program, to extract same 5095 - * data from incompatible changes between different kernel 5096 - * versions/configurations. For instance, to handle field renames between 5097 - * kernel versions, one can use two flavors of the struct name with the 5098 - * same common name and use conditional relocations to extract that field, 5099 - * depending on target kernel version. 5100 - * 2. For each candidate type, try to match local specification to this 5101 - * candidate target type. Matching involves finding corresponding 5102 - * high-level spec accessors, meaning that all named fields should match, 5103 - * as well as all array accesses should be within the actual bounds. Also, 5104 - * types should be compatible (see bpf_core_fields_are_compat for details). 5105 - * 3. It is supported and expected that there might be multiple flavors 5106 - * matching the spec. As long as all the specs resolve to the same set of 5107 - * offsets across all candidates, there is no error. If there is any 5108 - * ambiguity, CO-RE relocation will fail. This is necessary to accomodate 5109 - * imprefection of BTF deduplication, which can cause slight duplication of 5110 - * the same BTF type, if some directly or indirectly referenced (by 5111 - * pointer) type gets resolved to different actual types in different 5112 - * object files. 
If such situation occurs, deduplicated BTF will end up 5113 - * with two (or more) structurally identical types, which differ only in 5114 - * types they refer to through pointer. This should be OK in most cases and 5115 - * is not an error. 5116 - * 4. Candidate types search is performed by linearly scanning through all 5117 - * types in target BTF. It is anticipated that this is overall more 5118 - * efficient memory-wise and not significantly worse (if not better) 5119 - * CPU-wise compared to prebuilding a map from all local type names to 5120 - * a list of candidate type names. It's also sped up by caching resolved 5121 - * list of matching candidates per each local "root" type ID, that has at 5122 - * least one bpf_core_relo associated with it. This list is shared 5123 - * between multiple relocations for the same type ID and is updated as some 5124 - * of the candidates are pruned due to structural incompatibility. 5125 - */ 5126 6175 static int bpf_core_apply_relo(struct bpf_program *prog, 5127 6176 const struct bpf_core_relo *relo, 5128 6177 int relo_idx, 5129 6178 const struct btf *local_btf, 5130 6179 struct hashmap *cand_cache) 5131 6180 { 5132 - struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; 5133 6181 const void *type_key = u32_as_hash_key(relo->type_id); 5134 - struct bpf_core_relo_res cand_res, targ_res; 6182 + struct bpf_core_cand_list *cands = NULL; 6183 + const char *prog_name = prog->name; 5135 6184 const struct btf_type *local_type; 5136 6185 const char *local_name; 5137 - struct core_cand_list *cands = NULL; 5138 - __u32 local_id; 5139 - const char *spec_str; 5140 - int i, j, err; 6186 + __u32 local_id = relo->type_id; 6187 + struct bpf_insn *insn; 6188 + int insn_idx, err; 5141 6189 5142 - local_id = relo->type_id; 6190 + if (relo->insn_off % BPF_INSN_SZ) 6191 + return -EINVAL; 6192 + insn_idx = relo->insn_off / BPF_INSN_SZ; 6193 + /* adjust insn_idx from section frame of reference to the local 6194 + * program's frame of 
reference; (sub-)program code is not yet 6195 + * relocated, so it's enough to just subtract in-section offset 6196 + */ 6197 + insn_idx = insn_idx - prog->sec_insn_off; 6198 + if (insn_idx > prog->insns_cnt) 6199 + return -EINVAL; 6200 + insn = &prog->insns[insn_idx]; 6201 + 5143 6202 local_type = btf__type_by_id(local_btf, local_id); 5144 6203 if (!local_type) 5145 6204 return -EINVAL; ··· 5108 6247 if (!local_name) 5109 6248 return -EINVAL; 5110 6249 5111 - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); 5112 - if (str_is_empty(spec_str)) 5113 - return -EINVAL; 5114 - 5115 6250 if (prog->obj->gen_loader) { 5116 - pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n", 6251 + pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", 5117 6252 prog - prog->obj->programs, relo->insn_off / 8, 5118 - local_name, spec_str, relo->kind); 6253 + local_name, relo->kind); 5119 6254 return -ENOTSUP; 5120 6255 } 5121 - err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); 5122 - if (err) { 5123 - pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", 5124 - prog->name, relo_idx, local_id, btf_kind_str(local_type), 5125 - str_is_empty(local_name) ? 
"<anon>" : local_name, 5126 - spec_str, err); 5127 - return -EINVAL; 5128 - } 5129 6256 5130 - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name, 5131 - relo_idx, core_relo_kind_str(relo->kind), relo->kind); 5132 - bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); 5133 - libbpf_print(LIBBPF_DEBUG, "\n"); 5134 - 5135 - /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ 5136 - if (relo->kind == BPF_TYPE_ID_LOCAL) { 5137 - targ_res.validate = true; 5138 - targ_res.poison = false; 5139 - targ_res.orig_val = local_spec.root_type_id; 5140 - targ_res.new_val = local_spec.root_type_id; 5141 - goto patch_insn; 5142 - } 5143 - 5144 - /* libbpf doesn't support candidate search for anonymous types */ 5145 - if (str_is_empty(spec_str)) { 5146 - pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", 5147 - prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); 5148 - return -EOPNOTSUPP; 5149 - } 5150 - 5151 - if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { 6257 + if (relo->kind != BPF_TYPE_ID_LOCAL && 6258 + !hashmap__find(cand_cache, type_key, (void **)&cands)) { 5152 6259 cands = bpf_core_find_cands(prog->obj, local_btf, local_id); 5153 6260 if (IS_ERR(cands)) { 5154 6261 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", 5155 - prog->name, relo_idx, local_id, btf_kind_str(local_type), 6262 + prog_name, relo_idx, local_id, btf_kind_str(local_type), 5156 6263 local_name, PTR_ERR(cands)); 5157 6264 return PTR_ERR(cands); 5158 6265 } ··· 5131 6302 } 5132 6303 } 5133 6304 5134 - for (i = 0, j = 0; i < cands->len; i++) { 5135 - err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, 5136 - cands->cands[i].id, &cand_spec); 5137 - if (err < 0) { 5138 - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", 5139 - prog->name, relo_idx, i); 5140 - bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); 5141 - libbpf_print(LIBBPF_WARN, ": %d\n", err); 5142 - 
return err; 5143 - } 5144 - 5145 - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name, 5146 - relo_idx, err == 0 ? "non-matching" : "matching", i); 5147 - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); 5148 - libbpf_print(LIBBPF_DEBUG, "\n"); 5149 - 5150 - if (err == 0) 5151 - continue; 5152 - 5153 - err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); 5154 - if (err) 5155 - return err; 5156 - 5157 - if (j == 0) { 5158 - targ_res = cand_res; 5159 - targ_spec = cand_spec; 5160 - } else if (cand_spec.bit_offset != targ_spec.bit_offset) { 5161 - /* if there are many field relo candidates, they 5162 - * should all resolve to the same bit offset 5163 - */ 5164 - pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", 5165 - prog->name, relo_idx, cand_spec.bit_offset, 5166 - targ_spec.bit_offset); 5167 - return -EINVAL; 5168 - } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { 5169 - /* all candidates should result in the same relocation 5170 - * decision and value, otherwise it's dangerous to 5171 - * proceed due to ambiguity 5172 - */ 5173 - pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", 5174 - prog->name, relo_idx, 5175 - cand_res.poison ? "failure" : "success", cand_res.new_val, 5176 - targ_res.poison ? "failure" : "success", targ_res.new_val); 5177 - return -EINVAL; 5178 - } 5179 - 5180 - cands->cands[j++] = cands->cands[i]; 5181 - } 5182 - 5183 - /* 5184 - * For BPF_FIELD_EXISTS relo or when used BPF program has field 5185 - * existence checks or kernel version/config checks, it's expected 5186 - * that we might not find any candidates. In this case, if field 5187 - * wasn't found in any candidate, the list of candidates shouldn't 5188 - * change at all, we'll just handle relocating appropriately, 5189 - * depending on relo's kind. 
5190 - */ 5191 - if (j > 0) 5192 - cands->len = j; 5193 - 5194 - /* 5195 - * If no candidates were found, it might be both a programmer error, 5196 - * as well as expected case, depending whether instruction w/ 5197 - * relocation is guarded in some way that makes it unreachable (dead 5198 - * code) if relocation can't be resolved. This is handled in 5199 - * bpf_core_patch_insn() uniformly by replacing that instruction with 5200 - * BPF helper call insn (using invalid helper ID). If that instruction 5201 - * is indeed unreachable, then it will be ignored and eliminated by 5202 - * verifier. If it was an error, then verifier will complain and point 5203 - * to a specific instruction number in its log. 5204 - */ 5205 - if (j == 0) { 5206 - pr_debug("prog '%s': relo #%d: no matching targets found\n", 5207 - prog->name, relo_idx); 5208 - 5209 - /* calculate single target relo result explicitly */ 5210 - err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); 5211 - if (err) 5212 - return err; 5213 - } 5214 - 5215 - patch_insn: 5216 - /* bpf_core_patch_insn() should know how to handle missing targ_spec */ 5217 - err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); 5218 - if (err) { 5219 - pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n", 5220 - prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err); 5221 - return -EINVAL; 5222 - } 5223 - 5224 - return 0; 6305 + return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); 5225 6306 } 5226 6307 5227 6308 static int ··· 5971 7232 5972 7233 for (i = 0; i < obj->nr_programs; i++) { 5973 7234 struct bpf_program *p = &obj->programs[i]; 5974 - 7235 + 5975 7236 if (!p->nr_reloc) 5976 7237 continue; 5977 7238 ··· 6335 7596 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, 6336 7597 const struct bpf_object_open_opts *opts) 6337 7598 { 6338 - const char *obj_name, *kconfig; 7599 + const char *obj_name, *kconfig, *btf_tmp_path; 
6339 7600 struct bpf_program *prog; 6340 7601 struct bpf_object *obj; 6341 7602 char tmp_name[64]; ··· 6366 7627 if (IS_ERR(obj)) 6367 7628 return obj; 6368 7629 7630 + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 7631 + if (btf_tmp_path) { 7632 + if (strlen(btf_tmp_path) >= PATH_MAX) { 7633 + err = -ENAMETOOLONG; 7634 + goto out; 7635 + } 7636 + obj->btf_custom_path = strdup(btf_tmp_path); 7637 + if (!obj->btf_custom_path) { 7638 + err = -ENOMEM; 7639 + goto out; 7640 + } 7641 + } 7642 + 6369 7643 kconfig = OPTS_GET(opts, kconfig, NULL); 6370 7644 if (kconfig) { 6371 7645 obj->kconfig = strdup(kconfig); 6372 - if (!obj->kconfig) 6373 - return ERR_PTR(-ENOMEM); 7646 + if (!obj->kconfig) { 7647 + err = -ENOMEM; 7648 + goto out; 7649 + } 6374 7650 } 6375 7651 6376 7652 err = bpf_object__elf_init(obj); ··· 6851 8097 err = err ? : bpf_object__sanitize_maps(obj); 6852 8098 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 6853 8099 err = err ? : bpf_object__create_maps(obj); 6854 - err = err ? : bpf_object__relocate(obj, attr->target_btf_path); 8100 + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); 6855 8101 err = err ? 
: bpf_object__load_progs(obj, attr->log_level); 6856 8102 6857 8103 if (obj->gen_loader) { ··· 7246 8492 return map->pin_path; 7247 8493 } 7248 8494 8495 + const char *bpf_map__pin_path(const struct bpf_map *map) 8496 + { 8497 + return map->pin_path; 8498 + } 8499 + 7249 8500 bool bpf_map__is_pinned(const struct bpf_map *map) 7250 8501 { 7251 8502 return map->pinned; ··· 7503 8744 for (i = 0; i < obj->nr_maps; i++) 7504 8745 bpf_map__destroy(&obj->maps[i]); 7505 8746 8747 + zfree(&obj->btf_custom_path); 7506 8748 zfree(&obj->kconfig); 7507 8749 zfree(&obj->externs); 7508 8750 obj->nr_extern = 0; ··· 8273 9513 ret = snprintf(btf_type_name, sizeof(btf_type_name), 8274 9514 "%s%s", prefix, name); 8275 9515 /* snprintf returns the number of characters written excluding the 8276 - * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 9516 + * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 8277 9517 * indicates truncation. 8278 9518 */ 8279 9519 if (ret < 0 || ret >= sizeof(btf_type_name)) ··· 8297 9537 struct btf *btf; 8298 9538 int err; 8299 9539 8300 - btf = libbpf_find_kernel_btf(); 9540 + btf = btf__load_vmlinux_btf(); 8301 9541 err = libbpf_get_error(btf); 8302 9542 if (err) { 8303 9543 pr_warn("vmlinux BTF is not found\n"); ··· 8316 9556 { 8317 9557 struct bpf_prog_info_linear *info_linear; 8318 9558 struct bpf_prog_info *info; 8319 - struct btf *btf = NULL; 8320 - int err = -EINVAL; 9559 + struct btf *btf; 9560 + int err; 8321 9561 8322 9562 info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); 8323 9563 err = libbpf_get_error(info_linear); ··· 8326 9566 attach_prog_fd); 8327 9567 return err; 8328 9568 } 9569 + 9570 + err = -EINVAL; 8329 9571 info = &info_linear->info; 8330 9572 if (!info->btf_id) { 8331 9573 pr_warn("The target program doesn't have BTF\n"); 8332 9574 goto out; 8333 9575 } 8334 - if (btf__get_from_id(info->btf_id, &btf)) { 9576 + btf = btf__load_from_kernel_by_id(info->btf_id); 9577 + if 
(libbpf_get_error(btf)) { 8335 9578 pr_warn("Failed to get BTF of the program\n"); 8336 9579 goto out; 8337 9580 } ··· 8818 10055 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) 8819 10056 { 8820 10057 int ret; 8821 - 10058 + 8822 10059 ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); 8823 10060 return libbpf_err_errno(ret); 8824 10061 } ··· 9109 10346 return pfd; 9110 10347 } 9111 10348 9112 - struct bpf_program_attach_kprobe_opts { 9113 - bool retprobe; 9114 - unsigned long offset; 9115 - }; 9116 - 9117 - static struct bpf_link* 10349 + struct bpf_link * 9118 10350 bpf_program__attach_kprobe_opts(struct bpf_program *prog, 9119 10351 const char *func_name, 9120 - struct bpf_program_attach_kprobe_opts *opts) 10352 + struct bpf_kprobe_opts *opts) 9121 10353 { 9122 10354 char errmsg[STRERR_BUFSIZE]; 9123 10355 struct bpf_link *link; 10356 + unsigned long offset; 10357 + bool retprobe; 9124 10358 int pfd, err; 9125 10359 9126 - pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name, 9127 - opts->offset, -1 /* pid */); 10360 + if (!OPTS_VALID(opts, bpf_kprobe_opts)) 10361 + return libbpf_err_ptr(-EINVAL); 10362 + 10363 + retprobe = OPTS_GET(opts, retprobe, false); 10364 + offset = OPTS_GET(opts, offset, 0); 10365 + 10366 + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, 10367 + offset, -1 /* pid */); 9128 10368 if (pfd < 0) { 9129 10369 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", 9130 - prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name, 10370 + prog->name, retprobe ? "kretprobe" : "kprobe", func_name, 9131 10371 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 9132 10372 return libbpf_err_ptr(pfd); 9133 10373 } ··· 9139 10373 if (err) { 9140 10374 close(pfd); 9141 10375 pr_warn("prog '%s': failed to attach to %s '%s': %s\n", 9142 - prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name, 10376 + prog->name, retprobe ? 
"kretprobe" : "kprobe", func_name, 9143 10377 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 9144 10378 return libbpf_err_ptr(err); 9145 10379 } ··· 9150 10384 bool retprobe, 9151 10385 const char *func_name) 9152 10386 { 9153 - struct bpf_program_attach_kprobe_opts opts = { 10387 + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 9154 10388 .retprobe = retprobe, 9155 - }; 10389 + ); 9156 10390 9157 10391 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 9158 10392 } ··· 9160 10394 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, 9161 10395 struct bpf_program *prog) 9162 10396 { 9163 - struct bpf_program_attach_kprobe_opts opts; 10397 + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); 9164 10398 unsigned long offset = 0; 9165 10399 struct bpf_link *link; 9166 10400 const char *func_name; ··· 9170 10404 func_name = prog->sec_name + sec->len; 9171 10405 opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; 9172 10406 9173 - n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset); 10407 + n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); 9174 10408 if (n < 1) { 9175 10409 err = -EINVAL; 9176 10410 pr_warn("kprobe name is invalid: %s\n", func_name); 9177 10411 return libbpf_err_ptr(err); 9178 10412 } 9179 10413 if (opts.retprobe && offset != 0) { 10414 + free(func); 9180 10415 err = -EINVAL; 9181 10416 pr_warn("kretprobes do not support offset specification\n"); 9182 10417 return libbpf_err_ptr(err);

+24 -1

tools/lib/bpf/libbpf.h

··· 94 94 * system Kconfig for CONFIG_xxx externs. 95 95 */ 96 96 const char *kconfig; 97 + /* Path to the custom BTF to be used for BPF CO-RE relocations. 98 + * This custom BTF completely replaces the use of vmlinux BTF 99 + * for the purpose of CO-RE relocations. 100 + * NOTE: any other BPF feature (e.g., fentry/fexit programs, 101 + * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. 102 + */ 103 + const char *btf_custom_path; 97 104 }; 98 - #define bpf_object_open_opts__last_field kconfig 105 + #define bpf_object_open_opts__last_field btf_custom_path 106 + 107 + struct bpf_kprobe_opts { 108 + /* size of this struct, for forward/backward compatibility */ 109 + size_t sz; 110 + /* function's offset to install kprobe to */ 111 + unsigned long offset; 112 + /* kprobe is return probe */ 113 + bool retprobe; 114 + size_t :0; 115 + }; 116 + #define bpf_kprobe_opts__last_field retprobe 99 117 100 118 LIBBPF_API struct bpf_object *bpf_object__open(const char *path); 101 119 LIBBPF_API struct bpf_object * ··· 260 242 LIBBPF_API struct bpf_link * 261 243 bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, 262 244 const char *func_name); 245 + LIBBPF_API struct bpf_link * 246 + bpf_program__attach_kprobe_opts(struct bpf_program *prog, 247 + const char *func_name, 248 + struct bpf_kprobe_opts *opts); 263 249 LIBBPF_API struct bpf_link * 264 250 bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, 265 251 pid_t pid, const char *binary_path, ··· 499 477 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); 500 478 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); 501 479 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); 480 + LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); 502 481 LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); 503 482 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); 504 483 LIBBPF_API int 
bpf_map__unpin(struct bpf_map *map, const char *path);

+8

tools/lib/bpf/libbpf.map

··· 371 371 LIBBPF_0.5.0 { 372 372 global: 373 373 bpf_map__initial_value; 374 + bpf_map__pin_path; 374 375 bpf_map_lookup_and_delete_elem_flags; 376 + bpf_program__attach_kprobe_opts; 375 377 bpf_object__gen_loader; 378 + btf__load_from_kernel_by_id; 379 + btf__load_from_kernel_by_id_split; 380 + btf__load_into_kernel; 381 + btf__load_module_btf; 382 + btf__load_vmlinux_btf; 383 + btf_dump__dump_type_data; 376 384 libbpf_set_strict_mode; 377 385 } LIBBPF_0.4.0;

+11 -70

tools/lib/bpf/libbpf_internal.h

··· 14 14 #include <errno.h> 15 15 #include <linux/err.h> 16 16 #include "libbpf_legacy.h" 17 + #include "relo_core.h" 17 18 18 19 /* make sure libbpf doesn't use kernel-only integer typedefs */ 19 20 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 ··· 367 366 __u32 line_col; 368 367 }; 369 368 370 - /* bpf_core_relo_kind encodes which aspect of captured field/type/enum value 371 - * has to be adjusted by relocations. 372 - */ 373 - enum bpf_core_relo_kind { 374 - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ 375 - BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ 376 - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ 377 - BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ 378 - BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ 379 - BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ 380 - BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ 381 - BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ 382 - BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ 383 - BPF_TYPE_SIZE = 9, /* type size in bytes */ 384 - BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ 385 - BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ 386 - }; 387 - 388 - /* The minimum bpf_core_relo checked by the loader 389 - * 390 - * CO-RE relocation captures the following data: 391 - * - insn_off - instruction offset (in bytes) within a BPF program that needs 392 - * its insn->imm field to be relocated with actual field info; 393 - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable 394 - * type or field; 395 - * - access_str_off - offset into corresponding .BTF string section. String 396 - * interpretation depends on specific relocation kind: 397 - * - for field-based relocations, string encodes an accessed field using 398 - * a sequence of field and array indices, separated by colon (:). 
It's 399 - * conceptually very close to LLVM's getelementptr ([0]) instruction's 400 - * arguments for identifying offset to a field. 401 - * - for type-based relocations, strings is expected to be just "0"; 402 - * - for enum value-based relocations, string contains an index of enum 403 - * value within its enum type; 404 - * 405 - * Example to provide a better feel. 406 - * 407 - * struct sample { 408 - * int a; 409 - * struct { 410 - * int b[10]; 411 - * }; 412 - * }; 413 - * 414 - * struct sample *s = ...; 415 - * int x = &s->a; // encoded as "0:0" (a is field #0) 416 - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, 417 - * // b is field #0 inside anon struct, accessing elem #5) 418 - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) 419 - * 420 - * type_id for all relocs in this example will capture BTF type id of 421 - * `struct sample`. 422 - * 423 - * Such relocation is emitted when using __builtin_preserve_access_index() 424 - * Clang built-in, passing expression that captures field address, e.g.: 425 - * 426 - * bpf_probe_read(&dst, sizeof(dst), 427 - * __builtin_preserve_access_index(&src->a.b.c)); 428 - * 429 - * In this case Clang will emit field relocation recording necessary data to 430 - * be able to find offset of embedded `a.b.c` field within `src` struct. 
431 - * 432 - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction 433 - */ 434 - struct bpf_core_relo { 435 - __u32 insn_off; 436 - __u32 type_id; 437 - __u32 access_str_off; 438 - enum bpf_core_relo_kind kind; 439 - }; 440 369 441 370 typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); 442 371 typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); ··· 423 492 424 493 /* legacy: pass-through original pointer */ 425 494 return ret; 495 + } 496 + 497 + static inline bool str_is_empty(const char *s) 498 + { 499 + return !s || !s[0]; 500 + } 501 + 502 + static inline bool is_ldimm64_insn(struct bpf_insn *insn) 503 + { 504 + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); 426 505 } 427 506 428 507 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */

+1295

tools/lib/bpf/relo_core.c

··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #include <stdio.h> 5 + #include <string.h> 6 + #include <errno.h> 7 + #include <ctype.h> 8 + #include <linux/err.h> 9 + 10 + #include "libbpf.h" 11 + #include "bpf.h" 12 + #include "btf.h" 13 + #include "str_error.h" 14 + #include "libbpf_internal.h" 15 + 16 + #define BPF_CORE_SPEC_MAX_LEN 64 17 + 18 + /* represents BPF CO-RE field or array element accessor */ 19 + struct bpf_core_accessor { 20 + __u32 type_id; /* struct/union type or array element type */ 21 + __u32 idx; /* field index or array index */ 22 + const char *name; /* field name or NULL for array accessor */ 23 + }; 24 + 25 + struct bpf_core_spec { 26 + const struct btf *btf; 27 + /* high-level spec: named fields and array indices only */ 28 + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; 29 + /* original unresolved (no skip_mods_or_typedefs) root type ID */ 30 + __u32 root_type_id; 31 + /* CO-RE relocation kind */ 32 + enum bpf_core_relo_kind relo_kind; 33 + /* high-level spec length */ 34 + int len; 35 + /* raw, low-level spec: 1-to-1 with accessor spec string */ 36 + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; 37 + /* raw spec length */ 38 + int raw_len; 39 + /* field bit offset represented by spec */ 40 + __u32 bit_offset; 41 + }; 42 + 43 + static bool is_flex_arr(const struct btf *btf, 44 + const struct bpf_core_accessor *acc, 45 + const struct btf_array *arr) 46 + { 47 + const struct btf_type *t; 48 + 49 + /* not a flexible array, if not inside a struct or has non-zero size */ 50 + if (!acc->name || arr->nelems > 0) 51 + return false; 52 + 53 + /* has to be the last member of enclosing struct */ 54 + t = btf__type_by_id(btf, acc->type_id); 55 + return acc->idx == btf_vlen(t) - 1; 56 + } 57 + 58 + static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) 59 + { 60 + switch (kind) { 61 + case BPF_FIELD_BYTE_OFFSET: return "byte_off"; 62 + case BPF_FIELD_BYTE_SIZE: return "byte_sz"; 
63 + case BPF_FIELD_EXISTS: return "field_exists"; 64 + case BPF_FIELD_SIGNED: return "signed"; 65 + case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; 66 + case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; 67 + case BPF_TYPE_ID_LOCAL: return "local_type_id"; 68 + case BPF_TYPE_ID_TARGET: return "target_type_id"; 69 + case BPF_TYPE_EXISTS: return "type_exists"; 70 + case BPF_TYPE_SIZE: return "type_size"; 71 + case BPF_ENUMVAL_EXISTS: return "enumval_exists"; 72 + case BPF_ENUMVAL_VALUE: return "enumval_value"; 73 + default: return "unknown"; 74 + } 75 + } 76 + 77 + static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) 78 + { 79 + switch (kind) { 80 + case BPF_FIELD_BYTE_OFFSET: 81 + case BPF_FIELD_BYTE_SIZE: 82 + case BPF_FIELD_EXISTS: 83 + case BPF_FIELD_SIGNED: 84 + case BPF_FIELD_LSHIFT_U64: 85 + case BPF_FIELD_RSHIFT_U64: 86 + return true; 87 + default: 88 + return false; 89 + } 90 + } 91 + 92 + static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) 93 + { 94 + switch (kind) { 95 + case BPF_TYPE_ID_LOCAL: 96 + case BPF_TYPE_ID_TARGET: 97 + case BPF_TYPE_EXISTS: 98 + case BPF_TYPE_SIZE: 99 + return true; 100 + default: 101 + return false; 102 + } 103 + } 104 + 105 + static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) 106 + { 107 + switch (kind) { 108 + case BPF_ENUMVAL_EXISTS: 109 + case BPF_ENUMVAL_VALUE: 110 + return true; 111 + default: 112 + return false; 113 + } 114 + } 115 + 116 + /* 117 + * Turn bpf_core_relo into a low- and high-level spec representation, 118 + * validating correctness along the way, as well as calculating resulting 119 + * field bit offset, specified by accessor string. Low-level spec captures 120 + * every single level of nestedness, including traversing anonymous 121 + * struct/union members. High-level one only captures semantically meaningful 122 + * "turning points": named fields and array indicies. 
123 + * E.g., for this case: 124 + * 125 + * struct sample { 126 + * int __unimportant; 127 + * struct { 128 + * int __1; 129 + * int __2; 130 + * int a[7]; 131 + * }; 132 + * }; 133 + * 134 + * struct sample *s = ...; 135 + * 136 + * int x = &s->a[3]; // access string = '0:1:2:3' 137 + * 138 + * Low-level spec has 1:1 mapping with each element of access string (it's 139 + * just a parsed access string representation): [0, 1, 2, 3]. 140 + * 141 + * High-level spec will capture only 3 points: 142 + * - initial zero-index access by pointer (&s->... is the same as &s[0]...); 143 + * - field 'a' access (corresponds to '2' in low-level spec); 144 + * - array element #3 access (corresponds to '3' in low-level spec). 145 + * 146 + * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, 147 + * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their 148 + * spec and raw_spec are kept empty. 149 + * 150 + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access 151 + * string to specify enumerator's value index that needs to be relocated. 
152 + */ 153 + static int bpf_core_parse_spec(const struct btf *btf, 154 + __u32 type_id, 155 + const char *spec_str, 156 + enum bpf_core_relo_kind relo_kind, 157 + struct bpf_core_spec *spec) 158 + { 159 + int access_idx, parsed_len, i; 160 + struct bpf_core_accessor *acc; 161 + const struct btf_type *t; 162 + const char *name; 163 + __u32 id; 164 + __s64 sz; 165 + 166 + if (str_is_empty(spec_str) || *spec_str == ':') 167 + return -EINVAL; 168 + 169 + memset(spec, 0, sizeof(*spec)); 170 + spec->btf = btf; 171 + spec->root_type_id = type_id; 172 + spec->relo_kind = relo_kind; 173 + 174 + /* type-based relocations don't have a field access string */ 175 + if (core_relo_is_type_based(relo_kind)) { 176 + if (strcmp(spec_str, "0")) 177 + return -EINVAL; 178 + return 0; 179 + } 180 + 181 + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ 182 + while (*spec_str) { 183 + if (*spec_str == ':') 184 + ++spec_str; 185 + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) 186 + return -EINVAL; 187 + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) 188 + return -E2BIG; 189 + spec_str += parsed_len; 190 + spec->raw_spec[spec->raw_len++] = access_idx; 191 + } 192 + 193 + if (spec->raw_len == 0) 194 + return -EINVAL; 195 + 196 + t = skip_mods_and_typedefs(btf, type_id, &id); 197 + if (!t) 198 + return -EINVAL; 199 + 200 + access_idx = spec->raw_spec[0]; 201 + acc = &spec->spec[0]; 202 + acc->type_id = id; 203 + acc->idx = access_idx; 204 + spec->len++; 205 + 206 + if (core_relo_is_enumval_based(relo_kind)) { 207 + if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) 208 + return -EINVAL; 209 + 210 + /* record enumerator name in a first accessor */ 211 + acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); 212 + return 0; 213 + } 214 + 215 + if (!core_relo_is_field_based(relo_kind)) 216 + return -EINVAL; 217 + 218 + sz = btf__resolve_size(btf, id); 219 + if (sz < 0) 220 + return sz; 221 + spec->bit_offset = access_idx * 
sz * 8; 222 + 223 + for (i = 1; i < spec->raw_len; i++) { 224 + t = skip_mods_and_typedefs(btf, id, &id); 225 + if (!t) 226 + return -EINVAL; 227 + 228 + access_idx = spec->raw_spec[i]; 229 + acc = &spec->spec[spec->len]; 230 + 231 + if (btf_is_composite(t)) { 232 + const struct btf_member *m; 233 + __u32 bit_offset; 234 + 235 + if (access_idx >= btf_vlen(t)) 236 + return -EINVAL; 237 + 238 + bit_offset = btf_member_bit_offset(t, access_idx); 239 + spec->bit_offset += bit_offset; 240 + 241 + m = btf_members(t) + access_idx; 242 + if (m->name_off) { 243 + name = btf__name_by_offset(btf, m->name_off); 244 + if (str_is_empty(name)) 245 + return -EINVAL; 246 + 247 + acc->type_id = id; 248 + acc->idx = access_idx; 249 + acc->name = name; 250 + spec->len++; 251 + } 252 + 253 + id = m->type; 254 + } else if (btf_is_array(t)) { 255 + const struct btf_array *a = btf_array(t); 256 + bool flex; 257 + 258 + t = skip_mods_and_typedefs(btf, a->type, &id); 259 + if (!t) 260 + return -EINVAL; 261 + 262 + flex = is_flex_arr(btf, acc - 1, a); 263 + if (!flex && access_idx >= a->nelems) 264 + return -EINVAL; 265 + 266 + spec->spec[spec->len].type_id = id; 267 + spec->spec[spec->len].idx = access_idx; 268 + spec->len++; 269 + 270 + sz = btf__resolve_size(btf, id); 271 + if (sz < 0) 272 + return sz; 273 + spec->bit_offset += access_idx * sz * 8; 274 + } else { 275 + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", 276 + type_id, spec_str, i, id, btf_kind_str(t)); 277 + return -EINVAL; 278 + } 279 + } 280 + 281 + return 0; 282 + } 283 + 284 + /* Check two types for compatibility for the purpose of field access 285 + * relocation. 
const/volatile/restrict and typedefs are skipped to ensure we 286 + * are relocating semantically compatible entities: 287 + * - any two STRUCTs/UNIONs are compatible and can be mixed; 288 + * - any two FWDs are compatible, if their names match (modulo flavor suffix); 289 + * - any two PTRs are always compatible; 290 + * - for ENUMs, names should be the same (ignoring flavor suffix) or at 291 + * least one of enums should be anonymous; 292 + * - for ENUMs, check sizes, names are ignored; 293 + * - for INT, size and signedness are ignored; 294 + * - any two FLOATs are always compatible; 295 + * - for ARRAY, dimensionality is ignored, element types are checked for 296 + * compatibility recursively; 297 + * - everything else shouldn't be ever a target of relocation. 298 + * These rules are not set in stone and probably will be adjusted as we get 299 + * more experience with using BPF CO-RE relocations. 300 + */ 301 + static int bpf_core_fields_are_compat(const struct btf *local_btf, 302 + __u32 local_id, 303 + const struct btf *targ_btf, 304 + __u32 targ_id) 305 + { 306 + const struct btf_type *local_type, *targ_type; 307 + 308 + recur: 309 + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); 310 + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); 311 + if (!local_type || !targ_type) 312 + return -EINVAL; 313 + 314 + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) 315 + return 1; 316 + if (btf_kind(local_type) != btf_kind(targ_type)) 317 + return 0; 318 + 319 + switch (btf_kind(local_type)) { 320 + case BTF_KIND_PTR: 321 + case BTF_KIND_FLOAT: 322 + return 1; 323 + case BTF_KIND_FWD: 324 + case BTF_KIND_ENUM: { 325 + const char *local_name, *targ_name; 326 + size_t local_len, targ_len; 327 + 328 + local_name = btf__name_by_offset(local_btf, 329 + local_type->name_off); 330 + targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); 331 + local_len = bpf_core_essential_name_len(local_name); 332 + targ_len = 
bpf_core_essential_name_len(targ_name); 333 + /* one of them is anonymous or both w/ same flavor-less names */ 334 + return local_len == 0 || targ_len == 0 || 335 + (local_len == targ_len && 336 + strncmp(local_name, targ_name, local_len) == 0); 337 + } 338 + case BTF_KIND_INT: 339 + /* just reject deprecated bitfield-like integers; all other 340 + * integers are by default compatible between each other 341 + */ 342 + return btf_int_offset(local_type) == 0 && 343 + btf_int_offset(targ_type) == 0; 344 + case BTF_KIND_ARRAY: 345 + local_id = btf_array(local_type)->type; 346 + targ_id = btf_array(targ_type)->type; 347 + goto recur; 348 + default: 349 + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", 350 + btf_kind(local_type), local_id, targ_id); 351 + return 0; 352 + } 353 + } 354 + 355 + /* 356 + * Given single high-level named field accessor in local type, find 357 + * corresponding high-level accessor for a target type. Along the way, 358 + * maintain low-level spec for target as well. Also keep updating target 359 + * bit offset. 360 + * 361 + * Searching is performed through recursive exhaustive enumeration of all 362 + * fields of a struct/union. If there are any anonymous (embedded) 363 + * structs/unions, they are recursively searched as well. If field with 364 + * desired name is found, check compatibility between local and target types, 365 + * before returning result. 366 + * 367 + * 1 is returned, if field is found. 368 + * 0 is returned if no compatible field is found. 369 + * <0 is returned on error. 
370 + */ 371 + static int bpf_core_match_member(const struct btf *local_btf, 372 + const struct bpf_core_accessor *local_acc, 373 + const struct btf *targ_btf, 374 + __u32 targ_id, 375 + struct bpf_core_spec *spec, 376 + __u32 *next_targ_id) 377 + { 378 + const struct btf_type *local_type, *targ_type; 379 + const struct btf_member *local_member, *m; 380 + const char *local_name, *targ_name; 381 + __u32 local_id; 382 + int i, n, found; 383 + 384 + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); 385 + if (!targ_type) 386 + return -EINVAL; 387 + if (!btf_is_composite(targ_type)) 388 + return 0; 389 + 390 + local_id = local_acc->type_id; 391 + local_type = btf__type_by_id(local_btf, local_id); 392 + local_member = btf_members(local_type) + local_acc->idx; 393 + local_name = btf__name_by_offset(local_btf, local_member->name_off); 394 + 395 + n = btf_vlen(targ_type); 396 + m = btf_members(targ_type); 397 + for (i = 0; i < n; i++, m++) { 398 + __u32 bit_offset; 399 + 400 + bit_offset = btf_member_bit_offset(targ_type, i); 401 + 402 + /* too deep struct/union/array nesting */ 403 + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) 404 + return -E2BIG; 405 + 406 + /* speculate this member will be the good one */ 407 + spec->bit_offset += bit_offset; 408 + spec->raw_spec[spec->raw_len++] = i; 409 + 410 + targ_name = btf__name_by_offset(targ_btf, m->name_off); 411 + if (str_is_empty(targ_name)) { 412 + /* embedded struct/union, we need to go deeper */ 413 + found = bpf_core_match_member(local_btf, local_acc, 414 + targ_btf, m->type, 415 + spec, next_targ_id); 416 + if (found) /* either found or error */ 417 + return found; 418 + } else if (strcmp(local_name, targ_name) == 0) { 419 + /* matching named field */ 420 + struct bpf_core_accessor *targ_acc; 421 + 422 + targ_acc = &spec->spec[spec->len++]; 423 + targ_acc->type_id = targ_id; 424 + targ_acc->idx = i; 425 + targ_acc->name = targ_name; 426 + 427 + *next_targ_id = m->type; 428 + found = 
bpf_core_fields_are_compat(local_btf, 429 + local_member->type, 430 + targ_btf, m->type); 431 + if (!found) 432 + spec->len--; /* pop accessor */ 433 + return found; 434 + } 435 + /* member turned out not to be what we looked for */ 436 + spec->bit_offset -= bit_offset; 437 + spec->raw_len--; 438 + } 439 + 440 + return 0; 441 + } 442 + 443 + /* 444 + * Try to match local spec to a target type and, if successful, produce full 445 + * target spec (high-level, low-level + bit offset). 446 + */ 447 + static int bpf_core_spec_match(struct bpf_core_spec *local_spec, 448 + const struct btf *targ_btf, __u32 targ_id, 449 + struct bpf_core_spec *targ_spec) 450 + { 451 + const struct btf_type *targ_type; 452 + const struct bpf_core_accessor *local_acc; 453 + struct bpf_core_accessor *targ_acc; 454 + int i, sz, matched; 455 + 456 + memset(targ_spec, 0, sizeof(*targ_spec)); 457 + targ_spec->btf = targ_btf; 458 + targ_spec->root_type_id = targ_id; 459 + targ_spec->relo_kind = local_spec->relo_kind; 460 + 461 + if (core_relo_is_type_based(local_spec->relo_kind)) { 462 + return bpf_core_types_are_compat(local_spec->btf, 463 + local_spec->root_type_id, 464 + targ_btf, targ_id); 465 + } 466 + 467 + local_acc = &local_spec->spec[0]; 468 + targ_acc = &targ_spec->spec[0]; 469 + 470 + if (core_relo_is_enumval_based(local_spec->relo_kind)) { 471 + size_t local_essent_len, targ_essent_len; 472 + const struct btf_enum *e; 473 + const char *targ_name; 474 + 475 + /* has to resolve to an enum */ 476 + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); 477 + if (!btf_is_enum(targ_type)) 478 + return 0; 479 + 480 + local_essent_len = bpf_core_essential_name_len(local_acc->name); 481 + 482 + for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { 483 + targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); 484 + targ_essent_len = bpf_core_essential_name_len(targ_name); 485 + if (targ_essent_len != local_essent_len) 486 + continue; 487 + if 
(strncmp(local_acc->name, targ_name, local_essent_len) == 0) { 488 + targ_acc->type_id = targ_id; 489 + targ_acc->idx = i; 490 + targ_acc->name = targ_name; 491 + targ_spec->len++; 492 + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; 493 + targ_spec->raw_len++; 494 + return 1; 495 + } 496 + } 497 + return 0; 498 + } 499 + 500 + if (!core_relo_is_field_based(local_spec->relo_kind)) 501 + return -EINVAL; 502 + 503 + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { 504 + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, 505 + &targ_id); 506 + if (!targ_type) 507 + return -EINVAL; 508 + 509 + if (local_acc->name) { 510 + matched = bpf_core_match_member(local_spec->btf, 511 + local_acc, 512 + targ_btf, targ_id, 513 + targ_spec, &targ_id); 514 + if (matched <= 0) 515 + return matched; 516 + } else { 517 + /* for i=0, targ_id is already treated as array element 518 + * type (because it's the original struct), for others 519 + * we should find array element type first 520 + */ 521 + if (i > 0) { 522 + const struct btf_array *a; 523 + bool flex; 524 + 525 + if (!btf_is_array(targ_type)) 526 + return 0; 527 + 528 + a = btf_array(targ_type); 529 + flex = is_flex_arr(targ_btf, targ_acc - 1, a); 530 + if (!flex && local_acc->idx >= a->nelems) 531 + return 0; 532 + if (!skip_mods_and_typedefs(targ_btf, a->type, 533 + &targ_id)) 534 + return -EINVAL; 535 + } 536 + 537 + /* too deep struct/union/array nesting */ 538 + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) 539 + return -E2BIG; 540 + 541 + targ_acc->type_id = targ_id; 542 + targ_acc->idx = local_acc->idx; 543 + targ_acc->name = NULL; 544 + targ_spec->len++; 545 + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; 546 + targ_spec->raw_len++; 547 + 548 + sz = btf__resolve_size(targ_btf, targ_id); 549 + if (sz < 0) 550 + return sz; 551 + targ_spec->bit_offset += local_acc->idx * sz * 8; 552 + } 553 + } 554 + 555 + return 1; 556 + } 557 + 558 + static int 
bpf_core_calc_field_relo(const char *prog_name, 559 + const struct bpf_core_relo *relo, 560 + const struct bpf_core_spec *spec, 561 + __u32 *val, __u32 *field_sz, __u32 *type_id, 562 + bool *validate) 563 + { 564 + const struct bpf_core_accessor *acc; 565 + const struct btf_type *t; 566 + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; 567 + const struct btf_member *m; 568 + const struct btf_type *mt; 569 + bool bitfield; 570 + __s64 sz; 571 + 572 + *field_sz = 0; 573 + 574 + if (relo->kind == BPF_FIELD_EXISTS) { 575 + *val = spec ? 1 : 0; 576 + return 0; 577 + } 578 + 579 + if (!spec) 580 + return -EUCLEAN; /* request instruction poisoning */ 581 + 582 + acc = &spec->spec[spec->len - 1]; 583 + t = btf__type_by_id(spec->btf, acc->type_id); 584 + 585 + /* a[n] accessor needs special handling */ 586 + if (!acc->name) { 587 + if (relo->kind == BPF_FIELD_BYTE_OFFSET) { 588 + *val = spec->bit_offset / 8; 589 + /* remember field size for load/store mem size */ 590 + sz = btf__resolve_size(spec->btf, acc->type_id); 591 + if (sz < 0) 592 + return -EINVAL; 593 + *field_sz = sz; 594 + *type_id = acc->type_id; 595 + } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { 596 + sz = btf__resolve_size(spec->btf, acc->type_id); 597 + if (sz < 0) 598 + return -EINVAL; 599 + *val = sz; 600 + } else { 601 + pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", 602 + prog_name, relo->kind, relo->insn_off / 8); 603 + return -EINVAL; 604 + } 605 + if (validate) 606 + *validate = true; 607 + return 0; 608 + } 609 + 610 + m = btf_members(t) + acc->idx; 611 + mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); 612 + bit_off = spec->bit_offset; 613 + bit_sz = btf_member_bitfield_size(t, acc->idx); 614 + 615 + bitfield = bit_sz > 0; 616 + if (bitfield) { 617 + byte_sz = mt->size; 618 + byte_off = bit_off / 8 / byte_sz * byte_sz; 619 + /* figure out smallest int size necessary for bitfield load */ 620 + while (bit_off + bit_sz - byte_off * 8 > byte_sz * 
8) { 621 + if (byte_sz >= 8) { 622 + /* bitfield can't be read with 64-bit read */ 623 + pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", 624 + prog_name, relo->kind, relo->insn_off / 8); 625 + return -E2BIG; 626 + } 627 + byte_sz *= 2; 628 + byte_off = bit_off / 8 / byte_sz * byte_sz; 629 + } 630 + } else { 631 + sz = btf__resolve_size(spec->btf, field_type_id); 632 + if (sz < 0) 633 + return -EINVAL; 634 + byte_sz = sz; 635 + byte_off = spec->bit_offset / 8; 636 + bit_sz = byte_sz * 8; 637 + } 638 + 639 + /* for bitfields, all the relocatable aspects are ambiguous and we 640 + * might disagree with compiler, so turn off validation of expected 641 + * value, except for signedness 642 + */ 643 + if (validate) 644 + *validate = !bitfield; 645 + 646 + switch (relo->kind) { 647 + case BPF_FIELD_BYTE_OFFSET: 648 + *val = byte_off; 649 + if (!bitfield) { 650 + *field_sz = byte_sz; 651 + *type_id = field_type_id; 652 + } 653 + break; 654 + case BPF_FIELD_BYTE_SIZE: 655 + *val = byte_sz; 656 + break; 657 + case BPF_FIELD_SIGNED: 658 + /* enums will be assumed unsigned */ 659 + *val = btf_is_enum(mt) || 660 + (btf_int_encoding(mt) & BTF_INT_SIGNED); 661 + if (validate) 662 + *validate = true; /* signedness is never ambiguous */ 663 + break; 664 + case BPF_FIELD_LSHIFT_U64: 665 + #if __BYTE_ORDER == __LITTLE_ENDIAN 666 + *val = 64 - (bit_off + bit_sz - byte_off * 8); 667 + #else 668 + *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); 669 + #endif 670 + break; 671 + case BPF_FIELD_RSHIFT_U64: 672 + *val = 64 - bit_sz; 673 + if (validate) 674 + *validate = true; /* right shift is never ambiguous */ 675 + break; 676 + case BPF_FIELD_EXISTS: 677 + default: 678 + return -EOPNOTSUPP; 679 + } 680 + 681 + return 0; 682 + } 683 + 684 + static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, 685 + const struct bpf_core_spec *spec, 686 + __u32 *val) 687 + { 688 + __s64 sz; 689 + 690 + /* type-based relos return zero when target type is not 
found */ 691 + if (!spec) { 692 + *val = 0; 693 + return 0; 694 + } 695 + 696 + switch (relo->kind) { 697 + case BPF_TYPE_ID_TARGET: 698 + *val = spec->root_type_id; 699 + break; 700 + case BPF_TYPE_EXISTS: 701 + *val = 1; 702 + break; 703 + case BPF_TYPE_SIZE: 704 + sz = btf__resolve_size(spec->btf, spec->root_type_id); 705 + if (sz < 0) 706 + return -EINVAL; 707 + *val = sz; 708 + break; 709 + case BPF_TYPE_ID_LOCAL: 710 + /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ 711 + default: 712 + return -EOPNOTSUPP; 713 + } 714 + 715 + return 0; 716 + } 717 + 718 + static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, 719 + const struct bpf_core_spec *spec, 720 + __u32 *val) 721 + { 722 + const struct btf_type *t; 723 + const struct btf_enum *e; 724 + 725 + switch (relo->kind) { 726 + case BPF_ENUMVAL_EXISTS: 727 + *val = spec ? 1 : 0; 728 + break; 729 + case BPF_ENUMVAL_VALUE: 730 + if (!spec) 731 + return -EUCLEAN; /* request instruction poisoning */ 732 + t = btf__type_by_id(spec->btf, spec->spec[0].type_id); 733 + e = btf_enum(t) + spec->spec[0].idx; 734 + *val = e->val; 735 + break; 736 + default: 737 + return -EOPNOTSUPP; 738 + } 739 + 740 + return 0; 741 + } 742 + 743 + struct bpf_core_relo_res 744 + { 745 + /* expected value in the instruction, unless validate == false */ 746 + __u32 orig_val; 747 + /* new value that needs to be patched up to */ 748 + __u32 new_val; 749 + /* relocation unsuccessful, poison instruction, but don't fail load */ 750 + bool poison; 751 + /* some relocations can't be validated against orig_val */ 752 + bool validate; 753 + /* for field byte offset relocations or the forms: 754 + * *(T *)(rX + <off>) = rY 755 + * rX = *(T *)(rY + <off>), 756 + * we remember original and resolved field size to adjust direct 757 + * memory loads of pointers and integers; this is necessary for 32-bit 758 + * host kernel architectures, but also allows to automatically 759 + * relocate fields that were resized from, 
e.g., u32 to u64, etc. 760 + */ 761 + bool fail_memsz_adjust; 762 + __u32 orig_sz; 763 + __u32 orig_type_id; 764 + __u32 new_sz; 765 + __u32 new_type_id; 766 + }; 767 + 768 + /* Calculate original and target relocation values, given local and target 769 + * specs and relocation kind. These values are calculated for each candidate. 770 + * If there are multiple candidates, resulting values should all be consistent 771 + * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. 772 + * If instruction has to be poisoned, *poison will be set to true. 773 + */ 774 + static int bpf_core_calc_relo(const char *prog_name, 775 + const struct bpf_core_relo *relo, 776 + int relo_idx, 777 + const struct bpf_core_spec *local_spec, 778 + const struct bpf_core_spec *targ_spec, 779 + struct bpf_core_relo_res *res) 780 + { 781 + int err = -EOPNOTSUPP; 782 + 783 + res->orig_val = 0; 784 + res->new_val = 0; 785 + res->poison = false; 786 + res->validate = true; 787 + res->fail_memsz_adjust = false; 788 + res->orig_sz = res->new_sz = 0; 789 + res->orig_type_id = res->new_type_id = 0; 790 + 791 + if (core_relo_is_field_based(relo->kind)) { 792 + err = bpf_core_calc_field_relo(prog_name, relo, local_spec, 793 + &res->orig_val, &res->orig_sz, 794 + &res->orig_type_id, &res->validate); 795 + err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec, 796 + &res->new_val, &res->new_sz, 797 + &res->new_type_id, NULL); 798 + if (err) 799 + goto done; 800 + /* Validate if it's safe to adjust load/store memory size. 801 + * Adjustments are performed only if original and new memory 802 + * sizes differ. 
803 + */ 804 + res->fail_memsz_adjust = false; 805 + if (res->orig_sz != res->new_sz) { 806 + const struct btf_type *orig_t, *new_t; 807 + 808 + orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); 809 + new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); 810 + 811 + /* There are two use cases in which it's safe to 812 + * adjust load/store's mem size: 813 + * - reading a 32-bit kernel pointer, while on BPF 814 + * size pointers are always 64-bit; in this case 815 + * it's safe to "downsize" instruction size due to 816 + * pointer being treated as unsigned integer with 817 + * zero-extended upper 32-bits; 818 + * - reading unsigned integers, again due to 819 + * zero-extension is preserving the value correctly. 820 + * 821 + * In all other cases it's incorrect to attempt to 822 + * load/store field because read value will be 823 + * incorrect, so we poison relocated instruction. 824 + */ 825 + if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) 826 + goto done; 827 + if (btf_is_int(orig_t) && btf_is_int(new_t) && 828 + btf_int_encoding(orig_t) != BTF_INT_SIGNED && 829 + btf_int_encoding(new_t) != BTF_INT_SIGNED) 830 + goto done; 831 + 832 + /* mark as invalid mem size adjustment, but this will 833 + * only be checked for LDX/STX/ST insns 834 + */ 835 + res->fail_memsz_adjust = true; 836 + } 837 + } else if (core_relo_is_type_based(relo->kind)) { 838 + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); 839 + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); 840 + } else if (core_relo_is_enumval_based(relo->kind)) { 841 + err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); 842 + err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); 843 + } 844 + 845 + done: 846 + if (err == -EUCLEAN) { 847 + /* EUCLEAN is used to signal instruction poisoning request */ 848 + res->poison = true; 849 + err = 0; 850 + } else if (err == -EOPNOTSUPP) { 851 + /* EOPNOTSUPP means unknown/unsupported relocation */ 
852 + pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", 853 + prog_name, relo_idx, core_relo_kind_str(relo->kind), 854 + relo->kind, relo->insn_off / 8); 855 + } 856 + 857 + return err; 858 + } 859 + 860 + /* 861 + * Turn instruction for which CO_RE relocation failed into invalid one with 862 + * distinct signature. 863 + */ 864 + static void bpf_core_poison_insn(const char *prog_name, int relo_idx, 865 + int insn_idx, struct bpf_insn *insn) 866 + { 867 + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", 868 + prog_name, relo_idx, insn_idx); 869 + insn->code = BPF_JMP | BPF_CALL; 870 + insn->dst_reg = 0; 871 + insn->src_reg = 0; 872 + insn->off = 0; 873 + /* if this instruction is reachable (not a dead code), 874 + * verifier will complain with the following message: 875 + * invalid func unknown#195896080 876 + */ 877 + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ 878 + } 879 + 880 + static int insn_bpf_size_to_bytes(struct bpf_insn *insn) 881 + { 882 + switch (BPF_SIZE(insn->code)) { 883 + case BPF_DW: return 8; 884 + case BPF_W: return 4; 885 + case BPF_H: return 2; 886 + case BPF_B: return 1; 887 + default: return -1; 888 + } 889 + } 890 + 891 + static int insn_bytes_to_bpf_size(__u32 sz) 892 + { 893 + switch (sz) { 894 + case 8: return BPF_DW; 895 + case 4: return BPF_W; 896 + case 2: return BPF_H; 897 + case 1: return BPF_B; 898 + default: return -1; 899 + } 900 + } 901 + 902 + /* 903 + * Patch relocatable BPF instruction. 904 + * 905 + * Patched value is determined by relocation kind and target specification. 906 + * For existence relocations target spec will be NULL if field/type is not found. 907 + * Expected insn->imm value is determined using relocation kind and local 908 + * spec, and is checked before patching instruction. If actual insn->imm value 909 + * is wrong, bail out with error. 910 + * 911 + * Currently supported classes of BPF instruction are: 912 + * 1. 
rX = <imm> (assignment with immediate operand); 913 + * 2. rX += <imm> (arithmetic operations with immediate operand); 914 + * 3. rX = <imm64> (load with 64-bit immediate value); 915 + * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; 916 + * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; 917 + * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. 918 + */ 919 + static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, 920 + int insn_idx, const struct bpf_core_relo *relo, 921 + int relo_idx, const struct bpf_core_relo_res *res) 922 + { 923 + __u32 orig_val, new_val; 924 + __u8 class; 925 + 926 + class = BPF_CLASS(insn->code); 927 + 928 + if (res->poison) { 929 + poison: 930 + /* poison second part of ldimm64 to avoid confusing error from 931 + * verifier about "unknown opcode 00" 932 + */ 933 + if (is_ldimm64_insn(insn)) 934 + bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1); 935 + bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn); 936 + return 0; 937 + } 938 + 939 + orig_val = res->orig_val; 940 + new_val = res->new_val; 941 + 942 + switch (class) { 943 + case BPF_ALU: 944 + case BPF_ALU64: 945 + if (BPF_SRC(insn->code) != BPF_K) 946 + return -EINVAL; 947 + if (res->validate && insn->imm != orig_val) { 948 + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", 949 + prog_name, relo_idx, 950 + insn_idx, insn->imm, orig_val, new_val); 951 + return -EINVAL; 952 + } 953 + orig_val = insn->imm; 954 + insn->imm = new_val; 955 + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", 956 + prog_name, relo_idx, insn_idx, 957 + orig_val, new_val); 958 + break; 959 + case BPF_LDX: 960 + case BPF_ST: 961 + case BPF_STX: 962 + if (res->validate && insn->off != orig_val) { 963 + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", 964 + prog_name, relo_idx, insn_idx, 
insn->off, orig_val, new_val); 965 + return -EINVAL; 966 + } 967 + if (new_val > SHRT_MAX) { 968 + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", 969 + prog_name, relo_idx, insn_idx, new_val); 970 + return -ERANGE; 971 + } 972 + if (res->fail_memsz_adjust) { 973 + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " 974 + "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", 975 + prog_name, relo_idx, insn_idx); 976 + goto poison; 977 + } 978 + 979 + orig_val = insn->off; 980 + insn->off = new_val; 981 + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", 982 + prog_name, relo_idx, insn_idx, orig_val, new_val); 983 + 984 + if (res->new_sz != res->orig_sz) { 985 + int insn_bytes_sz, insn_bpf_sz; 986 + 987 + insn_bytes_sz = insn_bpf_size_to_bytes(insn); 988 + if (insn_bytes_sz != res->orig_sz) { 989 + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", 990 + prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); 991 + return -EINVAL; 992 + } 993 + 994 + insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); 995 + if (insn_bpf_sz < 0) { 996 + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", 997 + prog_name, relo_idx, insn_idx, res->new_sz); 998 + return -EINVAL; 999 + } 1000 + 1001 + insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); 1002 + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", 1003 + prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz); 1004 + } 1005 + break; 1006 + case BPF_LD: { 1007 + __u64 imm; 1008 + 1009 + if (!is_ldimm64_insn(insn) || 1010 + insn[0].src_reg != 0 || insn[0].off != 0 || 1011 + insn[1].code != 0 || insn[1].dst_reg != 0 || 1012 + insn[1].src_reg != 0 || insn[1].off != 0) { 1013 + pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", 1014 + prog_name, relo_idx, 
insn_idx); 1015 + return -EINVAL; 1016 + } 1017 + 1018 + imm = insn[0].imm + ((__u64)insn[1].imm << 32); 1019 + if (res->validate && imm != orig_val) { 1020 + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", 1021 + prog_name, relo_idx, 1022 + insn_idx, (unsigned long long)imm, 1023 + orig_val, new_val); 1024 + return -EINVAL; 1025 + } 1026 + 1027 + insn[0].imm = new_val; 1028 + insn[1].imm = 0; /* currently only 32-bit values are supported */ 1029 + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", 1030 + prog_name, relo_idx, insn_idx, 1031 + (unsigned long long)imm, new_val); 1032 + break; 1033 + } 1034 + default: 1035 + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", 1036 + prog_name, relo_idx, insn_idx, insn->code, 1037 + insn->src_reg, insn->dst_reg, insn->off, insn->imm); 1038 + return -EINVAL; 1039 + } 1040 + 1041 + return 0; 1042 + } 1043 + 1044 + /* Output spec definition in the format: 1045 + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, 1046 + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b 1047 + */ 1048 + static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) 1049 + { 1050 + const struct btf_type *t; 1051 + const struct btf_enum *e; 1052 + const char *s; 1053 + __u32 type_id; 1054 + int i; 1055 + 1056 + type_id = spec->root_type_id; 1057 + t = btf__type_by_id(spec->btf, type_id); 1058 + s = btf__name_by_offset(spec->btf, t->name_off); 1059 + 1060 + libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); 1061 + 1062 + if (core_relo_is_type_based(spec->relo_kind)) 1063 + return; 1064 + 1065 + if (core_relo_is_enumval_based(spec->relo_kind)) { 1066 + t = skip_mods_and_typedefs(spec->btf, type_id, NULL); 1067 + e = btf_enum(t) + spec->raw_spec[0]; 1068 + s = btf__name_by_offset(spec->btf, e->name_off); 1069 + 1070 + libbpf_print(level, "::%s = %u", s, e->val); 1071 + return; 1072 + } 1073 + 1074 + if (core_relo_is_field_based(spec->relo_kind)) { 1075 + for (i = 0; i < spec->len; i++) { 1076 + if (spec->spec[i].name) 1077 + libbpf_print(level, ".%s", spec->spec[i].name); 1078 + else if (i > 0 || spec->spec[i].idx > 0) 1079 + libbpf_print(level, "[%u]", spec->spec[i].idx); 1080 + } 1081 + 1082 + libbpf_print(level, " ("); 1083 + for (i = 0; i < spec->raw_len; i++) 1084 + libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); 1085 + 1086 + if (spec->bit_offset % 8) 1087 + libbpf_print(level, " @ offset %u.%u)", 1088 + spec->bit_offset / 8, spec->bit_offset % 8); 1089 + else 1090 + libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); 1091 + return; 1092 + } 1093 + } 1094 + 1095 + /* 1096 + * CO-RE relocate single instruction. 1097 + * 1098 + * The outline and important points of the algorithm: 1099 + * 1. For given local type, find corresponding candidate target types. 1100 + * Candidate type is a type with the same "essential" name, ignoring 1101 + * everything after last triple underscore (___). E.g., `sample`, 1102 + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates 1103 + * for each other. Names with triple underscore are referred to as 1104 + * "flavors" and are useful, among other things, to allow to 1105 + * specify/support incompatible variations of the same kernel struct, which 1106 + * might differ between different kernel versions and/or build 1107 + * configurations. 1108 + * 1109 + * N.B. 
Struct "flavors" could be generated by bpftool's BTF-to-C 1110 + * converter, when deduplicated BTF of a kernel still contains more than 1111 + * one different types with the same name. In that case, ___2, ___3, etc 1112 + * are appended starting from second name conflict. But struct flavors are 1113 + * also useful to be defined "locally", in BPF program, to extract same 1114 + * data from incompatible changes between different kernel 1115 + * versions/configurations. For instance, to handle field renames between 1116 + * kernel versions, one can use two flavors of the struct name with the 1117 + * same common name and use conditional relocations to extract that field, 1118 + * depending on target kernel version. 1119 + * 2. For each candidate type, try to match local specification to this 1120 + * candidate target type. Matching involves finding corresponding 1121 + * high-level spec accessors, meaning that all named fields should match, 1122 + * as well as all array accesses should be within the actual bounds. Also, 1123 + * types should be compatible (see bpf_core_fields_are_compat for details). 1124 + * 3. It is supported and expected that there might be multiple flavors 1125 + * matching the spec. As long as all the specs resolve to the same set of 1126 + * offsets across all candidates, there is no error. If there is any 1127 + * ambiguity, CO-RE relocation will fail. This is necessary to accommodate 1128 + * imperfection of BTF deduplication, which can cause slight duplication of 1129 + * the same BTF type, if some directly or indirectly referenced (by 1130 + * pointer) type gets resolved to different actual types in different 1131 + * object files. If such situation occurs, deduplicated BTF will end up 1132 + * with two (or more) structurally identical types, which differ only in 1133 + * types they refer to through pointer. This should be OK in most cases and 1134 + * is not an error. 1135 + * 4. 
Candidate types search is performed by linearly scanning through all 1136 + * types in target BTF. It is anticipated that this is overall more 1137 + * efficient memory-wise and not significantly worse (if not better) 1138 + * CPU-wise compared to prebuilding a map from all local type names to 1139 + * a list of candidate type names. It's also sped up by caching resolved 1140 + * list of matching candidates per each local "root" type ID, that has at 1141 + * least one bpf_core_relo associated with it. This list is shared 1142 + * between multiple relocations for the same type ID and is updated as some 1143 + * of the candidates are pruned due to structural incompatibility. 1144 + */ 1145 + int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, 1146 + int insn_idx, 1147 + const struct bpf_core_relo *relo, 1148 + int relo_idx, 1149 + const struct btf *local_btf, 1150 + struct bpf_core_cand_list *cands) 1151 + { 1152 + struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; 1153 + struct bpf_core_relo_res cand_res, targ_res; 1154 + const struct btf_type *local_type; 1155 + const char *local_name; 1156 + __u32 local_id; 1157 + const char *spec_str; 1158 + int i, j, err; 1159 + 1160 + local_id = relo->type_id; 1161 + local_type = btf__type_by_id(local_btf, local_id); 1162 + if (!local_type) 1163 + return -EINVAL; 1164 + 1165 + local_name = btf__name_by_offset(local_btf, local_type->name_off); 1166 + if (!local_name) 1167 + return -EINVAL; 1168 + 1169 + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); 1170 + if (str_is_empty(spec_str)) 1171 + return -EINVAL; 1172 + 1173 + err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); 1174 + if (err) { 1175 + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", 1176 + prog_name, relo_idx, local_id, btf_kind_str(local_type), 1177 + str_is_empty(local_name) ? 
"<anon>" : local_name, 1178 + spec_str, err); 1179 + return -EINVAL; 1180 + } 1181 + 1182 + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, 1183 + relo_idx, core_relo_kind_str(relo->kind), relo->kind); 1184 + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); 1185 + libbpf_print(LIBBPF_DEBUG, "\n"); 1186 + 1187 + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ 1188 + if (relo->kind == BPF_TYPE_ID_LOCAL) { 1189 + targ_res.validate = true; 1190 + targ_res.poison = false; 1191 + targ_res.orig_val = local_spec.root_type_id; 1192 + targ_res.new_val = local_spec.root_type_id; 1193 + goto patch_insn; 1194 + } 1195 + 1196 + /* libbpf doesn't support candidate search for anonymous types */ 1197 + if (str_is_empty(spec_str)) { 1198 + pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", 1199 + prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); 1200 + return -EOPNOTSUPP; 1201 + } 1202 + 1203 + 1204 + for (i = 0, j = 0; i < cands->len; i++) { 1205 + err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, 1206 + cands->cands[i].id, &cand_spec); 1207 + if (err < 0) { 1208 + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", 1209 + prog_name, relo_idx, i); 1210 + bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); 1211 + libbpf_print(LIBBPF_WARN, ": %d\n", err); 1212 + return err; 1213 + } 1214 + 1215 + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, 1216 + relo_idx, err == 0 ? 
"non-matching" : "matching", i); 1217 + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); 1218 + libbpf_print(LIBBPF_DEBUG, "\n"); 1219 + 1220 + if (err == 0) 1221 + continue; 1222 + 1223 + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); 1224 + if (err) 1225 + return err; 1226 + 1227 + if (j == 0) { 1228 + targ_res = cand_res; 1229 + targ_spec = cand_spec; 1230 + } else if (cand_spec.bit_offset != targ_spec.bit_offset) { 1231 + /* if there are many field relo candidates, they 1232 + * should all resolve to the same bit offset 1233 + */ 1234 + pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", 1235 + prog_name, relo_idx, cand_spec.bit_offset, 1236 + targ_spec.bit_offset); 1237 + return -EINVAL; 1238 + } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { 1239 + /* all candidates should result in the same relocation 1240 + * decision and value, otherwise it's dangerous to 1241 + * proceed due to ambiguity 1242 + */ 1243 + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", 1244 + prog_name, relo_idx, 1245 + cand_res.poison ? "failure" : "success", cand_res.new_val, 1246 + targ_res.poison ? "failure" : "success", targ_res.new_val); 1247 + return -EINVAL; 1248 + } 1249 + 1250 + cands->cands[j++] = cands->cands[i]; 1251 + } 1252 + 1253 + /* 1254 + * For BPF_FIELD_EXISTS relo or when used BPF program has field 1255 + * existence checks or kernel version/config checks, it's expected 1256 + * that we might not find any candidates. In this case, if field 1257 + * wasn't found in any candidate, the list of candidates shouldn't 1258 + * change at all, we'll just handle relocating appropriately, 1259 + * depending on relo's kind. 
1260 + */ 1261 + if (j > 0) 1262 + cands->len = j; 1263 + 1264 + /* 1265 + * If no candidates were found, it might be both a programmer error, 1266 + * as well as expected case, depending whether instruction w/ 1267 + * relocation is guarded in some way that makes it unreachable (dead 1268 + * code) if relocation can't be resolved. This is handled in 1269 + * bpf_core_patch_insn() uniformly by replacing that instruction with 1270 + * BPF helper call insn (using invalid helper ID). If that instruction 1271 + * is indeed unreachable, then it will be ignored and eliminated by 1272 + * verifier. If it was an error, then verifier will complain and point 1273 + * to a specific instruction number in its log. 1274 + */ 1275 + if (j == 0) { 1276 + pr_debug("prog '%s': relo #%d: no matching targets found\n", 1277 + prog_name, relo_idx); 1278 + 1279 + /* calculate single target relo result explicitly */ 1280 + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); 1281 + if (err) 1282 + return err; 1283 + } 1284 + 1285 + patch_insn: 1286 + /* bpf_core_patch_insn() should know how to handle missing targ_spec */ 1287 + err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); 1288 + if (err) { 1289 + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", 1290 + prog_name, relo_idx, relo->insn_off / 8, err); 1291 + return -EINVAL; 1292 + } 1293 + 1294 + return 0; 1295 + }

+100

tools/lib/bpf/relo_core.h

··· 1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #ifndef __RELO_CORE_H 5 + #define __RELO_CORE_H 6 + 7 + /* bpf_core_relo_kind encodes which aspect of captured field/type/enum value 8 + * has to be adjusted by relocations. 9 + */ 10 + enum bpf_core_relo_kind { 11 + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ 12 + BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ 13 + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ 14 + BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ 15 + BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ 16 + BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ 17 + BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ 18 + BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ 19 + BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ 20 + BPF_TYPE_SIZE = 9, /* type size in bytes */ 21 + BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ 22 + BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ 23 + }; 24 + 25 + /* The minimum bpf_core_relo checked by the loader 26 + * 27 + * CO-RE relocation captures the following data: 28 + * - insn_off - instruction offset (in bytes) within a BPF program that needs 29 + * its insn->imm field to be relocated with actual field info; 30 + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable 31 + * type or field; 32 + * - access_str_off - offset into corresponding .BTF string section. String 33 + * interpretation depends on specific relocation kind: 34 + * - for field-based relocations, string encodes an accessed field using 35 + * a sequence of field and array indices, separated by colon (:). It's 36 + * conceptually very close to LLVM's getelementptr ([0]) instruction's 37 + * arguments for identifying offset to a field. 
38 + * - for type-based relocations, string is expected to be just "0"; 39 + * - for enum value-based relocations, string contains an index of enum 40 + * value within its enum type; 41 + * 42 + * Example to provide a better feel. 43 + * 44 + * struct sample { 45 + * int a; 46 + * struct { 47 + * int b[10]; 48 + * }; 49 + * }; 50 + * 51 + * struct sample *s = ...; 52 + * int x = &s->a; // encoded as "0:0" (a is field #0) 53 + * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, 54 + * // b is field #0 inside anon struct, accessing elem #5) 55 + * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) 56 + * 57 + * type_id for all relocs in this example will capture BTF type id of 58 + * `struct sample`. 59 + * 60 + * Such relocation is emitted when using __builtin_preserve_access_index() 61 + * Clang built-in, passing expression that captures field address, e.g.: 62 + * 63 + * bpf_probe_read(&dst, sizeof(dst), 64 + * __builtin_preserve_access_index(&src->a.b.c)); 65 + * 66 + * In this case Clang will emit field relocation recording necessary data to 67 + * be able to find offset of embedded `a.b.c` field within `src` struct. 
68 + * 69 + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction 70 + */ 71 + struct bpf_core_relo { 72 + __u32 insn_off; 73 + __u32 type_id; 74 + __u32 access_str_off; 75 + enum bpf_core_relo_kind kind; 76 + }; 77 + 78 + struct bpf_core_cand { 79 + const struct btf *btf; 80 + const struct btf_type *t; 81 + const char *name; 82 + __u32 id; 83 + }; 84 + 85 + /* dynamically sized list of type IDs and its associated struct btf */ 86 + struct bpf_core_cand_list { 87 + struct bpf_core_cand *cands; 88 + int len; 89 + }; 90 + 91 + int bpf_core_apply_relo_insn(const char *prog_name, 92 + struct bpf_insn *insn, int insn_idx, 93 + const struct bpf_core_relo *relo, int relo_idx, 94 + const struct btf *local_btf, 95 + struct bpf_core_cand_list *cands); 96 + int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 97 + const struct btf *targ_btf, __u32 targ_id); 98 + 99 + size_t bpf_core_essential_name_len(const char *name); 100 + #endif

+6 -5

tools/perf/util/bpf-event.c

··· 223 223 free(info_linear); 224 224 return -1; 225 225 } 226 - if (btf__get_from_id(info->btf_id, &btf)) { 226 + btf = btf__load_from_kernel_by_id(info->btf_id); 227 + if (libbpf_get_error(btf)) { 227 228 pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id); 228 229 err = -1; 229 - btf = NULL; 230 230 goto out; 231 231 } 232 232 perf_env__fetch_btf(env, info->btf_id, btf); ··· 296 296 297 297 out: 298 298 free(info_linear); 299 - free(btf); 299 + btf__free(btf); 300 300 return err ? -1 : 0; 301 301 } 302 302 ··· 478 478 if (btf_id == 0) 479 479 goto out; 480 480 481 - if (btf__get_from_id(btf_id, &btf)) { 481 + btf = btf__load_from_kernel_by_id(btf_id); 482 + if (libbpf_get_error(btf)) { 482 483 pr_debug("%s: failed to get BTF of id %u, aborting\n", 483 484 __func__, btf_id); 484 485 goto out; ··· 487 486 perf_env__fetch_btf(env, btf_id, btf); 488 487 489 488 out: 490 - free(btf); 489 + btf__free(btf); 491 490 close(fd); 492 491 } 493 492

+9 -3

tools/perf/util/bpf_counter.c

··· 64 64 struct bpf_prog_info_linear *info_linear; 65 65 struct bpf_func_info *func_info; 66 66 const struct btf_type *t; 67 + struct btf *btf = NULL; 67 68 char *name = NULL; 68 - struct btf *btf; 69 69 70 70 info_linear = bpf_program__get_prog_info_linear( 71 71 tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); ··· 74 74 return NULL; 75 75 } 76 76 77 - if (info_linear->info.btf_id == 0 || 78 - btf__get_from_id(info_linear->info.btf_id, &btf)) { 77 + if (info_linear->info.btf_id == 0) { 79 78 pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); 79 + goto out; 80 + } 81 + 82 + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); 83 + if (libbpf_get_error(btf)) { 84 + pr_debug("failed to load btf for prog FD %d\n", tgt_fd); 80 85 goto out; 81 86 } 82 87 ··· 94 89 } 95 90 name = strdup(btf__name_by_offset(btf, t->name_off)); 96 91 out: 92 + btf__free(btf); 97 93 free(info_linear); 98 94 return name; 99 95 }

+7

tools/testing/selftests/bpf/README.rst

··· 19 19 bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and 20 20 saves the resulting output (by default in ``~/.bpf_selftests``). 21 21 22 + Script dependencies: 23 + - clang (preferably built from sources, https://github.com/llvm/llvm-project); 24 + - pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/); 25 + - qemu; 26 + - docutils (for ``rst2man``); 27 + - libcap-devel. 28 + 22 29 For more information about using the script, run: 23 30 24 31 .. code-block:: console

+29 -9

tools/testing/selftests/bpf/netcnt_common.h

··· 6 6 7 7 #define MAX_PERCPU_PACKETS 32 8 8 9 - struct percpu_net_cnt { 10 - __u64 packets; 11 - __u64 bytes; 9 + /* sizeof(struct bpf_local_storage_elem): 10 + * 11 + * It really is about 128 bytes on x86_64, but allocate more to account for 12 + * possible layout changes, different architectures, etc. 13 + * The kernel will wrap up to PAGE_SIZE internally anyway. 14 + */ 15 + #define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256 12 16 13 - __u64 prev_ts; 17 + /* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */ 18 + #define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \ 19 + SIZEOF_BPF_LOCAL_STORAGE_ELEM) 14 20 15 - __u64 prev_packets; 16 - __u64 prev_bytes; 21 + #define PCPU_MIN_UNIT_SIZE 32768 22 + 23 + union percpu_net_cnt { 24 + struct { 25 + __u64 packets; 26 + __u64 bytes; 27 + 28 + __u64 prev_ts; 29 + 30 + __u64 prev_packets; 31 + __u64 prev_bytes; 32 + }; 33 + __u8 data[PCPU_MIN_UNIT_SIZE]; 17 34 }; 18 35 19 - struct net_cnt { 20 - __u64 packets; 21 - __u64 bytes; 36 + union net_cnt { 37 + struct { 38 + __u64 packets; 39 + __u64 bytes; 40 + }; 41 + __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE]; 22 42 }; 23 43 24 44 #endif

+76 -9

tools/testing/selftests/bpf/network_helpers.c

··· 66 66 67 67 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 68 68 69 - int start_server(int family, int type, const char *addr_str, __u16 port, 70 - int timeout_ms) 69 + static int __start_server(int type, const struct sockaddr *addr, 70 + socklen_t addrlen, int timeout_ms, bool reuseport) 71 71 { 72 - struct sockaddr_storage addr = {}; 73 - socklen_t len; 72 + int on = 1; 74 73 int fd; 75 74 76 - if (make_sockaddr(family, addr_str, port, &addr, &len)) 77 - return -1; 78 - 79 - fd = socket(family, type, 0); 75 + fd = socket(addr->sa_family, type, 0); 80 76 if (fd < 0) { 81 77 log_err("Failed to create server socket"); 82 78 return -1; ··· 81 85 if (settimeo(fd, timeout_ms)) 82 86 goto error_close; 83 87 84 - if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { 88 + if (reuseport && 89 + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { 90 + log_err("Failed to set SO_REUSEPORT"); 91 + return -1; 92 + } 93 + 94 + if (bind(fd, addr, addrlen) < 0) { 85 95 log_err("Failed to bind socket"); 86 96 goto error_close; 87 97 } ··· 104 102 error_close: 105 103 save_errno_close(fd); 106 104 return -1; 105 + } 106 + 107 + int start_server(int family, int type, const char *addr_str, __u16 port, 108 + int timeout_ms) 109 + { 110 + struct sockaddr_storage addr; 111 + socklen_t addrlen; 112 + 113 + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 114 + return -1; 115 + 116 + return __start_server(type, (struct sockaddr *)&addr, 117 + addrlen, timeout_ms, false); 118 + } 119 + 120 + int *start_reuseport_server(int family, int type, const char *addr_str, 121 + __u16 port, int timeout_ms, unsigned int nr_listens) 122 + { 123 + struct sockaddr_storage addr; 124 + unsigned int nr_fds = 0; 125 + socklen_t addrlen; 126 + int *fds; 127 + 128 + if (!nr_listens) 129 + return NULL; 130 + 131 + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 132 + return NULL; 133 + 134 + fds = malloc(sizeof(*fds) * nr_listens); 135 + 
if (!fds) 136 + return NULL; 137 + 138 + fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, 139 + timeout_ms, true); 140 + if (fds[0] == -1) 141 + goto close_fds; 142 + nr_fds = 1; 143 + 144 + if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 145 + goto close_fds; 146 + 147 + for (; nr_fds < nr_listens; nr_fds++) { 148 + fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, 149 + addrlen, timeout_ms, true); 150 + if (fds[nr_fds] == -1) 151 + goto close_fds; 152 + } 153 + 154 + return fds; 155 + 156 + close_fds: 157 + free_fds(fds, nr_fds); 158 + return NULL; 159 + } 160 + 161 + void free_fds(int *fds, unsigned int nr_close_fds) 162 + { 163 + if (fds) { 164 + while (nr_close_fds) 165 + close(fds[--nr_close_fds]); 166 + free(fds); 167 + } 107 168 } 108 169 109 170 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, ··· 282 217 if (family == AF_INET) { 283 218 struct sockaddr_in *sin = (void *)addr; 284 219 220 + memset(addr, 0, sizeof(*sin)); 285 221 sin->sin_family = AF_INET; 286 222 sin->sin_port = htons(port); 287 223 if (addr_str && ··· 296 230 } else if (family == AF_INET6) { 297 231 struct sockaddr_in6 *sin6 = (void *)addr; 298 232 233 + memset(addr, 0, sizeof(*sin6)); 299 234 sin6->sin6_family = AF_INET6; 300 235 sin6->sin6_port = htons(port); 301 236 if (addr_str &&

+4

tools/testing/selftests/bpf/network_helpers.h

··· 36 36 int settimeo(int fd, int timeout_ms); 37 37 int start_server(int family, int type, const char *addr, __u16 port, 38 38 int timeout_ms); 39 + int *start_reuseport_server(int family, int type, const char *addr_str, 40 + __u16 port, int timeout_ms, 41 + unsigned int nr_listens); 42 + void free_fds(int *fds, unsigned int nr_close_fds); 39 43 int connect_to_fd(int server_fd, int timeout_ms); 40 44 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); 41 45 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,

+226

tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #define _GNU_SOURCE 4 + #include <sched.h> 5 + #include <test_progs.h> 6 + #include "network_helpers.h" 7 + #include "bpf_dctcp.skel.h" 8 + #include "bpf_cubic.skel.h" 9 + #include "bpf_iter_setsockopt.skel.h" 10 + 11 + static int create_netns(void) 12 + { 13 + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) 14 + return -1; 15 + 16 + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) 17 + return -1; 18 + 19 + return 0; 20 + } 21 + 22 + static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds) 23 + { 24 + unsigned int i; 25 + 26 + for (i = 0; i < nr_fds; i++) { 27 + if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic", 28 + sizeof("bpf_cubic"))) 29 + return i; 30 + } 31 + 32 + return nr_fds; 33 + } 34 + 35 + static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds) 36 + { 37 + char tcp_cc[16]; 38 + socklen_t optlen = sizeof(tcp_cc); 39 + unsigned int i; 40 + 41 + for (i = 0; i < nr_fds; i++) { 42 + if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION, 43 + tcp_cc, &optlen) || 44 + strcmp(tcp_cc, "bpf_dctcp")) 45 + return i; 46 + } 47 + 48 + return nr_fds; 49 + } 50 + 51 + static int *make_established(int listen_fd, unsigned int nr_est, 52 + int **paccepted_fds) 53 + { 54 + int *est_fds, *accepted_fds; 55 + unsigned int i; 56 + 57 + est_fds = malloc(sizeof(*est_fds) * nr_est); 58 + if (!est_fds) 59 + return NULL; 60 + 61 + accepted_fds = malloc(sizeof(*accepted_fds) * nr_est); 62 + if (!accepted_fds) { 63 + free(est_fds); 64 + return NULL; 65 + } 66 + 67 + for (i = 0; i < nr_est; i++) { 68 + est_fds[i] = connect_to_fd(listen_fd, 0); 69 + if (est_fds[i] == -1) 70 + break; 71 + if (set_bpf_cubic(&est_fds[i], 1) != 1) { 72 + close(est_fds[i]); 73 + break; 74 + } 75 + 76 + accepted_fds[i] = accept(listen_fd, NULL, 0); 77 + if (accepted_fds[i] == -1) { 78 + close(est_fds[i]); 79 + break; 80 + } 81 + } 82 + 83 + if (!ASSERT_EQ(i, nr_est, "create 
established fds")) { 84 + free_fds(accepted_fds, i); 85 + free_fds(est_fds, i); 86 + return NULL; 87 + } 88 + 89 + *paccepted_fds = accepted_fds; 90 + return est_fds; 91 + } 92 + 93 + static unsigned short get_local_port(int fd) 94 + { 95 + struct sockaddr_in6 addr; 96 + socklen_t addrlen = sizeof(addr); 97 + 98 + if (!getsockname(fd, &addr, &addrlen)) 99 + return ntohs(addr.sin6_port); 100 + 101 + return 0; 102 + } 103 + 104 + static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel, 105 + bool random_retry) 106 + { 107 + int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL; 108 + unsigned int nr_reuse_listens = 256, nr_est = 256; 109 + int err, iter_fd = -1, listen_fd = -1; 110 + char buf; 111 + 112 + /* Prepare non-reuseport listen_fd */ 113 + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); 114 + if (!ASSERT_GE(listen_fd, 0, "start_server")) 115 + return; 116 + if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1, 117 + "set listen_fd to cubic")) 118 + goto done; 119 + iter_skel->bss->listen_hport = get_local_port(listen_fd); 120 + if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0, 121 + "get_local_port(listen_fd)")) 122 + goto done; 123 + 124 + /* Connect to non-reuseport listen_fd */ 125 + est_fds = make_established(listen_fd, nr_est, &accepted_fds); 126 + if (!ASSERT_OK_PTR(est_fds, "create established")) 127 + goto done; 128 + 129 + /* Prepare reuseport listen fds */ 130 + reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM, 131 + "::1", 0, 0, 132 + nr_reuse_listens); 133 + if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server")) 134 + goto done; 135 + if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens), 136 + nr_reuse_listens, "set reuse_listen_fds to cubic")) 137 + goto done; 138 + iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]); 139 + if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0, 140 + "get_local_port(reuse_listen_fds[0])")) 141 + goto done; 142 + 143 + 
/* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */ 144 + iter_skel->bss->random_retry = random_retry; 145 + iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc)); 146 + if (!ASSERT_GE(iter_fd, 0, "create iter_fd")) 147 + goto done; 148 + 149 + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && 150 + errno == EAGAIN) 151 + ; 152 + if (!ASSERT_OK(err, "read iter error")) 153 + goto done; 154 + 155 + /* Check reuseport listen fds for dctcp */ 156 + ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens), 157 + nr_reuse_listens, 158 + "check reuse_listen_fds dctcp"); 159 + 160 + /* Check non reuseport listen fd for dctcp */ 161 + ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1, 162 + "check listen_fd dctcp"); 163 + 164 + /* Check established fds for dctcp */ 165 + ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est, 166 + "check est_fds dctcp"); 167 + 168 + /* Check accepted fds for dctcp */ 169 + ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est, 170 + "check accepted_fds dctcp"); 171 + 172 + done: 173 + if (iter_fd != -1) 174 + close(iter_fd); 175 + if (listen_fd != -1) 176 + close(listen_fd); 177 + free_fds(reuse_listen_fds, nr_reuse_listens); 178 + free_fds(accepted_fds, nr_est); 179 + free_fds(est_fds, nr_est); 180 + } 181 + 182 + void test_bpf_iter_setsockopt(void) 183 + { 184 + struct bpf_iter_setsockopt *iter_skel = NULL; 185 + struct bpf_cubic *cubic_skel = NULL; 186 + struct bpf_dctcp *dctcp_skel = NULL; 187 + struct bpf_link *cubic_link = NULL; 188 + struct bpf_link *dctcp_link = NULL; 189 + 190 + if (create_netns()) 191 + return; 192 + 193 + /* Load iter_skel */ 194 + iter_skel = bpf_iter_setsockopt__open_and_load(); 195 + if (!ASSERT_OK_PTR(iter_skel, "iter_skel")) 196 + return; 197 + iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL); 198 + if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter")) 199 + goto done; 200 + 201 + /* Load bpf_cubic */ 202 + 
cubic_skel = bpf_cubic__open_and_load(); 203 + if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel")) 204 + goto done; 205 + cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); 206 + if (!ASSERT_OK_PTR(cubic_link, "cubic_link")) 207 + goto done; 208 + 209 + /* Load bpf_dctcp */ 210 + dctcp_skel = bpf_dctcp__open_and_load(); 211 + if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) 212 + goto done; 213 + dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); 214 + if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link")) 215 + goto done; 216 + 217 + do_bpf_iter_setsockopt(iter_skel, true); 218 + do_bpf_iter_setsockopt(iter_skel, false); 219 + 220 + done: 221 + bpf_link__destroy(cubic_link); 222 + bpf_link__destroy(dctcp_link); 223 + bpf_cubic__destroy(cubic_skel); 224 + bpf_dctcp__destroy(dctcp_skel); 225 + bpf_iter_setsockopt__destroy(iter_skel); 226 + }

+3 -1

tools/testing/selftests/bpf/prog_tests/btf.c

··· 4350 4350 goto done; 4351 4351 } 4352 4352 4353 - err = btf__get_from_id(info.btf_id, &btf); 4353 + btf = btf__load_from_kernel_by_id(info.btf_id); 4354 + err = libbpf_get_error(btf); 4354 4355 if (CHECK(err, "cannot get btf from kernel, err: %d", err)) 4355 4356 goto done; 4356 4357 ··· 4387 4386 fprintf(stderr, "OK"); 4388 4387 4389 4388 done: 4389 + btf__free(btf); 4390 4390 free(func_info); 4391 4391 bpf_object__close(obj); 4392 4392 }

+615

tools/testing/selftests/bpf/prog_tests/btf_dump.c

··· 232 232 btf__free(btf); 233 233 } 234 234 235 + #define STRSIZE 4096 236 + 237 + static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args) 238 + { 239 + char *s = ctx, new[STRSIZE]; 240 + 241 + vsnprintf(new, STRSIZE, fmt, args); 242 + if (strlen(s) < STRSIZE) 243 + strncat(s, new, STRSIZE - strlen(s) - 1); 244 + } 245 + 246 + static int btf_dump_data(struct btf *btf, struct btf_dump *d, 247 + char *name, char *prefix, __u64 flags, void *ptr, 248 + size_t ptr_sz, char *str, const char *expected_val) 249 + { 250 + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); 251 + size_t type_sz; 252 + __s32 type_id; 253 + int ret = 0; 254 + 255 + if (flags & BTF_F_COMPACT) 256 + opts.compact = true; 257 + if (flags & BTF_F_NONAME) 258 + opts.skip_names = true; 259 + if (flags & BTF_F_ZERO) 260 + opts.emit_zeroes = true; 261 + if (prefix) { 262 + ASSERT_STRNEQ(name, prefix, strlen(prefix), 263 + "verify prefix match"); 264 + name += strlen(prefix) + 1; 265 + } 266 + type_id = btf__find_by_name(btf, name); 267 + if (!ASSERT_GE(type_id, 0, "find type id")) 268 + return -ENOENT; 269 + type_sz = btf__resolve_size(btf, type_id); 270 + str[0] = '\0'; 271 + ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts); 272 + if (type_sz <= ptr_sz) { 273 + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) 274 + return -EINVAL; 275 + } else { 276 + if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG")) 277 + return -EINVAL; 278 + } 279 + if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match")) 280 + return -EFAULT; 281 + return 0; 282 + } 283 + 284 + #define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \ 285 + _expected, ...) 
\ 286 + do { \ 287 + char __ptrtype[64] = #_type; \ 288 + char *_ptrtype = (char *)__ptrtype; \ 289 + _type _ptrdata = __VA_ARGS__; \ 290 + void *_ptr = &_ptrdata; \ 291 + \ 292 + (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \ 293 + _ptr, sizeof(_type), _str, \ 294 + _expected); \ 295 + } while (0) 296 + 297 + /* Use where expected data string matches its stringified declaration */ 298 + #define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \ 299 + ...) \ 300 + TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \ 301 + "(" #_type ")" #__VA_ARGS__, __VA_ARGS__) 302 + 303 + /* overflow test; pass typesize < expected type size, ensure E2BIG returned */ 304 + #define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \ 305 + _expected, ...) \ 306 + do { \ 307 + char __ptrtype[64] = #_type; \ 308 + char *_ptrtype = (char *)__ptrtype; \ 309 + _type _ptrdata = __VA_ARGS__; \ 310 + void *_ptr = &_ptrdata; \ 311 + \ 312 + (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \ 313 + _ptr, _type_sz, _str, _expected); \ 314 + } while (0) 315 + 316 + #define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \ 317 + _expected, ...) \ 318 + do { \ 319 + _type _ptrdata = __VA_ARGS__; \ 320 + void *_ptr = &_ptrdata; \ 321 + \ 322 + (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \ 323 + _ptr, sizeof(_type), _str, \ 324 + _expected); \ 325 + } while (0) 326 + 327 + static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, 328 + char *str) 329 + { 330 + #ifdef __SIZEOF_INT128__ 331 + __int128 i = 0xffffffffffffffff; 332 + 333 + /* this dance is required because we cannot directly initialize 334 + * a 128-bit value to anything larger than a 64-bit value. 
335 + */ 336 + i = (i << 64) | (i - 1); 337 + #endif 338 + /* simple int */ 339 + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234); 340 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, 341 + "1234", 1234); 342 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234); 343 + 344 + /* zero value should be printed at toplevel */ 345 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0); 346 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, 347 + "0", 0); 348 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO, 349 + "(int)0", 0); 350 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 351 + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 352 + "0", 0); 353 + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567); 354 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, 355 + "-4567", -4567); 356 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567); 357 + 358 + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1); 359 + 360 + #ifdef __SIZEOF_INT128__ 361 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT, 362 + "(__int128)0xffffffffffffffff", 363 + 0xffffffffffffffff); 364 + ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str, 365 + "(__int128)0xfffffffffffffffffffffffffffffffe"), 366 + "dump __int128"); 367 + #endif 368 + } 369 + 370 + static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d, 371 + char *str) 372 + { 373 + float t1 = 1.234567; 374 + float t2 = -1.234567; 375 + float t3 = 0.0; 376 + double t4 = 5.678912; 377 + double t5 = -5.678912; 378 + double t6 = 0.0; 379 + long double t7 = 9.876543; 380 + long double t8 = -9.876543; 381 + long double t9 = 0.0; 382 + 383 + /* since the kernel does not likely have any float types in its BTF, we 384 + * will need to add some of various sizes. 
385 + */ 386 + 387 + ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float"); 388 + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str, 389 + "(test_float)1.234567"), "dump float"); 390 + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str, 391 + "(test_float)-1.234567"), "dump float"); 392 + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str, 393 + "(test_float)0.000000"), "dump float"); 394 + 395 + ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double"); 396 + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str, 397 + "(test_double)5.678912"), "dump double"); 398 + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str, 399 + "(test_double)-5.678912"), "dump double"); 400 + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str, 401 + "(test_double)0.000000"), "dump double"); 402 + 403 + ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double"); 404 + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16, 405 + str, "(test_long_double)9.876543"), 406 + "dump long_double"); 407 + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16, 408 + str, "(test_long_double)-9.876543"), 409 + "dump long_double"); 410 + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16, 411 + str, "(test_long_double)0.000000"), 412 + "dump long_double"); 413 + } 414 + 415 + static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d, 416 + char *str) 417 + { 418 + /* simple char */ 419 + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100); 420 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME, 421 + "100", 100); 422 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100); 423 + /* zero value should be printed at toplevel */ 424 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT, 425 + "(char)0", 0); 426 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 
BTF_F_COMPACT | BTF_F_NONAME, 427 + "0", 0); 428 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO, 429 + "(char)0", 0); 430 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 431 + "0", 0); 432 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0); 433 + 434 + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100); 435 + } 436 + 437 + static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d, 438 + char *str) 439 + { 440 + /* simple typedef */ 441 + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100); 442 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME, 443 + "1", 1); 444 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1); 445 + /* zero value should be printed at toplevel */ 446 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0); 447 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME, 448 + "0", 0); 449 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO, 450 + "(u64)0", 0); 451 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 452 + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 453 + "0", 0); 454 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0); 455 + 456 + /* typedef struct */ 457 + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, 458 + {.counter = (int)1,}); 459 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME, 460 + "{1,}", { .counter = 1 }); 461 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0, 462 + "(atomic_t){\n" 463 + " .counter = (int)1,\n" 464 + "}", 465 + {.counter = 1,}); 466 + /* typedef with 0 value should be printed at toplevel */ 467 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}", 468 + {.counter = 0,}); 469 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME, 470 + "{}", {.counter = 0,}); 471 + 
TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0, 472 + "(atomic_t){\n" 473 + "}", 474 + {.counter = 0,}); 475 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO, 476 + "(atomic_t){.counter = (int)0,}", 477 + {.counter = 0,}); 478 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 479 + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 480 + "{0,}", {.counter = 0,}); 481 + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO, 482 + "(atomic_t){\n" 483 + " .counter = (int)0,\n" 484 + "}", 485 + { .counter = 0,}); 486 + 487 + /* overflow should show type but not value since it overflows */ 488 + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1, 489 + "(atomic_t){\n", { .counter = 1}); 490 + } 491 + 492 + static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d, 493 + char *str) 494 + { 495 + /* enum where enum value does (and does not) exist */ 496 + TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 497 + BPF_MAP_CREATE); 498 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 499 + "(enum bpf_cmd)BPF_MAP_CREATE", 0); 500 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 501 + BTF_F_COMPACT | BTF_F_NONAME, 502 + "BPF_MAP_CREATE", 503 + BPF_MAP_CREATE); 504 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0, 505 + "(enum bpf_cmd)BPF_MAP_CREATE", 506 + BPF_MAP_CREATE); 507 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 508 + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 509 + "BPF_MAP_CREATE", 0); 510 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 511 + BTF_F_COMPACT | BTF_F_ZERO, 512 + "(enum bpf_cmd)BPF_MAP_CREATE", 513 + BPF_MAP_CREATE); 514 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 515 + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 516 + "BPF_MAP_CREATE", BPF_MAP_CREATE); 517 + TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000); 518 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 519 + 
BTF_F_COMPACT | BTF_F_NONAME, 520 + "2000", 2000); 521 + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0, 522 + "(enum bpf_cmd)2000", 2000); 523 + 524 + TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd, 525 + sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE); 526 + } 527 + 528 + static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, 529 + char *str) 530 + { 531 + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); 532 + char zero_data[512] = { }; 533 + char type_data[512]; 534 + void *fops = type_data; 535 + void *skb = type_data; 536 + size_t type_sz; 537 + __s32 type_id; 538 + char *cmpstr; 539 + int ret; 540 + 541 + memset(type_data, 255, sizeof(type_data)); 542 + 543 + /* simple struct */ 544 + TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT, 545 + {.name_off = (__u32)3,.val = (__s32)-1,}); 546 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 547 + BTF_F_COMPACT | BTF_F_NONAME, 548 + "{3,-1,}", 549 + { .name_off = 3, .val = -1,}); 550 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0, 551 + "(struct btf_enum){\n" 552 + " .name_off = (__u32)3,\n" 553 + " .val = (__s32)-1,\n" 554 + "}", 555 + { .name_off = 3, .val = -1,}); 556 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 557 + BTF_F_COMPACT | BTF_F_NONAME, 558 + "{-1,}", 559 + { .name_off = 0, .val = -1,}); 560 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 561 + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, 562 + "{0,-1,}", 563 + { .name_off = 0, .val = -1,}); 564 + /* empty struct should be printed */ 565 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT, 566 + "(struct btf_enum){}", 567 + { .name_off = 0, .val = 0,}); 568 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 569 + BTF_F_COMPACT | BTF_F_NONAME, 570 + "{}", 571 + { .name_off = 0, .val = 0,}); 572 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0, 573 + "(struct btf_enum){\n" 574 + "}", 575 + 
{ .name_off = 0, .val = 0,}); 576 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 577 + BTF_F_COMPACT | BTF_F_ZERO, 578 + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}", 579 + { .name_off = 0, .val = 0,}); 580 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 581 + BTF_F_ZERO, 582 + "(struct btf_enum){\n" 583 + " .name_off = (__u32)0,\n" 584 + " .val = (__s32)0,\n" 585 + "}", 586 + { .name_off = 0, .val = 0,}); 587 + 588 + /* struct with pointers */ 589 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT, 590 + "(struct list_head){.next = (struct list_head *)0x1,}", 591 + { .next = (struct list_head *)1 }); 592 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0, 593 + "(struct list_head){\n" 594 + " .next = (struct list_head *)0x1,\n" 595 + "}", 596 + { .next = (struct list_head *)1 }); 597 + /* NULL pointer should not be displayed */ 598 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT, 599 + "(struct list_head){}", 600 + { .next = (struct list_head *)0 }); 601 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0, 602 + "(struct list_head){\n" 603 + "}", 604 + { .next = (struct list_head *)0 }); 605 + 606 + /* struct with function pointers */ 607 + type_id = btf__find_by_name(btf, "file_operations"); 608 + if (ASSERT_GT(type_id, 0, "find type id")) { 609 + type_sz = btf__resolve_size(btf, type_id); 610 + str[0] = '\0'; 611 + 612 + ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts); 613 + ASSERT_EQ(ret, type_sz, 614 + "unexpected return value dumping file_operations"); 615 + cmpstr = 616 + "(struct file_operations){\n" 617 + " .owner = (struct module *)0xffffffffffffffff,\n" 618 + " .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,"; 619 + 620 + ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations"); 621 + } 622 + 623 + /* struct with char array */ 624 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct 
bpf_prog_info, BTF_F_COMPACT, 625 + "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}", 626 + { .name = "foo",}); 627 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 628 + BTF_F_COMPACT | BTF_F_NONAME, 629 + "{['f','o','o',],}", 630 + {.name = "foo",}); 631 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0, 632 + "(struct bpf_prog_info){\n" 633 + " .name = (char[16])[\n" 634 + " 'f',\n" 635 + " 'o',\n" 636 + " 'o',\n" 637 + " ],\n" 638 + "}", 639 + {.name = "foo",}); 640 + /* leading null char means do not display string */ 641 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, 642 + "(struct bpf_prog_info){}", 643 + {.name = {'\0', 'f', 'o', 'o'}}); 644 + /* handle non-printable characters */ 645 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, 646 + "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}", 647 + { .name = {1, 2, 3, 0}}); 648 + 649 + /* struct with non-char array */ 650 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT, 651 + "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}", 652 + { .cb = {1, 2, 3, 4, 5,},}); 653 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 654 + BTF_F_COMPACT | BTF_F_NONAME, 655 + "{[1,2,3,4,5,],}", 656 + { .cb = { 1, 2, 3, 4, 5},}); 657 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0, 658 + "(struct __sk_buff){\n" 659 + " .cb = (__u32[5])[\n" 660 + " 1,\n" 661 + " 2,\n" 662 + " 3,\n" 663 + " 4,\n" 664 + " 5,\n" 665 + " ],\n" 666 + "}", 667 + { .cb = { 1, 2, 3, 4, 5},}); 668 + /* For non-char, arrays, show non-zero values only */ 669 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT, 670 + "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}", 671 + { .cb = { 0, 0, 1, 0, 0},}); 672 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0, 673 + "(struct __sk_buff){\n" 674 + " .cb = (__u32[5])[\n" 675 + " 0,\n" 676 + " 0,\n" 677 + " 
1,\n" 678 + " 0,\n" 679 + " 0,\n" 680 + " ],\n" 681 + "}", 682 + { .cb = { 0, 0, 1, 0, 0},}); 683 + 684 + /* struct with bitfields */ 685 + TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT, 686 + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,}); 687 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 688 + BTF_F_COMPACT | BTF_F_NONAME, 689 + "{1,0x2,0x3,4,5,}", 690 + { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4, 691 + .imm = 5,}); 692 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0, 693 + "(struct bpf_insn){\n" 694 + " .code = (__u8)1,\n" 695 + " .dst_reg = (__u8)0x2,\n" 696 + " .src_reg = (__u8)0x3,\n" 697 + " .off = (__s16)4,\n" 698 + " .imm = (__s32)5,\n" 699 + "}", 700 + {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5}); 701 + 702 + /* zeroed bitfields should not be displayed */ 703 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT, 704 + "(struct bpf_insn){.dst_reg = (__u8)0x1,}", 705 + { .code = 0, .dst_reg = 1}); 706 + 707 + /* struct with enum bitfield */ 708 + type_id = btf__find_by_name(btf, "fs_context"); 709 + if (ASSERT_GT(type_id, 0, "find fs_context")) { 710 + type_sz = btf__resolve_size(btf, type_id); 711 + str[0] = '\0'; 712 + 713 + opts.emit_zeroes = true; 714 + ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts); 715 + ASSERT_EQ(ret, type_sz, 716 + "unexpected return value dumping fs_context"); 717 + 718 + ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL, 719 + "bitfield value not present"); 720 + } 721 + 722 + /* struct with nested anon union */ 723 + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT, 724 + "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}", 725 + { .op = 1, .args = { 1, 2, 3, 4}}); 726 + 727 + /* union with nested struct */ 728 + TEST_BTF_DUMP_DATA(btf, d, 
"union", str, union bpf_iter_link_info, BTF_F_COMPACT, 729 + "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}", 730 + { .map = { .map_fd = 1 }}); 731 + 732 + /* struct skb with nested structs/unions; because type output is so 733 + * complex, we don't do a string comparison, just verify we return 734 + * the type size as the amount of data displayed. 735 + */ 736 + type_id = btf__find_by_name(btf, "sk_buff"); 737 + if (ASSERT_GT(type_id, 0, "find struct sk_buff")) { 738 + type_sz = btf__resolve_size(btf, type_id); 739 + str[0] = '\0'; 740 + 741 + ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts); 742 + ASSERT_EQ(ret, type_sz, 743 + "unexpected return value dumping sk_buff"); 744 + } 745 + 746 + /* overflow bpf_sock_ops struct with final element nonzero/zero. 747 + * Regardless of the value of the final field, we don't have all the 748 + * data we need to display it, so we should trigger an overflow. 749 + * In other words overflow checking should trump "is field zero?" 750 + * checks because if we've overflowed, it shouldn't matter what the 751 + * field is - we can't trust its value so shouldn't display it.
752 + */ 753 + TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, 754 + sizeof(struct bpf_sock_ops) - 1, 755 + "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", 756 + { .op = 1, .skb_tcp_flags = 2}); 757 + TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, 758 + sizeof(struct bpf_sock_ops) - 1, 759 + "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", 760 + { .op = 1, .skb_tcp_flags = 0}); 761 + } 762 + 763 + static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, 764 + char *str) 765 + { 766 + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, 767 + "int cpu_number = (int)100", 100); 768 + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, 769 + "static int cpu_profile_flip = (int)2", 2); 770 + } 771 + 772 + static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, 773 + const char *name, const char *expected_val, 774 + void *data, size_t data_sz) 775 + { 776 + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); 777 + int ret = 0, cmp; 778 + size_t secsize; 779 + __s32 type_id; 780 + 781 + opts.compact = true; 782 + 783 + type_id = btf__find_by_name(btf, name); 784 + if (!ASSERT_GT(type_id, 0, "find type id")) 785 + return; 786 + 787 + secsize = btf__resolve_size(btf, type_id); 788 + ASSERT_EQ(secsize, 0, "verify section size"); 789 + 790 + str[0] = '\0'; 791 + ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts); 792 + ASSERT_EQ(ret, 0, "unexpected return value"); 793 + 794 + cmp = strcmp(str, expected_val); 795 + ASSERT_EQ(cmp, 0, "ensure expected/actual match"); 796 + } 797 + 798 + static void test_btf_dump_datasec_data(char *str) 799 + { 800 + struct btf *btf = btf__parse("xdping_kern.o", NULL); 801 + struct btf_dump_opts opts = { .ctx = str }; 802 + char license[4] = "GPL"; 803 + struct btf_dump *d; 804 + 805 + if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) 806 + return; 807 + 808 + d = btf_dump__new(btf, NULL, &opts, 
btf_dump_snprintf); 809 + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) 810 + return; 811 + 812 + test_btf_datasec(btf, d, str, "license", 813 + "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];", 814 + license, sizeof(license)); 815 + } 816 + 235 817 void test_btf_dump() { 818 + char str[STRSIZE]; 819 + struct btf_dump_opts opts = { .ctx = str }; 820 + struct btf_dump *d; 821 + struct btf *btf; 236 822 int i; 237 823 238 824 for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) { ··· 831 245 } 832 246 if (test__start_subtest("btf_dump: incremental")) 833 247 test_btf_dump_incremental(); 248 + 249 + btf = libbpf_find_kernel_btf(); 250 + if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) 251 + return; 252 + 253 + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); 254 + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) 255 + return; 256 + 257 + /* Verify type display for various types. */ 258 + if (test__start_subtest("btf_dump: int_data")) 259 + test_btf_dump_int_data(btf, d, str); 260 + if (test__start_subtest("btf_dump: float_data")) 261 + test_btf_dump_float_data(btf, d, str); 262 + if (test__start_subtest("btf_dump: char_data")) 263 + test_btf_dump_char_data(btf, d, str); 264 + if (test__start_subtest("btf_dump: typedef_data")) 265 + test_btf_dump_typedef_data(btf, d, str); 266 + if (test__start_subtest("btf_dump: enum_data")) 267 + test_btf_dump_enum_data(btf, d, str); 268 + if (test__start_subtest("btf_dump: struct_data")) 269 + test_btf_dump_struct_data(btf, d, str); 270 + if (test__start_subtest("btf_dump: var_data")) 271 + test_btf_dump_var_data(btf, d, str); 272 + btf_dump__free(d); 273 + btf__free(btf); 274 + 275 + if (test__start_subtest("btf_dump: datasec_data")) 276 + test_btf_dump_datasec_data(str); 834 277 }

+10 -12

tools/testing/selftests/bpf/prog_tests/core_autosize.c

··· 53 53 char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; 54 54 int err, fd = -1, zero = 0; 55 55 int char_id, short_id, int_id, long_long_id, void_ptr_id, id; 56 + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); 56 57 struct test_core_autosize* skel = NULL; 57 - struct bpf_object_load_attr load_attr = {}; 58 58 struct bpf_program *prog; 59 59 struct bpf_map *bss_map; 60 60 struct btf *btf = NULL; ··· 125 125 fd = -1; 126 126 127 127 /* open and load BPF program with custom BTF as the kernel BTF */ 128 - skel = test_core_autosize__open(); 128 + open_opts.btf_custom_path = btf_file; 129 + skel = test_core_autosize__open_opts(&open_opts); 129 130 if (!ASSERT_OK_PTR(skel, "skel_open")) 130 - return; 131 + goto cleanup; 131 132 132 133 /* disable handle_signed() for now */ 133 134 prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); ··· 136 135 goto cleanup; 137 136 bpf_program__set_autoload(prog, false); 138 137 139 - load_attr.obj = skel->obj; 140 - load_attr.target_btf_path = btf_file; 141 - err = bpf_object__load_xattr(&load_attr); 138 + err = bpf_object__load(skel->obj); 142 139 if (!ASSERT_OK(err, "prog_load")) 143 140 goto cleanup; 144 141 ··· 203 204 skel = NULL; 204 205 205 206 /* now re-load with handle_signed() enabled, it should fail loading */ 206 - skel = test_core_autosize__open(); 207 + open_opts.btf_custom_path = btf_file; 208 + skel = test_core_autosize__open_opts(&open_opts); 207 209 if (!ASSERT_OK_PTR(skel, "skel_open")) 208 - return; 210 + goto cleanup; 209 211 210 - load_attr.obj = skel->obj; 211 - load_attr.target_btf_path = btf_file; 212 - err = bpf_object__load_xattr(&load_attr); 213 - if (!ASSERT_ERR(err, "bad_prog_load")) 212 + err = test_core_autosize__load(skel); 213 + if (!ASSERT_ERR(err, "skel_load")) 214 214 goto cleanup; 215 215 216 216 cleanup:

+11 -14

tools/testing/selftests/bpf/prog_tests/core_reloc.c

··· 816 816 void test_core_reloc(void) 817 817 { 818 818 const size_t mmap_sz = roundup_page(sizeof(struct data)); 819 - struct bpf_object_load_attr load_attr = {}; 819 + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); 820 820 struct core_reloc_test_case *test_case; 821 821 const char *tp_name, *probe_name; 822 822 int err, i, equal; ··· 846 846 continue; 847 847 } 848 848 849 - obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); 849 + if (test_case->btf_src_file) { 850 + err = access(test_case->btf_src_file, R_OK); 851 + if (!ASSERT_OK(err, "btf_src_file")) 852 + goto cleanup; 853 + } 854 + 855 + open_opts.btf_custom_path = test_case->btf_src_file; 856 + obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts); 850 857 if (!ASSERT_OK_PTR(obj, "obj_open")) 851 - continue; 858 + goto cleanup; 852 859 853 860 probe_name = "raw_tracepoint/sys_enter"; 854 861 tp_name = "sys_enter"; ··· 869 862 "prog '%s' not found\n", probe_name)) 870 863 goto cleanup; 871 864 872 - 873 - if (test_case->btf_src_file) { 874 - err = access(test_case->btf_src_file, R_OK); 875 - if (!ASSERT_OK(err, "btf_src_file")) 876 - goto cleanup; 877 - } 878 - 879 - load_attr.obj = obj; 880 - load_attr.log_level = 0; 881 - load_attr.target_btf_path = test_case->btf_src_file; 882 - err = bpf_object__load_xattr(&load_attr); 865 + err = bpf_object__load(obj); 883 866 if (err) { 884 867 if (!test_case->fails) 885 868 ASSERT_OK(err, "obj_load");

+2

tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c

··· 17 17 */ 18 18 #ifndef __x86_64__ 19 19 bpf_program__set_autoload(skel->progs.test6, false); 20 + bpf_program__set_autoload(skel->progs.test7, false); 20 21 #endif 21 22 22 23 err = get_func_ip_test__load(skel); ··· 47 46 ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); 48 47 #ifdef __x86_64__ 49 48 ASSERT_EQ(skel->bss->test6_result, 1, "test6_result"); 49 + ASSERT_EQ(skel->bss->test7_result, 1, "test7_result"); 50 50 #endif 51 51 52 52 cleanup:

+9

tools/testing/selftests/bpf/prog_tests/pinning.c

··· 125 125 if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) 126 126 goto out; 127 127 128 + /* get pinning path */ 129 + if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path")) 130 + goto out; 131 + 128 132 /* set pinning path of other map and re-pin all */ 129 133 map = bpf_object__find_map_by_name(obj, "nopinmap"); 130 134 if (CHECK(!map, "find map", "NULL map")) ··· 136 132 137 133 err = bpf_map__set_pin_path(map, custpinpath); 138 134 if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) 135 + goto out; 136 + 137 + /* get pinning path after set */ 138 + if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath, 139 + "get pin path after set")) 139 140 goto out; 140 141 141 142 /* should only pin the one unpinned map */

+17 -6

tools/testing/selftests/bpf/prog_tests/tc_redirect.c

··· 13 13 #define _GNU_SOURCE 14 14 15 15 #include <arpa/inet.h> 16 + #include <linux/if.h> 17 + #include <linux/if_tun.h> 16 18 #include <linux/limits.h> 17 19 #include <linux/sysctl.h> 18 - #include <linux/if_tun.h> 19 - #include <linux/if.h> 20 20 #include <sched.h> 21 21 #include <stdbool.h> 22 22 #include <stdio.h> 23 - #include <sys/stat.h> 24 23 #include <sys/mount.h> 24 + #include <sys/stat.h> 25 + #include <unistd.h> 25 26 26 27 #include "test_progs.h" 27 28 #include "network_helpers.h" ··· 390 389 close(client_fd); 391 390 } 392 391 392 + static char *ping_command(int family) 393 + { 394 + if (family == AF_INET6) { 395 + /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ 396 + if (!system("which ping6 >/dev/null 2>&1")) 397 + return "ping6"; 398 + else 399 + return "ping -6"; 400 + } 401 + return "ping"; 402 + } 403 + 393 404 static int test_ping(int family, const char *addr) 394 405 { 395 - const char *ping = family == AF_INET6 ? "ping6" : "ping"; 396 - 397 - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); 406 + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); 398 407 return 0; 399 408 fail: 400 409 return -1;

+72

tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #include "bpf_iter.h" 4 + #include "bpf_tracing_net.h" 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_endian.h> 7 + 8 + #define bpf_tcp_sk(skc) ({ \ 9 + struct sock_common *_skc = skc; \ 10 + sk = NULL; \ 11 + tp = NULL; \ 12 + if (_skc) { \ 13 + tp = bpf_skc_to_tcp_sock(_skc); \ 14 + sk = (struct sock *)tp; \ 15 + } \ 16 + tp; \ 17 + }) 18 + 19 + unsigned short reuse_listen_hport = 0; 20 + unsigned short listen_hport = 0; 21 + char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic"; 22 + char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp"; 23 + bool random_retry = false; 24 + 25 + static bool tcp_cc_eq(const char *a, const char *b) 26 + { 27 + int i; 28 + 29 + for (i = 0; i < TCP_CA_NAME_MAX; i++) { 30 + if (a[i] != b[i]) 31 + return false; 32 + if (!a[i]) 33 + break; 34 + } 35 + 36 + return true; 37 + } 38 + 39 + SEC("iter/tcp") 40 + int change_tcp_cc(struct bpf_iter__tcp *ctx) 41 + { 42 + char cur_cc[TCP_CA_NAME_MAX]; 43 + struct tcp_sock *tp; 44 + struct sock *sk; 45 + int ret; 46 + 47 + if (!bpf_tcp_sk(ctx->sk_common)) 48 + return 0; 49 + 50 + if (sk->sk_family != AF_INET6 || 51 + (sk->sk_state != TCP_LISTEN && 52 + sk->sk_state != TCP_ESTABLISHED) || 53 + (sk->sk_num != reuse_listen_hport && 54 + sk->sk_num != listen_hport && 55 + bpf_ntohs(sk->sk_dport) != listen_hport)) 56 + return 0; 57 + 58 + if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION, 59 + cur_cc, sizeof(cur_cc))) 60 + return 0; 61 + 62 + if (!tcp_cc_eq(cur_cc, cubic_cc)) 63 + return 0; 64 + 65 + if (random_retry && bpf_get_prandom_u32() % 4 == 1) 66 + return 1; 67 + 68 + bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc)); 69 + return 0; 70 + } 71 + 72 + char _license[] SEC("license") = "GPL";

+6

tools/testing/selftests/bpf/progs/bpf_tracing_net.h

··· 5 5 #define AF_INET 2 6 6 #define AF_INET6 10 7 7 8 + #define SOL_TCP 6 9 + #define TCP_CONGESTION 13 10 + #define TCP_CA_NAME_MAX 16 11 + 8 12 #define ICSK_TIME_RETRANS 1 9 13 #define ICSK_TIME_PROBE0 3 10 14 #define ICSK_TIME_LOSS_PROBE 5 ··· 36 32 #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr 37 33 #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr 38 34 35 + #define sk_num __sk_common.skc_num 36 + #define sk_dport __sk_common.skc_dport 39 37 #define sk_family __sk_common.skc_family 40 38 #define sk_rmem_alloc sk_backlog.rmem_alloc 41 39 #define sk_refcnt __sk_common.skc_refcnt

+11

tools/testing/selftests/bpf/progs/get_func_ip_test.c

··· 11 11 extern const void bpf_fentry_test4 __ksym; 12 12 extern const void bpf_modify_return_test __ksym; 13 13 extern const void bpf_fentry_test6 __ksym; 14 + extern const void bpf_fentry_test7 __ksym; 14 15 15 16 __u64 test1_result = 0; 16 17 SEC("fentry/bpf_fentry_test1") ··· 70 69 __u64 addr = bpf_get_func_ip(ctx); 71 70 72 71 test6_result = (const void *) addr == &bpf_fentry_test6 + 5; 72 + return 0; 73 + } 74 + 75 + __u64 test7_result = 0; 76 + SEC("kprobe/bpf_fentry_test7+5") 77 + int test7(struct pt_regs *ctx) 78 + { 79 + __u64 addr = bpf_get_func_ip(ctx); 80 + 81 + test7_result = (const void *) addr == &bpf_fentry_test7 + 5; 73 82 return 0; 74 83 }

+4 -4

tools/testing/selftests/bpf/progs/netcnt_prog.c

··· 13 13 struct { 14 14 __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 15 15 __type(key, struct bpf_cgroup_storage_key); 16 - __type(value, struct percpu_net_cnt); 16 + __type(value, union percpu_net_cnt); 17 17 } percpu_netcnt SEC(".maps"); 18 18 19 19 struct { 20 20 __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); 21 21 __type(key, struct bpf_cgroup_storage_key); 22 - __type(value, struct net_cnt); 22 + __type(value, union net_cnt); 23 23 } netcnt SEC(".maps"); 24 24 25 25 SEC("cgroup/skb") 26 26 int bpf_nextcnt(struct __sk_buff *skb) 27 27 { 28 - struct percpu_net_cnt *percpu_cnt; 28 + union percpu_net_cnt *percpu_cnt; 29 29 char fmt[] = "%d %llu %llu\n"; 30 - struct net_cnt *cnt; 30 + union net_cnt *cnt; 31 31 __u64 ts, dt; 32 32 int ret; 33 33

+26

tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Isovalent, Inc. */ 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + struct inner { 7 + __uint(type, BPF_MAP_TYPE_ARRAY); 8 + __type(key, __u32); 9 + __type(value, int); 10 + __uint(max_entries, 4); 11 + }; 12 + 13 + struct { 14 + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); 15 + __uint(max_entries, 0); /* This will make map creation fail */ 16 + __uint(key_size, sizeof(__u32)); 17 + __array(values, struct inner); 18 + } mim SEC(".maps"); 19 + 20 + SEC("xdp") 21 + int xdp_noop0(struct xdp_md *ctx) 22 + { 23 + return XDP_PASS; 24 + } 25 + 26 + char _license[] SEC("license") = "GPL";

+586

tools/testing/selftests/bpf/test_bpftool_synctypes.py

··· 1 + #!/usr/bin/env python3 2 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 3 + # 4 + # Copyright (C) 2021 Isovalent, Inc. 5 + 6 + import argparse 7 + import re 8 + import os, sys 9 + 10 + LINUX_ROOT = os.path.abspath(os.path.join(__file__, 11 + os.pardir, os.pardir, os.pardir, os.pardir, os.pardir)) 12 + BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool') 13 + retval = 0 14 + 15 + class BlockParser(object): 16 + """ 17 + A parser for extracting set of values from blocks such as enums. 18 + @reader: a pointer to the open file to parse 19 + """ 20 + def __init__(self, reader): 21 + self.reader = reader 22 + 23 + def search_block(self, start_marker): 24 + """ 25 + Search for a given structure in a file. 26 + @start_marker: regex marking the beginning of a structure to parse 27 + """ 28 + offset = self.reader.tell() 29 + array_start = re.search(start_marker, self.reader.read()) 30 + if array_start is None: 31 + raise Exception('Failed to find start of block') 32 + self.reader.seek(offset + array_start.start()) 33 + 34 + def parse(self, pattern, end_marker): 35 + """ 36 + Parse a block and return a set of values. Values to extract must be 37 + on separate lines in the file. 38 + @pattern: pattern used to identify the values to extract 39 + @end_marker: regex marking the end of the block to parse 40 + """ 41 + entries = set() 42 + while True: 43 + line = self.reader.readline() 44 + if not line or re.match(end_marker, line): 45 + break 46 + capture = pattern.search(line) 47 + if capture and pattern.groups >= 1: 48 + entries.add(capture.group(1)) 49 + return entries 50 + 51 + class ArrayParser(BlockParser): 52 + """ 53 + A parser for extracting dictionaries of values from some BPF-related arrays. 
54 + @reader: a pointer to the open file to parse 55 + @array_name: name of the array to parse 56 + """ 57 + end_marker = re.compile('^};') 58 + 59 + def __init__(self, reader, array_name): 60 + self.array_name = array_name 61 + self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') 62 + super().__init__(reader) 63 + 64 + def search_block(self): 65 + """ 66 + Search for the given array in a file. 67 + """ 68 + super().search_block(self.start_marker); 69 + 70 + def parse(self): 71 + """ 72 + Parse a block and return data as a dictionary. Items to extract must be 73 + on separate lines in the file. 74 + """ 75 + pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') 76 + entries = {} 77 + while True: 78 + line = self.reader.readline() 79 + if line == '' or re.match(self.end_marker, line): 80 + break 81 + capture = pattern.search(line) 82 + if capture: 83 + entries[capture.group(1)] = capture.group(2) 84 + return entries 85 + 86 + class InlineListParser(BlockParser): 87 + """ 88 + A parser for extracting set of values from inline lists. 89 + """ 90 + def parse(self, pattern, end_marker): 91 + """ 92 + Parse a block and return a set of values. Multiple values to extract 93 + can be on the same line in the file. 94 + @pattern: pattern used to identify the values to extract 95 + @end_marker: regex marking the end of the block to parse 96 + """ 97 + entries = set() 98 + while True: 99 + line = self.reader.readline() 100 + if not line: 101 + break 102 + entries.update(pattern.findall(line)) 103 + if re.search(end_marker, line): 104 + break 105 + return entries 106 + 107 + class FileExtractor(object): 108 + """ 109 + A generic reader for extracting data from a given file. This class contains 110 + several helper methods that wrap around parser objects to extract values 111 + from different structures. 112 + This class does not offer a way to set a filename, which is expected to be 113 + defined in children classes. 
114 + """ 115 + def __init__(self): 116 + self.reader = open(self.filename, 'r') 117 + 118 + def close(self): 119 + """ 120 + Close the file used by the parser. 121 + """ 122 + self.reader.close() 123 + 124 + def reset_read(self): 125 + """ 126 + Reset the file position indicator for this parser. This is useful when 127 + parsing several structures in the file without respecting the order in 128 + which those structures appear in the file. 129 + """ 130 + self.reader.seek(0) 131 + 132 + def get_types_from_array(self, array_name): 133 + """ 134 + Search for and parse an array associating names to BPF_* enum members, 135 + for example: 136 + 137 + const char * const prog_type_name[] = { 138 + [BPF_PROG_TYPE_UNSPEC] = "unspec", 139 + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", 140 + [BPF_PROG_TYPE_KPROBE] = "kprobe", 141 + }; 142 + 143 + Return a dictionary with the enum member names as keys and the 144 + associated names as values, for example: 145 + 146 + {'BPF_PROG_TYPE_UNSPEC': 'unspec', 147 + 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', 148 + 'BPF_PROG_TYPE_KPROBE': 'kprobe'} 149 + 150 + @array_name: name of the array to parse 151 + """ 152 + array_parser = ArrayParser(self.reader, array_name) 153 + array_parser.search_block() 154 + return array_parser.parse() 155 + 156 + def get_enum(self, enum_name): 157 + """ 158 + Search for and parse an enum containing BPF_* members, for example: 159 + 160 + enum bpf_prog_type { 161 + BPF_PROG_TYPE_UNSPEC, 162 + BPF_PROG_TYPE_SOCKET_FILTER, 163 + BPF_PROG_TYPE_KPROBE, 164 + }; 165 + 166 + Return a set containing all member names, for example: 167 + 168 + {'BPF_PROG_TYPE_UNSPEC', 169 + 'BPF_PROG_TYPE_SOCKET_FILTER', 170 + 'BPF_PROG_TYPE_KPROBE'} 171 + 172 + @enum_name: name of the enum to parse 173 + """ 174 + start_marker = re.compile(f'enum {enum_name} {{\n') 175 + pattern = re.compile('^\s*(BPF_\w+),?$') 176 + end_marker = re.compile('^};') 177 + parser = BlockParser(self.reader) 178 + 
parser.search_block(start_marker) 179 + return parser.parse(pattern, end_marker) 180 + 181 + def __get_description_list(self, start_marker, pattern, end_marker): 182 + parser = InlineListParser(self.reader) 183 + parser.search_block(start_marker) 184 + return parser.parse(pattern, end_marker) 185 + 186 + def get_rst_list(self, block_name): 187 + """ 188 + Search for and parse a list of type names from RST documentation, for 189 + example: 190 + 191 + | *TYPE* := { 192 + | **socket** | **kprobe** | 193 + | **kretprobe** 194 + | } 195 + 196 + Return a set containing all type names, for example: 197 + 198 + {'socket', 'kprobe', 'kretprobe'} 199 + 200 + @block_name: name of the block to parse, 'TYPE' in the example 201 + """ 202 + start_marker = re.compile(f'\*{block_name}\* := {{') 203 + pattern = re.compile('\*\*([\w/-]+)\*\*') 204 + end_marker = re.compile('}\n') 205 + return self.__get_description_list(start_marker, pattern, end_marker) 206 + 207 + def get_help_list(self, block_name): 208 + """ 209 + Search for and parse a list of type names from a help message in 210 + bpftool, for example: 211 + 212 + " TYPE := { socket | kprobe |\\n" 213 + " kretprobe }\\n" 214 + 215 + Return a set containing all type names, for example: 216 + 217 + {'socket', 'kprobe', 'kretprobe'} 218 + 219 + @block_name: name of the block to parse, 'TYPE' in the example 220 + """ 221 + start_marker = re.compile(f'"\s*{block_name} := {{') 222 + pattern = re.compile('([\w/]+) [|}]') 223 + end_marker = re.compile('}') 224 + return self.__get_description_list(start_marker, pattern, end_marker) 225 + 226 + def get_help_list_macro(self, macro): 227 + """ 228 + Search for and parse a list of values from a help message starting with 229 + a macro in bpftool, for example: 230 + 231 + " " HELP_SPEC_OPTIONS " |\\n" 232 + " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n" 233 + 234 + Return a set containing all item names, for example: 235 + 236 + {'-f', '--bpffs', '-m', '--mapcompat', '-n', 
'--nomount'} 237 + 238 + @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example 239 + """ 240 + start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') 241 + pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])') 242 + end_marker = re.compile('}\\\\n') 243 + return self.__get_description_list(start_marker, pattern, end_marker) 244 + 245 + def default_options(self): 246 + """ 247 + Return the default options contained in HELP_SPEC_OPTIONS 248 + """ 249 + return { '-j', '--json', '-p', '--pretty', '-d', '--debug' } 250 + 251 + def get_bashcomp_list(self, block_name): 252 + """ 253 + Search for and parse a list of type names from a variable in bash 254 + completion file, for example: 255 + 256 + local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\ 257 + kretprobe' 258 + 259 + Return a set containing all type names, for example: 260 + 261 + {'socket', 'kprobe', 'kretprobe'} 262 + 263 + @block_name: name of the block to parse, 'BPFTOOL_PROG_LOAD_TYPES' in the example 264 + """ 265 + start_marker = re.compile(f'local {block_name}=\'') 266 + pattern = re.compile('(?:.*=\')?([\w/]+)') 267 + end_marker = re.compile('\'$') 268 + return self.__get_description_list(start_marker, pattern, end_marker) 269 + 270 + class SourceFileExtractor(FileExtractor): 271 + """ 272 + An abstract extractor for a source file with usage message. 273 + This class does not offer a way to set a filename, which is expected to be 274 + defined in children classes. 275 + """ 276 + def get_options(self): 277 + return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS')) 278 + 279 + class ProgFileExtractor(SourceFileExtractor): 280 + """ 281 + An extractor for bpftool's prog.c. 
282 + """ 283 + filename = os.path.join(BPFTOOL_DIR, 'prog.c') 284 + 285 + def get_prog_types(self): 286 + return self.get_types_from_array('prog_type_name') 287 + 288 + def get_attach_types(self): 289 + return self.get_types_from_array('attach_type_strings') 290 + 291 + def get_prog_attach_help(self): 292 + return self.get_help_list('ATTACH_TYPE') 293 + 294 + class MapFileExtractor(SourceFileExtractor): 295 + """ 296 + An extractor for bpftool's map.c. 297 + """ 298 + filename = os.path.join(BPFTOOL_DIR, 'map.c') 299 + 300 + def get_map_types(self): 301 + return self.get_types_from_array('map_type_name') 302 + 303 + def get_map_help(self): 304 + return self.get_help_list('TYPE') 305 + 306 + class CgroupFileExtractor(SourceFileExtractor): 307 + """ 308 + An extractor for bpftool's cgroup.c. 309 + """ 310 + filename = os.path.join(BPFTOOL_DIR, 'cgroup.c') 311 + 312 + def get_prog_attach_help(self): 313 + return self.get_help_list('ATTACH_TYPE') 314 + 315 + class CommonFileExtractor(SourceFileExtractor): 316 + """ 317 + An extractor for bpftool's common.c. 318 + """ 319 + filename = os.path.join(BPFTOOL_DIR, 'common.c') 320 + 321 + def __init__(self): 322 + super().__init__() 323 + self.attach_types = {} 324 + 325 + def get_attach_types(self): 326 + if not self.attach_types: 327 + self.attach_types = self.get_types_from_array('attach_type_name') 328 + return self.attach_types 329 + 330 + def get_cgroup_attach_types(self): 331 + if not self.attach_types: 332 + self.get_attach_types() 333 + cgroup_types = {} 334 + for (key, value) in self.attach_types.items(): 335 + if key.find('BPF_CGROUP') != -1: 336 + cgroup_types[key] = value 337 + return cgroup_types 338 + 339 + class GenericSourceExtractor(SourceFileExtractor): 340 + """ 341 + An extractor for generic source code files. 
342 + """ 343 + filename = "" 344 + 345 + def __init__(self, filename): 346 + self.filename = os.path.join(BPFTOOL_DIR, filename) 347 + super().__init__() 348 + 349 + class BpfHeaderExtractor(FileExtractor): 350 + """ 351 + An extractor for the UAPI BPF header. 352 + """ 353 + filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h') 354 + 355 + def get_prog_types(self): 356 + return self.get_enum('bpf_prog_type') 357 + 358 + def get_map_types(self): 359 + return self.get_enum('bpf_map_type') 360 + 361 + def get_attach_types(self): 362 + return self.get_enum('bpf_attach_type') 363 + 364 + class ManPageExtractor(FileExtractor): 365 + """ 366 + An abstract extractor for an RST documentation page. 367 + This class does not offer a way to set a filename, which is expected to be 368 + defined in children classes. 369 + """ 370 + def get_options(self): 371 + return self.get_rst_list('OPTIONS') 372 + 373 + class ManProgExtractor(ManPageExtractor): 374 + """ 375 + An extractor for bpftool-prog.rst. 376 + """ 377 + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst') 378 + 379 + def get_attach_types(self): 380 + return self.get_rst_list('ATTACH_TYPE') 381 + 382 + class ManMapExtractor(ManPageExtractor): 383 + """ 384 + An extractor for bpftool-map.rst. 385 + """ 386 + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst') 387 + 388 + def get_map_types(self): 389 + return self.get_rst_list('TYPE') 390 + 391 + class ManCgroupExtractor(ManPageExtractor): 392 + """ 393 + An extractor for bpftool-cgroup.rst. 394 + """ 395 + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst') 396 + 397 + def get_attach_types(self): 398 + return self.get_rst_list('ATTACH_TYPE') 399 + 400 + class ManGenericExtractor(ManPageExtractor): 401 + """ 402 + An extractor for generic RST documentation pages. 
403 + """ 404 + filename = "" 405 + 406 + def __init__(self, filename): 407 + self.filename = os.path.join(BPFTOOL_DIR, filename) 408 + super().__init__() 409 + 410 + class BashcompExtractor(FileExtractor): 411 + """ 412 + An extractor for bpftool's bash completion file. 413 + """ 414 + filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool') 415 + 416 + def get_prog_attach_types(self): 417 + return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') 418 + 419 + def get_map_types(self): 420 + return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES') 421 + 422 + def get_cgroup_attach_types(self): 423 + return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES') 424 + 425 + def verify(first_set, second_set, message): 426 + """ 427 + Print all values that differ between two sets. 428 + @first_set: one set to compare 429 + @second_set: another set to compare 430 + @message: message to print for values belonging to only one of the sets 431 + """ 432 + global retval 433 + diff = first_set.symmetric_difference(second_set) 434 + if diff: 435 + print(message, diff) 436 + retval = 1 437 + 438 + def main(): 439 + # No arguments supported at this time, but print usage for -h|--help 440 + argParser = argparse.ArgumentParser(description=""" 441 + Verify that bpftool's code, help messages, documentation and bash 442 + completion are all in sync on program types, map types, attach types, and 443 + options. Also check that bpftool is in sync with the UAPI BPF header. 
444 + """) 445 + args = argParser.parse_args() 446 + 447 + # Map types (enum) 448 + 449 + bpf_info = BpfHeaderExtractor() 450 + ref = bpf_info.get_map_types() 451 + 452 + map_info = MapFileExtractor() 453 + source_map_items = map_info.get_map_types() 454 + map_types_enum = set(source_map_items.keys()) 455 + 456 + verify(ref, map_types_enum, 457 + f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') 458 + 459 + # Map types (names) 460 + 461 + source_map_types = set(source_map_items.values()) 462 + source_map_types.discard('unspec') 463 + 464 + help_map_types = map_info.get_map_help() 465 + help_map_options = map_info.get_options() 466 + map_info.close() 467 + 468 + man_map_info = ManMapExtractor() 469 + man_map_options = man_map_info.get_options() 470 + man_map_types = man_map_info.get_map_types() 471 + man_map_info.close() 472 + 473 + bashcomp_info = BashcompExtractor() 474 + bashcomp_map_types = bashcomp_info.get_map_types() 475 + 476 + verify(source_map_types, help_map_types, 477 + f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') 478 + verify(source_map_types, man_map_types, 479 + f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') 480 + verify(help_map_options, man_map_options, 481 + f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') 482 + verify(source_map_types, bashcomp_map_types, 483 + f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') 484 + 485 + # Program types (enum) 486 + 487 + ref = bpf_info.get_prog_types() 488 + 489 + prog_info = ProgFileExtractor() 490 + prog_types = set(prog_info.get_prog_types().keys()) 491 + 492 + verify(ref, prog_types, 493 + f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') 494 + 495 + # Attach types (enum) 496 + 497 + 
ref = bpf_info.get_attach_types() 498 + bpf_info.close() 499 + 500 + common_info = CommonFileExtractor() 501 + attach_types = common_info.get_attach_types() 502 + 503 + verify(ref, attach_types, 504 + f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):') 505 + 506 + # Attach types (names) 507 + 508 + source_prog_attach_types = set(prog_info.get_attach_types().values()) 509 + 510 + help_prog_attach_types = prog_info.get_prog_attach_help() 511 + help_prog_options = prog_info.get_options() 512 + prog_info.close() 513 + 514 + man_prog_info = ManProgExtractor() 515 + man_prog_options = man_prog_info.get_options() 516 + man_prog_attach_types = man_prog_info.get_attach_types() 517 + man_prog_info.close() 518 + 519 + bashcomp_info.reset_read() # We stopped at map types, rewind 520 + bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() 521 + 522 + verify(source_prog_attach_types, help_prog_attach_types, 523 + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') 524 + verify(source_prog_attach_types, man_prog_attach_types, 525 + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') 526 + verify(help_prog_options, man_prog_options, 527 + f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') 528 + verify(source_prog_attach_types, bashcomp_prog_attach_types, 529 + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') 530 + 531 + # Cgroup attach types 532 + 533 + source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) 534 + common_info.close() 535 + 536 + cgroup_info = CgroupFileExtractor() 537 + help_cgroup_attach_types = cgroup_info.get_prog_attach_help() 538 + help_cgroup_options = cgroup_info.get_options() 539 + cgroup_info.close() 540 + 
541 + man_cgroup_info = ManCgroupExtractor() 542 + man_cgroup_options = man_cgroup_info.get_options() 543 + man_cgroup_attach_types = man_cgroup_info.get_attach_types() 544 + man_cgroup_info.close() 545 + 546 + bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types() 547 + bashcomp_info.close() 548 + 549 + verify(source_cgroup_attach_types, help_cgroup_attach_types, 550 + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') 551 + verify(source_cgroup_attach_types, man_cgroup_attach_types, 552 + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') 553 + verify(help_cgroup_options, man_cgroup_options, 554 + f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') 555 + verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, 556 + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') 557 + 558 + # Options for remaining commands 559 + 560 + for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]: 561 + source_info = GenericSourceExtractor(cmd + '.c') 562 + help_cmd_options = source_info.get_options() 563 + source_info.close() 564 + 565 + man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst')) 566 + man_cmd_options = man_cmd_info.get_options() 567 + man_cmd_info.close() 568 + 569 + verify(help_cmd_options, man_cmd_options, 570 + f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):') 571 + 572 + source_main_info = GenericSourceExtractor('main.c') 573 + help_main_options = source_main_info.get_options() 574 + source_main_info.close() 575 + 576 + man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst')) 577 + man_main_options = man_main_info.get_options() 578 
+ man_main_info.close() 579 + 580 + verify(help_main_options, man_main_options, 581 + f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):') 582 + 583 + sys.exit(retval) 584 + 585 + if __name__ == "__main__": 586 + main()

+69 -3

tools/testing/selftests/bpf/test_maps.c

··· 764 764 udp = socket(AF_INET, SOCK_DGRAM, 0); 765 765 i = 0; 766 766 err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); 767 - if (!err) { 768 - printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", 767 + if (err) { 768 + printf("Failed socket update SOCK_DGRAM '%i:%i'\n", 769 769 i, udp); 770 770 goto out_sockmap; 771 771 } ··· 1153 1153 } 1154 1154 1155 1155 #define MAPINMAP_PROG "./test_map_in_map.o" 1156 + #define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o" 1156 1157 static void test_map_in_map(void) 1157 1158 { 1158 1159 struct bpf_object *obj; 1159 1160 struct bpf_map *map; 1160 1161 int mim_fd, fd, err; 1161 1162 int pos = 0; 1163 + struct bpf_map_info info = {}; 1164 + __u32 len = sizeof(info); 1165 + __u32 id = 0; 1166 + libbpf_print_fn_t old_print_fn; 1162 1167 1163 1168 obj = bpf_object__open(MAPINMAP_PROG); 1164 1169 ··· 1233 1228 } 1234 1229 1235 1230 close(fd); 1231 + fd = -1; 1236 1232 bpf_object__close(obj); 1233 + 1234 + /* Test that failing bpf_object__create_map() destroys the inner map */ 1235 + obj = bpf_object__open(MAPINMAP_INVALID_PROG); 1236 + err = libbpf_get_error(obj); 1237 + if (err) { 1238 + printf("Failed to load %s program: %d %d", 1239 + MAPINMAP_INVALID_PROG, err, errno); 1240 + goto out_map_in_map; 1241 + } 1242 + 1243 + map = bpf_object__find_map_by_name(obj, "mim"); 1244 + if (!map) { 1245 + printf("Failed to load array of maps from test prog\n"); 1246 + goto out_map_in_map; 1247 + } 1248 + 1249 + old_print_fn = libbpf_set_print(NULL); 1250 + 1251 + err = bpf_object__load(obj); 1252 + if (!err) { 1253 + printf("Loading obj supposed to fail\n"); 1254 + goto out_map_in_map; 1255 + } 1256 + 1257 + libbpf_set_print(old_print_fn); 1258 + 1259 + /* Iterate over all maps to check whether the inner map 1260 + * ("mim.inner") has been destroyed. 
1261 + */ 1262 + while (true) { 1263 + err = bpf_map_get_next_id(id, &id); 1264 + if (err) { 1265 + if (errno == ENOENT) 1266 + break; 1267 + printf("Failed to get next map: %d", errno); 1268 + goto out_map_in_map; 1269 + } 1270 + 1271 + fd = bpf_map_get_fd_by_id(id); 1272 + if (fd < 0) { 1273 + if (errno == ENOENT) 1274 + continue; 1275 + printf("Failed to get map by id %u: %d", id, errno); 1276 + goto out_map_in_map; 1277 + } 1278 + 1279 + err = bpf_obj_get_info_by_fd(fd, &info, &len); 1280 + if (err) { 1281 + printf("Failed to get map info by fd %d: %d", fd, 1282 + errno); 1283 + goto out_map_in_map; 1284 + } 1285 + 1286 + if (!strcmp(info.name, "mim.inner")) { 1287 + printf("Inner map mim.inner was not destroyed\n"); 1288 + goto out_map_in_map; 1289 + } 1290 + } 1291 + 1237 1292 return; 1238 1293 1239 1294 out_map_in_map: 1240 - close(fd); 1295 + if (fd >= 0) 1296 + close(fd); 1241 1297 exit(1); 1242 1298 } 1243 1299

+2 -2

tools/testing/selftests/bpf/test_netcnt.c

··· 33 33 34 34 int main(int argc, char **argv) 35 35 { 36 - struct percpu_net_cnt *percpu_netcnt; 36 + union percpu_net_cnt *percpu_netcnt; 37 37 struct bpf_cgroup_storage_key key; 38 38 int map_fd, percpu_map_fd; 39 39 int error = EXIT_FAILURE; 40 - struct net_cnt netcnt; 41 40 struct bpf_object *obj; 42 41 int prog_fd, cgroup_fd; 43 42 unsigned long packets; 43 + union net_cnt netcnt; 44 44 unsigned long bytes; 45 45 int cpu, nproc; 46 46 __u32 prog_cnt;

+12

tools/testing/selftests/bpf/test_progs.h

··· 221 221 ___ok; \ 222 222 }) 223 223 224 + #define ASSERT_STRNEQ(actual, expected, len, name) ({ \ 225 + static int duration = 0; \ 226 + const char *___act = actual; \ 227 + const char *___exp = expected; \ 228 + int ___len = len; \ 229 + bool ___ok = strncmp(___act, ___exp, ___len) == 0; \ 230 + CHECK(!___ok, (name), \ 231 + "unexpected %s: actual '%.*s' != expected '%.*s'\n", \ 232 + (name), ___len, ___act, ___len, ___exp); \ 233 + ___ok; \ 234 + }) 235 + 224 236 #define ASSERT_OK(res, name) ({ \ 225 237 static int duration = 0; \ 226 238 long long ___res = (res); \

+1 -1

tools/testing/selftests/bpf/test_tc_tunnel.sh

··· 69 69 } 70 70 71 71 server_listen() { 72 - ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" & 72 + ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" & 73 73 server_pid=$! 74 74 sleep 0.2 75 75 }

Configure Feed

Configure Feed