Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'add-bpf_xdp_get_xfrm_state-kfunc'

Daniel Xu says:

====================
Add bpf_xdp_get_xfrm_state() kfunc

This patchset adds two kfunc helpers, bpf_xdp_get_xfrm_state() and
bpf_xdp_xfrm_state_release(), that wrap xfrm_state_lookup() and
xfrm_state_put(). The intent is to support software RSS (via XDP) for
the ongoing/upcoming IPsec per-CPU (pcpu) work [0]. Recent experiments
performed on (hopefully) reproducible AWS testbeds indicate that
single-tunnel pcpu IPsec can reach line rate on 100G ENA NICs.

Note that this patchset only tests and demonstrates generic xfrm_state
access. The "secret sauce" (if you can really even call it that) involves
accessing a soon-to-be-upstreamed pcpu_num field in xfrm_state. An early
example is available at [1].

[0]: https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/03/
[1]: https://github.com/danobi/xdp-tools/blob/e89a1c617aba3b50d990f779357d6ce2863ecb27/xdp-bench/xdp_redirect_cpumap.bpf.c#L385-L406

Changes from v5:
* Improve kfunc doc comments
* Remove extraneous replay-window setting on selftest reverse path
* Squash two kfunc commits into one
* Rebase to bpf-next to pick up bitfield write patches
* Remove testing of opts.error in selftest prog

Changes from v4:
* Fixup commit message for selftest
* Set opts->error -ENOENT for !x
* Revert the v3 change that placed all xfrm + bpf integrations in a single file

Changes from v3:
* Place all xfrm bpf integrations in xfrm_bpf.c
* Avoid using nval as a temporary
* Rebase to bpf-next
* Remove extraneous __failure_unpriv annotation for verifier tests

Changes from v2:
* Fix/simplify BPF_CORE_WRITE_BITFIELD() algorithm
* Added verifier tests for bitfield writes
* Fix state leakage across test_tunnel subtests

Changes from v1:
* Move xfrm tunnel tests to test_progs
* Fix writing to opts->error when opts is invalid
* Use __bpf_kfunc_start_defs()
* Remove unused vxlanhdr definition
* Add and use BPF_CORE_WRITE_BITFIELD() macro
* Make series bisect clean

Changes from RFCv2:
* Rebased to ipsec-next
* Fix netns leak

Changes from RFCv1:
* Add Antony's commit tags
* Add KF_ACQUIRE and KF_RELEASE semantics
====================

Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Link: https://lore.kernel.org/r/cover.1702593901.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+384 -155
+9
include/net/xfrm.h
··· 2190 2190 2191 2191 #endif 2192 2192 2193 + #if IS_ENABLED(CONFIG_DEBUG_INFO_BTF) 2194 + int register_xfrm_state_bpf(void); 2195 + #else 2196 + static inline int register_xfrm_state_bpf(void) 2197 + { 2198 + return 0; 2199 + } 2200 + #endif 2201 + 2193 2202 #endif /* _NET_XFRM_H */
+1
net/xfrm/Makefile
··· 21 21 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o 22 22 obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o 23 23 obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o 24 + obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
+2
net/xfrm/xfrm_policy.c
··· 4218 4218 #ifdef CONFIG_XFRM_ESPINTCP 4219 4219 espintcp_init(); 4220 4220 #endif 4221 + 4222 + register_xfrm_state_bpf(); 4221 4223 } 4222 4224 4223 4225 #ifdef CONFIG_AUDITSYSCALL
+134
net/xfrm/xfrm_state_bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Unstable XFRM state BPF helpers. 3 + * 4 + * Note that it is allowed to break compatibility for these functions since the 5 + * interface they are exposed through to BPF programs is explicitly unstable. 6 + */ 7 + 8 + #include <linux/bpf.h> 9 + #include <linux/btf.h> 10 + #include <linux/btf_ids.h> 11 + #include <net/xdp.h> 12 + #include <net/xfrm.h> 13 + 14 + /* bpf_xfrm_state_opts - Options for XFRM state lookup helpers 15 + * 16 + * Members: 17 + * @error - Out parameter, set for any errors encountered 18 + * Values: 19 + * -EINVAL - netns_id is less than -1 20 + * -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ 21 + * -ENONET - No network namespace found for netns_id 22 + * -ENOENT - No xfrm_state found 23 + * @netns_id - Specify the network namespace for lookup 24 + * Values: 25 + * BPF_F_CURRENT_NETNS (-1) 26 + * Use namespace associated with ctx 27 + * [0, S32_MAX] 28 + * Network Namespace ID 29 + * @mark - XFRM mark to match on 30 + * @daddr - Destination address to match on 31 + * @spi - Security parameter index to match on 32 + * @proto - IP protocol to match on (eg. IPPROTO_ESP) 33 + * @family - Protocol family to match on (AF_INET/AF_INET6) 34 + */ 35 + struct bpf_xfrm_state_opts { 36 + s32 error; 37 + s32 netns_id; 38 + u32 mark; 39 + xfrm_address_t daddr; 40 + __be32 spi; 41 + u8 proto; 42 + u16 family; 43 + }; 44 + 45 + enum { 46 + BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts), 47 + }; 48 + 49 + __bpf_kfunc_start_defs(); 50 + 51 + /* bpf_xdp_get_xfrm_state - Get XFRM state 52 + * 53 + * A `struct xfrm_state *`, if found, must be released with a corresponding 54 + * bpf_xdp_xfrm_state_release. 
55 + * 56 + * Parameters: 57 + * @ctx - Pointer to ctx (xdp_md) in XDP program 58 + * Cannot be NULL 59 + * @opts - Options for lookup (documented above) 60 + * Cannot be NULL 61 + * @opts__sz - Length of the bpf_xfrm_state_opts structure 62 + * Must be BPF_XFRM_STATE_OPTS_SZ 63 + */ 64 + __bpf_kfunc struct xfrm_state * 65 + bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz) 66 + { 67 + struct xdp_buff *xdp = (struct xdp_buff *)ctx; 68 + struct net *net = dev_net(xdp->rxq->dev); 69 + struct xfrm_state *x; 70 + 71 + if (!opts || opts__sz < sizeof(opts->error)) 72 + return NULL; 73 + 74 + if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) { 75 + opts->error = -EINVAL; 76 + return NULL; 77 + } 78 + 79 + if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) { 80 + opts->error = -EINVAL; 81 + return NULL; 82 + } 83 + 84 + if (opts->netns_id >= 0) { 85 + net = get_net_ns_by_id(net, opts->netns_id); 86 + if (unlikely(!net)) { 87 + opts->error = -ENONET; 88 + return NULL; 89 + } 90 + } 91 + 92 + x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi, 93 + opts->proto, opts->family); 94 + 95 + if (opts->netns_id >= 0) 96 + put_net(net); 97 + if (!x) 98 + opts->error = -ENOENT; 99 + 100 + return x; 101 + } 102 + 103 + /* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object 104 + * 105 + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects 106 + * the program if any references remain in the program in all of the explored 107 + * states. 108 + * 109 + * Parameters: 110 + * @x - Pointer to referenced xfrm_state object, obtained using 111 + * bpf_xdp_get_xfrm_state. 
112 + */ 113 + __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) 114 + { 115 + xfrm_state_put(x); 116 + } 117 + 118 + __bpf_kfunc_end_defs(); 119 + 120 + BTF_SET8_START(xfrm_state_kfunc_set) 121 + BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) 122 + BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) 123 + BTF_SET8_END(xfrm_state_kfunc_set) 124 + 125 + static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { 126 + .owner = THIS_MODULE, 127 + .set = &xfrm_state_kfunc_set, 128 + }; 129 + 130 + int __init register_xfrm_state_bpf(void) 131 + { 132 + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, 133 + &xfrm_state_xdp_kfunc_set); 134 + }
+157 -5
tools/testing/selftests/bpf/prog_tests/test_tunnel.c
··· 50 50 */ 51 51 52 52 #include <arpa/inet.h> 53 + #include <linux/if_link.h> 53 54 #include <linux/if_tun.h> 54 55 #include <linux/limits.h> 55 56 #include <linux/sysctl.h> ··· 92 91 93 92 #define IPIP_TUNL_DEV0 "ipip00" 94 93 #define IPIP_TUNL_DEV1 "ipip11" 94 + 95 + #define XFRM_AUTH "0x1111111111111111111111111111111111111111" 96 + #define XFRM_ENC "0x22222222222222222222222222222222" 97 + #define XFRM_SPI_IN_TO_OUT 0x1 98 + #define XFRM_SPI_OUT_TO_IN 0x2 95 99 96 100 #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" 97 101 ··· 268 262 SYS_NOFAIL("ip -n at_ns0 fou del port 5555 2> /dev/null"); 269 263 SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1); 270 264 SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); 265 + } 266 + 267 + static int add_xfrm_tunnel(void) 268 + { 269 + /* at_ns0 namespace 270 + * at_ns0 -> root 271 + */ 272 + SYS(fail, 273 + "ip netns exec at_ns0 " 274 + "ip xfrm state add src %s dst %s proto esp " 275 + "spi %d reqid 1 mode tunnel replay-window 42 " 276 + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", 277 + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); 278 + SYS(fail, 279 + "ip netns exec at_ns0 " 280 + "ip xfrm policy add src %s/32 dst %s/32 dir out " 281 + "tmpl src %s dst %s proto esp reqid 1 " 282 + "mode tunnel", 283 + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); 284 + 285 + /* root -> at_ns0 */ 286 + SYS(fail, 287 + "ip netns exec at_ns0 " 288 + "ip xfrm state add src %s dst %s proto esp " 289 + "spi %d reqid 2 mode tunnel " 290 + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", 291 + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); 292 + SYS(fail, 293 + "ip netns exec at_ns0 " 294 + "ip xfrm policy add src %s/32 dst %s/32 dir in " 295 + "tmpl src %s dst %s proto esp reqid 2 " 296 + "mode tunnel", 297 + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); 298 + 299 + /* address & route */ 300 + SYS(fail, "ip netns exec at_ns0 ip 
addr add dev veth0 %s/32", 301 + IP4_ADDR_TUNL_DEV0); 302 + SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s", 303 + IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0); 304 + 305 + /* root namespace 306 + * at_ns0 -> root 307 + */ 308 + SYS(fail, 309 + "ip xfrm state add src %s dst %s proto esp " 310 + "spi %d reqid 1 mode tunnel replay-window 42 " 311 + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", 312 + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); 313 + SYS(fail, 314 + "ip xfrm policy add src %s/32 dst %s/32 dir in " 315 + "tmpl src %s dst %s proto esp reqid 1 " 316 + "mode tunnel", 317 + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); 318 + 319 + /* root -> at_ns0 */ 320 + SYS(fail, 321 + "ip xfrm state add src %s dst %s proto esp " 322 + "spi %d reqid 2 mode tunnel " 323 + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", 324 + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); 325 + SYS(fail, 326 + "ip xfrm policy add src %s/32 dst %s/32 dir out " 327 + "tmpl src %s dst %s proto esp reqid 2 " 328 + "mode tunnel", 329 + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); 330 + 331 + /* address & route */ 332 + SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1); 333 + SYS(fail, "ip route add %s dev veth1 via %s src %s", 334 + IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1); 335 + 336 + return 0; 337 + fail: 338 + return -1; 339 + } 340 + 341 + static void delete_xfrm_tunnel(void) 342 + { 343 + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", 344 + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); 345 + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", 346 + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); 347 + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", 348 + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); 349 + SYS_NOFAIL("ip xfrm 
state delete src %s dst %s proto esp spi %d 2> /dev/null", 350 + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); 271 351 } 272 352 273 353 static int test_ping(int family, const char *addr) ··· 624 532 test_tunnel_kern__destroy(skel); 625 533 } 626 534 535 + static void test_xfrm_tunnel(void) 536 + { 537 + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, 538 + .attach_point = BPF_TC_INGRESS); 539 + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); 540 + struct test_tunnel_kern *skel = NULL; 541 + struct nstoken *nstoken; 542 + int xdp_prog_fd; 543 + int tc_prog_fd; 544 + int ifindex; 545 + int err; 546 + 547 + err = add_xfrm_tunnel(); 548 + if (!ASSERT_OK(err, "add_xfrm_tunnel")) 549 + return; 550 + 551 + skel = test_tunnel_kern__open_and_load(); 552 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 553 + goto done; 554 + 555 + ifindex = if_nametoindex("veth1"); 556 + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) 557 + goto done; 558 + 559 + /* attach tc prog to tunnel dev */ 560 + tc_hook.ifindex = ifindex; 561 + tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); 562 + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) 563 + goto done; 564 + if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) 565 + goto done; 566 + 567 + /* attach xdp prog to tunnel dev */ 568 + xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); 569 + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) 570 + goto done; 571 + err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts); 572 + if (!ASSERT_OK(err, "bpf_xdp_attach")) 573 + goto done; 574 + 575 + /* ping from at_ns0 namespace test */ 576 + nstoken = open_netns("at_ns0"); 577 + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); 578 + close_netns(nstoken); 579 + if (!ASSERT_OK(err, "test_ping")) 580 + goto done; 581 + 582 + if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) 583 + goto done; 584 + if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi")) 585 + goto done; 586 + if 
(!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) 587 + goto done; 588 + if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window")) 589 + goto done; 590 + 591 + done: 592 + delete_xfrm_tunnel(); 593 + if (skel) 594 + test_tunnel_kern__destroy(skel); 595 + } 596 + 627 597 #define RUN_TEST(name, ...) \ 628 598 ({ \ 629 599 if (test__start_subtest(#name)) { \ 600 + config_device(); \ 630 601 test_ ## name(__VA_ARGS__); \ 602 + cleanup(); \ 631 603 } \ 632 604 }) 633 605 634 606 static void *test_tunnel_run_tests(void *arg) 635 607 { 636 - cleanup(); 637 - config_device(); 638 - 639 608 RUN_TEST(vxlan_tunnel); 640 609 RUN_TEST(ip6vxlan_tunnel); 641 610 RUN_TEST(ipip_tunnel, NONE); 642 611 RUN_TEST(ipip_tunnel, FOU); 643 612 RUN_TEST(ipip_tunnel, GUE); 644 - 645 - cleanup(); 613 + RUN_TEST(xfrm_tunnel); 646 614 647 615 return NULL; 648 616 }
+1
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
··· 26 26 #define IPV6_AUTOFLOWLABEL 70 27 27 28 28 #define TC_ACT_UNSPEC (-1) 29 + #define TC_ACT_OK 0 29 30 #define TC_ACT_SHOT 2 30 31 31 32 #define SOL_TCP 6
+80 -58
tools/testing/selftests/bpf/progs/test_tunnel_kern.c
··· 6 6 * modify it under the terms of version 2 of the GNU General Public 7 7 * License as published by the Free Software Foundation. 8 8 */ 9 - #include <stddef.h> 10 - #include <string.h> 11 - #include <arpa/inet.h> 12 - #include <linux/bpf.h> 13 - #include <linux/if_ether.h> 14 - #include <linux/if_packet.h> 15 - #include <linux/if_tunnel.h> 16 - #include <linux/ip.h> 17 - #include <linux/ipv6.h> 18 - #include <linux/icmp.h> 19 - #include <linux/types.h> 20 - #include <linux/socket.h> 21 - #include <linux/pkt_cls.h> 22 - #include <linux/erspan.h> 23 - #include <linux/udp.h> 9 + #include "vmlinux.h" 10 + #include <bpf/bpf_core_read.h> 24 11 #include <bpf/bpf_helpers.h> 25 12 #include <bpf/bpf_endian.h> 13 + #include "bpf_kfuncs.h" 14 + #include "bpf_tracing_net.h" 26 15 27 16 #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) 28 17 29 - #define VXLAN_UDP_PORT 4789 18 + #define VXLAN_UDP_PORT 4789 19 + #define ETH_P_IP 0x0800 20 + #define PACKET_HOST 0 21 + #define TUNNEL_CSUM bpf_htons(0x01) 22 + #define TUNNEL_KEY bpf_htons(0x04) 30 23 31 24 /* Only IPv4 address assigned to veth1. 
32 25 * 172.16.1.200 33 26 */ 34 27 #define ASSIGNED_ADDR_VETH1 0xac1001c8 35 28 36 - struct geneve_opt { 37 - __be16 opt_class; 38 - __u8 type; 39 - __u8 length:5; 40 - __u8 r3:1; 41 - __u8 r2:1; 42 - __u8 r1:1; 43 - __u8 opt_data[8]; /* hard-coded to 8 byte */ 44 - }; 45 - 46 - struct vxlanhdr { 47 - __be32 vx_flags; 48 - __be32 vx_vni; 49 - } __attribute__((packed)); 50 - 51 - struct vxlan_metadata { 52 - __u32 gbp; 53 - }; 54 - 55 - struct bpf_fou_encap { 56 - __be16 sport; 57 - __be16 dport; 58 - }; 59 - 60 - enum bpf_fou_encap_type { 61 - FOU_BPF_ENCAP_FOU, 62 - FOU_BPF_ENCAP_GUE, 63 - }; 64 - 65 29 int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, 66 30 struct bpf_fou_encap *encap, int type) __ksym; 67 31 int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, 68 32 struct bpf_fou_encap *encap) __ksym; 33 + struct xfrm_state * 34 + bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, 35 + u32 opts__sz) __ksym; 36 + void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym; 69 37 70 38 struct { 71 39 __uint(type, BPF_MAP_TYPE_ARRAY); ··· 173 205 __u8 hwid = 7; 174 206 175 207 md.version = 2; 176 - md.u.md2.dir = direction; 177 - md.u.md2.hwid = hwid & 0xf; 178 - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; 208 + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); 209 + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); 210 + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); 179 211 #endif 180 212 181 213 ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); ··· 214 246 bpf_printk("\tindex %x\n", index); 215 247 #else 216 248 bpf_printk("\tdirection %d hwid %x timestamp %u\n", 217 - md.u.md2.dir, 218 - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, 249 + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), 250 + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + 251 + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), 219 252 bpf_ntohl(md.u.md2.timestamp)); 220 253 #endif 221 254 ··· 253 284 __u8 hwid = 17; 254 285 255 286 md.version = 2; 
256 - md.u.md2.dir = direction; 257 - md.u.md2.hwid = hwid & 0xf; 258 - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; 287 + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); 288 + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); 289 + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); 259 290 #endif 260 291 261 292 ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); ··· 295 326 bpf_printk("\tindex %x\n", index); 296 327 #else 297 328 bpf_printk("\tdirection %d hwid %x timestamp %u\n", 298 - md.u.md2.dir, 299 - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, 329 + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), 330 + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + 331 + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), 300 332 bpf_ntohl(md.u.md2.timestamp)); 301 333 #endif 302 334 ··· 933 963 return TC_ACT_OK; 934 964 } 935 965 966 + volatile int xfrm_reqid = 0; 967 + volatile int xfrm_spi = 0; 968 + volatile int xfrm_remote_ip = 0; 969 + 936 970 SEC("tc") 937 971 int xfrm_get_state(struct __sk_buff *skb) 938 972 { ··· 947 973 if (ret < 0) 948 974 return TC_ACT_OK; 949 975 950 - bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", 951 - x.reqid, bpf_ntohl(x.spi), 952 - bpf_ntohl(x.remote_ipv4)); 976 + xfrm_reqid = x.reqid; 977 + xfrm_spi = bpf_ntohl(x.spi); 978 + xfrm_remote_ip = bpf_ntohl(x.remote_ipv4); 979 + 953 980 return TC_ACT_OK; 981 + } 982 + 983 + volatile int xfrm_replay_window = 0; 984 + 985 + SEC("xdp") 986 + int xfrm_get_state_xdp(struct xdp_md *xdp) 987 + { 988 + struct bpf_xfrm_state_opts opts = {}; 989 + struct xfrm_state *x = NULL; 990 + struct ip_esp_hdr *esph; 991 + struct bpf_dynptr ptr; 992 + u8 esph_buf[8] = {}; 993 + u8 iph_buf[20] = {}; 994 + struct iphdr *iph; 995 + u32 off; 996 + 997 + if (bpf_dynptr_from_xdp(xdp, 0, &ptr)) 998 + goto out; 999 + 1000 + off = sizeof(struct ethhdr); 1001 + iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf)); 1002 + if (!iph || iph->protocol != IPPROTO_ESP) 1003 + goto out; 1004 + 1005 + off += 
sizeof(struct iphdr); 1006 + esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf)); 1007 + if (!esph) 1008 + goto out; 1009 + 1010 + opts.netns_id = BPF_F_CURRENT_NETNS; 1011 + opts.daddr.a4 = iph->daddr; 1012 + opts.spi = esph->spi; 1013 + opts.proto = IPPROTO_ESP; 1014 + opts.family = AF_INET; 1015 + 1016 + x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts)); 1017 + if (!x) 1018 + goto out; 1019 + 1020 + if (!x->replay_esn) 1021 + goto out; 1022 + 1023 + xfrm_replay_window = x->replay_esn->replay_window; 1024 + out: 1025 + if (x) 1026 + bpf_xdp_xfrm_state_release(x); 1027 + return XDP_PASS; 954 1028 } 955 1029 956 1030 char _license[] SEC("license") = "GPL";
-92
tools/testing/selftests/bpf/test_tunnel.sh
··· 517 517 echo -e ${GREEN}"PASS: ip6$TYPE"${NC} 518 518 } 519 519 520 - setup_xfrm_tunnel() 521 - { 522 - auth=0x$(printf '1%.0s' {1..40}) 523 - enc=0x$(printf '2%.0s' {1..32}) 524 - spi_in_to_out=0x1 525 - spi_out_to_in=0x2 526 - # at_ns0 namespace 527 - # at_ns0 -> root 528 - ip netns exec at_ns0 \ 529 - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ 530 - spi $spi_in_to_out reqid 1 mode tunnel \ 531 - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc 532 - ip netns exec at_ns0 \ 533 - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ 534 - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ 535 - mode tunnel 536 - # root -> at_ns0 537 - ip netns exec at_ns0 \ 538 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ 539 - spi $spi_out_to_in reqid 2 mode tunnel \ 540 - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc 541 - ip netns exec at_ns0 \ 542 - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ 543 - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ 544 - mode tunnel 545 - # address & route 546 - ip netns exec at_ns0 \ 547 - ip addr add dev veth0 10.1.1.100/32 548 - ip netns exec at_ns0 \ 549 - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ 550 - src 10.1.1.100 551 - 552 - # root namespace 553 - # at_ns0 -> root 554 - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ 555 - spi $spi_in_to_out reqid 1 mode tunnel \ 556 - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc 557 - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ 558 - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ 559 - mode tunnel 560 - # root -> at_ns0 561 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ 562 - spi $spi_out_to_in reqid 2 mode tunnel \ 563 - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc 564 - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ 565 - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ 566 - 
mode tunnel 567 - # address & route 568 - ip addr add dev veth1 10.1.1.200/32 569 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 570 - } 571 - 572 - test_xfrm_tunnel() 573 - { 574 - if [[ -e /sys/kernel/tracing/trace ]]; then 575 - TRACE=/sys/kernel/tracing/trace 576 - else 577 - TRACE=/sys/kernel/debug/tracing/trace 578 - fi 579 - config_device 580 - > ${TRACE} 581 - setup_xfrm_tunnel 582 - mkdir -p ${BPF_PIN_TUNNEL_DIR} 583 - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} 584 - tc qdisc add dev veth1 clsact 585 - tc filter add dev veth1 proto ip ingress bpf da object-pinned \ 586 - ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state 587 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 588 - sleep 1 589 - grep "reqid 1" ${TRACE} 590 - check_err $? 591 - grep "spi 0x1" ${TRACE} 592 - check_err $? 593 - grep "remote ip 0xac100164" ${TRACE} 594 - check_err $? 595 - cleanup 596 - 597 - if [ $ret -ne 0 ]; then 598 - echo -e ${RED}"FAIL: xfrm tunnel"${NC} 599 - return 1 600 - fi 601 - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} 602 - } 603 - 604 520 attach_bpf() 605 521 { 606 522 DEV=$1 ··· 546 630 ip link del ip6geneve11 2> /dev/null 547 631 ip link del erspan11 2> /dev/null 548 632 ip link del ip6erspan11 2> /dev/null 549 - ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null 550 - ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null 551 - ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null 552 - ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null 553 633 } 554 634 555 635 cleanup_exit() ··· 626 714 627 715 echo "Testing IP6IP6 tunnel..." 628 716 test_ip6ip6 629 - errors=$(( $errors + $? )) 630 - 631 - echo "Testing IPSec tunnel..." 632 - test_xfrm_tunnel 633 717 errors=$(( $errors + $? )) 634 718 635 719 return $errors