Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'add-support-for-per-route-seg6-tunsrc'

Justin Iurman says:

====================
Add support for per-route seg6 tunsrc

This series adds support for the new per-route seg6 "tunsrc" parameter.
Selftests are extended to make sure it works as expected.

Example with the iproute2-next companion patch:

ip -6 r a 2001:db8:1::/64 encap seg6 mode encap tunsrc 2001:db8:ab:: \
    segs 2001:db8:42::1,2001:db8:ffff::2 dev eth0
====================

Link: https://patch.msgid.link/20260324091434.359341-1-justin.iurman@6wind.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+180 -44
+1
include/uapi/linux/seg6_iptunnel.h
··· 20 20 enum { 21 21 SEG6_IPTUNNEL_UNSPEC, 22 22 SEG6_IPTUNNEL_SRH, 23 + SEG6_IPTUNNEL_SRC, /* struct in6_addr */ 23 24 __SEG6_IPTUNNEL_MAX, 24 25 }; 25 26 #define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1)
+83 -31
net/ipv6/seg6_iptunnel.c
··· 49 49 50 50 struct seg6_lwt { 51 51 struct dst_cache cache; 52 + struct in6_addr tunsrc; 52 53 struct seg6_iptunnel_encap tuninfo[]; 53 54 }; 54 55 ··· 66 65 67 66 static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { 68 67 [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, 68 + [SEG6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 69 69 }; 70 70 71 71 static int nla_put_srh(struct sk_buff *skb, int attrtype, ··· 89 87 } 90 88 91 89 static void set_tun_src(struct net *net, struct net_device *dev, 92 - struct in6_addr *daddr, struct in6_addr *saddr) 90 + struct in6_addr *daddr, struct in6_addr *saddr, 91 + struct in6_addr *route_tunsrc) 93 92 { 94 93 struct seg6_pernet_data *sdata = seg6_pernet(net); 95 94 struct in6_addr *tun_src; 96 95 97 - rcu_read_lock(); 98 - 99 - tun_src = rcu_dereference(sdata->tun_src); 100 - 101 - if (!ipv6_addr_any(tun_src)) { 102 - memcpy(saddr, tun_src, sizeof(struct in6_addr)); 96 + /* Priority order to select tunnel source address: 97 + * 1. per route source address (if configured) 98 + * 2. per network namespace source address (if configured) 99 + * 3. 
dynamic resolution 100 + */ 101 + if (route_tunsrc && !ipv6_addr_any(route_tunsrc)) { 102 + memcpy(saddr, route_tunsrc, sizeof(struct in6_addr)); 103 103 } else { 104 - ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, 105 - saddr); 106 - } 104 + rcu_read_lock(); 105 + tun_src = rcu_dereference(sdata->tun_src); 107 106 108 - rcu_read_unlock(); 107 + if (!ipv6_addr_any(tun_src)) { 108 + memcpy(saddr, tun_src, sizeof(struct in6_addr)); 109 + } else { 110 + ipv6_dev_get_saddr(net, dev, daddr, 111 + IPV6_PREFER_SRC_PUBLIC, saddr); 112 + } 113 + 114 + rcu_read_unlock(); 115 + } 109 116 } 110 117 111 118 /* Compute flowlabel for outer IPv6 header */ ··· 136 125 } 137 126 138 127 static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, 139 - int proto, struct dst_entry *cache_dst) 128 + int proto, struct dst_entry *cache_dst, 129 + struct in6_addr *route_tunsrc) 140 130 { 141 131 struct dst_entry *dst = skb_dst(skb); 142 132 struct net_device *dev = dst_dev(dst); ··· 194 182 isrh->nexthdr = proto; 195 183 196 184 hdr->daddr = isrh->segments[isrh->first_segment]; 197 - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); 185 + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); 198 186 199 187 #ifdef CONFIG_IPV6_SEG6_HMAC 200 188 if (sr_has_hmac(isrh)) { ··· 214 202 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ 215 203 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) 216 204 { 217 - return __seg6_do_srh_encap(skb, osrh, proto, NULL); 205 + return __seg6_do_srh_encap(skb, osrh, proto, NULL, NULL); 218 206 } 219 207 EXPORT_SYMBOL_GPL(seg6_do_srh_encap); 220 208 221 209 /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ 222 210 static int seg6_do_srh_encap_red(struct sk_buff *skb, 223 211 struct ipv6_sr_hdr *osrh, int proto, 224 - struct dst_entry *cache_dst) 212 + struct dst_entry *cache_dst, 213 + struct in6_addr *route_tunsrc) 225 214 { 226 215 
__u8 first_seg = osrh->first_segment; 227 216 struct dst_entry *dst = skb_dst(skb); ··· 285 272 if (skip_srh) { 286 273 hdr->nexthdr = proto; 287 274 288 - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); 275 + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); 289 276 goto out; 290 277 } 291 278 ··· 321 308 322 309 srcaddr: 323 310 isrh->nexthdr = proto; 324 - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); 311 + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); 325 312 326 313 #ifdef CONFIG_IPV6_SEG6_HMAC 327 314 if (unlikely(!skip_srh && sr_has_hmac(isrh))) { ··· 396 383 { 397 384 struct dst_entry *dst = skb_dst(skb); 398 385 struct seg6_iptunnel_encap *tinfo; 386 + struct seg6_lwt *slwt; 399 387 int proto, err = 0; 400 388 401 - tinfo = seg6_encap_lwtunnel(dst->lwtstate); 389 + slwt = seg6_lwt_lwtunnel(dst->lwtstate); 390 + tinfo = slwt->tuninfo; 402 391 403 392 switch (tinfo->mode) { 404 393 case SEG6_IPTUN_MODE_INLINE: ··· 425 410 return -EINVAL; 426 411 427 412 if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) 428 - err = __seg6_do_srh_encap(skb, tinfo->srh, 429 - proto, cache_dst); 413 + err = __seg6_do_srh_encap(skb, tinfo->srh, proto, 414 + cache_dst, &slwt->tunsrc); 430 415 else 431 - err = seg6_do_srh_encap_red(skb, tinfo->srh, 432 - proto, cache_dst); 416 + err = seg6_do_srh_encap_red(skb, tinfo->srh, proto, 417 + cache_dst, &slwt->tunsrc); 433 418 434 419 if (err) 435 420 return err; ··· 451 436 452 437 if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) 453 438 err = __seg6_do_srh_encap(skb, tinfo->srh, 454 - IPPROTO_ETHERNET, 455 - cache_dst); 439 + IPPROTO_ETHERNET, cache_dst, 440 + &slwt->tunsrc); 456 441 else 457 442 err = seg6_do_srh_encap_red(skb, tinfo->srh, 458 - IPPROTO_ETHERNET, 459 - cache_dst); 443 + IPPROTO_ETHERNET, cache_dst, 444 + &slwt->tunsrc); 460 445 461 446 if (err) 462 447 return err; ··· 693 678 if (family != AF_INET6) 694 679 return -EINVAL; 695 680 681 + if (tb[SEG6_IPTUNNEL_SRC]) { 682 + NL_SET_ERR_MSG(extack, 
"incompatible mode for tunsrc"); 683 + return -EINVAL; 684 + } 696 685 break; 697 686 case SEG6_IPTUN_MODE_ENCAP: 698 687 break; ··· 721 702 slwt = seg6_lwt_lwtunnel(newts); 722 703 723 704 err = dst_cache_init(&slwt->cache, GFP_ATOMIC); 724 - if (err) { 725 - kfree(newts); 726 - return err; 727 - } 705 + if (err) 706 + goto free_lwt_state; 728 707 729 708 memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); 709 + 710 + if (tb[SEG6_IPTUNNEL_SRC]) { 711 + slwt->tunsrc = nla_get_in6_addr(tb[SEG6_IPTUNNEL_SRC]); 712 + 713 + if (ipv6_addr_any(&slwt->tunsrc) || 714 + ipv6_addr_is_multicast(&slwt->tunsrc) || 715 + ipv6_addr_loopback(&slwt->tunsrc)) { 716 + NL_SET_ERR_MSG(extack, "invalid tunsrc address"); 717 + err = -EINVAL; 718 + goto free_dst_cache; 719 + } 720 + } 730 721 731 722 newts->type = LWTUNNEL_ENCAP_SEG6; 732 723 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; ··· 749 720 *ts = newts; 750 721 751 722 return 0; 723 + 724 + free_dst_cache: 725 + dst_cache_destroy(&slwt->cache); 726 + free_lwt_state: 727 + kfree(newts); 728 + return err; 752 729 } 753 730 754 731 static void seg6_destroy_state(struct lwtunnel_state *lwt) ··· 766 731 struct lwtunnel_state *lwtstate) 767 732 { 768 733 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 734 + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); 769 735 770 736 if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) 737 + return -EMSGSIZE; 738 + 739 + if (!ipv6_addr_any(&slwt->tunsrc) && 740 + nla_put_in6_addr(skb, SEG6_IPTUNNEL_SRC, &slwt->tunsrc)) 771 741 return -EMSGSIZE; 772 742 773 743 return 0; ··· 781 741 static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) 782 742 { 783 743 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); 744 + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); 745 + int nlsize; 784 746 785 - return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); 747 + nlsize = nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); 748 + 749 + if (!ipv6_addr_any(&slwt->tunsrc)) 
750 + nlsize += nla_total_size(sizeof(slwt->tunsrc)); 751 + 752 + return nlsize; 786 753 } 787 754 788 755 static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 789 756 { 790 757 struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); 791 758 struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); 759 + struct seg6_lwt *a_slwt = seg6_lwt_lwtunnel(a); 760 + struct seg6_lwt *b_slwt = seg6_lwt_lwtunnel(b); 792 761 int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); 793 762 794 763 if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) 764 + return 1; 765 + 766 + if (!ipv6_addr_equal(&a_slwt->tunsrc, &b_slwt->tunsrc)) 795 767 return 1; 796 768 797 769 return memcmp(a_hdr, b_hdr, len);
+96 -13
tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
··· 193 193 nsuccess=0 194 194 nfail=0 195 195 196 + HAS_TUNSRC=false 197 + 196 198 log_test() 197 199 { 198 200 local rc="$1" ··· 347 345 ip -netns "${nsname}" addr \ 348 346 add "${net_prefix}::${rt}/64" dev "${devname}" nodad 349 347 348 + # A dedicated ::dead:<rt> address (with preferred_lft 0, i.e., 349 + # deprecated) is added when there is support for tunsrc. Because 350 + # it is deprecated, the kernel should never auto-select it as 351 + # source with current config. Only an explicit tunsrc can place 352 + # it in the outer header. 353 + if $HAS_TUNSRC; then 354 + ip -netns "${nsname}" addr \ 355 + add "${net_prefix}::dead:${rt}/64" \ 356 + dev "${devname}" nodad preferred_lft 0 357 + fi 358 + 350 359 ip -netns "${nsname}" link set "${devname}" up 351 360 done 352 361 ··· 433 420 # to the destination host) 434 421 # $5 - encap mode (full or red) 435 422 # $6 - traffic type (IPv6 or IPv4) 423 + # $7 - force tunsrc (true or false) 436 424 __setup_rt_policy() 437 425 { 438 426 local dst="$1" ··· 442 428 local dec_rt="$4" 443 429 local mode="$5" 444 430 local traffic="$6" 431 + local with_tunsrc="$7" 445 432 local nsname 446 433 local policy='' 434 + local tunsrc='' 447 435 local n 436 + 437 + # Verify the per-route tunnel source address ("tunsrc") feature. 438 + # If it is not supported, fallback on encap config without tunsrc. 
439 + if $with_tunsrc && $HAS_TUNSRC; then 440 + local net_prefix 441 + local drule 442 + local nxt 443 + 444 + eval nsname=\${$(get_rtname "${dec_rt}")} 445 + 446 + # Next SRv6 hop: first End router if any, or the decap router 447 + [ -z "${end_rts}" ] && nxt="${dec_rt}" || nxt="${end_rts%% *}" 448 + 449 + # Use the right prefix for tunsrc depending on the next SRv6 hop 450 + net_prefix="$(get_network_prefix "${encap_rt}" "${nxt}")" 451 + tunsrc="tunsrc ${net_prefix}::dead:${encap_rt}" 452 + 453 + # To verify that the outer source address matches the one 454 + # configured with tunsrc, the decap router discards packets 455 + # with any other source address. 456 + ip netns exec "${nsname}" ip6tables -t raw -I PREROUTING 1 \ 457 + -s "${net_prefix}::dead:${encap_rt}" \ 458 + -d "${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" \ 459 + -j ACCEPT 460 + 461 + drule="PREROUTING \ 462 + -d ${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC} \ 463 + -j DROP" 464 + 465 + if ! ip netns exec "${nsname}" \ 466 + ip6tables -t raw -C ${drule} &>/dev/null; then 467 + ip netns exec "${nsname}" ip6tables -t raw -A ${drule} 468 + fi 469 + fi 448 470 449 471 eval nsname=\${$(get_rtname "${encap_rt}")} 450 472 ··· 494 444 if [ "${traffic}" -eq 6 ]; then 495 445 ip -netns "${nsname}" -6 route \ 496 446 add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ 497 - encap seg6 mode "${mode}" segs "${policy}" \ 447 + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ 498 448 dev "${VRF_DEVNAME}" 499 449 500 450 ip -netns "${nsname}" -6 neigh \ ··· 505 455 # received, otherwise the proxy arp does not work. 
506 456 ip -netns "${nsname}" -4 route \ 507 457 add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ 508 - encap seg6 mode "${mode}" segs "${policy}" \ 458 + encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \ 509 459 dev "${VRF_DEVNAME}" 510 460 fi 511 461 } ··· 513 463 # see __setup_rt_policy 514 464 setup_rt_policy_ipv6() 515 465 { 516 - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 466 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 "$6" 517 467 } 518 468 519 469 #see __setup_rt_policy 520 470 setup_rt_policy_ipv4() 521 471 { 522 - __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 472 + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 "$6" 523 473 } 524 474 525 475 setup_hs() ··· 617 567 # the network path between hs-1 and hs-2 traverses several routers 618 568 # depending on the direction of traffic. 619 569 # 620 - # Direction hs-1 -> hs-2 (H.Encaps.Red) 570 + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) 621 571 # - rt-3,rt-4 (SRv6 End behaviors) 622 572 # - rt-2 (SRv6 End.DT46 behavior) 623 573 # 624 574 # Direction hs-2 -> hs-1 (H.Encaps.Red) 625 575 # - rt-1 (SRv6 End.DT46 behavior) 626 - setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red 627 - setup_rt_policy_ipv6 1 2 "" 1 encap.red 576 + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red true 577 + setup_rt_policy_ipv6 1 2 "" 1 encap.red false 628 578 629 579 # create an IPv4 VPN between hosts hs-1 and hs-2 630 580 # the network path between hs-1 and hs-2 traverses several routers 631 581 # depending on the direction of traffic. 
632 582 # 633 - # Direction hs-1 -> hs-2 (H.Encaps.Red) 583 + # Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc) 634 584 # - rt-2 (SRv6 End.DT46 behavior) 635 585 # 636 586 # Direction hs-2 -> hs-1 (H.Encaps.Red) 637 587 # - rt-4,rt-3 (SRv6 End behaviors) 638 588 # - rt-1 (SRv6 End.DT46 behavior) 639 - setup_rt_policy_ipv4 2 1 "" 2 encap.red 640 - setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red 589 + setup_rt_policy_ipv4 2 1 "" 2 encap.red true 590 + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red false 641 591 642 592 # create an IPv6 VPN between hosts hs-3 and hs-4 643 593 # the network path between hs-3 and hs-4 traverses several routers 644 594 # depending on the direction of traffic. 645 595 # 646 - # Direction hs-3 -> hs-4 (H.Encaps.Red) 596 + # Direction hs-3 -> hs-4 (H.Encaps.Red + tunsrc) 647 597 # - rt-2 (SRv6 End Behavior) 648 598 # - rt-4 (SRv6 End.DT46 behavior) 649 599 # 650 600 # Direction hs-4 -> hs-3 (H.Encaps.Red) 651 601 # - rt-1 (SRv6 End behavior) 652 602 # - rt-3 (SRv6 End.DT46 behavior) 653 - setup_rt_policy_ipv6 4 3 "2" 4 encap.red 654 - setup_rt_policy_ipv6 3 4 "1" 3 encap.red 603 + setup_rt_policy_ipv6 4 3 "2" 4 encap.red true 604 + setup_rt_policy_ipv6 3 4 "1" 3 encap.red false 655 605 656 606 # testing environment was set up successfully 657 607 SETUP_ERR=0 ··· 859 809 fi 860 810 } 861 811 812 + # Before enabling tunsrc tests, make sure tunsrc and ip6tables are supported. 813 + check_tunsrc_support() 814 + { 815 + setup_ns tunsrc_ns 816 + 817 + ip -netns "${tunsrc_ns}" link add veth0 type veth \ 818 + peer name veth1 netns "${tunsrc_ns}" 819 + 820 + ip -netns "${tunsrc_ns}" link set veth0 up 821 + 822 + if ! ip -netns "${tunsrc_ns}" -6 route add fc00::dead:beef/128 \ 823 + encap seg6 mode encap.red tunsrc fc00::1 segs fc00::2 \ 824 + dev veth0 &>/dev/null; then 825 + cleanup_ns "${tunsrc_ns}" 826 + return 827 + fi 828 + 829 + if ! 
ip -netns "${tunsrc_ns}" -6 route show | grep -q "tunsrc"; then 830 + cleanup_ns "${tunsrc_ns}" 831 + return 832 + fi 833 + 834 + if ! ip netns exec "${tunsrc_ns}" ip6tables -t raw -A PREROUTING \ 835 + -d fc00::dead:beef -j DROP &>/dev/null; then 836 + cleanup_ns "${tunsrc_ns}" 837 + return 838 + fi 839 + 840 + cleanup_ns "${tunsrc_ns}" 841 + HAS_TUNSRC=true 842 + } 843 + 862 844 if [ "$(id -u)" -ne 0 ]; then 863 845 echo "SKIP: Need root privileges" 864 846 exit "${ksft_skip}" ··· 908 826 set -e 909 827 trap cleanup EXIT 910 828 829 + check_tunsrc_support 911 830 setup 912 831 set +e 913 832