Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nf-next-25-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following batch contains Netfilter updates for net-next,
specifically 26 patches: 5 patches adding/updating selftests,
4 fixes, 3 PREEMPT_RT fixes, and 14 patches to enhance nf_tables:

1) Improve selftest coverage for the pipapo 4-bit group format, from
Florian Westphal.

2) Fix incorrect dependencies when compiling a kernel without
legacy ip{6}tables support, also from Florian.

3) Two patches to fix nft_fib vrf issues, including selftest updates
to improve coverage, also from Florian Westphal.

4) Fix incorrect nesting in nft_tunnel's GENEVE support, from
Fernando F. Mancera.

5) Three patches to fix PREEMPT_RT issues in the nf_dup infrastructure
and in nft_inner, which matches on inner headers, from Sebastian Andrzej Siewior.

6) Integrate conntrack information into nft trace infrastructure,
from Florian Westphal.

7) A series of 13 patches to allow specifying wildcard netdevices in
netdev basechains and flowtables, e.g.

table netdev filter {
chain ingress {
type filter hook ingress devices = { eth0, eth1, vlan* } priority 0; policy accept;
}
}

This also allows for runtime hook registration on NETDEV_{UN}REGISTER
events, from Phil Sutter.

netfilter pull request 25-05-23

* tag 'nf-next-25-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: (26 commits)
selftests: netfilter: Torture nftables netdev hooks
netfilter: nf_tables: Add notifications for hook changes
netfilter: nf_tables: Support wildcard netdev hook specs
netfilter: nf_tables: Sort labels in nft_netdev_hook_alloc()
netfilter: nf_tables: Handle NETDEV_CHANGENAME events
netfilter: nf_tables: Wrap netdev notifiers
netfilter: nf_tables: Respect NETDEV_REGISTER events
netfilter: nf_tables: Prepare for handling NETDEV_REGISTER events
netfilter: nf_tables: Have a list of nf_hook_ops in nft_hook
netfilter: nf_tables: Pass nf_hook_ops to nft_unregister_flowtable_hook()
netfilter: nf_tables: Introduce nft_register_flowtable_ops()
netfilter: nf_tables: Introduce nft_hook_find_ops{,_rcu}()
netfilter: nf_tables: Introduce functions freeing nft_hook objects
netfilter: nf_tables: add packets conntrack state to debug trace info
netfilter: conntrack: make nf_conntrack_id callable without a module dependency
netfilter: nf_dup_netdev: Move the recursion counter struct netdev_xmit
netfilter: nft_inner: Use nested-BH locking for nft_pcpu_tun_ctx
netfilter: nf_dup{4, 6}: Move duplication check to task_struct
netfilter: nft_tunnel: fix geneve_opt dump
selftests: netfilter: nft_fib.sh: add type and oif tests with and without VRFs
...
====================

Link: https://patch.msgid.link/20250523132712.458507-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+1505 -231
+3
include/linux/netdevice_xmit.h
··· 11 11 #if IS_ENABLED(CONFIG_NET_ACT_MIRRED) 12 12 u8 sched_mirred_nest; 13 13 #endif 14 + #if IS_ENABLED(CONFIG_NF_DUP_NETDEV) 15 + u8 nf_dup_skb_recursion; 16 + #endif 14 17 }; 15 18 16 19 #endif
+4 -11
include/linux/netfilter.h
··· 95 95 }; 96 96 97 97 struct nf_hook_ops { 98 + struct list_head list; 99 + struct rcu_head rcu; 100 + 98 101 /* User fills in from here down. */ 99 102 nf_hookfn *hook; 100 103 struct net_device *dev; ··· 473 470 void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); 474 471 void (*set_closing)(struct nf_conntrack *nfct); 475 472 int (*confirm)(struct sk_buff *skb); 473 + u32 (*get_id)(const struct nf_conntrack *nfct); 476 474 }; 477 475 extern const struct nf_ct_hook __rcu *nf_ct_hook; 478 476 ··· 500 496 501 497 extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; 502 498 extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; 503 - 504 - /* 505 - * nf_skb_duplicated - TEE target has sent a packet 506 - * 507 - * When a xtables target sends a packet, the OUTPUT and POSTROUTING 508 - * hooks are traversed again, i.e. nft and xtables are invoked recursively. 509 - * 510 - * This is used by xtables TEE target to prevent the duplicated skb from 511 - * being duplicated again. 512 - */ 513 - DECLARE_PER_CPU(bool, nf_skb_duplicated); 514 499 515 500 /* 516 501 * Contains bitmask of ctnetlink event subscribers, if any.
+1
include/linux/sched.h
··· 1044 1044 /* delay due to memory thrashing */ 1045 1045 unsigned in_thrashing:1; 1046 1046 #endif 1047 + unsigned in_nf_duplicate:1; 1047 1048 #ifdef CONFIG_PREEMPT_RT 1048 1049 struct netdev_xmit net_xmit; 1049 1050 #endif
+11 -1
include/net/netfilter/nf_tables.h
··· 1142 1142 int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); 1143 1143 void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); 1144 1144 1145 + struct nft_hook; 1146 + void nf_tables_chain_device_notify(const struct nft_chain *chain, 1147 + const struct nft_hook *hook, 1148 + const struct net_device *dev, int event); 1149 + 1145 1150 enum nft_chain_types { 1146 1151 NFT_CHAIN_T_DEFAULT = 0, 1147 1152 NFT_CHAIN_T_ROUTE, ··· 1204 1199 1205 1200 struct nft_hook { 1206 1201 struct list_head list; 1207 - struct nf_hook_ops ops; 1202 + struct list_head ops_list; 1208 1203 struct rcu_head rcu; 1209 1204 char ifname[IFNAMSIZ]; 1210 1205 u8 ifnamelen; 1211 1206 }; 1207 + 1208 + struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, 1209 + const struct net_device *dev); 1210 + struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook, 1211 + const struct net_device *dev); 1212 1212 1213 1213 /** 1214 1214 * struct nft_base_chain - nf_tables base chain
+9
include/net/netfilter/nft_fib.h
··· 2 2 #ifndef _NFT_FIB_H_ 3 3 #define _NFT_FIB_H_ 4 4 5 + #include <net/l3mdev.h> 5 6 #include <net/netfilter/nf_tables.h> 6 7 7 8 struct nft_fib { ··· 38 37 return sk->sk_rx_dst_ifindex == indev->ifindex; 39 38 40 39 return nft_fib_is_loopback(pkt->skb, indev); 40 + } 41 + 42 + static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt, 43 + const struct net_device *iif) 44 + { 45 + const struct net_device *dev = iif ? iif : pkt->skb->dev; 46 + 47 + return l3mdev_master_ifindex_rcu(dev); 41 48 } 42 49 43 50 int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset);
+18
include/uapi/linux/netfilter/nf_tables.h
··· 142 142 NFT_MSG_DESTROYOBJ, 143 143 NFT_MSG_DESTROYFLOWTABLE, 144 144 NFT_MSG_GETSETELEM_RESET, 145 + NFT_MSG_NEWDEV, 146 + NFT_MSG_DELDEV, 145 147 NFT_MSG_MAX, 146 148 }; 147 149 ··· 1786 1784 * enum nft_device_attributes - nf_tables device netlink attributes 1787 1785 * 1788 1786 * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) 1787 + * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING) 1788 + * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING) 1789 + * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING) 1790 + * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING) 1789 1791 */ 1790 1792 enum nft_devices_attributes { 1791 1793 NFTA_DEVICE_UNSPEC, 1792 1794 NFTA_DEVICE_NAME, 1795 + NFTA_DEVICE_TABLE, 1796 + NFTA_DEVICE_FLOWTABLE, 1797 + NFTA_DEVICE_CHAIN, 1798 + NFTA_DEVICE_SPEC, 1793 1799 __NFTA_DEVICE_MAX 1794 1800 }; 1795 1801 #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) ··· 1851 1841 * @NFTA_TRACE_MARK: nfmark (NLA_U32) 1852 1842 * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) 1853 1843 * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) 1844 + * @NFTA_TRACE_CT_ID: conntrack id (NLA_U32) 1845 + * @NFTA_TRACE_CT_DIRECTION: packets direction (NLA_U8) 1846 + * @NFTA_TRACE_CT_STATUS: conntrack status (NLA_U32) 1847 + * @NFTA_TRACE_CT_STATE: packet state (new, established, ...) (NLA_U32) 1854 1848 */ 1855 1849 enum nft_trace_attributes { 1856 1850 NFTA_TRACE_UNSPEC, ··· 1875 1861 NFTA_TRACE_NFPROTO, 1876 1862 NFTA_TRACE_POLICY, 1877 1863 NFTA_TRACE_PAD, 1864 + NFTA_TRACE_CT_ID, 1865 + NFTA_TRACE_CT_DIRECTION, 1866 + NFTA_TRACE_CT_STATUS, 1867 + NFTA_TRACE_CT_STATE, 1878 1868 __NFTA_TRACE_MAX 1879 1869 }; 1880 1870 #define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1)
+2
include/uapi/linux/netfilter/nfnetlink.h
··· 25 25 #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA 26 26 NFNLGRP_NFTRACE, 27 27 #define NFNLGRP_NFTRACE NFNLGRP_NFTRACE 28 + NFNLGRP_NFT_DEV, 29 + #define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV 28 30 __NFNLGRP_MAX, 29 31 }; 30 32 #define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
+1 -1
net/ipv4/netfilter/ip_tables.c
··· 270 270 * but it is no problem since absolute verdict is issued by these. 271 271 */ 272 272 if (static_key_false(&xt_tee_enabled)) 273 - jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); 273 + jumpstack += private->stacksize * current->in_nf_duplicate; 274 274 275 275 e = get_entry(table_base, private->hook_entry[hook]); 276 276
+3 -3
net/ipv4/netfilter/nf_dup_ipv4.c
··· 54 54 struct iphdr *iph; 55 55 56 56 local_bh_disable(); 57 - if (this_cpu_read(nf_skb_duplicated)) 57 + if (current->in_nf_duplicate) 58 58 goto out; 59 59 /* 60 60 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for ··· 86 86 --iph->ttl; 87 87 88 88 if (nf_dup_ipv4_route(net, skb, gw, oif)) { 89 - __this_cpu_write(nf_skb_duplicated, true); 89 + current->in_nf_duplicate = true; 90 90 ip_local_out(net, skb->sk, skb); 91 - __this_cpu_write(nf_skb_duplicated, false); 91 + current->in_nf_duplicate = false; 92 92 } else { 93 93 kfree_skb(skb); 94 94 }
+9 -2
net/ipv4/netfilter/nft_fib_ipv4.c
··· 50 50 else 51 51 addr = iph->saddr; 52 52 53 - *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); 53 + if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) { 54 + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); 55 + return; 56 + } 57 + 58 + *dst = inet_addr_type_dev_table(nft_net(pkt), pkt->skb->dev, addr); 54 59 } 55 60 EXPORT_SYMBOL_GPL(nft_fib4_eval_type); 56 61 ··· 70 65 struct flowi4 fl4 = { 71 66 .flowi4_scope = RT_SCOPE_UNIVERSE, 72 67 .flowi4_iif = LOOPBACK_IFINDEX, 68 + .flowi4_proto = pkt->tprot, 73 69 .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), 74 - .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), 75 70 }; 76 71 const struct net_device *oif; 77 72 const struct net_device *found; ··· 94 89 oif = nft_in(pkt); 95 90 else 96 91 oif = NULL; 92 + 93 + fl4.flowi4_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, oif); 97 94 98 95 iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); 99 96 if (!iph) {
+1 -1
net/ipv6/netfilter/ip6_tables.c
··· 292 292 * but it is no problem since absolute verdict is issued by these. 293 293 */ 294 294 if (static_key_false(&xt_tee_enabled)) 295 - jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); 295 + jumpstack += private->stacksize * current->in_nf_duplicate; 296 296 297 297 e = get_entry(table_base, private->hook_entry[hook]); 298 298
+3 -3
net/ipv6/netfilter/nf_dup_ipv6.c
··· 48 48 const struct in6_addr *gw, int oif) 49 49 { 50 50 local_bh_disable(); 51 - if (this_cpu_read(nf_skb_duplicated)) 51 + if (current->in_nf_duplicate) 52 52 goto out; 53 53 skb = pskb_copy(skb, GFP_ATOMIC); 54 54 if (skb == NULL) ··· 64 64 --iph->hop_limit; 65 65 } 66 66 if (nf_dup_ipv6_route(net, skb, gw, oif)) { 67 - __this_cpu_write(nf_skb_duplicated, true); 67 + current->in_nf_duplicate = true; 68 68 ip6_local_out(net, skb->sk, skb); 69 - __this_cpu_write(nf_skb_duplicated, false); 69 + current->in_nf_duplicate = false; 70 70 } else { 71 71 kfree_skb(skb); 72 72 }
+10 -7
net/ipv6/netfilter/nft_fib_ipv6.c
··· 50 50 fl6->flowi6_mark = pkt->skb->mark; 51 51 52 52 fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; 53 + fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev); 53 54 54 55 return lookup_flags; 55 56 } ··· 73 72 dev = nft_in(pkt); 74 73 else if (priv->flags & NFTA_FIB_F_OIF) 75 74 dev = nft_out(pkt); 76 - 77 - fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); 78 75 79 76 nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); 80 77 ··· 157 158 { 158 159 const struct nft_fib *priv = nft_expr_priv(expr); 159 160 int noff = skb_network_offset(pkt->skb); 161 + const struct net_device *found = NULL; 160 162 const struct net_device *oif = NULL; 161 163 u32 *dest = &regs->data[priv->dreg]; 162 164 struct ipv6hdr *iph, _iph; ··· 165 165 .flowi6_iif = LOOPBACK_IFINDEX, 166 166 .flowi6_proto = pkt->tprot, 167 167 .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), 168 - .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), 169 168 }; 170 169 struct rt6_info *rt; 171 170 int lookup_flags; ··· 202 203 if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) 203 204 goto put_rt_err; 204 205 205 - if (oif && oif != rt->rt6i_idev->dev && 206 - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) 207 - goto put_rt_err; 206 + if (!oif) { 207 + found = rt->rt6i_idev->dev; 208 + } else { 209 + if (oif == rt->rt6i_idev->dev || 210 + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex) 211 + found = oif; 212 + } 208 213 209 - nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); 214 + nft_fib_store_result(dest, priv, found); 210 215 put_rt_err: 211 216 ip6_rt_put(rt); 212 217 }
-3
net/netfilter/core.c
··· 31 31 const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; 32 32 EXPORT_SYMBOL_GPL(nf_ipv6_ops); 33 33 34 - DEFINE_PER_CPU(bool, nf_skb_duplicated); 35 - EXPORT_SYMBOL_GPL(nf_skb_duplicated); 36 - 37 34 #ifdef CONFIG_JUMP_LABEL 38 35 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; 39 36 EXPORT_SYMBOL(nf_hooks_needed);
+6
net/netfilter/nf_conntrack_core.c
··· 505 505 } 506 506 EXPORT_SYMBOL_GPL(nf_ct_get_id); 507 507 508 + static u32 nf_conntrack_get_id(const struct nf_conntrack *nfct) 509 + { 510 + return nf_ct_get_id(nf_ct_to_nf_conn(nfct)); 511 + } 512 + 508 513 static void 509 514 clean_from_lists(struct nf_conn *ct) 510 515 { ··· 2715 2710 .attach = nf_conntrack_attach, 2716 2711 .set_closing = nf_conntrack_set_closing, 2717 2712 .confirm = __nf_conntrack_confirm, 2713 + .get_id = nf_conntrack_get_id, 2718 2714 }; 2719 2715 2720 2716 void nf_conntrack_init_end(void)
+18 -4
net/netfilter/nf_dup_netdev.c
··· 15 15 16 16 #define NF_RECURSION_LIMIT 2 17 17 18 - static DEFINE_PER_CPU(u8, nf_dup_skb_recursion); 18 + #ifndef CONFIG_PREEMPT_RT 19 + static u8 *nf_get_nf_dup_skb_recursion(void) 20 + { 21 + return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion); 22 + } 23 + #else 24 + 25 + static u8 *nf_get_nf_dup_skb_recursion(void) 26 + { 27 + return &current->net_xmit.nf_dup_skb_recursion; 28 + } 29 + 30 + #endif 19 31 20 32 static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev, 21 33 enum nf_dev_hooks hook) 22 34 { 23 - if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT) 35 + u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion(); 36 + 37 + if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT) 24 38 goto err; 25 39 26 40 if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) { ··· 46 32 47 33 skb->dev = dev; 48 34 skb_clear_tstamp(skb); 49 - __this_cpu_inc(nf_dup_skb_recursion); 35 + (*nf_dup_skb_recursion)++; 50 36 dev_queue_xmit(skb); 51 - __this_cpu_dec(nf_dup_skb_recursion); 37 + (*nf_dup_skb_recursion)--; 52 38 return; 53 39 err: 54 40 kfree_skb(skb);
+310 -96
net/netfilter/nf_tables_api.c
··· 300 300 static int nft_netdev_register_hooks(struct net *net, 301 301 struct list_head *hook_list) 302 302 { 303 + struct nf_hook_ops *ops; 303 304 struct nft_hook *hook; 304 305 int err, j; 305 306 306 307 j = 0; 307 308 list_for_each_entry(hook, hook_list, list) { 308 - err = nf_register_net_hook(net, &hook->ops); 309 - if (err < 0) 310 - goto err_register; 309 + list_for_each_entry(ops, &hook->ops_list, list) { 310 + err = nf_register_net_hook(net, ops); 311 + if (err < 0) 312 + goto err_register; 311 313 312 - j++; 314 + j++; 315 + } 313 316 } 314 317 return 0; 315 318 316 319 err_register: 317 320 list_for_each_entry(hook, hook_list, list) { 318 - if (j-- <= 0) 319 - break; 321 + list_for_each_entry(ops, &hook->ops_list, list) { 322 + if (j-- <= 0) 323 + break; 320 324 321 - nf_unregister_net_hook(net, &hook->ops); 325 + nf_unregister_net_hook(net, ops); 326 + } 322 327 } 323 328 return err; 329 + } 330 + 331 + static void nft_netdev_hook_free_ops(struct nft_hook *hook) 332 + { 333 + struct nf_hook_ops *ops, *next; 334 + 335 + list_for_each_entry_safe(ops, next, &hook->ops_list, list) { 336 + list_del(&ops->list); 337 + kfree(ops); 338 + } 339 + } 340 + 341 + static void nft_netdev_hook_free(struct nft_hook *hook) 342 + { 343 + nft_netdev_hook_free_ops(hook); 344 + kfree(hook); 345 + } 346 + 347 + static void __nft_netdev_hook_free_rcu(struct rcu_head *rcu) 348 + { 349 + struct nft_hook *hook = container_of(rcu, struct nft_hook, rcu); 350 + 351 + nft_netdev_hook_free(hook); 352 + } 353 + 354 + static void nft_netdev_hook_free_rcu(struct nft_hook *hook) 355 + { 356 + call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu); 324 357 } 325 358 326 359 static void nft_netdev_unregister_hooks(struct net *net, ··· 361 328 bool release_netdev) 362 329 { 363 330 struct nft_hook *hook, *next; 331 + struct nf_hook_ops *ops; 364 332 365 333 list_for_each_entry_safe(hook, next, hook_list, list) { 366 - nf_unregister_net_hook(net, &hook->ops); 334 + list_for_each_entry(ops, 
&hook->ops_list, list) 335 + nf_unregister_net_hook(net, ops); 367 336 if (release_netdev) { 368 337 list_del(&hook->list); 369 - kfree_rcu(hook, rcu); 338 + nft_netdev_hook_free_rcu(hook); 370 339 } 371 340 } 372 341 } ··· 2288 2253 list_for_each_entry_safe(hook, next, 2289 2254 &basechain->hook_list, list) { 2290 2255 list_del_rcu(&hook->list); 2291 - kfree_rcu(hook, rcu); 2256 + nft_netdev_hook_free_rcu(hook); 2292 2257 } 2293 2258 } 2294 2259 module_put(basechain->type->owner); ··· 2309 2274 static struct nft_hook *nft_netdev_hook_alloc(struct net *net, 2310 2275 const struct nlattr *attr) 2311 2276 { 2277 + struct nf_hook_ops *ops; 2312 2278 struct net_device *dev; 2313 2279 struct nft_hook *hook; 2314 2280 int err; 2315 2281 2316 2282 hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); 2317 - if (!hook) { 2318 - err = -ENOMEM; 2319 - goto err_hook_alloc; 2320 - } 2283 + if (!hook) 2284 + return ERR_PTR(-ENOMEM); 2285 + 2286 + INIT_LIST_HEAD(&hook->ops_list); 2321 2287 2322 2288 err = nla_strscpy(hook->ifname, attr, IFNAMSIZ); 2323 2289 if (err < 0) 2324 - goto err_hook_dev; 2290 + goto err_hook_free; 2325 2291 2326 2292 hook->ifnamelen = nla_len(attr); 2327 2293 ··· 2330 2294 * indirectly serializing all the other holders of the commit_mutex with 2331 2295 * the rtnl_mutex. 
2332 2296 */ 2333 - dev = __dev_get_by_name(net, hook->ifname); 2334 - if (!dev) { 2335 - err = -ENOENT; 2336 - goto err_hook_dev; 2337 - } 2338 - hook->ops.dev = dev; 2297 + for_each_netdev(net, dev) { 2298 + if (strncmp(dev->name, hook->ifname, hook->ifnamelen)) 2299 + continue; 2339 2300 2301 + ops = kzalloc(sizeof(struct nf_hook_ops), GFP_KERNEL_ACCOUNT); 2302 + if (!ops) { 2303 + err = -ENOMEM; 2304 + goto err_hook_free; 2305 + } 2306 + ops->dev = dev; 2307 + list_add_tail(&ops->list, &hook->ops_list); 2308 + } 2340 2309 return hook; 2341 2310 2342 - err_hook_dev: 2343 - kfree(hook); 2344 - err_hook_alloc: 2311 + err_hook_free: 2312 + nft_netdev_hook_free(hook); 2345 2313 return ERR_PTR(err); 2346 2314 } 2347 2315 ··· 2355 2315 struct nft_hook *hook; 2356 2316 2357 2317 list_for_each_entry(hook, hook_list, list) { 2358 - if (!strcmp(hook->ifname, this->ifname)) 2318 + if (!strncmp(hook->ifname, this->ifname, 2319 + min(hook->ifnamelen, this->ifnamelen))) 2359 2320 return hook; 2360 2321 } 2361 2322 ··· 2386 2345 } 2387 2346 if (nft_hook_list_find(hook_list, hook)) { 2388 2347 NL_SET_BAD_ATTR(extack, tmp); 2389 - kfree(hook); 2348 + nft_netdev_hook_free(hook); 2390 2349 err = -EEXIST; 2391 2350 goto err_hook; 2392 2351 } ··· 2404 2363 err_hook: 2405 2364 list_for_each_entry_safe(hook, next, hook_list, list) { 2406 2365 list_del(&hook->list); 2407 - kfree(hook); 2366 + nft_netdev_hook_free(hook); 2408 2367 } 2409 2368 return err; 2410 2369 } ··· 2547 2506 2548 2507 list_for_each_entry_safe(h, next, &hook->list, list) { 2549 2508 list_del(&h->list); 2550 - kfree(h); 2509 + nft_netdev_hook_free(h); 2551 2510 } 2552 2511 module_put(hook->type->owner); 2553 2512 } ··· 2600 2559 struct nft_chain_hook *hook, u32 flags) 2601 2560 { 2602 2561 struct nft_chain *chain; 2562 + struct nf_hook_ops *ops; 2603 2563 struct nft_hook *h; 2604 2564 2605 2565 basechain->type = hook->type; ··· 2609 2567 2610 2568 if (nft_base_chain_netdev(family, hook->num)) { 2611 2569 
list_splice_init(&hook->list, &basechain->hook_list); 2612 - list_for_each_entry(h, &basechain->hook_list, list) 2613 - nft_basechain_hook_init(&h->ops, family, hook, chain); 2570 + list_for_each_entry(h, &basechain->hook_list, list) { 2571 + list_for_each_entry(ops, &h->ops_list, list) 2572 + nft_basechain_hook_init(ops, family, hook, chain); 2573 + } 2614 2574 } 2615 2575 nft_basechain_hook_init(&basechain->ops, family, hook, chain); 2616 2576 ··· 2831 2787 2832 2788 if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { 2833 2789 list_for_each_entry_safe(h, next, &hook.list, list) { 2834 - h->ops.pf = basechain->ops.pf; 2835 - h->ops.hooknum = basechain->ops.hooknum; 2836 - h->ops.priority = basechain->ops.priority; 2837 - h->ops.priv = basechain->ops.priv; 2838 - h->ops.hook = basechain->ops.hook; 2790 + list_for_each_entry(ops, &h->ops_list, list) { 2791 + ops->pf = basechain->ops.pf; 2792 + ops->hooknum = basechain->ops.hooknum; 2793 + ops->priority = basechain->ops.priority; 2794 + ops->priv = basechain->ops.priv; 2795 + ops->hook = basechain->ops.hook; 2796 + } 2839 2797 2840 2798 if (nft_hook_list_find(&basechain->hook_list, h)) { 2841 2799 list_del(&h->list); 2842 - kfree(h); 2800 + nft_netdev_hook_free(h); 2843 2801 } 2844 2802 } 2845 2803 } else { ··· 2959 2913 err_hooks: 2960 2914 if (nla[NFTA_CHAIN_HOOK]) { 2961 2915 list_for_each_entry_safe(h, next, &hook.list, list) { 2962 - if (unregister) 2963 - nf_unregister_net_hook(ctx->net, &h->ops); 2916 + if (unregister) { 2917 + list_for_each_entry(ops, &h->ops_list, list) 2918 + nf_unregister_net_hook(ctx->net, ops); 2919 + } 2964 2920 list_del(&h->list); 2965 - kfree_rcu(h, rcu); 2921 + nft_netdev_hook_free_rcu(h); 2966 2922 } 2967 2923 module_put(hook.type->owner); 2968 2924 } ··· 8833 8785 struct netlink_ext_ack *extack, bool add) 8834 8786 { 8835 8787 struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; 8788 + struct nf_hook_ops *ops; 8836 8789 struct nft_hook *hook; 8837 8790 int hooknum, 
priority; 8838 8791 int err; ··· 8888 8839 } 8889 8840 8890 8841 list_for_each_entry(hook, &flowtable_hook->list, list) { 8891 - hook->ops.pf = NFPROTO_NETDEV; 8892 - hook->ops.hooknum = flowtable_hook->num; 8893 - hook->ops.priority = flowtable_hook->priority; 8894 - hook->ops.priv = &flowtable->data; 8895 - hook->ops.hook = flowtable->data.type->hook; 8842 + list_for_each_entry(ops, &hook->ops_list, list) { 8843 + ops->pf = NFPROTO_NETDEV; 8844 + ops->hooknum = flowtable_hook->num; 8845 + ops->priority = flowtable_hook->priority; 8846 + ops->priv = &flowtable->data; 8847 + ops->hook = flowtable->data.type->hook; 8848 + } 8896 8849 } 8897 8850 8898 8851 return err; ··· 8936 8885 } 8937 8886 8938 8887 /* Only called from error and netdev event paths. */ 8939 - static void nft_unregister_flowtable_hook(struct net *net, 8940 - struct nft_flowtable *flowtable, 8941 - struct nft_hook *hook) 8888 + static void nft_unregister_flowtable_ops(struct net *net, 8889 + struct nft_flowtable *flowtable, 8890 + struct nf_hook_ops *ops) 8942 8891 { 8943 - nf_unregister_net_hook(net, &hook->ops); 8944 - flowtable->data.type->setup(&flowtable->data, hook->ops.dev, 8892 + nf_unregister_net_hook(net, ops); 8893 + flowtable->data.type->setup(&flowtable->data, ops->dev, 8945 8894 FLOW_BLOCK_UNBIND); 8946 8895 } 8947 8896 ··· 8951 8900 bool release_netdev) 8952 8901 { 8953 8902 struct nft_hook *hook, *next; 8903 + struct nf_hook_ops *ops; 8954 8904 8955 8905 list_for_each_entry_safe(hook, next, hook_list, list) { 8956 - nf_unregister_net_hook(net, &hook->ops); 8957 - flowtable->data.type->setup(&flowtable->data, hook->ops.dev, 8958 - FLOW_BLOCK_UNBIND); 8906 + list_for_each_entry(ops, &hook->ops_list, list) 8907 + nft_unregister_flowtable_ops(net, flowtable, ops); 8959 8908 if (release_netdev) { 8960 8909 list_del(&hook->list); 8961 - kfree_rcu(hook, rcu); 8910 + nft_netdev_hook_free_rcu(hook); 8962 8911 } 8963 8912 } 8964 8913 } ··· 8970 8919 __nft_unregister_flowtable_net_hooks(net, 
flowtable, hook_list, false); 8971 8920 } 8972 8921 8922 + static int nft_register_flowtable_ops(struct net *net, 8923 + struct nft_flowtable *flowtable, 8924 + struct nf_hook_ops *ops) 8925 + { 8926 + int err; 8927 + 8928 + err = flowtable->data.type->setup(&flowtable->data, 8929 + ops->dev, FLOW_BLOCK_BIND); 8930 + if (err < 0) 8931 + return err; 8932 + 8933 + err = nf_register_net_hook(net, ops); 8934 + if (!err) 8935 + return 0; 8936 + 8937 + flowtable->data.type->setup(&flowtable->data, 8938 + ops->dev, FLOW_BLOCK_UNBIND); 8939 + return err; 8940 + } 8941 + 8973 8942 static int nft_register_flowtable_net_hooks(struct net *net, 8974 8943 struct nft_table *table, 8975 8944 struct list_head *hook_list, ··· 8997 8926 { 8998 8927 struct nft_hook *hook, *next; 8999 8928 struct nft_flowtable *ft; 8929 + struct nf_hook_ops *ops; 9000 8930 int err, i = 0; 9001 8931 9002 8932 list_for_each_entry(hook, hook_list, list) { ··· 9011 8939 } 9012 8940 } 9013 8941 9014 - err = flowtable->data.type->setup(&flowtable->data, 9015 - hook->ops.dev, 9016 - FLOW_BLOCK_BIND); 9017 - if (err < 0) 9018 - goto err_unregister_net_hooks; 8942 + list_for_each_entry(ops, &hook->ops_list, list) { 8943 + err = nft_register_flowtable_ops(net, flowtable, ops); 8944 + if (err < 0) 8945 + goto err_unregister_net_hooks; 9019 8946 9020 - err = nf_register_net_hook(net, &hook->ops); 9021 - if (err < 0) { 9022 - flowtable->data.type->setup(&flowtable->data, 9023 - hook->ops.dev, 9024 - FLOW_BLOCK_UNBIND); 9025 - goto err_unregister_net_hooks; 8947 + i++; 9026 8948 } 9027 - 9028 - i++; 9029 8949 } 9030 8950 9031 8951 return 0; 9032 8952 9033 8953 err_unregister_net_hooks: 9034 8954 list_for_each_entry_safe(hook, next, hook_list, list) { 9035 - if (i-- <= 0) 9036 - break; 8955 + list_for_each_entry(ops, &hook->ops_list, list) { 8956 + if (i-- <= 0) 8957 + break; 9037 8958 9038 - nft_unregister_flowtable_hook(net, flowtable, hook); 8959 + nft_unregister_flowtable_ops(net, flowtable, ops); 8960 + } 9039 
8961 list_del_rcu(&hook->list); 9040 - kfree_rcu(hook, rcu); 8962 + nft_netdev_hook_free_rcu(hook); 9041 8963 } 9042 8964 9043 8965 return err; ··· 9043 8977 9044 8978 list_for_each_entry_safe(hook, next, hook_list, list) { 9045 8979 list_del_rcu(&hook->list); 9046 - kfree_rcu(hook, rcu); 8980 + nft_netdev_hook_free_rcu(hook); 9047 8981 } 9048 8982 } 9049 8983 ··· 9054 8988 const struct nlattr * const *nla = ctx->nla; 9055 8989 struct nft_flowtable_hook flowtable_hook; 9056 8990 struct nft_hook *hook, *next; 8991 + struct nf_hook_ops *ops; 9057 8992 struct nft_trans *trans; 9058 8993 bool unregister = false; 9059 8994 u32 flags; ··· 9068 9001 list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) { 9069 9002 if (nft_hook_list_find(&flowtable->hook_list, hook)) { 9070 9003 list_del(&hook->list); 9071 - kfree(hook); 9004 + nft_netdev_hook_free(hook); 9072 9005 } 9073 9006 } 9074 9007 ··· 9112 9045 9113 9046 err_flowtable_update_hook: 9114 9047 list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) { 9115 - if (unregister) 9116 - nft_unregister_flowtable_hook(ctx->net, flowtable, hook); 9048 + if (unregister) { 9049 + list_for_each_entry(ops, &hook->ops_list, list) 9050 + nft_unregister_flowtable_ops(ctx->net, 9051 + flowtable, ops); 9052 + } 9117 9053 list_del_rcu(&hook->list); 9118 - kfree_rcu(hook, rcu); 9054 + nft_netdev_hook_free_rcu(hook); 9119 9055 } 9120 9056 9121 9057 return err; ··· 9264 9194 9265 9195 list_for_each_entry_safe(this, next, &flowtable_hook->list, list) { 9266 9196 list_del(&this->list); 9267 - kfree(this); 9197 + nft_netdev_hook_free(this); 9268 9198 } 9269 9199 } 9270 9200 ··· 9627 9557 flowtable->data.type->free(&flowtable->data); 9628 9558 list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { 9629 9559 list_del_rcu(&hook->list); 9630 - kfree_rcu(hook, rcu); 9560 + nft_netdev_hook_free_rcu(hook); 9631 9561 } 9632 9562 kfree(flowtable->name); 9633 9563 module_put(flowtable->data.type->owner); ··· 9660 9590 
return -EMSGSIZE; 9661 9591 } 9662 9592 9663 - static void nft_flowtable_event(unsigned long event, struct net_device *dev, 9664 - struct nft_flowtable *flowtable) 9593 + struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook, 9594 + const struct net_device *dev) 9665 9595 { 9596 + struct nf_hook_ops *ops; 9597 + 9598 + list_for_each_entry(ops, &hook->ops_list, list) { 9599 + if (ops->dev == dev) 9600 + return ops; 9601 + } 9602 + return NULL; 9603 + } 9604 + EXPORT_SYMBOL_GPL(nft_hook_find_ops); 9605 + 9606 + struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook, 9607 + const struct net_device *dev) 9608 + { 9609 + struct nf_hook_ops *ops; 9610 + 9611 + list_for_each_entry_rcu(ops, &hook->ops_list, list) { 9612 + if (ops->dev == dev) 9613 + return ops; 9614 + } 9615 + return NULL; 9616 + } 9617 + EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu); 9618 + 9619 + static void 9620 + nf_tables_device_notify(const struct nft_table *table, int attr, 9621 + const char *name, const struct nft_hook *hook, 9622 + const struct net_device *dev, int event) 9623 + { 9624 + struct net *net = dev_net(dev); 9625 + struct nlmsghdr *nlh; 9626 + struct sk_buff *skb; 9627 + u16 flags = 0; 9628 + 9629 + if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV)) 9630 + return; 9631 + 9632 + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 9633 + if (!skb) 9634 + goto err; 9635 + 9636 + event = event == NETDEV_REGISTER ? 
NFT_MSG_NEWDEV : NFT_MSG_DELDEV; 9637 + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); 9638 + nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family, 9639 + NFNETLINK_V0, nft_base_seq(net)); 9640 + if (!nlh) 9641 + goto err; 9642 + 9643 + if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) || 9644 + nla_put_string(skb, attr, name) || 9645 + nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) || 9646 + nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) 9647 + goto err; 9648 + 9649 + nlmsg_end(skb, nlh); 9650 + nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV, 9651 + nlmsg_report(nlh), GFP_KERNEL); 9652 + return; 9653 + err: 9654 + if (skb) 9655 + kfree_skb(skb); 9656 + nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS); 9657 + } 9658 + 9659 + void 9660 + nf_tables_chain_device_notify(const struct nft_chain *chain, 9661 + const struct nft_hook *hook, 9662 + const struct net_device *dev, int event) 9663 + { 9664 + nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN, 9665 + chain->name, hook, dev, event); 9666 + } 9667 + 9668 + static void 9669 + nf_tables_flowtable_device_notify(const struct nft_flowtable *ft, 9670 + const struct nft_hook *hook, 9671 + const struct net_device *dev, int event) 9672 + { 9673 + nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE, 9674 + ft->name, hook, dev, event); 9675 + } 9676 + 9677 + static int nft_flowtable_event(unsigned long event, struct net_device *dev, 9678 + struct nft_flowtable *flowtable, bool changename) 9679 + { 9680 + struct nf_hook_ops *ops; 9666 9681 struct nft_hook *hook; 9682 + bool match; 9667 9683 9668 9684 list_for_each_entry(hook, &flowtable->hook_list, list) { 9669 - if (hook->ops.dev != dev) 9670 - continue; 9685 + ops = nft_hook_find_ops(hook, dev); 9686 + match = !strncmp(hook->ifname, dev->name, hook->ifnamelen); 9671 9687 9672 - /* flow_offload_netdev_event() cleans up entries for us. 
*/ 9673 - nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook); 9674 - list_del_rcu(&hook->list); 9675 - kfree_rcu(hook, rcu); 9688 + switch (event) { 9689 + case NETDEV_UNREGISTER: 9690 + /* NOP if not found or new name still matching */ 9691 + if (!ops || (changename && match)) 9692 + continue; 9693 + 9694 + /* flow_offload_netdev_event() cleans up entries for us. */ 9695 + nft_unregister_flowtable_ops(dev_net(dev), 9696 + flowtable, ops); 9697 + list_del_rcu(&ops->list); 9698 + kfree_rcu(ops, rcu); 9699 + break; 9700 + case NETDEV_REGISTER: 9701 + /* NOP if not matching or already registered */ 9702 + if (!match || (changename && ops)) 9703 + continue; 9704 + 9705 + ops = kzalloc(sizeof(struct nf_hook_ops), 9706 + GFP_KERNEL_ACCOUNT); 9707 + if (!ops) 9708 + return 1; 9709 + 9710 + ops->pf = NFPROTO_NETDEV; 9711 + ops->hooknum = flowtable->hooknum; 9712 + ops->priority = flowtable->data.priority; 9713 + ops->priv = &flowtable->data; 9714 + ops->hook = flowtable->data.type->hook; 9715 + ops->dev = dev; 9716 + if (nft_register_flowtable_ops(dev_net(dev), 9717 + flowtable, ops)) { 9718 + kfree(ops); 9719 + return 1; 9720 + } 9721 + list_add_tail_rcu(&ops->list, &hook->ops_list); 9722 + break; 9723 + } 9724 + nf_tables_flowtable_device_notify(flowtable, hook, dev, event); 9676 9725 break; 9677 9726 } 9727 + return 0; 9728 + } 9729 + 9730 + static int __nf_tables_flowtable_event(unsigned long event, 9731 + struct net_device *dev, 9732 + bool changename) 9733 + { 9734 + struct nftables_pernet *nft_net = nft_pernet(dev_net(dev)); 9735 + struct nft_flowtable *flowtable; 9736 + struct nft_table *table; 9737 + 9738 + list_for_each_entry(table, &nft_net->tables, list) { 9739 + list_for_each_entry(flowtable, &table->flowtables, list) { 9740 + if (nft_flowtable_event(event, dev, 9741 + flowtable, changename)) 9742 + return 1; 9743 + } 9744 + } 9745 + return 0; 9678 9746 } 9679 9747 9680 9748 static int nf_tables_flowtable_event(struct notifier_block *this, 9681 9749 
unsigned long event, void *ptr) 9682 9750 { 9683 9751 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 9684 - struct nft_flowtable *flowtable; 9685 9752 struct nftables_pernet *nft_net; 9686 - struct nft_table *table; 9753 + int ret = NOTIFY_DONE; 9687 9754 struct net *net; 9688 9755 9689 - if (event != NETDEV_UNREGISTER) 9690 - return 0; 9756 + if (event != NETDEV_REGISTER && 9757 + event != NETDEV_UNREGISTER && 9758 + event != NETDEV_CHANGENAME) 9759 + return NOTIFY_DONE; 9691 9760 9692 9761 net = dev_net(dev); 9693 9762 nft_net = nft_pernet(net); 9694 9763 mutex_lock(&nft_net->commit_mutex); 9695 - list_for_each_entry(table, &nft_net->tables, list) { 9696 - list_for_each_entry(flowtable, &table->flowtables, list) { 9697 - nft_flowtable_event(event, dev, flowtable); 9698 - } 9699 - } 9700 - mutex_unlock(&nft_net->commit_mutex); 9701 9764 9702 - return NOTIFY_DONE; 9765 + if (event == NETDEV_CHANGENAME) { 9766 + if (__nf_tables_flowtable_event(NETDEV_REGISTER, dev, true)) { 9767 + ret = NOTIFY_BAD; 9768 + goto out_unlock; 9769 + } 9770 + __nf_tables_flowtable_event(NETDEV_UNREGISTER, dev, true); 9771 + } else if (__nf_tables_flowtable_event(event, dev, false)) { 9772 + ret = NOTIFY_BAD; 9773 + } 9774 + out_unlock: 9775 + mutex_unlock(&nft_net->commit_mutex); 9776 + return ret; 9703 9777 } 9704 9778 9705 9779 static struct notifier_block nf_tables_flowtable_notifier = {
+28 -21
net/netfilter/nf_tables_offload.c
··· 220 220 221 221 bool nft_chain_offload_support(const struct nft_base_chain *basechain) 222 222 { 223 + struct nf_hook_ops *ops; 223 224 struct net_device *dev; 224 225 struct nft_hook *hook; 225 226 ··· 228 227 return false; 229 228 230 229 list_for_each_entry(hook, &basechain->hook_list, list) { 231 - if (hook->ops.pf != NFPROTO_NETDEV || 232 - hook->ops.hooknum != NF_NETDEV_INGRESS) 233 - return false; 230 + list_for_each_entry(ops, &hook->ops_list, list) { 231 + if (ops->pf != NFPROTO_NETDEV || 232 + ops->hooknum != NF_NETDEV_INGRESS) 233 + return false; 234 234 235 - dev = hook->ops.dev; 236 - if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists()) 237 - return false; 235 + dev = ops->dev; 236 + if (!dev->netdev_ops->ndo_setup_tc && 237 + !flow_indr_dev_exists()) 238 + return false; 239 + } 238 240 } 239 241 240 242 return true; ··· 459 455 const struct net_device *this_dev, 460 456 enum flow_block_command cmd) 461 457 { 462 - struct net_device *dev; 458 + struct nf_hook_ops *ops; 463 459 struct nft_hook *hook; 464 460 int err, i = 0; 465 461 466 462 list_for_each_entry(hook, &basechain->hook_list, list) { 467 - dev = hook->ops.dev; 468 - if (this_dev && this_dev != dev) 469 - continue; 463 + list_for_each_entry(ops, &hook->ops_list, list) { 464 + if (this_dev && this_dev != ops->dev) 465 + continue; 470 466 471 - err = nft_chain_offload_cmd(basechain, dev, cmd); 472 - if (err < 0 && cmd == FLOW_BLOCK_BIND) { 473 - if (!this_dev) 474 - goto err_flow_block; 467 + err = nft_chain_offload_cmd(basechain, ops->dev, cmd); 468 + if (err < 0 && cmd == FLOW_BLOCK_BIND) { 469 + if (!this_dev) 470 + goto err_flow_block; 475 471 476 - return err; 472 + return err; 473 + } 474 + i++; 477 475 } 478 - i++; 479 476 } 480 477 481 478 return 0; 482 479 483 480 err_flow_block: 484 481 list_for_each_entry(hook, &basechain->hook_list, list) { 485 - if (i-- <= 0) 486 - break; 482 + list_for_each_entry(ops, &hook->ops_list, list) { 483 + if (i-- <= 0) 484 + break; 487 485 
488 - dev = hook->ops.dev; 489 - nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND); 486 + nft_chain_offload_cmd(basechain, ops->dev, 487 + FLOW_BLOCK_UNBIND); 488 + } 490 489 } 491 490 return err; 492 491 } ··· 645 638 found = NULL; 646 639 basechain = nft_base_chain(chain); 647 640 list_for_each_entry(hook, &basechain->hook_list, list) { 648 - if (hook->ops.dev != dev) 641 + if (!nft_hook_find_ops(hook, dev)) 649 642 continue; 650 643 651 644 found = hook;
+53 -1
net/netfilter/nf_tables_trace.c
··· 15 15 #include <linux/netfilter.h> 16 16 #include <linux/netfilter/nfnetlink.h> 17 17 #include <linux/netfilter/nf_tables.h> 18 + #include <net/netfilter/nf_conntrack.h> 18 19 #include <net/netfilter/nf_tables_core.h> 19 20 #include <net/netfilter/nf_tables.h> 20 21 ··· 85 84 86 85 if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, 87 86 htons(outdev->type))) 87 + return -1; 88 + } 89 + 90 + return 0; 91 + } 92 + 93 + static int nf_trace_fill_ct_info(struct sk_buff *nlskb, 94 + const struct sk_buff *skb) 95 + { 96 + const struct nf_ct_hook *ct_hook; 97 + enum ip_conntrack_info ctinfo; 98 + const struct nf_conn *ct; 99 + u32 state; 100 + 101 + ct_hook = rcu_dereference(nf_ct_hook); 102 + if (!ct_hook) 103 + return 0; 104 + 105 + ct = nf_ct_get(skb, &ctinfo); 106 + if (!ct) { 107 + if (ctinfo != IP_CT_UNTRACKED) /* not seen by conntrack or invalid */ 108 + return 0; 109 + 110 + state = NF_CT_STATE_UNTRACKED_BIT; 111 + } else { 112 + state = NF_CT_STATE_BIT(ctinfo); 113 + } 114 + 115 + if (nla_put_be32(nlskb, NFTA_TRACE_CT_STATE, htonl(state))) 116 + return -1; 117 + 118 + if (ct) { 119 + u32 id = ct_hook->get_id(&ct->ct_general); 120 + u32 status = READ_ONCE(ct->status); 121 + u8 dir = CTINFO2DIR(ctinfo); 122 + 123 + if (nla_put_u8(nlskb, NFTA_TRACE_CT_DIRECTION, dir)) 124 + return -1; 125 + 126 + if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id)) 127 + return -1; 128 + 129 + if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status))) 88 130 return -1; 89 131 } 90 132 ··· 254 210 nla_total_size(sizeof(__be32)) + /* trace type */ 255 211 nla_total_size(0) + /* VERDICT, nested */ 256 212 nla_total_size(sizeof(u32)) + /* verdict code */ 257 - nla_total_size(sizeof(u32)) + /* id */ 213 + nla_total_size(sizeof(u32)) + /* ct id */ 214 + nla_total_size(sizeof(u8)) + /* ct direction */ 215 + nla_total_size(sizeof(u32)) + /* ct state */ 216 + nla_total_size(sizeof(u32)) + /* ct status */ 217 + nla_total_size(sizeof(u32)) + /* trace id */ 258 218 
nla_total_size(NFT_TRACETYPE_LL_HSIZE) + 259 219 nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + 260 220 nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + ··· 339 291 340 292 if (nf_trace_fill_pkt_info(skb, pkt)) 341 293 goto nla_put_failure; 294 + 295 + if (nf_trace_fill_ct_info(skb, pkt->skb)) 296 + goto nla_put_failure; 297 + 342 298 info->packet_dumped = true; 343 299 } 344 300
+1
net/netfilter/nfnetlink.c
··· 86 86 [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, 87 87 [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, 88 88 [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, 89 + [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES, 89 90 }; 90 91 91 92 static struct nfnl_net *nfnl_pernet(struct net *net)
+76 -18
net/netfilter/nft_chain_filter.c
··· 318 318 }, 319 319 }; 320 320 321 - static void nft_netdev_event(unsigned long event, struct net_device *dev, 322 - struct nft_base_chain *basechain) 321 + static int nft_netdev_event(unsigned long event, struct net_device *dev, 322 + struct nft_base_chain *basechain, bool changename) 323 323 { 324 + struct nft_table *table = basechain->chain.table; 325 + struct nf_hook_ops *ops; 324 326 struct nft_hook *hook; 327 + bool match; 325 328 326 329 list_for_each_entry(hook, &basechain->hook_list, list) { 327 - if (hook->ops.dev != dev) 328 - continue; 330 + ops = nft_hook_find_ops(hook, dev); 331 + match = !strncmp(hook->ifname, dev->name, hook->ifnamelen); 329 332 330 - if (!(basechain->chain.table->flags & NFT_TABLE_F_DORMANT)) 331 - nf_unregister_net_hook(dev_net(dev), &hook->ops); 333 + switch (event) { 334 + case NETDEV_UNREGISTER: 335 + /* NOP if not found or new name still matching */ 336 + if (!ops || (changename && match)) 337 + continue; 332 338 333 - list_del_rcu(&hook->list); 334 - kfree_rcu(hook, rcu); 339 + if (!(table->flags & NFT_TABLE_F_DORMANT)) 340 + nf_unregister_net_hook(dev_net(dev), ops); 341 + 342 + list_del_rcu(&ops->list); 343 + kfree_rcu(ops, rcu); 344 + break; 345 + case NETDEV_REGISTER: 346 + /* NOP if not matching or already registered */ 347 + if (!match || (changename && ops)) 348 + continue; 349 + 350 + ops = kmemdup(&basechain->ops, 351 + sizeof(struct nf_hook_ops), 352 + GFP_KERNEL_ACCOUNT); 353 + if (!ops) 354 + return 1; 355 + 356 + ops->dev = dev; 357 + 358 + if (!(table->flags & NFT_TABLE_F_DORMANT) && 359 + nf_register_net_hook(dev_net(dev), ops)) { 360 + kfree(ops); 361 + return 1; 362 + } 363 + list_add_tail_rcu(&ops->list, &hook->ops_list); 364 + break; 365 + } 366 + nf_tables_chain_device_notify(&basechain->chain, 367 + hook, dev, event); 335 368 break; 336 369 } 370 + return 0; 337 371 } 338 372 339 - static int nf_tables_netdev_event(struct notifier_block *this, 340 - unsigned long event, void *ptr) 373 + static int 
__nf_tables_netdev_event(unsigned long event, 374 + struct net_device *dev, 375 + bool changename) 341 376 { 342 - struct net_device *dev = netdev_notifier_info_to_dev(ptr); 343 377 struct nft_base_chain *basechain; 344 378 struct nftables_pernet *nft_net; 345 379 struct nft_chain *chain; 346 380 struct nft_table *table; 347 381 348 - if (event != NETDEV_UNREGISTER) 349 - return NOTIFY_DONE; 350 - 351 382 nft_net = nft_pernet(dev_net(dev)); 352 - mutex_lock(&nft_net->commit_mutex); 353 383 list_for_each_entry(table, &nft_net->tables, list) { 354 384 if (table->family != NFPROTO_NETDEV && 355 385 table->family != NFPROTO_INET) ··· 394 364 basechain->ops.hooknum != NF_INET_INGRESS) 395 365 continue; 396 366 397 - nft_netdev_event(event, dev, basechain); 367 + if (nft_netdev_event(event, dev, basechain, changename)) 368 + return 1; 398 369 } 399 370 } 400 - mutex_unlock(&nft_net->commit_mutex); 371 + return 0; 372 + } 401 373 402 - return NOTIFY_DONE; 374 + static int nf_tables_netdev_event(struct notifier_block *this, 375 + unsigned long event, void *ptr) 376 + { 377 + struct net_device *dev = netdev_notifier_info_to_dev(ptr); 378 + struct nftables_pernet *nft_net; 379 + int ret = NOTIFY_DONE; 380 + 381 + if (event != NETDEV_REGISTER && 382 + event != NETDEV_UNREGISTER && 383 + event != NETDEV_CHANGENAME) 384 + return NOTIFY_DONE; 385 + 386 + nft_net = nft_pernet(dev_net(dev)); 387 + mutex_lock(&nft_net->commit_mutex); 388 + 389 + if (event == NETDEV_CHANGENAME) { 390 + if (__nf_tables_netdev_event(NETDEV_REGISTER, dev, true)) { 391 + ret = NOTIFY_BAD; 392 + goto out_unlock; 393 + } 394 + __nf_tables_netdev_event(NETDEV_UNREGISTER, dev, true); 395 + } else if (__nf_tables_netdev_event(event, dev, false)) { 396 + ret = NOTIFY_BAD; 397 + } 398 + out_unlock: 399 + mutex_unlock(&nft_net->commit_mutex); 400 + return ret; 403 401 } 404 402 405 403 static struct notifier_block nf_tables_netdev_notifier = {
+1 -1
net/netfilter/nft_flow_offload.c
··· 175 175 bool found = false; 176 176 177 177 list_for_each_entry_rcu(hook, &ft->hook_list, list) { 178 - if (hook->ops.dev != dev) 178 + if (!nft_hook_find_ops_rcu(hook, dev)) 179 179 continue; 180 180 181 181 found = true;
+15 -3
net/netfilter/nft_inner.c
··· 23 23 #include <linux/ip.h> 24 24 #include <linux/ipv6.h> 25 25 26 - static DEFINE_PER_CPU(struct nft_inner_tun_ctx, nft_pcpu_tun_ctx); 26 + struct nft_inner_tun_ctx_locked { 27 + struct nft_inner_tun_ctx ctx; 28 + local_lock_t bh_lock; 29 + }; 30 + 31 + static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = { 32 + .bh_lock = INIT_LOCAL_LOCK(bh_lock), 33 + }; 27 34 28 35 /* Same layout as nft_expr but it embeds the private expression data area. */ 29 36 struct __nft_expr { ··· 244 237 struct nft_inner_tun_ctx *this_cpu_tun_ctx; 245 238 246 239 local_bh_disable(); 247 - this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); 240 + local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 241 + this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); 248 242 if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { 249 243 local_bh_enable(); 244 + local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 250 245 return false; 251 246 } 252 247 *tun_ctx = *this_cpu_tun_ctx; 248 + local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 253 249 local_bh_enable(); 254 250 255 251 return true; ··· 264 254 struct nft_inner_tun_ctx *this_cpu_tun_ctx; 265 255 266 256 local_bh_disable(); 267 - this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx); 257 + local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 258 + this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); 268 259 if (this_cpu_tun_ctx->cookie != tun_ctx->cookie) 269 260 *this_cpu_tun_ctx = *tun_ctx; 261 + local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); 270 262 local_bh_enable(); 271 263 } 272 264
+4 -4
net/netfilter/nft_tunnel.c
··· 621 621 struct geneve_opt *opt; 622 622 int offset = 0; 623 623 624 - inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); 625 - if (!inner) 626 - goto failure; 627 624 while (opts->len > offset) { 625 + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); 626 + if (!inner) 627 + goto failure; 628 628 opt = (struct geneve_opt *)(opts->u.data + offset); 629 629 if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS, 630 630 opt->opt_class) || ··· 634 634 opt->length * 4, opt->opt_data)) 635 635 goto inner_failure; 636 636 offset += sizeof(*opt) + opt->length * 4; 637 + nla_nest_end(skb, inner); 637 638 } 638 - nla_nest_end(skb, inner); 639 639 } 640 640 nla_nest_end(skb, nest); 641 641 return 0;
+2 -2
net/netfilter/xt_TCPOPTSTRIP.c
··· 91 91 return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb)); 92 92 } 93 93 94 - #if IS_ENABLED(CONFIG_IP6_NF_MANGLE) 94 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 95 95 static unsigned int 96 96 tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) 97 97 { ··· 119 119 .targetsize = sizeof(struct xt_tcpoptstrip_target_info), 120 120 .me = THIS_MODULE, 121 121 }, 122 - #if IS_ENABLED(CONFIG_IP6_NF_MANGLE) 122 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 123 123 { 124 124 .name = "TCPOPTSTRIP", 125 125 .family = NFPROTO_IPV6,
+1 -1
net/netfilter/xt_mark.c
··· 48 48 .targetsize = sizeof(struct xt_mark_tginfo2), 49 49 .me = THIS_MODULE, 50 50 }, 51 - #if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) 51 + #if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) || IS_ENABLED(CONFIG_NFT_COMPAT_ARP) 52 52 { 53 53 .name = "MARK", 54 54 .revision = 2,
+1
tools/testing/selftests/net/netfilter/Makefile
··· 24 24 TEST_PROGS += nft_conntrack_helper.sh 25 25 TEST_PROGS += nft_fib.sh 26 26 TEST_PROGS += nft_flowtable.sh 27 + TEST_PROGS += nft_interface_stress.sh 27 28 TEST_PROGS += nft_meta.sh 28 29 TEST_PROGS += nft_nat.sh 29 30 TEST_PROGS += nft_nat_zones.sh
-34
tools/testing/selftests/net/netfilter/conntrack_vrf.sh
··· 32 32 33 33 IP0=172.30.30.1 34 34 IP1=172.30.30.2 35 - DUMMYNET=10.9.9 36 35 PFXL=30 37 36 ret=0 38 37 ··· 51 52 52 53 setup_ns ns0 ns1 53 54 54 - ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 55 - 56 55 if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then 57 56 echo "SKIP: Could not add veth device" 58 57 exit $ksft_skip ··· 61 64 exit $ksft_skip 62 65 fi 63 66 64 - ip -net "$ns0" link add dummy0 type dummy 65 - 66 67 ip -net "$ns0" li set veth0 master tvrf 67 - ip -net "$ns0" li set dummy0 master tvrf 68 68 ip -net "$ns0" li set tvrf up 69 69 ip -net "$ns0" li set veth0 up 70 - ip -net "$ns0" li set dummy0 up 71 70 ip -net "$ns1" li set veth0 up 72 71 73 72 ip -net "$ns0" addr add $IP0/$PFXL dev veth0 74 73 ip -net "$ns1" addr add $IP1/$PFXL dev veth0 75 - ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0 76 74 77 75 listener_ready() 78 76 { ··· 208 216 fi 209 217 } 210 218 211 - test_fib() 212 - { 213 - ip netns exec "$ns0" nft -f - <<EOF 214 - flush ruleset 215 - table ip t { 216 - counter fibcount { } 217 - 218 - chain prerouting { 219 - type filter hook prerouting priority 0; 220 - meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack 221 - } 222 - } 223 - EOF 224 - ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0 225 - ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null 226 - 227 - if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then 228 - echo "PASS: fib lookup returned exepected output interface" 229 - else 230 - echo "FAIL: fib lookup did not return exepected output interface" 231 - ret=1 232 - return 233 - fi 234 - } 235 - 236 219 test_ct_zone_in 237 220 test_masquerade_vrf "default" 238 221 test_masquerade_vrf "pfifo" 239 222 test_masquerade_veth 240 - test_fib 241 223 242 224 exit $ret
+161 -4
tools/testing/selftests/net/netfilter/nft_concat_range.sh
··· 15 15 # Available test groups: 16 16 # - reported_issues: check for issues that were reported in the past 17 17 # - correctness: check that packets match given entries, and only those 18 + # - correctness_large: same but with additional non-matching entries 18 19 # - concurrency: attempt races between insertion, deletion and lookup 19 20 # - timeout: check that packets match entries until they expire 20 21 # - performance: estimate matching rate, compare with rbtree and hash baselines 21 - TESTS="reported_issues correctness concurrency timeout" 22 + TESTS="reported_issues correctness correctness_large concurrency timeout" 23 + 22 24 [ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}" 23 25 24 26 # Set types, defined by TYPE_ variables below ··· 1259 1257 # - add ranged element, check that packets match it 1260 1258 # - check that packets outside range don't match it 1261 1259 # - remove some elements, check that packets don't match anymore 1262 - test_correctness() { 1263 - setup veth send_"${proto}" set || return ${ksft_skip} 1264 - 1260 + test_correctness_main() { 1265 1261 range_size=1 1266 1262 for i in $(seq "${start}" $((start + count))); do 1267 1263 end=$((start + range_size)) ··· 1291 1291 range_size=$((range_size + 1)) 1292 1292 start=$((end + range_size)) 1293 1293 done 1294 + } 1295 + 1296 + test_correctness() { 1297 + setup veth send_"${proto}" set || return ${ksft_skip} 1298 + 1299 + test_correctness_main 1300 + } 1301 + 1302 + # Repeat the correctness tests, but add extra non-matching entries. 1303 + # This exercises the more compact '4 bit group' representation that 1304 + # gets picked when the default 8-bit representation exceed 1305 + # NFT_PIPAPO_LT_SIZE_HIGH bytes of memory. 1306 + # See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust(). 1307 + # 1308 + # The format() helper is way too slow when generating lots of 1309 + # entries so its not used here. 
1310 + test_correctness_large() { 1311 + setup veth send_"${proto}" set || return ${ksft_skip} 1312 + # number of dummy (filler) entries to add. 1313 + local dcount=16385 1314 + 1315 + ( 1316 + echo -n "add element inet filter test { " 1317 + 1318 + case "$type_spec" in 1319 + "ether_addr . ipv4_addr") 1320 + for i in $(seq 1 $dcount); do 1321 + [ $i -gt 1 ] && echo ", " 1322 + format_mac $((1000000 + i)) 1323 + printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) 1324 + done 1325 + ;; 1326 + "inet_proto . ipv6_addr") 1327 + for i in $(seq 1 $dcount); do 1328 + [ $i -gt 1 ] && echo ", " 1329 + printf "%i . " $((RANDOM%256)) 1330 + format_addr6 $((1000000 + i)) 1331 + done 1332 + ;; 1333 + "inet_service . inet_proto") 1334 + # smaller key sizes, need more entries to hit the 1335 + # 4-bit threshold. 1336 + dcount=65536 1337 + for i in $(seq 1 $dcount); do 1338 + local proto=$((RANDOM%256)) 1339 + 1340 + # Test uses UDP to match, as it also fails when matching 1341 + # an entry that doesn't exist, so skip 'udp' entries 1342 + # to not trigger a wrong failure. 1343 + [ $proto -eq 17 ] && proto=18 1344 + [ $i -gt 1 ] && echo ", " 1345 + printf "%i . %i " $(((i%65534) + 1)) $((proto)) 1346 + done 1347 + ;; 1348 + "inet_service . ipv4_addr") 1349 + dcount=32768 1350 + for i in $(seq 1 $dcount); do 1351 + [ $i -gt 1 ] && echo ", " 1352 + printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256)) 1353 + done 1354 + ;; 1355 + "ipv4_addr . ether_addr") 1356 + for i in $(seq 1 $dcount); do 1357 + [ $i -gt 1 ] && echo ", " 1358 + printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) 1359 + format_mac $((1000000 + i)) 1360 + done 1361 + ;; 1362 + "ipv4_addr . inet_service") 1363 + dcount=32768 1364 + for i in $(seq 1 $dcount); do 1365 + [ $i -gt 1 ] && echo ", " 1366 + printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) 1367 + done 1368 + ;; 1369 + "ipv4_addr . 
inet_service . ether_addr . inet_proto . ipv4_addr") 1370 + dcount=65536 1371 + for i in $(seq 1 $dcount); do 1372 + [ $i -gt 1 ] && echo ", " 1373 + printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) 1374 + format_mac $((1000000 + i)) 1375 + printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) 1376 + done 1377 + ;; 1378 + "ipv4_addr . inet_service . inet_proto") 1379 + for i in $(seq 1 $dcount); do 1380 + [ $i -gt 1 ] && echo ", " 1381 + printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) 1382 + done 1383 + ;; 1384 + "ipv4_addr . inet_service . inet_proto . ipv4_addr") 1385 + for i in $(seq 1 $dcount); do 1386 + [ $i -gt 1 ] && echo ", " 1387 + printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256)) 1388 + done 1389 + ;; 1390 + "ipv4_addr . inet_service . ipv4_addr") 1391 + dcount=32768 1392 + for i in $(seq 1 $dcount); do 1393 + [ $i -gt 1 ] && echo ", " 1394 + printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) 1395 + done 1396 + ;; 1397 + "ipv6_addr . ether_addr") 1398 + for i in $(seq 1 $dcount); do 1399 + [ $i -gt 1 ] && echo ", " 1400 + format_addr6 $((i + 1000000)) 1401 + echo -n " . " 1402 + format_mac $((1000000 + i)) 1403 + done 1404 + ;; 1405 + "ipv6_addr . inet_service") 1406 + dcount=32768 1407 + for i in $(seq 1 $dcount); do 1408 + [ $i -gt 1 ] && echo ", " 1409 + format_addr6 $((i + 1000000)) 1410 + echo -n " . $(((RANDOM%65534) + 1))" 1411 + done 1412 + ;; 1413 + "ipv6_addr . inet_service . ether_addr") 1414 + dcount=32768 1415 + for i in $(seq 1 $dcount); do 1416 + [ $i -gt 1 ] && echo ", " 1417 + format_addr6 $((i + 1000000)) 1418 + echo -n " . $(((RANDOM%65534) + 1)) . 
" 1419 + format_mac $((i + 1000000)) 1420 + done 1421 + ;; 1422 + "ipv6_addr . inet_service . ether_addr . inet_proto") 1423 + dcount=65536 1424 + for i in $(seq 1 $dcount); do 1425 + [ $i -gt 1 ] && echo ", " 1426 + format_addr6 $((i + 1000000)) 1427 + echo -n " . $(((RANDOM%65534) + 1)) . " 1428 + format_mac $((i + 1000000)) 1429 + echo -n " . $((RANDOM%256))" 1430 + done 1431 + ;; 1432 + "ipv6_addr . inet_service . ipv6_addr . inet_service") 1433 + dcount=32768 1434 + for i in $(seq 1 $dcount); do 1435 + [ $i -gt 1 ] && echo ", " 1436 + format_addr6 $((i + 1000000)) 1437 + echo -n " . $(((RANDOM%65534) + 1)) . " 1438 + format_addr6 $((i + 2123456)) 1439 + echo -n " . $((RANDOM%256))" 1440 + done 1441 + ;; 1442 + *) 1443 + "Unhandled $type_spec" 1444 + return 1 1445 + esac 1446 + echo -n "}" 1447 + 1448 + ) | nft -f - || return 1 1449 + 1450 + test_correctness_main 1294 1451 } 1295 1452 1296 1453 # Concurrency test template:
+602 -10
tools/testing/selftests/net/netfilter/nft_fib.sh
··· 3 3 # This tests the fib expression. 4 4 # 5 5 # Kselftest framework requirement - SKIP code is 4. 6 + # 7 + # 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 8 + # dead:1::99 dead:1::1 dead:2::1 dead:2::99 9 + # ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 6 10 7 11 source lib.sh 8 12 ··· 76 72 EOF 77 73 } 78 74 75 + load_type_ruleset() { 76 + local netns=$1 77 + 78 + for family in ip ip6;do 79 + ip netns exec "$netns" nft -f /dev/stdin <<EOF 80 + table $family filter { 81 + chain type_match_in { 82 + fib daddr type local counter comment "daddr configured on other iface" 83 + fib daddr . iif type local counter comment "daddr configured on iif" 84 + fib daddr type unicast counter comment "daddr not local" 85 + fib daddr . iif type unicast counter comment "daddr not configured on iif" 86 + } 87 + 88 + chain type_match_out { 89 + fib daddr type unicast counter 90 + fib daddr . oif type unicast counter 91 + fib daddr type local counter 92 + fib daddr . oif type local counter 93 + } 94 + 95 + chain prerouting { 96 + type filter hook prerouting priority 0; 97 + icmp type echo-request counter jump type_match_in 98 + icmpv6 type echo-request counter jump type_match_in 99 + } 100 + 101 + chain input { 102 + type filter hook input priority 0; 103 + icmp type echo-request counter jump type_match_in 104 + icmpv6 type echo-request counter jump type_match_in 105 + } 106 + 107 + chain forward { 108 + type filter hook forward priority 0; 109 + icmp type echo-request counter jump type_match_in 110 + icmpv6 type echo-request counter jump type_match_in 111 + } 112 + 113 + chain output { 114 + type filter hook output priority 0; 115 + icmp type echo-request counter jump type_match_out 116 + icmpv6 type echo-request counter jump type_match_out 117 + } 118 + 119 + chain postrouting { 120 + type filter hook postrouting priority 0; 121 + icmp type echo-request counter jump type_match_out 122 + icmpv6 type echo-request counter jump type_match_out 123 + } 124 + } 125 + EOF 126 + done 
127 + } 128 + 129 + reload_type_ruleset() { 130 + ip netns exec "$1" nft flush table ip filter 131 + ip netns exec "$1" nft flush table ip6 filter 132 + load_type_ruleset "$1" 133 + } 134 + 135 + check_fib_type_counter_family() { 136 + local family="$1" 137 + local want="$2" 138 + local ns="$3" 139 + local chain="$4" 140 + local what="$5" 141 + local errmsg="$6" 142 + 143 + if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then 144 + echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2 145 + ip netns exec "$ns" nft list chain "$family" filter "$chain" 146 + ret=1 147 + return 1 148 + fi 149 + 150 + return 0 151 + } 152 + 153 + check_fib_type_counter() { 154 + check_fib_type_counter_family "ip" "$@" || return 1 155 + check_fib_type_counter_family "ip6" "$@" || return 1 156 + } 157 + 79 158 load_ruleset_count() { 80 159 local netns=$1 81 160 ··· 177 90 if dmesg | grep -q ' nft_rpfilter: ';then 178 91 dmesg | grep ' nft_rpfilter: ' 179 92 echo "FAIL: rpfilter did drop packets" 93 + ret=1 180 94 return 1 181 95 fi 182 96 ··· 252 164 return 0 253 165 } 254 166 167 + test_ping_unreachable() { 168 + local daddr4=$1 169 + local daddr6=$2 170 + 171 + if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then 172 + echo "FAIL: ${ns1} could reach $daddr4" 1>&2 173 + return 1 174 + fi 175 + 176 + if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then 177 + echo "FAIL: ${ns1} could reach $daddr6" 1>&2 178 + return 1 179 + fi 180 + 181 + return 0 182 + } 183 + 184 + test_fib_type() { 185 + local notice="$1" 186 + local errmsg="addr-on-if" 187 + local lret=0 188 + 189 + if ! load_type_ruleset "$nsrouter";then 190 + echo "SKIP: Could not load fib type ruleset" 191 + [ $ret -eq 0 ] && ret=$ksft_skip 192 + return 193 + fi 194 + 195 + # makes router receive packet for addresses configured on incoming 196 + # interface. 
197 + test_ping 10.0.1.1 dead:1::1 || return 1 198 + 199 + # expectation: triggers all 'local' in prerouting/input. 200 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 201 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1 202 + 203 + reload_type_ruleset "$nsrouter" 204 + # makes router receive packet for address configured on a different (but local) 205 + # interface. 206 + test_ping 10.0.2.1 dead:2::1 || return 1 207 + 208 + # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'. 209 + errmsg="addr-on-host" 210 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 211 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 212 + 213 + reload_type_ruleset "$nsrouter" 214 + test_ping 10.0.2.99 dead:2::99 || return 1 215 + errmsg="addr-on-otherhost" 216 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1 217 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 218 + 219 + if [ $lret -eq 0 ];then 220 + echo "PASS: fib expression address types match ($notice)" 221 + else 222 + echo "FAIL: fib expression address types match ($notice)" 223 + ret=1 224 + fi 225 + } 226 + 227 + test_fib_vrf_dev_add_dummy() 228 + { 229 + if ! ip -net "$nsrouter" link add dummy0 type dummy ;then 230 + echo "SKIP: VRF tests: dummy device type not supported" 231 + return 1 232 + fi 233 + 234 + if ! 
ip -net "$nsrouter" link add tvrf type vrf table 9876;then 235 + echo "SKIP: VRF tests: vrf device type not supported" 236 + return 1 237 + fi 238 + 239 + ip -net "$nsrouter" link set dummy0 master tvrf 240 + ip -net "$nsrouter" link set dummy0 up 241 + ip -net "$nsrouter" link set tvrf up 242 + } 243 + 244 + load_ruleset_vrf() 245 + { 246 + # Due to the many different possible combinations using named counters 247 + # or one-rule-per-expected-result is complex. 248 + # 249 + # Instead, add dynamic sets for the fib modes 250 + # (fib address type, fib output interface lookup .. ), 251 + # and then add the obtained fib results to them. 252 + # 253 + # The test is successful if the sets contain the expected results 254 + # and no unexpected extra entries existed. 255 + ip netns exec "$nsrouter" nft -f - <<EOF 256 + flush ruleset 257 + table inet t { 258 + set fibif4 { 259 + typeof meta iif . ip daddr . fib daddr oif 260 + flags dynamic 261 + counter 262 + } 263 + 264 + set fibif4iif { 265 + typeof meta iif . ip daddr . fib daddr . iif oif 266 + flags dynamic 267 + counter 268 + } 269 + 270 + set fibif6 { 271 + typeof meta iif . ip6 daddr . fib daddr oif 272 + flags dynamic 273 + counter 274 + } 275 + 276 + set fibif6iif { 277 + typeof meta iif . ip6 daddr . fib daddr . iif oif 278 + flags dynamic 279 + counter 280 + } 281 + 282 + set fibtype4 { 283 + typeof meta iif . ip daddr . fib daddr type 284 + flags dynamic 285 + counter 286 + } 287 + 288 + set fibtype4iif { 289 + typeof meta iif . ip daddr . fib daddr . iif type 290 + flags dynamic 291 + counter 292 + } 293 + 294 + set fibtype6 { 295 + typeof meta iif . ip6 daddr . fib daddr type 296 + flags dynamic 297 + counter 298 + } 299 + 300 + set fibtype6iif { 301 + typeof meta iif . ip6 daddr . fib daddr . iif type 302 + flags dynamic 303 + counter 304 + } 305 + 306 + chain fib_test { 307 + meta nfproto ipv4 jump { 308 + add @fibif4 { meta iif . ip daddr . fib daddr oif } 309 + add @fibif4iif { meta iif . ip daddr . 
fib daddr . iif oif } 310 + add @fibtype4 { meta iif . ip daddr . fib daddr type } 311 + add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type } 312 + 313 + add @fibif4 { meta iif . ip saddr . fib saddr oif } 314 + add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif } 315 + } 316 + 317 + meta nfproto ipv6 jump { 318 + add @fibif6 { meta iif . ip6 daddr . fib daddr oif } 319 + add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif } 320 + add @fibtype6 { meta iif . ip6 daddr . fib daddr type } 321 + add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type } 322 + 323 + add @fibif6 { meta iif . ip6 saddr . fib saddr oif } 324 + add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif } 325 + } 326 + } 327 + 328 + chain prerouting { 329 + type filter hook prerouting priority 0; 330 + icmp type echo-request counter jump fib_test 331 + 332 + # neighbour discovery to be ignored. 333 + icmpv6 type echo-request counter jump fib_test 334 + } 335 + } 336 + EOF 337 + 338 + if [ $? -ne 0 ] ;then 339 + echo "SKIP: Could not load ruleset for fib vrf test" 340 + [ $ret -eq 0 ] && ret=$ksft_skip 341 + return 1 342 + fi 343 + } 344 + 345 + check_type() 346 + { 347 + local setname="$1" 348 + local iifname="$2" 349 + local addr="$3" 350 + local type="$4" 351 + local count="$5" 352 + 353 + [ -z "$count" ] && count=1 354 + 355 + if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then 356 + echo "FAIL: did not find $iifname . $addr . $type in $setname" 357 + ip netns exec "$nsrouter" nft list set inet t "$setname" 358 + ret=1 359 + return 1 360 + fi 361 + 362 + # delete the entry, this allows to check if anything unexpected appeared 363 + # at the end of the test run: all dynamic sets should be empty by then. 364 + if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then 365 + echo "FAIL: can't delete $iifname . $addr . 
$type in $setname" 366 + ip netns exec "$nsrouter" nft list set inet t "$setname" 367 + ret=1 368 + return 1 369 + fi 370 + 371 + return 0 372 + } 373 + 374 + check_local() 375 + { 376 + check_type $@ "local" 1 377 + } 378 + 379 + check_unicast() 380 + { 381 + check_type $@ "unicast" 1 382 + } 383 + 384 + check_rpf() 385 + { 386 + check_type $@ 387 + } 388 + 389 + check_fib_vrf_sets_empty() 390 + { 391 + local setname="" 392 + local lret=0 393 + 394 + # A non-empty set means that we have seen unexpected packets OR 395 + # that a fib lookup provided unexpected results. 396 + for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \ 397 + "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do 398 + if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then 399 + echo "FAIL: $setname not empty" 400 + ip netns exec "$nsrouter" nft list set inet t "$setname" 401 + ret=1 402 + lret=1 403 + fi 404 + done 405 + 406 + return $lret 407 + } 408 + 409 + check_fib_vrf_type() 410 + { 411 + local msg="$1" 412 + 413 + local addr 414 + # the incoming interface is always veth0. As its not linked to a VRF, 415 + # the 'tvrf' device should NOT show up anywhere. 416 + local ifname="veth0" 417 + local lret=0 418 + 419 + # local_veth0, local_veth1 420 + for addr in "10.0.1.1" "10.0.2.1"; do 421 + check_local fibtype4 "$ifname" "$addr" || lret=1 422 + check_type fibif4 "$ifname" "$addr" "0" || lret=1 423 + done 424 + for addr in "dead:1::1" "dead:2::1";do 425 + check_local fibtype6 "$ifname" "$addr" || lret=1 426 + check_type fibif6 "$ifname" "$addr" "0" || lret=1 427 + done 428 + 429 + # when restricted to the incoming interface, 10.0.1.1 should 430 + # be 'local', but 10.0.2.1 unicast. 431 + check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1 432 + check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1 433 + 434 + # same for the ipv6 addresses. 
435 + check_local fibtype6iif "$ifname" "dead:1::1" || lret=1 436 + check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1 437 + 438 + # None of these addresses should find a valid route when restricting 439 + # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are 440 + # reachable via 'lo'. 441 + for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do 442 + check_type fibif4iif "$ifname" "$addr" "0" || lret=1 443 + done 444 + 445 + # expect default route (veth1), dummy0 is part of VRF but iif isn't. 446 + for addr in "10.9.9.1" "10.9.9.2";do 447 + check_unicast fibtype4 "$ifname" "$addr" || lret=1 448 + check_unicast fibtype4iif "$ifname" "$addr" || lret=1 449 + check_type fibif4 "$ifname" "$addr" "veth1" || lret=1 450 + done 451 + for addr in "dead:9::1" "dead:9::2";do 452 + check_unicast fibtype6 "$ifname" "$addr" || lret=1 453 + check_unicast fibtype6iif "$ifname" "$addr" || lret=1 454 + check_type fibif6 "$ifname" "$addr" "veth1" || lret=1 455 + done 456 + 457 + # same for the IPv6 equivalent addresses. 
458 + for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do 459 + check_type fibif6iif "$ifname" "$addr" "0" || lret=1 460 + done 461 + 462 + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 463 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 464 + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 465 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 466 + 467 + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 468 + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 469 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 470 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 471 + 472 + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1 473 + check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1 474 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1 475 + check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1 476 + 477 + check_fib_vrf_sets_empty || lret=1 478 + 479 + if [ $lret -eq 0 ];then 480 + echo "PASS: $msg" 481 + else 482 + echo "FAIL: $msg" 483 + ret=1 484 + fi 485 + } 486 + 487 + check_fib_veth_vrf_type() 488 + { 489 + local msg="$1" 490 + 491 + local addr 492 + local ifname 493 + local setname 494 + local lret=0 495 + 496 + # as veth0 is now part of tvrf interface, packets will be seen 497 + # twice, once with iif veth0, then with iif tvrf. 498 + 499 + for ifname in "veth0" "tvrf"; do 500 + for addr in "10.0.1.1" "10.9.9.1"; do 501 + check_local fibtype4 "$ifname" "$addr" || lret=1 502 + # addr local, but nft_fib doesn't return routes with RTN_LOCAL. 503 + check_type fibif4 "$ifname" "$addr" 0 || lret=1 504 + check_type fibif4iif "$ifname" "$addr" 0 || lret=1 505 + done 506 + 507 + for addr in "dead:1::1" "dead:9::1"; do 508 + check_local fibtype6 "$ifname" "$addr" || lret=1 509 + # same, address is local but no route is returned for lo. 
510 + check_type fibif6 "$ifname" "$addr" 0 || lret=1 511 + check_type fibif6iif "$ifname" "$addr" 0 || lret=1 512 + done 513 + 514 + for t in fibtype4 fibtype4iif; do 515 + check_unicast "$t" "$ifname" 10.9.9.2 || lret=1 516 + done 517 + for t in fibtype6 fibtype6iif; do 518 + check_unicast "$t" "$ifname" dead:9::2 || lret=1 519 + done 520 + 521 + check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1 522 + check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1 523 + 524 + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 525 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 526 + 527 + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 528 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 529 + 530 + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 531 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 532 + check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1 533 + check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1 534 + 535 + # restricted to iif -- MUST NOT provide result, its != $ifname. 536 + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 537 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 538 + 539 + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1 540 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1 541 + check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1 542 + check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1 543 + done 544 + 545 + check_local fibtype4iif "veth0" "10.0.1.1" || lret=1 546 + check_local fibtype6iif "veth0" "dead:1::1" || lret=1 547 + 548 + check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1 549 + check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1 550 + 551 + # 10.9.9.2 should not provide a result for iif veth, but 552 + # should when iif is tvrf. 553 + # This is because its reachable via dummy0 which is part of 554 + # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0). 
555 + check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1 556 + check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1 557 + 558 + check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1 559 + check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1 560 + 561 + check_fib_vrf_sets_empty || lret=1 562 + 563 + if [ $lret -eq 0 ];then 564 + echo "PASS: $msg" 565 + else 566 + echo "FAIL: $msg" 567 + ret=1 568 + fi 569 + } 570 + 571 + # Extends nsrouter config by adding dummy0+vrf. 572 + # 573 + # 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 574 + # dead:1::99 dead:1::1 dead:2::1 dead:2::99 575 + # ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 576 + # [dummy0] 577 + # 10.9.9.1 578 + # dead:9::1 579 + # [tvrf] 580 + test_fib_vrf() 581 + { 582 + local cntname="" 583 + 584 + if ! test_fib_vrf_dev_add_dummy; then 585 + [ $ret -eq 0 ] && ret=$ksft_skip 586 + return 587 + fi 588 + 589 + ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0 590 + ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad 591 + 592 + ip -net "$nsrouter" route add default via 10.0.2.99 593 + ip -net "$nsrouter" route add default via dead:2::99 594 + 595 + load_ruleset_vrf || return 596 + 597 + # no echo reply for these addresses: The dummy interface is part of tvrf, 598 + # but veth0 (incoming interface) isn't linked to it. 599 + test_ping_unreachable "10.9.9.1" "dead:9::1" & 600 + test_ping_unreachable "10.9.9.2" "dead:9::2" & 601 + 602 + # expect replies from these. 603 + test_ping "10.0.1.1" "dead:1::1" 604 + test_ping "10.0.2.1" "dead:2::1" 605 + test_ping "10.0.2.99" "dead:2::99" 606 + 607 + wait 608 + 609 + check_fib_vrf_type "fib expression address types match (iif not in vrf)" 610 + 611 + # second round: this time, make veth0 (rx interface) part of the vrf. 612 + # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2 613 + # becomes unreachable. 
614 + ip -net "$nsrouter" link set veth0 master tvrf 615 + ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad 616 + 617 + # this reload should not be needed, but in case 618 + # there is some error (missing or unexpected entry) this will prevent them 619 + # from leaking into round 2. 620 + load_ruleset_vrf || return 621 + 622 + test_ping "10.0.1.1" "dead:1::1" 623 + test_ping "10.9.9.1" "dead:9::1" 624 + 625 + # ns2 should no longer be reachable (veth1 not in vrf) 626 + test_ping_unreachable "10.0.2.99" "dead:2::99" & 627 + 628 + # vrf via dummy0, but host doesn't exist 629 + test_ping_unreachable "10.9.9.2" "dead:9::2" & 630 + 631 + wait 632 + 633 + check_fib_veth_vrf_type "fib expression address types match (iif in vrf)" 634 + } 635 + 255 636 ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 256 637 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null 257 638 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null 258 639 259 640 test_ping 10.0.2.1 dead:2::1 || exit 1 260 - check_drops || exit 1 641 + check_drops 261 642 262 643 test_ping 10.0.2.99 dead:2::99 || exit 1 263 - check_drops || exit 1 644 + check_drops 264 645 265 - echo "PASS: fib expression did not cause unwanted packet drops" 646 + [ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops" 647 + 648 + load_input_ruleset "$ns1" 649 + 650 + test_ping 127.0.0.1 ::1 651 + check_drops 652 + 653 + test_ping 10.0.1.99 dead:1::99 654 + check_drops 655 + 656 + [ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets" 266 657 267 658 load_input_ruleset "$ns1" 268 659 ··· 801 234 # ... pbr ruleset for the router, check iif+oif. 802 235 if ! 
load_pbr_ruleset "$nsrouter";then 803 236 echo "SKIP: Could not load fib forward ruleset" 804 - exit $ksft_skip 237 + [ "$ret" -eq 0 ] && ret=$ksft_skip 805 238 fi 806 239 807 240 ip -net "$nsrouter" rule add from all table 128 ··· 812 245 # drop main ipv4 table 813 246 ip -net "$nsrouter" -4 rule delete table main 814 247 815 - if ! test_ping 10.0.2.99 dead:2::99;then 816 - ip -net "$nsrouter" nft list ruleset 817 - echo "FAIL: fib mismatch in pbr setup" 818 - exit 1 248 + if test_ping 10.0.2.99 dead:2::99;then 249 + echo "PASS: fib expression forward check with policy based routing" 250 + else 251 + echo "FAIL: fib expression forward check with policy based routing" 252 + ret=1 819 253 fi 820 254 821 - echo "PASS: fib expression forward check with policy based routing" 822 - exit 0 255 + test_fib_type "policy routing" 256 + ip netns exec "$nsrouter" nft delete table ip filter 257 + ip netns exec "$nsrouter" nft delete table ip6 filter 258 + 259 + # Un-do policy routing changes 260 + ip -net "$nsrouter" rule del from all table 128 261 + ip -net "$nsrouter" rule del from all iif veth0 table 129 262 + 263 + ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0 264 + ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1 265 + 266 + ip -net "$ns1" -4 route del default 267 + ip -net "$ns1" -6 route del default 268 + 269 + ip -net "$ns1" -4 route add default via 10.0.1.1 270 + ip -net "$ns1" -6 route add default via dead:1::1 271 + 272 + ip -net "$nsrouter" -4 rule add from all table main priority 32766 273 + 274 + test_fib_type "default table" 275 + ip netns exec "$nsrouter" nft delete table ip filter 276 + ip netns exec "$nsrouter" nft delete table ip6 filter 277 + 278 + test_fib_vrf 279 + 280 + exit $ret
+151
tools/testing/selftests/net/netfilter/nft_interface_stress.sh
#!/bin/bash -e
#
# SPDX-License-Identifier: GPL-2.0
#
# Torture nftables' netdevice notifier callbacks and related code by frequent
# renaming of interfaces which netdev-family chains and flowtables hook into.
#
# Topology built below (three namespaces, two veth pairs):
#
#   nsc [cr0] <---> [rc0] nsr [rs0] <---> [sr0] nss
#   10.0.0.1      10.0.0.2   10.1.0.2      10.1.0.1
#
# While iperf3 pushes traffic from nsc to nss through nsr, background loops
# keep renaming rcN/rsN in nsr, list the ruleset and run 'nft monitor'.
#
# NOTE: the script runs with errexit (-e). Any command whose failure is an
# expected outcome must therefore be part of a condition ('if', '||', '&&'),
# it must not be followed by a separate '$?' check.

source lib.sh

checktool "nft --version" "run test without nft tool"
checktool "iperf3 --version" "run test without iperf3 tool"

# Remember the taint state from before the test: a kernel that is tainted
# already (out-of-tree module, earlier warning, ...) must not make this test
# fail, only taint bits that change during the run are of interest.
tainted_then=$(</proc/sys/kernel/tainted)
[[ $tainted_then -eq 0 ]] || \
	echo "INFO: kernel already tainted ($tainted_then) before test start"

# how many seconds to torture the kernel?
# default to 80% of max run time but don't exceed 48s
TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48

trap "cleanup_all_ns" EXIT

setup_ns nsc nsr nss

ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
ip -net $nsc addr add 10.0.0.1/24 dev cr0
ip -net $nsc link set cr0 up
ip -net $nsc route add default via 10.0.0.2

ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
ip -net $nss addr add 10.1.0.1/24 dev sr0
ip -net $nss link set sr0 up
ip -net $nss route add default via 10.1.0.2

ip -net $nsr addr add 10.0.0.2/24 dev rc0
ip -net $nsr link set rc0 up
ip -net $nsr addr add 10.1.0.2/24 dev rs0
ip -net $nsr link set rs0 up
ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1

# Generate the ruleset: one netdev ingress chain per possible interface name
# (rc0..rc9, rs0..rs9), one flowtable per name pair and a forward chain
# offloading TCP flows into the flowtable matching the current names.
{
	echo "table netdev t {"
	for ((i = 0; i < 10; i++)); do
		cat <<EOF
	chain chain_rc$i {
		type filter hook ingress device rc$i priority 0
		counter
	}
	chain chain_rs$i {
		type filter hook ingress device rs$i priority 0
		counter
	}
EOF
	done
	echo "}"
	echo "table ip t {"
	for ((i = 0; i < 10; i++)); do
		cat <<EOF
	flowtable ft_${i} {
		hook ingress priority 0
		devices = { rc$i, rs$i }
	}
EOF
	done
	echo "chain c {"
	echo "type filter hook forward priority 0"
	for ((i = 0; i < 10; i++)); do
		echo -n "iifname rc$i oifname rs$i "
		echo "ip protocol tcp counter flow add @ft_${i}"
	done
	echo "counter"
	echo "}"
	echo "}"
} | ip netns exec $nsr nft -f - || {
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
}

# Endlessly cycle the router's interface names: rc0 -> rc1 -> ... -> rc9 -> rc0
# (same for rs*), so hooks keep getting unregistered and registered.
for ((o=0, n=1; ; o=n, n++, n %= 10)); do
	ip -net $nsr link set rc$o name rc$n
	ip -net $nsr link set rs$o name rs$n
done &
rename_loop_pid=$!

# Concurrent ruleset dumps; errors are expected while names change.
while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
nft_list_pid=$!

# Exercise the notification path as well.
ip netns exec $nsr nft monitor >/dev/null &
nft_monitor_pid=$!

ip netns exec $nss iperf3 --server --daemon -1
summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
rate=$(ip netns exec $nsc iperf3 \
	--format k -c 10.1.0.1 --time $TEST_RUNTIME \
	--length 56 --parallel 10 -i 0 | sed -n "$summary_expr")

kill $nft_list_pid
kill $nft_monitor_pid
kill $rename_loop_pid
wait

# Wildcard hook specs need support in the host's nft binary. The load is
# the 'if' condition itself: with errexit active, a plain command followed
# by a '$?' test would abort the script before the SKIP message is printed.
if ! ip netns exec $nsr nft -f - <<EOF
table ip t {
	flowtable ft_wild {
		hook ingress priority 0
		devices = { wild* }
	}
}
EOF
then
	echo "SKIP wildcard tests: not supported by host's nft?"
else
	# create 100 matching devices in parallel ...
	for ((i = 0; i < 100; i++)); do
		ip -net $nsr link add wild$i type dummy &
	done
	wait
	# ... and remove them again, not in creation order
	for ((i = 80; i < 100; i++)); do
		ip -net $nsr link del wild$i &
	done
	for ((i = 0; i < 80; i++)); do
		ip -net $nsr link del wild$i &
	done
	wait
	# ten parallel workers, each adding then deleting its own ten devices
	for ((i = 0; i < 100; i += 10)); do
		(
		for ((j = 0; j < 10; j++)); do
			ip -net $nsr link add wild$((i + j)) type dummy
		done
		for ((j = 0; j < 10; j++)); do
			ip -net $nsr link del wild$((i + j))
		done
		) &
	done
	wait
fi

# Only a taint value that changed while the test was running is a failure.
tainted_now=$(</proc/sys/kernel/tainted)
[[ $tainted_now -eq $tainted_then ]] || {
	echo "FAIL: Kernel is tainted!"
	exit $ksft_fail
}

# $rate may carry a fractional part (e.g. "56.0") which an integer '-gt'
# comparison cannot parse; accept any value containing a non-zero digit and
# treat an empty or all-zero value as "no throughput".
[[ $rate =~ [1-9] ]] || {
	echo "FAIL: Zero throughput in iperf3"
	exit $ksft_fail
}

[[ -f /sys/kernel/debug/kmemleak && \
   -n $(</sys/kernel/debug/kmemleak) ]] && {
	echo "FAIL: non-empty kmemleak report"
	exit $ksft_fail
}

exit $ksft_pass