Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nf-next-25-05-06' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Apparently, nf_conntrack_bridge changes the way in which fragments
are handled, leading to packet drops. From Huajian Yang.

2) Add a selftest to stress the conntrack subsystem, from Florian Westphal.

3) nft_quota depletion is off-by-one byte, from Zhongqiu Duan.

4) Rewrite the procfs code that reads the conntrack table to speed it up,
from Florian Westphal.

5) Two patches to prevent overflow in nft_pipapo lookup table and to
clamp the maximum bucket size.

6) Update nft_fib selftest to check for loopback packet bypass.
From Florian Westphal.

netfilter pull request 25-05-06

* tag 'nf-next-25-05-06' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
selftests: netfilter: nft_fib.sh: check lo packets bypass fib lookup
netfilter: nft_set_pipapo: clamp maximum map bucket size to INT_MAX
netfilter: nft_set_pipapo: prevent overflow in lookup table allocation
netfilter: nf_conntrack: speed up reads from nf_conntrack proc file
netfilter: nft_quota: match correctly when the quota just depleted
selftests: netfilter: add conntrack stress test
netfilter: bridge: Move specific fragmented packet to slow_path instead of dropping it
====================

Link: https://patch.msgid.link/20250505234151.228057-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+559 -68
+6 -6
net/bridge/netfilter/nf_conntrack_bridge.c
··· 60 60 struct ip_fraglist_iter iter; 61 61 struct sk_buff *frag; 62 62 63 - if (first_len - hlen > mtu || 64 - skb_headroom(skb) < ll_rs) 63 + if (first_len - hlen > mtu) 65 64 goto blackhole; 66 65 67 - if (skb_cloned(skb)) 66 + if (skb_cloned(skb) || 67 + skb_headroom(skb) < ll_rs) 68 68 goto slow_path; 69 69 70 70 skb_walk_frags(skb, frag) { 71 - if (frag->len > mtu || 72 - skb_headroom(frag) < hlen + ll_rs) 71 + if (frag->len > mtu) 73 72 goto blackhole; 74 73 75 - if (skb_shared(frag)) 74 + if (skb_shared(frag) || 75 + skb_headroom(frag) < hlen + ll_rs) 76 76 goto slow_path; 77 77 } 78 78
+6 -6
net/ipv6/netfilter.c
··· 164 164 struct ip6_fraglist_iter iter; 165 165 struct sk_buff *frag2; 166 166 167 - if (first_len - hlen > mtu || 168 - skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) 167 + if (first_len - hlen > mtu) 169 168 goto blackhole; 170 169 171 - if (skb_cloned(skb)) 170 + if (skb_cloned(skb) || 171 + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) 172 172 goto slow_path; 173 173 174 174 skb_walk_frags(skb, frag2) { 175 - if (frag2->len > mtu || 176 - skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) 175 + if (frag2->len > mtu) 177 176 goto blackhole; 178 177 179 178 /* Partially cloned skb? */ 180 - if (skb_shared(frag2)) 179 + if (skb_shared(frag2) || 180 + skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) 181 181 goto slow_path; 182 182 } 183 183
+53 -35
net/netfilter/nf_conntrack_standalone.c
··· 98 98 struct seq_net_private p; 99 99 struct hlist_nulls_head *hash; 100 100 unsigned int htable_size; 101 + unsigned int skip_elems; 101 102 unsigned int bucket; 102 103 u_int64_t time_now; 103 104 }; 104 105 105 - static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 106 + static struct nf_conntrack_tuple_hash *ct_get_next(const struct net *net, 107 + struct ct_iter_state *st) 106 108 { 107 - struct ct_iter_state *st = seq->private; 109 + struct nf_conntrack_tuple_hash *h; 108 110 struct hlist_nulls_node *n; 111 + unsigned int i; 109 112 110 - for (st->bucket = 0; 111 - st->bucket < st->htable_size; 112 - st->bucket++) { 113 - n = rcu_dereference( 114 - hlist_nulls_first_rcu(&st->hash[st->bucket])); 115 - if (!is_a_nulls(n)) 116 - return n; 117 - } 118 - return NULL; 119 - } 113 + for (i = st->bucket; i < st->htable_size; i++) { 114 + unsigned int skip = 0; 120 115 121 - static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 122 - struct hlist_nulls_node *head) 123 - { 124 - struct ct_iter_state *st = seq->private; 116 + restart: 117 + hlist_nulls_for_each_entry_rcu(h, n, &st->hash[i], hnnode) { 118 + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 119 + struct hlist_nulls_node *tmp = n; 125 120 126 - head = rcu_dereference(hlist_nulls_next_rcu(head)); 127 - while (is_a_nulls(head)) { 128 - if (likely(get_nulls_value(head) == st->bucket)) { 129 - if (++st->bucket >= st->htable_size) 130 - return NULL; 121 + if (!net_eq(net, nf_ct_net(ct))) 122 + continue; 123 + 124 + if (++skip <= st->skip_elems) 125 + continue; 126 + 127 + /* h should be returned, skip to nulls marker. 
*/ 128 + while (!is_a_nulls(tmp)) 129 + tmp = rcu_dereference(hlist_nulls_next_rcu(tmp)); 130 + 131 + /* check if h is still linked to hash[i] */ 132 + if (get_nulls_value(tmp) != i) { 133 + skip = 0; 134 + goto restart; 135 + } 136 + 137 + st->skip_elems = skip; 138 + st->bucket = i; 139 + return h; 131 140 } 132 - head = rcu_dereference( 133 - hlist_nulls_first_rcu(&st->hash[st->bucket])); 141 + 142 + skip = 0; 143 + if (get_nulls_value(n) != i) 144 + goto restart; 145 + 146 + st->skip_elems = 0; 134 147 } 135 - return head; 136 - } 137 148 138 - static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) 139 - { 140 - struct hlist_nulls_node *head = ct_get_first(seq); 141 - 142 - if (head) 143 - while (pos && (head = ct_get_next(seq, head))) 144 - pos--; 145 - return pos ? NULL : head; 149 + st->bucket = i; 150 + return NULL; 146 151 } 147 152 148 153 static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 149 154 __acquires(RCU) 150 155 { 151 156 struct ct_iter_state *st = seq->private; 157 + struct net *net = seq_file_net(seq); 152 158 153 159 st->time_now = ktime_get_real_ns(); 154 160 rcu_read_lock(); 155 161 156 162 nf_conntrack_get_ht(&st->hash, &st->htable_size); 157 - return ct_get_idx(seq, *pos); 163 + 164 + if (*pos == 0) { 165 + st->skip_elems = 0; 166 + st->bucket = 0; 167 + } else if (st->skip_elems) { 168 + /* resume from last dumped entry */ 169 + st->skip_elems--; 170 + } 171 + 172 + return ct_get_next(net, st); 158 173 } 159 174 160 175 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) 161 176 { 177 + struct ct_iter_state *st = s->private; 178 + struct net *net = seq_file_net(s); 179 + 162 180 (*pos)++; 163 - return ct_get_next(s, v); 181 + return ct_get_next(net, st); 164 182 } 165 183 166 184 static void ct_seq_stop(struct seq_file *s, void *v)
+13 -7
net/netfilter/nft_quota.c
··· 19 19 }; 20 20 21 21 static inline bool nft_overquota(struct nft_quota *priv, 22 - const struct sk_buff *skb) 22 + const struct sk_buff *skb, 23 + bool *report) 23 24 { 24 - return atomic64_add_return(skb->len, priv->consumed) >= 25 - atomic64_read(&priv->quota); 25 + u64 consumed = atomic64_add_return(skb->len, priv->consumed); 26 + u64 quota = atomic64_read(&priv->quota); 27 + 28 + if (report) 29 + *report = consumed >= quota; 30 + 31 + return consumed > quota; 26 32 } 27 33 28 34 static inline bool nft_quota_invert(struct nft_quota *priv) ··· 40 34 struct nft_regs *regs, 41 35 const struct nft_pktinfo *pkt) 42 36 { 43 - if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) 37 + if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv)) 44 38 regs->verdict.code = NFT_BREAK; 45 39 } 46 40 ··· 57 51 const struct nft_pktinfo *pkt) 58 52 { 59 53 struct nft_quota *priv = nft_obj_data(obj); 60 - bool overquota; 54 + bool overquota, report; 61 55 62 - overquota = nft_overquota(priv, pkt->skb); 56 + overquota = nft_overquota(priv, pkt->skb, &report); 63 57 if (overquota ^ nft_quota_invert(priv)) 64 58 regs->verdict.code = NFT_BREAK; 65 59 66 - if (overquota && 60 + if (report && 67 61 !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) 68 62 nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, 69 63 NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
+50 -14
net/netfilter/nft_set_pipapo.c
··· 663 663 check_add_overflow(rules, extra, &rules_alloc)) 664 664 return -EOVERFLOW; 665 665 666 + if (rules_alloc > (INT_MAX / sizeof(*new_mt))) 667 + return -ENOMEM; 668 + 666 669 new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT); 667 670 if (!new_mt) 668 671 return -ENOMEM; ··· 686 683 return 0; 687 684 } 688 685 686 + 687 + /** 688 + * lt_calculate_size() - Get storage size for lookup table with overflow check 689 + * @groups: Amount of bit groups 690 + * @bb: Number of bits grouped together in lookup table buckets 691 + * @bsize: Size of each bucket in lookup table, in longs 692 + * 693 + * Return: allocation size including alignment overhead, negative on overflow 694 + */ 695 + static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb, 696 + unsigned int bsize) 697 + { 698 + ssize_t ret = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long); 699 + 700 + if (check_mul_overflow(ret, bsize, &ret)) 701 + return -1; 702 + if (check_add_overflow(ret, NFT_PIPAPO_ALIGN_HEADROOM, &ret)) 703 + return -1; 704 + if (ret > INT_MAX) 705 + return -1; 706 + 707 + return ret; 708 + } 709 + 689 710 /** 690 711 * pipapo_resize() - Resize lookup or mapping table, or both 691 712 * @f: Field containing lookup and mapping tables ··· 728 701 long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; 729 702 unsigned int new_bucket_size, copy; 730 703 int group, bucket, err; 704 + ssize_t lt_size; 731 705 732 706 if (rules >= NFT_PIPAPO_RULE0_MAX) 733 707 return -ENOSPC; ··· 747 719 else 748 720 copy = new_bucket_size; 749 721 750 - new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) * 751 - new_bucket_size * sizeof(*new_lt) + 752 - NFT_PIPAPO_ALIGN_HEADROOM, 753 - GFP_KERNEL); 722 + lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size); 723 + if (lt_size < 0) 724 + return -ENOMEM; 725 + 726 + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); 754 727 if (!new_lt) 755 728 return -ENOMEM; 756 729 ··· 936 907 { 937 908 unsigned int groups, bb; 938 
909 unsigned long *new_lt; 939 - size_t lt_size; 910 + ssize_t lt_size; 940 911 941 912 lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * 942 913 sizeof(*f->lt); ··· 946 917 groups = f->groups * 2; 947 918 bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET; 948 919 949 - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * 950 - sizeof(*f->lt); 920 + lt_size = lt_calculate_size(groups, bb, f->bsize); 921 + if (lt_size < 0) 922 + return; 951 923 } else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET && 952 924 lt_size < NFT_PIPAPO_LT_SIZE_LOW) { 953 925 groups = f->groups / 2; 954 926 bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET; 955 927 956 - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * 957 - sizeof(*f->lt); 928 + lt_size = lt_calculate_size(groups, bb, f->bsize); 929 + if (lt_size < 0) 930 + return; 958 931 959 932 /* Don't increase group width if the resulting lookup table size 960 933 * would exceed the upper size threshold for a "small" set. ··· 967 936 return; 968 937 } 969 938 970 - new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT); 939 + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); 971 940 if (!new_lt) 972 941 return; 973 942 ··· 1482 1451 1483 1452 for (i = 0; i < old->field_count; i++) { 1484 1453 unsigned long *new_lt; 1454 + ssize_t lt_size; 1485 1455 1486 1456 memcpy(dst, src, offsetof(struct nft_pipapo_field, lt)); 1487 1457 1488 - new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) * 1489 - src->bsize * sizeof(*dst->lt) + 1490 - NFT_PIPAPO_ALIGN_HEADROOM, 1491 - GFP_KERNEL_ACCOUNT); 1458 + lt_size = lt_calculate_size(src->groups, src->bb, src->bsize); 1459 + if (lt_size < 0) 1460 + goto out_lt; 1461 + 1462 + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); 1492 1463 if (!new_lt) 1493 1464 goto out_lt; 1494 1465 ··· 1502 1469 src->groups * NFT_PIPAPO_BUCKETS(src->bb)); 1503 1470 1504 1471 if (src->rules > 0) { 1472 + if (src->rules_alloc > (INT_MAX / sizeof(*src->mt))) 1473 + goto out_mt; 1474 + 1505 1475 dst->mt = 
kvmalloc_array(src->rules_alloc, 1506 1476 sizeof(*src->mt), 1507 1477 GFP_KERNEL_ACCOUNT);
+1
tools/testing/selftests/net/netfilter/Makefile
··· 12 12 TEST_PROGS += conntrack_icmp_related.sh 13 13 TEST_PROGS += conntrack_ipip_mtu.sh 14 14 TEST_PROGS += conntrack_tcp_unreplied.sh 15 + TEST_PROGS += conntrack_resize.sh 15 16 TEST_PROGS += conntrack_sctp_collision.sh 16 17 TEST_PROGS += conntrack_vrf.sh 17 18 TEST_PROGS += conntrack_reverse_clash.sh
+1
tools/testing/selftests/net/netfilter/config
··· 46 46 CONFIG_NETFILTER_XT_MATCH_STRING=m 47 47 CONFIG_NETFILTER_XT_TARGET_REDIRECT=m 48 48 CONFIG_NF_CONNTRACK=m 49 + CONFIG_NF_CONNTRACK_PROCFS=y 49 50 CONFIG_NF_CONNTRACK_EVENTS=y 50 51 CONFIG_NF_CONNTRACK_FTP=m 51 52 CONFIG_NF_CONNTRACK_MARK=y
+406
tools/testing/selftests/net/netfilter/conntrack_resize.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + source lib.sh 5 + 6 + checktool "conntrack --version" "run test without conntrack" 7 + checktool "nft --version" "run test without nft tool" 8 + 9 + init_net_max=0 10 + ct_buckets=0 11 + tmpfile="" 12 + ret=0 13 + 14 + modprobe -q nf_conntrack 15 + if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then 16 + echo "SKIP: conntrack sysctls not available" 17 + exit $KSFT_SKIP 18 + fi 19 + 20 + init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1 21 + ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1 22 + 23 + cleanup() { 24 + cleanup_all_ns 25 + 26 + rm -f "$tmpfile" 27 + 28 + # restore original sysctl setting 29 + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max 30 + sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets 31 + } 32 + trap cleanup EXIT 33 + 34 + check_max_alias() 35 + { 36 + local expected="$1" 37 + # old name, expected to alias to the first, i.e. changing one 38 + # changes the other as well. 39 + local lv=$(sysctl -n net.nf_conntrack_max) 40 + 41 + if [ $expected -ne "$lv" ];then 42 + echo "nf_conntrack_max sysctls should have identical values" 43 + exit 1 44 + fi 45 + } 46 + 47 + insert_ctnetlink() { 48 + local ns="$1" 49 + local count="$2" 50 + local i=0 51 + local bulk=16 52 + 53 + while [ $i -lt $count ] ;do 54 + ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \ 55 + if ! 
conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ 56 + -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ 57 + --protonum 17 --timeout 120 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \ 58 + return;\ 59 + fi & \ 60 + done ; wait" 2>/dev/null 61 + 62 + i=$((i+bulk)) 63 + done 64 + } 65 + 66 + check_ctcount() { 67 + local ns="$1" 68 + local count="$2" 69 + local msg="$3" 70 + 71 + local now=$(ip netns exec "$ns" conntrack -C) 72 + 73 + if [ $now -ne "$count" ] ;then 74 + echo "expected $count entries in $ns, not $now: $msg" 75 + exit 1 76 + fi 77 + 78 + echo "PASS: got $count connections: $msg" 79 + } 80 + 81 + ctresize() { 82 + local duration="$1" 83 + local now=$(date +%s) 84 + local end=$((now + duration)) 85 + 86 + while [ $now -lt $end ]; do 87 + sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM 88 + now=$(date +%s) 89 + done 90 + } 91 + 92 + do_rsleep() { 93 + local limit="$1" 94 + local r=$RANDOM 95 + 96 + r=$((r%limit)) 97 + sleep "$r" 98 + } 99 + 100 + ct_flush_once() { 101 + local ns="$1" 102 + 103 + ip netns exec "$ns" conntrack -F 2>/dev/null 104 + } 105 + 106 + ctflush() { 107 + local ns="$1" 108 + local duration="$2" 109 + local now=$(date +%s) 110 + local end=$((now + duration)) 111 + 112 + do_rsleep "$duration" 113 + 114 + while [ $now -lt $end ]; do 115 + ct_flush_once "$ns" 116 + do_rsleep "$duration" 117 + now=$(date +%s) 118 + done 119 + } 120 + 121 + ctflood() 122 + { 123 + local ns="$1" 124 + local duration="$2" 125 + local msg="$3" 126 + local now=$(date +%s) 127 + local end=$((now + duration)) 128 + local j=0 129 + local k=0 130 + 131 + while [ $now -lt $end ]; do 132 + j=$((j%256)) 133 + k=$((k%256)) 134 + 135 + ip netns exec "$ns" bash -c \ 136 + "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1 137 + 138 + j=$((j+1)) 139 + 140 + if [ $j -eq 256 ];then 141 + k=$((k+1)) 142 + fi 143 + 144 + 
now=$(date +%s) 145 + done 146 + 147 + wait 148 + } 149 + 150 + # dump to /dev/null. We don't want dumps to cause infinite loops 151 + # or use-after-free even when conntrack table is altered while dumps 152 + # are in progress. 153 + ct_nulldump() 154 + { 155 + local ns="$1" 156 + 157 + ip netns exec "$ns" conntrack -L > /dev/null 2>&1 & 158 + 159 + # Don't require /proc support in conntrack 160 + if [ -r /proc/self/net/nf_conntrack ] ; then 161 + ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null & 162 + fi 163 + 164 + wait 165 + } 166 + 167 + check_taint() 168 + { 169 + local tainted_then="$1" 170 + local msg="$2" 171 + 172 + local tainted_now=0 173 + 174 + if [ "$tainted_then" -ne 0 ];then 175 + return 176 + fi 177 + 178 + read tainted_now < /proc/sys/kernel/tainted 179 + 180 + if [ "$tainted_now" -eq 0 ];then 181 + echo "PASS: $msg" 182 + else 183 + echo "TAINT: $msg" 184 + dmesg 185 + exit 1 186 + fi 187 + } 188 + 189 + insert_flood() 190 + { 191 + local n="$1" 192 + local r=0 193 + 194 + r=$((RANDOM%2000)) 195 + 196 + ctflood "$n" "$timeout" "floodresize" & 197 + insert_ctnetlink "$n" "$r" & 198 + ctflush "$n" "$timeout" & 199 + ct_nulldump "$n" & 200 + 201 + wait 202 + } 203 + 204 + test_floodresize_all() 205 + { 206 + local timeout=20 207 + local n="" 208 + local tainted_then="" 209 + 210 + read tainted_then < /proc/sys/kernel/tainted 211 + 212 + for n in "$nsclient1" "$nsclient2";do 213 + insert_flood "$n" & 214 + done 215 + 216 + # resize table constantly while flood/insert/dump/flushs 217 + # are happening in parallel. 218 + ctresize "$timeout" 219 + 220 + # wait for subshells to complete, everything is limited 221 + # by $timeout. 
222 + wait 223 + 224 + check_taint "$tainted_then" "resize+flood" 225 + } 226 + 227 + check_dump() 228 + { 229 + local ns="$1" 230 + local protoname="$2" 231 + local c=0 232 + local proto=0 233 + local proc=0 234 + local unique="" 235 + 236 + c=$(ip netns exec "$ns" conntrack -C) 237 + 238 + # NOTE: assumes timeouts are large enough to not have 239 + # expirations in all following tests. 240 + l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | tee "$tmpfile" | wc -l) 241 + 242 + if [ "$c" -ne "$l" ]; then 243 + echo "FAIL: count inconsistency for $ns: $c != $l" 244 + ret=1 245 + fi 246 + 247 + # check the dump we retrieved is free of duplicated entries. 248 + unique=$(sort "$tmpfile" | uniq | wc -l) 249 + if [ "$l" -ne "$unique" ]; then 250 + echo "FAIL: count identical but listing contained redundant entries: $l != $unique" 251 + ret=1 252 + fi 253 + 254 + # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter. 255 + proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | wc -l) 256 + if [ "$l" -ne "$proto" ]; then 257 + echo "FAIL: dump inconsistency for $ns: $l != $proto" 258 + ret=1 259 + fi 260 + 261 + if [ -r /proc/self/net/nf_conntrack ] ; then 262 + proc=$(ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack") 263 + 264 + if [ "$l" -ne "$proc" ]; then 265 + echo "FAIL: proc inconsistency for $ns: $l != $proc" 266 + ret=1 267 + fi 268 + 269 + proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | uniq | wc -l") 270 + 271 + if [ "$l" -ne "$proc" ]; then 272 + echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc" 273 + ret=1 274 + fi 275 + fi 276 + 277 + echo "PASS: dump in netns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)" 278 + } 279 + 280 + test_dump_all() 281 + { 282 + local timeout=3 283 + local tainted_then="" 284 + 285 + read tainted_then < /proc/sys/kernel/tainted 286 + 287 + ct_flush_once "$nsclient1" 288 + ct_flush_once 
"$nsclient2" 289 + 290 + ctflood "$nsclient1" $timeout "dumpall" & 291 + insert_ctnetlink "$nsclient2" 2000 292 + 293 + wait 294 + 295 + check_dump "$nsclient1" "icmp" 296 + check_dump "$nsclient2" "udp" 297 + 298 + check_taint "$tainted_then" "test parallel conntrack dumps" 299 + } 300 + 301 + check_sysctl_immutable() 302 + { 303 + local ns="$1" 304 + local name="$2" 305 + local failhard="$3" 306 + local o=0 307 + local n=0 308 + 309 + o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) 310 + n=$((o+1)) 311 + 312 + # return value isn't reliable, need to read it back 313 + ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null 314 + 315 + n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) 316 + 317 + [ -z "$n" ] && return 1 318 + 319 + if [ $o -ne $n ]; then 320 + if [ $failhard -gt 0 ] ;then 321 + echo "FAIL: net.$name should not be changeable from namespace (now $n)" 322 + ret=1 323 + fi 324 + return 0 325 + fi 326 + 327 + return 1 328 + } 329 + 330 + test_conntrack_max_limit() 331 + { 332 + sysctl -q net.netfilter.nf_conntrack_max=100 333 + insert_ctnetlink "$nsclient1" 101 334 + 335 + # check netns is clamped by init_net, i.e., either netns follows 336 + # init_net value, or a higher pernet limit (compared to init_net) is ignored. 337 + check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound" 338 + 339 + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max 340 + } 341 + 342 + test_conntrack_disable() 343 + { 344 + local timeout=2 345 + 346 + # disable conntrack pickups 347 + ip netns exec "$nsclient1" nft flush table ip test_ct 348 + 349 + ct_flush_once "$nsclient1" 350 + ct_flush_once "$nsclient2" 351 + 352 + ctflood "$nsclient1" "$timeout" "conntrack disable" 353 + ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 354 + 355 + # Disabled, should not have picked up any connection. 356 + check_ctcount "$nsclient1" 0 "conntrack disabled" 357 + 358 + # This one is still active, expect 1 connection. 
359 + check_ctcount "$nsclient2" 1 "conntrack enabled" 360 + } 361 + 362 + init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) 363 + 364 + check_max_alias $init_net_max 365 + 366 + sysctl -q net.netfilter.nf_conntrack_max="262000" 367 + check_max_alias 262000 368 + 369 + setup_ns nsclient1 nsclient2 370 + 371 + # check this only works from init_net 372 + for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do 373 + check_sysctl_immutable "$nsclient1" "net.$n" 1 374 + done 375 + 376 + # won't work on older kernels. If it works, check that the netns obeys the limit 377 + if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then 378 + # subtest: if pernet is changeable, check that reducing it in pernet 379 + # limits the pernet entries. Inverse, pernet clamped by a lower init_net 380 + # setting, is already checked by "test_conntrack_max_limit" test. 381 + 382 + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1 383 + insert_ctnetlink "$nsclient1" 2 384 + check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound" 385 + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max 386 + fi 387 + 388 + for n in "$nsclient1" "$nsclient2";do 389 + # enable conntrack in both namespaces 390 + ip netns exec "$n" nft -f - <<EOF 391 + table ip test_ct { 392 + chain input { 393 + type filter hook input priority 0 394 + ct state new counter 395 + } 396 + } 397 + EOF 398 + done 399 + 400 + tmpfile=$(mktemp) 401 + test_conntrack_max_limit 402 + test_dump_all 403 + test_floodresize_all 404 + test_conntrack_disable 405 + 406 + exit $ret
+23
tools/testing/selftests/net/netfilter/nft_fib.sh
··· 45 45 EOF 46 46 } 47 47 48 + load_input_ruleset() { 49 + local netns=$1 50 + 51 + ip netns exec "$netns" nft -f /dev/stdin <<EOF 52 + table inet filter { 53 + chain input { 54 + type filter hook input priority 0; policy accept; 55 + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop 56 + } 57 + } 58 + EOF 59 + } 60 + 48 61 load_pbr_ruleset() { 49 62 local netns=$1 50 63 ··· 177 164 check_drops || exit 1 178 165 179 166 echo "PASS: fib expression did not cause unwanted packet drops" 167 + 168 + load_input_ruleset "$ns1" 169 + 170 + test_ping 127.0.0.1 ::1 || exit 1 171 + check_drops || exit 1 172 + 173 + test_ping 10.0.1.99 dead:1::99 || exit 1 174 + check_drops || exit 1 175 + 176 + echo "PASS: fib expression did not discard loopback packets" 180 177 181 178 ip netns exec "$nsrouter" nft flush table inet filter 182 179