Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nf-next-25-09-24' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Florian Westphal says:

====================
netfilter: fixes for net-next

These fixes target net-next because each bug is either not severe or has
existed for so long that there is no reason to cram the fix in at the last
minute.

1) Fix IPVS ftp unregistering during netns cleanup, broken since netns
support was introduced in 2011 in the 2.6.39 kernel.
From Slavin Liu.

2) nfnetlink must reset the 'nlh' pointer back to the original
address when a batch is replayed, else we emit bogus ACK messages
and conceal real errno from userspace.
From Fernando Fernandez Mancera. This was broken since 6.10.

3) The recent fix for the nftables 'pipapo' set type was incomplete; it only
made things work for the AVX2 version of the algorithm.

4) Testing revealed another problem with the AVX2 version that results in an
out-of-bounds read access; this bug has existed since the feature was
added in the 5.7 kernel. This also comes with a selftest update.

The last fix resolves a long-standing bug (since 4.9) in the conntrack /proc
interface:
Decrease the skip count when we reap an expired entry during a dump.
As-is, we erroneously elide one conntrack entry from the dump for every expired
entry seen. From Eric Dumazet.

* tag 'nf-next-25-09-24' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
netfilter: nf_conntrack: do not skip entries in /proc/net/nf_conntrack
selftests: netfilter: nft_concat_range.sh: add check for double-create bug
netfilter: nft_set_pipapo_avx2: fix skip of expired entries
netfilter: nft_set_pipapo: use 0 genmask for packetpath lookups
netfilter: nfnetlink: reset nlh pointer during batch replay
ipvs: Defer ip_vs_ftp unregister during netns cleanup
====================

Link: https://patch.msgid.link/20250924140654.10210-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+73 -10
+3 -1
net/netfilter/ipvs/ip_vs_ftp.c
··· 53 53 IP_VS_FTP_EPSV, 54 54 }; 55 55 56 + static bool exiting_module; 56 57 /* 57 58 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper 58 59 * First port is set to the default port. ··· 606 605 { 607 606 struct netns_ipvs *ipvs = net_ipvs(net); 608 607 609 - if (!ipvs) 608 + if (!ipvs || !exiting_module) 610 609 return; 611 610 612 611 unregister_ip_vs_app(ipvs, &ip_vs_ftp); ··· 628 627 */ 629 628 static void __exit ip_vs_ftp_exit(void) 630 629 { 630 + exiting_module = true; 631 631 unregister_pernet_subsys(&ip_vs_ftp_ops); 632 632 /* rcu_barrier() is called by netns */ 633 633 }
+3
net/netfilter/nf_conntrack_standalone.c
··· 317 317 smp_acquire__after_ctrl_dep(); 318 318 319 319 if (nf_ct_should_gc(ct)) { 320 + struct ct_iter_state *st = s->private; 321 + 322 + st->skip_elems--; 320 323 nf_ct_kill(ct); 321 324 goto release; 322 325 }
+2
net/netfilter/nfnetlink.c
··· 376 376 const struct nfnetlink_subsystem *ss; 377 377 const struct nfnl_callback *nc; 378 378 struct netlink_ext_ack extack; 379 + struct nlmsghdr *onlh = nlh; 379 380 LIST_HEAD(err_list); 380 381 u32 status; 381 382 int err; ··· 387 386 status = 0; 388 387 replay_abort: 389 388 skb = netlink_skb_clone(oskb, GFP_KERNEL); 389 + nlh = onlh; 390 390 if (!skb) 391 391 return netlink_ack(oskb, nlh, -ENOMEM, NULL); 392 392
+4 -5
net/netfilter/nft_set_pipapo.c
··· 549 549 * 550 550 * This function is called from the data path. It will search for 551 551 * an element matching the given key in the current active copy. 552 - * Unlike other set types, this uses NFT_GENMASK_ANY instead of 553 - * nft_genmask_cur(). 552 + * Unlike other set types, this uses 0 instead of nft_genmask_cur(). 554 553 * 555 554 * This is because new (future) elements are not reachable from 556 555 * priv->match, they get added to priv->clone instead. ··· 559 560 * inconsistent state: matching old entries get skipped but thew 560 561 * newly matching entries are unreachable. 561 562 * 562 - * GENMASK will still find the 'now old' entries which ensures consistent 563 - * priv->match view. 563 + * GENMASK_ANY doesn't work for the same reason: old-gen entries get 564 + * skipped, new-gen entries are only reachable from priv->clone. 564 565 * 565 566 * nft_pipapo_commit swaps ->clone and ->match shortly after the 566 567 * genbit flip. As ->clone doesn't contain the old entries in the first ··· 577 578 const struct nft_pipapo_elem *e; 578 579 579 580 m = rcu_dereference(priv->match); 580 - e = pipapo_get_slow(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64()); 581 + e = pipapo_get_slow(m, (const u8 *)key, 0, get_jiffies_64()); 581 582 582 583 return e ? &e->ext : NULL; 583 584 }
+6 -3
net/netfilter/nft_set_pipapo_avx2.c
··· 1179 1179 1180 1180 nft_pipapo_avx2_prepare(); 1181 1181 1182 - next_match: 1183 1182 nft_pipapo_for_each_field(f, i, m) { 1184 1183 bool last = i == m->field_count - 1, first = !i; 1185 1184 int ret = 0; ··· 1225 1226 1226 1227 #undef NFT_SET_PIPAPO_AVX2_LOOKUP 1227 1228 1229 + next_match: 1228 1230 if (ret < 0) { 1229 1231 scratch->map_index = map_index; 1230 1232 kernel_fpu_end(); ··· 1238 1238 1239 1239 e = f->mt[ret].e; 1240 1240 if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) || 1241 - !nft_set_elem_active(&e->ext, genmask))) 1241 + !nft_set_elem_active(&e->ext, genmask))) { 1242 + ret = pipapo_refill(res, f->bsize, f->rules, 1243 + fill, f->mt, last); 1242 1244 goto next_match; 1245 + } 1243 1246 1244 1247 scratch->map_index = map_index; 1245 1248 kernel_fpu_end(); ··· 1295 1292 1296 1293 m = rcu_dereference(priv->match); 1297 1294 1298 - e = pipapo_get_avx2(m, rp, NFT_GENMASK_ANY, get_jiffies_64()); 1295 + e = pipapo_get_avx2(m, rp, 0, get_jiffies_64()); 1299 1296 local_bh_enable(); 1300 1297 1301 1298 return e ? &e->ext : NULL;
+55 -1
tools/testing/selftests/net/netfilter/nft_concat_range.sh
··· 29 29 net6_port_net6_port net_port_mac_proto_net" 30 30 31 31 # Reported bugs, also described by TYPE_ variables below 32 - BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" 32 + BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" 33 33 34 34 # List of possible paths to pktgen script from kernel tree for performance tests 35 35 PKTGEN_SCRIPT_PATHS=" ··· 407 407 perf_duration 0 408 408 " 409 409 410 + 411 + TYPE_doublecreate=" 412 + display cannot create same element twice 413 + type_spec ipv4_addr . ipv4_addr 414 + chain_spec ip saddr . ip daddr 415 + dst addr4 416 + proto icmp 417 + 418 + race_repeat 0 419 + 420 + perf_duration 0 421 + " 410 422 411 423 # Set template for all tests, types and rules are filled in depending on test 412 424 set_template=' ··· 1910 1898 err "False match for $a2" 1911 1899 return 1 1912 1900 fi 1901 + } 1902 + 1903 + test_bug_doublecreate() 1904 + { 1905 + local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4" 1906 + local ret=1 1907 + local i 1908 + 1909 + setup veth send_"${proto}" set || return ${ksft_skip} 1910 + 1911 + add "{ $elements }" || return 1 1912 + # expected to work: 'add' on existing should be no-op. 1913 + add "{ $elements }" || return 1 1914 + 1915 + # 'create' should return an error. 1916 + if nft create element inet filter test "{ $elements }" 2>/dev/null; then 1917 + err "Could create an existing element" 1918 + return 1 1919 + fi 1920 + nft -f - <<EOF 2>/dev/null 1921 + flush set inet filter test 1922 + create element inet filter test { $elements } 1923 + create element inet filter test { $elements } 1924 + EOF 1925 + ret=$? 1926 + if [ $ret -eq 0 ]; then 1927 + err "Could create element twice in one transaction" 1928 + err "$(nft -a list ruleset)" 1929 + return 1 1930 + fi 1931 + 1932 + nft -f - <<EOF 2>/dev/null 1933 + flush set inet filter test 1934 + create element inet filter test { $elements } 1935 + EOF 1936 + ret=$? 
1937 + if [ $ret -ne 0 ]; then 1938 + err "Could not flush and re-create element in one transaction" 1939 + return 1 1940 + fi 1941 + 1942 + return 0 1913 1943 } 1914 1944 1915 1945 test_reported_issues() {