Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nf-26-02-17' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
netfilter: updates for net

The following patchset contains Netfilter fixes for *net*:

1) Add missing __rcu annotations to NAT helper hook pointers in Amanda,
FTP, IRC, SNMP and TFTP helpers. From Sun Jian.

2-4):
- Add global spinlock to serialize nft_counter fetch+reset operations.
- Use atomic64_xchg() for nft_quota reset instead of read+subtract pattern.
Note: AI review detects a race in this change, but it isn't new. The
'racing' bit only exists to prevent a constant stream of 'quota expired'
notifications.
- Revert commit_mutex usage in nf_tables reset path; it caused a
circular lock dependency. All from Brian Witte.

5) Fix uninitialized l3num value in nf_conntrack_h323 helper.

6) Fix musl libc compatibility in netfilter_bridge.h UAPI header. This
change isn't nice (UAPI headers should not include libc headers), but
as-is musl builds may fail due to redefinition of struct ethhdr.

7) Fix protocol checksum validation in IPVS for IPv6 with extension headers,
from Julian Anastasov.

8) Fix device reference leak in IPVS when netdev goes down. Also from
Julian.

9) Remove WARN_ON_ONCE when accessing forward path array; this can
trigger with sufficiently long forward paths. From Pablo Neira Ayuso.

10) Fix use-after-free in nf_tables_addchain() error path, from Inseo An.

* tag 'nf-26-02-17' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
netfilter: nf_tables: fix use-after-free in nf_tables_addchain()
net: remove WARN_ON_ONCE when accessing forward path array
ipvs: do not keep dest_dst if dev is going down
ipvs: skip ipv6 extension headers for csum checks
include: uapi: netfilter_bridge.h: Cover for musl libc
netfilter: nf_conntrack_h323: don't pass uninitialised l3num value
netfilter: nf_tables: revert commit_mutex usage in reset path
netfilter: nft_quota: use atomic64_xchg for reset
netfilter: nft_counter: serialize reset with spinlock
netfilter: annotate NAT helper hook pointers with __rcu
====================

Link: https://patch.msgid.link/20260217163233.31455-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+166 -303
+1 -1
include/linux/netfilter/nf_conntrack_amanda.h
··· 7 7 #include <linux/skbuff.h> 8 8 #include <net/netfilter/nf_conntrack_expect.h> 9 9 10 - extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, 10 + extern unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, 11 11 enum ip_conntrack_info ctinfo, 12 12 unsigned int protoff, 13 13 unsigned int matchoff,
+1 -1
include/linux/netfilter/nf_conntrack_ftp.h
··· 26 26 27 27 /* For NAT to hook in when we find a packet which describes what other 28 28 * connection we should expect. */ 29 - extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, 29 + extern unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, 30 30 enum ip_conntrack_info ctinfo, 31 31 enum nf_ct_ftp_type type, 32 32 unsigned int protoff,
+1 -1
include/linux/netfilter/nf_conntrack_irc.h
··· 8 8 9 9 #define IRC_PORT 6667 10 10 11 - extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, 11 + extern unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, 12 12 enum ip_conntrack_info ctinfo, 13 13 unsigned int protoff, 14 14 unsigned int matchoff,
+1 -1
include/linux/netfilter/nf_conntrack_snmp.h
··· 5 5 #include <linux/netfilter.h> 6 6 #include <linux/skbuff.h> 7 7 8 - extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, 8 + extern int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, 9 9 unsigned int protoff, 10 10 struct nf_conn *ct, 11 11 enum ip_conntrack_info ctinfo);
+1 -1
include/linux/netfilter/nf_conntrack_tftp.h
··· 19 19 #define TFTP_OPCODE_ACK 4 20 20 #define TFTP_OPCODE_ERROR 5 21 21 22 - extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, 22 + extern unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, 23 23 enum ip_conntrack_info ctinfo, 24 24 struct nf_conntrack_expect *exp); 25 25
+4
include/uapi/linux/netfilter_bridge.h
··· 5 5 /* bridge-specific defines for netfilter. 6 6 */ 7 7 8 + #ifndef __KERNEL__ 9 + #include <netinet/if_ether.h> /* for __UAPI_DEF_ETHHDR if defined */ 10 + #endif 11 + 8 12 #include <linux/in.h> 9 13 #include <linux/netfilter.h> 10 14 #include <linux/if_ether.h>
+1 -1
net/core/dev.c
··· 744 744 { 745 745 int k = stack->num_paths++; 746 746 747 - if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX)) 747 + if (k >= NET_DEVICE_PATH_STACK_MAX) 748 748 return NULL; 749 749 750 750 return &stack->path[k];
+6 -12
net/netfilter/ipvs/ip_vs_proto_sctp.c
··· 10 10 #include <net/ip_vs.h> 11 11 12 12 static int 13 - sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); 13 + sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 14 + unsigned int sctphoff); 14 15 15 16 static int 16 17 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 109 108 int ret; 110 109 111 110 /* Some checks before mangling */ 112 - if (!sctp_csum_check(cp->af, skb, pp)) 111 + if (!sctp_csum_check(cp->af, skb, pp, sctphoff)) 113 112 return 0; 114 113 115 114 /* Call application helper if needed */ ··· 157 156 int ret; 158 157 159 158 /* Some checks before mangling */ 160 - if (!sctp_csum_check(cp->af, skb, pp)) 159 + if (!sctp_csum_check(cp->af, skb, pp, sctphoff)) 161 160 return 0; 162 161 163 162 /* Call application helper if needed */ ··· 186 185 } 187 186 188 187 static int 189 - sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 188 + sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 189 + unsigned int sctphoff) 190 190 { 191 - unsigned int sctphoff; 192 191 struct sctphdr *sh; 193 192 __le32 cmp, val; 194 - 195 - #ifdef CONFIG_IP_VS_IPV6 196 - if (af == AF_INET6) 197 - sctphoff = sizeof(struct ipv6hdr); 198 - else 199 - #endif 200 - sctphoff = ip_hdrlen(skb); 201 193 202 194 sh = (struct sctphdr *)(skb->data + sctphoff); 203 195 cmp = sh->checksum;
+7 -14
net/netfilter/ipvs/ip_vs_proto_tcp.c
··· 28 28 #include <net/ip_vs.h> 29 29 30 30 static int 31 - tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); 31 + tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 32 + unsigned int tcphoff); 32 33 33 34 static int 34 35 tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 166 165 int ret; 167 166 168 167 /* Some checks before mangling */ 169 - if (!tcp_csum_check(cp->af, skb, pp)) 168 + if (!tcp_csum_check(cp->af, skb, pp, tcphoff)) 170 169 return 0; 171 170 172 171 /* Call application helper if needed */ ··· 244 243 int ret; 245 244 246 245 /* Some checks before mangling */ 247 - if (!tcp_csum_check(cp->af, skb, pp)) 246 + if (!tcp_csum_check(cp->af, skb, pp, tcphoff)) 248 247 return 0; 249 248 250 249 /* ··· 301 300 302 301 303 302 static int 304 - tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 303 + tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 304 + unsigned int tcphoff) 305 305 { 306 - unsigned int tcphoff; 307 - 308 - #ifdef CONFIG_IP_VS_IPV6 309 - if (af == AF_INET6) 310 - tcphoff = sizeof(struct ipv6hdr); 311 - else 312 - #endif 313 - tcphoff = ip_hdrlen(skb); 314 - 315 306 switch (skb->ip_summed) { 316 307 case CHECKSUM_NONE: 317 308 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); ··· 314 321 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 315 322 &ipv6_hdr(skb)->daddr, 316 323 skb->len - tcphoff, 317 - ipv6_hdr(skb)->nexthdr, 324 + IPPROTO_TCP, 318 325 skb->csum)) { 319 326 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 320 327 "Failed checksum for");
+7 -13
net/netfilter/ipvs/ip_vs_proto_udp.c
··· 24 24 #include <net/ip6_checksum.h> 25 25 26 26 static int 27 - udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); 27 + udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 28 + unsigned int udphoff); 28 29 29 30 static int 30 31 udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, ··· 155 154 int ret; 156 155 157 156 /* Some checks before mangling */ 158 - if (!udp_csum_check(cp->af, skb, pp)) 157 + if (!udp_csum_check(cp->af, skb, pp, udphoff)) 159 158 return 0; 160 159 161 160 /* ··· 238 237 int ret; 239 238 240 239 /* Some checks before mangling */ 241 - if (!udp_csum_check(cp->af, skb, pp)) 240 + if (!udp_csum_check(cp->af, skb, pp, udphoff)) 242 241 return 0; 243 242 244 243 /* ··· 297 296 298 297 299 298 static int 300 - udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 299 + udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 300 + unsigned int udphoff) 301 301 { 302 302 struct udphdr _udph, *uh; 303 - unsigned int udphoff; 304 - 305 - #ifdef CONFIG_IP_VS_IPV6 306 - if (af == AF_INET6) 307 - udphoff = sizeof(struct ipv6hdr); 308 - else 309 - #endif 310 - udphoff = ip_hdrlen(skb); 311 303 312 304 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); 313 305 if (uh == NULL) ··· 318 324 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 319 325 &ipv6_hdr(skb)->daddr, 320 326 skb->len - udphoff, 321 - ipv6_hdr(skb)->nexthdr, 327 + IPPROTO_UDP, 322 328 skb->csum)) { 323 329 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, 324 330 "Failed checksum for");
+36 -10
net/netfilter/ipvs/ip_vs_xmit.c
··· 294 294 return true; 295 295 } 296 296 297 + /* rt has device that is down */ 298 + static bool rt_dev_is_down(const struct net_device *dev) 299 + { 300 + return dev && !netif_running(dev); 301 + } 302 + 297 303 /* Get route to destination or remote server */ 298 304 static int 299 305 __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ··· 315 309 316 310 if (dest) { 317 311 dest_dst = __ip_vs_dst_check(dest); 318 - if (likely(dest_dst)) 312 + if (likely(dest_dst)) { 319 313 rt = dst_rtable(dest_dst->dst_cache); 320 - else { 314 + if (ret_saddr) 315 + *ret_saddr = dest_dst->dst_saddr.ip; 316 + } else { 321 317 dest_dst = ip_vs_dest_dst_alloc(); 322 318 spin_lock_bh(&dest->dst_lock); 323 319 if (!dest_dst) { ··· 335 327 ip_vs_dest_dst_free(dest_dst); 336 328 goto err_unreach; 337 329 } 338 - __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 330 + /* It is forbidden to attach dest->dest_dst if 331 + * device is going down. 332 + */ 333 + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) 334 + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); 335 + else 336 + noref = 0; 339 337 spin_unlock_bh(&dest->dst_lock); 340 338 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", 341 339 &dest->addr.ip, &dest_dst->dst_saddr.ip, 342 340 rcuref_read(&rt->dst.__rcuref)); 341 + if (ret_saddr) 342 + *ret_saddr = dest_dst->dst_saddr.ip; 343 + if (!noref) 344 + ip_vs_dest_dst_free(dest_dst); 343 345 } 344 - if (ret_saddr) 345 - *ret_saddr = dest_dst->dst_saddr.ip; 346 346 } else { 347 347 noref = 0; 348 348 ··· 487 471 488 472 if (dest) { 489 473 dest_dst = __ip_vs_dst_check(dest); 490 - if (likely(dest_dst)) 474 + if (likely(dest_dst)) { 491 475 rt = dst_rt6_info(dest_dst->dst_cache); 492 - else { 476 + if (ret_saddr) 477 + *ret_saddr = dest_dst->dst_saddr.in6; 478 + } else { 493 479 u32 cookie; 494 480 495 481 dest_dst = ip_vs_dest_dst_alloc(); ··· 512 494 } 513 495 rt = dst_rt6_info(dst); 514 496 cookie = rt6_get_cookie(rt); 515 - __ip_vs_dst_set(dest, dest_dst, 
&rt->dst, cookie); 497 + /* It is forbidden to attach dest->dest_dst if 498 + * device is going down. 499 + */ 500 + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) 501 + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); 502 + else 503 + noref = 0; 516 504 spin_unlock_bh(&dest->dst_lock); 517 505 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 518 506 &dest->addr.in6, &dest_dst->dst_saddr.in6, 519 507 rcuref_read(&rt->dst.__rcuref)); 508 + if (ret_saddr) 509 + *ret_saddr = dest_dst->dst_saddr.in6; 510 + if (!noref) 511 + ip_vs_dest_dst_free(dest_dst); 520 512 } 521 - if (ret_saddr) 522 - *ret_saddr = dest_dst->dst_saddr.in6; 523 513 } else { 524 514 noref = 0; 525 515 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
+7 -7
net/netfilter/nf_conntrack_amanda.c
··· 37 37 module_param(ts_algo, charp, 0400); 38 38 MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); 39 39 40 - unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, 41 - enum ip_conntrack_info ctinfo, 42 - unsigned int protoff, 43 - unsigned int matchoff, 44 - unsigned int matchlen, 45 - struct nf_conntrack_expect *exp) 46 - __read_mostly; 40 + unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, 41 + enum ip_conntrack_info ctinfo, 42 + unsigned int protoff, 43 + unsigned int matchoff, 44 + unsigned int matchlen, 45 + struct nf_conntrack_expect *exp) 46 + __read_mostly; 47 47 EXPORT_SYMBOL_GPL(nf_nat_amanda_hook); 48 48 49 49 enum amanda_strings {
+7 -7
net/netfilter/nf_conntrack_ftp.c
··· 43 43 static bool loose; 44 44 module_param(loose, bool, 0600); 45 45 46 - unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, 47 - enum ip_conntrack_info ctinfo, 48 - enum nf_ct_ftp_type type, 49 - unsigned int protoff, 50 - unsigned int matchoff, 51 - unsigned int matchlen, 52 - struct nf_conntrack_expect *exp); 46 + unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, 47 + enum ip_conntrack_info ctinfo, 48 + enum nf_ct_ftp_type type, 49 + unsigned int protoff, 50 + unsigned int matchoff, 51 + unsigned int matchlen, 52 + struct nf_conntrack_expect *exp); 53 53 EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); 54 54 55 55 static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
+5 -5
net/netfilter/nf_conntrack_h323_main.c
··· 1187 1187 { 1188 1188 struct net *net = nf_ct_net(ct); 1189 1189 struct nf_conntrack_expect *exp; 1190 - struct nf_conntrack_tuple tuple; 1190 + struct nf_conntrack_tuple tuple = { 1191 + .src.l3num = nf_ct_l3num(ct), 1192 + .dst.protonum = IPPROTO_TCP, 1193 + .dst.u.tcp.port = port, 1194 + }; 1191 1195 1192 - memset(&tuple.src.u3, 0, sizeof(tuple.src.u3)); 1193 - tuple.src.u.tcp.port = 0; 1194 1196 memcpy(&tuple.dst.u3, addr, sizeof(tuple.dst.u3)); 1195 - tuple.dst.u.tcp.port = port; 1196 - tuple.dst.protonum = IPPROTO_TCP; 1197 1197 1198 1198 exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); 1199 1199 if (exp && exp->master == ct)
+7 -6
net/netfilter/nf_conntrack_irc.c
··· 30 30 static char *irc_buffer; 31 31 static DEFINE_SPINLOCK(irc_buffer_lock); 32 32 33 - unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, 34 - enum ip_conntrack_info ctinfo, 35 - unsigned int protoff, 36 - unsigned int matchoff, 37 - unsigned int matchlen, 38 - struct nf_conntrack_expect *exp) __read_mostly; 33 + unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, 34 + enum ip_conntrack_info ctinfo, 35 + unsigned int protoff, 36 + unsigned int matchoff, 37 + unsigned int matchlen, 38 + struct nf_conntrack_expect *exp) 39 + __read_mostly; 39 40 EXPORT_SYMBOL_GPL(nf_nat_irc_hook); 40 41 41 42 #define HELPER_NAME "irc"
+4 -4
net/netfilter/nf_conntrack_snmp.c
··· 25 25 module_param(timeout, uint, 0400); 26 26 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 27 27 28 - int (*nf_nat_snmp_hook)(struct sk_buff *skb, 29 - unsigned int protoff, 30 - struct nf_conn *ct, 31 - enum ip_conntrack_info ctinfo); 28 + int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, 29 + unsigned int protoff, 30 + struct nf_conn *ct, 31 + enum ip_conntrack_info ctinfo); 32 32 EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); 33 33 34 34 static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+4 -3
net/netfilter/nf_conntrack_tftp.c
··· 32 32 module_param_array(ports, ushort, &ports_c, 0400); 33 33 MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); 34 34 35 - unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, 36 - enum ip_conntrack_info ctinfo, 37 - struct nf_conntrack_expect *exp) __read_mostly; 35 + unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, 36 + enum ip_conntrack_info ctinfo, 37 + struct nf_conntrack_expect *exp) 38 + __read_mostly; 38 39 EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); 39 40 40 41 static int tftp_help(struct sk_buff *skb,
+43 -206
net/netfilter/nf_tables_api.c
··· 2823 2823 2824 2824 err_register_hook: 2825 2825 nft_chain_del(chain); 2826 + synchronize_rcu(); 2826 2827 err_chain_add: 2827 2828 nft_trans_destroy(trans); 2828 2829 err_trans: ··· 3902 3901 return skb->len; 3903 3902 } 3904 3903 3905 - static int nf_tables_dumpreset_rules(struct sk_buff *skb, 3906 - struct netlink_callback *cb) 3907 - { 3908 - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); 3909 - int ret; 3910 - 3911 - /* Mutex is held is to prevent that two concurrent dump-and-reset calls 3912 - * do not underrun counters and quotas. The commit_mutex is used for 3913 - * the lack a better lock, this is not transaction path. 3914 - */ 3915 - mutex_lock(&nft_net->commit_mutex); 3916 - ret = nf_tables_dump_rules(skb, cb); 3917 - mutex_unlock(&nft_net->commit_mutex); 3918 - 3919 - return ret; 3920 - } 3921 - 3922 3904 static int nf_tables_dump_rules_start(struct netlink_callback *cb) 3923 3905 { 3924 3906 struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; ··· 3921 3937 return -ENOMEM; 3922 3938 } 3923 3939 } 3940 + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) 3941 + ctx->reset = true; 3942 + 3924 3943 return 0; 3925 - } 3926 - 3927 - static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb) 3928 - { 3929 - struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; 3930 - 3931 - ctx->reset = true; 3932 - 3933 - return nf_tables_dump_rules_start(cb); 3934 3944 } 3935 3945 3936 3946 static int nf_tables_dump_rules_done(struct netlink_callback *cb) ··· 3990 4012 u32 portid = NETLINK_CB(skb).portid; 3991 4013 struct net *net = info->net; 3992 4014 struct sk_buff *skb2; 4015 + bool reset = false; 4016 + char *buf; 3993 4017 3994 4018 if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 3995 4019 struct netlink_dump_control c = { ··· 4005 4025 return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 4006 4026 } 4007 4027 4008 - skb2 = nf_tables_getrule_single(portid, info, nla, false); 4028 + if 
(NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) 4029 + reset = true; 4030 + 4031 + skb2 = nf_tables_getrule_single(portid, info, nla, reset); 4009 4032 if (IS_ERR(skb2)) 4010 4033 return PTR_ERR(skb2); 4011 4034 4012 - return nfnetlink_unicast(skb2, net, portid); 4013 - } 4014 - 4015 - static int nf_tables_getrule_reset(struct sk_buff *skb, 4016 - const struct nfnl_info *info, 4017 - const struct nlattr * const nla[]) 4018 - { 4019 - struct nftables_pernet *nft_net = nft_pernet(info->net); 4020 - u32 portid = NETLINK_CB(skb).portid; 4021 - struct net *net = info->net; 4022 - struct sk_buff *skb2; 4023 - char *buf; 4024 - 4025 - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 4026 - struct netlink_dump_control c = { 4027 - .start= nf_tables_dumpreset_rules_start, 4028 - .dump = nf_tables_dumpreset_rules, 4029 - .done = nf_tables_dump_rules_done, 4030 - .module = THIS_MODULE, 4031 - .data = (void *)nla, 4032 - }; 4033 - 4034 - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 4035 - } 4036 - 4037 - if (!try_module_get(THIS_MODULE)) 4038 - return -EINVAL; 4039 - rcu_read_unlock(); 4040 - mutex_lock(&nft_net->commit_mutex); 4041 - skb2 = nf_tables_getrule_single(portid, info, nla, true); 4042 - mutex_unlock(&nft_net->commit_mutex); 4043 - rcu_read_lock(); 4044 - module_put(THIS_MODULE); 4045 - 4046 - if (IS_ERR(skb2)) 4047 - return PTR_ERR(skb2); 4035 + if (!reset) 4036 + return nfnetlink_unicast(skb2, net, portid); 4048 4037 4049 4038 buf = kasprintf(GFP_ATOMIC, "%.*s:%u", 4050 4039 nla_len(nla[NFTA_RULE_TABLE]), ··· 6273 6324 nla_nest_end(skb, nest); 6274 6325 nlmsg_end(skb, nlh); 6275 6326 6327 + if (dump_ctx->reset && args.iter.count > args.iter.skip) 6328 + audit_log_nft_set_reset(table, cb->seq, 6329 + args.iter.count - args.iter.skip); 6330 + 6276 6331 rcu_read_unlock(); 6277 6332 6278 6333 if (args.iter.err && args.iter.err != -EMSGSIZE) ··· 6290 6337 nla_put_failure: 6291 6338 rcu_read_unlock(); 6292 6339 return -ENOSPC; 6293 - } 6294 
- 6295 - static int nf_tables_dumpreset_set(struct sk_buff *skb, 6296 - struct netlink_callback *cb) 6297 - { 6298 - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); 6299 - struct nft_set_dump_ctx *dump_ctx = cb->data; 6300 - int ret, skip = cb->args[0]; 6301 - 6302 - mutex_lock(&nft_net->commit_mutex); 6303 - 6304 - ret = nf_tables_dump_set(skb, cb); 6305 - 6306 - if (cb->args[0] > skip) 6307 - audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, 6308 - cb->args[0] - skip); 6309 - 6310 - mutex_unlock(&nft_net->commit_mutex); 6311 - 6312 - return ret; 6313 6340 } 6314 6341 6315 6342 static int nf_tables_dump_set_start(struct netlink_callback *cb) ··· 6535 6602 { 6536 6603 struct netlink_ext_ack *extack = info->extack; 6537 6604 struct nft_set_dump_ctx dump_ctx; 6605 + int rem, err = 0, nelems = 0; 6606 + struct net *net = info->net; 6538 6607 struct nlattr *attr; 6539 - int rem, err = 0; 6608 + bool reset = false; 6609 + 6610 + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) 6611 + reset = true; 6540 6612 6541 6613 if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 6542 6614 struct netlink_dump_control c = { ··· 6551 6613 .module = THIS_MODULE, 6552 6614 }; 6553 6615 6554 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); 6616 + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, reset); 6555 6617 if (err) 6556 6618 return err; 6557 6619 ··· 6562 6624 if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) 6563 6625 return -EINVAL; 6564 6626 6565 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); 6627 + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, reset); 6566 6628 if (err) 6567 6629 return err; 6568 6630 6569 6631 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 6570 - err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false); 6571 - if (err < 0) { 6572 - NL_SET_BAD_ATTR(extack, attr); 6573 - break; 6574 - } 6575 - } 6576 - 6577 - return err; 6578 - } 6579 - 6580 - static int 
nf_tables_getsetelem_reset(struct sk_buff *skb, 6581 - const struct nfnl_info *info, 6582 - const struct nlattr * const nla[]) 6583 - { 6584 - struct nftables_pernet *nft_net = nft_pernet(info->net); 6585 - struct netlink_ext_ack *extack = info->extack; 6586 - struct nft_set_dump_ctx dump_ctx; 6587 - int rem, err = 0, nelems = 0; 6588 - struct nlattr *attr; 6589 - 6590 - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 6591 - struct netlink_dump_control c = { 6592 - .start = nf_tables_dump_set_start, 6593 - .dump = nf_tables_dumpreset_set, 6594 - .done = nf_tables_dump_set_done, 6595 - .module = THIS_MODULE, 6596 - }; 6597 - 6598 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); 6599 - if (err) 6600 - return err; 6601 - 6602 - c.data = &dump_ctx; 6603 - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 6604 - } 6605 - 6606 - if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) 6607 - return -EINVAL; 6608 - 6609 - if (!try_module_get(THIS_MODULE)) 6610 - return -EINVAL; 6611 - rcu_read_unlock(); 6612 - mutex_lock(&nft_net->commit_mutex); 6613 - rcu_read_lock(); 6614 - 6615 - err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); 6616 - if (err) 6617 - goto out_unlock; 6618 - 6619 - nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 6620 - err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true); 6632 + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, reset); 6621 6633 if (err < 0) { 6622 6634 NL_SET_BAD_ATTR(extack, attr); 6623 6635 break; 6624 6636 } 6625 6637 nelems++; 6626 6638 } 6627 - audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems); 6628 - 6629 - out_unlock: 6630 - rcu_read_unlock(); 6631 - mutex_unlock(&nft_net->commit_mutex); 6632 - rcu_read_lock(); 6633 - module_put(THIS_MODULE); 6639 + if (reset) 6640 + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(net), 6641 + nelems); 6634 6642 6635 6643 return err; 6636 6644 } ··· 8448 8564 return skb->len; 8449 8565 } 8450 8566 8451 
- static int nf_tables_dumpreset_obj(struct sk_buff *skb, 8452 - struct netlink_callback *cb) 8453 - { 8454 - struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); 8455 - int ret; 8456 - 8457 - mutex_lock(&nft_net->commit_mutex); 8458 - ret = nf_tables_dump_obj(skb, cb); 8459 - mutex_unlock(&nft_net->commit_mutex); 8460 - 8461 - return ret; 8462 - } 8463 - 8464 8567 static int nf_tables_dump_obj_start(struct netlink_callback *cb) 8465 8568 { 8466 8569 struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; ··· 8464 8593 if (nla[NFTA_OBJ_TYPE]) 8465 8594 ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); 8466 8595 8596 + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) 8597 + ctx->reset = true; 8598 + 8467 8599 return 0; 8468 - } 8469 - 8470 - static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) 8471 - { 8472 - struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; 8473 - 8474 - ctx->reset = true; 8475 - 8476 - return nf_tables_dump_obj_start(cb); 8477 8600 } 8478 8601 8479 8602 static int nf_tables_dump_obj_done(struct netlink_callback *cb) ··· 8530 8665 const struct nlattr * const nla[]) 8531 8666 { 8532 8667 u32 portid = NETLINK_CB(skb).portid; 8668 + struct net *net = info->net; 8533 8669 struct sk_buff *skb2; 8670 + bool reset = false; 8671 + char *buf; 8534 8672 8535 8673 if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 8536 8674 struct netlink_dump_control c = { ··· 8547 8679 return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 8548 8680 } 8549 8681 8550 - skb2 = nf_tables_getobj_single(portid, info, nla, false); 8682 + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) 8683 + reset = true; 8684 + 8685 + skb2 = nf_tables_getobj_single(portid, info, nla, reset); 8551 8686 if (IS_ERR(skb2)) 8552 8687 return PTR_ERR(skb2); 8553 8688 8554 - return nfnetlink_unicast(skb2, info->net, portid); 8555 - } 8556 - 8557 - static int nf_tables_getobj_reset(struct sk_buff *skb, 8558 - const struct nfnl_info *info, 8559 - 
const struct nlattr * const nla[]) 8560 - { 8561 - struct nftables_pernet *nft_net = nft_pernet(info->net); 8562 - u32 portid = NETLINK_CB(skb).portid; 8563 - struct net *net = info->net; 8564 - struct sk_buff *skb2; 8565 - char *buf; 8566 - 8567 - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { 8568 - struct netlink_dump_control c = { 8569 - .start = nf_tables_dumpreset_obj_start, 8570 - .dump = nf_tables_dumpreset_obj, 8571 - .done = nf_tables_dump_obj_done, 8572 - .module = THIS_MODULE, 8573 - .data = (void *)nla, 8574 - }; 8575 - 8576 - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); 8577 - } 8578 - 8579 - if (!try_module_get(THIS_MODULE)) 8580 - return -EINVAL; 8581 - rcu_read_unlock(); 8582 - mutex_lock(&nft_net->commit_mutex); 8583 - skb2 = nf_tables_getobj_single(portid, info, nla, true); 8584 - mutex_unlock(&nft_net->commit_mutex); 8585 - rcu_read_lock(); 8586 - module_put(THIS_MODULE); 8587 - 8588 - if (IS_ERR(skb2)) 8589 - return PTR_ERR(skb2); 8689 + if (!reset) 8690 + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); 8590 8691 8591 8692 buf = kasprintf(GFP_ATOMIC, "%.*s:%u", 8592 8693 nla_len(nla[NFTA_OBJ_TABLE]), ··· 9874 10037 .policy = nft_rule_policy, 9875 10038 }, 9876 10039 [NFT_MSG_GETRULE_RESET] = { 9877 - .call = nf_tables_getrule_reset, 10040 + .call = nf_tables_getrule, 9878 10041 .type = NFNL_CB_RCU, 9879 10042 .attr_count = NFTA_RULE_MAX, 9880 10043 .policy = nft_rule_policy, ··· 9928 10091 .policy = nft_set_elem_list_policy, 9929 10092 }, 9930 10093 [NFT_MSG_GETSETELEM_RESET] = { 9931 - .call = nf_tables_getsetelem_reset, 10094 + .call = nf_tables_getsetelem, 9932 10095 .type = NFNL_CB_RCU, 9933 10096 .attr_count = NFTA_SET_ELEM_LIST_MAX, 9934 10097 .policy = nft_set_elem_list_policy, ··· 9974 10137 .policy = nft_obj_policy, 9975 10138 }, 9976 10139 [NFT_MSG_GETOBJ_RESET] = { 9977 - .call = nf_tables_getobj_reset, 10140 + .call = nf_tables_getobj, 9978 10141 .type = NFNL_CB_RCU, 9979 10142 .attr_count = 
NFTA_OBJ_MAX, 9980 10143 .policy = nft_obj_policy,
+16 -4
net/netfilter/nft_counter.c
··· 32 32 33 33 static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync); 34 34 35 + /* control plane only: sync fetch+reset */ 36 + static DEFINE_SPINLOCK(nft_counter_lock); 37 + 35 38 static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, 36 39 struct nft_regs *regs, 37 40 const struct nft_pktinfo *pkt) ··· 151 148 } 152 149 } 153 150 151 + static void nft_counter_fetch_and_reset(struct nft_counter_percpu_priv *priv, 152 + struct nft_counter_tot *total) 153 + { 154 + spin_lock(&nft_counter_lock); 155 + nft_counter_fetch(priv, total); 156 + nft_counter_reset(priv, total); 157 + spin_unlock(&nft_counter_lock); 158 + } 159 + 154 160 static int nft_counter_do_dump(struct sk_buff *skb, 155 161 struct nft_counter_percpu_priv *priv, 156 162 bool reset) 157 163 { 158 164 struct nft_counter_tot total; 159 165 160 - nft_counter_fetch(priv, &total); 166 + if (unlikely(reset)) 167 + nft_counter_fetch_and_reset(priv, &total); 168 + else 169 + nft_counter_fetch(priv, &total); 161 170 162 171 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), 163 172 NFTA_COUNTER_PAD) || 164 173 nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets), 165 174 NFTA_COUNTER_PAD)) 166 175 goto nla_put_failure; 167 - 168 - if (reset) 169 - nft_counter_reset(priv, &total); 170 176 171 177 return 0; 172 178
+7 -6
net/netfilter/nft_quota.c
··· 140 140 u64 consumed, consumed_cap, quota; 141 141 u32 flags = priv->flags; 142 142 143 - /* Since we inconditionally increment consumed quota for each packet 143 + /* Since we unconditionally increment consumed quota for each packet 144 144 * that we see, don't go over the quota boundary in what we send to 145 145 * userspace. 146 146 */ 147 - consumed = atomic64_read(priv->consumed); 147 + if (reset) { 148 + consumed = atomic64_xchg(priv->consumed, 0); 149 + clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); 150 + } else { 151 + consumed = atomic64_read(priv->consumed); 152 + } 148 153 quota = atomic64_read(&priv->quota); 149 154 if (consumed >= quota) { 150 155 consumed_cap = quota; ··· 165 160 nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) 166 161 goto nla_put_failure; 167 162 168 - if (reset) { 169 - atomic64_sub(consumed, priv->consumed); 170 - clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags); 171 - } 172 163 return 0; 173 164 174 165 nla_put_failure: