Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

netlink: Fix wraparounds of sk->sk_rmem_alloc.

Netlink has this pattern in some places

    if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);

, which has the same problem fixed by commit 5a465a0da13e ("udp:
Fix multiple wraparounds of sk->sk_rmem_alloc.").

For example, if we set SO_RCVBUFFORCE to INT_MAX, the condition
is always false because both operands are signed ints: sk_rmem_alloc
wraps around to a negative value instead of exceeding sk_rcvbuf.

Then, a single socket can eat as many skbs as possible until OOM
happens, and we can see multiple wraparounds of sk->sk_rmem_alloc.

Let's fix it by using atomic_add_return() and comparing the two
variables as unsigned int.

Before:
[root@fedora ~]# ss -f netlink
Recv-Q Send-Q Local Address:Port Peer Address:Port
-1668710080 0 rtnl:nl_wraparound/293 *

After:
[root@fedora ~]# ss -f netlink
Recv-Q Send-Q Local Address:Port Peer Address:Port
2147483072 0 rtnl:nl_wraparound/290 *
^
`--- INT_MAX - 575

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Jason Baron <jbaron@akamai.com>
Closes: https://lore.kernel.org/netdev/cover.1750285100.git.jbaron@akamai.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250704054824.1580222-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Kuniyuki Iwashima and committed by Jakub Kicinski.
ae8f160e 4e2bba30

+53 -36
+53 -36
net/netlink/af_netlink.c
··· 387 387 WARN_ON(skb->sk != NULL); 388 388 skb->sk = sk; 389 389 skb->destructor = netlink_skb_destructor; 390 - atomic_add(skb->truesize, &sk->sk_rmem_alloc); 391 390 sk_mem_charge(sk, skb->truesize); 392 391 } 393 392 ··· 1211 1212 int netlink_attachskb(struct sock *sk, struct sk_buff *skb, 1212 1213 long *timeo, struct sock *ssk) 1213 1214 { 1215 + DECLARE_WAITQUEUE(wait, current); 1214 1216 struct netlink_sock *nlk; 1217 + unsigned int rmem; 1215 1218 1216 1219 nlk = nlk_sk(sk); 1220 + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); 1217 1221 1218 - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1219 - test_bit(NETLINK_S_CONGESTED, &nlk->state))) { 1220 - DECLARE_WAITQUEUE(wait, current); 1221 - if (!*timeo) { 1222 - if (!ssk || netlink_is_kernel(ssk)) 1223 - netlink_overrun(sk); 1224 - sock_put(sk); 1225 - kfree_skb(skb); 1226 - return -EAGAIN; 1227 - } 1228 - 1229 - __set_current_state(TASK_INTERRUPTIBLE); 1230 - add_wait_queue(&nlk->wait, &wait); 1231 - 1232 - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || 1233 - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && 1234 - !sock_flag(sk, SOCK_DEAD)) 1235 - *timeo = schedule_timeout(*timeo); 1236 - 1237 - __set_current_state(TASK_RUNNING); 1238 - remove_wait_queue(&nlk->wait, &wait); 1239 - sock_put(sk); 1240 - 1241 - if (signal_pending(current)) { 1242 - kfree_skb(skb); 1243 - return sock_intr_errno(*timeo); 1244 - } 1245 - return 1; 1222 + if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) && 1223 + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { 1224 + netlink_skb_set_owner_r(skb, sk); 1225 + return 0; 1246 1226 } 1247 - netlink_skb_set_owner_r(skb, sk); 1248 - return 0; 1227 + 1228 + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1229 + 1230 + if (!*timeo) { 1231 + if (!ssk || netlink_is_kernel(ssk)) 1232 + netlink_overrun(sk); 1233 + sock_put(sk); 1234 + kfree_skb(skb); 1235 + return -EAGAIN; 1236 + } 1237 + 1238 + __set_current_state(TASK_INTERRUPTIBLE); 1239 + 
add_wait_queue(&nlk->wait, &wait); 1240 + rmem = atomic_read(&sk->sk_rmem_alloc); 1241 + 1242 + if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || 1243 + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && 1244 + !sock_flag(sk, SOCK_DEAD)) 1245 + *timeo = schedule_timeout(*timeo); 1246 + 1247 + __set_current_state(TASK_RUNNING); 1248 + remove_wait_queue(&nlk->wait, &wait); 1249 + sock_put(sk); 1250 + 1251 + if (signal_pending(current)) { 1252 + kfree_skb(skb); 1253 + return sock_intr_errno(*timeo); 1254 + } 1255 + 1256 + return 1; 1249 1257 } 1250 1258 1251 1259 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) ··· 1313 1307 ret = -ECONNREFUSED; 1314 1308 if (nlk->netlink_rcv != NULL) { 1315 1309 ret = skb->len; 1310 + atomic_add(skb->truesize, &sk->sk_rmem_alloc); 1316 1311 netlink_skb_set_owner_r(skb, sk); 1317 1312 NETLINK_CB(skb).sk = ssk; 1318 1313 netlink_deliver_tap_kernel(sk, ssk, skb); ··· 1390 1383 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) 1391 1384 { 1392 1385 struct netlink_sock *nlk = nlk_sk(sk); 1386 + unsigned int rmem, rcvbuf; 1393 1387 1394 - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 1388 + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); 1389 + rcvbuf = READ_ONCE(sk->sk_rcvbuf); 1390 + 1391 + if ((rmem != skb->truesize || rmem <= rcvbuf) && 1395 1392 !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { 1396 1393 netlink_skb_set_owner_r(skb, sk); 1397 1394 __netlink_sendskb(sk, skb); 1398 - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); 1395 + return rmem > (rcvbuf >> 1); 1399 1396 } 1397 + 1398 + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1400 1399 return -1; 1401 1400 } 1402 1401 ··· 2262 2249 struct module *module; 2263 2250 int err = -ENOBUFS; 2264 2251 int alloc_min_size; 2252 + unsigned int rmem; 2265 2253 int alloc_size; 2266 2254 2267 2255 if (!lock_taken) ··· 2271 2257 err = -EINVAL; 2272 2258 goto errout_skb; 2273 2259 } 2274 - 2275 - if 
(atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 2276 - goto errout_skb; 2277 2260 2278 2261 /* NLMSG_GOODSIZE is small to avoid high order allocations being 2279 2262 * required, but it makes sense to _attempt_ a 32KiB allocation ··· 2293 2282 } 2294 2283 if (!skb) 2295 2284 goto errout_skb; 2285 + 2286 + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); 2287 + if (rmem >= READ_ONCE(sk->sk_rcvbuf)) { 2288 + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 2289 + goto errout_skb; 2290 + } 2296 2291 2297 2292 /* Trim skb to allocated size. User is expected to provide buffer as 2298 2293 * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at