Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: annotate data-races around sk->sk_{data_ready,write_space}

skmsg (and probably other layers) changes these pointers
while other CPUs might be reading them concurrently.

Add corresponding READ_ONCE()/WRITE_ONCE() annotations
for UDP, TCP and AF_UNIX.

Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Reported-by: syzbot+87f770387a9e5dc6b79b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/699ee9fc.050a0220.1cd54b.0009.GAE@google.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260225131547.1085509-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet and committed by Jakub Kicinski
2ef2b20c 754a3d08

+25 -23
+7 -7
net/core/skmsg.c
··· 1205 1205 return; 1206 1206 1207 1207 psock->saved_data_ready = sk->sk_data_ready; 1208 - sk->sk_data_ready = sk_psock_strp_data_ready; 1209 - sk->sk_write_space = sk_psock_write_space; 1208 + WRITE_ONCE(sk->sk_data_ready, sk_psock_strp_data_ready); 1209 + WRITE_ONCE(sk->sk_write_space, sk_psock_write_space); 1210 1210 } 1211 1211 1212 1212 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) ··· 1216 1216 if (!psock->saved_data_ready) 1217 1217 return; 1218 1218 1219 - sk->sk_data_ready = psock->saved_data_ready; 1220 - psock->saved_data_ready = NULL; 1219 + WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready); 1220 + WRITE_ONCE(psock->saved_data_ready, NULL); 1221 1221 strp_stop(&psock->strp); 1222 1222 } 1223 1223 ··· 1296 1296 return; 1297 1297 1298 1298 psock->saved_data_ready = sk->sk_data_ready; 1299 - sk->sk_data_ready = sk_psock_verdict_data_ready; 1300 - sk->sk_write_space = sk_psock_write_space; 1299 + WRITE_ONCE(sk->sk_data_ready, sk_psock_verdict_data_ready); 1300 + WRITE_ONCE(sk->sk_write_space, sk_psock_write_space); 1301 1301 } 1302 1302 1303 1303 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) ··· 1308 1308 if (!psock->saved_data_ready) 1309 1309 return; 1310 1310 1311 - sk->sk_data_ready = psock->saved_data_ready; 1311 + WRITE_ONCE(sk->sk_data_ready, psock->saved_data_ready); 1312 1312 psock->saved_data_ready = NULL; 1313 1313 }
+2 -2
net/ipv4/tcp.c
··· 1446 1446 err = sk_stream_error(sk, flags, err); 1447 1447 /* make sure we wake any epoll edge trigger waiter */ 1448 1448 if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { 1449 - sk->sk_write_space(sk); 1449 + READ_ONCE(sk->sk_write_space)(sk); 1450 1450 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); 1451 1451 } 1452 1452 if (binding) ··· 4181 4181 break; 4182 4182 case TCP_NOTSENT_LOWAT: 4183 4183 WRITE_ONCE(tp->notsent_lowat, val); 4184 - sk->sk_write_space(sk); 4184 + READ_ONCE(sk->sk_write_space)(sk); 4185 4185 break; 4186 4186 case TCP_INQ: 4187 4187 if (val > 1 || val < 0)
+1 -1
net/ipv4/tcp_bpf.c
··· 725 725 WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash); 726 726 tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); 727 727 } else { 728 - sk->sk_write_space = psock->saved_write_space; 728 + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); 729 729 /* Pairs with lockless read in sk_clone_lock() */ 730 730 sock_replace_proto(sk, psock->sk_proto); 731 731 }
+8 -6
net/ipv4/tcp_input.c
··· 5425 5425 5426 5426 if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { 5427 5427 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); 5428 - sk->sk_data_ready(sk); 5428 + READ_ONCE(sk->sk_data_ready)(sk); 5429 5429 tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM); 5430 5430 return; 5431 5431 } ··· 5635 5635 void tcp_data_ready(struct sock *sk) 5636 5636 { 5637 5637 if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE)) 5638 - sk->sk_data_ready(sk); 5638 + READ_ONCE(sk->sk_data_ready)(sk); 5639 5639 } 5640 5640 5641 5641 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) ··· 5691 5691 inet_csk(sk)->icsk_ack.pending |= 5692 5692 (ICSK_ACK_NOMEM | ICSK_ACK_NOW); 5693 5693 inet_csk_schedule_ack(sk); 5694 - sk->sk_data_ready(sk); 5694 + READ_ONCE(sk->sk_data_ready)(sk); 5695 5695 5696 5696 if (skb_queue_len(&sk->sk_receive_queue) && skb->len) { 5697 5697 reason = SKB_DROP_REASON_PROTO_MEM; ··· 6114 6114 tp->snd_cwnd_stamp = tcp_jiffies32; 6115 6115 } 6116 6116 6117 - INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); 6117 + INDIRECT_CALL_1(READ_ONCE(sk->sk_write_space), 6118 + sk_stream_write_space, 6119 + sk); 6118 6120 } 6119 6121 6120 6122 /* Caller made space either from: ··· 6327 6325 BUG(); 6328 6326 WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp); 6329 6327 if (!sock_flag(sk, SOCK_DEAD)) 6330 - sk->sk_data_ready(sk); 6328 + READ_ONCE(sk->sk_data_ready)(sk); 6331 6329 } 6332 6330 } 6333 6331 } ··· 7794 7792 sock_put(fastopen_sk); 7795 7793 goto drop_and_free; 7796 7794 } 7797 - sk->sk_data_ready(sk); 7795 + READ_ONCE(sk->sk_data_ready)(sk); 7798 7796 bh_unlock_sock(fastopen_sk); 7799 7797 sock_put(fastopen_sk); 7800 7798 } else {
+1 -1
net/ipv4/tcp_minisocks.c
··· 1004 1004 reason = tcp_rcv_state_process(child, skb); 1005 1005 /* Wakeup parent, send SIGIO */ 1006 1006 if (state == TCP_SYN_RECV && child->sk_state != state) 1007 - parent->sk_data_ready(parent); 1007 + READ_ONCE(parent->sk_data_ready)(parent); 1008 1008 } else { 1009 1009 /* Alas, it is possible again, because we do lookup 1010 1010 * in main socket hash table and lock on listening
+1 -1
net/ipv4/udp.c
··· 1787 1787 * using prepare_to_wait_exclusive(). 1788 1788 */ 1789 1789 while (nb) { 1790 - INDIRECT_CALL_1(sk->sk_data_ready, 1790 + INDIRECT_CALL_1(READ_ONCE(sk->sk_data_ready), 1791 1791 sock_def_readable, sk); 1792 1792 nb--; 1793 1793 }
+1 -1
net/ipv4/udp_bpf.c
··· 158 158 int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; 159 159 160 160 if (restore) { 161 - sk->sk_write_space = psock->saved_write_space; 161 + WRITE_ONCE(sk->sk_write_space, psock->saved_write_space); 162 162 sock_replace_proto(sk, psock->sk_proto); 163 163 return 0; 164 164 }
+4 -4
net/unix/af_unix.c
··· 1785 1785 __skb_queue_tail(&other->sk_receive_queue, skb); 1786 1786 spin_unlock(&other->sk_receive_queue.lock); 1787 1787 unix_state_unlock(other); 1788 - other->sk_data_ready(other); 1788 + READ_ONCE(other->sk_data_ready)(other); 1789 1789 sock_put(other); 1790 1790 return 0; 1791 1791 ··· 2278 2278 scm_stat_add(other, skb); 2279 2279 skb_queue_tail(&other->sk_receive_queue, skb); 2280 2280 unix_state_unlock(other); 2281 - other->sk_data_ready(other); 2281 + READ_ONCE(other->sk_data_ready)(other); 2282 2282 sock_put(other); 2283 2283 scm_destroy(&scm); 2284 2284 return len; ··· 2351 2351 2352 2352 sk_send_sigurg(other); 2353 2353 unix_state_unlock(other); 2354 - other->sk_data_ready(other); 2354 + READ_ONCE(other->sk_data_ready)(other); 2355 2355 2356 2356 return 0; 2357 2357 out_unlock: ··· 2477 2477 spin_unlock(&other->sk_receive_queue.lock); 2478 2478 2479 2479 unix_state_unlock(other); 2480 - other->sk_data_ready(other); 2480 + READ_ONCE(other->sk_data_ready)(other); 2481 2481 sent += size; 2482 2482 } 2483 2483