Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net: unix: fix inflight counting bug in garbage collector

Previously I assumed that the receive queues of candidates don't
change during the GC. This is only half true, nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.

This can result in inc_inflight_move_tail() erronously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called. This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.

Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves. Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket becoming a
candidate for GC while the skb that contains it is not yet queued.

Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Miklos Szeredi and committed by
Linus Torvalds
6209344f 058e3739

+62 -19
+1
include/net/af_unix.h
··· 54 54 atomic_long_t inflight; 55 55 spinlock_t lock; 56 56 unsigned int gc_candidate : 1; 57 + unsigned int gc_maybe_cycle : 1; 57 58 wait_queue_head_t peer_wait; 58 59 }; 59 60 #define unix_sk(__sk) ((struct unix_sock *)__sk)
+24 -7
net/unix/af_unix.c
··· 1302 1302 sock_wfree(skb); 1303 1303 } 1304 1304 1305 - static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1305 + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1306 1306 { 1307 1307 int i; 1308 + 1309 + /* 1310 + * Need to duplicate file references for the sake of garbage 1311 + * collection. Otherwise a socket in the fps might become a 1312 + * candidate for GC while the skb is not yet queued. 1313 + */ 1314 + UNIXCB(skb).fp = scm_fp_dup(scm->fp); 1315 + if (!UNIXCB(skb).fp) 1316 + return -ENOMEM; 1317 + 1308 1318 for (i=scm->fp->count-1; i>=0; i--) 1309 1319 unix_inflight(scm->fp->fp[i]); 1310 - UNIXCB(skb).fp = scm->fp; 1311 1320 skb->destructor = unix_destruct_fds; 1312 - scm->fp = NULL; 1321 + return 0; 1313 1322 } 1314 1323 1315 1324 /* ··· 1377 1368 goto out; 1378 1369 1379 1370 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1380 - if (siocb->scm->fp) 1381 - unix_attach_fds(siocb->scm, skb); 1371 + if (siocb->scm->fp) { 1372 + err = unix_attach_fds(siocb->scm, skb); 1373 + if (err) 1374 + goto out_free; 1375 + } 1382 1376 unix_get_secdata(siocb->scm, skb); 1383 1377 1384 1378 skb_reset_transport_header(skb); ··· 1550 1538 size = min_t(int, size, skb_tailroom(skb)); 1551 1539 1552 1540 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1553 - if (siocb->scm->fp) 1554 - unix_attach_fds(siocb->scm, skb); 1541 + if (siocb->scm->fp) { 1542 + err = unix_attach_fds(siocb->scm, skb); 1543 + if (err) { 1544 + kfree_skb(skb); 1545 + goto out_err; 1546 + } 1547 + } 1555 1548 1556 1549 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { 1557 1550 kfree_skb(skb);
+37 -12
net/unix/garbage.c
··· 186 186 */ 187 187 struct sock *sk = unix_get_socket(*fp++); 188 188 if (sk) { 189 - hit = true; 190 - func(unix_sk(sk)); 189 + struct unix_sock *u = unix_sk(sk); 190 + 191 + /* 192 + * Ignore non-candidates, they could 193 + * have been added to the queues after 194 + * starting the garbage collection 195 + */ 196 + if (u->gc_candidate) { 197 + hit = true; 198 + func(u); 199 + } 191 200 } 192 201 } 193 202 if (hit && hitlist != NULL) { ··· 258 249 { 259 250 atomic_long_inc(&u->inflight); 260 251 /* 261 - * If this is still a candidate, move it to the end of the 262 - * list, so that it's checked even if it was already passed 263 - * over 252 + * If this still might be part of a cycle, move it to the end 253 + * of the list, so that it's checked even if it was already 254 + * passed over 264 255 */ 265 - if (u->gc_candidate) 256 + if (u->gc_maybe_cycle) 266 257 list_move_tail(&u->link, &gc_candidates); 267 258 } 268 259 ··· 276 267 struct unix_sock *next; 277 268 struct sk_buff_head hitlist; 278 269 struct list_head cursor; 270 + LIST_HEAD(not_cycle_list); 279 271 280 272 spin_lock(&unix_gc_lock); 281 273 ··· 292 282 * 293 283 * Holding unix_gc_lock will protect these candidates from 294 284 * being detached, and hence from gaining an external 295 - * reference. This also means, that since there are no 296 - * possible receivers, the receive queues of these sockets are 297 - * static during the GC, even though the dequeue is done 298 - * before the detach without atomicity guarantees. 285 + * reference. Since there are no possible receivers, all 286 + * buffers currently on the candidates' queues stay there 287 + * during the garbage collection. 288 + * 289 + * We also know that no new candidate can be added onto the 290 + * receive queues. Other, non candidate sockets _can_ be 291 + * added to queue, so we must make sure only to touch 292 + * candidates. 299 293 */ 300 294 list_for_each_entry_safe(u, next, &gc_inflight_list, link) { 301 295 long total_refs; ··· 313 299 if (total_refs == inflight_refs) { 314 300 list_move_tail(&u->link, &gc_candidates); 315 301 u->gc_candidate = 1; 302 + u->gc_maybe_cycle = 1; 316 303 } 317 304 } 318 305 ··· 340 325 list_move(&cursor, &u->link); 341 326 342 327 if (atomic_long_read(&u->inflight) > 0) { 343 - list_move_tail(&u->link, &gc_inflight_list); 344 - u->gc_candidate = 0; 328 + list_move_tail(&u->link, &not_cycle_list); 329 + u->gc_maybe_cycle = 0; 345 330 scan_children(&u->sk, inc_inflight_move_tail, NULL); 346 331 } 347 332 } 348 333 list_del(&cursor); 334 + 335 + /* 336 + * not_cycle_list contains those sockets which do not make up a 337 + * cycle. Restore these to the inflight list. 338 + */ 339 + while (!list_empty(&not_cycle_list)) { 340 + u = list_entry(not_cycle_list.next, struct unix_sock, link); 341 + u->gc_candidate = 0; 342 + list_move_tail(&u->link, &gc_inflight_list); 343 + } 349 344 350 345 /* 351 346 * Now gc_candidates contains only garbage. Restore original