Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'af_unix-gc-cleanup-and-optimisation'

Kuniyuki Iwashima says:

====================
af_unix: GC cleanup and optimisation.

Currently, AF_UNIX GC is triggered from close() and sendmsg()
based on the number of inflight AF_UNIX sockets.

This is because the old GC implementation had no idea of the
shape of the graph formed by SCM_RIGHTS references.

The new GC knows whether cyclic references (could) exist.

This series refines those trigger conditions so that GC is not
triggered unless really needed.
====================

Link: https://patch.msgid.link/20251115020935.2643121-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+52 -53
+1 -6
net/unix/af_unix.c
··· 733 733 734 734 /* ---- Socket is dead now and most probably destroyed ---- */ 735 735 736 - if (READ_ONCE(unix_tot_inflight)) 737 - unix_gc(); /* Garbage collect fds */ 736 + unix_schedule_gc(NULL); 738 737 } 739 738 740 739 struct unix_peercred { ··· 2098 2099 if (err < 0) 2099 2100 return err; 2100 2101 2101 - wait_for_unix_gc(scm.fp); 2102 - 2103 2102 if (msg->msg_flags & MSG_OOB) { 2104 2103 err = -EOPNOTSUPP; 2105 2104 goto out; ··· 2390 2393 err = scm_send(sock, msg, &scm, false); 2391 2394 if (err < 0) 2392 2395 return err; 2393 - 2394 - wait_for_unix_gc(scm.fp); 2395 2396 2396 2397 if (msg->msg_flags & MSG_OOB) { 2397 2398 err = -EOPNOTSUPP;
+1 -3
net/unix/af_unix.h
··· 24 24 #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) 25 25 26 26 /* GC for SCM_RIGHTS */ 27 - extern unsigned int unix_tot_inflight; 28 27 void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver); 29 28 void unix_del_edges(struct scm_fp_list *fpl); 30 29 void unix_update_edges(struct unix_sock *receiver); 31 30 int unix_prepare_fpl(struct scm_fp_list *fpl); 32 31 void unix_destroy_fpl(struct scm_fp_list *fpl); 33 - void unix_gc(void); 34 - void wait_for_unix_gc(struct scm_fp_list *fpl); 32 + void unix_schedule_gc(struct user_struct *user); 35 33 36 34 /* SOCK_DIAG */ 37 35 long unix_inq_len(struct sock *sk);
+50 -44
net/unix/garbage.c
··· 121 121 return edge->successor->vertex; 122 122 } 123 123 124 - static bool unix_graph_maybe_cyclic; 125 - static bool unix_graph_grouped; 124 + enum { 125 + UNIX_GRAPH_NOT_CYCLIC, 126 + UNIX_GRAPH_MAYBE_CYCLIC, 127 + UNIX_GRAPH_CYCLIC, 128 + }; 129 + 130 + static unsigned char unix_graph_state; 126 131 127 132 static void unix_update_graph(struct unix_vertex *vertex) 128 133 { ··· 137 132 if (!vertex) 138 133 return; 139 134 140 - unix_graph_maybe_cyclic = true; 141 - unix_graph_grouped = false; 135 + WRITE_ONCE(unix_graph_state, UNIX_GRAPH_MAYBE_CYCLIC); 142 136 } 143 137 144 138 static LIST_HEAD(unix_unvisited_vertices); ··· 200 196 } 201 197 202 198 static DEFINE_SPINLOCK(unix_gc_lock); 203 - unsigned int unix_tot_inflight; 204 199 205 200 void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver) 206 201 { ··· 225 222 } while (i < fpl->count_unix); 226 223 227 224 receiver->scm_stat.nr_unix_fds += fpl->count_unix; 228 - WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix); 229 225 out: 230 226 WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count); 231 227 ··· 255 253 receiver = fpl->edges[0].successor; 256 254 receiver->scm_stat.nr_unix_fds -= fpl->count_unix; 257 255 } 258 - WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix); 259 256 out: 260 257 WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count); 261 258 ··· 299 298 GFP_KERNEL_ACCOUNT); 300 299 if (!fpl->edges) 301 300 goto err; 301 + 302 + unix_schedule_gc(fpl->user); 302 303 303 304 return 0; 304 305 ··· 407 404 static LIST_HEAD(unix_visited_vertices); 408 405 static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2; 409 406 410 - static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index, 411 - struct sk_buff_head *hitlist) 407 + static unsigned long __unix_walk_scc(struct unix_vertex *vertex, 408 + unsigned long *last_index, 409 + struct sk_buff_head *hitlist) 412 410 { 
411 + unsigned long cyclic_sccs = 0; 413 412 LIST_HEAD(vertex_stack); 414 413 struct unix_edge *edge; 415 414 LIST_HEAD(edge_stack); ··· 502 497 if (unix_vertex_max_scc_index < vertex->scc_index) 503 498 unix_vertex_max_scc_index = vertex->scc_index; 504 499 505 - if (!unix_graph_maybe_cyclic) 506 - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); 500 + if (unix_scc_cyclic(&scc)) 501 + cyclic_sccs++; 507 502 } 508 503 509 504 list_del(&scc); ··· 512 507 /* Need backtracking ? */ 513 508 if (!list_empty(&edge_stack)) 514 509 goto prev_vertex; 510 + 511 + return cyclic_sccs; 515 512 } 513 + 514 + static unsigned long unix_graph_cyclic_sccs; 516 515 517 516 static void unix_walk_scc(struct sk_buff_head *hitlist) 518 517 { 519 518 unsigned long last_index = UNIX_VERTEX_INDEX_START; 519 + unsigned long cyclic_sccs = 0; 520 520 521 - unix_graph_maybe_cyclic = false; 522 521 unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; 523 522 524 523 /* Visit every vertex exactly once. ··· 532 523 struct unix_vertex *vertex; 533 524 534 525 vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry); 535 - __unix_walk_scc(vertex, &last_index, hitlist); 526 + cyclic_sccs += __unix_walk_scc(vertex, &last_index, hitlist); 536 527 } 537 528 538 529 list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices); 539 530 swap(unix_vertex_unvisited_index, unix_vertex_grouped_index); 540 531 541 - unix_graph_grouped = true; 532 + WRITE_ONCE(unix_graph_cyclic_sccs, cyclic_sccs); 533 + WRITE_ONCE(unix_graph_state, 534 + cyclic_sccs ? 
UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC); 542 535 } 543 536 544 537 static void unix_walk_scc_fast(struct sk_buff_head *hitlist) 545 538 { 546 - unix_graph_maybe_cyclic = false; 539 + unsigned long cyclic_sccs = unix_graph_cyclic_sccs; 547 540 548 541 while (!list_empty(&unix_unvisited_vertices)) { 549 542 struct unix_vertex *vertex; ··· 562 551 scc_dead = unix_vertex_dead(vertex); 563 552 } 564 553 565 - if (scc_dead) 554 + if (scc_dead) { 555 + cyclic_sccs--; 566 556 unix_collect_skb(&scc, hitlist); 567 - else if (!unix_graph_maybe_cyclic) 568 - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); 557 + } 569 558 570 559 list_del(&scc); 571 560 } 572 561 573 562 list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices); 563 + 564 + WRITE_ONCE(unix_graph_cyclic_sccs, cyclic_sccs); 565 + WRITE_ONCE(unix_graph_state, 566 + cyclic_sccs ? UNIX_GRAPH_CYCLIC : UNIX_GRAPH_NOT_CYCLIC); 574 567 } 575 568 576 569 static bool gc_in_progress; 577 570 578 - static void __unix_gc(struct work_struct *work) 571 + static void unix_gc(struct work_struct *work) 579 572 { 580 573 struct sk_buff_head hitlist; 581 574 struct sk_buff *skb; 582 575 583 576 spin_lock(&unix_gc_lock); 584 577 585 - if (!unix_graph_maybe_cyclic) { 578 + if (unix_graph_state == UNIX_GRAPH_NOT_CYCLIC) { 586 579 spin_unlock(&unix_gc_lock); 587 580 goto skip_gc; 588 581 } 589 582 590 583 __skb_queue_head_init(&hitlist); 591 584 592 - if (unix_graph_grouped) 585 + if (unix_graph_state == UNIX_GRAPH_CYCLIC) 593 586 unix_walk_scc_fast(&hitlist); 594 587 else 595 588 unix_walk_scc(&hitlist); ··· 610 595 WRITE_ONCE(gc_in_progress, false); 611 596 } 612 597 613 - static DECLARE_WORK(unix_gc_work, __unix_gc); 598 + static DECLARE_WORK(unix_gc_work, unix_gc); 614 599 615 - void unix_gc(void) 600 + #define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8) 601 + 602 + void unix_schedule_gc(struct user_struct *user) 616 603 { 617 - WRITE_ONCE(gc_in_progress, true); 618 - queue_work(system_dfl_wq, &unix_gc_work); 619 - } 620 
- 621 - #define UNIX_INFLIGHT_TRIGGER_GC 16000 622 - #define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8) 623 - 624 - void wait_for_unix_gc(struct scm_fp_list *fpl) 625 - { 626 - /* If number of inflight sockets is insane, 627 - * force a garbage collect right now. 628 - * 629 - * Paired with the WRITE_ONCE() in unix_inflight(), 630 - * unix_notinflight(), and __unix_gc(). 631 - */ 632 - if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && 633 - !READ_ONCE(gc_in_progress)) 634 - unix_gc(); 604 + if (READ_ONCE(unix_graph_state) == UNIX_GRAPH_NOT_CYCLIC) 605 + return; 635 606 636 607 /* Penalise users who want to send AF_UNIX sockets 637 608 * but whose sockets have not been received yet. 638 609 */ 639 - if (!fpl || !fpl->count_unix || 640 - READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) 610 + if (user && 611 + READ_ONCE(user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) 641 612 return; 642 613 643 - if (READ_ONCE(gc_in_progress)) 614 + if (!READ_ONCE(gc_in_progress)) { 615 + WRITE_ONCE(gc_in_progress, true); 616 + queue_work(system_dfl_wq, &unix_gc_work); 617 + } 618 + 619 + if (user && READ_ONCE(unix_graph_cyclic_sccs)) 644 620 flush_work(&unix_gc_work); 645 621 }