Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'bcachefs-2024-07-10' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:

- Switch some asserts to WARN()

- Fix a few "transaction not locked" asserts in the data read retry
paths and backpointers gc

- Fix a race that would cause the journal to get stuck on a flush
commit

- Add missing fsck checks for the fragmentation LRU

- The usual assorted syzbot fixes

* tag 'bcachefs-2024-07-10' of https://evilpiepirate.org/git/bcachefs: (22 commits)
bcachefs: Add missing bch2_trans_begin()
bcachefs: Fix missing error check in journal_entry_btree_keys_validate()
bcachefs: Warn on attempting a move with no replicas
bcachefs: bch2_data_update_to_text()
bcachefs: Log mount failure error code
bcachefs: Fix undefined behaviour in eytzinger1_first()
bcachefs: Mark bch_inode_info as SLAB_ACCOUNT
bcachefs: Fix bch2_inode_insert() race path for tmpfiles
closures: fix closure_sync + closure debugging
bcachefs: Fix journal getting stuck on a flush commit
bcachefs: io clock: run timer fns under clock lock
bcachefs: Repair fragmentation_lru in alloc_write_key()
bcachefs: add check for missing fragmentation in check_alloc_to_lru_ref()
bcachefs: bch2_btree_write_buffer_maybe_flush()
bcachefs: Add missing printbuf_tabstops_reset() calls
bcachefs: Fix loop restart in bch2_btree_transactions_read()
bcachefs: Fix bch2_read_retry_nodecode()
bcachefs: Don't use the new_fs() bucket alloc path on an initialized fs
bcachefs: Fix shift greater than integer size
bcachefs: Change bch2_fs_journal_stop() BUG_ON() to warning
...

+266 -142
+23 -25
fs/bcachefs/alloc_background.c
··· 3 3 #include "alloc_background.h" 4 4 #include "alloc_foreground.h" 5 5 #include "backpointers.h" 6 + #include "bkey_buf.h" 6 7 #include "btree_cache.h" 7 8 #include "btree_io.h" 8 9 #include "btree_key_cache.h" ··· 1554 1553 } 1555 1554 1556 1555 static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, 1557 - struct btree_iter *alloc_iter) 1556 + struct btree_iter *alloc_iter, 1557 + struct bkey_buf *last_flushed) 1558 1558 { 1559 1559 struct bch_fs *c = trans->c; 1560 - struct btree_iter lru_iter; 1561 1560 struct bch_alloc_v4 a_convert; 1562 1561 const struct bch_alloc_v4 *a; 1563 - struct bkey_s_c alloc_k, lru_k; 1562 + struct bkey_s_c alloc_k; 1564 1563 struct printbuf buf = PRINTBUF; 1565 1564 int ret; 1566 1565 ··· 1573 1572 return ret; 1574 1573 1575 1574 a = bch2_alloc_to_v4(alloc_k, &a_convert); 1575 + 1576 + if (a->fragmentation_lru) { 1577 + ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, 1578 + a->fragmentation_lru, 1579 + alloc_k, last_flushed); 1580 + if (ret) 1581 + return ret; 1582 + } 1576 1583 1577 1584 if (a->data_type != BCH_DATA_cached) 1578 1585 return 0; ··· 1606 1597 a = &a_mut->v; 1607 1598 } 1608 1599 1609 - lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, 1610 - lru_pos(alloc_k.k->p.inode, 1611 - bucket_to_u64(alloc_k.k->p), 1612 - a->io_time[READ]), 0); 1613 - ret = bkey_err(lru_k); 1600 + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ], 1601 + alloc_k, last_flushed); 1614 1602 if (ret) 1615 - return ret; 1616 - 1617 - if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, 1618 - alloc_key_to_missing_lru_entry, 1619 - "missing lru entry\n" 1620 - " %s", 1621 - (printbuf_reset(&buf), 1622 - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { 1623 - ret = bch2_lru_set(trans, 1624 - alloc_k.k->p.inode, 1625 - bucket_to_u64(alloc_k.k->p), 1626 - a->io_time[READ]); 1627 - if (ret) 1628 - goto err; 1629 - } 1603 + goto err; 1630 1604 err: 1631 1605 fsck_err: 1632 - 
bch2_trans_iter_exit(trans, &lru_iter); 1633 1606 printbuf_exit(&buf); 1634 1607 return ret; 1635 1608 } 1636 1609 1637 1610 int bch2_check_alloc_to_lru_refs(struct bch_fs *c) 1638 1611 { 1612 + struct bkey_buf last_flushed; 1613 + 1614 + bch2_bkey_buf_init(&last_flushed); 1615 + bkey_init(&last_flushed.k->k); 1616 + 1639 1617 int ret = bch2_trans_run(c, 1640 1618 for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, 1641 1619 POS_MIN, BTREE_ITER_prefetch, k, 1642 1620 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1643 - bch2_check_alloc_to_lru_ref(trans, &iter))); 1621 + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); 1622 + 1623 + bch2_bkey_buf_exit(&last_flushed, c); 1644 1624 bch_err_fn(c, ret); 1645 1625 return ret; 1646 1626 }
+2
fs/bcachefs/alloc_foreground.c
··· 1703 1703 for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) 1704 1704 nr[c->open_buckets[i].data_type]++; 1705 1705 1706 + printbuf_tabstops_reset(out); 1706 1707 printbuf_tabstop_push(out, 24); 1707 1708 1708 1709 percpu_down_read(&c->mark_lock); ··· 1737 1736 for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) 1738 1737 nr[c->open_buckets[i].data_type]++; 1739 1738 1739 + printbuf_tabstops_reset(out); 1740 1740 printbuf_tabstop_push(out, 12); 1741 1741 printbuf_tabstop_push(out, 16); 1742 1742 printbuf_tabstop_push(out, 16);
+26 -44
fs/bcachefs/backpointers.c
··· 434 434 return ret; 435 435 } 436 436 437 - static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 438 - { 439 - return bpos_eq(l.k->p, r.k->p) && 440 - bkey_bytes(l.k) == bkey_bytes(r.k) && 441 - !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 442 - } 443 - 444 437 struct extents_to_bp_state { 445 438 struct bpos bucket_start; 446 439 struct bpos bucket_end; ··· 529 536 struct btree_iter other_extent_iter = {}; 530 537 struct printbuf buf = PRINTBUF; 531 538 struct bkey_s_c bp_k; 532 - struct bkey_buf tmp; 533 539 int ret = 0; 534 - 535 - bch2_bkey_buf_init(&tmp); 536 540 537 541 struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); 538 542 if (!ca) { ··· 555 565 556 566 if (bp_k.k->type != KEY_TYPE_backpointer || 557 567 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 558 - bch2_bkey_buf_reassemble(&tmp, c, orig_k); 559 - 560 - if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { 561 - if (bp.level) { 562 - bch2_trans_unlock(trans); 563 - bch2_btree_interior_updates_flush(c); 564 - } 565 - 566 - ret = bch2_btree_write_buffer_flush_sync(trans); 567 - if (ret) 568 - goto err; 569 - 570 - bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); 571 - ret = -BCH_ERR_transaction_restart_write_buffer_flush; 572 - goto out; 573 - } 568 + ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 569 + if (ret) 570 + goto err; 574 571 575 572 goto check_existing_bp; 576 573 } ··· 566 589 fsck_err: 567 590 bch2_trans_iter_exit(trans, &other_extent_iter); 568 591 bch2_trans_iter_exit(trans, &bp_iter); 569 - bch2_bkey_buf_exit(&tmp, c); 570 592 bch2_dev_put(ca); 571 593 printbuf_exit(&buf); 572 594 return ret; ··· 770 794 !((1U << btree) & btree_interior_mask)) 771 795 continue; 772 796 797 + bch2_trans_begin(trans); 798 + 773 799 __for_each_btree_node(trans, iter, btree, 774 800 btree == start.btree ? 
start.pos : POS_MIN, 775 801 0, depth, BTREE_ITER_prefetch, b, ret) { ··· 883 905 struct bbpos start, 884 906 struct bbpos end, 885 907 struct bkey_s_c_backpointer bp, 886 - struct bpos *last_flushed_pos) 908 + struct bkey_buf *last_flushed) 887 909 { 888 910 struct bch_fs *c = trans->c; 889 911 struct btree_iter iter; ··· 903 925 if (ret) 904 926 return ret; 905 927 906 - if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { 907 - *last_flushed_pos = bp.k->p; 908 - ret = bch2_btree_write_buffer_flush_sync(trans) ?: 909 - -BCH_ERR_transaction_restart_write_buffer_flush; 910 - goto out; 911 - } 928 + if (!k.k) { 929 + ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); 930 + if (ret) 931 + goto out; 912 932 913 - if (fsck_err_on(!k.k, c, 914 - backpointer_to_missing_ptr, 915 - "backpointer for missing %s\n %s", 916 - bp.v->level ? "btree node" : "extent", 917 - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 918 - ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 919 - goto out; 933 + if (fsck_err(c, backpointer_to_missing_ptr, 934 + "backpointer for missing %s\n %s", 935 + bp.v->level ? 
"btree node" : "extent", 936 + (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 937 + ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 938 + goto out; 939 + } 920 940 } 921 941 out: 922 942 fsck_err: ··· 927 951 struct bbpos start, 928 952 struct bbpos end) 929 953 { 930 - struct bpos last_flushed_pos = SPOS_MAX; 954 + struct bkey_buf last_flushed; 931 955 932 - return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 956 + bch2_bkey_buf_init(&last_flushed); 957 + bkey_init(&last_flushed.k->k); 958 + 959 + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 933 960 POS_MIN, BTREE_ITER_prefetch, k, 934 961 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 935 962 check_one_backpointer(trans, start, end, 936 963 bkey_s_c_to_backpointer(k), 937 - &last_flushed_pos)); 964 + &last_flushed)); 965 + 966 + bch2_bkey_buf_exit(&last_flushed, trans->c); 967 + return ret; 938 968 } 939 969 940 970 int bch2_check_backpointers_to_extents(struct bch_fs *c)
+3 -2
fs/bcachefs/bkey.c
··· 660 660 bch2_bkey_format_field_overflows(f, i)) { 661 661 unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; 662 662 u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); 663 - u64 packed_max = f->bits_per_field[i] 664 - ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) 663 + unsigned packed_bits = min(64, f->bits_per_field[i]); 664 + u64 packed_max = packed_bits 665 + ? ~((~0ULL << 1) << (packed_bits - 1)) 665 666 : 0; 666 667 667 668 prt_printf(err, "field %u too large: %llu + %llu > %llu",
+7
fs/bcachefs/bkey.h
··· 194 194 return bkey_gt(l, r) ? l : r; 195 195 } 196 196 197 + static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 198 + { 199 + return bpos_eq(l.k->p, r.k->p) && 200 + bkey_bytes(l.k) == bkey_bytes(r.k) && 201 + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 202 + } 203 + 197 204 void bch2_bpos_swab(struct bpos *); 198 205 void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); 199 206
+11 -13
fs/bcachefs/btree_gc.c
··· 903 903 bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true); 904 904 percpu_up_read(&c->mark_lock); 905 905 906 + gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); 907 + 906 908 if (fsck_err_on(new.data_type != gc.data_type, c, 907 909 alloc_key_data_type_wrong, 908 910 "bucket %llu:%llu gen %u has wrong data_type" ··· 918 916 #define copy_bucket_field(_errtype, _f) \ 919 917 if (fsck_err_on(new._f != gc._f, c, _errtype, \ 920 918 "bucket %llu:%llu gen %u data type %s has wrong " #_f \ 921 - ": got %u, should be %u", \ 919 + ": got %llu, should be %llu", \ 922 920 iter->pos.inode, iter->pos.offset, \ 923 921 gc.gen, \ 924 922 bch2_data_type_str(gc.data_type), \ 925 - new._f, gc._f)) \ 923 + (u64) new._f, (u64) gc._f)) \ 926 924 new._f = gc._f; \ 927 925 928 - copy_bucket_field(alloc_key_gen_wrong, 929 - gen); 930 - copy_bucket_field(alloc_key_dirty_sectors_wrong, 931 - dirty_sectors); 932 - copy_bucket_field(alloc_key_cached_sectors_wrong, 933 - cached_sectors); 934 - copy_bucket_field(alloc_key_stripe_wrong, 935 - stripe); 936 - copy_bucket_field(alloc_key_stripe_redundancy_wrong, 937 - stripe_redundancy); 926 + copy_bucket_field(alloc_key_gen_wrong, gen); 927 + copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors); 928 + copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); 929 + copy_bucket_field(alloc_key_stripe_wrong, stripe); 930 + copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); 931 + copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); 938 932 #undef copy_bucket_field 939 933 940 934 if (!bch2_alloc_v4_cmp(*old, new)) ··· 944 946 a->v = new; 945 947 946 948 /* 947 - * The trigger normally makes sure this is set, but we're not running 949 + * The trigger normally makes sure these are set, but we're not running 948 950 * triggers: 949 951 */ 950 952 if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
+37
fs/bcachefs/btree_write_buffer.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include "bcachefs.h" 4 + #include "bkey_buf.h" 4 5 #include "btree_locking.h" 5 6 #include "btree_update.h" 6 7 #include "btree_update_interior.h" 7 8 #include "btree_write_buffer.h" 8 9 #include "error.h" 10 + #include "extents.h" 9 11 #include "journal.h" 10 12 #include "journal_io.h" 11 13 #include "journal_reclaim.h" ··· 491 489 492 490 int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans); 493 491 bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer); 492 + return ret; 493 + } 494 + 495 + /** 496 + * In check and repair code, when checking references to write buffer btrees we 497 + * need to issue a flush before we have a definitive error: this issues a flush 498 + * if this is a key we haven't yet checked. 499 + */ 500 + int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, 501 + struct bkey_s_c referring_k, 502 + struct bkey_buf *last_flushed) 503 + { 504 + struct bch_fs *c = trans->c; 505 + struct bkey_buf tmp; 506 + int ret = 0; 507 + 508 + bch2_bkey_buf_init(&tmp); 509 + 510 + if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { 511 + bch2_bkey_buf_reassemble(&tmp, c, referring_k); 512 + 513 + if (bkey_is_btree_ptr(referring_k.k)) { 514 + bch2_trans_unlock(trans); 515 + bch2_btree_interior_updates_flush(c); 516 + } 517 + 518 + ret = bch2_btree_write_buffer_flush_sync(trans); 519 + if (ret) 520 + goto err; 521 + 522 + bch2_bkey_buf_copy(last_flushed, c, tmp.k); 523 + ret = -BCH_ERR_transaction_restart_write_buffer_flush; 524 + } 525 + err: 526 + bch2_bkey_buf_exit(&tmp, c); 494 527 return ret; 495 528 } 496 529
+3
fs/bcachefs/btree_write_buffer.h
··· 23 23 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); 24 24 int bch2_btree_write_buffer_tryflush(struct btree_trans *); 25 25 26 + struct bkey_buf; 27 + int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *); 28 + 26 29 struct journal_keys_to_wb { 27 30 struct btree_write_buffer_keys *wb; 28 31 size_t room;
+2 -5
fs/bcachefs/clock.c
··· 132 132 { 133 133 struct io_timer *ret = NULL; 134 134 135 - spin_lock(&clock->timer_lock); 136 - 137 135 if (clock->timers.used && 138 136 time_after_eq(now, clock->timers.data[0]->expire)) 139 137 heap_pop(&clock->timers, ret, io_timer_cmp, NULL); 140 - 141 - spin_unlock(&clock->timer_lock); 142 - 143 138 return ret; 144 139 } 145 140 ··· 143 148 struct io_timer *timer; 144 149 unsigned long now = atomic64_add_return(sectors, &clock->now); 145 150 151 + spin_lock(&clock->timer_lock); 146 152 while ((timer = get_expired_timer(clock, now))) 147 153 timer->fn(timer); 154 + spin_unlock(&clock->timer_lock); 148 155 } 149 156 150 157 void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
+44
fs/bcachefs/data_update.c
··· 5 5 #include "bkey_buf.h" 6 6 #include "btree_update.h" 7 7 #include "buckets.h" 8 + #include "compress.h" 8 9 #include "data_update.h" 10 + #include "disk_groups.h" 9 11 #include "ec.h" 10 12 #include "error.h" 11 13 #include "extents.h" ··· 456 454 } 457 455 } 458 456 457 + void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, 458 + struct bch_io_opts *io_opts, 459 + struct data_update_opts *data_opts) 460 + { 461 + printbuf_tabstop_push(out, 20); 462 + prt_str(out, "rewrite ptrs:\t"); 463 + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); 464 + prt_newline(out); 465 + 466 + prt_str(out, "kill ptrs:\t"); 467 + bch2_prt_u64_base2(out, data_opts->kill_ptrs); 468 + prt_newline(out); 469 + 470 + prt_str(out, "target:\t"); 471 + bch2_target_to_text(out, c, data_opts->target); 472 + prt_newline(out); 473 + 474 + prt_str(out, "compression:\t"); 475 + bch2_compression_opt_to_text(out, background_compression(*io_opts)); 476 + prt_newline(out); 477 + 478 + prt_str(out, "extra replicas:\t"); 479 + prt_u64(out, data_opts->extra_replicas); 480 + } 481 + 482 + void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) 483 + { 484 + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); 485 + prt_newline(out); 486 + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); 487 + } 488 + 459 489 int bch2_extent_drop_ptrs(struct btree_trans *trans, 460 490 struct btree_iter *iter, 461 491 struct bkey_s_c k, ··· 676 642 */ 677 643 if (!(durability_have + durability_removing)) 678 644 m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); 645 + 646 + if (!m->op.nr_replicas) { 647 + struct printbuf buf = PRINTBUF; 648 + 649 + bch2_data_update_to_text(&buf, m); 650 + WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); 651 + printbuf_exit(&buf); 652 + ret = -BCH_ERR_data_update_done; 653 + goto done; 654 + } 679 655 680 656 m->op.nr_replicas_required = m->op.nr_replicas; 681 657
+5
fs/bcachefs/data_update.h
··· 17 17 unsigned write_flags; 18 18 }; 19 19 20 + void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, 21 + struct bch_io_opts *, struct data_update_opts *); 22 + 20 23 struct data_update { 21 24 /* extent being updated: */ 22 25 enum btree_id btree_id; ··· 29 26 struct bch_move_stats *stats; 30 27 struct bch_write_op op; 31 28 }; 29 + 30 + void bch2_data_update_to_text(struct printbuf *, struct data_update *); 32 31 33 32 int bch2_data_update_index_update(struct bch_write_op *); 34 33
+6 -6
fs/bcachefs/debug.c
··· 610 610 list_sort(&c->btree_trans_list, list_ptr_order_cmp); 611 611 612 612 list_for_each_entry(trans, &c->btree_trans_list, list) { 613 - if ((ulong) trans < i->iter) 613 + if ((ulong) trans <= i->iter) 614 614 continue; 615 615 616 616 i->iter = (ulong) trans; ··· 832 832 static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) 833 833 { 834 834 struct btree_trans *trans; 835 - pid_t iter = 0; 835 + ulong iter = 0; 836 836 restart: 837 837 seqmutex_lock(&c->btree_trans_lock); 838 - list_for_each_entry(trans, &c->btree_trans_list, list) { 839 - struct task_struct *task = READ_ONCE(trans->locking_wait.task); 838 + list_sort(&c->btree_trans_list, list_ptr_order_cmp); 840 839 841 - if (!task || task->pid <= iter) 840 + list_for_each_entry(trans, &c->btree_trans_list, list) { 841 + if ((ulong) trans <= iter) 842 842 continue; 843 843 844 - iter = task->pid; 844 + iter = (ulong) trans; 845 845 846 846 if (!closure_get_not_zero(&trans->ref)) 847 847 continue;
+4 -2
fs/bcachefs/eytzinger.h
··· 48 48 49 49 static inline unsigned eytzinger1_first(unsigned size) 50 50 { 51 - return rounddown_pow_of_two(size); 51 + return size ? rounddown_pow_of_two(size) : 0; 52 52 } 53 53 54 54 static inline unsigned eytzinger1_last(unsigned size) ··· 101 101 102 102 static inline unsigned eytzinger1_extra(unsigned size) 103 103 { 104 - return (size + 1 - rounddown_pow_of_two(size)) << 1; 104 + return size 105 + ? (size + 1 - rounddown_pow_of_two(size)) << 1 106 + : 0; 105 107 } 106 108 107 109 static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
+10 -1
fs/bcachefs/fs.c
··· 194 194 * discard_new_inode() expects it to be set... 195 195 */ 196 196 inode->v.i_flags |= I_NEW; 197 + /* 198 + * We don't want bch2_evict_inode() to delete the inode on disk, 199 + * we just raced and had another inode in cache. Normally new 200 + * inodes don't have nlink == 0 - except tmpfiles do... 201 + */ 202 + set_nlink(&inode->v, 1); 197 203 discard_new_inode(&inode->v); 198 204 inode = old; 199 205 } else { ··· 2032 2026 __bch2_fs_stop(c); 2033 2027 deactivate_locked_super(sb); 2034 2028 err: 2029 + if (ret) 2030 + pr_err("error: %s", bch2_err_str(ret)); 2035 2031 /* 2036 2032 * On an inconsistency error in recovery we might see an -EROFS derived 2037 2033 * errorcode (from the journal), but we don't want to return that to ··· 2073 2065 { 2074 2066 int ret = -ENOMEM; 2075 2067 2076 - bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); 2068 + bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT | 2069 + SLAB_ACCOUNT); 2077 2070 if (!bch2_inode_cache) 2078 2071 goto err; 2079 2072
+3 -1
fs/bcachefs/io_read.c
··· 389 389 390 390 bch2_bkey_buf_reassemble(&sk, c, k); 391 391 k = bkey_i_to_s_c(sk.k); 392 - bch2_trans_unlock(trans); 393 392 394 393 if (!bch2_bkey_matches_ptr(c, k, 395 394 rbio->pick.ptr, ··· 1002 1003 rbio->version = k.k->version; 1003 1004 rbio->promote = promote; 1004 1005 INIT_WORK(&rbio->work, NULL); 1006 + 1007 + if (flags & BCH_READ_NODECODE) 1008 + orig->pick = pick; 1005 1009 1006 1010 rbio->bio.bi_opf = orig->bio.bi_opf; 1007 1011 rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
+10 -8
fs/bcachefs/journal.c
··· 1095 1095 return ret; 1096 1096 } 1097 1097 1098 - int bch2_dev_journal_alloc(struct bch_dev *ca) 1098 + int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) 1099 1099 { 1100 1100 unsigned nr; 1101 1101 int ret; ··· 1117 1117 min(1 << 13, 1118 1118 (1 << 24) / ca->mi.bucket_size)); 1119 1119 1120 - ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); 1120 + ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); 1121 1121 err: 1122 1122 bch_err_fn(ca, ret); 1123 1123 return ret; ··· 1129 1129 if (ca->journal.nr) 1130 1130 continue; 1131 1131 1132 - int ret = bch2_dev_journal_alloc(ca); 1132 + int ret = bch2_dev_journal_alloc(ca, true); 1133 1133 if (ret) { 1134 1134 percpu_ref_put(&ca->io_ref); 1135 1135 return ret; ··· 1184 1184 journal_quiesce(j); 1185 1185 cancel_delayed_work_sync(&j->write_work); 1186 1186 1187 - BUG_ON(!bch2_journal_error(j) && 1188 - test_bit(JOURNAL_replay_done, &j->flags) && 1189 - j->last_empty_seq != journal_cur_seq(j)); 1187 + WARN(!bch2_journal_error(j) && 1188 + test_bit(JOURNAL_replay_done, &j->flags) && 1189 + j->last_empty_seq != journal_cur_seq(j), 1190 + "journal shutdown error: cur seq %llu but last empty seq %llu", 1191 + journal_cur_seq(j), j->last_empty_seq); 1190 1192 1191 1193 if (!bch2_journal_error(j)) 1192 1194 clear_bit(JOURNAL_running, &j->flags); ··· 1420 1418 unsigned long now = jiffies; 1421 1419 u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; 1422 1420 1423 - if (!out->nr_tabstops) 1424 - printbuf_tabstop_push(out, 28); 1421 + printbuf_tabstops_reset(out); 1422 + printbuf_tabstop_push(out, 28); 1425 1423 out->atomic++; 1426 1424 1427 1425 rcu_read_lock();
+1 -1
fs/bcachefs/journal.h
··· 433 433 434 434 int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, 435 435 unsigned nr); 436 - int bch2_dev_journal_alloc(struct bch_dev *); 436 + int bch2_dev_journal_alloc(struct bch_dev *, bool); 437 437 int bch2_fs_journal_alloc(struct bch_fs *); 438 438 439 439 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+8 -4
fs/bcachefs/journal_io.c
··· 415 415 flags|BCH_VALIDATE_journal); 416 416 if (ret == FSCK_DELETED_KEY) 417 417 continue; 418 + else if (ret) 419 + return ret; 418 420 419 421 k = bkey_next(k); 420 422 } ··· 1764 1762 1765 1763 if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { 1766 1764 spin_lock(&j->lock); 1767 - closure_wait(&j->async_wait, cl); 1765 + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { 1766 + closure_wait(&j->async_wait, cl); 1767 + spin_unlock(&j->lock); 1768 + continue_at(cl, journal_write_preflush, j->wq); 1769 + return; 1770 + } 1768 1771 spin_unlock(&j->lock); 1769 - 1770 - continue_at(cl, journal_write_preflush, j->wq); 1771 - return; 1772 1772 } 1773 1773 1774 1774 if (w->separate_flush) {
+39
fs/bcachefs/lru.c
··· 77 77 NULL 78 78 }; 79 79 80 + int bch2_lru_check_set(struct btree_trans *trans, 81 + u16 lru_id, u64 time, 82 + struct bkey_s_c referring_k, 83 + struct bkey_buf *last_flushed) 84 + { 85 + struct bch_fs *c = trans->c; 86 + struct printbuf buf = PRINTBUF; 87 + struct btree_iter lru_iter; 88 + struct bkey_s_c lru_k = 89 + bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, 90 + lru_pos(lru_id, 91 + bucket_to_u64(referring_k.k->p), 92 + time), 0); 93 + int ret = bkey_err(lru_k); 94 + if (ret) 95 + return ret; 96 + 97 + if (lru_k.k->type != KEY_TYPE_set) { 98 + ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); 99 + if (ret) 100 + goto err; 101 + 102 + if (fsck_err(c, alloc_key_to_missing_lru_entry, 103 + "missing %s lru entry\n" 104 + " %s", 105 + bch2_lru_types[lru_type(lru_k)], 106 + (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { 107 + ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time); 108 + if (ret) 109 + goto err; 110 + } 111 + } 112 + err: 113 + fsck_err: 114 + bch2_trans_iter_exit(trans, &lru_iter); 115 + printbuf_exit(&buf); 116 + return ret; 117 + } 118 + 80 119 static int bch2_check_lru_key(struct btree_trans *trans, 81 120 struct btree_iter *lru_iter, 82 121 struct bkey_s_c lru_k,
+3
fs/bcachefs/lru.h
··· 61 61 int bch2_lru_set(struct btree_trans *, u16, u64, u64); 62 62 int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); 63 63 64 + struct bkey_buf; 65 + int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *); 66 + 64 67 int bch2_check_lrus(struct bch_fs *); 65 68 66 69 #endif /* _BCACHEFS_LRU_H */
-25
fs/bcachefs/move.c
··· 36 36 NULL 37 37 }; 38 38 39 - static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, 40 - struct bch_io_opts *io_opts, 41 - struct data_update_opts *data_opts) 42 - { 43 - printbuf_tabstop_push(out, 20); 44 - prt_str(out, "rewrite ptrs:\t"); 45 - bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); 46 - prt_newline(out); 47 - 48 - prt_str(out, "kill ptrs:\t"); 49 - bch2_prt_u64_base2(out, data_opts->kill_ptrs); 50 - prt_newline(out); 51 - 52 - prt_str(out, "target:\t"); 53 - bch2_target_to_text(out, c, data_opts->target); 54 - prt_newline(out); 55 - 56 - prt_str(out, "compression:\t"); 57 - bch2_compression_opt_to_text(out, background_compression(*io_opts)); 58 - prt_newline(out); 59 - 60 - prt_str(out, "extra replicas:\t"); 61 - prt_u64(out, data_opts->extra_replicas); 62 - } 63 - 64 39 static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, 65 40 struct bch_io_opts *io_opts, 66 41 struct data_update_opts *data_opts)
+2 -1
fs/bcachefs/sb-errors_format.h
··· 286 286 x(accounting_mismatch, 272, 0) \ 287 287 x(accounting_replicas_not_marked, 273, 0) \ 288 288 x(invalid_btree_id, 274, 0) \ 289 - x(alloc_key_io_time_bad, 275, 0) 289 + x(alloc_key_io_time_bad, 275, 0) \ 290 + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) 290 291 291 292 enum bch_sb_error_id { 292 293 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
+7 -4
fs/bcachefs/super.c
··· 563 563 BUG_ON(atomic_read(&c->journal_keys.ref)); 564 564 bch2_fs_btree_write_buffer_exit(c); 565 565 percpu_free_rwsem(&c->mark_lock); 566 - EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved)); 567 - free_percpu(c->online_reserved); 566 + if (c->online_reserved) { 567 + u64 v = percpu_u64_get(c->online_reserved); 568 + WARN(v, "online_reserved not 0 at shutdown: %lli", v); 569 + free_percpu(c->online_reserved); 570 + } 568 571 569 572 darray_exit(&c->btree_roots_extra); 570 573 free_percpu(c->pcpu); ··· 1772 1769 if (ret) 1773 1770 goto err; 1774 1771 1775 - ret = bch2_dev_journal_alloc(ca); 1772 + ret = bch2_dev_journal_alloc(ca, true); 1776 1773 bch_err_msg(c, ret, "allocating journal"); 1777 1774 if (ret) 1778 1775 goto err; ··· 1932 1929 } 1933 1930 1934 1931 if (!ca->journal.nr) { 1935 - ret = bch2_dev_journal_alloc(ca); 1932 + ret = bch2_dev_journal_alloc(ca, false); 1936 1933 bch_err_msg(ca, ret, "allocating journal"); 1937 1934 if (ret) 1938 1935 goto err;
+7
include/linux/closure.h
··· 159 159 #ifdef CONFIG_DEBUG_CLOSURES 160 160 #define CLOSURE_MAGIC_DEAD 0xc054dead 161 161 #define CLOSURE_MAGIC_ALIVE 0xc054a11e 162 + #define CLOSURE_MAGIC_STACK 0xc05451cc 162 163 163 164 unsigned int magic; 164 165 struct list_head all; ··· 324 323 { 325 324 memset(cl, 0, sizeof(struct closure)); 326 325 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); 326 + #ifdef CONFIG_DEBUG_CLOSURES 327 + cl->magic = CLOSURE_MAGIC_STACK; 328 + #endif 327 329 } 328 330 329 331 static inline void closure_init_stack_release(struct closure *cl) 330 332 { 331 333 memset(cl, 0, sizeof(struct closure)); 332 334 atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); 335 + #ifdef CONFIG_DEBUG_CLOSURES 336 + cl->magic = CLOSURE_MAGIC_STACK; 337 + #endif 333 338 } 334 339 335 340 /**
+3
lib/closure.c
··· 244 244 { 245 245 unsigned long flags; 246 246 247 + if (cl->magic == CLOSURE_MAGIC_STACK) 248 + return; 249 + 247 250 BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); 248 251 cl->magic = CLOSURE_MAGIC_DEAD; 249 252