Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'bcachefs-2024-09-28' of git://evilpiepirate.org/bcachefs

Pull more bcachefs updates from Kent Overstreet:
"Assorted minor syzbot fixes, and for bigger stuff:

Fix two disk accounting rewrite bugs:

- Disk accounting keys use the version field of bkey so that journal
replay can tell which updates have been applied to the btree.

This is set in the transaction commit path, after we've gotten our
journal reservation (and our time ordering), but the
BCH_TRANS_COMMIT_skip_accounting_apply flag that journal replay
uses was incorrectly skipping this for new updates generated prior
to journal replay.

This fixes the underlying cause of an assertion pop in
disk_accounting_read.

- A couple of fixes for disk accounting + device removal.

Checking if accounting replicas entries were marked in the
superblock was being done at the wrong point, when deltas in the
journal could still zero them out, and then additionally we'd try
to add a missing replicas entry to the superblock without checking
if it referred to an invalid (removed) device.

A whole slew of repair fixes:

- fix infinite loop in propagate_key_to_snapshot_leaves(), this fixes
an infinite loop when repairing a filesystem with many snapshots

- fix incorrect transaction restart handling leading to occasional
"fsck counted ..." warnings

- fix warning in __bch2_fsck_err() for bkey fsck errors

- check_inode() in fsck now correctly checks if the filesystem was
clean

- there shouldn't be pending logged ops if the fs was clean, we now
check for this

- remove_backpointer() doesn't remove a dirent that doesn't actually
point to the inode

- many more fsck errors are AUTOFIX"

* tag 'bcachefs-2024-09-28' of git://evilpiepirate.org/bcachefs: (35 commits)
bcachefs: check_subvol_path() now prints subvol root inode
bcachefs: remove_backpointer() now checks if dirent points to inode
bcachefs: dirent_points_to_inode() now warns on mismatch
bcachefs: Fix lost wake up
bcachefs: Check for logged ops when clean
bcachefs: BCH_FS_clean_recovery
bcachefs: Convert disk accounting BUG_ON() to WARN_ON()
bcachefs: Fix BCH_TRANS_COMMIT_skip_accounting_apply
bcachefs: Check for accounting keys with bversion=0
bcachefs: rename version -> bversion
bcachefs: Don't delete unlinked inodes before logged op resume
bcachefs: Fix BCH_SB_ERRS() so we can reorder
bcachefs: Fix fsck warnings from bkey validation
bcachefs: Move transaction commit path validation to as late as possible
bcachefs: Fix disk accounting attempting to mark invalid replicas entry
bcachefs: Fix unlocked access to c->disk_sb.sb in bch2_replicas_entry_validate()
bcachefs: Fix accounting read + device removal
bcachefs: bch_accounting_mode
bcachefs: fix transaction restart handling in check_extents(), check_dirents()
bcachefs: kill inode_walker_entry.seen_this_pos
...
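The disk accounting fix described in the pull message above hinges on ordering: each accounting update is stamped with a bversion derived from its position in the journal (see the journal_pos_to_bversion call in the btree_trans_commit.c hunk below), and journal replay compares that version against the one already in the btree so that deltas applied before a crash are not applied twice. Below is a minimal, illustrative sketch of that idea in C; the struct layout, the seq/offset packing, and the helper names are assumptions made for the example, not the real bcachefs API.

/*
 * Illustrative sketch only: simplified stand-ins for the bversion ordering
 * used by disk accounting replay.  The packing below (journal sequence in
 * the high bits, entry offset in the low bits) is an assumption, not the
 * actual bcachefs encoding.
 */
#include <stdbool.h>
#include <stdint.h>

struct bversion_sketch {
    uint64_t lo;
};

/* Stamp an accounting key from its journal position at commit time. */
static struct bversion_sketch journal_pos_to_bversion_sketch(uint64_t journal_seq,
                                                             unsigned entry_offset)
{
    return (struct bversion_sketch) { .lo = (journal_seq << 20) | entry_offset };
}

/*
 * At replay, apply a journal delta only if the btree's copy of the key is
 * older; otherwise the delta already reached the btree before the crash and
 * applying it again would double-count.
 */
static bool replay_should_apply(struct bversion_sketch in_btree,
                                struct bversion_sketch from_journal)
{
    return in_btree.lo < from_journal.lo;
}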

+470 -310
+1 -1
fs/bcachefs/backpointers.c
···
  prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
  bch2_bkey_val_to_text(&buf, c, extent2);

- struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+ struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
  struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
  if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
  trans, dup_backpointer_to_bad_csum_extent,
+2 -1
fs/bcachefs/bcachefs.h
···
  #define BCH_FS_FLAGS() \
  x(new_fs) \
  x(started) \
+ x(clean_recovery) \
  x(btree_running) \
  x(accounting_replay_done) \
  x(may_go_rw) \
···
  unsigned nsec_per_time_unit;
  u64 features;
  u64 compat;
- unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+ unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
  u64 btrees_lost_data;
  } sb;
+3 -3
fs/bcachefs/bcachefs_format.h
···
  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  __u8 pad[1];

- struct bversion version;
+ struct bversion bversion;
  __u32 size; /* extent size, in sectors */
  struct bpos p;
  #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
···
  bkey_format_field(OFFSET, p.offset), \
  bkey_format_field(SNAPSHOT, p.snapshot), \
  bkey_format_field(SIZE, size), \
- bkey_format_field(VERSION_HI, version.hi), \
- bkey_format_field(VERSION_LO, version.lo), \
+ bkey_format_field(VERSION_HI, bversion.hi), \
+ bkey_format_field(VERSION_LO, bversion.lo), \
  }, \
  })
+4 -4
fs/bcachefs/bkey.h
···
  #define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
  #define MAX_VERSION ((struct bversion) { .hi = ~0, .lo = ~0ULL })

- static __always_inline int bversion_zero(struct bversion v)
+ static __always_inline bool bversion_zero(struct bversion v)
  {
- return !bversion_cmp(v, ZERO_VERSION);
+ return bversion_cmp(v, ZERO_VERSION) == 0;
  }

  #ifdef CONFIG_BCACHEFS_DEBUG
···
  x(BKEY_FIELD_OFFSET, p.offset) \
  x(BKEY_FIELD_SNAPSHOT, p.snapshot) \
  x(BKEY_FIELD_SIZE, size) \
- x(BKEY_FIELD_VERSION_HI, version.hi) \
- x(BKEY_FIELD_VERSION_LO, version.lo)
+ x(BKEY_FIELD_VERSION_HI, bversion.hi) \
+ x(BKEY_FIELD_VERSION_LO, bversion.lo)

  struct bkey_format_state {
  u64 field_min[BKEY_NR_FIELDS];
+1 -1
fs/bcachefs/bkey_methods.c
···

  bch2_bpos_to_text(out, k->p);

- prt_printf(out, " len %u ver %llu", k->size, k->version.lo);
+ prt_printf(out, " len %u ver %llu", k->size, k->bversion.lo);
  } else {
  prt_printf(out, "(null)");
  }
+1 -1
fs/bcachefs/bkey_methods.h
···
  static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
  {
  return l->type == r->type &&
- !bversion_cmp(l->version, r->version) &&
+ !bversion_cmp(l->bversion, r->bversion) &&
  bpos_eq(l->p, bkey_start_pos(r));
  }
+5 -3
fs/bcachefs/btree_gc.c
···
  struct bpos pulled_from_scan = POS_MIN;
  int ret = 0;

+ bch2_trans_srcu_unlock(trans);
+
  for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
  struct btree_root *r = bch2_btree_id_root(c, i);
  bool reconstructed_root = false;
···

  if (initial) {
  BUG_ON(bch2_journal_seq_verify &&
- k.k->version.lo > atomic64_read(&c->journal.seq));
+ k.k->bversion.lo > atomic64_read(&c->journal.seq));

  if (fsck_err_on(btree_id != BTREE_ID_accounting &&
- k.k->version.lo > atomic64_read(&c->key_version),
+ k.k->bversion.lo > atomic64_read(&c->key_version),
  trans, bkey_version_in_future,
  "key version number higher than recorded %llu\n %s",
  atomic64_read(&c->key_version),
  (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- atomic64_set(&c->key_version, k.k->version.lo);
+ atomic64_set(&c->key_version, k.k->bversion.lo);
  }

  if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
+5 -1
fs/bcachefs/btree_io.c
···
  set_btree_bset(b, b->set, &b->data->keys);

  b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
+ memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0,
+ btree_buf_bytes(b) -
+ sizeof(struct btree_node) -
+ b->nr.live_u64s * sizeof(u64));

  u64s = le16_to_cpu(sorted->keys.u64s);
  *sorted = *b->data;
···
  ret = bch2_bkey_val_validate(c, u.s_c, READ);
  if (ret == -BCH_ERR_fsck_delete_bkey ||
  (bch2_inject_invalid_keys &&
- !bversion_cmp(u.k->version, MAX_VERSION))) {
+ !bversion_cmp(u.k->bversion, MAX_VERSION))) {
  btree_keys_account_key_drop(&b->nr, 0, k);

  i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+1 -1
fs/bcachefs/btree_node_scan.c
···
  w->ca = ca;

  t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
- ret = IS_ERR_OR_NULL(t);
+ ret = PTR_ERR_OR_ZERO(t);
  if (ret) {
  percpu_ref_put(&ca->io_ref);
  closure_put(&cl);
+56 -52
fs/bcachefs/btree_trans_commit.c
···
  !(flags & BCH_TRANS_COMMIT_no_journal_res)) {
  if (bch2_journal_seq_verify)
  trans_for_each_update(trans, i)
- i->k->k.version.lo = trans->journal_res.seq;
+ i->k->k.bversion.lo = trans->journal_res.seq;
  else if (bch2_inject_invalid_keys)
  trans_for_each_update(trans, i)
- i->k->k.version = MAX_VERSION;
+ i->k->k.bversion = MAX_VERSION;


  h = trans->hooks;
···

  struct jset_entry *entry = trans->journal_entries;

- if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
- percpu_down_read(&c->mark_lock);
+ percpu_down_read(&c->mark_lock);

- for (entry = trans->journal_entries;
- entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
- entry = vstruct_next(entry))
- if (jset_entry_is_key(entry) && entry->start->k.type == KEY_TYPE_accounting) {
- struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
+ for (entry = trans->journal_entries;
+ entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ entry = vstruct_next(entry))
+ if (entry->type == BCH_JSET_ENTRY_write_buffer_keys &&
+ entry->start->k.type == KEY_TYPE_accounting) {
+ BUG_ON(!trans->journal_res.ref);

- a->k.version = journal_pos_to_bversion(&trans->journal_res,
- (u64 *) entry - (u64 *) trans->journal_entries);
- BUG_ON(bversion_zero(a->k.version));
- ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false);
+ struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
+
+ a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
+ (u64 *) entry - (u64 *) trans->journal_entries);
+ BUG_ON(bversion_zero(a->k.bversion));
+
+ if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
+ ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal);
  if (ret)
  goto revert_fs_usage;
  }
- percpu_up_read(&c->mark_lock);
+ }
+ percpu_up_read(&c->mark_lock);

- /* XXX: we only want to run this if deltas are nonzero */
- bch2_trans_account_disk_usage_change(trans);
- }
+ /* XXX: we only want to run this if deltas are nonzero */
+ bch2_trans_account_disk_usage_change(trans);

  trans_for_each_update(trans, i)
  if (btree_node_type_has_atomic_triggers(i->bkey_type)) {
···
  ret = bch2_trans_commit_run_gc_triggers(trans);
  if (ret)
  goto fatal_err;
+ }
+
+ trans_for_each_update(trans, i) {
+ enum bch_validate_flags invalid_flags = 0;
+
+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+ invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
+ i->bkey_type, invalid_flags);
+ if (unlikely(ret)){
+ bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
+ trans->fn, (void *) i->ip_allocated);
+ goto fatal_err;
+ }
+ btree_insert_entry_checks(trans, i);
+ }
+
+ for (struct jset_entry *i = trans->journal_entries;
+ i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+ i = vstruct_next(i)) {
+ enum bch_validate_flags invalid_flags = 0;
+
+ if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+ invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+ ret = bch2_journal_entry_validate(c, NULL, i,
+ bcachefs_metadata_version_current,
+ CPU_BIG_ENDIAN, invalid_flags);
+ if (unlikely(ret)) {
+ bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
+ trans->fn);
+ goto fatal_err;
+ }
  }

  if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
···
  struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);

  bch2_accounting_neg(a);
- bch2_accounting_mem_mod_locked(trans, a.c, false, false);
+ bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
  bch2_accounting_neg(a);
  }
  percpu_up_read(&c->mark_lock);
···
  ret = bch2_trans_commit_run_triggers(trans);
  if (ret)
  goto out_reset;
-
- trans_for_each_update(trans, i) {
- enum bch_validate_flags invalid_flags = 0;
-
- if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
- ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
- i->bkey_type, invalid_flags);
- if (unlikely(ret)){
- bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
- trans->fn, (void *) i->ip_allocated);
- return ret;
- }
- btree_insert_entry_checks(trans, i);
- }
-
- for (struct jset_entry *i = trans->journal_entries;
- i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
- i = vstruct_next(i)) {
- enum bch_validate_flags invalid_flags = 0;
-
- if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
- invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
- ret = bch2_journal_entry_validate(c, NULL, i,
- bcachefs_metadata_version_current,
- CPU_BIG_ENDIAN, invalid_flags);
- if (unlikely(ret)) {
- bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
- trans->fn);
- return ret;
- }
- }

  if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
  ret = do_bch2_trans_commit_to_journal_replay(trans);
+2 -1
fs/bcachefs/btree_update.h
···
  if (type && k.k->type != type)
  return ERR_PTR(-ENOENT);

- mut = bch2_trans_kmalloc_nomemzero(trans, bytes);
+ /* extra padding for varint_decode_fast... */
+ mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8);
  if (!IS_ERR(mut)) {
  bkey_reassemble(mut, k);
+1 -1
fs/bcachefs/data_update.c
···

  bch2_write_op_init(&m->op, c, io_opts);
  m->op.pos = bkey_start_pos(k.k);
- m->op.version = k.k->version;
+ m->op.version = k.k->bversion;
  m->op.target = data_opts.target;
  m->op.write_point = wp;
  m->op.nr_replicas = 0;
+58 -24
fs/bcachefs/disk_accounting.c
···
  void *end = &acc_k + 1;
  int ret = 0;

+ bkey_fsck_err_on(bversion_zero(k.k->bversion),
+ c, accounting_key_version_0,
+ "accounting key with version=0");
+
  switch (acc_k.type) {
  case BCH_DISK_ACCOUNTING_nr_inodes:
  end = field_end(acc_k, nr_inodes);
···

  struct accounting_mem_entry n = {
  .pos = a.k->p,
- .version = a.k->version,
+ .bversion = a.k->bversion,
  .nr_counters = bch2_accounting_counters(a.k),
  .v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64),
  sizeof(u64), GFP_KERNEL),
···
  return -BCH_ERR_ENOMEM_disk_accounting;
  }

- int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc)
+ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
+ enum bch_accounting_mode mode)
  {
  struct bch_replicas_padded r;

- if (accounting_to_replicas(&r.e, a.k->p) &&
+ if (mode != BCH_ACCOUNTING_read &&
+ accounting_to_replicas(&r.e, a.k->p) &&
  !bch2_replicas_marked_locked(c, &r.e))
  return -BCH_ERR_btree_insert_need_mark_replicas;

···
  struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i;

  accounting_key_init(&k_i.k, &acc_k, src_v, nr);
- bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false);
+ bch2_accounting_mem_mod_locked(trans,
+ bkey_i_to_s_c_accounting(&k_i.k),
+ BCH_ACCOUNTING_normal);

  preempt_disable();
  struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
···
  static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
  {
  struct bch_fs *c = trans->c;
- struct printbuf buf = PRINTBUF;

  if (k.k->type != KEY_TYPE_accounting)
  return 0;

  percpu_down_read(&c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true);
+ int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
+ BCH_ACCOUNTING_read);
  percpu_up_read(&c->mark_lock);
-
- if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) &&
- ret == -BCH_ERR_btree_insert_need_mark_replicas)
- ret = 0;
-
- struct disk_accounting_pos acc;
- bpos_to_disk_accounting_pos(&acc, k.k->p);
-
- if (fsck_err_on(ret == -BCH_ERR_btree_insert_need_mark_replicas,
- trans, accounting_replicas_not_marked,
- "accounting not marked in superblock replicas\n %s",
- (bch2_accounting_key_to_text(&buf, &acc),
- buf.buf)))
- ret = bch2_accounting_update_sb_one(c, k.k->p);
- fsck_err:
- printbuf_exit(&buf);
  return ret;
  }

···
  {
  struct bch_accounting_mem *acc = &c->accounting;
  struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;

  int ret = for_each_btree_key(trans, iter,
  BTREE_ID_accounting, POS_MIN,
···
  accounting_pos_cmp, &k.k->p);

  bool applied = idx < acc->k.nr &&
- bversion_cmp(acc->k.data[idx].version, k.k->version) >= 0;
+ bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0;

  if (applied)
  continue;
···
  if (i + 1 < &darray_top(*keys) &&
  i[1].k->k.type == KEY_TYPE_accounting &&
  !journal_key_cmp(i, i + 1)) {
- BUG_ON(bversion_cmp(i[0].k->k.version, i[1].k->k.version) >= 0);
+ WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0);

  i[1].journal_seq = i[0].journal_seq;

···
  keys->gap = keys->nr = dst - keys->data;

  percpu_down_read(&c->mark_lock);
+ for (unsigned i = 0; i < acc->k.nr; i++) {
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
+
+ if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
+ continue;
+
+ struct bch_replicas_padded r;
+ if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
+ continue;
+
+ /*
+ * If the replicas entry is invalid it'll get cleaned up by
+ * check_allocations:
+ */
+ if (bch2_replicas_entry_validate(&r.e, c, &buf))
+ continue;
+
+ struct disk_accounting_pos k;
+ bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
+
+ if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
+ trans, accounting_replicas_not_marked,
+ "accounting not marked in superblock replicas\n %s",
+ (printbuf_reset(&buf),
+ bch2_accounting_key_to_text(&buf, &k),
+ buf.buf))) {
+ /*
+ * We're not RW yet and still single threaded, dropping
+ * and retaking lock is ok:
+ */
+ percpu_up_read(&c->mark_lock);
+ ret = bch2_mark_replicas(c, &r.e);
+ if (ret)
+ goto fsck_err;
+ percpu_down_read(&c->mark_lock);
+ }
+ }
+
  preempt_disable();
  struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);

···
  }
  }
  preempt_enable();
+ fsck_err:
  percpu_up_read(&c->mark_lock);
  err:
+ printbuf_exit(&buf);
  bch2_trans_put(trans);
  bch_err_fn(c, ret);
  return ret;
+19 -10
fs/bcachefs/disk_accounting.h
···

  for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
  dst->v.d[i] += src.v->d[i];
- if (bversion_cmp(dst->k.version, src.k->version) < 0)
- dst->k.version = src.k->version;
+ if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
+ dst->k.bversion = src.k->bversion;
  }

  static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
···
  return bpos_cmp(*l, *r);
  }

- int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool);
+ enum bch_accounting_mode {
+ BCH_ACCOUNTING_normal,
+ BCH_ACCOUNTING_gc,
+ BCH_ACCOUNTING_read,
+ };
+
+ int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
  void bch2_accounting_mem_gc(struct bch_fs *);

  /*
  * Update in memory counters so they match the btree update we're doing; called
  * from transaction commit path
  */
- static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read)
+ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
+ struct bkey_s_c_accounting a,
+ enum bch_accounting_mode mode)
  {
  struct bch_fs *c = trans->c;
+ struct bch_accounting_mem *acc = &c->accounting;
  struct disk_accounting_pos acc_k;
  bpos_to_disk_accounting_pos(&acc_k, a.k->p);
+ bool gc = mode == BCH_ACCOUNTING_gc;
+
+ EBUG_ON(gc && !acc->gc_running);

  if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
  return 0;

- if (!gc && !read) {
+ if (mode == BCH_ACCOUNTING_normal) {
  switch (acc_k.type) {
  case BCH_DISK_ACCOUNTING_persistent_reserved:
  trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0];
···
  }
  }

- struct bch_accounting_mem *acc = &c->accounting;
  unsigned idx;
-
- EBUG_ON(gc && !acc->gc_running);

  while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
  accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
- int ret = bch2_accounting_mem_insert(c, a, gc);
+ int ret = bch2_accounting_mem_insert(c, a, mode);
  if (ret)
  return ret;
  }
···
  static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
  {
  percpu_down_read(&trans->c->mark_lock);
- int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false);
+ int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal);
  percpu_up_read(&trans->c->mark_lock);
  return ret;
  }
+1 -1
fs/bcachefs/disk_accounting_types.h
···

  struct accounting_mem_entry {
  struct bpos pos;
- struct bversion version;
+ struct bversion bversion;
  unsigned nr_counters;
  u64 __percpu *v[2];
  };
+13 -1
fs/bcachefs/error.c
···
  if (!c)
  c = trans->c;

- WARN_ON(!trans && bch2_current_has_btree_trans(c));
+ /*
+ * Ugly: if there's a transaction in the current task it has to be
+ * passed in to unlock if we prompt for user input.
+ *
+ * But, plumbing a transaction and transaction restarts into
+ * bkey_validate() is problematic.
+ *
+ * So:
+ * - make all bkey errors AUTOFIX, they're simple anyways (we just
+ *   delete the key)
+ * - and we don't need to warn if we're not prompting
+ */
+ WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c));

  if ((flags & FSCK_CAN_FIX) &&
  test_bit(err, c->sb.errors_silent))
+1 -1
fs/bcachefs/error.h
···
  ret = -BCH_ERR_fsck_delete_bkey; \
  goto fsck_err; \
  } \
- int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \
+ int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX, \
  BCH_FSCK_ERR_##_err_type, \
  _err_msg, ##__VA_ARGS__); \
  if (_ret != -BCH_ERR_fsck_fix && \
+173 -124
fs/bcachefs/fsck.c
···
  #include <linux/bsearch.h>
  #include <linux/dcache.h> /* struct qstr */

+ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+ struct bkey_s_c_dirent d)
+ {
+ return inode->bi_dir == d.k->p.inode &&
+ inode->bi_dir_offset == d.k->p.offset;
+ }
+
+ static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
+ struct bch_inode_unpacked *inode)
+ {
+ if (d.v->d_type == DT_SUBVOL
+ ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
+ : le64_to_cpu(d.v->d_inum) == inode->bi_inum)
+ return 0;
+ return -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+ }
+
+ static void dirent_inode_mismatch_msg(struct printbuf *out,
+ struct bch_fs *c,
+ struct bkey_s_c_dirent dirent,
+ struct bch_inode_unpacked *inode)
+ {
+ prt_str(out, "inode points to dirent that does not point back:");
+ prt_newline(out);
+ bch2_bkey_val_to_text(out, c, dirent.s_c);
+ prt_newline(out);
+ bch2_inode_unpacked_to_text(out, inode);
+ }
+
+ static int dirent_points_to_inode(struct bch_fs *c,
+ struct bkey_s_c_dirent dirent,
+ struct bch_inode_unpacked *inode)
+ {
+ int ret = dirent_points_to_inode_nowarn(dirent, inode);
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+ dirent_inode_mismatch_msg(&buf, c, dirent, inode);
+ bch_warn(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+ return ret;
+ }
+
  /*
  * XXX: this is handling transaction restarts without returning
  * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
···
  static int remove_backpointer(struct btree_trans *trans,
  struct bch_inode_unpacked *inode)
  {
- struct btree_iter iter;
- struct bkey_s_c_dirent d;
- int ret;
+ if (!inode->bi_dir)
+ return 0;

- d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
- POS(inode->bi_dir, inode->bi_dir_offset), 0,
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c_dirent d =
+ bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
+ SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0,
  dirent);
- ret = bkey_err(d) ?:
+ int ret = bkey_err(d) ?:
+ dirent_points_to_inode(c, d, inode) ?:
  __remove_dirent(trans, d.k->p);
  bch2_trans_iter_exit(trans, &iter);
  return ret;
···
  return ret;

  ret = remove_backpointer(trans, &inode);
- bch_err_msg(c, ret, "removing dirent");
+ if (!bch2_err_matches(ret, ENOENT))
+ bch_err_msg(c, ret, "removing dirent");
  if (ret)
  return ret;

···
  struct inode_walker_entry {
  struct bch_inode_unpacked inode;
  u32 snapshot;
- bool seen_this_pos;
  u64 count;
  };

  struct inode_walker {
  bool first_this_inode;
+ bool have_inodes;
  bool recalculate_sums;
  struct bpos last_pos;
···
  struct bkey_s_c k;
  int ret;

+ /*
+ * We no longer have inodes for w->last_pos; clear this to avoid
+ * screwing up check_i_sectors/check_subdir_count if we take a
+ * transaction restart here:
+ */
+ w->have_inodes = false;
  w->recalculate_sums = false;
  w->inodes.nr = 0;

···
  return ret;

  w->first_this_inode = true;
+ w->have_inodes = true;
  return 0;
  }

···
  int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
  if (ret)
  return ERR_PTR(ret);
- } else if (bkey_cmp(w->last_pos, k.k->p)) {
- darray_for_each(w->inodes, i)
- i->seen_this_pos = false;
  }

  w->last_pos = k.k->p;
···
  return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
  }

- static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
- struct bkey_s_c_dirent d)
- {
- return inode->bi_dir == d.k->p.inode &&
- inode->bi_dir_offset == d.k->p.offset;
- }
-
- static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *inode)
- {
- return d.v->d_type == DT_SUBVOL
- ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
- : le64_to_cpu(d.v->d_inum) == inode->bi_inum;
- }
-
  static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
  {
  struct btree_iter iter;
···
  return ret;
  }

- static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+ static int check_inode_dirent_inode(struct btree_trans *trans,
  struct bch_inode_unpacked *inode,
- u32 inode_snapshot, bool *write_inode)
+ bool *write_inode)
  {
  struct bch_fs *c = trans->c;
  struct printbuf buf = PRINTBUF;

+ u32 inode_snapshot = inode->bi_snapshot;
  struct btree_iter dirent_iter = {};
  struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
  int ret = bkey_err(d);
···
  if (fsck_err_on(ret,
  trans, inode_points_to_missing_dirent,
  "inode points to missing dirent\n%s",
- (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
- fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+ (bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) ||
+ fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode),
  trans, inode_points_to_wrong_dirent,
- "inode points to dirent that does not point back:\n%s",
- (bch2_bkey_val_to_text(&buf, c, inode_k),
- prt_newline(&buf),
- bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+ "%s",
+ (printbuf_reset(&buf),
+ dirent_inode_mismatch_msg(&buf, c, d, inode),
+ buf.buf))) {
  /*
  * We just clear the backpointer fields for now. If we find a
  * dirent that points to this inode in check_dirents(), we'll
···
  return ret;
  }

- static bool bch2_inode_open(struct bch_fs *c, struct bpos p)
+ static bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
  {
  subvol_inum inum = {
  .subvol = snapshot_t(c, p.snapshot)->subvol,
···

  /* snapshot tree corruption, can't safely delete */
  if (!inum.subvol) {
- bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot);
+ bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
  return true;
  }

···
  }

  if (u.bi_flags & BCH_INODE_unlinked) {
- ret = check_inode_deleted_list(trans, k.k->p);
- if (ret < 0)
- return ret;
+ if (!test_bit(BCH_FS_started, &c->flags)) {
+ /*
+ * If we're not in online fsck, don't delete unlinked
+ * inodes, just make sure they're on the deleted list.
+ *
+ * They might be referred to by a logged operation -
+ * i.e. we might have crashed in the middle of a
+ * truncate on an unlinked but open file - so we want to
+ * let the delete_dead_inodes kill it after resuming
+ * logged ops.
+ */
+ ret = check_inode_deleted_list(trans, k.k->p);
+ if (ret < 0)
+ return ret;

- fsck_err_on(!ret,
- trans, unlinked_inode_not_on_deleted_list,
- "inode %llu:%u unlinked, but not on deleted list",
- u.bi_inum, k.k->p.snapshot);
- ret = 0;
+ fsck_err_on(!ret,
+ trans, unlinked_inode_not_on_deleted_list,
+ "inode %llu:%u unlinked, but not on deleted list",
+ u.bi_inum, k.k->p.snapshot);
+
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, k.k->p, 1);
+ if (ret)
+ goto err;
+ } else {
+ if (fsck_err_on(bch2_inode_is_open(c, k.k->p),
+ trans, inode_unlinked_and_not_open,
+ "inode %llu%u unlinked and not open",
+ u.bi_inum, u.bi_snapshot)) {
+ ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
+ bch_err_msg(c, ret, "in fsck deleting inode");
+ return ret;
+ }
+ }
  }

- if (u.bi_flags & BCH_INODE_unlinked &&
- !bch2_inode_open(c, k.k->p) &&
- (!c->sb.clean ||
- fsck_err(trans, inode_unlinked_but_clean,
- "filesystem marked clean, but inode %llu unlinked",
- u.bi_inum))) {
- ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
- bch_err_msg(c, ret, "in fsck deleting inode");
- return ret;
- }
-
+ /* i_size_dirty is vestigal, since we now have logged ops for truncate * */
  if (u.bi_flags & BCH_INODE_i_size_dirty &&
- (!c->sb.clean ||
+ (!test_bit(BCH_FS_clean_recovery, &c->flags) ||
  fsck_err(trans, inode_i_size_dirty_but_clean,
  "filesystem marked clean, but inode %llu has i_size dirty",
  u.bi_inum))) {
···
  do_update = true;
  }

+ /* i_sectors_dirty is vestigal, i_sectors is always updated transactionally */
  if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
- (!c->sb.clean ||
+ (!test_bit(BCH_FS_clean_recovery, &c->flags) ||
  fsck_err(trans, inode_i_sectors_dirty_but_clean,
  "filesystem marked clean, but inode %llu has i_sectors dirty",
  u.bi_inum))) {
···
  }

  if (u.bi_dir || u.bi_dir_offset) {
- ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+ ret = check_inode_dirent_inode(trans, &u, &do_update);
  if (ret)
  goto err;
  }
···
  struct bkey_s_c k,
  struct inode_walker *inode,
  struct snapshots_seen *s,
- struct extent_ends *extent_ends)
+ struct extent_ends *extent_ends,
+ struct disk_reservation *res)
  {
  struct bch_fs *c = trans->c;
- struct inode_walker_entry *i;
  struct printbuf buf = PRINTBUF;
  int ret = 0;

···
  goto out;
  }

- if (inode->last_pos.inode != k.k->p.inode) {
+ if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) {
  ret = check_i_sectors(trans, inode);
  if (ret)
  goto err;
···
  if (ret)
  goto err;

- i = walk_inode(trans, inode, k);
- ret = PTR_ERR_OR_ZERO(i);
+ struct inode_walker_entry *extent_i = walk_inode(trans, inode, k);
+ ret = PTR_ERR_OR_ZERO(extent_i);
  if (ret)
  goto err;

- ret = check_key_has_inode(trans, iter, inode, i, k);
+ ret = check_key_has_inode(trans, iter, inode, extent_i, k);
  if (ret)
  goto err;

···
  &inode->recalculate_sums);
  if (ret)
  goto err;
- }

- /*
- * Check inodes in reverse order, from oldest snapshots to newest,
- * starting from the inode that matches this extent's snapshot. If we
- * didn't have one, iterate over all inodes:
- */
- if (!i)
- i = &darray_last(inode->inodes);
+ /*
+ * Check inodes in reverse order, from oldest snapshots to
+ * newest, starting from the inode that matches this extent's
+ * snapshot. If we didn't have one, iterate over all inodes:
+ */
+ for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
+ inode->inodes.data && i >= inode->inodes.data;
+ --i) {
+ if (i->snapshot > k.k->p.snapshot ||
+ !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+ continue;

- for (;
- inode->inodes.data && i >= inode->inodes.data;
- --i) {
- if (i->snapshot > k.k->p.snapshot ||
- !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
- continue;
-
- if (k.k->type != KEY_TYPE_whiteout) {
  if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
  k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
  !bkey_extent_is_reservation(k),
···
  goto err;

  iter->k.type = KEY_TYPE_whiteout;
+ break;
  }
-
- if (bkey_extent_is_allocation(k.k))
- i->count += k.k->size;
  }
-
- i->seen_this_pos = true;
+
+ ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ if (bkey_extent_is_allocation(k.k)) {
+ for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
+ inode->inodes.data && i >= inode->inodes.data;
+ --i) {
+ if (i->snapshot > k.k->p.snapshot ||
+ !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+ continue;
+
+ i->count += k.k->size;
+ }
  }

  if (k.k->type != KEY_TYPE_whiteout) {
···
  extent_ends_init(&extent_ends);

  int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
+ for_each_btree_key(trans, iter, BTREE_ID_extents,
  POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- &res, NULL,
- BCH_TRANS_COMMIT_no_enospc, ({
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
  bch2_disk_reservation_put(c, &res);
- check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
+ check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?:
  check_extent_overbig(trans, &iter, k);
  })) ?:
  check_i_sectors_notnested(trans, &w));
···
  {
  struct bch_fs *c = trans->c;
  struct printbuf buf = PRINTBUF;
+ struct btree_iter bp_iter = { NULL };
  int ret = 0;

  if (inode_points_to_dirent(target, d))
···
  prt_printf(&buf, "\n "),
  bch2_inode_unpacked_to_text(&buf, target),
  buf.buf)))
- goto out_noiter;
+ goto err;

  if (!target->bi_dir &&
  !target->bi_dir_offset) {
···
  return __bch2_fsck_write_inode(trans, target, target_snapshot);
  }

- struct btree_iter bp_iter = { NULL };
  struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
  SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
  ret = bkey_err(bp_dirent);
···
  err:
  fsck_err:
  bch2_trans_iter_exit(trans, &bp_iter);
- out_noiter:
  printbuf_exit(&buf);
  bch_err_fn(c, ret);
  return ret;
···
  if (k.k->type == KEY_TYPE_whiteout)
  goto out;

- if (dir->last_pos.inode != k.k->p.inode) {
+ if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
  ret = check_subdir_count(trans, dir);
  if (ret)
  goto err;
···
  if (ret)
  goto err;
  }
-
- if (d.v->d_type == DT_DIR)
- for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
- i->count++;
  }
+
+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ if (d.v->d_type == DT_DIR)
+ for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+ i->count++;
  out:
  err:
  fsck_err:
···
  snapshots_seen_init(&s);

  int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
+ for_each_btree_key(trans, iter, BTREE_ID_dirents,
  POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
- k,
- NULL, NULL,
- BCH_TRANS_COMMIT_no_enospc,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
  check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
  check_subdir_count_notnested(trans, &dir));
···
  return false;
  }

- /*
- * We've checked that inode backpointers point to valid dirents; here, it's
- * sufficient to check that the subvolume root has a dirent:
- */
- static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s)
- {
- struct bch_inode_unpacked inode;
- int ret = bch2_inode_find_by_inum_trans(trans,
- (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
- &inode);
- if (ret)
- return ret;
-
- return inode.bi_dir != 0;
- }
-
  static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
  {
  struct bch_fs *c = trans->c;
···

  struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);

- ret = subvol_has_dirent(trans, s);
- if (ret < 0)
+ struct bch_inode_unpacked subvol_root;
+ ret = bch2_inode_find_by_inum_trans(trans,
+ (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
+ &subvol_root);
+ if (ret)
  break;

- if (fsck_err_on(!ret,
+ /*
+ * We've checked that inode backpointers point to valid dirents;
+ * here, it's sufficient to check that the subvolume root has a
+ * dirent:
+ */
+ if (fsck_err_on(!subvol_root.bi_dir,
  trans, subvol_unreachable,
  "unreachable subvolume %s",
  (bch2_bkey_val_to_text(&buf, c, s.s_c),
+ prt_newline(&buf),
+ bch2_inode_unpacked_to_text(&buf, &subvol_root),
  buf.buf))) {
  ret = reattach_subvol(trans, s);
  break;
···
  if (ret && !bch2_err_matches(ret, ENOENT))
  break;

- if (!ret && !dirent_points_to_inode(d, &inode)) {
+ if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
  bch2_trans_iter_exit(trans, &dirent_iter);
- ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
- }

  if (bch2_err_matches(ret, ENOENT)) {
  ret = 0;
+7 -5
fs/bcachefs/inode.c
···
  int bch2_inode_unpack(struct bkey_s_c k,
  struct bch_inode_unpacked *unpacked)
  {
- if (likely(k.k->type == KEY_TYPE_inode_v3))
- return bch2_inode_unpack_v3(k, unpacked);
- return bch2_inode_unpack_slowpath(k, unpacked);
+ unpacked->bi_snapshot = k.k->p.snapshot;
+
+ return likely(k.k->type == KEY_TYPE_inode_v3)
+ ? bch2_inode_unpack_v3(k, unpacked)
+ : bch2_inode_unpack_slowpath(k, unpacked);
  }

  int bch2_inode_peek_nowarn(struct btree_trans *trans,
···

  void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
  {
- prt_printf(out, "inum: %llu ", inode->bi_inum);
+ prt_printf(out, "inum: %llu:%u ", inode->bi_inum, inode->bi_snapshot);
  __bch2_inode_unpacked_to_text(out, inode);
  }
···
  pos.offset, pos.snapshot))
  goto delete;

- if (c->sb.clean &&
+ if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
  !fsck_err(trans, deleted_inode_but_clean,
  "filesystem marked as clean but have deleted inode %llu:%u",
  pos.offset, pos.snapshot)) {
+1
fs/bcachefs/inode.h
···

  struct bch_inode_unpacked {
  u64 bi_inum;
+ u32 bi_snapshot;
  u64 bi_journal_seq;
  __le64 bi_hash_seed;
  u64 bi_size;
+2 -2
fs/bcachefs/io_read.c
···
  if ((ret = bkey_err(k)))
  goto out;

- if (bversion_cmp(k.k->version, rbio->version) ||
+ if (bversion_cmp(k.k->bversion, rbio->version) ||
  !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
  goto out;

···
  rbio->read_pos = read_pos;
  rbio->data_btree = data_btree;
  rbio->data_pos = data_pos;
- rbio->version = k.k->version;
+ rbio->version = k.k->bversion;
  rbio->promote = promote;
  INIT_WORK(&rbio->work, NULL);
+2 -2
fs/bcachefs/io_write.c
···
  e = bkey_extent_init(op->insert_keys.top);
  e->k.p = op->pos;
  e->k.size = crc.uncompressed_size;
- e->k.version = version;
+ e->k.bversion = version;

  if (crc.csum_type ||
  crc.compression_type ||
···

  id = bkey_inline_data_init(op->insert_keys.top);
  id->k.p = op->pos;
- id->k.version = op->version;
+ id->k.bversion = op->version;
  id->k.size = sectors;

  iter = bio->bi_iter;
+1 -1
fs/bcachefs/journal_io.c
···
  goto out;
  }

- if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+ if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err),
  c, version, jset, entry,
  journal_entry_data_usage_bad_size,
  "invalid journal entry usage: %s", err.buf)) {
+11 -2
fs/bcachefs/logged_ops.c
···
  const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
  struct bkey_buf sk;
  u32 restart_count = trans->restart_count;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags),
+ trans, logged_op_but_clean,
+ "filesystem marked as clean but have logged op\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k),
+ buf.buf));

  if (!fn)
  return 0;
···
  fn->resume(trans, sk.k);

  bch2_bkey_buf_exit(&sk, c);
-
- return trans_was_restarted(trans, restart_count);
+ fsck_err:
+ printbuf_exit(&buf);
+ return ret ?: trans_was_restarted(trans, restart_count);
  }

  int bch2_resume_logged_ops(struct bch_fs *c)
+6 -1
fs/bcachefs/recovery.c
···
  struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u);

  /* Has this delta already been applied to the btree? */
- if (bversion_cmp(old.k->version, k->k->k.version) >= 0) {
+ if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) {
  ret = 0;
  goto out;
  }
···

  if (c->opts.fsck)
  set_bit(BCH_FS_fsck_running, &c->flags);
+ if (c->sb.clean)
+ set_bit(BCH_FS_clean_recovery, &c->flags);

  ret = bch2_blacklist_table_initialize(c);
  if (ret) {
···
  goto err;

  clear_bit(BCH_FS_fsck_running, &c->flags);
+
+ /* in case we don't run journal replay, i.e. norecovery mode */
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);

  /* fsync if we fixed errors */
  if (test_bit(BCH_FS_errors_fixed, &c->flags) &&
+1 -1
fs/bcachefs/recovery_passes_types.h
···
  x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
  x(check_nlinks, 31, PASS_FSCK) \
  x(resume_logged_ops, 23, PASS_ALWAYS) \
- x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
+ x(delete_dead_inodes, 32, PASS_ALWAYS) \
  x(fix_reflink_p, 33, 0) \
  x(set_fs_needs_rebalance, 34, 0) \
+1 -1
fs/bcachefs/reflink.c
···
  r_v->k.type = bkey_type_to_indirect(&orig->k);
  r_v->k.p = reflink_iter.pos;
  bch2_key_resize(&r_v->k, orig->k.size);
- r_v->k.version = orig->k.version;
+ r_v->k.bversion = orig->k.bversion;

  set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
+14 -4
fs/bcachefs/replicas.c
···
  prt_printf(out, "]");
  }

- int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
- struct bch_sb *sb,
- struct printbuf *err)
+ static int bch2_replicas_entry_validate_locked(struct bch_replicas_entry_v1 *r,
+ struct bch_sb *sb,
+ struct printbuf *err)
  {
  if (!r->nr_devs) {
  prt_printf(err, "no devices in entry ");
···
  bad:
  bch2_replicas_entry_to_text(err, r);
  return -BCH_ERR_invalid_replicas_entry;
+ }
+
+ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+ struct bch_fs *c,
+ struct printbuf *err)
+ {
+ mutex_lock(&c->sb_lock);
+ int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err);
+ mutex_unlock(&c->sb_lock);
+ return ret;
  }

  void bch2_cpu_replicas_to_text(struct printbuf *out,
···
  struct bch_replicas_entry_v1 *e =
  cpu_replicas_entry(cpu_r, i);

- int ret = bch2_replicas_entry_validate(e, sb, err);
+ int ret = bch2_replicas_entry_validate_locked(e, sb, err);
  if (ret)
  return ret;
+1 -1
fs/bcachefs/replicas.h
···
  void bch2_replicas_entry_to_text(struct printbuf *,
  struct bch_replicas_entry_v1 *);
  int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
- struct bch_sb *, struct printbuf *);
+ struct bch_fs *, struct printbuf *);
  void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);

  static inline struct bch_replicas_entry_v1 *
+1
fs/bcachefs/sb-clean.c
···

  ret = bch2_sb_clean_validate_late(c, clean, READ);
  if (ret) {
+ kfree(clean);
  mutex_unlock(&c->sb_lock);
  return ERR_PTR(ret);
  }
+5 -4
fs/bcachefs/sb-downgrade.c
···
  if (!first)
  prt_char(out, ',');
  first = false;
- unsigned e = le16_to_cpu(i->errors[j]);
- prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
+ bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
  }
  prt_newline(out);
  }
···
  for (unsigned i = 0; i < src->nr_errors; i++)
  dst->errors[i] = cpu_to_le16(src->errors[i]);

- downgrade_table_extra(c, &table);
+ ret = downgrade_table_extra(c, &table);
+ if (ret)
+ goto out;

  if (!dst->recovery_passes[0] &&
  !dst->recovery_passes[1] &&
···

  for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
  unsigned e = le16_to_cpu(i->errors[j]);
- if (e < BCH_SB_ERR_MAX)
+ if (e < BCH_FSCK_ERR_MAX)
  __set_bit(e, c->sb.errors_silent);
  if (e < sizeof(ext->errors_silent) * 8)
  __set_bit_le64(e, ext->errors_silent);
+3 -3
fs/bcachefs/sb-errors.c
···
  const char * const bch2_sb_error_strs[] = {
  #define x(t, n, ...) [n] = #t,
  BCH_SB_ERRS()
- NULL
+ #undef x
  };

- static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
+ void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
  {
- if (id < BCH_SB_ERR_MAX)
+ if (id < BCH_FSCK_ERR_MAX)
  prt_str(out, bch2_sb_error_strs[id]);
  else
  prt_printf(out, "(unknown error %u)", id);
+2
fs/bcachefs/sb-errors.h
···

  extern const char * const bch2_sb_error_strs[];

+ void bch2_sb_error_id_to_text(struct printbuf *, enum bch_sb_error_id);
+
  extern const struct bch_sb_field_ops bch_sb_field_ops_errors;

  void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);
+21 -18
fs/bcachefs/sb-errors_format.h
···
  x(inode_snapshot_mismatch, 196, 0) \
  x(inode_unlinked_but_clean, 197, 0) \
  x(inode_unlinked_but_nlink_nonzero, 198, 0) \
+ x(inode_unlinked_and_not_open, 281, 0) \
  x(inode_checksum_type_invalid, 199, 0) \
  x(inode_compression_type_invalid, 200, 0) \
  x(inode_subvol_root_but_not_dir, 201, 0) \
- x(inode_i_size_dirty_but_clean, 202, 0) \
- x(inode_i_sectors_dirty_but_clean, 203, 0) \
- x(inode_i_sectors_wrong, 204, 0) \
- x(inode_dir_wrong_nlink, 205, 0) \
- x(inode_dir_multiple_links, 206, 0) \
- x(inode_multiple_links_but_nlink_0, 207, 0) \
- x(inode_wrong_backpointer, 208, 0) \
- x(inode_wrong_nlink, 209, 0) \
- x(inode_unreachable, 210, 0) \
- x(deleted_inode_but_clean, 211, 0) \
- x(deleted_inode_missing, 212, 0) \
- x(deleted_inode_is_dir, 213, 0) \
- x(deleted_inode_not_unlinked, 214, 0) \
+ x(inode_i_size_dirty_but_clean, 202, FSCK_AUTOFIX) \
+ x(inode_i_sectors_dirty_but_clean, 203, FSCK_AUTOFIX) \
+ x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \
+ x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \
+ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \
+ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
+ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
+ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
+ x(inode_unreachable, 210, FSCK_AUTOFIX) \
+ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
+ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
+ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
+ x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \
  x(extent_overlapping, 215, 0) \
  x(key_in_missing_inode, 216, 0) \
  x(key_in_wrong_inode_type, 217, 0) \
···
  x(dir_loop, 241, 0) \
  x(hash_table_key_duplicate, 242, 0) \
  x(hash_table_key_wrong_offset, 243, 0) \
- x(unlinked_inode_not_on_deleted_list, 244, 0) \
+ x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \
  x(reflink_p_front_pad_bad, 245, 0) \
  x(journal_entry_dup_same_device, 246, 0) \
  x(inode_bi_subvol_missing, 247, 0) \
···
  x(subvol_children_not_set, 256, 0) \
  x(subvol_children_bad, 257, 0) \
  x(subvol_loop, 258, 0) \
- x(subvol_unreachable, 259, 0) \
+ x(subvol_unreachable, 259, FSCK_AUTOFIX) \
  x(btree_node_bkey_bad_u64s, 260, 0) \
  x(btree_node_topology_empty_interior_node, 261, 0) \
  x(btree_ptr_v2_min_key_bad, 262, 0) \
···
  x(btree_ptr_v2_written_0, 268, 0) \
  x(subvol_snapshot_bad, 269, 0) \
  x(subvol_inode_bad, 270, 0) \
- x(alloc_key_stripe_sectors_wrong, 271, 0) \
- x(accounting_mismatch, 272, 0) \
+ x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
+ x(accounting_mismatch, 272, FSCK_AUTOFIX) \
  x(accounting_replicas_not_marked, 273, 0) \
  x(invalid_btree_id, 274, 0) \
  x(alloc_key_io_time_bad, 275, 0) \
···
  x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
  x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
  x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
+ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
+ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
+ x(MAX, 284, 0)

  enum bch_sb_error_id {
  #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
  BCH_SB_ERRS()
  #undef x
- BCH_SB_ERR_MAX
  };

  struct bch_sb_field_errors {
+9 -3
fs/bcachefs/six.c
···
  ret = -1 - SIX_LOCK_write;
  }
  } else if (type == SIX_LOCK_write && lock->readers) {
- if (try) {
+ if (try)
  atomic_add(SIX_LOCK_HELD_write, &lock->state);
- smp_mb__after_atomic();
- }

+ /*
+ * Make sure atomic_add happens before pcpu_read_count and
+ * six_set_bitmask in slow path happens before pcpu_read_count.
+ *
+ * Paired with the smp_mb() in read lock fast path (per-cpu mode)
+ * and the one before atomic_read in read unlock path.
+ */
+ smp_mb();
  ret = !pcpu_read_count(lock);

  if (try && !ret) {
+3
fs/bcachefs/snapshot.c
···
  u32 id = snapshot_root;
  u32 subvol = 0, s;

+ rcu_read_lock();
  while (id) {
  s = snapshot_t(c, id)->subvol;

···
  id = bch2_snapshot_tree_next(c, id);
  }
+ rcu_read_unlock();

  return subvol;
  }
···
  new->k.p.snapshot = leaf_id;
  ret = bch2_trans_update(trans, &iter, new, 0);
  out:
+ bch2_set_btree_iter_dontneed(&iter);
  bch2_trans_iter_exit(trans, &iter);
  return ret;
  }
+26 -28
fs/bcachefs/subvolume.c
···
  }

  struct bch_inode_unpacked inode;
- struct btree_iter inode_iter = {};
- ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
+ ret = bch2_inode_find_by_inum_nowarn_trans(trans,
  (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
- 0);
- bch2_trans_iter_exit(trans, &inode_iter);
-
- if (ret && !bch2_err_matches(ret, ENOENT))
- return ret;
-
- if (fsck_err_on(ret,
- trans, subvol_to_missing_root,
- "subvolume %llu points to missing subvolume root %llu:%u",
- k.k->p.offset, le64_to_cpu(subvol.v->inode),
- le32_to_cpu(subvol.v->snapshot))) {
- ret = bch2_subvolume_delete(trans, iter->pos.offset);
- bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
- return ret ?: -BCH_ERR_transaction_restart_nested;
- }
-
- if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
- trans, subvol_root_wrong_bi_subvol,
- "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
- inode.bi_inum, inode_iter.k.p.snapshot,
- inode.bi_subvol, subvol.k->p.offset)) {
- inode.bi_subvol = subvol.k->p.offset;
- ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
- if (ret)
+ &inode);
+ if (!ret) {
+ if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
+ trans, subvol_root_wrong_bi_subvol,
+ "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
+ inode.bi_inum, inode.bi_snapshot,
+ inode.bi_subvol, subvol.k->p.offset)) {
+ inode.bi_subvol = subvol.k->p.offset;
+ ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
+ if (ret)
+ goto err;
+ }
+ } else if (bch2_err_matches(ret, ENOENT)) {
+ if (fsck_err(trans, subvol_to_missing_root,
+ "subvolume %llu points to missing subvolume root %llu:%u",
+ k.k->p.offset, le64_to_cpu(subvol.v->inode),
+ le32_to_cpu(subvol.v->snapshot))) {
+ ret = bch2_subvolume_delete(trans, iter->pos.offset);
+ bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
+ ret = ret ?: -BCH_ERR_transaction_restart_nested;
  goto err;
+ }
+ } else {
+ goto err;
  }

  if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
···
  "%s: snapshot tree %u not found", __func__, snapshot_tree);

  if (ret)
- return ret;
+ goto err;

  if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
  trans, subvol_not_master_and_not_snapshot,
···
  bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
  ret = PTR_ERR_OR_ZERO(s);
  if (ret)
- return ret;
+ goto err;

  SET_BCH_SUBVOLUME_SNAP(&s->v, true);
  }
+5 -2
fs/bcachefs/super-io.c
···
  i < layout.sb_offset + layout.nr_superblocks; i++) {
  offset = le64_to_cpu(*i);

- if (offset == opt_get(*opts, sb))
+ if (offset == opt_get(*opts, sb)) {
+ ret = -BCH_ERR_invalid;
  continue;
+ }

  ret = read_one_super(sb, offset, &err);
  if (!ret)
···
  le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);

  prt_printf(out, "Errors to silently fix:\t");
- prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
+ prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent,
+ min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8));
  prt_newline(out);

  kfree(errors_silent);
+1 -1
fs/bcachefs/tests.c
···
  k.k_i.k.p.offset = end;
  k.k_i.k.p.snapshot = U32_MAX;
  k.k_i.k.size = end - start;
- k.k_i.k.version.lo = test_version++;
+ k.k_i.k.bversion.lo = test_version++;

  ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0);
  bch_err_fn(c, ret);