Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'bcachefs-2024-01-21' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs updates from Kent Overstreet:
"Some fixes, Some refactoring, some minor features:

- Assorted prep work for disk space accounting rewrite

- BTREE_TRIGGER_ATOMIC: after combining our trigger callbacks, this
makes our trigger context more explicit

- A few fixes to avoid excessive transaction restarts on
multithreaded workloads: fstests (in addition to ktest tests) are
now checking slowpath counters, and that's shaking out a few bugs

- Assorted tracepoint improvements

- Starting to break up bcachefs_format.h and move on disk types so
they're with the code they belong to; this will make room to start
documenting the on disk format better.

- A few minor fixes"
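
The BTREE_TRIGGER_ATOMIC item above corresponds to the bkey_methods.h and alloc_background.c hunks further down: the commit-time ("mark") phase of a trigger is now selected by an explicit BTREE_TRIGGER_ATOMIC flag instead of the implicit !(flags & BTREE_TRIGGER_TRANSACTIONAL) test. The following is only an illustrative sketch of how a combined trigger callback branches on the two context flags; example_trigger() is a made-up name, and the real callbacks in fs/bcachefs (e.g. the alloc trigger shown in the alloc_background.c hunk below) do real work in each branch.

static int example_trigger(struct btree_trans *trans,
			   enum btree_id btree, unsigned level,
			   struct bkey_s_c old, struct bkey_s new,
			   unsigned flags)
{
	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
		/* transactional phase: runs inside the btree transaction,
		 * may do further btree updates and may restart */
		return 0;
	}

	if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
		/* commit-time phase: before this series this branch was
		 * reached via !(flags & BTREE_TRIGGER_TRANSACTIONAL) */
		return 0;
	}

	return 0;
}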

* tag 'bcachefs-2024-01-21' of https://evilpiepirate.org/git/bcachefs: (46 commits)
bcachefs: Improve inode_to_text()
bcachefs: logged_ops_format.h
bcachefs: reflink_format.h
bcachefs: extents_format.h
bcachefs: ec_format.h
bcachefs: subvolume_format.h
bcachefs: snapshot_format.h
bcachefs: alloc_background_format.h
bcachefs: xattr_format.h
bcachefs: dirent_format.h
bcachefs: inode_format.h
bcachefs: quota_format.h
bcachefs: sb-counters_format.h
bcachefs: counters.c -> sb-counters.c
bcachefs: comment bch_subvolume
bcachefs: bch_snapshot::btime
bcachefs: add missing __GFP_NOWARN
bcachefs: opts->compression can now also be applied in the background
bcachefs: Prep work for variable size btree node buffers
bcachefs: grab s_umount only if snapshotting
...

+1629 -1426
+1 -1
fs/bcachefs/Makefile
··· 27 27 checksum.o \ 28 28 clock.o \ 29 29 compress.o \ 30 - counters.o \ 31 30 darray.o \ 32 31 debug.o \ 33 32 dirent.o \ ··· 70 71 reflink.o \ 71 72 replicas.o \ 72 73 sb-clean.o \ 74 + sb-counters.o \ 73 75 sb-downgrade.o \ 74 76 sb-errors.o \ 75 77 sb-members.o \
+45 -44
fs/bcachefs/alloc_background.c
··· 273 273 bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v), 274 274 c, err, alloc_key_dirty_sectors_0, 275 275 "data_type %s but dirty_sectors==0", 276 - bch2_data_types[a.v->data_type]); 276 + bch2_data_type_str(a.v->data_type)); 277 277 break; 278 278 case BCH_DATA_cached: 279 279 bkey_fsck_err_on(!a.v->cached_sectors || ··· 321 321 { 322 322 struct bch_alloc_v4 _a; 323 323 const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); 324 - unsigned i; 325 324 326 325 prt_newline(out); 327 326 printbuf_indent_add(out, 2); 328 327 329 - prt_printf(out, "gen %u oldest_gen %u data_type %s", 330 - a->gen, a->oldest_gen, 331 - a->data_type < BCH_DATA_NR 332 - ? bch2_data_types[a->data_type] 333 - : "(invalid data type)"); 328 + prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); 329 + bch2_prt_data_type(out, a->data_type); 334 330 prt_newline(out); 335 331 prt_printf(out, "journal_seq %llu", a->journal_seq); 336 332 prt_newline(out); ··· 349 353 prt_printf(out, "fragmentation %llu", a->fragmentation_lru); 350 354 prt_newline(out); 351 355 prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a)); 352 - prt_newline(out); 353 - 354 - if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) { 355 - struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k); 356 - const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v); 357 - 358 - prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v)); 359 - printbuf_indent_add(out, 2); 360 - 361 - for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) { 362 - prt_newline(out); 363 - bch2_backpointer_to_text(out, &bps[i]); 364 - } 365 - 366 - printbuf_indent_sub(out, 2); 367 - } 368 - 369 356 printbuf_indent_sub(out, 2); 370 357 } 371 358 ··· 818 839 } 819 840 } 820 841 821 - if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { 842 + if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) { 822 843 struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; 823 844 u64 journal_seq = trans->journal_res.seq; 824 845 u64 bucket_journal_seq = new_a->journal_seq; ··· 1604 1625 return ret; 1605 1626 } 1606 1627 1628 + struct discard_buckets_state { 1629 + u64 seen; 1630 + u64 open; 1631 + u64 need_journal_commit; 1632 + u64 discarded; 1633 + struct bch_dev *ca; 1634 + u64 need_journal_commit_this_dev; 1635 + }; 1636 + 1637 + static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca) 1638 + { 1639 + if (s->ca == ca) 1640 + return; 1641 + 1642 + if (s->ca && s->need_journal_commit_this_dev > 1643 + bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets) 1644 + bch2_journal_flush_async(&c->journal, NULL); 1645 + 1646 + if (s->ca) 1647 + percpu_ref_put(&s->ca->ref); 1648 + if (ca) 1649 + percpu_ref_get(&ca->ref); 1650 + s->ca = ca; 1651 + s->need_journal_commit_this_dev = 0; 1652 + } 1653 + 1607 1654 static int bch2_discard_one_bucket(struct btree_trans *trans, 1608 1655 struct btree_iter *need_discard_iter, 1609 1656 struct bpos *discard_pos_done, 1610 - u64 *seen, 1611 - u64 *open, 1612 - u64 *need_journal_commit, 1613 - u64 *discarded) 1657 + struct discard_buckets_state *s) 1614 1658 { 1615 1659 struct bch_fs *c = trans->c; 1616 1660 struct bpos pos = need_discard_iter->pos; ··· 1645 1643 int ret = 0; 1646 1644 1647 1645 ca = bch_dev_bkey_exists(c, pos.inode); 1646 + 1648 1647 if (!percpu_ref_tryget(&ca->io_ref)) { 1649 1648 bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0)); 1650 1649 return 0; 1651 1650 } 1652 1651 1652 
+ discard_buckets_next_dev(c, s, ca); 1653 + 1653 1654 if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { 1654 - (*open)++; 1655 + s->open++; 1655 1656 goto out; 1656 1657 } 1657 1658 1658 1659 if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, 1659 1660 c->journal.flushed_seq_ondisk, 1660 1661 pos.inode, pos.offset)) { 1661 - (*need_journal_commit)++; 1662 + s->need_journal_commit++; 1663 + s->need_journal_commit_this_dev++; 1662 1664 goto out; 1663 1665 } 1664 1666 ··· 1738 1732 goto out; 1739 1733 1740 1734 count_event(c, bucket_discard); 1741 - (*discarded)++; 1735 + s->discarded++; 1742 1736 out: 1743 - (*seen)++; 1737 + s->seen++; 1744 1738 bch2_trans_iter_exit(trans, &iter); 1745 1739 percpu_ref_put(&ca->io_ref); 1746 1740 printbuf_exit(&buf); ··· 1750 1744 static void bch2_do_discards_work(struct work_struct *work) 1751 1745 { 1752 1746 struct bch_fs *c = container_of(work, struct bch_fs, discard_work); 1753 - u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; 1747 + struct discard_buckets_state s = {}; 1754 1748 struct bpos discard_pos_done = POS_MAX; 1755 1749 int ret; 1756 1750 ··· 1762 1756 ret = bch2_trans_run(c, 1763 1757 for_each_btree_key(trans, iter, 1764 1758 BTREE_ID_need_discard, POS_MIN, 0, k, 1765 - bch2_discard_one_bucket(trans, &iter, &discard_pos_done, 1766 - &seen, 1767 - &open, 1768 - &need_journal_commit, 1769 - &discarded))); 1759 + bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s))); 1770 1760 1771 - if (need_journal_commit * 2 > seen) 1772 - bch2_journal_flush_async(&c->journal, NULL); 1761 + discard_buckets_next_dev(c, &s, NULL); 1762 + 1763 + trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, 1764 + bch2_err_str(ret)); 1773 1765 1774 1766 bch2_write_ref_put(c, BCH_WRITE_REF_discard); 1775 - 1776 - trace_discard_buckets(c, seen, open, need_journal_commit, discarded, 1777 - bch2_err_str(ret)); 1778 1767 } 1779 1768 1780 1769 void bch2_do_discards(struct bch_fs *c)
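
The alloc_background.c hunk above is easier to read untangled: the discard worker's four loose counters become a single struct discard_buckets_state, with an extra per-device count so the "flush the journal if discards are blocked on it" heuristic can fire per device as the need_discard iterator crosses a device boundary, rather than once at the end of the whole pass. Reconstructed from the flattened hunk (same code, just re-wrapped):

struct discard_buckets_state {
	u64		seen;
	u64		open;
	u64		need_journal_commit;
	u64		discarded;
	struct bch_dev	*ca;
	u64		need_journal_commit_this_dev;
};

static void discard_buckets_next_dev(struct bch_fs *c,
				     struct discard_buckets_state *s,
				     struct bch_dev *ca)
{
	if (s->ca == ca)
		return;

	/* more buckets blocked on the journal than free buckets? flush it */
	if (s->ca && s->need_journal_commit_this_dev >
	    bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
		bch2_journal_flush_async(&c->journal, NULL);

	if (s->ca)
		percpu_ref_put(&s->ca->ref);
	if (ca)
		percpu_ref_get(&ca->ref);
	s->ca = ca;
	s->need_journal_commit_this_dev = 0;
}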
+92
fs/bcachefs/alloc_background_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H 3 + #define _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H 4 + 5 + struct bch_alloc { 6 + struct bch_val v; 7 + __u8 fields; 8 + __u8 gen; 9 + __u8 data[]; 10 + } __packed __aligned(8); 11 + 12 + #define BCH_ALLOC_FIELDS_V1() \ 13 + x(read_time, 16) \ 14 + x(write_time, 16) \ 15 + x(data_type, 8) \ 16 + x(dirty_sectors, 16) \ 17 + x(cached_sectors, 16) \ 18 + x(oldest_gen, 8) \ 19 + x(stripe, 32) \ 20 + x(stripe_redundancy, 8) 21 + 22 + enum { 23 + #define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, 24 + BCH_ALLOC_FIELDS_V1() 25 + #undef x 26 + }; 27 + 28 + struct bch_alloc_v2 { 29 + struct bch_val v; 30 + __u8 nr_fields; 31 + __u8 gen; 32 + __u8 oldest_gen; 33 + __u8 data_type; 34 + __u8 data[]; 35 + } __packed __aligned(8); 36 + 37 + #define BCH_ALLOC_FIELDS_V2() \ 38 + x(read_time, 64) \ 39 + x(write_time, 64) \ 40 + x(dirty_sectors, 32) \ 41 + x(cached_sectors, 32) \ 42 + x(stripe, 32) \ 43 + x(stripe_redundancy, 8) 44 + 45 + struct bch_alloc_v3 { 46 + struct bch_val v; 47 + __le64 journal_seq; 48 + __le32 flags; 49 + __u8 nr_fields; 50 + __u8 gen; 51 + __u8 oldest_gen; 52 + __u8 data_type; 53 + __u8 data[]; 54 + } __packed __aligned(8); 55 + 56 + LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) 57 + LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) 58 + 59 + struct bch_alloc_v4 { 60 + struct bch_val v; 61 + __u64 journal_seq; 62 + __u32 flags; 63 + __u8 gen; 64 + __u8 oldest_gen; 65 + __u8 data_type; 66 + __u8 stripe_redundancy; 67 + __u32 dirty_sectors; 68 + __u32 cached_sectors; 69 + __u64 io_time[2]; 70 + __u32 stripe; 71 + __u32 nr_external_backpointers; 72 + __u64 fragmentation_lru; 73 + } __packed __aligned(8); 74 + 75 + #define BCH_ALLOC_V4_U64s_V0 6 76 + #define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64)) 77 + 78 + BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) 79 + BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) 80 + BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) 81 + BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) 82 + 83 + #define KEY_TYPE_BUCKET_GENS_BITS 8 84 + #define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS) 85 + #define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1) 86 + 87 + struct bch_bucket_gens { 88 + struct bch_val v; 89 + u8 gens[KEY_TYPE_BUCKET_GENS_NR]; 90 + } __packed __aligned(8); 91 + 92 + #endif /* _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H */
+4 -3
fs/bcachefs/alloc_foreground.c
··· 1525 1525 unsigned data_type = ob->data_type; 1526 1526 barrier(); /* READ_ONCE() doesn't work on bitfields */ 1527 1527 1528 - prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u", 1528 + prt_printf(out, "%zu ref %u ", 1529 1529 ob - c->open_buckets, 1530 - atomic_read(&ob->pin), 1531 - data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type", 1530 + atomic_read(&ob->pin)); 1531 + bch2_prt_data_type(out, data_type); 1532 + prt_printf(out, " %u:%llu gen %u allocated %u/%u", 1532 1533 ob->dev, ob->bucket, ob->gen, 1533 1534 ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size); 1534 1535 if (ob->ec)
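
This hunk, together with similar ones in alloc_background.c above and btree_gc.c below, replaces direct indexing of the bch2_data_types[] name table with bch2_data_type_str() / bch2_prt_data_type(), so an out-of-range data_type value prints a recognisable placeholder instead of each call site open-coding the bounds check (or forgetting it). The helpers themselves are not shown in this diff; the sketch below is only a guess at their shape, and the real bcachefs definitions may differ (the underlying string table may even have been renamed as part of the same series):

/* Sketch only -- assumed shape of the helpers used above: */
static inline const char *bch2_data_type_str(enum bch_data_type type)
{
	return type < BCH_DATA_NR
		? bch2_data_types[type]
		: "(invalid data type)";
}

static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
{
	if (type < BCH_DATA_NR)
		prt_str(out, bch2_data_types[type]);
	else
		prt_printf(out, "(invalid data type %u)", type);
}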
+49 -51
fs/bcachefs/backpointers.c
··· 400 400 return ret; 401 401 } 402 402 403 + static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 404 + { 405 + return bpos_eq(l.k->p, r.k->p) && 406 + bkey_bytes(l.k) == bkey_bytes(r.k) && 407 + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 408 + } 409 + 410 + struct extents_to_bp_state { 411 + struct bpos bucket_start; 412 + struct bpos bucket_end; 413 + struct bkey_buf last_flushed; 414 + }; 415 + 403 416 static int check_bp_exists(struct btree_trans *trans, 417 + struct extents_to_bp_state *s, 404 418 struct bpos bucket, 405 419 struct bch_backpointer bp, 406 - struct bkey_s_c orig_k, 407 - struct bpos bucket_start, 408 - struct bpos bucket_end, 409 - struct bkey_buf *last_flushed) 420 + struct bkey_s_c orig_k) 410 421 { 411 422 struct bch_fs *c = trans->c; 412 423 struct btree_iter bp_iter = { NULL }; ··· 428 417 429 418 bch2_bkey_buf_init(&tmp); 430 419 431 - if (bpos_lt(bucket, bucket_start) || 432 - bpos_gt(bucket, bucket_end)) 420 + if (bpos_lt(bucket, s->bucket_start) || 421 + bpos_gt(bucket, s->bucket_end)) 433 422 return 0; 434 423 435 424 if (!bch2_dev_bucket_exists(c, bucket)) ··· 444 433 445 434 if (bp_k.k->type != KEY_TYPE_backpointer || 446 435 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 447 - if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) || 448 - bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) || 449 - memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) { 450 - bch2_bkey_buf_reassemble(&tmp, c, orig_k); 436 + bch2_bkey_buf_reassemble(&tmp, c, orig_k); 451 437 438 + if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { 452 439 if (bp.level) { 453 440 bch2_trans_unlock(trans); 454 441 bch2_btree_interior_updates_flush(c); ··· 456 447 if (ret) 457 448 goto err; 458 449 459 - bch2_bkey_buf_copy(last_flushed, c, tmp.k); 450 + bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); 460 451 ret = -BCH_ERR_transaction_restart_write_buffer_flush; 461 452 goto out; 462 453 } ··· 484 475 } 485 476 486 477 static int check_extent_to_backpointers(struct btree_trans *trans, 478 + struct extents_to_bp_state *s, 487 479 enum btree_id btree, unsigned level, 488 - struct bpos bucket_start, 489 - struct bpos bucket_end, 490 - struct bkey_buf *last_flushed, 491 480 struct bkey_s_c k) 492 481 { 493 482 struct bch_fs *c = trans->c; ··· 505 498 bch2_extent_ptr_to_bp(c, btree, level, 506 499 k, p, &bucket_pos, &bp); 507 500 508 - ret = check_bp_exists(trans, bucket_pos, bp, k, 509 - bucket_start, bucket_end, 510 - last_flushed); 501 + ret = check_bp_exists(trans, s, bucket_pos, bp, k); 511 502 if (ret) 512 503 return ret; 513 504 } ··· 514 509 } 515 510 516 511 static int check_btree_root_to_backpointers(struct btree_trans *trans, 512 + struct extents_to_bp_state *s, 517 513 enum btree_id btree_id, 518 - struct bpos bucket_start, 519 - struct bpos bucket_end, 520 - struct bkey_buf *last_flushed, 521 514 int *level) 522 515 { 523 516 struct bch_fs *c = trans->c; ··· 539 536 *level = b->c.level; 540 537 541 538 k = bkey_i_to_s_c(&b->key); 542 - ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1, 543 - bucket_start, bucket_end, 544 - last_flushed, k); 539 + ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 545 540 err: 546 541 bch2_trans_iter_exit(trans, &iter); 547 542 return ret; ··· 560 559 561 560 si_meminfo(&i); 562 561 mem_bytes = i.totalram * i.mem_unit; 563 - return div_u64(mem_bytes >> 1, btree_bytes(c)); 562 + return div_u64(mem_bytes >> 1, c->opts.btree_node_size); 564 563 } 565 564 566 
565 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, ··· 611 610 } 612 611 613 612 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 614 - struct bpos bucket_start, 615 - struct bpos bucket_end) 613 + struct extents_to_bp_state *s) 616 614 { 617 615 struct bch_fs *c = trans->c; 618 - struct btree_iter iter; 619 - enum btree_id btree_id; 620 - struct bkey_s_c k; 621 - struct bkey_buf last_flushed; 622 616 int ret = 0; 623 617 624 - bch2_bkey_buf_init(&last_flushed); 625 - bkey_init(&last_flushed.k->k); 626 - 627 - for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { 618 + for (enum btree_id btree_id = 0; 619 + btree_id < btree_id_nr_alive(c); 620 + btree_id++) { 628 621 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 629 622 630 623 ret = commit_do(trans, NULL, NULL, 631 624 BCH_TRANS_COMMIT_no_enospc, 632 - check_btree_root_to_backpointers(trans, btree_id, 633 - bucket_start, bucket_end, 634 - &last_flushed, &level)); 625 + check_btree_root_to_backpointers(trans, s, btree_id, &level)); 635 626 if (ret) 636 627 return ret; 637 628 638 629 while (level >= depth) { 630 + struct btree_iter iter; 639 631 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, 640 632 level, 641 633 BTREE_ITER_PREFETCH); 642 634 while (1) { 643 635 bch2_trans_begin(trans); 644 - k = bch2_btree_iter_peek(&iter); 636 + 637 + struct bkey_s_c k = bch2_btree_iter_peek(&iter); 645 638 if (!k.k) 646 639 break; 647 640 ret = bkey_err(k) ?: 648 - check_extent_to_backpointers(trans, btree_id, level, 649 - bucket_start, bucket_end, 650 - &last_flushed, k) ?: 641 + check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 651 642 bch2_trans_commit(trans, NULL, NULL, 652 643 BCH_TRANS_COMMIT_no_enospc); 653 644 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ··· 661 668 } 662 669 } 663 670 664 - bch2_bkey_buf_exit(&last_flushed, c); 665 671 return 0; 666 672 } 667 673 ··· 723 731 int bch2_check_extents_to_backpointers(struct bch_fs *c) 724 732 { 725 733 struct btree_trans *trans = bch2_trans_get(c); 726 - struct bpos start = POS_MIN, end; 734 + struct extents_to_bp_state s = { .bucket_start = POS_MIN }; 727 735 int ret; 728 736 737 + bch2_bkey_buf_init(&s.last_flushed); 738 + bkey_init(&s.last_flushed.k->k); 739 + 729 740 while (1) { 730 - ret = bch2_get_alloc_in_memory_pos(trans, start, &end); 741 + ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end); 731 742 if (ret) 732 743 break; 733 744 734 - if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX)) 745 + if ( bpos_eq(s.bucket_start, POS_MIN) && 746 + !bpos_eq(s.bucket_end, SPOS_MAX)) 735 747 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 736 748 __func__, btree_nodes_fit_in_ram(c)); 737 749 738 - if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) { 750 + if (!bpos_eq(s.bucket_start, POS_MIN) || 751 + !bpos_eq(s.bucket_end, SPOS_MAX)) { 739 752 struct printbuf buf = PRINTBUF; 740 753 741 754 prt_str(&buf, "check_extents_to_backpointers(): "); 742 - bch2_bpos_to_text(&buf, start); 755 + bch2_bpos_to_text(&buf, s.bucket_start); 743 756 prt_str(&buf, "-"); 744 - bch2_bpos_to_text(&buf, end); 757 + bch2_bpos_to_text(&buf, s.bucket_end); 745 758 746 759 bch_verbose(c, "%s", buf.buf); 747 760 printbuf_exit(&buf); 748 761 } 749 762 750 - ret = bch2_check_extents_to_backpointers_pass(trans, start, end); 751 - if (ret || bpos_eq(end, SPOS_MAX)) 763 + ret = bch2_check_extents_to_backpointers_pass(trans, &s); 
764 + if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) 752 765 break; 753 766 754 - start = bpos_successor(end); 767 + s.bucket_start = bpos_successor(s.bucket_end); 755 768 } 756 769 bch2_trans_put(trans); 770 + bch2_bkey_buf_exit(&s.last_flushed, c); 757 771 758 772 bch_err_fn(c, ret); 759 773 return ret;
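
Same idea as the discard rework: instead of threading bucket_start, bucket_end and last_flushed separately through check_bp_exists() and its callers, the extents-to-backpointers fsck pass now carries one struct extents_to_bp_state, and the open-coded "is this the key we flushed last time?" comparison becomes a helper. Reconstructed from the flattened hunk above (same code, re-wrapped):

struct extents_to_bp_state {
	struct bpos	bucket_start;
	struct bpos	bucket_end;
	struct bkey_buf	last_flushed;
};

static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
	return bpos_eq(l.k->p, r.k->p) &&
		bkey_bytes(l.k) == bkey_bytes(r.k) &&
		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}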
+1
fs/bcachefs/backpointers.h
··· 2 2 #ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H 3 3 #define _BCACHEFS_BACKPOINTERS_BACKGROUND_H 4 4 5 + #include "btree_cache.h" 5 6 #include "btree_iter.h" 6 7 #include "btree_update.h" 7 8 #include "buckets.h"
-5
fs/bcachefs/bcachefs.h
··· 1204 1204 return c->opts.block_size >> 9; 1205 1205 } 1206 1206 1207 - static inline size_t btree_sectors(const struct bch_fs *c) 1208 - { 1209 - return c->opts.btree_node_size >> 9; 1210 - } 1211 - 1212 1207 static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree) 1213 1208 { 1214 1209 return c->btree_key_cache_btrees & (1U << btree);
+13 -875
fs/bcachefs/bcachefs_format.h
··· 417 417 struct bch_val v; 418 418 }; 419 419 420 - /* Extents */ 421 - 422 - /* 423 - * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally 424 - * preceded by checksum/compression information (bch_extent_crc32 or 425 - * bch_extent_crc64). 426 - * 427 - * One major determining factor in the format of extents is how we handle and 428 - * represent extents that have been partially overwritten and thus trimmed: 429 - * 430 - * If an extent is not checksummed or compressed, when the extent is trimmed we 431 - * don't have to remember the extent we originally allocated and wrote: we can 432 - * merely adjust ptr->offset to point to the start of the data that is currently 433 - * live. The size field in struct bkey records the current (live) size of the 434 - * extent, and is also used to mean "size of region on disk that we point to" in 435 - * this case. 436 - * 437 - * Thus an extent that is not checksummed or compressed will consist only of a 438 - * list of bch_extent_ptrs, with none of the fields in 439 - * bch_extent_crc32/bch_extent_crc64. 440 - * 441 - * When an extent is checksummed or compressed, it's not possible to read only 442 - * the data that is currently live: we have to read the entire extent that was 443 - * originally written, and then return only the part of the extent that is 444 - * currently live. 445 - * 446 - * Thus, in addition to the current size of the extent in struct bkey, we need 447 - * to store the size of the originally allocated space - this is the 448 - * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, 449 - * when the extent is trimmed, instead of modifying the offset field of the 450 - * pointer, we keep a second smaller offset field - "offset into the original 451 - * extent of the currently live region". 452 - * 453 - * The other major determining factor is replication and data migration: 454 - * 455 - * Each pointer may have its own bch_extent_crc32/64. When doing a replicated 456 - * write, we will initially write all the replicas in the same format, with the 457 - * same checksum type and compression format - however, when copygc runs later (or 458 - * tiering/cache promotion, anything that moves data), it is not in general 459 - * going to rewrite all the pointers at once - one of the replicas may be in a 460 - * bucket on one device that has very little fragmentation while another lives 461 - * in a bucket that has become heavily fragmented, and thus is being rewritten 462 - * sooner than the rest. 463 - * 464 - * Thus it will only move a subset of the pointers (or in the case of 465 - * tiering/cache promotion perhaps add a single pointer without dropping any 466 - * current pointers), and if the extent has been partially overwritten it must 467 - * write only the currently live portion (or copygc would not be able to reduce 468 - * fragmentation!) - which necessitates a different bch_extent_crc format for 469 - * the new pointer. 470 - * 471 - * But in the interests of space efficiency, we don't want to store one 472 - * bch_extent_crc for each pointer if we don't have to. 473 - * 474 - * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and 475 - * bch_extent_ptrs appended arbitrarily one after the other. 
We determine the 476 - * type of a given entry with a scheme similar to utf8 (except we're encoding a 477 - * type, not a size), encoding the type in the position of the first set bit: 478 - * 479 - * bch_extent_crc32 - 0b1 480 - * bch_extent_ptr - 0b10 481 - * bch_extent_crc64 - 0b100 482 - * 483 - * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and 484 - * bch_extent_crc64 is the least constrained). 485 - * 486 - * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, 487 - * until the next bch_extent_crc32/64. 488 - * 489 - * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer 490 - * is neither checksummed nor compressed. 491 - */ 492 - 493 420 /* 128 bits, sufficient for cryptographic MACs: */ 494 421 struct bch_csum { 495 422 __le64 lo; 496 423 __le64 hi; 497 424 } __packed __aligned(8); 498 - 499 - #define BCH_EXTENT_ENTRY_TYPES() \ 500 - x(ptr, 0) \ 501 - x(crc32, 1) \ 502 - x(crc64, 2) \ 503 - x(crc128, 3) \ 504 - x(stripe_ptr, 4) \ 505 - x(rebalance, 5) 506 - #define BCH_EXTENT_ENTRY_MAX 6 507 - 508 - enum bch_extent_entry_type { 509 - #define x(f, n) BCH_EXTENT_ENTRY_##f = n, 510 - BCH_EXTENT_ENTRY_TYPES() 511 - #undef x 512 - }; 513 - 514 - /* Compressed/uncompressed size are stored biased by 1: */ 515 - struct bch_extent_crc32 { 516 - #if defined(__LITTLE_ENDIAN_BITFIELD) 517 - __u32 type:2, 518 - _compressed_size:7, 519 - _uncompressed_size:7, 520 - offset:7, 521 - _unused:1, 522 - csum_type:4, 523 - compression_type:4; 524 - __u32 csum; 525 - #elif defined (__BIG_ENDIAN_BITFIELD) 526 - __u32 csum; 527 - __u32 compression_type:4, 528 - csum_type:4, 529 - _unused:1, 530 - offset:7, 531 - _uncompressed_size:7, 532 - _compressed_size:7, 533 - type:2; 534 - #endif 535 - } __packed __aligned(8); 536 - 537 - #define CRC32_SIZE_MAX (1U << 7) 538 - #define CRC32_NONCE_MAX 0 539 - 540 - struct bch_extent_crc64 { 541 - #if defined(__LITTLE_ENDIAN_BITFIELD) 542 - __u64 type:3, 543 - _compressed_size:9, 544 - _uncompressed_size:9, 545 - offset:9, 546 - nonce:10, 547 - csum_type:4, 548 - compression_type:4, 549 - csum_hi:16; 550 - #elif defined (__BIG_ENDIAN_BITFIELD) 551 - __u64 csum_hi:16, 552 - compression_type:4, 553 - csum_type:4, 554 - nonce:10, 555 - offset:9, 556 - _uncompressed_size:9, 557 - _compressed_size:9, 558 - type:3; 559 - #endif 560 - __u64 csum_lo; 561 - } __packed __aligned(8); 562 - 563 - #define CRC64_SIZE_MAX (1U << 9) 564 - #define CRC64_NONCE_MAX ((1U << 10) - 1) 565 - 566 - struct bch_extent_crc128 { 567 - #if defined(__LITTLE_ENDIAN_BITFIELD) 568 - __u64 type:4, 569 - _compressed_size:13, 570 - _uncompressed_size:13, 571 - offset:13, 572 - nonce:13, 573 - csum_type:4, 574 - compression_type:4; 575 - #elif defined (__BIG_ENDIAN_BITFIELD) 576 - __u64 compression_type:4, 577 - csum_type:4, 578 - nonce:13, 579 - offset:13, 580 - _uncompressed_size:13, 581 - _compressed_size:13, 582 - type:4; 583 - #endif 584 - struct bch_csum csum; 585 - } __packed __aligned(8); 586 - 587 - #define CRC128_SIZE_MAX (1U << 13) 588 - #define CRC128_NONCE_MAX ((1U << 13) - 1) 589 - 590 - /* 591 - * @reservation - pointer hasn't been written to, just reserved 592 - */ 593 - struct bch_extent_ptr { 594 - #if defined(__LITTLE_ENDIAN_BITFIELD) 595 - __u64 type:1, 596 - cached:1, 597 - unused:1, 598 - unwritten:1, 599 - offset:44, /* 8 petabytes */ 600 - dev:8, 601 - gen:8; 602 - #elif defined (__BIG_ENDIAN_BITFIELD) 603 - __u64 gen:8, 604 - dev:8, 605 - offset:44, 606 - unwritten:1, 607 - unused:1, 608 - cached:1, 
609 - type:1; 610 - #endif 611 - } __packed __aligned(8); 612 - 613 - struct bch_extent_stripe_ptr { 614 - #if defined(__LITTLE_ENDIAN_BITFIELD) 615 - __u64 type:5, 616 - block:8, 617 - redundancy:4, 618 - idx:47; 619 - #elif defined (__BIG_ENDIAN_BITFIELD) 620 - __u64 idx:47, 621 - redundancy:4, 622 - block:8, 623 - type:5; 624 - #endif 625 - }; 626 - 627 - struct bch_extent_rebalance { 628 - #if defined(__LITTLE_ENDIAN_BITFIELD) 629 - __u64 type:6, 630 - unused:34, 631 - compression:8, /* enum bch_compression_opt */ 632 - target:16; 633 - #elif defined (__BIG_ENDIAN_BITFIELD) 634 - __u64 target:16, 635 - compression:8, 636 - unused:34, 637 - type:6; 638 - #endif 639 - }; 640 - 641 - union bch_extent_entry { 642 - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 643 - unsigned long type; 644 - #elif __BITS_PER_LONG == 32 645 - struct { 646 - unsigned long pad; 647 - unsigned long type; 648 - }; 649 - #else 650 - #error edit for your odd byteorder. 651 - #endif 652 - 653 - #define x(f, n) struct bch_extent_##f f; 654 - BCH_EXTENT_ENTRY_TYPES() 655 - #undef x 656 - }; 657 - 658 - struct bch_btree_ptr { 659 - struct bch_val v; 660 - 661 - __u64 _data[0]; 662 - struct bch_extent_ptr start[]; 663 - } __packed __aligned(8); 664 - 665 - struct bch_btree_ptr_v2 { 666 - struct bch_val v; 667 - 668 - __u64 mem_ptr; 669 - __le64 seq; 670 - __le16 sectors_written; 671 - __le16 flags; 672 - struct bpos min_key; 673 - __u64 _data[0]; 674 - struct bch_extent_ptr start[]; 675 - } __packed __aligned(8); 676 - 677 - LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); 678 - 679 - struct bch_extent { 680 - struct bch_val v; 681 - 682 - __u64 _data[0]; 683 - union bch_extent_entry start[]; 684 - } __packed __aligned(8); 685 - 686 - struct bch_reservation { 687 - struct bch_val v; 688 - 689 - __le32 generation; 690 - __u8 nr_replicas; 691 - __u8 pad[3]; 692 - } __packed __aligned(8); 693 - 694 - /* Maximum size (in u64s) a single pointer could be: */ 695 - #define BKEY_EXTENT_PTR_U64s_MAX\ 696 - ((sizeof(struct bch_extent_crc128) + \ 697 - sizeof(struct bch_extent_ptr)) / sizeof(__u64)) 698 - 699 - /* Maximum possible size of an entire extent value: */ 700 - #define BKEY_EXTENT_VAL_U64s_MAX \ 701 - (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) 702 - 703 - /* * Maximum possible size of an entire extent, key + value: */ 704 - #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) 705 - 706 - /* Btree pointers don't carry around checksums: */ 707 - #define BKEY_BTREE_PTR_VAL_U64s_MAX \ 708 - ((sizeof(struct bch_btree_ptr_v2) + \ 709 - sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) 710 - #define BKEY_BTREE_PTR_U64s_MAX \ 711 - (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) 712 - 713 - /* Inodes */ 714 - 715 - #define BLOCKDEV_INODE_MAX 4096 716 - 717 - #define BCACHEFS_ROOT_INO 4096 718 - 719 - struct bch_inode { 720 - struct bch_val v; 721 - 722 - __le64 bi_hash_seed; 723 - __le32 bi_flags; 724 - __le16 bi_mode; 725 - __u8 fields[]; 726 - } __packed __aligned(8); 727 - 728 - struct bch_inode_v2 { 729 - struct bch_val v; 730 - 731 - __le64 bi_journal_seq; 732 - __le64 bi_hash_seed; 733 - __le64 bi_flags; 734 - __le16 bi_mode; 735 - __u8 fields[]; 736 - } __packed __aligned(8); 737 - 738 - struct bch_inode_v3 { 739 - struct bch_val v; 740 - 741 - __le64 bi_journal_seq; 742 - __le64 bi_hash_seed; 743 - __le64 bi_flags; 744 - __le64 bi_sectors; 745 - __le64 bi_size; 746 - __le64 bi_version; 747 - __u8 fields[]; 748 - } __packed 
__aligned(8); 749 - 750 - #define INODEv3_FIELDS_START_INITIAL 6 751 - #define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64)) 752 - 753 - struct bch_inode_generation { 754 - struct bch_val v; 755 - 756 - __le32 bi_generation; 757 - __le32 pad; 758 - } __packed __aligned(8); 759 - 760 - /* 761 - * bi_subvol and bi_parent_subvol are only set for subvolume roots: 762 - */ 763 - 764 - #define BCH_INODE_FIELDS_v2() \ 765 - x(bi_atime, 96) \ 766 - x(bi_ctime, 96) \ 767 - x(bi_mtime, 96) \ 768 - x(bi_otime, 96) \ 769 - x(bi_size, 64) \ 770 - x(bi_sectors, 64) \ 771 - x(bi_uid, 32) \ 772 - x(bi_gid, 32) \ 773 - x(bi_nlink, 32) \ 774 - x(bi_generation, 32) \ 775 - x(bi_dev, 32) \ 776 - x(bi_data_checksum, 8) \ 777 - x(bi_compression, 8) \ 778 - x(bi_project, 32) \ 779 - x(bi_background_compression, 8) \ 780 - x(bi_data_replicas, 8) \ 781 - x(bi_promote_target, 16) \ 782 - x(bi_foreground_target, 16) \ 783 - x(bi_background_target, 16) \ 784 - x(bi_erasure_code, 16) \ 785 - x(bi_fields_set, 16) \ 786 - x(bi_dir, 64) \ 787 - x(bi_dir_offset, 64) \ 788 - x(bi_subvol, 32) \ 789 - x(bi_parent_subvol, 32) 790 - 791 - #define BCH_INODE_FIELDS_v3() \ 792 - x(bi_atime, 96) \ 793 - x(bi_ctime, 96) \ 794 - x(bi_mtime, 96) \ 795 - x(bi_otime, 96) \ 796 - x(bi_uid, 32) \ 797 - x(bi_gid, 32) \ 798 - x(bi_nlink, 32) \ 799 - x(bi_generation, 32) \ 800 - x(bi_dev, 32) \ 801 - x(bi_data_checksum, 8) \ 802 - x(bi_compression, 8) \ 803 - x(bi_project, 32) \ 804 - x(bi_background_compression, 8) \ 805 - x(bi_data_replicas, 8) \ 806 - x(bi_promote_target, 16) \ 807 - x(bi_foreground_target, 16) \ 808 - x(bi_background_target, 16) \ 809 - x(bi_erasure_code, 16) \ 810 - x(bi_fields_set, 16) \ 811 - x(bi_dir, 64) \ 812 - x(bi_dir_offset, 64) \ 813 - x(bi_subvol, 32) \ 814 - x(bi_parent_subvol, 32) \ 815 - x(bi_nocow, 8) 816 - 817 - /* subset of BCH_INODE_FIELDS */ 818 - #define BCH_INODE_OPTS() \ 819 - x(data_checksum, 8) \ 820 - x(compression, 8) \ 821 - x(project, 32) \ 822 - x(background_compression, 8) \ 823 - x(data_replicas, 8) \ 824 - x(promote_target, 16) \ 825 - x(foreground_target, 16) \ 826 - x(background_target, 16) \ 827 - x(erasure_code, 16) \ 828 - x(nocow, 8) 829 - 830 - enum inode_opt_id { 831 - #define x(name, ...) 
\ 832 - Inode_opt_##name, 833 - BCH_INODE_OPTS() 834 - #undef x 835 - Inode_opt_nr, 836 - }; 837 - 838 - #define BCH_INODE_FLAGS() \ 839 - x(sync, 0) \ 840 - x(immutable, 1) \ 841 - x(append, 2) \ 842 - x(nodump, 3) \ 843 - x(noatime, 4) \ 844 - x(i_size_dirty, 5) \ 845 - x(i_sectors_dirty, 6) \ 846 - x(unlinked, 7) \ 847 - x(backptr_untrusted, 8) 848 - 849 - /* bits 20+ reserved for packed fields below: */ 850 - 851 - enum bch_inode_flags { 852 - #define x(t, n) BCH_INODE_##t = 1U << n, 853 - BCH_INODE_FLAGS() 854 - #undef x 855 - }; 856 - 857 - enum __bch_inode_flags { 858 - #define x(t, n) __BCH_INODE_##t = n, 859 - BCH_INODE_FLAGS() 860 - #undef x 861 - }; 862 - 863 - LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); 864 - LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); 865 - LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); 866 - 867 - LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); 868 - LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); 869 - 870 - LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24); 871 - LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31); 872 - 873 - LE64_BITMASK(INODEv3_FIELDS_START, 874 - struct bch_inode_v3, bi_flags, 31, 36); 875 - LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); 876 - 877 - /* Dirents */ 878 - 879 - /* 880 - * Dirents (and xattrs) have to implement string lookups; since our b-tree 881 - * doesn't support arbitrary length strings for the key, we instead index by a 882 - * 64 bit hash (currently truncated sha1) of the string, stored in the offset 883 - * field of the key - using linear probing to resolve hash collisions. This also 884 - * provides us with the readdir cookie posix requires. 
885 - * 886 - * Linear probing requires us to use whiteouts for deletions, in the event of a 887 - * collision: 888 - */ 889 - 890 - struct bch_dirent { 891 - struct bch_val v; 892 - 893 - /* Target inode number: */ 894 - union { 895 - __le64 d_inum; 896 - struct { /* DT_SUBVOL */ 897 - __le32 d_child_subvol; 898 - __le32 d_parent_subvol; 899 - }; 900 - }; 901 - 902 - /* 903 - * Copy of mode bits 12-15 from the target inode - so userspace can get 904 - * the filetype without having to do a stat() 905 - */ 906 - __u8 d_type; 907 - 908 - __u8 d_name[]; 909 - } __packed __aligned(8); 910 - 911 - #define DT_SUBVOL 16 912 - #define BCH_DT_MAX 17 913 - 914 - #define BCH_NAME_MAX 512 915 - 916 - /* Xattrs */ 917 - 918 - #define KEY_TYPE_XATTR_INDEX_USER 0 919 - #define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 920 - #define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 921 - #define KEY_TYPE_XATTR_INDEX_TRUSTED 3 922 - #define KEY_TYPE_XATTR_INDEX_SECURITY 4 923 - 924 - struct bch_xattr { 925 - struct bch_val v; 926 - __u8 x_type; 927 - __u8 x_name_len; 928 - __le16 x_val_len; 929 - __u8 x_name[]; 930 - } __packed __aligned(8); 931 - 932 - /* Bucket/allocation information: */ 933 - 934 - struct bch_alloc { 935 - struct bch_val v; 936 - __u8 fields; 937 - __u8 gen; 938 - __u8 data[]; 939 - } __packed __aligned(8); 940 - 941 - #define BCH_ALLOC_FIELDS_V1() \ 942 - x(read_time, 16) \ 943 - x(write_time, 16) \ 944 - x(data_type, 8) \ 945 - x(dirty_sectors, 16) \ 946 - x(cached_sectors, 16) \ 947 - x(oldest_gen, 8) \ 948 - x(stripe, 32) \ 949 - x(stripe_redundancy, 8) 950 - 951 - enum { 952 - #define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, 953 - BCH_ALLOC_FIELDS_V1() 954 - #undef x 955 - }; 956 - 957 - struct bch_alloc_v2 { 958 - struct bch_val v; 959 - __u8 nr_fields; 960 - __u8 gen; 961 - __u8 oldest_gen; 962 - __u8 data_type; 963 - __u8 data[]; 964 - } __packed __aligned(8); 965 - 966 - #define BCH_ALLOC_FIELDS_V2() \ 967 - x(read_time, 64) \ 968 - x(write_time, 64) \ 969 - x(dirty_sectors, 32) \ 970 - x(cached_sectors, 32) \ 971 - x(stripe, 32) \ 972 - x(stripe_redundancy, 8) 973 - 974 - struct bch_alloc_v3 { 975 - struct bch_val v; 976 - __le64 journal_seq; 977 - __le32 flags; 978 - __u8 nr_fields; 979 - __u8 gen; 980 - __u8 oldest_gen; 981 - __u8 data_type; 982 - __u8 data[]; 983 - } __packed __aligned(8); 984 - 985 - LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1) 986 - LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2) 987 - 988 - struct bch_alloc_v4 { 989 - struct bch_val v; 990 - __u64 journal_seq; 991 - __u32 flags; 992 - __u8 gen; 993 - __u8 oldest_gen; 994 - __u8 data_type; 995 - __u8 stripe_redundancy; 996 - __u32 dirty_sectors; 997 - __u32 cached_sectors; 998 - __u64 io_time[2]; 999 - __u32 stripe; 1000 - __u32 nr_external_backpointers; 1001 - __u64 fragmentation_lru; 1002 - } __packed __aligned(8); 1003 - 1004 - #define BCH_ALLOC_V4_U64s_V0 6 1005 - #define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64)) 1006 - 1007 - BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1) 1008 - BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2) 1009 - BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8) 1010 - BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14) 1011 - 1012 - #define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX 40 1013 425 1014 426 struct bch_backpointer { 1015 427 struct bch_val v; ··· 433 1021 struct bpos pos; 434 1022 } __packed __aligned(8); 435 1023 436 - #define 
KEY_TYPE_BUCKET_GENS_BITS 8 437 - #define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS) 438 - #define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1) 439 - 440 - struct bch_bucket_gens { 441 - struct bch_val v; 442 - u8 gens[KEY_TYPE_BUCKET_GENS_NR]; 443 - } __packed __aligned(8); 444 - 445 - /* Quotas: */ 446 - 447 - enum quota_types { 448 - QTYP_USR = 0, 449 - QTYP_GRP = 1, 450 - QTYP_PRJ = 2, 451 - QTYP_NR = 3, 452 - }; 453 - 454 - enum quota_counters { 455 - Q_SPC = 0, 456 - Q_INO = 1, 457 - Q_COUNTERS = 2, 458 - }; 459 - 460 - struct bch_quota_counter { 461 - __le64 hardlimit; 462 - __le64 softlimit; 463 - }; 464 - 465 - struct bch_quota { 466 - struct bch_val v; 467 - struct bch_quota_counter c[Q_COUNTERS]; 468 - } __packed __aligned(8); 469 - 470 - /* Erasure coding */ 471 - 472 - struct bch_stripe { 473 - struct bch_val v; 474 - __le16 sectors; 475 - __u8 algorithm; 476 - __u8 nr_blocks; 477 - __u8 nr_redundant; 478 - 479 - __u8 csum_granularity_bits; 480 - __u8 csum_type; 481 - __u8 pad; 482 - 483 - struct bch_extent_ptr ptrs[]; 484 - } __packed __aligned(8); 485 - 486 - /* Reflink: */ 487 - 488 - struct bch_reflink_p { 489 - struct bch_val v; 490 - __le64 idx; 491 - /* 492 - * A reflink pointer might point to an indirect extent which is then 493 - * later split (by copygc or rebalance). If we only pointed to part of 494 - * the original indirect extent, and then one of the fragments is 495 - * outside the range we point to, we'd leak a refcount: so when creating 496 - * reflink pointers, we need to store pad values to remember the full 497 - * range we were taking a reference on. 498 - */ 499 - __le32 front_pad; 500 - __le32 back_pad; 501 - } __packed __aligned(8); 502 - 503 - struct bch_reflink_v { 504 - struct bch_val v; 505 - __le64 refcount; 506 - union bch_extent_entry start[0]; 507 - __u64 _data[]; 508 - } __packed __aligned(8); 509 - 510 - struct bch_indirect_inline_data { 511 - struct bch_val v; 512 - __le64 refcount; 513 - u8 data[]; 514 - }; 515 - 516 - /* Inline data */ 517 - 518 - struct bch_inline_data { 519 - struct bch_val v; 520 - u8 data[]; 521 - }; 522 - 523 - /* Subvolumes: */ 524 - 525 - #define SUBVOL_POS_MIN POS(0, 1) 526 - #define SUBVOL_POS_MAX POS(0, S32_MAX) 527 - #define BCACHEFS_ROOT_SUBVOL 1 528 - 529 - struct bch_subvolume { 530 - struct bch_val v; 531 - __le32 flags; 532 - __le32 snapshot; 533 - __le64 inode; 534 - /* 535 - * Snapshot subvolumes form a tree, separate from the snapshot nodes 536 - * tree - if this subvolume is a snapshot, this is the ID of the 537 - * subvolume it was created from: 538 - */ 539 - __le32 parent; 540 - __le32 pad; 541 - bch_le128 otime; 542 - }; 543 - 544 - LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) 545 - /* 546 - * We need to know whether a subvolume is a snapshot so we can know whether we 547 - * can delete it (or whether it should just be rm -rf'd) 548 - */ 549 - LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) 550 - LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) 551 - 552 - /* Snapshots */ 553 - 554 - struct bch_snapshot { 555 - struct bch_val v; 556 - __le32 flags; 557 - __le32 parent; 558 - __le32 children[2]; 559 - __le32 subvol; 560 - /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */ 561 - __le32 tree; 562 - __le32 depth; 563 - __le32 skip[3]; 564 - }; 565 - 566 - LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) 567 - 568 - /* True if a subvolume points to this snapshot node: */ 569 - 
LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) 570 - 571 - /* 572 - * Snapshot trees: 573 - * 574 - * The snapshot_trees btree gives us persistent indentifier for each tree of 575 - * bch_snapshot nodes, and allow us to record and easily find the root/master 576 - * subvolume that other snapshots were created from: 577 - */ 578 - struct bch_snapshot_tree { 579 - struct bch_val v; 580 - __le32 master_subvol; 581 - __le32 root_snapshot; 582 - }; 583 - 584 1024 /* LRU btree: */ 585 1025 586 1026 struct bch_lru { ··· 441 1177 } __packed __aligned(8); 442 1178 443 1179 #define LRU_ID_STRIPES (1U << 16) 444 - 445 - /* Logged operations btree: */ 446 - 447 - struct bch_logged_op_truncate { 448 - struct bch_val v; 449 - __le32 subvol; 450 - __le32 pad; 451 - __le64 inum; 452 - __le64 new_i_size; 453 - }; 454 - 455 - enum logged_op_finsert_state { 456 - LOGGED_OP_FINSERT_start, 457 - LOGGED_OP_FINSERT_shift_extents, 458 - LOGGED_OP_FINSERT_finish, 459 - }; 460 - 461 - struct bch_logged_op_finsert { 462 - struct bch_val v; 463 - __u8 state; 464 - __u8 pad[3]; 465 - __le32 subvol; 466 - __le64 inum; 467 - __le64 dst_offset; 468 - __le64 src_offset; 469 - __le64 pos; 470 - }; 471 1180 472 1181 /* Optional/variable size superblock sections: */ 473 1182 ··· 466 1229 x(errors, 12) \ 467 1230 x(ext, 13) \ 468 1231 x(downgrade, 14) 1232 + 1233 + #include "alloc_background_format.h" 1234 + #include "extents_format.h" 1235 + #include "reflink_format.h" 1236 + #include "ec_format.h" 1237 + #include "inode_format.h" 1238 + #include "dirent_format.h" 1239 + #include "xattr_format.h" 1240 + #include "quota_format.h" 1241 + #include "logged_ops_format.h" 1242 + #include "snapshot_format.h" 1243 + #include "subvolume_format.h" 1244 + #include "sb-counters_format.h" 469 1245 470 1246 enum bch_sb_field_type { 471 1247 #define x(f, nr) BCH_SB_FIELD_##f = nr, ··· 715 1465 struct bch_replicas_entry_v1 entries[]; 716 1466 } __packed __aligned(8); 717 1467 718 - /* BCH_SB_FIELD_quota: */ 719 - 720 - struct bch_sb_quota_counter { 721 - __le32 timelimit; 722 - __le32 warnlimit; 723 - }; 724 - 725 - struct bch_sb_quota_type { 726 - __le64 flags; 727 - struct bch_sb_quota_counter c[Q_COUNTERS]; 728 - }; 729 - 730 - struct bch_sb_field_quota { 731 - struct bch_sb_field field; 732 - struct bch_sb_quota_type q[QTYP_NR]; 733 - } __packed __aligned(8); 734 - 735 1468 /* BCH_SB_FIELD_disk_groups: */ 736 1469 737 1470 #define BCH_SB_LABEL_SIZE 32 ··· 732 1499 struct bch_sb_field field; 733 1500 struct bch_disk_group entries[]; 734 1501 } __packed __aligned(8); 735 - 736 - /* BCH_SB_FIELD_counters */ 737 - 738 - #define BCH_PERSISTENT_COUNTERS() \ 739 - x(io_read, 0) \ 740 - x(io_write, 1) \ 741 - x(io_move, 2) \ 742 - x(bucket_invalidate, 3) \ 743 - x(bucket_discard, 4) \ 744 - x(bucket_alloc, 5) \ 745 - x(bucket_alloc_fail, 6) \ 746 - x(btree_cache_scan, 7) \ 747 - x(btree_cache_reap, 8) \ 748 - x(btree_cache_cannibalize, 9) \ 749 - x(btree_cache_cannibalize_lock, 10) \ 750 - x(btree_cache_cannibalize_lock_fail, 11) \ 751 - x(btree_cache_cannibalize_unlock, 12) \ 752 - x(btree_node_write, 13) \ 753 - x(btree_node_read, 14) \ 754 - x(btree_node_compact, 15) \ 755 - x(btree_node_merge, 16) \ 756 - x(btree_node_split, 17) \ 757 - x(btree_node_rewrite, 18) \ 758 - x(btree_node_alloc, 19) \ 759 - x(btree_node_free, 20) \ 760 - x(btree_node_set_root, 21) \ 761 - x(btree_path_relock_fail, 22) \ 762 - x(btree_path_upgrade_fail, 23) \ 763 - x(btree_reserve_get_fail, 24) \ 764 - x(journal_entry_full, 25) \ 765 - 
x(journal_full, 26) \ 766 - x(journal_reclaim_finish, 27) \ 767 - x(journal_reclaim_start, 28) \ 768 - x(journal_write, 29) \ 769 - x(read_promote, 30) \ 770 - x(read_bounce, 31) \ 771 - x(read_split, 33) \ 772 - x(read_retry, 32) \ 773 - x(read_reuse_race, 34) \ 774 - x(move_extent_read, 35) \ 775 - x(move_extent_write, 36) \ 776 - x(move_extent_finish, 37) \ 777 - x(move_extent_fail, 38) \ 778 - x(move_extent_start_fail, 39) \ 779 - x(copygc, 40) \ 780 - x(copygc_wait, 41) \ 781 - x(gc_gens_end, 42) \ 782 - x(gc_gens_start, 43) \ 783 - x(trans_blocked_journal_reclaim, 44) \ 784 - x(trans_restart_btree_node_reused, 45) \ 785 - x(trans_restart_btree_node_split, 46) \ 786 - x(trans_restart_fault_inject, 47) \ 787 - x(trans_restart_iter_upgrade, 48) \ 788 - x(trans_restart_journal_preres_get, 49) \ 789 - x(trans_restart_journal_reclaim, 50) \ 790 - x(trans_restart_journal_res_get, 51) \ 791 - x(trans_restart_key_cache_key_realloced, 52) \ 792 - x(trans_restart_key_cache_raced, 53) \ 793 - x(trans_restart_mark_replicas, 54) \ 794 - x(trans_restart_mem_realloced, 55) \ 795 - x(trans_restart_memory_allocation_failure, 56) \ 796 - x(trans_restart_relock, 57) \ 797 - x(trans_restart_relock_after_fill, 58) \ 798 - x(trans_restart_relock_key_cache_fill, 59) \ 799 - x(trans_restart_relock_next_node, 60) \ 800 - x(trans_restart_relock_parent_for_fill, 61) \ 801 - x(trans_restart_relock_path, 62) \ 802 - x(trans_restart_relock_path_intent, 63) \ 803 - x(trans_restart_too_many_iters, 64) \ 804 - x(trans_restart_traverse, 65) \ 805 - x(trans_restart_upgrade, 66) \ 806 - x(trans_restart_would_deadlock, 67) \ 807 - x(trans_restart_would_deadlock_write, 68) \ 808 - x(trans_restart_injected, 69) \ 809 - x(trans_restart_key_cache_upgrade, 70) \ 810 - x(trans_traverse_all, 71) \ 811 - x(transaction_commit, 72) \ 812 - x(write_super, 73) \ 813 - x(trans_restart_would_deadlock_recursion_limit, 74) \ 814 - x(trans_restart_write_buffer_flush, 75) \ 815 - x(trans_restart_split_race, 76) \ 816 - x(write_buffer_flush_slowpath, 77) \ 817 - x(write_buffer_flush_sync, 78) 818 - 819 - enum bch_persistent_counters { 820 - #define x(t, n, ...) BCH_COUNTER_##t, 821 - BCH_PERSISTENT_COUNTERS() 822 - #undef x 823 - BCH_COUNTER_NR 824 - }; 825 - 826 - struct bch_sb_field_counters { 827 - struct bch_sb_field field; 828 - __le64 d[]; 829 - }; 830 1502 831 1503 /* 832 1504 * On clean shutdown, store btree roots and current journal sequence number in
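
This is the bulk of the "break up bcachefs_format.h" item from the description: the on-disk struct and bitmask definitions for extents, inodes, dirents, xattrs, allocation, quotas, logged ops, snapshots, subvolumes and superblock counters move into per-subsystem *_format.h headers, and bcachefs_format.h pulls them back in through the #include block visible in this hunk, so the on-disk format itself is unchanged. Each new header follows the same shape as the alloc_background_format.h file added earlier in this diff; below is a purely illustrative skeleton of the pattern (the file and struct names here are made up, not part of the series):

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_EXAMPLE_FORMAT_H
#define _BCACHEFS_EXAMPLE_FORMAT_H

/*
 * One subsystem's on-disk types: only __packed structs, field macros and
 * LE*_BITMASK() accessors live here, no functions, so the header can be
 * included from bcachefs_format.h without pulling in the rest of the code.
 */
struct bch_example {
	struct bch_val	v;
	__le64		some_field;
} __packed __aligned(8);

#endif /* _BCACHEFS_EXAMPLE_FORMAT_H */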
+1 -1
fs/bcachefs/bkey.c
··· 33 33 next_key_bits -= 64; 34 34 } 35 35 36 - bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits)); 36 + bch2_prt_u64_base2_nbits(out, v, min(word_bits, nr_key_bits)); 37 37 38 38 if (!next_key_bits) 39 39 break;
+9
fs/bcachefs/bkey_methods.c
··· 63 63 return 0; 64 64 } 65 65 66 + static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, 67 + struct bkey_s_c k) 68 + { 69 + struct bkey_s_c_cookie ck = bkey_s_c_to_cookie(k); 70 + 71 + prt_printf(out, "%llu", le64_to_cpu(ck.v->cookie)); 72 + } 73 + 66 74 #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ 67 75 .key_invalid = key_type_cookie_invalid, \ 76 + .val_to_text = key_type_cookie_to_text, \ 68 77 .min_val_size = 8, \ 69 78 }) 70 79
+6 -4
fs/bcachefs/bkey_methods.h
··· 83 83 84 84 __BTREE_TRIGGER_NORUN, 85 85 __BTREE_TRIGGER_TRANSACTIONAL, 86 + __BTREE_TRIGGER_ATOMIC, 87 + __BTREE_TRIGGER_GC, 86 88 __BTREE_TRIGGER_INSERT, 87 89 __BTREE_TRIGGER_OVERWRITE, 88 - __BTREE_TRIGGER_GC, 89 90 __BTREE_TRIGGER_BUCKET_INVALIDATE, 90 91 }; 91 92 ··· 108 107 * causing us to go emergency read-only) 109 108 */ 110 109 #define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL) 110 + #define BTREE_TRIGGER_ATOMIC (1U << __BTREE_TRIGGER_ATOMIC) 111 + 112 + /* We're in gc/fsck: running triggers to recalculate e.g. disk usage */ 113 + #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) 111 114 112 115 /* @new is entering the btree */ 113 116 #define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) 114 117 115 118 /* @old is leaving the btree */ 116 119 #define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) 117 - 118 - /* We're in gc/fsck: running triggers to recalculate e.g. disk usage */ 119 - #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) 120 120 121 121 /* signal from bucket invalidate path to alloc trigger */ 122 122 #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
+3 -4
fs/bcachefs/bset.c
··· 720 720 { 721 721 struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t); 722 722 struct bkey_i min_key, max_key; 723 - unsigned j, cacheline = 1; 723 + unsigned cacheline = 1; 724 724 725 725 t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)), 726 726 bset_ro_tree_capacity(b, t)); ··· 823 823 set_btree_bset(b, t, i); 824 824 } 825 825 826 - void bch2_bset_init_next(struct bch_fs *c, struct btree *b, 827 - struct btree_node_entry *bne) 826 + void bch2_bset_init_next(struct btree *b, struct btree_node_entry *bne) 828 827 { 829 828 struct bset *i = &bne->keys; 830 829 struct bset_tree *t; 831 830 832 - BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c)); 831 + BUG_ON(bset_byte_offset(b, bne) >= btree_buf_bytes(b)); 833 832 BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b))); 834 833 BUG_ON(b->nsets >= MAX_BSETS); 835 834
+1 -2
fs/bcachefs/bset.h
··· 264 264 void bch2_btree_keys_init(struct btree *); 265 265 266 266 void bch2_bset_init_first(struct btree *, struct bset *); 267 - void bch2_bset_init_next(struct bch_fs *, struct btree *, 268 - struct btree_node_entry *); 267 + void bch2_bset_init_next(struct btree *, struct btree_node_entry *); 269 268 void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool); 270 269 271 270 void bch2_bset_insert(struct btree *, struct btree_node_iter *,
+6 -6
fs/bcachefs/btree_cache.c
··· 60 60 61 61 clear_btree_node_just_written(b); 62 62 63 - kvpfree(b->data, btree_bytes(c)); 63 + kvpfree(b->data, btree_buf_bytes(b)); 64 64 b->data = NULL; 65 65 #ifdef __KERNEL__ 66 66 kvfree(b->aux_data); ··· 94 94 { 95 95 BUG_ON(b->data || b->aux_data); 96 96 97 - b->data = kvpmalloc(btree_bytes(c), gfp); 97 + b->data = kvpmalloc(btree_buf_bytes(b), gfp); 98 98 if (!b->data) 99 99 return -BCH_ERR_ENOMEM_btree_node_mem_alloc; 100 100 #ifdef __KERNEL__ ··· 107 107 b->aux_data = NULL; 108 108 #endif 109 109 if (!b->aux_data) { 110 - kvpfree(b->data, btree_bytes(c)); 110 + kvpfree(b->data, btree_buf_bytes(b)); 111 111 b->data = NULL; 112 112 return -BCH_ERR_ENOMEM_btree_node_mem_alloc; 113 113 } ··· 126 126 bkey_btree_ptr_init(&b->key); 127 127 INIT_LIST_HEAD(&b->list); 128 128 INIT_LIST_HEAD(&b->write_blocked); 129 - b->byte_order = ilog2(btree_bytes(c)); 129 + b->byte_order = ilog2(c->opts.btree_node_size); 130 130 return b; 131 131 } 132 132 ··· 408 408 if (c->verify_data) 409 409 list_move(&c->verify_data->list, &bc->live); 410 410 411 - kvpfree(c->verify_ondisk, btree_bytes(c)); 411 + kvpfree(c->verify_ondisk, c->opts.btree_node_size); 412 412 413 413 for (i = 0; i < btree_id_nr_alive(c); i++) { 414 414 struct btree_root *r = bch2_btree_id_root(c, i); ··· 1192 1192 " failed unpacked %zu\n", 1193 1193 b->unpack_fn_len, 1194 1194 b->nr.live_u64s * sizeof(u64), 1195 - btree_bytes(c) - sizeof(struct btree_node), 1195 + btree_buf_bytes(b) - sizeof(struct btree_node), 1196 1196 b->nr.live_u64s * 100 / btree_max_u64s(c), 1197 1197 b->sib_u64s[0], 1198 1198 b->sib_u64s[1],
+12 -7
fs/bcachefs/btree_cache.h
··· 74 74 _iter = 0; _iter < (_tbl)->size; _iter++) \ 75 75 rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash) 76 76 77 - static inline size_t btree_bytes(struct bch_fs *c) 77 + static inline size_t btree_buf_bytes(const struct btree *b) 78 78 { 79 - return c->opts.btree_node_size; 79 + return 1UL << b->byte_order; 80 80 } 81 81 82 - static inline size_t btree_max_u64s(struct bch_fs *c) 82 + static inline size_t btree_buf_max_u64s(const struct btree *b) 83 83 { 84 - return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64); 84 + return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64); 85 85 } 86 86 87 - static inline size_t btree_pages(struct bch_fs *c) 87 + static inline size_t btree_max_u64s(const struct bch_fs *c) 88 88 { 89 - return btree_bytes(c) / PAGE_SIZE; 89 + return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64); 90 90 } 91 91 92 - static inline unsigned btree_blocks(struct bch_fs *c) 92 + static inline size_t btree_sectors(const struct bch_fs *c) 93 + { 94 + return c->opts.btree_node_size >> SECTOR_SHIFT; 95 + } 96 + 97 + static inline unsigned btree_blocks(const struct bch_fs *c) 93 98 { 94 99 return btree_sectors(c) >> c->block_bits; 95 100 }
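
This header change is the "prep work for variable size btree node buffers" item: sizes that used to be computed from the filesystem-wide option (the old btree_bytes(c), i.e. c->opts.btree_node_size) are now derived per node from b->byte_order, so a node's in-memory buffer can later be sized independently of the global option; the callers in btree_cache.c above and btree_io.c below switch to the per-node helpers accordingly. The new helpers, re-wrapped from the flattened hunk:

static inline size_t btree_buf_bytes(const struct btree *b)
{
	return 1UL << b->byte_order;	/* set when the node buffer is allocated */
}

static inline size_t btree_buf_max_u64s(const struct btree *b)
{
	return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64);
}

/* callers that really mean "the configured node size" keep using the option: */
static inline size_t btree_max_u64s(const struct bch_fs *c)
{
	return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64);
}

static inline size_t btree_sectors(const struct bch_fs *c)
{
	return c->opts.btree_node_size >> SECTOR_SHIFT;
}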
+18 -18
fs/bcachefs/btree_gc.c
··· 597 597 "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" 598 598 "while marking %s", 599 599 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 600 - bch2_data_types[ptr_data_type(k->k, &p.ptr)], 600 + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), 601 601 p.ptr.gen, 602 602 (printbuf_reset(&buf), 603 603 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) { ··· 615 615 "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" 616 616 "while marking %s", 617 617 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 618 - bch2_data_types[ptr_data_type(k->k, &p.ptr)], 618 + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), 619 619 p.ptr.gen, g->gen, 620 620 (printbuf_reset(&buf), 621 621 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) { ··· 637 637 "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" 638 638 "while marking %s", 639 639 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, 640 - bch2_data_types[ptr_data_type(k->k, &p.ptr)], 640 + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), 641 641 p.ptr.gen, 642 642 (printbuf_reset(&buf), 643 643 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) ··· 649 649 "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" 650 650 "while marking %s", 651 651 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 652 - bch2_data_types[ptr_data_type(k->k, &p.ptr)], 652 + bch2_data_type_str(ptr_data_type(k->k, &p.ptr)), 653 653 p.ptr.gen, g->gen, 654 654 (printbuf_reset(&buf), 655 655 bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) ··· 664 664 "bucket %u:%zu different types of data in same bucket: %s, %s\n" 665 665 "while marking %s", 666 666 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 667 - bch2_data_types[g->data_type], 668 - bch2_data_types[data_type], 667 + bch2_data_type_str(g->data_type), 668 + bch2_data_type_str(data_type), 669 669 (printbuf_reset(&buf), 670 670 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { 671 671 if (data_type == BCH_DATA_btree) { ··· 1238 1238 1239 1239 for (i = 0; i < BCH_DATA_NR; i++) { 1240 1240 copy_dev_field(dev_usage_buckets_wrong, 1241 - d[i].buckets, "%s buckets", bch2_data_types[i]); 1241 + d[i].buckets, "%s buckets", bch2_data_type_str(i)); 1242 1242 copy_dev_field(dev_usage_sectors_wrong, 1243 - d[i].sectors, "%s sectors", bch2_data_types[i]); 1243 + d[i].sectors, "%s sectors", bch2_data_type_str(i)); 1244 1244 copy_dev_field(dev_usage_fragmented_wrong, 1245 - d[i].fragmented, "%s fragmented", bch2_data_types[i]); 1245 + d[i].fragmented, "%s fragmented", bch2_data_type_str(i)); 1246 1246 } 1247 1247 } 1248 1248 ··· 1253 1253 bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr); 1254 1254 1255 1255 copy_fs_field(fs_usage_hidden_wrong, 1256 - hidden, "hidden"); 1256 + b.hidden, "hidden"); 1257 1257 copy_fs_field(fs_usage_btree_wrong, 1258 - btree, "btree"); 1258 + b.btree, "btree"); 1259 1259 1260 1260 if (!metadata_only) { 1261 1261 copy_fs_field(fs_usage_data_wrong, 1262 - data, "data"); 1262 + b.data, "data"); 1263 1263 copy_fs_field(fs_usage_cached_wrong, 1264 - cached, "cached"); 1264 + b.cached, "cached"); 1265 1265 copy_fs_field(fs_usage_reserved_wrong, 1266 - reserved, "reserved"); 1266 + b.reserved, "reserved"); 1267 1267 copy_fs_field(fs_usage_nr_inodes_wrong, 1268 - nr_inodes,"nr_inodes"); 1268 + b.nr_inodes,"nr_inodes"); 1269 1269 1270 1270 for (i = 0; i < BCH_REPLICAS_MAX; i++) 1271 1271 copy_fs_field(fs_usage_persistent_reserved_wrong, ··· 1417 1417 ": got %s, should be %s", 1418 1418 iter->pos.inode, iter->pos.offset, 1419 1419 gc.gen, 1420 - bch2_data_types[new.data_type], 1421 - bch2_data_types[gc.data_type])) 1420 + 
bch2_data_type_str(new.data_type), 1421 + bch2_data_type_str(gc.data_type))) 1422 1422 new.data_type = gc.data_type; 1423 1423 1424 1424 #define copy_bucket_field(_errtype, _f) \ ··· 1428 1428 ": got %u, should be %u", \ 1429 1429 iter->pos.inode, iter->pos.offset, \ 1430 1430 gc.gen, \ 1431 - bch2_data_types[gc.data_type], \ 1431 + bch2_data_type_str(gc.data_type), \ 1432 1432 new._f, gc._f)) \ 1433 1433 new._f = gc._f; \ 1434 1434
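
One more detail in the btree_gc.c hunk ties back to the "prep work for disk space accounting rewrite" item: the copy_fs_field() comparisons now reach the top-level usage totals through a "b." prefix (b.hidden, b.btree, b.data, ...), which suggests those counters were grouped into an embedded base struct inside struct bch_fs_usage. The layout below is a hedged guess reconstructed from the field names used in this hunk, not a definition taken from the series:

/* Assumed layout -- inferred from the "b.hidden", "b.btree", ... accesses above: */
struct bch_fs_usage_base {
	u64	hidden;
	u64	btree;
	u64	data;
	u64	cached;
	u64	reserved;
	u64	nr_inodes;
};

struct bch_fs_usage {
	struct bch_fs_usage_base	b;
	u64				persistent_reserved[BCH_REPLICAS_MAX];
	u64				replicas[];
};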
+19 -19
fs/bcachefs/btree_io.c
··· 112 112 unsigned flags = memalloc_nofs_save(); 113 113 void *p; 114 114 115 - BUG_ON(size > btree_bytes(c)); 115 + BUG_ON(size > c->opts.btree_node_size); 116 116 117 117 *used_mempool = false; 118 118 p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT); ··· 174 174 175 175 ptrs = ptrs_end = ((void *) new_whiteouts + bytes); 176 176 177 - for (k = unwritten_whiteouts_start(c, b); 178 - k != unwritten_whiteouts_end(c, b); 177 + for (k = unwritten_whiteouts_start(b); 178 + k != unwritten_whiteouts_end(b); 179 179 k = bkey_p_next(k)) 180 180 *--ptrs = k; 181 181 ··· 192 192 verify_no_dups(b, new_whiteouts, 193 193 (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); 194 194 195 - memcpy_u64s(unwritten_whiteouts_start(c, b), 195 + memcpy_u64s(unwritten_whiteouts_start(b), 196 196 new_whiteouts, b->whiteout_u64s); 197 197 198 198 btree_bounce_free(c, bytes, used_mempool, new_whiteouts); ··· 313 313 } 314 314 315 315 bytes = sorting_entire_node 316 - ? btree_bytes(c) 316 + ? btree_buf_bytes(b) 317 317 : __vstruct_bytes(struct btree_node, u64s); 318 318 319 319 out = btree_bounce_alloc(c, bytes, &used_mempool); ··· 338 338 if (sorting_entire_node) { 339 339 u64s = le16_to_cpu(out->keys.u64s); 340 340 341 - BUG_ON(bytes != btree_bytes(c)); 341 + BUG_ON(bytes != btree_buf_bytes(b)); 342 342 343 343 /* 344 344 * Our temporary buffer is the same size as the btree node's ··· 502 502 503 503 bne = want_new_bset(c, b); 504 504 if (bne) 505 - bch2_bset_init_next(c, b, bne); 505 + bch2_bset_init_next(b, bne); 506 506 507 507 bch2_btree_build_aux_trees(b); 508 508 ··· 1160 1160 ptr_written, b->written); 1161 1161 } else { 1162 1162 for (bne = write_block(b); 1163 - bset_byte_offset(b, bne) < btree_bytes(c); 1163 + bset_byte_offset(b, bne) < btree_buf_bytes(b); 1164 1164 bne = (void *) bne + block_bytes(c)) 1165 1165 btree_err_on(bne->keys.seq == b->data->keys.seq && 1166 1166 !bch2_journal_seq_is_blacklisted(c, ··· 1172 1172 "found bset signature after last bset"); 1173 1173 } 1174 1174 1175 - sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); 1175 + sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool); 1176 1176 sorted->keys.u64s = 0; 1177 1177 1178 1178 set_btree_bset(b, b->set, &b->data->keys); ··· 1188 1188 1189 1189 BUG_ON(b->nr.live_u64s != u64s); 1190 1190 1191 - btree_bounce_free(c, btree_bytes(c), used_mempool, sorted); 1191 + btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); 1192 1192 1193 1193 if (updated_range) 1194 1194 bch2_btree_node_drop_keys_outside_node(b); ··· 1284 1284 rb->have_ioref = bch2_dev_get_ioref(ca, READ); 1285 1285 bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META); 1286 1286 bio->bi_iter.bi_sector = rb->pick.ptr.offset; 1287 - bio->bi_iter.bi_size = btree_bytes(c); 1287 + bio->bi_iter.bi_size = btree_buf_bytes(b); 1288 1288 1289 1289 if (rb->have_ioref) { 1290 1290 bio_set_dev(bio, ca->disk_sb.bdev); ··· 1512 1512 } 1513 1513 1514 1514 if (best >= 0) { 1515 - memcpy(b->data, ra->buf[best], btree_bytes(c)); 1515 + memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); 1516 1516 ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); 1517 1517 } else { 1518 1518 ret = -1; ··· 1578 1578 for (i = 0; i < ra->nr; i++) { 1579 1579 ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); 1580 1580 ra->bio[i] = bio_alloc_bioset(NULL, 1581 - buf_pages(ra->buf[i], btree_bytes(c)), 1581 + buf_pages(ra->buf[i], btree_buf_bytes(b)), 1582 1582 REQ_OP_READ|REQ_SYNC|REQ_META, 1583 1583 GFP_NOFS, 1584 1584 &c->btree_bio); ··· 1598 1598 rb->pick = 
pick; 1599 1599 rb->bio.bi_iter.bi_sector = pick.ptr.offset; 1600 1600 rb->bio.bi_end_io = btree_node_read_all_replicas_endio; 1601 - bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c)); 1601 + bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b)); 1602 1602 1603 1603 if (rb->have_ioref) { 1604 1604 this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], ··· 1665 1665 ca = bch_dev_bkey_exists(c, pick.ptr.dev); 1666 1666 1667 1667 bio = bio_alloc_bioset(NULL, 1668 - buf_pages(b->data, btree_bytes(c)), 1668 + buf_pages(b->data, btree_buf_bytes(b)), 1669 1669 REQ_OP_READ|REQ_SYNC|REQ_META, 1670 1670 GFP_NOFS, 1671 1671 &c->btree_bio); ··· 1679 1679 INIT_WORK(&rb->work, btree_node_read_work); 1680 1680 bio->bi_iter.bi_sector = pick.ptr.offset; 1681 1681 bio->bi_end_io = btree_node_read_endio; 1682 - bch2_bio_map(bio, b->data, btree_bytes(c)); 1682 + bch2_bio_map(bio, b->data, btree_buf_bytes(b)); 1683 1683 1684 1684 if (rb->have_ioref) { 1685 1685 this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], ··· 2074 2074 i->u64s = 0; 2075 2075 2076 2076 sort_iter_add(&sort_iter.iter, 2077 - unwritten_whiteouts_start(c, b), 2078 - unwritten_whiteouts_end(c, b)); 2077 + unwritten_whiteouts_start(b), 2078 + unwritten_whiteouts_end(b)); 2079 2079 SET_BSET_SEPARATE_WHITEOUTS(i, false); 2080 2080 2081 2081 b->whiteout_u64s = 0; ··· 2251 2251 2252 2252 bne = want_new_bset(c, b); 2253 2253 if (bne) 2254 - bch2_bset_init_next(c, b, bne); 2254 + bch2_bset_init_next(b, bne); 2255 2255 2256 2256 bch2_btree_build_aux_trees(b); 2257 2257
+1 -1
fs/bcachefs/btree_iter.c
··· 1337 1337 1338 1338 if (path->should_be_locked && 1339 1339 !trans->restarted && 1340 - (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_))) 1340 + (!dup || !bch2_btree_path_relock_norestart(trans, dup))) 1341 1341 return; 1342 1342 1343 1343 if (dup) {
+5
fs/bcachefs/btree_iter.h
··· 819 819 #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \ 820 820 for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret) 821 821 822 + /* 823 + * This should not be used in a fastpath, without first trying _do in 824 + * nonblocking mode - it will cause excessive transaction restarts and 825 + * potentially livelocking: 826 + */ 822 827 #define drop_locks_do(_trans, _do) \ 823 828 ({ \ 824 829 bch2_trans_unlock(_trans); \
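The btree_iter.h hunk above documents that drop_locks_do() belongs on slow paths only: callers should first attempt the operation in nonblocking mode while transaction locks are held, and only unlock when that fails (the fs-io.c change further down in this pull is an example of the pattern). A minimal standalone sketch of that try-then-drop idea, using pthread mutexes purely for illustration rather than bcachefs btree locks:

        #include <pthread.h>
        #include <stdio.h>

        static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
        static pthread_mutex_t resource   = PTHREAD_MUTEX_INITIALIZER;

        /* Grab @resource while already holding @trans_lock, preferring not to block. */
        static void do_work_locked(void)
        {
                if (pthread_mutex_trylock(&resource) == 0) {
                        /* fast path: uncontended, outer lock stays held */
                        pthread_mutex_unlock(&resource);
                        return;
                }

                /*
                 * slow path: drop the outer lock before blocking, analogous to
                 * drop_locks_do() - blocking while holding it could livelock
                 * other threads that need trans_lock to make progress
                 */
                pthread_mutex_unlock(&trans_lock);
                pthread_mutex_lock(&resource);
                pthread_mutex_unlock(&resource);
                pthread_mutex_lock(&trans_lock);   /* "relock"; work may need retrying */
        }

        int main(void)
        {
                pthread_mutex_lock(&trans_lock);
                do_work_locked();
                pthread_mutex_unlock(&trans_lock);
                printf("done\n");
                return 0;
        }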
+33 -7
fs/bcachefs/btree_locking.c
··· 631 631 } 632 632 633 633 __flatten 634 - bool bch2_btree_path_relock_norestart(struct btree_trans *trans, 635 - struct btree_path *path, unsigned long trace_ip) 634 + bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path) 636 635 { 637 636 struct get_locks_fail f; 638 637 ··· 641 642 int __bch2_btree_path_relock(struct btree_trans *trans, 642 643 struct btree_path *path, unsigned long trace_ip) 643 644 { 644 - if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) { 645 + if (!bch2_btree_path_relock_norestart(trans, path)) { 645 646 trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path); 646 647 return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); 647 648 } ··· 758 759 if (unlikely(trans->restarted)) 759 760 return -((int) trans->restarted); 760 761 761 - trans_for_each_path(trans, path, i) 762 + trans_for_each_path(trans, path, i) { 763 + struct get_locks_fail f; 764 + 762 765 if (path->should_be_locked && 763 - !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { 764 - trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path); 766 + !btree_path_get_locks(trans, path, false, &f)) { 767 + if (trace_trans_restart_relock_enabled()) { 768 + struct printbuf buf = PRINTBUF; 769 + 770 + bch2_bpos_to_text(&buf, path->pos); 771 + prt_printf(&buf, " l=%u seq=%u node seq=", 772 + f.l, path->l[f.l].lock_seq); 773 + if (IS_ERR_OR_NULL(f.b)) { 774 + prt_str(&buf, bch2_err_str(PTR_ERR(f.b))); 775 + } else { 776 + prt_printf(&buf, "%u", f.b->c.lock.seq); 777 + 778 + struct six_lock_count c = 779 + bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l); 780 + prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); 781 + 782 + c = six_lock_counts(&f.b->c.lock); 783 + prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]); 784 + } 785 + 786 + trace_trans_restart_relock(trans, _RET_IP_, buf.buf); 787 + printbuf_exit(&buf); 788 + } 789 + 790 + count_event(trans->c, trans_restart_relock); 765 791 return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); 766 792 } 793 + } 794 + 767 795 return 0; 768 796 } 769 797 ··· 804 778 805 779 trans_for_each_path(trans, path, i) 806 780 if (path->should_be_locked && 807 - !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) { 781 + !bch2_btree_path_relock_norestart(trans, path)) { 808 782 return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); 809 783 } 810 784 return 0;
+1 -8
fs/bcachefs/btree_locking.h
··· 312 312 313 313 /* relock: */ 314 314 315 - bool bch2_btree_path_relock_norestart(struct btree_trans *, 316 - struct btree_path *, unsigned long); 315 + bool bch2_btree_path_relock_norestart(struct btree_trans *, struct btree_path *); 317 316 int __bch2_btree_path_relock(struct btree_trans *, 318 317 struct btree_path *, unsigned long); 319 318 ··· 351 352 } 352 353 353 354 /* upgrade */ 354 - 355 - 356 - struct get_locks_fail { 357 - unsigned l; 358 - struct btree *b; 359 - }; 360 355 361 356 bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *, 362 357 struct btree_path *, unsigned,
+16 -19
fs/bcachefs/btree_trans_commit.c
··· 139 139 EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); 140 140 EBUG_ON(bpos_lt(insert->k.p, b->data->min_key)); 141 141 EBUG_ON(bpos_gt(insert->k.p, b->data->max_key)); 142 - EBUG_ON(insert->k.u64s > 143 - bch_btree_keys_u64s_remaining(trans->c, b)); 142 + EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b)); 144 143 EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos)); 145 144 146 145 k = bch2_btree_node_iter_peek_all(node_iter, b); ··· 159 160 k->type = KEY_TYPE_deleted; 160 161 161 162 if (k->needs_whiteout) 162 - push_whiteout(trans->c, b, insert->k.p); 163 + push_whiteout(b, insert->k.p); 163 164 k->needs_whiteout = false; 164 165 165 166 if (k >= btree_bset_last(b)->start) { ··· 347 348 static inline int btree_key_can_insert(struct btree_trans *trans, 348 349 struct btree *b, unsigned u64s) 349 350 { 350 - struct bch_fs *c = trans->c; 351 - 352 - if (!bch2_btree_node_insert_fits(c, b, u64s)) 351 + if (!bch2_btree_node_insert_fits(b, u64s)) 353 352 return -BCH_ERR_btree_insert_btree_node_full; 354 353 355 354 return 0; ··· 415 418 return 0; 416 419 417 420 new_u64s = roundup_pow_of_two(u64s); 418 - new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT); 421 + new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN); 419 422 if (unlikely(!new_k)) 420 423 return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s); 421 424 ··· 443 446 verify_update_old_key(trans, i); 444 447 445 448 if (unlikely(flags & BTREE_TRIGGER_NORUN)) 446 - return 0; 447 - 448 - if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) 449 449 return 0; 450 450 451 451 if (old_ops->trigger == new_ops->trigger) { ··· 580 586 581 587 static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) 582 588 { 583 - struct bch_fs *c = trans->c; 584 - int ret = 0; 585 - 586 589 trans_for_each_update(trans, i) { 587 590 /* 588 591 * XXX: synchronization of cached update triggers with gc ··· 587 596 */ 588 597 BUG_ON(i->cached || i->level); 589 598 590 - if (gc_visited(c, gc_pos_btree_node(insert_l(trans, i)->b))) { 591 - ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); 599 + if (btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)) && 600 + gc_visited(trans->c, gc_pos_btree_node(insert_l(trans, i)->b))) { 601 + int ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC); 592 602 if (ret) 593 - break; 603 + return ret; 594 604 } 595 605 } 596 606 597 - return ret; 607 + return 0; 598 608 } 599 609 600 610 static inline int ··· 672 680 bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas)) 673 681 return -BCH_ERR_btree_insert_need_mark_replicas; 674 682 683 + /* XXX: we only want to run this if deltas are nonzero */ 684 + bch2_trans_account_disk_usage_change(trans); 685 + 675 686 h = trans->hooks; 676 687 while (h) { 677 688 ret = h->fn(trans, h); ··· 684 689 } 685 690 686 691 trans_for_each_update(trans, i) 687 - if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) { 688 - ret = run_one_mem_trigger(trans, i, i->flags); 692 + if (BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS & (1U << i->bkey_type)) { 693 + ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_ATOMIC|i->flags); 689 694 if (ret) 690 695 goto fatal_err; 691 696 } ··· 988 993 if (!trans->nr_updates && 989 994 !trans->journal_entries_u64s) 990 995 goto out_reset; 996 + 997 + memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta)); 991 998 992 999 ret = bch2_trans_commit_run_triggers(trans); 993 1000 if (ret)
+10 -2
fs/bcachefs/btree_types.h
··· 430 430 struct journal_res journal_res; 431 431 u64 *journal_seq; 432 432 struct disk_reservation *disk_res; 433 + 434 + struct bch_fs_usage_base fs_usage_delta; 435 + 433 436 unsigned journal_u64s; 434 437 unsigned extra_disk_res; /* XXX kill */ 435 438 struct replicas_delta_list *fs_usage_deltas; ··· 656 653 BIT_ULL(BKEY_TYPE_reflink)| \ 657 654 BIT_ULL(BKEY_TYPE_btree)) 658 655 659 - #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ 656 + #define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS \ 660 657 (BIT_ULL(BKEY_TYPE_alloc)| \ 661 658 BIT_ULL(BKEY_TYPE_inodes)| \ 662 659 BIT_ULL(BKEY_TYPE_stripes)| \ ··· 664 661 665 662 #define BTREE_NODE_TYPE_HAS_TRIGGERS \ 666 663 (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ 667 - BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) 664 + BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS) 668 665 669 666 static inline bool btree_node_type_needs_gc(enum btree_node_type type) 670 667 { ··· 739 736 enum btree_node_sibling { 740 737 btree_prev_sib, 741 738 btree_next_sib, 739 + }; 740 + 741 + struct get_locks_fail { 742 + unsigned l; 743 + struct btree *b; 742 744 }; 743 745 744 746 #endif /* _BCACHEFS_BTREE_TYPES_H */
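The btree_types.h hunk renames BTREE_NODE_TYPE_HAS_MEM_TRIGGERS to BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS but keeps the same one-bit-per-key-type mask scheme, which the commit path tests with an expression of the form BIT_ULL(bkey_type) & mask. A small standalone sketch of that membership test, with made-up key types rather than the kernel's enum:

        #include <stdio.h>

        enum key_type { KT_alloc, KT_extents, KT_inodes, KT_stripes, KT_nr };

        /* one bit per key type: which types have "atomic" triggers (illustrative) */
        #define TYPE_HAS_ATOMIC_TRIGGERS \
                ((1ULL << KT_alloc) | (1ULL << KT_inodes) | (1ULL << KT_stripes))

        static int needs_atomic_trigger(enum key_type t)
        {
                return (TYPE_HAS_ATOMIC_TRIGGERS >> t) & 1;
        }

        int main(void)
        {
                for (int t = 0; t < KT_nr; t++)
                        printf("type %d: %s\n", t, needs_atomic_trigger(t) ? "yes" : "no");
                return 0;
        }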
+4 -4
fs/bcachefs/btree_update_interior.c
··· 159 159 { 160 160 size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f); 161 161 162 - return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); 162 + return __vstruct_bytes(struct btree_node, u64s) < btree_buf_bytes(b); 163 163 } 164 164 165 165 /* Btree node freeing/allocation: */ ··· 1097 1097 * Always check for space for two keys, even if we won't have to 1098 1098 * split at prior level - it might have been a merge instead: 1099 1099 */ 1100 - if (bch2_btree_node_insert_fits(c, path->l[update_level].b, 1100 + if (bch2_btree_node_insert_fits(path->l[update_level].b, 1101 1101 BKEY_BTREE_PTR_U64s_MAX * 2)) 1102 1102 break; 1103 1103 ··· 1401 1401 1402 1402 unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s + 1403 1403 nr_keys[i].val_u64s; 1404 - if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c)) 1404 + if (__vstruct_bytes(struct btree_node, u64s) > btree_buf_bytes(b)) 1405 1405 n[i]->data->format = b->format; 1406 1406 1407 1407 btree_node_set_format(n[i], n[i]->data->format); ··· 1703 1703 1704 1704 bch2_btree_node_prep_for_write(trans, path, b); 1705 1705 1706 - if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { 1706 + if (!bch2_btree_node_insert_fits(b, bch2_keylist_u64s(keys))) { 1707 1707 bch2_btree_node_unlock_write(trans, path, b); 1708 1708 goto split; 1709 1709 }
+17 -25
fs/bcachefs/btree_update_interior.h
··· 184 184 b->sib_u64s[1] = b->nr.live_u64s; 185 185 } 186 186 187 - static inline void *btree_data_end(struct bch_fs *c, struct btree *b) 187 + static inline void *btree_data_end(struct btree *b) 188 188 { 189 - return (void *) b->data + btree_bytes(c); 189 + return (void *) b->data + btree_buf_bytes(b); 190 190 } 191 191 192 - static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c, 193 - struct btree *b) 192 + static inline struct bkey_packed *unwritten_whiteouts_start(struct btree *b) 194 193 { 195 - return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s); 194 + return (void *) ((u64 *) btree_data_end(b) - b->whiteout_u64s); 196 195 } 197 196 198 - static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c, 199 - struct btree *b) 197 + static inline struct bkey_packed *unwritten_whiteouts_end(struct btree *b) 200 198 { 201 - return btree_data_end(c, b); 199 + return btree_data_end(b); 202 200 } 203 201 204 202 static inline void *write_block(struct btree *b) ··· 219 221 return __btree_addr_written(b, k); 220 222 } 221 223 222 - static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, 223 - struct btree *b, 224 - void *end) 224 + static inline ssize_t __bch2_btree_u64s_remaining(struct btree *b, void *end) 225 225 { 226 226 ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + 227 227 b->whiteout_u64s; 228 - ssize_t total = c->opts.btree_node_size >> 3; 228 + ssize_t total = btree_buf_bytes(b) >> 3; 229 229 230 230 /* Always leave one extra u64 for bch2_varint_decode: */ 231 231 used++; ··· 231 235 return total - used; 232 236 } 233 237 234 - static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c, 235 - struct btree *b) 238 + static inline size_t bch2_btree_keys_u64s_remaining(struct btree *b) 236 239 { 237 - ssize_t remaining = __bch_btree_u64s_remaining(c, b, 240 + ssize_t remaining = __bch2_btree_u64s_remaining(b, 238 241 btree_bkey_last(b, bset_tree_last(b))); 239 242 240 243 BUG_ON(remaining < 0); ··· 255 260 return 8 << BTREE_WRITE_SET_U64s_BITS; 256 261 } 257 262 258 - static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, 259 - struct btree *b) 263 + static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b) 260 264 { 261 265 struct bset_tree *t = bset_tree_last(b); 262 266 struct btree_node_entry *bne = max(write_block(b), 263 267 (void *) btree_bkey_last(b, bset_tree_last(b))); 264 268 ssize_t remaining_space = 265 - __bch_btree_u64s_remaining(c, b, bne->keys.start); 269 + __bch2_btree_u64s_remaining(b, bne->keys.start); 266 270 267 271 if (unlikely(bset_written(b, bset(b, t)))) { 268 272 if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) ··· 275 281 return NULL; 276 282 } 277 283 278 - static inline void push_whiteout(struct bch_fs *c, struct btree *b, 279 - struct bpos pos) 284 + static inline void push_whiteout(struct btree *b, struct bpos pos) 280 285 { 281 286 struct bkey_packed k; 282 287 283 - BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s); 288 + BUG_ON(bch2_btree_keys_u64s_remaining(b) < BKEY_U64s); 284 289 EBUG_ON(btree_node_just_written(b)); 285 290 286 291 if (!bkey_pack_pos(&k, pos, b)) { ··· 292 299 k.needs_whiteout = true; 293 300 294 301 b->whiteout_u64s += k.u64s; 295 - bkey_p_copy(unwritten_whiteouts_start(c, b), &k); 302 + bkey_p_copy(unwritten_whiteouts_start(b), &k); 296 303 } 297 304 298 305 /* 299 306 * write lock must be held on @b (else the dirty bset that we were going to 300 307 * insert into could be written out from 
under us) 301 308 */ 302 - static inline bool bch2_btree_node_insert_fits(struct bch_fs *c, 303 - struct btree *b, unsigned u64s) 309 + static inline bool bch2_btree_node_insert_fits(struct btree *b, unsigned u64s) 304 310 { 305 311 if (unlikely(btree_node_need_rewrite(b))) 306 312 return false; 307 313 308 - return u64s <= bch_btree_keys_u64s_remaining(c, b); 314 + return u64s <= bch2_btree_keys_u64s_remaining(b); 309 315 } 310 316 311 317 void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
+3 -4
fs/bcachefs/btree_write_buffer.c
··· 125 125 struct btree_write_buffered_key *wb, 126 126 bool *write_locked, size_t *fast) 127 127 { 128 - struct bch_fs *c = trans->c; 129 128 struct btree_path *path; 130 129 int ret; 131 130 132 131 EBUG_ON(!wb->journal_seq); 133 - EBUG_ON(!c->btree_write_buffer.flushing.pin.seq); 134 - EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); 132 + EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq); 133 + EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); 135 134 136 135 ret = bch2_btree_iter_traverse(iter); 137 136 if (ret) ··· 154 155 *write_locked = true; 155 156 } 156 157 157 - if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) { 158 + if (unlikely(!bch2_btree_node_insert_fits(path->l[0].b, wb->k.k.u64s))) { 158 159 *write_locked = false; 159 160 return wb_flush_one_slowpath(trans, iter, wb); 160 161 }
+80 -68
fs/bcachefs/buckets.c
··· 25 25 26 26 #include <linux/preempt.h> 27 27 28 - static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage, 28 + static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage, 29 29 enum bch_data_type data_type, 30 30 s64 sectors) 31 31 { ··· 54 54 bch2_fs_usage_acc_to_base(c, i); 55 55 56 56 for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) 57 - usage->reserved += usage->persistent_reserved[i]; 57 + usage->b.reserved += usage->persistent_reserved[i]; 58 58 59 59 for (unsigned i = 0; i < c->replicas.nr; i++) { 60 60 struct bch_replicas_entry_v1 *e = 61 61 cpu_replicas_entry(&c->replicas, i); 62 62 63 - fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]); 63 + fs_usage_data_type_to_base(&usage->b, e->data_type, usage->replicas[i]); 64 64 } 65 65 66 66 for_each_member_device(c, ca) { 67 67 struct bch_dev_usage dev = bch2_dev_usage_read(ca); 68 68 69 - usage->hidden += (dev.d[BCH_DATA_sb].buckets + 70 - dev.d[BCH_DATA_journal].buckets) * 69 + usage->b.hidden += (dev.d[BCH_DATA_sb].buckets + 70 + dev.d[BCH_DATA_journal].buckets) * 71 71 ca->mi.bucket_size; 72 72 } 73 73 ··· 188 188 prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity); 189 189 190 190 prt_printf(out, "hidden:\t\t\t\t%llu\n", 191 - fs_usage->u.hidden); 191 + fs_usage->u.b.hidden); 192 192 prt_printf(out, "data:\t\t\t\t%llu\n", 193 - fs_usage->u.data); 193 + fs_usage->u.b.data); 194 194 prt_printf(out, "cached:\t\t\t\t%llu\n", 195 - fs_usage->u.cached); 195 + fs_usage->u.b.cached); 196 196 prt_printf(out, "reserved:\t\t\t%llu\n", 197 - fs_usage->u.reserved); 197 + fs_usage->u.b.reserved); 198 198 prt_printf(out, "nr_inodes:\t\t\t%llu\n", 199 - fs_usage->u.nr_inodes); 199 + fs_usage->u.b.nr_inodes); 200 200 prt_printf(out, "online reserved:\t\t%llu\n", 201 201 fs_usage->online_reserved); 202 202 ··· 225 225 226 226 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage) 227 227 { 228 - return min(fs_usage->u.hidden + 229 - fs_usage->u.btree + 230 - fs_usage->u.data + 231 - reserve_factor(fs_usage->u.reserved + 228 + return min(fs_usage->u.b.hidden + 229 + fs_usage->u.b.btree + 230 + fs_usage->u.b.data + 231 + reserve_factor(fs_usage->u.b.reserved + 232 232 fs_usage->online_reserved), 233 233 c->capacity); 234 234 } ··· 240 240 u64 data, reserved; 241 241 242 242 ret.capacity = c->capacity - 243 - bch2_fs_usage_read_one(c, &c->usage_base->hidden); 243 + bch2_fs_usage_read_one(c, &c->usage_base->b.hidden); 244 244 245 - data = bch2_fs_usage_read_one(c, &c->usage_base->data) + 246 - bch2_fs_usage_read_one(c, &c->usage_base->btree); 247 - reserved = bch2_fs_usage_read_one(c, &c->usage_base->reserved) + 245 + data = bch2_fs_usage_read_one(c, &c->usage_base->b.data) + 246 + bch2_fs_usage_read_one(c, &c->usage_base->b.btree); 247 + reserved = bch2_fs_usage_read_one(c, &c->usage_base->b.reserved) + 248 248 percpu_u64_get(c->online_reserved); 249 249 250 250 ret.used = min(ret.capacity, data + reserve_factor(reserved)); 251 251 ret.free = ret.capacity - ret.used; 252 252 253 - ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes); 253 + ret.nr_inodes = bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes); 254 254 255 255 return ret; 256 256 } ··· 284 284 prt_newline(out); 285 285 286 286 for (unsigned i = 0; i < BCH_DATA_NR; i++) { 287 - prt_str(out, bch2_data_types[i]); 287 + bch2_prt_data_type(out, i); 288 288 prt_tab(out); 289 289 prt_u64(out, usage->d[i].buckets); 290 290 prt_tab_rjust(out); ··· 308 308 fs_usage = fs_usage_ptr(c, 
journal_seq, gc); 309 309 310 310 if (data_type_is_hidden(old->data_type)) 311 - fs_usage->hidden -= ca->mi.bucket_size; 311 + fs_usage->b.hidden -= ca->mi.bucket_size; 312 312 if (data_type_is_hidden(new->data_type)) 313 - fs_usage->hidden += ca->mi.bucket_size; 313 + fs_usage->b.hidden += ca->mi.bucket_size; 314 314 315 315 u = dev_usage_ptr(ca, journal_seq, gc); 316 316 ··· 359 359 if (idx < 0) 360 360 return -1; 361 361 362 - fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); 362 + fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors); 363 363 fs_usage->replicas[idx] += sectors; 364 364 return 0; 365 365 } ··· 394 394 395 395 preempt_disable(); 396 396 fs_usage = fs_usage_ptr(c, journal_seq, gc); 397 - fs_usage_data_type_to_base(fs_usage, r->data_type, sectors); 397 + fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors); 398 398 fs_usage->replicas[idx] += sectors; 399 399 preempt_enable(); 400 400 err: ··· 523 523 if (bch2_fs_inconsistent_on(g->data_type && 524 524 g->data_type != data_type, c, 525 525 "different types of data in same bucket: %s, %s", 526 - bch2_data_types[g->data_type], 527 - bch2_data_types[data_type])) { 526 + bch2_data_type_str(g->data_type), 527 + bch2_data_type_str(data_type))) { 528 528 ret = -EIO; 529 529 goto err; 530 530 } ··· 532 532 if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, 533 533 "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size", 534 534 ca->dev_idx, b, g->gen, 535 - bch2_data_types[g->data_type ?: data_type], 535 + bch2_data_type_str(g->data_type ?: data_type), 536 536 g->dirty_sectors, sectors)) { 537 537 ret = -EIO; 538 538 goto err; ··· 575 575 "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" 576 576 "while marking %s", 577 577 ptr->dev, bucket_nr, b_gen, 578 - bch2_data_types[bucket_data_type ?: ptr_data_type], 578 + bch2_data_type_str(bucket_data_type ?: ptr_data_type), 579 579 ptr->gen, 580 580 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 581 581 ret = -EIO; ··· 588 588 "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" 589 589 "while marking %s", 590 590 ptr->dev, bucket_nr, b_gen, 591 - bch2_data_types[bucket_data_type ?: ptr_data_type], 591 + bch2_data_type_str(bucket_data_type ?: ptr_data_type), 592 592 ptr->gen, 593 593 (printbuf_reset(&buf), 594 594 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); ··· 603 603 "while marking %s", 604 604 ptr->dev, bucket_nr, b_gen, 605 605 *bucket_gen(ca, bucket_nr), 606 - bch2_data_types[bucket_data_type ?: ptr_data_type], 606 + bch2_data_type_str(bucket_data_type ?: ptr_data_type), 607 607 ptr->gen, 608 608 (printbuf_reset(&buf), 609 609 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); ··· 624 624 "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" 625 625 "while marking %s", 626 626 ptr->dev, bucket_nr, b_gen, 627 - bch2_data_types[bucket_data_type], 628 - bch2_data_types[ptr_data_type], 627 + bch2_data_type_str(bucket_data_type), 628 + bch2_data_type_str(ptr_data_type), 629 629 (printbuf_reset(&buf), 630 630 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 631 631 ret = -EIO; ··· 638 638 "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" 639 639 "while marking %s", 640 640 ptr->dev, bucket_nr, b_gen, 641 - bch2_data_types[bucket_data_type ?: ptr_data_type], 641 + bch2_data_type_str(bucket_data_type ?: ptr_data_type), 642 642 bucket_sectors, sectors, 643 643 (printbuf_reset(&buf), 644 644 bch2_bkey_val_to_text(&buf, c, k), 
buf.buf)); ··· 677 677 BUG_ON(__update_replicas(c, dst, &d->r, -d->delta)); 678 678 } 679 679 680 - dst->nr_inodes -= deltas->nr_inodes; 680 + dst->b.nr_inodes -= deltas->nr_inodes; 681 681 682 682 for (i = 0; i < BCH_REPLICAS_MAX; i++) { 683 683 added -= deltas->persistent_reserved[i]; 684 - dst->reserved -= deltas->persistent_reserved[i]; 684 + dst->b.reserved -= deltas->persistent_reserved[i]; 685 685 dst->persistent_reserved[i] -= deltas->persistent_reserved[i]; 686 686 } 687 687 ··· 694 694 percpu_up_read(&c->mark_lock); 695 695 } 696 696 697 - int bch2_trans_fs_usage_apply(struct btree_trans *trans, 698 - struct replicas_delta_list *deltas) 697 + void bch2_trans_account_disk_usage_change(struct btree_trans *trans) 699 698 { 700 699 struct bch_fs *c = trans->c; 700 + u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; 701 701 static int warned_disk_usage = 0; 702 702 bool warn = false; 703 - u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; 704 - struct replicas_delta *d, *d2; 705 - struct replicas_delta *top = (void *) deltas->d + deltas->used; 706 - struct bch_fs_usage *dst; 707 - s64 added = 0, should_not_have_added; 708 - unsigned i; 709 703 710 704 percpu_down_read(&c->mark_lock); 711 705 preempt_disable(); 712 - dst = fs_usage_ptr(c, trans->journal_res.seq, false); 706 + struct bch_fs_usage_base *dst = &fs_usage_ptr(c, trans->journal_res.seq, false)->b; 707 + struct bch_fs_usage_base *src = &trans->fs_usage_delta; 713 708 714 - for (d = deltas->d; d != top; d = replicas_delta_next(d)) { 715 - switch (d->r.data_type) { 716 - case BCH_DATA_btree: 717 - case BCH_DATA_user: 718 - case BCH_DATA_parity: 719 - added += d->delta; 720 - } 721 - 722 - if (__update_replicas(c, dst, &d->r, d->delta)) 723 - goto need_mark; 724 - } 725 - 726 - dst->nr_inodes += deltas->nr_inodes; 727 - 728 - for (i = 0; i < BCH_REPLICAS_MAX; i++) { 729 - added += deltas->persistent_reserved[i]; 730 - dst->reserved += deltas->persistent_reserved[i]; 731 - dst->persistent_reserved[i] += deltas->persistent_reserved[i]; 732 - } 709 + s64 added = src->btree + src->data + src->reserved; 733 710 734 711 /* 735 712 * Not allowed to reduce sectors_available except by getting a 736 713 * reservation: 737 714 */ 738 - should_not_have_added = added - (s64) disk_res_sectors; 715 + s64 should_not_have_added = added - (s64) disk_res_sectors; 739 716 if (unlikely(should_not_have_added > 0)) { 740 717 u64 old, new, v = atomic64_read(&c->sectors_available); 741 718 ··· 731 754 this_cpu_sub(*c->online_reserved, added); 732 755 } 733 756 757 + dst->hidden += src->hidden; 758 + dst->btree += src->btree; 759 + dst->data += src->data; 760 + dst->cached += src->cached; 761 + dst->reserved += src->reserved; 762 + dst->nr_inodes += src->nr_inodes; 763 + 734 764 preempt_enable(); 735 765 percpu_up_read(&c->mark_lock); 736 766 ··· 745 761 bch2_trans_inconsistent(trans, 746 762 "disk usage increased %lli more than %llu sectors reserved)", 747 763 should_not_have_added, disk_res_sectors); 764 + } 765 + 766 + int bch2_trans_fs_usage_apply(struct btree_trans *trans, 767 + struct replicas_delta_list *deltas) 768 + { 769 + struct bch_fs *c = trans->c; 770 + struct replicas_delta *d, *d2; 771 + struct replicas_delta *top = (void *) deltas->d + deltas->used; 772 + struct bch_fs_usage *dst; 773 + unsigned i; 774 + 775 + percpu_down_read(&c->mark_lock); 776 + preempt_disable(); 777 + dst = fs_usage_ptr(c, trans->journal_res.seq, false); 778 + 779 + for (d = deltas->d; d != top; d = replicas_delta_next(d)) 
780 + if (__update_replicas(c, dst, &d->r, d->delta)) 781 + goto need_mark; 782 + 783 + dst->b.nr_inodes += deltas->nr_inodes; 784 + 785 + for (i = 0; i < BCH_REPLICAS_MAX; i++) { 786 + dst->b.reserved += deltas->persistent_reserved[i]; 787 + dst->persistent_reserved[i] += deltas->persistent_reserved[i]; 788 + } 789 + 790 + preempt_enable(); 791 + percpu_up_read(&c->mark_lock); 748 792 return 0; 749 793 need_mark: 750 794 /* revert changes: */ ··· 1096 1084 struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc); 1097 1085 1098 1086 replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved)); 1099 - fs_usage->reserved += sectors; 1087 + fs_usage->b.reserved += sectors; 1100 1088 fs_usage->persistent_reserved[replicas - 1] += sectors; 1101 1089 1102 1090 preempt_enable(); ··· 1142 1130 "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" 1143 1131 "while marking %s", 1144 1132 iter.pos.inode, iter.pos.offset, a->v.gen, 1145 - bch2_data_types[a->v.data_type], 1146 - bch2_data_types[type], 1147 - bch2_data_types[type]); 1133 + bch2_data_type_str(a->v.data_type), 1134 + bch2_data_type_str(type), 1135 + bch2_data_type_str(type)); 1148 1136 ret = -EIO; 1149 1137 goto err; 1150 1138 }
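The buckets.c rework above splits disk-usage accounting out of bch2_trans_fs_usage_apply(): per-transaction deltas are now accumulated into a bch_fs_usage_base and checked against the disk reservation in bch2_trans_account_disk_usage_change(). A rough standalone sketch of that accumulate-and-check step, using an illustrative usage_base struct rather than the kernel's:

        #include <stdio.h>
        #include <stdint.h>

        /* illustrative stand-in for struct bch_fs_usage_base, not the kernel struct */
        struct usage_base {
                int64_t btree, data, cached, reserved;
        };

        /*
         * Fold a transaction's usage delta into the filesystem totals and flag
         * the case the real code warns about: usage growing by more sectors
         * than the disk reservation allowed.
         */
        static int apply_usage_delta(struct usage_base *fs,
                                     const struct usage_base *delta,
                                     int64_t disk_res_sectors)
        {
                int64_t added = delta->btree + delta->data + delta->reserved;

                fs->btree    += delta->btree;
                fs->data     += delta->data;
                fs->cached   += delta->cached;
                fs->reserved += delta->reserved;

                return added - disk_res_sectors > 0 ? -1 : 0;
        }

        int main(void)
        {
                struct usage_base fs = { 0 };
                struct usage_base delta = { .data = 128, .btree = 8 };

                if (apply_usage_delta(&fs, &delta, 256))
                        printf("usage increased more than reserved\n");
                else
                        printf("data=%lld btree=%lld\n",
                               (long long) fs.data, (long long) fs.btree);
                return 0;
        }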
+17
fs/bcachefs/buckets.h
··· 356 356 ret; \ 357 357 }) 358 358 359 + void bch2_trans_account_disk_usage_change(struct btree_trans *); 360 + 359 361 void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); 360 362 int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); 361 363 ··· 385 383 } 386 384 387 385 return false; 386 + } 387 + 388 + static inline const char *bch2_data_type_str(enum bch_data_type type) 389 + { 390 + return type < BCH_DATA_NR 391 + ? __bch2_data_types[type] 392 + : "(invalid data type)"; 393 + } 394 + 395 + static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type) 396 + { 397 + if (type < BCH_DATA_NR) 398 + prt_str(out, __bch2_data_types[type]); 399 + else 400 + prt_printf(out, "(invalid data type %u)", type); 388 401 } 389 402 390 403 /* disk reservations: */
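buckets.h now exposes bch2_data_type_str() and bch2_prt_data_type(), which bounds-check the data type before indexing the name table, so a corrupt on-disk value prints a diagnostic instead of reading past the array. A tiny standalone sketch of the same bounds-checked lookup (the enum and strings below are placeholders, not the kernel's bch_data_type list):

        #include <stdio.h>

        enum data_type { DT_FREE, DT_SB, DT_BTREE, DT_USER, DT_NR };

        static const char * const data_type_strs[] = {
                "free", "sb", "btree", "user",
        };

        /* unknown values get a diagnostic string, never an out-of-bounds read */
        static const char *data_type_str(unsigned type)
        {
                return type < DT_NR ? data_type_strs[type] : "(invalid data type)";
        }

        int main(void)
        {
                printf("%s %s\n", data_type_str(DT_BTREE), data_type_str(42));
                return 0;
        }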
+5 -10
fs/bcachefs/buckets_types.h
··· 45 45 } d[BCH_DATA_NR]; 46 46 }; 47 47 48 - struct bch_fs_usage { 49 - /* all fields are in units of 512 byte sectors: */ 48 + struct bch_fs_usage_base { 50 49 u64 hidden; 51 50 u64 btree; 52 51 u64 data; 53 52 u64 cached; 54 53 u64 reserved; 55 54 u64 nr_inodes; 55 + }; 56 56 57 - /* XXX: add stats for compression ratio */ 58 - #if 0 59 - u64 uncompressed; 60 - u64 compressed; 61 - #endif 62 - 63 - /* broken out: */ 64 - 57 + struct bch_fs_usage { 58 + /* all fields are in units of 512 byte sectors: */ 59 + struct bch_fs_usage_base b; 65 60 u64 persistent_reserved[BCH_REPLICAS_MAX]; 66 61 u64 replicas[]; 67 62 };
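buckets_types.h factors the fixed counters out into struct bch_fs_usage_base, embedded at the start of struct bch_fs_usage ahead of its flexible replicas[] array; the same base struct can then double as the new per-transaction fs_usage_delta. A standalone sketch of that embedded-base-plus-flexible-array layout and how such an object is sized (field names are illustrative only):

        #include <stdio.h>
        #include <stdlib.h>

        /* illustrative stand-ins for bch_fs_usage_base / bch_fs_usage */
        struct usage_base {
                unsigned long long hidden, btree, data, cached, reserved, nr_inodes;
        };

        struct usage {
                struct usage_base  b;           /* fixed counters, shared with the delta struct */
                unsigned long long replicas[];  /* flexible array member, sized at runtime */
        };

        int main(void)
        {
                unsigned nr_replicas_entries = 4;
                struct usage *u = calloc(1, sizeof(*u) +
                                         nr_replicas_entries * sizeof(u->replicas[0]));
                if (!u)
                        return 1;

                u->b.data = 1024;
                u->replicas[2] = 512;
                printf("data=%llu replicas[2]=%llu\n", u->b.data, u->replicas[2]);
                free(u);
                return 0;
        }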
+2 -2
fs/bcachefs/clock.c
··· 109 109 if (cpu_timeout != MAX_SCHEDULE_TIMEOUT) 110 110 mod_timer(&wait.cpu_timer, cpu_timeout + jiffies); 111 111 112 - while (1) { 112 + do { 113 113 set_current_state(TASK_INTERRUPTIBLE); 114 114 if (kthread && kthread_should_stop()) 115 115 break; ··· 119 119 120 120 schedule(); 121 121 try_to_freeze(); 122 - } 122 + } while (0); 123 123 124 124 __set_current_state(TASK_RUNNING); 125 125 del_timer_sync(&wait.cpu_timer);
+8
fs/bcachefs/compress.h
··· 47 47 return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; 48 48 } 49 49 50 + static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type) 51 + { 52 + if (type < BCH_COMPRESSION_TYPE_NR) 53 + prt_str(out, __bch2_compression_types[type]); 54 + else 55 + prt_printf(out, "(invalid compression type %u)", type); 56 + } 57 + 50 58 int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *, 51 59 struct bch_extent_crc_unpacked *); 52 60 int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
+1 -1
fs/bcachefs/counters.c fs/bcachefs/sb-counters.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "bcachefs.h" 3 3 #include "super-io.h" 4 - #include "counters.h" 4 + #include "sb-counters.h" 5 5 6 6 /* BCH_SB_FIELD_counters */ 7 7
+3 -4
fs/bcachefs/counters.h fs/bcachefs/sb-counters.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef _BCACHEFS_COUNTERS_H 3 - #define _BCACHEFS_COUNTERS_H 2 + #ifndef _BCACHEFS_SB_COUNTERS_H 3 + #define _BCACHEFS_SB_COUNTERS_H 4 4 5 5 #include "bcachefs.h" 6 6 #include "super-io.h" 7 - 8 7 9 8 int bch2_sb_counters_to_cpu(struct bch_fs *); 10 9 int bch2_sb_counters_from_cpu(struct bch_fs *); ··· 13 14 14 15 extern const struct bch_sb_field_ops bch_sb_field_ops_counters; 15 16 16 - #endif // _BCACHEFS_COUNTERS_H 17 + #endif // _BCACHEFS_SB_COUNTERS_H
+2 -4
fs/bcachefs/data_update.c
··· 285 285 k.k->p, bkey_start_pos(&insert->k)) ?: 286 286 bch2_insert_snapshot_whiteouts(trans, m->btree_id, 287 287 k.k->p, insert->k.p) ?: 288 - bch2_bkey_set_needs_rebalance(c, insert, 289 - op->opts.background_target, 290 - op->opts.background_compression) ?: 288 + bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?: 291 289 bch2_trans_update(trans, &iter, insert, 292 290 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 293 291 bch2_trans_commit(trans, &op->res, ··· 527 529 BCH_WRITE_DATA_ENCODED| 528 530 BCH_WRITE_MOVE| 529 531 m->data_opts.write_flags; 530 - m->op.compression_opt = io_opts.background_compression ?: io_opts.compression; 532 + m->op.compression_opt = background_compression(io_opts); 531 533 m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; 532 534 533 535 bkey_for_each_ptr(ptrs, ptr)
+8 -8
fs/bcachefs/debug.c
··· 44 44 return false; 45 45 46 46 bio = bio_alloc_bioset(ca->disk_sb.bdev, 47 - buf_pages(n_sorted, btree_bytes(c)), 47 + buf_pages(n_sorted, btree_buf_bytes(b)), 48 48 REQ_OP_READ|REQ_META, 49 49 GFP_NOFS, 50 50 &c->btree_bio); 51 51 bio->bi_iter.bi_sector = pick.ptr.offset; 52 - bch2_bio_map(bio, n_sorted, btree_bytes(c)); 52 + bch2_bio_map(bio, n_sorted, btree_buf_bytes(b)); 53 53 54 54 submit_bio_wait(bio); 55 55 56 56 bio_put(bio); 57 57 percpu_ref_put(&ca->io_ref); 58 58 59 - memcpy(n_ondisk, n_sorted, btree_bytes(c)); 59 + memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); 60 60 61 61 v->written = 0; 62 62 if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) ··· 137 137 mutex_lock(&c->verify_lock); 138 138 139 139 if (!c->verify_ondisk) { 140 - c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); 140 + c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL); 141 141 if (!c->verify_ondisk) 142 142 goto out; 143 143 } ··· 199 199 return; 200 200 } 201 201 202 - n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL); 202 + n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL); 203 203 if (!n_ondisk) { 204 204 prt_printf(out, "memory allocation failure\n"); 205 205 goto out; 206 206 } 207 207 208 208 bio = bio_alloc_bioset(ca->disk_sb.bdev, 209 - buf_pages(n_ondisk, btree_bytes(c)), 209 + buf_pages(n_ondisk, btree_buf_bytes(b)), 210 210 REQ_OP_READ|REQ_META, 211 211 GFP_NOFS, 212 212 &c->btree_bio); 213 213 bio->bi_iter.bi_sector = pick.ptr.offset; 214 - bch2_bio_map(bio, n_ondisk, btree_bytes(c)); 214 + bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b)); 215 215 216 216 ret = submit_bio_wait(bio); 217 217 if (ret) { ··· 293 293 out: 294 294 if (bio) 295 295 bio_put(bio); 296 - kvpfree(n_ondisk, btree_bytes(c)); 296 + kvpfree(n_ondisk, btree_buf_bytes(b)); 297 297 percpu_ref_put(&ca->io_ref); 298 298 } 299 299
+42
fs/bcachefs/dirent_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_DIRENT_FORMAT_H 3 + #define _BCACHEFS_DIRENT_FORMAT_H 4 + 5 + /* 6 + * Dirents (and xattrs) have to implement string lookups; since our b-tree 7 + * doesn't support arbitrary length strings for the key, we instead index by a 8 + * 64 bit hash (currently truncated sha1) of the string, stored in the offset 9 + * field of the key - using linear probing to resolve hash collisions. This also 10 + * provides us with the readdir cookie posix requires. 11 + * 12 + * Linear probing requires us to use whiteouts for deletions, in the event of a 13 + * collision: 14 + */ 15 + 16 + struct bch_dirent { 17 + struct bch_val v; 18 + 19 + /* Target inode number: */ 20 + union { 21 + __le64 d_inum; 22 + struct { /* DT_SUBVOL */ 23 + __le32 d_child_subvol; 24 + __le32 d_parent_subvol; 25 + }; 26 + }; 27 + 28 + /* 29 + * Copy of mode bits 12-15 from the target inode - so userspace can get 30 + * the filetype without having to do a stat() 31 + */ 32 + __u8 d_type; 33 + 34 + __u8 d_name[]; 35 + } __packed __aligned(8); 36 + 37 + #define DT_SUBVOL 16 38 + #define BCH_DT_MAX 17 39 + 40 + #define BCH_NAME_MAX 512 41 + 42 + #endif /* _BCACHEFS_DIRENT_FORMAT_H */
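The comment moved into dirent_format.h describes indexing dirents by a 64-bit hash with linear probing, which is why deletions must leave whiteouts: clearing a slot outright would break the probe chain for entries inserted after a collision. A toy in-memory sketch of linear probing with whiteout tombstones (not the btree implementation):

        #include <stdio.h>
        #include <string.h>

        #define TABLE_SIZE 8

        enum slot_state { EMPTY, USED, WHITEOUT };

        struct slot {
                enum slot_state state;
                char            name[16];
        };

        static struct slot table[TABLE_SIZE];

        static unsigned hash_name(const char *name)
        {
                unsigned h = 5381;
                while (*name)
                        h = h * 33 + (unsigned char) *name++;
                return h % TABLE_SIZE;
        }

        static void insert(const char *name)
        {
                for (unsigned i = hash_name(name), n = 0; n < TABLE_SIZE;
                     i = (i + 1) % TABLE_SIZE, n++)
                        if (table[i].state != USED) {
                                table[i].state = USED;
                                snprintf(table[i].name, sizeof(table[i].name), "%s", name);
                                return;
                        }
        }

        static struct slot *lookup(const char *name)
        {
                /* probing continues over whiteouts, stopping only at EMPTY */
                for (unsigned i = hash_name(name), n = 0; n < TABLE_SIZE;
                     i = (i + 1) % TABLE_SIZE, n++) {
                        if (table[i].state == EMPTY)
                                return NULL;
                        if (table[i].state == USED && !strcmp(table[i].name, name))
                                return &table[i];
                }
                return NULL;
        }

        static void delete_name(const char *name)
        {
                struct slot *s = lookup(name);
                if (s)
                        s->state = WHITEOUT;    /* can't reset to EMPTY without breaking probes */
        }

        int main(void)
        {
                insert("a"); insert("b"); delete_name("a");
                printf("b %s\n", lookup("b") ? "found" : "missing");
                return 0;
        }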
+3 -3
fs/bcachefs/ec.c
··· 190 190 a->v.stripe_redundancy, trans, 191 191 "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", 192 192 iter.pos.inode, iter.pos.offset, a->v.gen, 193 - bch2_data_types[a->v.data_type], 193 + bch2_data_type_str(a->v.data_type), 194 194 a->v.dirty_sectors, 195 195 a->v.stripe, s.k->p.offset)) { 196 196 ret = -EIO; ··· 200 200 if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, 201 201 "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", 202 202 iter.pos.inode, iter.pos.offset, a->v.gen, 203 - bch2_data_types[a->v.data_type], 203 + bch2_data_type_str(a->v.data_type), 204 204 a->v.dirty_sectors, 205 205 s.k->p.offset)) { 206 206 ret = -EIO; ··· 367 367 } 368 368 } 369 369 370 - if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) { 370 + if (flags & BTREE_TRIGGER_ATOMIC) { 371 371 struct stripe *m = genradix_ptr(&c->stripes, idx); 372 372 373 373 if (!m) {
+19
fs/bcachefs/ec_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_EC_FORMAT_H 3 + #define _BCACHEFS_EC_FORMAT_H 4 + 5 + struct bch_stripe { 6 + struct bch_val v; 7 + __le16 sectors; 8 + __u8 algorithm; 9 + __u8 nr_blocks; 10 + __u8 nr_redundant; 11 + 12 + __u8 csum_granularity_bits; 13 + __u8 csum_type; 14 + __u8 pad; 15 + 16 + struct bch_extent_ptr ptrs[]; 17 + } __packed __aligned(8); 18 + 19 + #endif /* _BCACHEFS_EC_FORMAT_H */
+7 -4
fs/bcachefs/extents.c
··· 8 8 9 9 #include "bcachefs.h" 10 10 #include "bkey_methods.h" 11 + #include "btree_cache.h" 11 12 #include "btree_gc.h" 12 13 #include "btree_io.h" 13 14 #include "btree_iter.h" ··· 1019 1018 struct bch_extent_crc_unpacked crc = 1020 1019 bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); 1021 1020 1022 - prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s", 1021 + prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ", 1023 1022 crc.compressed_size, 1024 1023 crc.uncompressed_size, 1025 1024 crc.offset, crc.nonce, 1026 - bch2_csum_types[crc.csum_type], 1027 - bch2_compression_types[crc.compression_type]); 1025 + bch2_csum_types[crc.csum_type]); 1026 + bch2_prt_compression_type(out, crc.compression_type); 1028 1027 break; 1029 1028 } 1030 1029 case BCH_EXTENT_ENTRY_stripe_ptr: { ··· 1335 1334 } 1336 1335 1337 1336 int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, 1338 - unsigned target, unsigned compression) 1337 + struct bch_io_opts *opts) 1339 1338 { 1340 1339 struct bkey_s k = bkey_i_to_s(_k); 1341 1340 struct bch_extent_rebalance *r; 1341 + unsigned target = opts->background_target; 1342 + unsigned compression = background_compression(*opts); 1342 1343 bool needs_rebalance; 1343 1344 1344 1345 if (!bkey_extent_is_direct_data(k.k))
+1 -1
fs/bcachefs/extents.h
··· 708 708 bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); 709 709 710 710 int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, 711 - unsigned, unsigned); 711 + struct bch_io_opts *); 712 712 713 713 /* Generic extent code: */ 714 714
+295
fs/bcachefs/extents_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_EXTENTS_FORMAT_H 3 + #define _BCACHEFS_EXTENTS_FORMAT_H 4 + 5 + /* 6 + * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally 7 + * preceded by checksum/compression information (bch_extent_crc32 or 8 + * bch_extent_crc64). 9 + * 10 + * One major determining factor in the format of extents is how we handle and 11 + * represent extents that have been partially overwritten and thus trimmed: 12 + * 13 + * If an extent is not checksummed or compressed, when the extent is trimmed we 14 + * don't have to remember the extent we originally allocated and wrote: we can 15 + * merely adjust ptr->offset to point to the start of the data that is currently 16 + * live. The size field in struct bkey records the current (live) size of the 17 + * extent, and is also used to mean "size of region on disk that we point to" in 18 + * this case. 19 + * 20 + * Thus an extent that is not checksummed or compressed will consist only of a 21 + * list of bch_extent_ptrs, with none of the fields in 22 + * bch_extent_crc32/bch_extent_crc64. 23 + * 24 + * When an extent is checksummed or compressed, it's not possible to read only 25 + * the data that is currently live: we have to read the entire extent that was 26 + * originally written, and then return only the part of the extent that is 27 + * currently live. 28 + * 29 + * Thus, in addition to the current size of the extent in struct bkey, we need 30 + * to store the size of the originally allocated space - this is the 31 + * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, 32 + * when the extent is trimmed, instead of modifying the offset field of the 33 + * pointer, we keep a second smaller offset field - "offset into the original 34 + * extent of the currently live region". 35 + * 36 + * The other major determining factor is replication and data migration: 37 + * 38 + * Each pointer may have its own bch_extent_crc32/64. When doing a replicated 39 + * write, we will initially write all the replicas in the same format, with the 40 + * same checksum type and compression format - however, when copygc runs later (or 41 + * tiering/cache promotion, anything that moves data), it is not in general 42 + * going to rewrite all the pointers at once - one of the replicas may be in a 43 + * bucket on one device that has very little fragmentation while another lives 44 + * in a bucket that has become heavily fragmented, and thus is being rewritten 45 + * sooner than the rest. 46 + * 47 + * Thus it will only move a subset of the pointers (or in the case of 48 + * tiering/cache promotion perhaps add a single pointer without dropping any 49 + * current pointers), and if the extent has been partially overwritten it must 50 + * write only the currently live portion (or copygc would not be able to reduce 51 + * fragmentation!) - which necessitates a different bch_extent_crc format for 52 + * the new pointer. 53 + * 54 + * But in the interests of space efficiency, we don't want to store one 55 + * bch_extent_crc for each pointer if we don't have to. 56 + * 57 + * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and 58 + * bch_extent_ptrs appended arbitrarily one after the other. 
We determine the 59 + * type of a given entry with a scheme similar to utf8 (except we're encoding a 60 + * type, not a size), encoding the type in the position of the first set bit: 61 + * 62 + * bch_extent_crc32 - 0b1 63 + * bch_extent_ptr - 0b10 64 + * bch_extent_crc64 - 0b100 65 + * 66 + * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and 67 + * bch_extent_crc64 is the least constrained). 68 + * 69 + * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, 70 + * until the next bch_extent_crc32/64. 71 + * 72 + * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer 73 + * is neither checksummed nor compressed. 74 + */ 75 + 76 + #define BCH_EXTENT_ENTRY_TYPES() \ 77 + x(ptr, 0) \ 78 + x(crc32, 1) \ 79 + x(crc64, 2) \ 80 + x(crc128, 3) \ 81 + x(stripe_ptr, 4) \ 82 + x(rebalance, 5) 83 + #define BCH_EXTENT_ENTRY_MAX 6 84 + 85 + enum bch_extent_entry_type { 86 + #define x(f, n) BCH_EXTENT_ENTRY_##f = n, 87 + BCH_EXTENT_ENTRY_TYPES() 88 + #undef x 89 + }; 90 + 91 + /* Compressed/uncompressed size are stored biased by 1: */ 92 + struct bch_extent_crc32 { 93 + #if defined(__LITTLE_ENDIAN_BITFIELD) 94 + __u32 type:2, 95 + _compressed_size:7, 96 + _uncompressed_size:7, 97 + offset:7, 98 + _unused:1, 99 + csum_type:4, 100 + compression_type:4; 101 + __u32 csum; 102 + #elif defined (__BIG_ENDIAN_BITFIELD) 103 + __u32 csum; 104 + __u32 compression_type:4, 105 + csum_type:4, 106 + _unused:1, 107 + offset:7, 108 + _uncompressed_size:7, 109 + _compressed_size:7, 110 + type:2; 111 + #endif 112 + } __packed __aligned(8); 113 + 114 + #define CRC32_SIZE_MAX (1U << 7) 115 + #define CRC32_NONCE_MAX 0 116 + 117 + struct bch_extent_crc64 { 118 + #if defined(__LITTLE_ENDIAN_BITFIELD) 119 + __u64 type:3, 120 + _compressed_size:9, 121 + _uncompressed_size:9, 122 + offset:9, 123 + nonce:10, 124 + csum_type:4, 125 + compression_type:4, 126 + csum_hi:16; 127 + #elif defined (__BIG_ENDIAN_BITFIELD) 128 + __u64 csum_hi:16, 129 + compression_type:4, 130 + csum_type:4, 131 + nonce:10, 132 + offset:9, 133 + _uncompressed_size:9, 134 + _compressed_size:9, 135 + type:3; 136 + #endif 137 + __u64 csum_lo; 138 + } __packed __aligned(8); 139 + 140 + #define CRC64_SIZE_MAX (1U << 9) 141 + #define CRC64_NONCE_MAX ((1U << 10) - 1) 142 + 143 + struct bch_extent_crc128 { 144 + #if defined(__LITTLE_ENDIAN_BITFIELD) 145 + __u64 type:4, 146 + _compressed_size:13, 147 + _uncompressed_size:13, 148 + offset:13, 149 + nonce:13, 150 + csum_type:4, 151 + compression_type:4; 152 + #elif defined (__BIG_ENDIAN_BITFIELD) 153 + __u64 compression_type:4, 154 + csum_type:4, 155 + nonce:13, 156 + offset:13, 157 + _uncompressed_size:13, 158 + _compressed_size:13, 159 + type:4; 160 + #endif 161 + struct bch_csum csum; 162 + } __packed __aligned(8); 163 + 164 + #define CRC128_SIZE_MAX (1U << 13) 165 + #define CRC128_NONCE_MAX ((1U << 13) - 1) 166 + 167 + /* 168 + * @reservation - pointer hasn't been written to, just reserved 169 + */ 170 + struct bch_extent_ptr { 171 + #if defined(__LITTLE_ENDIAN_BITFIELD) 172 + __u64 type:1, 173 + cached:1, 174 + unused:1, 175 + unwritten:1, 176 + offset:44, /* 8 petabytes */ 177 + dev:8, 178 + gen:8; 179 + #elif defined (__BIG_ENDIAN_BITFIELD) 180 + __u64 gen:8, 181 + dev:8, 182 + offset:44, 183 + unwritten:1, 184 + unused:1, 185 + cached:1, 186 + type:1; 187 + #endif 188 + } __packed __aligned(8); 189 + 190 + struct bch_extent_stripe_ptr { 191 + #if defined(__LITTLE_ENDIAN_BITFIELD) 192 + __u64 type:5, 193 + block:8, 194 + redundancy:4, 195 
+ idx:47; 196 + #elif defined (__BIG_ENDIAN_BITFIELD) 197 + __u64 idx:47, 198 + redundancy:4, 199 + block:8, 200 + type:5; 201 + #endif 202 + }; 203 + 204 + struct bch_extent_rebalance { 205 + #if defined(__LITTLE_ENDIAN_BITFIELD) 206 + __u64 type:6, 207 + unused:34, 208 + compression:8, /* enum bch_compression_opt */ 209 + target:16; 210 + #elif defined (__BIG_ENDIAN_BITFIELD) 211 + __u64 target:16, 212 + compression:8, 213 + unused:34, 214 + type:6; 215 + #endif 216 + }; 217 + 218 + union bch_extent_entry { 219 + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 220 + unsigned long type; 221 + #elif __BITS_PER_LONG == 32 222 + struct { 223 + unsigned long pad; 224 + unsigned long type; 225 + }; 226 + #else 227 + #error edit for your odd byteorder. 228 + #endif 229 + 230 + #define x(f, n) struct bch_extent_##f f; 231 + BCH_EXTENT_ENTRY_TYPES() 232 + #undef x 233 + }; 234 + 235 + struct bch_btree_ptr { 236 + struct bch_val v; 237 + 238 + __u64 _data[0]; 239 + struct bch_extent_ptr start[]; 240 + } __packed __aligned(8); 241 + 242 + struct bch_btree_ptr_v2 { 243 + struct bch_val v; 244 + 245 + __u64 mem_ptr; 246 + __le64 seq; 247 + __le16 sectors_written; 248 + __le16 flags; 249 + struct bpos min_key; 250 + __u64 _data[0]; 251 + struct bch_extent_ptr start[]; 252 + } __packed __aligned(8); 253 + 254 + LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); 255 + 256 + struct bch_extent { 257 + struct bch_val v; 258 + 259 + __u64 _data[0]; 260 + union bch_extent_entry start[]; 261 + } __packed __aligned(8); 262 + 263 + /* Maximum size (in u64s) a single pointer could be: */ 264 + #define BKEY_EXTENT_PTR_U64s_MAX\ 265 + ((sizeof(struct bch_extent_crc128) + \ 266 + sizeof(struct bch_extent_ptr)) / sizeof(__u64)) 267 + 268 + /* Maximum possible size of an entire extent value: */ 269 + #define BKEY_EXTENT_VAL_U64s_MAX \ 270 + (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) 271 + 272 + /* * Maximum possible size of an entire extent, key + value: */ 273 + #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) 274 + 275 + /* Btree pointers don't carry around checksums: */ 276 + #define BKEY_BTREE_PTR_VAL_U64s_MAX \ 277 + ((sizeof(struct bch_btree_ptr_v2) + \ 278 + sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) 279 + #define BKEY_BTREE_PTR_U64s_MAX \ 280 + (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) 281 + 282 + struct bch_reservation { 283 + struct bch_val v; 284 + 285 + __le32 generation; 286 + __u8 nr_replicas; 287 + __u8 pad[3]; 288 + } __packed __aligned(8); 289 + 290 + struct bch_inline_data { 291 + struct bch_val v; 292 + u8 data[]; 293 + }; 294 + 295 + #endif /* _BCACHEFS_EXTENTS_FORMAT_H */
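The long comment carried into extents_format.h explains that an extent value is a packed sequence of crc and ptr entries whose kind is encoded, UTF-8 style, in the position of the first set bit of the entry's leading bits. A standalone sketch of just that decoding step; the bit-position-to-name mapping below is a placeholder for illustration (the real one is defined by BCH_EXTENT_ENTRY_TYPES() and the per-struct type field widths):

        #include <stdio.h>

        static const char * const entry_names[] = { "ptr", "crc32", "crc64", "crc128" };

        static int decode_entry_type(unsigned long first_word)
        {
                if (!first_word)
                        return -1;

                unsigned type = __builtin_ctzl(first_word);     /* index of lowest set bit */
                return type < 4 ? (int) type : -1;
        }

        int main(void)
        {
                unsigned long words[] = { 0x1, 0x2, 0x4, 0x8 };

                for (unsigned i = 0; i < 4; i++) {
                        int t = decode_entry_type(words[i]);
                        printf("0x%lx -> %s\n", words[i],
                               t >= 0 ? entry_names[t] : "(unknown)");
                }
                return 0;
        }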
+2 -2
fs/bcachefs/eytzinger.h
··· 156 156 } 157 157 158 158 #define eytzinger1_for_each(_i, _size) \ 159 - for ((_i) = eytzinger1_first((_size)); \ 159 + for (unsigned (_i) = eytzinger1_first((_size)); \ 160 160 (_i) != 0; \ 161 161 (_i) = eytzinger1_next((_i), (_size))) 162 162 ··· 227 227 } 228 228 229 229 #define eytzinger0_for_each(_i, _size) \ 230 - for ((_i) = eytzinger0_first((_size)); \ 230 + for (unsigned (_i) = eytzinger0_first((_size)); \ 231 231 (_i) != -1; \ 232 232 (_i) = eytzinger0_next((_i), (_size))) 233 233
+4
fs/bcachefs/fs-io-direct.c
··· 77 77 78 78 bch2_inode_opts_get(&opts, c, &inode->ei_inode); 79 79 80 + /* bios must be 512 byte aligned: */ 81 + if ((offset|iter->count) & (SECTOR_SIZE - 1)) 82 + return -EINVAL; 83 + 80 84 ret = min_t(loff_t, iter->count, 81 85 max_t(loff_t, 0, i_size_read(&inode->v) - offset)); 82 86
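fs-io-direct.c now rejects O_DIRECT reads whose offset or length is not 512-byte aligned, using the usual trick of OR-ing the two values so a single mask test catches misalignment in either. A one-function sketch of that check:

        #include <stdio.h>

        #define SECTOR_SIZE 512

        /* offset|len has a low bit set iff either value is not sector aligned */
        static int dio_aligned(unsigned long long offset, unsigned long long len)
        {
                return !((offset | len) & (SECTOR_SIZE - 1));
        }

        int main(void)
        {
                printf("%d %d\n", dio_aligned(4096, 1024), dio_aligned(4096, 100));
                return 0;
        }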
+24 -13
fs/bcachefs/fs-io-pagecache.c
··· 309 309 } 310 310 } 311 311 312 - void bch2_mark_pagecache_reserved(struct bch_inode_info *inode, 313 - u64 start, u64 end) 312 + int bch2_mark_pagecache_reserved(struct bch_inode_info *inode, 313 + u64 *start, u64 end, 314 + bool nonblocking) 314 315 { 315 316 struct bch_fs *c = inode->v.i_sb->s_fs_info; 316 - pgoff_t index = start >> PAGE_SECTORS_SHIFT; 317 + pgoff_t index = *start >> PAGE_SECTORS_SHIFT; 317 318 pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; 318 319 struct folio_batch fbatch; 319 320 s64 i_sectors_delta = 0; 320 - unsigned i, j; 321 + int ret = 0; 321 322 322 - if (end <= start) 323 - return; 323 + if (end <= *start) 324 + return 0; 324 325 325 326 folio_batch_init(&fbatch); 326 327 327 328 while (filemap_get_folios(inode->v.i_mapping, 328 329 &index, end_index, &fbatch)) { 329 - for (i = 0; i < folio_batch_count(&fbatch); i++) { 330 + for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) { 330 331 struct folio *folio = fbatch.folios[i]; 332 + 333 + if (!nonblocking) 334 + folio_lock(folio); 335 + else if (!folio_trylock(folio)) { 336 + folio_batch_release(&fbatch); 337 + ret = -EAGAIN; 338 + break; 339 + } 340 + 331 341 u64 folio_start = folio_sector(folio); 332 342 u64 folio_end = folio_end_sector(folio); 333 - unsigned folio_offset = max(start, folio_start) - folio_start; 334 - unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 335 - struct bch_folio *s; 336 343 337 344 BUG_ON(end <= folio_start); 338 345 339 - folio_lock(folio); 340 - s = bch2_folio(folio); 346 + *start = min(end, folio_end); 341 347 348 + struct bch_folio *s = bch2_folio(folio); 342 349 if (s) { 350 + unsigned folio_offset = max(*start, folio_start) - folio_start; 351 + unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; 352 + 343 353 spin_lock(&s->lock); 344 - for (j = folio_offset; j < folio_offset + folio_len; j++) { 354 + for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) { 345 355 i_sectors_delta -= s->s[j].state == SECTOR_dirty; 346 356 bch2_folio_sector_set(folio, s, j, 347 357 folio_sector_reserve(s->s[j].state)); ··· 366 356 } 367 357 368 358 bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); 359 + return ret; 369 360 } 370 361 371 362 static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
+1 -1
fs/bcachefs/fs-io-pagecache.h
··· 143 143 void bch2_bio_page_state_set(struct bio *, struct bkey_s_c); 144 144 145 145 void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64); 146 - void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64); 146 + int bch2_mark_pagecache_reserved(struct bch_inode_info *, u64 *, u64, bool); 147 147 148 148 int bch2_get_folio_disk_reservation(struct bch_fs *, 149 149 struct bch_inode_info *,
+5 -2
fs/bcachefs/fs-io.c
··· 675 675 676 676 bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta); 677 677 678 - drop_locks_do(trans, 679 - (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); 678 + if (bch2_mark_pagecache_reserved(inode, &hole_start, 679 + iter.pos.offset, true)) 680 + drop_locks_do(trans, 681 + bch2_mark_pagecache_reserved(inode, &hole_start, 682 + iter.pos.offset, false)); 680 683 bkey_err: 681 684 bch2_quota_reservation_put(c, inode, &quota_res); 682 685 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+5 -6
fs/bcachefs/fs-ioctl.c
··· 337 337 if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) 338 338 create_flags |= BCH_CREATE_SNAPSHOT_RO; 339 339 340 - /* why do we need this lock? */ 341 - down_read(&c->vfs_sb->s_umount); 342 - 343 - if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) 340 + if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { 341 + /* sync_inodes_sb enforce s_umount is locked */ 342 + down_read(&c->vfs_sb->s_umount); 344 343 sync_inodes_sb(c->vfs_sb); 344 + up_read(&c->vfs_sb->s_umount); 345 + } 345 346 retry: 346 347 if (arg.src_ptr) { 347 348 error = user_path_at(arg.dirfd, ··· 426 425 goto retry; 427 426 } 428 427 err1: 429 - up_read(&c->vfs_sb->s_umount); 430 - 431 428 return error; 432 429 } 433 430
+20 -9
fs/bcachefs/inode.c
··· 506 506 static void __bch2_inode_unpacked_to_text(struct printbuf *out, 507 507 struct bch_inode_unpacked *inode) 508 508 { 509 - prt_printf(out, "mode=%o ", inode->bi_mode); 509 + printbuf_indent_add(out, 2); 510 + prt_printf(out, "mode=%o", inode->bi_mode); 511 + prt_newline(out); 510 512 511 513 prt_str(out, "flags="); 512 514 prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1)); 513 515 prt_printf(out, " (%x)", inode->bi_flags); 516 + prt_newline(out); 514 517 515 - prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu", 516 - inode->bi_journal_seq, 517 - inode->bi_size, 518 - inode->bi_sectors, 519 - inode->bi_version); 518 + prt_printf(out, "journal_seq=%llu", inode->bi_journal_seq); 519 + prt_newline(out); 520 + 521 + prt_printf(out, "bi_size=%llu", inode->bi_size); 522 + prt_newline(out); 523 + 524 + prt_printf(out, "bi_sectors=%llu", inode->bi_sectors); 525 + prt_newline(out); 526 + 527 + prt_newline(out); 528 + prt_printf(out, "bi_version=%llu", inode->bi_version); 520 529 521 530 #define x(_name, _bits) \ 522 - prt_printf(out, " "#_name "=%llu", (u64) inode->_name); 531 + prt_printf(out, #_name "=%llu", (u64) inode->_name); \ 532 + prt_newline(out); 523 533 BCH_INODE_FIELDS_v3() 524 534 #undef x 535 + printbuf_indent_sub(out, 2); 525 536 } 526 537 527 538 void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) ··· 598 587 } 599 588 } 600 589 601 - if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { 590 + if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) { 602 591 BUG_ON(!trans->journal_res.seq); 603 592 604 593 bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); ··· 608 597 struct bch_fs *c = trans->c; 609 598 610 599 percpu_down_read(&c->mark_lock); 611 - this_cpu_add(c->usage_gc->nr_inodes, nr); 600 + this_cpu_add(c->usage_gc->b.nr_inodes, nr); 612 601 percpu_up_read(&c->mark_lock); 613 602 } 614 603
+166
fs/bcachefs/inode_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_INODE_FORMAT_H 3 + #define _BCACHEFS_INODE_FORMAT_H 4 + 5 + #define BLOCKDEV_INODE_MAX 4096 6 + #define BCACHEFS_ROOT_INO 4096 7 + 8 + struct bch_inode { 9 + struct bch_val v; 10 + 11 + __le64 bi_hash_seed; 12 + __le32 bi_flags; 13 + __le16 bi_mode; 14 + __u8 fields[]; 15 + } __packed __aligned(8); 16 + 17 + struct bch_inode_v2 { 18 + struct bch_val v; 19 + 20 + __le64 bi_journal_seq; 21 + __le64 bi_hash_seed; 22 + __le64 bi_flags; 23 + __le16 bi_mode; 24 + __u8 fields[]; 25 + } __packed __aligned(8); 26 + 27 + struct bch_inode_v3 { 28 + struct bch_val v; 29 + 30 + __le64 bi_journal_seq; 31 + __le64 bi_hash_seed; 32 + __le64 bi_flags; 33 + __le64 bi_sectors; 34 + __le64 bi_size; 35 + __le64 bi_version; 36 + __u8 fields[]; 37 + } __packed __aligned(8); 38 + 39 + #define INODEv3_FIELDS_START_INITIAL 6 40 + #define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(__u64)) 41 + 42 + struct bch_inode_generation { 43 + struct bch_val v; 44 + 45 + __le32 bi_generation; 46 + __le32 pad; 47 + } __packed __aligned(8); 48 + 49 + /* 50 + * bi_subvol and bi_parent_subvol are only set for subvolume roots: 51 + */ 52 + 53 + #define BCH_INODE_FIELDS_v2() \ 54 + x(bi_atime, 96) \ 55 + x(bi_ctime, 96) \ 56 + x(bi_mtime, 96) \ 57 + x(bi_otime, 96) \ 58 + x(bi_size, 64) \ 59 + x(bi_sectors, 64) \ 60 + x(bi_uid, 32) \ 61 + x(bi_gid, 32) \ 62 + x(bi_nlink, 32) \ 63 + x(bi_generation, 32) \ 64 + x(bi_dev, 32) \ 65 + x(bi_data_checksum, 8) \ 66 + x(bi_compression, 8) \ 67 + x(bi_project, 32) \ 68 + x(bi_background_compression, 8) \ 69 + x(bi_data_replicas, 8) \ 70 + x(bi_promote_target, 16) \ 71 + x(bi_foreground_target, 16) \ 72 + x(bi_background_target, 16) \ 73 + x(bi_erasure_code, 16) \ 74 + x(bi_fields_set, 16) \ 75 + x(bi_dir, 64) \ 76 + x(bi_dir_offset, 64) \ 77 + x(bi_subvol, 32) \ 78 + x(bi_parent_subvol, 32) 79 + 80 + #define BCH_INODE_FIELDS_v3() \ 81 + x(bi_atime, 96) \ 82 + x(bi_ctime, 96) \ 83 + x(bi_mtime, 96) \ 84 + x(bi_otime, 96) \ 85 + x(bi_uid, 32) \ 86 + x(bi_gid, 32) \ 87 + x(bi_nlink, 32) \ 88 + x(bi_generation, 32) \ 89 + x(bi_dev, 32) \ 90 + x(bi_data_checksum, 8) \ 91 + x(bi_compression, 8) \ 92 + x(bi_project, 32) \ 93 + x(bi_background_compression, 8) \ 94 + x(bi_data_replicas, 8) \ 95 + x(bi_promote_target, 16) \ 96 + x(bi_foreground_target, 16) \ 97 + x(bi_background_target, 16) \ 98 + x(bi_erasure_code, 16) \ 99 + x(bi_fields_set, 16) \ 100 + x(bi_dir, 64) \ 101 + x(bi_dir_offset, 64) \ 102 + x(bi_subvol, 32) \ 103 + x(bi_parent_subvol, 32) \ 104 + x(bi_nocow, 8) 105 + 106 + /* subset of BCH_INODE_FIELDS */ 107 + #define BCH_INODE_OPTS() \ 108 + x(data_checksum, 8) \ 109 + x(compression, 8) \ 110 + x(project, 32) \ 111 + x(background_compression, 8) \ 112 + x(data_replicas, 8) \ 113 + x(promote_target, 16) \ 114 + x(foreground_target, 16) \ 115 + x(background_target, 16) \ 116 + x(erasure_code, 16) \ 117 + x(nocow, 8) 118 + 119 + enum inode_opt_id { 120 + #define x(name, ...) 
\ 121 + Inode_opt_##name, 122 + BCH_INODE_OPTS() 123 + #undef x 124 + Inode_opt_nr, 125 + }; 126 + 127 + #define BCH_INODE_FLAGS() \ 128 + x(sync, 0) \ 129 + x(immutable, 1) \ 130 + x(append, 2) \ 131 + x(nodump, 3) \ 132 + x(noatime, 4) \ 133 + x(i_size_dirty, 5) \ 134 + x(i_sectors_dirty, 6) \ 135 + x(unlinked, 7) \ 136 + x(backptr_untrusted, 8) 137 + 138 + /* bits 20+ reserved for packed fields below: */ 139 + 140 + enum bch_inode_flags { 141 + #define x(t, n) BCH_INODE_##t = 1U << n, 142 + BCH_INODE_FLAGS() 143 + #undef x 144 + }; 145 + 146 + enum __bch_inode_flags { 147 + #define x(t, n) __BCH_INODE_##t = n, 148 + BCH_INODE_FLAGS() 149 + #undef x 150 + }; 151 + 152 + LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); 153 + LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); 154 + LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); 155 + 156 + LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); 157 + LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); 158 + 159 + LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24); 160 + LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31); 161 + 162 + LE64_BITMASK(INODEv3_FIELDS_START, 163 + struct bch_inode_v3, bi_flags, 31, 36); 164 + LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52); 165 + 166 + #endif /* _BCACHEFS_INODE_FORMAT_H */
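Editor's note: the new header keeps the on-disk field lists as x-macros, so one list drives every expansion: the enum inode_opt_id above, the per-field "=%llu" printing in the inode.c hunk, and the pack/unpack code elsewhere. Purely as an illustration of the pattern (the array name below is hypothetical, not part of this series), the same list can also generate a string table:

/* Illustration only: expanding the BCH_INODE_OPTS() x-macro into a name table. */
static const char * const bch2_inode_opt_names[] = {
#define x(name, ...)	#name,
	BCH_INODE_OPTS()
#undef x
	NULL
};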
+1 -3
fs/bcachefs/io_misc.c
··· 442 442 443 443 op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); 444 444 445 - ret = bch2_bkey_set_needs_rebalance(c, copy, 446 - opts.background_target, 447 - opts.background_compression) ?: 445 + ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?: 448 446 bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: 449 447 bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: 450 448 bch2_logged_op_update(trans, &op->k_i) ?:
+6 -7
fs/bcachefs/io_write.c
··· 362 362 bkey_start_pos(&sk.k->k), 363 363 BTREE_ITER_SLOTS|BTREE_ITER_INTENT); 364 364 365 - ret = bch2_bkey_set_needs_rebalance(c, sk.k, 366 - op->opts.background_target, 367 - op->opts.background_compression) ?: 365 + ret = bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?: 368 366 bch2_extent_update(trans, inum, &iter, sk.k, 369 367 &op->res, 370 368 op->new_i_size, &op->i_sectors_delta, ··· 1445 1447 op->flags |= BCH_WRITE_DONE; 1446 1448 1447 1449 if (ret < 0) { 1448 - bch_err_inum_offset_ratelimited(c, 1449 - op->pos.inode, 1450 - op->pos.offset << 9, 1451 - "%s(): error: %s", __func__, bch2_err_str(ret)); 1450 + if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) 1451 + bch_err_inum_offset_ratelimited(c, 1452 + op->pos.inode, 1453 + op->pos.offset << 9, 1454 + "%s(): error: %s", __func__, bch2_err_str(ret)); 1452 1455 op->error = ret; 1453 1456 break; 1454 1457 }
+72 -39
fs/bcachefs/journal.c
··· 27 27 NULL 28 28 }; 29 29 30 + static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq) 31 + { 32 + union journal_res_state s = READ_ONCE(j->reservations); 33 + unsigned i = seq & JOURNAL_BUF_MASK; 34 + struct journal_buf *buf = j->buf + i; 35 + 36 + prt_printf(out, "seq:"); 37 + prt_tab(out); 38 + prt_printf(out, "%llu", seq); 39 + prt_newline(out); 40 + printbuf_indent_add(out, 2); 41 + 42 + prt_printf(out, "refcount:"); 43 + prt_tab(out); 44 + prt_printf(out, "%u", journal_state_count(s, i)); 45 + prt_newline(out); 46 + 47 + prt_printf(out, "size:"); 48 + prt_tab(out); 49 + prt_human_readable_u64(out, vstruct_bytes(buf->data)); 50 + prt_newline(out); 51 + 52 + prt_printf(out, "expires"); 53 + prt_tab(out); 54 + prt_printf(out, "%li jiffies", buf->expires - jiffies); 55 + prt_newline(out); 56 + 57 + printbuf_indent_sub(out, 2); 58 + } 59 + 60 + static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) 61 + { 62 + if (!out->nr_tabstops) 63 + printbuf_tabstop_push(out, 24); 64 + 65 + for (u64 seq = journal_last_unwritten_seq(j); 66 + seq <= journal_cur_seq(j); 67 + seq++) 68 + bch2_journal_buf_to_text(out, j, seq); 69 + } 70 + 30 71 static inline bool journal_seq_unwritten(struct journal *j, u64 seq) 31 72 { 32 73 return seq > j->seq_ondisk; ··· 197 156 * We don't close a journal_buf until the next journal_buf is finished writing, 198 157 * and can be opened again - this also initializes the next journal_buf: 199 158 */ 200 - static void __journal_entry_close(struct journal *j, unsigned closed_val) 159 + static void __journal_entry_close(struct journal *j, unsigned closed_val, bool trace) 201 160 { 202 161 struct bch_fs *c = container_of(j, struct bch_fs, journal); 203 162 struct journal_buf *buf = journal_cur_buf(j); ··· 226 185 /* Close out old buffer: */ 227 186 buf->data->u64s = cpu_to_le32(old.cur_entry_offset); 228 187 229 - trace_journal_entry_close(c, vstruct_bytes(buf->data)); 188 + if (trace_journal_entry_close_enabled() && trace) { 189 + struct printbuf pbuf = PRINTBUF; 190 + pbuf.atomic++; 191 + 192 + prt_str(&pbuf, "entry size: "); 193 + prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); 194 + prt_newline(&pbuf); 195 + bch2_prt_task_backtrace(&pbuf, current, 1); 196 + trace_journal_entry_close(c, pbuf.buf); 197 + printbuf_exit(&pbuf); 198 + } 230 199 231 200 sectors = vstruct_blocks_plus(buf->data, c->block_bits, 232 201 buf->u64s_reserved) << c->block_bits; ··· 276 225 void bch2_journal_halt(struct journal *j) 277 226 { 278 227 spin_lock(&j->lock); 279 - __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL); 228 + __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true); 280 229 if (!j->err_seq) 281 230 j->err_seq = journal_cur_seq(j); 282 231 journal_wake(j); ··· 290 239 291 240 /* Don't close it yet if we already have a write in flight: */ 292 241 if (ret) 293 - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); 242 + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); 294 243 else if (nr_unwritten_journal_entries(j)) { 295 244 struct journal_buf *buf = journal_cur_buf(j); 296 245 ··· 457 406 if (delta > 0) 458 407 mod_delayed_work(c->io_complete_wq, &j->write_work, delta); 459 408 else 460 - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); 409 + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); 461 410 unlock: 462 411 spin_unlock(&j->lock); 463 412 } ··· 514 463 buf->buf_size < JOURNAL_ENTRY_SIZE_MAX) 515 464 j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1); 516 465 517 - 
__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); 466 + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false); 518 467 ret = journal_entry_open(j); 519 468 520 469 if (ret == JOURNAL_ERR_max_in_flight) { 521 470 track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], 522 471 &j->max_in_flight_start, true); 523 - trace_and_count(c, journal_entry_full, c); 472 + if (trace_journal_entry_full_enabled()) { 473 + struct printbuf buf = PRINTBUF; 474 + buf.atomic++; 475 + 476 + bch2_journal_bufs_to_text(&buf, j); 477 + trace_journal_entry_full(c, buf.buf); 478 + printbuf_exit(&buf); 479 + } 480 + count_event(c, journal_entry_full); 524 481 } 525 482 unlock: 526 483 can_discard = j->can_discard; ··· 608 549 /* 609 550 * Not enough room in current journal entry, have to flush it: 610 551 */ 611 - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); 552 + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); 612 553 } else { 613 554 journal_cur_buf(j)->u64s_reserved += d; 614 555 } ··· 665 606 struct journal_res res = { 0 }; 666 607 667 608 if (journal_entry_is_open(j)) 668 - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); 609 + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); 669 610 670 611 spin_unlock(&j->lock); 671 612 ··· 845 786 846 787 if (buf->need_flush_to_write_buffer) { 847 788 if (seq == journal_cur_seq(j)) 848 - __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL); 789 + __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); 849 790 850 791 union journal_res_state s; 851 792 s.v = atomic64_read_acquire(&j->reservations.counter); ··· 1398 1339 } 1399 1340 1400 1341 prt_newline(out); 1401 - 1402 - for (u64 seq = journal_cur_seq(j); 1403 - seq >= journal_last_unwritten_seq(j); 1404 - --seq) { 1405 - unsigned i = seq & JOURNAL_BUF_MASK; 1406 - 1407 - prt_printf(out, "unwritten entry:"); 1408 - prt_tab(out); 1409 - prt_printf(out, "%llu", seq); 1410 - prt_newline(out); 1411 - printbuf_indent_add(out, 2); 1412 - 1413 - prt_printf(out, "refcount:"); 1414 - prt_tab(out); 1415 - prt_printf(out, "%u", journal_state_count(s, i)); 1416 - prt_newline(out); 1417 - 1418 - prt_printf(out, "sectors:"); 1419 - prt_tab(out); 1420 - prt_printf(out, "%u", j->buf[i].sectors); 1421 - prt_newline(out); 1422 - 1423 - prt_printf(out, "expires"); 1424 - prt_tab(out); 1425 - prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies); 1426 - prt_newline(out); 1427 - 1428 - printbuf_indent_sub(out, 2); 1429 - } 1342 + prt_printf(out, "unwritten entries:"); 1343 + prt_newline(out); 1344 + bch2_journal_bufs_to_text(out, j); 1430 1345 1431 1346 prt_printf(out, 1432 1347 "replay done:\t\t%i\n",
+1 -4
fs/bcachefs/journal_io.c
··· 683 683 prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); 684 684 685 685 for (i = 0; i < nr_types; i++) { 686 - if (i < BCH_DATA_NR) 687 - prt_printf(out, " %s", bch2_data_types[i]); 688 - else 689 - prt_printf(out, " (unknown data type %u)", i); 686 + bch2_prt_data_type(out, i); 690 687 prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu", 691 688 le64_to_cpu(u->d[i].buckets), 692 689 le64_to_cpu(u->d[i].sectors),
+30
fs/bcachefs/logged_ops_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H 3 + #define _BCACHEFS_LOGGED_OPS_FORMAT_H 4 + 5 + struct bch_logged_op_truncate { 6 + struct bch_val v; 7 + __le32 subvol; 8 + __le32 pad; 9 + __le64 inum; 10 + __le64 new_i_size; 11 + }; 12 + 13 + enum logged_op_finsert_state { 14 + LOGGED_OP_FINSERT_start, 15 + LOGGED_OP_FINSERT_shift_extents, 16 + LOGGED_OP_FINSERT_finish, 17 + }; 18 + 19 + struct bch_logged_op_finsert { 20 + struct bch_val v; 21 + __u8 state; 22 + __u8 pad[3]; 23 + __le32 subvol; 24 + __le64 inum; 25 + __le64 dst_offset; 26 + __le64 src_offset; 27 + __le64 pos; 28 + }; 29 + 30 + #endif /* _BCACHEFS_LOGGED_OPS_FORMAT_H */
+56 -9
fs/bcachefs/move.c
··· 6 6 #include "backpointers.h" 7 7 #include "bkey_buf.h" 8 8 #include "btree_gc.h" 9 + #include "btree_io.h" 9 10 #include "btree_update.h" 10 11 #include "btree_update_interior.h" 11 12 #include "btree_write_buffer.h" 13 + #include "compress.h" 12 14 #include "disk_groups.h" 13 15 #include "ec.h" 14 16 #include "errcode.h" ··· 36 34 NULL 37 35 }; 38 36 39 - static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k) 37 + static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, 38 + struct bch_io_opts *io_opts, 39 + struct data_update_opts *data_opts) 40 + { 41 + printbuf_tabstop_push(out, 20); 42 + prt_str(out, "rewrite ptrs:"); 43 + prt_tab(out); 44 + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); 45 + prt_newline(out); 46 + 47 + prt_str(out, "kill ptrs: "); 48 + prt_tab(out); 49 + bch2_prt_u64_base2(out, data_opts->kill_ptrs); 50 + prt_newline(out); 51 + 52 + prt_str(out, "target: "); 53 + prt_tab(out); 54 + bch2_target_to_text(out, c, data_opts->target); 55 + prt_newline(out); 56 + 57 + prt_str(out, "compression: "); 58 + prt_tab(out); 59 + bch2_compression_opt_to_text(out, background_compression(*io_opts)); 60 + prt_newline(out); 61 + 62 + prt_str(out, "extra replicas: "); 63 + prt_tab(out); 64 + prt_u64(out, data_opts->extra_replicas); 65 + } 66 + 67 + static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, 68 + struct bch_io_opts *io_opts, 69 + struct data_update_opts *data_opts) 40 70 { 41 71 if (trace_move_extent_enabled()) { 42 72 struct printbuf buf = PRINTBUF; 43 73 44 74 bch2_bkey_val_to_text(&buf, c, k); 75 + prt_newline(&buf); 76 + bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); 45 77 trace_move_extent(c, buf.buf); 46 78 printbuf_exit(&buf); 47 79 } ··· 145 109 if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) { 146 110 move_free(io); 147 111 return; 112 + } 113 + 114 + if (trace_move_extent_write_enabled()) { 115 + struct bch_fs *c = io->write.op.c; 116 + struct printbuf buf = PRINTBUF; 117 + 118 + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); 119 + trace_move_extent_write(c, buf.buf); 120 + printbuf_exit(&buf); 148 121 } 149 122 150 123 closure_get(&io->write.ctxt->cl); ··· 286 241 unsigned sectors = k.k->size, pages; 287 242 int ret = -ENOMEM; 288 243 244 + trace_move_extent2(c, k, &io_opts, &data_opts); 245 + 289 246 if (ctxt->stats) 290 247 ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); 291 - trace_move_extent2(c, k); 292 248 293 249 bch2_data_update_opts_normalize(k, &data_opts); 294 250 ··· 805 759 if (!b) 806 760 goto next; 807 761 762 + unsigned sectors = btree_ptr_sectors_written(&b->key); 763 + 808 764 ret = bch2_btree_node_rewrite(trans, &iter, b, 0); 809 765 bch2_trans_iter_exit(trans, &iter); 810 766 ··· 816 768 goto err; 817 769 818 770 if (ctxt->rate) 819 - bch2_ratelimit_increment(ctxt->rate, 820 - c->opts.btree_node_size >> 9); 771 + bch2_ratelimit_increment(ctxt->rate, sectors); 821 772 if (ctxt->stats) { 822 - atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen); 823 - atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved); 773 + atomic64_add(sectors, &ctxt->stats->sectors_seen); 774 + atomic64_add(sectors, &ctxt->stats->sectors_moved); 824 775 } 825 776 } 826 777 next: ··· 1130 1083 1131 1084 void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) 1132 1085 { 1133 - prt_printf(out, "%s: data type=%s pos=", 1134 - stats->name, 1135 - bch2_data_types[stats->data_type]); 1086 + prt_printf(out, "%s: data 
type==", stats->name); 1087 + bch2_prt_data_type(out, stats->data_type); 1088 + prt_str(out, " pos="); 1136 1089 bch2_bbpos_to_text(out, stats->pos); 1137 1090 prt_newline(out); 1138 1091 printbuf_indent_add(out, 2);
+2 -2
fs/bcachefs/opts.c
··· 52 52 NULL 53 53 }; 54 54 55 - const char * const bch2_compression_types[] = { 55 + const char * const __bch2_compression_types[] = { 56 56 BCH_COMPRESSION_TYPES() 57 57 NULL 58 58 }; ··· 72 72 NULL 73 73 }; 74 74 75 - const char * const bch2_data_types[] = { 75 + const char * const __bch2_data_types[] = { 76 76 BCH_DATA_TYPES() 77 77 NULL 78 78 };
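Editor's note: renaming the raw arrays to __bch2_compression_types[] and __bch2_data_types[] pushes callers through bounds-checked wrappers (bch2_data_type_str(), bch2_prt_data_type(), bch2_prt_compression_type()), so a corrupt on-disk value can no longer index past the table, as the journal_io.c, replicas.c, move.c and sysfs.c hunks show. The wrappers themselves are outside this excerpt; presumably they look along these lines:

/* Hedged sketch of the bounds-checked accessors the call sites switch to;
 * the real definitions are not part of this excerpt. */
static inline const char *bch2_data_type_str(enum bch_data_type type)
{
	return type < BCH_DATA_NR
		? __bch2_data_types[type]
		: "(invalid data type)";
}

static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
{
	if (type < BCH_DATA_NR)
		prt_str(out, __bch2_data_types[type]);
	else
		prt_printf(out, "(invalid data type %u)", type);
}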
+7 -2
fs/bcachefs/opts.h
··· 18 18 extern const char * const __bch2_btree_ids[]; 19 19 extern const char * const bch2_csum_types[]; 20 20 extern const char * const bch2_csum_opts[]; 21 - extern const char * const bch2_compression_types[]; 21 + extern const char * const __bch2_compression_types[]; 22 22 extern const char * const bch2_compression_opts[]; 23 23 extern const char * const bch2_str_hash_types[]; 24 24 extern const char * const bch2_str_hash_opts[]; 25 - extern const char * const bch2_data_types[]; 25 + extern const char * const __bch2_data_types[]; 26 26 extern const char * const bch2_member_states[]; 27 27 extern const char * const bch2_jset_entry_types[]; 28 28 extern const char * const bch2_fs_usage_types[]; ··· 563 563 BCH_INODE_OPTS() 564 564 #undef x 565 565 }; 566 + 567 + static inline unsigned background_compression(struct bch_io_opts opts) 568 + { 569 + return opts.background_compression ?: opts.compression; 570 + } 566 571 567 572 struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); 568 573 bool bch2_opt_is_inode_opt(enum bch_opt_id);
+47
fs/bcachefs/quota_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_QUOTA_FORMAT_H 3 + #define _BCACHEFS_QUOTA_FORMAT_H 4 + 5 + /* KEY_TYPE_quota: */ 6 + 7 + enum quota_types { 8 + QTYP_USR = 0, 9 + QTYP_GRP = 1, 10 + QTYP_PRJ = 2, 11 + QTYP_NR = 3, 12 + }; 13 + 14 + enum quota_counters { 15 + Q_SPC = 0, 16 + Q_INO = 1, 17 + Q_COUNTERS = 2, 18 + }; 19 + 20 + struct bch_quota_counter { 21 + __le64 hardlimit; 22 + __le64 softlimit; 23 + }; 24 + 25 + struct bch_quota { 26 + struct bch_val v; 27 + struct bch_quota_counter c[Q_COUNTERS]; 28 + } __packed __aligned(8); 29 + 30 + /* BCH_SB_FIELD_quota: */ 31 + 32 + struct bch_sb_quota_counter { 33 + __le32 timelimit; 34 + __le32 warnlimit; 35 + }; 36 + 37 + struct bch_sb_quota_type { 38 + __le64 flags; 39 + struct bch_sb_quota_counter c[Q_COUNTERS]; 40 + }; 41 + 42 + struct bch_sb_field_quota { 43 + struct bch_sb_field field; 44 + struct bch_sb_quota_type q[QTYP_NR]; 45 + } __packed __aligned(8); 46 + 47 + #endif /* _BCACHEFS_QUOTA_FORMAT_H */
+5 -8
fs/bcachefs/rebalance.c
··· 177 177 prt_str(&buf, "target="); 178 178 bch2_target_to_text(&buf, c, r->target); 179 179 prt_str(&buf, " compression="); 180 - struct bch_compression_opt opt = __bch2_compression_decode(r->compression); 181 - prt_str(&buf, bch2_compression_opts[opt.type]); 180 + bch2_compression_opt_to_text(&buf, r->compression); 182 181 prt_str(&buf, " "); 183 182 bch2_bkey_val_to_text(&buf, c, k); 184 183 ··· 253 254 254 255 if (k.k->p.inode) { 255 256 target = io_opts->background_target; 256 - compression = io_opts->background_compression ?: io_opts->compression; 257 + compression = background_compression(*io_opts); 257 258 } else { 258 259 const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); 259 260 260 261 target = r ? r->target : io_opts->background_target; 261 - compression = r ? r->compression : 262 - (io_opts->background_compression ?: io_opts->compression); 262 + compression = r ? r->compression : background_compression(*io_opts); 263 263 } 264 264 265 265 data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, k, target, compression); ··· 369 371 !kthread_should_stop() && 370 372 !atomic64_read(&r->work_stats.sectors_seen) && 371 373 !atomic64_read(&r->scan_stats.sectors_seen)) { 374 + bch2_moving_ctxt_flush_all(ctxt); 372 375 bch2_trans_unlock_long(trans); 373 376 rebalance_wait(c); 374 377 } ··· 384 385 struct bch_fs *c = arg; 385 386 struct bch_fs_rebalance *r = &c->rebalance; 386 387 struct moving_context ctxt; 387 - int ret; 388 388 389 389 set_freezable(); 390 390 ··· 391 393 writepoint_ptr(&c->rebalance_write_point), 392 394 true); 393 395 394 - while (!kthread_should_stop() && 395 - !(ret = do_rebalance(&ctxt))) 396 + while (!kthread_should_stop() && !do_rebalance(&ctxt)) 396 397 ; 397 398 398 399 bch2_moving_ctxt_exit(&ctxt);
+1 -1
fs/bcachefs/recovery.c
··· 280 280 le64_to_cpu(u->v); 281 281 break; 282 282 case BCH_FS_USAGE_inodes: 283 - c->usage_base->nr_inodes = le64_to_cpu(u->v); 283 + c->usage_base->b.nr_inodes = le64_to_cpu(u->v); 284 284 break; 285 285 case BCH_FS_USAGE_key_version: 286 286 atomic64_set(&c->key_version,
+13 -8
fs/bcachefs/reflink.c
··· 292 292 } 293 293 } 294 294 295 - int bch2_trans_mark_reflink_v(struct btree_trans *trans, 296 - enum btree_id btree_id, unsigned level, 297 - struct bkey_s_c old, struct bkey_s new, 298 - unsigned flags) 295 + int bch2_trigger_reflink_v(struct btree_trans *trans, 296 + enum btree_id btree_id, unsigned level, 297 + struct bkey_s_c old, struct bkey_s new, 298 + unsigned flags) 299 299 { 300 300 if ((flags & BTREE_TRIGGER_TRANSACTIONAL) && 301 301 (flags & BTREE_TRIGGER_INSERT)) ··· 324 324 min(datalen, 32U), d.v->data); 325 325 } 326 326 327 - int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, 327 + int bch2_trigger_indirect_inline_data(struct btree_trans *trans, 328 328 enum btree_id btree_id, unsigned level, 329 329 struct bkey_s_c old, struct bkey_s new, 330 330 unsigned flags) ··· 486 486 487 487 bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot); 488 488 489 + if (dst_inum.inum < src_inum.inum) { 490 + /* Avoid some lock cycle transaction restarts */ 491 + ret = bch2_btree_iter_traverse(&dst_iter); 492 + if (ret) 493 + continue; 494 + } 495 + 489 496 dst_done = dst_iter.pos.offset - dst_start.offset; 490 497 src_want = POS(src_start.inode, src_start.offset + dst_done); 491 498 bch2_btree_iter_set_pos(&src_iter, src_want); ··· 545 538 min(src_k.k->p.offset - src_want.offset, 546 539 dst_end.offset - dst_iter.pos.offset)); 547 540 548 - ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, 549 - opts.background_target, 550 - opts.background_compression) ?: 541 + ret = bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?: 551 542 bch2_extent_update(trans, dst_inum, &dst_iter, 552 543 new_dst.k, &disk_res, 553 544 new_i_size, i_sectors_delta,
+4 -4
fs/bcachefs/reflink.h
··· 24 24 enum bkey_invalid_flags, struct printbuf *); 25 25 void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, 26 26 struct bkey_s_c); 27 - int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, 27 + int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, 28 28 struct bkey_s_c, struct bkey_s, unsigned); 29 29 30 30 #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ 31 31 .key_invalid = bch2_reflink_v_invalid, \ 32 32 .val_to_text = bch2_reflink_v_to_text, \ 33 33 .swab = bch2_ptr_swab, \ 34 - .trigger = bch2_trans_mark_reflink_v, \ 34 + .trigger = bch2_trigger_reflink_v, \ 35 35 .min_val_size = 8, \ 36 36 }) 37 37 ··· 39 39 enum bkey_invalid_flags, struct printbuf *); 40 40 void bch2_indirect_inline_data_to_text(struct printbuf *, 41 41 struct bch_fs *, struct bkey_s_c); 42 - int bch2_trans_mark_indirect_inline_data(struct btree_trans *, 42 + int bch2_trigger_indirect_inline_data(struct btree_trans *, 43 43 enum btree_id, unsigned, 44 44 struct bkey_s_c, struct bkey_s, 45 45 unsigned); ··· 47 47 #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ 48 48 .key_invalid = bch2_indirect_inline_data_invalid, \ 49 49 .val_to_text = bch2_indirect_inline_data_to_text, \ 50 - .trigger = bch2_trans_mark_indirect_inline_data, \ 50 + .trigger = bch2_trigger_indirect_inline_data, \ 51 51 .min_val_size = 8, \ 52 52 }) 53 53
+12 -16
fs/bcachefs/replicas.c
··· 9 9 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, 10 10 struct bch_replicas_cpu *); 11 11 12 + /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ 13 + static int bch2_memcmp(const void *l, const void *r, size_t size) 14 + { 15 + return memcmp(l, r, size); 16 + } 17 + 12 18 /* Replicas tracking - in memory: */ 13 19 14 20 static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) ··· 39 33 40 34 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) 41 35 { 42 - eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); 36 + eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); 43 37 } 44 38 45 39 static void bch2_replicas_entry_v0_to_text(struct printbuf *out, 46 40 struct bch_replicas_entry_v0 *e) 47 41 { 48 - unsigned i; 49 - 50 - if (e->data_type < BCH_DATA_NR) 51 - prt_printf(out, "%s", bch2_data_types[e->data_type]); 52 - else 53 - prt_printf(out, "(invalid data type %u)", e->data_type); 42 + bch2_prt_data_type(out, e->data_type); 54 43 55 44 prt_printf(out, ": %u [", e->nr_devs); 56 - for (i = 0; i < e->nr_devs; i++) 45 + for (unsigned i = 0; i < e->nr_devs; i++) 57 46 prt_printf(out, i ? " %u" : "%u", e->devs[i]); 58 47 prt_printf(out, "]"); 59 48 } ··· 56 55 void bch2_replicas_entry_to_text(struct printbuf *out, 57 56 struct bch_replicas_entry_v1 *e) 58 57 { 59 - unsigned i; 60 - 61 - if (e->data_type < BCH_DATA_NR) 62 - prt_printf(out, "%s", bch2_data_types[e->data_type]); 63 - else 64 - prt_printf(out, "(invalid data type %u)", e->data_type); 58 + bch2_prt_data_type(out, e->data_type); 65 59 66 60 prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs); 67 - for (i = 0; i < e->nr_devs; i++) 61 + for (unsigned i = 0; i < e->nr_devs; i++) 68 62 prt_printf(out, i ? " %u" : "%u", e->devs[i]); 69 63 prt_printf(out, "]"); 70 64 } ··· 827 831 sort_cmp_size(cpu_r->entries, 828 832 cpu_r->nr, 829 833 cpu_r->entry_size, 830 - memcmp, NULL); 834 + bch2_memcmp, NULL); 831 835 832 836 for (i = 0; i < cpu_r->nr; i++) { 833 837 struct bch_replicas_entry_v1 *e =
+1 -1
fs/bcachefs/sb-clean.c
··· 207 207 208 208 u->entry.type = BCH_JSET_ENTRY_usage; 209 209 u->entry.btree_id = BCH_FS_USAGE_inodes; 210 - u->v = cpu_to_le64(c->usage_base->nr_inodes); 210 + u->v = cpu_to_le64(c->usage_base->b.nr_inodes); 211 211 } 212 212 213 213 {
+98
fs/bcachefs/sb-counters_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H 3 + #define _BCACHEFS_SB_COUNTERS_FORMAT_H 4 + 5 + #define BCH_PERSISTENT_COUNTERS() \ 6 + x(io_read, 0) \ 7 + x(io_write, 1) \ 8 + x(io_move, 2) \ 9 + x(bucket_invalidate, 3) \ 10 + x(bucket_discard, 4) \ 11 + x(bucket_alloc, 5) \ 12 + x(bucket_alloc_fail, 6) \ 13 + x(btree_cache_scan, 7) \ 14 + x(btree_cache_reap, 8) \ 15 + x(btree_cache_cannibalize, 9) \ 16 + x(btree_cache_cannibalize_lock, 10) \ 17 + x(btree_cache_cannibalize_lock_fail, 11) \ 18 + x(btree_cache_cannibalize_unlock, 12) \ 19 + x(btree_node_write, 13) \ 20 + x(btree_node_read, 14) \ 21 + x(btree_node_compact, 15) \ 22 + x(btree_node_merge, 16) \ 23 + x(btree_node_split, 17) \ 24 + x(btree_node_rewrite, 18) \ 25 + x(btree_node_alloc, 19) \ 26 + x(btree_node_free, 20) \ 27 + x(btree_node_set_root, 21) \ 28 + x(btree_path_relock_fail, 22) \ 29 + x(btree_path_upgrade_fail, 23) \ 30 + x(btree_reserve_get_fail, 24) \ 31 + x(journal_entry_full, 25) \ 32 + x(journal_full, 26) \ 33 + x(journal_reclaim_finish, 27) \ 34 + x(journal_reclaim_start, 28) \ 35 + x(journal_write, 29) \ 36 + x(read_promote, 30) \ 37 + x(read_bounce, 31) \ 38 + x(read_split, 33) \ 39 + x(read_retry, 32) \ 40 + x(read_reuse_race, 34) \ 41 + x(move_extent_read, 35) \ 42 + x(move_extent_write, 36) \ 43 + x(move_extent_finish, 37) \ 44 + x(move_extent_fail, 38) \ 45 + x(move_extent_start_fail, 39) \ 46 + x(copygc, 40) \ 47 + x(copygc_wait, 41) \ 48 + x(gc_gens_end, 42) \ 49 + x(gc_gens_start, 43) \ 50 + x(trans_blocked_journal_reclaim, 44) \ 51 + x(trans_restart_btree_node_reused, 45) \ 52 + x(trans_restart_btree_node_split, 46) \ 53 + x(trans_restart_fault_inject, 47) \ 54 + x(trans_restart_iter_upgrade, 48) \ 55 + x(trans_restart_journal_preres_get, 49) \ 56 + x(trans_restart_journal_reclaim, 50) \ 57 + x(trans_restart_journal_res_get, 51) \ 58 + x(trans_restart_key_cache_key_realloced, 52) \ 59 + x(trans_restart_key_cache_raced, 53) \ 60 + x(trans_restart_mark_replicas, 54) \ 61 + x(trans_restart_mem_realloced, 55) \ 62 + x(trans_restart_memory_allocation_failure, 56) \ 63 + x(trans_restart_relock, 57) \ 64 + x(trans_restart_relock_after_fill, 58) \ 65 + x(trans_restart_relock_key_cache_fill, 59) \ 66 + x(trans_restart_relock_next_node, 60) \ 67 + x(trans_restart_relock_parent_for_fill, 61) \ 68 + x(trans_restart_relock_path, 62) \ 69 + x(trans_restart_relock_path_intent, 63) \ 70 + x(trans_restart_too_many_iters, 64) \ 71 + x(trans_restart_traverse, 65) \ 72 + x(trans_restart_upgrade, 66) \ 73 + x(trans_restart_would_deadlock, 67) \ 74 + x(trans_restart_would_deadlock_write, 68) \ 75 + x(trans_restart_injected, 69) \ 76 + x(trans_restart_key_cache_upgrade, 70) \ 77 + x(trans_traverse_all, 71) \ 78 + x(transaction_commit, 72) \ 79 + x(write_super, 73) \ 80 + x(trans_restart_would_deadlock_recursion_limit, 74) \ 81 + x(trans_restart_write_buffer_flush, 75) \ 82 + x(trans_restart_split_race, 76) \ 83 + x(write_buffer_flush_slowpath, 77) \ 84 + x(write_buffer_flush_sync, 78) 85 + 86 + enum bch_persistent_counters { 87 + #define x(t, n, ...) BCH_COUNTER_##t, 88 + BCH_PERSISTENT_COUNTERS() 89 + #undef x 90 + BCH_COUNTER_NR 91 + }; 92 + 93 + struct bch_sb_field_counters { 94 + struct bch_sb_field field; 95 + __le64 d[]; 96 + }; 97 + 98 + #endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */
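Editor's note: BCH_PERSISTENT_COUNTERS() is another x-macro: it produces the BCH_COUNTER_* enum above and fixes the slot order of d[] in bch_sb_field_counters, so an in-memory counter and its on-disk copy share an index. The journal.c hunk's count_event(c, journal_entry_full) bumps one of these slots; the macro itself is not shown in this diff, but it plausibly resolves to something like:

/* Sketch only: mapping an event name to its persistent counter slot.
 * The real count_event() definition is outside this excerpt. */
#define count_event(_c, _name)						\
	this_cpu_inc((_c)->counters[BCH_COUNTER_##_name])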
+2 -2
fs/bcachefs/sb-members.c
··· 251 251 prt_printf(out, "Data allowed:"); 252 252 prt_tab(out); 253 253 if (BCH_MEMBER_DATA_ALLOWED(&m)) 254 - prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); 254 + prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); 255 255 else 256 256 prt_printf(out, "(none)"); 257 257 prt_newline(out); ··· 259 259 prt_printf(out, "Has data:"); 260 260 prt_tab(out); 261 261 if (data_have) 262 - prt_bitflags(out, bch2_data_types, data_have); 262 + prt_bitflags(out, __bch2_data_types, data_have); 263 263 else 264 264 prt_printf(out, "(none)"); 265 265 prt_newline(out);
+3 -1
fs/bcachefs/snapshot.c
··· 1053 1053 n->v.subvol = cpu_to_le32(snapshot_subvols[i]); 1054 1054 n->v.tree = cpu_to_le32(tree); 1055 1055 n->v.depth = cpu_to_le32(depth); 1056 + n->v.btime.lo = cpu_to_le64(bch2_current_time(c)); 1057 + n->v.btime.hi = 0; 1056 1058 1057 1059 for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) 1058 1060 n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); ··· 1683 1681 1684 1682 void bch2_fs_snapshots_exit(struct bch_fs *c) 1685 1683 { 1686 - kfree(rcu_dereference_protected(c->snapshots, true)); 1684 + kvfree(rcu_dereference_protected(c->snapshots, true)); 1687 1685 }
+36
fs/bcachefs/snapshot_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_SNAPSHOT_FORMAT_H 3 + #define _BCACHEFS_SNAPSHOT_FORMAT_H 4 + 5 + struct bch_snapshot { 6 + struct bch_val v; 7 + __le32 flags; 8 + __le32 parent; 9 + __le32 children[2]; 10 + __le32 subvol; 11 + /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */ 12 + __le32 tree; 13 + __le32 depth; 14 + __le32 skip[3]; 15 + bch_le128 btime; 16 + }; 17 + 18 + LE32_BITMASK(BCH_SNAPSHOT_DELETED, struct bch_snapshot, flags, 0, 1) 19 + 20 + /* True if a subvolume points to this snapshot node: */ 21 + LE32_BITMASK(BCH_SNAPSHOT_SUBVOL, struct bch_snapshot, flags, 1, 2) 22 + 23 + /* 24 + * Snapshot trees: 25 + * 26 + * The snapshot_trees btree gives us a persistent identifier for each tree of 27 + * bch_snapshot nodes, and allows us to record and easily find the root/master 28 + * subvolume that other snapshots were created from: 29 + */ 30 + struct bch_snapshot_tree { 31 + struct bch_val v; 32 + __le32 master_subvol; 33 + __le32 root_snapshot; 34 + }; 35 + 36 + #endif /* _BCACHEFS_SNAPSHOT_FORMAT_H */
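Editor's note: these format headers lean on the LE32_BITMASK()/LE64_BITMASK() helpers to carve named bitfields out of little-endian on-disk words (BCH_SNAPSHOT_DELETED and BCH_SNAPSHOT_SUBVOL here, the INODEv3_* fields earlier). The macro is defined outside this excerpt; conceptually each declaration expands to a getter/setter pair along these lines:

/* Conceptual sketch of what LE32_BITMASK(BCH_SNAPSHOT_DELETED,
 * struct bch_snapshot, flags, 0, 1) provides; the real macro lives elsewhere. */
static inline __u64 BCH_SNAPSHOT_DELETED(const struct bch_snapshot *k)
{
	return (le32_to_cpu(k->flags) >> 0) & ~(~0ULL << (1 - 0));
}

static inline void SET_BCH_SNAPSHOT_DELETED(struct bch_snapshot *k, __u64 v)
{
	__u64 f = le32_to_cpu(k->flags);

	f &= ~(~(~0ULL << (1 - 0)) << 0);
	f |= (v & ~(~0ULL << (1 - 0))) << 0;
	k->flags = cpu_to_le32(f);
}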
+35
fs/bcachefs/subvolume_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_SUBVOLUME_FORMAT_H 3 + #define _BCACHEFS_SUBVOLUME_FORMAT_H 4 + 5 + #define SUBVOL_POS_MIN POS(0, 1) 6 + #define SUBVOL_POS_MAX POS(0, S32_MAX) 7 + #define BCACHEFS_ROOT_SUBVOL 1 8 + 9 + struct bch_subvolume { 10 + struct bch_val v; 11 + __le32 flags; 12 + __le32 snapshot; 13 + __le64 inode; 14 + /* 15 + * Snapshot subvolumes form a tree, separate from the snapshot nodes 16 + * tree - if this subvolume is a snapshot, this is the ID of the 17 + * subvolume it was created from: 18 + * 19 + * This is _not_ necessarily the subvolume of the directory containing 20 + * this subvolume: 21 + */ 22 + __le32 parent; 23 + __le32 pad; 24 + bch_le128 otime; 25 + }; 26 + 27 + LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) 28 + /* 29 + * We need to know whether a subvolume is a snapshot so we can know whether we 30 + * can delete it (or whether it should just be rm -rf'd) 31 + */ 32 + LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) 33 + LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) 34 + 35 + #endif /* _BCACHEFS_SUBVOLUME_FORMAT_H */
+4 -2
fs/bcachefs/super-io.c
··· 2 2 3 3 #include "bcachefs.h" 4 4 #include "checksum.h" 5 - #include "counters.h" 6 5 #include "disk_groups.h" 7 6 #include "ec.h" 8 7 #include "error.h" ··· 12 13 #include "replicas.h" 13 14 #include "quota.h" 14 15 #include "sb-clean.h" 16 + #include "sb-counters.h" 15 17 #include "sb-downgrade.h" 16 18 #include "sb-errors.h" 17 19 #include "sb-members.h" ··· 1321 1321 1322 1322 prt_printf(out, "Superblock size:"); 1323 1323 prt_tab(out); 1324 - prt_printf(out, "%zu", vstruct_bytes(sb)); 1324 + prt_units_u64(out, vstruct_bytes(sb)); 1325 + prt_str(out, "/"); 1326 + prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits); 1325 1327 prt_newline(out); 1326 1328 1327 1329 prt_printf(out, "Clean:");
+3 -3
fs/bcachefs/super.c
··· 23 23 #include "checksum.h" 24 24 #include "clock.h" 25 25 #include "compress.h" 26 - #include "counters.h" 27 26 #include "debug.h" 28 27 #include "disk_groups.h" 29 28 #include "ec.h" ··· 48 49 #include "recovery.h" 49 50 #include "replicas.h" 50 51 #include "sb-clean.h" 52 + #include "sb-counters.h" 51 53 #include "sb-errors.h" 52 54 #include "sb-members.h" 53 55 #include "snapshot.h" ··· 883 883 !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || 884 884 !(c->online_reserved = alloc_percpu(u64)) || 885 885 mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, 886 - btree_bytes(c)) || 886 + c->opts.btree_node_size) || 887 887 mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || 888 888 !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits, 889 889 sizeof(u64), GFP_KERNEL))) { ··· 1625 1625 if (data) { 1626 1626 struct printbuf data_has = PRINTBUF; 1627 1627 1628 - prt_bitflags(&data_has, bch2_data_types, data); 1628 + prt_bitflags(&data_has, __bch2_data_types, data); 1629 1629 bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); 1630 1630 printbuf_exit(&data_has); 1631 1631 ret = -EBUSY;
+9 -6
fs/bcachefs/sysfs.c
··· 21 21 #include "btree_gc.h" 22 22 #include "buckets.h" 23 23 #include "clock.h" 24 + #include "compress.h" 24 25 #include "disk_groups.h" 25 26 #include "ec.h" 26 27 #include "inode.h" ··· 248 247 249 248 mutex_lock(&c->btree_cache.lock); 250 249 list_for_each_entry(b, &c->btree_cache.live, list) 251 - ret += btree_bytes(c); 250 + ret += btree_buf_bytes(b); 252 251 253 252 mutex_unlock(&c->btree_cache.lock); 254 253 return ret; ··· 331 330 prt_newline(out); 332 331 333 332 for (unsigned i = 0; i < ARRAY_SIZE(s); i++) { 334 - prt_str(out, bch2_compression_types[i]); 333 + bch2_prt_compression_type(out, i); 335 334 prt_tab(out); 336 335 337 336 prt_human_readable_u64(out, s[i].sectors_compressed << 9); ··· 726 725 bch2_opt_set_sb(c, opt, v); 727 726 bch2_opt_set_by_id(&c->opts, id, v); 728 727 729 - if ((id == Opt_background_target || 730 - id == Opt_background_compression) && v) 728 + if (v && 729 + (id == Opt_background_target || 730 + id == Opt_background_compression || 731 + (id == Opt_compression && !c->opts.background_compression))) 731 732 bch2_set_rebalance_needs_scan(c, 0); 732 733 733 734 ret = size; ··· 886 883 887 884 for (i = 1; i < BCH_DATA_NR; i++) 888 885 prt_printf(out, "%-12s:%12llu\n", 889 - bch2_data_types[i], 886 + bch2_data_type_str(i), 890 887 percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9); 891 888 } 892 889 } ··· 911 908 } 912 909 913 910 if (attr == &sysfs_has_data) { 914 - prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca)); 911 + prt_bitflags(out, __bch2_data_types, bch2_dev_has_data(c, ca)); 915 912 prt_char(out, '\n'); 916 913 } 917 914
+23 -53
fs/bcachefs/trace.h
··· 46 46 __assign_str(str, str); 47 47 ), 48 48 49 - TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) 49 + TP_printk("%d,%d\n%s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) 50 50 ); 51 51 52 52 DECLARE_EVENT_CLASS(trans_str, ··· 273 273 TP_ARGS(c) 274 274 ); 275 275 276 - DEFINE_EVENT(bch_fs, journal_entry_full, 277 - TP_PROTO(struct bch_fs *c), 278 - TP_ARGS(c) 276 + DEFINE_EVENT(fs_str, journal_entry_full, 277 + TP_PROTO(struct bch_fs *c, const char *str), 278 + TP_ARGS(c, str) 279 279 ); 280 280 281 - TRACE_EVENT(journal_entry_close, 282 - TP_PROTO(struct bch_fs *c, unsigned bytes), 283 - TP_ARGS(c, bytes), 284 - 285 - TP_STRUCT__entry( 286 - __field(dev_t, dev ) 287 - __field(u32, bytes ) 288 - ), 289 - 290 - TP_fast_assign( 291 - __entry->dev = c->dev; 292 - __entry->bytes = bytes; 293 - ), 294 - 295 - TP_printk("%d,%d entry bytes %u", 296 - MAJOR(__entry->dev), MINOR(__entry->dev), 297 - __entry->bytes) 281 + DEFINE_EVENT(fs_str, journal_entry_close, 282 + TP_PROTO(struct bch_fs *c, const char *str), 283 + TP_ARGS(c, str) 298 284 ); 299 285 300 286 DEFINE_EVENT(bio, journal_write, ··· 528 542 __entry->level = path->level; 529 543 TRACE_BPOS_assign(pos, path->pos); 530 544 531 - c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level), 545 + c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level); 532 546 __entry->self_read_count = c.n[SIX_LOCK_read]; 533 547 __entry->self_intent_count = c.n[SIX_LOCK_intent]; 534 548 ··· 813 827 ); 814 828 815 829 DEFINE_EVENT(fs_str, move_extent, 816 - TP_PROTO(struct bch_fs *c, const char *k), 817 - TP_ARGS(c, k) 830 + TP_PROTO(struct bch_fs *c, const char *str), 831 + TP_ARGS(c, str) 818 832 ); 819 833 820 834 DEFINE_EVENT(fs_str, move_extent_read, 821 - TP_PROTO(struct bch_fs *c, const char *k), 822 - TP_ARGS(c, k) 835 + TP_PROTO(struct bch_fs *c, const char *str), 836 + TP_ARGS(c, str) 823 837 ); 824 838 825 839 DEFINE_EVENT(fs_str, move_extent_write, 826 - TP_PROTO(struct bch_fs *c, const char *k), 827 - TP_ARGS(c, k) 840 + TP_PROTO(struct bch_fs *c, const char *str), 841 + TP_ARGS(c, str) 828 842 ); 829 843 830 844 DEFINE_EVENT(fs_str, move_extent_finish, 831 - TP_PROTO(struct bch_fs *c, const char *k), 832 - TP_ARGS(c, k) 845 + TP_PROTO(struct bch_fs *c, const char *str), 846 + TP_ARGS(c, str) 833 847 ); 834 848 835 - TRACE_EVENT(move_extent_fail, 836 - TP_PROTO(struct bch_fs *c, const char *msg), 837 - TP_ARGS(c, msg), 838 - 839 - TP_STRUCT__entry( 840 - __field(dev_t, dev ) 841 - __string(msg, msg ) 842 - ), 843 - 844 - TP_fast_assign( 845 - __entry->dev = c->dev; 846 - __assign_str(msg, msg); 847 - ), 848 - 849 - TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) 849 + DEFINE_EVENT(fs_str, move_extent_fail, 850 + TP_PROTO(struct bch_fs *c, const char *str), 851 + TP_ARGS(c, str) 850 852 ); 851 853 852 854 DEFINE_EVENT(fs_str, move_extent_start_fail, ··· 1013 1039 __entry->level = b->c.level; 1014 1040 __entry->written = b->written; 1015 1041 __entry->blocks = btree_blocks(trans->c); 1016 - __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b); 1042 + __entry->u64s_remaining = bch2_btree_keys_u64s_remaining(b); 1017 1043 ), 1018 1044 1019 1045 TP_printk("%s %pS l=%u written %u/%u u64s remaining %u", ··· 1120 1146 TP_ARGS(trans, caller_ip, path) 1121 1147 ); 1122 1148 1123 - struct get_locks_fail; 1124 - 1125 1149 TRACE_EVENT(trans_restart_upgrade, 1126 1150 TP_PROTO(struct btree_trans *trans, 1127 1151 unsigned long 
caller_ip, ··· 1167 1195 __entry->node_seq) 1168 1196 ); 1169 1197 1170 - DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, 1171 - TP_PROTO(struct btree_trans *trans, 1172 - unsigned long caller_ip, 1173 - struct btree_path *path), 1174 - TP_ARGS(trans, caller_ip, path) 1198 + DEFINE_EVENT(trans_str, trans_restart_relock, 1199 + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str), 1200 + TP_ARGS(trans, caller_ip, str) 1175 1201 ); 1176 1202 1177 1203 DEFINE_EVENT(transaction_restart_iter, trans_restart_relock_next_node,
+11 -4
fs/bcachefs/util.c
··· 241 241 return true; 242 242 } 243 243 244 - void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) 244 + void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits) 245 245 { 246 246 while (nr_bits) 247 247 prt_char(out, '0' + ((v >> --nr_bits) & 1)); 248 + } 249 + 250 + void bch2_prt_u64_base2(struct printbuf *out, u64 v) 251 + { 252 + bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); 248 253 } 249 254 250 255 void bch2_print_string_as_lines(const char *prefix, const char *lines) ··· 1191 1186 { 1192 1187 darray_init(ret); 1193 1188 1194 - char *dev_name = kstrdup(_dev_name, GFP_KERNEL), *s = dev_name; 1189 + char *dev_name, *s, *orig; 1190 + 1191 + dev_name = orig = kstrdup(_dev_name, GFP_KERNEL); 1195 1192 if (!dev_name) 1196 1193 return -ENOMEM; 1197 1194 ··· 1208 1201 } 1209 1202 } 1210 1203 1211 - kfree(dev_name); 1204 + kfree(orig); 1212 1205 return 0; 1213 1206 err: 1214 1207 bch2_darray_str_exit(ret); 1215 - kfree(dev_name); 1208 + kfree(orig); 1216 1209 return -ENOMEM; 1217 1210 }
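Editor's note: bch2_prt_u64_binary() is split into bch2_prt_u64_base2_nbits() plus a wrapper that sizes the output with fls64(), so a zero value still prints a single '0'; the move.c hunk uses it for the rewrite_ptrs/kill_ptrs masks. A small usage sketch:

/* Usage sketch: appending bitmasks in base 2 to a printbuf. */
struct printbuf buf = PRINTBUF;

bch2_prt_u64_base2(&buf, 10);	/* binary 1010: appends "1010" */
bch2_prt_u64_base2(&buf, 0);	/* fls64(0) ?: 1 == 1 bit: appends "0" */
printbuf_exit(&buf);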
+2 -1
fs/bcachefs/util.h
··· 342 342 343 343 u64 bch2_read_flag_list(char *, const char * const[]); 344 344 345 - void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); 345 + void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); 346 + void bch2_prt_u64_base2(struct printbuf *, u64); 346 347 347 348 void bch2_print_string_as_lines(const char *prefix, const char *lines); 348 349
+3 -2
fs/bcachefs/xattr.c
··· 590 590 mutex_unlock(&inode->ei_update_lock); 591 591 592 592 if (value && 593 - (opt_id == Opt_background_compression || 594 - opt_id == Opt_background_target)) 593 + (opt_id == Opt_background_target || 594 + opt_id == Opt_background_compression || 595 + (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression)))) 595 596 bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum); 596 597 597 598 return bch2_err_class(ret);
+19
fs/bcachefs/xattr_format.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _BCACHEFS_XATTR_FORMAT_H 3 + #define _BCACHEFS_XATTR_FORMAT_H 4 + 5 + #define KEY_TYPE_XATTR_INDEX_USER 0 6 + #define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 7 + #define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 8 + #define KEY_TYPE_XATTR_INDEX_TRUSTED 3 9 + #define KEY_TYPE_XATTR_INDEX_SECURITY 4 10 + 11 + struct bch_xattr { 12 + struct bch_val v; 13 + __u8 x_type; 14 + __u8 x_name_len; 15 + __le16 x_val_len; 16 + __u8 x_name[]; 17 + } __packed __aligned(8); 18 + 19 + #endif /* _BCACHEFS_XATTR_FORMAT_H */
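Editor's note: bch_xattr packs the name and value back to back after the fixed header: x_name[] carries x_name_len bytes of name, and the value bytes are expected to follow immediately. The accessors live in xattr.h rather than this header; a hedged sketch of how they would presumably be derived (xattr_bytes below is a hypothetical name):

/* Sketch, assuming the value is laid out directly after the name;
 * the real helpers are in xattr.h, not shown in this diff. */
static inline void *xattr_val(struct bch_xattr *x)
{
	return (void *) x->x_name + x->x_name_len;
}

static inline size_t xattr_bytes(const struct bch_xattr *x)	/* hypothetical */
{
	return offsetof(struct bch_xattr, x_name) +
		x->x_name_len + le16_to_cpu(x->x_val_len);
}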