Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs

Pull bcachefs fixes from Kent Overstreet:
"Lots of hotfixes:

- transaction restart injection has been shaking out a few things

- fix a data corruption in the buffered write path on -ENOSPC, found
by xfstests generic/299

- Some small show_options fixes

- Repair mismatches in inode hash type, seed: different snapshot
versions of an inode must have the same hash type and seed, used for
directory entries and xattrs. We were checking the hash seed, but
not the type, and a user contributed a filesystem where the hash
type on one inode had somehow been flipped; these fixes allow his
filesystem to repair.

Additionally, the hash type flip made some directory entries
invisible, which were then recreated by userspace; so the hash
check code now checks for duplicate non-dangling dirents, and
renames one of them if necessary.

- Don't use wait_event_interruptible() in recovery: this fixes some
filesystems failing to mount with -ERESTARTSYS

- Workaround for kvmalloc not supporting > INT_MAX allocations,
causing an -ENOMEM when allocating the sorted array of journal
keys: this allows a 75 TB filesystem to mount

- Make sure bch_inode_unpacked.bi_snapshot is set in the old inode
compat path: this allows Marcin's filesystem (in use since before
6.7) to repair and mount"

* tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs: (26 commits)
bcachefs: Set bch_inode_unpacked.bi_snapshot in old inode path
bcachefs: Mark more errors as AUTOFIX
bcachefs: Workaround for kvmalloc() not supporting > INT_MAX allocations
bcachefs: Don't use wait_event_interruptible() in recovery
bcachefs: Fix __bch2_fsck_err() warning
bcachefs: fsck: Improve hash_check_key()
bcachefs: bch2_hash_set_or_get_in_snapshot()
bcachefs: Repair mismatches in inode hash seed, type
bcachefs: Add hash seed, type to inode_to_text()
bcachefs: INODE_STR_HASH() for bch_inode_unpacked
bcachefs: Run in-kernel offline fsck without ratelimit errors
bcachefs: skip mount option handle for empty string.
bcachefs: fix incorrect show_options results
bcachefs: Fix data corruption on -ENOSPC in buffered write path
bcachefs: bch2_folio_reservation_get_partial() is now better behaved
bcachefs: fix disk reservation accounting in bch2_folio_reservation_get()
bcachefS: ec: fix data type on stripe deletion
bcachefs: Don't use commit_do() unnecessarily
bcachefs: handle restarts in bch2_bucket_io_time_reset()
bcachefs: fix restart handling in __bch2_resume_logged_op_finsert()
...

+471 -201
+21 -16
fs/bcachefs/alloc_background.c
··· 1977 1977 ca->mi.bucket_size, 1978 1978 GFP_KERNEL); 1979 1979 1980 - int ret = bch2_trans_do(c, NULL, NULL, 1980 + int ret = bch2_trans_commit_do(c, NULL, NULL, 1981 1981 BCH_WATERMARK_btree| 1982 1982 BCH_TRANS_COMMIT_no_enospc, 1983 1983 bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket))); ··· 2137 2137 2138 2138 struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped); 2139 2139 ret = bkey_err(k); 2140 - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2141 - continue; 2142 2140 if (ret) 2143 - break; 2141 + goto restart_err; 2144 2142 if (!k.k) 2145 2143 break; 2146 2144 2147 2145 ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate); 2146 + restart_err: 2147 + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2148 + continue; 2148 2149 if (ret) 2149 2150 break; 2150 2151 ··· 2351 2350 2352 2351 /* Bucket IO clocks: */ 2353 2352 2354 - int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, 2355 - size_t bucket_nr, int rw) 2353 + static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, 2354 + size_t bucket_nr, int rw) 2356 2355 { 2357 2356 struct bch_fs *c = trans->c; 2357 + 2358 2358 struct btree_iter iter; 2359 - struct bkey_i_alloc_v4 *a; 2360 - u64 now; 2361 - int ret = 0; 2362 - 2363 - if (bch2_trans_relock(trans)) 2364 - bch2_trans_begin(trans); 2365 - 2366 - a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr)); 2367 - ret = PTR_ERR_OR_ZERO(a); 2359 + struct bkey_i_alloc_v4 *a = 2360 + bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr)); 2361 + int ret = PTR_ERR_OR_ZERO(a); 2368 2362 if (ret) 2369 2363 return ret; 2370 2364 2371 - now = bch2_current_io_time(c, rw); 2365 + u64 now = bch2_current_io_time(c, rw); 2372 2366 if (a->v.io_time[rw] == now) 2373 2367 goto out; 2374 2368 ··· 2374 2378 out: 2375 2379 bch2_trans_iter_exit(trans, &iter); 2376 2380 return ret; 2381 + } 2382 + 2383 + int bch2_bucket_io_time_reset(struct 
btree_trans *trans, unsigned dev, 2384 + size_t bucket_nr, int rw) 2385 + { 2386 + if (bch2_trans_relock(trans)) 2387 + bch2_trans_begin(trans); 2388 + 2389 + return nested_lockrestart_do(trans, __bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw)); 2377 2390 } 2378 2391 2379 2392 /* Startup/shutdown (ro/rw): */
+1 -1
fs/bcachefs/alloc_foreground.c
··· 684 684 struct bch_dev_usage usage; 685 685 struct open_bucket *ob; 686 686 687 - bch2_trans_do(c, NULL, NULL, 0, 687 + bch2_trans_do(c, 688 688 PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark, 689 689 data_type, cl, false, &usage))); 690 690 return ob;
+11 -1
fs/bcachefs/btree_gc.c
··· 820 820 * fix that here: 821 821 */ 822 822 alloc_data_type_set(&gc, gc.data_type); 823 - 824 823 if (gc.data_type != old_gc.data_type || 825 824 gc.dirty_sectors != old_gc.dirty_sectors) { 826 825 ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc); 827 826 if (ret) 828 827 return ret; 828 + 829 + /* 830 + * Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not 831 + * safe w.r.t. transaction restarts, so fixup the gc_bucket so 832 + * we don't run it twice: 833 + */ 834 + percpu_down_read(&c->mark_lock); 835 + struct bucket *gc_m = gc_bucket(ca, iter->pos.offset); 836 + gc_m->data_type = gc.data_type; 837 + gc_m->dirty_sectors = gc.dirty_sectors; 838 + percpu_up_read(&c->mark_lock); 829 839 } 830 840 831 841 if (fsck_err_on(new.data_type != gc.data_type,
+1 -1
fs/bcachefs/btree_io.c
··· 1871 1871 1872 1872 } 1873 1873 } else { 1874 - ret = bch2_trans_do(c, NULL, NULL, 0, 1874 + ret = bch2_trans_do(c, 1875 1875 bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, 1876 1876 BCH_WATERMARK_interior_updates| 1877 1877 BCH_TRANS_COMMIT_journal_reclaim|
+2
fs/bcachefs/btree_iter.h
··· 912 912 _ret; \ 913 913 }) 914 914 915 + #define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do)) 916 + 915 917 struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned); 916 918 void bch2_trans_put(struct btree_trans *); 917 919
+2 -2
fs/bcachefs/btree_update.c
··· 668 668 struct disk_reservation *disk_res, int flags, 669 669 enum btree_iter_update_trigger_flags iter_flags) 670 670 { 671 - return bch2_trans_do(c, disk_res, NULL, flags, 671 + return bch2_trans_commit_do(c, disk_res, NULL, flags, 672 672 bch2_btree_insert_trans(trans, id, k, iter_flags)); 673 673 } 674 674 ··· 865 865 memcpy(l->d, buf.buf, buf.pos); 866 866 c->journal.early_journal_entries.nr += jset_u64s(u64s); 867 867 } else { 868 - ret = bch2_trans_do(c, NULL, NULL, 868 + ret = bch2_trans_commit_do(c, NULL, NULL, 869 869 BCH_TRANS_COMMIT_lazy_rw|commit_flags, 870 870 __bch2_trans_log_msg(trans, &buf, u64s)); 871 871 }
+1 -1
fs/bcachefs/btree_update.h
··· 192 192 nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ 193 193 (_journal_seq), (_flags))) 194 194 195 - #define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ 195 + #define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \ 196 196 bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) 197 197 198 198 #define trans_for_each_update(_trans, _i) \
+1 -3
fs/bcachefs/btree_update_interior.c
··· 2239 2239 struct async_btree_rewrite *a = 2240 2240 container_of(work, struct async_btree_rewrite, work); 2241 2241 struct bch_fs *c = a->c; 2242 - int ret; 2243 2242 2244 - ret = bch2_trans_do(c, NULL, NULL, 0, 2245 - async_btree_node_rewrite_trans(trans, a)); 2243 + int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); 2246 2244 bch_err_fn_ratelimited(c, ret); 2247 2245 bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); 2248 2246 kfree(a);
+5 -2
fs/bcachefs/buckets.c
··· 1160 1160 #define SECTORS_CACHE 1024 1161 1161 1162 1162 int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, 1163 - u64 sectors, int flags) 1163 + u64 sectors, enum bch_reservation_flags flags) 1164 1164 { 1165 1165 struct bch_fs_pcpu *pcpu; 1166 1166 u64 old, get; 1167 - s64 sectors_available; 1167 + u64 sectors_available; 1168 1168 int ret; 1169 1169 1170 1170 percpu_down_read(&c->mark_lock); ··· 1201 1201 1202 1202 percpu_u64_set(&c->pcpu->sectors_available, 0); 1203 1203 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); 1204 + 1205 + if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) 1206 + sectors = min(sectors, sectors_available); 1204 1207 1205 1208 if (sectors <= sectors_available || 1206 1209 (flags & BCH_DISK_RESERVATION_NOFAIL)) {
+7 -5
fs/bcachefs/buckets.h
··· 344 344 } 345 345 } 346 346 347 - #define BCH_DISK_RESERVATION_NOFAIL (1 << 0) 347 + enum bch_reservation_flags { 348 + BCH_DISK_RESERVATION_NOFAIL = 1 << 0, 349 + BCH_DISK_RESERVATION_PARTIAL = 1 << 1, 350 + }; 348 351 349 - int __bch2_disk_reservation_add(struct bch_fs *, 350 - struct disk_reservation *, 351 - u64, int); 352 + int __bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *, 353 + u64, enum bch_reservation_flags); 352 354 353 355 static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, 354 - u64 sectors, int flags) 356 + u64 sectors, enum bch_reservation_flags flags) 355 357 { 356 358 #ifdef __KERNEL__ 357 359 u64 old, new;
+1
fs/bcachefs/chardev.c
··· 225 225 226 226 opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); 227 227 opt_set(thr->opts, read_only, 1); 228 + opt_set(thr->opts, ratelimit_errors, 0); 228 229 229 230 /* We need request_key() to be called before we punt to kthread: */ 230 231 opt_set(thr->opts, nostart, true);
+14 -1
fs/bcachefs/darray.c
··· 2 2 3 3 #include <linux/log2.h> 4 4 #include <linux/slab.h> 5 + #include <linux/vmalloc.h> 5 6 #include "darray.h" 6 7 7 8 int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp) ··· 10 9 if (new_size > d->size) { 11 10 new_size = roundup_pow_of_two(new_size); 12 11 13 - void *data = kvmalloc_array_noprof(new_size, element_size, gfp); 12 + /* 13 + * This is a workaround: kvmalloc() doesn't support > INT_MAX 14 + * allocations, but vmalloc() does. 15 + * The limit needs to be lifted from kvmalloc, and when it does 16 + * we'll go back to just using that. 17 + */ 18 + size_t bytes; 19 + if (unlikely(check_mul_overflow(new_size, element_size, &bytes))) 20 + return -ENOMEM; 21 + 22 + void *data = likely(bytes < INT_MAX) 23 + ? kvmalloc_noprof(bytes, gfp) 24 + : vmalloc_noprof(bytes); 14 25 if (!data) 15 26 return -ENOMEM; 16 27
-7
fs/bcachefs/dirent.c
··· 250 250 return ret; 251 251 } 252 252 253 - static void dirent_copy_target(struct bkey_i_dirent *dst, 254 - struct bkey_s_c_dirent src) 255 - { 256 - dst->v.d_inum = src.v->d_inum; 257 - dst->v.d_type = src.v->d_type; 258 - } 259 - 260 253 int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, 261 254 struct bkey_s_c_dirent d, subvol_inum *target) 262 255 {
+7
fs/bcachefs/dirent.h
··· 34 34 int bch2_dirent_read_target(struct btree_trans *, subvol_inum, 35 35 struct bkey_s_c_dirent, subvol_inum *); 36 36 37 + static inline void dirent_copy_target(struct bkey_i_dirent *dst, 38 + struct bkey_s_c_dirent src) 39 + { 40 + dst->v.d_inum = src.v->d_inum; 41 + dst->v.d_type = src.v->d_type; 42 + } 43 + 37 44 int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32, 38 45 const struct bch_hash_info *, u8, 39 46 const struct qstr *, u64, u64 *,
+4 -2
fs/bcachefs/disk_accounting.c
··· 856 856 }; 857 857 u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; 858 858 859 - int ret = bch2_trans_do(c, NULL, NULL, 0, 860 - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc)); 859 + int ret = bch2_trans_do(c, ({ 860 + bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?: 861 + (!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0); 862 + })); 861 863 bch_err_fn(c, ret); 862 864 return ret; 863 865 }
+11 -11
fs/bcachefs/ec.c
··· 266 266 if (!deleting) { 267 267 a->stripe = s.k->p.offset; 268 268 a->stripe_redundancy = s.v->nr_redundant; 269 + alloc_data_type_set(a, data_type); 269 270 } else { 270 271 a->stripe = 0; 271 272 a->stripe_redundancy = 0; 273 + alloc_data_type_set(a, BCH_DATA_user); 272 274 } 273 - 274 - alloc_data_type_set(a, data_type); 275 275 err: 276 276 printbuf_exit(&buf); 277 277 return ret; ··· 1186 1186 if (!idx) 1187 1187 break; 1188 1188 1189 - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1189 + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1190 1190 ec_stripe_delete(trans, idx)); 1191 1191 bch_err_fn(c, ret); 1192 1192 if (ret) ··· 1519 1519 goto err; 1520 1520 } 1521 1521 1522 - ret = bch2_trans_do(c, &s->res, NULL, 1523 - BCH_TRANS_COMMIT_no_check_rw| 1524 - BCH_TRANS_COMMIT_no_enospc, 1525 - ec_stripe_key_update(trans, 1526 - s->have_existing_stripe 1527 - ? bkey_i_to_stripe(&s->existing_stripe.key) 1528 - : NULL, 1529 - bkey_i_to_stripe(&s->new_stripe.key))); 1522 + ret = bch2_trans_commit_do(c, &s->res, NULL, 1523 + BCH_TRANS_COMMIT_no_check_rw| 1524 + BCH_TRANS_COMMIT_no_enospc, 1525 + ec_stripe_key_update(trans, 1526 + s->have_existing_stripe 1527 + ? bkey_i_to_stripe(&s->existing_stripe.key) 1528 + : NULL, 1529 + bkey_i_to_stripe(&s->new_stripe.key))); 1530 1530 bch_err_msg(c, ret, "creating stripe key"); 1531 1531 if (ret) { 1532 1532 goto err;
+4 -1
fs/bcachefs/error.c
··· 251 251 * delete the key) 252 252 * - and we don't need to warn if we're not prompting 253 253 */ 254 - WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c)); 254 + WARN_ON((flags & FSCK_CAN_FIX) && 255 + !(flags & FSCK_AUTOFIX) && 256 + !trans && 257 + bch2_current_has_btree_trans(c)); 255 258 256 259 if ((flags & FSCK_CAN_FIX) && 257 260 test_bit(err, c->sb.errors_silent))
+6
fs/bcachefs/fs-io-buffered.c
··· 856 856 folios_trunc(&fs, fi); 857 857 end = min(end, folio_end_pos(darray_last(fs))); 858 858 } else { 859 + if (!folio_test_uptodate(f)) { 860 + ret = bch2_read_single_folio(f, mapping); 861 + if (ret) 862 + goto out; 863 + } 864 + 859 865 folios_trunc(&fs, fi + 1); 860 866 end = f_pos + f_reserved; 861 867 }
+45 -25
fs/bcachefs/fs-io-pagecache.c
··· 399 399 bch2_quota_reservation_put(c, inode, &res->quota); 400 400 } 401 401 402 - int bch2_folio_reservation_get(struct bch_fs *c, 402 + static int __bch2_folio_reservation_get(struct bch_fs *c, 403 403 struct bch_inode_info *inode, 404 404 struct folio *folio, 405 405 struct bch2_folio_reservation *res, 406 - size_t offset, size_t len) 406 + size_t offset, size_t len, 407 + bool partial) 407 408 { 408 409 struct bch_folio *s = bch2_folio_create(folio, 0); 409 410 unsigned i, disk_sectors = 0, quota_sectors = 0; 411 + struct disk_reservation disk_res = {}; 412 + size_t reserved = len; 410 413 int ret; 411 414 412 415 if (!s) ··· 425 422 } 426 423 427 424 if (disk_sectors) { 428 - ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); 425 + ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors, 426 + partial ? BCH_DISK_RESERVATION_PARTIAL : 0); 429 427 if (unlikely(ret)) 430 428 return ret; 429 + 430 + if (unlikely(disk_res.sectors != disk_sectors)) { 431 + disk_sectors = quota_sectors = 0; 432 + 433 + for (i = round_down(offset, block_bytes(c)) >> 9; 434 + i < round_up(offset + len, block_bytes(c)) >> 9; 435 + i++) { 436 + disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas); 437 + if (disk_sectors > disk_res.sectors) { 438 + /* 439 + * Make sure to get a reservation that's 440 + * aligned to the filesystem blocksize: 441 + */ 442 + unsigned reserved_offset = round_down(i << 9, block_bytes(c)); 443 + reserved = clamp(reserved_offset, offset, offset + len) - offset; 444 + 445 + if (!reserved) { 446 + bch2_disk_reservation_put(c, &disk_res); 447 + return -BCH_ERR_ENOSPC_disk_reservation; 448 + } 449 + break; 450 + } 451 + quota_sectors += s->s[i].state == SECTOR_unallocated; 452 + } 453 + } 431 454 } 432 455 433 456 if (quota_sectors) { 434 457 ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true); 435 458 if (unlikely(ret)) { 436 - struct disk_reservation tmp = { .sectors = disk_sectors }; 437 - 438 - 
bch2_disk_reservation_put(c, &tmp); 439 - res->disk.sectors -= disk_sectors; 459 + bch2_disk_reservation_put(c, &disk_res); 440 460 return ret; 441 461 } 442 462 } 443 463 444 - return 0; 464 + res->disk.sectors += disk_res.sectors; 465 + return partial ? reserved : 0; 466 + } 467 + 468 + int bch2_folio_reservation_get(struct bch_fs *c, 469 + struct bch_inode_info *inode, 470 + struct folio *folio, 471 + struct bch2_folio_reservation *res, 472 + size_t offset, size_t len) 473 + { 474 + return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false); 445 475 } 446 476 447 477 ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c, ··· 483 447 struct bch2_folio_reservation *res, 484 448 size_t offset, size_t len) 485 449 { 486 - size_t l, reserved = 0; 487 - int ret; 488 - 489 - while ((l = len - reserved)) { 490 - while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) { 491 - if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c)) 492 - return reserved ?: ret; 493 - 494 - len = reserved + l; 495 - l /= 2; 496 - } 497 - 498 - offset += l; 499 - reserved += l; 500 - } 501 - 502 - return reserved; 450 + return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true); 503 451 } 504 452 505 453 static void bch2_clear_folio_bits(struct folio *folio)
+1 -1
fs/bcachefs/fs-io.c
··· 182 182 183 183 struct bch_inode_unpacked u; 184 184 int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?: 185 - bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?: 185 + bch2_journal_flush_seq(&c->journal, u.bi_journal_seq, TASK_INTERRUPTIBLE) ?: 186 186 bch2_inode_flush_nocow_writes(c, inode); 187 187 bch2_write_ref_put(c, BCH_WRITE_REF_fsync); 188 188 return ret;
+7 -11
fs/bcachefs/fs.c
··· 656 656 struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); 657 657 658 658 struct bch_inode_info *inode; 659 - bch2_trans_do(c, NULL, NULL, 0, 659 + bch2_trans_do(c, 660 660 PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir), 661 661 &hash, &dentry->d_name))); 662 662 if (IS_ERR(inode)) ··· 869 869 ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?: 870 870 bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol); 871 871 if (ret) 872 - goto err; 872 + goto err_tx_restart; 873 873 874 874 if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { 875 875 ret = bch2_fs_quota_transfer(c, src_inode, ··· 1266 1266 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, 1267 1267 POS(ei->v.i_ino, start), 0); 1268 1268 1269 - while (true) { 1269 + while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 1270 1270 enum btree_id data_btree = BTREE_ID_extents; 1271 1271 1272 1272 bch2_trans_begin(trans); ··· 1274 1274 u32 snapshot; 1275 1275 ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); 1276 1276 if (ret) 1277 - goto err; 1277 + continue; 1278 1278 1279 1279 bch2_btree_iter_set_snapshot(&iter, snapshot); 1280 1280 1281 1281 k = bch2_btree_iter_peek_upto(&iter, end); 1282 1282 ret = bkey_err(k); 1283 1283 if (ret) 1284 - goto err; 1284 + continue; 1285 1285 1286 1286 if (!k.k) 1287 1287 break; ··· 1301 1301 ret = bch2_read_indirect_extent(trans, &data_btree, 1302 1302 &offset_into_extent, &cur); 1303 1303 if (ret) 1304 - break; 1304 + continue; 1305 1305 1306 1306 k = bkey_i_to_s_c(cur.k); 1307 1307 bch2_bkey_buf_realloc(&prev, c, k.k->u64s); ··· 1329 1329 1330 1330 bch2_btree_iter_set_pos(&iter, 1331 1331 POS(iter.pos.inode, iter.pos.offset + sectors)); 1332 - err: 1333 - if (ret && 1334 - !bch2_err_matches(ret, BCH_ERR_transaction_restart)) 1335 - break; 1336 1332 } 1337 1333 bch2_trans_iter_exit(trans, &iter); 1338 1334 ··· 2036 2040 bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb, 
2037 2041 OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE); 2038 2042 printbuf_nul_terminate(&buf); 2039 - seq_puts(seq, buf.buf); 2043 + seq_printf(seq, ",%s", buf.buf); 2040 2044 2041 2045 int ret = buf.allocation_failure ? -ENOMEM : 0; 2042 2046 printbuf_exit(&buf);
+223 -50
fs/bcachefs/fsck.c
··· 929 929 return ret; 930 930 } 931 931 932 - static int hash_redo_key(struct btree_trans *trans, 933 - const struct bch_hash_desc desc, 934 - struct bch_hash_info *hash_info, 935 - struct btree_iter *k_iter, struct bkey_s_c k) 932 + static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d) 936 933 { 937 - struct bkey_i *delete; 938 - struct bkey_i *tmp; 934 + if (d.v->d_type == DT_SUBVOL) { 935 + u32 snap; 936 + u64 inum; 937 + int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum); 938 + if (ret && !bch2_err_matches(ret, ENOENT)) 939 + return ret; 940 + return !ret; 941 + } else { 942 + struct btree_iter iter; 943 + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 944 + SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0); 945 + int ret = bkey_err(k); 946 + if (ret) 947 + return ret; 939 948 940 - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); 941 - if (IS_ERR(delete)) 942 - return PTR_ERR(delete); 949 + ret = bkey_is_inode(k.k); 950 + bch2_trans_iter_exit(trans, &iter); 951 + return ret; 952 + } 953 + } 943 954 944 - tmp = bch2_bkey_make_mut_noupdate(trans, k); 945 - if (IS_ERR(tmp)) 946 - return PTR_ERR(tmp); 955 + /* 956 + * Prefer to delete the first one, since that will be the one at the wrong 957 + * offset: 958 + * return value: 0 -> delete k1, 1 -> delete k2 959 + */ 960 + static int hash_pick_winner(struct btree_trans *trans, 961 + const struct bch_hash_desc desc, 962 + struct bch_hash_info *hash_info, 963 + struct bkey_s_c k1, 964 + struct bkey_s_c k2) 965 + { 966 + if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) && 967 + !memcmp(k1.v, k2.v, bkey_val_bytes(k1.k))) 968 + return 0; 947 969 948 - bkey_init(&delete->k); 949 - delete->k.p = k_iter->pos; 950 - return bch2_btree_iter_traverse(k_iter) ?: 951 - bch2_trans_update(trans, k_iter, delete, 0) ?: 952 - bch2_hash_set_in_snapshot(trans, desc, hash_info, 953 - (subvol_inum) { 0, k.k->p.inode }, 954 - k.k->p.snapshot, tmp, 955 
- STR_HASH_must_create| 956 - BTREE_UPDATE_internal_snapshot_node) ?: 957 - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 970 + switch (desc.btree_id) { 971 + case BTREE_ID_dirents: { 972 + int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1)); 973 + if (ret < 0) 974 + return ret; 975 + if (!ret) 976 + return 0; 977 + 978 + ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2)); 979 + if (ret < 0) 980 + return ret; 981 + if (!ret) 982 + return 1; 983 + return 2; 984 + } 985 + default: 986 + return 0; 987 + } 988 + } 989 + 990 + static int fsck_update_backpointers(struct btree_trans *trans, 991 + struct snapshots_seen *s, 992 + const struct bch_hash_desc desc, 993 + struct bch_hash_info *hash_info, 994 + struct bkey_i *new) 995 + { 996 + if (new->k.type != KEY_TYPE_dirent) 997 + return 0; 998 + 999 + struct bkey_i_dirent *d = bkey_i_to_dirent(new); 1000 + struct inode_walker target = inode_walker_init(); 1001 + int ret = 0; 1002 + 1003 + if (d->v.d_type == DT_SUBVOL) { 1004 + BUG(); 1005 + } else { 1006 + ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); 1007 + if (ret) 1008 + goto err; 1009 + 1010 + darray_for_each(target.inodes, i) { 1011 + i->inode.bi_dir_offset = d->k.p.offset; 1012 + ret = __bch2_fsck_write_inode(trans, &i->inode); 1013 + if (ret) 1014 + goto err; 1015 + } 1016 + } 1017 + err: 1018 + inode_walker_exit(&target); 1019 + return ret; 1020 + } 1021 + 1022 + static int fsck_rename_dirent(struct btree_trans *trans, 1023 + struct snapshots_seen *s, 1024 + const struct bch_hash_desc desc, 1025 + struct bch_hash_info *hash_info, 1026 + struct bkey_s_c_dirent old) 1027 + { 1028 + struct qstr old_name = bch2_dirent_get_name(old); 1029 + struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); 1030 + int ret = PTR_ERR_OR_ZERO(new); 1031 + if (ret) 1032 + return ret; 1033 + 1034 + bkey_dirent_init(&new->k_i); 1035 + dirent_copy_target(new, old); 1036 + new->k.p = old.k->p; 1037 + 1038 + for 
(unsigned i = 0; i < 1000; i++) { 1039 + unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", 1040 + old_name.len, old_name.name, i); 1041 + unsigned u64s = BKEY_U64s + dirent_val_u64s(len); 1042 + 1043 + if (u64s > U8_MAX) 1044 + return -EINVAL; 1045 + 1046 + new->k.u64s = u64s; 1047 + 1048 + ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, 1049 + (subvol_inum) { 0, old.k->p.inode }, 1050 + old.k->p.snapshot, &new->k_i, 1051 + BTREE_UPDATE_internal_snapshot_node); 1052 + if (!bch2_err_matches(ret, EEXIST)) 1053 + break; 1054 + } 1055 + 1056 + if (ret) 1057 + return ret; 1058 + 1059 + return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); 958 1060 } 959 1061 960 1062 static int hash_check_key(struct btree_trans *trans, 1063 + struct snapshots_seen *s, 961 1064 const struct bch_hash_desc desc, 962 1065 struct bch_hash_info *hash_info, 963 1066 struct btree_iter *k_iter, struct bkey_s_c hash_k) ··· 1089 986 if (bkey_eq(k.k->p, hash_k.k->p)) 1090 987 break; 1091 988 1092 - if (fsck_err_on(k.k->type == desc.key_type && 1093 - !desc.cmp_bkey(k, hash_k), 1094 - trans, hash_table_key_duplicate, 1095 - "duplicate hash table keys:\n%s", 1096 - (printbuf_reset(&buf), 1097 - bch2_bkey_val_to_text(&buf, c, hash_k), 1098 - buf.buf))) { 1099 - ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; 1100 - break; 1101 - } 989 + if (k.k->type == desc.key_type && 990 + !desc.cmp_bkey(k, hash_k)) 991 + goto duplicate_entries; 1102 992 1103 993 if (bkey_deleted(k.k)) { 1104 994 bch2_trans_iter_exit(trans, &iter); ··· 1104 1008 return ret; 1105 1009 bad_hash: 1106 1010 if (fsck_err(trans, hash_table_key_wrong_offset, 1107 - "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", 1011 + "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s", 1108 1012 bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, 1109 1013 (printbuf_reset(&buf), 
1110 1014 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { 1111 - ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); 1112 - bch_err_fn(c, ret); 1015 + struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); 1016 + if (IS_ERR(new)) 1017 + return PTR_ERR(new); 1018 + 1019 + k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info, 1020 + (subvol_inum) { 0, hash_k.k->p.inode }, 1021 + hash_k.k->p.snapshot, new, 1022 + STR_HASH_must_create| 1023 + BTREE_ITER_with_updates| 1024 + BTREE_UPDATE_internal_snapshot_node); 1025 + ret = bkey_err(k); 1113 1026 if (ret) 1114 - return ret; 1115 - ret = -BCH_ERR_transaction_restart_nested; 1027 + goto out; 1028 + if (k.k) 1029 + goto duplicate_entries; 1030 + 1031 + ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 1032 + BTREE_UPDATE_internal_snapshot_node) ?: 1033 + fsck_update_backpointers(trans, s, desc, hash_info, new) ?: 1034 + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: 1035 + -BCH_ERR_transaction_restart_nested; 1036 + goto out; 1116 1037 } 1117 1038 fsck_err: 1039 + goto out; 1040 + duplicate_entries: 1041 + ret = hash_pick_winner(trans, desc, hash_info, hash_k, k); 1042 + if (ret < 0) 1043 + goto out; 1044 + 1045 + if (!fsck_err(trans, hash_table_key_duplicate, 1046 + "duplicate hash table keys%s:\n%s", 1047 + ret != 2 ? 
"" : ", both point to valid inodes", 1048 + (printbuf_reset(&buf), 1049 + bch2_bkey_val_to_text(&buf, c, hash_k), 1050 + prt_newline(&buf), 1051 + bch2_bkey_val_to_text(&buf, c, k), 1052 + buf.buf))) 1053 + goto out; 1054 + 1055 + switch (ret) { 1056 + case 0: 1057 + ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); 1058 + break; 1059 + case 1: 1060 + ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0); 1061 + break; 1062 + case 2: 1063 + ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: 1064 + bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0); 1065 + goto out; 1066 + } 1067 + 1068 + ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: 1069 + -BCH_ERR_transaction_restart_nested; 1118 1070 goto out; 1119 1071 } 1120 1072 ··· 1240 1096 return ret; 1241 1097 } 1242 1098 1099 + static int get_snapshot_root_inode(struct btree_trans *trans, 1100 + struct bch_inode_unpacked *root, 1101 + u64 inum) 1102 + { 1103 + struct btree_iter iter; 1104 + struct bkey_s_c k; 1105 + int ret = 0; 1106 + 1107 + for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, 1108 + SPOS(0, inum, U32_MAX), 1109 + BTREE_ITER_all_snapshots, k, ret) { 1110 + if (k.k->p.offset != inum) 1111 + break; 1112 + if (bkey_is_inode(k.k)) 1113 + goto found_root; 1114 + } 1115 + if (ret) 1116 + goto err; 1117 + BUG(); 1118 + found_root: 1119 + BUG_ON(bch2_inode_unpack(k, root)); 1120 + err: 1121 + bch2_trans_iter_exit(trans, &iter); 1122 + return ret; 1123 + } 1124 + 1243 1125 static int check_inode(struct btree_trans *trans, 1244 1126 struct btree_iter *iter, 1245 1127 struct bkey_s_c k, 1246 - struct bch_inode_unpacked *prev, 1128 + struct bch_inode_unpacked *snapshot_root, 1247 1129 struct snapshots_seen *s) 1248 1130 { 1249 1131 struct bch_fs *c = trans->c; ··· 1293 1123 1294 1124 BUG_ON(bch2_inode_unpack(k, &u)); 1295 1125 1296 - if (prev->bi_inum != u.bi_inum) 1297 - *prev = u; 1126 + if (snapshot_root->bi_inum != u.bi_inum) { 1127 + ret = 
get_snapshot_root_inode(trans, snapshot_root, u.bi_inum); 1128 + if (ret) 1129 + goto err; 1130 + } 1298 1131 1299 - if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 1300 - inode_d_type(prev) != inode_d_type(&u), 1132 + if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed || 1133 + INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root), 1301 1134 trans, inode_snapshot_mismatch, 1302 1135 "inodes in different snapshots don't match")) { 1303 - bch_err(c, "repair not implemented yet"); 1304 - ret = -BCH_ERR_fsck_repair_unimplemented; 1305 - goto err_noprint; 1136 + u.bi_hash_seed = snapshot_root->bi_hash_seed; 1137 + SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root)); 1138 + do_update = true; 1306 1139 } 1307 1140 1308 1141 if (u.bi_dir || u.bi_dir_offset) { ··· 1458 1285 1459 1286 int bch2_check_inodes(struct bch_fs *c) 1460 1287 { 1461 - struct bch_inode_unpacked prev = { 0 }; 1288 + struct bch_inode_unpacked snapshot_root = {}; 1462 1289 struct snapshots_seen s; 1463 1290 1464 1291 snapshots_seen_init(&s); ··· 1468 1295 POS_MIN, 1469 1296 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1470 1297 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1471 - check_inode(trans, &iter, k, &prev, &s))); 1298 + check_inode(trans, &iter, k, &snapshot_root, &s))); 1472 1299 1473 1300 snapshots_seen_exit(&s); 1474 1301 bch_err_fn(c, ret); ··· 2480 2307 *hash_info = bch2_hash_info_init(c, &i->inode); 2481 2308 dir->first_this_inode = false; 2482 2309 2483 - ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 2310 + ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k); 2484 2311 if (ret < 0) 2485 2312 goto err; 2486 2313 if (ret) { ··· 2594 2421 *hash_info = bch2_hash_info_init(c, &i->inode); 2595 2422 inode->first_this_inode = false; 2596 2423 2597 - ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2424 + ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k); 2598 2425 bch_err_fn(c, ret); 
2599 2426 return ret; 2600 2427 } ··· 2682 2509 /* Get root directory, create if it doesn't exist: */ 2683 2510 int bch2_check_root(struct bch_fs *c) 2684 2511 { 2685 - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2512 + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2686 2513 check_root_trans(trans)); 2687 2514 bch_err_fn(c, ret); 2688 2515 return ret;
+15 -12
fs/bcachefs/inode.c
··· 163 163 unsigned fieldnr = 0, field_bits; 164 164 int ret; 165 165 166 - #define x(_name, _bits) \ 167 - if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \ 166 + #define x(_name, _bits) \ 167 + if (fieldnr++ == INODEv1_NR_FIELDS(inode.v)) { \ 168 168 unsigned offset = offsetof(struct bch_inode_unpacked, _name);\ 169 169 memset((void *) unpacked + offset, 0, \ 170 170 sizeof(*unpacked) - offset); \ ··· 283 283 { 284 284 memset(unpacked, 0, sizeof(*unpacked)); 285 285 286 + unpacked->bi_snapshot = k.k->p.snapshot; 287 + 286 288 switch (k.k->type) { 287 289 case KEY_TYPE_inode: { 288 290 struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); ··· 295 293 unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); 296 294 unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); 297 295 298 - if (INODE_NEW_VARINT(inode.v)) { 296 + if (INODEv1_NEW_VARINT(inode.v)) { 299 297 return bch2_inode_unpack_v2(unpacked, inode.v->fields, 300 298 bkey_val_end(inode), 301 - INODE_NR_FIELDS(inode.v)); 299 + INODEv1_NR_FIELDS(inode.v)); 302 300 } else { 303 301 return bch2_inode_unpack_v1(inode, unpacked); 304 302 } ··· 473 471 struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); 474 472 int ret = 0; 475 473 476 - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, 474 + bkey_fsck_err_on(INODEv1_STR_HASH(inode.v) >= BCH_STR_HASH_NR, 477 475 c, inode_str_hash_invalid, 478 476 "invalid str hash type (%llu >= %u)", 479 - INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); 477 + INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR); 480 478 481 479 ret = __bch2_inode_validate(c, k, flags); 482 480 fsck_err: ··· 535 533 prt_printf(out, "(%x)\n", inode->bi_flags); 536 534 537 535 prt_printf(out, "journal_seq=%llu\n", inode->bi_journal_seq); 536 + prt_printf(out, "hash_seed=%llx\n", inode->bi_hash_seed); 537 + prt_printf(out, "hash_type="); 538 + bch2_prt_str_hash_type(out, INODE_STR_HASH(inode)); 539 + prt_newline(out); 538 540 prt_printf(out, "bi_size=%llu\n", inode->bi_size); 539 541 prt_printf(out, 
"bi_sectors=%llu\n", inode->bi_sectors); 540 542 prt_printf(out, "bi_version=%llu\n", inode->bi_version); ··· 806 800 807 801 memset(inode_u, 0, sizeof(*inode_u)); 808 802 809 - /* ick */ 810 - inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; 811 - get_random_bytes(&inode_u->bi_hash_seed, 812 - sizeof(inode_u->bi_hash_seed)); 803 + SET_INODE_STR_HASH(inode_u, str_hash); 804 + get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed)); 813 805 } 814 806 815 807 void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, ··· 1091 1087 int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, 1092 1088 struct bch_inode_unpacked *inode) 1093 1089 { 1094 - return bch2_trans_do(c, NULL, NULL, 0, 1095 - bch2_inode_find_by_inum_trans(trans, inum, inode)); 1090 + return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode)); 1096 1091 } 1097 1092 1098 1093 int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
+1
fs/bcachefs/inode.h
··· 92 92 BCH_INODE_FIELDS_v3() 93 93 #undef x 94 94 }; 95 + BITMASK(INODE_STR_HASH, struct bch_inode_unpacked, bi_flags, 20, 24); 95 96 96 97 struct bkey_inode_buf { 97 98 struct bkey_i_inode_v3 inode;
+3 -3
fs/bcachefs/inode_format.h
··· 150 150 #undef x 151 151 }; 152 152 153 - LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); 154 - LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); 155 - LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); 153 + LE32_BITMASK(INODEv1_STR_HASH, struct bch_inode, bi_flags, 20, 24); 154 + LE32_BITMASK(INODEv1_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); 155 + LE32_BITMASK(INODEv1_NEW_VARINT,struct bch_inode, bi_flags, 31, 32); 156 156 157 157 LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); 158 158 LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
+1 -1
fs/bcachefs/io_misc.c
··· 377 377 * check for missing subvolume before fpunch, as in resume we don't want 378 378 * it to be a fatal error 379 379 */ 380 - ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors); 380 + ret = lockrestart_do(trans, __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors)); 381 381 if (ret) 382 382 return ret; 383 383
+4 -4
fs/bcachefs/io_read.c
··· 409 409 bch2_trans_begin(trans); 410 410 rbio->bio.bi_status = 0; 411 411 412 - k = bch2_btree_iter_peek_slot(&iter); 413 - if (bkey_err(k)) 412 + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); 413 + if (ret) 414 414 goto err; 415 415 416 416 bch2_bkey_buf_reassemble(&sk, c, k); ··· 557 557 558 558 static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) 559 559 { 560 - bch2_trans_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 561 - __bch2_rbio_narrow_crcs(trans, rbio)); 560 + bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 561 + __bch2_rbio_narrow_crcs(trans, rbio)); 562 562 } 563 563 564 564 /* Inner part that may run in process context */
+2 -2
fs/bcachefs/io_write.c
··· 1437 1437 * freeing up space on specific disks, which means that 1438 1438 * allocations for specific disks may hang arbitrarily long: 1439 1439 */ 1440 - ret = bch2_trans_do(c, NULL, NULL, 0, 1440 + ret = bch2_trans_run(c, lockrestart_do(trans, 1441 1441 bch2_alloc_sectors_start_trans(trans, 1442 1442 op->target, 1443 1443 op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), ··· 1447 1447 op->nr_replicas_required, 1448 1448 op->watermark, 1449 1449 op->flags, 1450 - &op->cl, &wp)); 1450 + &op->cl, &wp))); 1451 1451 if (unlikely(ret)) { 1452 1452 if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) 1453 1453 break;
+6 -4
fs/bcachefs/journal.c
··· 758 758 return ret; 759 759 } 760 760 761 - int bch2_journal_flush_seq(struct journal *j, u64 seq) 761 + int bch2_journal_flush_seq(struct journal *j, u64 seq, unsigned task_state) 762 762 { 763 763 u64 start_time = local_clock(); 764 764 int ret, ret2; ··· 769 769 if (seq <= j->flushed_seq_ondisk) 770 770 return 0; 771 771 772 - ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL))); 772 + ret = wait_event_state(j->wait, 773 + (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)), 774 + task_state); 773 775 774 776 if (!ret) 775 777 bch2_time_stats_update(j->flush_seq_time, start_time); ··· 790 788 791 789 int bch2_journal_flush(struct journal *j) 792 790 { 793 - return bch2_journal_flush_seq(j, atomic64_read(&j->seq)); 791 + return bch2_journal_flush_seq(j, atomic64_read(&j->seq), TASK_UNINTERRUPTIBLE); 794 792 } 795 793 796 794 /* ··· 853 851 854 852 bch2_journal_res_put(j, &res); 855 853 856 - return bch2_journal_flush_seq(j, res.seq); 854 + return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE); 857 855 } 858 856 859 857 /* block/unlock the journal: */
+1 -1
fs/bcachefs/journal.h
··· 401 401 int bch2_journal_flush_seq_async(struct journal *, u64, struct closure *); 402 402 void bch2_journal_flush_async(struct journal *, struct closure *); 403 403 404 - int bch2_journal_flush_seq(struct journal *, u64); 404 + int bch2_journal_flush_seq(struct journal *, u64, unsigned); 405 405 int bch2_journal_flush(struct journal *); 406 406 bool bch2_journal_noflush_seq(struct journal *, u64); 407 407 int bch2_journal_meta(struct journal *);
+5 -1
fs/bcachefs/opts.c
··· 63 63 NULL 64 64 }; 65 65 66 - const char * const bch2_str_hash_types[] = { 66 + const char * const __bch2_str_hash_types[] = { 67 67 BCH_STR_HASH_TYPES() 68 68 NULL 69 69 }; ··· 115 115 PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type); 116 116 PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type); 117 117 PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type); 118 + PRT_STR_OPT_BOUNDSCHECKED(str_hash_type, enum bch_str_hash_type); 118 119 119 120 static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res, 120 121 struct printbuf *err) ··· 597 596 copied_opts_start = copied_opts; 598 597 599 598 while ((opt = strsep(&copied_opts, ",")) != NULL) { 599 + if (!*opt) 600 + continue; 601 + 600 602 name = strsep(&opt, "="); 601 603 val = opt; 602 604
+2 -1
fs/bcachefs/opts.h
··· 18 18 extern const char * const __bch2_btree_ids[]; 19 19 extern const char * const bch2_csum_opts[]; 20 20 extern const char * const bch2_compression_opts[]; 21 - extern const char * const bch2_str_hash_types[]; 21 + extern const char * const __bch2_str_hash_types[]; 22 22 extern const char * const bch2_str_hash_opts[]; 23 23 extern const char * const __bch2_data_types[]; 24 24 extern const char * const bch2_member_states[]; ··· 29 29 void bch2_prt_data_type(struct printbuf *, enum bch_data_type); 30 30 void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type); 31 31 void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type); 32 + void bch2_prt_str_hash_type(struct printbuf *, enum bch_str_hash_type); 32 33 33 34 static inline const char *bch2_d_type_str(unsigned d_type) 34 35 {
+1 -1
fs/bcachefs/quota.c
··· 869 869 bkey_quota_init(&new_quota.k_i); 870 870 new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); 871 871 872 - ret = bch2_trans_do(c, NULL, NULL, 0, 872 + ret = bch2_trans_commit_do(c, NULL, NULL, 0, 873 873 bch2_set_quota_trans(trans, &new_quota, qdq)) ?: 874 874 __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); 875 875
+3 -1
fs/bcachefs/rebalance.c
··· 70 70 71 71 int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) 72 72 { 73 - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, 73 + int ret = bch2_trans_commit_do(c, NULL, NULL, 74 + BCH_TRANS_COMMIT_no_enospc| 75 + BCH_TRANS_COMMIT_lazy_rw, 74 76 __bch2_set_rebalance_needs_scan(trans, inum)); 75 77 rebalance_wakeup(c); 76 78 return ret;
+1 -1
fs/bcachefs/recovery.c
··· 1091 1091 1092 1092 bch2_inode_init_early(c, &lostfound_inode); 1093 1093 1094 - ret = bch2_trans_do(c, NULL, NULL, 0, 1094 + ret = bch2_trans_commit_do(c, NULL, NULL, 0, 1095 1095 bch2_create_trans(trans, 1096 1096 BCACHEFS_ROOT_SUBVOL_INUM, 1097 1097 &root_inode, &lostfound_inode,
+2 -2
fs/bcachefs/sb-errors_format.h
··· 267 267 x(journal_entry_dup_same_device, 246, 0) \ 268 268 x(inode_bi_subvol_missing, 247, 0) \ 269 269 x(inode_bi_subvol_wrong, 248, 0) \ 270 - x(inode_points_to_missing_dirent, 249, 0) \ 271 - x(inode_points_to_wrong_dirent, 250, 0) \ 270 + x(inode_points_to_missing_dirent, 249, FSCK_AUTOFIX) \ 271 + x(inode_points_to_wrong_dirent, 250, FSCK_AUTOFIX) \ 272 272 x(inode_bi_parent_nonzero, 251, 0) \ 273 273 x(dirent_to_missing_parent_subvol, 252, 0) \ 274 274 x(dirent_not_visible_in_parent_subvol, 253, 0) \
+42 -18
fs/bcachefs/str_hash.h
··· 46 46 { 47 47 /* XXX ick */ 48 48 struct bch_hash_info info = { 49 - .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & 50 - ~(~0U << INODE_STR_HASH_BITS), 49 + .type = INODE_STR_HASH(bi), 51 50 .siphash_key = { .k0 = bi->bi_hash_seed } 52 51 }; 53 52 ··· 252 253 } 253 254 254 255 static __always_inline 255 - int bch2_hash_set_in_snapshot(struct btree_trans *trans, 256 + struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans, 257 + struct btree_iter *iter, 256 258 const struct bch_hash_desc desc, 257 259 const struct bch_hash_info *info, 258 260 subvol_inum inum, u32 snapshot, 259 261 struct bkey_i *insert, 260 262 enum btree_iter_update_trigger_flags flags) 261 263 { 262 - struct btree_iter iter, slot = { NULL }; 264 + struct btree_iter slot = {}; 263 265 struct bkey_s_c k; 264 266 bool found = false; 265 267 int ret; 266 268 267 - for_each_btree_key_upto_norestart(trans, iter, desc.btree_id, 269 + for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, 268 270 SPOS(insert->k.p.inode, 269 271 desc.hash_bkey(info, bkey_i_to_s_c(insert)), 270 272 snapshot), ··· 280 280 } 281 281 282 282 if (!slot.path && !(flags & STR_HASH_must_replace)) 283 - bch2_trans_copy_iter(&slot, &iter); 283 + bch2_trans_copy_iter(&slot, iter); 284 284 285 285 if (k.k->type != KEY_TYPE_hash_whiteout) 286 286 goto not_found; ··· 290 290 ret = -BCH_ERR_ENOSPC_str_hash_create; 291 291 out: 292 292 bch2_trans_iter_exit(trans, &slot); 293 - bch2_trans_iter_exit(trans, &iter); 294 - 295 - return ret; 293 + bch2_trans_iter_exit(trans, iter); 294 + return ret ? 
bkey_s_c_err(ret) : bkey_s_c_null; 296 295 found: 297 296 found = true; 298 297 not_found: 299 - 300 - if (!found && (flags & STR_HASH_must_replace)) { 298 + if (found && (flags & STR_HASH_must_create)) { 299 + bch2_trans_iter_exit(trans, &slot); 300 + return k; 301 + } else if (!found && (flags & STR_HASH_must_replace)) { 301 302 ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; 302 - } else if (found && (flags & STR_HASH_must_create)) { 303 - ret = -BCH_ERR_EEXIST_str_hash_set; 304 303 } else { 305 304 if (!found && slot.path) 306 - swap(iter, slot); 305 + swap(*iter, slot); 307 306 308 - insert->k.p = iter.pos; 309 - ret = bch2_trans_update(trans, &iter, insert, flags); 307 + insert->k.p = iter->pos; 308 + ret = bch2_trans_update(trans, iter, insert, flags); 310 309 } 311 310 312 311 goto out; 312 + } 313 + 314 + static __always_inline 315 + int bch2_hash_set_in_snapshot(struct btree_trans *trans, 316 + const struct bch_hash_desc desc, 317 + const struct bch_hash_info *info, 318 + subvol_inum inum, u32 snapshot, 319 + struct bkey_i *insert, 320 + enum btree_iter_update_trigger_flags flags) 321 + { 322 + struct btree_iter iter; 323 + struct bkey_s_c k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, info, inum, 324 + snapshot, insert, flags); 325 + int ret = bkey_err(k); 326 + if (ret) 327 + return ret; 328 + if (k.k) { 329 + bch2_trans_iter_exit(trans, &iter); 330 + return -BCH_ERR_EEXIST_str_hash_set; 331 + } 332 + 333 + return 0; 313 334 } 314 335 315 336 static __always_inline ··· 384 363 struct btree_iter iter; 385 364 struct bkey_s_c k = bch2_hash_lookup(trans, &iter, desc, info, inum, key, 386 365 BTREE_ITER_intent); 387 - int ret = bkey_err(k) ?: 388 - bch2_hash_delete_at(trans, desc, info, &iter, 0); 366 + int ret = bkey_err(k); 367 + if (ret) 368 + return ret; 369 + 370 + ret = bch2_hash_delete_at(trans, desc, info, &iter, 0); 389 371 bch2_trans_iter_exit(trans, &iter); 390 372 return ret; 391 373 }
+3 -4
fs/bcachefs/subvolume.c
··· 319 319 320 320 int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) 321 321 { 322 - return bch2_trans_do(c, NULL, NULL, 0, 323 - bch2_subvol_is_ro_trans(trans, subvol)); 322 + return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol)); 324 323 } 325 324 326 325 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, ··· 675 676 /* set bi_subvol on root inode */ 676 677 int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) 677 678 { 678 - int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, 679 - __bch2_fs_upgrade_for_subvolumes(trans)); 679 + int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, 680 + __bch2_fs_upgrade_for_subvolumes(trans)); 680 681 bch_err_fn(c, ret); 681 682 return ret; 682 683 }
+1 -1
fs/bcachefs/super.c
··· 1972 1972 }; 1973 1973 u64 v[3] = { nbuckets - old_nbuckets, 0, 0 }; 1974 1974 1975 - ret = bch2_trans_do(ca->fs, NULL, NULL, 0, 1975 + ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0, 1976 1976 bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), false)) ?: 1977 1977 bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); 1978 1978 if (ret)
+2 -2
fs/bcachefs/tests.c
··· 450 450 k.k_i.k.p.snapshot = snapid; 451 451 k.k_i.k.size = len; 452 452 453 - ret = bch2_trans_do(c, NULL, NULL, 0, 453 + ret = bch2_trans_commit_do(c, NULL, NULL, 0, 454 454 bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i, 455 455 BTREE_UPDATE_internal_snapshot_node)); 456 456 bch_err_fn(c, ret); ··· 510 510 if (ret) 511 511 return ret; 512 512 513 - ret = bch2_trans_do(c, NULL, NULL, 0, 513 + ret = bch2_trans_commit_do(c, NULL, NULL, 0, 514 514 bch2_snapshot_node_create(trans, U32_MAX, 515 515 snapids, 516 516 snapid_subvols,
+1 -1
fs/bcachefs/xattr.c
··· 330 330 { 331 331 struct bch_inode_info *inode = to_bch_ei(vinode); 332 332 struct bch_fs *c = inode->v.i_sb->s_fs_info; 333 - int ret = bch2_trans_do(c, NULL, NULL, 0, 333 + int ret = bch2_trans_do(c, 334 334 bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); 335 335 336 336 if (ret < 0 && bch2_err_matches(ret, ENOENT))