Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-6.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
"Features:

- shutdown ioctl support (needs CONFIG_BTRFS_EXPERIMENTAL for now; a
hedged userspace sketch follows this feature list):
- set the filesystem state to shut down (also named going down
in other filesystems), where all active operations return EIO
and this cannot be changed until unmount
- pending operations are given a chance to finish, but error
messages may still show up depending on where exactly the
shutdown happened

- scrub (and device replace) vs suspend/hibernate:
- a running scrub will prevent suspend, which can be annoying as
suspend is an immediate request and scrub is not critical
- freezing the filesystem before suspend was not sufficient, as the
problem was in process freezing
- behaviour change: on suspend, scrub and device replace are
cancelled; scrub records its last position and can continue from
there, while device replace has to be restarted from the
beginning

- zone stats exported in sysfs; from the perspective of the
filesystem this includes active, reclaimable, relocation etc. zones

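The pull message does not spell out the new ioctl's uapi. As a rough
illustration of how such a shutdown request is typically issued from
userspace (compare XFS_IOC_GOINGDOWN), here is a minimal sketch; the
BTRFS_IOC_SHUTDOWN name and its request number below are assumptions,
so check include/uapi/linux/btrfs.h in the merged tree for the real
definition:

/* Hedged sketch: the ioctl name and request number are placeholders. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#ifndef BTRFS_IOC_SHUTDOWN
#define BTRFS_IOC_SHUTDOWN _IO(0x94, 64)	/* hypothetical request number */
#endif

int main(int argc, char **argv)
{
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <btrfs mount point>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* After this succeeds, active operations return EIO until unmount. */
	if (ioctl(fd, BTRFS_IOC_SHUTDOWN) < 0)
		perror("ioctl(BTRFS_IOC_SHUTDOWN)");
	close(fd);
	return 0;
}
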
Performance:

- improvements when processing space reservation tickets, achieved by
optimizing locking and shrinking critical sections; cumulative
improvements in lockstat numbers show +15%

Notable fixes:

- use a vmalloc fallback when allocating bios, as high-order allocations
can happen with wide checksums (like sha256)

- scrub will always track the last position of progress so it does not
start from zero after an error

Core:

- under the experimental config, checksum calculations are offloaded
to process context, which simplifies locking and allows removing the
compression write worker kthread(s) (a hedged sysfs sketch follows
this list):
- the speed improvement in direct IO throughput with buffered IO
fallback is +15% when not offloaded, but this is more related to
internal crypto subsystem improvements
- this will probably become the default in the future, removing the
sysfs tunable

- (experimental) block size > page size updates:
- support more operations when not using large folios (encoded
read/write and send)
- raid56

- more preparations for fscrypt support

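The sysfs tunable mentioned above is not named in this message. As a
hedged sketch only, toggling such a per-filesystem knob usually means
writing a mode string to an attribute under /sys/fs/btrfs/<UUID>/; the
offload_csum attribute name and the accepted values below are
assumptions to verify on a CONFIG_BTRFS_EXPERIMENTAL kernel:

#include <stdio.h>

int main(int argc, char **argv)
{
	char path[256];
	FILE *f;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <fs UUID> <auto|on|off>\n", argv[0]);
		return 1;
	}
	/* Assumed attribute name and value set. */
	snprintf(path, sizeof(path), "/sys/fs/btrfs/%s/offload_csum", argv[1]);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "%s\n", argv[2]);
	fclose(f);
	return 0;
}
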
Other:

- more conversions to auto-cleaned variables

- parameter cleanups and removals

- extended warning fixes

- improved printing of structured values like keys

- lots of other cleanups and refactoring"

* tag 'for-6.19-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (147 commits)
btrfs: remove unnecessary inode key in btrfs_log_all_parents()
btrfs: remove redundant zero/NULL initializations in btrfs_alloc_root()
btrfs: remaining BTRFS_PATH_AUTO_FREE conversions
btrfs: send: do not allocate memory for xattr data when checking it exists
btrfs: send: add unlikely to all unexpected overflow checks
btrfs: reduce arguments to btrfs_del_inode_ref_in_log()
btrfs: remove root argument from btrfs_del_dir_entries_in_log()
btrfs: use test_and_set_bit() in btrfs_delayed_delete_inode_ref()
btrfs: don't search back for dir inode item in INO_LOOKUP_USER
btrfs: don't rewrite ret from inode_permission
btrfs: add orig_logical to btrfs_bio for encryption
btrfs: disable verity on encrypted inodes
btrfs: disable various operations on encrypted inodes
btrfs: remove redundant level reset in btrfs_del_items()
btrfs: simplify leaf traversal after path release in btrfs_next_old_leaf()
btrfs: optimize balance_level() path reference handling
btrfs: factor out root promotion logic into promote_child_to_root()
btrfs: raid56: remove the "_step" infix
btrfs: raid56: enable bs > ps support
btrfs: raid56: prepare finish_parity_scrub() to support bs > ps cases
...

+2788 -2313
+1
fs/btrfs/accessors.h
···
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <uapi/linux/btrfs_tree.h>
+#include "fs.h"
 #include "extent_io.h"

 struct extent_buffer;
+10 -15
fs/btrfs/acl.c
···
 #include "ctree.h"
 #include "xattr.h"
 #include "acl.h"
+#include "misc.h"

 struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
 {
 	int size;
 	const char *name;
-	char *value = NULL;
+	char AUTO_KFREE(value);
 	struct posix_acl *acl;

 	if (rcu)
···
 		acl = NULL;
 	else
 		acl = ERR_PTR(size);
-	kfree(value);

 	return acl;
 }
···
 {
 	int ret, size = 0;
 	const char *name;
-	char *value = NULL;
+	char AUTO_KFREE(value);

 	switch (type) {
 	case ACL_TYPE_ACCESS:
···
 		nofs_flag = memalloc_nofs_save();
 		value = kmalloc(size, GFP_KERNEL);
 		memalloc_nofs_restore(nofs_flag);
-		if (!value) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!value)
+			return -ENOMEM;

 		ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
 		if (ret < 0)
-			goto out;
+			return ret;
 	}

 	if (trans)
 		ret = btrfs_setxattr(trans, inode, name, value, size, 0);
 	else
 		ret = btrfs_setxattr_trans(inode, name, value, size, 0);
+	if (ret < 0)
+		return ret;

-out:
-	kfree(value);
-
-	if (!ret)
-		set_cached_acl(inode, type, acl);
-
-	return ret;
+	set_cached_acl(inode, type, acl);
+	return 0;
 }

 int btrfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+14 -23
fs/btrfs/backref.c
···
 	ret = btrfs_search_old_slot(root, &search_key, path, ctx->time_seq);

 	btrfs_debug(ctx->fs_info,
-		    "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
-		    ref->root_id, level, ref->count, ret,
-		    ref->key_for_search.objectid, ref->key_for_search.type,
-		    ref->key_for_search.offset);
+		    "search slot in root %llu (level %d, ref count %d) returned %d for key " BTRFS_KEY_FMT,
+		    ref->root_id, level, ref->count, ret,
+		    BTRFS_KEY_FMT_VALUE(&ref->key_for_search));
 	if (ret < 0)
 		goto out;

···
 	if (!path)
 		return -ENOMEM;
 	if (!ctx->trans) {
-		path->search_commit_root = 1;
-		path->skip_locking = 1;
+		path->search_commit_root = true;
+		path->skip_locking = true;
 	}

 	if (ctx->time_seq == BTRFS_SEQ_LAST)
-		path->skip_locking = 1;
+		path->skip_locking = true;

again:
 	head = NULL;
···

 	btrfs_release_path(path);

-	ret = add_missing_keys(ctx->fs_info, &preftrees, path->skip_locking == 0);
+	ret = add_missing_keys(ctx->fs_info, &preftrees, !path->skip_locking);
 	if (ret)
 		goto out;

···
  * allocates space to return multiple file system paths for an inode.
  * total_bytes to allocate are passed, note that space usable for actual path
  * information will be total_bytes - sizeof(struct inode_fs_paths).
- * the returned pointer must be freed with free_ipath() in the end.
+ * the returned pointer must be freed with __free_inode_fs_paths() in the end.
  */
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 				  struct btrfs_path *path)
···
 	return ifp;
 }

-void free_ipath(struct inode_fs_paths *ipath)
-{
-	if (!ipath)
-		return;
-	kvfree(ipath->fspath);
-	kfree(ipath);
-}
-
 struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_backref_iter *ret;
···
 }

 	/* Current backref iterator only supports iteration in commit root */
-	ret->path->search_commit_root = 1;
-	ret->path->skip_locking = 1;
+	ret->path->search_commit_root = true;
+	ret->path->skip_locking = true;
 	ret->fs_info = fs_info;

 	return ret;
···
 	level = cur->level + 1;

 	/* Search the tree to find parent blocks referring to the block */
-	path->search_commit_root = 1;
-	path->skip_locking = 1;
+	path->search_commit_root = true;
+	path->skip_locking = true;
 	path->lowest_level = level;
 	ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
 	path->lowest_level = 0;
···
 	eb = path->nodes[level];
 	if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
 		btrfs_err(fs_info,
-			  "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
-			  cur->bytenr, level - 1, btrfs_root_id(root),
-			  tree_key->objectid, tree_key->type, tree_key->offset);
+			  "couldn't find block (%llu) (level %d) in tree (%llu) with key " BTRFS_KEY_FMT,
+			  cur->bytenr, level - 1, btrfs_root_id(root),
+			  BTRFS_KEY_FMT_VALUE(tree_key));
 		btrfs_put_root(root);
 		ret = -ENOENT;
 		goto out;
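
The hunks above replace open-coded "(%llu %u %llu)" key formatting with
a BTRFS_KEY_FMT/BTRFS_KEY_FMT_VALUE macro pair (the "improved printing
of structured values like keys" item). The pair's definition is not part
of these hunks; the userspace stand-in below only illustrates the
pattern of a string-literal format macro plus a companion argument-list
macro, with the layout assumed to match the old open-coded strings:

#include <inttypes.h>
#include <stdio.h>

struct btrfs_key {
	uint64_t objectid;
	uint8_t type;
	uint64_t offset;
};

/* String literal; concatenates into the surrounding format string. */
#define BTRFS_KEY_FMT "(%" PRIu64 " %u %" PRIu64 ")"
/* Companion macro expanding one key into the three printf arguments. */
#define BTRFS_KEY_FMT_VALUE(key) \
	(key)->objectid, (unsigned int)(key)->type, (key)->offset

int main(void)
{
	struct btrfs_key key = { .objectid = 256, .type = 1, .offset = 0 };

	printf("search returned for key " BTRFS_KEY_FMT "\n",
	       BTRFS_KEY_FMT_VALUE(&key));
	return 0;
}
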
+6 -1
fs/btrfs/backref.h
···
 struct btrfs_data_container *init_data_container(u32 total_bytes);
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 				  struct btrfs_path *path);
-void free_ipath(struct inode_fs_paths *ipath);
+
+DEFINE_FREE(inode_fs_paths, struct inode_fs_paths *,
+	    if (_T) {
+		kvfree(_T->fspath);
+		kfree(_T);
+	    })

 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			  u64 start_off, struct btrfs_path *path,
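
This DEFINE_FREE() replaces the exported free_ipath() helper with a
scope-based cleanup class, the same idiom as the AUTO_KFREE() conversion
in the acl.c hunk (the "more conversions to auto-cleaned variables"
item). Below is a minimal userspace model of the kernel's
linux/cleanup.h mechanics, built on the compiler's cleanup attribute;
the macros imitate the kernel API but are local stand-ins:

#include <stdio.h>
#include <stdlib.h>

#define DEFINE_FREE(name, type, free_expr)		\
	static inline void __free_##name(void *p)	\
	{						\
		type _T = *(type *)p;			\
		free_expr;				\
	}
#define __free(name) __attribute__((cleanup(__free_##name)))

struct inode_fs_paths {
	char *fspath;
};

/* NULL-safe destructor runs when an owning pointer leaves scope. */
DEFINE_FREE(inode_fs_paths, struct inode_fs_paths *,
	    if (_T) { free(_T->fspath); free(_T); })

int main(void)
{
	struct inode_fs_paths *ipath __free(inode_fs_paths) =
		calloc(1, sizeof(*ipath));

	if (!ipath)
		return 1;
	ipath->fspath = malloc(64);
	printf("ipath is freed automatically on return\n");
	return 0;	/* no explicit free_ipath() call needed */
}
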
+202 -88
fs/btrfs/bio.c
···
 /*
  * Initialize a btrfs_bio structure. This skips the embedded bio itself as it
  * is already initialized by the block layer.
  */
-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
 		    btrfs_bio_end_io_t end_io, void *private)
 {
+	/* @inode parameter is mandatory. */
+	ASSERT(inode);
+
 	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
-	bbio->fs_info = fs_info;
+	bbio->inode = inode;
 	bbio->end_io = end_io;
 	bbio->private = private;
+	bbio->file_offset = file_offset;
 	atomic_set(&bbio->pending_ios, 1);
 	WRITE_ONCE(bbio->status, BLK_STS_OK);
 }
···
  * a mempool.
  */
 struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
-				  struct btrfs_fs_info *fs_info,
+				  struct btrfs_inode *inode, u64 file_offset,
 				  btrfs_bio_end_io_t end_io, void *private)
 {
 	struct btrfs_bio *bbio;
···
 	bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
 	bbio = btrfs_bio(bio);
-	btrfs_bio_init(bbio, fs_info, end_io, private);
+	btrfs_bio_init(bbio, inode, file_offset, end_io, private);
 	return bbio;
 }
···
 		return ERR_CAST(bio);

 	bbio = btrfs_bio(bio);
-	btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
-	bbio->inode = orig_bbio->inode;
-	bbio->file_offset = orig_bbio->file_offset;
+	btrfs_bio_init(bbio, orig_bbio->inode, orig_bbio->file_offset, NULL, orig_bbio);
 	orig_bbio->file_offset += map_length;
 	if (bbio_has_ordered_extent(bbio)) {
 		refcount_inc(&orig_bbio->ordered->refs);
 		bbio->ordered = orig_bbio->ordered;
+		bbio->orig_logical = orig_bbio->orig_logical;
+		orig_bbio->orig_logical += map_length;
 	}
 	bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
 	atomic_inc(&orig_bbio->pending_ios);
···
 void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
 {
+	/* Make sure we're already in task context. */
+	ASSERT(in_task());
+
+	if (bbio->async_csum)
+		wait_for_completion(&bbio->csum_done);
+
 	bbio->bio.bi_status = status;
 	if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
 		struct btrfs_bio *orig_bbio = bbio->private;
···
 	struct btrfs_failed_bio *fbio = repair_bbio->private;
 	struct btrfs_inode *inode = repair_bbio->inode;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
+	/*
+	 * We can not move forward the saved_iter, as it will be later
+	 * utilized by repair_bbio again.
+	 */
+	struct bvec_iter saved_iter = repair_bbio->saved_iter;
+	const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+	const u64 logical = repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT;
+	const u32 nr_steps = repair_bbio->saved_iter.bi_size / step;
 	int mirror = repair_bbio->mirror_num;
+	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+	phys_addr_t paddr;
+	unsigned int slot = 0;
+
+	/* Repair bbio should be exactly one block sized. */
+	ASSERT(repair_bbio->saved_iter.bi_size == fs_info->sectorsize);
+
+	btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
+		ASSERT(slot < nr_steps);
+		paddrs[slot] = paddr;
+		slot++;
+	}

 	if (repair_bbio->bio.bi_status ||
-	    !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
+	    !btrfs_data_csum_ok(repair_bbio, dev, 0, paddrs)) {
 		bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
 		repair_bbio->bio.bi_iter = repair_bbio->saved_iter;

···
 		mirror = prev_repair_mirror(fbio, mirror);
 		btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
 				repair_bbio->file_offset, fs_info->sectorsize,
-				repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
-				bvec_phys(bv), mirror);
+				logical, paddrs, step, mirror);
 	} while (mirror != fbio->bbio->mirror_num);

done:
···
  */
 static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
 						  u32 bio_offset,
-						  phys_addr_t paddr,
+						  phys_addr_t paddrs[],
 						  struct btrfs_failed_bio *fbio)
 {
 	struct btrfs_inode *inode = failed_bbio->inode;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct folio *folio = page_folio(phys_to_page(paddr));
 	const u32 sectorsize = fs_info->sectorsize;
-	const u32 foff = offset_in_folio(folio, paddr);
-	const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
+	const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+	const u32 nr_steps = sectorsize / step;
+	/*
+	 * For bs > ps cases, the saved_iter can be partially moved forward.
+	 * In that case we should round it down to the block boundary.
+	 */
+	const u64 logical = round_down(failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
+				       sectorsize);
 	struct btrfs_bio *repair_bbio;
 	struct bio *repair_bio;
 	int num_copies;
 	int mirror;

-	ASSERT(foff + sectorsize <= folio_size(folio));
 	btrfs_debug(fs_info, "repair read error: read error at %llu",
 		    failed_bbio->file_offset + bio_offset);

···

 	atomic_inc(&fbio->repair_count);

-	repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
+	repair_bio = bio_alloc_bioset(NULL, nr_steps, REQ_OP_READ, GFP_NOFS,
 				      &btrfs_repair_bioset);
-	repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
-	bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
+	repair_bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
+	for (int i = 0; i < nr_steps; i++) {
+		int ret;
+
+		ASSERT(offset_in_page(paddrs[i]) + step <= PAGE_SIZE);
+
+		ret = bio_add_page(repair_bio, phys_to_page(paddrs[i]), step,
+				   offset_in_page(paddrs[i]));
+		ASSERT(ret == step);
+	}

 	repair_bbio = btrfs_bio(repair_bio);
-	btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
-	repair_bbio->inode = failed_bbio->inode;
-	repair_bbio->file_offset = failed_bbio->file_offset + bio_offset;
+	btrfs_bio_init(repair_bbio, failed_bbio->inode, failed_bbio->file_offset + bio_offset,
+		       NULL, fbio);

 	mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
 	btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
···
 {
 	struct btrfs_inode *inode = bbio->inode;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	u32 sectorsize = fs_info->sectorsize;
+	const u32 sectorsize = fs_info->sectorsize;
+	const u32 step = min(sectorsize, PAGE_SIZE);
+	const u32 nr_steps = sectorsize / step;
 	struct bvec_iter *iter = &bbio->saved_iter;
 	blk_status_t status = bbio->bio.bi_status;
 	struct btrfs_failed_bio *fbio = NULL;
+	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
 	phys_addr_t paddr;
 	u32 offset = 0;

···
 	/* Clear the I/O error. A failed repair will reset it. */
 	bbio->bio.bi_status = BLK_STS_OK;

-	btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
-		if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
-			fbio = repair_one_sector(bbio, offset, paddr, fbio);
-		offset += sectorsize;
+	btrfs_bio_for_each_block(paddr, &bbio->bio, iter, step) {
+		paddrs[(offset / step) % nr_steps] = paddr;
+		offset += step;
+
+		if (IS_ALIGNED(offset, sectorsize)) {
+			if (status ||
+			    !btrfs_data_csum_ok(bbio, dev, offset - sectorsize, paddrs))
+				fbio = repair_one_sector(bbio, offset - sectorsize,
+							 paddrs, fbio);
+		}
 	}
 	if (bbio->csum != bbio->csum_inline)
-		kfree(bbio->csum);
+		kvfree(bbio->csum);

 	if (fbio)
 		btrfs_repair_done(fbio);
···
 	return fs_info->endio_workers;
 }

-static void btrfs_end_bio_work(struct work_struct *work)
+static void simple_end_io_work(struct work_struct *work)
 {
 	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+	struct bio *bio = &bbio->bio;

-	/* Metadata reads are checked and repaired by the submitter. */
-	if (is_data_bbio(bbio))
-		btrfs_check_read_bio(bbio, bbio->bio.bi_private);
-	else
-		btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+	if (bio_op(bio) == REQ_OP_READ) {
+		/* Metadata reads are checked and repaired by the submitter. */
+		if (is_data_bbio(bbio))
+			return btrfs_check_read_bio(bbio, bbio->bio.bi_private);
+		return btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+	}
+	if (bio_is_zone_append(bio) && !bio->bi_status)
+		btrfs_record_physical_zoned(bbio);
+	btrfs_bio_end_io(bbio, bbio->bio.bi_status);
 }

 static void btrfs_simple_end_io(struct bio *bio)
 {
 	struct btrfs_bio *bbio = btrfs_bio(bio);
 	struct btrfs_device *dev = bio->bi_private;
-	struct btrfs_fs_info *fs_info = bbio->fs_info;
+	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;

 	btrfs_bio_counter_dec(fs_info);

 	if (bio->bi_status)
 		btrfs_log_dev_io_error(bio, dev);

-	if (bio_op(bio) == REQ_OP_READ) {
-		INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
-		queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
-	} else {
-		if (bio_is_zone_append(bio) && !bio->bi_status)
-			btrfs_record_physical_zoned(bbio);
-		btrfs_bio_end_io(bbio, bbio->bio.bi_status);
-	}
+	INIT_WORK(&bbio->end_io_work, simple_end_io_work);
+	queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
 }

 static void btrfs_raid56_end_io(struct bio *bio)
 {
 	struct btrfs_io_context *bioc = bio->bi_private;
 	struct btrfs_bio *bbio = btrfs_bio(bio);
+
+	/* RAID56 endio is always handled in workqueue. */
+	ASSERT(in_task());

 	btrfs_bio_counter_dec(bioc->fs_info);
 	bbio->mirror_num = bioc->mirror_num;
···
 	btrfs_put_bioc(bioc);
 }

-static void btrfs_orig_write_end_io(struct bio *bio)
+static void orig_write_end_io_work(struct work_struct *work)
 {
+	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+	struct bio *bio = &bbio->bio;
 	struct btrfs_io_stripe *stripe = bio->bi_private;
 	struct btrfs_io_context *bioc = stripe->bioc;
-	struct btrfs_bio *bbio = btrfs_bio(bio);

 	btrfs_bio_counter_dec(bioc->fs_info);

···
 	btrfs_put_bioc(bioc);
 }

-static void btrfs_clone_write_end_io(struct bio *bio)
+static void btrfs_orig_write_end_io(struct bio *bio)
 {
+	struct btrfs_bio *bbio = btrfs_bio(bio);
+
+	INIT_WORK(&bbio->end_io_work, orig_write_end_io_work);
+	queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
+}
+
+static void clone_write_end_io_work(struct work_struct *work)
+{
+	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
+	struct bio *bio = &bbio->bio;
 	struct btrfs_io_stripe *stripe = bio->bi_private;

 	if (bio->bi_status) {
···
 	/* Pass on control to the original bio this one was cloned from */
 	bio_endio(stripe->bioc->orig_bio);
 	bio_put(bio);
+}
+
+static void btrfs_clone_write_end_io(struct bio *bio)
+{
+	struct btrfs_bio *bbio = btrfs_bio(bio);
+
+	INIT_WORK(&bbio->end_io_work, clone_write_end_io_work);
+	queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
 }

 static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
···
 static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
 {
 	struct bio *orig_bio = bioc->orig_bio, *bio;
+	struct btrfs_bio *orig_bbio = btrfs_bio(orig_bio);

 	ASSERT(bio_op(orig_bio) != REQ_OP_READ);

···
 		bio = orig_bio;
 		bio->bi_end_io = btrfs_orig_write_end_io;
 	} else {
-		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
+		/* We need to use endio_work to run end_io in task context. */
+		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &btrfs_bioset);
 		bio_inc_remaining(orig_bio);
+		btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode,
+			       orig_bbio->file_offset, NULL, NULL);
 		bio->bi_end_io = btrfs_clone_write_end_io;
 	}

···
 {
 	if (bbio->bio.bi_opf & REQ_META)
 		return btree_csum_one_bio(bbio);
-	return btrfs_csum_one_bio(bbio);
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+	return btrfs_csum_one_bio(bbio, true);
+#else
+	return btrfs_csum_one_bio(bbio, false);
+#endif
 }

 /*
···
 static bool should_async_write(struct btrfs_bio *bbio)
 {
+	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
 	bool auto_csum_mode = true;

 #ifdef CONFIG_BTRFS_EXPERIMENTAL
-	struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);

-	if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
-		return false;
-
-	auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+	if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
+		return true;
+	/*
+	 * Write bios will calculate checksum and submit bio at the same time.
+	 * Unless explicitly required don't offload serial csum calculate and bio
+	 * submit into a workqueue.
+	 */
+	return false;
 #endif

 	/* Submit synchronously if the checksum implementation is fast. */
-	if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+	if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
 		return false;

 	/*
···
 		return false;

 	/* Zoned devices require I/O to be submitted in order. */
-	if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+	if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(fs_info))
 		return false;

 	return true;
···
 				struct btrfs_io_context *bioc,
 				struct btrfs_io_stripe *smap, int mirror_num)
 {
-	struct btrfs_fs_info *fs_info = bbio->fs_info;
+	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
 	struct async_submit_bio *async;

 	async = kmalloc(sizeof(*async), GFP_NOFS);
···
 static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
 {
+	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
 	unsigned int nr_segs;
 	int sector_offset;

-	map_length = min(map_length, bbio->fs_info->max_zone_append_size);
-	sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
+	map_length = min(map_length, fs_info->max_zone_append_size);
+	sector_offset = bio_split_rw_at(&bbio->bio, &fs_info->limits,
 					&nr_segs, map_length);
 	if (sector_offset) {
 		/*
···
 		 * sectorsize and thus cause unaligned I/Os. Fix that by
 		 * always rounding down to the nearest boundary.
 		 */
-		return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize);
+		return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, fs_info->sectorsize);
 	}
 	return map_length;
 }
···
 static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 {
 	struct btrfs_inode *inode = bbio->inode;
-	struct btrfs_fs_info *fs_info = bbio->fs_info;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct bio *bio = &bbio->bio;
 	u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
 	u64 length = bio->bi_iter.bi_size;
···
 	blk_status_t status;
 	int ret;

-	if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
+	if (bbio->is_scrub || btrfs_is_data_reloc_root(inode->root))
 		smap.rst_search_commit_root = true;
 	else
 		smap.rst_search_commit_root = false;
···
 		btrfs_bio_counter_dec(fs_info);
 		goto end_bbio;
 	}
+
+	/*
+	 * For fscrypt writes we will get the encrypted bio after we've remapped
+	 * our bio to the physical disk location, so we need to save the
+	 * original bytenr so we know what we're checksumming.
+	 */
+	if (bio_op(bio) == REQ_OP_WRITE && is_data_bbio(bbio))
+		bbio->orig_logical = logical;

 	map_length = min(map_length, length);
 	if (use_append)
···
 	 * Csum items for reloc roots have already been cloned at this
 	 * point, so they are handled as part of the no-checksum case.
 	 */
-	if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
+	if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
 	    !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
 	    !btrfs_is_data_reloc_root(inode->root)) {
 		if (should_async_write(bbio) &&
···
 static void assert_bbio_alignment(struct btrfs_bio *bbio)
 {
 #ifdef CONFIG_BTRFS_ASSERT
-	struct btrfs_fs_info *fs_info = bbio->fs_info;
+	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	const u32 blocksize = fs_info->sectorsize;
+	const u32 alignment = min(blocksize, PAGE_SIZE);
+	const u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+	const u32 length = bbio->bio.bi_iter.bi_size;

-	/* Metadata has no extra bs > ps alignment requirement. */
-	if (!is_data_bbio(bbio))
-		return;
+	/* The logical and length should still be aligned to blocksize. */
+	ASSERT(IS_ALIGNED(logical, blocksize) && IS_ALIGNED(length, blocksize) &&
+	       length != 0, "root=%llu inode=%llu logical=%llu length=%u",
+	       btrfs_root_id(bbio->inode->root),
+	       btrfs_ino(bbio->inode), logical, length);

 	bio_for_each_bvec(bvec, &bbio->bio, iter)
-		ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) &&
-		       IS_ALIGNED(bvec.bv_len, blocksize),
+		ASSERT(IS_ALIGNED(bvec.bv_offset, alignment) &&
+		       IS_ALIGNED(bvec.bv_len, alignment),
 		       "root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u",
 		       btrfs_root_id(bbio->inode->root),
-		       btrfs_ino(bbio->inode),
-		       bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT,
-		       bbio->bio.bi_iter.bi_size, iter.bi_idx,
-		       bvec.bv_offset,
-		       bvec.bv_len);
+		       btrfs_ino(bbio->inode), logical, length, iter.bi_idx,
+		       bvec.bv_offset, bvec.bv_len);
 #endif
 }
···
  *
  * The I/O is issued synchronously to block the repair read completion from
  * freeing the bio.
+ *
+ * @ino:        Offending inode number
+ * @fileoff:    File offset inside the inode
+ * @length:     Length of the repair write
+ * @logical:    Logical address of the range
+ * @paddrs:     Physical address array of the content
+ * @step:       Length of each paddrs entry
+ * @mirror_num: Mirror number to write to. Must not be zero
  */
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-			    u64 length, u64 logical, phys_addr_t paddr, int mirror_num)
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+			    u32 length, u64 logical, const phys_addr_t paddrs[],
+			    unsigned int step, int mirror_num)
 {
+	const u32 nr_steps = DIV_ROUND_UP_POW2(length, step);
 	struct btrfs_io_stripe smap = { 0 };
-	struct bio_vec bvec;
-	struct bio bio;
+	struct bio *bio = NULL;
 	int ret = 0;

 	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
 	BUG_ON(!mirror_num);
+
+	/* Basic alignment checks. */
+	ASSERT(IS_ALIGNED(logical, fs_info->sectorsize));
+	ASSERT(IS_ALIGNED(length, fs_info->sectorsize));
+	ASSERT(IS_ALIGNED(fileoff, fs_info->sectorsize));
+	/* Either it's a single data or metadata block. */
+	ASSERT(length <= BTRFS_MAX_BLOCKSIZE);
+	ASSERT(step <= length);
+	ASSERT(is_power_of_2(step));

 	if (btrfs_repair_one_zone(fs_info, logical))
 		return 0;
···
 		goto out_counter_dec;
 	}

-	bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
-	bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
-	__bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr));
-	ret = submit_bio_wait(&bio);
+	bio = bio_alloc(smap.dev->bdev, nr_steps, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
+	bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
+	for (int i = 0; i < nr_steps; i++) {
+		ret = bio_add_page(bio, phys_to_page(paddrs[i]), step, offset_in_page(paddrs[i]));
+		/* We should have allocated enough slots to contain all the different pages. */
+		ASSERT(ret == step);
+	}
+	ret = submit_bio_wait(bio);
+	bio_put(bio);
 	if (ret) {
 		/* try to remap that extent elsewhere? */
 		btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
-		goto out_bio_uninit;
+		goto out_counter_dec;
 	}

 	btrfs_info_rl(fs_info,
 		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
-		ino, start, btrfs_dev_name(smap.dev),
+		ino, fileoff, btrfs_dev_name(smap.dev),
 		smap.physical >> SECTOR_SHIFT);
 	ret = 0;

-out_bio_uninit:
-	bio_uninit(&bio);
out_counter_dec:
 	btrfs_bio_counter_dec(fs_info);
 	return ret;
···
  */
 void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
 {
-	struct btrfs_fs_info *fs_info = bbio->fs_info;
+	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
 	u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
 	u64 length = bbio->bio.bi_iter.bi_size;
 	struct btrfs_io_stripe smap = { 0 };
 	int ret;

-	ASSERT(fs_info);
 	ASSERT(mirror_num > 0);
 	ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
-	ASSERT(!bbio->inode);
+	ASSERT(!is_data_inode(bbio->inode));
+	ASSERT(bbio->is_scrub);

 	btrfs_bio_counter_inc_blocked(fs_info);
 	ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
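
The recurring step/nr_steps logic above is the core of the block size >
page size support: one filesystem block can span several pages, so each
block is processed as page-sized steps with one physical address per
step. A standalone sketch of just that arithmetic, with invented sizes
and addresses (the kernel gets the addresses from
btrfs_bio_for_each_block() and they need not be contiguous):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	const uint32_t sectorsize = 16384;	/* example: 16K blocks, 4K pages */
	const uint32_t step = MIN(sectorsize, PAGE_SIZE);
	const uint32_t nr_steps = sectorsize / step;
	uint64_t paddrs[16];

	/* Collect one physical address per page-sized step of the block. */
	for (uint32_t i = 0; i < nr_steps; i++)
		paddrs[i] = 0x100000000ull + (uint64_t)i * PAGE_SIZE;

	for (uint32_t i = 0; i < nr_steps; i++)
		printf("step %u: paddr=0x%llx len=%u\n", i,
		       (unsigned long long)paddrs[i], step);
	return 0;
}
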
+24 -15
fs/btrfs/bio.h
···

 #define BTRFS_BIO_INLINE_CSUM_SIZE	64

-/*
- * Maximum number of sectors for a single bio to limit the size of the
- * checksum array. This matches the number of bio_vecs per bio and thus the
- * I/O size for buffered I/O.
- */
-#define BTRFS_MAX_BIO_SECTORS		(256)
-
 typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);

 /*
···
 struct btrfs_bio {
 	/*
 	 * Inode and offset into it that this I/O operates on.
-	 * Only set for data I/O.
+	 *
+	 * If the inode is a data one, csum verification and read-repair
+	 * will be done automatically.
+	 * If the inode is a metadata one, everything is handled by the caller.
 	 */
 	struct btrfs_inode *inode;
 	u64 file_offset;
···
 		 * - pointer to the checksums for this bio
 		 * - original physical address from the allocator
 		 *   (for zone append only)
+		 * - original logical address, used for checksumming fscrypt bios
 		 */
 		struct {
 			struct btrfs_ordered_extent *ordered;
 			struct btrfs_ordered_sum *sums;
+			struct work_struct csum_work;
+			struct completion csum_done;
+			struct bvec_iter csum_saved_iter;
 			u64 orig_physical;
+			u64 orig_logical;
 		};

 		/* For metadata reads: parentness verification. */
···
 	atomic_t pending_ios;
 	struct work_struct end_io_work;

-	/* File system that this I/O operates on. */
-	struct btrfs_fs_info *fs_info;
-
 	/* Save the first error status of split bio. */
 	blk_status_t status;

 	/* Use the commit root to look up csums (data read bio only). */
 	bool csum_search_commit_root;
+
+	/*
+	 * Since scrub will reuse btree inode, we need this flag to distinguish
+	 * scrub bios.
+	 */
+	bool is_scrub;
+
+	/* Whether the csum generation for data write is async. */
+	bool async_csum;
+
 	/*
 	 * This member must come last, bio_alloc_bioset will allocate enough
 	 * bytes for entire btrfs_bio but relies on bio being last.
···
 int __init btrfs_bioset_init(void);
 void __cold btrfs_bioset_exit(void);

-void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
 		    btrfs_bio_end_io_t end_io, void *private);
 struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
-				  struct btrfs_fs_info *fs_info,
+				  struct btrfs_inode *inode, u64 file_offset,
 				  btrfs_bio_end_io_t end_io, void *private);
 void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);

···

 void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num);
 void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-			    u64 length, u64 logical, phys_addr_t paddr, int mirror_num);
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+			    u32 length, u64 logical, const phys_addr_t paddrs[],
+			    unsigned int step, int mirror_num);

 #endif
+46 -37
fs/btrfs/block-group.c
···
 	extent_root = btrfs_extent_root(fs_info, max_t(u64, block_group->start,
 						       BTRFS_SUPER_INFO_OFFSET));

-	path->skip_locking = 1;
-	path->search_commit_root = 1;
+	path->skip_locking = true;
+	path->search_commit_root = true;
 	path->reada = READA_FORWARD;

 	search_offset = index * div_u64(block_group->length, max_index);
···
 	 * root to add free space. So we skip locking and search the commit
 	 * root, since its read-only
 	 */
-	path->skip_locking = 1;
-	path->search_commit_root = 1;
+	path->skip_locking = true;
+	path->search_commit_root = true;
 	path->reada = READA_FORWARD;

 	key.objectid = last;
···
 			     struct btrfs_chunk_map *map)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);
 	struct btrfs_block_group *block_group;
 	struct btrfs_free_cluster *cluster;
 	struct inode *inode;
···
 	btrfs_put_block_group(block_group);
 	if (remove_rsv)
 		btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
-	btrfs_free_path(path);
 	return ret;
 }
···
 	 * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
 	 * leeway to allow us to mark this block group as read only.
 	 */
-	if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
-				 BTRFS_RESERVE_NO_FLUSH))
+	if (btrfs_can_overcommit(sinfo, num_bytes, BTRFS_RESERVE_NO_FLUSH))
 		ret = 0;
 }
···
 	if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
 		btrfs_info(cache->fs_info,
 			"unable to make block group %llu ro", cache->start);
-		btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, false);
+		btrfs_dump_space_info(cache->space_info, 0, false);
 	}
 	return ret;
 }
···
 	 * We have allocated a new chunk. We also need to activate that chunk to
 	 * grant metadata tickets for zoned filesystem.
 	 */
-	ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
+	ret = btrfs_zoned_activate_one_bg(space_info, true);
 	if (ret < 0)
 		goto out;
···
  * reservation and return -EAGAIN, otherwise this function always succeeds.
  */
 int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
-			     u64 ram_bytes, u64 num_bytes, int delalloc,
+			     u64 ram_bytes, u64 num_bytes, bool delalloc,
 			     bool force_wrong_size_class)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
···
 	spin_lock(&cache->lock);
 	if (cache->ro) {
 		ret = -EAGAIN;
-		goto out;
+		goto out_error;
 	}

 	if (btrfs_block_group_should_use_size_class(cache)) {
 		size_class = btrfs_calc_block_group_size_class(num_bytes);
 		ret = btrfs_use_block_group_size_class(cache, size_class, force_wrong_size_class);
 		if (ret)
-			goto out;
+			goto out_error;
 	}
+
 	cache->reserved += num_bytes;
-	space_info->bytes_reserved += num_bytes;
-	trace_btrfs_space_reservation(cache->fs_info, "space_info",
-				      space_info->flags, num_bytes, 1);
-	btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);
 	if (delalloc)
 		cache->delalloc_bytes += num_bytes;
+
+	trace_btrfs_space_reservation(cache->fs_info, "space_info",
+				      space_info->flags, num_bytes, 1);
+	spin_unlock(&cache->lock);
+
+	space_info->bytes_reserved += num_bytes;
+	btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);

 	/*
 	 * Compression can use less space than we reserved, so wake tickets if
 	 * that happens.
 	 */
 	if (num_bytes < ram_bytes)
-		btrfs_try_granting_tickets(cache->fs_info, space_info);
-out:
+		btrfs_try_granting_tickets(space_info);
+	spin_unlock(&space_info->lock);
+
+	return 0;
+
+out_error:
 	spin_unlock(&cache->lock);
 	spin_unlock(&space_info->lock);
 	return ret;
···
 			      bool is_delalloc)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
+	bool bg_ro;

 	spin_lock(&space_info->lock);
 	spin_lock(&cache->lock);
-	if (cache->ro)
-		space_info->bytes_readonly += num_bytes;
-	else if (btrfs_is_zoned(cache->fs_info))
-		space_info->bytes_zone_unusable += num_bytes;
+	bg_ro = cache->ro;
 	cache->reserved -= num_bytes;
-	space_info->bytes_reserved -= num_bytes;
-	space_info->max_extent_size = 0;
-
 	if (is_delalloc)
 		cache->delalloc_bytes -= num_bytes;
 	spin_unlock(&cache->lock);

-	btrfs_try_granting_tickets(cache->fs_info, space_info);
+	if (bg_ro)
+		space_info->bytes_readonly += num_bytes;
+	else if (btrfs_is_zoned(cache->fs_info))
+		space_info->bytes_zone_unusable += num_bytes;
+
+	space_info->bytes_reserved -= num_bytes;
+	space_info->max_extent_size = 0;
+
+	btrfs_try_granting_tickets(space_info);
 	spin_unlock(&space_info->lock);
 }
···
 	should_alloc = should_alloc_chunk(fs_info, space_info, force);
 	if (space_info->full) {
 		/* No more free physical space */
+		spin_unlock(&space_info->lock);
 		if (should_alloc)
 			ret = -ENOSPC;
 		else
 			ret = 0;
-		spin_unlock(&space_info->lock);
 		return ret;
 	} else if (!should_alloc) {
 		spin_unlock(&space_info->lock);
···
 		 * recheck if we should continue with our allocation
 		 * attempt.
 		 */
+		spin_unlock(&space_info->lock);
 		wait_for_alloc = true;
 		force = CHUNK_ALLOC_NO_FORCE;
-		spin_unlock(&space_info->lock);
 		mutex_lock(&fs_info->chunk_mutex);
 		mutex_unlock(&fs_info->chunk_mutex);
 	} else {
 		/* Proceed with allocation */
-		space_info->chunk_alloc = 1;
-		wait_for_alloc = false;
+		space_info->chunk_alloc = true;
 		spin_unlock(&space_info->lock);
+		wait_for_alloc = false;
 	}

 	cond_resched();
···
 	spin_lock(&space_info->lock);
 	if (ret < 0) {
 		if (ret == -ENOSPC)
-			space_info->full = 1;
+			space_info->full = true;
 		else
 			goto out;
 	} else {
···

 	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
-	space_info->chunk_alloc = 0;
+	space_info->chunk_alloc = false;
 	spin_unlock(&space_info->lock);
 	mutex_unlock(&fs_info->chunk_mutex);
···
 	if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
 		btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
 			   left, bytes, type);
-		btrfs_dump_space_info(fs_info, info, 0, false);
+		btrfs_dump_space_info(info, 0, false);
 	}

 	if (left < bytes) {
···
 		 * We have a new chunk. We also need to activate it for
 		 * zoned filesystem.
 		 */
-		ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
+		ret = btrfs_zoned_activate_one_bg(info, true);
 		if (ret < 0)
 			return;
···
 	 * indicates a real bug if this happens.
 	 */
 	if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0))
-		btrfs_dump_space_info(info, space_info, 0, false);
+		btrfs_dump_space_info(space_info, 0, false);

 	/*
 	 * If there was a failure to cleanup a log tree, very likely due to an
···
 	if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
 	    !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
 		if (WARN_ON(space_info->bytes_reserved > 0))
-			btrfs_dump_space_info(info, space_info, 0, false);
+			btrfs_dump_space_info(space_info, 0, false);
 	}

 	WARN_ON(space_info->reclaim_size > 0);
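
The btrfs_add_reserved_bytes()/btrfs_free_reserved_bytes() hunks above
are an instance of the "shrinking critical sections" work from the
Performance section: state is snapshotted and the block group updated
under the inner lock, and the space_info accounting happens only after
that lock is dropped. A hedged userspace analogy with pthread mutexes
(the kernel uses spinlocks, and all names here are invented for the
demo):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct block_group {
	pthread_mutex_t lock;
	bool ro;
	unsigned long reserved;
};

struct space_info {
	pthread_mutex_t lock;
	unsigned long bytes_reserved;
	unsigned long bytes_readonly;
};

static void free_reserved(struct space_info *sinfo, struct block_group *bg,
			  unsigned long bytes)
{
	bool bg_ro;

	pthread_mutex_lock(&sinfo->lock);

	/* Hold the inner lock only to snapshot and update the bg itself. */
	pthread_mutex_lock(&bg->lock);
	bg_ro = bg->ro;
	bg->reserved -= bytes;
	pthread_mutex_unlock(&bg->lock);

	/* The space_info accounting no longer holds the bg lock. */
	if (bg_ro)
		sinfo->bytes_readonly += bytes;
	sinfo->bytes_reserved -= bytes;

	pthread_mutex_unlock(&sinfo->lock);
}

int main(void)
{
	struct block_group bg = { PTHREAD_MUTEX_INITIALIZER, false, 4096 };
	struct space_info sinfo = { PTHREAD_MUTEX_INITIALIZER, 4096, 0 };

	free_reserved(&sinfo, &bg, 4096);
	printf("bytes_reserved=%lu\n", sinfo.bytes_reserved);
	return 0;
}
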
+1 -1
fs/btrfs/block-group.h
···
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			     u64 bytenr, u64 num_bytes, bool alloc);
 int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
-			     u64 ram_bytes, u64 num_bytes, int delalloc,
+			     u64 ram_bytes, u64 num_bytes, bool delalloc,
 			     bool force_wrong_size_class);
 void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
 			       bool is_delalloc);
+6 -8
fs/btrfs/block-rsv.c
···
 	if (num_bytes == 0)
 		return 0;

-	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
-					   num_bytes, flush);
+	ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, num_bytes, flush);
 	if (!ret)
 		btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true);

···
 	if (!ret)
 		return 0;

-	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
-					   num_bytes, flush);
+	ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, num_bytes, flush);
 	if (!ret) {
 		btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
 		return 0;
···
 		num_bytes = block_rsv->reserved - block_rsv->size;
 		btrfs_space_info_update_bytes_may_use(sinfo, -num_bytes);
 		block_rsv->reserved = block_rsv->size;
-		btrfs_try_granting_tickets(fs_info, sinfo);
+		btrfs_try_granting_tickets(sinfo);
 	}

 	block_rsv->full = (block_rsv->reserved == block_rsv->size);
···
 			       block_rsv->type, ret);
 	}
try_reserve:
-	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info,
-					   blocksize, BTRFS_RESERVE_NO_FLUSH);
+	ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, blocksize,
+					   BTRFS_RESERVE_NO_FLUSH);
 	if (!ret)
 		return block_rsv;
 	/*
···
 	 * one last time to force a reservation if there's enough actual space
 	 * on disk to make the reservation.
 	 */
-	ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, blocksize,
+	ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, blocksize,
 					   BTRFS_RESERVE_FLUSH_EMERGENCY);
 	if (!ret)
 		return block_rsv;
+9 -11
fs/btrfs/btrfs_inode.h
···
 #include <linux/lockdep.h>
 #include <uapi/linux/btrfs_tree.h>
 #include <trace/events/btrfs.h>
+#include "ctree.h"
 #include "block-rsv.h"
 #include "extent_map.h"
-#include "extent_io.h"
 #include "extent-io-tree.h"
-#include "ordered-data.h"
-#include "delayed-inode.h"

-struct extent_state;
 struct posix_acl;
 struct iov_iter;
 struct writeback_control;
 struct btrfs_root;
 struct btrfs_fs_info;
 struct btrfs_trans_handle;
+struct btrfs_bio;
+struct btrfs_file_extent;
+struct btrfs_delayed_node;

 /*
  * Since we search a directory based on f_pos (struct dir_context::pos) we have
···
 #endif
 }

-/* Array of bytes with variable length, hexadecimal format 0x1234 */
-#define CSUM_FMT				"0x%*phN"
-#define CSUM_FMT_VALUE(size, bytes)		size, bytes
-
-void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
-				u8 *dest);
+void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info,
+				      const phys_addr_t paddr, u8 *dest);
+void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info,
+				      const phys_addr_t paddrs[], u8 *dest);
 int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
 			   const u8 * const csum_expected);
 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
-			u32 bio_offset, phys_addr_t paddr);
+			u32 bio_offset, const phys_addr_t paddrs[]);
 noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
 			      struct btrfs_file_extent *file_extent,
 			      bool nowait);
+16 -31
fs/btrfs/compression.c
···

 	bbio = btrfs_bio(bio_alloc_bioset(NULL, BTRFS_MAX_COMPRESSED_PAGES, op,
 					  GFP_NOFS, &btrfs_compressed_bioset));
-	btrfs_bio_init(bbio, inode->root->fs_info, end_io, NULL);
-	bbio->inode = inode;
-	bbio->file_offset = start;
+	btrfs_bio_init(bbio, inode, start, end_io, NULL);
 	return to_compressed_bio(bbio);
 }
···
 static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
 {
-	struct list_head remove;
+	LIST_HEAD(remove);
 	struct list_head *tmp, *next;
 	int freed;

 	if (compr_pool.count == 0)
 		return SHRINK_STOP;
-
-	INIT_LIST_HEAD(&remove);

 	/* For now, just simply drain the whole list. */
 	spin_lock(&compr_pool.lock);
···
 	/* the inode may be gone now */
 }

-static void btrfs_finish_compressed_write_work(struct work_struct *work)
-{
-	struct compressed_bio *cb =
-		container_of(work, struct compressed_bio, write_end_work);
-
-	btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
-				    cb->bbio.bio.bi_status == BLK_STS_OK);
-
-	if (cb->writeback)
-		end_compressed_writeback(cb);
-	/* Note, our inode could be gone now */
-
-	btrfs_free_compressed_folios(cb);
-	bio_put(&cb->bbio.bio);
-}
-
 /*
  * Do the cleanup once all the compressed pages hit the disk. This will clear
  * writeback on the file pages and free the compressed pages.
···
 static void end_bbio_compressed_write(struct btrfs_bio *bbio)
 {
 	struct compressed_bio *cb = to_compressed_bio(bbio);
-	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;

-	queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
+	btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
+				    cb->bbio.bio.bi_status == BLK_STS_OK);
+
+	if (cb->writeback)
+		end_compressed_writeback(cb);
+	/* Note, our inode could be gone now. */
+	btrfs_free_compressed_folios(cb);
+	bio_put(&cb->bbio.bio);
 }

 static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
 {
-	struct btrfs_fs_info *fs_info = cb->bbio.fs_info;
 	struct bio *bio = &cb->bbio.bio;
 	u32 offset = 0;
+	unsigned int findex = 0;

 	while (offset < cb->compressed_len) {
-		struct folio *folio;
+		struct folio *folio = cb->compressed_folios[findex];
+		u32 len = min_t(u32, cb->compressed_len - offset, folio_size(folio));
 		int ret;
-		u32 len = min_t(u32, cb->compressed_len - offset,
-				btrfs_min_folio_size(fs_info));

-		folio = cb->compressed_folios[offset >> (PAGE_SHIFT + fs_info->block_min_order)];
 		/* Maximum compressed extent is smaller than bio size limit. */
 		ret = bio_add_folio(bio, folio, len, 0);
 		ASSERT(ret);
 		offset += len;
+		findex++;
 	}
 }
···
 	cb->compressed_folios = compressed_folios;
 	cb->compressed_len = ordered->disk_num_bytes;
 	cb->writeback = writeback;
-	INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
 	cb->nr_folios = nr_folios;
 	cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
 	cb->bbio.ordered = ordered;
···
 /*
  * a less complex decompression routine. Our compressed data fits in a
  * single page, and we want to read a single page out of it.
- * start_byte tells us the offset into the compressed data we're interested in
+ * dest_pgoff tells us the offset into the destination folio where we write the
+ * decompressed data.
  */
 int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
 		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
+5 -10
fs/btrfs/compression.h
···
 #include <linux/pagemap.h>
 #include "bio.h"
 #include "fs.h"
-#include "messages.h"
+#include "btrfs_inode.h"

 struct address_space;
-struct page;
 struct inode;
 struct btrfs_inode;
 struct btrfs_ordered_extent;
-struct btrfs_bio;

 /*
  * We want to make sure that amount of RAM required to uncompress an extent is
···
 	/* Whether this is a write for writeback. */
 	bool writeback;

-	union {
-		/* For reads, this is the bio we are copying the data into */
-		struct btrfs_bio *orig_bbio;
-		struct work_struct write_end_work;
-	};
+	/* For reads, this is the bio we are copying the data into. */
+	struct btrfs_bio *orig_bbio;

 	/* Must be last. */
 	struct btrfs_bio bbio;
···

 static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb)
 {
-	return cb->bbio.fs_info;
+	return cb->bbio.inode->root->fs_info;
 }

 /* @range_end must be exclusive. */
···
 			 u64 start, struct folio **folios, unsigned long *out_folios,
 			 unsigned long *total_in, unsigned long *total_out);
 int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
-		     unsigned long start_byte, size_t srclen, size_t destlen);
+		     unsigned long dest_pgoff, size_t srclen, size_t destlen);
 int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
 			      struct compressed_bio *cb, u32 decompressed);
+124 -116
fs/btrfs/ctree.c
···
 }

 /*
+ * Promote a child node to become the new tree root.
+ *
+ * @trans:  Transaction handle
+ * @root:   Tree root structure to update
+ * @path:   Path holding nodes and locks
+ * @level:  Level of the parent (old root)
+ * @parent: The parent (old root) with exactly one item
+ *
+ * This helper is called during rebalancing when the root node contains only
+ * a single item (nritems == 1). We can reduce the tree height by promoting
+ * that child to become the new root and freeing the old root node. The path
+ * locks and references are updated accordingly.
+ *
+ * Return: 0 on success, negative errno on failure. The transaction is aborted
+ * on critical errors.
+ */
+static int promote_child_to_root(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root, struct btrfs_path *path,
+				 int level, struct extent_buffer *parent)
+{
+	struct extent_buffer *child;
+	int ret;
+
+	ASSERT(btrfs_header_nritems(parent) == 1);
+
+	child = btrfs_read_node_slot(parent, 0);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+
+	btrfs_tree_lock(child);
+	ret = btrfs_cow_block(trans, root, child, parent, 0, &child, BTRFS_NESTING_COW);
+	if (ret) {
+		btrfs_tree_unlock(child);
+		free_extent_buffer(child);
+		return ret;
+	}
+
+	ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
+	if (unlikely(ret < 0)) {
+		btrfs_tree_unlock(child);
+		free_extent_buffer(child);
+		btrfs_abort_transaction(trans, ret);
+		return ret;
+	}
+	rcu_assign_pointer(root->node, child);
+
+	add_root_to_dirty_list(root);
+	btrfs_tree_unlock(child);
+
+	path->locks[level] = 0;
+	path->nodes[level] = NULL;
+	btrfs_clear_buffer_dirty(trans, parent);
+	btrfs_tree_unlock(parent);
+	/* Once for the path. */
+	free_extent_buffer(parent);
+
+	root_sub_used_bytes(root);
+	ret = btrfs_free_tree_block(trans, btrfs_root_id(root), parent, 0, 1);
+	/* Once for the root ptr. */
+	free_extent_buffer_stale(parent);
+	if (unlikely(ret < 0)) {
+		btrfs_abort_transaction(trans, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
  * node level balancing, used to make sure nodes are in proper order for
  * item deletion. We balance from the top down, so we have to make sure
  * that a deletion won't leave an node completely empty later on.
···
 	 * by promoting the node below to a root
 	 */
 	if (!parent) {
-		struct extent_buffer *child;
-
 		if (btrfs_header_nritems(mid) != 1)
 			return 0;

-		/* promote the child to a root */
-		child = btrfs_read_node_slot(mid, 0);
-		if (IS_ERR(child)) {
-			ret = PTR_ERR(child);
-			goto out;
-		}
-
-		btrfs_tree_lock(child);
-		ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
-				      BTRFS_NESTING_COW);
-		if (ret) {
-			btrfs_tree_unlock(child);
-			free_extent_buffer(child);
-			goto out;
-		}
-
-		ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
-		if (unlikely(ret < 0)) {
-			btrfs_tree_unlock(child);
-			free_extent_buffer(child);
-			btrfs_abort_transaction(trans, ret);
-			goto out;
-		}
-		rcu_assign_pointer(root->node, child);
-
-		add_root_to_dirty_list(root);
-		btrfs_tree_unlock(child);
-
-		path->locks[level] = 0;
-		path->nodes[level] = NULL;
-		btrfs_clear_buffer_dirty(trans, mid);
-		btrfs_tree_unlock(mid);
-		/* once for the path */
-		free_extent_buffer(mid);
-
-		root_sub_used_bytes(root);
-		ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
-		/* once for the root ptr */
-		free_extent_buffer_stale(mid);
-		if (unlikely(ret < 0)) {
-			btrfs_abort_transaction(trans, ret);
-			goto out;
-		}
-		return 0;
+		return promote_child_to_root(trans, root, path, level, mid);
 	}
 	if (btrfs_header_nritems(mid) >
 	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
···
 	/* update the path */
 	if (left) {
 		if (btrfs_header_nritems(left) > orig_slot) {
-			refcount_inc(&left->refs);
 			/* left was locked after cow */
 			path->nodes[level] = left;
 			path->slots[level + 1] -= 1;
 			path->slots[level] = orig_slot;
+			/* Left is now owned by path. */
+			left = NULL;
 			if (mid) {
 				btrfs_tree_unlock(mid);
 				free_extent_buffer(mid);
···
 		free_extent_buffer(right);
 	}
 	if (left) {
-		if (path->nodes[level] != left)
-			btrfs_tree_unlock(left);
+		btrfs_tree_unlock(left);
 		free_extent_buffer(left);
 	}
 	return ret;
···
 	}

 	if (i >= lowest_unlock && i > skip_level) {
-		check_skip = false;
 		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
+		check_skip = false;
 		path->locks[i] = 0;
 		if (write_lock_level &&
 		    i > min_write_lock_level &&
···
 		level = btrfs_header_level(b);
 		/*
 		 * Ensure that all callers have set skip_locking when
-		 * p->search_commit_root = 1.
+		 * p->search_commit_root is true.
 		 */
-		ASSERT(p->skip_locking == 1);
+		ASSERT(p->skip_locking);

 		goto out;
 	}
···
 	if (unlikely(btrfs_comp_keys(&disk_key, new_key) >= 0)) {
 		btrfs_print_leaf(eb);
 		btrfs_crit(fs_info,
-			   "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+			   "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT,
 			   slot, btrfs_disk_key_objectid(&disk_key),
 			   btrfs_disk_key_type(&disk_key),
 			   btrfs_disk_key_offset(&disk_key),
-			   new_key->objectid, new_key->type,
-			   new_key->offset);
+			   BTRFS_KEY_FMT_VALUE(new_key));
 		BUG();
 	}
 }
···
 	if (unlikely(btrfs_comp_keys(&disk_key, new_key) <= 0)) {
 		btrfs_print_leaf(eb);
 		btrfs_crit(fs_info,
-			   "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
+			   "slot %u key " BTRFS_KEY_FMT " new key " BTRFS_KEY_FMT,
 			   slot, btrfs_disk_key_objectid(&disk_key),
 			   btrfs_disk_key_type(&disk_key),
 			   btrfs_disk_key_offset(&disk_key),
-			   new_key->objectid, new_key->type,
-			   new_key->offset);
+			   BTRFS_KEY_FMT_VALUE(new_key));
 		BUG();
 	}
 }
···
 		btrfs_crit(left->fs_info, "right extent buffer:");
 		btrfs_print_tree(right, false);
 		btrfs_crit(left->fs_info,
-			   "bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
-			   left_last.objectid, left_last.type,
-			   left_last.offset, right_first.objectid,
-			   right_first.type, right_first.offset);
+			   "bad key order, sibling blocks, left last " BTRFS_KEY_FMT " right first " BTRFS_KEY_FMT,
+			   BTRFS_KEY_FMT_VALUE(&left_last),
+			   BTRFS_KEY_FMT_VALUE(&right_first));
 		return true;
 	}
 	return false;
···
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] >= left_nritems) {
 		path->slots[0] -= left_nritems;
-		if (btrfs_header_nritems(path->nodes[0]) == 0)
-			btrfs_clear_buffer_dirty(trans, path->nodes[0]);
-		btrfs_tree_unlock(path->nodes[0]);
-		free_extent_buffer(path->nodes[0]);
+		btrfs_tree_unlock(left);
+		free_extent_buffer(left);
 		path->nodes[0] = right;
 		path->slots[1] += 1;
 	} else {
···
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);

 	/* fixup right node */
-	if (push_items > right_nritems)
-		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
-		     right_nritems);
+	if (unlikely(push_items > right_nritems)) {
+		ret = -EUCLEAN;
+		btrfs_abort_transaction(trans, ret);
+		btrfs_crit(fs_info, "push items (%d) > right leaf items (%u)",
+			   push_items, right_nritems);
+		goto out;
+	}

 	if (push_items < right_nritems) {
 		push_space = btrfs_item_offset(right, push_items - 1) -
···
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] < push_items) {
 		path->slots[0] += old_left_nritems;
-		btrfs_tree_unlock(path->nodes[0]);
-		free_extent_buffer(path->nodes[0]);
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
 		path->nodes[0] = left;
 		path->slots[1] -= 1;
 	} else {
···
 	}
 	btrfs_release_path(path);

-	path->keep_locks = 1;
-	path->search_for_split = 1;
+	path->keep_locks = true;
+	path->search_for_split = true;
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
-	path->search_for_split = 0;
+	path->search_for_split = false;
 	if (ret > 0)
 		ret = -EAGAIN;
 	if (ret < 0)
···
 	if (ret)
 		goto err;

-	path->keep_locks = 0;
+	path->keep_locks = false;
 	btrfs_unlock_up_safe(path, 1);
 	return 0;
err:
-	path->keep_locks = 0;
+	path->keep_locks = false;
 	return ret;
 }
···
 	nritems = btrfs_header_nritems(leaf);
 	data_end = leaf_data_end(leaf);

-	if (btrfs_leaf_free_space(leaf) < data_size) {
+	if (unlikely(btrfs_leaf_free_space(leaf) < data_size)) {
 		btrfs_print_leaf(leaf);
 		BUG();
 	}
···
 	memmove_leaf_data(leaf, data_end - data_size, data_end,
 			  old_data - data_end);

-	data_end = old_data;
 	old_size = btrfs_item_size(leaf, slot);
 	btrfs_set_item_size(leaf, slot, old_size + data_size);
 	btrfs_mark_buffer_dirty(trans, leaf);
···
 	/* delete the leaf if we've emptied it */
 	if (nritems == 0) {
-		if (leaf == root->node) {
-			btrfs_set_header_level(leaf, 0);
-		} else {
+		if (leaf != root->node) {
 			btrfs_clear_buffer_dirty(trans, leaf);
 			ret = btrfs_del_leaf(trans, root, path, leaf);
 			if (ret < 0)
···
 		if (btrfs_header_nritems(leaf) == 0) {
 			path->slots[1] = slot;
 			ret = btrfs_del_leaf(trans, root, path, leaf);
+			free_extent_buffer(leaf);
 			if (ret < 0)
 				return ret;
-			free_extent_buffer(leaf);
-			ret = 0;
 		} else {
 			/* if we're still in the path, make sure
 			 * we're dirty. Otherwise, one of the
···
 	u32 nritems;
 	int level;
 	int ret = 1;
-	int keep_locks = path->keep_locks;
+	const bool keep_locks = path->keep_locks;

 	ASSERT(!path->nowait);
 	ASSERT(path->lowest_level == 0);
-	path->keep_locks = 1;
+	path->keep_locks = true;
again:
 	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
···
  * 0 is returned if another key is found, < 0 if there are any errors
  * and 1 is returned if there are no higher keys in the tree
  *
- * path->keep_locks should be set to 1 on the search made before
+ * path->keep_locks should be set to true on the search made before
  * calling this function.
  */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
···
 	next = NULL;
 	btrfs_release_path(path);

-	path->keep_locks = 1;
+	path->keep_locks = true;

 	if (time_seq) {
 		ret = btrfs_search_old_slot(root, &key, path, time_seq);
 	} else {
 		if (path->need_commit_sem) {
-			path->need_commit_sem = 0;
+			path->need_commit_sem = false;
 			need_commit_sem = true;
 			if (path->nowait) {
 				if (!down_read_trylock(&fs_info->commit_root_sem)) {
···
 		}
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	}
-	path->keep_locks = 0;
+	path->keep_locks = false;

 	if (ret < 0)
 		goto done;

 	nritems = btrfs_header_nritems(path->nodes[0]);
 	/*
	 * by releasing the path above we dropped all our locks.
A balance 4855 - * could have added more items next to the key that used to be 4856 - * at the very end of the block. So, check again here and 4857 - * advance the path if there are now more items available. 4835 + * By releasing the path above we dropped all our locks. A balance 4836 + * could have happened and 4837 + * 4838 + * 1. added more items after the previous last item 4839 + * 2. deleted the previous last item 4840 + * 4841 + * So, check again here and advance the path if there are now more 4842 + * items available. 4858 4843 */ 4859 - if (nritems > 0 && path->slots[0] < nritems - 1) { 4860 - if (ret == 0) 4844 + if (nritems > 0 && path->slots[0] <= nritems - 1) { 4845 + if (ret == 0 && path->slots[0] != nritems - 1) { 4861 4846 path->slots[0]++; 4862 - ret = 0; 4863 - goto done; 4864 - } 4865 - /* 4866 - * So the above check misses one case: 4867 - * - after releasing the path above, someone has removed the item that 4868 - * used to be at the very end of the block, and balance between leafs 4869 - * gets another one with bigger key.offset to replace it. 4870 - * 4871 - * This one should be returned as well, or we can get leaf corruption 4872 - * later(esp. in __btrfs_drop_extents()). 4873 - * 4874 - * And a bit more explanation about this check, 4875 - * with ret > 0, the key isn't found, the path points to the slot 4876 - * where it should be inserted, so the path->slots[0] item must be the 4877 - * bigger one. 4878 - */ 4879 - if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) { 4880 - ret = 0; 4881 - goto done; 4847 + goto done; 4848 + } else if (ret > 0) { 4849 + ret = 0; 4850 + goto done; 4851 + } 4882 4852 } 4883 4853 4884 4854 while (level < BTRFS_MAX_LEVEL) { ··· 4972 4964 if (need_commit_sem) { 4973 4965 int ret2; 4974 4966 4975 - path->need_commit_sem = 1; 4967 + path->need_commit_sem = true; 4976 4968 ret2 = finish_need_commit_sem_search(path); 4977 4969 up_read(&fs_info->commit_root_sem); 4978 4970 if (ret2)
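The balance_level() hunks above factor the single-child root collapse out into promote_child_to_root(). A minimal userspace sketch of the core idea, with the COW, locking and tree-mod-log steps of the real helper deliberately omitted (all names here are hypothetical, not btrfs code):

#include <assert.h>
#include <stdlib.h>

struct node {
	int nritems;
	struct node *children[16];
};

/* When the root holds exactly one child pointer, the tree can lose a
 * level: the child becomes the new root and the old root is freed. */
static void promote_child_to_root(struct node **rootp)
{
	struct node *old_root = *rootp;

	assert(old_root->nritems == 1);
	*rootp = old_root->children[0];
	free(old_root);
}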
+8 -10
fs/btrfs/ctree.h
··· 17 17 #include <linux/refcount.h> 18 18 #include <uapi/linux/btrfs_tree.h> 19 19 #include "locking.h" 20 - #include "fs.h" 21 20 #include "accessors.h" 22 - #include "extent-io-tree.h" 23 21 24 22 struct extent_buffer; 25 23 struct btrfs_block_rsv; ··· 65 67 * set by btrfs_split_item, tells search_slot to keep all locks 66 68 * and to force calls to keep space in the nodes 67 69 */ 68 - unsigned int search_for_split:1; 70 + bool search_for_split:1; 69 71 /* Keep some upper locks as we walk down. */ 70 - unsigned int keep_locks:1; 71 - unsigned int skip_locking:1; 72 - unsigned int search_commit_root:1; 73 - unsigned int need_commit_sem:1; 74 - unsigned int skip_release_on_error:1; 72 + bool keep_locks:1; 73 + bool skip_locking:1; 74 + bool search_commit_root:1; 75 + bool need_commit_sem:1; 76 + bool skip_release_on_error:1; 75 77 /* 76 78 * Indicate that new item (btrfs_search_slot) is extending already 77 79 * existing item and ins_len contains only the data size and not item 78 80 * header (ie. sizeof(struct btrfs_item) is not included). 79 81 */ 80 - unsigned int search_for_extension:1; 82 + bool search_for_extension:1; 81 83 /* Stop search if any locks need to be taken (for read) */ 82 - unsigned int nowait:1; 84 + bool nowait:1; 83 85 }; 84 86 85 87 #define BTRFS_PATH_AUTO_FREE(path_name) \
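The btrfs_path flags keep their one-bit width but are typed bool now, so assignments read as true/false rather than 0/1. A small compilable illustration (not btrfs code) that bool bitfields still pack into a single byte on common ABIs:

#include <stdbool.h>
#include <stdio.h>

struct path_flags {
	bool keep_locks:1;
	bool skip_locking:1;
	bool nowait:1;
};

int main(void)
{
	struct path_flags f = { .keep_locks = true };

	f.nowait = false;
	printf("flags take %zu byte(s)\n", sizeof(f));
	return 0;
}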
+3 -2
fs/btrfs/defrag.c
··· 15 15 #include "defrag.h" 16 16 #include "file-item.h" 17 17 #include "super.h" 18 + #include "compression.h" 18 19 19 20 static struct kmem_cache *btrfs_inode_defrag_cachep; 20 21 ··· 471 470 memcpy(&key, &root->defrag_progress, sizeof(key)); 472 471 } 473 472 474 - path->keep_locks = 1; 473 + path->keep_locks = true; 475 474 476 475 ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION); 477 476 if (ret < 0) ··· 514 513 /* 515 514 * Now that we reallocated the node we can find the next key. Note that 516 515 * btrfs_find_next_key() can release our path and do another search 517 - * without COWing, this is because even with path->keep_locks = 1, 516 + * without COWing, this is because even with path->keep_locks == true, 518 517 * btrfs_search_slot() / ctree.c:unlock_up() does not keep a lock on a 519 518 * node when path->slots[node_level - 1] does not point to the last 520 519 * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
+2 -2
fs/btrfs/delalloc-space.c
··· 358 358 noflush); 359 359 if (ret) 360 360 return ret; 361 - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv->space_info, 362 - meta_reserve, flush); 361 + ret = btrfs_reserve_metadata_bytes(block_rsv->space_info, meta_reserve, 362 + flush); 363 363 if (ret) { 364 364 btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); 365 365 return ret;
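btrfs_reserve_metadata_bytes() loses its fs_info argument because it is reachable from the space_info. A toy sketch of that parameter-removal pattern (hypothetical names, not btrfs code):

struct fs_info { int dummy; };

struct space_info {
	struct fs_info *fs_info;	/* back-pointer makes the extra argument redundant */
	unsigned long bytes_reserved;
};

/* Before: reserve(struct fs_info *fs, struct space_info *si, ...) */
static int reserve(struct space_info *si, unsigned long bytes)
{
	struct fs_info *fs = si->fs_info;	/* derived, not passed */

	(void)fs;
	si->bytes_reserved += bytes;
	return 0;
}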
+10 -16
fs/btrfs/delayed-inode.c
··· 668 668 struct btrfs_key first_key; 669 669 const u32 first_data_size = first_item->data_len; 670 670 int total_size; 671 - char *ins_data = NULL; 671 + char AUTO_KFREE(ins_data); 672 672 int ret; 673 673 bool continuous_keys_only = false; 674 674 ··· 740 740 741 741 ins_data = kmalloc_array(batch.nr, 742 742 sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS); 743 - if (!ins_data) { 744 - ret = -ENOMEM; 745 - goto out; 746 - } 743 + if (!ins_data) 744 + return -ENOMEM; 747 745 ins_sizes = (u32 *)ins_data; 748 746 ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32)); 749 747 batch.keys = ins_keys; ··· 757 759 758 760 ret = btrfs_insert_empty_items(trans, root, path, &batch); 759 761 if (ret) 760 - goto out; 762 + return ret; 761 763 762 764 list_for_each_entry(curr, &item_list, tree_list) { 763 765 char *data_ptr; ··· 812 814 list_del(&curr->tree_list); 813 815 btrfs_release_delayed_item(curr); 814 816 } 815 - out: 816 - kfree(ins_data); 817 - return ret; 817 + 818 + return 0; 818 819 } 819 820 820 821 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans, ··· 2008 2011 * It is very rare. 2009 2012 */ 2010 2013 mutex_lock(&delayed_node->mutex); 2011 - if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) 2012 - goto release_node; 2013 - 2014 - set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags); 2015 - delayed_node->count++; 2016 - atomic_inc(&fs_info->delayed_root->items); 2017 - release_node: 2014 + if (!test_and_set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) { 2015 + delayed_node->count++; 2016 + atomic_inc(&fs_info->delayed_root->items); 2017 + } 2018 2018 mutex_unlock(&delayed_node->mutex); 2019 2019 btrfs_release_delayed_node(delayed_node, &delayed_node_tracker); 2020 2020 return 0;
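The btrfs_delayed_delete_inode_ref() change collapses a test_bit() + set_bit() pair into a single test_and_set_bit(), which also removes the goto label. In userspace the same read-modify-write can be expressed with C11 atomics (a sketch, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>

/* Set bit 'nr' and report whether it was already set, as one atomic op. */
static bool test_and_set_flag(atomic_ulong *word, unsigned int nr)
{
	unsigned long mask = 1UL << nr;

	return atomic_fetch_or(word, mask) & mask;
}

void mark_del_iref(atomic_ulong *flags, int *count)
{
	/* Only the first caller to set the bit bumps the counter. */
	if (!test_and_set_flag(flags, 0))
		(*count)++;
}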
+34 -11
fs/btrfs/delayed-ref.c
··· 228 228 if (!num_bytes) 229 229 return 0; 230 230 231 - ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush); 231 + ret = btrfs_reserve_metadata_bytes(space_info, num_bytes, flush); 232 232 if (ret) 233 233 return ret; 234 234 ··· 798 798 } 799 799 800 800 /* 801 - * helper function to actually insert a head node into the rbtree. 802 - * this does all the dirty work in terms of maintaining the correct 803 - * overall modification count. 801 + * Helper function to actually insert a head node into the xarray. This does all 802 + * the dirty work in terms of maintaining the correct overall modification 803 + * count. 804 + * 805 + * The caller is responsible for calling kfree() on @qrecord. More specifically, 806 + * if this function reports that it did not insert it as noted in 807 + * @qrecord_inserted_ret, then it's safe to call kfree() on it. 804 808 * 805 809 * Returns an error pointer in case of an error. 806 810 */ ··· 818 814 struct btrfs_delayed_ref_head *existing; 819 815 struct btrfs_delayed_ref_root *delayed_refs; 820 816 const unsigned long index = (head_ref->bytenr >> fs_info->sectorsize_bits); 821 - bool qrecord_inserted = false; 817 + 818 + /* 819 + * If 'qrecord_inserted_ret' is provided, then the first thing we need 820 + * to do is to initialize it to false just in case we have an exit 821 + * before trying to insert the record. 822 + */ 823 + if (qrecord_inserted_ret) 824 + *qrecord_inserted_ret = false; 822 825 823 826 delayed_refs = &trans->transaction->delayed_refs; 824 827 lockdep_assert_held(&delayed_refs->lock); ··· 844 833 845 834 /* Record qgroup extent info if provided */ 846 835 if (qrecord) { 836 + /* 837 + * Setting 'qrecord' but not 'qrecord_inserted_ret' will likely 838 + * result in a memory leakage. 839 + */ 840 + ASSERT(qrecord_inserted_ret != NULL); 841 + 847 842 int ret; 848 843 849 844 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord, ··· 857 840 if (ret) { 858 841 /* Clean up if insertion fails or item exists. */ 859 842 xa_release(&delayed_refs->dirty_extents, index); 860 - /* Caller responsible for freeing qrecord on error. */ 861 843 if (ret < 0) 862 844 return ERR_PTR(ret); 863 - kfree(qrecord); 864 - } else { 865 - qrecord_inserted = true; 845 + } else if (qrecord_inserted_ret) { 846 + *qrecord_inserted_ret = true; 866 847 } 867 848 } 868 849 ··· 903 888 delayed_refs->num_heads++; 904 889 delayed_refs->num_heads_ready++; 905 890 } 906 - if (qrecord_inserted_ret) 907 - *qrecord_inserted_ret = qrecord_inserted; 908 891 909 892 return head_ref; 910 893 } ··· 1062 1049 xa_release(&delayed_refs->head_refs, index); 1063 1050 spin_unlock(&delayed_refs->lock); 1064 1051 ret = PTR_ERR(new_head_ref); 1052 + 1053 + /* 1054 + * It's only safe to call kfree() on 'qrecord' if 1055 + * add_delayed_ref_head() has _not_ inserted it for 1056 + * tracing. Otherwise we need to handle this here. 1057 + */ 1058 + if (!qrecord_reserved || qrecord_inserted) 1059 + goto free_head_ref; 1065 1060 goto free_record; 1066 1061 } 1067 1062 head_ref = new_head_ref; ··· 1092 1071 1093 1072 if (qrecord_inserted) 1094 1073 return btrfs_qgroup_trace_extent_post(trans, record, generic_ref->bytenr); 1074 + 1075 + kfree(record); 1095 1076 return 0; 1096 1077 1097 1078 free_record:
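The new comment block above pins down who frees 'qrecord'. A toy version of that ownership contract (all names hypothetical): the callee reports through an out-parameter whether it consumed the object; on failure, or when it was not inserted, the caller still owns it and must free it.

#include <stdbool.h>
#include <stdlib.h>

struct record { int key; struct record *next; };
struct table { struct record *head; };

static int insert_record(struct table *t, struct record *rec, bool *inserted)
{
	*inserted = false;		/* initialize first, like the real code */
	for (struct record *r = t->head; r; r = r->next)
		if (r->key == rec->key)
			return 0;	/* duplicate: not consumed */
	rec->next = t->head;
	t->head = rec;
	*inserted = true;		/* ownership moved to the table */
	return 0;
}

static void caller(struct table *t, struct record *rec)
{
	bool inserted;

	if (insert_record(t, rec, &inserted) < 0 || !inserted)
		free(rec);		/* still ours */
}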
+2 -2
fs/btrfs/dev-replace.c
··· 489 489 } 490 490 491 491 path->reada = READA_FORWARD; 492 - path->search_commit_root = 1; 493 - path->skip_locking = 1; 492 + path->search_commit_root = true; 493 + path->skip_locking = true; 494 494 495 495 key.objectid = src_dev->devid; 496 496 key.type = BTRFS_DEV_EXTENT_KEY;
+2 -2
fs/btrfs/dir-item.c
··· 9 9 #include "transaction.h" 10 10 #include "accessors.h" 11 11 #include "dir-item.h" 12 + #include "delayed-inode.h" 12 13 13 14 /* 14 15 * insert a name into a directory, doing overflow properly if there is a hash ··· 112 111 int ret = 0; 113 112 int ret2 = 0; 114 113 struct btrfs_root *root = dir->root; 115 - struct btrfs_path *path; 114 + BTRFS_PATH_AUTO_FREE(path); 116 115 struct btrfs_dir_item *dir_item; 117 116 struct extent_buffer *leaf; 118 117 unsigned long name_ptr; ··· 164 163 ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir, 165 164 &disk_key, type, index); 166 165 out_free: 167 - btrfs_free_path(path); 168 166 if (ret) 169 167 return ret; 170 168 if (ret2)
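BTRFS_PATH_AUTO_FREE() is built on the compiler's cleanup attribute, which runs a designated function when the variable goes out of scope; that is why the explicit btrfs_free_path() at out_free could be dropped. A userspace analogue of the pattern:

#include <stdlib.h>

static void free_charp(char **p)
{
	free(*p);
}

#define AUTO_FREE_CHAR(name) \
	char *name __attribute__((cleanup(free_charp))) = NULL

int demo(void)
{
	AUTO_FREE_CHAR(buf);

	buf = malloc(64);
	if (!buf)
		return -1;	/* no goto/out label needed */
	return 0;		/* buf freed automatically on every return */
}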
+5 -5
fs/btrfs/direct-io.c
··· 10 10 #include "fs.h" 11 11 #include "transaction.h" 12 12 #include "volumes.h" 13 + #include "bio.h" 14 + #include "ordered-data.h" 13 15 14 16 struct btrfs_dio_data { 15 17 ssize_t submitted; ··· 186 184 alloc_hint = btrfs_get_extent_allocation_hint(inode, start, len); 187 185 again: 188 186 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize, 189 - 0, alloc_hint, &ins, 1, 1); 187 + 0, alloc_hint, &ins, true, true); 190 188 if (ret == -EAGAIN) { 191 189 ASSERT(btrfs_is_zoned(fs_info)); 192 190 wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH, ··· 387 385 * to allocate a contiguous array for the checksums. 388 386 */ 389 387 if (!write) 390 - len = min_t(u64, len, fs_info->sectorsize * BTRFS_MAX_BIO_SECTORS); 388 + len = min_t(u64, len, fs_info->sectorsize * BIO_MAX_VECS); 391 389 392 390 lockstart = start; 393 391 lockend = start + len - 1; ··· 715 713 container_of(bbio, struct btrfs_dio_private, bbio); 716 714 struct btrfs_dio_data *dio_data = iter->private; 717 715 718 - btrfs_bio_init(bbio, BTRFS_I(iter->inode)->root->fs_info, 716 + btrfs_bio_init(bbio, BTRFS_I(iter->inode), file_offset, 719 717 btrfs_dio_end_io, bio->bi_private); 720 - bbio->inode = BTRFS_I(iter->inode); 721 - bbio->file_offset = file_offset; 722 718 723 719 dip->file_offset = file_offset; 724 720 dip->bytes = bio->bi_iter.bi_size;
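btrfs_bio_init() now takes the inode and file offset directly, so callers can no longer forget to fill those fields in afterwards. A toy before/after of the constructor-style init (types hypothetical):

struct ctx {
	const void *inode;
	unsigned long long file_offset;
	void (*end_io)(struct ctx *);
};

/* Before: callers did ctx_init(c, end_io); c->inode = ...; c->file_offset = ...; */
static void ctx_init(struct ctx *c, const void *inode,
		     unsigned long long file_offset,
		     void (*end_io)(struct ctx *))
{
	c->inode = inode;
	c->file_offset = file_offset;
	c->end_io = end_io;
}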
+24 -40
fs/btrfs/disk-io.c
··· 50 50 #include "relocation.h" 51 51 #include "scrub.h" 52 52 #include "super.h" 53 + #include "delayed-inode.h" 53 54 54 55 #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ 55 56 BTRFS_HEADER_FLAG_RELOC |\ ··· 183 182 int mirror_num) 184 183 { 185 184 struct btrfs_fs_info *fs_info = eb->fs_info; 185 + const u32 step = min(fs_info->nodesize, PAGE_SIZE); 186 + const u32 nr_steps = eb->len / step; 187 + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; 186 188 int ret = 0; 187 189 188 190 if (sb_rdonly(fs_info->sb)) 189 191 return -EROFS; 190 192 191 - for (int i = 0; i < num_extent_folios(eb); i++) { 193 + for (int i = 0; i < num_extent_pages(eb); i++) { 192 194 struct folio *folio = eb->folios[i]; 193 - u64 start = max_t(u64, eb->start, folio_pos(folio)); 194 - u64 end = min_t(u64, eb->start + eb->len, 195 - folio_pos(folio) + eb->folio_size); 196 - u32 len = end - start; 197 - phys_addr_t paddr = PFN_PHYS(folio_pfn(folio)) + 198 - offset_in_folio(folio, start); 199 195 200 - ret = btrfs_repair_io_failure(fs_info, 0, start, len, start, 201 - paddr, mirror_num); 202 - if (ret) 203 - break; 196 + /* No large folio support yet. */ 197 + ASSERT(folio_order(folio) == 0); 198 + ASSERT(i < nr_steps); 199 + 200 + /* 201 + * For nodesize < page size, there is just one paddr, with some 202 + * offset inside the page. 203 + * 204 + * For nodesize >= page size, it's one or more paddrs, and eb->start 205 + * must be aligned to page boundary. 206 + */ 207 + paddrs[i] = page_to_phys(&folio->page) + offset_in_page(eb->start); 204 208 } 205 209 210 + ret = btrfs_repair_io_failure(fs_info, 0, eb->start, eb->len, eb->start, 211 + paddrs, step, mirror_num); 206 212 return ret; 207 213 } 208 214 ··· 406 398 407 399 if (memcmp(result, header_csum, csum_size) != 0) { 408 400 btrfs_warn_rl(fs_info, 409 - "checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s", 401 + "checksum verify failed on logical %llu mirror %u wanted " BTRFS_CSUM_FMT " found " BTRFS_CSUM_FMT " level %d%s", 410 402 eb->start, eb->read_mirror, 411 - CSUM_FMT_VALUE(csum_size, header_csum), 412 - CSUM_FMT_VALUE(csum_size, result), 403 + BTRFS_CSUM_FMT_VALUE(csum_size, header_csum), 404 + BTRFS_CSUM_FMT_VALUE(csum_size, result), 413 405 btrfs_header_level(eb), 414 406 ignore_csum ? 
", ignored" : ""); 415 407 if (unlikely(!ignore_csum)) { ··· 652 644 if (!root) 653 645 return NULL; 654 646 655 - memset(&root->root_key, 0, sizeof(root->root_key)); 656 - memset(&root->root_item, 0, sizeof(root->root_item)); 657 - memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 658 647 root->fs_info = fs_info; 659 648 root->root_key.objectid = objectid; 660 - root->node = NULL; 661 - root->commit_root = NULL; 662 - root->state = 0; 663 649 RB_CLEAR_NODE(&root->rb_node); 664 650 665 - btrfs_set_root_last_trans(root, 0); 666 - root->free_objectid = 0; 667 - root->nr_delalloc_inodes = 0; 668 - root->nr_ordered_extents = 0; 669 651 xa_init(&root->inodes); 670 652 xa_init(&root->delayed_nodes); 671 653 ··· 689 691 refcount_set(&root->refs, 1); 690 692 atomic_set(&root->snapshot_force_cow, 0); 691 693 atomic_set(&root->nr_swapfiles, 0); 692 - btrfs_set_root_log_transid(root, 0); 693 694 root->log_transid_committed = -1; 694 - btrfs_set_root_last_log_commit(root, 0); 695 - root->anon_dev = 0; 696 695 if (!btrfs_is_testing(fs_info)) { 697 696 btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages, 698 697 IO_TREE_ROOT_DIRTY_LOG_PAGES); ··· 1768 1773 destroy_workqueue(fs_info->endio_workers); 1769 1774 if (fs_info->rmw_workers) 1770 1775 destroy_workqueue(fs_info->rmw_workers); 1771 - if (fs_info->compressed_write_workers) 1772 - destroy_workqueue(fs_info->compressed_write_workers); 1773 1776 btrfs_destroy_workqueue(fs_info->endio_write_workers); 1774 1777 btrfs_destroy_workqueue(fs_info->endio_freespace_worker); 1775 1778 btrfs_destroy_workqueue(fs_info->delayed_workers); ··· 1979 1986 fs_info->endio_write_workers = 1980 1987 btrfs_alloc_workqueue(fs_info, "endio-write", flags, 1981 1988 max_active, 2); 1982 - fs_info->compressed_write_workers = 1983 - alloc_workqueue("btrfs-compressed-write", flags, max_active); 1984 1989 fs_info->endio_freespace_worker = 1985 1990 btrfs_alloc_workqueue(fs_info, "freespace-write", flags, 1986 1991 max_active, 0); ··· 1994 2003 if (!(fs_info->workers && 1995 2004 fs_info->delalloc_workers && fs_info->flush_workers && 1996 2005 fs_info->endio_workers && fs_info->endio_meta_workers && 1997 - fs_info->compressed_write_workers && 1998 2006 fs_info->endio_write_workers && 1999 2007 fs_info->endio_freespace_worker && fs_info->rmw_workers && 2000 2008 fs_info->caching_workers && fs_info->fixup_workers && ··· 3245 3255 PAGE_SIZE, fs_info->sectorsize); 3246 3256 return -EINVAL; 3247 3257 } 3248 - if (fs_info->sectorsize > PAGE_SIZE && btrfs_fs_incompat(fs_info, RAID56)) { 3249 - btrfs_err(fs_info, 3250 - "RAID56 is not supported for page size %lu with sectorsize %u", 3251 - PAGE_SIZE, fs_info->sectorsize); 3252 - return -EINVAL; 3253 - } 3254 3258 3255 3259 /* This can be called by remount, we need to protect the super block. */ 3256 3260 spin_lock(&fs_info->super_lock); ··· 4274 4290 4275 4291 /* 4276 4292 * When finishing a compressed write bio we schedule a work queue item 4277 - * to finish an ordered extent - btrfs_finish_compressed_write_work() 4293 + * to finish an ordered extent - end_bbio_compressed_write() 4278 4294 * calls btrfs_finish_ordered_extent() which in turns does a call to 4279 4295 * btrfs_queue_ordered_fn(), and that queues the ordered extent 4280 4296 * completion either in the endio_write_workers work queue or in the ··· 4282 4298 * below, so before we flush them we must flush this queue for the 4283 4299 * workers of compressed writes. 
4284 4300 */ 4285 - flush_workqueue(fs_info->compressed_write_workers); 4301 + flush_workqueue(fs_info->endio_workers); 4286 4302 4287 4303 /* 4288 4304 * After we parked the cleaner kthread, ordered extents may have
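btrfs_repair_eb_io_failure() now collects one physical address per step, where the step is min(nodesize, PAGE_SIZE). The arithmetic, with example sizes (an illustration, not kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned int nodesize = 16384;	/* hypothetical 16K metadata blocks */
	unsigned int step = nodesize < PAGE_SIZE ? nodesize : PAGE_SIZE;
	unsigned int nr_steps = nodesize / step;

	/* 16K nodes on 4K pages: four page-sized steps, one paddr each.
	 * A 2K node on 4K pages would give step=2048, nr_steps=1, with
	 * the single address carrying an offset inside the page. */
	printf("step=%u nr_steps=%u\n", step, nr_steps);
	return 0;
}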
+2 -1
fs/btrfs/disk-io.h
··· 9 9 #include <linux/sizes.h> 10 10 #include <linux/compiler_types.h> 11 11 #include "ctree.h" 12 - #include "fs.h" 12 + #include "bio.h" 13 + #include "ordered-data.h" 13 14 14 15 struct block_device; 15 16 struct super_block;
+78 -94
fs/btrfs/extent-tree.c
··· 40 40 #include "orphan.h" 41 41 #include "tree-checker.h" 42 42 #include "raid-stripe-tree.h" 43 + #include "delayed-inode.h" 43 44 44 45 #undef SCRAMBLE_DELAYED_REFS 45 46 ··· 165 164 if (unlikely(num_refs == 0)) { 166 165 ret = -EUCLEAN; 167 166 btrfs_err(fs_info, 168 - "unexpected zero reference count for extent item (%llu %u %llu)", 169 - key.objectid, key.type, key.offset); 167 + "unexpected zero reference count for extent item " BTRFS_KEY_FMT, 168 + BTRFS_KEY_FMT_VALUE(&key)); 170 169 btrfs_abort_transaction(trans, ret); 171 170 return ret; 172 171 } ··· 598 597 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 599 598 } else { 600 599 btrfs_err(trans->fs_info, 601 - "unrecognized backref key (%llu %u %llu)", 602 - key.objectid, key.type, key.offset); 600 + "unrecognized backref key " BTRFS_KEY_FMT, 601 + BTRFS_KEY_FMT_VALUE(&key)); 603 602 btrfs_abort_transaction(trans, -EUCLEAN); 604 603 return -EUCLEAN; 605 604 } ··· 789 788 want = extent_ref_type(parent, owner); 790 789 if (insert) { 791 790 extra_size = btrfs_extent_inline_ref_size(want); 792 - path->search_for_extension = 1; 791 + path->search_for_extension = true; 793 792 } else 794 793 extra_size = -1; 795 794 ··· 955 954 956 955 if (!path->keep_locks) { 957 956 btrfs_release_path(path); 958 - path->keep_locks = 1; 957 + path->keep_locks = true; 959 958 goto again; 960 959 } 961 960 ··· 976 975 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 977 976 out: 978 977 if (path->keep_locks) { 979 - path->keep_locks = 0; 978 + path->keep_locks = false; 980 979 btrfs_unlock_up_safe(path, 1); 981 980 } 982 981 if (insert) 983 - path->search_for_extension = 0; 982 + path->search_for_extension = false; 984 983 return ret; 985 984 } 986 985 ··· 1765 1764 1766 1765 if (TRANS_ABORTED(trans)) { 1767 1766 if (insert_reserved) { 1768 - btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); 1767 + btrfs_pin_extent(trans, node->bytenr, node->num_bytes); 1769 1768 free_head_ref_squota_rsv(trans->fs_info, href); 1770 1769 } 1771 1770 return 0; ··· 1784 1783 else 1785 1784 BUG(); 1786 1785 if (ret && insert_reserved) 1787 - btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); 1786 + btrfs_pin_extent(trans, node->bytenr, node->num_bytes); 1788 1787 if (ret < 0) 1789 1788 btrfs_err(trans->fs_info, 1790 1789 "failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", ··· 1891 1890 spin_unlock(&delayed_refs->lock); 1892 1891 1893 1892 if (head->must_insert_reserved) { 1894 - btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1); 1893 + btrfs_pin_extent(trans, head->bytenr, head->num_bytes); 1895 1894 if (head->is_data) { 1896 1895 struct btrfs_root *csum_root; 1897 1896 ··· 2592 2591 } 2593 2592 2594 2593 static int pin_down_extent(struct btrfs_trans_handle *trans, 2595 - struct btrfs_block_group *cache, 2596 - u64 bytenr, u64 num_bytes, int reserved) 2594 + struct btrfs_block_group *bg, 2595 + u64 bytenr, u64 num_bytes, bool reserved) 2597 2596 { 2598 - spin_lock(&cache->space_info->lock); 2599 - spin_lock(&cache->lock); 2600 - cache->pinned += num_bytes; 2601 - btrfs_space_info_update_bytes_pinned(cache->space_info, num_bytes); 2602 - if (reserved) { 2603 - cache->reserved -= num_bytes; 2604 - cache->space_info->bytes_reserved -= num_bytes; 2605 - } 2606 - spin_unlock(&cache->lock); 2607 - spin_unlock(&cache->space_info->lock); 2597 + struct btrfs_space_info *space_info = bg->space_info; 2598 + const u64 reserved_bytes = (reserved ? 
num_bytes : 0); 2599 + 2600 + spin_lock(&space_info->lock); 2601 + spin_lock(&bg->lock); 2602 + bg->pinned += num_bytes; 2603 + bg->reserved -= reserved_bytes; 2604 + spin_unlock(&bg->lock); 2605 + space_info->bytes_reserved -= reserved_bytes; 2606 + btrfs_space_info_update_bytes_pinned(space_info, num_bytes); 2607 + spin_unlock(&space_info->lock); 2608 2608 2609 2609 btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr, 2610 2610 bytenr + num_bytes - 1, EXTENT_DIRTY, NULL); 2611 2611 return 0; 2612 2612 } 2613 2613 2614 - int btrfs_pin_extent(struct btrfs_trans_handle *trans, 2615 - u64 bytenr, u64 num_bytes, int reserved) 2614 + int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes) 2616 2615 { 2617 2616 struct btrfs_block_group *cache; 2618 2617 2619 2618 cache = btrfs_lookup_block_group(trans->fs_info, bytenr); 2620 2619 BUG_ON(!cache); /* Logic error */ 2621 2620 2622 - pin_down_extent(trans, cache, bytenr, num_bytes, reserved); 2621 + pin_down_extent(trans, cache, bytenr, num_bytes, true); 2623 2622 2624 2623 btrfs_put_block_group(cache); 2625 2624 return 0; ··· 2643 2642 if (ret) 2644 2643 goto out; 2645 2644 2646 - pin_down_extent(trans, cache, eb->start, eb->len, 0); 2645 + pin_down_extent(trans, cache, eb->start, eb->len, false); 2647 2646 2648 2647 /* remove us from the free space cache (if we're there at all) */ 2649 2648 ret = btrfs_remove_free_space(cache, eb->start, eb->len); ··· 2748 2747 struct btrfs_free_cluster *cluster = NULL; 2749 2748 u64 total_unpinned = 0; 2750 2749 u64 empty_cluster = 0; 2751 - bool readonly; 2752 - int ret = 0; 2753 2750 2754 2751 while (start <= end) { 2755 2752 u64 len; 2753 + bool readonly; 2756 2754 2757 - readonly = false; 2758 2755 if (!cache || 2759 2756 start >= cache->start + cache->length) { 2760 2757 if (cache) ··· 2761 2762 cache = btrfs_lookup_block_group(fs_info, start); 2762 2763 if (unlikely(cache == NULL)) { 2763 2764 /* Logic error, something removed the block group. 
*/ 2764 - ret = -EUCLEAN; 2765 - goto out; 2765 + return -EUCLEAN; 2766 2766 } 2767 2767 2768 2768 cluster = fetch_cluster_info(fs_info, ··· 2795 2797 2796 2798 spin_lock(&space_info->lock); 2797 2799 spin_lock(&cache->lock); 2800 + readonly = cache->ro; 2798 2801 cache->pinned -= len; 2802 + spin_unlock(&cache->lock); 2803 + 2799 2804 btrfs_space_info_update_bytes_pinned(space_info, -len); 2800 2805 space_info->max_extent_size = 0; 2801 - if (cache->ro) { 2806 + 2807 + if (readonly) { 2802 2808 space_info->bytes_readonly += len; 2803 - readonly = true; 2804 2809 } else if (btrfs_is_zoned(fs_info)) { 2805 2810 /* Need reset before reusing in a zoned block group */ 2806 2811 btrfs_space_info_update_bytes_zone_unusable(space_info, len); 2807 - readonly = true; 2808 - } 2809 - spin_unlock(&cache->lock); 2810 - if (!readonly && return_free_space) 2812 + } else if (return_free_space) { 2811 2813 btrfs_return_free_space(space_info, len); 2814 + } 2812 2815 spin_unlock(&space_info->lock); 2813 2816 } 2814 2817 2815 2818 if (cache) 2816 2819 btrfs_put_block_group(cache); 2817 - out: 2818 - return ret; 2820 + 2821 + return 0; 2819 2822 } 2820 2823 2821 2824 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) ··· 3085 3086 { 3086 3087 struct btrfs_fs_info *info = trans->fs_info; 3087 3088 struct btrfs_key key; 3088 - struct btrfs_path *path; 3089 + BTRFS_PATH_AUTO_FREE(path); 3089 3090 struct btrfs_root *extent_root; 3090 3091 struct extent_buffer *leaf; 3091 3092 struct btrfs_extent_item *ei; ··· 3120 3121 node->bytenr, refs_to_drop); 3121 3122 ret = -EINVAL; 3122 3123 btrfs_abort_transaction(trans, ret); 3123 - goto out; 3124 + return ret; 3124 3125 } 3125 3126 3126 3127 if (is_data) ··· 3165 3166 abort_and_dump(trans, path, 3166 3167 "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref", 3167 3168 path->slots[0]); 3168 - ret = -EUCLEAN; 3169 - goto out; 3169 + return -EUCLEAN; 3170 3170 } 3171 3171 /* Must be SHARED_* item, remove the backref first */ 3172 3172 ret = remove_extent_backref(trans, extent_root, path, 3173 3173 NULL, refs_to_drop, is_data); 3174 3174 if (unlikely(ret)) { 3175 3175 btrfs_abort_transaction(trans, ret); 3176 - goto out; 3176 + return ret; 3177 3177 } 3178 3178 btrfs_release_path(path); 3179 3179 ··· 3221 3223 } 3222 3224 if (unlikely(ret < 0)) { 3223 3225 btrfs_abort_transaction(trans, ret); 3224 - goto out; 3226 + return ret; 3225 3227 } 3226 3228 extent_slot = path->slots[0]; 3227 3229 } ··· 3230 3232 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d", 3231 3233 bytenr, node->parent, node->ref_root, owner_objectid, 3232 3234 owner_offset, path->slots[0]); 3233 - goto out; 3235 + return ret; 3234 3236 } else { 3235 3237 btrfs_abort_transaction(trans, ret); 3236 - goto out; 3238 + return ret; 3237 3239 } 3238 3240 3239 3241 leaf = path->nodes[0]; ··· 3244 3246 "unexpected extent item size, has %u expect >= %zu", 3245 3247 item_size, sizeof(*ei)); 3246 3248 btrfs_abort_transaction(trans, ret); 3247 - goto out; 3249 + return ret; 3248 3250 } 3249 3251 ei = btrfs_item_ptr(leaf, extent_slot, 3250 3252 struct btrfs_extent_item); ··· 3258 3260 key.objectid, key.type, key.offset, 3259 3261 path->slots[0], owner_objectid, item_size, 3260 3262 sizeof(*ei) + sizeof(*bi)); 3261 - ret = -EUCLEAN; 3262 - goto out; 3263 + return -EUCLEAN; 3263 3264 } 3264 3265 bi = (struct btrfs_tree_block_info *)(ei + 1); 3265 3266 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); ··· 3269 3272 
abort_and_dump(trans, path, 3270 3273 "trying to drop %d refs but we only have %llu for bytenr %llu slot %u", 3271 3274 refs_to_drop, refs, bytenr, path->slots[0]); 3272 - ret = -EUCLEAN; 3273 - goto out; 3275 + return -EUCLEAN; 3274 3276 } 3275 3277 refs -= refs_to_drop; 3276 3278 ··· 3285 3289 abort_and_dump(trans, path, 3286 3290 "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u", 3287 3291 path->slots[0]); 3288 - ret = -EUCLEAN; 3289 - goto out; 3292 + return -EUCLEAN; 3290 3293 } 3291 3294 } else { 3292 3295 btrfs_set_extent_refs(leaf, ei, refs); ··· 3295 3300 iref, refs_to_drop, is_data); 3296 3301 if (unlikely(ret)) { 3297 3302 btrfs_abort_transaction(trans, ret); 3298 - goto out; 3303 + return ret; 3299 3304 } 3300 3305 } 3301 3306 } else { ··· 3315 3320 "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u", 3316 3321 extent_data_ref_count(path, iref), 3317 3322 refs_to_drop, path->slots[0]); 3318 - ret = -EUCLEAN; 3319 - goto out; 3323 + return -EUCLEAN; 3320 3324 } 3321 3325 if (iref) { 3322 3326 if (unlikely(path->slots[0] != extent_slot)) { 3323 3327 abort_and_dump(trans, path, 3324 - "invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref", 3325 - key.objectid, key.type, 3326 - key.offset, path->slots[0]); 3327 - ret = -EUCLEAN; 3328 - goto out; 3328 + "invalid iref, extent item key " BTRFS_KEY_FMT " slot %u doesn't have wanted iref", 3329 + BTRFS_KEY_FMT_VALUE(&key), 3330 + path->slots[0]); 3331 + return -EUCLEAN; 3329 3332 } 3330 3333 } else { 3331 3334 /* ··· 3336 3343 abort_and_dump(trans, path, 3337 3344 "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM", 3338 3345 path->slots[0]); 3339 - ret = -EUCLEAN; 3340 - goto out; 3346 + return -EUCLEAN; 3341 3347 } 3342 3348 path->slots[0] = extent_slot; 3343 3349 num_to_del = 2; ··· 3357 3365 num_to_del); 3358 3366 if (unlikely(ret)) { 3359 3367 btrfs_abort_transaction(trans, ret); 3360 - goto out; 3368 + return ret; 3361 3369 } 3362 3370 btrfs_release_path(path); 3363 3371 ··· 3365 3373 } 3366 3374 btrfs_release_path(path); 3367 3375 3368 - out: 3369 - btrfs_free_path(path); 3370 3376 return ret; 3371 3377 } 3372 3378 ··· 3473 3483 bg = btrfs_lookup_block_group(fs_info, buf->start); 3474 3484 3475 3485 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3476 - pin_down_extent(trans, bg, buf->start, buf->len, 1); 3486 + pin_down_extent(trans, bg, buf->start, buf->len, true); 3477 3487 btrfs_put_block_group(bg); 3478 3488 goto out; 3479 3489 } ··· 3497 3507 3498 3508 if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags) 3499 3509 || btrfs_is_zoned(fs_info)) { 3500 - pin_down_extent(trans, bg, buf->start, buf->len, 1); 3510 + pin_down_extent(trans, bg, buf->start, buf->len, true); 3501 3511 btrfs_put_block_group(bg); 3502 3512 goto out; 3503 3513 } ··· 3527 3537 * tree, just update pinning info and exit early. 
3528 3538 */ 3529 3539 if (ref->ref_root == BTRFS_TREE_LOG_OBJECTID) { 3530 - btrfs_pin_extent(trans, ref->bytenr, ref->num_bytes, 1); 3540 + btrfs_pin_extent(trans, ref->bytenr, ref->num_bytes); 3531 3541 ret = 0; 3532 3542 } else if (ref->type == BTRFS_REF_METADATA) { 3533 3543 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL); ··· 3578 3588 }; 3579 3589 3580 3590 static inline void 3581 - btrfs_lock_block_group(struct btrfs_block_group *cache, 3582 - int delalloc) 3591 + btrfs_lock_block_group(struct btrfs_block_group *cache, bool delalloc) 3583 3592 { 3584 3593 if (delalloc) 3585 3594 down_read(&cache->data_rwsem); 3586 3595 } 3587 3596 3588 3597 static inline void btrfs_grab_block_group(struct btrfs_block_group *cache, 3589 - int delalloc) 3598 + bool delalloc) 3590 3599 { 3591 3600 btrfs_get_block_group(cache); 3592 3601 if (delalloc) ··· 3595 3606 static struct btrfs_block_group *btrfs_lock_cluster( 3596 3607 struct btrfs_block_group *block_group, 3597 3608 struct btrfs_free_cluster *cluster, 3598 - int delalloc) 3609 + bool delalloc) 3599 3610 __acquires(&cluster->refill_lock) 3600 3611 { 3601 3612 struct btrfs_block_group *used_bg = NULL; ··· 3632 3643 } 3633 3644 3634 3645 static inline void 3635 - btrfs_release_block_group(struct btrfs_block_group *cache, 3636 - int delalloc) 3646 + btrfs_release_block_group(struct btrfs_block_group *cache, bool delalloc) 3637 3647 { 3638 3648 if (delalloc) 3639 3649 up_read(&cache->data_rwsem); ··· 4022 4034 4023 4035 static void release_block_group(struct btrfs_block_group *block_group, 4024 4036 struct find_free_extent_ctl *ffe_ctl, 4025 - int delalloc) 4037 + bool delalloc) 4026 4038 { 4027 4039 switch (ffe_ctl->policy) { 4028 4040 case BTRFS_EXTENT_ALLOC_CLUSTERED: ··· 4678 4690 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, 4679 4691 u64 num_bytes, u64 min_alloc_size, 4680 4692 u64 empty_size, u64 hint_byte, 4681 - struct btrfs_key *ins, int is_data, int delalloc) 4693 + struct btrfs_key *ins, bool is_data, bool delalloc) 4682 4694 { 4683 4695 struct btrfs_fs_info *fs_info = root->fs_info; 4684 4696 struct find_free_extent_ctl ffe_ctl = {}; ··· 4723 4735 "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d", 4724 4736 flags, num_bytes, for_treelog, for_data_reloc); 4725 4737 if (sinfo) 4726 - btrfs_dump_space_info(fs_info, sinfo, 4727 - num_bytes, 1); 4738 + btrfs_dump_space_info(sinfo, num_bytes, 1); 4728 4739 } 4729 4740 } 4730 4741 ··· 4763 4776 return -ENOSPC; 4764 4777 } 4765 4778 4766 - ret = pin_down_extent(trans, cache, eb->start, eb->len, 1); 4779 + ret = pin_down_extent(trans, cache, eb->start, eb->len, true); 4767 4780 btrfs_put_block_group(cache); 4768 4781 return ret; 4769 4782 } ··· 5009 5022 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, 5010 5023 offset, ins, 1, root_objectid); 5011 5024 if (ret) 5012 - btrfs_pin_extent(trans, ins->objectid, ins->offset, 1); 5025 + btrfs_pin_extent(trans, ins->objectid, ins->offset); 5013 5026 ret = btrfs_record_squota_delta(fs_info, &delta); 5014 5027 btrfs_put_block_group(block_group); 5015 5028 return ret; ··· 5155 5168 return ERR_CAST(block_rsv); 5156 5169 5157 5170 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, 5158 - empty_size, hint, &ins, 0, 0); 5171 + empty_size, hint, &ins, false, false); 5159 5172 if (ret) 5160 5173 goto out_unuse; 5161 5174 ··· 6048 6061 struct btrfs_trans_handle *trans; 6049 6062 struct btrfs_root *tree_root = fs_info->tree_root; 6050 6063 struct btrfs_root_item *root_item = 
&root->root_item; 6051 - struct walk_control *wc; 6064 + struct walk_control AUTO_KFREE(wc); 6052 6065 struct btrfs_key key; 6053 6066 const u64 rootid = btrfs_root_id(root); 6054 6067 int ret = 0; ··· 6066 6079 6067 6080 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6068 6081 if (!wc) { 6069 - btrfs_free_path(path); 6070 6082 ret = -ENOMEM; 6071 - goto out; 6083 + goto out_free; 6072 6084 } 6073 6085 6074 6086 /* ··· 6277 6291 6278 6292 btrfs_end_transaction_throttle(trans); 6279 6293 out_free: 6280 - kfree(wc); 6281 6294 btrfs_free_path(path); 6282 6295 out: 6283 6296 if (!ret && root_dropped) { ··· 6319 6334 { 6320 6335 struct btrfs_fs_info *fs_info = root->fs_info; 6321 6336 BTRFS_PATH_AUTO_FREE(path); 6322 - struct walk_control *wc; 6337 + struct walk_control AUTO_KFREE(wc); 6323 6338 int level; 6324 6339 int parent_level; 6325 6340 int ret = 0; ··· 6358 6373 while (1) { 6359 6374 ret = walk_down_tree(trans, root, path, wc); 6360 6375 if (ret < 0) 6361 - break; 6376 + return ret; 6362 6377 6363 6378 ret = walk_up_tree(trans, root, path, wc, parent_level); 6364 6379 if (ret) { 6365 - if (ret > 0) 6366 - ret = 0; 6380 + if (ret < 0) 6381 + return ret; 6367 6382 break; 6368 6383 } 6369 6384 } 6370 6385 6371 - kfree(wc); 6372 - return ret; 6386 + return 0; 6373 6387 } 6374 6388 6375 6389 /*
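The pin_down_extent() rewrite is one of the critical-section shrinks mentioned in the merge message: only the fields guarded by the block group lock are touched while it is held, and the space_info update moves after the inner unlock. A userspace sketch of the narrowed nesting (pthread mutexes standing in for the spinlocks; lock initialization omitted):

#include <pthread.h>
#include <stdbool.h>

struct block_group {
	pthread_mutex_t lock;
	unsigned long pinned;
	unsigned long reserved;
};

struct space_info {
	pthread_mutex_t lock;
	unsigned long bytes_reserved;
};

void pin_down(struct space_info *si, struct block_group *bg,
	      unsigned long bytes, bool reserved)
{
	unsigned long reserved_bytes = reserved ? bytes : 0;

	pthread_mutex_lock(&si->lock);
	pthread_mutex_lock(&bg->lock);
	bg->pinned += bytes;
	bg->reserved -= reserved_bytes;
	pthread_mutex_unlock(&bg->lock);	/* inner lock held only for bg fields */
	si->bytes_reserved -= reserved_bytes;	/* guarded by si->lock alone */
	pthread_mutex_unlock(&si->lock);
}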
+13 -14
fs/btrfs/extent-tree.h
··· 30 30 u64 min_alloc_size; 31 31 u64 empty_size; 32 32 u64 flags; 33 - int delalloc; 34 33 35 34 /* Where to start the search inside the bg */ 36 35 u64 search_start; ··· 39 40 struct btrfs_free_cluster *last_ptr; 40 41 bool use_cluster; 41 42 43 + bool delalloc; 42 44 bool have_caching_bg; 43 45 bool orig_have_caching_bg; 44 46 ··· 49 49 /* Allocation is called for data relocation */ 50 50 bool for_data_reloc; 51 51 52 + /* 53 + * Set to true if we're retrying the allocation on this block group 54 + * after waiting for caching progress, this is so that we retry only 55 + * once before moving on to another block group. 56 + */ 57 + bool retry_uncached; 58 + 59 + /* Whether or not the allocator is currently following a hint. */ 60 + bool hinted; 61 + 52 62 /* RAID index, converted from flags */ 53 63 int index; 54 64 ··· 66 56 * Current loop number, check find_free_extent_update_loop() for details 67 57 */ 68 58 int loop; 69 - 70 - /* 71 - * Set to true if we're retrying the allocation on this block group 72 - * after waiting for caching progress, this is so that we retry only 73 - * once before moving on to another block group. 74 - */ 75 - bool retry_uncached; 76 59 77 60 /* If current block group is cached */ 78 61 int cached; ··· 84 81 85 82 /* Allocation policy */ 86 83 enum btrfs_extent_allocation_policy policy; 87 - 88 - /* Whether or not the allocator is currently following a hint */ 89 - bool hinted; 90 84 91 85 /* Size class of block groups to prefer in early loops */ 92 86 enum btrfs_block_group_size_class size_class; ··· 110 110 struct btrfs_fs_info *fs_info, u64 bytenr, 111 111 u64 offset, int metadata, u64 *refs, u64 *flags, 112 112 u64 *owner_root); 113 - int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, 114 - int reserved); 113 + int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num); 115 114 int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, 116 115 const struct extent_buffer *eb); 117 116 int btrfs_exclude_logged_extents(struct extent_buffer *eb); ··· 137 138 struct btrfs_key *ins); 138 139 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, 139 140 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 140 - struct btrfs_key *ins, int is_data, int delalloc); 141 + struct btrfs_key *ins, bool is_data, bool delalloc); 141 142 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 142 143 struct extent_buffer *buf, bool full_backref); 143 144 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
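The find_free_extent_ctl reshuffle groups the bool members together. Besides readability, one plausible benefit is less padding; a toy comparison, assuming a typical LP64 ABI:

#include <stdbool.h>
#include <stdio.h>

struct scattered { long a; bool x; long b; bool y; long c; };	/* 40 bytes */
struct grouped   { long a; long b; long c; bool x; bool y; };	/* 32 bytes */

int main(void)
{
	printf("scattered=%zu grouped=%zu\n",
	       sizeof(struct scattered), sizeof(struct grouped));
	return 0;
}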
+35 -22
fs/btrfs/extent_io.c
··· 374 374 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 375 375 const u64 orig_start = *start; 376 376 const u64 orig_end = *end; 377 - /* The sanity tests may not set a valid fs_info. */ 378 - u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE; 377 + u64 max_bytes = fs_info->max_extent_size; 379 378 u64 delalloc_start; 380 379 u64 delalloc_end; 381 380 bool found; ··· 517 518 */ 518 519 static void end_bbio_data_write(struct btrfs_bio *bbio) 519 520 { 520 - struct btrfs_fs_info *fs_info = bbio->fs_info; 521 + struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; 521 522 struct bio *bio = &bbio->bio; 522 523 int error = blk_status_to_errno(bio->bi_status); 523 524 struct folio_iter fi; ··· 573 574 */ 574 575 static void end_bbio_data_read(struct btrfs_bio *bbio) 575 576 { 576 - struct btrfs_fs_info *fs_info = bbio->fs_info; 577 + struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; 577 578 struct bio *bio = &bbio->bio; 578 579 struct folio_iter fi; 579 580 ··· 738 739 struct btrfs_fs_info *fs_info = inode->root->fs_info; 739 740 struct btrfs_bio *bbio; 740 741 741 - bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, fs_info, 742 - bio_ctrl->end_io_func, NULL); 742 + bbio = btrfs_bio_alloc(BIO_MAX_VECS, bio_ctrl->opf, inode, 743 + file_offset, bio_ctrl->end_io_func, NULL); 743 744 bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; 744 745 bbio->bio.bi_write_hint = inode->vfs_inode.i_write_hint; 745 - bbio->inode = inode; 746 - bbio->file_offset = file_offset; 747 746 bio_ctrl->bbio = bbio; 748 747 bio_ctrl->len_to_oe_boundary = U32_MAX; 749 748 bio_ctrl->next_file_offset = file_offset; ··· 1688 1691 unsigned long range_bitmap = 0; 1689 1692 bool submitted_io = false; 1690 1693 int found_error = 0; 1694 + const u64 end = start + len; 1691 1695 const u64 folio_start = folio_pos(folio); 1696 + const u64 folio_end = folio_start + folio_size(folio); 1692 1697 const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 1693 1698 u64 cur; 1694 1699 int bit; 1695 1700 int ret = 0; 1696 1701 1697 - ASSERT(start >= folio_start && 1698 - start + len <= folio_start + folio_size(folio)); 1702 + ASSERT(start >= folio_start, "start=%llu folio_start=%llu", start, folio_start); 1703 + ASSERT(end <= folio_end, "start=%llu len=%u folio_start=%llu folio_size=%zu", 1704 + start, len, folio_start, folio_size(folio)); 1699 1705 1700 1706 ret = btrfs_writepage_cow_fixup(folio); 1701 1707 if (ret == -EAGAIN) { ··· 1714 1714 return ret; 1715 1715 } 1716 1716 1717 - for (cur = start; cur < start + len; cur += fs_info->sectorsize) 1717 + for (cur = start; cur < end; cur += fs_info->sectorsize) 1718 1718 set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap); 1719 1719 bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap, 1720 1720 blocks_per_folio); ··· 1725 1725 cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits); 1726 1726 1727 1727 if (cur >= i_size) { 1728 + struct btrfs_ordered_extent *ordered; 1729 + 1730 + ordered = btrfs_lookup_first_ordered_range(inode, cur, 1731 + folio_end - cur); 1732 + /* 1733 + * We have just run delalloc before getting here, so 1734 + * there must be an ordered extent. 
1735 + */ 1736 + ASSERT(ordered != NULL); 1737 + spin_lock(&inode->ordered_tree_lock); 1738 + set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); 1739 + ordered->truncated_len = min(ordered->truncated_len, 1740 + cur - ordered->file_offset); 1741 + spin_unlock(&inode->ordered_tree_lock); 1742 + btrfs_put_ordered_extent(ordered); 1743 + 1728 1744 btrfs_mark_ordered_io_finished(inode, folio, cur, 1729 - start + len - cur, true); 1745 + end - cur, true); 1730 1746 /* 1731 1747 * This range is beyond i_size, thus we don't need to 1732 1748 * bother writing back. ··· 1751 1735 * writeback the sectors with subpage dirty bits, 1752 1736 * causing writeback without ordered extent. 1753 1737 */ 1754 - btrfs_folio_clear_dirty(fs_info, folio, cur, 1755 - start + len - cur); 1738 + btrfs_folio_clear_dirty(fs_info, folio, cur, end - cur); 1756 1739 break; 1757 1740 } 1758 1741 ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size); ··· 1871 1856 folio_size(folio), bio_ctrl, i_size); 1872 1857 if (ret == 1) 1873 1858 return 0; 1874 - if (ret < 0) 1859 + if (unlikely(ret < 0)) 1875 1860 btrfs_err_rl(fs_info, 1876 1861 "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d", 1877 1862 btrfs_root_id(inode->root), btrfs_ino(inode), ··· 2221 2206 2222 2207 bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES, 2223 2208 REQ_OP_WRITE | REQ_META | wbc_to_write_flags(wbc), 2224 - eb->fs_info, end_bbio_meta_write, eb); 2209 + BTRFS_I(fs_info->btree_inode), eb->start, 2210 + end_bbio_meta_write, eb); 2225 2211 bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT; 2226 2212 bio_set_dev(&bbio->bio, fs_info->fs_devices->latest_dev->bdev); 2227 2213 wbc_init_bio(wbc, &bbio->bio); 2228 - bbio->inode = BTRFS_I(eb->fs_info->btree_inode); 2229 - bbio->file_offset = eb->start; 2230 2214 for (int i = 0; i < num_extent_folios(eb); i++) { 2231 2215 struct folio *folio = eb->folios[i]; 2232 2216 u64 range_start = max_t(u64, eb->start, folio_pos(folio)); ··· 3837 3823 int read_extent_buffer_pages_nowait(struct extent_buffer *eb, int mirror_num, 3838 3824 const struct btrfs_tree_parent_check *check) 3839 3825 { 3826 + struct btrfs_fs_info *fs_info = eb->fs_info; 3840 3827 struct btrfs_bio *bbio; 3841 3828 3842 3829 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) ··· 3871 3856 refcount_inc(&eb->refs); 3872 3857 3873 3858 bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES, 3874 - REQ_OP_READ | REQ_META, eb->fs_info, 3875 - end_bbio_meta_read, eb); 3859 + REQ_OP_READ | REQ_META, BTRFS_I(fs_info->btree_inode), 3860 + eb->start, end_bbio_meta_read, eb); 3876 3861 bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT; 3877 - bbio->inode = BTRFS_I(eb->fs_info->btree_inode); 3878 - bbio->file_offset = eb->start; 3879 3862 memcpy(&bbio->parent_check, check, sizeof(*check)); 3880 3863 for (int i = 0; i < num_extent_folios(eb); i++) { 3881 3864 struct folio *folio = eb->folios[i];
+0 -1
fs/btrfs/extent_io.h
··· 12 12 #include <linux/rwsem.h> 13 13 #include <linux/list.h> 14 14 #include <linux/slab.h> 15 - #include "compression.h" 16 15 #include "messages.h" 17 16 #include "ulist.h" 18 17 #include "misc.h"
+1 -2
fs/btrfs/extent_map.h
··· 8 8 #include <linux/rbtree.h> 9 9 #include <linux/list.h> 10 10 #include <linux/refcount.h> 11 - #include "misc.h" 12 - #include "compression.h" 11 + #include "fs.h" 13 12 14 13 struct btrfs_inode; 15 14 struct btrfs_fs_info;
+62 -27
fs/btrfs/file-item.c
··· 18 18 #include "fs.h" 19 19 #include "accessors.h" 20 20 #include "file-item.h" 21 + #include "volumes.h" 21 22 22 23 #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ 23 24 sizeof(struct btrfs_item) * 2) / \ ··· 373 372 return -ENOMEM; 374 373 375 374 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 376 - bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); 375 + bbio->csum = kvcalloc(nblocks, csum_size, GFP_NOFS); 377 376 if (!bbio->csum) 378 377 return -ENOMEM; 379 378 } else { ··· 394 393 * between reading the free space cache and updating the csum tree. 395 394 */ 396 395 if (btrfs_is_free_space_inode(inode)) { 397 - path->search_commit_root = 1; 398 - path->skip_locking = 1; 396 + path->search_commit_root = true; 397 + path->skip_locking = true; 399 398 } 400 399 401 400 /* ··· 423 422 * from across transactions. 424 423 */ 425 424 if (bbio->csum_search_commit_root) { 426 - path->search_commit_root = 1; 427 - path->skip_locking = 1; 425 + path->search_commit_root = true; 426 + path->skip_locking = true; 428 427 down_read(&fs_info->commit_root_sem); 429 428 } 430 429 ··· 439 438 if (count < 0) { 440 439 ret = count; 441 440 if (bbio->csum != bbio->csum_inline) 442 - kfree(bbio->csum); 441 + kvfree(bbio->csum); 443 442 bbio->csum = NULL; 444 443 break; 445 444 } ··· 765 764 return ret; 766 765 } 767 766 768 - /* 769 - * Calculate checksums of the data contained inside a bio. 770 - */ 771 - int btrfs_csum_one_bio(struct btrfs_bio *bbio) 767 + static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src) 772 768 { 773 - struct btrfs_ordered_extent *ordered = bbio->ordered; 774 769 struct btrfs_inode *inode = bbio->inode; 775 770 struct btrfs_fs_info *fs_info = inode->root->fs_info; 776 771 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 777 772 struct bio *bio = &bbio->bio; 778 - struct btrfs_ordered_sum *sums; 779 - struct bvec_iter iter = bio->bi_iter; 773 + struct btrfs_ordered_sum *sums = bbio->sums; 774 + struct bvec_iter iter = *src; 780 775 phys_addr_t paddr; 781 776 const u32 blocksize = fs_info->sectorsize; 782 - int index; 777 + const u32 step = min(blocksize, PAGE_SIZE); 778 + const u32 nr_steps = blocksize / step; 779 + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; 780 + u32 offset = 0; 781 + int index = 0; 782 + 783 + shash->tfm = fs_info->csum_shash; 784 + 785 + btrfs_bio_for_each_block(paddr, bio, &iter, step) { 786 + paddrs[(offset / step) % nr_steps] = paddr; 787 + offset += step; 788 + 789 + if (IS_ALIGNED(offset, blocksize)) { 790 + btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index); 791 + index += fs_info->csum_size; 792 + } 793 + } 794 + } 795 + 796 + static void csum_one_bio_work(struct work_struct *work) 797 + { 798 + struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, csum_work); 799 + 800 + ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE); 801 + ASSERT(bbio->async_csum == true); 802 + csum_one_bio(bbio, &bbio->csum_saved_iter); 803 + complete(&bbio->csum_done); 804 + } 805 + 806 + /* 807 + * Calculate checksums of the data contained inside a bio. 
808 + */ 809 + int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async) 810 + { 811 + struct btrfs_ordered_extent *ordered = bbio->ordered; 812 + struct btrfs_inode *inode = bbio->inode; 813 + struct btrfs_fs_info *fs_info = inode->root->fs_info; 814 + struct bio *bio = &bbio->bio; 815 + struct btrfs_ordered_sum *sums; 783 816 unsigned nofs_flag; 784 817 785 818 nofs_flag = memalloc_nofs_save(); ··· 824 789 if (!sums) 825 790 return -ENOMEM; 826 791 792 + sums->logical = bbio->orig_logical; 827 793 sums->len = bio->bi_iter.bi_size; 828 794 INIT_LIST_HEAD(&sums->list); 829 - 830 - sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT; 831 - index = 0; 832 - 833 - shash->tfm = fs_info->csum_shash; 834 - 835 - btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) { 836 - btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index); 837 - index += fs_info->csum_size; 838 - } 839 - 840 795 bbio->sums = sums; 841 796 btrfs_add_ordered_sum(ordered, sums); 797 + 798 + if (!async) { 799 + csum_one_bio(bbio, &bbio->bio.bi_iter); 800 + return 0; 801 + } 802 + init_completion(&bbio->csum_done); 803 + bbio->async_csum = true; 804 + bbio->csum_saved_iter = bbio->bio.bi_iter; 805 + INIT_WORK(&bbio->csum_work, csum_one_bio_work); 806 + schedule_work(&bbio->csum_work); 842 807 return 0; 843 808 } 844 809 ··· 1177 1142 } 1178 1143 1179 1144 btrfs_release_path(path); 1180 - path->search_for_extension = 1; 1145 + path->search_for_extension = true; 1181 1146 ret = btrfs_search_slot(trans, root, &file_key, path, 1182 1147 csum_size, 1); 1183 - path->search_for_extension = 0; 1148 + path->search_for_extension = false; 1184 1149 if (ret < 0) 1185 1150 goto out; 1186 1151
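btrfs_csum_one_bio() gains an async mode: the iterator state is saved, the work is queued, and a completion lets the submitter wait before the write proceeds. A rough userspace sketch of that shape, with a thread standing in for the workqueue (the digest is a toy sum; all names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct csum_job {
	const unsigned char *data;
	size_t len;
	unsigned long sum;	/* stands in for the real checksum */
	pthread_t worker;
	bool async;
};

static unsigned long toy_digest(const unsigned char *p, size_t n)
{
	unsigned long s = 0;

	while (n--)
		s += *p++;
	return s;
}

static void *csum_worker(void *arg)
{
	struct csum_job *job = arg;

	job->sum = toy_digest(job->data, job->len);
	return NULL;
}

/* Inline when async is false; otherwise queue and return immediately. */
int csum_one(struct csum_job *job, bool async)
{
	if (!async) {
		job->sum = toy_digest(job->data, job->len);
		return 0;
	}
	job->async = true;
	return pthread_create(&job->worker, NULL, csum_worker, job);
}

/* The submit path waits here before letting the data hit the device. */
void csum_wait(struct csum_job *job)
{
	if (job->async)
		pthread_join(job->worker, NULL);
}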
+2 -2
fs/btrfs/file-item.h
··· 7 7 #include <linux/list.h> 8 8 #include <uapi/linux/btrfs_tree.h> 9 9 #include "ctree.h" 10 - #include "accessors.h" 10 + #include "ordered-data.h" 11 11 12 12 struct extent_map; 13 13 struct btrfs_file_extent_item; ··· 64 64 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 65 65 struct btrfs_root *root, 66 66 struct btrfs_ordered_sum *sums); 67 - int btrfs_csum_one_bio(struct btrfs_bio *bbio); 67 + int btrfs_csum_one_bio(struct btrfs_bio *bbio, bool async); 68 68 int btrfs_alloc_dummy_sum(struct btrfs_bio *bbio); 69 69 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 70 70 struct list_head *list, int search_commit,
+27 -6
fs/btrfs/file.c
··· 75 75 u64 num_bytes; 76 76 u64 start_pos; 77 77 u64 end_of_last_block; 78 - u64 end_pos = pos + write_bytes; 78 + const u64 end_pos = pos + write_bytes; 79 79 loff_t isize = i_size_read(&inode->vfs_inode); 80 80 unsigned int extra_bits = 0; 81 81 ··· 86 86 extra_bits |= EXTENT_NORESERVE; 87 87 88 88 start_pos = round_down(pos, fs_info->sectorsize); 89 - num_bytes = round_up(write_bytes + pos - start_pos, 90 - fs_info->sectorsize); 89 + num_bytes = round_up(end_pos - start_pos, fs_info->sectorsize); 91 90 ASSERT(num_bytes <= U32_MAX); 92 - ASSERT(folio_pos(folio) <= pos && 93 - folio_next_pos(folio) >= pos + write_bytes); 91 + ASSERT(folio_pos(folio) <= pos && folio_next_pos(folio) >= end_pos); 94 92 95 93 end_of_last_block = start_pos + num_bytes - 1; 96 94 ··· 1440 1442 struct btrfs_inode *inode = BTRFS_I(file_inode(file)); 1441 1443 ssize_t num_written, num_sync; 1442 1444 1445 + if (unlikely(btrfs_is_shutdown(inode->root->fs_info))) 1446 + return -EIO; 1443 1447 /* 1444 1448 * If the fs flips readonly due to some impossible error, although we 1445 1449 * have opened a file as writable, we have to stop this write operation ··· 2044 2044 struct file *filp = desc->file; 2045 2045 struct address_space *mapping = filp->f_mapping; 2046 2046 2047 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(filp))))) 2048 + return -EIO; 2047 2049 if (!mapping->a_ops->read_folio) 2048 2050 return -ENOEXEC; 2049 2051 ··· 3115 3113 int blocksize = BTRFS_I(inode)->root->fs_info->sectorsize; 3116 3114 int ret; 3117 3115 3116 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode)))) 3117 + return -EIO; 3118 + 3118 3119 /* Do not allow fallocate in ZONED mode */ 3119 3120 if (btrfs_is_zoned(inode_to_fs_info(inode))) 3120 3121 return -EOPNOTSUPP; ··· 3809 3804 { 3810 3805 int ret; 3811 3806 3807 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode)))) 3808 + return -EIO; 3809 + 3812 3810 filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; 3813 3811 3814 3812 ret = fsverity_file_open(inode, filp); ··· 3824 3816 { 3825 3817 ssize_t ret = 0; 3826 3818 3819 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(iocb->ki_filp))))) 3820 + return -EIO; 3821 + 3827 3822 if (iocb->ki_flags & IOCB_DIRECT) { 3828 3823 ret = btrfs_direct_read(iocb, to); 3829 3824 if (ret < 0 || !iov_iter_count(to) || ··· 3837 3826 return filemap_read(iocb, to, ret); 3838 3827 } 3839 3828 3829 + static ssize_t btrfs_file_splice_read(struct file *in, loff_t *ppos, 3830 + struct pipe_inode_info *pipe, 3831 + size_t len, unsigned int flags) 3832 + { 3833 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(in))))) 3834 + return -EIO; 3835 + 3836 + return filemap_splice_read(in, ppos, pipe, len, flags); 3837 + } 3838 + 3840 3839 const struct file_operations btrfs_file_operations = { 3841 3840 .llseek = btrfs_file_llseek, 3842 3841 .read_iter = btrfs_file_read_iter, 3843 - .splice_read = filemap_splice_read, 3842 + .splice_read = btrfs_file_splice_read, 3844 3843 .write_iter = btrfs_file_write_iter, 3845 3844 .splice_write = iter_file_splice_write, 3846 3845 .mmap_prepare = btrfs_file_mmap_prepare,
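The file.c hunks gate every user-facing entry point (open, read, write, fallocate, splice) on the new shutdown state and fail fast with -EIO. The splice case shows the design choice: .splice_read previously pointed straight at the generic filemap_splice_read(), which leaves nowhere to hang a filesystem-specific check, so a thin btrfs_file_splice_read() wrapper exists purely to host the gate. The pattern in isolation, a sketch with hypothetical names:

    #include <linux/bitops.h>
    #include <linux/compiler.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    /* One state word per filesystem instance; bit 0 stands in for the
     * emergency-shutdown flag here. */
    static inline bool fs_is_shutdown(const unsigned long *fs_state)
    {
            return test_bit(0, fs_state);
    }

    static ssize_t guarded_op(const unsigned long *fs_state)
    {
            if (unlikely(fs_is_shutdown(fs_state)))
                    return -EIO;
            /* ... normal I/O path ... */
            return 0;
    }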
+14 -10
fs/btrfs/free-space-cache.c
··· 968 968 path = btrfs_alloc_path(); 969 969 if (!path) 970 970 return 0; 971 - path->search_commit_root = 1; 972 - path->skip_locking = 1; 971 + path->search_commit_root = true; 972 + path->skip_locking = true; 973 973 974 974 /* 975 975 * We must pass a path with search_commit_root set to btrfs_iget in ··· 3656 3656 struct btrfs_fs_info *fs_info = block_group->fs_info; 3657 3657 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3658 3658 int ret; 3659 - int update = 0; 3659 + bool bg_ro; 3660 3660 const u64 end = start + bytes; 3661 3661 const u64 reserved_end = reserved_start + reserved_bytes; 3662 3662 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; ··· 3664 3664 3665 3665 spin_lock(&space_info->lock); 3666 3666 spin_lock(&block_group->lock); 3667 - if (!block_group->ro) { 3667 + bg_ro = block_group->ro; 3668 + if (!bg_ro) { 3668 3669 block_group->reserved += reserved_bytes; 3670 + spin_unlock(&block_group->lock); 3669 3671 space_info->bytes_reserved += reserved_bytes; 3670 - update = 1; 3672 + } else { 3673 + spin_unlock(&block_group->lock); 3671 3674 } 3672 - spin_unlock(&block_group->lock); 3673 3675 spin_unlock(&space_info->lock); 3674 3676 3675 3677 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); ··· 3692 3690 list_del(&trim_entry->list); 3693 3691 mutex_unlock(&ctl->cache_writeout_mutex); 3694 3692 3695 - if (update) { 3693 + if (!bg_ro) { 3696 3694 spin_lock(&space_info->lock); 3697 3695 spin_lock(&block_group->lock); 3698 - if (block_group->ro) 3699 - space_info->bytes_readonly += reserved_bytes; 3696 + bg_ro = block_group->ro; 3700 3697 block_group->reserved -= reserved_bytes; 3701 - space_info->bytes_reserved -= reserved_bytes; 3702 3698 spin_unlock(&block_group->lock); 3699 + 3700 + space_info->bytes_reserved -= reserved_bytes; 3701 + if (bg_ro) 3702 + space_info->bytes_readonly += reserved_bytes; 3703 3703 spin_unlock(&space_info->lock); 3704 3704 } 3705 3705
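The trim accounting rework above drops the block group's spinlock before touching the space_info counters and samples the read-only flag into a local under the lock, consuming it after the lock is released; on the second pass the flag is re-read because it may have flipped while the discard ran. This is consistent with this cycle's theme of shrinking critical sections around space accounting. The shape of the change, sketched with stand-in structures:

    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct group { spinlock_t lock; bool ro; u64 reserved; };
    struct space { spinlock_t lock; u64 bytes_reserved; };

    static void reserve(struct space *s, struct group *g, u64 bytes)
    {
            bool ro;

            spin_lock(&s->lock);
            spin_lock(&g->lock);
            /* Sample under the inner lock, then drop it before the
             * outer update so the two locks are not held together
             * any longer than necessary. */
            ro = g->ro;
            if (!ro)
                    g->reserved += bytes;
            spin_unlock(&g->lock);
            if (!ro)
                    s->bytes_reserved += bytes;
            spin_unlock(&s->lock);
    }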
+22 -33
fs/btrfs/free-space-tree.c
··· 165 165 166 166 /* 167 167 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 168 - * into the filesystem as the free space bitmap can be modified in the 169 - * critical section of a transaction commit. 170 - * 171 - * TODO: push the memalloc_nofs_{save,restore}() to the caller where we 172 - * know that recursion is unsafe. 168 + * into the filesystem here. All callers hold a transaction handle 169 + * open, so if a GFP_KERNEL allocation recurses into the filesystem 170 + * and triggers a transaction commit, we would deadlock. 173 171 */ 174 172 nofs_flag = memalloc_nofs_save(); 175 173 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); ··· 216 218 217 219 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 218 220 bitmap = alloc_bitmap(bitmap_size); 219 - if (unlikely(!bitmap)) { 220 - ret = -ENOMEM; 221 - btrfs_abort_transaction(trans, ret); 222 - goto out; 223 - } 221 + if (unlikely(!bitmap)) 222 + return 0; 224 223 225 224 start = block_group->start; 226 225 end = block_group->start + block_group->length; ··· 356 361 357 362 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 358 363 bitmap = alloc_bitmap(bitmap_size); 359 - if (unlikely(!bitmap)) { 360 - ret = -ENOMEM; 361 - btrfs_abort_transaction(trans, ret); 362 - goto out; 363 - } 364 + if (unlikely(!bitmap)) 365 + return 0; 364 366 365 367 start = block_group->start; 366 368 end = block_group->start + block_group->length; ··· 833 841 u64 start, u64 size) 834 842 { 835 843 struct btrfs_block_group *block_group; 836 - struct btrfs_path *path; 844 + BTRFS_PATH_AUTO_FREE(path); 837 845 int ret; 838 846 839 847 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) ··· 843 851 if (unlikely(!path)) { 844 852 ret = -ENOMEM; 845 853 btrfs_abort_transaction(trans, ret); 846 - goto out; 854 + return ret; 847 855 } 848 856 849 857 block_group = btrfs_lookup_block_group(trans->fs_info, start); ··· 851 859 DEBUG_WARN("no block group found for start=%llu", start); 852 860 ret = -ENOENT; 853 861 btrfs_abort_transaction(trans, ret); 854 - goto out; 862 + return ret; 855 863 } 856 864 857 865 mutex_lock(&block_group->free_space_lock); ··· 861 869 btrfs_abort_transaction(trans, ret); 862 870 863 871 btrfs_put_block_group(block_group); 864 - out: 865 - btrfs_free_path(path); 872 + 866 873 return ret; 867 874 } 868 875 ··· 1014 1023 u64 start, u64 size) 1015 1024 { 1016 1025 struct btrfs_block_group *block_group; 1017 - struct btrfs_path *path; 1026 + BTRFS_PATH_AUTO_FREE(path); 1018 1027 int ret; 1019 1028 1020 1029 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) ··· 1024 1033 if (unlikely(!path)) { 1025 1034 ret = -ENOMEM; 1026 1035 btrfs_abort_transaction(trans, ret); 1027 - goto out; 1036 + return ret; 1028 1037 } 1029 1038 1030 1039 block_group = btrfs_lookup_block_group(trans->fs_info, start); ··· 1032 1041 DEBUG_WARN("no block group found for start=%llu", start); 1033 1042 ret = -ENOENT; 1034 1043 btrfs_abort_transaction(trans, ret); 1035 - goto out; 1044 + return ret; 1036 1045 } 1037 1046 1038 1047 mutex_lock(&block_group->free_space_lock); ··· 1042 1051 btrfs_abort_transaction(trans, ret); 1043 1052 1044 1053 btrfs_put_block_group(block_group); 1045 - out: 1046 - btrfs_free_path(path); 1054 + 1047 1055 return ret; 1048 1056 } 1049 1057 ··· 1456 1466 struct btrfs_block_group *block_group) 1457 1467 { 1458 1468 struct btrfs_root *root = btrfs_free_space_root(block_group); 1459 - struct btrfs_path *path; 1469 + BTRFS_PATH_AUTO_FREE(path); 1460 1470 struct btrfs_key key, 
found_key; 1461 1471 struct extent_buffer *leaf; 1462 1472 u64 start, end; ··· 1475 1485 if (unlikely(!path)) { 1476 1486 ret = -ENOMEM; 1477 1487 btrfs_abort_transaction(trans, ret); 1478 - goto out; 1488 + return ret; 1479 1489 } 1480 1490 1481 1491 start = block_group->start; ··· 1489 1499 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1490 1500 if (unlikely(ret)) { 1491 1501 btrfs_abort_transaction(trans, ret); 1492 - goto out; 1502 + return ret; 1493 1503 } 1494 1504 1495 1505 leaf = path->nodes[0]; ··· 1520 1530 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1521 1531 if (unlikely(ret)) { 1522 1532 btrfs_abort_transaction(trans, ret); 1523 - goto out; 1533 + return ret; 1524 1534 } 1525 1535 btrfs_release_path(path); 1526 1536 } 1527 1537 1528 1538 ret = 0; 1529 - out: 1530 - btrfs_free_path(path); 1539 + 1531 1540 return ret; 1532 1541 } 1533 1542 ··· 1691 1702 * Just like caching_thread() doesn't want to deadlock on the extent 1692 1703 * tree, we don't want to deadlock on the free space tree. 1693 1704 */ 1694 - path->skip_locking = 1; 1695 - path->search_commit_root = 1; 1705 + path->skip_locking = true; 1706 + path->search_commit_root = true; 1696 1707 path->reada = READA_FORWARD; 1697 1708 1698 1709 info = btrfs_search_free_space_info(NULL, block_group, path, 0);
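Two things stand out in free-space-tree.c: the NOFS comment is made precise (every caller holds an open transaction handle, so a GFP_KERNEL allocation that recursed into the filesystem and triggered a commit would deadlock), and a failed bitmap allocation now returns 0 instead of aborting the transaction, presumably because converting between the extent and bitmap representations is an optimization that can safely be skipped. The NOFS scoping is the standard pattern sketched below; kvzalloc() cannot honor GFP_NOFS directly, so the constraint is applied through the task flag:

    #include <linux/sched/mm.h>
    #include <linux/slab.h>

    static void *nofs_kvzalloc(size_t size)
    {
            unsigned int nofs_flag;
            void *p;

            /* Any reclaim this allocation triggers will skip paths
             * that could re-enter the filesystem. */
            nofs_flag = memalloc_nofs_save();
            p = kvzalloc(size, GFP_KERNEL);
            memalloc_nofs_restore(nofs_flag);
            return p;
    }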
+35 -1
fs/btrfs/fs.h
··· 29 29 #include "extent-io-tree.h" 30 30 #include "async-thread.h" 31 31 #include "block-rsv.h" 32 + #include "messages.h" 32 33 33 34 struct inode; 34 35 struct super_block; ··· 73 72 #define BTRFS_SUPER_INFO_OFFSET SZ_64K 74 73 #define BTRFS_SUPER_INFO_SIZE 4096 75 74 static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); 75 + 76 + /* Array of bytes with variable length, hexadecimal format 0x1234 */ 77 + #define BTRFS_CSUM_FMT "0x%*phN" 78 + #define BTRFS_CSUM_FMT_VALUE(size, bytes) size, bytes 79 + 80 + #define BTRFS_KEY_FMT "(%llu %u %llu)" 81 + #define BTRFS_KEY_FMT_VALUE(key) (key)->objectid, (key)->type, (key)->offset 76 82 77 83 /* 78 84 * Number of metadata items necessary for an unlink operation: ··· 131 123 132 124 /* No more delayed iput can be queued. */ 133 125 BTRFS_FS_STATE_NO_DELAYED_IPUT, 126 + 127 + /* 128 + * Emergency shutdown, a step further than transaction aborted by 129 + * rejecting all operations. 130 + */ 131 + BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, 134 132 135 133 BTRFS_FS_STATE_COUNT 136 134 }; ··· 658 644 struct workqueue_struct *endio_workers; 659 645 struct workqueue_struct *endio_meta_workers; 660 646 struct workqueue_struct *rmw_workers; 661 - struct workqueue_struct *compressed_write_workers; 662 647 struct btrfs_workqueue *endio_write_workers; 663 648 struct btrfs_workqueue *endio_freespace_worker; 664 649 struct btrfs_workqueue *caching_workers; ··· 1132 1119 #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ 1133 1120 (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ 1134 1121 &(fs_info)->fs_state))) 1122 + 1123 + static inline bool btrfs_is_shutdown(struct btrfs_fs_info *fs_info) 1124 + { 1125 + return test_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state); 1126 + } 1127 + 1128 + static inline void btrfs_force_shutdown(struct btrfs_fs_info *fs_info) 1129 + { 1130 + /* 1131 + * Here we do not want to use handle_fs_error(), which will mark the fs 1132 + * read-only. 1133 + * Some call sites like shutdown ioctl will mark the fs shutdown when 1134 + * the fs is frozen. But thaw path will handle RO and RW fs 1135 + * differently. 1136 + * 1137 + * So here we only mark the fs error without flipping it RO. 1138 + */ 1139 + WRITE_ONCE(fs_info->fs_error, -EIO); 1140 + if (!test_and_set_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state)) 1141 + btrfs_crit(fs_info, "emergency shutdown"); 1142 + } 1135 1143 1136 1144 /* 1137 1145 * We use folio flag owner_2 to indicate there is an ordered extent with
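fs.h gains the shutdown state bit plus its two helpers and centralizes the checksum/key printf format macros. Worth noting in btrfs_force_shutdown(): fs_error is written before the state bit, and test_and_set_bit() makes the transition idempotent, so the "emergency shutdown" message fires exactly once no matter how many callers race into the shutdown path. That once-only idiom in isolation, with a hypothetical state word:

    #include <linux/bitops.h>
    #include <linux/printk.h>

    static unsigned long fs_state;
    #define STATE_SHUTDOWN 0

    static void force_shutdown(void)
    {
            /* Atomic test-and-set: only the first caller observes the
             * bit clear, so the log line cannot be duplicated. */
            if (!test_and_set_bit(STATE_SHUTDOWN, &fs_state))
                    pr_crit("emergency shutdown\n");
    }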
+2 -3
fs/btrfs/inode-item.c
··· 312 312 if (!path) 313 313 return -ENOMEM; 314 314 315 - path->skip_release_on_error = 1; 315 + path->skip_release_on_error = true; 316 316 ret = btrfs_insert_empty_item(trans, root, path, &key, 317 317 ins_len); 318 318 if (ret == -EEXIST) { ··· 444 444 struct btrfs_truncate_control *control) 445 445 { 446 446 struct btrfs_fs_info *fs_info = root->fs_info; 447 - struct btrfs_path *path; 447 + BTRFS_PATH_AUTO_FREE(path); 448 448 struct extent_buffer *leaf; 449 449 struct btrfs_file_extent_item *fi; 450 450 struct btrfs_key key; ··· 730 730 if (!ret && control->last_size > new_size) 731 731 control->last_size = new_size; 732 732 733 - btrfs_free_path(path); 734 733 return ret; 735 734 }
+117 -77
fs/btrfs/inode.c
··· 72 72 #include "backref.h" 73 73 #include "raid-stripe-tree.h" 74 74 #include "fiemap.h" 75 + #include "delayed-inode.h" 75 76 76 77 #define COW_FILE_RANGE_KEEP_LOCKED (1UL << 0) 77 78 #define COW_FILE_RANGE_NO_INLINE (1UL << 1) ··· 132 131 struct btrfs_fs_info *fs_info = warn->fs_info; 133 132 struct extent_buffer *eb; 134 133 struct btrfs_inode_item *inode_item; 135 - struct inode_fs_paths *ipath = NULL; 134 + struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL; 136 135 struct btrfs_root *local_root; 137 136 struct btrfs_key key; 138 137 unsigned int nofs_flag; ··· 197 196 } 198 197 199 198 btrfs_put_root(local_root); 200 - free_ipath(ipath); 201 199 return 0; 202 200 203 201 err: ··· 204 204 "checksum error at logical %llu mirror %u root %llu inode %llu offset %llu, path resolving failed with ret=%d", 205 205 warn->logical, warn->mirror_num, root, inum, offset, ret); 206 206 207 - free_ipath(ipath); 208 207 return ret; 209 208 } 210 209 ··· 235 236 if (logical == U64_MAX) { 236 237 btrfs_warn_rl(fs_info, "has data reloc tree but no running relocation"); 237 238 btrfs_warn_rl(fs_info, 238 - "csum failed root %lld ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", 239 + "csum failed root %lld ino %llu off %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d", 239 240 btrfs_root_id(inode->root), btrfs_ino(inode), file_off, 240 - CSUM_FMT_VALUE(csum_size, csum), 241 - CSUM_FMT_VALUE(csum_size, csum_expected), 241 + BTRFS_CSUM_FMT_VALUE(csum_size, csum), 242 + BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected), 242 243 mirror_num); 243 244 return; 244 245 } 245 246 246 247 logical += file_off; 247 248 btrfs_warn_rl(fs_info, 248 - "csum failed root %lld ino %llu off %llu logical %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", 249 + "csum failed root %lld ino %llu off %llu logical %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d", 249 250 btrfs_root_id(inode->root), 250 251 btrfs_ino(inode), file_off, logical, 251 - CSUM_FMT_VALUE(csum_size, csum), 252 - CSUM_FMT_VALUE(csum_size, csum_expected), 252 + BTRFS_CSUM_FMT_VALUE(csum_size, csum), 253 + BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected), 253 254 mirror_num); 254 255 255 256 ret = extent_from_logical(fs_info, logical, &path, &found_key, &flags); ··· 320 321 /* Output without objectid, which is more meaningful */ 321 322 if (btrfs_root_id(root) >= BTRFS_LAST_FREE_OBJECTID) { 322 323 btrfs_warn_rl(root->fs_info, 323 - "csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", 324 + "csum failed root %lld ino %lld off %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d", 324 325 btrfs_root_id(root), btrfs_ino(inode), 325 326 logical_start, 326 - CSUM_FMT_VALUE(csum_size, csum), 327 - CSUM_FMT_VALUE(csum_size, csum_expected), 327 + BTRFS_CSUM_FMT_VALUE(csum_size, csum), 328 + BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected), 328 329 mirror_num); 329 330 } else { 330 331 btrfs_warn_rl(root->fs_info, 331 - "csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", 332 + "csum failed root %llu ino %llu off %llu csum " BTRFS_CSUM_FMT " expected csum " BTRFS_CSUM_FMT " mirror %d", 332 333 btrfs_root_id(root), btrfs_ino(inode), 333 334 logical_start, 334 - CSUM_FMT_VALUE(csum_size, csum), 335 - CSUM_FMT_VALUE(csum_size, csum_expected), 335 + BTRFS_CSUM_FMT_VALUE(csum_size, csum), 336 + BTRFS_CSUM_FMT_VALUE(csum_size, csum_expected), 336 337 mirror_num); 337 338 } 338 
339 } ··· 591 592 592 593 /* Inline extents must be the entirety of the file. */ 593 594 if (size < i_size_read(&inode->vfs_inode)) 595 + return false; 596 + 597 + /* Encrypted file cannot be inlined. */ 598 + if (IS_ENCRYPTED(&inode->vfs_inode)) 594 599 return false; 595 600 596 601 return true; ··· 868 865 u64 actual_end; 869 866 u64 i_size; 870 867 int ret = 0; 871 - struct folio **folios; 868 + struct folio **folios = NULL; 872 869 unsigned long nr_folios; 873 870 unsigned long total_compressed = 0; 874 871 unsigned long total_in = 0; ··· 876 873 int i; 877 874 int compress_type = fs_info->compress_type; 878 875 int compress_level = fs_info->compress_level; 876 + 877 + if (unlikely(btrfs_is_shutdown(fs_info))) 878 + goto cleanup_and_bail_uncompressed; 879 879 880 880 inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); 881 881 ··· 1141 1135 ret = btrfs_reserve_extent(root, async_extent->ram_size, 1142 1136 async_extent->compressed_size, 1143 1137 async_extent->compressed_size, 1144 - 0, *alloc_hint, &ins, 1, 1); 1138 + 0, *alloc_hint, &ins, true, true); 1145 1139 if (ret) { 1146 1140 /* 1147 1141 * We can't reserve contiguous space for the compressed size. ··· 1295 1289 unsigned long page_ops; 1296 1290 int ret = 0; 1297 1291 1292 + if (unlikely(btrfs_is_shutdown(fs_info))) { 1293 + ret = -EIO; 1294 + goto out_unlock; 1295 + } 1296 + 1298 1297 if (btrfs_is_free_space_inode(inode)) { 1299 1298 ret = -EINVAL; 1300 1299 goto out_unlock; ··· 1364 1353 1365 1354 ret = btrfs_reserve_extent(root, num_bytes, num_bytes, 1366 1355 min_alloc_size, 0, alloc_hint, 1367 - &ins, 1, 1); 1356 + &ins, true, true); 1368 1357 if (ret == -EAGAIN) { 1369 1358 /* 1370 1359 * btrfs_reserve_extent only returns -EAGAIN for zoned ··· 2018 2007 { 2019 2008 struct btrfs_fs_info *fs_info = inode->root->fs_info; 2020 2009 struct btrfs_root *root = inode->root; 2021 - struct btrfs_path *path; 2010 + struct btrfs_path *path = NULL; 2022 2011 u64 cow_start = (u64)-1; 2023 2012 /* 2024 2013 * If not 0, represents the inclusive end of the last fallback_to_cow() ··· 2048 2037 */ 2049 2038 ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root)); 2050 2039 2040 + if (unlikely(btrfs_is_shutdown(fs_info))) { 2041 + ret = -EIO; 2042 + goto error; 2043 + } 2051 2044 path = btrfs_alloc_path(); 2052 2045 if (!path) { 2053 2046 ret = -ENOMEM; ··· 3349 3334 return btrfs_finish_one_ordered(ordered); 3350 3335 } 3351 3336 3352 - void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, 3353 - u8 *dest) 3337 + /* 3338 + * Calculate the checksum of an fs block at physical memory address @paddr, 3339 + * and save the result to @dest. 3340 + * 3341 + * The folio containing @paddr must be large enough to contain a full fs block. 3342 + */ 3343 + void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info, 3344 + const phys_addr_t paddr, u8 *dest) 3354 3345 { 3355 3346 struct folio *folio = page_folio(phys_to_page(paddr)); 3356 3347 const u32 blocksize = fs_info->sectorsize; 3357 - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 3348 + const u32 step = min(blocksize, PAGE_SIZE); 3349 + const u32 nr_steps = blocksize / step; 3350 + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; 3358 3351 3359 - shash->tfm = fs_info->csum_shash; 3360 3352 /* The full block must be inside the folio. 
*/ 3361 3353 ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); 3362 3354 3363 - if (folio_test_partial_kmap(folio)) { 3364 - size_t cur = paddr; 3355 + for (int i = 0; i < nr_steps; i++) { 3356 + u32 pindex = offset_in_folio(folio, paddr + i * step) >> PAGE_SHIFT; 3365 3357 3366 - crypto_shash_init(shash); 3367 - while (cur < paddr + blocksize) { 3368 - void *kaddr; 3369 - size_t len = min(paddr + blocksize - cur, 3370 - PAGE_SIZE - offset_in_page(cur)); 3371 - 3372 - kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur)); 3373 - crypto_shash_update(shash, kaddr, len); 3374 - kunmap_local(kaddr); 3375 - cur += len; 3376 - } 3377 - crypto_shash_final(shash, dest); 3378 - } else { 3379 - crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest); 3358 + /* 3359 + * For bs <= ps cases, we will only run the loop once, so the offset 3360 + * inside the page will only added to paddrs[0]. 3361 + * 3362 + * For bs > ps cases, the block must be page aligned, thus offset 3363 + * inside the page will always be 0. 3364 + */ 3365 + paddrs[i] = page_to_phys(folio_page(folio, pindex)) + offset_in_page(paddr); 3380 3366 } 3367 + return btrfs_calculate_block_csum_pages(fs_info, paddrs, dest); 3381 3368 } 3369 + 3370 + /* 3371 + * Calculate the checksum of a fs block backed by multiple noncontiguous pages 3372 + * at @paddrs[] and save the result to @dest. 3373 + * 3374 + * The folio containing @paddr must be large enough to contain a full fs block. 3375 + */ 3376 + void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info, 3377 + const phys_addr_t paddrs[], u8 *dest) 3378 + { 3379 + const u32 blocksize = fs_info->sectorsize; 3380 + const u32 step = min(blocksize, PAGE_SIZE); 3381 + const u32 nr_steps = blocksize / step; 3382 + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 3383 + 3384 + shash->tfm = fs_info->csum_shash; 3385 + crypto_shash_init(shash); 3386 + for (int i = 0; i < nr_steps; i++) { 3387 + const phys_addr_t paddr = paddrs[i]; 3388 + void *kaddr; 3389 + 3390 + ASSERT(offset_in_page(paddr) + step <= PAGE_SIZE); 3391 + kaddr = kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr); 3392 + crypto_shash_update(shash, kaddr, step); 3393 + kunmap_local(kaddr); 3394 + } 3395 + crypto_shash_final(shash, dest); 3396 + } 3397 + 3382 3398 /* 3383 3399 * Verify the checksum for a single sector without any extra action that depend 3384 3400 * on the type of I/O. ··· 3419 3373 int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, 3420 3374 const u8 * const csum_expected) 3421 3375 { 3422 - btrfs_calculate_block_csum(fs_info, paddr, csum); 3376 + btrfs_calculate_block_csum_folio(fs_info, paddr, csum); 3423 3377 if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0)) 3424 3378 return -EIO; 3425 3379 return 0; 3426 3380 } 3427 3381 3428 3382 /* 3429 - * Verify the checksum of a single data sector. 3383 + * Verify the checksum of a single data sector, which can be scattered at 3384 + * different noncontiguous pages. 3430 3385 * 3431 3386 * @bbio: btrfs_io_bio which contains the csum 3432 3387 * @dev: device the sector is on 3433 3388 * @bio_offset: offset to the beginning of the bio (in bytes) 3434 - * @bv: bio_vec to check 3389 + * @paddrs: physical addresses which back the fs block 3435 3390 * 3436 3391 * Check if the checksum on a data block is valid. When a checksum mismatch is 3437 3392 * detected, report the error and fill the corrupted range with zero. 
··· 3440 3393 * Return %true if the sector is ok or had no checksum to start with, else %false. 3441 3394 */ 3442 3395 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, 3443 - u32 bio_offset, phys_addr_t paddr) 3396 + u32 bio_offset, const phys_addr_t paddrs[]) 3444 3397 { 3445 3398 struct btrfs_inode *inode = bbio->inode; 3446 3399 struct btrfs_fs_info *fs_info = inode->root->fs_info; 3447 3400 const u32 blocksize = fs_info->sectorsize; 3448 - struct folio *folio; 3401 + const u32 step = min(blocksize, PAGE_SIZE); 3402 + const u32 nr_steps = blocksize / step; 3449 3403 u64 file_offset = bbio->file_offset + bio_offset; 3450 3404 u64 end = file_offset + blocksize - 1; 3451 3405 u8 *csum_expected; ··· 3466 3418 3467 3419 csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) * 3468 3420 fs_info->csum_size; 3469 - if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected)) 3421 + btrfs_calculate_block_csum_pages(fs_info, paddrs, csum); 3422 + if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0)) 3470 3423 goto zeroit; 3471 3424 return true; 3472 3425 ··· 3476 3427 bbio->mirror_num); 3477 3428 if (dev) 3478 3429 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); 3479 - folio = page_folio(phys_to_page(paddr)); 3480 - ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); 3481 - folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize); 3430 + for (int i = 0; i < nr_steps; i++) 3431 + memzero_page(phys_to_page(paddrs[i]), offset_in_page(paddrs[i]), step); 3482 3432 return false; 3483 3433 } 3484 3434 ··· 4364 4316 * operations on the log tree, increasing latency for applications. 4365 4317 */ 4366 4318 if (!rename_ctx) { 4367 - btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino); 4368 - btrfs_del_dir_entries_in_log(trans, root, name, dir, index); 4319 + btrfs_del_inode_ref_in_log(trans, name, inode, dir); 4320 + btrfs_del_dir_entries_in_log(trans, name, dir, index); 4369 4321 } 4370 4322 4371 4323 /* ··· 4464 4416 { 4465 4417 struct btrfs_root *root = dir->root; 4466 4418 struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); 4467 - struct btrfs_path *path; 4419 + BTRFS_PATH_AUTO_FREE(path); 4468 4420 struct extent_buffer *leaf; 4469 4421 struct btrfs_dir_item *di; 4470 4422 struct btrfs_key key; ··· 4557 4509 if (ret) 4558 4510 btrfs_abort_transaction(trans, ret); 4559 4511 out: 4560 - btrfs_free_path(path); 4561 4512 fscrypt_free_filename(&fname); 4562 4513 return ret; 4563 4514 } ··· 5681 5634 location->type != BTRFS_ROOT_ITEM_KEY)) { 5682 5635 ret = -EUCLEAN; 5683 5636 btrfs_warn(root->fs_info, 5684 - "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))", 5637 + "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location " BTRFS_KEY_FMT ")", 5685 5638 __func__, fname.disk_name.name, btrfs_ino(dir), 5686 - location->objectid, location->type, location->offset); 5639 + BTRFS_KEY_FMT_VALUE(location)); 5687 5640 } 5688 5641 if (!ret) 5689 5642 *type = btrfs_dir_ftype(path->nodes[0], di); ··· 7121 7074 * point the commit_root has everything we need. 
7122 7075 */ 7123 7076 if (btrfs_is_free_space_inode(inode)) { 7124 - path->search_commit_root = 1; 7125 - path->skip_locking = 1; 7077 + path->search_commit_root = true; 7078 + path->skip_locking = true; 7126 7079 } 7127 7080 7128 7081 ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); ··· 7632 7585 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 7633 7586 EXTENT_DEFRAG, &cached_state); 7634 7587 7635 - spin_lock_irq(&inode->ordered_tree_lock); 7588 + spin_lock(&inode->ordered_tree_lock); 7636 7589 set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); 7637 7590 ordered->truncated_len = min(ordered->truncated_len, 7638 7591 cur - ordered->file_offset); 7639 - spin_unlock_irq(&inode->ordered_tree_lock); 7592 + spin_unlock(&inode->ordered_tree_lock); 7640 7593 7641 7594 /* 7642 7595 * If the ordered extent has finished, we're safe to delete all ··· 7698 7651 .ino = btrfs_ino(inode), 7699 7652 .min_type = BTRFS_EXTENT_DATA_KEY, 7700 7653 .clear_extent_range = true, 7654 + .new_size = inode->vfs_inode.i_size, 7701 7655 }; 7702 7656 struct btrfs_root *root = inode->root; 7703 7657 struct btrfs_fs_info *fs_info = root->fs_info; 7704 7658 struct btrfs_block_rsv rsv; 7705 7659 int ret; 7706 7660 struct btrfs_trans_handle *trans; 7707 - u64 mask = fs_info->sectorsize - 1; 7708 7661 const u64 min_size = btrfs_calc_metadata_size(fs_info, 1); 7662 + const u64 lock_start = round_down(inode->vfs_inode.i_size, fs_info->sectorsize); 7663 + const u64 i_size_up = round_up(inode->vfs_inode.i_size, fs_info->sectorsize); 7664 + 7665 + /* Our inode is locked and the i_size can't be changed concurrently. */ 7666 + btrfs_assert_inode_locked(inode); 7709 7667 7710 7668 if (!skip_writeback) { 7711 - ret = btrfs_wait_ordered_range(inode, 7712 - inode->vfs_inode.i_size & (~mask), 7713 - (u64)-1); 7669 + ret = btrfs_wait_ordered_range(inode, lock_start, (u64)-1); 7714 7670 if (ret) 7715 7671 return ret; 7716 7672 } ··· 7777 7727 7778 7728 while (1) { 7779 7729 struct extent_state *cached_state = NULL; 7780 - const u64 new_size = inode->vfs_inode.i_size; 7781 - const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize); 7782 7730 7783 - control.new_size = new_size; 7784 7731 btrfs_lock_extent(&inode->io_tree, lock_start, (u64)-1, &cached_state); 7785 7732 /* 7786 7733 * We want to drop from the next block forward in case this new 7787 7734 * size is not block aligned since we will be keeping the last 7788 7735 * block of the extent just the way it is. 
7789 7736 */ 7790 - btrfs_drop_extent_map_range(inode, 7791 - ALIGN(new_size, fs_info->sectorsize), 7792 - (u64)-1, false); 7737 + btrfs_drop_extent_map_range(inode, i_size_up, (u64)-1, false); 7793 7738 7794 7739 ret = btrfs_truncate_inode_items(trans, root, &control); 7795 7740 ··· 9098 9053 */ 9099 9054 cur_bytes = min(cur_bytes, last_alloc); 9100 9055 ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, 9101 - min_size, 0, *alloc_hint, &ins, 1, 0); 9056 + min_size, 0, *alloc_hint, &ins, true, false); 9102 9057 if (ret) 9103 9058 break; 9104 9059 ··· 9434 9389 u64 disk_bytenr, u64 disk_io_size, 9435 9390 struct page **pages, void *uring_ctx) 9436 9391 { 9437 - struct btrfs_fs_info *fs_info = inode->root->fs_info; 9438 9392 struct btrfs_encoded_read_private *priv, sync_priv; 9439 9393 struct completion sync_reads; 9440 9394 unsigned long i = 0; ··· 9458 9414 priv->status = 0; 9459 9415 priv->uring_ctx = uring_ctx; 9460 9416 9461 - bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, 9417 + bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, inode, 0, 9462 9418 btrfs_encoded_read_endio, priv); 9463 9419 bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; 9464 - bbio->inode = inode; 9465 9420 9466 9421 do { 9467 9422 size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE); ··· 9469 9426 refcount_inc(&priv->pending_refs); 9470 9427 btrfs_submit_bbio(bbio, 0); 9471 9428 9472 - bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, 9429 + bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, inode, 0, 9473 9430 btrfs_encoded_read_endio, priv); 9474 9431 bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; 9475 - bbio->inode = inode; 9476 9432 continue; 9477 9433 } 9478 9434 ··· 9862 9820 } 9863 9821 9864 9822 for (;;) { 9865 - struct btrfs_ordered_extent *ordered; 9866 - 9867 9823 ret = btrfs_wait_ordered_range(inode, start, num_bytes); 9868 9824 if (ret) 9869 9825 goto out_folios; ··· 9911 9871 } 9912 9872 9913 9873 ret = btrfs_reserve_extent(root, disk_num_bytes, disk_num_bytes, 9914 - disk_num_bytes, 0, 0, &ins, 1, 1); 9874 + disk_num_bytes, 0, 0, &ins, true, true); 9915 9875 if (ret) 9916 9876 goto out_delalloc_release; 9917 9877 extent_reserved = true;
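The inode.c checksum rework is central to the block size > page size support in this series: a filesystem block may now be backed by physically noncontiguous pages, so the digest is built incrementally over page-sized steps instead of over one contiguous mapping, with BTRFS_MAX_BLOCKSIZE / PAGE_SIZE bounding the on-stack address array. On the verification side, btrfs_data_csum_ok() likewise takes an array of physical addresses and, on mismatch, zeroes the block page by page with memzero_page() rather than assuming a single folio covers it. The incremental shash pattern, reduced to its essentials (error handling elided; the crypto_shash_* helpers return 0 on success):

    #include <crypto/hash.h>

    /* @kaddrs: one already-mapped address per step, @step bytes each. */
    static void digest_steps(struct shash_desc *shash, void *kaddrs[],
                             unsigned int nr_steps, unsigned int step,
                             u8 *dest)
    {
            crypto_shash_init(shash);
            for (unsigned int i = 0; i < nr_steps; i++)
                    crypto_shash_update(shash, kaddrs[i], step);
            crypto_shash_final(shash, dest);
    }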
+78 -95
fs/btrfs/ioctl.c
··· 503 503 struct btrfs_fs_info *fs_info = inode_to_fs_info(dir); 504 504 struct btrfs_trans_handle *trans; 505 505 struct btrfs_key key; 506 - struct btrfs_root_item *root_item; 506 + struct btrfs_root_item AUTO_KFREE(root_item); 507 507 struct btrfs_inode_item *inode_item; 508 508 struct extent_buffer *leaf; 509 509 struct btrfs_root *root = BTRFS_I(dir)->root; ··· 527 527 528 528 ret = btrfs_get_free_objectid(fs_info->tree_root, &objectid); 529 529 if (ret) 530 - goto out_root_item; 530 + return ret; 531 531 532 532 /* 533 533 * Don't create subvolume whose level is not zero. Or qgroup will be 534 534 * screwed up since it assumes subvolume qgroup's level to be 0. 535 535 */ 536 - if (btrfs_qgroup_level(objectid)) { 537 - ret = -ENOSPC; 538 - goto out_root_item; 539 - } 536 + if (btrfs_qgroup_level(objectid)) 537 + return -ENOSPC; 540 538 541 539 ret = get_anon_bdev(&anon_dev); 542 540 if (ret < 0) 543 - goto out_root_item; 541 + return ret; 544 542 545 543 new_inode_args.inode = btrfs_new_subvol_inode(idmap, dir); 546 544 if (!new_inode_args.inode) { ··· 690 692 out_anon_dev: 691 693 if (anon_dev) 692 694 free_anon_bdev(anon_dev); 693 - out_root_item: 694 - kfree(root_item); 695 + 695 696 return ret; 696 697 } 697 698 ··· 1596 1599 { 1597 1600 struct btrfs_fs_info *info = root->fs_info; 1598 1601 struct btrfs_key key; 1599 - struct btrfs_path *path; 1602 + BTRFS_PATH_AUTO_FREE(path); 1600 1603 int ret; 1601 1604 int num_found = 0; 1602 1605 unsigned long sk_offset = 0; ··· 1616 1619 } else { 1617 1620 /* Look up the root from the arguments. */ 1618 1621 root = btrfs_get_fs_root(info, sk->tree_id, true); 1619 - if (IS_ERR(root)) { 1620 - btrfs_free_path(path); 1622 + if (IS_ERR(root)) 1621 1623 return PTR_ERR(root); 1622 - } 1623 1624 } 1624 1625 1625 1626 key.objectid = sk->min_objectid; ··· 1651 1656 1652 1657 sk->nr_items = num_found; 1653 1658 btrfs_put_root(root); 1654 - btrfs_free_path(path); 1655 1659 return ret; 1656 1660 } 1657 1661 ··· 1733 1739 int total_len = 0; 1734 1740 struct btrfs_inode_ref *iref; 1735 1741 struct extent_buffer *l; 1736 - struct btrfs_path *path; 1742 + BTRFS_PATH_AUTO_FREE(path); 1737 1743 1738 1744 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 1739 1745 name[0]='\0'; ··· 1794 1800 ret = 0; 1795 1801 out: 1796 1802 btrfs_put_root(root); 1797 - btrfs_free_path(path); 1798 1803 return ret; 1799 1804 } 1800 1805 ··· 1810 1817 struct btrfs_inode_ref *iref; 1811 1818 struct btrfs_root_ref *rref; 1812 1819 struct btrfs_root *root = NULL; 1813 - struct btrfs_path *path; 1814 - struct btrfs_key key, key2; 1820 + BTRFS_PATH_AUTO_FREE(path); 1821 + struct btrfs_key key; 1815 1822 struct extent_buffer *leaf; 1816 1823 char *ptr; 1817 1824 int slot; ··· 1831 1838 ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; 1832 1839 1833 1840 root = btrfs_get_fs_root(fs_info, treeid, true); 1834 - if (IS_ERR(root)) { 1835 - ret = PTR_ERR(root); 1836 - goto out; 1837 - } 1841 + if (IS_ERR(root)) 1842 + return PTR_ERR(root); 1838 1843 1839 1844 key.objectid = dirid; 1840 1845 key.type = BTRFS_INODE_REF_KEY; ··· 1864 1873 read_extent_buffer(leaf, ptr, 1865 1874 (unsigned long)(iref + 1), len); 1866 1875 1867 - /* Check the read+exec permission of this directory */ 1868 - ret = btrfs_previous_item(root, path, dirid, 1869 - BTRFS_INODE_ITEM_KEY); 1870 - if (ret < 0) { 1871 - goto out_put; 1872 - } else if (ret > 0) { 1873 - ret = -ENOENT; 1874 - goto out_put; 1875 - } 1876 - 1877 - leaf = path->nodes[0]; 1878 - slot = path->slots[0]; 1879 - btrfs_item_key_to_cpu(leaf, &key2, 
slot); 1880 - if (key2.objectid != dirid) { 1881 - ret = -ENOENT; 1882 - goto out_put; 1883 - } 1884 - 1885 1876 /* 1886 1877 * We don't need the path anymore, so release it and 1887 1878 * avoid deadlocks and lockdep warnings in case ··· 1871 1898 * btree and lock the same leaf. 1872 1899 */ 1873 1900 btrfs_release_path(path); 1874 - temp_inode = btrfs_iget(key2.objectid, root); 1901 + temp_inode = btrfs_iget(key.offset, root); 1875 1902 if (IS_ERR(temp_inode)) { 1876 1903 ret = PTR_ERR(temp_inode); 1877 1904 goto out_put; 1878 1905 } 1906 + /* Check the read+exec permission of this directory. */ 1879 1907 ret = inode_permission(idmap, &temp_inode->vfs_inode, 1880 1908 MAY_READ | MAY_EXEC); 1881 1909 iput(&temp_inode->vfs_inode); 1882 - if (ret) { 1883 - ret = -EACCES; 1910 + if (ret) 1884 1911 goto out_put; 1885 - } 1886 1912 1887 1913 if (key.offset == upper_limit) 1888 1914 break; ··· 1907 1935 key.type = BTRFS_ROOT_REF_KEY; 1908 1936 key.offset = args->treeid; 1909 1937 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 1910 - if (ret < 0) { 1911 - goto out; 1912 - } else if (ret > 0) { 1913 - ret = -ENOENT; 1914 - goto out; 1915 - } 1938 + if (ret < 0) 1939 + return ret; 1940 + else if (ret > 0) 1941 + return -ENOENT; 1916 1942 1917 1943 leaf = path->nodes[0]; 1918 1944 slot = path->slots[0]; ··· 1920 1950 item_len = btrfs_item_size(leaf, slot); 1921 1951 /* Check if dirid in ROOT_REF corresponds to passed dirid */ 1922 1952 rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); 1923 - if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) { 1924 - ret = -EINVAL; 1925 - goto out; 1926 - } 1953 + if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) 1954 + return -EINVAL; 1927 1955 1928 1956 /* Copy subvolume's name */ 1929 1957 item_off += sizeof(struct btrfs_root_ref); ··· 1931 1963 1932 1964 out_put: 1933 1965 btrfs_put_root(root); 1934 - out: 1935 - btrfs_free_path(path); 1966 + 1936 1967 return ret; 1937 1968 } 1938 1969 ··· 2906 2939 struct btrfs_ioctl_space_args space_args = { 0 }; 2907 2940 struct btrfs_ioctl_space_info space; 2908 2941 struct btrfs_ioctl_space_info *dest; 2909 - struct btrfs_ioctl_space_info *dest_orig; 2942 + struct btrfs_ioctl_space_info AUTO_KFREE(dest_orig); 2910 2943 struct btrfs_ioctl_space_info __user *user_dest; 2911 2944 struct btrfs_space_info *info; 2912 2945 static const u64 types[] = { ··· 3027 3060 (arg + sizeof(struct btrfs_ioctl_space_args)); 3028 3061 3029 3062 if (copy_to_user(user_dest, dest_orig, alloc_size)) 3030 - ret = -EFAULT; 3063 + return -EFAULT; 3031 3064 3032 - kfree(dest_orig); 3033 3065 out: 3034 3066 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 3035 3067 ret = -EFAULT; ··· 3247 3281 u64 rel_ptr; 3248 3282 int size; 3249 3283 struct btrfs_ioctl_ino_path_args *ipa = NULL; 3250 - struct inode_fs_paths *ipath = NULL; 3284 + struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL; 3251 3285 struct btrfs_path *path; 3252 3286 3253 3287 if (!capable(CAP_DAC_READ_SEARCH)) ··· 3295 3329 3296 3330 out: 3297 3331 btrfs_free_path(path); 3298 - free_ipath(ipath); 3299 3332 kfree(ipa); 3300 3333 3301 3334 return ret; ··· 3559 3594 static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info, 3560 3595 void __user *arg) 3561 3596 { 3562 - struct btrfs_ioctl_balance_args *bargs; 3597 + struct btrfs_ioctl_balance_args AUTO_KFREE(bargs); 3563 3598 int ret = 0; 3564 3599 3565 3600 if (!capable(CAP_SYS_ADMIN)) ··· 3581 3616 3582 3617 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3583 3618 ret = 
-EFAULT; 3584 - 3585 - kfree(bargs); 3586 3619 out: 3587 3620 mutex_unlock(&fs_info->balance_mutex); 3588 3621 return ret; ··· 4174 4211 u64 safe_set, u64 safe_clear) 4175 4212 { 4176 4213 const char *type = btrfs_feature_set_name(set); 4177 - char *names; 4214 + const char AUTO_KFREE(names); 4178 4215 u64 disallowed, unsupported; 4179 4216 u64 set_mask = flags & change_mask; 4180 4217 u64 clear_mask = ~flags & change_mask; ··· 4182 4219 unsupported = set_mask & ~supported_flags; 4183 4220 if (unsupported) { 4184 4221 names = btrfs_printable_features(set, unsupported); 4185 - if (names) { 4222 + if (names) 4186 4223 btrfs_warn(fs_info, 4187 4224 "this kernel does not support the %s feature bit%s", 4188 4225 names, strchr(names, ',') ? "s" : ""); 4189 - kfree(names); 4190 - } else 4226 + else 4191 4227 btrfs_warn(fs_info, 4192 4228 "this kernel does not support %s bits 0x%llx", 4193 4229 type, unsupported); ··· 4196 4234 disallowed = set_mask & ~safe_set; 4197 4235 if (disallowed) { 4198 4236 names = btrfs_printable_features(set, disallowed); 4199 - if (names) { 4237 + if (names) 4200 4238 btrfs_warn(fs_info, 4201 4239 "can't set the %s feature bit%s while mounted", 4202 4240 names, strchr(names, ',') ? "s" : ""); 4203 - kfree(names); 4204 - } else 4241 + else 4205 4242 btrfs_warn(fs_info, 4206 4243 "can't set %s bits 0x%llx while mounted", 4207 4244 type, disallowed); ··· 4210 4249 disallowed = clear_mask & ~safe_clear; 4211 4250 if (disallowed) { 4212 4251 names = btrfs_printable_features(set, disallowed); 4213 - if (names) { 4252 + if (names) 4214 4253 btrfs_warn(fs_info, 4215 4254 "can't clear the %s feature bit%s while mounted", 4216 4255 names, strchr(names, ',') ? "s" : ""); 4217 - kfree(names); 4218 - } else 4256 + else 4219 4257 btrfs_warn(fs_info, 4220 4258 "can't clear %s bits 0x%llx while mounted", 4221 4259 type, disallowed); ··· 4361 4401 goto out_acct; 4362 4402 } 4363 4403 4364 - if (fs_info->sectorsize > PAGE_SIZE) { 4365 - ret = -ENOTTY; 4366 - goto out_acct; 4367 - } 4368 4404 if (compat) { 4369 4405 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 4370 4406 struct btrfs_ioctl_encoded_io_args_32 args32; ··· 4452 4496 4453 4497 static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat) 4454 4498 { 4455 - struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode); 4456 4499 struct btrfs_ioctl_encoded_io_args args; 4457 4500 struct iovec iovstack[UIO_FASTIOV]; 4458 4501 struct iovec *iov = iovstack; ··· 4462 4507 4463 4508 if (!capable(CAP_SYS_ADMIN)) { 4464 4509 ret = -EPERM; 4465 - goto out_acct; 4466 - } 4467 - 4468 - if (fs_info->sectorsize > PAGE_SIZE) { 4469 - ret = -ENOTTY; 4470 4510 goto out_acct; 4471 4511 } 4472 4512 ··· 4747 4797 ret = -EPERM; 4748 4798 goto out_acct; 4749 4799 } 4750 - if (fs_info->sectorsize > PAGE_SIZE) { 4751 - ret = -ENOTTY; 4752 - goto out_acct; 4753 - } 4754 - 4755 4800 sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); 4756 4801 4757 4802 if (issue_flags & IO_URING_F_COMPAT) { ··· 4874 4929 static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags) 4875 4930 { 4876 4931 struct file *file = cmd->file; 4877 - struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode); 4878 4932 loff_t pos; 4879 4933 struct kiocb kiocb; 4880 4934 ssize_t ret; ··· 4888 4944 ret = -EPERM; 4889 4945 goto out_acct; 4890 4946 } 4891 - if (fs_info->sectorsize > PAGE_SIZE) { 4892 - ret = -ENOTTY; 4893 - goto out_acct; 4894 - } 4895 - 4896 4947 sqe_addr = 
u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); 4897 4948 4898 4949 if (!(file->f_mode & FMODE_WRITE)) { ··· 5000 5061 5001 5062 int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 5002 5063 { 5064 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(cmd->file))))) 5065 + return -EIO; 5066 + 5003 5067 switch (cmd->cmd_op) { 5004 5068 case BTRFS_IOC_ENCODED_READ: 5005 5069 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) ··· 5145 5203 5146 5204 return 0; 5147 5205 } 5206 + 5207 + #ifdef CONFIG_BTRFS_EXPERIMENTAL 5208 + static int btrfs_ioctl_shutdown(struct btrfs_fs_info *fs_info, unsigned long arg) 5209 + { 5210 + int ret = 0; 5211 + u32 flags; 5212 + 5213 + if (!capable(CAP_SYS_ADMIN)) 5214 + return -EPERM; 5215 + 5216 + if (get_user(flags, (u32 __user *)arg)) 5217 + return -EFAULT; 5218 + 5219 + if (flags >= BTRFS_SHUTDOWN_FLAGS_LAST) 5220 + return -EINVAL; 5221 + 5222 + if (btrfs_is_shutdown(fs_info)) 5223 + return 0; 5224 + 5225 + switch (flags) { 5226 + case BTRFS_SHUTDOWN_FLAGS_LOGFLUSH: 5227 + case BTRFS_SHUTDOWN_FLAGS_DEFAULT: 5228 + ret = freeze_super(fs_info->sb, FREEZE_HOLDER_KERNEL, NULL); 5229 + if (ret) 5230 + return ret; 5231 + btrfs_force_shutdown(fs_info); 5232 + ret = thaw_super(fs_info->sb, FREEZE_HOLDER_KERNEL, NULL); 5233 + if (ret) 5234 + return ret; 5235 + break; 5236 + case BTRFS_SHUTDOWN_FLAGS_NOLOGFLUSH: 5237 + btrfs_force_shutdown(fs_info); 5238 + break; 5239 + } 5240 + return ret; 5241 + } 5242 + #endif 5148 5243 5149 5244 long btrfs_ioctl(struct file *file, unsigned int 5150 5245 cmd, unsigned long arg) ··· 5338 5359 #endif 5339 5360 case BTRFS_IOC_SUBVOL_SYNC_WAIT: 5340 5361 return btrfs_ioctl_subvol_sync(fs_info, argp); 5362 + #ifdef CONFIG_BTRFS_EXPERIMENTAL 5363 + case BTRFS_IOC_SHUTDOWN: 5364 + return btrfs_ioctl_shutdown(fs_info, arg); 5365 + #endif 5341 5366 } 5342 5367 5343 5368 return -ENOTTY;
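The ioctl.c hunks bundle several threads: AUTO_KFREE conversions, removal of the sectorsize > PAGE_SIZE guards on encoded I/O, no longer rewriting the return value of inode_permission(), a simpler INO_LOOKUP_USER that drops the backwards search for the directory inode item, and the experimental shutdown ioctl itself. For the default and log-flush variants the handler freezes the super block first, so dirty data and the log reach disk before the state flips, then thaws; NOLOGFLUSH skips both. From userspace the call is a plain u32-flags ioctl; a sketch, assuming UAPI headers from a kernel that ships the experimental BTRFS_IOC_SHUTDOWN and flag definitions:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/btrfs.h>

    int main(int argc, char **argv)
    {
            __u32 flags = BTRFS_SHUTDOWN_FLAGS_DEFAULT;
            int fd, ret;

            if (argc != 2) {
                    fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
                    return 1;
            }
            fd = open(argv[1], O_RDONLY | O_DIRECTORY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            ret = ioctl(fd, BTRFS_IOC_SHUTDOWN, &flags);
            if (ret)
                    perror("BTRFS_IOC_SHUTDOWN");
            close(fd);
            return ret ? 1 : 0;
    }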
+1
fs/btrfs/messages.c
··· 24 24 [BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C', 25 25 [BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S', 26 26 [BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L', 27 + [BTRFS_FS_STATE_EMERGENCY_SHUTDOWN] = 'E', 27 28 }; 28 29 29 30 static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+2 -1
fs/btrfs/messages.h
··· 168 168 #endif 169 169 170 170 #else 171 - #define ASSERT(cond, args...) (void)(cond) 171 + /* Compile check the @cond expression but don't generate any code. */ 172 + #define ASSERT(cond, args...) BUILD_BUG_ON_INVALID(cond) 172 173 #endif 173 174 174 175 #ifdef CONFIG_BTRFS_DEBUG
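The compiled-out ASSERT() now expands to BUILD_BUG_ON_INVALID(cond) instead of (void)(cond): the expression is still parsed and type-checked, so a renamed struct field breaks the build even in non-debug configs, but because the check lives inside a sizeof() nothing is evaluated at runtime, whereas the old form could still emit the evaluation of a side-effecting condition. A toy equivalent:

    #include <linux/build_bug.h>

    #define MY_ASSERT(cond) BUILD_BUG_ON_INVALID(cond)

    struct item { int refs; };

    static int get_refs(const struct item *it)
    {
            MY_ASSERT(it->refs >= 0);   /* type-checked, never executed */
            return it->refs;
    }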
+7
fs/btrfs/misc.h
··· 14 14 #include <linux/bio.h> 15 15 16 16 /* 17 + * Convenience macros to define a pointer with the __free(kfree) and 18 + * __free(kvfree) cleanup attributes and initialized to NULL. 19 + */ 20 + #define AUTO_KFREE(name) *name __free(kfree) = NULL 21 + #define AUTO_KVFREE(name) *name __free(kvfree) = NULL 22 + 23 + /* 17 24 * Enumerate bits using enum autoincrement. Define the @name as the n-th bit. 18 25 */ 19 26 #define ENUM_BIT(name) \
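AUTO_KFREE()/AUTO_KVFREE() wrap the cleanup.h __free() attribute so one declaration both NULL-initializes the pointer and schedules the matching free at scope exit; this is what enables the goto-out label removals across inode-item.c, ioctl.c and qgroup.c in this series (BTRFS_PATH_AUTO_FREE is the same idea specialized for btrfs paths). A usage sketch with an illustrative type, restating the macro from the hunk above:

    #include <linux/cleanup.h>
    #include <linux/errno.h>
    #include <linux/slab.h>

    /* As defined in misc.h above. */
    #define AUTO_KFREE(name) *name __free(kfree) = NULL

    struct args { int v; };

    static int handler(void)
    {
            struct args AUTO_KFREE(a);

            a = kzalloc(sizeof(*a), GFP_KERNEL);
            if (!a)
                    return -ENOMEM;
            a->v = 1;
            return 0;   /* a is kfree()d automatically on any return path */
    }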
+33 -41
fs/btrfs/ordered-data.c
··· 237 237 /* One ref for the tree. */ 238 238 refcount_inc(&entry->refs); 239 239 240 - spin_lock_irq(&inode->ordered_tree_lock); 240 + spin_lock(&inode->ordered_tree_lock); 241 241 node = tree_insert(&inode->ordered_tree, entry->file_offset, 242 242 &entry->rb_node); 243 243 if (unlikely(node)) 244 244 btrfs_panic(fs_info, -EEXIST, 245 245 "inconsistency in ordered tree at offset %llu", 246 246 entry->file_offset); 247 - spin_unlock_irq(&inode->ordered_tree_lock); 247 + spin_unlock(&inode->ordered_tree_lock); 248 248 249 249 spin_lock(&root->ordered_extent_lock); 250 250 list_add_tail(&entry->root_extent_list, ··· 328 328 { 329 329 struct btrfs_inode *inode = entry->inode; 330 330 331 - spin_lock_irq(&inode->ordered_tree_lock); 331 + spin_lock(&inode->ordered_tree_lock); 332 332 list_add_tail(&sum->list, &entry->list); 333 - spin_unlock_irq(&inode->ordered_tree_lock); 333 + spin_unlock(&inode->ordered_tree_lock); 334 334 } 335 335 336 336 void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered) ··· 417 417 bool uptodate) 418 418 { 419 419 struct btrfs_inode *inode = ordered->inode; 420 - unsigned long flags; 421 420 bool ret; 422 421 423 422 trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate); 424 423 425 - spin_lock_irqsave(&inode->ordered_tree_lock, flags); 424 + spin_lock(&inode->ordered_tree_lock); 426 425 ret = can_finish_ordered_extent(ordered, folio, file_offset, len, 427 426 uptodate); 428 - spin_unlock_irqrestore(&inode->ordered_tree_lock, flags); 427 + spin_unlock(&inode->ordered_tree_lock); 429 428 430 429 /* 431 430 * If this is a COW write it means we created new extent maps for the ··· 480 481 { 481 482 struct rb_node *node; 482 483 struct btrfs_ordered_extent *entry = NULL; 483 - unsigned long flags; 484 484 u64 cur = file_offset; 485 + const u64 end = file_offset + num_bytes; 485 486 486 - trace_btrfs_writepage_end_io_hook(inode, file_offset, 487 - file_offset + num_bytes - 1, 488 - uptodate); 487 + trace_btrfs_writepage_end_io_hook(inode, file_offset, end - 1, uptodate); 489 488 490 - spin_lock_irqsave(&inode->ordered_tree_lock, flags); 491 - while (cur < file_offset + num_bytes) { 489 + spin_lock(&inode->ordered_tree_lock); 490 + while (cur < end) { 492 491 u64 entry_end; 493 - u64 end; 494 - u32 len; 492 + u64 this_end; 493 + u64 len; 495 494 496 495 node = ordered_tree_search(inode, cur); 497 496 /* No ordered extents at all */ ··· 532 535 * | 533 536 * cur 534 537 */ 535 - end = min(entry->file_offset + entry->num_bytes, 536 - file_offset + num_bytes) - 1; 537 - ASSERT(end + 1 - cur < U32_MAX); 538 - len = end + 1 - cur; 538 + this_end = min(entry_end, end); 539 + len = this_end - cur; 540 + ASSERT(len < U32_MAX); 539 541 540 542 if (can_finish_ordered_extent(entry, folio, cur, len, uptodate)) { 541 - spin_unlock_irqrestore(&inode->ordered_tree_lock, flags); 543 + spin_unlock(&inode->ordered_tree_lock); 542 544 btrfs_queue_ordered_fn(entry); 543 - spin_lock_irqsave(&inode->ordered_tree_lock, flags); 545 + spin_lock(&inode->ordered_tree_lock); 544 546 } 545 547 cur += len; 546 548 } 547 - spin_unlock_irqrestore(&inode->ordered_tree_lock, flags); 549 + spin_unlock(&inode->ordered_tree_lock); 548 550 } 549 551 550 552 /* ··· 569 573 { 570 574 struct rb_node *node; 571 575 struct btrfs_ordered_extent *entry = NULL; 572 - unsigned long flags; 573 576 bool finished = false; 574 577 575 - spin_lock_irqsave(&inode->ordered_tree_lock, flags); 578 + spin_lock(&inode->ordered_tree_lock); 576 579 if (cached && *cached) { 577 580 entry = 
*cached; 578 581 goto have_entry; ··· 608 613 refcount_inc(&entry->refs); 609 614 trace_btrfs_ordered_extent_dec_test_pending(inode, entry); 610 615 } 611 - spin_unlock_irqrestore(&inode->ordered_tree_lock, flags); 616 + spin_unlock(&inode->ordered_tree_lock); 612 617 return finished; 613 618 } 614 619 ··· 673 678 percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, 674 679 fs_info->delalloc_batch); 675 680 676 - spin_lock_irq(&btrfs_inode->ordered_tree_lock); 681 + spin_lock(&btrfs_inode->ordered_tree_lock); 677 682 node = &entry->rb_node; 678 683 rb_erase(node, &btrfs_inode->ordered_tree); 679 684 RB_CLEAR_NODE(node); ··· 681 686 btrfs_inode->ordered_tree_last = NULL; 682 687 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 683 688 pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags); 684 - spin_unlock_irq(&btrfs_inode->ordered_tree_lock); 689 + spin_unlock(&btrfs_inode->ordered_tree_lock); 685 690 686 691 /* 687 692 * The current running transaction is waiting on us, we need to let it ··· 966 971 { 967 972 struct rb_node *node; 968 973 struct btrfs_ordered_extent *entry = NULL; 969 - unsigned long flags; 970 974 971 - spin_lock_irqsave(&inode->ordered_tree_lock, flags); 975 + spin_lock(&inode->ordered_tree_lock); 972 976 node = ordered_tree_search(inode, file_offset); 973 977 if (!node) 974 978 goto out; ··· 980 986 trace_btrfs_ordered_extent_lookup(inode, entry); 981 987 } 982 988 out: 983 - spin_unlock_irqrestore(&inode->ordered_tree_lock, flags); 989 + spin_unlock(&inode->ordered_tree_lock); 984 990 return entry; 985 991 } 986 992 ··· 993 999 struct rb_node *node; 994 1000 struct btrfs_ordered_extent *entry = NULL; 995 1001 996 - spin_lock_irq(&inode->ordered_tree_lock); 1002 + spin_lock(&inode->ordered_tree_lock); 997 1003 node = ordered_tree_search(inode, file_offset); 998 1004 if (!node) { 999 1005 node = ordered_tree_search(inode, file_offset + len); ··· 1020 1026 refcount_inc(&entry->refs); 1021 1027 trace_btrfs_ordered_extent_lookup_range(inode, entry); 1022 1028 } 1023 - spin_unlock_irq(&inode->ordered_tree_lock); 1029 + spin_unlock(&inode->ordered_tree_lock); 1024 1030 return entry; 1025 1031 } 1026 1032 ··· 1035 1041 1036 1042 btrfs_assert_inode_locked(inode); 1037 1043 1038 - spin_lock_irq(&inode->ordered_tree_lock); 1044 + spin_lock(&inode->ordered_tree_lock); 1039 1045 for (n = rb_first(&inode->ordered_tree); n; n = rb_next(n)) { 1040 1046 struct btrfs_ordered_extent *ordered; 1041 1047 ··· 1049 1055 refcount_inc(&ordered->refs); 1050 1056 trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered); 1051 1057 } 1052 - spin_unlock_irq(&inode->ordered_tree_lock); 1058 + spin_unlock(&inode->ordered_tree_lock); 1053 1059 } 1054 1060 1055 1061 /* ··· 1062 1068 struct rb_node *node; 1063 1069 struct btrfs_ordered_extent *entry = NULL; 1064 1070 1065 - spin_lock_irq(&inode->ordered_tree_lock); 1071 + spin_lock(&inode->ordered_tree_lock); 1066 1072 node = ordered_tree_search(inode, file_offset); 1067 1073 if (!node) 1068 1074 goto out; ··· 1071 1077 refcount_inc(&entry->refs); 1072 1078 trace_btrfs_ordered_extent_lookup_first(inode, entry); 1073 1079 out: 1074 - spin_unlock_irq(&inode->ordered_tree_lock); 1080 + spin_unlock(&inode->ordered_tree_lock); 1075 1081 return entry; 1076 1082 } 1077 1083 ··· 1093 1099 struct rb_node *next; 1094 1100 struct btrfs_ordered_extent *entry = NULL; 1095 1101 1096 - spin_lock_irq(&inode->ordered_tree_lock); 1102 + spin_lock(&inode->ordered_tree_lock); 1097 1103 node = inode->ordered_tree.rb_node; 1098 1104 /* 1099 
1105 * Here we don't want to use tree_search() which will use tree->last ··· 1148 1154 trace_btrfs_ordered_extent_lookup_first_range(inode, entry); 1149 1155 } 1150 1156 1151 - spin_unlock_irq(&inode->ordered_tree_lock); 1157 + spin_unlock(&inode->ordered_tree_lock); 1152 1158 return entry; 1153 1159 } 1154 1160 ··· 1280 1286 /* 1281 1287 * Take the root's ordered_extent_lock to avoid a race with 1282 1288 * btrfs_wait_ordered_extents() when updating the disk_bytenr and 1283 - * disk_num_bytes fields of the ordered extent below. And we disable 1284 - * IRQs because the inode's ordered_tree_lock is used in IRQ context 1285 - * elsewhere. 1289 + * disk_num_bytes fields of the ordered extent below. 1286 1290 * 1287 1291 * There's no concern about a previous caller of 1288 1292 * btrfs_wait_ordered_extents() getting the trimmed ordered extent
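Every ordered_tree_lock acquisition above drops the _irq/_irqsave variant, and the final hunk deletes the comment explaining why IRQs used to be disabled. This reads as a consequence of moving checksum generation into process context earlier in the series: the lock is apparently never taken from IRQ context anymore, so disabling interrupts around it only added latency. The general rule, sketched:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(tree_lock);

    static void update_tree(void (*op)(void))
    {
            /* Safe only because no interrupt handler contends for
             * tree_lock; otherwise spin_lock_irqsave() would be
             * needed to avoid deadlocking against a local IRQ. */
            spin_lock(&tree_lock);
            op();
            spin_unlock(&tree_lock);
    }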
+7 -9
fs/btrfs/print-tree.c
··· 131 131 struct btrfs_tree_block_info *info; 132 132 info = (struct btrfs_tree_block_info *)(ei + 1); 133 133 btrfs_tree_block_key(eb, info, &key); 134 - pr_info("\t\ttree block key (%llu %u %llu) level %d\n", 134 + pr_info("\t\ttree block key " BTRFS_KEY_FMT " level %d\n", 135 135 btrfs_disk_key_objectid(&key), key.type, 136 136 btrfs_disk_key_offset(&key), 137 137 btrfs_tree_block_level(eb, info)); ··· 277 277 struct btrfs_key location; 278 278 279 279 btrfs_dir_item_key_to_cpu(eb, di, &location); 280 - pr_info("\t\tlocation key (%llu %u %llu) type %d\n", 281 - location.objectid, location.type, location.offset, 282 - btrfs_dir_ftype(eb, di)); 280 + pr_info("\t\tlocation key " BTRFS_KEY_FMT " type %d\n", 281 + BTRFS_KEY_FMT_VALUE(&location), btrfs_dir_ftype(eb, di)); 283 282 pr_info("\t\ttransid %llu data_len %u name_len %u\n", 284 283 btrfs_dir_transid(eb, di), data_len, name_len); 285 284 di = (struct btrfs_dir_item *)((char *)di + len); ··· 420 421 if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID) 421 422 scnprintf(buf, buf_size, "UNTYPED"); 422 423 else if (key_to_str[key->type]) 423 - scnprintf(buf, buf_size, key_to_str[key->type]); 424 + scnprintf(buf, buf_size, "%s", key_to_str[key->type]); 424 425 else 425 426 scnprintf(buf, buf_size, "UNKNOWN.%d", key->type); 426 427 } ··· 597 598 print_eb_refs_lock(c); 598 599 for (i = 0; i < nr; i++) { 599 600 btrfs_node_key_to_cpu(c, &key, i); 600 - pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n", 601 - i, key.objectid, key.type, key.offset, 602 - btrfs_node_blockptr(c, i), 603 - btrfs_node_ptr_generation(c, i)); 601 + pr_info("\tkey %d " BTRFS_KEY_FMT " block %llu gen %llu\n", 602 + i, BTRFS_KEY_FMT_VALUE(&key), btrfs_node_blockptr(c, i), 603 + btrfs_node_ptr_generation(c, i)); 604 604 } 605 605 if (!follow) 606 606 return;
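print-tree.c adopts the new BTRFS_KEY_FMT macros and fixes a latent format-string hazard: scnprintf() was handed a table-sourced string as its format argument, so a stray '%' in a key-type name would be parsed as a conversion specifier; routing it through "%s" prints it verbatim. Both idioms side by side, using stand-in macro and struct names:

    #include <linux/kernel.h>
    #include <linux/printk.h>

    #define KEY_FMT "(%llu %u %llu)"
    #define KEY_FMT_VALUE(key) (key)->objectid, (key)->type, (key)->offset

    struct demo_key { unsigned long long objectid; unsigned int type;
                      unsigned long long offset; };

    static void show(const struct demo_key *k, char *buf, size_t len,
                     const char *name)
    {
            pr_info("node key " KEY_FMT "\n", KEY_FMT_VALUE(k));
            /* never: scnprintf(buf, len, name); */
            scnprintf(buf, len, "%s", name);
    }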
+81 -101
fs/btrfs/qgroup.c
··· 660 660 { 661 661 int ret; 662 662 struct btrfs_root *quota_root = trans->fs_info->quota_root; 663 - struct btrfs_path *path; 663 + BTRFS_PATH_AUTO_FREE(path); 664 664 struct btrfs_key key; 665 665 666 666 path = btrfs_alloc_path(); ··· 672 672 key.offset = dst; 673 673 674 674 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 675 - btrfs_free_path(path); 676 675 return ret; 677 676 } 678 677 ··· 680 681 { 681 682 int ret; 682 683 struct btrfs_root *quota_root = trans->fs_info->quota_root; 683 - struct btrfs_path *path; 684 + BTRFS_PATH_AUTO_FREE(path); 684 685 struct btrfs_key key; 685 686 686 687 path = btrfs_alloc_path(); ··· 693 694 694 695 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 695 696 if (ret < 0) 696 - goto out; 697 + return ret; 697 698 698 - if (ret > 0) { 699 - ret = -ENOENT; 700 - goto out; 701 - } 699 + if (ret > 0) 700 + return -ENOENT; 702 701 703 - ret = btrfs_del_item(trans, quota_root, path); 704 - out: 705 - btrfs_free_path(path); 706 - return ret; 702 + return btrfs_del_item(trans, quota_root, path); 707 703 } 708 704 709 705 static int add_qgroup_item(struct btrfs_trans_handle *trans, 710 706 struct btrfs_root *quota_root, u64 qgroupid) 711 707 { 712 708 int ret; 713 - struct btrfs_path *path; 709 + BTRFS_PATH_AUTO_FREE(path); 714 710 struct btrfs_qgroup_info_item *qgroup_info; 715 711 struct btrfs_qgroup_limit_item *qgroup_limit; 716 712 struct extent_buffer *leaf; ··· 731 737 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 732 738 sizeof(*qgroup_info)); 733 739 if (ret && ret != -EEXIST) 734 - goto out; 740 + return ret; 735 741 736 742 leaf = path->nodes[0]; 737 743 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], ··· 748 754 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 749 755 sizeof(*qgroup_limit)); 750 756 if (ret && ret != -EEXIST) 751 - goto out; 757 + return ret; 752 758 753 759 leaf = path->nodes[0]; 754 760 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], ··· 759 765 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 760 766 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 761 767 762 - ret = 0; 763 - out: 764 - btrfs_free_path(path); 765 - return ret; 768 + return 0; 766 769 } 767 770 768 771 static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) 769 772 { 770 773 int ret; 771 774 struct btrfs_root *quota_root = trans->fs_info->quota_root; 772 - struct btrfs_path *path; 775 + BTRFS_PATH_AUTO_FREE(path); 773 776 struct btrfs_key key; 774 777 775 778 path = btrfs_alloc_path(); ··· 778 787 key.offset = qgroupid; 779 788 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 780 789 if (ret < 0) 781 - goto out; 790 + return ret; 782 791 783 - if (ret > 0) { 784 - ret = -ENOENT; 785 - goto out; 786 - } 792 + if (ret > 0) 793 + return -ENOENT; 787 794 788 795 ret = btrfs_del_item(trans, quota_root, path); 789 796 if (ret) 790 - goto out; 797 + return ret; 791 798 792 799 btrfs_release_path(path); 793 800 794 801 key.type = BTRFS_QGROUP_LIMIT_KEY; 795 802 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 796 803 if (ret < 0) 797 - goto out; 804 + return ret; 798 805 799 - if (ret > 0) { 800 - ret = -ENOENT; 801 - goto out; 802 - } 806 + if (ret > 0) 807 + return -ENOENT; 803 808 804 809 ret = btrfs_del_item(trans, quota_root, path); 805 810 806 - out: 807 - btrfs_free_path(path); 808 811 return ret; 809 812 } 810 813 ··· 806 821 struct btrfs_qgroup *qgroup) 807 822 { 808 823 struct btrfs_root *quota_root = trans->fs_info->quota_root; 809 
- struct btrfs_path *path; 824 + BTRFS_PATH_AUTO_FREE(path); 810 825 struct btrfs_key key; 811 826 struct extent_buffer *l; 812 827 struct btrfs_qgroup_limit_item *qgroup_limit; ··· 826 841 ret = -ENOENT; 827 842 828 843 if (ret) 829 - goto out; 844 + return ret; 830 845 831 846 l = path->nodes[0]; 832 847 slot = path->slots[0]; ··· 836 851 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 837 852 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 838 853 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 839 - out: 840 - btrfs_free_path(path); 854 + 841 855 return ret; 842 856 } 843 857 ··· 845 861 { 846 862 struct btrfs_fs_info *fs_info = trans->fs_info; 847 863 struct btrfs_root *quota_root = fs_info->quota_root; 848 - struct btrfs_path *path; 864 + BTRFS_PATH_AUTO_FREE(path); 849 865 struct btrfs_key key; 850 866 struct extent_buffer *l; 851 867 struct btrfs_qgroup_info_item *qgroup_info; ··· 868 884 ret = -ENOENT; 869 885 870 886 if (ret) 871 - goto out; 887 + return ret; 872 888 873 889 l = path->nodes[0]; 874 890 slot = path->slots[0]; ··· 878 894 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 879 895 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 880 896 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 881 - out: 882 - btrfs_free_path(path); 897 + 883 898 return ret; 884 899 } 885 900 ··· 886 903 { 887 904 struct btrfs_fs_info *fs_info = trans->fs_info; 888 905 struct btrfs_root *quota_root = fs_info->quota_root; 889 - struct btrfs_path *path; 906 + BTRFS_PATH_AUTO_FREE(path); 890 907 struct btrfs_key key; 891 908 struct extent_buffer *l; 892 909 struct btrfs_qgroup_status_item *ptr; ··· 906 923 ret = -ENOENT; 907 924 908 925 if (ret) 909 - goto out; 926 + return ret; 910 927 911 928 l = path->nodes[0]; 912 929 slot = path->slots[0]; ··· 916 933 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 917 934 btrfs_set_qgroup_status_rescan(l, ptr, 918 935 fs_info->qgroup_rescan_progress.objectid); 919 - out: 920 - btrfs_free_path(path); 936 + 921 937 return ret; 922 938 } 923 939 ··· 926 944 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 927 945 struct btrfs_root *root) 928 946 { 929 - struct btrfs_path *path; 947 + BTRFS_PATH_AUTO_FREE(path); 930 948 struct btrfs_key key; 931 949 struct extent_buffer *leaf = NULL; 932 950 int ret; ··· 943 961 while (1) { 944 962 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 945 963 if (ret < 0) 946 - goto out; 964 + return ret; 947 965 leaf = path->nodes[0]; 948 966 nr = btrfs_header_nritems(leaf); 949 967 if (!nr) ··· 956 974 path->slots[0] = 0; 957 975 ret = btrfs_del_items(trans, root, path, 0, nr); 958 976 if (ret) 959 - goto out; 977 + return ret; 960 978 961 979 btrfs_release_path(path); 962 980 } 963 - ret = 0; 964 - out: 965 - btrfs_free_path(path); 966 - return ret; 981 + 982 + return 0; 967 983 } 968 984 969 985 int btrfs_quota_enable(struct btrfs_fs_info *fs_info, ··· 1243 1263 btrfs_end_transaction(trans); 1244 1264 else if (trans) 1245 1265 ret = btrfs_end_transaction(trans); 1246 - kfree(prealloc); 1266 + 1267 + /* 1268 + * At this point we either failed at allocating prealloc, or we 1269 + * succeeded and passed the ownership of it to add_qgroup_rb(). In any 1270 + * case, this needs to be NULL or there is something wrong.
1271 + */ 1272 + ASSERT(prealloc == NULL); 1273 + 1247 1274 return ret; 1248 1275 } 1249 1276 ··· 1682 1695 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1683 1696 out: 1684 1697 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1685 - kfree(prealloc); 1698 + /* 1699 + * At this point we either failed at allocating prealloc, or we 1700 + * succeeded and passed the ownership of it to add_qgroup_rb(). In any 1701 + * case, this needs to be NULL or there is something wrong. 1702 + */ 1703 + ASSERT(prealloc == NULL); 1686 1704 return ret; 1687 1705 } 1688 1706 ··· 1699 1707 static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) 1700 1708 { 1701 1709 struct btrfs_key key; 1702 - struct btrfs_path *path; 1703 - int ret; 1710 + BTRFS_PATH_AUTO_FREE(path); 1704 1711 1705 1712 /* 1706 1713 * Squota would never be inconsistent, but there can still be case ··· 1732 1741 if (!path) 1733 1742 return -ENOMEM; 1734 1743 1735 - ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL); 1736 - btrfs_free_path(path); 1737 1744 /* 1738 1745 * The @ret from btrfs_find_root() exactly matches our definition for 1739 1746 * the return value, thus can be returned directly. 1740 1747 */ 1741 - return ret; 1748 + return btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL); 1742 1749 } 1743 1750 1744 1751 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) ··· 2285 2296 bool trace_leaf) 2286 2297 { 2287 2298 struct btrfs_key key; 2288 - struct btrfs_path *src_path; 2299 + BTRFS_PATH_AUTO_FREE(src_path); 2289 2300 struct btrfs_fs_info *fs_info = trans->fs_info; 2290 2301 u32 nodesize = fs_info->nodesize; 2291 2302 int cur_level = root_level; ··· 2297 2308 return -EINVAL; 2298 2309 2299 2310 src_path = btrfs_alloc_path(); 2300 - if (!src_path) { 2301 - ret = -ENOMEM; 2302 - goto out; 2303 - } 2311 + if (!src_path) 2312 + return -ENOMEM; 2304 2313 2305 2314 if (dst_level) 2306 2315 btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0); ··· 2324 2337 parent_slot = src_path->slots[cur_level + 1]; 2325 2338 2326 2339 eb = btrfs_read_node_slot(eb, parent_slot); 2327 - if (IS_ERR(eb)) { 2328 - ret = PTR_ERR(eb); 2329 - goto out; 2330 - } 2340 + if (IS_ERR(eb)) 2341 + return PTR_ERR(eb); 2331 2342 2332 2343 src_path->nodes[cur_level] = eb; 2333 2344 ··· 2346 2361 &src_key, src_path->slots[cur_level]); 2347 2362 } 2348 2363 /* Content mismatch, something went wrong */ 2349 - if (btrfs_comp_cpu_keys(&dst_key, &src_key)) { 2350 - ret = -ENOENT; 2351 - goto out; 2352 - } 2364 + if (btrfs_comp_cpu_keys(&dst_key, &src_key)) 2365 + return -ENOENT; 2353 2366 cur_level--; 2354 2367 } 2355 2368 ··· 2358 2375 ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start, 2359 2376 nodesize); 2360 2377 if (ret < 0) 2361 - goto out; 2378 + return ret; 2362 2379 ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start, 2363 2380 nodesize); 2364 2381 if (ret < 0) 2365 - goto out; 2382 + return ret; 2366 2383 2367 2384 /* Record leaf file extents */ 2368 2385 if (dst_level == 0 && trace_leaf) { 2369 2386 ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]); 2370 2387 if (ret < 0) 2371 - goto out; 2388 + return ret; 2372 2389 ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]); 2373 2390 } 2374 - out: 2375 - btrfs_free_path(src_path); 2391 + 2376 2392 return ret; 2377 2393 } 2378 2394 ··· 2572 2590 int level; 2573 2591 u8 drop_subptree_thres; 2574 2592 struct extent_buffer *eb = root_eb; 2575 - struct btrfs_path *path = 
NULL; 2593 + BTRFS_PATH_AUTO_FREE(path); 2576 2594 2577 2595 ASSERT(0 <= root_level && root_level < BTRFS_MAX_LEVEL); 2578 2596 ASSERT(root_eb != NULL); ··· 2605 2623 2606 2624 ret = btrfs_read_extent_buffer(root_eb, &check); 2607 2625 if (ret) 2608 - goto out; 2626 + return ret; 2609 2627 } 2610 2628 2611 2629 if (root_level == 0) { 2612 2630 ret = btrfs_qgroup_trace_leaf_items(trans, root_eb); 2613 - goto out; 2631 + return ret; 2614 2632 } 2615 2633 2616 2634 path = btrfs_alloc_path(); ··· 2646 2664 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 2647 2665 2648 2666 eb = btrfs_read_node_slot(eb, parent_slot); 2649 - if (IS_ERR(eb)) { 2650 - ret = PTR_ERR(eb); 2651 - goto out; 2652 - } 2667 + if (IS_ERR(eb)) 2668 + return PTR_ERR(eb); 2653 2669 2654 2670 path->nodes[level] = eb; 2655 2671 path->slots[level] = 0; ··· 2658 2678 ret = btrfs_qgroup_trace_extent(trans, child_bytenr, 2659 2679 fs_info->nodesize); 2660 2680 if (ret) 2661 - goto out; 2681 + return ret; 2662 2682 } 2663 2683 2664 2684 if (level == 0) { 2665 2685 ret = btrfs_qgroup_trace_leaf_items(trans, 2666 2686 path->nodes[level]); 2667 2687 if (ret) 2668 - goto out; 2688 + return ret; 2669 2689 2670 2690 /* Nonzero return here means we completed our search */ 2671 2691 ret = adjust_slots_upwards(path, root_level); ··· 2679 2699 level--; 2680 2700 } 2681 2701 2682 - ret = 0; 2683 - out: 2684 - btrfs_free_path(path); 2685 - 2686 - return ret; 2702 + return 0; 2687 2703 } 2688 2704 2689 2705 static void qgroup_iterator_nested_add(struct list_head *head, struct btrfs_qgroup *qgroup) ··· 3279 3303 struct btrfs_root *quota_root; 3280 3304 struct btrfs_qgroup *srcgroup; 3281 3305 struct btrfs_qgroup *dstgroup; 3282 - struct btrfs_qgroup *prealloc; 3306 + struct btrfs_qgroup *prealloc = NULL; 3283 3307 struct btrfs_qgroup_list **qlist_prealloc = NULL; 3284 3308 bool free_inherit = false; 3285 3309 bool need_rescan = false; ··· 3520 3544 } 3521 3545 if (free_inherit) 3522 3546 kfree(inherit); 3523 - kfree(prealloc); 3547 + 3548 + /* 3549 + * At this point we either failed at allocating prealloc, or we 3550 + * succeeded and passed the ownership of it to add_qgroup_rb(). In any 3551 + * case, this needs to be NULL or there is something wrong.
3552 + */ 3553 + ASSERT(prealloc == NULL); 3554 + 3524 3555 return ret; 3525 3556 } 3526 3557 ··· 3695 3712 path, 1, 0); 3696 3713 3697 3714 btrfs_debug(fs_info, 3698 - "current progress key (%llu %u %llu), search_slot ret %d", 3699 - fs_info->qgroup_rescan_progress.objectid, 3700 - fs_info->qgroup_rescan_progress.type, 3701 - fs_info->qgroup_rescan_progress.offset, ret); 3715 + "current progress key " BTRFS_KEY_FMT ", search_slot ret %d", 3716 + BTRFS_KEY_FMT_VALUE(&fs_info->qgroup_rescan_progress), ret); 3702 3717 3703 3718 if (ret) { 3704 3719 /* ··· 3798 3817 * Rescan should only search for commit root, and any later difference 3799 3818 * should be recorded by qgroup 3800 3819 */ 3801 - path->search_commit_root = 1; 3802 - path->skip_locking = 1; 3820 + path->search_commit_root = true; 3821 + path->skip_locking = true; 3803 3822 3804 3823 while (!ret && !(stopped = rescan_should_stop(fs_info))) { 3805 3824 trans = btrfs_start_transaction(fs_info->fs_root, 0); ··· 4777 4796 struct btrfs_fs_info *fs_info = root->fs_info; 4778 4797 struct btrfs_tree_parent_check check = { 0 }; 4779 4798 struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; 4780 - struct btrfs_qgroup_swapped_block *block; 4799 + struct btrfs_qgroup_swapped_block AUTO_KFREE(block); 4781 4800 struct extent_buffer *reloc_eb = NULL; 4782 4801 struct rb_node *node; 4783 4802 bool swapped = false; ··· 4834 4853 ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb, 4835 4854 block->last_snapshot, block->trace_leaf); 4836 4855 free_out: 4837 - kfree(block); 4838 4856 free_extent_buffer(reloc_eb); 4839 4857 out: 4840 4858 if (ret < 0) {
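The qgroup conversions above all follow one shape: the path is declared with BTRFS_PATH_AUTO_FREE(), the out: label with its btrfs_free_path() call goes away, and every error branch becomes a direct return. The macro definition itself is not part of this diff; assuming it is built on the scope-based cleanup helpers from <linux/cleanup.h> (a sketch of the idiom, not necessarily the exact btrfs definition):

    #include <linux/cleanup.h>

    /*
     * Generate a cleanup hook that runs btrfs_free_path() (which accepts
     * NULL) whenever the annotated variable goes out of scope.
     */
    DEFINE_FREE(btrfs_free_path, struct btrfs_path *, btrfs_free_path(_T))

    /* Declare a path that is freed automatically on every return path. */
    #define BTRFS_PATH_AUTO_FREE(path_name) \
            struct btrfs_path *path_name __free(btrfs_free_path) = NULL

    /* Hypothetical caller: no goto/out pair is needed for early returns. */
    static int example_lookup(struct btrfs_root *root, const struct btrfs_key *key)
    {
            BTRFS_PATH_AUTO_FREE(path);

            path = btrfs_alloc_path();
            if (!path)
                    return -ENOMEM;

            return btrfs_search_slot(NULL, root, key, path, 0, 0);
            /* path is released here by the __free() hook */
    }

Note that only the final free is automated; the path can still be recycled with btrfs_release_path() inside the function, as del_qgroup_item() does between the two deletions.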
+6 -12
fs/btrfs/raid-stripe-tree.c
··· 19 19 u64 newlen, u64 frontpad) 20 20 { 21 21 struct btrfs_root *stripe_root = trans->fs_info->stripe_root; 22 - struct btrfs_stripe_extent *extent, *newitem; 22 + struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem); 23 23 struct extent_buffer *leaf; 24 24 int slot; 25 25 size_t item_size; ··· 53 53 54 54 ret = btrfs_del_item(trans, stripe_root, path); 55 55 if (ret) 56 - goto out; 56 + return ret; 57 57 58 58 btrfs_release_path(path); 59 - ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size); 60 - 61 - out: 62 - kfree(newitem); 63 - return ret; 59 + return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size); 64 60 } 65 61 66 62 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length) ··· 295 299 struct btrfs_key stripe_key; 296 300 struct btrfs_root *stripe_root = fs_info->stripe_root; 297 301 const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type); 298 - struct btrfs_stripe_extent *stripe_extent; 302 + struct btrfs_stripe_extent AUTO_KFREE(stripe_extent); 299 303 const size_t item_size = struct_size(stripe_extent, strides, num_stripes); 300 304 int ret; 301 305 ··· 331 335 } else if (ret) { 332 336 btrfs_abort_transaction(trans, ret); 333 337 } 334 - 335 - kfree(stripe_extent); 336 338 337 339 return ret; 338 340 } ··· 388 394 return -ENOMEM; 389 395 390 396 if (stripe->rst_search_commit_root) { 391 - path->skip_locking = 1; 392 - path->search_commit_root = 1; 397 + path->skip_locking = true; 398 + path->search_commit_root = true; 393 399 } 394 400 395 401 ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
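The raid-stripe-tree hunks use the same scope-bound cleanup idiom for heap memory: AUTO_KFREE() declares a pointer that is kfree()d automatically on function exit. Its definition lives outside this diff; assuming it wraps the generic kfree cleanup hook that <linux/slab.h> already provides via DEFINE_FREE(), a minimal sketch is:

    #include <linux/slab.h>     /* DEFINE_FREE(kfree, ...) lives here */

    /*
     * "struct foo AUTO_KFREE(ptr);" expands to a NULL-initialized pointer
     * that is kfree()d on scope exit; kfree(NULL) is a no-op, so returning
     * before the allocation succeeds is safe.
     */
    #define AUTO_KFREE(name) *name __free(kfree) = NULL

That is what lets the stripe-extent helpers above drop their trailing kfree(newitem)/kfree(stripe_extent) calls while keeping the early returns in the error paths.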
+508 -351
fs/btrfs/raid56.c
··· 66 66 67 67 dump_bioc(fs_info, rbio->bioc); 68 68 btrfs_crit(fs_info, 69 - "rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx", 69 + "rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u sector_nsteps=%u scrubp=%u dbitmap=0x%lx", 70 70 rbio->flags, rbio->nr_sectors, rbio->nr_data, 71 71 rbio->real_stripes, rbio->stripe_nsectors, 72 - rbio->scrubp, rbio->dbitmap); 72 + rbio->sector_nsteps, rbio->scrubp, rbio->dbitmap); 73 73 } 74 74 75 75 #define ASSERT_RBIO(expr, rbio) \ ··· 134 134 }; 135 135 136 136 /* 137 - * A structure to present a sector inside a page, the length is fixed to 138 - * sectorsize; 137 + * The PFN may still be valid, but our paddrs should always be block size 138 + * aligned, thus such -1 paddr is definitely not a valid one. 139 139 */ 140 - struct sector_ptr { 141 - /* 142 - * Blocks from the bio list can still be highmem. 143 - * So here we use physical address to present a page and the offset inside it. 144 - */ 145 - phys_addr_t paddr; 146 - bool has_paddr; 147 - bool uptodate; 148 - }; 140 + #define INVALID_PADDR (~(phys_addr_t)0) 149 141 150 142 static void rmw_rbio_work(struct work_struct *work); 151 143 static void rmw_rbio_work_locked(struct work_struct *work); ··· 151 159 { 152 160 bitmap_free(rbio->error_bitmap); 153 161 kfree(rbio->stripe_pages); 154 - kfree(rbio->bio_sectors); 155 - kfree(rbio->stripe_sectors); 162 + kfree(rbio->bio_paddrs); 163 + kfree(rbio->stripe_paddrs); 156 164 kfree(rbio->finish_pointers); 157 165 } 158 166 ··· 227 235 return 0; 228 236 } 229 237 230 - static void memcpy_sectors(const struct sector_ptr *dst, 231 - const struct sector_ptr *src, u32 blocksize) 238 + static void memcpy_from_bio_to_stripe(struct btrfs_raid_bio *rbio, unsigned int sector_nr) 232 239 { 233 - memcpy_page(phys_to_page(dst->paddr), offset_in_page(dst->paddr), 234 - phys_to_page(src->paddr), offset_in_page(src->paddr), 235 - blocksize); 240 + const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE); 241 + 242 + ASSERT(sector_nr < rbio->nr_sectors); 243 + for (int i = 0; i < rbio->sector_nsteps; i++) { 244 + unsigned int index = sector_nr * rbio->sector_nsteps + i; 245 + phys_addr_t dst = rbio->stripe_paddrs[index]; 246 + phys_addr_t src = rbio->bio_paddrs[index]; 247 + 248 + ASSERT(dst != INVALID_PADDR); 249 + ASSERT(src != INVALID_PADDR); 250 + 251 + memcpy_page(phys_to_page(dst), offset_in_page(dst), 252 + phys_to_page(src), offset_in_page(src), step); 253 + } 236 254 } 237 255 238 256 /* ··· 265 263 266 264 for (i = 0; i < rbio->nr_sectors; i++) { 267 265 /* Some range not covered by bio (partial write), skip it */ 268 - if (!rbio->bio_sectors[i].has_paddr) { 266 + if (rbio->bio_paddrs[i * rbio->sector_nsteps] == INVALID_PADDR) { 269 267 /* 270 268 * Even if the sector is not covered by bio, if it is 271 269 * a data sector it should still be uptodate as it is 272 270 * read from disk. 
273 271 */ 274 272 if (i < rbio->nr_data * rbio->stripe_nsectors) 275 - ASSERT(rbio->stripe_sectors[i].uptodate); 273 + ASSERT(test_bit(i, rbio->stripe_uptodate_bitmap)); 276 274 continue; 277 275 } 278 276 279 - memcpy_sectors(&rbio->stripe_sectors[i], &rbio->bio_sectors[i], 280 - rbio->bioc->fs_info->sectorsize); 281 - rbio->stripe_sectors[i].uptodate = 1; 277 + memcpy_from_bio_to_stripe(rbio, i); 278 + set_bit(i, rbio->stripe_uptodate_bitmap); 282 279 } 283 280 set_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 284 281 } ··· 300 299 return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS); 301 300 } 302 301 303 - static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio, 304 - unsigned int page_nr) 302 + /* Get the sector number of the first sector covered by @page_nr. */ 303 + static u32 page_nr_to_sector_nr(struct btrfs_raid_bio *rbio, unsigned int page_nr) 305 304 { 306 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 307 - const u32 sectors_per_page = PAGE_SIZE / sectorsize; 308 - int i; 305 + u32 sector_nr; 309 306 310 307 ASSERT(page_nr < rbio->nr_pages); 311 308 312 - for (i = sectors_per_page * page_nr; 313 - i < sectors_per_page * page_nr + sectors_per_page; 314 - i++) { 315 - if (!rbio->stripe_sectors[i].uptodate) 309 + sector_nr = (page_nr << PAGE_SHIFT) >> rbio->bioc->fs_info->sectorsize_bits; 310 + ASSERT(sector_nr < rbio->nr_sectors); 311 + return sector_nr; 312 + } 313 + 314 + /* 315 + * Get the number of sectors covered by @page_nr. 316 + * 317 + * For bs > ps cases, the result will always be 1. 318 + * For bs <= ps cases, the result will be ps / bs. 319 + */ 320 + static u32 page_nr_to_num_sectors(struct btrfs_raid_bio *rbio, unsigned int page_nr) 321 + { 322 + struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 323 + u32 nr_sectors; 324 + 325 + ASSERT(page_nr < rbio->nr_pages); 326 + 327 + nr_sectors = round_up(PAGE_SIZE, fs_info->sectorsize) >> fs_info->sectorsize_bits; 328 + ASSERT(nr_sectors > 0); 329 + return nr_sectors; 330 + } 331 + 332 + static __maybe_unused bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio, 333 + unsigned int page_nr) 334 + { 335 + const u32 sector_nr = page_nr_to_sector_nr(rbio, page_nr); 336 + const u32 nr_bits = page_nr_to_num_sectors(rbio, page_nr); 337 + int i; 338 + 339 + ASSERT(page_nr < rbio->nr_pages); 340 + ASSERT(sector_nr + nr_bits < rbio->nr_sectors); 341 + 342 + for (i = sector_nr; i < sector_nr + nr_bits; i++) { 343 + if (!test_bit(i, rbio->stripe_uptodate_bitmap)) 316 344 return false; 317 345 } 318 346 return true; ··· 354 324 */ 355 325 static void index_stripe_sectors(struct btrfs_raid_bio *rbio) 356 326 { 357 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 327 + const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE); 358 328 u32 offset; 359 329 int i; 360 330 361 - for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) { 331 + for (i = 0, offset = 0; i < rbio->nr_sectors * rbio->sector_nsteps; 332 + i++, offset += step) { 362 333 int page_index = offset >> PAGE_SHIFT; 363 334 364 335 ASSERT(page_index < rbio->nr_pages); 365 336 if (!rbio->stripe_pages[page_index]) 366 337 continue; 367 338 368 - rbio->stripe_sectors[i].has_paddr = true; 369 - rbio->stripe_sectors[i].paddr = 370 - page_to_phys(rbio->stripe_pages[page_index]) + 371 - offset_in_page(offset); 339 + rbio->stripe_paddrs[i] = page_to_phys(rbio->stripe_pages[page_index]) + 340 + offset_in_page(offset); 372 341 } 373 342 } 374 343 375 344 static void steal_rbio_page(struct btrfs_raid_bio *src, 376 345 
struct btrfs_raid_bio *dest, int page_nr) 377 346 { 378 - const u32 sectorsize = src->bioc->fs_info->sectorsize; 379 - const u32 sectors_per_page = PAGE_SIZE / sectorsize; 380 - int i; 347 + const u32 sector_nr = page_nr_to_sector_nr(src, page_nr); 348 + const u32 nr_bits = page_nr_to_num_sectors(src, page_nr); 349 + 350 + ASSERT(page_nr < src->nr_pages); 351 + ASSERT(sector_nr + nr_bits < src->nr_sectors); 381 352 382 353 if (dest->stripe_pages[page_nr]) 383 354 __free_page(dest->stripe_pages[page_nr]); 384 355 dest->stripe_pages[page_nr] = src->stripe_pages[page_nr]; 385 356 src->stripe_pages[page_nr] = NULL; 386 357 387 - /* Also update the sector->uptodate bits. */ 388 - for (i = sectors_per_page * page_nr; 389 - i < sectors_per_page * page_nr + sectors_per_page; i++) 390 - dest->stripe_sectors[i].uptodate = true; 358 + /* Also update the stripe_uptodate_bitmap bits. */ 359 + bitmap_set(dest->stripe_uptodate_bitmap, sector_nr, nr_bits); 391 360 } 392 361 393 362 static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr) 394 363 { 395 - const int sector_nr = (page_nr << PAGE_SHIFT) >> 396 - rbio->bioc->fs_info->sectorsize_bits; 364 + const int sector_nr = page_nr_to_sector_nr(rbio, page_nr); 397 365 398 366 /* 399 367 * We have ensured PAGE_SIZE is aligned with sectorsize, thus ··· 705 677 return 1; 706 678 } 707 679 708 - static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio, 709 - unsigned int stripe_nr, 710 - unsigned int sector_nr) 680 + /* Return the sector index for @stripe_nr and @sector_nr. */ 681 + static unsigned int rbio_sector_index(const struct btrfs_raid_bio *rbio, 682 + unsigned int stripe_nr, 683 + unsigned int sector_nr) 711 684 { 685 + unsigned int ret; 686 + 712 687 ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr); 713 688 ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr); 714 689 715 - return stripe_nr * rbio->stripe_nsectors + sector_nr; 690 + ret = stripe_nr * rbio->stripe_nsectors + sector_nr; 691 + ASSERT(ret < rbio->nr_sectors); 692 + return ret; 716 693 } 717 694 718 - /* Return a sector from rbio->stripe_sectors, not from the bio list */ 719 - static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio, 720 - unsigned int stripe_nr, 721 - unsigned int sector_nr) 695 + /* Return the paddr array index for @stripe_nr, @sector_nr and @step_nr. 
*/ 696 + static unsigned int rbio_paddr_index(const struct btrfs_raid_bio *rbio, 697 + unsigned int stripe_nr, 698 + unsigned int sector_nr, 699 + unsigned int step_nr) 722 700 { 723 - return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr, 724 - sector_nr)]; 701 + unsigned int ret; 702 + 703 + ASSERT_RBIO_SECTOR(step_nr < rbio->sector_nsteps, rbio, step_nr); 704 + 705 + ret = rbio_sector_index(rbio, stripe_nr, sector_nr) * rbio->sector_nsteps + step_nr; 706 + ASSERT(ret < rbio->nr_sectors * rbio->sector_nsteps); 707 + return ret; 725 708 } 726 709 727 - /* Grab a sector inside P stripe */ 728 - static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio, 729 - unsigned int sector_nr) 710 + static phys_addr_t rbio_stripe_paddr(const struct btrfs_raid_bio *rbio, 711 + unsigned int stripe_nr, unsigned int sector_nr, 712 + unsigned int step_nr) 730 713 { 731 - return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr); 714 + return rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, step_nr)]; 732 715 } 733 716 734 - /* Grab a sector inside Q stripe, return NULL if not RAID6 */ 735 - static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio, 736 - unsigned int sector_nr) 717 + static phys_addr_t rbio_pstripe_paddr(const struct btrfs_raid_bio *rbio, 718 + unsigned int sector_nr, unsigned int step_nr) 719 + { 720 + return rbio_stripe_paddr(rbio, rbio->nr_data, sector_nr, step_nr); 721 + } 722 + 723 + static phys_addr_t rbio_qstripe_paddr(const struct btrfs_raid_bio *rbio, 724 + unsigned int sector_nr, unsigned int step_nr) 737 725 { 738 726 if (rbio->nr_data + 1 == rbio->real_stripes) 739 - return NULL; 740 - return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr); 727 + return INVALID_PADDR; 728 + return rbio_stripe_paddr(rbio, rbio->nr_data + 1, sector_nr, step_nr); 729 + } 730 + 731 + /* Return a paddr pointer into the rbio::stripe_paddrs[] for the specified sector. */ 732 + static phys_addr_t *rbio_stripe_paddrs(const struct btrfs_raid_bio *rbio, 733 + unsigned int stripe_nr, unsigned int sector_nr) 734 + { 735 + return &rbio->stripe_paddrs[rbio_paddr_index(rbio, stripe_nr, sector_nr, 0)]; 741 736 } 742 737 743 738 /* ··· 995 944 } 996 945 997 946 /* 998 - * Get a sector pointer specified by its @stripe_nr and @sector_nr. 947 + * Get paddr pointer for the sector specified by its @stripe_nr and @sector_nr. 999 948 * 1000 949 * @rbio: The raid bio 1001 950 * @stripe_nr: Stripe number, valid range [0, real_stripe) ··· 1005 954 * 1006 955 * The read/modify/write code wants to reuse the original bio page as much 1007 956 * as possible, and only use stripe_sectors as fallback. 957 + * 958 + * Return NULL if bio_list_only is set but the specified sector has no 959 + * corresponding bio.
1008 960 */ 1009 - static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio, 1010 - int stripe_nr, int sector_nr, 1011 - bool bio_list_only) 961 + static phys_addr_t *sector_paddrs_in_rbio(struct btrfs_raid_bio *rbio, 962 + int stripe_nr, int sector_nr, 963 + bool bio_list_only) 1012 964 { 1013 - struct sector_ptr *sector; 1014 - int index; 965 + phys_addr_t *ret = NULL; 966 + const int index = rbio_paddr_index(rbio, stripe_nr, sector_nr, 0); 1015 967 1016 - ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->real_stripes, 1017 - rbio, stripe_nr); 1018 - ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors, 1019 - rbio, sector_nr); 968 + ASSERT(index >= 0 && index < rbio->nr_sectors * rbio->sector_nsteps); 1020 969 1021 - index = stripe_nr * rbio->stripe_nsectors + sector_nr; 1022 - ASSERT(index >= 0 && index < rbio->nr_sectors); 1023 - 1024 - spin_lock(&rbio->bio_list_lock); 1025 - sector = &rbio->bio_sectors[index]; 1026 - if (sector->has_paddr || bio_list_only) { 1027 - /* Don't return sector without a valid page pointer */ 1028 - if (!sector->has_paddr) 1029 - sector = NULL; 1030 - spin_unlock(&rbio->bio_list_lock); 1031 - return sector; 970 + scoped_guard(spinlock, &rbio->bio_list_lock) { 971 + if (rbio->bio_paddrs[index] != INVALID_PADDR || bio_list_only) { 972 + /* Don't return sector without a valid page pointer */ 973 + if (rbio->bio_paddrs[index] != INVALID_PADDR) 974 + ret = &rbio->bio_paddrs[index]; 975 + return ret; 976 + } 1032 977 } 1033 - spin_unlock(&rbio->bio_list_lock); 978 + return &rbio->stripe_paddrs[index]; 979 + } 1034 980 1035 - return &rbio->stripe_sectors[index]; 981 + /* 982 + * Similar to sector_paddrs_in_rbio(), but with extra consideration for 983 + * bs > ps cases, where we can have multiple steps for a fs block. 984 + */ 985 + static phys_addr_t sector_paddr_in_rbio(struct btrfs_raid_bio *rbio, 986 + int stripe_nr, int sector_nr, int step_nr, 987 + bool bio_list_only) 988 + { 989 + phys_addr_t ret = INVALID_PADDR; 990 + const int index = rbio_paddr_index(rbio, stripe_nr, sector_nr, step_nr); 991 + 992 + ASSERT(index >= 0 && index < rbio->nr_sectors * rbio->sector_nsteps); 993 + 994 + scoped_guard(spinlock, &rbio->bio_list_lock) { 995 + if (rbio->bio_paddrs[index] != INVALID_PADDR || bio_list_only) { 996 + /* Don't return sector without a valid page pointer */ 997 + if (rbio->bio_paddrs[index] != INVALID_PADDR) 998 + ret = rbio->bio_paddrs[index]; 999 + return ret; 1000 + } 1001 + } 1002 + return rbio->stripe_paddrs[index]; 1036 1003 } 1037 1004 1038 1005 /* ··· 1066 997 const unsigned int stripe_nsectors = 1067 998 BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits; 1068 999 const unsigned int num_sectors = stripe_nsectors * real_stripes; 1000 + const unsigned int step = min(fs_info->sectorsize, PAGE_SIZE); 1001 + const unsigned int sector_nsteps = fs_info->sectorsize / step; 1069 1002 struct btrfs_raid_bio *rbio; 1070 1003 1071 - /* PAGE_SIZE must also be aligned to sectorsize for subpage support */ 1072 - ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize)); 1004 + /* 1005 + * For bs <= ps cases, ps must be aligned to bs. 1006 + * For bs > ps cases, bs must be aligned to ps. 1007 + */ 1008 + ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize) || 1009 + IS_ALIGNED(fs_info->sectorsize, PAGE_SIZE)); 1073 1010 /* 1074 1011 * Our current stripe len should be fixed to 64k thus stripe_nsectors 1075 1012 * (at most 16) should be no larger than BITS_PER_LONG.
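With block size (bs) no longer required to be smaller than page size (ps), a single fs block can span several pages. The code above therefore tracks each block as sector_nsteps contiguous "steps" of min(sectorsize, PAGE_SIZE) bytes, one phys_addr_t per step. A standalone sketch of the arithmetic (illustrative values, mirroring the expressions in the allocation hunk above; not kernel code):

    #include <stdio.h>

    #define MIN(a, b)   ((a) < (b) ? (a) : (b))

    int main(void)
    {
            /* { sectorsize, PAGE_SIZE } pairs to illustrate. */
            const unsigned int cases[][2] = {
                    {  4096,  4096 },   /* bs == ps */
                    {  4096, 65536 },   /* bs <  ps (e.g. 64K pages) */
                    { 16384,  4096 },   /* bs >  ps (block > page)   */
            };

            for (int i = 0; i < 3; i++) {
                    unsigned int bs = cases[i][0], ps = cases[i][1];
                    unsigned int step = MIN(bs, ps);

                    /* One phys_addr_t per step; bs <= ps collapses to 1. */
                    printf("bs=%u ps=%u -> step=%u sector_nsteps=%u\n",
                           bs, ps, step, bs / step);
            }
            return 0;
    }

For bs <= ps this degenerates to one step per block (the old sector_ptr behaviour); only bs > ps produces multiple steps, which is why helpers such as memcpy_from_bio_to_stripe() loop over sector_nsteps.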
··· 1094 1019 return ERR_PTR(-ENOMEM); 1095 1020 rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *), 1096 1021 GFP_NOFS); 1097 - rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr), 1098 - GFP_NOFS); 1099 - rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr), 1100 - GFP_NOFS); 1022 + rbio->bio_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS); 1023 + rbio->stripe_paddrs = kcalloc(num_sectors * sector_nsteps, sizeof(phys_addr_t), GFP_NOFS); 1101 1024 rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS); 1102 1025 rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS); 1026 + rbio->stripe_uptodate_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS); 1103 1027 1104 - if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors || 1105 - !rbio->finish_pointers || !rbio->error_bitmap) { 1028 + if (!rbio->stripe_pages || !rbio->bio_paddrs || !rbio->stripe_paddrs || 1029 + !rbio->finish_pointers || !rbio->error_bitmap || !rbio->stripe_uptodate_bitmap) { 1106 1030 free_raid_bio_pointers(rbio); 1107 1031 kfree(rbio); 1108 1032 return ERR_PTR(-ENOMEM); 1033 + } 1034 + for (int i = 0; i < num_sectors * sector_nsteps; i++) { 1035 + rbio->stripe_paddrs[i] = INVALID_PADDR; 1036 + rbio->bio_paddrs[i] = INVALID_PADDR; 1109 1037 } 1110 1038 1111 1039 bio_list_init(&rbio->bio_list); ··· 1124 1046 rbio->real_stripes = real_stripes; 1125 1047 rbio->stripe_npages = stripe_npages; 1126 1048 rbio->stripe_nsectors = stripe_nsectors; 1049 + rbio->sector_nsteps = sector_nsteps; 1127 1050 refcount_set(&rbio->refs, 1); 1128 1051 atomic_set(&rbio->stripes_pending, 0); 1129 1052 ··· 1169 1090 * @faila and @failb will also be updated to the first and second stripe 1170 1091 * number of the errors. 1171 1092 */ 1172 - static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr, 1173 - int *faila, int *failb) 1093 + static int get_rbio_vertical_errors(struct btrfs_raid_bio *rbio, int sector_nr, 1094 + int *faila, int *failb) 1174 1095 { 1175 1096 int stripe_nr; 1176 1097 int found_errors = 0; ··· 1202 1123 return found_errors; 1203 1124 } 1204 1125 1126 + static int bio_add_paddrs(struct bio *bio, phys_addr_t *paddrs, unsigned int nr_steps, 1127 + unsigned int step) 1128 + { 1129 + int added = 0; 1130 + int ret; 1131 + 1132 + for (int i = 0; i < nr_steps; i++) { 1133 + ret = bio_add_page(bio, phys_to_page(paddrs[i]), step, 1134 + offset_in_page(paddrs[i])); 1135 + if (ret != step) 1136 + goto revert; 1137 + added += ret; 1138 + } 1139 + return added; 1140 + revert: 1141 + /* 1142 + * We don't need to revert the bvec, as the bio will be submitted immediately, 1143 + * as long as the size is reduced the extra bvec will not be accessed. 1144 + */ 1145 + bio->bi_iter.bi_size -= added; 1146 + return 0; 1147 + } 1148 + 1205 1149 /* 1206 1150 * Add a single sector @sector into our list of bios for IO. 1207 1151 * 1208 1152 * Return 0 if everything went well. 1209 - * Return <0 for error. 1153 + * Return <0 for error, and no byte will be added to @rbio. 
1210 1154 */ 1211 - static int rbio_add_io_sector(struct btrfs_raid_bio *rbio, 1212 - struct bio_list *bio_list, 1213 - struct sector_ptr *sector, 1214 - unsigned int stripe_nr, 1215 - unsigned int sector_nr, 1216 - enum req_op op) 1155 + static int rbio_add_io_paddrs(struct btrfs_raid_bio *rbio, struct bio_list *bio_list, 1156 + phys_addr_t *paddrs, unsigned int stripe_nr, 1157 + unsigned int sector_nr, enum req_op op) 1217 1158 { 1218 1159 const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 1160 + const u32 step = min(sectorsize, PAGE_SIZE); 1219 1161 struct bio *last = bio_list->tail; 1220 1162 int ret; 1221 1163 struct bio *bio; ··· 1252 1152 rbio, stripe_nr); 1253 1153 ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors, 1254 1154 rbio, sector_nr); 1255 - ASSERT(sector->has_paddr); 1155 + ASSERT(paddrs != NULL); 1256 1156 1257 1157 stripe = &rbio->bioc->stripes[stripe_nr]; 1258 1158 disk_start = stripe->physical + sector_nr * sectorsize; ··· 1265 1165 rbio->error_bitmap); 1266 1166 1267 1167 /* Check if we have reached tolerance early. */ 1268 - found_errors = get_rbio_veritical_errors(rbio, sector_nr, 1269 - NULL, NULL); 1168 + found_errors = get_rbio_vertical_errors(rbio, sector_nr, 1169 + NULL, NULL); 1270 1170 if (unlikely(found_errors > rbio->bioc->max_errors)) 1271 1171 return -EIO; 1272 1172 return 0; ··· 1283 1183 */ 1284 1184 if (last_end == disk_start && !last->bi_status && 1285 1185 last->bi_bdev == stripe->dev->bdev) { 1286 - ret = bio_add_page(last, phys_to_page(sector->paddr), 1287 - sectorsize, offset_in_page(sector->paddr)); 1186 + ret = bio_add_paddrs(last, paddrs, rbio->sector_nsteps, step); 1288 1187 if (ret == sectorsize) 1289 1188 return 0; 1290 1189 } ··· 1296 1197 bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT; 1297 1198 bio->bi_private = rbio; 1298 1199 1299 - __bio_add_page(bio, phys_to_page(sector->paddr), sectorsize, 1300 - offset_in_page(sector->paddr)); 1200 + ret = bio_add_paddrs(bio, paddrs, rbio->sector_nsteps, step); 1201 + ASSERT(ret == sectorsize); 1301 1202 bio_list_add(bio_list, bio); 1302 1203 return 0; 1303 1204 } 1304 1205 1305 1206 static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio) 1306 1207 { 1307 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 1308 - const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits; 1208 + struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 1209 + const u32 step = min(fs_info->sectorsize, PAGE_SIZE); 1210 + const u32 step_bits = min(fs_info->sectorsize_bits, PAGE_SHIFT); 1309 1211 struct bvec_iter iter = bio->bi_iter; 1310 1212 phys_addr_t paddr; 1311 1213 u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - 1312 1214 rbio->bioc->full_stripe_logical; 1313 1215 1314 - btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) { 1315 - unsigned int index = (offset >> sectorsize_bits); 1316 - struct sector_ptr *sector = &rbio->bio_sectors[index]; 1216 + btrfs_bio_for_each_block(paddr, bio, &iter, step) { 1217 + unsigned int index = (offset >> step_bits); 1317 1218 1318 - sector->has_paddr = true; 1319 - sector->paddr = paddr; 1320 - offset += sectorsize; 1219 + rbio->bio_paddrs[index] = paddr; 1220 + offset += step; 1321 1221 } 1322 1222 } 1323 1223 ··· 1394 1296 ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio); 1395 1297 } 1396 1298 1397 - static inline void *kmap_local_sector(const struct sector_ptr *sector) 1299 + static inline void *kmap_local_paddr(phys_addr_t paddr) 1398 1300 { 1399 1301 /* The sector pointer must have a page mapped to it. 
*/ 1400 - ASSERT(sector->has_paddr); 1302 + ASSERT(paddr != INVALID_PADDR); 1401 1303 1402 - return kmap_local_page(phys_to_page(sector->paddr)) + 1403 - offset_in_page(sector->paddr); 1304 + return kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr); 1404 1305 } 1405 1306 1406 - /* Generate PQ for one vertical stripe. */ 1407 - static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr) 1307 + static void generate_pq_vertical_step(struct btrfs_raid_bio *rbio, unsigned int sector_nr, 1308 + unsigned int step_nr) 1408 1309 { 1409 1310 void **pointers = rbio->finish_pointers; 1410 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 1411 - struct sector_ptr *sector; 1311 + const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE); 1412 1312 int stripe; 1413 1313 const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6; 1414 1314 1415 1315 /* First collect one sector from each data stripe */ 1416 - for (stripe = 0; stripe < rbio->nr_data; stripe++) { 1417 - sector = sector_in_rbio(rbio, stripe, sectornr, 0); 1418 - pointers[stripe] = kmap_local_sector(sector); 1419 - } 1316 + for (stripe = 0; stripe < rbio->nr_data; stripe++) 1317 + pointers[stripe] = kmap_local_paddr( 1318 + sector_paddr_in_rbio(rbio, stripe, sector_nr, step_nr, 0)); 1420 1319 1421 1320 /* Then add the parity stripe */ 1422 - sector = rbio_pstripe_sector(rbio, sectornr); 1423 - sector->uptodate = 1; 1424 - pointers[stripe++] = kmap_local_sector(sector); 1321 + pointers[stripe++] = kmap_local_paddr(rbio_pstripe_paddr(rbio, sector_nr, step_nr)); 1425 1322 1426 1323 if (has_qstripe) { 1427 1324 /* 1428 1325 * RAID6, add the qstripe and call the library function 1429 1326 * to fill in our p/q 1430 1327 */ 1431 - sector = rbio_qstripe_sector(rbio, sectornr); 1432 - sector->uptodate = 1; 1433 - pointers[stripe++] = kmap_local_sector(sector); 1328 + pointers[stripe++] = kmap_local_paddr( 1329 + rbio_qstripe_paddr(rbio, sector_nr, step_nr)); 1434 1330 1435 1331 assert_rbio(rbio); 1436 - raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, 1437 - pointers); 1332 + raid6_call.gen_syndrome(rbio->real_stripes, step, pointers); 1438 1333 } else { 1439 1334 /* raid5 */ 1440 - memcpy(pointers[rbio->nr_data], pointers[0], sectorsize); 1441 - run_xor(pointers + 1, rbio->nr_data - 1, sectorsize); 1335 + memcpy(pointers[rbio->nr_data], pointers[0], step); 1336 + run_xor(pointers + 1, rbio->nr_data - 1, step); 1442 1337 } 1443 1338 for (stripe = stripe - 1; stripe >= 0; stripe--) 1444 1339 kunmap_local(pointers[stripe]); 1340 + } 1341 + 1342 + /* Generate PQ for one vertical stripe. 
*/ 1343 + static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr) 1344 + { 1345 + const bool has_qstripe = (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6); 1346 + 1347 + for (int i = 0; i < rbio->sector_nsteps; i++) 1348 + generate_pq_vertical_step(rbio, sectornr, i); 1349 + 1350 + set_bit(rbio_sector_index(rbio, rbio->nr_data, sectornr), 1351 + rbio->stripe_uptodate_bitmap); 1352 + if (has_qstripe) 1353 + set_bit(rbio_sector_index(rbio, rbio->nr_data + 1, sectornr), 1354 + rbio->stripe_uptodate_bitmap); 1445 1355 } 1446 1356 1447 1357 static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, ··· 1478 1372 */ 1479 1373 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 1480 1374 total_sector_nr++) { 1481 - struct sector_ptr *sector; 1375 + phys_addr_t *paddrs; 1482 1376 1483 1377 stripe = total_sector_nr / rbio->stripe_nsectors; 1484 1378 sectornr = total_sector_nr % rbio->stripe_nsectors; ··· 1488 1382 continue; 1489 1383 1490 1384 if (stripe < rbio->nr_data) { 1491 - sector = sector_in_rbio(rbio, stripe, sectornr, 1); 1492 - if (!sector) 1385 + paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1); 1386 + if (paddrs == NULL) 1493 1387 continue; 1494 1388 } else { 1495 - sector = rbio_stripe_sector(rbio, stripe, sectornr); 1389 + paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr); 1496 1390 } 1497 1391 1498 - ret = rbio_add_io_sector(rbio, bio_list, sector, stripe, 1392 + ret = rbio_add_io_paddrs(rbio, bio_list, paddrs, stripe, 1499 1393 sectornr, REQ_OP_WRITE); 1500 1394 if (ret) 1501 1395 goto error; ··· 1513 1407 1514 1408 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 1515 1409 total_sector_nr++) { 1516 - struct sector_ptr *sector; 1410 + phys_addr_t *paddrs; 1517 1411 1518 1412 stripe = total_sector_nr / rbio->stripe_nsectors; 1519 1413 sectornr = total_sector_nr % rbio->stripe_nsectors; ··· 1538 1432 continue; 1539 1433 1540 1434 if (stripe < rbio->nr_data) { 1541 - sector = sector_in_rbio(rbio, stripe, sectornr, 1); 1542 - if (!sector) 1435 + paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1); 1436 + if (paddrs == NULL) 1543 1437 continue; 1544 1438 } else { 1545 - sector = rbio_stripe_sector(rbio, stripe, sectornr); 1439 + paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr); 1546 1440 } 1547 1441 1548 - ret = rbio_add_io_sector(rbio, bio_list, sector, 1442 + ret = rbio_add_io_paddrs(rbio, bio_list, paddrs, 1549 1443 rbio->real_stripes, 1550 1444 sectornr, REQ_OP_WRITE); 1551 1445 if (ret) ··· 1593 1487 } 1594 1488 1595 1489 /* 1596 - * For subpage case, we can no longer set page Up-to-date directly for 1597 - * stripe_pages[], thus we need to locate the sector. 1490 + * Return the sector number matching @paddr inside rbio->stripe_paddrs[]. 1491 + * 1492 + * Return -1 if not found.
1598 1493 */ 1599 - static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio, 1600 - phys_addr_t paddr) 1494 + static int find_stripe_sector_nr(struct btrfs_raid_bio *rbio, phys_addr_t paddr) 1601 1495 { 1602 - int i; 1603 - 1604 - for (i = 0; i < rbio->nr_sectors; i++) { 1605 - struct sector_ptr *sector = &rbio->stripe_sectors[i]; 1606 - 1607 - if (sector->has_paddr && sector->paddr == paddr) 1608 - return sector; 1496 + for (int i = 0; i < rbio->nr_sectors; i++) { 1497 + if (rbio->stripe_paddrs[i * rbio->sector_nsteps] == paddr) 1498 + return i; 1609 1499 } 1610 - return NULL; 1500 + return -1; 1611 1501 } 1612 1502 1613 1503 /* ··· 1612 1510 */ 1613 1511 static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio) 1614 1512 { 1615 - const u32 blocksize = rbio->bioc->fs_info->sectorsize; 1513 + const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 1514 + const u32 step = min(sectorsize, PAGE_SIZE); 1515 + u32 offset = 0; 1616 1516 phys_addr_t paddr; 1617 1517 1618 1518 ASSERT(!bio_flagged(bio, BIO_CLONED)); 1619 1519 1620 - btrfs_bio_for_each_block_all(paddr, bio, blocksize) { 1621 - struct sector_ptr *sector = find_stripe_sector(rbio, paddr); 1520 + btrfs_bio_for_each_block_all(paddr, bio, step) { 1521 + /* Hitting the first step of a sector. */ 1522 + if (IS_ALIGNED(offset, sectorsize)) { 1523 + int sector_nr = find_stripe_sector_nr(rbio, paddr); 1622 1524 1623 - ASSERT(sector); 1624 - if (sector) 1625 - sector->uptodate = 1; 1525 + ASSERT(sector_nr >= 0); 1526 + if (sector_nr >= 0) 1527 + set_bit(sector_nr, rbio->stripe_uptodate_bitmap); 1528 + } 1529 + offset += step; 1626 1530 } 1627 1531 } 1628 1532 ··· 1638 1530 int i; 1639 1531 1640 1532 for (i = 0; i < rbio->nr_sectors; i++) { 1641 - if (rbio->stripe_sectors[i].paddr == bvec_paddr) 1533 + if (rbio->stripe_paddrs[i * rbio->sector_nsteps] == bvec_paddr) 1642 1534 break; 1643 - if (rbio->bio_sectors[i].has_paddr && 1644 - rbio->bio_sectors[i].paddr == bvec_paddr) 1535 + if (rbio->bio_paddrs[i * rbio->sector_nsteps] == bvec_paddr) 1645 1536 break; 1646 1537 } 1647 1538 ASSERT(i < rbio->nr_sectors); ··· 1673 1566 struct bio *bio) 1674 1567 { 1675 1568 struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 1569 + const u32 step = min(fs_info->sectorsize, PAGE_SIZE); 1570 + const u32 nr_steps = rbio->sector_nsteps; 1676 1571 int total_sector_nr = get_bio_sector_nr(rbio, bio); 1572 + u32 offset = 0; 1573 + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; 1677 1574 phys_addr_t paddr; 1678 1575 1679 1576 /* No data csum for the whole stripe, no need to verify. */ ··· 1688 1577 if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors) 1689 1578 return; 1690 1579 1691 - btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) { 1580 + btrfs_bio_for_each_block_all(paddr, bio, step) { 1692 1581 u8 csum_buf[BTRFS_CSUM_SIZE]; 1693 - u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size; 1694 - int ret; 1582 + u8 *expected_csum; 1583 + 1584 + paddrs[(offset / step) % nr_steps] = paddr; 1585 + offset += step; 1586 + 1587 + /* Not yet covering the full fs block, continue to the next step. */ 1588 + if (!IS_ALIGNED(offset, fs_info->sectorsize)) 1589 + continue; 1695 1590 1696 1591 /* No csum for this sector, skip to the next sector. 
*/ 1697 1592 if (!test_bit(total_sector_nr, rbio->csum_bitmap)) 1698 1593 continue; 1699 1594 1700 - ret = btrfs_check_block_csum(fs_info, paddr, 1701 - csum_buf, expected_csum); 1702 - if (ret < 0) 1595 + expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size; 1596 + btrfs_calculate_block_csum_pages(fs_info, paddrs, csum_buf); 1597 + if (unlikely(memcmp(csum_buf, expected_csum, fs_info->csum_size) != 0)) 1703 1598 set_bit(total_sector_nr, rbio->error_bitmap); 1704 1599 total_sector_nr++; 1705 1600 } ··· 1902 1785 int stripe_nr, int sector_nr) 1903 1786 { 1904 1787 struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 1905 - struct sector_ptr *sector; 1788 + phys_addr_t *paddrs; 1906 1789 u8 csum_buf[BTRFS_CSUM_SIZE]; 1907 1790 u8 *csum_expected; 1908 - int ret; 1909 1791 1910 1792 if (!rbio->csum_bitmap || !rbio->csum_buf) 1911 1793 return 0; ··· 1917 1801 * bio list if possible. 1918 1802 */ 1919 1803 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 1920 - sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); 1804 + paddrs = sector_paddrs_in_rbio(rbio, stripe_nr, sector_nr, 0); 1921 1805 } else { 1922 - sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); 1806 + paddrs = rbio_stripe_paddrs(rbio, stripe_nr, sector_nr); 1923 1807 } 1924 1808 1925 1809 csum_expected = rbio->csum_buf + 1926 1810 (stripe_nr * rbio->stripe_nsectors + sector_nr) * 1927 1811 fs_info->csum_size; 1928 - ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected); 1929 - return ret; 1812 + btrfs_calculate_block_csum_pages(fs_info, paddrs, csum_buf); 1813 + if (unlikely(memcmp(csum_buf, csum_expected, fs_info->csum_size) != 0)) 1814 + return -EIO; 1815 + return 0; 1930 1816 } 1931 1817 1932 - /* 1933 - * Recover a vertical stripe specified by @sector_nr. 1934 - * @*pointers are the pre-allocated pointers by the caller, so we don't 1935 - * need to allocate/free the pointers again and again. 1936 - */ 1937 - static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, 1938 - void **pointers, void **unmap_array) 1818 + static void recover_vertical_step(struct btrfs_raid_bio *rbio, 1819 + unsigned int sector_nr, 1820 + unsigned int step_nr, 1821 + int faila, int failb, 1822 + void **pointers, void **unmap_array) 1939 1823 { 1940 1824 struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 1941 - struct sector_ptr *sector; 1942 - const u32 sectorsize = fs_info->sectorsize; 1943 - int found_errors; 1944 - int faila; 1945 - int failb; 1825 + const u32 step = min(fs_info->sectorsize, PAGE_SIZE); 1946 1826 int stripe_nr; 1947 - int ret = 0; 1948 1827 1949 - /* 1950 - * Now we just use bitmap to mark the horizontal stripes in 1951 - * which we have data when doing parity scrub. 1952 - */ 1953 - if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && 1954 - !test_bit(sector_nr, &rbio->dbitmap)) 1955 - return 0; 1956 - 1957 - found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, 1958 - &failb); 1959 - /* 1960 - * No errors in the vertical stripe, skip it. Can happen for recovery 1961 - * which only part of a stripe failed csum check. 1962 - */ 1963 - if (!found_errors) 1964 - return 0; 1965 - 1966 - if (unlikely(found_errors > rbio->bioc->max_errors)) 1967 - return -EIO; 1828 + ASSERT(step_nr < rbio->sector_nsteps); 1829 + ASSERT(sector_nr < rbio->stripe_nsectors); 1968 1830 1969 1831 /* 1970 1832 * Setup our array of pointers with sectors from each stripe ··· 1951 1857 * pointer order. 
1952 1858 */ 1953 1859 for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { 1860 + phys_addr_t paddr; 1861 + 1954 1862 /* 1955 1863 * If we're rebuilding a read, we have to use pages from the 1956 1864 * bio list if possible. 1957 1865 */ 1958 1866 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 1959 - sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); 1867 + paddr = sector_paddr_in_rbio(rbio, stripe_nr, sector_nr, step_nr, 0); 1960 1868 } else { 1961 - sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); 1869 + paddr = rbio_stripe_paddr(rbio, stripe_nr, sector_nr, step_nr); 1962 1870 } 1963 - pointers[stripe_nr] = kmap_local_sector(sector); 1871 + pointers[stripe_nr] = kmap_local_paddr(paddr); 1964 1872 unmap_array[stripe_nr] = pointers[stripe_nr]; 1965 1873 } 1966 1874 ··· 2008 1912 } 2009 1913 2010 1914 if (failb == rbio->real_stripes - 2) { 2011 - raid6_datap_recov(rbio->real_stripes, sectorsize, 1915 + raid6_datap_recov(rbio->real_stripes, step, 2012 1916 faila, pointers); 2013 1917 } else { 2014 - raid6_2data_recov(rbio->real_stripes, sectorsize, 1918 + raid6_2data_recov(rbio->real_stripes, step, 2015 1919 faila, failb, pointers); 2016 1920 } 2017 1921 } else { ··· 2021 1925 ASSERT(failb == -1); 2022 1926 pstripe: 2023 1927 /* Copy parity block into failed block to start with */ 2024 - memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize); 1928 + memcpy(pointers[faila], pointers[rbio->nr_data], step); 2025 1929 2026 1930 /* Rearrange the pointer array */ 2027 1931 p = pointers[faila]; ··· 2031 1935 pointers[rbio->nr_data - 1] = p; 2032 1936 2033 1937 /* Xor in the rest */ 2034 - run_xor(pointers, rbio->nr_data - 1, sectorsize); 2035 - 2036 - } 2037 - 2038 - /* 2039 - * No matter if this is a RMW or recovery, we should have all 2040 - * failed sectors repaired in the vertical stripe, thus they are now 2041 - * uptodate. 2042 - * Especially if we determine to cache the rbio, we need to 2043 - * have at least all data sectors uptodate. 2044 - * 2045 - * If possible, also check if the repaired sector matches its data 2046 - * checksum. 2047 - */ 2048 - if (faila >= 0) { 2049 - ret = verify_one_sector(rbio, faila, sector_nr); 2050 - if (ret < 0) 2051 - goto cleanup; 2052 - 2053 - sector = rbio_stripe_sector(rbio, faila, sector_nr); 2054 - sector->uptodate = 1; 2055 - } 2056 - if (failb >= 0) { 2057 - ret = verify_one_sector(rbio, failb, sector_nr); 2058 - if (ret < 0) 2059 - goto cleanup; 2060 - 2061 - sector = rbio_stripe_sector(rbio, failb, sector_nr); 2062 - sector->uptodate = 1; 1938 + run_xor(pointers, rbio->nr_data - 1, step); 2063 1939 } 2064 1940 2065 1941 cleanup: 2066 1942 for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--) 2067 1943 kunmap_local(unmap_array[stripe_nr]); 1944 + } 1945 + 1946 + /* 1947 + * Recover a vertical stripe specified by @sector_nr. 1948 + * @*pointers are the pre-allocated pointers by the caller, so we don't 1949 + * need to allocate/free the pointers again and again. 1950 + */ 1951 + static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, 1952 + void **pointers, void **unmap_array) 1953 + { 1954 + int found_errors; 1955 + int faila; 1956 + int failb; 1957 + int ret = 0; 1958 + 1959 + /* 1960 + * Now we just use bitmap to mark the horizontal stripes in 1961 + * which we have data when doing parity scrub. 
1962 + */ 1963 + if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && 1964 + !test_bit(sector_nr, &rbio->dbitmap)) 1965 + return 0; 1966 + 1967 + found_errors = get_rbio_vertical_errors(rbio, sector_nr, &faila, 1968 + &failb); 1969 + /* 1970 + * No errors in the vertical stripe, skip it. Can happen for recovery 1971 + * which only part of a stripe failed csum check. 1972 + */ 1973 + if (!found_errors) 1974 + return 0; 1975 + 1976 + if (unlikely(found_errors > rbio->bioc->max_errors)) 1977 + return -EIO; 1978 + 1979 + for (int i = 0; i < rbio->sector_nsteps; i++) 1980 + recover_vertical_step(rbio, sector_nr, i, faila, failb, 1981 + pointers, unmap_array); 1982 + if (faila >= 0) { 1983 + ret = verify_one_sector(rbio, faila, sector_nr); 1984 + if (ret < 0) 1985 + return ret; 1986 + 1987 + set_bit(rbio_sector_index(rbio, faila, sector_nr), 1988 + rbio->stripe_uptodate_bitmap); 1989 + } 1990 + if (failb >= 0) { 1991 + ret = verify_one_sector(rbio, failb, sector_nr); 1992 + if (ret < 0) 1993 + return ret; 1994 + 1995 + set_bit(rbio_sector_index(rbio, failb, sector_nr), 1996 + rbio->stripe_uptodate_bitmap); 1997 + } 2068 1998 return ret; 2069 1999 } 2070 2000 ··· 2165 2043 total_sector_nr++) { 2166 2044 int stripe = total_sector_nr / rbio->stripe_nsectors; 2167 2045 int sectornr = total_sector_nr % rbio->stripe_nsectors; 2168 - struct sector_ptr *sector; 2046 + phys_addr_t *paddrs; 2169 2047 2170 2048 /* 2171 2049 * Skip the range which has error. It can be a range which is ··· 2182 2060 continue; 2183 2061 } 2184 2062 2185 - sector = rbio_stripe_sector(rbio, stripe, sectornr); 2186 - ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, 2063 + paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr); 2064 + ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe, 2187 2065 sectornr, REQ_OP_READ); 2188 2066 if (ret < 0) { 2189 2067 bio_list_put(&bio_list); ··· 2228 2106 int faila; 2229 2107 int failb; 2230 2108 2231 - found_errors = get_rbio_veritical_errors(rbio, sector_nr, 2109 + found_errors = get_rbio_vertical_errors(rbio, sector_nr, 2232 2110 &faila, &failb); 2233 2111 /* This vertical stripe doesn't have errors. */ 2234 2112 if (!found_errors) ··· 2372 2250 */ 2373 2251 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 2374 2252 total_sector_nr++) { 2375 - struct sector_ptr *sector; 2376 2253 int stripe = total_sector_nr / rbio->stripe_nsectors; 2377 2254 int sectornr = total_sector_nr % rbio->stripe_nsectors; 2255 + phys_addr_t *paddrs; 2378 2256 2379 - sector = rbio_stripe_sector(rbio, stripe, sectornr); 2380 - ret = rbio_add_io_sector(rbio, &bio_list, sector, 2381 - stripe, sectornr, REQ_OP_READ); 2257 + paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr); 2258 + ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe, 2259 + sectornr, REQ_OP_READ); 2382 2260 if (ret) { 2383 2261 bio_list_put(&bio_list); 2384 2262 return ret; ··· 2432 2310 int i; 2433 2311 2434 2312 for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) { 2435 - struct sector_ptr *sector = &rbio->stripe_sectors[i]; 2313 + phys_addr_t paddr = rbio->stripe_paddrs[i * rbio->sector_nsteps]; 2436 2314 2437 2315 /* 2438 2316 * We have a sector which doesn't have page nor uptodate, 2439 2317 * thus this rbio can not be cached one, as cached one must 2440 2318 * have all its data sectors present and uptodate. 
2441 2319 */ 2442 - if (!sector->has_paddr || !sector->uptodate) 2320 + if (paddr == INVALID_PADDR || 2321 + !test_bit(i, rbio->stripe_uptodate_bitmap)) 2443 2322 return true; 2444 2323 } 2445 2324 return false; ··· 2521 2398 for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { 2522 2399 int found_errors; 2523 2400 2524 - found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL); 2401 + found_errors = get_rbio_vertical_errors(rbio, sectornr, NULL, NULL); 2525 2402 if (unlikely(found_errors > rbio->bioc->max_errors)) { 2526 2403 ret = -EIO; 2527 2404 break; ··· 2592 2469 return rbio; 2593 2470 } 2594 2471 2472 + static int alloc_rbio_sector_pages(struct btrfs_raid_bio *rbio, 2473 + int sector_nr) 2474 + { 2475 + const u32 step = min(PAGE_SIZE, rbio->bioc->fs_info->sectorsize); 2476 + const u32 base = sector_nr * rbio->sector_nsteps; 2477 + 2478 + for (int i = base; i < base + rbio->sector_nsteps; i++) { 2479 + const unsigned int page_index = (i * step) >> PAGE_SHIFT; 2480 + struct page *page; 2481 + 2482 + if (rbio->stripe_pages[page_index]) 2483 + continue; 2484 + page = alloc_page(GFP_NOFS); 2485 + if (!page) 2486 + return -ENOMEM; 2487 + rbio->stripe_pages[page_index] = page; 2488 + } 2489 + return 0; 2490 + } 2491 + 2595 2492 /* 2596 2493 * We just scrub the parity that we have correct data on the same horizontal, 2597 2494 * so we needn't allocate all pages for all the stripes. 2598 2495 */ 2599 2496 static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) 2600 2497 { 2601 - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 2602 2498 int total_sector_nr; 2603 2499 2604 2500 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 2605 2501 total_sector_nr++) { 2606 - struct page *page; 2607 2502 int sectornr = total_sector_nr % rbio->stripe_nsectors; 2608 - int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT; 2503 + int ret; 2609 2504 2610 2505 if (!test_bit(sectornr, &rbio->dbitmap)) 2611 2506 continue; 2612 - if (rbio->stripe_pages[index]) 2613 - continue; 2614 - page = alloc_page(GFP_NOFS); 2615 - if (!page) 2616 - return -ENOMEM; 2617 - rbio->stripe_pages[index] = page; 2507 + ret = alloc_rbio_sector_pages(rbio, total_sector_nr); 2508 + if (ret < 0) 2509 + return ret; 2618 2510 } 2619 2511 index_stripe_sectors(rbio); 2620 2512 return 0; 2621 2513 } 2622 2514 2515 + /* Return true if the content of the step matches the calculated one. */ 2516 + static bool verify_one_parity_step(struct btrfs_raid_bio *rbio, 2517 + void *pointers[], unsigned int sector_nr, 2518 + unsigned int step_nr) 2519 + { 2520 + const unsigned int nr_data = rbio->nr_data; 2521 + const bool has_qstripe = (rbio->real_stripes - rbio->nr_data == 2); 2522 + const u32 step = min(rbio->bioc->fs_info->sectorsize, PAGE_SIZE); 2523 + void *parity; 2524 + bool ret = false; 2525 + 2526 + ASSERT(step_nr < rbio->sector_nsteps); 2527 + 2528 + /* First collect one page from each data stripe. */ 2529 + for (int stripe = 0; stripe < nr_data; stripe++) 2530 + pointers[stripe] = kmap_local_paddr( 2531 + sector_paddr_in_rbio(rbio, stripe, sector_nr, 2532 + step_nr, 0)); 2533 + 2534 + if (has_qstripe) { 2535 + assert_rbio(rbio); 2536 + /* RAID6, call the library function to fill in our P/Q. */ 2537 + raid6_call.gen_syndrome(rbio->real_stripes, step, pointers); 2538 + } else { 2539 + /* RAID5. */ 2540 + memcpy(pointers[nr_data], pointers[0], step); 2541 + run_xor(pointers + 1, nr_data - 1, step); 2542 + } 2543 + 2544 + /* Check scrubbing parity and repair it.
*/ 2545 + parity = kmap_local_paddr(rbio_stripe_paddr(rbio, rbio->scrubp, sector_nr, step_nr)); 2546 + if (memcmp(parity, pointers[rbio->scrubp], step) != 0) 2547 + memcpy(parity, pointers[rbio->scrubp], step); 2548 + else 2549 + ret = true; 2550 + kunmap_local(parity); 2551 + 2552 + for (int stripe = nr_data - 1; stripe >= 0; stripe--) 2553 + kunmap_local(pointers[stripe]); 2554 + return ret; 2555 + } 2556 + 2557 + /* 2558 + * The @pointers array should have the P/Q parity already mapped. 2559 + */ 2560 + static void verify_one_parity_sector(struct btrfs_raid_bio *rbio, 2561 + void *pointers[], unsigned int sector_nr) 2562 + { 2563 + bool found_error = false; 2564 + 2565 + for (int step_nr = 0; step_nr < rbio->sector_nsteps; step_nr++) { 2566 + bool match; 2567 + 2568 + match = verify_one_parity_step(rbio, pointers, sector_nr, step_nr); 2569 + if (!match) 2570 + found_error = true; 2571 + } 2572 + if (!found_error) 2573 + bitmap_clear(&rbio->dbitmap, sector_nr, 1); 2574 + } 2575 + 2623 2576 static int finish_parity_scrub(struct btrfs_raid_bio *rbio) 2624 2577 { 2625 2578 struct btrfs_io_context *bioc = rbio->bioc; 2626 - const u32 sectorsize = bioc->fs_info->sectorsize; 2627 2579 void **pointers = rbio->finish_pointers; 2628 2580 unsigned long *pbitmap = &rbio->finish_pbitmap; 2629 2581 int nr_data = rbio->nr_data; 2630 - int stripe; 2631 2582 int sectornr; 2632 2583 bool has_qstripe; 2633 2584 struct page *page; 2634 - struct sector_ptr p_sector = { 0 }; 2635 - struct sector_ptr q_sector = { 0 }; 2585 + phys_addr_t p_paddr = INVALID_PADDR; 2586 + phys_addr_t q_paddr = INVALID_PADDR; 2636 2587 struct bio_list bio_list; 2637 2588 int is_replace = 0; 2638 2589 int ret; ··· 2739 2542 page = alloc_page(GFP_NOFS); 2740 2543 if (!page) 2741 2544 return -ENOMEM; 2742 - p_sector.has_paddr = true; 2743 - p_sector.paddr = page_to_phys(page); 2744 - p_sector.uptodate = 1; 2545 + p_paddr = page_to_phys(page); 2745 2546 page = NULL; 2547 + pointers[nr_data] = kmap_local_paddr(p_paddr); 2746 2548 2747 2549 if (has_qstripe) { 2748 2550 /* RAID6, allocate and map temp space for the Q stripe */ 2749 2551 page = alloc_page(GFP_NOFS); 2750 2552 if (!page) { 2751 - __free_page(phys_to_page(p_sector.paddr)); 2752 - p_sector.has_paddr = false; 2553 + __free_page(phys_to_page(p_paddr)); 2554 + p_paddr = INVALID_PADDR; 2753 2555 return -ENOMEM; 2754 2556 } 2755 - q_sector.has_paddr = true; 2756 - q_sector.paddr = page_to_phys(page); 2757 - q_sector.uptodate = 1; 2557 + q_paddr = page_to_phys(page); 2758 2558 page = NULL; 2759 - pointers[rbio->real_stripes - 1] = kmap_local_sector(&q_sector); 2559 + pointers[rbio->real_stripes - 1] = kmap_local_paddr(q_paddr); 2760 2560 } 2761 2561 2762 2562 bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); 2763 2563 2764 2564 /* Map the parity stripe just once */ 2765 - pointers[nr_data] = kmap_local_sector(&p_sector); 2766 2565 2767 - for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { 2768 - struct sector_ptr *sector; 2769 - void *parity; 2770 - 2771 - /* first collect one page from each data stripe */ 2772 - for (stripe = 0; stripe < nr_data; stripe++) { 2773 - sector = sector_in_rbio(rbio, stripe, sectornr, 0); 2774 - pointers[stripe] = kmap_local_sector(sector); 2775 - } 2776 - 2777 - if (has_qstripe) { 2778 - assert_rbio(rbio); 2779 - /* RAID6, call the library function to fill in our P/Q */ 2780 - raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, 2781 - pointers); 2782 - } else { 2783 - /* raid5 */ 2784 - memcpy(pointers[nr_data], pointers[0], 
sectorsize); 2785 - run_xor(pointers + 1, nr_data - 1, sectorsize); 2786 - } 2787 - 2788 - /* Check scrubbing parity and repair it */ 2789 - sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); 2790 - parity = kmap_local_sector(sector); 2791 - if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0) 2792 - memcpy(parity, pointers[rbio->scrubp], sectorsize); 2793 - else 2794 - /* Parity is right, needn't writeback */ 2795 - bitmap_clear(&rbio->dbitmap, sectornr, 1); 2796 - kunmap_local(parity); 2797 - 2798 - for (stripe = nr_data - 1; stripe >= 0; stripe--) 2799 - kunmap_local(pointers[stripe]); 2800 - } 2566 + for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) 2567 + verify_one_parity_sector(rbio, pointers, sectornr); 2801 2568 2802 2569 kunmap_local(pointers[nr_data]); 2803 - __free_page(phys_to_page(p_sector.paddr)); 2804 - p_sector.has_paddr = false; 2805 - if (q_sector.has_paddr) { 2806 - __free_page(phys_to_page(q_sector.paddr)); 2807 - q_sector.has_paddr = false; 2570 + __free_page(phys_to_page(p_paddr)); 2571 + p_paddr = INVALID_PADDR; 2572 + if (q_paddr != INVALID_PADDR) { 2573 + __free_page(phys_to_page(q_paddr)); 2574 + q_paddr = INVALID_PADDR; 2808 2575 } 2809 2576 2810 2577 /* ··· 2777 2616 * everything else. 2778 2617 */ 2779 2618 for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { 2780 - struct sector_ptr *sector; 2619 + phys_addr_t *paddrs; 2781 2620 2782 - sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); 2783 - ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp, 2621 + paddrs = rbio_stripe_paddrs(rbio, rbio->scrubp, sectornr); 2622 + ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, rbio->scrubp, 2784 2623 sectornr, REQ_OP_WRITE); 2785 2624 if (ret) 2786 2625 goto cleanup; ··· 2795 2634 */ 2796 2635 ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio); 2797 2636 for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) { 2798 - struct sector_ptr *sector; 2637 + phys_addr_t *paddrs; 2799 2638 2800 - sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); 2801 - ret = rbio_add_io_sector(rbio, &bio_list, sector, 2802 - rbio->real_stripes, 2639 + paddrs = rbio_stripe_paddrs(rbio, rbio->scrubp, sectornr); 2640 + ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, rbio->real_stripes, 2803 2641 sectornr, REQ_OP_WRITE); 2804 2642 if (ret) 2805 2643 goto cleanup; ··· 2846 2686 int failb; 2847 2687 int found_errors; 2848 2688 2849 - found_errors = get_rbio_veritical_errors(rbio, sector_nr, 2689 + found_errors = get_rbio_vertical_errors(rbio, sector_nr, 2850 2690 &faila, &failb); 2851 2691 if (unlikely(found_errors > rbio->bioc->max_errors)) { 2852 2692 ret = -EIO; ··· 2915 2755 total_sector_nr++) { 2916 2756 int sectornr = total_sector_nr % rbio->stripe_nsectors; 2917 2757 int stripe = total_sector_nr / rbio->stripe_nsectors; 2918 - struct sector_ptr *sector; 2758 + phys_addr_t *paddrs; 2919 2759 2920 2760 /* No data in the vertical stripe, no need to read. */ 2921 2761 if (!test_bit(sectornr, &rbio->dbitmap)) ··· 2923 2763 2924 2764 /* 2925 2765 * We want to find all the sectors missing from the rbio and 2926 - * read them from the disk. If sector_in_rbio() finds a sector 2766 + * read them from the disk. If sector_paddr_in_rbio() finds a sector 2927 2767 * in the bio list we don't need to read it off the stripe. 
2928 2768 */ 2929 - sector = sector_in_rbio(rbio, stripe, sectornr, 1); 2930 - if (sector) 2769 + paddrs = sector_paddrs_in_rbio(rbio, stripe, sectornr, 1); 2770 + if (paddrs == NULL) 2931 2771 continue; 2932 2772 2933 - sector = rbio_stripe_sector(rbio, stripe, sectornr); 2773 + paddrs = rbio_stripe_paddrs(rbio, stripe, sectornr); 2934 2774 /* 2935 2775 * The bio cache may have handed us an uptodate sector. If so, 2936 2776 * use it. 2937 2777 */ 2938 - if (sector->uptodate) 2778 + if (test_bit(rbio_sector_index(rbio, stripe, sectornr), 2779 + rbio->stripe_uptodate_bitmap)) 2939 2780 continue; 2940 2781 2941 - ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, 2782 + ret = rbio_add_io_paddrs(rbio, &bio_list, paddrs, stripe, 2942 2783 sectornr, REQ_OP_READ); 2943 2784 if (ret) { 2944 2785 bio_list_put(&bio_list); ··· 2980 2819 for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { 2981 2820 int found_errors; 2982 2821 2983 - found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL); 2822 + found_errors = get_rbio_vertical_errors(rbio, sector_nr, NULL, NULL); 2984 2823 if (unlikely(found_errors > rbio->bioc->max_errors)) { 2985 2824 ret = -EIO; 2986 2825 break; ··· 3018 2857 unsigned int foffset = 0; 3019 2858 int ret; 3020 2859 3021 - /* We shouldn't hit RAID56 for bs > ps cases for now. */ 3022 - ASSERT(fs_info->sectorsize <= PAGE_SIZE); 3023 - 3024 2860 /* 3025 2861 * If we hit ENOMEM temporarily, but later at 3026 2862 * raid56_parity_submit_scrub_rbio() time it succeeded, we just do ··· 3051 2893 foffset = 0; 3052 2894 } 3053 2895 } 3054 - for (unsigned int sector_nr = offset_in_full_stripe >> fs_info->sectorsize_bits; 3055 - sector_nr < (offset_in_full_stripe + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits; 3056 - sector_nr++) 3057 - rbio->stripe_sectors[sector_nr].uptodate = true; 2896 + bitmap_set(rbio->stripe_uptodate_bitmap, 2897 + offset_in_full_stripe >> fs_info->sectorsize_bits, 2898 + BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); 3058 2899 }
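The raid56.c changes above split parity verification into fixed-size steps, where one step is min(block size, PAGE_SIZE), so that block sizes larger than the page size can be checked one mapped page at a time. The following is a minimal userspace sketch of the same idea for the RAID5 case; the buffer parameters are hypothetical and stand in for the kernel's rbio helpers:

        #include <stdbool.h>
        #include <stddef.h>
        #include <stdint.h>
        #include <string.h>

        /*
         * Verify one step of RAID5 parity: the parity buffer must equal the
         * XOR of the corresponding step of every data stripe. On mismatch
         * the parity is repaired in place and false is returned, mirroring
         * how verify_one_parity_step() leaves the sector queued for
         * writeback in dbitmap.
         */
        static bool verify_raid5_step(const uint8_t *const data[], size_t nr_data,
                                      uint8_t *parity, uint8_t *scratch, size_t step)
        {
                memcpy(scratch, data[0], step);
                for (size_t s = 1; s < nr_data; s++)
                        for (size_t i = 0; i < step; i++)
                                scratch[i] ^= data[s][i];

                if (memcmp(parity, scratch, step) != 0) {
                        memcpy(parity, scratch, step); /* repair bad parity */
                        return false;
                }
                return true;
        }

A sector then verifies clean only if every one of its sector_nsteps steps matches, which is what verify_one_parity_sector() checks before clearing the sector's bit in dbitmap.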
+92 -11
fs/btrfs/raid56.h
··· 16 16 #include "volumes.h" 17 17 18 18 struct page; 19 - struct sector_ptr; 20 19 struct btrfs_fs_info; 21 20 22 21 enum btrfs_rbio_ops { ··· 24 25 BTRFS_RBIO_PARITY_SCRUB, 25 26 }; 26 27 28 + /* 29 + * Overview of btrfs_raid_bio. 30 + * 31 + * One btrfs_raid_bio represents a full stripe of RAID56, including both data 32 + * and P/Q stripes. For now, each data and P/Q stripe is of a fixed length (64K). 33 + * 34 + * One btrfs_raid_bio can have one or more bios from higher layer, covering 35 + * part or all of the data stripes. 36 + * 37 + * [PAGES FROM HIGHER LAYER BIOS] 38 + * Higher layer bios are in the btrfs_raid_bio::bio_list. 39 + * 40 + * Pages from the bio_list are represented like the following: 41 + * 42 + * bio_list: |<- Bio 1 ->| |<- Bio 2 ->| ... 43 + * bio_paddrs: [0] [1] [2] [3] [4] [5] ... 44 + * 45 + * If there is a bio covering a sector (one btrfs fs block), the corresponding 46 + * pointer in btrfs_raid_bio::bio_paddrs[] will point to the physical address 47 + * (with the offset inside the page) of the corresponding bio. 48 + * 49 + * If there is no bio covering a sector, then btrfs_raid_bio::bio_paddrs[i] will 50 + * be INVALID_PADDR. 51 + * 52 + * The length of each entry in bio_paddrs[] is a step (aka, min(sectorsize, PAGE_SIZE)). 53 + * 54 + * [PAGES FOR INTERNAL USAGES] 55 + * Pages not covered by any bio or belonging to P/Q stripes are stored in 56 + * btrfs_raid_bio::stripe_pages[] and stripe_paddrs[], like the following: 57 + * 58 + * stripe_pages: |<- Page 0 ->|<- Page 1 ->| ... 59 + * stripe_paddrs: [0] [1] [2] [3] [4] ... 60 + * 61 + * stripe_pages[] array stores all the pages covering the full stripe, including 62 + * data and P/Q pages. 63 + * stripe_pages[0] is the first page of the first data stripe. 64 + * stripe_pages[BTRFS_STRIPE_LEN / PAGE_SIZE] is the first page of the second 65 + * data stripe. 66 + * 67 + * Some pointers inside stripe_pages[] can be NULL, e.g. for a full stripe write 68 + * (the bio covers all data stripes) there is no need to allocate pages for 69 + * data stripes (can grab from bio_paddrs[]). 70 + * 71 + * If the corresponding page of stripe_paddrs[i] is not allocated, the value of 72 + * stripe_paddrs[i] will be INVALID_PADDR. 73 + * 74 + * The length of each entry in stripe_paddrs[] is a step. 75 + * 76 + * [LOCATING A SECTOR] 77 + * To locate a sector for IO, we need the following info: 78 + * 79 + * - stripe_nr 80 + * Starts from 0 (representing the first data stripe), ends at 81 + * @nr_data (RAID5, P stripe) or @nr_data + 1 (RAID6, Q stripe). 82 + * 83 + * - sector_nr 84 + * Starts from 0 (representing the first sector of the stripe), ends 85 + * at BTRFS_STRIPE_LEN / sectorsize - 1. 86 + * 87 + * - step_nr 88 + * A step is min(sector_size, PAGE_SIZE). 89 + * 90 + * Starts from 0 (representing the first step of the sector), ends 91 + * at @sector_nsteps - 1. 92 + * 93 + * For most call sites they do not need to bother this parameter. 94 + * It is for bs > ps support and only for vertical stripe related works. 95 + * (e.g. RMW/recover) 96 + * 97 + * - from which array 98 + * Whether grabbing from stripe_paddrs[] (aka, internal pages) or from the 99 + * bio_paddrs[] (aka, from the higher layer bios). 100 + * 101 + * For IO, a physical address is returned, so that we can extract the page and 102 + * the offset inside the page for IO. 103 + * A special value INVALID_PADDR represents when the physical address is invalid, 104 + * normally meaning there is no page allocated for the specified sector. 
105 + */ 27 106 struct btrfs_raid_bio { 28 107 struct btrfs_io_context *bioc; 29 108 ··· 159 82 /* How many sectors there are for each stripe */ 160 83 u8 stripe_nsectors; 161 84 85 + /* 86 + * How many steps there are for one sector. 87 + * 88 + * For bs > ps cases, it's sectorsize / PAGE_SIZE. 89 + * For bs <= ps cases, it's always 1. 90 + */ 91 + u8 sector_nsteps; 92 + 162 93 /* Stripe number that we're scrubbing */ 163 94 u8 scrubp; 164 95 ··· 201 116 struct page **stripe_pages; 202 117 203 118 /* Pointers to the sectors in the bio_list, for faster lookup */ 204 - struct sector_ptr *bio_sectors; 119 + phys_addr_t *bio_paddrs; 205 120 206 - /* 207 - * For subpage support, we need to map each sector to above 208 - * stripe_pages. 209 - */ 210 - struct sector_ptr *stripe_sectors; 121 + /* Pointers to the sectors in the stripe_pages[]. */ 122 + phys_addr_t *stripe_paddrs; 123 + 124 + /* Each set bit means the corresponding sector in stripe_sectors[] is uptodate. */ 125 + unsigned long *stripe_uptodate_bitmap; 211 126 212 127 /* Allocated with real_stripes-many pointers for finish_*() calls */ 213 128 void **finish_pointers; ··· 216 131 * The bitmap recording where IO errors happened. 217 132 * Each bit is corresponding to one sector in either bio_sectors[] or 218 133 * stripe_sectors[] array. 219 - * 220 - * The reason we don't use another bit in sector_ptr is, we have two 221 - * arrays of sectors, and a lot of IO can use sectors in both arrays. 222 - * Thus making it much harder to iterate. 223 134 */ 224 135 unsigned long *error_bitmap; 225 136
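The new header comment describes locating data by (stripe_nr, sector_nr, step_nr), with bio_paddrs[] and stripe_paddrs[] holding one phys_addr_t per step. A sketch of the index arithmetic that layout implies is below; the helper name and exact formula are illustrative assumptions, not the kernel's actual rbio_sector_index():

        #include <stddef.h>

        /*
         * Hypothetical index math for a per-step physical address array:
         * stripes are laid out back to back, each stripe holds
         * stripe_nsectors sectors, and each sector holds sector_nsteps
         * steps (always 1 when the block size is <= PAGE_SIZE).
         */
        static inline size_t step_index(size_t stripe_nr, size_t sector_nr,
                                        size_t step_nr, size_t stripe_nsectors,
                                        size_t sector_nsteps)
        {
                return (stripe_nr * stripe_nsectors + sector_nr) * sector_nsteps + step_nr;
        }

With sector_nsteps == 1 this collapses to the familiar stripe_nr * stripe_nsectors + sector_nr, which is also the per-sector shape that the new stripe_uptodate_bitmap needs.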
+10 -5
fs/btrfs/reflink.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/blkdev.h> 4 + #include <linux/fscrypt.h> 4 5 #include <linux/iversion.h> 5 6 #include "ctree.h" 6 7 #include "fs.h" ··· 344 343 BTRFS_PATH_AUTO_FREE(path); 345 344 struct extent_buffer *leaf; 346 345 struct btrfs_trans_handle *trans; 347 - char *buf = NULL; 346 + char AUTO_KVFREE(buf); 348 347 struct btrfs_key key; 349 348 u32 nritems; 350 349 int slot; ··· 359 358 return ret; 360 359 361 360 path = btrfs_alloc_path(); 362 - if (!path) { 363 - kvfree(buf); 361 + if (!path) 364 362 return ret; 365 - } 366 363 367 364 path->reada = READA_FORWARD; 368 365 /* Clone data */ ··· 610 611 } 611 612 612 613 out: 613 - kvfree(buf); 614 614 clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags); 615 615 616 616 return ret; ··· 790 792 ASSERT(inode_in->vfs_inode.i_sb == inode_out->vfs_inode.i_sb); 791 793 } 792 794 795 + /* Can only reflink encrypted files if both files are encrypted. */ 796 + if (IS_ENCRYPTED(&inode_in->vfs_inode) != IS_ENCRYPTED(&inode_out->vfs_inode)) 797 + return -EINVAL; 798 + 793 799 /* Don't make the dst file partly checksummed */ 794 800 if ((inode_in->flags & BTRFS_INODE_NODATASUM) != 795 801 (inode_out->flags & BTRFS_INODE_NODATASUM)) { ··· 869 867 struct btrfs_inode *dst_inode = BTRFS_I(file_inode(dst_file)); 870 868 bool same_inode = dst_inode == src_inode; 871 869 int ret; 870 + 871 + if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(src_file))))) 872 + return -EIO; 872 873 873 874 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 874 875 return -EINVAL;
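The AUTO_KVFREE(buf) declaration above lets every explicit kvfree() call in reflink.c be dropped by tying the free to the variable's scope. The kernel builds such helpers on the compiler's cleanup attribute (compare the __free(inode_fs_paths) annotation in the scrub.c hunks below, from linux/cleanup.h); a minimal userspace sketch of the pattern, with a hypothetical AUTO_FREE macro:

        #include <stdlib.h>

        static void free_charp(char **p)
        {
                free(*p);       /* free(NULL) is a no-op, so early exits are safe */
        }

        /* Hypothetical userspace analogue of a scope-based cleanup macro. */
        #define AUTO_FREE(name) char *name __attribute__((cleanup(free_charp))) = NULL

        static int demo(size_t len)
        {
                AUTO_FREE(buf);

                buf = malloc(len);
                if (!buf)
                        return -1;      /* no explicit free on any return path */
                /* ... use buf ... */
                return 0;
        }

The payoff is visible in the diff: error paths such as the !path case no longer need their own kvfree(buf) before returning.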
+34 -51
fs/btrfs/relocation.c
··· 511 511 { 512 512 struct btrfs_fs_info *fs_info = root->fs_info; 513 513 struct rb_node *rb_node; 514 - struct mapping_node *node = NULL; 514 + struct mapping_node AUTO_KFREE(node); 515 515 struct reloc_control *rc = fs_info->reloc_ctl; 516 516 bool put_ref = false; 517 517 ··· 544 544 spin_unlock(&fs_info->trans_lock); 545 545 if (put_ref) 546 546 btrfs_put_root(root); 547 - kfree(node); 548 547 } 549 548 550 549 /* ··· 585 586 struct btrfs_fs_info *fs_info = root->fs_info; 586 587 struct btrfs_root *reloc_root; 587 588 struct extent_buffer *eb; 588 - struct btrfs_root_item *root_item; 589 + struct btrfs_root_item AUTO_KFREE(root_item); 589 590 struct btrfs_key root_key; 590 591 int ret = 0; 591 - bool must_abort = false; 592 592 593 593 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 594 594 if (!root_item) ··· 613 615 614 616 btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); 615 617 btrfs_err(fs_info, 616 - "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", 617 - objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); 618 - ret = -EUCLEAN; 619 - goto fail; 618 + "cannot relocate partially dropped subvolume %llu, drop progress key " BTRFS_KEY_FMT, 619 + objectid, BTRFS_KEY_FMT_VALUE(&cpu_key)); 620 + return ERR_PTR(-EUCLEAN); 620 621 } 621 622 622 623 /* called by btrfs_init_reloc_root */ 623 624 ret = btrfs_copy_root(trans, root, root->commit_root, &eb, 624 625 BTRFS_TREE_RELOC_OBJECTID); 625 626 if (ret) 626 - goto fail; 627 + return ERR_PTR(ret); 627 628 628 629 /* 629 630 * Set the last_snapshot field to the generation of the commit ··· 645 648 ret = btrfs_copy_root(trans, root, root->node, &eb, 646 649 BTRFS_TREE_RELOC_OBJECTID); 647 650 if (ret) 648 - goto fail; 651 + return ERR_PTR(ret); 649 652 } 650 653 651 654 /* 652 655 * We have changed references at this point, we must abort the 653 - * transaction if anything fails. 656 + * transaction if anything fails (i.e. 'goto abort'). 
654 657 */ 655 - must_abort = true; 656 658 657 659 memcpy(root_item, &root->root_item, sizeof(*root_item)); 658 660 btrfs_set_root_bytenr(root_item, eb->start); ··· 671 675 ret = btrfs_insert_root(trans, fs_info->tree_root, 672 676 &root_key, root_item); 673 677 if (ret) 674 - goto fail; 675 - 676 - kfree(root_item); 678 + goto abort; 677 679 678 680 reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key); 679 681 if (IS_ERR(reloc_root)) { ··· 681 687 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state); 682 688 btrfs_set_root_last_trans(reloc_root, trans->transid); 683 689 return reloc_root; 684 - fail: 685 - kfree(root_item); 690 + 686 691 abort: 687 - if (must_abort) 688 - btrfs_abort_transaction(trans, ret); 692 + btrfs_abort_transaction(trans, ret); 689 693 return ERR_PTR(ret); 690 694 } 691 695 ··· 2939 2947 const struct file_extent_cluster *cluster = &rc->cluster; 2940 2948 u64 offset = BTRFS_I(inode)->reloc_block_group_start; 2941 2949 u64 cur_file_offset = cluster->start - offset; 2942 - struct file_ra_state *ra; 2950 + struct file_ra_state AUTO_KFREE(ra); 2943 2951 int cluster_nr = 0; 2944 2952 int ret = 0; 2945 2953 ··· 2952 2960 2953 2961 ret = prealloc_file_extent_cluster(rc); 2954 2962 if (ret) 2955 - goto out; 2963 + return ret; 2956 2964 2957 2965 file_ra_state_init(ra, inode->i_mapping); 2958 2966 2959 2967 ret = setup_relocation_extent_mapping(rc); 2960 2968 if (ret) 2961 - goto out; 2969 + return ret; 2962 2970 2963 2971 while (cur_file_offset < cluster->end - offset) { 2964 2972 ret = relocate_one_folio(rc, ra, &cluster_nr, &cur_file_offset); ··· 2967 2975 } 2968 2976 if (ret == 0) 2969 2977 WARN_ON(cluster_nr != cluster->nr); 2970 - out: 2971 - kfree(ra); 2972 2978 return ret; 2973 2979 } 2974 2980 ··· 3165 3175 key.offset = blocksize; 3166 3176 } 3167 3177 3168 - path->search_commit_root = 1; 3169 - path->skip_locking = 1; 3178 + path->search_commit_root = true; 3179 + path->skip_locking = true; 3170 3180 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3171 3181 if (ret < 0) 3172 3182 return ret; ··· 3358 3368 key.type = BTRFS_EXTENT_ITEM_KEY; 3359 3369 key.offset = 0; 3360 3370 3361 - path->search_commit_root = 1; 3362 - path->skip_locking = 1; 3371 + path->search_commit_root = true; 3372 + path->skip_locking = true; 3363 3373 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3364 3374 0, 0); 3365 3375 if (ret < 0) ··· 3872 3882 struct inode *inode; 3873 3883 struct btrfs_path *path; 3874 3884 int ret; 3875 - int rw = 0; 3876 - int err = 0; 3885 + bool bg_is_ro = false; 3877 3886 3878 3887 /* 3879 3888 * This only gets set if we had a half-deleted snapshot on mount. 
We ··· 3914 3925 } 3915 3926 3916 3927 ret = reloc_chunk_start(fs_info); 3917 - if (ret < 0) { 3918 - err = ret; 3928 + if (ret < 0) 3919 3929 goto out_put_bg; 3920 - } 3921 3930 3922 3931 rc->extent_root = extent_root; 3923 3932 rc->block_group = bg; 3924 3933 3925 3934 ret = btrfs_inc_block_group_ro(rc->block_group, true); 3926 - if (ret) { 3927 - err = ret; 3935 + if (ret) 3928 3936 goto out; 3929 - } 3930 - rw = 1; 3937 + bg_is_ro = true; 3931 3938 3932 3939 path = btrfs_alloc_path(); 3933 3940 if (!path) { 3934 - err = -ENOMEM; 3941 + ret = -ENOMEM; 3935 3942 goto out; 3936 3943 } 3937 3944 ··· 3939 3954 else 3940 3955 ret = PTR_ERR(inode); 3941 3956 3942 - if (ret && ret != -ENOENT) { 3943 - err = ret; 3957 + if (ret && ret != -ENOENT) 3944 3958 goto out; 3945 - } 3946 3959 3947 3960 rc->data_inode = create_reloc_inode(rc->block_group); 3948 3961 if (IS_ERR(rc->data_inode)) { 3949 - err = PTR_ERR(rc->data_inode); 3962 + ret = PTR_ERR(rc->data_inode); 3950 3963 rc->data_inode = NULL; 3951 3964 goto out; 3952 3965 } ··· 3965 3982 mutex_lock(&fs_info->cleaner_mutex); 3966 3983 ret = relocate_block_group(rc); 3967 3984 mutex_unlock(&fs_info->cleaner_mutex); 3968 - if (ret < 0) 3969 - err = ret; 3970 3985 3971 3986 finishes_stage = rc->stage; 3972 3987 /* ··· 3977 3996 * out of the loop if we hit an error. 3978 3997 */ 3979 3998 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 3980 - ret = btrfs_wait_ordered_range(BTRFS_I(rc->data_inode), 0, 3981 - (u64)-1); 3982 - if (ret) 3983 - err = ret; 3999 + int wb_ret; 4000 + 4001 + wb_ret = btrfs_wait_ordered_range(BTRFS_I(rc->data_inode), 0, 4002 + (u64)-1); 4003 + if (wb_ret && ret == 0) 4004 + ret = wb_ret; 3984 4005 invalidate_mapping_pages(rc->data_inode->i_mapping, 3985 4006 0, -1); 3986 4007 rc->stage = UPDATE_DATA_PTRS; 3987 4008 } 3988 4009 3989 - if (err < 0) 4010 + if (ret < 0) 3990 4011 goto out; 3991 4012 3992 4013 if (rc->extents_found == 0) ··· 4004 4021 WARN_ON(rc->block_group->reserved > 0); 4005 4022 WARN_ON(rc->block_group->used > 0); 4006 4023 out: 4007 - if (err && rw) 4024 + if (ret && bg_is_ro) 4008 4025 btrfs_dec_block_group_ro(rc->block_group); 4009 4026 iput(rc->data_inode); 4010 4027 reloc_chunk_end(fs_info); 4011 4028 out_put_bg: 4012 4029 btrfs_put_block_group(bg); 4013 4030 free_reloc_control(rc); 4014 - return err; 4031 + return ret; 4015 4032 } 4016 4033 4017 4034 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
+2 -2
fs/btrfs/root-tree.c
··· 147 147 148 148 if (unlikely(ret > 0)) { 149 149 btrfs_crit(fs_info, 150 - "unable to find root key (%llu %u %llu) in tree %llu", 151 - key->objectid, key->type, key->offset, btrfs_root_id(root)); 150 + "unable to find root key " BTRFS_KEY_FMT " in tree %llu", 151 + BTRFS_KEY_FMT_VALUE(key), btrfs_root_id(root)); 152 152 ret = -EUCLEAN; 153 153 btrfs_abort_transaction(trans, ret); 154 154 return ret;
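BTRFS_KEY_FMT and BTRFS_KEY_FMT_VALUE() replace the hand-rolled "(%llu %u %llu)" format string and its three arguments at every key-printing call site in this series. The definitions are not shown in these hunks; a plausible standalone sketch of how such a format/value macro pair works (the real definitions may differ):

        #include <inttypes.h>
        #include <stdio.h>

        struct btrfs_key {
                uint64_t objectid;
                uint8_t type;
                uint64_t offset;
        };

        /* Assumed shape of the pair, for illustration only. */
        #define BTRFS_KEY_FMT            "(%" PRIu64 " %u %" PRIu64 ")"
        #define BTRFS_KEY_FMT_VALUE(key) (key)->objectid, \
                                         (unsigned int)(key)->type, (key)->offset

        int main(void)
        {
                struct btrfs_key key = { .objectid = 256, .type = 1, .offset = 0 };

                printf("unable to find root key " BTRFS_KEY_FMT "\n",
                       BTRFS_KEY_FMT_VALUE(&key));
                return 0;
        }

Keeping the format and its argument list in one place prevents the classic drift where a call site updates one but not the other.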
+170 -100
fs/btrfs/scrub.c
··· 463 463 refcount_set(&sctx->refs, 1); 464 464 sctx->is_dev_replace = is_dev_replace; 465 465 sctx->fs_info = fs_info; 466 - sctx->extent_path.search_commit_root = 1; 467 - sctx->extent_path.skip_locking = 1; 468 - sctx->csum_path.search_commit_root = 1; 469 - sctx->csum_path.skip_locking = 1; 466 + sctx->extent_path.search_commit_root = true; 467 + sctx->extent_path.skip_locking = true; 468 + sctx->csum_path.search_commit_root = true; 469 + sctx->csum_path.skip_locking = true; 470 470 for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) { 471 471 int ret; 472 472 ··· 505 505 struct btrfs_inode_item *inode_item; 506 506 struct scrub_warning *swarn = warn_ctx; 507 507 struct btrfs_fs_info *fs_info = swarn->dev->fs_info; 508 - struct inode_fs_paths *ipath = NULL; 508 + struct inode_fs_paths *ipath __free(inode_fs_paths) = NULL; 509 509 struct btrfs_root *local_root; 510 510 struct btrfs_key key; 511 511 ··· 569 569 (char *)(unsigned long)ipath->fspath->val[i]); 570 570 571 571 btrfs_put_root(local_root); 572 - free_ipath(ipath); 573 572 return 0; 574 573 575 574 err: ··· 579 580 swarn->physical, 580 581 root, inum, offset, ret); 581 582 582 - free_ipath(ipath); 583 583 return 0; 584 584 } 585 585 ··· 775 777 scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); 776 778 scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); 777 779 btrfs_warn_rl(fs_info, 778 - "scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, 780 + "scrub: tree block %llu mirror %u has bad csum, has " BTRFS_CSUM_FMT " want " BTRFS_CSUM_FMT, 779 781 logical, stripe->mirror_num, 780 - CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), 781 - CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); 782 + BTRFS_CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), 783 + BTRFS_CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); 782 784 return; 783 785 } 784 786 if (stripe->sectors[sector_nr].generation != ··· 927 929 static void scrub_bio_add_sector(struct btrfs_bio *bbio, struct scrub_stripe *stripe, 928 930 int sector_nr) 929 931 { 932 + struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; 930 933 void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr); 931 934 int ret; 932 935 933 - ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), bbio->fs_info->sectorsize, 936 + ret = bio_add_page(&bbio->bio, virt_to_page(kaddr), fs_info->sectorsize, 934 937 offset_in_page(kaddr)); 935 938 /* 936 939 * Caller should ensure the bbio has enough size. ··· 941 942 * to create the minimal amount of bio vectors, for fs block size < page 942 943 * size cases. 
943 944 */ 944 - ASSERT(ret == bbio->fs_info->sectorsize); 945 + ASSERT(ret == fs_info->sectorsize); 946 + } 947 + 948 + static struct btrfs_bio *alloc_scrub_bbio(struct btrfs_fs_info *fs_info, 949 + unsigned int nr_vecs, blk_opf_t opf, 950 + u64 logical, 951 + btrfs_bio_end_io_t end_io, void *private) 952 + { 953 + struct btrfs_bio *bbio; 954 + 955 + bbio = btrfs_bio_alloc(nr_vecs, opf, BTRFS_I(fs_info->btree_inode), 956 + logical, end_io, private); 957 + bbio->is_scrub = true; 958 + bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; 959 + return bbio; 945 960 } 946 961 947 962 static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe, ··· 966 953 const unsigned long old_error_bitmap = scrub_bitmap_read_error(stripe); 967 954 int i; 968 955 969 - ASSERT(stripe->mirror_num >= 1); 970 - ASSERT(atomic_read(&stripe->pending_io) == 0); 956 + ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num); 957 + ASSERT(atomic_read(&stripe->pending_io) == 0, 958 + "atomic_read(&stripe->pending_io)=%d", atomic_read(&stripe->pending_io)); 971 959 972 960 for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) { 973 961 /* The current sector cannot be merged, submit the bio. */ ··· 982 968 bbio = NULL; 983 969 } 984 970 985 - if (!bbio) { 986 - bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, 987 - fs_info, scrub_repair_read_endio, stripe); 988 - bbio->bio.bi_iter.bi_sector = (stripe->logical + 989 - (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; 990 - } 971 + if (!bbio) 972 + bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ, 973 + stripe->logical + (i << fs_info->sectorsize_bits), 974 + scrub_repair_read_endio, stripe); 991 975 992 976 scrub_bio_add_sector(bbio, stripe, i); 993 977 } ··· 1031 1019 int ret; 1032 1020 1033 1021 /* For scrub, our mirror_num should always start at 1. 
*/ 1034 - ASSERT(stripe->mirror_num >= 1); 1022 + ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num); 1035 1023 ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, 1036 1024 stripe->logical, &mapped_len, &bioc, 1037 1025 NULL, NULL); ··· 1171 1159 int mirror; 1172 1160 int i; 1173 1161 1174 - ASSERT(stripe->mirror_num > 0); 1162 + ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", stripe->mirror_num); 1175 1163 1176 1164 wait_scrub_stripe_io(stripe); 1177 1165 scrub_verify_one_stripe(stripe, scrub_bitmap_read_has_extent(stripe)); ··· 1296 1284 bitmap_set(&stripe->write_error_bitmap, sector_nr, 1297 1285 bio_size >> fs_info->sectorsize_bits); 1298 1286 spin_unlock_irqrestore(&stripe->write_error_lock, flags); 1299 - for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++) 1287 + for (i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++) 1300 1288 btrfs_dev_stat_inc_and_print(stripe->dev, 1301 1289 BTRFS_DEV_STAT_WRITE_ERRS); 1302 1290 } ··· 1364 1352 scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); 1365 1353 bbio = NULL; 1366 1354 } 1367 - if (!bbio) { 1368 - bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_WRITE, 1369 - fs_info, scrub_write_endio, stripe); 1370 - bbio->bio.bi_iter.bi_sector = (stripe->logical + 1371 - (sector_nr << fs_info->sectorsize_bits)) >> 1372 - SECTOR_SHIFT; 1373 - } 1355 + if (!bbio) 1356 + bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_WRITE, 1357 + stripe->logical + (sector_nr << fs_info->sectorsize_bits), 1358 + scrub_write_endio, stripe); 1374 1359 scrub_bio_add_sector(bbio, stripe, sector_nr); 1375 1360 } 1376 1361 if (bbio) ··· 1487 1478 1488 1479 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1489 1480 ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY || 1490 - key.type == BTRFS_METADATA_ITEM_KEY); 1481 + key.type == BTRFS_METADATA_ITEM_KEY, "key.type=%u", key.type); 1491 1482 if (key.type == BTRFS_METADATA_ITEM_KEY) 1492 1483 len = fs_info->nodesize; 1493 1484 else ··· 1592 1583 1593 1584 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1594 1585 ASSERT(key.type == BTRFS_METADATA_ITEM_KEY || 1595 - key.type == BTRFS_EXTENT_ITEM_KEY); 1586 + key.type == BTRFS_EXTENT_ITEM_KEY, "key.type=%u", key.type); 1596 1587 *extent_start_ret = key.objectid; 1597 1588 if (key.type == BTRFS_METADATA_ITEM_KEY) 1598 1589 *size_ret = path->nodes[0]->fs_info->nodesize; ··· 1690 1681 scrub_stripe_reset_bitmaps(stripe); 1691 1682 1692 1683 /* The range must be inside the bg. 
*/ 1693 - ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length); 1684 + ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length, 1685 + "bg->start=%llu logical_start=%llu logical_end=%llu end=%llu", 1686 + bg->start, logical_start, logical_end, bg->start + bg->length); 1694 1687 1695 1688 ret = find_first_extent_item(extent_root, extent_path, logical_start, 1696 1689 logical_len); ··· 1860 1849 continue; 1861 1850 } 1862 1851 1863 - bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, 1864 - fs_info, scrub_read_endio, stripe); 1865 - bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; 1852 + bbio = alloc_scrub_bbio(fs_info, stripe->nr_sectors, REQ_OP_READ, 1853 + logical, scrub_read_endio, stripe); 1866 1854 } 1867 1855 1868 1856 scrub_bio_add_sector(bbio, stripe, i); ··· 1898 1888 return; 1899 1889 } 1900 1890 1901 - bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info, 1902 - scrub_read_endio, stripe); 1903 - 1904 - bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; 1891 + bbio = alloc_scrub_bbio(fs_info, BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, 1892 + stripe->logical, scrub_read_endio, stripe); 1905 1893 /* Read the whole range inside the chunk boundary. */ 1906 1894 for (unsigned int cur = 0; cur < nr_sectors; cur++) 1907 1895 scrub_bio_add_sector(bbio, stripe, cur); ··· 2077 2069 return 0; 2078 2070 } 2079 2071 2072 + /* 2073 + * Return 0 if we should not cancel the scrub. 2074 + * Return <0 if we need to cancel the scrub, returned value will 2075 + * indicate the reason: 2076 + * - -ECANCELED - Being explicitly canceled through ioctl. 2077 + * - -EINTR - Being interrupted by signal or fs/process freezing. 2078 + */ 2079 + static int should_cancel_scrub(const struct scrub_ctx *sctx) 2080 + { 2081 + struct btrfs_fs_info *fs_info = sctx->fs_info; 2082 + 2083 + if (atomic_read(&fs_info->scrub_cancel_req) || 2084 + atomic_read(&sctx->cancel_req)) 2085 + return -ECANCELED; 2086 + 2087 + /* 2088 + * The user (e.g. fsfreeze command) or power management (PM) 2089 + * suspend/hibernate can freeze the fs. And PM suspend/hibernate will 2090 + * also freeze all user processes. 2091 + * 2092 + * A user process can only be frozen when it is in user space, thus we 2093 + * have to cancel the run so that the process can return to the user 2094 + * space. 2095 + * 2096 + * Furthermore we have to check both filesystem and process freezing, 2097 + * as PM can be configured to freeze the filesystems before processes. 2098 + * 2099 + * If we only check fs freezing, then suspend without fs freezing 2100 + * will timeout, as the process is still in kernel space. 2101 + * 2102 + * If we only check process freezing, then suspend with fs freezing 2103 + * will timeout, as the running scrub will prevent the fs from being frozen. 
2104 + */ 2105 + if (fs_info->sb->s_writers.frozen > SB_UNFROZEN || 2106 + freezing(current) || signal_pending(current)) 2107 + return -EINTR; 2108 + return 0; 2109 + } 2110 + 2111 + static int scrub_raid56_cached_parity(struct scrub_ctx *sctx, 2112 + struct btrfs_device *scrub_dev, 2113 + struct btrfs_chunk_map *map, 2114 + u64 full_stripe_start, 2115 + unsigned long *extent_bitmap) 2116 + { 2117 + DECLARE_COMPLETION_ONSTACK(io_done); 2118 + struct btrfs_fs_info *fs_info = sctx->fs_info; 2119 + struct btrfs_io_context *bioc = NULL; 2120 + struct btrfs_raid_bio *rbio; 2121 + struct bio bio; 2122 + const int data_stripes = nr_data_stripes(map); 2123 + u64 length = btrfs_stripe_nr_to_offset(data_stripes); 2124 + int ret; 2125 + 2126 + bio_init(&bio, NULL, NULL, 0, REQ_OP_READ); 2127 + bio.bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT; 2128 + bio.bi_private = &io_done; 2129 + bio.bi_end_io = raid56_scrub_wait_endio; 2130 + 2131 + btrfs_bio_counter_inc_blocked(fs_info); 2132 + ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, 2133 + &length, &bioc, NULL, NULL); 2134 + if (ret < 0) 2135 + goto out; 2136 + /* For RAID56 write there must be an @bioc allocated. */ 2137 + ASSERT(bioc); 2138 + rbio = raid56_parity_alloc_scrub_rbio(&bio, bioc, scrub_dev, extent_bitmap, 2139 + BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); 2140 + btrfs_put_bioc(bioc); 2141 + if (!rbio) { 2142 + ret = -ENOMEM; 2143 + goto out; 2144 + } 2145 + /* Use the recovered stripes as cache to avoid read them from disk again. */ 2146 + for (int i = 0; i < data_stripes; i++) { 2147 + struct scrub_stripe *stripe = &sctx->raid56_data_stripes[i]; 2148 + 2149 + raid56_parity_cache_data_folios(rbio, stripe->folios, 2150 + full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT)); 2151 + } 2152 + raid56_parity_submit_scrub_rbio(rbio); 2153 + wait_for_completion_io(&io_done); 2154 + ret = blk_status_to_errno(bio.bi_status); 2155 + out: 2156 + btrfs_bio_counter_dec(fs_info); 2157 + bio_uninit(&bio); 2158 + return ret; 2159 + } 2160 + 2080 2161 static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, 2081 2162 struct btrfs_device *scrub_dev, 2082 2163 struct btrfs_block_group *bg, 2083 2164 struct btrfs_chunk_map *map, 2084 2165 u64 full_stripe_start) 2085 2166 { 2086 - DECLARE_COMPLETION_ONSTACK(io_done); 2087 2167 struct btrfs_fs_info *fs_info = sctx->fs_info; 2088 - struct btrfs_raid_bio *rbio; 2089 - struct btrfs_io_context *bioc = NULL; 2090 2168 struct btrfs_path extent_path = { 0 }; 2091 2169 struct btrfs_path csum_path = { 0 }; 2092 - struct bio *bio; 2093 2170 struct scrub_stripe *stripe; 2094 2171 bool all_empty = true; 2095 2172 const int data_stripes = nr_data_stripes(map); 2096 2173 unsigned long extent_bitmap = 0; 2097 - u64 length = btrfs_stripe_nr_to_offset(data_stripes); 2098 2174 int ret; 2099 2175 2100 2176 ASSERT(sctx->raid56_data_stripes); 2177 + 2178 + ret = should_cancel_scrub(sctx); 2179 + if (ret < 0) 2180 + return ret; 2181 + 2182 + if (atomic_read(&fs_info->scrub_pause_req)) 2183 + scrub_blocked_if_needed(fs_info); 2184 + 2185 + spin_lock(&bg->lock); 2186 + if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) { 2187 + spin_unlock(&bg->lock); 2188 + return 0; 2189 + } 2190 + spin_unlock(&bg->lock); 2101 2191 2102 2192 /* 2103 2193 * For data stripe search, we cannot reuse the same extent/csum paths, 2104 2194 * as the data stripe bytenr may be smaller than previous extent. Thus 2105 2195 * we have to use our own extent/csum paths. 
2106 2196 */ 2107 - extent_path.search_commit_root = 1; 2108 - extent_path.skip_locking = 1; 2109 - csum_path.search_commit_root = 1; 2110 - csum_path.skip_locking = 1; 2197 + extent_path.search_commit_root = true; 2198 + extent_path.skip_locking = true; 2199 + csum_path.search_commit_root = true; 2200 + csum_path.skip_locking = true; 2111 2201 2112 2202 for (int i = 0; i < data_stripes; i++) { 2113 2203 int stripe_index; ··· 2300 2194 } 2301 2195 2302 2196 /* Now we can check and regenerate the P/Q stripe. */ 2303 - bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS); 2304 - bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT; 2305 - bio->bi_private = &io_done; 2306 - bio->bi_end_io = raid56_scrub_wait_endio; 2307 - 2308 - btrfs_bio_counter_inc_blocked(fs_info); 2309 - ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, 2310 - &length, &bioc, NULL, NULL); 2311 - if (ret < 0) { 2312 - bio_put(bio); 2313 - btrfs_put_bioc(bioc); 2314 - btrfs_bio_counter_dec(fs_info); 2315 - goto out; 2316 - } 2317 - rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap, 2318 - BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); 2319 - btrfs_put_bioc(bioc); 2320 - if (!rbio) { 2321 - ret = -ENOMEM; 2322 - bio_put(bio); 2323 - btrfs_bio_counter_dec(fs_info); 2324 - goto out; 2325 - } 2326 - /* Use the recovered stripes as cache to avoid read them from disk again. */ 2327 - for (int i = 0; i < data_stripes; i++) { 2328 - stripe = &sctx->raid56_data_stripes[i]; 2329 - 2330 - raid56_parity_cache_data_folios(rbio, stripe->folios, 2331 - full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT)); 2332 - } 2333 - raid56_parity_submit_scrub_rbio(rbio); 2334 - wait_for_completion_io(&io_done); 2335 - ret = blk_status_to_errno(bio->bi_status); 2336 - bio_put(bio); 2337 - btrfs_bio_counter_dec(fs_info); 2338 - 2197 + ret = scrub_raid56_cached_parity(sctx, scrub_dev, map, full_stripe_start, 2198 + &extent_bitmap); 2199 + out: 2339 2200 btrfs_release_path(&extent_path); 2340 2201 btrfs_release_path(&csum_path); 2341 - out: 2342 2202 return ret; 2343 2203 } 2344 2204 ··· 2335 2263 u64 found_logical = U64_MAX; 2336 2264 u64 cur_physical = physical + cur_logical - logical_start; 2337 2265 2338 - /* Canceled? */ 2339 - if (atomic_read(&fs_info->scrub_cancel_req) || 2340 - atomic_read(&sctx->cancel_req)) { 2341 - ret = -ECANCELED; 2266 + ret = should_cancel_scrub(sctx); 2267 + if (ret < 0) 2342 2268 break; 2343 - } 2344 - /* Paused? */ 2345 - if (atomic_read(&fs_info->scrub_pause_req)) { 2346 - /* Push queued extents */ 2269 + 2270 + if (atomic_read(&fs_info->scrub_pause_req)) 2347 2271 scrub_blocked_if_needed(fs_info); 2348 - } 2349 - /* Block group removed? */ 2272 + 2350 2273 spin_lock(&bg->lock); 2351 2274 if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) { 2352 2275 spin_unlock(&bg->lock); ··· 2596 2529 } 2597 2530 2598 2531 if (sctx->is_dev_replace && ret >= 0) { 2599 - int ret2; 2600 - 2601 2532 ret2 = sync_write_pointer_for_zoned(sctx, 2602 2533 chunk_logical + offset, 2603 2534 map->stripes[stripe_index].physical, ··· 2688 2623 return -ENOMEM; 2689 2624 2690 2625 path->reada = READA_FORWARD; 2691 - path->search_commit_root = 1; 2692 - path->skip_locking = 1; 2626 + path->search_commit_root = true; 2627 + path->skip_locking = true; 2693 2628 2694 2629 key.objectid = scrub_dev->devid; 2695 2630 key.type = BTRFS_DEV_EXTENT_KEY; ··· 3104 3039 unsigned int nofs_flag; 3105 3040 bool need_commit = false; 3106 3041 3042 + /* Set the basic fallback @last_physical before we got a sctx. 
*/ 3043 + if (progress) 3044 + progress->last_physical = start; 3045 + 3107 3046 if (btrfs_fs_closing(fs_info)) 3108 3047 return -EAGAIN; 3109 3048 ··· 3126 3057 sctx = scrub_setup_ctx(fs_info, is_dev_replace); 3127 3058 if (IS_ERR(sctx)) 3128 3059 return PTR_ERR(sctx); 3060 + sctx->stat.last_physical = start; 3129 3061 3130 3062 ret = scrub_workers_get(fs_info); 3131 3063 if (ret)
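Several scrub assertions above gain a format string and arguments (e.g. ASSERT(stripe->mirror_num >= 1, "stripe->mirror_num=%d", ...)), so a failure reports the offending value rather than only the expression. A userspace sketch of such a variadic assert; the macro body is an assumption for illustration, not btrfs's actual ASSERT():

        #include <stdio.h>
        #include <stdlib.h>

        /* Assert that logs extra context before aborting. */
        #define ASSERT_FMT(cond, fmt, ...)                                    \
                do {                                                          \
                        if (!(cond)) {                                        \
                                fprintf(stderr,                               \
                                        "assertion failed: %s, " fmt "\n",    \
                                        #cond, ##__VA_ARGS__);                \
                                abort();                                      \
                        }                                                     \
                } while (0)

        int main(void)
        {
                int mirror_num = 0;     /* deliberately invalid to trip the assert */

                ASSERT_FMT(mirror_num >= 1, "mirror_num=%d", mirror_num);
                return 0;
        }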
+48 -65
fs/btrfs/send.c
··· 634 634 path = btrfs_alloc_path(); 635 635 if (!path) 636 636 return NULL; 637 - path->search_commit_root = 1; 638 - path->skip_locking = 1; 639 - path->need_commit_sem = 1; 637 + path->search_commit_root = true; 638 + path->skip_locking = true; 639 + path->need_commit_sem = true; 640 640 return path; 641 641 } 642 642 ··· 1054 1054 } 1055 1055 if (unlikely(start < p->buf)) { 1056 1056 btrfs_err(root->fs_info, 1057 - "send: path ref buffer underflow for key (%llu %u %llu)", 1058 - found_key->objectid, 1059 - found_key->type, 1060 - found_key->offset); 1057 + "send: path ref buffer underflow for key " BTRFS_KEY_FMT, 1058 + BTRFS_KEY_FMT_VALUE(found_key)); 1061 1059 ret = -EINVAL; 1062 1060 goto out; 1063 1061 } ··· 1135 1137 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 1136 1138 1137 1139 if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) { 1138 - if (name_len > XATTR_NAME_MAX) { 1140 + if (unlikely(name_len > XATTR_NAME_MAX)) { 1139 1141 ret = -ENAMETOOLONG; 1140 1142 goto out; 1141 1143 } 1142 - if (name_len + data_len > 1143 - BTRFS_MAX_XATTR_SIZE(root->fs_info)) { 1144 + if (unlikely(name_len + data_len > 1145 + BTRFS_MAX_XATTR_SIZE(root->fs_info))) { 1144 1146 ret = -E2BIG; 1145 1147 goto out; 1146 1148 } ··· 1148 1150 /* 1149 1151 * Path too long 1150 1152 */ 1151 - if (name_len + data_len > PATH_MAX) { 1153 + if (unlikely(name_len + data_len > PATH_MAX)) { 1152 1154 ret = -ENAMETOOLONG; 1153 1155 goto out; 1154 1156 } ··· 2459 2461 struct btrfs_key key; 2460 2462 struct btrfs_root_ref *ref; 2461 2463 struct extent_buffer *leaf; 2462 - char *name = NULL; 2464 + char AUTO_KFREE(name); 2463 2465 int namelen; 2464 2466 2465 2467 path = btrfs_alloc_path(); ··· 2477 2479 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2478 2480 &key, path, 1, 0); 2479 2481 if (ret < 0) 2480 - goto out; 2481 - if (ret) { 2482 - ret = -ENOENT; 2483 - goto out; 2484 - } 2482 + return ret; 2483 + if (ret) 2484 + return -ENOENT; 2485 2485 2486 2486 leaf = path->nodes[0]; 2487 2487 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2488 2488 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2489 2489 key.objectid != btrfs_root_id(send_root)) { 2490 - ret = -ENOENT; 2491 - goto out; 2490 + return -ENOENT; 2492 2491 } 2493 2492 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2494 2493 namelen = btrfs_root_ref_name_len(leaf, ref); ··· 2495 2500 if (parent_root) { 2496 2501 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2497 2502 if (ret < 0) 2498 - goto out; 2503 + return ret; 2499 2504 } else { 2500 2505 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2501 2506 if (ret < 0) 2502 - goto out; 2507 + return ret; 2503 2508 } 2504 2509 2505 2510 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); ··· 2527 2532 ret = send_cmd(sctx); 2528 2533 2529 2534 tlv_put_failure: 2530 - out: 2531 - kfree(name); 2532 2535 return ret; 2533 2536 } 2534 2537 ··· 4073 4080 */ 4074 4081 static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) 4075 4082 { 4076 - char *name; 4083 + char AUTO_KFREE(name); 4077 4084 int ret; 4078 4085 4079 4086 name = kmemdup(ref->name, ref->name_len, GFP_KERNEL); ··· 4083 4090 fs_path_reset(ref->full_path); 4084 4091 ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path); 4085 4092 if (ret < 0) 4086 - goto out; 4093 + return ret; 4087 4094 4088 4095 ret = fs_path_add(ref->full_path, name, ref->name_len); 4089 4096 if (ret < 0) 4090 - goto out; 4097 + return ret; 4091 4098 4092 4099 /* Update the reference's base name pointer. 
*/ 4093 4100 set_ref_path(ref, ref->full_path); 4094 - out: 4095 - kfree(name); 4096 - return ret; 4101 + 4102 + return 0; 4097 4103 } 4098 4104 4099 4105 static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node) ··· 4944 4952 int found_idx; 4945 4953 char *found_data; 4946 4954 int found_data_len; 4955 + bool copy_data; 4947 4956 }; 4948 4957 4949 4958 static int __find_xattr(int num, struct btrfs_key *di_key, const char *name, ··· 4956 4963 strncmp(name, ctx->name, name_len) == 0) { 4957 4964 ctx->found_idx = num; 4958 4965 ctx->found_data_len = data_len; 4959 - ctx->found_data = kmemdup(data, data_len, GFP_KERNEL); 4960 - if (!ctx->found_data) 4961 - return -ENOMEM; 4966 + if (ctx->copy_data) { 4967 + ctx->found_data = kmemdup(data, data_len, GFP_KERNEL); 4968 + if (!ctx->found_data) 4969 + return -ENOMEM; 4970 + } 4962 4971 return 1; 4963 4972 } 4964 4973 return 0; ··· 4980 4985 ctx.found_idx = -1; 4981 4986 ctx.found_data = NULL; 4982 4987 ctx.found_data_len = 0; 4988 + ctx.copy_data = (data != NULL); 4983 4989 4984 4990 ret = iterate_dir_item(root, path, __find_xattr, &ctx); 4985 4991 if (ret < 0) ··· 4992 4996 *data = ctx.found_data; 4993 4997 *data_len = ctx.found_data_len; 4994 4998 } else { 4995 - kfree(ctx.found_data); 4999 + ASSERT(ctx.found_data == NULL); 4996 5000 } 4997 5001 return ctx.found_idx; 4998 5002 } ··· 5005 5009 { 5006 5010 int ret; 5007 5011 struct send_ctx *sctx = ctx; 5008 - char *found_data = NULL; 5009 - int found_data_len = 0; 5012 + char AUTO_KFREE(found_data); 5013 + int found_data_len = 0; 5010 5014 5011 5015 ret = find_xattr(sctx->parent_root, sctx->right_path, 5012 5016 sctx->cmp_key, name, name_len, &found_data, ··· 5024 5028 } 5025 5029 } 5026 5030 5027 - kfree(found_data); 5028 5031 return ret; 5029 5032 } 5030 5033 ··· 5134 5139 if (ret < 0) 5135 5140 goto iput; 5136 5141 5137 - if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) { 5142 + if (unlikely(ret > FS_VERITY_MAX_DESCRIPTOR_SIZE)) { 5138 5143 ret = -EMSGSIZE; 5139 5144 goto iput; 5140 5145 } ··· 5178 5183 * Since v2, the data attribute header doesn't include a length, 5179 5184 * it is implicitly to the end of the command. 5180 5185 */ 5181 - if (sctx->send_max_size - sctx->send_size < sizeof(__le16) + len) 5186 + if (unlikely(sctx->send_max_size - sctx->send_size < sizeof(__le16) + len)) 5182 5187 return -EOVERFLOW; 5183 5188 put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size); 5184 5189 sctx->send_size += sizeof(__le16); 5185 5190 } else { 5186 5191 struct btrfs_tlv_header *hdr; 5187 5192 5188 - if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len) 5193 + if (unlikely(sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)) 5189 5194 return -EOVERFLOW; 5190 5195 hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size); 5191 5196 put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type); ··· 5585 5590 * between the beginning of the command and the file data. 5586 5591 */ 5587 5592 data_offset = PAGE_ALIGN(sctx->send_size); 5588 - if (data_offset > sctx->send_max_size || 5589 - sctx->send_max_size - data_offset < disk_num_bytes) { 5593 + if (unlikely(data_offset > sctx->send_max_size || 5594 + sctx->send_max_size - data_offset < disk_num_bytes)) { 5590 5595 ret = -EOVERFLOW; 5591 5596 goto out; 5592 5597 } ··· 5639 5644 5640 5645 ei = btrfs_item_ptr(leaf, path->slots[0], 5641 5646 struct btrfs_file_extent_item); 5642 - /* 5643 - * Do not go through encoded read for bs > ps cases. 
5644 - * 5645 - * Encoded send is using vmallocated pages as buffer, which we can 5646 - * not ensure every folio is large enough to contain a block. 5647 - */ 5648 - if (sctx->send_root->fs_info->sectorsize <= PAGE_SIZE && 5649 - (sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && 5647 + if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && 5650 5648 btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) { 5651 5649 bool is_inline = (btrfs_file_extent_type(leaf, ei) == 5652 5650 BTRFS_FILE_EXTENT_INLINE); ··· 5753 5765 struct btrfs_dir_item *di; 5754 5766 struct extent_buffer *leaf; 5755 5767 unsigned long data_ptr; 5756 - char *buf = NULL; 5768 + char AUTO_KFREE(buf); 5757 5769 int buf_len; 5758 5770 int ret = 0; 5759 5771 ··· 5765 5777 XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0); 5766 5778 if (!di) { 5767 5779 /* There is no xattr for this inode */ 5768 - goto out; 5780 + return 0; 5769 5781 } else if (IS_ERR(di)) { 5770 - ret = PTR_ERR(di); 5771 - goto out; 5782 + return PTR_ERR(di); 5772 5783 } 5773 5784 5774 5785 leaf = path->nodes[0]; 5775 5786 buf_len = btrfs_dir_data_len(leaf, di); 5776 5787 5777 5788 buf = kmalloc(buf_len, GFP_KERNEL); 5778 - if (!buf) { 5779 - ret = -ENOMEM; 5780 - goto out; 5781 - } 5789 + if (!buf) 5790 + return -ENOMEM; 5782 5791 5783 5792 data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di); 5784 5793 read_extent_buffer(leaf, buf, data_ptr, buf_len); 5785 5794 5786 5795 ret = send_set_xattr(sctx, XATTR_NAME_CAPS, 5787 5796 strlen(XATTR_NAME_CAPS), buf, buf_len); 5788 - out: 5789 - kfree(buf); 5790 5797 return ret; 5791 5798 } 5792 5799 ··· 7258 7275 if (unlikely(ret > 0)) { 7259 7276 btrfs_print_tree(path->nodes[path->lowest_level], false); 7260 7277 btrfs_err(root->fs_info, 7261 - "send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d", 7262 - key->objectid, key->type, key->offset, 7278 + "send: key " BTRFS_KEY_FMT" not found in %s root %llu, lowest_level %d, slot %d", 7279 + BTRFS_KEY_FMT_VALUE(key), 7263 7280 (root == sctx->parent_root ? "parent" : "send"), 7264 7281 btrfs_root_id(root), path->lowest_level, 7265 7282 path->slots[path->lowest_level]); ··· 7627 7644 goto out; 7628 7645 } 7629 7646 7630 - left_path->search_commit_root = 1; 7631 - left_path->skip_locking = 1; 7632 - right_path->search_commit_root = 1; 7633 - right_path->skip_locking = 1; 7647 + left_path->search_commit_root = true; 7648 + left_path->skip_locking = true; 7649 + right_path->search_commit_root = true; 7650 + right_path->skip_locking = true; 7634 7651 7635 7652 /* 7636 7653 * Strategy: Go to the first items of both trees. Then do
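The find_xattr_ctx change above adds a copy_data flag so that callers probing only for existence (passing a NULL data pointer) skip the kmemdup() entirely. A compact userspace sketch of the same optional-copy callback pattern, with hypothetical names:

        #include <errno.h>
        #include <stdbool.h>
        #include <stdlib.h>
        #include <string.h>

        struct find_ctx {
                bool copy_data;         /* false: existence check only */
                char *found;
                size_t found_len;
        };

        /* Hypothetical match callback: copy the value only when asked to. */
        static int on_match(struct find_ctx *ctx, const char *data, size_t len)
        {
                ctx->found_len = len;
                if (ctx->copy_data) {
                        ctx->found = malloc(len);
                        if (!ctx->found)
                                return -ENOMEM;
                        memcpy(ctx->found, data, len);
                }
                return 1;       /* match found; stop iterating either way */
        }

Callers that pass NULL output pointers then get a yes/no answer without an allocation they would immediately free.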
+245 -219
fs/btrfs/space-info.c
··· 15 15 #include "accessors.h" 16 16 #include "extent-tree.h" 17 17 #include "zoned.h" 18 + #include "delayed-inode.h" 18 19 19 20 /* 20 21 * HOW DOES SPACE RESERVATION WORK ··· 68 67 * Assume we are unable to simply make the reservation because we do not have 69 68 * enough space 70 69 * 71 - * -> __reserve_bytes 70 + * -> reserve_bytes 72 71 * create a reserve_ticket with ->bytes set to our reservation, add it to 73 72 * the tail of space_info->tickets, kick async flush thread 74 73 * ··· 173 172 * thing with or without extra unallocated space. 174 173 */ 175 174 176 - u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info, 177 - bool may_use_included) 178 - { 179 - ASSERT(s_info); 180 - return s_info->bytes_used + s_info->bytes_reserved + 181 - s_info->bytes_pinned + s_info->bytes_readonly + 182 - s_info->bytes_zone_unusable + 183 - (may_use_included ? s_info->bytes_may_use : 0); 184 - } 175 + struct reserve_ticket { 176 + u64 bytes; 177 + int error; 178 + bool steal; 179 + struct list_head list; 180 + wait_queue_head_t wait; 181 + spinlock_t lock; 182 + }; 185 183 186 184 /* 187 185 * after adding space to the filesystem, we need to clear the full flags ··· 192 192 struct btrfs_space_info *found; 193 193 194 194 list_for_each_entry(found, head, list) 195 - found->full = 0; 195 + found->full = false; 196 196 } 197 197 198 198 /* ··· 211 211 if (btrfs_is_zoned(fs_info)) 212 212 return fs_info->zone_size; 213 213 214 - ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK); 214 + ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK, "flags=%llu", flags); 215 215 216 216 if (flags & BTRFS_BLOCK_GROUP_DATA) 217 217 return BTRFS_MAX_DATA_CHUNK_SIZE; ··· 262 262 struct btrfs_space_info *sub_group; 263 263 int ret; 264 264 265 - ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY); 266 - ASSERT(id != BTRFS_SUB_GROUP_PRIMARY); 265 + ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY, 266 + "parent->subgroup_id=%d", parent->subgroup_id); 267 + ASSERT(id != BTRFS_SUB_GROUP_PRIMARY, "id=%d", id); 267 268 268 269 sub_group = kzalloc(sizeof(*sub_group), GFP_NOFS); 269 270 if (!sub_group) ··· 275 274 sub_group->parent = parent; 276 275 sub_group->subgroup_id = id; 277 276 278 - ret = btrfs_sysfs_add_space_info_type(fs_info, sub_group); 277 + ret = btrfs_sysfs_add_space_info_type(sub_group); 279 278 if (ret) { 280 279 kfree(sub_group); 281 280 parent->sub_group[index] = NULL; ··· 309 308 return ret; 310 309 } 311 310 312 - ret = btrfs_sysfs_add_space_info_type(info, space_info); 311 + ret = btrfs_sysfs_add_space_info_type(space_info); 313 312 if (ret) 314 313 return ret; 315 314 ··· 373 372 space_info->bytes_readonly += block_group->bytes_super; 374 373 btrfs_space_info_update_bytes_zone_unusable(space_info, block_group->zone_unusable); 375 374 if (block_group->length > 0) 376 - space_info->full = 0; 377 - btrfs_try_granting_tickets(info, space_info); 375 + space_info->full = false; 376 + btrfs_try_granting_tickets(space_info); 378 377 spin_unlock(&space_info->lock); 379 378 380 379 block_group->space_info = space_info; ··· 422 421 return min_t(u64, data_chunk_size, SZ_1G); 423 422 } 424 423 425 - static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, 426 - const struct btrfs_space_info *space_info, 427 - enum btrfs_reserve_flush_enum flush) 424 + static u64 calc_available_free_space(const struct btrfs_space_info *space_info, 425 + enum btrfs_reserve_flush_enum flush) 428 426 { 427 + struct btrfs_fs_info *fs_info = space_info->fs_info; 429 428 u64 profile; 430 429 u64 avail; 431 430 u64 
data_chunk_size; ··· 491 490 return avail; 492 491 } 493 492 494 - int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, 495 - const struct btrfs_space_info *space_info, u64 bytes, 496 - enum btrfs_reserve_flush_enum flush) 493 + static inline bool check_can_overcommit(const struct btrfs_space_info *space_info, 494 + u64 space_info_used_bytes, u64 bytes, 495 + enum btrfs_reserve_flush_enum flush) 497 496 { 498 - u64 avail; 497 + const u64 avail = calc_available_free_space(space_info, flush); 498 + 499 + return (space_info_used_bytes + bytes < space_info->total_bytes + avail); 500 + } 501 + 502 + static inline bool can_overcommit(const struct btrfs_space_info *space_info, 503 + u64 space_info_used_bytes, u64 bytes, 504 + enum btrfs_reserve_flush_enum flush) 505 + { 506 + /* Don't overcommit when in mixed mode. */ 507 + if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) 508 + return false; 509 + 510 + return check_can_overcommit(space_info, space_info_used_bytes, bytes, flush); 511 + } 512 + 513 + bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes, 514 + enum btrfs_reserve_flush_enum flush) 515 + { 499 516 u64 used; 500 517 501 518 /* Don't overcommit when in mixed mode */ 502 519 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) 503 - return 0; 520 + return false; 504 521 505 522 used = btrfs_space_info_used(space_info, true); 506 - avail = calc_available_free_space(fs_info, space_info, flush); 507 523 508 - if (used + bytes < space_info->total_bytes + avail) 509 - return 1; 510 - return 0; 524 + return check_can_overcommit(space_info, used, bytes, flush); 511 525 } 512 526 513 527 static void remove_ticket(struct btrfs_space_info *space_info, 514 - struct reserve_ticket *ticket) 528 + struct reserve_ticket *ticket, int error) 515 529 { 530 + lockdep_assert_held(&space_info->lock); 531 + 516 532 if (!list_empty(&ticket->list)) { 517 533 list_del_init(&ticket->list); 518 - ASSERT(space_info->reclaim_size >= ticket->bytes); 534 + ASSERT(space_info->reclaim_size >= ticket->bytes, 535 + "space_info->reclaim_size=%llu ticket->bytes=%llu", 536 + space_info->reclaim_size, ticket->bytes); 519 537 space_info->reclaim_size -= ticket->bytes; 520 538 } 539 + 540 + spin_lock(&ticket->lock); 541 + /* 542 + * If we are called from a task waiting on the ticket, it may happen 543 + * that before it sets an error on the ticket, a reclaim task was able 544 + * to satisfy the ticket. In that case ignore the error. 545 + */ 546 + if (error && ticket->bytes > 0) 547 + ticket->error = error; 548 + else 549 + ticket->bytes = 0; 550 + 551 + wake_up(&ticket->wait); 552 + spin_unlock(&ticket->lock); 521 553 } 522 554 523 555 /* 524 556 * This is for space we already have accounted in space_info->bytes_may_use, so 525 557 * basically when we're returning space from block_rsv's. 
526 558 */ 527 - void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, 528 - struct btrfs_space_info *space_info) 559 + void btrfs_try_granting_tickets(struct btrfs_space_info *space_info) 529 560 { 530 561 struct list_head *head; 531 562 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; 563 + u64 used = btrfs_space_info_used(space_info, true); 532 564 533 565 lockdep_assert_held(&space_info->lock); 534 566 ··· 569 535 again: 570 536 while (!list_empty(head)) { 571 537 struct reserve_ticket *ticket; 572 - u64 used = btrfs_space_info_used(space_info, true); 538 + u64 used_after; 573 539 574 540 ticket = list_first_entry(head, struct reserve_ticket, list); 541 + used_after = used + ticket->bytes; 575 542 576 543 /* Check and see if our ticket can be satisfied now. */ 577 - if ((used + ticket->bytes <= space_info->total_bytes) || 578 - btrfs_can_overcommit(fs_info, space_info, ticket->bytes, 579 - flush)) { 544 + if (used_after <= space_info->total_bytes || 545 + can_overcommit(space_info, used, ticket->bytes, flush)) { 580 546 btrfs_space_info_update_bytes_may_use(space_info, ticket->bytes); 581 - remove_ticket(space_info, ticket); 582 - ticket->bytes = 0; 547 + remove_ticket(space_info, ticket, 0); 583 548 space_info->tickets_id++; 584 - wake_up(&ticket->wait); 549 + used = used_after; 585 550 } else { 586 551 break; 587 552 } ··· 627 594 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv); 628 595 } 629 596 630 - static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info, 631 - const struct btrfs_space_info *info) 597 + static void __btrfs_dump_space_info(const struct btrfs_space_info *info) 632 598 { 599 + const struct btrfs_fs_info *fs_info = info->fs_info; 633 600 const char *flag_str = space_info_flag_to_str(info); 634 601 lockdep_assert_held(&info->lock); 635 602 ··· 646 613 info->bytes_readonly, info->bytes_zone_unusable); 647 614 } 648 615 649 - void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, 650 - struct btrfs_space_info *info, u64 bytes, 616 + void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes, 651 617 bool dump_block_groups) 652 618 { 619 + struct btrfs_fs_info *fs_info = info->fs_info; 653 620 struct btrfs_block_group *cache; 654 621 u64 total_avail = 0; 655 622 int index = 0; 656 623 657 624 spin_lock(&info->lock); 658 - __btrfs_dump_space_info(fs_info, info); 625 + __btrfs_dump_space_info(info); 659 626 dump_global_block_rsv(fs_info); 660 627 spin_unlock(&info->lock); 661 628 ··· 703 670 /* 704 671 * shrink metadata reservation for delalloc 705 672 */ 706 - static void shrink_delalloc(struct btrfs_fs_info *fs_info, 707 - struct btrfs_space_info *space_info, 673 + static void shrink_delalloc(struct btrfs_space_info *space_info, 708 674 u64 to_reclaim, bool wait_ordered, 709 675 bool for_preempt) 710 676 { 677 + struct btrfs_fs_info *fs_info = space_info->fs_info; 711 678 struct btrfs_trans_handle *trans; 712 679 u64 delalloc_bytes; 713 680 u64 ordered_bytes; ··· 834 801 * and may fail for various reasons. The caller is supposed to examine the 835 802 * state of @space_info to detect the outcome. 
836 803 */ 837 - static void flush_space(struct btrfs_fs_info *fs_info, 838 - struct btrfs_space_info *space_info, u64 num_bytes, 839 - enum btrfs_flush_state state, bool for_preempt) 804 + static void flush_space(struct btrfs_space_info *space_info, u64 num_bytes, 805 + enum btrfs_flush_state state, bool for_preempt) 840 806 { 807 + struct btrfs_fs_info *fs_info = space_info->fs_info; 841 808 struct btrfs_root *root = fs_info->tree_root; 842 809 struct btrfs_trans_handle *trans; 843 810 int nr; ··· 866 833 case FLUSH_DELALLOC_FULL: 867 834 if (state == FLUSH_DELALLOC_FULL) 868 835 num_bytes = U64_MAX; 869 - shrink_delalloc(fs_info, space_info, num_bytes, 836 + shrink_delalloc(space_info, num_bytes, 870 837 state != FLUSH_DELALLOC, for_preempt); 871 838 break; 872 839 case FLUSH_DELAYED_REFS_NR: ··· 933 900 return; 934 901 } 935 902 936 - static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, 937 - const struct btrfs_space_info *space_info) 903 + static u64 btrfs_calc_reclaim_metadata_size(const struct btrfs_space_info *space_info) 938 904 { 939 905 u64 used; 940 906 u64 avail; ··· 941 909 942 910 lockdep_assert_held(&space_info->lock); 943 911 944 - avail = calc_available_free_space(fs_info, space_info, 945 - BTRFS_RESERVE_FLUSH_ALL); 912 + avail = calc_available_free_space(space_info, BTRFS_RESERVE_FLUSH_ALL); 946 913 used = btrfs_space_info_used(space_info, true); 947 914 948 915 /* ··· 956 925 return to_reclaim; 957 926 } 958 927 959 - static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, 960 - const struct btrfs_space_info *space_info) 928 + static bool need_preemptive_reclaim(const struct btrfs_space_info *space_info) 961 929 { 930 + struct btrfs_fs_info *fs_info = space_info->fs_info; 962 931 const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv); 963 932 u64 ordered, delalloc; 964 933 u64 thresh; 965 934 u64 used; 966 935 967 - thresh = mult_perc(space_info->total_bytes, 90); 968 - 969 936 lockdep_assert_held(&space_info->lock); 937 + 938 + /* 939 + * We have tickets queued, bail so we don't compete with the async 940 + * flushers. 941 + */ 942 + if (space_info->reclaim_size) 943 + return false; 944 + 945 + thresh = mult_perc(space_info->total_bytes, 90); 970 946 971 947 /* If we're just plain full then async reclaim just slows us down. */ 972 948 if ((space_info->bytes_used + space_info->bytes_reserved + ··· 992 954 * we don't have a lot of things that need flushing. 993 955 */ 994 956 if (used - global_rsv_size <= SZ_128M) 995 - return false; 996 - 997 - /* 998 - * We have tickets queued, bail so we don't compete with the async 999 - * flushers. 1000 - */ 1001 - if (space_info->reclaim_size) 1002 957 return false; 1003 958 1004 959 /* ··· 1023 992 * much delalloc we need for the background flusher to kick in. 
1024 993 */ 1025 994 1026 - thresh = calc_available_free_space(fs_info, space_info, 1027 - BTRFS_RESERVE_FLUSH_ALL); 995 + thresh = calc_available_free_space(space_info, BTRFS_RESERVE_FLUSH_ALL); 1028 996 used = space_info->bytes_used + space_info->bytes_reserved + 1029 997 space_info->bytes_readonly + global_rsv_size; 1030 998 if (used < space_info->total_bytes) ··· 1067 1037 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); 1068 1038 } 1069 1039 1070 - static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, 1071 - struct btrfs_space_info *space_info, 1040 + static bool steal_from_global_rsv(struct btrfs_space_info *space_info, 1072 1041 struct reserve_ticket *ticket) 1073 1042 { 1043 + struct btrfs_fs_info *fs_info = space_info->fs_info; 1074 1044 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; 1075 1045 u64 min_bytes; 1046 + 1047 + lockdep_assert_held(&space_info->lock); 1076 1048 1077 1049 if (!ticket->steal) 1078 1050 return false; ··· 1089 1057 return false; 1090 1058 } 1091 1059 global_rsv->reserved -= ticket->bytes; 1092 - remove_ticket(space_info, ticket); 1093 - ticket->bytes = 0; 1094 - wake_up(&ticket->wait); 1095 - space_info->tickets_id++; 1096 1060 if (global_rsv->reserved < global_rsv->size) 1097 - global_rsv->full = 0; 1061 + global_rsv->full = false; 1098 1062 spin_unlock(&global_rsv->lock); 1063 + 1064 + remove_ticket(space_info, ticket, 0); 1065 + space_info->tickets_id++; 1099 1066 1100 1067 return true; 1101 1068 } ··· 1102 1071 /* 1103 1072 * We've exhausted our flushing, start failing tickets. 1104 1073 * 1105 - * @fs_info - fs_info for this fs 1106 1074 * @space_info - the space info we were flushing 1107 1075 * 1108 1076 * We call this when we've exhausted our flushing ability and haven't made ··· 1114 1084 * other tickets, or if it stumbles across a ticket that was smaller than the 1115 1085 * first ticket. 
1116 1086 */ 1117 - static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, 1118 - struct btrfs_space_info *space_info) 1087 + static bool maybe_fail_all_tickets(struct btrfs_space_info *space_info) 1119 1088 { 1089 + struct btrfs_fs_info *fs_info = space_info->fs_info; 1120 1090 struct reserve_ticket *ticket; 1121 1091 u64 tickets_id = space_info->tickets_id; 1122 - const bool aborted = BTRFS_FS_ERROR(fs_info); 1092 + const int abort_error = BTRFS_FS_ERROR(fs_info); 1123 1093 1124 1094 trace_btrfs_fail_all_tickets(fs_info, space_info); 1125 1095 1126 1096 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 1127 1097 btrfs_info(fs_info, "cannot satisfy tickets, dumping space info"); 1128 - __btrfs_dump_space_info(fs_info, space_info); 1098 + __btrfs_dump_space_info(space_info); 1129 1099 } 1130 1100 1131 1101 while (!list_empty(&space_info->tickets) && 1132 1102 tickets_id == space_info->tickets_id) { 1133 1103 ticket = list_first_entry(&space_info->tickets, 1134 1104 struct reserve_ticket, list); 1105 + if (unlikely(abort_error)) { 1106 + remove_ticket(space_info, ticket, abort_error); 1107 + } else { 1108 + if (steal_from_global_rsv(space_info, ticket)) 1109 + return true; 1135 1110 1136 - if (!aborted && steal_from_global_rsv(fs_info, space_info, ticket)) 1137 - return true; 1111 + if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 1112 + btrfs_info(fs_info, "failing ticket with %llu bytes", 1113 + ticket->bytes); 1138 1114 1139 - if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 1140 - btrfs_info(fs_info, "failing ticket with %llu bytes", 1141 - ticket->bytes); 1115 + remove_ticket(space_info, ticket, -ENOSPC); 1142 1116 1143 - remove_ticket(space_info, ticket); 1144 - if (aborted) 1145 - ticket->error = -EIO; 1146 - else 1147 - ticket->error = -ENOSPC; 1148 - wake_up(&ticket->wait); 1149 - 1150 - /* 1151 - * We're just throwing tickets away, so more flushing may not 1152 - * trip over btrfs_try_granting_tickets, so we need to call it 1153 - * here to see if we can make progress with the next ticket in 1154 - * the list. 1155 - */ 1156 - if (!aborted) 1157 - btrfs_try_granting_tickets(fs_info, space_info); 1117 + /* 1118 + * We're just throwing tickets away, so more flushing may 1119 + * not trip over btrfs_try_granting_tickets, so we need 1120 + * to call it here to see if we can make progress with 1121 + * the next ticket in the list. 
1122 + */ 1123 + btrfs_try_granting_tickets(space_info); 1124 + } 1158 1125 } 1159 1126 return (tickets_id != space_info->tickets_id); 1160 1127 } ··· 1171 1144 final_state = COMMIT_TRANS; 1172 1145 1173 1146 spin_lock(&space_info->lock); 1174 - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info); 1147 + to_reclaim = btrfs_calc_reclaim_metadata_size(space_info); 1175 1148 if (!to_reclaim) { 1176 - space_info->flush = 0; 1149 + space_info->flush = false; 1177 1150 spin_unlock(&space_info->lock); 1178 1151 return; 1179 1152 } ··· 1182 1155 1183 1156 flush_state = FLUSH_DELAYED_ITEMS_NR; 1184 1157 do { 1185 - flush_space(fs_info, space_info, to_reclaim, flush_state, false); 1158 + flush_space(space_info, to_reclaim, flush_state, false); 1186 1159 spin_lock(&space_info->lock); 1187 1160 if (list_empty(&space_info->tickets)) { 1188 - space_info->flush = 0; 1161 + space_info->flush = false; 1189 1162 spin_unlock(&space_info->lock); 1190 1163 return; 1191 1164 } 1192 - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, 1193 - space_info); 1165 + to_reclaim = btrfs_calc_reclaim_metadata_size(space_info); 1194 1166 if (last_tickets_id == space_info->tickets_id) { 1195 1167 flush_state++; 1196 1168 } else { ··· 1223 1197 if (flush_state > final_state) { 1224 1198 commit_cycles++; 1225 1199 if (commit_cycles > 2) { 1226 - if (maybe_fail_all_tickets(fs_info, space_info)) { 1200 + if (maybe_fail_all_tickets(space_info)) { 1227 1201 flush_state = FLUSH_DELAYED_ITEMS_NR; 1228 1202 commit_cycles--; 1229 1203 } else { 1230 - space_info->flush = 0; 1204 + space_info->flush = false; 1231 1205 } 1232 1206 } else { 1233 1207 flush_state = FLUSH_DELAYED_ITEMS_NR; ··· 1283 1257 trans_rsv = &fs_info->trans_block_rsv; 1284 1258 1285 1259 spin_lock(&space_info->lock); 1286 - while (need_preemptive_reclaim(fs_info, space_info)) { 1260 + while (need_preemptive_reclaim(space_info)) { 1287 1261 enum btrfs_flush_state flush; 1288 1262 u64 delalloc_size = 0; 1289 1263 u64 to_reclaim, block_rsv_size; 1290 1264 const u64 global_rsv_size = btrfs_block_rsv_reserved(global_rsv); 1265 + const u64 bytes_may_use = space_info->bytes_may_use; 1266 + const u64 bytes_pinned = space_info->bytes_pinned; 1291 1267 1292 - loops++; 1293 - 1268 + spin_unlock(&space_info->lock); 1294 1269 /* 1295 1270 * We don't have a precise counter for the metadata being 1296 1271 * reserved for delalloc, so we'll approximate it by subtracting ··· 1303 1276 btrfs_block_rsv_reserved(delayed_block_rsv) + 1304 1277 btrfs_block_rsv_reserved(delayed_refs_rsv) + 1305 1278 btrfs_block_rsv_reserved(trans_rsv); 1306 - if (block_rsv_size < space_info->bytes_may_use) 1307 - delalloc_size = space_info->bytes_may_use - block_rsv_size; 1279 + if (block_rsv_size < bytes_may_use) 1280 + delalloc_size = bytes_may_use - block_rsv_size; 1308 1281 1309 1282 /* 1310 1283 * We don't want to include the global_rsv in our calculation, ··· 1321 1294 if (delalloc_size > block_rsv_size) { 1322 1295 to_reclaim = delalloc_size; 1323 1296 flush = FLUSH_DELALLOC; 1324 - } else if (space_info->bytes_pinned > 1297 + } else if (bytes_pinned > 1325 1298 (btrfs_block_rsv_reserved(delayed_block_rsv) + 1326 1299 btrfs_block_rsv_reserved(delayed_refs_rsv))) { 1327 - to_reclaim = space_info->bytes_pinned; 1300 + to_reclaim = bytes_pinned; 1328 1301 flush = COMMIT_TRANS; 1329 1302 } else if (btrfs_block_rsv_reserved(delayed_block_rsv) > 1330 1303 btrfs_block_rsv_reserved(delayed_refs_rsv)) { ··· 1335 1308 flush = FLUSH_DELAYED_REFS_NR; 1336 1309 } 1337 1310 1338 - 
spin_unlock(&space_info->lock); 1311 + loops++; 1339 1312 1340 1313 /* 1341 1314 * We don't want to reclaim everything, just a portion, so scale ··· 1345 1318 to_reclaim >>= 2; 1346 1319 if (!to_reclaim) 1347 1320 to_reclaim = btrfs_calc_insert_metadata_size(fs_info, 1); 1348 - flush_space(fs_info, space_info, to_reclaim, flush, true); 1321 + flush_space(space_info, to_reclaim, flush, true); 1349 1322 cond_resched(); 1350 1323 spin_lock(&space_info->lock); 1351 1324 } ··· 1410 1383 1411 1384 spin_lock(&space_info->lock); 1412 1385 if (list_empty(&space_info->tickets)) { 1413 - space_info->flush = 0; 1386 + space_info->flush = false; 1414 1387 spin_unlock(&space_info->lock); 1415 1388 return; 1416 1389 } ··· 1418 1391 spin_unlock(&space_info->lock); 1419 1392 1420 1393 while (!space_info->full) { 1421 - flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false); 1394 + flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false); 1422 1395 spin_lock(&space_info->lock); 1423 1396 if (list_empty(&space_info->tickets)) { 1424 - space_info->flush = 0; 1397 + space_info->flush = false; 1425 1398 spin_unlock(&space_info->lock); 1426 1399 return; 1427 1400 } 1428 1401 1429 1402 /* Something happened, fail everything and bail. */ 1430 - if (BTRFS_FS_ERROR(fs_info)) 1403 + if (unlikely(BTRFS_FS_ERROR(fs_info))) 1431 1404 goto aborted_fs; 1432 1405 last_tickets_id = space_info->tickets_id; 1433 1406 spin_unlock(&space_info->lock); 1434 1407 } 1435 1408 1436 1409 while (flush_state < ARRAY_SIZE(data_flush_states)) { 1437 - flush_space(fs_info, space_info, U64_MAX, 1410 + flush_space(space_info, U64_MAX, 1438 1411 data_flush_states[flush_state], false); 1439 1412 spin_lock(&space_info->lock); 1440 1413 if (list_empty(&space_info->tickets)) { 1441 - space_info->flush = 0; 1414 + space_info->flush = false; 1442 1415 spin_unlock(&space_info->lock); 1443 1416 return; 1444 1417 } ··· 1452 1425 1453 1426 if (flush_state >= ARRAY_SIZE(data_flush_states)) { 1454 1427 if (space_info->full) { 1455 - if (maybe_fail_all_tickets(fs_info, space_info)) 1428 + if (maybe_fail_all_tickets(space_info)) 1456 1429 flush_state = 0; 1457 1430 else 1458 - space_info->flush = 0; 1431 + space_info->flush = false; 1459 1432 } else { 1460 1433 flush_state = 0; 1461 1434 } 1462 1435 1463 1436 /* Something happened, fail everything and bail. 
*/ 1464 - if (BTRFS_FS_ERROR(fs_info)) 1437 + if (unlikely(BTRFS_FS_ERROR(fs_info))) 1465 1438 goto aborted_fs; 1466 1439 1467 1440 } ··· 1470 1443 return; 1471 1444 1472 1445 aborted_fs: 1473 - maybe_fail_all_tickets(fs_info, space_info); 1474 - space_info->flush = 0; 1446 + maybe_fail_all_tickets(space_info); 1447 + space_info->flush = false; 1475 1448 spin_unlock(&space_info->lock); 1476 1449 } 1477 1450 ··· 1516 1489 RESET_ZONES, 1517 1490 }; 1518 1491 1519 - static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, 1520 - struct btrfs_space_info *space_info, 1521 - struct reserve_ticket *ticket, 1522 - const enum btrfs_flush_state *states, 1523 - int states_nr) 1492 + static bool is_ticket_served(struct reserve_ticket *ticket) 1524 1493 { 1494 + bool ret; 1495 + 1496 + spin_lock(&ticket->lock); 1497 + ret = (ticket->bytes == 0); 1498 + spin_unlock(&ticket->lock); 1499 + 1500 + return ret; 1501 + } 1502 + 1503 + static void priority_reclaim_metadata_space(struct btrfs_space_info *space_info, 1504 + struct reserve_ticket *ticket, 1505 + const enum btrfs_flush_state *states, 1506 + int states_nr) 1507 + { 1508 + struct btrfs_fs_info *fs_info = space_info->fs_info; 1525 1509 u64 to_reclaim; 1526 1510 int flush_state = 0; 1527 1511 1528 - spin_lock(&space_info->lock); 1529 - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info); 1530 1512 /* 1531 1513 * This is the priority reclaim path, so to_reclaim could be >0 still 1532 1514 * because we may have only satisfied the priority tickets and still 1533 1515 * left non priority tickets on the list. We would then have 1534 1516 * to_reclaim but ->bytes == 0. 1535 1517 */ 1536 - if (ticket->bytes == 0) { 1537 - spin_unlock(&space_info->lock); 1518 + if (is_ticket_served(ticket)) 1538 1519 return; 1539 - } 1520 + 1521 + spin_lock(&space_info->lock); 1522 + to_reclaim = btrfs_calc_reclaim_metadata_size(space_info); 1523 + spin_unlock(&space_info->lock); 1540 1524 1541 1525 while (flush_state < states_nr) { 1542 - spin_unlock(&space_info->lock); 1543 - flush_space(fs_info, space_info, to_reclaim, states[flush_state], 1544 - false); 1545 - flush_state++; 1546 - spin_lock(&space_info->lock); 1547 - if (ticket->bytes == 0) { 1548 - spin_unlock(&space_info->lock); 1526 + flush_space(space_info, to_reclaim, states[flush_state], false); 1527 + if (is_ticket_served(ticket)) 1549 1528 return; 1550 - } 1529 + flush_state++; 1551 1530 } 1552 1531 1532 + spin_lock(&space_info->lock); 1553 1533 /* 1554 1534 * Attempt to steal from the global rsv if we can, except if the fs was 1555 1535 * turned into error mode due to a transaction abort when flushing space ··· 1565 1531 * just to have caller fail immediately instead of later when trying to 1566 1532 * modify the fs, making it easier to debug -ENOSPC problems. 1567 1533 */ 1568 - if (BTRFS_FS_ERROR(fs_info)) { 1569 - ticket->error = BTRFS_FS_ERROR(fs_info); 1570 - remove_ticket(space_info, ticket); 1571 - } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) { 1572 - ticket->error = -ENOSPC; 1573 - remove_ticket(space_info, ticket); 1574 - } 1534 + if (unlikely(BTRFS_FS_ERROR(fs_info))) 1535 + remove_ticket(space_info, ticket, BTRFS_FS_ERROR(fs_info)); 1536 + else if (!steal_from_global_rsv(space_info, ticket)) 1537 + remove_ticket(space_info, ticket, -ENOSPC); 1575 1538 1576 1539 /* 1577 1540 * We must run try_granting_tickets here because we could be a large 1578 1541 * ticket in front of a smaller ticket that can now be satisfied with 1579 1542 * the available space. 
1580 1543 */ 1581 - btrfs_try_granting_tickets(fs_info, space_info); 1544 + btrfs_try_granting_tickets(space_info); 1582 1545 spin_unlock(&space_info->lock); 1583 1546 } 1584 1547 1585 - static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info, 1586 - struct btrfs_space_info *space_info, 1548 + static void priority_reclaim_data_space(struct btrfs_space_info *space_info, 1587 1549 struct reserve_ticket *ticket) 1588 1550 { 1589 - spin_lock(&space_info->lock); 1590 - 1591 1551 /* We could have been granted before we got here. */ 1592 - if (ticket->bytes == 0) { 1593 - spin_unlock(&space_info->lock); 1552 + if (is_ticket_served(ticket)) 1594 1553 return; 1595 - } 1596 1554 1555 + spin_lock(&space_info->lock); 1597 1556 while (!space_info->full) { 1598 1557 spin_unlock(&space_info->lock); 1599 - flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false); 1600 - spin_lock(&space_info->lock); 1601 - if (ticket->bytes == 0) { 1602 - spin_unlock(&space_info->lock); 1558 + flush_space(space_info, U64_MAX, ALLOC_CHUNK_FORCE, false); 1559 + if (is_ticket_served(ticket)) 1603 1560 return; 1604 - } 1561 + spin_lock(&space_info->lock); 1605 1562 } 1606 1563 1607 - ticket->error = -ENOSPC; 1608 - remove_ticket(space_info, ticket); 1609 - btrfs_try_granting_tickets(fs_info, space_info); 1564 + remove_ticket(space_info, ticket, -ENOSPC); 1565 + btrfs_try_granting_tickets(space_info); 1610 1566 spin_unlock(&space_info->lock); 1611 1567 } 1612 1568 ··· 1605 1581 1606 1582 { 1607 1583 DEFINE_WAIT(wait); 1608 - int ret = 0; 1609 1584 1610 - spin_lock(&space_info->lock); 1585 + spin_lock(&ticket->lock); 1611 1586 while (ticket->bytes > 0 && ticket->error == 0) { 1587 + int ret; 1588 + 1612 1589 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); 1590 + spin_unlock(&ticket->lock); 1613 1591 if (ret) { 1614 1592 /* 1615 1593 * Delete us from the list. After we unlock the space ··· 1621 1595 * despite getting an error, resulting in a space leak 1622 1596 * (bytes_may_use counter of our space_info). 1623 1597 */ 1624 - remove_ticket(space_info, ticket); 1625 - ticket->error = -EINTR; 1626 - break; 1598 + spin_lock(&space_info->lock); 1599 + remove_ticket(space_info, ticket, -EINTR); 1600 + spin_unlock(&space_info->lock); 1601 + return; 1627 1602 } 1628 - spin_unlock(&space_info->lock); 1629 1603 1630 1604 schedule(); 1631 1605 1632 1606 finish_wait(&ticket->wait, &wait); 1633 - spin_lock(&space_info->lock); 1607 + spin_lock(&ticket->lock); 1634 1608 } 1635 - spin_unlock(&space_info->lock); 1609 + spin_unlock(&ticket->lock); 1636 1610 } 1637 1611 1638 1612 /* 1639 1613 * Do the appropriate flushing and waiting for a ticket. 1640 1614 * 1641 - * @fs_info: the filesystem 1642 1615 * @space_info: space info for the reservation 1643 1616 * @ticket: ticket for the reservation 1644 1617 * @start_ns: timestamp when the reservation started ··· 1647 1622 * This does the work of figuring out how to flush for the ticket, waiting for 1648 1623 * the reservation, and returning the appropriate error if there is one. 
1649 1624 */ 1650 - static int handle_reserve_ticket(struct btrfs_fs_info *fs_info, 1651 - struct btrfs_space_info *space_info, 1625 + static int handle_reserve_ticket(struct btrfs_space_info *space_info, 1652 1626 struct reserve_ticket *ticket, 1653 1627 u64 start_ns, u64 orig_bytes, 1654 1628 enum btrfs_reserve_flush_enum flush) ··· 1661 1637 wait_reserve_ticket(space_info, ticket); 1662 1638 break; 1663 1639 case BTRFS_RESERVE_FLUSH_LIMIT: 1664 - priority_reclaim_metadata_space(fs_info, space_info, ticket, 1640 + priority_reclaim_metadata_space(space_info, ticket, 1665 1641 priority_flush_states, 1666 1642 ARRAY_SIZE(priority_flush_states)); 1667 1643 break; 1668 1644 case BTRFS_RESERVE_FLUSH_EVICT: 1669 - priority_reclaim_metadata_space(fs_info, space_info, ticket, 1645 + priority_reclaim_metadata_space(space_info, ticket, 1670 1646 evict_flush_states, 1671 1647 ARRAY_SIZE(evict_flush_states)); 1672 1648 break; 1673 1649 case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE: 1674 - priority_reclaim_data_space(fs_info, space_info, ticket); 1650 + priority_reclaim_data_space(space_info, ticket); 1675 1651 break; 1676 1652 default: 1677 - ASSERT(0); 1653 + ASSERT(0, "flush=%d", flush); 1678 1654 break; 1679 1655 } 1680 1656 ··· 1686 1662 * releasing reserved space (if an error happens the expectation is that 1687 1663 * space wasn't reserved at all). 1688 1664 */ 1689 - ASSERT(!(ticket->bytes == 0 && ticket->error)); 1690 - trace_btrfs_reserve_ticket(fs_info, space_info->flags, orig_bytes, 1691 - start_ns, flush, ticket->error); 1665 + ASSERT(!(ticket->bytes == 0 && ticket->error), 1666 + "ticket->bytes=%llu ticket->error=%d", ticket->bytes, ticket->error); 1667 + trace_btrfs_reserve_ticket(space_info->fs_info, space_info->flags, 1668 + orig_bytes, start_ns, flush, ticket->error); 1692 1669 return ret; 1693 1670 } 1694 1671 ··· 1703 1678 (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL); 1704 1679 } 1705 1680 1706 - static inline void maybe_clamp_preempt(struct btrfs_fs_info *fs_info, 1707 - struct btrfs_space_info *space_info) 1681 + static inline void maybe_clamp_preempt(struct btrfs_space_info *space_info) 1708 1682 { 1683 + struct btrfs_fs_info *fs_info = space_info->fs_info; 1709 1684 u64 ordered = percpu_counter_sum_positive(&fs_info->ordered_bytes); 1710 1685 u64 delalloc = percpu_counter_sum_positive(&fs_info->delalloc_bytes); 1711 1686 ··· 1740 1715 /* 1741 1716 * Try to reserve bytes from the block_rsv's space. 1742 1717 * 1743 - * @fs_info: the filesystem 1744 1718 * @space_info: space info we want to allocate from 1745 1719 * @orig_bytes: number of bytes we want 1746 1720 * @flush: whether or not we can flush to make our reservation ··· 1751 1727 * regain reservations will be made and this will fail if there is not enough 1752 1728 * space already. 
1753 1729 */ 1754 - static int __reserve_bytes(struct btrfs_fs_info *fs_info, 1755 - struct btrfs_space_info *space_info, u64 orig_bytes, 1756 - enum btrfs_reserve_flush_enum flush) 1730 + static int reserve_bytes(struct btrfs_space_info *space_info, u64 orig_bytes, 1731 + enum btrfs_reserve_flush_enum flush) 1757 1732 { 1733 + struct btrfs_fs_info *fs_info = space_info->fs_info; 1758 1734 struct work_struct *async_work; 1759 1735 struct reserve_ticket ticket; 1760 1736 u64 start_ns = 0; ··· 1762 1738 int ret = -ENOSPC; 1763 1739 bool pending_tickets; 1764 1740 1765 - ASSERT(orig_bytes); 1741 + ASSERT(orig_bytes, "orig_bytes=%llu", orig_bytes); 1766 1742 /* 1767 1743 * If have a transaction handle (current->journal_info != NULL), then 1768 1744 * the flush method can not be neither BTRFS_RESERVE_FLUSH_ALL* nor ··· 1771 1747 */ 1772 1748 if (current->journal_info) { 1773 1749 /* One assert per line for easier debugging. */ 1774 - ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL); 1775 - ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL_STEAL); 1776 - ASSERT(flush != BTRFS_RESERVE_FLUSH_EVICT); 1750 + ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL, "flush=%d", flush); 1751 + ASSERT(flush != BTRFS_RESERVE_FLUSH_ALL_STEAL, "flush=%d", flush); 1752 + ASSERT(flush != BTRFS_RESERVE_FLUSH_EVICT, "flush=%d", flush); 1777 1753 } 1778 1754 1779 1755 if (flush == BTRFS_RESERVE_FLUSH_DATA) ··· 1801 1777 */ 1802 1778 if (!pending_tickets && 1803 1779 ((used + orig_bytes <= space_info->total_bytes) || 1804 - btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { 1780 + can_overcommit(space_info, used, orig_bytes, flush))) { 1805 1781 btrfs_space_info_update_bytes_may_use(space_info, orig_bytes); 1806 1782 ret = 0; 1807 1783 } ··· 1812 1788 * left to allocate for the block. 1813 1789 */ 1814 1790 if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) { 1815 - used = btrfs_space_info_used(space_info, false); 1791 + used -= space_info->bytes_may_use; 1816 1792 if (used + orig_bytes <= space_info->total_bytes) { 1817 1793 btrfs_space_info_update_bytes_may_use(space_info, orig_bytes); 1818 1794 ret = 0; ··· 1831 1807 ticket.error = 0; 1832 1808 space_info->reclaim_size += ticket.bytes; 1833 1809 init_waitqueue_head(&ticket.wait); 1810 + spin_lock_init(&ticket.lock); 1834 1811 ticket.steal = can_steal(flush); 1835 1812 if (trace_btrfs_reserve_ticket_enabled()) 1836 1813 start_ns = ktime_get_ns(); ··· 1848 1823 * preemptive flushing in order to keep up with 1849 1824 * the workload. 1850 1825 */ 1851 - maybe_clamp_preempt(fs_info, space_info); 1826 + maybe_clamp_preempt(space_info); 1852 1827 1853 - space_info->flush = 1; 1828 + space_info->flush = true; 1854 1829 trace_btrfs_trigger_flush(fs_info, 1855 1830 space_info->flags, 1856 1831 orig_bytes, flush, ··· 1869 1844 */ 1870 1845 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && 1871 1846 !work_busy(&fs_info->preempt_reclaim_work) && 1872 - need_preemptive_reclaim(fs_info, space_info)) { 1847 + need_preemptive_reclaim(space_info)) { 1873 1848 trace_btrfs_trigger_flush(fs_info, space_info->flags, 1874 1849 orig_bytes, flush, "preempt"); 1875 1850 queue_work(system_dfl_wq, ··· 1880 1855 if (!ret || !can_ticket(flush)) 1881 1856 return ret; 1882 1857 1883 - return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns, 1884 - orig_bytes, flush); 1858 + return handle_reserve_ticket(space_info, &ticket, start_ns, orig_bytes, flush); 1885 1859 } 1886 1860 1887 1861 /* 1888 1862 * Try to reserve metadata bytes from the block_rsv's space. 
1889 1863 * 1890 - * @fs_info: the filesystem 1891 1864 * @space_info: the space_info we're allocating for 1892 1865 * @orig_bytes: number of bytes we want 1893 1866 * @flush: whether or not we can flush to make our reservation ··· 1897 1874 * regain reservations will be made and this will fail if there is not enough 1898 1875 * space already. 1899 1876 */ 1900 - int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, 1901 - struct btrfs_space_info *space_info, 1877 + int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info, 1902 1878 u64 orig_bytes, 1903 1879 enum btrfs_reserve_flush_enum flush) 1904 1880 { 1905 1881 int ret; 1906 1882 1907 - ret = __reserve_bytes(fs_info, space_info, orig_bytes, flush); 1883 + ret = reserve_bytes(space_info, orig_bytes, flush); 1908 1884 if (ret == -ENOSPC) { 1885 + struct btrfs_fs_info *fs_info = space_info->fs_info; 1886 + 1909 1887 trace_btrfs_space_reservation(fs_info, "space_info:enospc", 1910 1888 space_info->flags, orig_bytes, 1); 1911 1889 1912 1890 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 1913 - btrfs_dump_space_info(fs_info, space_info, orig_bytes, false); 1891 + btrfs_dump_space_info(space_info, orig_bytes, false); 1914 1892 } 1915 1893 return ret; 1916 1894 } ··· 1919 1895 /* 1920 1896 * Try to reserve data bytes for an allocation. 1921 1897 * 1922 - * @fs_info: the filesystem 1898 + * @space_info: the space_info we're allocating for 1923 1899 * @bytes: number of bytes we need 1924 1900 * @flush: how we are allowed to flush 1925 1901 * ··· 1934 1910 1935 1911 ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA || 1936 1912 flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE || 1937 - flush == BTRFS_RESERVE_NO_FLUSH); 1938 - ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA); 1913 + flush == BTRFS_RESERVE_NO_FLUSH, "flush=%d", flush); 1914 + ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA, 1915 + "current->journal_info=0x%lx flush=%d", 1916 + (unsigned long)current->journal_info, flush); 1939 1917 1940 - ret = __reserve_bytes(fs_info, space_info, bytes, flush); 1918 + ret = reserve_bytes(space_info, bytes, flush); 1941 1919 if (ret == -ENOSPC) { 1942 1920 trace_btrfs_space_reservation(fs_info, "space_info:enospc", 1943 1921 space_info->flags, bytes, 1); 1944 1922 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 1945 - btrfs_dump_space_info(fs_info, space_info, bytes, false); 1923 + btrfs_dump_space_info(space_info, bytes, false); 1946 1924 } 1947 1925 return ret; 1948 1926 } ··· 1957 1931 btrfs_info(fs_info, "dumping space info:"); 1958 1932 list_for_each_entry(space_info, &fs_info->space_info, list) { 1959 1933 spin_lock(&space_info->lock); 1960 - __btrfs_dump_space_info(fs_info, space_info); 1934 + __btrfs_dump_space_info(space_info); 1961 1935 spin_unlock(&space_info->lock); 1962 1936 } 1963 1937 dump_global_block_rsv(fs_info); ··· 1974 1948 int factor; 1975 1949 1976 1950 /* It's df, we don't care if it's racy */ 1977 - if (list_empty(&sinfo->ro_bgs)) 1951 + if (data_race(list_empty(&sinfo->ro_bgs))) 1978 1952 return 0; 1979 1953 1980 1954 spin_lock(&sinfo->lock); ··· 2213 2187 global_rsv->reserved += to_add; 2214 2188 btrfs_space_info_update_bytes_may_use(space_info, to_add); 2215 2189 if (global_rsv->reserved >= global_rsv->size) 2216 - global_rsv->full = 1; 2190 + global_rsv->full = true; 2217 2191 len -= to_add; 2218 2192 } 2219 2193 spin_unlock(&global_rsv->lock); ··· 2221 2195 grant: 2222 2196 /* Add to any tickets we may have. 
*/ 2223 2197 if (len) 2224 - btrfs_try_granting_tickets(fs_info, space_info); 2198 + btrfs_try_granting_tickets(space_info); 2225 2199 }
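The common thread in the space-info.c hunks above is dropping the redundant fs_info argument: struct btrfs_space_info already carries a back-pointer to its filesystem, so each converted function derives it locally. A minimal sketch of the shape, with the parameter list trimmed for brevity (the derivation line is verbatim from the diff):

    /*
     * Before: callers had to pass both pointers:
     *   static void flush_space(struct btrfs_fs_info *fs_info,
     *                           struct btrfs_space_info *space_info, ...);
     */
    static void flush_space(struct btrfs_space_info *space_info, u64 num_bytes)
    {
            struct btrfs_fs_info *fs_info = space_info->fs_info;

            /* ...body unchanged, fs_info used exactly as before... */
    }

Besides shortening every call site, this makes it impossible to pass a mismatched fs_info/space_info pair.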
fs/btrfs/space-info.h (+20 -23)
··· 142 142 flushing. The value is >> clamp, so turns 143 143 out to be a 2^clamp divisor. */ 144 144 145 - unsigned int full:1; /* indicates that we cannot allocate any more 145 + bool full; /* indicates that we cannot allocate any more 146 146 chunks for this space */ 147 - unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ 147 + bool chunk_alloc; /* set if we are allocating a chunk */ 148 148 149 - unsigned int flush:1; /* set if we are trying to make space */ 149 + bool flush; /* set if we are trying to make space */ 150 150 151 151 unsigned int force_alloc; /* set if we need to force a chunk 152 152 alloc for this space */ ··· 224 224 s64 reclaimable_bytes; 225 225 }; 226 226 227 - struct reserve_ticket { 228 - u64 bytes; 229 - int error; 230 - bool steal; 231 - struct list_head list; 232 - wait_queue_head_t wait; 233 - }; 234 - 235 227 static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info) 236 228 { 237 229 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && ··· 258 266 DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); 259 267 DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); 260 268 269 + static inline u64 btrfs_space_info_used(const struct btrfs_space_info *s_info, 270 + bool may_use_included) 271 + { 272 + lockdep_assert_held(&s_info->lock); 273 + 274 + return s_info->bytes_used + s_info->bytes_reserved + 275 + s_info->bytes_pinned + s_info->bytes_readonly + 276 + s_info->bytes_zone_unusable + 277 + (may_use_included ? s_info->bytes_may_use : 0); 278 + } 279 + 261 280 int btrfs_init_space_info(struct btrfs_fs_info *fs_info); 262 281 void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, 263 282 struct btrfs_block_group *block_group); ··· 276 273 u64 chunk_size); 277 274 struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, 278 275 u64 flags); 279 - u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info, 280 - bool may_use_included); 281 276 void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 282 - void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, 283 - struct btrfs_space_info *info, u64 bytes, 277 + void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes, 284 278 bool dump_block_groups); 285 - int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info, 286 - struct btrfs_space_info *space_info, 279 + int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info, 287 280 u64 orig_bytes, 288 281 enum btrfs_reserve_flush_enum flush); 289 - void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, 290 - struct btrfs_space_info *space_info); 291 - int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, 292 - const struct btrfs_space_info *space_info, u64 bytes, 293 - enum btrfs_reserve_flush_enum flush); 282 + void btrfs_try_granting_tickets(struct btrfs_space_info *space_info); 283 + bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes, 284 + enum btrfs_reserve_flush_enum flush); 294 285 295 286 static inline void btrfs_space_info_free_bytes_may_use( 296 287 struct btrfs_space_info *space_info, ··· 292 295 { 293 296 spin_lock(&space_info->lock); 294 297 btrfs_space_info_update_bytes_may_use(space_info, -num_bytes); 295 - btrfs_try_granting_tickets(space_info->fs_info, space_info); 298 + btrfs_try_granting_tickets(space_info); 296 299 spin_unlock(&space_info->lock); 297 300 } 298 301 int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes,
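Note the relocation of btrfs_space_info_used(): formerly an exported __pure function, it is now a static inline that lockdep-asserts the space_info lock, so the locking contract is machine-checked instead of implied. A hedged usage sketch (the wrapper function here is hypothetical):

    static u64 sample_space_used(struct btrfs_space_info *sinfo)
    {
            u64 used;

            spin_lock(&sinfo->lock);
            /* The helper's lockdep_assert_held() fires on unlocked
             * callers in lockdep-enabled builds. */
            used = btrfs_space_info_used(sinfo, true);
            spin_unlock(&sinfo->lock);

            return used;
    }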
fs/btrfs/subpage.c (+27 -40)
··· 180 180 /* Basic checks */ 181 181 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 182 182 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 183 - IS_ALIGNED(len, fs_info->sectorsize)); 183 + IS_ALIGNED(len, fs_info->sectorsize), "start=%llu len=%u", start, len); 184 184 /* 185 185 * The range check only works for mapped page, we can still have 186 186 * unmapped page like dummy extent buffer pages. ··· 195 195 #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ 196 196 ({ \ 197 197 unsigned int __start_bit; \ 198 - const unsigned int blocks_per_folio = \ 199 - btrfs_blocks_per_folio(fs_info, folio); \ 198 + const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 200 199 \ 201 200 btrfs_subpage_assert(fs_info, folio, start, len); \ 202 201 __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \ 203 - __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \ 202 + __start_bit += __bpf * btrfs_bitmap_nr_##name; \ 204 203 __start_bit; \ 205 204 }) 206 205 ··· 250 251 clear_bit(bit, bfs->bitmaps); 251 252 cleared++; 252 253 } 253 - ASSERT(atomic_read(&bfs->nr_locked) >= cleared); 254 + ASSERT(atomic_read(&bfs->nr_locked) >= cleared, 255 + "atomic_read(&bfs->nr_locked)=%d cleared=%d", 256 + atomic_read(&bfs->nr_locked), cleared); 254 257 last = atomic_sub_and_test(cleared, &bfs->nr_locked); 255 258 spin_unlock_irqrestore(&bfs->lock, flags); 256 259 return last; ··· 331 330 if (test_and_clear_bit(bit + start_bit, bfs->bitmaps)) 332 331 cleared++; 333 332 } 334 - ASSERT(atomic_read(&bfs->nr_locked) >= cleared); 333 + ASSERT(atomic_read(&bfs->nr_locked) >= cleared, 334 + "atomic_read(&bfs->nr_locked)=%d cleared=%d", 335 + atomic_read(&bfs->nr_locked), cleared); 335 336 last = atomic_sub_and_test(cleared, &bfs->nr_locked); 336 337 spin_unlock_irqrestore(&bfs->lock, flags); 337 338 if (last) ··· 342 339 343 340 #define subpage_test_bitmap_all_set(fs_info, folio, name) \ 344 341 ({ \ 345 - struct btrfs_folio_state *bfs = folio_get_private(folio); \ 346 - const unsigned int blocks_per_folio = \ 347 - btrfs_blocks_per_folio(fs_info, folio); \ 342 + struct btrfs_folio_state *__bfs = folio_get_private(folio); \ 343 + const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 348 344 \ 349 - bitmap_test_range_all_set(bfs->bitmaps, \ 350 - blocks_per_folio * btrfs_bitmap_nr_##name, \ 351 - blocks_per_folio); \ 345 + bitmap_test_range_all_set(__bfs->bitmaps, \ 346 + __bpf * btrfs_bitmap_nr_##name, __bpf); \ 352 347 }) 353 348 354 349 #define subpage_test_bitmap_all_zero(fs_info, folio, name) \ 355 350 ({ \ 356 - struct btrfs_folio_state *bfs = folio_get_private(folio); \ 357 - const unsigned int blocks_per_folio = \ 358 - btrfs_blocks_per_folio(fs_info, folio); \ 351 + struct btrfs_folio_state *__bfs = folio_get_private(folio); \ 352 + const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 359 353 \ 360 - bitmap_test_range_all_zero(bfs->bitmaps, \ 361 - blocks_per_folio * btrfs_bitmap_nr_##name, \ 362 - blocks_per_folio); \ 354 + bitmap_test_range_all_zero(__bfs->bitmaps, \ 355 + __bpf * btrfs_bitmap_nr_##name, __bpf); \ 363 356 }) 364 357 365 358 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, ··· 445 446 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 446 447 writeback, start, len); 447 448 unsigned long flags; 449 + bool keep_write; 448 450 449 451 spin_lock_irqsave(&bfs->lock, flags); 450 452 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ··· 456 456 * 
assume writeback is complete, and exit too early — violating sync 457 457 * ordering guarantees. 458 458 */ 459 + keep_write = folio_test_dirty(folio); 459 460 if (!folio_test_writeback(folio)) 460 - __folio_start_writeback(folio, true); 461 - if (!folio_test_dirty(folio)) { 462 - struct address_space *mapping = folio_mapping(folio); 463 - XA_STATE(xas, &mapping->i_pages, folio->index); 464 - unsigned long flags; 465 - 466 - xas_lock_irqsave(&xas, flags); 467 - xas_load(&xas); 468 - xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); 469 - xas_unlock_irqrestore(&xas, flags); 470 - } 461 + __folio_start_writeback(folio, keep_write); 471 462 spin_unlock_irqrestore(&bfs->lock, flags); 472 463 } 473 464 ··· 664 673 665 674 #define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst) \ 666 675 { \ 667 - const unsigned int blocks_per_folio = \ 668 - btrfs_blocks_per_folio(fs_info, folio); \ 669 - const struct btrfs_folio_state *bfs = folio_get_private(folio); \ 676 + const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 677 + const struct btrfs_folio_state *__bfs = folio_get_private(folio); \ 670 678 \ 671 - ASSERT(blocks_per_folio <= BITS_PER_LONG); \ 672 - *dst = bitmap_read(bfs->bitmaps, \ 673 - blocks_per_folio * btrfs_bitmap_nr_##name, \ 674 - blocks_per_folio); \ 679 + ASSERT(__bpf <= BITS_PER_LONG); \ 680 + *dst = bitmap_read(__bfs->bitmaps, \ 681 + __bpf * btrfs_bitmap_nr_##name, __bpf); \ 675 682 } 676 683 677 684 #define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \ 678 685 { \ 679 686 unsigned long bitmap; \ 680 - const unsigned int blocks_per_folio = \ 681 - btrfs_blocks_per_folio(fs_info, folio); \ 687 + const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 682 688 \ 683 689 GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \ 684 690 btrfs_warn(fs_info, \ 685 691 "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ 686 - start, len, folio_pos(folio), \ 687 - blocks_per_folio, &bitmap); \ 692 + start, len, folio_pos(folio), __bpf, &bitmap); \ 688 693 } 689 694 690 695 /*
fs/btrfs/subpage.h (-1)
··· 7 7 #include <linux/atomic.h> 8 8 #include <linux/sizes.h> 9 9 #include "btrfs_inode.h" 10 - #include "fs.h" 11 10 12 11 struct address_space; 13 12 struct folio;
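Dropping "fs.h" while keeping the bare struct declarations is the include-pruning idiom applied to several headers in this series: an interface that only passes pointers compiles fine against incomplete types, so a forward declaration replaces the heavy include. Generic sketch (types are illustrative):

    /* No #include required: pointer-only interfaces work with
     * incomplete types. */
    struct folio;
    struct demo_ctx;

    int demo_begin_write(struct demo_ctx *ctx, struct folio *folio);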
fs/btrfs/super.c (+68 -9)
··· 807 807 struct btrfs_root_ref *root_ref; 808 808 struct btrfs_inode_ref *inode_ref; 809 809 struct btrfs_key key; 810 - struct btrfs_path *path = NULL; 810 + BTRFS_PATH_AUTO_FREE(path); 811 811 char *name = NULL, *ptr; 812 812 u64 dirid; 813 813 int len; 814 814 int ret; 815 815 816 816 path = btrfs_alloc_path(); 817 - if (!path) { 818 - ret = -ENOMEM; 819 - goto err; 820 - } 817 + if (!path) 818 + return ERR_PTR(-ENOMEM); 821 819 822 820 name = kmalloc(PATH_MAX, GFP_KERNEL); 823 821 if (!name) { ··· 903 905 fs_root = NULL; 904 906 } 905 907 906 - btrfs_free_path(path); 907 908 if (ptr == name + PATH_MAX - 1) { 908 909 name[0] = '/'; 909 910 name[1] = '\0'; ··· 913 916 914 917 err: 915 918 btrfs_put_root(fs_root); 916 - btrfs_free_path(path); 917 919 kfree(name); 918 920 return ERR_PTR(ret); 919 921 } ··· 1610 1614 static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, 1611 1615 u64 *free_bytes) 1612 1616 { 1613 - struct btrfs_device_info *devices_info; 1617 + struct btrfs_device_info AUTO_KFREE(devices_info); 1614 1618 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 1615 1619 struct btrfs_device *device; 1616 1620 u64 type; ··· 1708 1712 nr_devices--; 1709 1713 } 1710 1714 1711 - kfree(devices_info); 1712 1715 *free_bytes = avail_space; 1713 1716 return 0; 1714 1717 } ··· 2425 2430 return 0; 2426 2431 } 2427 2432 2433 + #ifdef CONFIG_BTRFS_EXPERIMENTAL 2434 + static int btrfs_remove_bdev(struct super_block *sb, struct block_device *bdev) 2435 + { 2436 + struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2437 + struct btrfs_device *device; 2438 + struct btrfs_dev_lookup_args lookup_args = { .devt = bdev->bd_dev }; 2439 + bool can_rw; 2440 + 2441 + mutex_lock(&fs_info->fs_devices->device_list_mutex); 2442 + device = btrfs_find_device(fs_info->fs_devices, &lookup_args); 2443 + if (!device) { 2444 + mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2445 + /* Device not found, should not affect the running fs, just give a warning. */ 2446 + btrfs_warn(fs_info, "unable to find btrfs device for block device '%pg'", bdev); 2447 + return 0; 2448 + } 2449 + /* 2450 + * The to-be-removed device is already missing? 2451 + * 2452 + * That's weird but no special handling needed and can exit right now. 2453 + */ 2454 + if (unlikely(test_and_set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))) { 2455 + mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2456 + btrfs_warn(fs_info, "btrfs device id %llu is already missing", device->devid); 2457 + return 0; 2458 + } 2459 + 2460 + device->fs_devices->missing_devices++; 2461 + if (test_and_clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { 2462 + list_del_init(&device->dev_alloc_list); 2463 + WARN_ON(device->fs_devices->rw_devices < 1); 2464 + device->fs_devices->rw_devices--; 2465 + } 2466 + can_rw = btrfs_check_rw_degradable(fs_info, device); 2467 + mutex_unlock(&fs_info->fs_devices->device_list_mutex); 2468 + /* 2469 + * Now device is considered missing, btrfs_device_name() won't give a 2470 + * meaningful result anymore, so only output the devid. 
2471 + */ 2472 + if (unlikely(!can_rw)) { 2473 + btrfs_crit(fs_info, 2474 + "btrfs device id %llu has gone missing, can not maintain read-write", 2475 + device->devid); 2476 + return -EIO; 2477 + } 2478 + btrfs_warn(fs_info, 2479 + "btrfs device id %llu has gone missing, continue as degraded", 2480 + device->devid); 2481 + btrfs_set_opt(fs_info->mount_opt, DEGRADED); 2482 + return 0; 2483 + } 2484 + 2485 + static void btrfs_shutdown(struct super_block *sb) 2486 + { 2487 + struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2488 + 2489 + btrfs_force_shutdown(fs_info); 2490 + } 2491 + #endif 2492 + 2428 2493 static const struct super_operations btrfs_super_ops = { 2429 2494 .drop_inode = btrfs_drop_inode, 2430 2495 .evict_inode = btrfs_evict_inode, ··· 2500 2445 .unfreeze_fs = btrfs_unfreeze, 2501 2446 .nr_cached_objects = btrfs_nr_cached_objects, 2502 2447 .free_cached_objects = btrfs_free_cached_objects, 2448 + #ifdef CONFIG_BTRFS_EXPERIMENTAL 2449 + .remove_bdev = btrfs_remove_bdev, 2450 + .shutdown = btrfs_shutdown, 2451 + #endif 2503 2452 }; 2504 2453 2505 2454 static const struct file_operations btrfs_ctl_fops = {
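super.c also shows the series' auto-cleanup conversions: BTRFS_PATH_AUTO_FREE(path) and AUTO_KFREE(devices_info) remove btrfs_free_path()/kfree() calls from every exit path. The mechanism underneath is the compiler's cleanup attribute (the kernel builds such helpers on linux/cleanup.h); a standalone userspace sketch of the idea, with illustrative names:

    #include <stdlib.h>

    /* The cleanup handler receives a pointer to the annotated variable. */
    static void free_ptr(void *p)
    {
            free(*(void **)p);
    }
    #define AUTO_FREE __attribute__((cleanup(free_ptr)))

    static int demo(void)
    {
            AUTO_FREE char *buf = malloc(64);

            if (!buf)
                    return -1;
            /* Any return from here on frees buf automatically, so no
             * "goto out_free" ladder is needed. */
            return 0;
    }

    int main(void)
    {
            return demo();
    }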
fs/btrfs/sysfs.c (+55 -3)
··· 10 10 #include <linux/completion.h> 11 11 #include <linux/bug.h> 12 12 #include <linux/list.h> 13 + #include <linux/string_choices.h> 13 14 #include <crypto/hash.h> 14 15 #include "messages.h" 15 16 #include "ctree.h" ··· 26 25 #include "misc.h" 27 26 #include "fs.h" 28 27 #include "accessors.h" 28 + #include "zoned.h" 29 29 30 30 /* 31 31 * Structure name Path ··· 1189 1187 } 1190 1188 BTRFS_ATTR_RW(, commit_stats, btrfs_commit_stats_show, btrfs_commit_stats_store); 1191 1189 1190 + static ssize_t btrfs_zoned_stats_show(struct kobject *kobj, 1191 + struct kobj_attribute *a, char *buf) 1192 + { 1193 + struct btrfs_fs_info *fs_info = to_fs_info(kobj); 1194 + struct btrfs_block_group *bg; 1195 + size_t ret = 0; 1196 + 1197 + 1198 + if (!btrfs_is_zoned(fs_info)) 1199 + return ret; 1200 + 1201 + spin_lock(&fs_info->zone_active_bgs_lock); 1202 + ret += sysfs_emit_at(buf, ret, "active block-groups: %zu\n", 1203 + list_count_nodes(&fs_info->zone_active_bgs)); 1204 + spin_unlock(&fs_info->zone_active_bgs_lock); 1205 + 1206 + mutex_lock(&fs_info->reclaim_bgs_lock); 1207 + spin_lock(&fs_info->unused_bgs_lock); 1208 + ret += sysfs_emit_at(buf, ret, "\treclaimable: %zu\n", 1209 + list_count_nodes(&fs_info->reclaim_bgs)); 1210 + ret += sysfs_emit_at(buf, ret, "\tunused: %zu\n", 1211 + list_count_nodes(&fs_info->unused_bgs)); 1212 + spin_unlock(&fs_info->unused_bgs_lock); 1213 + mutex_unlock(&fs_info->reclaim_bgs_lock); 1214 + 1215 + ret += sysfs_emit_at(buf, ret, "\tneed reclaim: %s\n", 1216 + str_true_false(btrfs_zoned_should_reclaim(fs_info))); 1217 + 1218 + if (fs_info->data_reloc_bg) 1219 + ret += sysfs_emit_at(buf, ret, 1220 + "data relocation block-group: %llu\n", 1221 + fs_info->data_reloc_bg); 1222 + if (fs_info->treelog_bg) 1223 + ret += sysfs_emit_at(buf, ret, 1224 + "tree-log block-group: %llu\n", 1225 + fs_info->treelog_bg); 1226 + 1227 + spin_lock(&fs_info->zone_active_bgs_lock); 1228 + ret += sysfs_emit_at(buf, ret, "active zones:\n"); 1229 + list_for_each_entry(bg, &fs_info->zone_active_bgs, active_bg_list) { 1230 + ret += sysfs_emit_at(buf, ret, 1231 + "\tstart: %llu, wp: %llu used: %llu, reserved: %llu, unusable: %llu\n", 1232 + bg->start, bg->alloc_offset, bg->used, 1233 + bg->reserved, bg->zone_unusable); 1234 + } 1235 + spin_unlock(&fs_info->zone_active_bgs_lock); 1236 + return ret; 1237 + } 1238 + BTRFS_ATTR(, zoned_stats, btrfs_zoned_stats_show); 1239 + 1192 1240 static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, 1193 1241 struct kobj_attribute *a, char *buf) 1194 1242 { ··· 1651 1599 BTRFS_ATTR_PTR(, bg_reclaim_threshold), 1652 1600 BTRFS_ATTR_PTR(, commit_stats), 1653 1601 BTRFS_ATTR_PTR(, temp_fsid), 1602 + BTRFS_ATTR_PTR(, zoned_stats), 1654 1603 #ifdef CONFIG_BTRFS_EXPERIMENTAL 1655 1604 BTRFS_ATTR_PTR(, offload_csum), 1656 1605 #endif ··· 2034 1981 * Create a sysfs entry for a space info type at path 2035 1982 * /sys/fs/btrfs/UUID/allocation/TYPE 2036 1983 */ 2037 - int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, 2038 - struct btrfs_space_info *space_info) 1984 + int btrfs_sysfs_add_space_info_type(struct btrfs_space_info *space_info) 2039 1985 { 2040 1986 int ret; 2041 1987 2042 1988 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, 2043 - fs_info->space_info_kobj, "%s", 1989 + space_info->fs_info->space_info_kobj, "%s", 2044 1990 alloc_name(space_info)); 2045 1991 if (ret) { 2046 1992 kobject_put(&space_info->kobj);
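The new zoned_stats attribute follows the standard show-only kobj_attribute pattern: sysfs_emit_at() appends into the single PAGE_SIZE sysfs buffer and returns the bytes written, so the show callback just accumulates a running length. Reduced sketch of the pattern (attribute name and values are illustrative):

    static ssize_t demo_stats_show(struct kobject *kobj,
                                   struct kobj_attribute *attr, char *buf)
    {
            size_t len = 0;

            /* sysfs_emit_at() clamps the output against PAGE_SIZE. */
            len += sysfs_emit_at(buf, len, "active block-groups: %zu\n",
                                 (size_t)3);
            len += sysfs_emit_at(buf, len, "\treclaimable: %zu\n", (size_t)1);

            return len;
    }
    static struct kobj_attribute demo_stats_attr = __ATTR_RO(demo_stats);

On a zoned filesystem the aggregate should then be readable from the per-filesystem sysfs directory, e.g. /sys/fs/btrfs/<UUID>/zoned_stats.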
fs/btrfs/sysfs.h (+1 -2)
··· 37 37 int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info); 38 38 void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); 39 39 void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache); 40 - int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, 41 - struct btrfs_space_info *space_info); 40 + int btrfs_sysfs_add_space_info_type(struct btrfs_space_info *space_info); 42 41 void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info); 43 42 void btrfs_sysfs_update_devid(struct btrfs_device *device); 44 43
fs/btrfs/tests/extent-io-tests.c (+1 -2)
··· 505 505 static int test_eb_bitmaps(u32 sectorsize, u32 nodesize) 506 506 { 507 507 struct btrfs_fs_info *fs_info; 508 - unsigned long *bitmap = NULL; 508 + unsigned long AUTO_KFREE(bitmap); 509 509 struct extent_buffer *eb = NULL; 510 510 int ret; 511 511 ··· 551 551 ret = __test_eb_bitmaps(bitmap, eb); 552 552 out: 553 553 free_extent_buffer(eb); 554 - kfree(bitmap); 555 554 btrfs_free_dummy_fs_info(fs_info); 556 555 return ret; 557 556 }
fs/btrfs/tests/extent-map-tests.c (+2 -4)
··· 1013 1013 struct rmap_test_vector *test) 1014 1014 { 1015 1015 struct btrfs_chunk_map *map; 1016 - u64 *logical = NULL; 1016 + u64 AUTO_KFREE(logical); 1017 1017 int i, out_ndaddrs, out_stripe_len; 1018 1018 int ret; 1019 1019 ··· 1046 1046 if (ret) { 1047 1047 test_err("error adding chunk map to mapping tree"); 1048 1048 btrfs_free_chunk_map(map); 1049 - goto out_free; 1049 + return ret; 1050 1050 } 1051 1051 1052 1052 ret = btrfs_rmap_block(fs_info, map->start, btrfs_sb_offset(1), ··· 1079 1079 ret = 0; 1080 1080 out: 1081 1081 btrfs_remove_chunk_map(fs_info, map); 1082 - out_free: 1083 - kfree(logical); 1084 1082 return ret; 1085 1083 } 1086 1084
fs/btrfs/tests/qgroup-tests.c (+4 -12)
··· 20 20 struct btrfs_extent_item *item; 21 21 struct btrfs_extent_inline_ref *iref; 22 22 struct btrfs_tree_block_info *block_info; 23 - struct btrfs_path *path; 23 + BTRFS_PATH_AUTO_FREE(path); 24 24 struct extent_buffer *leaf; 25 25 struct btrfs_key ins; 26 26 u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info); ··· 41 41 ret = btrfs_insert_empty_item(&trans, root, path, &ins, size); 42 42 if (ret) { 43 43 test_err("couldn't insert ref %d", ret); 44 - btrfs_free_path(path); 45 44 return ret; 46 45 } 47 46 ··· 60 61 btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY); 61 62 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 62 63 } 63 - btrfs_free_path(path); 64 64 return 0; 65 65 } 66 66 ··· 68 70 { 69 71 struct btrfs_trans_handle trans; 70 72 struct btrfs_extent_item *item; 71 - struct btrfs_path *path; 73 + BTRFS_PATH_AUTO_FREE(path); 72 74 struct btrfs_key key; 73 75 u64 refs; 74 76 int ret; ··· 88 90 ret = btrfs_search_slot(&trans, root, &key, path, 0, 1); 89 91 if (ret) { 90 92 test_err("couldn't find extent ref"); 91 - btrfs_free_path(path); 92 93 return ret; 93 94 } 94 95 ··· 109 112 ret = btrfs_insert_empty_item(&trans, root, path, &key, 0); 110 113 if (ret) 111 114 test_err("failed to insert backref"); 112 - btrfs_free_path(path); 113 115 return ret; 114 116 } 115 117 ··· 117 121 { 118 122 struct btrfs_trans_handle trans; 119 123 struct btrfs_key key; 120 - struct btrfs_path *path; 124 + BTRFS_PATH_AUTO_FREE(path); 121 125 int ret; 122 126 123 127 btrfs_init_dummy_trans(&trans, NULL); ··· 135 139 ret = btrfs_search_slot(&trans, root, &key, path, -1, 1); 136 140 if (ret) { 137 141 test_err("didn't find our key %d", ret); 138 - btrfs_free_path(path); 139 142 return ret; 140 143 } 141 144 btrfs_del_item(&trans, root, path); 142 - btrfs_free_path(path); 143 145 return 0; 144 146 } 145 147 ··· 146 152 { 147 153 struct btrfs_trans_handle trans; 148 154 struct btrfs_extent_item *item; 149 - struct btrfs_path *path; 155 + BTRFS_PATH_AUTO_FREE(path); 150 156 struct btrfs_key key; 151 157 u64 refs; 152 158 int ret; ··· 166 172 ret = btrfs_search_slot(&trans, root, &key, path, 0, 1); 167 173 if (ret) { 168 174 test_err("couldn't find extent ref"); 169 - btrfs_free_path(path); 170 175 return ret; 171 176 } 172 177 ··· 191 198 return ret; 192 199 } 193 200 btrfs_del_item(&trans, root, path); 194 - btrfs_free_path(path); 195 201 return ret; 196 202 } 197 203
fs/btrfs/transaction.c (+33 -15)
··· 32 32 #include "ioctl.h" 33 33 #include "relocation.h" 34 34 #include "scrub.h" 35 + #include "ordered-data.h" 36 + #include "delayed-inode.h" 35 37 36 38 static struct kmem_cache *btrfs_trans_handle_cachep; 37 39 ··· 140 138 141 139 void btrfs_put_transaction(struct btrfs_transaction *transaction) 142 140 { 143 - WARN_ON(refcount_read(&transaction->use_count) == 0); 144 141 if (refcount_dec_and_test(&transaction->use_count)) { 145 142 BUG_ON(!list_empty(&transaction->list)); 146 143 WARN_ON(!xa_empty(&transaction->delayed_refs.head_refs)); ··· 186 185 * At this point no one can be using this transaction to modify any tree 187 186 * and no one can start another transaction to modify any tree either. 188 187 */ 189 - ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING); 188 + ASSERT(cur_trans->state == TRANS_STATE_COMMIT_DOING, 189 + "cur_trans->state=%d", cur_trans->state); 190 190 191 191 down_write(&fs_info->commit_root_sem); 192 192 ··· 577 575 * We want to reserve all the bytes we may need all at once, so we only 578 576 * do 1 enospc flushing cycle per transaction start. 579 577 */ 580 - ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); 578 + ret = btrfs_reserve_metadata_bytes(si, bytes, flush); 581 579 582 580 /* 583 581 * If we are an emergency flush, which can steal from the global block ··· 587 585 if (ret && flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) { 588 586 bytes -= *delayed_refs_bytes; 589 587 *delayed_refs_bytes = 0; 590 - ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); 588 + ret = btrfs_reserve_metadata_bytes(si, bytes, flush); 591 589 } 592 590 593 591 return ret; ··· 1026 1024 struct btrfs_fs_info *fs_info = trans->fs_info; 1027 1025 1028 1026 if (!trans->block_rsv) { 1029 - ASSERT(!trans->bytes_reserved); 1030 - ASSERT(!trans->delayed_refs_bytes_reserved); 1027 + ASSERT(trans->bytes_reserved == 0, 1028 + "trans->bytes_reserved=%llu", trans->bytes_reserved); 1029 + ASSERT(trans->delayed_refs_bytes_reserved == 0, 1030 + "trans->delayed_refs_bytes_reserved=%llu", 1031 + trans->delayed_refs_bytes_reserved); 1031 1032 return; 1032 1033 } 1033 1034 1034 1035 if (!trans->bytes_reserved) { 1035 - ASSERT(!trans->delayed_refs_bytes_reserved); 1036 + ASSERT(trans->delayed_refs_bytes_reserved == 0, 1037 + "trans->delayed_refs_bytes_reserved=%llu", 1038 + trans->delayed_refs_bytes_reserved); 1036 1039 return; 1037 1040 } 1038 1041 ··· 1236 1229 bool errors = false; 1237 1230 int ret; 1238 1231 1239 - ASSERT(btrfs_root_id(log_root) == BTRFS_TREE_LOG_OBJECTID); 1232 + ASSERT(btrfs_root_id(log_root) == BTRFS_TREE_LOG_OBJECTID, 1233 + "root_id(log_root)=%llu", btrfs_root_id(log_root)); 1240 1234 1241 1235 ret = __btrfs_wait_marked_extents(fs_info, dirty_pages); 1242 1236 if ((mark & EXTENT_DIRTY_LOG1) && ··· 1342 1334 * At this point no one can be using this transaction to modify any tree 1343 1335 * and no one can start another transaction to modify any tree either. 1344 1336 */ 1345 - ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING); 1337 + ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING, 1338 + "trans->transaction->state=%d", trans->transaction->state); 1346 1339 1347 1340 eb = btrfs_lock_root_node(fs_info->tree_root); 1348 1341 ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, ··· 1477 1468 * At this point no one can be using this transaction to modify any tree 1478 1469 * and no one can start another transaction to modify any tree either. 
1479 1470 */ 1480 - ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING); 1471 + ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING, 1472 + "trans->transaction->state=%d", trans->transaction->state); 1481 1473 1482 1474 spin_lock(&fs_info->fs_roots_radix_lock); 1483 1475 while (1) { ··· 1496 1486 * At this point we can neither have tasks logging inodes 1497 1487 * from a root nor trying to commit a log tree. 1498 1488 */ 1499 - ASSERT(atomic_read(&root->log_writers) == 0); 1500 - ASSERT(atomic_read(&root->log_commit[0]) == 0); 1501 - ASSERT(atomic_read(&root->log_commit[1]) == 0); 1489 + ASSERT(atomic_read(&root->log_writers) == 0, 1490 + "atomic_read(&root->log_writers)=%d", 1491 + atomic_read(&root->log_writers)); 1492 + ASSERT(atomic_read(&root->log_commit[0]) == 0, 1493 + "atomic_read(&root->log_commit[0])=%d", 1494 + atomic_read(&root->log_commit[0])); 1495 + ASSERT(atomic_read(&root->log_commit[1]) == 0, 1496 + "atomic_read(&root->log_commit[1])=%d", 1497 + atomic_read(&root->log_commit[1])); 1502 1498 1503 1499 radix_tree_tag_clear(&fs_info->fs_roots_radix, 1504 1500 (unsigned long)btrfs_root_id(root), ··· 2173 2157 return; 2174 2158 2175 2159 lockdep_assert_held(&trans->fs_info->trans_lock); 2176 - ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP); 2160 + ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP, 2161 + "cur_trans->state=%d", cur_trans->state); 2177 2162 2178 2163 list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots); 2179 2164 } ··· 2201 2184 struct btrfs_transaction *prev_trans = NULL; 2202 2185 int ret; 2203 2186 2204 - ASSERT(refcount_read(&trans->use_count) == 1); 2187 + ASSERT(refcount_read(&trans->use_count) == 1, 2188 + "refcount_read(&trans->use_count)=%d", refcount_read(&trans->use_count)); 2205 2189 btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); 2206 2190 2207 2191 clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
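Much of the transaction.c churn extends bare ASSERT(cond) calls with a format string and the offending values, so a tripped assertion reports the state that broke the invariant rather than only the expression text. An illustrative sketch of a variadic assert of this shape (a sketch only, not the actual btrfs macro):

    #define ASSERT_MSG(cond, fmt, ...)                                   \
    do {                                                                 \
            if (unlikely(!(cond))) {                                     \
                    pr_err("assertion failed: %s (%s:%d): " fmt "\n",    \
                           #cond, __FILE__, __LINE__, ##__VA_ARGS__);    \
                    BUG();                                               \
            }                                                            \
    } while (0)

    /* In function context, mirroring the hunks above: */
    ASSERT_MSG(trans->bytes_reserved == 0,
               "trans->bytes_reserved=%llu", trans->bytes_reserved);

Splitting compound conditions into one assert per condition, as the "One assert per line for easier debugging" comment in the space-info.c hunk spells out, keeps the resulting report unambiguous.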
fs/btrfs/transaction.h (-4)
··· 14 14 #include <linux/wait.h> 15 15 #include "btrfs_inode.h" 16 16 #include "delayed-ref.h" 17 - #include "extent-io-tree.h" 18 - #include "block-rsv.h" 19 - #include "messages.h" 20 - #include "misc.h" 21 17 22 18 struct dentry; 23 19 struct inode;
fs/btrfs/tree-checker.c (+10 -13)
··· 186 186 key->type == BTRFS_INODE_EXTREF_KEY || 187 187 key->type == BTRFS_DIR_INDEX_KEY || 188 188 key->type == BTRFS_DIR_ITEM_KEY || 189 - key->type == BTRFS_EXTENT_DATA_KEY); 189 + key->type == BTRFS_EXTENT_DATA_KEY, "key->type=%u", key->type); 190 190 191 191 /* 192 192 * Only subvolume trees along with their reloc trees need this check. ··· 1618 1618 1619 1619 if (unlikely(prev_end > key->objectid)) { 1620 1620 extent_err(leaf, slot, 1621 - "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", 1622 - prev_key->objectid, prev_key->type, 1623 - prev_key->offset, key->objectid, key->type, 1624 - key->offset); 1621 + "previous extent " BTRFS_KEY_FMT " overlaps current extent " BTRFS_KEY_FMT, 1622 + BTRFS_KEY_FMT_VALUE(prev_key), 1623 + BTRFS_KEY_FMT_VALUE(key)); 1625 1624 return -EUCLEAN; 1626 1625 } 1627 1626 } ··· 2059 2060 /* Make sure the keys are in the right order */ 2060 2061 if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) { 2061 2062 generic_err(leaf, slot, 2062 - "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", 2063 - prev_key.objectid, prev_key.type, 2064 - prev_key.offset, key.objectid, key.type, 2065 - key.offset); 2063 + "bad key order, prev " BTRFS_KEY_FMT " current " BTRFS_KEY_FMT, 2064 + BTRFS_KEY_FMT_VALUE(&prev_key), 2065 + BTRFS_KEY_FMT_VALUE(&key)); 2066 2066 return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; 2067 2067 } 2068 2068 ··· 2179 2181 2180 2182 if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) { 2181 2183 generic_err(node, slot, 2182 - "bad key order, current (%llu %u %llu) next (%llu %u %llu)", 2183 - key.objectid, key.type, key.offset, 2184 - next_key.objectid, next_key.type, 2185 - next_key.offset); 2184 + "bad key order, current " BTRFS_KEY_FMT " next " BTRFS_KEY_FMT, 2185 + BTRFS_KEY_FMT_VALUE(&key), 2186 + BTRFS_KEY_FMT_VALUE(&next_key)); 2186 2187 return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; 2187 2188 } 2188 2189 }
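tree-checker.c replaces the hand-rolled "(%llu %u %llu)" key triplets with a BTRFS_KEY_FMT/BTRFS_KEY_FMT_VALUE pair so the format string and its arguments can no longer drift apart. Judging from the call sites, the pair is presumably shaped like this (an inferred sketch, not the actual definition):

    #define BTRFS_KEY_FMT            "(%llu %u %llu)"
    #define BTRFS_KEY_FMT_VALUE(key) (key)->objectid, (key)->type, (key)->offset

C string-literal concatenation then lets callers splice the fragment into larger messages, as in the generic_err() and extent_err() calls above.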
fs/btrfs/tree-log.c (+89 -94)
···
  #include "orphan.h"
  #include "print-tree.h"
  #include "tree-checker.h"
+ #include "delayed-inode.h"

  #define MAX_CONFLICT_INODES 10
···
  if (wc->log_leaf) {
      btrfs_crit(fs_info,
- "log tree (for root %llu) leaf currently being processed (slot %d key %llu %u %llu):",
+ "log tree (for root %llu) leaf currently being processed (slot %d key " BTRFS_KEY_FMT "):",
                 btrfs_root_id(wc->root), wc->log_slot,
-                wc->log_key.objectid, wc->log_key.type, wc->log_key.offset);
+                BTRFS_KEY_FMT_VALUE(&wc->log_key));
      btrfs_print_leaf(wc->log_leaf);
  }
···
  struct btrfs_inode *inode;

  /* Only meant to be called for subvolume roots and not for log roots. */
- ASSERT(btrfs_is_fstree(btrfs_root_id(root)));
+ ASSERT(btrfs_is_fstree(btrfs_root_id(root)), "root_id=%llu", btrfs_root_id(root));

  /*
   * We're holding a transaction handle whether we are logging or
···
   * the leaf before writing into the log tree. See the comments at
   * copy_items() for more details.
   */
- ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID);
+ ASSERT(btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID, "root_id=%llu", btrfs_root_id(root));

  item_size = btrfs_item_size(wc->log_leaf, wc->log_slot);
  src_ptr = btrfs_item_ptr_offset(wc->log_leaf, wc->log_slot);
···
  ret = btrfs_search_slot(NULL, root, &wc->log_key, wc->subvol_path, 0, 0);
  if (ret < 0) {
      btrfs_abort_log_replay(wc, ret,
- "failed to search subvolume tree for key (%llu %u %llu) root %llu",
-                            wc->log_key.objectid, wc->log_key.type,
-                            wc->log_key.offset, btrfs_root_id(root));
+ "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu",
+                            BTRFS_KEY_FMT_VALUE(&wc->log_key),
+                            btrfs_root_id(root));
      return ret;
  }
···
insert:
  btrfs_release_path(wc->subvol_path);
  /* try to insert the key into the destination tree */
- wc->subvol_path->skip_release_on_error = 1;
+ wc->subvol_path->skip_release_on_error = true;
  ret = btrfs_insert_empty_item(trans, root, wc->subvol_path, &wc->log_key, item_size);
- wc->subvol_path->skip_release_on_error = 0;
+ wc->subvol_path->skip_release_on_error = false;

  dst_eb = wc->subvol_path->nodes[0];
  dst_slot = wc->subvol_path->slots[0];
···
      btrfs_extend_item(trans, wc->subvol_path, item_size - found_size);
  } else if (ret) {
      btrfs_abort_log_replay(wc, ret,
-                            "failed to insert item for key (%llu %u %llu)",
-                            wc->log_key.objectid, wc->log_key.type,
-                            wc->log_key.offset);
+                            "failed to insert item for key " BTRFS_KEY_FMT,
+                            BTRFS_KEY_FMT_VALUE(&wc->log_key));
      return ret;
  }
  dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot);
···
                            &wc->log_key, sizeof(*item));
  if (ret) {
      btrfs_abort_log_replay(wc, ret,
-                            "failed to insert item with key (%llu %u %llu) root %llu",
-                            wc->log_key.objectid, wc->log_key.type,
-                            wc->log_key.offset, btrfs_root_id(root));
+                            "failed to insert item with key " BTRFS_KEY_FMT " root %llu",
+                            BTRFS_KEY_FMT_VALUE(&wc->log_key),
+                            btrfs_root_id(root));
      goto out;
  }
  dest_offset = btrfs_item_ptr_offset(wc->subvol_path->nodes[0],
···
  ret = btrfs_search_slot(NULL, root, &search_key, wc->subvol_path, 0, 0);
  if (ret < 0) {
      btrfs_abort_log_replay(wc, ret,
- "failed to search subvolume tree for key (%llu %u %llu) root %llu",
-                            search_key.objectid, search_key.type,
-                            search_key.offset, btrfs_root_id(root));
+ "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu",
+                            BTRFS_KEY_FMT_VALUE(&search_key),
+                            btrfs_root_id(root));
      return ret;
  } else if (ret == 0) {
      /*
···
  }
  if (ret < 0) {
      btrfs_abort_log_replay(wc, ret,
- "failed to search subvolume tree for key (%llu %u %llu) root %llu",
-                            wc->log_key.objectid, wc->log_key.type,
-                            wc->log_key.offset, btrfs_root_id(root));
+ "failed to search subvolume tree for key " BTRFS_KEY_FMT " root %llu",
+                            BTRFS_KEY_FMT_VALUE(&wc->log_key),
+                            btrfs_root_id(root));
      goto out;
  }
···
  struct btrfs_dir_item *di;

  /* We only log dir index keys, which only contain a single dir item. */
- ASSERT(wc->log_key.type == BTRFS_DIR_INDEX_KEY);
+ ASSERT(wc->log_key.type == BTRFS_DIR_INDEX_KEY,
+        "wc->log_key.type=%u", wc->log_key.type);

  di = btrfs_item_ptr(wc->log_leaf, wc->log_slot, struct btrfs_dir_item);
  ret = replay_one_name(wc, di);
···
   * we need to do is process the dir index keys, we (and our caller) can
   * safely ignore dir item keys (key type BTRFS_DIR_ITEM_KEY).
   */
- ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY);
+ ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY, "dir_key->type=%u", dir_key->type);

  eb = wc->subvol_path->nodes[0];
  slot = wc->subvol_path->slots[0];
···
  int ret = 0;
  struct btrfs_key dir_key;
  struct btrfs_key found_key;
- struct btrfs_path *log_path;
+ BTRFS_PATH_AUTO_FREE(log_path);
  struct btrfs_inode *dir;

  dir_key.objectid = dirid;
···
   * we replay the deletes before we copy in the inode item from the log.
   */
  if (IS_ERR(dir)) {
-     btrfs_free_path(log_path);
      ret = PTR_ERR(dir);
      if (ret == -ENOENT)
          ret = 0;
···
                          wc->subvol_path, 0, 0);
  if (ret < 0) {
      btrfs_abort_log_replay(wc, ret,
-                            "failed to search root %llu for key (%llu %u %llu)",
+                            "failed to search root %llu for key " BTRFS_KEY_FMT,
                             btrfs_root_id(root),
-                            dir_key.objectid, dir_key.type,
-                            dir_key.offset);
+                            BTRFS_KEY_FMT_VALUE(&dir_key));
      goto out;
  }
···
  ret = 0;
out:
  btrfs_release_path(wc->subvol_path);
- btrfs_free_path(log_path);
  iput(&dir->vfs_inode);
  return ret;
}
···
      mutex_unlock(&root->log_mutex);
      return ctx->log_ret;
  }
- ASSERT(log_transid == root->log_transid);
+ ASSERT(log_transid == root->log_transid,
+        "log_transid=%d root->log_transid=%d", log_transid, root->log_transid);
  atomic_set(&root->log_commit[index1], 1);

  /* wait for previous tree log sync to complete */
···
      ret = root_log_ctx.log_ret;
      goto out;
  }
- ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid);
+ ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid,
+        "root_log_ctx.log_transid=%d log_root_tree->log_transid=%d",
+        root_log_ctx.log_transid, log_root_tree->log_transid);
  atomic_set(&log_root_tree->log_commit[index2], 1);

  if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
···
   * someone else already started it. We use <= and not < because the
   * first log transaction has an ID of 0.
   */
- ASSERT(btrfs_get_root_last_log_commit(root) <= log_transid);
+ ASSERT(btrfs_get_root_last_log_commit(root) <= log_transid,
+        "last_log_commit(root)=%d log_transid=%d",
+        btrfs_get_root_last_log_commit(root), log_transid);
  btrfs_set_root_last_log_commit(root, log_transid);

out_wake_log_root:
···
   * or the entire directory.
   */
  void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
-                                   struct btrfs_root *root,
                                    const struct fscrypt_str *name,
                                    struct btrfs_inode *dir, u64 index)
  {
+     struct btrfs_root *root = dir->root;
      BTRFS_PATH_AUTO_FREE(path);
      int ret;
···

  /* see comments for btrfs_del_dir_entries_in_log */
  void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
                                  const struct fscrypt_str *name,
-                                 struct btrfs_inode *inode, u64 dirid)
+                                 struct btrfs_inode *inode,
+                                 struct btrfs_inode *dir)
  {
-     struct btrfs_root *log;
+     struct btrfs_root *root = dir->root;
      int ret;

      ret = inode_logged(trans, inode, NULL);
···
  ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret);
  if (WARN_ON(ret))
      return;
- log = root->log_root;
  mutex_lock(&inode->log_mutex);

- ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), dirid, NULL);
+ ret = btrfs_del_inode_ref(trans, root->log_root, name, btrfs_ino(inode),
+                           btrfs_ino(dir), NULL);
  mutex_unlock(&inode->log_mutex);
  if (ret < 0 && ret != -ENOENT)
      btrfs_set_log_full_commit(trans);
···
                     int count)
  {
      struct btrfs_root *log = inode->root->log_root;
-     char *ins_data = NULL;
+     char AUTO_KFREE(ins_data);
      struct btrfs_item_batch batch;
      struct extent_buffer *dst;
      unsigned long src_offset;
···
  int ret;
  int i;

- ASSERT(count > 0);
+ ASSERT(count > 0, "count=%d", count);
  batch.nr = count;

  if (count == 1) {
···

  ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
  if (ret)
-     goto out;
+     return ret;

  dst = dst_path->nodes[0];
  /*
···
  btrfs_release_path(dst_path);

  last_index = batch.keys[count - 1].offset;
- ASSERT(last_index > inode->last_dir_index_offset);
+ ASSERT(last_index > inode->last_dir_index_offset,
+        "last_index=%llu inode->last_dir_index_offset=%llu",
+        last_index, inode->last_dir_index_offset);

  /*
   * If for some unexpected reason the last item's index is not greater
···

  if (btrfs_get_first_dir_index_to_log(inode) == 0)
      btrfs_set_first_dir_index_to_log(inode, batch.keys[0].offset);
- out:
- kfree(ins_data);

  return ret;
}
···
  for (int i = path->slots[0]; i < nritems; i++) {
      struct btrfs_dir_item *di;
      struct btrfs_key key;
-     int ret;

      btrfs_item_key_to_cpu(src, &key, i);
···
  }

  if (batch_size > 0) {
-     int ret;
-
      ret = flush_dir_items_batch(trans, inode, src, dst_path,
                                  batch_start, batch_size);
      if (ret < 0)
···
   * change in the current transaction), then we don't need to log
   * a range, last_old_dentry_offset is == to last_offset.
   */
- ASSERT(last_old_dentry_offset <= last_offset);
+ ASSERT(last_old_dentry_offset <= last_offset,
+        "last_old_dentry_offset=%llu last_offset=%llu",
+        last_old_dentry_offset, last_offset);
  if (last_old_dentry_offset < last_offset)
      ret = insert_dir_log_key(trans, log, path, ino,
                               last_old_dentry_offset + 1,
···
  struct btrfs_key *ins_keys;
  u32 *ins_sizes;
  struct btrfs_item_batch batch;
- char *ins_data;
+ char AUTO_KFREE(ins_data);
  int dst_index;
  const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
  const u64 i_size = i_size_read(&inode->vfs_inode);
···
                              disk_bytenr + extent_num_bytes - 1,
                              &ordered_sums, false);
  if (ret < 0)
-     goto out;
+     return ret;
  ret = 0;

  list_for_each_entry_safe(sums, sums_next, &ordered_sums, list) {
···
      kfree(sums);
  }
  if (ret)
-     goto out;
+     return ret;

add_to_batch:
  ins_sizes[dst_index] = btrfs_item_size(src, src_slot);
···
   * so we don't need to do anything.
   */
  if (batch.nr == 0)
-     goto out;
+     return 0;

  ret = btrfs_insert_empty_items(trans, log, dst_path, &batch);
  if (ret)
-     goto out;
+     return ret;

  dst_index = 0;
  for (int i = 0; i < nr; i++) {
···
  }

  btrfs_release_path(dst_path);
- out:
- kfree(ins_data);

  return ret;
}
···
  set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags);

  if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
-     spin_lock_irq(&inode->ordered_tree_lock);
+     spin_lock(&inode->ordered_tree_lock);
      if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
          set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
          atomic_inc(&trans->transaction->pending_ordered);
      }
-     spin_unlock_irq(&inode->ordered_tree_lock);
+     spin_unlock(&inode->ordered_tree_lock);
  }
  btrfs_put_ordered_extent(ordered);
}
···
                    struct btrfs_inode *inode,
                    u64 *other_ino, u64 *other_parent)
{
-     int ret;
      BTRFS_PATH_AUTO_FREE(search_path);
-     char *name = NULL;
+     char AUTO_KFREE(name);
      u32 name_len = 0;
      u32 item_size = btrfs_item_size(eb, slot);
      u32 cur_offset = 0;
···
  search_path = btrfs_alloc_path();
  if (!search_path)
      return -ENOMEM;
- search_path->search_commit_root = 1;
- search_path->skip_locking = 1;
+ search_path->search_commit_root = true;
+ search_path->skip_locking = true;

  while (cur_offset < item_size) {
      u64 parent;
···
      char *new_name;

      new_name = krealloc(name, this_name_len, GFP_NOFS);
-     if (!new_name) {
-         ret = -ENOMEM;
-         goto out;
-     }
+     if (!new_name)
+         return -ENOMEM;
      name_len = this_name_len;
      name = new_name;
  }
···
                          di, &di_key);
      if (di_key.type == BTRFS_INODE_ITEM_KEY) {
          if (di_key.objectid != key->objectid) {
-             ret = 1;
              *other_ino = di_key.objectid;
              *other_parent = parent;
+             return 1;
          } else {
-             ret = 0;
+             return 0;
          }
      } else {
-         ret = -EAGAIN;
+         return -EAGAIN;
      }
-     goto out;
  } else if (IS_ERR(di)) {
-     ret = PTR_ERR(di);
-     goto out;
+     return PTR_ERR(di);
  }
  btrfs_release_path(search_path);

  cur_offset += this_len;
  }
- ret = 0;
- out:
- kfree(name);
- return ret;
+
+ return 0;
}

/*
···
  key.type = BTRFS_INODE_ITEM_KEY;
  key.offset = 0;

- path->search_commit_root = 1;
- path->skip_locking = 1;
+ path->search_commit_root = true;
+ path->skip_locking = true;

  ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  if (WARN_ON_ONCE(ret > 0)) {
···
  }

  btrfs_release_path(path);
- path->search_commit_root = 0;
- path->skip_locking = 0;
+ path->search_commit_root = false;
+ path->skip_locking = false;

  return ret;
}
···
      curr = list_next_entry(curr, log_list);
  }

- ASSERT(batch.nr >= 1);
+ ASSERT(batch.nr >= 1, "batch.nr=%d", batch.nr);
  ret = insert_delayed_items_batch(trans, log, path, &batch, first);

  curr = list_last_entry(delayed_ins_list, struct btrfs_delayed_item,
···
  }

  last_dir_index = curr->index;
- ASSERT(last_dir_index >= first_dir_index);
+ ASSERT(last_dir_index >= first_dir_index,
+        "last_dir_index=%llu first_dir_index=%llu",
+        last_dir_index, first_dir_index);

  ret = insert_dir_log_key(trans, inode->root->log_root, path,
                           ino, first_dir_index, last_dir_index);
···
      goto next_batch;

  last_dir_index = last->index;
- ASSERT(last_dir_index >= first_dir_index);
+ ASSERT(last_dir_index >= first_dir_index,
+        "last_dir_index=%llu first_dir_index=%llu",
+        last_dir_index, first_dir_index);
  /*
   * If this range starts right after where the previous one ends,
   * then we want to reuse the previous range item and change its
···
   */
  lockdep_assert_not_held(&inode->log_mutex);

- ASSERT(!ctx->logging_new_delayed_dentries);
+ ASSERT(!ctx->logging_new_delayed_dentries,
+        "ctx->logging_new_delayed_dentries=%d", ctx->logging_new_delayed_dentries);
  ctx->logging_new_delayed_dentries = true;

  list_for_each_entry(item, delayed_ins_list, log_list) {
···
  path = btrfs_alloc_path();
  if (!path)
      return -ENOMEM;
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;

  key.objectid = ino;
  key.type = BTRFS_INODE_REF_KEY;
···
  item_size = btrfs_item_size(leaf, slot);
  ptr = btrfs_item_ptr_offset(leaf, slot);
  while (cur_offset < item_size) {
-     struct btrfs_key inode_key;
+     u64 dir_id;
      struct btrfs_inode *dir_inode;
-
-     inode_key.type = BTRFS_INODE_ITEM_KEY;
-     inode_key.offset = 0;

      if (key.type == BTRFS_INODE_EXTREF_KEY) {
          struct btrfs_inode_extref *extref;

          extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
-         inode_key.objectid = btrfs_inode_extref_parent(leaf, extref);
+         dir_id = btrfs_inode_extref_parent(leaf, extref);
          cur_offset += sizeof(*extref);
          cur_offset += btrfs_inode_extref_name_len(leaf, extref);
      } else {
-         inode_key.objectid = key.offset;
+         dir_id = key.offset;
          cur_offset = item_size;
      }

-     dir_inode = btrfs_iget_logging(inode_key.objectid, root);
+     dir_inode = btrfs_iget_logging(dir_id, root);
      /*
       * If the parent inode was deleted, return an error to
       * fallback to a transaction commit. This is to prevent
···
  struct btrfs_path *path;
  struct fscrypt_name fname;

- ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);
+ ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX,
+        "old_dir_index=%llu", old_dir_index);

  ret = fscrypt_setup_filename(&old_dir->vfs_inode,
                               &old_dentry->d_name, 0, &fname);
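The hunks above repeatedly swap open-coded "(%llu %u %llu)" key formatting for a BTRFS_KEY_FMT / BTRFS_KEY_FMT_VALUE pair, relying on C string-literal concatenation so one canonical key format can be spliced into any message. The macro definitions are not part of this diff; the following is a minimal userspace sketch of the pattern, with the struct layout and macro bodies assumed for illustration rather than copied from the btrfs headers:

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for the kernel-side definitions. */
    struct btrfs_key {
        uint64_t objectid;
        uint8_t type;
        uint64_t offset;
    };

    #define BTRFS_KEY_FMT "(%llu %u %llu)"
    #define BTRFS_KEY_FMT_VALUE(key) \
        (unsigned long long)(key)->objectid, (unsigned)(key)->type, \
        (unsigned long long)(key)->offset

    int main(void)
    {
        struct btrfs_key key = { .objectid = 256, .type = 1, .offset = 0 };

        /* Adjacent string literals concatenate, so the format slots in. */
        printf("failed to search subvolume tree for key " BTRFS_KEY_FMT "\n",
               BTRFS_KEY_FMT_VALUE(&key));
        return 0;
    }

Centralizing the format keeps every call site's argument list in sync with the format string, which is the failure mode the old hand-rolled triplets invited.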
+3 -5
fs/btrfs/tree-log.h
···

  #include <linux/list.h>
  #include <linux/fs.h>
- #include "messages.h"
- #include "ctree.h"
+ #include <linux/fscrypt.h>
  #include "transaction.h"

  struct inode;
···
                           struct dentry *dentry,
                           struct btrfs_log_ctx *ctx);
  void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
-                                   struct btrfs_root *root,
                                    const struct fscrypt_str *name,
                                    struct btrfs_inode *dir, u64 index);
  void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
                                  const struct fscrypt_str *name,
-                                 struct btrfs_inode *inode, u64 dirid);
+                                 struct btrfs_inode *inode,
+                                 struct btrfs_inode *dir);
  void btrfs_end_log_trans(struct btrfs_root *root);
  void btrfs_pin_log_trans(struct btrfs_root *root);
  void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
+43 -77
fs/btrfs/uuid-tree.c
···
             u8 type, u64 subid)
{
      int ret;
-     struct btrfs_path *path = NULL;
+     BTRFS_PATH_AUTO_FREE(path);
      struct extent_buffer *eb;
      int slot;
      u32 item_size;
      unsigned long offset;
      struct btrfs_key key;

-     if (WARN_ON_ONCE(!uuid_root)) {
-         ret = -ENOENT;
-         goto out;
-     }
+     if (WARN_ON_ONCE(!uuid_root))
+         return -ENOENT;

      path = btrfs_alloc_path();
-     if (!path) {
-         ret = -ENOMEM;
-         goto out;
-     }
+     if (!path)
+         return -ENOMEM;

      btrfs_uuid_to_key(uuid, type, &key);
      ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
-     if (ret < 0) {
-         goto out;
-     } else if (ret > 0) {
-         ret = -ENOENT;
-         goto out;
-     }
+     if (ret < 0)
+         return ret;
+     if (ret > 0)
+         return -ENOENT;

      eb = path->nodes[0];
      slot = path->slots[0];
···
          btrfs_warn(uuid_root->fs_info,
                     "uuid item with illegal size %lu!",
                     (unsigned long)item_size);
-         goto out;
+         return ret;
      }
      while (item_size) {
          __le64 data;
···
          item_size -= sizeof(data);
      }

- out:
-     btrfs_free_path(path);
      return ret;
}
···
      struct btrfs_fs_info *fs_info = trans->fs_info;
      struct btrfs_root *uuid_root = fs_info->uuid_root;
      int ret;
-     struct btrfs_path *path = NULL;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;
      struct extent_buffer *eb;
      int slot;
···
      if (ret != -ENOENT)
          return ret;

-     if (WARN_ON_ONCE(!uuid_root)) {
-         ret = -EINVAL;
-         goto out;
-     }
+     if (WARN_ON_ONCE(!uuid_root))
+         return -EINVAL;

      btrfs_uuid_to_key(uuid, type, &key);

      path = btrfs_alloc_path();
-     if (!path) {
-         ret = -ENOMEM;
-         goto out;
-     }
+     if (!path)
+         return -ENOMEM;

      ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
                                    sizeof(subid_le));
···
          btrfs_warn(fs_info,
                     "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
                     ret, key.objectid, key.offset, type);
-         goto out;
+         return ret;
      }

-     ret = 0;
      subid_le = cpu_to_le64(subid_cpu);
      write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
- out:
-     btrfs_free_path(path);
-     return ret;
+     return 0;
}

int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
···
      struct btrfs_fs_info *fs_info = trans->fs_info;
      struct btrfs_root *uuid_root = fs_info->uuid_root;
      int ret;
-     struct btrfs_path *path = NULL;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;
      struct extent_buffer *eb;
      int slot;
···
      unsigned long move_src;
      unsigned long move_len;

-     if (WARN_ON_ONCE(!uuid_root)) {
-         ret = -EINVAL;
-         goto out;
-     }
+     if (WARN_ON_ONCE(!uuid_root))
+         return -EINVAL;

      btrfs_uuid_to_key(uuid, type, &key);

      path = btrfs_alloc_path();
-     if (!path) {
-         ret = -ENOMEM;
-         goto out;
-     }
+     if (!path)
+         return -ENOMEM;

      ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
      if (ret < 0) {
          btrfs_warn(fs_info, "error %d while searching for uuid item!",
                     ret);
-         goto out;
+         return ret;
      }
-     if (ret > 0) {
-         ret = -ENOENT;
-         goto out;
-     }
+     if (ret > 0)
+         return -ENOENT;

      eb = path->nodes[0];
      slot = path->slots[0];
···
      if (!IS_ALIGNED(item_size, sizeof(u64))) {
          btrfs_warn(fs_info, "uuid item with illegal size %lu!",
                     (unsigned long)item_size);
-         ret = -ENOENT;
-         goto out;
+         return -ENOENT;
      }
      while (item_size) {
          __le64 read_subid;
···
          item_size -= sizeof(read_subid);
      }

-     if (!item_size) {
-         ret = -ENOENT;
-         goto out;
-     }
+     if (!item_size)
+         return -ENOENT;

      item_size = btrfs_item_size(eb, slot);
-     if (item_size == sizeof(subid)) {
-         ret = btrfs_del_item(trans, uuid_root, path);
-         goto out;
-     }
+     if (item_size == sizeof(subid))
+         return btrfs_del_item(trans, uuid_root, path);

      move_dst = offset;
      move_src = offset + sizeof(subid);
···
      memmove_extent_buffer(eb, move_dst, move_src, move_len);
      btrfs_truncate_item(trans, path, item_size - sizeof(subid), 1);

- out:
-     btrfs_free_path(path);
-     return ret;
+     return 0;
}

static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
···
{
      struct btrfs_root *root = fs_info->uuid_root;
      struct btrfs_key key;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      int ret = 0;
      struct extent_buffer *leaf;
      int slot;
···
      unsigned long offset;

      path = btrfs_alloc_path();
-     if (!path) {
-         ret = -ENOMEM;
-         goto out;
-     }
+     if (!path)
+         return -ENOMEM;

      key.objectid = 0;
      key.type = 0;
···

again_search_slot:
      ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
-     if (ret) {
-         if (ret > 0)
-             ret = 0;
-         goto out;
-     }
+     if (ret < 0)
+         return ret;
+     if (ret > 0)
+         return 0;

      while (1) {
-         if (btrfs_fs_closing(fs_info)) {
-             ret = -EINTR;
-             goto out;
-         }
+         if (btrfs_fs_closing(fs_info))
+             return -EINTR;
+
          cond_resched();
          leaf = path->nodes[0];
          slot = path->slots[0];
···
          ret = btrfs_check_uuid_tree_entry(fs_info, uuid,
                                            key.type, subid_cpu);
          if (ret < 0)
-             goto out;
+             return ret;
          if (ret > 0) {
              btrfs_release_path(path);
              ret = btrfs_uuid_iter_rem(root, uuid, key.type,
···
              goto again_search_slot;
          }
          if (ret < 0 && ret != -ENOENT)
-             goto out;
+             return ret;
          key.offset++;
          goto again_search_slot;
      }
···
          break;
      }

- out:
-     btrfs_free_path(path);
      return ret;
}
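Most of the churn in uuid-tree.c (and in tree-log.c above) replaces manual btrfs_free_path() calls and out: labels with BTRFS_PATH_AUTO_FREE(), which releases the path automatically when it goes out of scope and lets error paths return directly. A minimal userspace sketch of the idea, assuming the kernel macro is built on the compiler's cleanup attribute; the demo type and names here are invented for illustration:

    #include <stdlib.h>

    struct demo_path { int reserved; };

    static void demo_free_path(struct demo_path **path)
    {
        free(*path);    /* free(NULL) is a no-op, so early returns stay safe */
    }

    #define DEMO_PATH_AUTO_FREE(name) \
        struct demo_path *name __attribute__((cleanup(demo_free_path))) = NULL

    static int demo_lookup(int fail)
    {
        DEMO_PATH_AUTO_FREE(path);

        path = calloc(1, sizeof(*path));
        if (!path)
            return -1;
        if (fail)
            return -2;  /* path is still freed on this early return */
        return 0;
    }

    int main(void)
    {
        return demo_lookup(0);
    }

The payoff is visible in the hunks: every "goto out" whose only job was to free the path collapses into a plain return.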
+13 -19
fs/btrfs/verity.c
···
{
      struct btrfs_trans_handle *trans;
      struct btrfs_root *root = inode->root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;
      int count = 0;
      int ret;
···
      while (1) {
          /* 1 for the item being dropped */
          trans = btrfs_start_transaction(root, 1);
-         if (IS_ERR(trans)) {
-             ret = PTR_ERR(trans);
-             goto out;
-         }
+         if (IS_ERR(trans))
+             return PTR_ERR(trans);

          /*
           * Walk backwards through all the items until we find one that
···
              path->slots[0]--;
          } else if (ret < 0) {
              btrfs_end_transaction(trans);
-             goto out;
+             return ret;
          }

          btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
···
          ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
          if (ret) {
              btrfs_end_transaction(trans);
-             goto out;
+             return ret;
          }
          count++;
          btrfs_release_path(path);
          btrfs_end_transaction(trans);
      }
-     ret = count;
      btrfs_end_transaction(trans);
- out:
-     btrfs_free_path(path);
-     return ret;
+     return count;
}

/*
···
                const char *src, u64 len)
{
      struct btrfs_trans_handle *trans;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_root *root = inode->root;
      struct extent_buffer *leaf;
      struct btrfs_key key;
···
      while (len > 0) {
          /* 1 for the new item being inserted */
          trans = btrfs_start_transaction(root, 1);
-         if (IS_ERR(trans)) {
-             ret = PTR_ERR(trans);
-             break;
-         }
+         if (IS_ERR(trans))
+             return PTR_ERR(trans);

          key.objectid = btrfs_ino(inode);
          key.type = key_type;
···
          btrfs_end_transaction(trans);
      }

-     btrfs_free_path(path);
      return ret;
}
···
static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
                          char *dest, u64 len, struct folio *dest_folio)
{
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_root *root = inode->root;
      struct extent_buffer *leaf;
      struct btrfs_key key;
···
          }
      }
out:
-     btrfs_free_path(path);
      if (!ret)
          ret = copied;
      return ret;
···
      int ret;

      btrfs_assert_inode_locked(inode);
+
+     if (IS_ENCRYPTED(&inode->vfs_inode))
+         return -EOPNOTSUPP;

      if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
          return -EBUSY;
+70 -129
fs/btrfs/volumes.c
···
{
      struct path old = { .mnt = NULL, .dentry = NULL };
      struct path new = { .mnt = NULL, .dentry = NULL };
-     char *old_path = NULL;
+     char AUTO_KFREE(old_path);
      bool is_same = false;
      int ret;
···
      if (path_equal(&old, &new))
          is_same = true;
out:
-     kfree(old_path);
      path_put(&old);
      path_put(&new);
      return is_same;
···
      struct btrfs_root *root = fs_info->dev_root;
      struct btrfs_key key;
      struct btrfs_dev_extent *dev_extent;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      u64 search_start;
      u64 hole_size;
      u64 max_hole_start;
···
      }

      path->reada = READA_FORWARD;
-     path->search_commit_root = 1;
-     path->skip_locking = 1;
+     path->search_commit_root = true;
+     path->skip_locking = true;

      key.objectid = device->devid;
      key.type = BTRFS_DEV_EXTENT_KEY;
···
                 "max_hole_start=%llu max_hole_size=%llu search_end=%llu",
                 max_hole_start, max_hole_size, search_end);
out:
-     btrfs_free_path(path);
      *start = max_hole_start;
      if (len)
          *len = max_hole_size;
···
      struct btrfs_fs_info *fs_info = device->fs_info;
      struct btrfs_root *root = fs_info->dev_root;
      int ret;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;
      struct btrfs_key found_key;
      struct extent_buffer *leaf = NULL;
···
          ret = btrfs_previous_item(root, path, key.objectid,
                                    BTRFS_DEV_EXTENT_KEY);
          if (ret)
-             goto out;
+             return ret;
          leaf = path->nodes[0];
          btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
          extent = btrfs_item_ptr(leaf, path->slots[0],
···
          extent = btrfs_item_ptr(leaf, path->slots[0],
                                  struct btrfs_dev_extent);
      } else {
-         goto out;
+         return ret;
      }

      *dev_extent_len = btrfs_dev_extent_length(leaf, extent);
···
      ret = btrfs_del_item(trans, root, path);
      if (ret == 0)
          set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
- out:
-     btrfs_free_path(path);
      return ret;
}
···
      int ret;
      struct btrfs_key key;
      struct btrfs_key found_key;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);

      path = btrfs_alloc_path();
      if (!path)
···

      ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
      if (ret < 0)
-         goto error;
+         return ret;

      if (unlikely(ret == 0)) {
          /* Corruption */
          btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
-         ret = -EUCLEAN;
-         goto error;
+         return -EUCLEAN;
      }

      ret = btrfs_previous_item(fs_info->chunk_root, path,
···
                                path->slots[0]);
          *devid_ret = found_key.offset + 1;
      }
-     ret = 0;
- error:
-     btrfs_free_path(path);
-     return ret;
+     return 0;
}

/*
···
                    struct btrfs_device *device)
{
      int ret;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_dev_item *dev_item;
      struct extent_buffer *leaf;
      struct btrfs_key key;
···
                                    &key, sizeof(*dev_item));
      btrfs_trans_release_chunk_metadata(trans);
      if (ret)
-         goto out;
+         return ret;

      leaf = path->nodes[0];
      dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
···
      write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
                          ptr, BTRFS_FSID_SIZE);

-     ret = 0;
- out:
-     btrfs_free_path(path);
-     return ret;
+     return 0;
}

/*
···
{
      struct btrfs_root *root = device->fs_info->chunk_root;
      int ret;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;

      path = btrfs_alloc_path();
···
      btrfs_reserve_chunk_metadata(trans, false);
      ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
      btrfs_trans_release_chunk_metadata(trans);
-     if (ret) {
-         if (ret > 0)
-             ret = -ENOENT;
-         goto out;
-     }
+     if (ret > 0)
+         return -ENOENT;
+     if (ret < 0)
+         return ret;

-     ret = btrfs_del_item(trans, root, path);
- out:
-     btrfs_free_path(path);
-     return ret;
+     return btrfs_del_item(trans, root, path);
}

/*
···
      BTRFS_DEV_LOOKUP_ARGS(args);
      struct btrfs_fs_info *fs_info = trans->fs_info;
      struct btrfs_root *root = fs_info->chunk_root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct extent_buffer *leaf;
      struct btrfs_dev_item *dev_item;
      struct btrfs_device *device;
···
      ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
      btrfs_trans_release_chunk_metadata(trans);
      if (ret < 0)
-         goto error;
+         return ret;

      leaf = path->nodes[0];
next_slot:
···
          if (ret > 0)
              break;
          if (ret < 0)
-             goto error;
+             return ret;
          leaf = path->nodes[0];
          btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
          btrfs_release_path(path);
···
          path->slots[0]++;
          goto next_slot;
      }
-     ret = 0;
- error:
-     btrfs_free_path(path);
-     return ret;
+     return 0;
}

int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
···
                      struct btrfs_device *device)
{
      int ret;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_root *root = device->fs_info->chunk_root;
      struct btrfs_dev_item *dev_item;
      struct extent_buffer *leaf;
···

      ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
      if (ret < 0)
-         goto out;
+         return ret;

-     if (ret > 0) {
-         ret = -ENOENT;
-         goto out;
-     }
+     if (ret > 0)
+         return -ENOENT;

      leaf = path->nodes[0];
      dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
···
                                   btrfs_device_get_disk_total_bytes(device));
      btrfs_set_device_bytes_used(leaf, dev_item,
                                  btrfs_device_get_bytes_used(device));
- out:
-     btrfs_free_path(path);
      return ret;
}
···
      struct btrfs_fs_info *fs_info = trans->fs_info;
      struct btrfs_root *root = fs_info->chunk_root;
      int ret;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;

      path = btrfs_alloc_path();
···

      ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
      if (ret < 0)
-         goto out;
-     else if (unlikely(ret > 0)) { /* Logic error or corruption */
+         return ret;
+     if (unlikely(ret > 0)) {
+         /* Logic error or corruption */
          btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
                    chunk_offset);
          btrfs_abort_transaction(trans, -ENOENT);
-         ret = -EUCLEAN;
-         goto out;
+         return -EUCLEAN;
      }

      ret = btrfs_del_item(trans, root, path);
      if (unlikely(ret < 0)) {
          btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
          btrfs_abort_transaction(trans, ret);
-         goto out;
+         return ret;
      }
- out:
-     btrfs_free_path(path);
      return ret;
}
···
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
{
      struct btrfs_root *chunk_root = fs_info->chunk_root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct extent_buffer *leaf;
      struct btrfs_chunk *chunk;
      struct btrfs_key key;
···
          ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
          if (ret < 0) {
              mutex_unlock(&fs_info->reclaim_bgs_lock);
-             goto error;
+             return ret;
          }
          if (unlikely(ret == 0)) {
              /*
···
               * offset (one less than the previous one, wrong
               * alignment and size).
               */
-             ret = -EUCLEAN;
              mutex_unlock(&fs_info->reclaim_bgs_lock);
-             goto error;
+             return -EUCLEAN;
          }

          ret = btrfs_previous_item(chunk_root, path, key.objectid,
···
          if (ret)
              mutex_unlock(&fs_info->reclaim_bgs_lock);
          if (ret < 0)
-             goto error;
+             return ret;
          if (ret > 0)
              break;
···
      } else if (WARN_ON(failed && retried)) {
          ret = -ENOSPC;
      }
- error:
-     btrfs_free_path(path);
      return ret;
}
···
      struct btrfs_root *chunk_root = fs_info->chunk_root;
      u64 chunk_type;
      struct btrfs_chunk *chunk;
-     struct btrfs_path *path = NULL;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;
      struct btrfs_key found_key;
      struct extent_buffer *leaf;
···
          goto again;
      }
error:
-     btrfs_free_path(path);
      if (enospc_errors) {
          btrfs_info(fs_info, "%d enospc errors during balance",
                     enospc_errors);
···
{
      u32 size_buf = 1024;
      char tmp_buf[192] = {'\0'};
-     char *buf;
+     char AUTO_KFREE(buf);
      char *bp;
      u32 size_bp = size_buf;
      int ret;
···
      btrfs_info(fs_info, "balance: %s %s",
                 (bctl->flags & BTRFS_BALANCE_RESUME) ?
                 "resume" : "start", buf);
-
-     kfree(buf);
}

/*
···
      struct btrfs_balance_control *bctl;
      struct btrfs_balance_item *item;
      struct btrfs_disk_balance_args disk_bargs;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct extent_buffer *leaf;
      struct btrfs_key key;
      int ret;
···

      ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
      if (ret < 0)
-         goto out;
+         return ret;
      if (ret > 0) { /* ret = -ENOENT; */
-         ret = 0;
-         goto out;
+         return 0;
      }

      bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
-     if (!bctl) {
-         ret = -ENOMEM;
-         goto out;
-     }
+     if (!bctl)
+         return -ENOMEM;

      leaf = path->nodes[0];
      item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
···
      fs_info->balance_ctl = bctl;
      spin_unlock(&fs_info->balance_lock);
      mutex_unlock(&fs_info->balance_mutex);
- out:
-     btrfs_free_path(path);
      return ret;
}
···
{
      struct btrfs_fs_info *info = trans->fs_info;
      struct btrfs_fs_devices *fs_devices = info->fs_devices;
-     struct btrfs_device_info *devices_info = NULL;
+     struct btrfs_device_info AUTO_KFREE(devices_info);
      struct alloc_chunk_ctl ctl;
-     struct btrfs_block_group *block_group;
      int ret;

      lockdep_assert_held(&info->chunk_mutex);
···
          return ERR_PTR(-ENOMEM);

      ret = gather_device_info(fs_devices, &ctl, devices_info);
-     if (ret < 0) {
-         block_group = ERR_PTR(ret);
-         goto out;
-     }
+     if (ret < 0)
+         return ERR_PTR(ret);

      ret = decide_stripe_size(fs_devices, &ctl, devices_info);
-     if (ret < 0) {
-         block_group = ERR_PTR(ret);
-         goto out;
-     }
+     if (ret < 0)
+         return ERR_PTR(ret);

-     block_group = create_chunk(trans, &ctl, devices_info);
-
- out:
-     kfree(devices_info);
-     return block_group;
+     return create_chunk(trans, &ctl, devices_info);
}

/*
···
{
      struct btrfs_io_context *bioc;

-     bioc = kzalloc(
-         /* The size of btrfs_io_context */
-         sizeof(struct btrfs_io_context) +
-         /* Plus the variable array for the stripes */
-         sizeof(struct btrfs_io_stripe) * (total_stripes),
-         GFP_NOFS);
+     bioc = kzalloc(struct_size(bioc, stripes, total_stripes), GFP_NOFS);

      if (!bioc)
          return NULL;
···
static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args,
                                  const struct btrfs_device *device)
{
+     if (args->devt)
+         return device->devt == args->devt;
      if (args->missing) {
          if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
              !device->bdev)
···
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
      struct btrfs_root *root = fs_info->chunk_root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct extent_buffer *leaf;
      struct btrfs_key key;
      struct btrfs_key found_key;
···
       * chunk tree, to keep it simple, just skip locking on the chunk tree.
       */
      ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
-     path->skip_locking = 1;
+     path->skip_locking = true;

      /*
       * Read all device items, and then all the chunk items. All
···
      ret = 0;
error:
      mutex_unlock(&uuid_mutex);
-
-     btrfs_free_path(path);
      return ret;
}
···
{
      struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
      struct btrfs_device *device;
-     struct btrfs_path *path = NULL;
+     BTRFS_PATH_AUTO_FREE(path);
      int ret = 0;

      path = btrfs_alloc_path();
···
      }
out:
      mutex_unlock(&fs_devices->device_list_mutex);
-
-     btrfs_free_path(path);
      return ret;
}
···
{
      struct btrfs_fs_info *fs_info = trans->fs_info;
      struct btrfs_root *dev_root = fs_info->dev_root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_key key;
      struct extent_buffer *eb;
      struct btrfs_dev_stats_item *ptr;
···
          btrfs_warn(fs_info,
                     "error %d while searching for dev_stats item for device %s",
                     ret, btrfs_dev_name(device));
-         goto out;
+         return ret;
      }

      if (ret == 0 &&
···
              btrfs_warn(fs_info,
                         "delete too small dev_stats item for device %s failed %d",
                         btrfs_dev_name(device), ret);
-             goto out;
+             return ret;
          }
          ret = 1;
      }
···
              btrfs_warn(fs_info,
                         "insert dev_stats item for device %s failed %d",
                         btrfs_dev_name(device), ret);
-             goto out;
+             return ret;
          }
      }
···
      for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
          btrfs_set_dev_stats_value(eb, ptr, i,
                                    btrfs_dev_stat_read(device, i));
- out:
-     btrfs_free_path(path);
      return ret;
}
···
 */
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
{
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct btrfs_root *root = fs_info->dev_root;
      struct btrfs_key key;
      u64 prev_devid = 0;
···
      path->reada = READA_FORWARD;
      ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
      if (ret < 0)
-         goto out;
+         return ret;

      if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
          ret = btrfs_next_leaf(root, path);
          if (ret < 0)
-             goto out;
+             return ret;
          /* No dev extents at all? Not good */
-         if (unlikely(ret > 0)) {
-             ret = -EUCLEAN;
-             goto out;
-         }
+         if (unlikely(ret > 0))
+             return -EUCLEAN;
      }
      while (1) {
          struct extent_buffer *leaf = path->nodes[0];
···
              btrfs_err(fs_info,
  "dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
                        devid, physical_offset, prev_dev_ext_end);
-             ret = -EUCLEAN;
-             goto out;
+             return -EUCLEAN;
          }

          ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
                                      physical_offset, physical_len);
          if (ret < 0)
-             goto out;
+             return ret;
          prev_devid = devid;
          prev_dev_ext_end = physical_offset + physical_len;

          ret = btrfs_next_item(root, path);
          if (ret < 0)
-             goto out;
+             return ret;
          if (ret > 0) {
              ret = 0;
              break;
···
      }

      /* Ensure all chunks have corresponding dev extents */
-     ret = verify_chunk_dev_extent_mapping(fs_info);
- out:
-     btrfs_free_path(path);
-     return ret;
+     return verify_chunk_dev_extent_mapping(fs_info);
}

/*
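The btrfs_io_context allocation above now goes through struct_size(), the helper from <linux/overflow.h> for sizing a structure that ends in a flexible array member; unlike the open-coded sum it also guards against multiplication overflow. A simplified userspace sketch of what it computes (STRUCT_SIZE here is a hypothetical stand-in without the kernel macro's overflow saturation):

    #include <stdio.h>
    #include <stdlib.h>

    struct io_stripe { unsigned long long physical; };

    struct io_context {
        int num_stripes;
        struct io_stripe stripes[];   /* flexible array member */
    };

    /* sizeof() does not evaluate its operand, so referencing ioc
     * before it is assigned below is fine. */
    #define STRUCT_SIZE(ptr, member, count) \
        (sizeof(*(ptr)) + sizeof((ptr)->member[0]) * (count))

    int main(void)
    {
        struct io_context *ioc;
        int total_stripes = 4;

        ioc = calloc(1, STRUCT_SIZE(ioc, stripes, total_stripes));
        if (!ioc)
            return 1;
        ioc->num_stripes = total_stripes;
        printf("allocated %zu bytes\n",
               STRUCT_SIZE(ioc, stripes, total_stripes));
        free(ioc);
        return 0;
    }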
+7 -3
fs/btrfs/volumes.h
···
  #define BTRFS_STRIPE_LEN_SHIFT (16)
  #define BTRFS_STRIPE_LEN_MASK  (BTRFS_STRIPE_LEN - 1)

- static_assert(const_ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
+ static_assert(ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);

  /* Used by sanity check for btrfs_raid_types. */
  #define const_ffs(n) (__builtin_ctzll(n) + 1)
···
   */
  static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
                const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
- static_assert(const_ilog2(BTRFS_BLOCK_GROUP_RAID0) >
-               ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
+ static_assert(ilog2(BTRFS_BLOCK_GROUP_RAID0) > ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));

  /* ilog2() can handle both constants and variables */
  #define BTRFS_BG_FLAG_TO_INDEX(profile) \
···
      u64 devid;
      u8 *uuid;
      u8 *fsid;
+     /*
+      * If devt is specified, all other members will be ignored as it is
+      * enough to uniquely locate a device.
+      */
+     dev_t devt;
      bool missing;
};
+13 -28
fs/btrfs/xattr.c
···
{
      struct btrfs_dir_item *di;
      struct btrfs_root *root = BTRFS_I(inode)->root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      struct extent_buffer *leaf;
-     int ret = 0;
      unsigned long data_ptr;

      path = btrfs_alloc_path();
···
      /* lookup the xattr by name */
      di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(BTRFS_I(inode)),
                              name, strlen(name), 0);
-     if (!di) {
-         ret = -ENODATA;
-         goto out;
-     } else if (IS_ERR(di)) {
-         ret = PTR_ERR(di);
-         goto out;
-     }
+     if (!di)
+         return -ENODATA;
+     if (IS_ERR(di))
+         return PTR_ERR(di);

      leaf = path->nodes[0];
      /* if size is 0, that means we want the size of the attr */
-     if (!size) {
-         ret = btrfs_dir_data_len(leaf, di);
-         goto out;
-     }
+     if (!size)
+         return btrfs_dir_data_len(leaf, di);

      /* now get the data out of our dir_item */
-     if (btrfs_dir_data_len(leaf, di) > size) {
-         ret = -ERANGE;
-         goto out;
-     }
+     if (btrfs_dir_data_len(leaf, di) > size)
+         return -ERANGE;

      /*
       * The way things are packed into the leaf is like this
···
                           btrfs_dir_name_len(leaf, di));
      read_extent_buffer(leaf, buffer, data_ptr,
                         btrfs_dir_data_len(leaf, di));
-     ret = btrfs_dir_data_len(leaf, di);
-
- out:
-     btrfs_free_path(path);
-     return ret;
+     return btrfs_dir_data_len(leaf, di);
}

int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
···
{
      struct btrfs_dir_item *di = NULL;
      struct btrfs_root *root = BTRFS_I(inode)->root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      size_t name_len = strlen(name);
      int ret = 0;
···
      path = btrfs_alloc_path();
      if (!path)
          return -ENOMEM;
-     path->skip_release_on_error = 1;
+     path->skip_release_on_error = true;

      if (!value) {
          di = btrfs_lookup_xattr(trans, root, path,
···
       */
      }
out:
-     btrfs_free_path(path);
      if (!ret) {
          set_bit(BTRFS_INODE_COPY_EVERYTHING,
                  &BTRFS_I(inode)->runtime_flags);
···
      struct btrfs_key key;
      struct inode *inode = d_inode(dentry);
      struct btrfs_root *root = BTRFS_I(inode)->root;
-     struct btrfs_path *path;
+     BTRFS_PATH_AUTO_FREE(path);
      int iter_ret = 0;
      int ret = 0;
      size_t total_size = 0, size_left = size;
···
          ret = iter_ret;
      else
          ret = total_size;
-
-     btrfs_free_path(path);

      return ret;
}
+30 -23
fs/btrfs/zoned.c
···
  #define BTRFS_SB_LOG_FIRST_OFFSET  (512ULL * SZ_1G)
  #define BTRFS_SB_LOG_SECOND_OFFSET (4096ULL * SZ_1G)

- #define BTRFS_SB_LOG_FIRST_SHIFT  const_ilog2(BTRFS_SB_LOG_FIRST_OFFSET)
- #define BTRFS_SB_LOG_SECOND_SHIFT const_ilog2(BTRFS_SB_LOG_SECOND_OFFSET)
+ #define BTRFS_SB_LOG_FIRST_SHIFT  ilog2(BTRFS_SB_LOG_FIRST_OFFSET)
+ #define BTRFS_SB_LOG_SECOND_SHIFT ilog2(BTRFS_SB_LOG_SECOND_OFFSET)

  /* Number of superblock log zones */
  #define BTRFS_NR_SB_LOG_ZONES 2
···
      sector_t sector;

      for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
-         ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
+         ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL,
+                "zones[%d].type=%d", i, zones[i].type);
          empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
          full[i] = sb_zone_is_full(&zones[i]);
      }
···
{
      u64 zone = U64_MAX;

-     ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
+     ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX, "mirror=%d", mirror);
      switch (mirror) {
      case 0: zone = 0; break;
      case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break;
      case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break;
      }

-     ASSERT(zone <= U32_MAX);
+     ASSERT(zone <= U32_MAX, "zone=%llu", zone);

      return (u32)zone;
}
···
      unsigned int i;
      u32 zno;

-     ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
+     ASSERT(IS_ALIGNED(pos, zinfo->zone_size),
+            "pos=%llu zinfo->zone_size=%llu", pos, zinfo->zone_size);
      zno = pos >> zinfo->zone_size_shift;
      /*
       * We cannot report zones beyond the zone end. So, it is OK to
···
      bool have_sb;
      int i;

-     ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size));
-     ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size));
+     ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size),
+            "hole_start=%llu zinfo->zone_size=%llu", hole_start, zinfo->zone_size);
+     ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size),
+            "num_bytes=%llu zinfo->zone_size=%llu", num_bytes, zinfo->zone_size);

      while (pos < hole_end) {
          begin = pos >> shift;
···
      u64 pos;
      int ret;

-     ASSERT(IS_ALIGNED(start, zinfo->zone_size));
-     ASSERT(IS_ALIGNED(size, zinfo->zone_size));
+     ASSERT(IS_ALIGNED(start, zinfo->zone_size),
+            "start=%llu, zinfo->zone_size=%llu", start, zinfo->zone_size);
+     ASSERT(IS_ALIGNED(size, zinfo->zone_size),
+            "size=%llu, zinfo->zone_size=%llu", size, zinfo->zone_size);

      if (begin + nbits > zinfo->nr_zones)
          return -ERANGE;
···
      struct btrfs_chunk_map *map;
      u64 logical = cache->start;
      u64 length = cache->length;
-     struct zone_info *zone_info = NULL;
+     struct zone_info AUTO_KFREE(zone_info);
      int ret;
      int i;
      unsigned long *active = NULL;
···
          cache->physical_map = NULL;
      }
      bitmap_free(active);
-     kfree(zone_info);

      return ret;
}
···
{
      u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT);
      struct btrfs_inode *inode = bbio->inode;
-     struct btrfs_fs_info *fs_info = bbio->fs_info;
+     struct btrfs_fs_info *fs_info = inode->root->fs_info;
      struct btrfs_block_group *cache;
      bool ret = false;

      if (!btrfs_is_zoned(fs_info))
          return false;

-     if (!inode || !is_data_inode(inode))
+     if (!is_data_inode(inode))
          return false;

      if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
···
      em = btrfs_search_extent_mapping(em_tree, ordered->file_offset,
                                       ordered->num_bytes);
      /* The em should be a new COW extent, thus it should not have an offset. */
-     ASSERT(em->offset == 0);
+     ASSERT(em->offset == 0, "em->offset=%llu", em->offset);
      em->disk_bytenr = logical;
      btrfs_free_extent_map(em);
      write_unlock(&em_tree->lock);
···
      struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
      int factor;

-     ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+     ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC,
+            "reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
      factor = btrfs_bg_type_to_factor(bg->flags);

      down_write(&space_info->groups_sem);
···
      space_info->disk_total -= bg->length * factor;
      space_info->disk_total -= bg->zone_unusable;
      /* There is no allocation ever happened. */
-     ASSERT(bg->used == 0);
+     ASSERT(bg->used == 0, "bg->used=%llu", bg->used);
      /* No super block in a block group on the zoned setup. */
-     ASSERT(bg->bytes_super == 0);
+     ASSERT(bg->bytes_super == 0, "bg->bytes_super=%llu", bg->bytes_super);
      spin_unlock(&space_info->lock);

      bg->space_info = reloc_sinfo;
···

      /* Allocate new BG in the data relocation space_info. */
      space_info = data_sinfo->sub_group[0];
-     ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+     ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC,
+            "space_info->subgroup_id=%d", space_info->subgroup_id);
      ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
      btrfs_end_transaction(trans);
      if (ret == 1) {
···
      return ret < 0 ? ret : 1;
}

- int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
-                                 struct btrfs_space_info *space_info,
-                                 bool do_finish)
+ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_finish)
{
+     struct btrfs_fs_info *fs_info = space_info->fs_info;
      struct btrfs_block_group *bg;
      int index;
···
       * This holds because we currently reset fully used then freed
       * block group.
       */
-     ASSERT(reclaimed == bg->zone_capacity);
+     ASSERT(reclaimed == bg->zone_capacity,
+            "reclaimed=%llu bg->zone_capacity=%llu", reclaimed, bg->zone_capacity);
      bg->free_space_ctl->free_space += reclaimed;
      space_info->bytes_zone_unusable -= reclaimed;
      spin_unlock(&bg->lock);
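A recurring theme in zoned.c, as in tree-log.c earlier, is extending bare ASSERT(cond) calls with a format string and the offending values, so a failure report carries enough state to diagnose. A rough userspace sketch of such an assertion macro, assuming a printf-style variadic form; the real btrfs ASSERT() is compiled out without its debug config and differs in detail:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for the kernel-side ASSERT(). */
    #define ASSERT(cond, fmt, ...)                                     \
        do {                                                           \
            if (!(cond)) {                                             \
                fprintf(stderr, "assertion failed: %s: " fmt "\n",     \
                        #cond, ##__VA_ARGS__);                         \
                abort();                                               \
            }                                                          \
        } while (0)

    int main(void)
    {
        int mirror = 2;

        /* Passes; with mirror = 5 it would abort and print the value. */
        ASSERT(mirror < 3, "mirror=%d", mirror);
        return 0;
    }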
+2 -5
fs/btrfs/zoned.h
··· 15 15 #include "disk-io.h" 16 16 #include "block-group.h" 17 17 #include "btrfs_inode.h" 18 - #include "fs.h" 19 18 20 19 struct block_device; 21 20 struct extent_buffer; ··· 93 94 void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, 94 95 u64 length); 95 96 int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info); 96 - int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, 97 - struct btrfs_space_info *space_info, bool do_finish); 97 + int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_finish); 98 98 void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info); 99 99 int btrfs_reset_unused_block_groups(struct btrfs_space_info *space_info, u64 num_bytes); 100 100 #else /* CONFIG_BLK_DEV_ZONED */ ··· 260 262 return 1; 261 263 } 262 264 263 - static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, 264 - struct btrfs_space_info *space_info, 265 + static inline int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, 265 266 bool do_finish) 266 267 { 267 268 /* Consider all the block groups are active */
+9
include/uapi/linux/btrfs.h
···
      BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
};

+ /* Flags for IOC_SHUTDOWN, must match XFS_FSOP_GOING_FLAGS_* flags. */
+ #define BTRFS_SHUTDOWN_FLAGS_DEFAULT    0x0
+ #define BTRFS_SHUTDOWN_FLAGS_LOGFLUSH   0x1
+ #define BTRFS_SHUTDOWN_FLAGS_NOLOGFLUSH 0x2
+ #define BTRFS_SHUTDOWN_FLAGS_LAST       0x3
+
  #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
                                     struct btrfs_ioctl_vol_args)
  #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
···
                                          struct btrfs_ioctl_encoded_io_args)
  #define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \
                                          struct btrfs_ioctl_subvol_wait)
+
+ /* Shutdown ioctl should follow XFS's interfaces, thus not using btrfs magic. */
+ #define BTRFS_IOC_SHUTDOWN _IOR('X', 125, __u32)

  #ifdef __cplusplus
}
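For reference, the new shutdown ioctl deliberately mirrors XFS's FSOP_GOINGDOWN interface ('X' magic, command 125), so userspace passes a pointer to a __u32 flags word. A minimal sketch of a caller, assuming UAPI headers from a kernel that carries this series and a build where the ioctl is available:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/btrfs.h>

    int main(int argc, char **argv)
    {
        __u32 flags = BTRFS_SHUTDOWN_FLAGS_DEFAULT;
        int fd;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <btrfs-mountpoint>\n", argv[0]);
            return 1;
        }
        fd = open(argv[1], O_RDONLY | O_DIRECTORY);
        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* On success, subsequent operations on the filesystem fail
         * with EIO until it is unmounted. */
        if (ioctl(fd, BTRFS_IOC_SHUTDOWN, &flags) < 0)
            perror("BTRFS_IOC_SHUTDOWN");
        close(fd);
        return 0;
    }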