Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-6.7-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"Some fixes to quota accounting code, mostly around error handling and
correctness:

- free reserves on various error paths, after IO errors or
transaction abort

- don't clear reserved range at the folio release time, it'll be
properly cleared after final write

- fix integer overflow due to int used when passing around size of
freed reservations

- fix a regression in squota accounting that missed some cases with
delayed refs"

* tag 'for-6.7-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: ensure releasing squota reserve on head refs
btrfs: don't clear qgroup reserved bit in release_folio
btrfs: free qgroup pertrans reserve on transaction abort
btrfs: fix qgroup_free_reserved_data int overflow
btrfs: free qgroup reserve when ORDERED_IOERR is set

+116 -50
+1 -1
fs/btrfs/delalloc-space.c
··· 199 199 start = round_down(start, fs_info->sectorsize); 200 200 201 201 btrfs_free_reserved_data_space_noquota(fs_info, len); 202 - btrfs_qgroup_free_data(inode, reserved, start, len); 202 + btrfs_qgroup_free_data(inode, reserved, start, len, NULL); 203 203 } 204 204 205 205 /*
+28
fs/btrfs/disk-io.c
··· 4799 4799 } 4800 4800 } 4801 4801 4802 + static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info) 4803 + { 4804 + struct btrfs_root *gang[8]; 4805 + int i; 4806 + int ret; 4807 + 4808 + spin_lock(&fs_info->fs_roots_radix_lock); 4809 + while (1) { 4810 + ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 4811 + (void **)gang, 0, 4812 + ARRAY_SIZE(gang), 4813 + BTRFS_ROOT_TRANS_TAG); 4814 + if (ret == 0) 4815 + break; 4816 + for (i = 0; i < ret; i++) { 4817 + struct btrfs_root *root = gang[i]; 4818 + 4819 + btrfs_qgroup_free_meta_all_pertrans(root); 4820 + radix_tree_tag_clear(&fs_info->fs_roots_radix, 4821 + (unsigned long)root->root_key.objectid, 4822 + BTRFS_ROOT_TRANS_TAG); 4823 + } 4824 + } 4825 + spin_unlock(&fs_info->fs_roots_radix_lock); 4826 + } 4827 + 4802 4828 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4803 4829 struct btrfs_fs_info *fs_info) 4804 4830 { ··· 4852 4826 btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, 4853 4827 EXTENT_DIRTY); 4854 4828 btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); 4829 + 4830 + btrfs_free_all_qgroup_pertrans(fs_info); 4855 4831 4856 4832 cur_trans->state =TRANS_STATE_COMPLETED; 4857 4833 wake_up(&cur_trans->commit_wait);
+34 -14
fs/btrfs/extent-tree.c
··· 1547 1547 return ret; 1548 1548 } 1549 1549 1550 + static void free_head_ref_squota_rsv(struct btrfs_fs_info *fs_info, 1551 + struct btrfs_delayed_ref_head *href) 1552 + { 1553 + u64 root = href->owning_root; 1554 + 1555 + /* 1556 + * Don't check must_insert_reserved, as this is called from contexts 1557 + * where it has already been unset. 1558 + */ 1559 + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE || 1560 + !href->is_data || !is_fstree(root)) 1561 + return; 1562 + 1563 + btrfs_qgroup_free_refroot(fs_info, root, href->reserved_bytes, 1564 + BTRFS_QGROUP_RSV_DATA); 1565 + } 1566 + 1550 1567 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1551 1568 struct btrfs_delayed_ref_head *href, 1552 1569 struct btrfs_delayed_ref_node *node, ··· 1586 1569 struct btrfs_squota_delta delta = { 1587 1570 .root = href->owning_root, 1588 1571 .num_bytes = node->num_bytes, 1589 - .rsv_bytes = href->reserved_bytes, 1590 1572 .is_data = true, 1591 1573 .is_inc = true, 1592 1574 .generation = trans->transid, ··· 1602 1586 flags, ref->objectid, 1603 1587 ref->offset, &key, 1604 1588 node->ref_mod, href->owning_root); 1589 + free_head_ref_squota_rsv(trans->fs_info, href); 1605 1590 if (!ret) 1606 1591 ret = btrfs_record_squota_delta(trans->fs_info, &delta); 1607 - else 1608 - btrfs_qgroup_free_refroot(trans->fs_info, delta.root, 1609 - delta.rsv_bytes, BTRFS_QGROUP_RSV_DATA); 1610 1592 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1611 1593 ret = __btrfs_inc_extent_ref(trans, node, parent, ref->root, 1612 1594 ref->objectid, ref->offset, ··· 1756 1742 struct btrfs_squota_delta delta = { 1757 1743 .root = href->owning_root, 1758 1744 .num_bytes = fs_info->nodesize, 1759 - .rsv_bytes = 0, 1760 1745 .is_data = false, 1761 1746 .is_inc = true, 1762 1747 .generation = trans->transid, ··· 1787 1774 int ret = 0; 1788 1775 1789 1776 if (TRANS_ABORTED(trans)) { 1790 - if (insert_reserved) 1777 + if (insert_reserved) { 1791 1778 btrfs_pin_extent(trans, 
node->bytenr, node->num_bytes, 1); 1779 + free_head_ref_squota_rsv(trans->fs_info, href); 1780 + } 1792 1781 return 0; 1793 1782 } 1794 1783 ··· 1886 1871 struct btrfs_delayed_ref_root *delayed_refs, 1887 1872 struct btrfs_delayed_ref_head *head) 1888 1873 { 1874 + u64 ret = 0; 1875 + 1889 1876 /* 1890 1877 * We had csum deletions accounted for in our delayed refs rsv, we need 1891 1878 * to drop the csum leaves for this update from our delayed_refs_rsv. ··· 1902 1885 1903 1886 btrfs_delayed_refs_rsv_release(fs_info, 0, nr_csums); 1904 1887 1905 - return btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); 1888 + ret = btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); 1906 1889 } 1907 - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && 1908 - head->must_insert_reserved && head->is_data) 1909 - btrfs_qgroup_free_refroot(fs_info, head->owning_root, 1910 - head->reserved_bytes, BTRFS_QGROUP_RSV_DATA); 1890 + /* must_insert_reserved can be set only if we didn't run the head ref. */ 1891 + if (head->must_insert_reserved) 1892 + free_head_ref_squota_rsv(fs_info, head); 1911 1893 1912 - return 0; 1894 + return ret; 1913 1895 } 1914 1896 1915 1897 static int cleanup_ref_head(struct btrfs_trans_handle *trans, ··· 2049 2033 * spin lock. 2050 2034 */ 2051 2035 must_insert_reserved = locked_ref->must_insert_reserved; 2036 + /* 2037 + * Unsetting this on the head ref relinquishes ownership of 2038 + * the rsv_bytes, so it is critical that every possible code 2039 + * path from here forward frees all reserves including qgroup 2040 + * reserve. 
2041 + */ 2052 2042 locked_ref->must_insert_reserved = false; 2053 2043 2054 2044 extent_op = locked_ref->extent_op; ··· 3314 3292 struct btrfs_squota_delta delta = { 3315 3293 .root = delayed_ref_root, 3316 3294 .num_bytes = num_bytes, 3317 - .rsv_bytes = 0, 3318 3295 .is_data = is_data, 3319 3296 .is_inc = false, 3320 3297 .generation = btrfs_extent_generation(leaf, ei), ··· 4958 4937 .root = root_objectid, 4959 4938 .num_bytes = ins->offset, 4960 4939 .generation = trans->transid, 4961 - .rsv_bytes = 0, 4962 4940 .is_data = true, 4963 4941 .is_inc = true, 4964 4942 };
+2 -1
fs/btrfs/extent_io.c
··· 2302 2302 ret = 0; 2303 2303 } else { 2304 2304 u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM | 2305 - EXTENT_DELALLOC_NEW | EXTENT_CTLBITS); 2305 + EXTENT_DELALLOC_NEW | EXTENT_CTLBITS | 2306 + EXTENT_QGROUP_RESERVED); 2306 2307 2307 2308 /* 2308 2309 * At this point we can safely clear everything except the
+1 -1
fs/btrfs/file.c
··· 3192 3192 qgroup_reserved -= range->len; 3193 3193 } else if (qgroup_reserved > 0) { 3194 3194 btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved, 3195 - range->start, range->len); 3195 + range->start, range->len, NULL); 3196 3196 qgroup_reserved -= range->len; 3197 3197 } 3198 3198 list_del(&range->list);
+8 -8
fs/btrfs/inode.c
··· 688 688 * And at reserve time, it's always aligned to page size, so 689 689 * just free one page here. 690 690 */ 691 - btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE); 691 + btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL); 692 692 btrfs_free_path(path); 693 693 btrfs_end_transaction(trans); 694 694 return ret; ··· 5132 5132 */ 5133 5133 if (state_flags & EXTENT_DELALLOC) 5134 5134 btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, 5135 - end - start + 1); 5135 + end - start + 1, NULL); 5136 5136 5137 5137 clear_extent_bit(io_tree, start, end, 5138 5138 EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING, ··· 8059 8059 * reserved data space. 8060 8060 * Since the IO will never happen for this page. 8061 8061 */ 8062 - btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur); 8062 + btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL); 8063 8063 if (!inode_evicting) { 8064 8064 clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED | 8065 8065 EXTENT_DELALLOC | EXTENT_UPTODATE | ··· 9491 9491 struct btrfs_path *path; 9492 9492 u64 start = ins->objectid; 9493 9493 u64 len = ins->offset; 9494 - int qgroup_released; 9494 + u64 qgroup_released = 0; 9495 9495 int ret; 9496 9496 9497 9497 memset(&stack_fi, 0, sizeof(stack_fi)); ··· 9504 9504 btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); 9505 9505 /* Encryption and other encoding is reserved and all 0 */ 9506 9506 9507 - qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); 9508 - if (qgroup_released < 0) 9509 - return ERR_PTR(qgroup_released); 9507 + ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released); 9508 + if (ret < 0) 9509 + return ERR_PTR(ret); 9510 9510 9511 9511 if (trans) { 9512 9512 ret = insert_reserved_file_extent(trans, inode, ··· 10401 10401 btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0); 10402 10402 out_qgroup_free_data: 10403 10403 if (ret < 0) 10404 - btrfs_qgroup_free_data(inode, data_reserved, 
start, num_bytes); 10404 + btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL); 10405 10405 out_free_data_space: 10406 10406 /* 10407 10407 * If btrfs_reserve_extent() succeeded, then we already decremented
+7 -4
fs/btrfs/ordered-data.c
··· 152 152 { 153 153 struct btrfs_ordered_extent *entry; 154 154 int ret; 155 + u64 qgroup_rsv = 0; 155 156 156 157 if (flags & 157 158 ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) { 158 159 /* For nocow write, we can release the qgroup rsv right now */ 159 - ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes); 160 + ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv); 160 161 if (ret < 0) 161 162 return ERR_PTR(ret); 162 163 } else { ··· 165 164 * The ordered extent has reserved qgroup space, release now 166 165 * and pass the reserved number for qgroup_record to free. 167 166 */ 168 - ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes); 167 + ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv); 169 168 if (ret < 0) 170 169 return ERR_PTR(ret); 171 170 } ··· 183 182 entry->inode = igrab(&inode->vfs_inode); 184 183 entry->compress_type = compress_type; 185 184 entry->truncated_len = (u64)-1; 186 - entry->qgroup_rsv = ret; 185 + entry->qgroup_rsv = qgroup_rsv; 187 186 entry->flags = flags; 188 187 refcount_set(&entry->refs, 1); 189 188 init_waitqueue_head(&entry->wait); ··· 600 599 release = entry->disk_num_bytes; 601 600 else 602 601 release = entry->num_bytes; 603 - btrfs_delalloc_release_metadata(btrfs_inode, release, false); 602 + btrfs_delalloc_release_metadata(btrfs_inode, release, 603 + test_bit(BTRFS_ORDERED_IOERR, 604 + &entry->flags)); 604 605 } 605 606 606 607 percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
+29 -15
fs/btrfs/qgroup.c
··· 4057 4057 4058 4058 /* Free ranges specified by @reserved, normally in error path */ 4059 4059 static int qgroup_free_reserved_data(struct btrfs_inode *inode, 4060 - struct extent_changeset *reserved, u64 start, u64 len) 4060 + struct extent_changeset *reserved, 4061 + u64 start, u64 len, u64 *freed_ret) 4061 4062 { 4062 4063 struct btrfs_root *root = inode->root; 4063 4064 struct ulist_node *unode; 4064 4065 struct ulist_iterator uiter; 4065 4066 struct extent_changeset changeset; 4066 - int freed = 0; 4067 + u64 freed = 0; 4067 4068 int ret; 4068 4069 4069 4070 extent_changeset_init(&changeset); ··· 4105 4104 } 4106 4105 btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, 4107 4106 BTRFS_QGROUP_RSV_DATA); 4108 - ret = freed; 4107 + if (freed_ret) 4108 + *freed_ret = freed; 4109 + ret = 0; 4109 4110 out: 4110 4111 extent_changeset_release(&changeset); 4111 4112 return ret; ··· 4115 4112 4116 4113 static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, 4117 4114 struct extent_changeset *reserved, u64 start, u64 len, 4118 - int free) 4115 + u64 *released, int free) 4119 4116 { 4120 4117 struct extent_changeset changeset; 4121 4118 int trace_op = QGROUP_RELEASE; ··· 4131 4128 /* In release case, we shouldn't have @reserved */ 4132 4129 WARN_ON(!free && reserved); 4133 4130 if (free && reserved) 4134 - return qgroup_free_reserved_data(inode, reserved, start, len); 4131 + return qgroup_free_reserved_data(inode, reserved, start, len, released); 4135 4132 extent_changeset_init(&changeset); 4136 4133 ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, 4137 4134 EXTENT_QGROUP_RESERVED, &changeset); ··· 4146 4143 btrfs_qgroup_free_refroot(inode->root->fs_info, 4147 4144 inode->root->root_key.objectid, 4148 4145 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); 4149 - ret = changeset.bytes_changed; 4146 + if (released) 4147 + *released = changeset.bytes_changed; 4150 4148 out: 4151 4149 
extent_changeset_release(&changeset); 4152 4150 return ret; ··· 4166 4162 * NOTE: This function may sleep for memory allocation. 4167 4163 */ 4168 4164 int btrfs_qgroup_free_data(struct btrfs_inode *inode, 4169 - struct extent_changeset *reserved, u64 start, u64 len) 4165 + struct extent_changeset *reserved, 4166 + u64 start, u64 len, u64 *freed) 4170 4167 { 4171 - return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); 4168 + return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1); 4172 4169 } 4173 4170 4174 4171 /* ··· 4187 4182 * 4188 4183 * NOTE: This function may sleep for memory allocation. 4189 4184 */ 4190 - int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) 4185 + int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released) 4191 4186 { 4192 - return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); 4187 + return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0); 4193 4188 } 4194 4189 4195 4190 static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, ··· 4337 4332 4338 4333 qgroup_rsv_release(fs_info, qgroup, num_bytes, 4339 4334 BTRFS_QGROUP_RSV_META_PREALLOC); 4340 - qgroup_rsv_add(fs_info, qgroup, num_bytes, 4341 - BTRFS_QGROUP_RSV_META_PERTRANS); 4335 + if (!sb_rdonly(fs_info->sb)) 4336 + qgroup_rsv_add(fs_info, qgroup, num_bytes, 4337 + BTRFS_QGROUP_RSV_META_PERTRANS); 4342 4338 4343 4339 list_for_each_entry(glist, &qgroup->groups, next_group) 4344 4340 qgroup_iterator_add(&qgroup_list, glist->group); ··· 4661 4655 *root = RB_ROOT; 4662 4656 } 4663 4657 4658 + void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes) 4659 + { 4660 + if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) 4661 + return; 4662 + 4663 + if (!is_fstree(root)) 4664 + return; 4665 + 4666 + btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes, BTRFS_QGROUP_RSV_DATA); 4667 + } 4668 + 4664 4669 int btrfs_record_squota_delta(struct 
btrfs_fs_info *fs_info, 4665 4670 struct btrfs_squota_delta *delta) 4666 4671 { ··· 4716 4699 4717 4700 out: 4718 4701 spin_unlock(&fs_info->qgroup_lock); 4719 - if (!ret && delta->rsv_bytes) 4720 - btrfs_qgroup_free_refroot(fs_info, root, delta->rsv_bytes, 4721 - BTRFS_QGROUP_RSV_DATA); 4722 4702 return ret; 4723 4703 }
+3 -4
fs/btrfs/qgroup.h
··· 274 274 u64 root; 275 275 /* The number of bytes in the extent being counted. */ 276 276 u64 num_bytes; 277 - /* The number of bytes reserved for this extent. */ 278 - u64 rsv_bytes; 279 277 /* The generation the extent was created in. */ 280 278 u64 generation; 281 279 /* Whether we are using or freeing the extent. */ ··· 356 358 /* New io_tree based accurate qgroup reserve API */ 357 359 int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, 358 360 struct extent_changeset **reserved, u64 start, u64 len); 359 - int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); 361 + int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released); 360 362 int btrfs_qgroup_free_data(struct btrfs_inode *inode, 361 363 struct extent_changeset *reserved, u64 start, 362 - u64 len); 364 + u64 len, u64 *freed); 363 365 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 364 366 enum btrfs_qgroup_rsv_type type, bool enforce); 365 367 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ··· 420 422 struct btrfs_root *root, struct extent_buffer *eb); 421 423 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); 422 424 bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); 425 + void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes); 423 426 int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, 424 427 struct btrfs_squota_delta *delta); 425 428
-2
fs/btrfs/transaction.c
··· 37 37 38 38 static struct kmem_cache *btrfs_trans_handle_cachep; 39 39 40 - #define BTRFS_ROOT_TRANS_TAG 0 41 - 42 40 /* 43 41 * Transaction states and transitions 44 42 *
+3
fs/btrfs/transaction.h
··· 12 12 #include "ctree.h" 13 13 #include "misc.h" 14 14 15 + /* Radix-tree tag for roots that are part of the transaction. */ 16 + #define BTRFS_ROOT_TRANS_TAG 0 17 + 15 18 enum btrfs_trans_state { 16 19 TRANS_STATE_RUNNING, 17 20 TRANS_STATE_COMMIT_PREP,