Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"We've had a busy two weeks of bug fixing. The biggest patches in here
are some long-standing early-enospc problems (Josef) and a very old
race where compression and mmap combine forces to lose writes (me).
I'm fairly sure the mmap bug goes all the way back to the introduction
of the compression code, which is proof that fsx doesn't trigger every
possible mmap corner after all.

I'm sure you'll notice one of these is from this morning; it's a small
and isolated use-after-free fix in our scrub error reporting. I
double-checked it here."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: don't drop path when printing out tree errors in scrub
Btrfs: fix wrong return value of btrfs_lookup_csum()
Btrfs: fix wrong reservation of csums
Btrfs: fix double free in the btrfs_qgroup_account_ref()
Btrfs: limit the global reserve to 512mb
Btrfs: hold the ordered operations mutex when waiting on ordered extents
Btrfs: fix space accounting for unlink and rename
Btrfs: fix space leak when we fail to reserve metadata space
Btrfs: fix EIO from btrfs send in is_extent_unchanged for punched holes
Btrfs: fix race between mmap writes and compression
Btrfs: fix memory leak in btrfs_create_tree()
Btrfs: fix locking on ROOT_REPLACE operations in tree mod log
Btrfs: fix missing qgroup reservation before fallocating
Btrfs: handle a bogus chunk tree nicely
Btrfs: update to use fs_state bit

+189 -44
+20 -10
fs/btrfs/ctree.c
··· 651 651 if (tree_mod_dont_log(fs_info, NULL)) 652 652 return 0; 653 653 654 + __tree_mod_log_free_eb(fs_info, old_root); 655 + 654 656 ret = tree_mod_alloc(fs_info, flags, &tm); 655 657 if (ret < 0) 656 658 goto out; ··· 738 736 static noinline void 739 737 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, 740 738 struct extent_buffer *src, unsigned long dst_offset, 741 - unsigned long src_offset, int nr_items) 739 + unsigned long src_offset, int nr_items, int log_removal) 742 740 { 743 741 int ret; 744 742 int i; ··· 752 750 } 753 751 754 752 for (i = 0; i < nr_items; i++) { 755 - ret = tree_mod_log_insert_key_locked(fs_info, src, 756 - i + src_offset, 757 - MOD_LOG_KEY_REMOVE); 758 - BUG_ON(ret < 0); 753 + if (log_removal) { 754 + ret = tree_mod_log_insert_key_locked(fs_info, src, 755 + i + src_offset, 756 + MOD_LOG_KEY_REMOVE); 757 + BUG_ON(ret < 0); 758 + } 759 759 ret = tree_mod_log_insert_key_locked(fs_info, dst, 760 760 i + dst_offset, 761 761 MOD_LOG_KEY_ADD); ··· 931 927 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 932 928 BUG_ON(ret); /* -ENOMEM */ 933 929 } 934 - tree_mod_log_free_eb(root->fs_info, buf); 935 930 clean_tree_block(trans, root, buf); 936 931 *last_ref = 1; 937 932 } ··· 1049 1046 btrfs_set_node_ptr_generation(parent, parent_slot, 1050 1047 trans->transid); 1051 1048 btrfs_mark_buffer_dirty(parent); 1049 + tree_mod_log_free_eb(root->fs_info, buf); 1052 1050 btrfs_free_tree_block(trans, root, buf, parent_start, 1053 1051 last_ref); 1054 1052 } ··· 1754 1750 goto enospc; 1755 1751 } 1756 1752 1757 - tree_mod_log_free_eb(root->fs_info, root->node); 1758 1753 tree_mod_log_set_root_pointer(root, child); 1759 1754 rcu_assign_pointer(root->node, child); 1760 1755 ··· 2998 2995 push_items = min(src_nritems - 8, push_items); 2999 2996 3000 2997 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, 3001 - push_items); 2998 + push_items, 1); 3002 2999 copy_extent_buffer(dst, src, 3003 3000 
btrfs_node_key_ptr_offset(dst_nritems), 3004 3001 btrfs_node_key_ptr_offset(0), ··· 3069 3066 sizeof(struct btrfs_key_ptr)); 3070 3067 3071 3068 tree_mod_log_eb_copy(root->fs_info, dst, src, 0, 3072 - src_nritems - push_items, push_items); 3069 + src_nritems - push_items, push_items, 1); 3073 3070 copy_extent_buffer(dst, src, 3074 3071 btrfs_node_key_ptr_offset(0), 3075 3072 btrfs_node_key_ptr_offset(src_nritems - push_items), ··· 3221 3218 int mid; 3222 3219 int ret; 3223 3220 u32 c_nritems; 3221 + int tree_mod_log_removal = 1; 3224 3222 3225 3223 c = path->nodes[level]; 3226 3224 WARN_ON(btrfs_header_generation(c) != trans->transid); 3227 3225 if (c == root->node) { 3228 3226 /* trying to split the root, lets make a new one */ 3229 3227 ret = insert_new_root(trans, root, path, level + 1); 3228 + /* 3229 + * removal of root nodes has been logged by 3230 + * tree_mod_log_set_root_pointer due to locking 3231 + */ 3232 + tree_mod_log_removal = 0; 3230 3233 if (ret) 3231 3234 return ret; 3232 3235 } else { ··· 3270 3261 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3271 3262 BTRFS_UUID_SIZE); 3272 3263 3273 - tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3264 + tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, 3265 + tree_mod_log_removal); 3274 3266 copy_extent_buffer(split, c, 3275 3267 btrfs_node_key_ptr_offset(0), 3276 3268 btrfs_node_key_ptr_offset(mid),
+11 -5
fs/btrfs/disk-io.c
··· 1291 1291 0, objectid, NULL, 0, 0, 0); 1292 1292 if (IS_ERR(leaf)) { 1293 1293 ret = PTR_ERR(leaf); 1294 + leaf = NULL; 1294 1295 goto fail; 1295 1296 } 1296 1297 ··· 1335 1334 1336 1335 btrfs_tree_unlock(leaf); 1337 1336 1338 - fail: 1339 - if (ret) 1340 - return ERR_PTR(ret); 1341 - 1342 1337 return root; 1338 + 1339 + fail: 1340 + if (leaf) { 1341 + btrfs_tree_unlock(leaf); 1342 + free_extent_buffer(leaf); 1343 + } 1344 + kfree(root); 1345 + 1346 + return ERR_PTR(ret); 1343 1347 } 1344 1348 1345 1349 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, ··· 3259 3253 if (btrfs_root_refs(&root->root_item) == 0) 3260 3254 synchronize_srcu(&fs_info->subvol_srcu); 3261 3255 3262 - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 3256 + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 3263 3257 btrfs_free_log(NULL, root); 3264 3258 btrfs_free_log_root_tree(NULL, fs_info); 3265 3259 }
+72 -12
fs/btrfs/extent-tree.c
··· 257 257 cache->bytes_super += stripe_len; 258 258 ret = add_excluded_extent(root, cache->key.objectid, 259 259 stripe_len); 260 - BUG_ON(ret); /* -ENOMEM */ 260 + if (ret) 261 + return ret; 261 262 } 262 263 263 264 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { ··· 266 265 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 267 266 cache->key.objectid, bytenr, 268 267 0, &logical, &nr, &stripe_len); 269 - BUG_ON(ret); /* -ENOMEM */ 268 + if (ret) 269 + return ret; 270 270 271 271 while (nr--) { 272 272 cache->bytes_super += stripe_len; 273 273 ret = add_excluded_extent(root, logical[nr], 274 274 stripe_len); 275 - BUG_ON(ret); /* -ENOMEM */ 275 + if (ret) { 276 + kfree(logical); 277 + return ret; 278 + } 276 279 } 277 280 278 281 kfree(logical); ··· 4443 4438 spin_lock(&sinfo->lock); 4444 4439 spin_lock(&block_rsv->lock); 4445 4440 4446 - block_rsv->size = num_bytes; 4441 + block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); 4447 4442 4448 4443 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 4449 4444 sinfo->bytes_reserved + sinfo->bytes_readonly + ··· 4798 4793 * If the inodes csum_bytes is the same as the original 4799 4794 * csum_bytes then we know we haven't raced with any free()ers 4800 4795 * so we can just reduce our inodes csum bytes and carry on. 4801 - * Otherwise we have to do the normal free thing to account for 4802 - * the case that the free side didn't free up its reserve 4803 - * because of this outstanding reservation. 
4804 4796 */ 4805 - if (BTRFS_I(inode)->csum_bytes == csum_bytes) 4797 + if (BTRFS_I(inode)->csum_bytes == csum_bytes) { 4806 4798 calc_csum_metadata_size(inode, num_bytes, 0); 4807 - else 4808 - to_free = calc_csum_metadata_size(inode, num_bytes, 0); 4799 + } else { 4800 + u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; 4801 + u64 bytes; 4802 + 4803 + /* 4804 + * This is tricky, but first we need to figure out how much we 4805 + * free'd from any free-ers that occured during this 4806 + * reservation, so we reset ->csum_bytes to the csum_bytes 4807 + * before we dropped our lock, and then call the free for the 4808 + * number of bytes that were freed while we were trying our 4809 + * reservation. 4810 + */ 4811 + bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; 4812 + BTRFS_I(inode)->csum_bytes = csum_bytes; 4813 + to_free = calc_csum_metadata_size(inode, bytes, 0); 4814 + 4815 + 4816 + /* 4817 + * Now we need to see how much we would have freed had we not 4818 + * been making this reservation and our ->csum_bytes were not 4819 + * artificially inflated. 4820 + */ 4821 + BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; 4822 + bytes = csum_bytes - orig_csum_bytes; 4823 + bytes = calc_csum_metadata_size(inode, bytes, 0); 4824 + 4825 + /* 4826 + * Now reset ->csum_bytes to what it should be. If bytes is 4827 + * more than to_free then we would have free'd more space had we 4828 + * not had an artificially high ->csum_bytes, so we need to free 4829 + * the remainder. If bytes is the same or less then we don't 4830 + * need to do anything, the other free-ers did the correct 4831 + * thing. 
4832 + */ 4833 + BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; 4834 + if (bytes > to_free) 4835 + to_free = bytes - to_free; 4836 + else 4837 + to_free = 0; 4838 + } 4809 4839 spin_unlock(&BTRFS_I(inode)->lock); 4810 4840 if (dropped) 4811 4841 to_free += btrfs_calc_trans_metadata_size(root, dropped); ··· 7987 7947 * info has super bytes accounted for, otherwise we'll think 7988 7948 * we have more space than we actually do. 7989 7949 */ 7990 - exclude_super_stripes(root, cache); 7950 + ret = exclude_super_stripes(root, cache); 7951 + if (ret) { 7952 + /* 7953 + * We may have excluded something, so call this just in 7954 + * case. 7955 + */ 7956 + free_excluded_extents(root, cache); 7957 + kfree(cache->free_space_ctl); 7958 + kfree(cache); 7959 + goto error; 7960 + } 7991 7961 7992 7962 /* 7993 7963 * check for two cases, either we are full, and therefore ··· 8139 8089 8140 8090 cache->last_byte_to_unpin = (u64)-1; 8141 8091 cache->cached = BTRFS_CACHE_FINISHED; 8142 - exclude_super_stripes(root, cache); 8092 + ret = exclude_super_stripes(root, cache); 8093 + if (ret) { 8094 + /* 8095 + * We may have excluded something, so call this just in 8096 + * case. 8097 + */ 8098 + free_excluded_extents(root, cache); 8099 + kfree(cache->free_space_ctl); 8100 + kfree(cache); 8101 + return ret; 8102 + } 8143 8103 8144 8104 add_new_free_space(cache, root->fs_info, chunk_offset, 8145 8105 chunk_offset + size);
+33
fs/btrfs/extent_io.c
··· 1257 1257 GFP_NOFS); 1258 1258 } 1259 1259 1260 + int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) 1261 + { 1262 + unsigned long index = start >> PAGE_CACHE_SHIFT; 1263 + unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1264 + struct page *page; 1265 + 1266 + while (index <= end_index) { 1267 + page = find_get_page(inode->i_mapping, index); 1268 + BUG_ON(!page); /* Pages should be in the extent_io_tree */ 1269 + clear_page_dirty_for_io(page); 1270 + page_cache_release(page); 1271 + index++; 1272 + } 1273 + return 0; 1274 + } 1275 + 1276 + int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) 1277 + { 1278 + unsigned long index = start >> PAGE_CACHE_SHIFT; 1279 + unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1280 + struct page *page; 1281 + 1282 + while (index <= end_index) { 1283 + page = find_get_page(inode->i_mapping, index); 1284 + BUG_ON(!page); /* Pages should be in the extent_io_tree */ 1285 + account_page_redirty(page); 1286 + __set_page_dirty_nobuffers(page); 1287 + page_cache_release(page); 1288 + index++; 1289 + } 1290 + return 0; 1291 + } 1292 + 1260 1293 /* 1261 1294 * helper function to set both pages and extents in the tree writeback 1262 1295 */
+2
fs/btrfs/extent_io.h
··· 325 325 unsigned long *map_len); 326 326 int extent_range_uptodate(struct extent_io_tree *tree, 327 327 u64 start, u64 end); 328 + int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); 329 + int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 328 330 int extent_clear_unlock_delalloc(struct inode *inode, 329 331 struct extent_io_tree *tree, 330 332 u64 start, u64 end, struct page *locked_page,
+3 -3
fs/btrfs/file-item.c
··· 118 118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 119 119 csums_in_item /= csum_size; 120 120 121 - if (csum_offset >= csums_in_item) { 121 + if (csum_offset == csums_in_item) { 122 122 ret = -EFBIG; 123 + goto fail; 124 + } else if (csum_offset > csums_in_item) { 123 125 goto fail; 124 126 } 125 127 } ··· 730 728 return -ENOMEM; 731 729 732 730 sector_sum = sums->sums; 733 - trans->adding_csums = 1; 734 731 again: 735 732 next_offset = (u64)-1; 736 733 found_next = 0; ··· 900 899 goto again; 901 900 } 902 901 out: 903 - trans->adding_csums = 0; 904 902 btrfs_free_path(path); 905 903 return ret; 906 904
+9
fs/btrfs/file.c
··· 2142 2142 { 2143 2143 struct inode *inode = file_inode(file); 2144 2144 struct extent_state *cached_state = NULL; 2145 + struct btrfs_root *root = BTRFS_I(inode)->root; 2145 2146 u64 cur_offset; 2146 2147 u64 last_byte; 2147 2148 u64 alloc_start; ··· 2170 2169 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); 2171 2170 if (ret) 2172 2171 return ret; 2172 + if (root->fs_info->quota_enabled) { 2173 + ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start); 2174 + if (ret) 2175 + goto out_reserve_fail; 2176 + } 2173 2177 2174 2178 /* 2175 2179 * wait for ordered IO before we have any locks. We'll loop again ··· 2278 2272 &cached_state, GFP_NOFS); 2279 2273 out: 2280 2274 mutex_unlock(&inode->i_mutex); 2275 + if (root->fs_info->quota_enabled) 2276 + btrfs_qgroup_free(root, alloc_end - alloc_start); 2277 + out_reserve_fail: 2281 2278 /* Let go of our reservation. */ 2282 2279 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); 2283 2280 return ret;
+18 -4
fs/btrfs/inode.c
··· 353 353 int i; 354 354 int will_compress; 355 355 int compress_type = root->fs_info->compress_type; 356 + int redirty = 0; 356 357 357 358 /* if this is a small write inside eof, kick off a defrag */ 358 359 if ((end - start + 1) < 16 * 1024 && ··· 416 415 if (BTRFS_I(inode)->force_compress) 417 416 compress_type = BTRFS_I(inode)->force_compress; 418 417 418 + /* 419 + * we need to call clear_page_dirty_for_io on each 420 + * page in the range. Otherwise applications with the file 421 + * mmap'd can wander in and change the page contents while 422 + * we are compressing them. 423 + * 424 + * If the compression fails for any reason, we set the pages 425 + * dirty again later on. 426 + */ 427 + extent_range_clear_dirty_for_io(inode, start, end); 428 + redirty = 1; 419 429 ret = btrfs_compress_pages(compress_type, 420 430 inode->i_mapping, start, 421 431 total_compressed, pages, ··· 566 554 __set_page_dirty_nobuffers(locked_page); 567 555 /* unlocked later on in the async handlers */ 568 556 } 557 + if (redirty) 558 + extent_range_redirty_for_io(inode, start, end); 569 559 add_async_extent(async_cow, start, end - start + 1, 570 560 0, NULL, 0, BTRFS_COMPRESS_NONE); 571 561 *num_added += 1; ··· 1757 1743 struct btrfs_ordered_sum *sum; 1758 1744 1759 1745 list_for_each_entry(sum, list, list) { 1746 + trans->adding_csums = 1; 1760 1747 btrfs_csum_file_blocks(trans, 1761 1748 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1749 + trans->adding_csums = 0; 1762 1750 } 1763 1751 return 0; 1764 1752 } ··· 3695 3679 * 1 for the dir item 3696 3680 * 1 for the dir index 3697 3681 * 1 for the inode ref 3698 - * 1 for the inode ref in the tree log 3699 - * 2 for the dir entries in the log 3700 3682 * 1 for the inode 3701 3683 */ 3702 - trans = btrfs_start_transaction(root, 8); 3684 + trans = btrfs_start_transaction(root, 5); 3703 3685 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3704 3686 return trans; 3705 3687 ··· 8141 8127 * inodes. 
So 5 * 2 is 10, plus 1 for the new link, so 11 total items 8142 8128 * should cover the worst case number of items we'll modify. 8143 8129 */ 8144 - trans = btrfs_start_transaction(root, 20); 8130 + trans = btrfs_start_transaction(root, 11); 8145 8131 if (IS_ERR(trans)) { 8146 8132 ret = PTR_ERR(trans); 8147 8133 goto out_notrans;
+2
fs/btrfs/ordered-data.c
··· 557 557 INIT_LIST_HEAD(&splice); 558 558 INIT_LIST_HEAD(&works); 559 559 560 + mutex_lock(&root->fs_info->ordered_operations_mutex); 560 561 spin_lock(&root->fs_info->ordered_extent_lock); 561 562 list_splice_init(&root->fs_info->ordered_extents, &splice); 562 563 while (!list_empty(&splice)) { ··· 601 600 602 601 cond_resched(); 603 602 } 603 + mutex_unlock(&root->fs_info->ordered_operations_mutex); 604 604 } 605 605 606 606 /*
+1 -2
fs/btrfs/qgroup.c
··· 1153 1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, 1154 1154 sgn > 0 ? node->seq - 1 : node->seq, &roots); 1155 1155 if (ret < 0) 1156 - goto out; 1156 + return ret; 1157 1157 1158 1158 spin_lock(&fs_info->qgroup_lock); 1159 1159 quota_root = fs_info->quota_root; ··· 1275 1275 ret = 0; 1276 1276 unlock: 1277 1277 spin_unlock(&fs_info->qgroup_lock); 1278 - out: 1279 1278 ulist_free(roots); 1280 1279 ulist_free(tmp); 1281 1280
+2 -1
fs/btrfs/scrub.c
··· 542 542 eb = path->nodes[0]; 543 543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 544 544 item_size = btrfs_item_size_nr(eb, path->slots[0]); 545 - btrfs_release_path(path); 546 545 547 546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 548 547 do { ··· 557 558 ret < 0 ? -1 : ref_level, 558 559 ret < 0 ? -1 : ref_root); 559 560 } while (ret != 1); 561 + btrfs_release_path(path); 560 562 } else { 563 + btrfs_release_path(path); 561 564 swarn.path = path; 562 565 swarn.dev = dev; 563 566 iterate_extent_inodes(fs_info, found_key.objectid,
+4 -6
fs/btrfs/send.c
··· 3945 3945 found_key.type != key.type) { 3946 3946 key.offset += right_len; 3947 3947 break; 3948 - } else { 3949 - if (found_key.offset != key.offset + right_len) { 3950 - /* Should really not happen */ 3951 - ret = -EIO; 3952 - goto out; 3953 - } 3948 + } 3949 + if (found_key.offset != key.offset + right_len) { 3950 + ret = 0; 3951 + goto out; 3954 3952 } 3955 3953 key = found_key; 3956 3954 }
+12 -1
fs/btrfs/volumes.c
··· 4935 4935 em = lookup_extent_mapping(em_tree, chunk_start, 1); 4936 4936 read_unlock(&em_tree->lock); 4937 4937 4938 - BUG_ON(!em || em->start != chunk_start); 4938 + if (!em) { 4939 + printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", 4940 + chunk_start); 4941 + return -EIO; 4942 + } 4943 + 4944 + if (em->start != chunk_start) { 4945 + printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", 4946 + em->start, chunk_start); 4947 + free_extent_map(em); 4948 + return -EIO; 4949 + } 4939 4950 map = (struct map_lookup *)em->bdev; 4940 4951 4941 4952 length = em->len;