Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
"I held off on my rc5 pull because I hit an oops during log recovery
after a crash. I wanted to make sure it wasn't a regression because
we have some logging fixes in here.

It turns out that a commit during the merge window just made it much
more likely to trigger directory logging instead of full commits,
which exposed an old bug.

The new backref walking code got some additional fixes. This should
be the final set of them.

Josef fixed up a corner case where our O_DIRECT writes and buffered reads
could expose old file contents (not stale, just not the most recent).
He and Liu Bo fixed crashes during tree log recovery as well.

Ilya fixed errors while we resume disk balancing operations on
readonly mounts."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: run delayed directory updates during log replay
Btrfs: hold a ref on the inode during writepages
Btrfs: fix tree log remove space corner case
Btrfs: fix wrong check during log recovery
Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS
Btrfs: resume balance on rw (re)mounts properly
Btrfs: restore restriper state on all mounts
Btrfs: fix dio write vs buffered read race
Btrfs: don't count I/O statistic read errors for missing devices
Btrfs: resolve tree mod log locking issue in btrfs_next_leaf
Btrfs: fix tree mod log rewind of ADD operations
Btrfs: leave critical region in btrfs_find_all_roots as soon as possible
Btrfs: always put insert_ptr modifications into the tree mod log
Btrfs: fix tree mod log for root replacements at leaf level
Btrfs: support root level changes in __resolve_indirect_ref
Btrfs: avoid waiting for delayed refs when we must not

+260 -203
+9 -6
fs/btrfs/backref.c
··· 301 301 goto out; 302 302 303 303 eb = path->nodes[level]; 304 - if (!eb) { 305 - WARN_ON(1); 306 - ret = 1; 307 - goto out; 304 + while (!eb) { 305 + if (!level) { 306 + WARN_ON(1); 307 + ret = 1; 308 + goto out; 309 + } 310 + level--; 311 + eb = path->nodes[level]; 308 312 } 309 313 310 314 ret = add_all_parents(root, path, parents, level, &ref->key_for_search, ··· 839 835 } 840 836 ret = __add_delayed_refs(head, delayed_ref_seq, 841 837 &prefs_delayed); 838 + mutex_unlock(&head->mutex); 842 839 if (ret) { 843 840 spin_unlock(&delayed_refs->lock); 844 841 goto out; ··· 933 928 } 934 929 935 930 out: 936 - if (head) 937 - mutex_unlock(&head->mutex); 938 931 btrfs_free_path(path); 939 932 while (!list_empty(&prefs)) { 940 933 ref = list_first_entry(&prefs, struct __prelim_ref, list);
+35 -25
fs/btrfs/ctree.c
··· 1024 1024 if (!looped && !tm) 1025 1025 return 0; 1026 1026 /* 1027 - * we must have key remove operations in the log before the 1028 - * replace operation. 1027 + * if there are no tree operation for the oldest root, we simply 1028 + * return it. this should only happen if that (old) root is at 1029 + * level 0. 1029 1030 */ 1030 - BUG_ON(!tm); 1031 + if (!tm) 1032 + break; 1031 1033 1034 + /* 1035 + * if there's an operation that's not a root replacement, we 1036 + * found the oldest version of our root. normally, we'll find a 1037 + * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. 1038 + */ 1032 1039 if (tm->op != MOD_LOG_ROOT_REPLACE) 1033 1040 break; 1034 1041 ··· 1094 1087 tm->generation); 1095 1088 break; 1096 1089 case MOD_LOG_KEY_ADD: 1097 - if (tm->slot != n - 1) { 1098 - o_dst = btrfs_node_key_ptr_offset(tm->slot); 1099 - o_src = btrfs_node_key_ptr_offset(tm->slot + 1); 1100 - memmove_extent_buffer(eb, o_dst, o_src, p_size); 1101 - } 1090 + /* if a move operation is needed it's in the log */ 1102 1091 n--; 1103 1092 break; 1104 1093 case MOD_LOG_MOVE_KEYS: ··· 1195 1192 } 1196 1193 1197 1194 tm = tree_mod_log_search(root->fs_info, logical, time_seq); 1198 - /* 1199 - * there was an item in the log when __tree_mod_log_oldest_root 1200 - * returned. this one must not go away, because the time_seq passed to 1201 - * us must be blocking its removal. 
1202 - */ 1203 - BUG_ON(!tm); 1204 - 1205 1195 if (old_root) 1206 - eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, 1207 - root->nodesize); 1196 + eb = alloc_dummy_extent_buffer(logical, root->nodesize); 1208 1197 else 1209 1198 eb = btrfs_clone_extent_buffer(root->node); 1210 1199 btrfs_tree_read_unlock(root->node); ··· 1211 1216 btrfs_set_header_level(eb, old_root->level); 1212 1217 btrfs_set_header_generation(eb, old_generation); 1213 1218 } 1214 - __tree_mod_log_rewind(eb, time_seq, tm); 1219 + if (tm) 1220 + __tree_mod_log_rewind(eb, time_seq, tm); 1221 + else 1222 + WARN_ON(btrfs_header_level(eb) != 0); 1215 1223 extent_buffer_get(eb); 1216 1224 1217 1225 return eb; ··· 2993 2995 static void insert_ptr(struct btrfs_trans_handle *trans, 2994 2996 struct btrfs_root *root, struct btrfs_path *path, 2995 2997 struct btrfs_disk_key *key, u64 bytenr, 2996 - int slot, int level, int tree_mod_log) 2998 + int slot, int level) 2997 2999 { 2998 3000 struct extent_buffer *lower; 2999 3001 int nritems; ··· 3006 3008 BUG_ON(slot > nritems); 3007 3009 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); 3008 3010 if (slot != nritems) { 3009 - if (tree_mod_log && level) 3011 + if (level) 3010 3012 tree_mod_log_eb_move(root->fs_info, lower, slot + 1, 3011 3013 slot, nritems - slot); 3012 3014 memmove_extent_buffer(lower, ··· 3014 3016 btrfs_node_key_ptr_offset(slot), 3015 3017 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 3016 3018 } 3017 - if (tree_mod_log && level) { 3019 + if (level) { 3018 3020 ret = tree_mod_log_insert_key(root->fs_info, lower, slot, 3019 3021 MOD_LOG_KEY_ADD); 3020 3022 BUG_ON(ret < 0); ··· 3102 3104 btrfs_mark_buffer_dirty(split); 3103 3105 3104 3106 insert_ptr(trans, root, path, &disk_key, split->start, 3105 - path->slots[level + 1] + 1, level + 1, 1); 3107 + path->slots[level + 1] + 1, level + 1); 3106 3108 3107 3109 if (path->slots[level] >= mid) { 3108 3110 path->slots[level] -= mid; ··· 3639 3641 btrfs_set_header_nritems(l, mid); 
3640 3642 btrfs_item_key(right, &disk_key, 0); 3641 3643 insert_ptr(trans, root, path, &disk_key, right->start, 3642 - path->slots[1] + 1, 1, 0); 3644 + path->slots[1] + 1, 1); 3643 3645 3644 3646 btrfs_mark_buffer_dirty(right); 3645 3647 btrfs_mark_buffer_dirty(l); ··· 3846 3848 if (mid <= slot) { 3847 3849 btrfs_set_header_nritems(right, 0); 3848 3850 insert_ptr(trans, root, path, &disk_key, right->start, 3849 - path->slots[1] + 1, 1, 0); 3851 + path->slots[1] + 1, 1); 3850 3852 btrfs_tree_unlock(path->nodes[0]); 3851 3853 free_extent_buffer(path->nodes[0]); 3852 3854 path->nodes[0] = right; ··· 3855 3857 } else { 3856 3858 btrfs_set_header_nritems(right, 0); 3857 3859 insert_ptr(trans, root, path, &disk_key, right->start, 3858 - path->slots[1], 1, 0); 3860 + path->slots[1], 1); 3859 3861 btrfs_tree_unlock(path->nodes[0]); 3860 3862 free_extent_buffer(path->nodes[0]); 3861 3863 path->nodes[0] = right; ··· 5119 5121 5120 5122 if (!path->skip_locking) { 5121 5123 ret = btrfs_try_tree_read_lock(next); 5124 + if (!ret && time_seq) { 5125 + /* 5126 + * If we don't get the lock, we may be racing 5127 + * with push_leaf_left, holding that lock while 5128 + * itself waiting for the leaf we've currently 5129 + * locked. To solve this situation, we give up 5130 + * on our lock and cycle. 5131 + */ 5132 + btrfs_release_path(path); 5133 + cond_resched(); 5134 + goto again; 5135 + } 5122 5136 if (!ret) { 5123 5137 btrfs_set_path_blocking(path); 5124 5138 btrfs_tree_read_lock(next);
+21 -13
fs/btrfs/disk-io.c
··· 2354 2354 BTRFS_CSUM_TREE_OBJECTID, csum_root); 2355 2355 if (ret) 2356 2356 goto recovery_tree_root; 2357 - 2358 2357 csum_root->track_dirty = 1; 2359 2358 2360 2359 fs_info->generation = generation; 2361 2360 fs_info->last_trans_committed = generation; 2361 + 2362 + ret = btrfs_recover_balance(fs_info); 2363 + if (ret) { 2364 + printk(KERN_WARNING "btrfs: failed to recover balance\n"); 2365 + goto fail_block_groups; 2366 + } 2362 2367 2363 2368 ret = btrfs_init_dev_stats(fs_info); 2364 2369 if (ret) { ··· 2490 2485 goto fail_trans_kthread; 2491 2486 } 2492 2487 2493 - if (!(sb->s_flags & MS_RDONLY)) { 2494 - down_read(&fs_info->cleanup_work_sem); 2495 - err = btrfs_orphan_cleanup(fs_info->fs_root); 2496 - if (!err) 2497 - err = btrfs_orphan_cleanup(fs_info->tree_root); 2488 + if (sb->s_flags & MS_RDONLY) 2489 + return 0; 2490 + 2491 + down_read(&fs_info->cleanup_work_sem); 2492 + if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || 2493 + (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { 2498 2494 up_read(&fs_info->cleanup_work_sem); 2495 + close_ctree(tree_root); 2496 + return ret; 2497 + } 2498 + up_read(&fs_info->cleanup_work_sem); 2499 2499 2500 - if (!err) 2501 - err = btrfs_recover_balance(fs_info->tree_root); 2502 - 2503 - if (err) { 2504 - close_ctree(tree_root); 2505 - return err; 2506 - } 2500 + ret = btrfs_resume_balance_async(fs_info); 2501 + if (ret) { 2502 + printk(KERN_WARNING "btrfs: failed to resume balance\n"); 2503 + close_ctree(tree_root); 2504 + return ret; 2507 2505 } 2508 2506 2509 2507 return 0;
+6 -5
fs/btrfs/extent-tree.c
··· 2347 2347 return count; 2348 2348 } 2349 2349 2350 - 2351 2350 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, 2352 - unsigned long num_refs) 2351 + unsigned long num_refs, 2352 + struct list_head *first_seq) 2353 2353 { 2354 - struct list_head *first_seq = delayed_refs->seq_head.next; 2355 - 2356 2354 spin_unlock(&delayed_refs->lock); 2357 2355 pr_debug("waiting for more refs (num %ld, first %p)\n", 2358 2356 num_refs, first_seq); ··· 2379 2381 struct btrfs_delayed_ref_root *delayed_refs; 2380 2382 struct btrfs_delayed_ref_node *ref; 2381 2383 struct list_head cluster; 2384 + struct list_head *first_seq = NULL; 2382 2385 int ret; 2383 2386 u64 delayed_start; 2384 2387 int run_all = count == (unsigned long)-1; ··· 2435 2436 */ 2436 2437 consider_waiting = 1; 2437 2438 num_refs = delayed_refs->num_entries; 2439 + first_seq = root->fs_info->tree_mod_seq_list.next; 2438 2440 } else { 2439 - wait_for_more_refs(delayed_refs, num_refs); 2441 + wait_for_more_refs(delayed_refs, 2442 + num_refs, first_seq); 2440 2443 /* 2441 2444 * after waiting, things have changed. we 2442 2445 * dropped the lock and someone else might have
+14
fs/btrfs/extent_io.c
··· 3324 3324 writepage_t writepage, void *data, 3325 3325 void (*flush_fn)(void *)) 3326 3326 { 3327 + struct inode *inode = mapping->host; 3327 3328 int ret = 0; 3328 3329 int done = 0; 3329 3330 int nr_to_write_done = 0; ··· 3334 3333 pgoff_t end; /* Inclusive */ 3335 3334 int scanned = 0; 3336 3335 int tag; 3336 + 3337 + /* 3338 + * We have to hold onto the inode so that ordered extents can do their 3339 + * work when the IO finishes. The alternative to this is failing to add 3340 + * an ordered extent if the igrab() fails there and that is a huge pain 3341 + * to deal with, so instead just hold onto the inode throughout the 3342 + * writepages operation. If it fails here we are freeing up the inode 3343 + * anyway and we'd rather not waste our time writing out stuff that is 3344 + * going to be truncated anyway. 3345 + */ 3346 + if (!igrab(inode)) 3347 + return 0; 3337 3348 3338 3349 pagevec_init(&pvec, 0); 3339 3350 if (wbc->range_cyclic) { ··· 3441 3428 index = 0; 3442 3429 goto retry; 3443 3430 } 3431 + btrfs_add_delayed_iput(inode); 3444 3432 return ret; 3445 3433 } 3446 3434
-13
fs/btrfs/file.c
··· 1334 1334 loff_t *ppos, size_t count, size_t ocount) 1335 1335 { 1336 1336 struct file *file = iocb->ki_filp; 1337 - struct inode *inode = fdentry(file)->d_inode; 1338 1337 struct iov_iter i; 1339 1338 ssize_t written; 1340 1339 ssize_t written_buffered; ··· 1342 1343 1343 1344 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, 1344 1345 count, ocount); 1345 - 1346 - /* 1347 - * the generic O_DIRECT will update in-memory i_size after the 1348 - * DIOs are done. But our endio handlers that update the on 1349 - * disk i_size never update past the in memory i_size. So we 1350 - * need one more update here to catch any additions to the 1351 - * file 1352 - */ 1353 - if (inode->i_size != BTRFS_I(inode)->disk_i_size) { 1354 - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 1355 - mark_inode_dirty(inode); 1356 - } 1357 1346 1358 1347 if (written < 0 || written == count) 1359 1348 return written;
+51 -92
fs/btrfs/free-space-cache.c
··· 1543 1543 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1544 1544 1545 1545 /* 1546 - * XXX - this can go away after a few releases. 1547 - * 1548 - * since the only user of btrfs_remove_free_space is the tree logging 1549 - * stuff, and the only way to test that is under crash conditions, we 1550 - * want to have this debug stuff here just in case somethings not 1551 - * working. Search the bitmap for the space we are trying to use to 1552 - * make sure its actually there. If its not there then we need to stop 1553 - * because something has gone wrong. 1546 + * We need to search for bits in this bitmap. We could only cover some 1547 + * of the extent in this bitmap thanks to how we add space, so we need 1548 + * to search for as much as it as we can and clear that amount, and then 1549 + * go searching for the next bit. 1554 1550 */ 1555 1551 search_start = *offset; 1556 - search_bytes = *bytes; 1552 + search_bytes = ctl->unit; 1557 1553 search_bytes = min(search_bytes, end - search_start + 1); 1558 1554 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1559 1555 BUG_ON(ret < 0 || search_start != *offset); 1560 1556 1561 - if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1562 - bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); 1563 - *bytes -= end - *offset + 1; 1564 - *offset = end + 1; 1565 - } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1566 - bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); 1567 - *bytes = 0; 1568 - } 1557 + /* We may have found more bits than what we need */ 1558 + search_bytes = min(search_bytes, *bytes); 1559 + 1560 + /* Cannot clear past the end of the bitmap */ 1561 + search_bytes = min(search_bytes, end - search_start + 1); 1562 + 1563 + bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); 1564 + *offset += search_bytes; 1565 + *bytes -= search_bytes; 1569 1566 1570 1567 if (*bytes) { 1571 1568 struct rb_node *next = 
rb_next(&bitmap_info->offset_index); ··· 1593 1596 * everything over again. 1594 1597 */ 1595 1598 search_start = *offset; 1596 - search_bytes = *bytes; 1599 + search_bytes = ctl->unit; 1597 1600 ret = search_bitmap(ctl, bitmap_info, &search_start, 1598 1601 &search_bytes); 1599 1602 if (ret < 0 || search_start != *offset) ··· 1876 1879 { 1877 1880 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1878 1881 struct btrfs_free_space *info; 1879 - struct btrfs_free_space *next_info = NULL; 1880 1882 int ret = 0; 1881 1883 1882 1884 spin_lock(&ctl->tree_lock); 1883 1885 1884 1886 again: 1887 + if (!bytes) 1888 + goto out_lock; 1889 + 1885 1890 info = tree_search_offset(ctl, offset, 0, 0); 1886 1891 if (!info) { 1887 1892 /* ··· 1904 1905 } 1905 1906 } 1906 1907 1907 - if (info->bytes < bytes && rb_next(&info->offset_index)) { 1908 - u64 end; 1909 - next_info = rb_entry(rb_next(&info->offset_index), 1910 - struct btrfs_free_space, 1911 - offset_index); 1912 - 1913 - if (next_info->bitmap) 1914 - end = next_info->offset + 1915 - BITS_PER_BITMAP * ctl->unit - 1; 1916 - else 1917 - end = next_info->offset + next_info->bytes; 1918 - 1919 - if (next_info->bytes < bytes || 1920 - next_info->offset > offset || offset > end) { 1921 - printk(KERN_CRIT "Found free space at %llu, size %llu," 1922 - " trying to use %llu\n", 1923 - (unsigned long long)info->offset, 1924 - (unsigned long long)info->bytes, 1925 - (unsigned long long)bytes); 1926 - WARN_ON(1); 1927 - ret = -EINVAL; 1928 - goto out_lock; 1929 - } 1930 - 1931 - info = next_info; 1932 - } 1933 - 1934 - if (info->bytes == bytes) { 1908 + if (!info->bitmap) { 1935 1909 unlink_free_space(ctl, info); 1936 - if (info->bitmap) { 1937 - kfree(info->bitmap); 1938 - ctl->total_bitmaps--; 1939 - } 1940 - kmem_cache_free(btrfs_free_space_cachep, info); 1941 - ret = 0; 1942 - goto out_lock; 1943 - } 1910 + if (offset == info->offset) { 1911 + u64 to_free = min(bytes, info->bytes); 1944 1912 1945 - if (!info->bitmap && 
info->offset == offset) { 1946 - unlink_free_space(ctl, info); 1947 - info->offset += bytes; 1948 - info->bytes -= bytes; 1949 - ret = link_free_space(ctl, info); 1950 - WARN_ON(ret); 1951 - goto out_lock; 1952 - } 1913 + info->bytes -= to_free; 1914 + info->offset += to_free; 1915 + if (info->bytes) { 1916 + ret = link_free_space(ctl, info); 1917 + WARN_ON(ret); 1918 + } else { 1919 + kmem_cache_free(btrfs_free_space_cachep, info); 1920 + } 1953 1921 1954 - if (!info->bitmap && info->offset <= offset && 1955 - info->offset + info->bytes >= offset + bytes) { 1956 - u64 old_start = info->offset; 1957 - /* 1958 - * we're freeing space in the middle of the info, 1959 - * this can happen during tree log replay 1960 - * 1961 - * first unlink the old info and then 1962 - * insert it again after the hole we're creating 1963 - */ 1964 - unlink_free_space(ctl, info); 1965 - if (offset + bytes < info->offset + info->bytes) { 1966 - u64 old_end = info->offset + info->bytes; 1922 + offset += to_free; 1923 + bytes -= to_free; 1924 + goto again; 1925 + } else { 1926 + u64 old_end = info->bytes + info->offset; 1967 1927 1968 - info->offset = offset + bytes; 1969 - info->bytes = old_end - info->offset; 1928 + info->bytes = offset - info->offset; 1970 1929 ret = link_free_space(ctl, info); 1971 1930 WARN_ON(ret); 1972 1931 if (ret) 1973 1932 goto out_lock; 1974 - } else { 1975 - /* the hole we're creating ends at the end 1976 - * of the info struct, just free the info 1977 - */ 1978 - kmem_cache_free(btrfs_free_space_cachep, info); 1979 - } 1980 - spin_unlock(&ctl->tree_lock); 1981 1933 1982 - /* step two, insert a new info struct to cover 1983 - * anything before the hole 1984 - */ 1985 - ret = btrfs_add_free_space(block_group, old_start, 1986 - offset - old_start); 1987 - WARN_ON(ret); /* -ENOMEM */ 1988 - goto out; 1934 + /* Not enough bytes in this entry to satisfy us */ 1935 + if (old_end < offset + bytes) { 1936 + bytes -= old_end - offset; 1937 + offset = old_end; 1938 + 
goto again; 1939 + } else if (old_end == offset + bytes) { 1940 + /* all done */ 1941 + goto out_lock; 1942 + } 1943 + spin_unlock(&ctl->tree_lock); 1944 + 1945 + ret = btrfs_add_free_space(block_group, offset + bytes, 1946 + old_end - (offset + bytes)); 1947 + WARN_ON(ret); 1948 + goto out; 1949 + } 1989 1950 } 1990 1951 1991 1952 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
+51 -6
fs/btrfs/inode.c
··· 3754 3754 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3755 3755 3756 3756 if (root->fs_info->log_root_recovering) { 3757 - BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3757 + BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3758 3758 &BTRFS_I(inode)->runtime_flags)); 3759 3759 goto no_delete; 3760 3760 } ··· 5876 5876 bh_result->b_size = len; 5877 5877 bh_result->b_bdev = em->bdev; 5878 5878 set_buffer_mapped(bh_result); 5879 - if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5880 - set_buffer_new(bh_result); 5879 + if (create) { 5880 + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5881 + set_buffer_new(bh_result); 5882 + 5883 + /* 5884 + * Need to update the i_size under the extent lock so buffered 5885 + * readers will get the updated i_size when we unlock. 5886 + */ 5887 + if (start + len > i_size_read(inode)) 5888 + i_size_write(inode, start + len); 5889 + } 5881 5890 5882 5891 free_extent_map(em); 5883 5892 ··· 6369 6360 */ 6370 6361 ordered = btrfs_lookup_ordered_range(inode, lockstart, 6371 6362 lockend - lockstart + 1); 6372 - if (!ordered) 6363 + 6364 + /* 6365 + * We need to make sure there are no buffered pages in this 6366 + * range either, we could have raced between the invalidate in 6367 + * generic_file_direct_write and locking the extent. The 6368 + * invalidate needs to happen so that reads after a write do not 6369 + * get stale data. 
6370 + */ 6371 + if (!ordered && (!writing || 6372 + !test_range_bit(&BTRFS_I(inode)->io_tree, 6373 + lockstart, lockend, EXTENT_UPTODATE, 0, 6374 + cached_state))) 6373 6375 break; 6376 + 6374 6377 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6375 6378 &cached_state, GFP_NOFS); 6376 - btrfs_start_ordered_extent(inode, ordered, 1); 6377 - btrfs_put_ordered_extent(ordered); 6379 + 6380 + if (ordered) { 6381 + btrfs_start_ordered_extent(inode, ordered, 1); 6382 + btrfs_put_ordered_extent(ordered); 6383 + } else { 6384 + /* Screw you mmap */ 6385 + ret = filemap_write_and_wait_range(file->f_mapping, 6386 + lockstart, 6387 + lockend); 6388 + if (ret) 6389 + goto out; 6390 + 6391 + /* 6392 + * If we found a page that couldn't be invalidated just 6393 + * fall back to buffered. 6394 + */ 6395 + ret = invalidate_inode_pages2_range(file->f_mapping, 6396 + lockstart >> PAGE_CACHE_SHIFT, 6397 + lockend >> PAGE_CACHE_SHIFT); 6398 + if (ret) { 6399 + if (ret == -EBUSY) 6400 + ret = 0; 6401 + goto out; 6402 + } 6403 + } 6404 + 6378 6405 cond_resched(); 6379 6406 } 6380 6407
+1 -1
fs/btrfs/ioctl.h
··· 339 339 #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 340 340 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 341 341 struct btrfs_ioctl_vol_args_v2) 342 - #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 342 + #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) 343 343 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 344 344 #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ 345 345 struct btrfs_ioctl_scrub_args)
+4
fs/btrfs/super.c
··· 1187 1187 if (ret) 1188 1188 goto restore; 1189 1189 1190 + ret = btrfs_resume_balance_async(fs_info); 1191 + if (ret) 1192 + goto restore; 1193 + 1190 1194 sb->s_flags &= ~MS_RDONLY; 1191 1195 } 1192 1196
+6
fs/btrfs/tree-log.c
··· 690 690 kfree(name); 691 691 692 692 iput(inode); 693 + 694 + btrfs_run_delayed_items(trans, root); 693 695 return ret; 694 696 } 695 697 ··· 897 895 ret = btrfs_unlink_inode(trans, root, dir, 898 896 inode, victim_name, 899 897 victim_name_len); 898 + btrfs_run_delayed_items(trans, root); 900 899 } 901 900 kfree(victim_name); 902 901 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; ··· 1478 1475 ret = btrfs_unlink_inode(trans, root, dir, inode, 1479 1476 name, name_len); 1480 1477 BUG_ON(ret); 1478 + 1479 + btrfs_run_delayed_items(trans, root); 1480 + 1481 1481 kfree(name); 1482 1482 iput(inode); 1483 1483
+60 -41
fs/btrfs/volumes.c
··· 2845 2845 2846 2846 static int balance_kthread(void *data) 2847 2847 { 2848 - struct btrfs_balance_control *bctl = 2849 - (struct btrfs_balance_control *)data; 2850 - struct btrfs_fs_info *fs_info = bctl->fs_info; 2848 + struct btrfs_fs_info *fs_info = data; 2851 2849 int ret = 0; 2852 2850 2853 2851 mutex_lock(&fs_info->volume_mutex); 2854 2852 mutex_lock(&fs_info->balance_mutex); 2855 2853 2856 - set_balance_control(bctl); 2857 - 2858 - if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { 2859 - printk(KERN_INFO "btrfs: force skipping balance\n"); 2860 - } else { 2854 + if (fs_info->balance_ctl) { 2861 2855 printk(KERN_INFO "btrfs: continuing balance\n"); 2862 - ret = btrfs_balance(bctl, NULL); 2856 + ret = btrfs_balance(fs_info->balance_ctl, NULL); 2863 2857 } 2864 2858 2865 2859 mutex_unlock(&fs_info->balance_mutex); 2866 2860 mutex_unlock(&fs_info->volume_mutex); 2861 + 2867 2862 return ret; 2868 2863 } 2869 2864 2870 - int btrfs_recover_balance(struct btrfs_root *tree_root) 2865 + int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) 2871 2866 { 2872 2867 struct task_struct *tsk; 2868 + 2869 + spin_lock(&fs_info->balance_lock); 2870 + if (!fs_info->balance_ctl) { 2871 + spin_unlock(&fs_info->balance_lock); 2872 + return 0; 2873 + } 2874 + spin_unlock(&fs_info->balance_lock); 2875 + 2876 + if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { 2877 + printk(KERN_INFO "btrfs: force skipping balance\n"); 2878 + return 0; 2879 + } 2880 + 2881 + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 2882 + if (IS_ERR(tsk)) 2883 + return PTR_ERR(tsk); 2884 + 2885 + return 0; 2886 + } 2887 + 2888 + int btrfs_recover_balance(struct btrfs_fs_info *fs_info) 2889 + { 2873 2890 struct btrfs_balance_control *bctl; 2874 2891 struct btrfs_balance_item *item; 2875 2892 struct btrfs_disk_balance_args disk_bargs; ··· 2899 2882 if (!path) 2900 2883 return -ENOMEM; 2901 2884 2885 + key.objectid = BTRFS_BALANCE_OBJECTID; 2886 + key.type = 
BTRFS_BALANCE_ITEM_KEY; 2887 + key.offset = 0; 2888 + 2889 + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 2890 + if (ret < 0) 2891 + goto out; 2892 + if (ret > 0) { /* ret = -ENOENT; */ 2893 + ret = 0; 2894 + goto out; 2895 + } 2896 + 2902 2897 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 2903 2898 if (!bctl) { 2904 2899 ret = -ENOMEM; 2905 2900 goto out; 2906 2901 } 2907 2902 2908 - key.objectid = BTRFS_BALANCE_OBJECTID; 2909 - key.type = BTRFS_BALANCE_ITEM_KEY; 2910 - key.offset = 0; 2911 - 2912 - ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); 2913 - if (ret < 0) 2914 - goto out_bctl; 2915 - if (ret > 0) { /* ret = -ENOENT; */ 2916 - ret = 0; 2917 - goto out_bctl; 2918 - } 2919 - 2920 2903 leaf = path->nodes[0]; 2921 2904 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); 2922 2905 2923 - bctl->fs_info = tree_root->fs_info; 2924 - bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; 2906 + bctl->fs_info = fs_info; 2907 + bctl->flags = btrfs_balance_flags(leaf, item); 2908 + bctl->flags |= BTRFS_BALANCE_RESUME; 2925 2909 2926 2910 btrfs_balance_data(leaf, item, &disk_bargs); 2927 2911 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); ··· 2931 2913 btrfs_balance_sys(leaf, item, &disk_bargs); 2932 2914 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 2933 2915 2934 - tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); 2935 - if (IS_ERR(tsk)) 2936 - ret = PTR_ERR(tsk); 2937 - else 2938 - goto out; 2916 + mutex_lock(&fs_info->volume_mutex); 2917 + mutex_lock(&fs_info->balance_mutex); 2939 2918 2940 - out_bctl: 2941 - kfree(bctl); 2919 + set_balance_control(bctl); 2920 + 2921 + mutex_unlock(&fs_info->balance_mutex); 2922 + mutex_unlock(&fs_info->volume_mutex); 2942 2923 out: 2943 2924 btrfs_free_path(path); 2944 2925 return ret; ··· 4078 4061 4079 4062 BUG_ON(stripe_index >= bbio->num_stripes); 4080 4063 dev = bbio->stripes[stripe_index].dev; 4081 - if (bio->bi_rw & WRITE) 
4082 - btrfs_dev_stat_inc(dev, 4083 - BTRFS_DEV_STAT_WRITE_ERRS); 4084 - else 4085 - btrfs_dev_stat_inc(dev, 4086 - BTRFS_DEV_STAT_READ_ERRS); 4087 - if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) 4088 - btrfs_dev_stat_inc(dev, 4089 - BTRFS_DEV_STAT_FLUSH_ERRS); 4090 - btrfs_dev_stat_print_on_error(dev); 4064 + if (dev->bdev) { 4065 + if (bio->bi_rw & WRITE) 4066 + btrfs_dev_stat_inc(dev, 4067 + BTRFS_DEV_STAT_WRITE_ERRS); 4068 + else 4069 + btrfs_dev_stat_inc(dev, 4070 + BTRFS_DEV_STAT_READ_ERRS); 4071 + if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) 4072 + btrfs_dev_stat_inc(dev, 4073 + BTRFS_DEV_STAT_FLUSH_ERRS); 4074 + btrfs_dev_stat_print_on_error(dev); 4075 + } 4091 4076 } 4092 4077 } 4093 4078
+2 -1
fs/btrfs/volumes.h
··· 281 281 int btrfs_init_new_device(struct btrfs_root *root, char *path); 282 282 int btrfs_balance(struct btrfs_balance_control *bctl, 283 283 struct btrfs_ioctl_balance_args *bargs); 284 - int btrfs_recover_balance(struct btrfs_root *tree_root); 284 + int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); 285 + int btrfs_recover_balance(struct btrfs_fs_info *fs_info); 285 286 int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 286 287 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 287 288 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);