Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
"I've split out the big send/receive update from my last pull request
and now have just the fixes in my for-linus branch. The send/recv
branch will wander over to linux-next shortly though.

The largest patches in this pull are Josef's patches to fix DIO
locking problems and his patch to fix a crash during balance. They
are both well tested.

The rest are smaller fixes that we've had queued. The last rc came
out while I was hacking new and exciting ways to recover from a
misplaced rm -rf on my dev box, so these missed rc3."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
Btrfs: fix that repair code is spuriously executed for transid failures
Btrfs: fix ordered extent leak when failing to start a transaction
Btrfs: fix a dio write regression
Btrfs: fix deadlock with freeze and sync V2
Btrfs: revert checksum error statistic which can cause a BUG()
Btrfs: remove superblock writing after fatal error
Btrfs: allow delayed refs to be merged
Btrfs: fix enospc problems when deleting a subvol
Btrfs: fix wrong mtime and ctime when creating snapshots
Btrfs: fix race in run_clustered_refs
Btrfs: don't run __tree_mod_log_free_eb on leaves
Btrfs: increase the size of the free space cache
Btrfs: barrier before waitqueue_active
Btrfs: fix deadlock in wait_for_more_refs
btrfs: fix second lock in btrfs_delete_delayed_items()
Btrfs: don't allocate a seperate csums array for direct reads
Btrfs: do not strdup non existent strings
Btrfs: do not use missing devices when showing devname
Btrfs: fix that error value is changed by mistake
Btrfs: lock extents as we map them in DIO
...

+418 -376
+2 -2
fs/btrfs/backref.c
···
	ret = extent_from_logical(fs_info, logical, path,
				  &found_key);
	btrfs_release_path(path);
-	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
-		ret = -EINVAL;
	if (ret < 0)
		return ret;
+	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		return -EINVAL;

	extent_item_pos = logical - found_key.objectid;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
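The reordering matters because extent_from_logical() multiplexes its return value: a negative errno on failure, extent flags on success. A negative errno has nearly all of its high bits set, so testing a flag bit before testing for an error misreads failures. A minimal standalone sketch of the rule (FLAG_TREE_BLOCK is a stand-in constant, not the real BTRFS_EXTENT_FLAG_TREE_BLOCK definition):

#include <errno.h>

#define FLAG_TREE_BLOCK 0x2	/* stand-in for BTRFS_EXTENT_FLAG_TREE_BLOCK */

/* -EINVAL is -22, i.e. ...11101010 in two's complement, so
 * (-EINVAL & FLAG_TREE_BLOCK) is nonzero: an error looks like a flag. */
static int handle_lookup_result(int ret)
{
	if (ret < 0)			/* real error: propagate it */
		return ret;
	if (ret & FLAG_TREE_BLOCK)	/* valid flags: reject tree blocks */
		return -EINVAL;
	return 0;			/* a data extent we can iterate */
}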
+1
fs/btrfs/compression.c
···
	btrfs_compress_op[idx]->free_workspace(workspace);
	atomic_dec(alloc_workspace);
wake:
+	smp_mb();
	if (waitqueue_active(workspace_wait))
		wake_up(workspace_wait);
}
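waitqueue_active() peeks at the wait list without taking the waitqueue lock, so the waker needs a full memory barrier between the store that makes the sleeper's condition true and the lockless check for waiters; otherwise the two can be reordered and a wakeup lost. That is all the "barrier before waitqueue_active" change adds here (and in disk-io.c below). The idiom in outline, using the names from this hunk:

	atomic_dec(alloc_workspace);		/* the condition a sleeper re-tests */
	smp_mb();				/* order the store before the waiter check */
	if (waitqueue_active(workspace_wait))	/* lockless: needs the barrier above */
		wake_up(workspace_wait);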
+3 -6
fs/btrfs/ctree.c
···
	spin_unlock(&fs_info->tree_mod_seq_lock);

	/*
-	 * we removed the lowest blocker from the blocker list, so there may be
-	 * more processible delayed refs.
-	 */
-	wake_up(&fs_info->tree_mod_seq_wait);
-
-	/*
	 * anything that's lower than the lowest existing (read: blocked)
	 * sequence number can be removed from the tree.
	 */
···
	int i;
	u32 nritems;
	int ret;
+
+	if (btrfs_header_level(eb) == 0)
+		return;

	nritems = btrfs_header_nritems(eb);
	for (i = nritems - 1; i >= 0; i--) {
+1 -2
fs/btrfs/ctree.h
···
	atomic_t tree_mod_seq;
	struct list_head tree_mod_seq_list;
	struct seq_list tree_mod_seq_elem;
-	wait_queue_head_t tree_mod_seq_wait;

	/* this protects tree_mod_log */
	rwlock_t tree_mod_log_lock;
···
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
			  struct bio *bio, u32 *dst);
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct bio *bio, u64 logical_offset, u32 *dst);
+			      struct bio *bio, u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     u64 objectid, u64 pos,
+6 -6
fs/btrfs/delayed-inode.c
···

	rb_erase(&delayed_item->rb_node, root);
	delayed_item->delayed_node->count--;
-	atomic_dec(&delayed_root->items);
-	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+	if (atomic_dec_return(&delayed_root->items) <
+	    BTRFS_DELAYED_BACKGROUND &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);
}
···
		btrfs_release_delayed_item(prev);
		ret = 0;
		btrfs_release_path(path);
-		if (curr)
+		if (curr) {
+			mutex_unlock(&node->mutex);
			goto do_again;
-		else
+		} else
			goto delete_fail;
	}
···
	delayed_node->count--;

	delayed_root = delayed_node->root->fs_info->delayed_root;
-	atomic_dec(&delayed_root->items);
-	if (atomic_read(&delayed_root->items) <
+	if (atomic_dec_return(&delayed_root->items) <
	    BTRFS_DELAYED_BACKGROUND &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);
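The atomic_dec()/atomic_read() pair replaced here was racy: another task could change the counter between the decrement and the re-read, so the threshold test could fire twice or not at all. atomic_dec_return() folds both into one read-modify-write. The same conversion recurs below in disk-io.c, inode.c and volumes.c. Side by side:

	/* racy: the counter can move between the two operations */
	atomic_dec(&delayed_root->items);
	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);

	/* atomic: the new value comes back from the same RMW operation */
	if (atomic_dec_return(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);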
+128 -35
fs/btrfs/delayed-ref.c
···
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
			  struct btrfs_delayed_tree_ref *ref1)
{
-	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
-		if (ref1->root < ref2->root)
-			return -1;
-		if (ref1->root > ref2->root)
-			return 1;
-	} else {
-		if (ref1->parent < ref2->parent)
-			return -1;
-		if (ref1->parent > ref2->parent)
-			return 1;
-	}
+	if (ref1->root < ref2->root)
+		return -1;
+	if (ref1->root > ref2->root)
+		return 1;
+	if (ref1->parent < ref2->parent)
+		return -1;
+	if (ref1->parent > ref2->parent)
+		return 1;
	return 0;
}
···
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-		      struct btrfs_delayed_ref_node *ref1)
+		      struct btrfs_delayed_ref_node *ref1,
+		      bool compare_seq)
{
	if (ref1->bytenr < ref2->bytenr)
		return -1;
···
	if (ref1->type > ref2->type)
		return 1;
	/* merging of sequenced refs is not allowed */
-	if (ref1->seq < ref2->seq)
-		return -1;
-	if (ref1->seq > ref2->seq)
-		return 1;
+	if (compare_seq) {
+		if (ref1->seq < ref2->seq)
+			return -1;
+		if (ref1->seq > ref2->seq)
+			return 1;
+	}
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
···
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 rb_node);

-		cmp = comp_entry(entry, ins);
+		cmp = comp_entry(entry, ins, 1);
		if (cmp < 0)
			p = &(*p)->rb_left;
		else if (cmp > 0)
···
	}
	btrfs_put_delayed_ref(&head->node);
	return 0;
+}
+
+static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+				    struct btrfs_delayed_ref_root *delayed_refs,
+				    struct btrfs_delayed_ref_node *ref)
+{
+	rb_erase(&ref->rb_node, &delayed_refs->root);
+	ref->in_tree = 0;
+	btrfs_put_delayed_ref(ref);
+	delayed_refs->num_entries--;
+	if (trans->delayed_ref_updates)
+		trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_root *delayed_refs,
+		     struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+	struct rb_node *node;
+	int merged = 0;
+	int mod = 0;
+	int done = 0;
+
+	node = rb_prev(&ref->rb_node);
+	while (node) {
+		struct btrfs_delayed_ref_node *next;
+
+		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		node = rb_prev(node);
+		if (next->bytenr != ref->bytenr)
+			break;
+		if (seq && next->seq >= seq)
+			break;
+		if (comp_entry(ref, next, 0))
+			continue;
+
+		if (ref->action == next->action) {
+			mod = next->ref_mod;
+		} else {
+			if (ref->ref_mod < next->ref_mod) {
+				struct btrfs_delayed_ref_node *tmp;
+
+				tmp = ref;
+				ref = next;
+				next = tmp;
+				done = 1;
+			}
+			mod = -next->ref_mod;
+		}
+
+		merged++;
+		drop_delayed_ref(trans, delayed_refs, next);
+		ref->ref_mod += mod;
+		if (ref->ref_mod == 0) {
+			drop_delayed_ref(trans, delayed_refs, ref);
+			break;
+		} else {
+			/*
+			 * You can't have multiples of the same ref on a tree
+			 * block.
+			 */
+			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+
+		if (done)
+			break;
+		node = rb_prev(&ref->rb_node);
+	}
+
+	return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head)
+{
+	struct rb_node *node;
+	u64 seq = 0;
+
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (!list_empty(&fs_info->tree_mod_seq_list)) {
+		struct seq_list *elem;
+
+		elem = list_first_entry(&fs_info->tree_mod_seq_list,
+					struct seq_list, list);
+		seq = elem->seq;
+	}
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
+	node = rb_prev(&head->node.rb_node);
+	while (node) {
+		struct btrfs_delayed_ref_node *ref;
+
+		ref = rb_entry(node, struct btrfs_delayed_ref_node,
+			       rb_node);
+		if (ref->bytenr != head->node.bytenr)
+			break;
+
+		/* We can't merge refs that are outside of our seq count */
+		if (seq && ref->seq >= seq)
+			break;
+		if (merge_ref(trans, delayed_refs, ref, seq))
+			node = rb_prev(&head->node.rb_node);
+		else
+			node = rb_prev(node);
+	}
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
···
	 * every changing the extent allocation tree.
	 */
	existing->ref_mod--;
-	if (existing->ref_mod == 0) {
-		rb_erase(&existing->rb_node,
-			 &delayed_refs->root);
-		existing->in_tree = 0;
-		btrfs_put_delayed_ref(existing);
-		delayed_refs->num_entries--;
-		if (trans->delayed_ref_updates)
-			trans->delayed_ref_updates--;
-	} else {
+	if (existing->ref_mod == 0)
+		drop_delayed_ref(trans, delayed_refs, existing);
+	else
		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-	}
} else {
	WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
		existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
···
	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, level, action,
			     for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
	spin_unlock(&delayed_refs->lock);
	if (need_ref_seq(for_cow, ref_root))
		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
···
	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
			     num_bytes, parent, ref_root, owner, offset,
			     action, for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
	spin_unlock(&delayed_refs->lock);
	if (need_ref_seq(for_cow, ref_root))
		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
···
			   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
			   extent_op->is_data);

-	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
	spin_unlock(&delayed_refs->lock);
	return 0;
}
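The heart of merge_ref() is the ref_mod arithmetic: two queued refs on the same extent with the same action fold their counts together, while opposite actions cancel. This is what lets the relocation case from the pull message (an implicit ref dropped and re-added before the drop can finish) annihilate quietly instead of confusing the ref-running code. A worked pass with hypothetical counts, an ADD with ref_mod=1 meeting a DROP with ref_mod=1:

	if (ref->action == next->action)
		mod = next->ref_mod;			/* same action: accumulate */
	else
		mod = -next->ref_mod;			/* opposite action: mod = -1 */

	drop_delayed_ref(trans, delayed_refs, next);	/* the DROP is consumed */
	ref->ref_mod += mod;				/* 1 + (-1) == 0 */
	if (ref->ref_mod == 0)				/* nothing left to run */
		drop_delayed_ref(trans, delayed_refs, ref);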
+4
fs/btrfs/delayed-ref.h
···
			  struct btrfs_trans_handle *trans,
			  u64 bytenr, u64 num_bytes,
			  struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head);

struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
+14 -39
fs/btrfs/disk-io.c
···
		ret = read_extent_buffer_pages(io_tree, eb, start,
					       WAIT_COMPLETE,
					       btree_get_extent, mirror_num);
-		if (!ret && !verify_parent_transid(io_tree, eb,
+		if (!ret) {
+			if (!verify_parent_transid(io_tree, eb,
						   parent_transid, 0))
-			break;
+				break;
+			else
+				ret = -EIO;
+		}

		/*
		 * This buffer's crc is fine, but its contents are corrupted, so
···
	limit = btrfs_async_submit_limit(fs_info);
	limit = limit * 2 / 3;

-	atomic_dec(&fs_info->nr_async_submits);
-
-	if (atomic_read(&fs_info->nr_async_submits) < limit &&
+	if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
	    waitqueue_active(&fs_info->async_submit_wait))
		wake_up(&fs_info->async_submit_wait);
···
	fs_info->free_chunk_space = 0;
	fs_info->tree_mod_log = RB_ROOT;

-	init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
	/* readahead state */
	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
	spin_lock_init(&fs_info->reada_lock);
···
		goto fail_trans_kthread;

	/* do not make disk changes in broken FS */
-	if (btrfs_super_log_root(disk_super) != 0 &&
-	    !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+	if (btrfs_super_log_root(disk_super) != 0) {
		u64 bytenr = btrfs_super_log_root(disk_super);

		if (fs_devices->rw_devices == 0) {
···
	/* clear out the rbtree of defraggable inodes */
	btrfs_run_defrag_inodes(fs_info);

-	/*
-	 * Here come 2 situations when btrfs is broken to flip readonly:
-	 *
-	 * 1. when btrfs flips readonly somewhere else before
-	 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
-	 * and btrfs will skip to write sb directly to keep
-	 * ERROR state on disk.
-	 *
-	 * 2. when btrfs flips readonly just in btrfs_commit_super,
-	 * and in such case, btrfs cannot write sb via btrfs_commit_super,
-	 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
-	 * btrfs will cleanup all FS resources first and write sb then.
-	 */
	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
		ret = btrfs_commit_super(root);
		if (ret)
			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
	}

-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		ret = btrfs_error_commit_super(root);
-		if (ret)
-			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
-	}
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+		btrfs_error_commit_super(root);

	btrfs_put_block_group_cache(fs_info);
···
	if (read_only)
		return 0;

-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		printk(KERN_WARNING "warning: mount fs with errors, "
-		       "running btrfsck is recommended\n");
-	}
-
	return 0;
}

-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
{
-	int ret;
-
	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_run_delayed_iputs(root);
	mutex_unlock(&root->fs_info->cleaner_mutex);
···
	/* cleanup FS via transaction */
	btrfs_cleanup_transaction(root);
-
-	ret = write_ctree_super(NULL, root, 0);
-
-	return ret;
}

static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
···
		/* FIXME: cleanup wait for commit */
		t->in_commit = 1;
		t->blocked = 1;
+		smp_mb();
		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
			wake_up(&root->fs_info->transaction_blocked_wait);

		t->blocked = 0;
+		smp_mb();
		if (waitqueue_active(&root->fs_info->transaction_wait))
			wake_up(&root->fs_info->transaction_wait);

		t->commit_done = 1;
+		smp_mb();
		if (waitqueue_active(&t->commit_wait))
			wake_up(&t->commit_wait);
+1 -1
fs/btrfs/disk-io.h
···
			struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
					    u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
+58 -65
fs/btrfs/extent-tree.c
···
		}

		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish. If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 */
+		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+					 locked_ref);
+
+		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
···
		ref->in_tree = 0;
		rb_erase(&ref->rb_node, &delayed_refs->root);
		delayed_refs->num_entries--;
-		/*
-		 * we modified num_entries, but as we're currently running
-		 * delayed refs, skip
-		 *     wake_up(&delayed_refs->seq_wait);
-		 * here.
-		 */
+		if (locked_ref) {
+			/*
+			 * when we play the delayed ref, also correct the
+			 * ref_mod on head
+			 */
+			switch (ref->action) {
+			case BTRFS_ADD_DELAYED_REF:
+			case BTRFS_ADD_DELAYED_EXTENT:
+				locked_ref->node.ref_mod -= ref->ref_mod;
+				break;
+			case BTRFS_DROP_DELAYED_REF:
+				locked_ref->node.ref_mod += ref->ref_mod;
+				break;
+			default:
+				WARN_ON(1);
+			}
+		}
		spin_unlock(&delayed_refs->lock);

		ret = run_one_delayed_ref(trans, root, ref, extent_op,
···
		spin_lock(&delayed_refs->lock);
	}
	return count;
-}
-
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-			       struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs,
-			       struct list_head *first_seq)
-{
-	spin_unlock(&delayed_refs->lock);
-	pr_debug("waiting for more refs (num %ld, first %p)\n",
-		 num_refs, first_seq);
-	wait_event(fs_info->tree_mod_seq_wait,
-		   num_refs != delayed_refs->num_entries ||
-		   fs_info->tree_mod_seq_list.next != first_seq);
-	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-	spin_lock(&delayed_refs->lock);
}

#ifdef SCRAMBLE_DELAYED_REFS
···
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct list_head cluster;
-	struct list_head *first_seq = NULL;
	int ret;
	u64 delayed_start;
	int run_all = count == (unsigned long)-1;
	int run_most = 0;
-	unsigned long num_refs = 0;
-	int consider_waiting;
+	int loops;

	/* We'll clean this up in btrfs_cleanup_transaction */
	if (trans->aborted)
···
	delayed_refs = &trans->transaction->delayed_refs;
	INIT_LIST_HEAD(&cluster);
again:
-	consider_waiting = 0;
+	loops = 0;
	spin_lock(&delayed_refs->lock);

#ifdef SCRAMBLE_DELAYED_REFS
···
		if (ret)
			break;

-		if (delayed_start >= delayed_refs->run_delayed_start) {
-			if (consider_waiting == 0) {
-				/*
-				 * btrfs_find_ref_cluster looped. let's do one
-				 * more cycle. if we don't run any delayed ref
-				 * during that cycle (because we can't because
-				 * all of them are blocked) and if the number of
-				 * refs doesn't change, we avoid busy waiting.
-				 */
-				consider_waiting = 1;
-				num_refs = delayed_refs->num_entries;
-				first_seq = root->fs_info->tree_mod_seq_list.next;
-			} else {
-				wait_for_more_refs(root->fs_info, delayed_refs,
-						   num_refs, first_seq);
-				/*
-				 * after waiting, things have changed. we
-				 * dropped the lock and someone else might have
-				 * run some refs, built new clusters and so on.
-				 * therefore, we restart staleness detection.
-				 */
-				consider_waiting = 0;
-			}
-		}
-
		ret = run_clustered_refs(trans, root, &cluster);
		if (ret < 0) {
			spin_unlock(&delayed_refs->lock);
···
		if (count == 0)
			break;

-		if (ret || delayed_refs->run_delayed_start == 0) {
+		if (delayed_start >= delayed_refs->run_delayed_start) {
+			if (loops == 0) {
+				/*
+				 * btrfs_find_ref_cluster looped. let's do one
+				 * more cycle. if we don't run any delayed ref
+				 * during that cycle (because we can't because
+				 * all of them are blocked), bail out.
+				 */
+				loops = 1;
+			} else {
+				/*
+				 * no runnable refs left, stop trying
+				 */
+				BUG_ON(run_all);
+				break;
+			}
+		}
+		if (ret) {
			/* refs were run, let's reset staleness detection */
-			consider_waiting = 0;
+			loops = 0;
		}
	}
···
	}
	spin_unlock(&block_group->lock);

-	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	/*
+	 * Try to preallocate enough space based on how big the block group is.
+	 * Keep in mind this has to include any pinned space which could end up
+	 * taking up quite a bit since it's not folded into the other space
+	 * cache.
+	 */
+	num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
	if (!num_pages)
		num_pages = 1;

-	/*
-	 * Just to make absolutely sure we have enough space, we're going to
-	 * preallocate 12 pages worth of space for each block group. In
-	 * practice we ought to use at most 8, but we need extra space so we can
-	 * add our header and have a terminator between the extents and the
-	 * bitmaps.
-	 */
	num_pages *= 16;
	num_pages *= PAGE_CACHE_SIZE;
···
	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, num_bytes +
					   nr_extents * root->leafsize);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
			return ret;
+		}
	}

	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
···
	rb_erase(&head->node.rb_node, &delayed_refs->root);

	delayed_refs->num_entries--;
-	smp_mb();
-	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-		wake_up(&root->fs_info->tree_mod_seq_wait);

	/*
	 * we don't take a ref on the node because we're removing it from the
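The new space-cache sizing is easiest to see with concrete numbers (a 4 KiB PAGE_CACHE_SIZE assumed), per the "increase the size of the free space cache" commit in the list above:

	/* 1 GiB block group:
	 *   old: div64_u64(1 GiB, 1 GiB)   = 1  ->  1 * 16 pages =  64 KiB
	 *   new: div64_u64(1 GiB, 256 MiB) = 4  ->  4 * 16 pages = 256 KiB
	 */
	num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
	if (!num_pages)			/* block groups under 256 MiB */
		num_pages = 1;		/* still get one 16-page unit */
	num_pages *= 16;
	num_pages *= PAGE_CACHE_SIZE;	/* bytes to preallocate */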
+2 -15
fs/btrfs/extent_io.c
···
	if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
		ret = tree->ops->readpage_end_io_hook(page, start, end,
						      state, mirror);
-		if (ret) {
-			/* no IO indicated but software detected errors
-			 * in the block, either checksum errors or
-			 * issues with the contents */
-			struct btrfs_root *root =
-				BTRFS_I(page->mapping->host)->root;
-			struct btrfs_device *device;
-
+		if (ret)
			uptodate = 0;
-			device = btrfs_find_device_for_logical(
-					root, start, mirror);
-			if (device)
-				btrfs_dev_stat_inc_and_print(device,
-					BTRFS_DEV_STAT_CORRUPTION_ERRS);
-		} else {
+		else
			clean_io_failure(start, page);
-		}
	}

	if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
+2 -2
fs/btrfs/file-item.c
···
}

int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct bio *bio, u64 offset, u32 *dst)
+			      struct bio *bio, u64 offset)
{
-	return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+	return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
}

int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
+164 -162
fs/btrfs/inode.c
···
	nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
		PAGE_CACHE_SHIFT;

-	atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-	if (atomic_read(&root->fs_info->async_delalloc_pages) <
+	if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
	    5 * 1024 * 1024 &&
	    waitqueue_active(&root->fs_info->async_submit_wait))
		wake_up(&root->fs_info->async_submit_wait);
···
		trans = btrfs_join_transaction_nolock(root);
	else
		trans = btrfs_join_transaction(root);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		trans = NULL;
+		goto out;
+	}
	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) /* -ENOMEM or corruption */
···
	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-	ret = btrfs_update_inode(trans, root, dir);
+	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
···
	return ret;
}

+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+			      struct extent_state **cached_state, int writing)
+{
+	struct btrfs_ordered_extent *ordered;
+	int ret = 0;
+
+	while (1) {
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, cached_state);
+		/*
+		 * We're concerned with the entire range that we're going to be
+		 * doing DIO to, so we need to make sure theres no ordered
+		 * extents in this range.
+		 */
+		ordered = btrfs_lookup_ordered_range(inode, lockstart,
+						     lockend - lockstart + 1);
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either, we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent. The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    *cached_state)))
+			break;
+
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     cached_state, GFP_NOFS);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(inode->i_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				break;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(inode->i_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret)
+				break;
+		}
+
+		cond_resched();
+	}
+
+	return ret;
+}
+
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
	u64 start = iblock << inode->i_blkbits;
+	u64 lockstart, lockend;
	u64 len = bh_result->b_size;
	struct btrfs_trans_handle *trans;
+	int unlock_bits = EXTENT_LOCKED;
+	int ret;
+
+	if (create) {
+		ret = btrfs_delalloc_reserve_space(inode, len);
+		if (ret)
+			return ret;
+		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+	} else {
+		len = min_t(u64, len, root->sectorsize);
+	}
+
+	lockstart = start;
+	lockend = start + len - 1;
+
+	/*
+	 * If this errors out it's because we couldn't invalidate pagecache for
+	 * this range and we need to fallback to buffered.
+	 */
+	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+		return -ENOTBLK;
+
+	if (create) {
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_DELALLOC, NULL,
+				     &cached_state, GFP_NOFS);
+		if (ret)
+			goto unlock_err;
+	}

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}

	/*
	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
···
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
-		return -ENOTBLK;
+		ret = -ENOTBLK;
+		goto unlock_err;
	}

	/* Just a good old fashioned hole, return */
	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
		free_extent_map(em);
-		/* DIO will do one hole at a time, so just unlock a sector */
-		unlock_extent(&BTRFS_I(inode)->io_tree, start,
-			      start + root->sectorsize - 1);
-		return 0;
+		ret = 0;
+		goto unlock_err;
	}

	/*
···
	 *
	 */
	if (!create) {
-		len = em->len - (start - em->start);
-		goto map;
+		len = min(len, em->len - (start - em->start));
+		lockstart = start + len;
+		goto unlock;
	}

	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
···
		btrfs_end_transaction(trans, root);
		if (ret) {
			free_extent_map(em);
-			return ret;
+			goto unlock_err;
		}
		goto unlock;
	}
···
	 */
	len = bh_result->b_size;
	em = btrfs_new_extent_direct(inode, em, start, len);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
	len = min(len, em->len - (start - em->start));
unlock:
-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-			 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-			 0, NULL, GFP_NOFS);
-map:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
···
		i_size_write(inode, start + len);
	}

+	/*
+	 * In the case of write we need to clear and unlock the entire range,
+	 * in the case of read we need to unlock only the end area that we
+	 * aren't using if there is any left over space.
+	 */
+	if (lockstart < lockend) {
+		if (create && len < lockend - lockstart) {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockstart + len - 1, unlock_bits, 1, 0,
+					 &cached_state, GFP_NOFS);
+			/*
+			 * Beside unlock, we also need to cleanup reserved space
+			 * for the left range by attaching EXTENT_DO_ACCOUNTING.
+			 */
+			clear_extent_bit(&BTRFS_I(inode)->io_tree,
+					 lockstart + len, lockend,
+					 unlock_bits | EXTENT_DO_ACCOUNTING,
+					 1, 0, NULL, GFP_NOFS);
+		} else {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockend, unlock_bits, 1, 0,
+					 &cached_state, GFP_NOFS);
+		}
+	} else {
+		free_extent_state(cached_state);
+	}
+
	free_extent_map(em);

	return 0;
+
+unlock_err:
+	if (create)
+		unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	return ret;
}

struct btrfs_dio_private {
···
	u64 logical_offset;
	u64 disk_bytenr;
	u64 bytes;
-	u32 *csums;
	void *private;

	/* number of bios pending for this dio */
···
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start;
-	u32 *private = dip->csums;

	start = dip->logical_offset;
	do {
···
			struct page *page = bvec->bv_page;
			char *kaddr;
			u32 csum = ~(u32)0;
+			u64 private = ~(u32)0;
			unsigned long flags;

+			if (get_state_private(&BTRFS_I(inode)->io_tree,
+					      start, &private))
+				goto failed;
			local_irq_save(flags);
			kaddr = kmap_atomic(page);
			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
···
			local_irq_restore(flags);

			flush_dcache_page(bvec->bv_page);
-			if (csum != *private) {
+			if (csum != private) {
+failed:
				printk(KERN_ERR "btrfs csum failed ino %llu off"
				       " %llu csum %u private %u\n",
				       (unsigned long long)btrfs_ino(inode),
				       (unsigned long long)start,
-				       csum, *private);
+				       csum, (unsigned)private);
				err = -EIO;
			}
		}

		start += bvec->bv_len;
-		private++;
		bvec++;
	} while (bvec <= bvec_end);
···
		      dip->logical_offset + dip->bytes - 1);
	bio->bi_private = dip->private;

-	kfree(dip->csums);
	kfree(dip);

	/* If we had a csum failure make sure to clear the uptodate flag */
···

static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
					 int rw, u64 file_offset, int skip_sum,
-					 u32 *csums, int async_submit)
+					 int async_submit)
{
	int write = rw & REQ_WRITE;
	struct btrfs_root *root = BTRFS_I(inode)->root;
···
		if (ret)
			goto err;
	} else if (!skip_sum) {
-		ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-						file_offset, csums);
+		ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
		if (ret)
			goto err;
	}
···
	u64 submit_len = 0;
	u64 map_length;
	int nr_pages = 0;
-	u32 *csums = dip->csums;
	int ret = 0;
	int async_submit = 0;
-	int write = rw & REQ_WRITE;

	map_length = orig_bio->bi_size;
	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
···
			atomic_inc(&dip->pending_bios);
			ret = __btrfs_submit_dio_bio(bio, inode, rw,
						     file_offset, skip_sum,
-						     csums, async_submit);
+						     async_submit);
			if (ret) {
				bio_put(bio);
				atomic_dec(&dip->pending_bios);
				goto out_err;
			}

-			/* Write's use the ordered csums */
-			if (!write && !skip_sum)
-				csums = csums + nr_pages;
			start_sector += submit_len >> 9;
			file_offset += submit_len;
···

submit:
	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-				     csums, async_submit);
+				     async_submit);
	if (!ret)
		return 0;
···
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
-	}
-	dip->csums = NULL;
-
-	/* Write's use the ordered csum stuff, so we don't need dip->csums */
-	if (!write && !skip_sum) {
-		dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-		if (!dip->csums) {
-			kfree(dip);
-			ret = -ENOMEM;
-			goto free_ordered;
-		}
	}

	dip->private = bio->bi_private;
···
out:
	return retval;
}
+
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_state *cached_state = NULL;
-	u64 lockstart, lockend;
-	ssize_t ret;
-	int writing = rw & WRITE;
-	int write_bits = 0;
-	size_t count = iov_length(iov, nr_segs);

	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs)) {
+			    offset, nr_segs))
		return 0;
-	}

-	lockstart = offset;
-	lockend = offset + count - 1;
-
-	if (writing) {
-		ret = btrfs_delalloc_reserve_space(inode, count);
-		if (ret)
-			goto out;
-	}
-
-	while (1) {
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				 0, &cached_state);
-		/*
-		 * We're concerned with the entire range that we're going to be
-		 * doing DIO to, so we need to make sure theres no ordered
-		 * extents in this range.
-		 */
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     lockend - lockstart + 1);
-
-		/*
-		 * We need to make sure there are no buffered pages in this
-		 * range either, we could have raced between the invalidate in
-		 * generic_file_direct_write and locking the extent. The
-		 * invalidate needs to happen so that reads after a write do not
-		 * get stale data.
-		 */
-		if (!ordered && (!writing ||
-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
-				    lockstart, lockend, EXTENT_UPTODATE, 0,
-				    cached_state)))
-			break;
-
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     &cached_state, GFP_NOFS);
-
-		if (ordered) {
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-		} else {
-			/* Screw you mmap */
-			ret = filemap_write_and_wait_range(file->f_mapping,
-							   lockstart,
-							   lockend);
-			if (ret)
-				goto out;
-
-			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
-			 */
-			ret = invalidate_inode_pages2_range(file->f_mapping,
-					lockstart >> PAGE_CACHE_SHIFT,
-					lockend >> PAGE_CACHE_SHIFT);
-			if (ret) {
-				if (ret == -EBUSY)
-					ret = 0;
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-	/*
-	 * we don't use btrfs_set_extent_delalloc because we don't want
-	 * the dirty or uptodate bits
-	 */
-	if (writing) {
-		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     EXTENT_DELALLOC, NULL, &cached_state,
-				     GFP_NOFS);
-		if (ret) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockend, EXTENT_LOCKED | write_bits,
-					 1, 0, &cached_state, GFP_NOFS);
-			goto out;
-		}
-	}
-
-	free_extent_state(cached_state);
-	cached_state = NULL;
-
-	ret = __blockdev_direct_IO(rw, iocb, inode,
+	return __blockdev_direct_IO(rw, iocb, inode,
		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
		   btrfs_submit_direct, 0);
-
-	if (ret < 0 && ret != -EIOCBQUEUED) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-				 offset + iov_length(iov, nr_segs) - 1,
-				 EXTENT_LOCKED | write_bits, 1, 0,
-				 &cached_state, GFP_NOFS);
-	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-		/*
-		 * We're falling back to buffered, unlock the section we didn't
-		 * do IO on.
-		 */
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-				 offset + iov_length(iov, nr_segs) - 1,
-				 EXTENT_LOCKED | write_bits, 1, 0,
-				 &cached_state, GFP_NOFS);
-	}
-out:
-	free_extent_state(cached_state);
-	return ret;
}

static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
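The net effect of the rework above: btrfs_direct_IO() no longer locks the whole iovec range up front, and the extent locking moves into btrfs_get_blocks_direct(), which locks only the chunk it is currently mapping and unlocks whatever it did not use. A simplified view of the resulting call flow (names from the hunks above):

	/*
	 * btrfs_direct_IO()
	 *   -> __blockdev_direct_IO()
	 *        -> btrfs_get_blocks_direct()          once per mapped chunk:
	 *             btrfs_delalloc_reserve_space()     (writes only)
	 *             lock_extent_direct()               lock [lockstart, lockend] only
	 *             btrfs_get_extent() / btrfs_new_extent_direct()
	 *             clear_extent_bit()                 unlock the unused tail
	 *
	 * Holding the extent lock only across each mapping step, rather than
	 * across the whole syscall, is what resolves the DIO locking problems
	 * called out in the pull message.
	 */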
+1 -1
fs/btrfs/ioctl.c
···
	uuid_le_gen(&new_uuid);
	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
	root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-	root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
	root_item.ctime = root_item.otime;
	btrfs_set_root_ctransid(&root_item, trans->transid);
	btrfs_set_root_otransid(&root_item, trans->transid);
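otime.nsec is a 32-bit little-endian field on disk, so converting it with cpu_to_le64() was wrong twice over: sparse flags the width mismatch, and on a big-endian host the swapped value lands in the upper bytes of the 64-bit result, so the truncating 32-bit store keeps only zeros. The same one-line fix recurs in root-tree.c and transaction.c below. A sketch of the mismatch, with the on-disk layout from btrfs_timespec:

	struct btrfs_timespec {
		__le64 sec;
		__le32 nsec;
	} __attribute__ ((__packed__));

	/* big-endian host, tv_nsec == 0x0ABCDEF0:
	 *   cpu_to_le64() -> 0xF0DEBC0A00000000, low 32 bits stored: 0
	 *   cpu_to_le32() -> 0xF0DEBC0A, stored intact
	 */
	root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);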
+1 -1
fs/btrfs/locking.c
···
{
	if (eb->lock_nested) {
		read_lock(&eb->lock);
-		if (&eb->lock_nested && current->pid == eb->lock_owner) {
+		if (eb->lock_nested && current->pid == eb->lock_owner) {
			read_unlock(&eb->lock);
			return;
		}
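A one-character fix for an always-true test: the address of a struct member can never be NULL, so "&eb->lock_nested" contributed nothing to the condition and only the pid comparison was ever evaluated, taking the nested-unlock fast path even when the flag had meanwhile been cleared. GCC's -Waddress warns about exactly this shape:

	/* before: the guard is dead code, &member is always non-NULL */
	if (&eb->lock_nested && current->pid == eb->lock_owner)
	/* after: the flag's value actually gates the fast path */
	if (eb->lock_nested && current->pid == eb->lock_owner)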
+9 -3
fs/btrfs/qgroup.c
···
	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
-	if (!dstgroup)
+	if (IS_ERR(dstgroup)) {
+		ret = PTR_ERR(dstgroup);
		goto unlock;
+	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
-		if (!srcgroup)
+		if (!srcgroup) {
+			ret = -EINVAL;
			goto unlock;
+		}
		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
···
		qgroup_dirty(fs_info, srcgroup);
	}

-	if (!inherit)
+	if (!inherit) {
+		ret = -EINVAL;
		goto unlock;
+	}

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
+2 -2
fs/btrfs/root-tree.c
···
	struct timespec ct = CURRENT_TIME;

	spin_lock(&root->root_times_lock);
-	item->ctransid = trans->transid;
+	item->ctransid = cpu_to_le64(trans->transid);
	item->ctime.sec = cpu_to_le64(ct.tv_sec);
-	item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+	item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
	spin_unlock(&root->root_times_lock);
}
+11 -4
fs/btrfs/super.c
···
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_root *root = fs_info->tree_root;
-	int ret;

	trace_btrfs_sync_fs(wait);
···

	btrfs_wait_ordered_extents(root, 0, 0);

-	trans = btrfs_start_transaction(root, 0);
+	spin_lock(&fs_info->trans_lock);
+	if (!fs_info->running_transaction) {
+		spin_unlock(&fs_info->trans_lock);
+		return 0;
+	}
+	spin_unlock(&fs_info->trans_lock);
+
+	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
-	ret = btrfs_commit_transaction(trans, root);
-	return ret;
+	return btrfs_commit_transaction(trans, root);
}

static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
···
	while (cur_devices) {
		head = &cur_devices->devices;
		list_for_each_entry(dev, head, dev_list) {
+			if (dev->missing)
+				continue;
			if (!first_dev || dev->devid < first_dev->devid)
				first_dev = dev;
		}
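btrfs_start_transaction() opens a fresh transaction when none is running, so the old sync path would create and then commit an empty transaction on a perfectly clean filesystem. The rewrite checks fs_info->running_transaction under trans_lock and returns early when there is nothing to commit. The distinction, in outline:

	trans = btrfs_start_transaction(root, 0);	/* may create a new transaction */
	trans = btrfs_join_transaction(root);		/* attaches to the running one */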
+2 -1
fs/btrfs/transaction.c
···

	btrfs_i_size_write(parent_inode, parent_inode->i_size +
					 dentry->d_name.len * 2);
+	parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	if (ret)
		goto abort_trans_dput;
···
	memcpy(new_root_item->parent_uuid, root->root_item.uuid,
	       BTRFS_UUID_SIZE);
	new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-	new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
	btrfs_set_root_otransid(new_root_item, trans->transid);
	memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
	memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
+6 -27
fs/btrfs/volumes.c
···
		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;
-		atomic_dec(&fs_info->nr_async_bios);

-		if (atomic_read(&fs_info->nr_async_bios) < limit &&
+		if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
		    waitqueue_active(&fs_info->async_submit_wait))
			wake_up(&fs_info->async_submit_wait);
···
		memcpy(new_device, device, sizeof(*new_device));

		/* Safe because we are under uuid_mutex */
-		name = rcu_string_strdup(device->name->str, GFP_NOFS);
-		BUG_ON(device->name && !name); /* -ENOMEM */
-		rcu_assign_pointer(new_device->name, name);
+		if (device->name) {
+			name = rcu_string_strdup(device->name->str, GFP_NOFS);
+			BUG_ON(device->name && !name); /* -ENOMEM */
+			rcu_assign_pointer(new_device->name, name);
+		}
		new_device->bdev = NULL;
		new_device->writeable = 0;
		new_device->in_fs_metadata = 0;
···
	}
	free_extent_buffer(sb);
	return ret;
-}
-
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num)
-{
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-	int ret;
-	u64 map_length = 0;
-	struct btrfs_bio *bbio = NULL;
-	struct btrfs_device *device;
-
-	BUG_ON(mirror_num == 0);
-	ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
-			      mirror_num);
-	if (ret) {
-		BUG_ON(bbio != NULL);
-		return NULL;
-	}
-	BUG_ON(mirror_num != bbio->mirror_num);
-	device = bbio->stripes[mirror_num - 1].dev;
-	kfree(bbio);
-	return device;
}

int btrfs_read_chunk_tree(struct btrfs_root *root)
-2
fs/btrfs/volumes.h
···
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num);
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,