Merge branch 'for-linus2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

+8 -6

fs/btrfs/backref.c

··· 276 276 } 277 277 if (ret > 0) 278 278 goto next; 279 - ret = ulist_add_merge(parents, eb->start, 280 - (uintptr_t)eie, 281 - (u64 *)&old, GFP_NOFS); 279 + ret = ulist_add_merge_ptr(parents, eb->start, 280 + eie, (void **)&old, GFP_NOFS); 282 281 if (ret < 0) 283 282 break; 284 283 if (!ret && extent_item_pos) { ··· 1000 1001 ret = -EIO; 1001 1002 goto out; 1002 1003 } 1004 + btrfs_tree_read_lock(eb); 1005 + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1003 1006 ret = find_extent_in_eb(eb, bytenr, 1004 1007 *extent_item_pos, &eie); 1008 + btrfs_tree_read_unlock_blocking(eb); 1005 1009 free_extent_buffer(eb); 1006 1010 if (ret < 0) 1007 1011 goto out; 1008 1012 ref->inode_list = eie; 1009 1013 } 1010 - ret = ulist_add_merge(refs, ref->parent, 1011 - (uintptr_t)ref->inode_list, 1012 - (u64 *)&eie, GFP_NOFS); 1014 + ret = ulist_add_merge_ptr(refs, ref->parent, 1015 + ref->inode_list, 1016 + (void **)&eie, GFP_NOFS); 1013 1017 if (ret < 0) 1014 1018 goto out; 1015 1019 if (!ret && extent_item_pos) {

-6

fs/btrfs/btrfs_inode.h

··· 84 84 */ 85 85 struct list_head delalloc_inodes; 86 86 87 - /* 88 - * list for tracking inodes that must be sent to disk before a 89 - * rename or truncate commit 90 - */ 91 - struct list_head ordered_operations; 92 - 93 87 /* node for the red-black tree that links inodes in subvolume root */ 94 88 struct rb_node rb_node; 95 89

+10 -10

fs/btrfs/ctree.c

··· 280 280 281 281 WARN_ON(btrfs_header_generation(buf) > trans->transid); 282 282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 283 - ret = btrfs_inc_ref(trans, root, cow, 1, 1); 283 + ret = btrfs_inc_ref(trans, root, cow, 1); 284 284 else 285 - ret = btrfs_inc_ref(trans, root, cow, 0, 1); 285 + ret = btrfs_inc_ref(trans, root, cow, 0); 286 286 287 287 if (ret) 288 288 return ret; ··· 1035 1035 if ((owner == root->root_key.objectid || 1036 1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 1037 1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1038 - ret = btrfs_inc_ref(trans, root, buf, 1, 1); 1038 + ret = btrfs_inc_ref(trans, root, buf, 1); 1039 1039 BUG_ON(ret); /* -ENOMEM */ 1040 1040 1041 1041 if (root->root_key.objectid == 1042 1042 BTRFS_TREE_RELOC_OBJECTID) { 1043 - ret = btrfs_dec_ref(trans, root, buf, 0, 1); 1043 + ret = btrfs_dec_ref(trans, root, buf, 0); 1044 1044 BUG_ON(ret); /* -ENOMEM */ 1045 - ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1045 + ret = btrfs_inc_ref(trans, root, cow, 1); 1046 1046 BUG_ON(ret); /* -ENOMEM */ 1047 1047 } 1048 1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; ··· 1050 1050 1051 1051 if (root->root_key.objectid == 1052 1052 BTRFS_TREE_RELOC_OBJECTID) 1053 - ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1053 + ret = btrfs_inc_ref(trans, root, cow, 1); 1054 1054 else 1055 - ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1055 + ret = btrfs_inc_ref(trans, root, cow, 0); 1056 1056 BUG_ON(ret); /* -ENOMEM */ 1057 1057 } 1058 1058 if (new_flags != 0) { ··· 1069 1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 1070 1070 if (root->root_key.objectid == 1071 1071 BTRFS_TREE_RELOC_OBJECTID) 1072 - ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1072 + ret = btrfs_inc_ref(trans, root, cow, 1); 1073 1073 else 1074 - ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1074 + ret = btrfs_inc_ref(trans, root, cow, 0); 1075 1075 BUG_ON(ret); /* -ENOMEM */ 1076 - ret = btrfs_dec_ref(trans, root, buf, 1, 1); 1076 + ret = btrfs_dec_ref(trans, root, buf, 1); 1077 1077 BUG_ON(ret); /* -ENOMEM */ 1078 1078 } 1079 1079 clean_tree_block(trans, root, buf);

+2 -2

fs/btrfs/ctree.h

··· 3326 3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3327 3327 struct btrfs_key *ins, int is_data, int delalloc); 3328 3328 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3329 - struct extent_buffer *buf, int full_backref, int no_quota); 3329 + struct extent_buffer *buf, int full_backref); 3330 3330 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3331 - struct extent_buffer *buf, int full_backref, int no_quota); 3331 + struct extent_buffer *buf, int full_backref); 3332 3332 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3333 3333 struct btrfs_root *root, 3334 3334 u64 bytenr, u64 num_bytes, u64 flags,

-32

fs/btrfs/disk-io.c

··· 60 60 static void free_fs_root(struct btrfs_root *root); 61 61 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 62 62 int read_only); 63 - static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, 64 - struct btrfs_root *root); 65 63 static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 66 64 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 67 65 struct btrfs_root *root); ··· 3827 3829 btrfs_cleanup_transaction(root); 3828 3830 } 3829 3831 3830 - static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, 3831 - struct btrfs_root *root) 3832 - { 3833 - struct btrfs_inode *btrfs_inode; 3834 - struct list_head splice; 3835 - 3836 - INIT_LIST_HEAD(&splice); 3837 - 3838 - mutex_lock(&root->fs_info->ordered_operations_mutex); 3839 - spin_lock(&root->fs_info->ordered_root_lock); 3840 - 3841 - list_splice_init(&t->ordered_operations, &splice); 3842 - while (!list_empty(&splice)) { 3843 - btrfs_inode = list_entry(splice.next, struct btrfs_inode, 3844 - ordered_operations); 3845 - 3846 - list_del_init(&btrfs_inode->ordered_operations); 3847 - spin_unlock(&root->fs_info->ordered_root_lock); 3848 - 3849 - btrfs_invalidate_inodes(btrfs_inode->root); 3850 - 3851 - spin_lock(&root->fs_info->ordered_root_lock); 3852 - } 3853 - 3854 - spin_unlock(&root->fs_info->ordered_root_lock); 3855 - mutex_unlock(&root->fs_info->ordered_operations_mutex); 3856 - } 3857 - 3858 3832 static void btrfs_destroy_ordered_extents(struct btrfs_root *root) 3859 3833 { 3860 3834 struct btrfs_ordered_extent *ordered; ··· 4063 4093 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4064 4094 struct btrfs_root *root) 4065 4095 { 4066 - btrfs_destroy_ordered_operations(cur_trans, root); 4067 - 4068 4096 btrfs_destroy_delayed_refs(cur_trans, root); 4069 4097 4070 4098 cur_trans->state = TRANS_STATE_COMMIT_START;

+272 -13

fs/btrfs/extent-tree.c

··· 3057 3057 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 3058 3058 struct btrfs_root *root, 3059 3059 struct extent_buffer *buf, 3060 - int full_backref, int inc, int no_quota) 3060 + int full_backref, int inc) 3061 3061 { 3062 3062 u64 bytenr; 3063 3063 u64 num_bytes; ··· 3111 3111 key.offset -= btrfs_file_extent_offset(buf, fi); 3112 3112 ret = process_func(trans, root, bytenr, num_bytes, 3113 3113 parent, ref_root, key.objectid, 3114 - key.offset, no_quota); 3114 + key.offset, 1); 3115 3115 if (ret) 3116 3116 goto fail; 3117 3117 } else { ··· 3119 3119 num_bytes = btrfs_level_size(root, level - 1); 3120 3120 ret = process_func(trans, root, bytenr, num_bytes, 3121 3121 parent, ref_root, level - 1, 0, 3122 - no_quota); 3122 + 1); 3123 3123 if (ret) 3124 3124 goto fail; 3125 3125 } ··· 3130 3130 } 3131 3131 3132 3132 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3133 - struct extent_buffer *buf, int full_backref, int no_quota) 3133 + struct extent_buffer *buf, int full_backref) 3134 3134 { 3135 - return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); 3135 + return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 3136 3136 } 3137 3137 3138 3138 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3139 - struct extent_buffer *buf, int full_backref, int no_quota) 3139 + struct extent_buffer *buf, int full_backref) 3140 3140 { 3141 - return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); 3141 + return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 3142 3142 } 3143 3143 3144 3144 static int write_one_cache_group(struct btrfs_trans_handle *trans, ··· 7478 7478 wc->reada_slot = slot; 7479 7479 } 7480 7480 7481 + static int account_leaf_items(struct btrfs_trans_handle *trans, 7482 + struct btrfs_root *root, 7483 + struct extent_buffer *eb) 7484 + { 7485 + int nr = btrfs_header_nritems(eb); 7486 + int i, extent_type, ret; 7487 + struct btrfs_key key; 7488 + struct btrfs_file_extent_item *fi; 7489 + u64 bytenr, num_bytes; 7490 + 7491 + for (i = 0; i < nr; i++) { 7492 + btrfs_item_key_to_cpu(eb, &key, i); 7493 + 7494 + if (key.type != BTRFS_EXTENT_DATA_KEY) 7495 + continue; 7496 + 7497 + fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 7498 + /* filter out non qgroup-accountable extents */ 7499 + extent_type = btrfs_file_extent_type(eb, fi); 7500 + 7501 + if (extent_type == BTRFS_FILE_EXTENT_INLINE) 7502 + continue; 7503 + 7504 + bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 7505 + if (!bytenr) 7506 + continue; 7507 + 7508 + num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 7509 + 7510 + ret = btrfs_qgroup_record_ref(trans, root->fs_info, 7511 + root->objectid, 7512 + bytenr, num_bytes, 7513 + BTRFS_QGROUP_OPER_SUB_SUBTREE, 0); 7514 + if (ret) 7515 + return ret; 7516 + } 7517 + return 0; 7518 + } 7519 + 7520 + /* 7521 + * Walk up the tree from the bottom, freeing leaves and any interior 7522 + * nodes which have had all slots visited. If a node (leaf or 7523 + * interior) is freed, the node above it will have it's slot 7524 + * incremented. The root node will never be freed. 7525 + * 7526 + * At the end of this function, we should have a path which has all 7527 + * slots incremented to the next position for a search. If we need to 7528 + * read a new node it will be NULL and the node above it will have the 7529 + * correct slot selected for a later read. 7530 + * 7531 + * If we increment the root nodes slot counter past the number of 7532 + * elements, 1 is returned to signal completion of the search. 7533 + */ 7534 + static int adjust_slots_upwards(struct btrfs_root *root, 7535 + struct btrfs_path *path, int root_level) 7536 + { 7537 + int level = 0; 7538 + int nr, slot; 7539 + struct extent_buffer *eb; 7540 + 7541 + if (root_level == 0) 7542 + return 1; 7543 + 7544 + while (level <= root_level) { 7545 + eb = path->nodes[level]; 7546 + nr = btrfs_header_nritems(eb); 7547 + path->slots[level]++; 7548 + slot = path->slots[level]; 7549 + if (slot >= nr || level == 0) { 7550 + /* 7551 + * Don't free the root - we will detect this 7552 + * condition after our loop and return a 7553 + * positive value for caller to stop walking the tree. 7554 + */ 7555 + if (level != root_level) { 7556 + btrfs_tree_unlock_rw(eb, path->locks[level]); 7557 + path->locks[level] = 0; 7558 + 7559 + free_extent_buffer(eb); 7560 + path->nodes[level] = NULL; 7561 + path->slots[level] = 0; 7562 + } 7563 + } else { 7564 + /* 7565 + * We have a valid slot to walk back down 7566 + * from. Stop here so caller can process these 7567 + * new nodes. 7568 + */ 7569 + break; 7570 + } 7571 + 7572 + level++; 7573 + } 7574 + 7575 + eb = path->nodes[root_level]; 7576 + if (path->slots[root_level] >= btrfs_header_nritems(eb)) 7577 + return 1; 7578 + 7579 + return 0; 7580 + } 7581 + 7582 + /* 7583 + * root_eb is the subtree root and is locked before this function is called. 7584 + */ 7585 + static int account_shared_subtree(struct btrfs_trans_handle *trans, 7586 + struct btrfs_root *root, 7587 + struct extent_buffer *root_eb, 7588 + u64 root_gen, 7589 + int root_level) 7590 + { 7591 + int ret = 0; 7592 + int level; 7593 + struct extent_buffer *eb = root_eb; 7594 + struct btrfs_path *path = NULL; 7595 + 7596 + BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); 7597 + BUG_ON(root_eb == NULL); 7598 + 7599 + if (!root->fs_info->quota_enabled) 7600 + return 0; 7601 + 7602 + if (!extent_buffer_uptodate(root_eb)) { 7603 + ret = btrfs_read_buffer(root_eb, root_gen); 7604 + if (ret) 7605 + goto out; 7606 + } 7607 + 7608 + if (root_level == 0) { 7609 + ret = account_leaf_items(trans, root, root_eb); 7610 + goto out; 7611 + } 7612 + 7613 + path = btrfs_alloc_path(); 7614 + if (!path) 7615 + return -ENOMEM; 7616 + 7617 + /* 7618 + * Walk down the tree. Missing extent blocks are filled in as 7619 + * we go. Metadata is accounted every time we read a new 7620 + * extent block. 7621 + * 7622 + * When we reach a leaf, we account for file extent items in it, 7623 + * walk back up the tree (adjusting slot pointers as we go) 7624 + * and restart the search process. 7625 + */ 7626 + extent_buffer_get(root_eb); /* For path */ 7627 + path->nodes[root_level] = root_eb; 7628 + path->slots[root_level] = 0; 7629 + path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 7630 + walk_down: 7631 + level = root_level; 7632 + while (level >= 0) { 7633 + if (path->nodes[level] == NULL) { 7634 + int child_bsize = root->nodesize; 7635 + int parent_slot; 7636 + u64 child_gen; 7637 + u64 child_bytenr; 7638 + 7639 + /* We need to get child blockptr/gen from 7640 + * parent before we can read it. */ 7641 + eb = path->nodes[level + 1]; 7642 + parent_slot = path->slots[level + 1]; 7643 + child_bytenr = btrfs_node_blockptr(eb, parent_slot); 7644 + child_gen = btrfs_node_ptr_generation(eb, parent_slot); 7645 + 7646 + eb = read_tree_block(root, child_bytenr, child_bsize, 7647 + child_gen); 7648 + if (!eb || !extent_buffer_uptodate(eb)) { 7649 + ret = -EIO; 7650 + goto out; 7651 + } 7652 + 7653 + path->nodes[level] = eb; 7654 + path->slots[level] = 0; 7655 + 7656 + btrfs_tree_read_lock(eb); 7657 + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 7658 + path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 7659 + 7660 + ret = btrfs_qgroup_record_ref(trans, root->fs_info, 7661 + root->objectid, 7662 + child_bytenr, 7663 + child_bsize, 7664 + BTRFS_QGROUP_OPER_SUB_SUBTREE, 7665 + 0); 7666 + if (ret) 7667 + goto out; 7668 + 7669 + } 7670 + 7671 + if (level == 0) { 7672 + ret = account_leaf_items(trans, root, path->nodes[level]); 7673 + if (ret) 7674 + goto out; 7675 + 7676 + /* Nonzero return here means we completed our search */ 7677 + ret = adjust_slots_upwards(root, path, root_level); 7678 + if (ret) 7679 + break; 7680 + 7681 + /* Restart search with new slots */ 7682 + goto walk_down; 7683 + } 7684 + 7685 + level--; 7686 + } 7687 + 7688 + ret = 0; 7689 + out: 7690 + btrfs_free_path(path); 7691 + 7692 + return ret; 7693 + } 7694 + 7481 7695 /* 7482 7696 * helper to process tree block while walking down the tree. 7483 7697 * ··· 7746 7532 /* wc->stage == UPDATE_BACKREF */ 7747 7533 if (!(wc->flags[level] & flag)) { 7748 7534 BUG_ON(!path->locks[level]); 7749 - ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 7535 + ret = btrfs_inc_ref(trans, root, eb, 1); 7750 7536 BUG_ON(ret); /* -ENOMEM */ 7751 - ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 7537 + ret = btrfs_dec_ref(trans, root, eb, 0); 7752 7538 BUG_ON(ret); /* -ENOMEM */ 7753 7539 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 7754 7540 eb->len, flag, ··· 7795 7581 int level = wc->level; 7796 7582 int reada = 0; 7797 7583 int ret = 0; 7584 + bool need_account = false; 7798 7585 7799 7586 generation = btrfs_node_ptr_generation(path->nodes[level], 7800 7587 path->slots[level]); ··· 7841 7626 7842 7627 if (wc->stage == DROP_REFERENCE) { 7843 7628 if (wc->refs[level - 1] > 1) { 7629 + need_account = true; 7844 7630 if (level == 1 && 7845 7631 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 7846 7632 goto skip; ··· 7905 7689 parent = 0; 7906 7690 } 7907 7691 7692 + if (need_account) { 7693 + ret = account_shared_subtree(trans, root, next, 7694 + generation, level - 1); 7695 + if (ret) { 7696 + printk_ratelimited(KERN_ERR "BTRFS: %s Error " 7697 + "%d accounting shared subtree. Quota " 7698 + "is out of sync, rescan required.\n", 7699 + root->fs_info->sb->s_id, ret); 7700 + } 7701 + } 7908 7702 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 7909 7703 root->root_key.objectid, level - 1, 0, 0); 7910 7704 BUG_ON(ret); /* -ENOMEM */ ··· 7995 7769 if (wc->refs[level] == 1) { 7996 7770 if (level == 0) { 7997 7771 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 7998 - ret = btrfs_dec_ref(trans, root, eb, 1, 7999 - wc->for_reloc); 7772 + ret = btrfs_dec_ref(trans, root, eb, 1); 8000 7773 else 8001 - ret = btrfs_dec_ref(trans, root, eb, 0, 8002 - wc->for_reloc); 7774 + ret = btrfs_dec_ref(trans, root, eb, 0); 8003 7775 BUG_ON(ret); /* -ENOMEM */ 7776 + ret = account_leaf_items(trans, root, eb); 7777 + if (ret) { 7778 + printk_ratelimited(KERN_ERR "BTRFS: %s Error " 7779 + "%d accounting leaf items. Quota " 7780 + "is out of sync, rescan required.\n", 7781 + root->fs_info->sb->s_id, ret); 7782 + } 8004 7783 } 8005 7784 /* make block locked assertion in clean_tree_block happy */ 8006 7785 if (!path->locks[level] && ··· 8130 7899 int ret; 8131 7900 int level; 8132 7901 bool root_dropped = false; 7902 + 7903 + btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid); 8133 7904 8134 7905 path = btrfs_alloc_path(); 8135 7906 if (!path) { ··· 8258 8025 goto out_end_trans; 8259 8026 } 8260 8027 8028 + /* 8029 + * Qgroup update accounting is run from 8030 + * delayed ref handling. This usually works 8031 + * out because delayed refs are normally the 8032 + * only way qgroup updates are added. However, 8033 + * we may have added updates during our tree 8034 + * walk so run qgroups here to make sure we 8035 + * don't lose any updates. 8036 + */ 8037 + ret = btrfs_delayed_qgroup_accounting(trans, 8038 + root->fs_info); 8039 + if (ret) 8040 + printk_ratelimited(KERN_ERR "BTRFS: Failure %d " 8041 + "running qgroup updates " 8042 + "during snapshot delete. " 8043 + "Quota is out of sync, " 8044 + "rescan required.\n", ret); 8045 + 8261 8046 btrfs_end_transaction_throttle(trans, tree_root); 8262 8047 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 8263 8048 pr_debug("BTRFS: drop snapshot early exit\n"); ··· 8329 8078 } 8330 8079 root_dropped = true; 8331 8080 out_end_trans: 8081 + ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info); 8082 + if (ret) 8083 + printk_ratelimited(KERN_ERR "BTRFS: Failure %d " 8084 + "running qgroup updates " 8085 + "during snapshot delete. " 8086 + "Quota is out of sync, " 8087 + "rescan required.\n", ret); 8088 + 8332 8089 btrfs_end_transaction_throttle(trans, tree_root); 8333 8090 out_free: 8334 8091 kfree(wc);

+1 -1

fs/btrfs/file-item.c

··· 756 756 found_next = 1; 757 757 if (ret != 0) 758 758 goto insert; 759 - slot = 0; 759 + slot = path->slots[0]; 760 760 } 761 761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 762 762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||

+1 -25

fs/btrfs/file.c

··· 1838 1838 1839 1839 int btrfs_release_file(struct inode *inode, struct file *filp) 1840 1840 { 1841 - /* 1842 - * ordered_data_close is set by settattr when we are about to truncate 1843 - * a file from a non-zero size to a zero size. This tries to 1844 - * flush down new bytes that may have been written if the 1845 - * application were using truncate to replace a file in place. 1846 - */ 1847 - if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 1848 - &BTRFS_I(inode)->runtime_flags)) { 1849 - struct btrfs_trans_handle *trans; 1850 - struct btrfs_root *root = BTRFS_I(inode)->root; 1851 - 1852 - /* 1853 - * We need to block on a committing transaction to keep us from 1854 - * throwing a ordered operation on to the list and causing 1855 - * something like sync to deadlock trying to flush out this 1856 - * inode. 1857 - */ 1858 - trans = btrfs_start_transaction(root, 0); 1859 - if (IS_ERR(trans)) 1860 - return PTR_ERR(trans); 1861 - btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode); 1862 - btrfs_end_transaction(trans, root); 1863 - if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 1864 - filemap_flush(inode->i_mapping); 1865 - } 1866 1841 if (filp->private_data) 1867 1842 btrfs_ioctl_trans_end(filp); 1843 + filemap_flush(inode->i_mapping); 1868 1844 return 0; 1869 1845 } 1870 1846

+15 -44

fs/btrfs/inode.c

··· 709 709 unlock_extent(io_tree, async_extent->start, 710 710 async_extent->start + 711 711 async_extent->ram_size - 1); 712 + 713 + /* 714 + * we need to redirty the pages if we decide to 715 + * fallback to uncompressed IO, otherwise we 716 + * will not submit these pages down to lower 717 + * layers. 718 + */ 719 + extent_range_redirty_for_io(inode, 720 + async_extent->start, 721 + async_extent->start + 722 + async_extent->ram_size - 1); 723 + 712 724 goto retry; 713 725 } 714 726 goto out_free; ··· 7951 7939 BUG_ON(ret); 7952 7940 7953 7941 /* 7954 - * setattr is responsible for setting the ordered_data_close flag, 7955 - * but that is only tested during the last file release. That 7956 - * could happen well after the next commit, leaving a great big 7957 - * window where new writes may get lost if someone chooses to write 7958 - * to this file after truncating to zero 7959 - * 7960 - * The inode doesn't have any dirty data here, and so if we commit 7961 - * this is a noop. If someone immediately starts writing to the inode 7962 - * it is very likely we'll catch some of their writes in this 7963 - * transaction, and the commit will find this file on the ordered 7964 - * data list with good things to send down. 7965 - * 7966 - * This is a best effort solution, there is still a window where 7967 - * using truncate to replace the contents of the file will 7968 - * end up with a zero length file after a crash. 7969 - */ 7970 - if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 7971 - &BTRFS_I(inode)->runtime_flags)) 7972 - btrfs_add_ordered_operation(trans, root, inode); 7973 - 7974 - /* 7975 7942 * So if we truncate and then write and fsync we normally would just 7976 7943 * write the extents that changed, which is a problem if we need to 7977 7944 * first truncate that entire inode. So set this flag so we write out ··· 8097 8106 mutex_init(&ei->delalloc_mutex); 8098 8107 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 8099 8108 INIT_LIST_HEAD(&ei->delalloc_inodes); 8100 - INIT_LIST_HEAD(&ei->ordered_operations); 8101 8109 RB_CLEAR_NODE(&ei->rb_node); 8102 8110 8103 8111 return inode; ··· 8135 8145 */ 8136 8146 if (!root) 8137 8147 goto free; 8138 - 8139 - /* 8140 - * Make sure we're properly removed from the ordered operation 8141 - * lists. 8142 - */ 8143 - smp_mb(); 8144 - if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { 8145 - spin_lock(&root->fs_info->ordered_root_lock); 8146 - list_del_init(&BTRFS_I(inode)->ordered_operations); 8147 - spin_unlock(&root->fs_info->ordered_root_lock); 8148 - } 8149 8148 8150 8149 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 8151 8150 &BTRFS_I(inode)->runtime_flags)) { ··· 8317 8338 ret = 0; 8318 8339 8319 8340 /* 8320 - * we're using rename to replace one file with another. 8321 - * and the replacement file is large. Start IO on it now so 8322 - * we don't add too much work to the end of the transaction 8341 + * we're using rename to replace one file with another. Start IO on it 8342 + * now so we don't add too much work to the end of the transaction 8323 8343 */ 8324 - if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && 8325 - old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 8344 + if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size) 8326 8345 filemap_flush(old_inode->i_mapping); 8327 8346 8328 8347 /* close the racy window with snapshot create/destroy ioctl */ ··· 8368 8391 */ 8369 8392 btrfs_pin_log_trans(root); 8370 8393 } 8371 - /* 8372 - * make sure the inode gets flushed if it is replacing 8373 - * something. 8374 - */ 8375 - if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) 8376 - btrfs_add_ordered_operation(trans, root, old_inode); 8377 8394 8378 8395 inode_inc_iversion(old_dir); 8379 8396 inode_inc_iversion(new_dir);

-123

fs/btrfs/ordered-data.c

··· 571 571 572 572 trace_btrfs_ordered_extent_remove(inode, entry); 573 573 574 - /* 575 - * we have no more ordered extents for this inode and 576 - * no dirty pages. We can safely remove it from the 577 - * list of ordered extents 578 - */ 579 - if (RB_EMPTY_ROOT(&tree->tree) && 580 - !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { 581 - spin_lock(&root->fs_info->ordered_root_lock); 582 - list_del_init(&BTRFS_I(inode)->ordered_operations); 583 - spin_unlock(&root->fs_info->ordered_root_lock); 584 - } 585 - 586 574 if (!root->nr_ordered_extents) { 587 575 spin_lock(&root->fs_info->ordered_root_lock); 588 576 BUG_ON(list_empty(&root->ordered_root)); ··· 672 684 list_splice_tail(&splice, &fs_info->ordered_roots); 673 685 spin_unlock(&fs_info->ordered_root_lock); 674 686 mutex_unlock(&fs_info->ordered_operations_mutex); 675 - } 676 - 677 - /* 678 - * this is used during transaction commit to write all the inodes 679 - * added to the ordered operation list. These files must be fully on 680 - * disk before the transaction commits. 681 - * 682 - * we have two modes here, one is to just start the IO via filemap_flush 683 - * and the other is to wait for all the io. When we wait, we have an 684 - * extra check to make sure the ordered operation list really is empty 685 - * before we return 686 - */ 687 - int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, 688 - struct btrfs_root *root, int wait) 689 - { 690 - struct btrfs_inode *btrfs_inode; 691 - struct inode *inode; 692 - struct btrfs_transaction *cur_trans = trans->transaction; 693 - struct list_head splice; 694 - struct list_head works; 695 - struct btrfs_delalloc_work *work, *next; 696 - int ret = 0; 697 - 698 - INIT_LIST_HEAD(&splice); 699 - INIT_LIST_HEAD(&works); 700 - 701 - mutex_lock(&root->fs_info->ordered_extent_flush_mutex); 702 - spin_lock(&root->fs_info->ordered_root_lock); 703 - list_splice_init(&cur_trans->ordered_operations, &splice); 704 - while (!list_empty(&splice)) { 705 - btrfs_inode = list_entry(splice.next, struct btrfs_inode, 706 - ordered_operations); 707 - inode = &btrfs_inode->vfs_inode; 708 - 709 - list_del_init(&btrfs_inode->ordered_operations); 710 - 711 - /* 712 - * the inode may be getting freed (in sys_unlink path). 713 - */ 714 - inode = igrab(inode); 715 - if (!inode) 716 - continue; 717 - 718 - if (!wait) 719 - list_add_tail(&BTRFS_I(inode)->ordered_operations, 720 - &cur_trans->ordered_operations); 721 - spin_unlock(&root->fs_info->ordered_root_lock); 722 - 723 - work = btrfs_alloc_delalloc_work(inode, wait, 1); 724 - if (!work) { 725 - spin_lock(&root->fs_info->ordered_root_lock); 726 - if (list_empty(&BTRFS_I(inode)->ordered_operations)) 727 - list_add_tail(&btrfs_inode->ordered_operations, 728 - &splice); 729 - list_splice_tail(&splice, 730 - &cur_trans->ordered_operations); 731 - spin_unlock(&root->fs_info->ordered_root_lock); 732 - ret = -ENOMEM; 733 - goto out; 734 - } 735 - list_add_tail(&work->list, &works); 736 - btrfs_queue_work(root->fs_info->flush_workers, 737 - &work->work); 738 - 739 - cond_resched(); 740 - spin_lock(&root->fs_info->ordered_root_lock); 741 - } 742 - spin_unlock(&root->fs_info->ordered_root_lock); 743 - out: 744 - list_for_each_entry_safe(work, next, &works, list) { 745 - list_del_init(&work->list); 746 - btrfs_wait_and_free_delalloc_work(work); 747 - } 748 - mutex_unlock(&root->fs_info->ordered_extent_flush_mutex); 749 - return ret; 750 687 } 751 688 752 689 /* ··· 1031 1118 spin_unlock_irq(&tree->lock); 1032 1119 btrfs_put_ordered_extent(ordered); 1033 1120 return index; 1034 - } 1035 - 1036 - 1037 - /* 1038 - * add a given inode to the list of inodes that must be fully on 1039 - * disk before a transaction commit finishes. 1040 - * 1041 - * This basically gives us the ext3 style data=ordered mode, and it is mostly 1042 - * used to make sure renamed files are fully on disk. 1043 - * 1044 - * It is a noop if the inode is already fully on disk. 1045 - * 1046 - * If trans is not null, we'll do a friendly check for a transaction that 1047 - * is already flushing things and force the IO down ourselves. 1048 - */ 1049 - void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 1050 - struct btrfs_root *root, struct inode *inode) 1051 - { 1052 - struct btrfs_transaction *cur_trans = trans->transaction; 1053 - u64 last_mod; 1054 - 1055 - last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); 1056 - 1057 - /* 1058 - * if this file hasn't been changed since the last transaction 1059 - * commit, we can safely return without doing anything 1060 - */ 1061 - if (last_mod <= root->fs_info->last_trans_committed) 1062 - return; 1063 - 1064 - spin_lock(&root->fs_info->ordered_root_lock); 1065 - if (list_empty(&BTRFS_I(inode)->ordered_operations)) { 1066 - list_add_tail(&BTRFS_I(inode)->ordered_operations, 1067 - &cur_trans->ordered_operations); 1068 - } 1069 - spin_unlock(&root->fs_info->ordered_root_lock); 1070 1121 } 1071 1122 1072 1123 int __init ordered_data_init(void)

-5

fs/btrfs/ordered-data.h

··· 190 190 struct btrfs_ordered_extent *ordered); 191 191 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 192 192 u32 *sum, int len); 193 - int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, 194 - struct btrfs_root *root, int wait); 195 - void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 196 - struct btrfs_root *root, 197 - struct inode *inode); 198 193 int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); 199 194 void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); 200 195 void btrfs_get_logged_extents(struct inode *inode,

+169

fs/btrfs/qgroup.c

··· 1201 1201 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1202 1202 return ret; 1203 1203 } 1204 + 1205 + static int comp_oper_exist(struct btrfs_qgroup_operation *oper1, 1206 + struct btrfs_qgroup_operation *oper2) 1207 + { 1208 + /* 1209 + * Ignore seq and type here, we're looking for any operation 1210 + * at all related to this extent on that root. 1211 + */ 1212 + if (oper1->bytenr < oper2->bytenr) 1213 + return -1; 1214 + if (oper1->bytenr > oper2->bytenr) 1215 + return 1; 1216 + if (oper1->ref_root < oper2->ref_root) 1217 + return -1; 1218 + if (oper1->ref_root > oper2->ref_root) 1219 + return 1; 1220 + return 0; 1221 + } 1222 + 1223 + static int qgroup_oper_exists(struct btrfs_fs_info *fs_info, 1224 + struct btrfs_qgroup_operation *oper) 1225 + { 1226 + struct rb_node *n; 1227 + struct btrfs_qgroup_operation *cur; 1228 + int cmp; 1229 + 1230 + spin_lock(&fs_info->qgroup_op_lock); 1231 + n = fs_info->qgroup_op_tree.rb_node; 1232 + while (n) { 1233 + cur = rb_entry(n, struct btrfs_qgroup_operation, n); 1234 + cmp = comp_oper_exist(cur, oper); 1235 + if (cmp < 0) { 1236 + n = n->rb_right; 1237 + } else if (cmp) { 1238 + n = n->rb_left; 1239 + } else { 1240 + spin_unlock(&fs_info->qgroup_op_lock); 1241 + return -EEXIST; 1242 + } 1243 + } 1244 + spin_unlock(&fs_info->qgroup_op_lock); 1245 + return 0; 1246 + } 1247 + 1204 1248 static int comp_oper(struct btrfs_qgroup_operation *oper1, 1205 1249 struct btrfs_qgroup_operation *oper2) 1206 1250 { ··· 1334 1290 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1335 1291 INIT_LIST_HEAD(&oper->elem.list); 1336 1292 oper->elem.seq = 0; 1293 + 1294 + if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { 1295 + /* 1296 + * If any operation for this bytenr/ref_root combo 1297 + * exists, then we know it's not exclusively owned and 1298 + * shouldn't be queued up. 1299 + * 1300 + * This also catches the case where we have a cloned 1301 + * extent that gets queued up multiple times during 1302 + * drop snapshot. 1303 + */ 1304 + if (qgroup_oper_exists(fs_info, oper)) { 1305 + kfree(oper); 1306 + return 0; 1307 + } 1308 + } 1309 + 1337 1310 ret = insert_qgroup_oper(fs_info, oper); 1338 1311 if (ret) { 1339 1312 /* Shouldn't happen so have an assert for developers */ ··· 1945 1884 } 1946 1885 1947 1886 /* 1887 + * Process a reference to a shared subtree. This type of operation is 1888 + * queued during snapshot removal when we encounter extents which are 1889 + * shared between more than one root. 1890 + */ 1891 + static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, 1892 + struct btrfs_fs_info *fs_info, 1893 + struct btrfs_qgroup_operation *oper) 1894 + { 1895 + struct ulist *roots = NULL; 1896 + struct ulist_node *unode; 1897 + struct ulist_iterator uiter; 1898 + struct btrfs_qgroup_list *glist; 1899 + struct ulist *parents; 1900 + int ret = 0; 1901 + int err; 1902 + struct btrfs_qgroup *qg; 1903 + u64 root_obj = 0; 1904 + struct seq_list elem = {}; 1905 + 1906 + parents = ulist_alloc(GFP_NOFS); 1907 + if (!parents) 1908 + return -ENOMEM; 1909 + 1910 + btrfs_get_tree_mod_seq(fs_info, &elem); 1911 + ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1912 + elem.seq, &roots); 1913 + btrfs_put_tree_mod_seq(fs_info, &elem); 1914 + if (ret < 0) 1915 + return ret; 1916 + 1917 + if (roots->nnodes != 1) 1918 + goto out; 1919 + 1920 + ULIST_ITER_INIT(&uiter); 1921 + unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ 1922 + /* 1923 + * If we find our ref root then that means all refs 1924 + * this extent has to the root have not yet been 1925 + * deleted. In that case, we do nothing and let the 1926 + * last ref for this bytenr drive our update. 1927 + * 1928 + * This can happen for example if an extent is 1929 + * referenced multiple times in a snapshot (clone, 1930 + * etc). If we are in the middle of snapshot removal, 1931 + * queued updates for such an extent will find the 1932 + * root if we have not yet finished removing the 1933 + * snapshot. 1934 + */ 1935 + if (unode->val == oper->ref_root) 1936 + goto out; 1937 + 1938 + root_obj = unode->val; 1939 + BUG_ON(!root_obj); 1940 + 1941 + spin_lock(&fs_info->qgroup_lock); 1942 + qg = find_qgroup_rb(fs_info, root_obj); 1943 + if (!qg) 1944 + goto out_unlock; 1945 + 1946 + qg->excl += oper->num_bytes; 1947 + qg->excl_cmpr += oper->num_bytes; 1948 + qgroup_dirty(fs_info, qg); 1949 + 1950 + /* 1951 + * Adjust counts for parent groups. First we find all 1952 + * parents, then in the 2nd loop we do the adjustment 1953 + * while adding parents of the parents to our ulist. 1954 + */ 1955 + list_for_each_entry(glist, &qg->groups, next_group) { 1956 + err = ulist_add(parents, glist->group->qgroupid, 1957 + ptr_to_u64(glist->group), GFP_ATOMIC); 1958 + if (err < 0) { 1959 + ret = err; 1960 + goto out_unlock; 1961 + } 1962 + } 1963 + 1964 + ULIST_ITER_INIT(&uiter); 1965 + while ((unode = ulist_next(parents, &uiter))) { 1966 + qg = u64_to_ptr(unode->aux); 1967 + qg->excl += oper->num_bytes; 1968 + qg->excl_cmpr += oper->num_bytes; 1969 + qgroup_dirty(fs_info, qg); 1970 + 1971 + /* Add any parents of the parents */ 1972 + list_for_each_entry(glist, &qg->groups, next_group) { 1973 + err = ulist_add(parents, glist->group->qgroupid, 1974 + ptr_to_u64(glist->group), GFP_ATOMIC); 1975 + if (err < 0) { 1976 + ret = err; 1977 + goto out_unlock; 1978 + } 1979 + } 1980 + } 1981 + 1982 + out_unlock: 1983 + spin_unlock(&fs_info->qgroup_lock); 1984 + 1985 + out: 1986 + ulist_free(roots); 1987 + ulist_free(parents); 1988 + return ret; 1989 + } 1990 + 1991 + /* 1948 1992 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 1949 1993 * from the fs. First, all roots referencing the extent are searched, and 1950 1994 * then the space is accounted accordingly to the different roots. The ··· 2085 1919 case BTRFS_QGROUP_OPER_ADD_SHARED: 2086 1920 case BTRFS_QGROUP_OPER_SUB_SHARED: 2087 1921 ret = qgroup_shared_accounting(trans, fs_info, oper); 1922 + break; 1923 + case BTRFS_QGROUP_OPER_SUB_SUBTREE: 1924 + ret = qgroup_subtree_accounting(trans, fs_info, oper); 2088 1925 break; 2089 1926 default: 2090 1927 ASSERT(0);

+1

fs/btrfs/qgroup.h

··· 44 44 BTRFS_QGROUP_OPER_ADD_SHARED, 45 45 BTRFS_QGROUP_OPER_SUB_EXCL, 46 46 BTRFS_QGROUP_OPER_SUB_SHARED, 47 + BTRFS_QGROUP_OPER_SUB_SUBTREE, 47 48 }; 48 49 49 50 struct btrfs_qgroup_operation {

+45 -6

fs/btrfs/super.c

··· 1665 1665 return 0; 1666 1666 } 1667 1667 1668 + /* 1669 + * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. 1670 + * 1671 + * If there's a redundant raid level at DATA block groups, use the respective 1672 + * multiplier to scale the sizes. 1673 + * 1674 + * Unused device space usage is based on simulating the chunk allocator 1675 + * algorithm that respects the device sizes, order of allocations and the 1676 + * 'alloc_start' value, this is a close approximation of the actual use but 1677 + * there are other factors that may change the result (like a new metadata 1678 + * chunk). 1679 + * 1680 + * FIXME: not accurate for mixed block groups, total and free/used are ok, 1681 + * available appears slightly larger. 1682 + */ 1668 1683 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 1669 1684 { 1670 1685 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); ··· 1690 1675 u64 total_free_data = 0; 1691 1676 int bits = dentry->d_sb->s_blocksize_bits; 1692 1677 __be32 *fsid = (__be32 *)fs_info->fsid; 1678 + unsigned factor = 1; 1679 + struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 1693 1680 int ret; 1694 1681 1695 1682 /* holding chunk_muext to avoid allocating new chunks */ ··· 1699 1682 rcu_read_lock(); 1700 1683 list_for_each_entry_rcu(found, head, list) { 1701 1684 if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 1685 + int i; 1686 + 1702 1687 total_free_data += found->disk_total - found->disk_used; 1703 1688 total_free_data -= 1704 1689 btrfs_account_ro_block_groups_free_space(found); 1690 + 1691 + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 1692 + if (!list_empty(&found->block_groups[i])) { 1693 + switch (i) { 1694 + case BTRFS_RAID_DUP: 1695 + case BTRFS_RAID_RAID1: 1696 + case BTRFS_RAID_RAID10: 1697 + factor = 2; 1698 + } 1699 + } 1700 + } 1705 1701 } 1706 1702 1707 1703 total_used += found->disk_used; 1708 1704 } 1705 + 1709 1706 rcu_read_unlock(); 1710 1707 1711 - buf->f_namelen = BTRFS_NAME_LEN; 1712 - buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1713 - buf->f_bfree = buf->f_blocks - (total_used >> bits); 1714 - buf->f_bsize = dentry->d_sb->s_blocksize; 1715 - buf->f_type = BTRFS_SUPER_MAGIC; 1708 + buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); 1709 + buf->f_blocks >>= bits; 1710 + buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits); 1711 + 1712 + /* Account global block reserve as used, it's in logical size already */ 1713 + spin_lock(&block_rsv->lock); 1714 + buf->f_bfree -= block_rsv->size >> bits; 1715 + spin_unlock(&block_rsv->lock); 1716 + 1716 1717 buf->f_bavail = total_free_data; 1717 1718 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); 1718 1719 if (ret) { 1719 1720 mutex_unlock(&fs_info->chunk_mutex); 1720 1721 return ret; 1721 1722 } 1722 - buf->f_bavail += total_free_data; 1723 + buf->f_bavail += div_u64(total_free_data, factor); 1723 1724 buf->f_bavail = buf->f_bavail >> bits; 1724 1725 mutex_unlock(&fs_info->chunk_mutex); 1726 + 1727 + buf->f_type = BTRFS_SUPER_MAGIC; 1728 + buf->f_bsize = dentry->d_sb->s_blocksize; 1729 + buf->f_namelen = BTRFS_NAME_LEN; 1725 1730 1726 1731 /* We treat it as constant endianness (it doesn't matter _which_) 1727 1732 because we want the fsid to come out the same whether mounted

+2 -31

fs/btrfs/transaction.c

··· 218 218 spin_lock_init(&cur_trans->delayed_refs.lock); 219 219 220 220 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 221 - INIT_LIST_HEAD(&cur_trans->ordered_operations); 222 221 INIT_LIST_HEAD(&cur_trans->pending_chunks); 223 222 INIT_LIST_HEAD(&cur_trans->switch_commits); 224 223 list_add_tail(&cur_trans->list, &fs_info->trans_list); ··· 1611 1612 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1612 1613 } 1613 1614 1614 - static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, 1615 - struct btrfs_root *root) 1616 - { 1617 - int ret; 1618 - 1619 - ret = btrfs_run_delayed_items(trans, root); 1620 - if (ret) 1621 - return ret; 1622 - 1623 - /* 1624 - * rename don't use btrfs_join_transaction, so, once we 1625 - * set the transaction to blocked above, we aren't going 1626 - * to get any new ordered operations. We can safely run 1627 - * it here and no for sure that nothing new will be added 1628 - * to the list 1629 - */ 1630 - ret = btrfs_run_ordered_operations(trans, root, 1); 1631 - 1632 - return ret; 1633 - } 1634 - 1635 1615 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1636 1616 { 1637 1617 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) ··· 1630 1652 struct btrfs_transaction *cur_trans = trans->transaction; 1631 1653 struct btrfs_transaction *prev_trans = NULL; 1632 1654 int ret; 1633 - 1634 - ret = btrfs_run_ordered_operations(trans, root, 0); 1635 - if (ret) { 1636 - btrfs_abort_transaction(trans, root, ret); 1637 - btrfs_end_transaction(trans, root); 1638 - return ret; 1639 - } 1640 1655 1641 1656 /* Stop the commit early if ->aborted is set */ 1642 1657 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { ··· 1711 1740 if (ret) 1712 1741 goto cleanup_transaction; 1713 1742 1714 - ret = btrfs_flush_all_pending_stuffs(trans, root); 1743 + ret = btrfs_run_delayed_items(trans, root); 1715 1744 if (ret) 1716 1745 goto cleanup_transaction; 1717 1746 ··· 1719 1748 extwriter_counter_read(cur_trans) == 0); 1720 1749 1721 1750 /* some pending stuffs might be added after the previous flush. */ 1722 - ret = btrfs_flush_all_pending_stuffs(trans, root); 1751 + ret = btrfs_run_delayed_items(trans, root); 1723 1752 if (ret) 1724 1753 goto cleanup_transaction; 1725 1754

-1

fs/btrfs/transaction.h

··· 55 55 wait_queue_head_t writer_wait; 56 56 wait_queue_head_t commit_wait; 57 57 struct list_head pending_snapshots; 58 - struct list_head ordered_operations; 59 58 struct list_head pending_chunks; 60 59 struct list_head switch_commits; 61 60 struct btrfs_delayed_ref_root delayed_refs;

+15

fs/btrfs/ulist.h

··· 57 57 int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); 58 58 int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 59 59 u64 *old_aux, gfp_t gfp_mask); 60 + 61 + /* just like ulist_add_merge() but take a pointer for the aux data */ 62 + static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux, 63 + void **old_aux, gfp_t gfp_mask) 64 + { 65 + #if BITS_PER_LONG == 32 66 + u64 old64 = (uintptr_t)*old_aux; 67 + int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask); 68 + *old_aux = (void *)((uintptr_t)old64); 69 + return ret; 70 + #else 71 + return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask); 72 + #endif 73 + } 74 + 60 75 struct ulist_node *ulist_next(struct ulist *ulist, 61 76 struct ulist_iterator *uiter); 62 77

Configure Feed

Configure Feed