Merge branch 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

+5 -1

fs/btrfs/acl.c

··· 114 114 int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) 115 115 { 116 116 int ret; 117 + umode_t old_mode = inode->i_mode; 117 118 118 119 if (type == ACL_TYPE_ACCESS && acl) { 119 120 ret = posix_acl_update_mode(inode, &inode->i_mode, &acl); 120 121 if (ret) 121 122 return ret; 122 123 } 123 - return __btrfs_set_acl(NULL, inode, acl, type); 124 + ret = __btrfs_set_acl(NULL, inode, acl, type); 125 + if (ret) 126 + inode->i_mode = old_mode; 127 + return ret; 124 128 } 125 129 126 130 /*

+3 -3

fs/btrfs/async-thread.c

··· 75 75 } 76 76 77 77 struct btrfs_fs_info * 78 - btrfs_workqueue_owner(struct __btrfs_workqueue *wq) 78 + btrfs_workqueue_owner(const struct __btrfs_workqueue *wq) 79 79 { 80 80 return wq->fs_info; 81 81 } 82 82 83 83 struct btrfs_fs_info * 84 - btrfs_work_owner(struct btrfs_work *work) 84 + btrfs_work_owner(const struct btrfs_work *work) 85 85 { 86 86 return work->wq->fs_info; 87 87 } 88 88 89 - bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq) 89 + bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq) 90 90 { 91 91 /* 92 92 * We could compare wq->normal->pending with num_online_cpus()

+3 -3

fs/btrfs/async-thread.h

··· 82 82 void btrfs_destroy_workqueue(struct btrfs_workqueue *wq); 83 83 void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); 84 84 void btrfs_set_work_high_priority(struct btrfs_work *work); 85 - struct btrfs_fs_info *btrfs_work_owner(struct btrfs_work *work); 86 - struct btrfs_fs_info *btrfs_workqueue_owner(struct __btrfs_workqueue *wq); 87 - bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq); 85 + struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work); 86 + struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq); 87 + bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq); 88 88 #endif

+482 -607

fs/btrfs/backref.c

··· 18 18 19 19 #include <linux/mm.h> 20 20 #include <linux/rbtree.h> 21 + #include <trace/events/btrfs.h> 21 22 #include "ctree.h" 22 23 #include "disk-io.h" 23 24 #include "backref.h" ··· 26 25 #include "transaction.h" 27 26 #include "delayed-ref.h" 28 27 #include "locking.h" 29 - 30 - enum merge_mode { 31 - MERGE_IDENTICAL_KEYS = 1, 32 - MERGE_IDENTICAL_PARENTS, 33 - }; 34 28 35 29 /* Just an arbitrary number so we can be sure this happened */ 36 30 #define BACKREF_FOUND_SHARED 6 ··· 36 40 struct extent_inode_elem *next; 37 41 }; 38 42 39 - /* 40 - * ref_root is used as the root of the ref tree that hold a collection 41 - * of unique references. 42 - */ 43 - struct ref_root { 44 - struct rb_root rb_root; 45 - 46 - /* 47 - * The unique_refs represents the number of ref_nodes with a positive 48 - * count stored in the tree. Even if a ref_node (the count is greater 49 - * than one) is added, the unique_refs will only increase by one. 50 - */ 51 - unsigned int unique_refs; 52 - }; 53 - 54 - /* ref_node is used to store a unique reference to the ref tree. 
*/ 55 - struct ref_node { 56 - struct rb_node rb_node; 57 - 58 - /* For NORMAL_REF, otherwise all these fields should be set to 0 */ 59 - u64 root_id; 60 - u64 object_id; 61 - u64 offset; 62 - 63 - /* For SHARED_REF, otherwise parent field should be set to 0 */ 64 - u64 parent; 65 - 66 - /* Ref to the ref_mod of btrfs_delayed_ref_node */ 67 - int ref_mod; 68 - }; 69 - 70 - /* Dynamically allocate and initialize a ref_root */ 71 - static struct ref_root *ref_root_alloc(void) 72 - { 73 - struct ref_root *ref_tree; 74 - 75 - ref_tree = kmalloc(sizeof(*ref_tree), GFP_NOFS); 76 - if (!ref_tree) 77 - return NULL; 78 - 79 - ref_tree->rb_root = RB_ROOT; 80 - ref_tree->unique_refs = 0; 81 - 82 - return ref_tree; 83 - } 84 - 85 - /* Free all nodes in the ref tree, and reinit ref_root */ 86 - static void ref_root_fini(struct ref_root *ref_tree) 87 - { 88 - struct ref_node *node; 89 - struct rb_node *next; 90 - 91 - while ((next = rb_first(&ref_tree->rb_root)) != NULL) { 92 - node = rb_entry(next, struct ref_node, rb_node); 93 - rb_erase(next, &ref_tree->rb_root); 94 - kfree(node); 95 - } 96 - 97 - ref_tree->rb_root = RB_ROOT; 98 - ref_tree->unique_refs = 0; 99 - } 100 - 101 - static void ref_root_free(struct ref_root *ref_tree) 102 - { 103 - if (!ref_tree) 104 - return; 105 - 106 - ref_root_fini(ref_tree); 107 - kfree(ref_tree); 108 - } 109 - 110 - /* 111 - * Compare ref_node with (root_id, object_id, offset, parent) 112 - * 113 - * The function compares two ref_node a and b. It returns an integer less 114 - * than, equal to, or greater than zero , respectively, to be less than, to 115 - * equal, or be greater than b. 
116 - */ 117 - static int ref_node_cmp(struct ref_node *a, struct ref_node *b) 118 - { 119 - if (a->root_id < b->root_id) 120 - return -1; 121 - else if (a->root_id > b->root_id) 122 - return 1; 123 - 124 - if (a->object_id < b->object_id) 125 - return -1; 126 - else if (a->object_id > b->object_id) 127 - return 1; 128 - 129 - if (a->offset < b->offset) 130 - return -1; 131 - else if (a->offset > b->offset) 132 - return 1; 133 - 134 - if (a->parent < b->parent) 135 - return -1; 136 - else if (a->parent > b->parent) 137 - return 1; 138 - 139 - return 0; 140 - } 141 - 142 - /* 143 - * Search ref_node with (root_id, object_id, offset, parent) in the tree 144 - * 145 - * if found, the pointer of the ref_node will be returned; 146 - * if not found, NULL will be returned and pos will point to the rb_node for 147 - * insert, pos_parent will point to pos'parent for insert; 148 - */ 149 - static struct ref_node *__ref_tree_search(struct ref_root *ref_tree, 150 - struct rb_node ***pos, 151 - struct rb_node **pos_parent, 152 - u64 root_id, u64 object_id, 153 - u64 offset, u64 parent) 154 - { 155 - struct ref_node *cur = NULL; 156 - struct ref_node entry; 157 - int ret; 158 - 159 - entry.root_id = root_id; 160 - entry.object_id = object_id; 161 - entry.offset = offset; 162 - entry.parent = parent; 163 - 164 - *pos = &ref_tree->rb_root.rb_node; 165 - 166 - while (**pos) { 167 - *pos_parent = **pos; 168 - cur = rb_entry(*pos_parent, struct ref_node, rb_node); 169 - 170 - ret = ref_node_cmp(cur, &entry); 171 - if (ret > 0) 172 - *pos = &(**pos)->rb_left; 173 - else if (ret < 0) 174 - *pos = &(**pos)->rb_right; 175 - else 176 - return cur; 177 - } 178 - 179 - return NULL; 180 - } 181 - 182 - /* 183 - * Insert a ref_node to the ref tree 184 - * @pos used for specifiy the position to insert 185 - * @pos_parent for specifiy pos's parent 186 - * 187 - * success, return 0; 188 - * ref_node already exists, return -EEXIST; 189 - */ 190 - static int ref_tree_insert(struct ref_root 
*ref_tree, struct rb_node **pos, 191 - struct rb_node *pos_parent, struct ref_node *ins) 192 - { 193 - struct rb_node **p = NULL; 194 - struct rb_node *parent = NULL; 195 - struct ref_node *cur = NULL; 196 - 197 - if (!pos) { 198 - cur = __ref_tree_search(ref_tree, &p, &parent, ins->root_id, 199 - ins->object_id, ins->offset, 200 - ins->parent); 201 - if (cur) 202 - return -EEXIST; 203 - } else { 204 - p = pos; 205 - parent = pos_parent; 206 - } 207 - 208 - rb_link_node(&ins->rb_node, parent, p); 209 - rb_insert_color(&ins->rb_node, &ref_tree->rb_root); 210 - 211 - return 0; 212 - } 213 - 214 - /* Erase and free ref_node, caller should update ref_root->unique_refs */ 215 - static void ref_tree_remove(struct ref_root *ref_tree, struct ref_node *node) 216 - { 217 - rb_erase(&node->rb_node, &ref_tree->rb_root); 218 - kfree(node); 219 - } 220 - 221 - /* 222 - * Update ref_root->unique_refs 223 - * 224 - * Call __ref_tree_search 225 - * 1. if ref_node doesn't exist, ref_tree_insert this node, and update 226 - * ref_root->unique_refs: 227 - * if ref_node->ref_mod > 0, ref_root->unique_refs++; 228 - * if ref_node->ref_mod < 0, do noting; 229 - * 230 - * 2. if ref_node is found, then get origin ref_node->ref_mod, and update 231 - * ref_node->ref_mod. 232 - * if ref_node->ref_mod is equal to 0,then call ref_tree_remove 233 - * 234 - * according to origin_mod and new_mod, update ref_root->items 235 - * +----------------+--------------+-------------+ 236 - * | |new_count <= 0|new_count > 0| 237 - * +----------------+--------------+-------------+ 238 - * |origin_count < 0| 0 | 1 | 239 - * +----------------+--------------+-------------+ 240 - * |origin_count > 0| -1 | 0 | 241 - * +----------------+--------------+-------------+ 242 - * 243 - * In case of allocation failure, -ENOMEM is returned and the ref_tree stays 244 - * unaltered. 
245 - * Success, return 0 246 - */ 247 - static int ref_tree_add(struct ref_root *ref_tree, u64 root_id, u64 object_id, 248 - u64 offset, u64 parent, int count) 249 - { 250 - struct ref_node *node = NULL; 251 - struct rb_node **pos = NULL; 252 - struct rb_node *pos_parent = NULL; 253 - int origin_count; 254 - int ret; 255 - 256 - if (!count) 257 - return 0; 258 - 259 - node = __ref_tree_search(ref_tree, &pos, &pos_parent, root_id, 260 - object_id, offset, parent); 261 - if (node == NULL) { 262 - node = kmalloc(sizeof(*node), GFP_NOFS); 263 - if (!node) 264 - return -ENOMEM; 265 - 266 - node->root_id = root_id; 267 - node->object_id = object_id; 268 - node->offset = offset; 269 - node->parent = parent; 270 - node->ref_mod = count; 271 - 272 - ret = ref_tree_insert(ref_tree, pos, pos_parent, node); 273 - ASSERT(!ret); 274 - if (ret) { 275 - kfree(node); 276 - return ret; 277 - } 278 - 279 - ref_tree->unique_refs += node->ref_mod > 0 ? 1 : 0; 280 - 281 - return 0; 282 - } 283 - 284 - origin_count = node->ref_mod; 285 - node->ref_mod += count; 286 - 287 - if (node->ref_mod > 0) 288 - ref_tree->unique_refs += origin_count > 0 ? 0 : 1; 289 - else if (node->ref_mod <= 0) 290 - ref_tree->unique_refs += origin_count > 0 ? 
-1 : 0; 291 - 292 - if (!node->ref_mod) 293 - ref_tree_remove(ref_tree, node); 294 - 295 - return 0; 296 - } 297 - 298 - static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, 299 - struct btrfs_file_extent_item *fi, 300 - u64 extent_item_pos, 301 - struct extent_inode_elem **eie) 43 + static int check_extent_in_eb(const struct btrfs_key *key, 44 + const struct extent_buffer *eb, 45 + const struct btrfs_file_extent_item *fi, 46 + u64 extent_item_pos, 47 + struct extent_inode_elem **eie) 302 48 { 303 49 u64 offset = 0; 304 50 struct extent_inode_elem *e; ··· 82 344 } 83 345 } 84 346 85 - static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, 86 - u64 extent_item_pos, 87 - struct extent_inode_elem **eie) 347 + static int find_extent_in_eb(const struct extent_buffer *eb, 348 + u64 wanted_disk_byte, u64 extent_item_pos, 349 + struct extent_inode_elem **eie) 88 350 { 89 351 u64 disk_byte; 90 352 struct btrfs_key key; ··· 121 383 return 0; 122 384 } 123 385 124 - /* 125 - * this structure records all encountered refs on the way up to the root 126 - */ 127 - struct __prelim_ref { 128 - struct list_head list; 129 - u64 root_id; 130 - struct btrfs_key key_for_search; 131 - int level; 132 - int count; 133 - struct extent_inode_elem *inode_list; 134 - u64 parent; 135 - u64 wanted_disk_byte; 386 + struct preftree { 387 + struct rb_root root; 388 + unsigned int count; 136 389 }; 390 + 391 + #define PREFTREE_INIT { .root = RB_ROOT, .count = 0 } 392 + 393 + struct preftrees { 394 + struct preftree direct; /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */ 395 + struct preftree indirect; /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */ 396 + struct preftree indirect_missing_keys; 397 + }; 398 + 399 + /* 400 + * Checks for a shared extent during backref search. 
401 + * 402 + * The share_count tracks prelim_refs (direct and indirect) having a 403 + * ref->count >0: 404 + * - incremented when a ref->count transitions to >0 405 + * - decremented when a ref->count transitions to <1 406 + */ 407 + struct share_check { 408 + u64 root_objectid; 409 + u64 inum; 410 + int share_count; 411 + }; 412 + 413 + static inline int extent_is_shared(struct share_check *sc) 414 + { 415 + return (sc && sc->share_count > 1) ? BACKREF_FOUND_SHARED : 0; 416 + } 137 417 138 418 static struct kmem_cache *btrfs_prelim_ref_cache; 139 419 140 420 int __init btrfs_prelim_ref_init(void) 141 421 { 142 422 btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref", 143 - sizeof(struct __prelim_ref), 423 + sizeof(struct prelim_ref), 144 424 0, 145 425 SLAB_MEM_SPREAD, 146 426 NULL); ··· 170 414 void btrfs_prelim_ref_exit(void) 171 415 { 172 416 kmem_cache_destroy(btrfs_prelim_ref_cache); 417 + } 418 + 419 + static void free_pref(struct prelim_ref *ref) 420 + { 421 + kmem_cache_free(btrfs_prelim_ref_cache, ref); 422 + } 423 + 424 + /* 425 + * Return 0 when both refs are for the same block (and can be merged). 426 + * A -1 return indicates ref1 is a 'lower' block than ref2, while 1 427 + * indicates a 'higher' block. 
428 + */ 429 + static int prelim_ref_compare(struct prelim_ref *ref1, 430 + struct prelim_ref *ref2) 431 + { 432 + if (ref1->level < ref2->level) 433 + return -1; 434 + if (ref1->level > ref2->level) 435 + return 1; 436 + if (ref1->root_id < ref2->root_id) 437 + return -1; 438 + if (ref1->root_id > ref2->root_id) 439 + return 1; 440 + if (ref1->key_for_search.type < ref2->key_for_search.type) 441 + return -1; 442 + if (ref1->key_for_search.type > ref2->key_for_search.type) 443 + return 1; 444 + if (ref1->key_for_search.objectid < ref2->key_for_search.objectid) 445 + return -1; 446 + if (ref1->key_for_search.objectid > ref2->key_for_search.objectid) 447 + return 1; 448 + if (ref1->key_for_search.offset < ref2->key_for_search.offset) 449 + return -1; 450 + if (ref1->key_for_search.offset > ref2->key_for_search.offset) 451 + return 1; 452 + if (ref1->parent < ref2->parent) 453 + return -1; 454 + if (ref1->parent > ref2->parent) 455 + return 1; 456 + 457 + return 0; 458 + } 459 + 460 + void update_share_count(struct share_check *sc, int oldcount, int newcount) 461 + { 462 + if ((!sc) || (oldcount == 0 && newcount < 1)) 463 + return; 464 + 465 + if (oldcount > 0 && newcount < 1) 466 + sc->share_count--; 467 + else if (oldcount < 1 && newcount > 0) 468 + sc->share_count++; 469 + } 470 + 471 + /* 472 + * Add @newref to the @root rbtree, merging identical refs. 473 + * 474 + * Callers should assume that newref has been freed after calling. 
475 + */ 476 + static void prelim_ref_insert(const struct btrfs_fs_info *fs_info, 477 + struct preftree *preftree, 478 + struct prelim_ref *newref, 479 + struct share_check *sc) 480 + { 481 + struct rb_root *root; 482 + struct rb_node **p; 483 + struct rb_node *parent = NULL; 484 + struct prelim_ref *ref; 485 + int result; 486 + 487 + root = &preftree->root; 488 + p = &root->rb_node; 489 + 490 + while (*p) { 491 + parent = *p; 492 + ref = rb_entry(parent, struct prelim_ref, rbnode); 493 + result = prelim_ref_compare(ref, newref); 494 + if (result < 0) { 495 + p = &(*p)->rb_left; 496 + } else if (result > 0) { 497 + p = &(*p)->rb_right; 498 + } else { 499 + /* Identical refs, merge them and free @newref */ 500 + struct extent_inode_elem *eie = ref->inode_list; 501 + 502 + while (eie && eie->next) 503 + eie = eie->next; 504 + 505 + if (!eie) 506 + ref->inode_list = newref->inode_list; 507 + else 508 + eie->next = newref->inode_list; 509 + trace_btrfs_prelim_ref_merge(fs_info, ref, newref, 510 + preftree->count); 511 + /* 512 + * A delayed ref can have newref->count < 0. 513 + * The ref->count is updated to follow any 514 + * BTRFS_[ADD|DROP]_DELAYED_REF actions. 515 + */ 516 + update_share_count(sc, ref->count, 517 + ref->count + newref->count); 518 + ref->count += newref->count; 519 + free_pref(newref); 520 + return; 521 + } 522 + } 523 + 524 + update_share_count(sc, 0, newref->count); 525 + preftree->count++; 526 + trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count); 527 + rb_link_node(&newref->rbnode, parent, p); 528 + rb_insert_color(&newref->rbnode, root); 529 + } 530 + 531 + /* 532 + * Release the entire tree. We don't care about internal consistency so 533 + * just free everything and then reset the tree root. 
534 + */ 535 + static void prelim_release(struct preftree *preftree) 536 + { 537 + struct prelim_ref *ref, *next_ref; 538 + 539 + rbtree_postorder_for_each_entry_safe(ref, next_ref, &preftree->root, 540 + rbnode) 541 + free_pref(ref); 542 + 543 + preftree->root = RB_ROOT; 544 + preftree->count = 0; 173 545 } 174 546 175 547 /* ··· 332 448 * 333 449 * - column 1, 3: we've the parent -> done 334 450 * - column 2: we take the first key from the block to find the parent 335 - * (see __add_missing_keys) 451 + * (see add_missing_keys) 336 452 * - column 4: we use the key to find the parent 337 453 * 338 454 * additional information that's available but not required to find the parent 339 455 * block might help in merging entries to gain some speed. 340 456 */ 341 - 342 - static int __add_prelim_ref(struct list_head *head, u64 root_id, 343 - struct btrfs_key *key, int level, 344 - u64 parent, u64 wanted_disk_byte, int count, 345 - gfp_t gfp_mask) 457 + static int add_prelim_ref(const struct btrfs_fs_info *fs_info, 458 + struct preftree *preftree, u64 root_id, 459 + const struct btrfs_key *key, int level, u64 parent, 460 + u64 wanted_disk_byte, int count, 461 + struct share_check *sc, gfp_t gfp_mask) 346 462 { 347 - struct __prelim_ref *ref; 463 + struct prelim_ref *ref; 348 464 349 465 if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID) 350 466 return 0; ··· 387 503 ref->count = count; 388 504 ref->parent = parent; 389 505 ref->wanted_disk_byte = wanted_disk_byte; 390 - list_add_tail(&ref->list, head); 506 + prelim_ref_insert(fs_info, preftree, ref, sc); 507 + return extent_is_shared(sc); 508 + } 391 509 392 - return 0; 510 + /* direct refs use root == 0, key == NULL */ 511 + static int add_direct_ref(const struct btrfs_fs_info *fs_info, 512 + struct preftrees *preftrees, int level, u64 parent, 513 + u64 wanted_disk_byte, int count, 514 + struct share_check *sc, gfp_t gfp_mask) 515 + { 516 + return add_prelim_ref(fs_info, &preftrees->direct, 0, NULL, level, 517 + parent, 
wanted_disk_byte, count, sc, gfp_mask); 518 + } 519 + 520 + /* indirect refs use parent == 0 */ 521 + static int add_indirect_ref(const struct btrfs_fs_info *fs_info, 522 + struct preftrees *preftrees, u64 root_id, 523 + const struct btrfs_key *key, int level, 524 + u64 wanted_disk_byte, int count, 525 + struct share_check *sc, gfp_t gfp_mask) 526 + { 527 + struct preftree *tree = &preftrees->indirect; 528 + 529 + if (!key) 530 + tree = &preftrees->indirect_missing_keys; 531 + return add_prelim_ref(fs_info, tree, root_id, key, level, 0, 532 + wanted_disk_byte, count, sc, gfp_mask); 393 533 } 394 534 395 535 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, 396 - struct ulist *parents, struct __prelim_ref *ref, 536 + struct ulist *parents, struct prelim_ref *ref, 397 537 int level, u64 time_seq, const u64 *extent_item_pos, 398 538 u64 total_refs) 399 539 { ··· 507 599 * resolve an indirect backref in the form (root_id, key, level) 508 600 * to a logical address 509 601 */ 510 - static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, 511 - struct btrfs_path *path, u64 time_seq, 512 - struct __prelim_ref *ref, 513 - struct ulist *parents, 514 - const u64 *extent_item_pos, u64 total_refs) 602 + static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, 603 + struct btrfs_path *path, u64 time_seq, 604 + struct prelim_ref *ref, struct ulist *parents, 605 + const u64 *extent_item_pos, u64 total_refs) 515 606 { 516 607 struct btrfs_root *root; 517 608 struct btrfs_key root_key; ··· 588 681 return ret; 589 682 } 590 683 684 + static struct extent_inode_elem * 685 + unode_aux_to_inode_list(struct ulist_node *node) 686 + { 687 + if (!node) 688 + return NULL; 689 + return (struct extent_inode_elem *)(uintptr_t)node->aux; 690 + } 691 + 591 692 /* 592 - * resolve all indirect backrefs from the list 693 + * We maintain three seperate rbtrees: one for direct refs, one for 694 + * indirect refs which have a key, and one for indirect refs 
which do not 695 + * have a key. Each tree does merge on insertion. 696 + * 697 + * Once all of the references are located, we iterate over the tree of 698 + * indirect refs with missing keys. An appropriate key is located and 699 + * the ref is moved onto the tree for indirect refs. After all missing 700 + * keys are thus located, we iterate over the indirect ref tree, resolve 701 + * each reference, and then insert the resolved reference onto the 702 + * direct tree (merging there too). 703 + * 704 + * New backrefs (i.e., for parent nodes) are added to the appropriate 705 + * rbtree as they are encountered. The new backrefs are subsequently 706 + * resolved as above. 593 707 */ 594 - static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, 595 - struct btrfs_path *path, u64 time_seq, 596 - struct list_head *head, 597 - const u64 *extent_item_pos, u64 total_refs, 598 - u64 root_objectid) 708 + static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, 709 + struct btrfs_path *path, u64 time_seq, 710 + struct preftrees *preftrees, 711 + const u64 *extent_item_pos, u64 total_refs, 712 + struct share_check *sc) 599 713 { 600 714 int err; 601 715 int ret = 0; 602 - struct __prelim_ref *ref; 603 - struct __prelim_ref *ref_safe; 604 - struct __prelim_ref *new_ref; 605 716 struct ulist *parents; 606 717 struct ulist_node *node; 607 718 struct ulist_iterator uiter; 719 + struct rb_node *rnode; 608 720 609 721 parents = ulist_alloc(GFP_NOFS); 610 722 if (!parents) 611 723 return -ENOMEM; 612 724 613 725 /* 614 - * _safe allows us to insert directly after the current item without 615 - * iterating over the newly inserted items. 616 - * we're also allowed to re-assign ref during iteration. 726 + * We could trade memory usage for performance here by iterating 727 + * the tree, allocating new refs for each insertion, and then 728 + * freeing the entire indirect tree when we're done. In some test 729 + * cases, the tree can grow quite large (~200k objects). 
617 730 */ 618 - list_for_each_entry_safe(ref, ref_safe, head, list) { 619 - if (ref->parent) /* already direct */ 731 + while ((rnode = rb_first(&preftrees->indirect.root))) { 732 + struct prelim_ref *ref; 733 + 734 + ref = rb_entry(rnode, struct prelim_ref, rbnode); 735 + if (WARN(ref->parent, 736 + "BUG: direct ref found in indirect tree")) { 737 + ret = -EINVAL; 738 + goto out; 739 + } 740 + 741 + rb_erase(&ref->rbnode, &preftrees->indirect.root); 742 + preftrees->indirect.count--; 743 + 744 + if (ref->count == 0) { 745 + free_pref(ref); 620 746 continue; 621 - if (ref->count == 0) 622 - continue; 623 - if (root_objectid && ref->root_id != root_objectid) { 747 + } 748 + 749 + if (sc && sc->root_objectid && 750 + ref->root_id != sc->root_objectid) { 751 + free_pref(ref); 624 752 ret = BACKREF_FOUND_SHARED; 625 753 goto out; 626 754 } 627 - err = __resolve_indirect_ref(fs_info, path, time_seq, ref, 628 - parents, extent_item_pos, 629 - total_refs); 755 + err = resolve_indirect_ref(fs_info, path, time_seq, ref, 756 + parents, extent_item_pos, 757 + total_refs); 630 758 /* 631 759 * we can only tolerate ENOENT,otherwise,we should catch error 632 760 * and return directly. 633 761 */ 634 762 if (err == -ENOENT) { 763 + prelim_ref_insert(fs_info, &preftrees->direct, ref, 764 + NULL); 635 765 continue; 636 766 } else if (err) { 767 + free_pref(ref); 637 768 ret = err; 638 769 goto out; 639 770 } ··· 680 735 ULIST_ITER_INIT(&uiter); 681 736 node = ulist_next(parents, &uiter); 682 737 ref->parent = node ? node->val : 0; 683 - ref->inode_list = node ? 684 - (struct extent_inode_elem *)(uintptr_t)node->aux : NULL; 738 + ref->inode_list = unode_aux_to_inode_list(node); 685 739 686 - /* additional parents require new refs being added here */ 740 + /* Add a prelim_ref(s) for any other parent(s). 
*/ 687 741 while ((node = ulist_next(parents, &uiter))) { 742 + struct prelim_ref *new_ref; 743 + 688 744 new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, 689 745 GFP_NOFS); 690 746 if (!new_ref) { 747 + free_pref(ref); 691 748 ret = -ENOMEM; 692 749 goto out; 693 750 } 694 751 memcpy(new_ref, ref, sizeof(*ref)); 695 752 new_ref->parent = node->val; 696 - new_ref->inode_list = (struct extent_inode_elem *) 697 - (uintptr_t)node->aux; 698 - list_add(&new_ref->list, &ref->list); 753 + new_ref->inode_list = unode_aux_to_inode_list(node); 754 + prelim_ref_insert(fs_info, &preftrees->direct, 755 + new_ref, NULL); 699 756 } 757 + 758 + /* 759 + * Now it's a direct ref, put it in the the direct tree. We must 760 + * do this last because the ref could be merged/freed here. 761 + */ 762 + prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL); 763 + 700 764 ulist_reinit(parents); 765 + cond_resched(); 701 766 } 702 767 out: 703 768 ulist_free(parents); 704 769 return ret; 705 770 } 706 771 707 - static inline int ref_for_same_block(struct __prelim_ref *ref1, 708 - struct __prelim_ref *ref2) 709 - { 710 - if (ref1->level != ref2->level) 711 - return 0; 712 - if (ref1->root_id != ref2->root_id) 713 - return 0; 714 - if (ref1->key_for_search.type != ref2->key_for_search.type) 715 - return 0; 716 - if (ref1->key_for_search.objectid != ref2->key_for_search.objectid) 717 - return 0; 718 - if (ref1->key_for_search.offset != ref2->key_for_search.offset) 719 - return 0; 720 - if (ref1->parent != ref2->parent) 721 - return 0; 722 - 723 - return 1; 724 - } 725 - 726 772 /* 727 773 * read tree blocks and add keys where required. 
728 774 */ 729 - static int __add_missing_keys(struct btrfs_fs_info *fs_info, 730 - struct list_head *head) 775 + static int add_missing_keys(struct btrfs_fs_info *fs_info, 776 + struct preftrees *preftrees) 731 777 { 732 - struct __prelim_ref *ref; 778 + struct prelim_ref *ref; 733 779 struct extent_buffer *eb; 780 + struct preftree *tree = &preftrees->indirect_missing_keys; 781 + struct rb_node *node; 734 782 735 - list_for_each_entry(ref, head, list) { 736 - if (ref->parent) 737 - continue; 738 - if (ref->key_for_search.type) 739 - continue; 783 + while ((node = rb_first(&tree->root))) { 784 + ref = rb_entry(node, struct prelim_ref, rbnode); 785 + rb_erase(node, &tree->root); 786 + 787 + BUG_ON(ref->parent); /* should not be a direct ref */ 788 + BUG_ON(ref->key_for_search.type); 740 789 BUG_ON(!ref->wanted_disk_byte); 790 + 741 791 eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0); 742 792 if (IS_ERR(eb)) { 793 + free_pref(ref); 743 794 return PTR_ERR(eb); 744 795 } else if (!extent_buffer_uptodate(eb)) { 796 + free_pref(ref); 745 797 free_extent_buffer(eb); 746 798 return -EIO; 747 799 } ··· 749 807 btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); 750 808 btrfs_tree_read_unlock(eb); 751 809 free_extent_buffer(eb); 810 + prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL); 811 + cond_resched(); 752 812 } 753 813 return 0; 754 - } 755 - 756 - /* 757 - * merge backrefs and adjust counts accordingly 758 - * 759 - * FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in __add_prelim_ref 760 - * then we can merge more here. Additionally, we could even add a key 761 - * range for the blocks we looked into to merge even more (-> replace 762 - * unresolved refs by those having a parent). 
763 - */ 764 - static void __merge_refs(struct list_head *head, enum merge_mode mode) 765 - { 766 - struct __prelim_ref *pos1; 767 - 768 - list_for_each_entry(pos1, head, list) { 769 - struct __prelim_ref *pos2 = pos1, *tmp; 770 - 771 - list_for_each_entry_safe_continue(pos2, tmp, head, list) { 772 - struct __prelim_ref *ref1 = pos1, *ref2 = pos2; 773 - struct extent_inode_elem *eie; 774 - 775 - if (!ref_for_same_block(ref1, ref2)) 776 - continue; 777 - if (mode == MERGE_IDENTICAL_KEYS) { 778 - if (!ref1->parent && ref2->parent) 779 - swap(ref1, ref2); 780 - } else { 781 - if (ref1->parent != ref2->parent) 782 - continue; 783 - } 784 - 785 - eie = ref1->inode_list; 786 - while (eie && eie->next) 787 - eie = eie->next; 788 - if (eie) 789 - eie->next = ref2->inode_list; 790 - else 791 - ref1->inode_list = ref2->inode_list; 792 - ref1->count += ref2->count; 793 - 794 - list_del(&ref2->list); 795 - kmem_cache_free(btrfs_prelim_ref_cache, ref2); 796 - cond_resched(); 797 - } 798 - 799 - } 800 814 } 801 815 802 816 /* 803 817 * add all currently queued delayed refs from this head whose seq nr is 804 818 * smaller or equal that seq to the list 805 819 */ 806 - static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, 807 - struct list_head *prefs, u64 *total_refs, 808 - u64 inum) 820 + static int add_delayed_refs(const struct btrfs_fs_info *fs_info, 821 + struct btrfs_delayed_ref_head *head, u64 seq, 822 + struct preftrees *preftrees, u64 *total_refs, 823 + struct share_check *sc) 809 824 { 810 825 struct btrfs_delayed_ref_node *node; 811 826 struct btrfs_delayed_extent_op *extent_op = head->extent_op; 812 827 struct btrfs_key key; 813 - struct btrfs_key op_key = {0}; 814 - int sgn; 828 + struct btrfs_key tmp_op_key; 829 + struct btrfs_key *op_key = NULL; 830 + int count; 815 831 int ret = 0; 816 832 817 - if (extent_op && extent_op->update_key) 818 - btrfs_disk_key_to_cpu(&op_key, &extent_op->key); 833 + if (extent_op && extent_op->update_key) { 834 + 
btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); 835 + op_key = &tmp_op_key; 836 + } 819 837 820 838 spin_lock(&head->lock); 821 839 list_for_each_entry(node, &head->ref_list, list) { ··· 788 886 WARN_ON(1); 789 887 continue; 790 888 case BTRFS_ADD_DELAYED_REF: 791 - sgn = 1; 889 + count = node->ref_mod; 792 890 break; 793 891 case BTRFS_DROP_DELAYED_REF: 794 - sgn = -1; 892 + count = node->ref_mod * -1; 795 893 break; 796 894 default: 797 895 BUG_ON(1); 798 896 } 799 - *total_refs += (node->ref_mod * sgn); 897 + *total_refs += count; 800 898 switch (node->type) { 801 899 case BTRFS_TREE_BLOCK_REF_KEY: { 900 + /* NORMAL INDIRECT METADATA backref */ 802 901 struct btrfs_delayed_tree_ref *ref; 803 902 804 903 ref = btrfs_delayed_node_to_tree_ref(node); 805 - ret = __add_prelim_ref(prefs, ref->root, &op_key, 806 - ref->level + 1, 0, node->bytenr, 807 - node->ref_mod * sgn, GFP_ATOMIC); 904 + ret = add_indirect_ref(fs_info, preftrees, ref->root, 905 + &tmp_op_key, ref->level + 1, 906 + node->bytenr, count, sc, 907 + GFP_ATOMIC); 808 908 break; 809 909 } 810 910 case BTRFS_SHARED_BLOCK_REF_KEY: { 911 + /* SHARED DIRECT METADATA backref */ 811 912 struct btrfs_delayed_tree_ref *ref; 812 913 813 914 ref = btrfs_delayed_node_to_tree_ref(node); 814 - ret = __add_prelim_ref(prefs, 0, NULL, 815 - ref->level + 1, ref->parent, 816 - node->bytenr, 817 - node->ref_mod * sgn, GFP_ATOMIC); 915 + 916 + ret = add_direct_ref(fs_info, preftrees, ref->level + 1, 917 + ref->parent, node->bytenr, count, 918 + sc, GFP_ATOMIC); 818 919 break; 819 920 } 820 921 case BTRFS_EXTENT_DATA_REF_KEY: { 922 + /* NORMAL INDIRECT DATA backref */ 821 923 struct btrfs_delayed_data_ref *ref; 822 924 ref = btrfs_delayed_node_to_data_ref(node); 823 925 ··· 833 927 * Found a inum that doesn't match our known inum, we 834 928 * know it's shared. 
835 929 */ 836 - if (inum && ref->objectid != inum) { 930 + if (sc && sc->inum && ref->objectid != sc->inum) { 837 931 ret = BACKREF_FOUND_SHARED; 838 - break; 932 + goto out; 839 933 } 840 934 841 - ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, 842 - node->bytenr, 843 - node->ref_mod * sgn, GFP_ATOMIC); 935 + ret = add_indirect_ref(fs_info, preftrees, ref->root, 936 + &key, 0, node->bytenr, count, sc, 937 + GFP_ATOMIC); 844 938 break; 845 939 } 846 940 case BTRFS_SHARED_DATA_REF_KEY: { 941 + /* SHARED DIRECT FULL backref */ 847 942 struct btrfs_delayed_data_ref *ref; 848 943 849 944 ref = btrfs_delayed_node_to_data_ref(node); 850 - ret = __add_prelim_ref(prefs, 0, NULL, 0, 851 - ref->parent, node->bytenr, 852 - node->ref_mod * sgn, GFP_ATOMIC); 945 + 946 + ret = add_direct_ref(fs_info, preftrees, 0, ref->parent, 947 + node->bytenr, count, sc, 948 + GFP_ATOMIC); 853 949 break; 854 950 } 855 951 default: 856 952 WARN_ON(1); 857 953 } 858 - if (ret) 954 + /* 955 + * We must ignore BACKREF_FOUND_SHARED until all delayed 956 + * refs have been checked. 957 + */ 958 + if (ret && (ret != BACKREF_FOUND_SHARED)) 859 959 break; 860 960 } 961 + if (!ret) 962 + ret = extent_is_shared(sc); 963 + out: 861 964 spin_unlock(&head->lock); 862 965 return ret; 863 966 } 864 967 865 968 /* 866 969 * add all inline backrefs for bytenr to the list 970 + * 971 + * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED. 
867 972 */ 868 - static int __add_inline_refs(struct btrfs_path *path, u64 bytenr, 869 - int *info_level, struct list_head *prefs, 870 - struct ref_root *ref_tree, 871 - u64 *total_refs, u64 inum) 973 + static int add_inline_refs(const struct btrfs_fs_info *fs_info, 974 + struct btrfs_path *path, u64 bytenr, 975 + int *info_level, struct preftrees *preftrees, 976 + u64 *total_refs, struct share_check *sc) 872 977 { 873 978 int ret = 0; 874 979 int slot; ··· 929 1012 int type; 930 1013 931 1014 iref = (struct btrfs_extent_inline_ref *)ptr; 932 - type = btrfs_extent_inline_ref_type(leaf, iref); 1015 + type = btrfs_get_extent_inline_ref_type(leaf, iref, 1016 + BTRFS_REF_TYPE_ANY); 1017 + if (type == BTRFS_REF_TYPE_INVALID) 1018 + return -EINVAL; 1019 + 933 1020 offset = btrfs_extent_inline_ref_offset(leaf, iref); 934 1021 935 1022 switch (type) { 936 1023 case BTRFS_SHARED_BLOCK_REF_KEY: 937 - ret = __add_prelim_ref(prefs, 0, NULL, 938 - *info_level + 1, offset, 939 - bytenr, 1, GFP_NOFS); 1024 + ret = add_direct_ref(fs_info, preftrees, 1025 + *info_level + 1, offset, 1026 + bytenr, 1, NULL, GFP_NOFS); 940 1027 break; 941 1028 case BTRFS_SHARED_DATA_REF_KEY: { 942 1029 struct btrfs_shared_data_ref *sdref; ··· 948 1027 949 1028 sdref = (struct btrfs_shared_data_ref *)(iref + 1); 950 1029 count = btrfs_shared_data_ref_count(leaf, sdref); 951 - ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, 952 - bytenr, count, GFP_NOFS); 953 - if (ref_tree) { 954 - if (!ret) 955 - ret = ref_tree_add(ref_tree, 0, 0, 0, 956 - bytenr, count); 957 - if (!ret && ref_tree->unique_refs > 1) 958 - ret = BACKREF_FOUND_SHARED; 959 - } 1030 + 1031 + ret = add_direct_ref(fs_info, preftrees, 0, offset, 1032 + bytenr, count, sc, GFP_NOFS); 960 1033 break; 961 1034 } 962 1035 case BTRFS_TREE_BLOCK_REF_KEY: 963 - ret = __add_prelim_ref(prefs, offset, NULL, 964 - *info_level + 1, 0, 965 - bytenr, 1, GFP_NOFS); 1036 + ret = add_indirect_ref(fs_info, preftrees, offset, 1037 + NULL, *info_level + 1, 
1038 + bytenr, 1, NULL, GFP_NOFS); 966 1039 break; 967 1040 case BTRFS_EXTENT_DATA_REF_KEY: { 968 1041 struct btrfs_extent_data_ref *dref; ··· 970 1055 key.type = BTRFS_EXTENT_DATA_KEY; 971 1056 key.offset = btrfs_extent_data_ref_offset(leaf, dref); 972 1057 973 - if (inum && key.objectid != inum) { 1058 + if (sc && sc->inum && key.objectid != sc->inum) { 974 1059 ret = BACKREF_FOUND_SHARED; 975 1060 break; 976 1061 } 977 1062 978 1063 root = btrfs_extent_data_ref_root(leaf, dref); 979 - ret = __add_prelim_ref(prefs, root, &key, 0, 0, 980 - bytenr, count, GFP_NOFS); 981 - if (ref_tree) { 982 - if (!ret) 983 - ret = ref_tree_add(ref_tree, root, 984 - key.objectid, 985 - key.offset, 0, 986 - count); 987 - if (!ret && ref_tree->unique_refs > 1) 988 - ret = BACKREF_FOUND_SHARED; 989 - } 1064 + 1065 + ret = add_indirect_ref(fs_info, preftrees, root, 1066 + &key, 0, bytenr, count, 1067 + sc, GFP_NOFS); 990 1068 break; 991 1069 } 992 1070 default: ··· 995 1087 996 1088 /* 997 1089 * add all non-inline backrefs for bytenr to the list 1090 + * 1091 + * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED. 
998 1092 */ 999 - static int __add_keyed_refs(struct btrfs_fs_info *fs_info, 1000 - struct btrfs_path *path, u64 bytenr, 1001 - int info_level, struct list_head *prefs, 1002 - struct ref_root *ref_tree, u64 inum) 1093 + static int add_keyed_refs(struct btrfs_fs_info *fs_info, 1094 + struct btrfs_path *path, u64 bytenr, 1095 + int info_level, struct preftrees *preftrees, 1096 + struct share_check *sc) 1003 1097 { 1004 1098 struct btrfs_root *extent_root = fs_info->extent_root; 1005 1099 int ret; ··· 1031 1121 1032 1122 switch (key.type) { 1033 1123 case BTRFS_SHARED_BLOCK_REF_KEY: 1034 - ret = __add_prelim_ref(prefs, 0, NULL, 1035 - info_level + 1, key.offset, 1036 - bytenr, 1, GFP_NOFS); 1124 + /* SHARED DIRECT METADATA backref */ 1125 + ret = add_direct_ref(fs_info, preftrees, 1126 + info_level + 1, key.offset, 1127 + bytenr, 1, NULL, GFP_NOFS); 1037 1128 break; 1038 1129 case BTRFS_SHARED_DATA_REF_KEY: { 1130 + /* SHARED DIRECT FULL backref */ 1039 1131 struct btrfs_shared_data_ref *sdref; 1040 1132 int count; 1041 1133 1042 1134 sdref = btrfs_item_ptr(leaf, slot, 1043 1135 struct btrfs_shared_data_ref); 1044 1136 count = btrfs_shared_data_ref_count(leaf, sdref); 1045 - ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, 1046 - bytenr, count, GFP_NOFS); 1047 - if (ref_tree) { 1048 - if (!ret) 1049 - ret = ref_tree_add(ref_tree, 0, 0, 0, 1050 - bytenr, count); 1051 - if (!ret && ref_tree->unique_refs > 1) 1052 - ret = BACKREF_FOUND_SHARED; 1053 - } 1137 + ret = add_direct_ref(fs_info, preftrees, 0, 1138 + key.offset, bytenr, count, 1139 + sc, GFP_NOFS); 1054 1140 break; 1055 1141 } 1056 1142 case BTRFS_TREE_BLOCK_REF_KEY: 1057 - ret = __add_prelim_ref(prefs, key.offset, NULL, 1058 - info_level + 1, 0, 1059 - bytenr, 1, GFP_NOFS); 1143 + /* NORMAL INDIRECT METADATA backref */ 1144 + ret = add_indirect_ref(fs_info, preftrees, key.offset, 1145 + NULL, info_level + 1, bytenr, 1146 + 1, NULL, GFP_NOFS); 1060 1147 break; 1061 1148 case BTRFS_EXTENT_DATA_REF_KEY: { 
1149 + /* NORMAL INDIRECT DATA backref */ 1062 1150 struct btrfs_extent_data_ref *dref; 1063 1151 int count; 1064 1152 u64 root; ··· 1069 1161 key.type = BTRFS_EXTENT_DATA_KEY; 1070 1162 key.offset = btrfs_extent_data_ref_offset(leaf, dref); 1071 1163 1072 - if (inum && key.objectid != inum) { 1164 + if (sc && sc->inum && key.objectid != sc->inum) { 1073 1165 ret = BACKREF_FOUND_SHARED; 1074 1166 break; 1075 1167 } 1076 1168 1077 1169 root = btrfs_extent_data_ref_root(leaf, dref); 1078 - ret = __add_prelim_ref(prefs, root, &key, 0, 0, 1079 - bytenr, count, GFP_NOFS); 1080 - if (ref_tree) { 1081 - if (!ret) 1082 - ret = ref_tree_add(ref_tree, root, 1083 - key.objectid, 1084 - key.offset, 0, 1085 - count); 1086 - if (!ret && ref_tree->unique_refs > 1) 1087 - ret = BACKREF_FOUND_SHARED; 1088 - } 1170 + ret = add_indirect_ref(fs_info, preftrees, root, 1171 + &key, 0, bytenr, count, 1172 + sc, GFP_NOFS); 1089 1173 break; 1090 1174 } 1091 1175 default: ··· 1097 1197 * indirect refs to their parent bytenr. 1098 1198 * When roots are found, they're added to the roots list 1099 1199 * 1100 - * NOTE: This can return values > 0 1101 - * 1102 1200 * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave 1103 1201 * much like trans == NULL case, the difference only lies in it will not 1104 1202 * commit root. 1105 1203 * The special case is for qgroup to search roots in commit_transaction(). 1106 1204 * 1107 - * If check_shared is set to 1, any extent has more than one ref item, will 1108 - * be returned BACKREF_FOUND_SHARED immediately. 1205 + * @sc - if !NULL, then immediately return BACKREF_FOUND_SHARED when a 1206 + * shared extent is detected. 1207 + * 1208 + * Otherwise this returns 0 for success and <0 for an error. 
1109 1209 * 1110 1210 * FIXME some caching might speed things up 1111 1211 */ ··· 1113 1213 struct btrfs_fs_info *fs_info, u64 bytenr, 1114 1214 u64 time_seq, struct ulist *refs, 1115 1215 struct ulist *roots, const u64 *extent_item_pos, 1116 - u64 root_objectid, u64 inum, int check_shared) 1216 + struct share_check *sc) 1117 1217 { 1118 1218 struct btrfs_key key; 1119 1219 struct btrfs_path *path; ··· 1121 1221 struct btrfs_delayed_ref_head *head; 1122 1222 int info_level = 0; 1123 1223 int ret; 1124 - struct list_head prefs_delayed; 1125 - struct list_head prefs; 1126 - struct __prelim_ref *ref; 1224 + struct prelim_ref *ref; 1225 + struct rb_node *node; 1127 1226 struct extent_inode_elem *eie = NULL; 1128 - struct ref_root *ref_tree = NULL; 1227 + /* total of both direct AND indirect refs! */ 1129 1228 u64 total_refs = 0; 1130 - 1131 - INIT_LIST_HEAD(&prefs); 1132 - INIT_LIST_HEAD(&prefs_delayed); 1229 + struct preftrees preftrees = { 1230 + .direct = PREFTREE_INIT, 1231 + .indirect = PREFTREE_INIT, 1232 + .indirect_missing_keys = PREFTREE_INIT 1233 + }; 1133 1234 1134 1235 key.objectid = bytenr; 1135 1236 key.offset = (u64)-1; ··· 1157 1256 */ 1158 1257 again: 1159 1258 head = NULL; 1160 - 1161 - if (check_shared) { 1162 - if (!ref_tree) { 1163 - ref_tree = ref_root_alloc(); 1164 - if (!ref_tree) { 1165 - ret = -ENOMEM; 1166 - goto out; 1167 - } 1168 - } else { 1169 - ref_root_fini(ref_tree); 1170 - } 1171 - } 1172 1259 1173 1260 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); 1174 1261 if (ret < 0) ··· 1193 1304 goto again; 1194 1305 } 1195 1306 spin_unlock(&delayed_refs->lock); 1196 - ret = __add_delayed_refs(head, time_seq, 1197 - &prefs_delayed, &total_refs, 1198 - inum); 1307 + ret = add_delayed_refs(fs_info, head, time_seq, 1308 + &preftrees, &total_refs, sc); 1199 1309 mutex_unlock(&head->mutex); 1200 1310 if (ret) 1201 1311 goto out; 1202 1312 } else { 1203 1313 spin_unlock(&delayed_refs->lock); 1204 - } 1205 - 1206 - if 
(check_shared && !list_empty(&prefs_delayed)) { 1207 - /* 1208 - * Add all delay_ref to the ref_tree and check if there 1209 - * are multiple ref items added. 1210 - */ 1211 - list_for_each_entry(ref, &prefs_delayed, list) { 1212 - if (ref->key_for_search.type) { 1213 - ret = ref_tree_add(ref_tree, 1214 - ref->root_id, 1215 - ref->key_for_search.objectid, 1216 - ref->key_for_search.offset, 1217 - 0, ref->count); 1218 - if (ret) 1219 - goto out; 1220 - } else { 1221 - ret = ref_tree_add(ref_tree, 0, 0, 0, 1222 - ref->parent, ref->count); 1223 - if (ret) 1224 - goto out; 1225 - } 1226 - 1227 - } 1228 - 1229 - if (ref_tree->unique_refs > 1) { 1230 - ret = BACKREF_FOUND_SHARED; 1231 - goto out; 1232 - } 1233 - 1234 1314 } 1235 1315 } 1236 1316 ··· 1214 1356 if (key.objectid == bytenr && 1215 1357 (key.type == BTRFS_EXTENT_ITEM_KEY || 1216 1358 key.type == BTRFS_METADATA_ITEM_KEY)) { 1217 - ret = __add_inline_refs(path, bytenr, 1218 - &info_level, &prefs, 1219 - ref_tree, &total_refs, 1220 - inum); 1359 + ret = add_inline_refs(fs_info, path, bytenr, 1360 + &info_level, &preftrees, 1361 + &total_refs, sc); 1221 1362 if (ret) 1222 1363 goto out; 1223 - ret = __add_keyed_refs(fs_info, path, bytenr, 1224 - info_level, &prefs, 1225 - ref_tree, inum); 1364 + ret = add_keyed_refs(fs_info, path, bytenr, info_level, 1365 + &preftrees, sc); 1226 1366 if (ret) 1227 1367 goto out; 1228 1368 } 1229 1369 } 1370 + 1230 1371 btrfs_release_path(path); 1231 1372 1232 - list_splice_init(&prefs_delayed, &prefs); 1233 - 1234 - ret = __add_missing_keys(fs_info, &prefs); 1373 + ret = add_missing_keys(fs_info, &preftrees); 1235 1374 if (ret) 1236 1375 goto out; 1237 1376 1238 - __merge_refs(&prefs, MERGE_IDENTICAL_KEYS); 1377 + WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root)); 1239 1378 1240 - ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, 1241 - extent_item_pos, total_refs, 1242 - root_objectid); 1379 + ret = resolve_indirect_refs(fs_info, path, time_seq, 
&preftrees, 1380 + extent_item_pos, total_refs, sc); 1243 1381 if (ret) 1244 1382 goto out; 1245 1383 1246 - __merge_refs(&prefs, MERGE_IDENTICAL_PARENTS); 1384 + WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root)); 1247 1385 1248 - while (!list_empty(&prefs)) { 1249 - ref = list_first_entry(&prefs, struct __prelim_ref, list); 1386 + /* 1387 + * This walks the tree of merged and resolved refs. Tree blocks are 1388 + * read in as needed. Unique entries are added to the ulist, and 1389 + * the list of found roots is updated. 1390 + * 1391 + * We release the entire tree in one go before returning. 1392 + */ 1393 + node = rb_first(&preftrees.direct.root); 1394 + while (node) { 1395 + ref = rb_entry(node, struct prelim_ref, rbnode); 1396 + node = rb_next(&ref->rbnode); 1250 1397 WARN_ON(ref->count < 0); 1251 1398 if (roots && ref->count && ref->root_id && ref->parent == 0) { 1252 - if (root_objectid && ref->root_id != root_objectid) { 1399 + if (sc && sc->root_objectid && 1400 + ref->root_id != sc->root_objectid) { 1253 1401 ret = BACKREF_FOUND_SHARED; 1254 1402 goto out; 1255 1403 } ··· 1306 1442 } 1307 1443 eie = NULL; 1308 1444 } 1309 - list_del(&ref->list); 1310 - kmem_cache_free(btrfs_prelim_ref_cache, ref); 1445 + cond_resched(); 1311 1446 } 1312 1447 1313 1448 out: 1314 1449 btrfs_free_path(path); 1315 - ref_root_free(ref_tree); 1316 - while (!list_empty(&prefs)) { 1317 - ref = list_first_entry(&prefs, struct __prelim_ref, list); 1318 - list_del(&ref->list); 1319 - kmem_cache_free(btrfs_prelim_ref_cache, ref); 1320 - } 1321 - while (!list_empty(&prefs_delayed)) { 1322 - ref = list_first_entry(&prefs_delayed, struct __prelim_ref, 1323 - list); 1324 - list_del(&ref->list); 1325 - kmem_cache_free(btrfs_prelim_ref_cache, ref); 1326 - } 1450 + 1451 + prelim_release(&preftrees.direct); 1452 + prelim_release(&preftrees.indirect); 1453 + prelim_release(&preftrees.indirect_missing_keys); 1454 + 1327 1455 if (ret < 0) 1328 1456 free_inode_elem_list(eie); 1329 1457 return 
ret; ··· 1331 1475 while ((node = ulist_next(blocks, &uiter))) { 1332 1476 if (!node->aux) 1333 1477 continue; 1334 - eie = (struct extent_inode_elem *)(uintptr_t)node->aux; 1478 + eie = unode_aux_to_inode_list(node); 1335 1479 free_inode_elem_list(eie); 1336 1480 node->aux = 0; 1337 1481 } ··· 1359 1503 return -ENOMEM; 1360 1504 1361 1505 ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, 1362 - *leafs, NULL, extent_item_pos, 0, 0, 0); 1506 + *leafs, NULL, extent_item_pos, NULL); 1363 1507 if (ret < 0 && ret != -ENOENT) { 1364 1508 free_leaf_list(*leafs); 1365 1509 return ret; ··· 1381 1525 * 1382 1526 * returns 0 on success, < 0 on error. 1383 1527 */ 1384 - static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans, 1385 - struct btrfs_fs_info *fs_info, u64 bytenr, 1386 - u64 time_seq, struct ulist **roots) 1528 + static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, 1529 + struct btrfs_fs_info *fs_info, u64 bytenr, 1530 + u64 time_seq, struct ulist **roots) 1387 1531 { 1388 1532 struct ulist *tmp; 1389 1533 struct ulist_node *node = NULL; ··· 1402 1546 ULIST_ITER_INIT(&uiter); 1403 1547 while (1) { 1404 1548 ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, 1405 - tmp, *roots, NULL, 0, 0, 0); 1549 + tmp, *roots, NULL, NULL); 1406 1550 if (ret < 0 && ret != -ENOENT) { 1407 1551 ulist_free(tmp); 1408 1552 ulist_free(*roots); ··· 1427 1571 1428 1572 if (!trans) 1429 1573 down_read(&fs_info->commit_root_sem); 1430 - ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots); 1574 + ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, 1575 + time_seq, roots); 1431 1576 if (!trans) 1432 1577 up_read(&fs_info->commit_root_sem); 1433 1578 return ret; ··· 1437 1580 /** 1438 1581 * btrfs_check_shared - tell us whether an extent is shared 1439 1582 * 1440 - * @trans: optional trans handle 1441 - * 1442 1583 * btrfs_check_shared uses the backref walking code but will short 1443 1584 * circuit as soon as it finds a root 
or inode that doesn't match the 1444 1585 * one passed in. This provides a significant performance benefit for 1445 1586 * callers (such as fiemap) which want to know whether the extent is 1446 1587 * shared but do not need a ref count. 1447 1588 * 1589 + * This attempts to allocate a transaction in order to account for 1590 + * delayed refs, but continues on even when the alloc fails. 1591 + * 1448 1592 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 1449 1593 */ 1450 - int btrfs_check_shared(struct btrfs_trans_handle *trans, 1451 - struct btrfs_fs_info *fs_info, u64 root_objectid, 1452 - u64 inum, u64 bytenr) 1594 + int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) 1453 1595 { 1596 + struct btrfs_fs_info *fs_info = root->fs_info; 1597 + struct btrfs_trans_handle *trans; 1454 1598 struct ulist *tmp = NULL; 1455 1599 struct ulist *roots = NULL; 1456 1600 struct ulist_iterator uiter; 1457 1601 struct ulist_node *node; 1458 1602 struct seq_list elem = SEQ_LIST_INIT(elem); 1459 1603 int ret = 0; 1604 + struct share_check shared = { 1605 + .root_objectid = root->objectid, 1606 + .inum = inum, 1607 + .share_count = 0, 1608 + }; 1460 1609 1461 1610 tmp = ulist_alloc(GFP_NOFS); 1462 1611 roots = ulist_alloc(GFP_NOFS); ··· 1472 1609 return -ENOMEM; 1473 1610 } 1474 1611 1475 - if (trans) 1476 - btrfs_get_tree_mod_seq(fs_info, &elem); 1477 - else 1612 + trans = btrfs_join_transaction(root); 1613 + if (IS_ERR(trans)) { 1614 + trans = NULL; 1478 1615 down_read(&fs_info->commit_root_sem); 1616 + } else { 1617 + btrfs_get_tree_mod_seq(fs_info, &elem); 1618 + } 1619 + 1479 1620 ULIST_ITER_INIT(&uiter); 1480 1621 while (1) { 1481 1622 ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp, 1482 - roots, NULL, root_objectid, inum, 1); 1623 + roots, NULL, &shared); 1483 1624 if (ret == BACKREF_FOUND_SHARED) { 1484 1625 /* this is the only condition under which we return 1 */ 1485 1626 ret = 1; ··· 1498 1631 bytenr = node->val; 1499 
1632 cond_resched(); 1500 1633 } 1501 - if (trans) 1634 + 1635 + if (trans) { 1502 1636 btrfs_put_tree_mod_seq(fs_info, &elem); 1503 - else 1637 + btrfs_end_transaction(trans); 1638 + } else { 1504 1639 up_read(&fs_info->commit_root_sem); 1640 + } 1505 1641 ulist_free(tmp); 1506 1642 ulist_free(roots); 1507 1643 return ret; ··· 1519 1649 struct btrfs_key key; 1520 1650 struct btrfs_key found_key; 1521 1651 struct btrfs_inode_extref *extref; 1522 - struct extent_buffer *leaf; 1652 + const struct extent_buffer *leaf; 1523 1653 unsigned long ptr; 1524 1654 1525 1655 key.objectid = inode_objectid; ··· 1676 1806 u64 flags; 1677 1807 u64 size = 0; 1678 1808 u32 item_size; 1679 - struct extent_buffer *eb; 1809 + const struct extent_buffer *eb; 1680 1810 struct btrfs_extent_item *ei; 1681 1811 struct btrfs_key key; 1682 1812 ··· 1740 1870 * helper function to iterate extent inline refs. ptr must point to a 0 value 1741 1871 * for the first call and may be modified. it is used to track state. 1742 1872 * if more refs exist, 0 is returned and the next call to 1743 - * __get_extent_inline_ref must pass the modified ptr parameter to get the 1873 + * get_extent_inline_ref must pass the modified ptr parameter to get the 1744 1874 * next ref. after the last ref was processed, 1 is returned. 
1745 1875 * returns <0 on error 1746 1876 */ 1747 - static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, 1748 - struct btrfs_key *key, 1749 - struct btrfs_extent_item *ei, u32 item_size, 1750 - struct btrfs_extent_inline_ref **out_eiref, 1751 - int *out_type) 1877 + static int get_extent_inline_ref(unsigned long *ptr, 1878 + const struct extent_buffer *eb, 1879 + const struct btrfs_key *key, 1880 + const struct btrfs_extent_item *ei, 1881 + u32 item_size, 1882 + struct btrfs_extent_inline_ref **out_eiref, 1883 + int *out_type) 1752 1884 { 1753 1885 unsigned long end; 1754 1886 u64 flags; ··· 1780 1908 1781 1909 end = (unsigned long)ei + item_size; 1782 1910 *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr); 1783 - *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref); 1911 + *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, 1912 + BTRFS_REF_TYPE_ANY); 1913 + if (*out_type == BTRFS_REF_TYPE_INVALID) 1914 + return -EINVAL; 1784 1915 1785 1916 *ptr += btrfs_extent_inline_ref_size(*out_type); 1786 1917 WARN_ON(*ptr > end); ··· 1796 1921 /* 1797 1922 * reads the tree block backref for an extent. tree level and root are returned 1798 1923 * through out_level and out_root. ptr must point to a 0 value for the first 1799 - * call and may be modified (see __get_extent_inline_ref comment). 1924 + * call and may be modified (see get_extent_inline_ref comment). 1800 1925 * returns 0 if data was provided, 1 if there was no more data to provide or 1801 1926 * <0 on error. 
1802 1927 */ ··· 1812 1937 return 1; 1813 1938 1814 1939 while (1) { 1815 - ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size, 1940 + ret = get_extent_inline_ref(ptr, eb, key, ei, item_size, 1816 1941 &eiref, &type); 1817 1942 if (ret < 0) 1818 1943 return ret; ··· 1909 2034 1910 2035 ULIST_ITER_INIT(&ref_uiter); 1911 2036 while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { 1912 - ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val, 1913 - tree_mod_seq_elem.seq, &roots); 2037 + ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val, 2038 + tree_mod_seq_elem.seq, &roots); 1914 2039 if (ret) 1915 2040 break; 1916 2041 ULIST_ITER_INIT(&root_uiter);

+13 -3

fs/btrfs/backref.h

··· 68 68 u64 start_off, struct btrfs_path *path, 69 69 struct btrfs_inode_extref **ret_extref, 70 70 u64 *found_off); 71 - int btrfs_check_shared(struct btrfs_trans_handle *trans, 72 - struct btrfs_fs_info *fs_info, u64 root_objectid, 73 - u64 inum, u64 bytenr); 71 + int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr); 74 72 75 73 int __init btrfs_prelim_ref_init(void); 76 74 void btrfs_prelim_ref_exit(void); 75 + 76 + struct prelim_ref { 77 + struct rb_node rbnode; 78 + u64 root_id; 79 + struct btrfs_key key_for_search; 80 + int level; 81 + int count; 82 + struct extent_inode_elem *inode_list; 83 + u64 parent; 84 + u64 wanted_disk_byte; 85 + }; 86 + 77 87 #endif

+9 -4

fs/btrfs/btrfs_inode.h

··· 179 179 unsigned reserved_extents; 180 180 181 181 /* 182 - * always compress this one file 182 + * Cached values of inode properties 183 183 */ 184 - unsigned force_compress; 184 + unsigned prop_compress; /* per-file compression algorithm */ 185 + /* 186 + * Force compression on the file using the defrag ioctl, could be 187 + * different from prop_compress and takes precedence if set 188 + */ 189 + unsigned defrag_compress; 185 190 186 191 struct btrfs_delayed_node *delayed_node; 187 192 ··· 212 207 213 208 extern unsigned char btrfs_filetype_table[]; 214 209 215 - static inline struct btrfs_inode *BTRFS_I(struct inode *inode) 210 + static inline struct btrfs_inode *BTRFS_I(const struct inode *inode) 216 211 { 217 212 return container_of(inode, struct btrfs_inode, vfs_inode); 218 213 } ··· 236 231 __insert_inode_hash(inode, h); 237 232 } 238 233 239 - static inline u64 btrfs_ino(struct btrfs_inode *inode) 234 + static inline u64 btrfs_ino(const struct btrfs_inode *inode) 240 235 { 241 236 u64 ino = inode->location.objectid; 242 237

+4 -4

fs/btrfs/check-integrity.c

··· 791 791 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 792 792 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes) 793 793 return -1; 794 - bh = __bread(superblock_bdev, dev_bytenr / 4096, 794 + bh = __bread(superblock_bdev, dev_bytenr / BTRFS_BDEV_BLOCKSIZE, 795 795 BTRFS_SUPER_INFO_SIZE); 796 796 if (NULL == bh) 797 797 return -1; 798 798 super_tmp = (struct btrfs_super_block *) 799 - (bh->b_data + (dev_bytenr & 4095)); 799 + (bh->b_data + (dev_bytenr & (BTRFS_BDEV_BLOCKSIZE - 1))); 800 800 801 801 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 802 802 btrfs_super_magic(super_tmp) != BTRFS_MAGIC || ··· 1728 1728 num_pages = state->metablock_size >> PAGE_SHIFT; 1729 1729 h = (struct btrfs_header *)datav[0]; 1730 1730 1731 - if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1731 + if (memcmp(h->fsid, fs_info->fsid, BTRFS_FSID_SIZE)) 1732 1732 return 1; 1733 1733 1734 1734 for (i = 0; i < num_pages; i++) { ··· 2753 2753 (op == REQ_OP_WRITE) && bh->b_size > 0) { 2754 2754 u64 dev_bytenr; 2755 2755 2756 - dev_bytenr = 4096 * bh->b_blocknr; 2756 + dev_bytenr = BTRFS_BDEV_BLOCKSIZE * bh->b_blocknr; 2757 2757 if (dev_state->state->print_mask & 2758 2758 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2759 2759 pr_info("submit_bh(op=0x%x,0x%x, blocknr=%llu (bytenr %llu), size=%zu, data=%p, bdev=%p)\n",

+34 -1

fs/btrfs/compression.c

··· 825 825 int *free_ws = &btrfs_comp_ws[idx].free_ws; 826 826 827 827 spin_lock(ws_lock); 828 - if (*free_ws < num_online_cpus()) { 828 + if (*free_ws <= num_online_cpus()) { 829 829 list_add(workspace, idle_ws); 830 830 (*free_ws)++; 831 831 spin_unlock(ws_lock); ··· 1046 1046 } 1047 1047 1048 1048 return 1; 1049 + } 1050 + 1051 + /* 1052 + * Compression heuristic. 1053 + * 1054 + * For now it's a naive and optimistic 'return true', we'll extend the logic to 1055 + * quickly (compared to direct compression) detect data characteristics 1056 + * (compressible/uncompressible) to avoid wasting CPU time on uncompressible 1057 + * data. 1058 + * 1059 + * The following types of analysis can be performed: 1060 + * - detect mostly zero data 1061 + * - detect data with low "byte set" size (text, etc) 1062 + * - detect data with low/high "core byte" set 1063 + * 1064 + * Return non-zero if the compression should be done, 0 otherwise. 1065 + */ 1066 + int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) 1067 + { 1068 + u64 index = start >> PAGE_SHIFT; 1069 + u64 end_index = end >> PAGE_SHIFT; 1070 + struct page *page; 1071 + int ret = 1; 1072 + 1073 + while (index <= end_index) { 1074 + page = find_get_page(inode->i_mapping, index); 1075 + kmap(page); 1076 + kunmap(page); 1077 + put_page(page); 1078 + index++; 1079 + } 1080 + 1081 + return ret; 1082 }

+2 -1

fs/btrfs/compression.h

··· 100 100 BTRFS_COMPRESS_ZLIB = 1, 101 101 BTRFS_COMPRESS_LZO = 2, 102 102 BTRFS_COMPRESS_TYPES = 2, 103 - BTRFS_COMPRESS_LAST = 3, 104 103 }; 105 104 106 105 struct btrfs_compress_op { ··· 127 128 128 129 extern const struct btrfs_compress_op btrfs_zlib_compress; 129 130 extern const struct btrfs_compress_op btrfs_lzo_compress; 131 + 132 + int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); 130 133 131 134 #endif

+7 -7

fs/btrfs/ctree.c

··· 4650 4650 btrfs_mark_buffer_dirty(leaf); 4651 4651 4652 4652 if (btrfs_leaf_free_space(fs_info, leaf) < 0) { 4653 - btrfs_print_leaf(fs_info, leaf); 4653 + btrfs_print_leaf(leaf); 4654 4654 BUG(); 4655 4655 } 4656 4656 } ··· 4679 4679 data_end = leaf_data_end(fs_info, leaf); 4680 4680 4681 4681 if (btrfs_leaf_free_space(fs_info, leaf) < data_size) { 4682 - btrfs_print_leaf(fs_info, leaf); 4682 + btrfs_print_leaf(leaf); 4683 4683 BUG(); 4684 4684 } 4685 4685 slot = path->slots[0]; ··· 4687 4687 4688 4688 BUG_ON(slot < 0); 4689 4689 if (slot >= nritems) { 4690 - btrfs_print_leaf(fs_info, leaf); 4690 + btrfs_print_leaf(leaf); 4691 4691 btrfs_crit(fs_info, "slot %d too large, nritems %d", 4692 4692 slot, nritems); 4693 4693 BUG_ON(1); ··· 4718 4718 btrfs_mark_buffer_dirty(leaf); 4719 4719 4720 4720 if (btrfs_leaf_free_space(fs_info, leaf) < 0) { 4721 - btrfs_print_leaf(fs_info, leaf); 4721 + btrfs_print_leaf(leaf); 4722 4722 BUG(); 4723 4723 } 4724 4724 } ··· 4757 4757 data_end = leaf_data_end(fs_info, leaf); 4758 4758 4759 4759 if (btrfs_leaf_free_space(fs_info, leaf) < total_size) { 4760 - btrfs_print_leaf(fs_info, leaf); 4760 + btrfs_print_leaf(leaf); 4761 4761 btrfs_crit(fs_info, "not enough freespace need %u have %d", 4762 4762 total_size, btrfs_leaf_free_space(fs_info, leaf)); 4763 4763 BUG(); ··· 4767 4767 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 4768 4768 4769 4769 if (old_data < data_end) { 4770 - btrfs_print_leaf(fs_info, leaf); 4770 + btrfs_print_leaf(leaf); 4771 4771 btrfs_crit(fs_info, "slot %d old_data %d data_end %d", 4772 4772 slot, old_data, data_end); 4773 4773 BUG_ON(1); ··· 4811 4811 btrfs_mark_buffer_dirty(leaf); 4812 4812 4813 4813 if (btrfs_leaf_free_space(fs_info, leaf) < 0) { 4814 - btrfs_print_leaf(fs_info, leaf); 4814 + btrfs_print_leaf(leaf); 4815 4815 BUG(); 4816 4816 } 4817 4817 }

+91 -92

fs/btrfs/ctree.h

··· 470 470 471 471 /* 472 472 * free clusters are used to claim free space in relatively large chunks, 473 - * allowing us to do less seeky writes. They are used for all metadata 474 - * allocations and data allocations in ssd mode. 473 + * allowing us to do less seeky writes. They are used for all metadata 474 + * allocations. In ssd_spread mode they are also used for data allocations. 475 475 */ 476 476 struct btrfs_free_cluster { 477 477 spinlock_t lock; ··· 558 558 u64 bytes_super; 559 559 u64 flags; 560 560 u64 cache_generation; 561 - u32 sectorsize; 562 561 563 562 /* 564 563 * If the free space extent count exceeds this number, convert the block ··· 967 968 968 969 struct reloc_control *reloc_ctl; 969 970 970 - /* data_alloc_cluster is only used in ssd mode */ 971 + /* data_alloc_cluster is only used in ssd_spread mode */ 971 972 struct btrfs_free_cluster data_alloc_cluster; 972 973 973 974 /* all metadata allocations go through this cluster */ ··· 1070 1071 1071 1072 /* next backup root to be overwritten */ 1072 1073 int backup_root_index; 1073 - 1074 - int num_tolerated_disk_barrier_failures; 1075 1074 1076 1075 /* device replace state */ 1077 1076 struct btrfs_dev_replace dev_replace; ··· 1258 1261 */ 1259 1262 int send_in_progress; 1260 1263 struct btrfs_subvolume_writers *subv_writers; 1261 - atomic_t will_be_snapshoted; 1264 + atomic_t will_be_snapshotted; 1262 1265 1263 1266 /* For qgroup metadata space reserve */ 1264 1267 atomic64_t qgroup_meta_rsv; 1268 + }; 1269 + 1270 + struct btrfs_file_private { 1271 + struct btrfs_trans_handle *trans; 1272 + void *filldir_buf; 1265 1273 }; 1266 1274 1267 1275 static inline u32 btrfs_inode_sectorsize(const struct inode *inode) ··· 1437 1435 #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) 1438 1436 1439 1437 struct btrfs_map_token { 1440 - struct extent_buffer *eb; 1438 + const struct extent_buffer *eb; 1441 1439 char *kaddr; 1442 1440 unsigned long offset; 1443 1441 }; ··· 1471 1469 sizeof(((type *)0)->member))) 
1472 1470 1473 1471 #define DECLARE_BTRFS_SETGET_BITS(bits) \ 1474 - u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ 1475 - unsigned long off, \ 1476 - struct btrfs_map_token *token); \ 1477 - void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \ 1472 + u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ 1473 + const void *ptr, unsigned long off, \ 1474 + struct btrfs_map_token *token); \ 1475 + void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \ 1478 1476 unsigned long off, u##bits val, \ 1479 1477 struct btrfs_map_token *token); \ 1480 - static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \ 1478 + static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ 1479 + const void *ptr, \ 1481 1480 unsigned long off) \ 1482 1481 { \ 1483 1482 return btrfs_get_token_##bits(eb, ptr, off, NULL); \ 1484 1483 } \ 1485 - static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \ 1484 + static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\ 1486 1485 unsigned long off, u##bits val) \ 1487 1486 { \ 1488 1487 btrfs_set_token_##bits(eb, ptr, off, val, NULL); \ ··· 1495 1492 DECLARE_BTRFS_SETGET_BITS(64) 1496 1493 1497 1494 #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ 1498 - static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \ 1495 + static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ 1496 + const type *s) \ 1499 1497 { \ 1500 1498 BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ 1501 1499 return btrfs_get_##bits(eb, s, offsetof(type, member)); \ ··· 1507 1503 BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ 1508 1504 btrfs_set_##bits(eb, s, offsetof(type, member), val); \ 1509 1505 } \ 1510 - static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \ 1506 + static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\ 1507 + const type 
*s, \ 1511 1508 struct btrfs_map_token *token) \ 1512 1509 { \ 1513 1510 BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ ··· 1523 1518 } 1524 1519 1525 1520 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ 1526 - static inline u##bits btrfs_##name(struct extent_buffer *eb) \ 1521 + static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ 1527 1522 { \ 1528 - type *p = page_address(eb->pages[0]); \ 1523 + const type *p = page_address(eb->pages[0]); \ 1529 1524 u##bits res = le##bits##_to_cpu(p->member); \ 1530 1525 return res; \ 1531 1526 } \ ··· 1537 1532 } 1538 1533 1539 1534 #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ 1540 - static inline u##bits btrfs_##name(type *s) \ 1535 + static inline u##bits btrfs_##name(const type *s) \ 1541 1536 { \ 1542 1537 return le##bits##_to_cpu(s->member); \ 1543 1538 } \ ··· 1804 1799 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1805 1800 return sizeof(struct btrfs_extent_data_ref) + 1806 1801 offsetof(struct btrfs_extent_inline_ref, offset); 1807 - BUG(); 1808 1802 return 0; 1809 1803 } 1810 1804 ··· 1861 1857 sizeof(struct btrfs_key_ptr) * nr; 1862 1858 } 1863 1859 1864 - void btrfs_node_key(struct extent_buffer *eb, 1860 + void btrfs_node_key(const struct extent_buffer *eb, 1865 1861 struct btrfs_disk_key *disk_key, int nr); 1866 1862 1867 1863 static inline void btrfs_set_node_key(struct extent_buffer *eb, ··· 1890 1886 return (struct btrfs_item *)btrfs_item_nr_offset(nr); 1891 1887 } 1892 1888 1893 - static inline u32 btrfs_item_end(struct extent_buffer *eb, 1889 + static inline u32 btrfs_item_end(const struct extent_buffer *eb, 1894 1890 struct btrfs_item *item) 1895 1891 { 1896 1892 return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); 1897 1893 } 1898 1894 1899 - static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) 1895 + static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr) 1900 1896 { 1901 1897 return btrfs_item_end(eb, 
btrfs_item_nr(nr)); 1902 1898 } 1903 1899 1904 - static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) 1900 + static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr) 1905 1901 { 1906 1902 return btrfs_item_offset(eb, btrfs_item_nr(nr)); 1907 1903 } 1908 1904 1909 - static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) 1905 + static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr) 1910 1906 { 1911 1907 return btrfs_item_size(eb, btrfs_item_nr(nr)); 1912 1908 } 1913 1909 1914 - static inline void btrfs_item_key(struct extent_buffer *eb, 1910 + static inline void btrfs_item_key(const struct extent_buffer *eb, 1915 1911 struct btrfs_disk_key *disk_key, int nr) 1916 1912 { 1917 1913 struct btrfs_item *item = btrfs_item_nr(nr); ··· 1947 1943 BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, 1948 1944 transid, 64); 1949 1945 1950 - static inline void btrfs_dir_item_key(struct extent_buffer *eb, 1951 - struct btrfs_dir_item *item, 1946 + static inline void btrfs_dir_item_key(const struct extent_buffer *eb, 1947 + const struct btrfs_dir_item *item, 1952 1948 struct btrfs_disk_key *key) 1953 1949 { 1954 1950 read_eb_member(eb, item, struct btrfs_dir_item, location, key); ··· 1956 1952 1957 1953 static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, 1958 1954 struct btrfs_dir_item *item, 1959 - struct btrfs_disk_key *key) 1955 + const struct btrfs_disk_key *key) 1960 1956 { 1961 1957 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1962 1958 } ··· 1968 1964 BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, 1969 1965 generation, 64); 1970 1966 1971 - static inline void btrfs_free_space_key(struct extent_buffer *eb, 1972 - struct btrfs_free_space_header *h, 1967 + static inline void btrfs_free_space_key(const struct extent_buffer *eb, 1968 + const struct btrfs_free_space_header *h, 1973 1969 struct btrfs_disk_key *key) 1974 1970 { 
1975 1971 read_eb_member(eb, h, struct btrfs_free_space_header, location, key); ··· 1977 1973 1978 1974 static inline void btrfs_set_free_space_key(struct extent_buffer *eb, 1979 1975 struct btrfs_free_space_header *h, 1980 - struct btrfs_disk_key *key) 1976 + const struct btrfs_disk_key *key) 1981 1977 { 1982 1978 write_eb_member(eb, h, struct btrfs_free_space_header, location, key); 1983 1979 } ··· 2004 2000 disk->objectid = cpu_to_le64(cpu->objectid); 2005 2001 } 2006 2002 2007 - static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, 2008 - struct btrfs_key *key, int nr) 2003 + static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, 2004 + struct btrfs_key *key, int nr) 2009 2005 { 2010 2006 struct btrfs_disk_key disk_key; 2011 2007 btrfs_node_key(eb, &disk_key, nr); 2012 2008 btrfs_disk_key_to_cpu(key, &disk_key); 2013 2009 } 2014 2010 2015 - static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, 2016 - struct btrfs_key *key, int nr) 2011 + static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, 2012 + struct btrfs_key *key, int nr) 2017 2013 { 2018 2014 struct btrfs_disk_key disk_key; 2019 2015 btrfs_item_key(eb, &disk_key, nr); 2020 2016 btrfs_disk_key_to_cpu(key, &disk_key); 2021 2017 } 2022 2018 2023 - static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, 2024 - struct btrfs_dir_item *item, 2025 - struct btrfs_key *key) 2019 + static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, 2020 + const struct btrfs_dir_item *item, 2021 + struct btrfs_key *key) 2026 2022 { 2027 2023 struct btrfs_disk_key disk_key; 2028 2024 btrfs_dir_item_key(eb, item, &disk_key); ··· 2054 2050 nritems, 32); 2055 2051 BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); 2056 2052 2057 - static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) 2053 + static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) 2058 2054 { 2059 2055 
return (btrfs_header_flags(eb) & flag) == flag; 2060 2056 } ··· 2073 2069 return (flags & flag) == flag; 2074 2070 } 2075 2071 2076 - static inline int btrfs_header_backref_rev(struct extent_buffer *eb) 2072 + static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) 2077 2073 { 2078 2074 u64 flags = btrfs_header_flags(eb); 2079 2075 return flags >> BTRFS_BACKREF_REV_SHIFT; ··· 2093 2089 return offsetof(struct btrfs_header, fsid); 2094 2090 } 2095 2091 2096 - static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) 2092 + static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb) 2097 2093 { 2098 2094 return offsetof(struct btrfs_header, chunk_tree_uuid); 2099 2095 } 2100 2096 2101 - static inline int btrfs_is_leaf(struct extent_buffer *eb) 2097 + static inline int btrfs_is_leaf(const struct extent_buffer *eb) 2102 2098 { 2103 2099 return btrfs_header_level(eb) == 0; 2104 2100 } ··· 2132 2128 BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, 2133 2129 rtransid, 64); 2134 2130 2135 - static inline bool btrfs_root_readonly(struct btrfs_root *root) 2131 + static inline bool btrfs_root_readonly(const struct btrfs_root *root) 2136 2132 { 2137 2133 return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; 2138 2134 } 2139 2135 2140 - static inline bool btrfs_root_dead(struct btrfs_root *root) 2136 + static inline bool btrfs_root_dead(const struct btrfs_root *root) 2141 2137 { 2142 2138 return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; 2143 2139 } ··· 2194 2190 /* struct btrfs_balance_item */ 2195 2191 BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); 2196 2192 2197 - static inline void btrfs_balance_data(struct extent_buffer *eb, 2198 - struct btrfs_balance_item *bi, 2193 + static inline void btrfs_balance_data(const struct extent_buffer *eb, 2194 + const struct btrfs_balance_item *bi, 2199 2195 struct 
btrfs_disk_balance_args *ba) 2200 2196 { 2201 2197 read_eb_member(eb, bi, struct btrfs_balance_item, data, ba); 2202 2198 } 2203 2199 2204 2200 static inline void btrfs_set_balance_data(struct extent_buffer *eb, 2205 - struct btrfs_balance_item *bi, 2206 - struct btrfs_disk_balance_args *ba) 2201 + struct btrfs_balance_item *bi, 2202 + const struct btrfs_disk_balance_args *ba) 2207 2203 { 2208 2204 write_eb_member(eb, bi, struct btrfs_balance_item, data, ba); 2209 2205 } 2210 2206 2211 - static inline void btrfs_balance_meta(struct extent_buffer *eb, 2212 - struct btrfs_balance_item *bi, 2207 + static inline void btrfs_balance_meta(const struct extent_buffer *eb, 2208 + const struct btrfs_balance_item *bi, 2213 2209 struct btrfs_disk_balance_args *ba) 2214 2210 { 2215 2211 read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); 2216 2212 } 2217 2213 2218 2214 static inline void btrfs_set_balance_meta(struct extent_buffer *eb, 2219 - struct btrfs_balance_item *bi, 2220 - struct btrfs_disk_balance_args *ba) 2215 + struct btrfs_balance_item *bi, 2216 + const struct btrfs_disk_balance_args *ba) 2221 2217 { 2222 2218 write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); 2223 2219 } 2224 2220 2225 - static inline void btrfs_balance_sys(struct extent_buffer *eb, 2226 - struct btrfs_balance_item *bi, 2221 + static inline void btrfs_balance_sys(const struct extent_buffer *eb, 2222 + const struct btrfs_balance_item *bi, 2227 2223 struct btrfs_disk_balance_args *ba) 2228 2224 { 2229 2225 read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); 2230 2226 } 2231 2227 2232 2228 static inline void btrfs_set_balance_sys(struct extent_buffer *eb, 2233 - struct btrfs_balance_item *bi, 2234 - struct btrfs_disk_balance_args *ba) 2229 + struct btrfs_balance_item *bi, 2230 + const struct btrfs_disk_balance_args *ba) 2235 2231 { 2236 2232 write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); 2237 2233 } 2238 2234 2239 2235 static inline void 2240 2236 
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, 2241 - struct btrfs_disk_balance_args *disk) 2237 + const struct btrfs_disk_balance_args *disk) 2242 2238 { 2243 2239 memset(cpu, 0, sizeof(*cpu)); 2244 2240 ··· 2258 2254 2259 2255 static inline void 2260 2256 btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, 2261 - struct btrfs_balance_args *cpu) 2257 + const struct btrfs_balance_args *cpu) 2262 2258 { 2263 2259 memset(disk, 0, sizeof(*disk)); 2264 2260 ··· 2326 2322 BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, 2327 2323 uuid_tree_generation, 64); 2328 2324 2329 - static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 2325 + static inline int btrfs_super_csum_size(const struct btrfs_super_block *s) 2330 2326 { 2331 2327 u16 t = btrfs_super_csum_type(s); 2332 2328 /* ··· 2341 2337 * this returns the address of the start of the last item, 2342 2338 * which is the stop of the leaf data stack 2343 2339 */ 2344 - static inline unsigned int leaf_data_end(struct btrfs_fs_info *fs_info, 2345 - struct extent_buffer *leaf) 2340 + static inline unsigned int leaf_data_end(const struct btrfs_fs_info *fs_info, 2341 + const struct extent_buffer *leaf) 2346 2342 { 2347 2343 u32 nr = btrfs_header_nritems(leaf); 2348 2344 ··· 2367 2363 struct btrfs_file_extent_item, compression, 8); 2368 2364 2369 2365 static inline unsigned long 2370 - btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) 2366 + btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e) 2371 2367 { 2372 2368 return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; 2373 2369 } ··· 2401 2397 * size of any extent headers. 
If a file is compressed on disk, this is 2402 2398 * the compressed size 2403 2399 */ 2404 - static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, 2405 - struct btrfs_item *e) 2400 + static inline u32 btrfs_file_extent_inline_item_len( 2401 + const struct extent_buffer *eb, 2402 + struct btrfs_item *e) 2406 2403 { 2407 2404 return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; 2408 2405 } ··· 2411 2406 /* this returns the number of file bytes represented by the inline item. 2412 2407 * If an item is compressed, this is the uncompressed size 2413 2408 */ 2414 - static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, 2415 - int slot, 2416 - struct btrfs_file_extent_item *fi) 2409 + static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, 2410 + int slot, 2411 + const struct btrfs_file_extent_item *fi) 2417 2412 { 2418 2413 struct btrfs_map_token token; 2419 2414 ··· 2435 2430 2436 2431 2437 2432 /* btrfs_dev_stats_item */ 2438 - static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, 2439 - struct btrfs_dev_stats_item *ptr, 2433 + static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, 2434 + const struct btrfs_dev_stats_item *ptr, 2440 2435 int index) 2441 2436 { 2442 2437 u64 val; ··· 2566 2561 2567 2562 /* extent-tree.c */ 2568 2563 2564 + enum btrfs_inline_ref_type { 2565 + BTRFS_REF_TYPE_INVALID = 0, 2566 + BTRFS_REF_TYPE_BLOCK = 1, 2567 + BTRFS_REF_TYPE_DATA = 2, 2568 + BTRFS_REF_TYPE_ANY = 3, 2569 + }; 2570 + 2571 + int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, 2572 + struct btrfs_extent_inline_ref *iref, 2573 + enum btrfs_inline_ref_type is_data); 2574 + 2569 2575 u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes); 2570 2576 2571 2577 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info, ··· 2686 2670 int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); 2687 2671 int 
btrfs_make_block_group(struct btrfs_trans_handle *trans, 2688 2672 struct btrfs_fs_info *fs_info, u64 bytes_used, 2689 - u64 type, u64 chunk_objectid, u64 chunk_offset, 2690 - u64 size); 2673 + u64 type, u64 chunk_offset, u64 size); 2691 2674 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( 2692 2675 struct btrfs_fs_info *fs_info, 2693 2676 const u64 chunk_offset); ··· 2787 2772 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, 2788 2773 struct btrfs_fs_info *fs_info); 2789 2774 int __get_raid_index(u64 flags); 2790 - int btrfs_start_write_no_snapshoting(struct btrfs_root *root); 2791 - void btrfs_end_write_no_snapshoting(struct btrfs_root *root); 2775 + int btrfs_start_write_no_snapshotting(struct btrfs_root *root); 2776 + void btrfs_end_write_no_snapshotting(struct btrfs_root *root); 2792 2777 void btrfs_wait_for_snapshot_creation(struct btrfs_root *root); 2793 2778 void check_system_chunk(struct btrfs_trans_handle *trans, 2794 2779 struct btrfs_fs_info *fs_info, const u64 type); ··· 2988 2973 struct btrfs_fs_info *fs_info, 2989 2974 u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, 2990 2975 const char *name, int name_len); 2991 - int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2992 - const struct btrfs_key *key); 2976 + int btrfs_del_root(struct btrfs_trans_handle *trans, 2977 + struct btrfs_fs_info *fs_info, const struct btrfs_key *key); 2993 2978 int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2994 2979 const struct btrfs_key *key, 2995 2980 struct btrfs_root_item *item); ··· 3150 3135 u64 *orig_start, u64 *orig_block_len, 3151 3136 u64 *ram_bytes); 3152 3137 3153 - /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ 3154 - #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) 3155 - #define ClearPageChecked ClearPageFsMisc 3156 - #define SetPageChecked SetPageFsMisc 3157 - #define PageChecked PageFsMisc 3158 - #endif 3159 - 3160 
- /* This forces readahead on a given range of bytes in an inode */ 3161 - static inline void btrfs_force_ra(struct address_space *mapping, 3162 - struct file_ra_state *ra, struct file *file, 3163 - pgoff_t offset, unsigned long req_size) 3164 - { 3165 - page_cache_sync_readahead(mapping, ra, file, offset, req_size); 3166 - } 3167 - 3168 3138 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); 3169 3139 int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); 3170 3140 int btrfs_unlink_inode(struct btrfs_trans_handle *trans, ··· 3229 3229 long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3230 3230 int btrfs_ioctl_get_supported_features(void __user *arg); 3231 3231 void btrfs_update_iflags(struct inode *inode); 3232 - void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); 3233 3232 int btrfs_is_empty_uuid(u8 *uuid); 3234 3233 int btrfs_defrag_file(struct inode *inode, struct file *file, 3235 3234 struct btrfs_ioctl_defrag_range_args *range,

+1

fs/btrfs/delayed-inode.c

··· 1727 1727 1728 1728 if (over) 1729 1729 return 1; 1730 + ctx->pos++; 1730 1731 } 1731 1732 return 0; 1732 1733 }

+41 -33

fs/btrfs/dev-replace.c

··· 639 639 write_unlock(&em_tree->lock); 640 640 } 641 641 642 + /* 643 + * Read progress of device replace status according to the state and last 644 + * stored position. The value format is the same as for 645 + * btrfs_dev_replace::progress_1000 646 + */ 647 + static u64 btrfs_dev_replace_progress(struct btrfs_fs_info *fs_info) 648 + { 649 + struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 650 + u64 ret = 0; 651 + 652 + switch (dev_replace->replace_state) { 653 + case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 654 + case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 655 + ret = 0; 656 + break; 657 + case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 658 + ret = 1000; 659 + break; 660 + case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 661 + case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 662 + ret = div64_u64(dev_replace->cursor_left, 663 + div_u64(btrfs_device_get_total_bytes( 664 + dev_replace->srcdev), 1000)); 665 + break; 666 + } 667 + 668 + return ret; 669 + } 670 + 642 671 void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, 643 672 struct btrfs_ioctl_dev_replace_args *args) 644 673 { 645 674 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 646 - struct btrfs_device *srcdev; 647 675 648 676 btrfs_dev_replace_lock(dev_replace, 0); 649 677 /* even if !dev_replace_is_valid, the values are good enough for ··· 684 656 atomic64_read(&dev_replace->num_write_errors); 685 657 args->status.num_uncorrectable_read_errors = 686 658 atomic64_read(&dev_replace->num_uncorrectable_read_errors); 687 - switch (dev_replace->replace_state) { 688 - case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 689 - case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 690 - args->status.progress_1000 = 0; 691 - break; 692 - case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 693 - args->status.progress_1000 = 1000; 694 - break; 695 - case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 696 - case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 697 - srcdev = dev_replace->srcdev; 698 - 
args->status.progress_1000 = div64_u64(dev_replace->cursor_left, 699 - div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); 700 - break; 701 - } 659 + args->status.progress_1000 = btrfs_dev_replace_progress(fs_info); 702 660 btrfs_dev_replace_unlock(dev_replace, 0); 703 661 } 704 662 ··· 809 795 { 810 796 struct btrfs_fs_info *fs_info = data; 811 797 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 812 - struct btrfs_ioctl_dev_replace_args *status_args; 813 798 u64 progress; 814 799 815 - status_args = kzalloc(sizeof(*status_args), GFP_KERNEL); 816 - if (status_args) { 817 - btrfs_dev_replace_status(fs_info, status_args); 818 - progress = status_args->status.progress_1000; 819 - kfree(status_args); 820 - progress = div_u64(progress, 10); 821 - btrfs_info_in_rcu(fs_info, 822 - "continuing dev_replace from %s (devid %llu) to %s @%u%%", 823 - dev_replace->srcdev->missing ? "<missing disk>" : 824 - rcu_str_deref(dev_replace->srcdev->name), 825 - dev_replace->srcdev->devid, 826 - dev_replace->tgtdev ? 827 - rcu_str_deref(dev_replace->tgtdev->name) : 828 - "<missing target disk>", 829 - (unsigned int)progress); 830 - } 800 + progress = btrfs_dev_replace_progress(fs_info); 801 + progress = div_u64(progress, 10); 802 + btrfs_info_in_rcu(fs_info, 803 + "continuing dev_replace from %s (devid %llu) to %s @%u%%", 804 + dev_replace->srcdev->missing ? "<missing disk>" 805 + : rcu_str_deref(dev_replace->srcdev->name), 806 + dev_replace->srcdev->devid, 807 + dev_replace->tgtdev ? rcu_str_deref(dev_replace->tgtdev->name) 808 + : "<missing target disk>", 809 + (unsigned int)progress); 810 + 831 811 btrfs_dev_replace_continue_on_mount(fs_info); 832 812 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 833 813

+96 -150

fs/btrfs/disk-io.c

··· 529 529 struct extent_buffer *eb) 530 530 { 531 531 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 532 - u8 fsid[BTRFS_UUID_SIZE]; 532 + u8 fsid[BTRFS_FSID_SIZE]; 533 533 int ret = 1; 534 534 535 535 read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); ··· 1343 1343 atomic_set(&root->log_batch, 0); 1344 1344 atomic_set(&root->orphan_inodes, 0); 1345 1345 refcount_set(&root->refs, 1); 1346 - atomic_set(&root->will_be_snapshoted, 0); 1346 + atomic_set(&root->will_be_snapshotted, 0); 1347 1347 atomic64_set(&root->qgroup_meta_rsv, 0); 1348 1348 root->log_transid = 0; 1349 1349 root->log_transid_committed = -1; ··· 2694 2694 btrfs_init_balance(fs_info); 2695 2695 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); 2696 2696 2697 - sb->s_blocksize = 4096; 2698 - sb->s_blocksize_bits = blksize_bits(4096); 2697 + sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE; 2698 + sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE); 2699 2699 2700 2700 btrfs_init_btree_inode(fs_info); 2701 2701 ··· 3035 3035 btrfs_err(fs_info, "failed to read block groups: %d", ret); 3036 3036 goto fail_sysfs; 3037 3037 } 3038 - fs_info->num_tolerated_disk_barrier_failures = 3039 - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 3040 - if (fs_info->fs_devices->missing_devices > 3041 - fs_info->num_tolerated_disk_barrier_failures && 3042 - !(sb->s_flags & MS_RDONLY)) { 3038 + 3039 + if (!(sb->s_flags & MS_RDONLY) && !btrfs_check_rw_degradable(fs_info)) { 3043 3040 btrfs_warn(fs_info, 3044 - "missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed", 3045 - fs_info->fs_devices->missing_devices, 3046 - fs_info->num_tolerated_disk_barrier_failures); 3041 + "writeable mount is not allowed due to too many missing devices"); 3047 3042 goto fail_sysfs; 3048 3043 } 3049 3044 ··· 3053 3058 if (IS_ERR(fs_info->transaction_kthread)) 3054 3059 goto fail_cleaner; 3055 3060 3056 - if (!btrfs_test_opt(fs_info, SSD) && 3057 - !btrfs_test_opt(fs_info, 
NOSSD) && 3061 + if (!btrfs_test_opt(fs_info, NOSSD) && 3058 3062 !fs_info->fs_devices->rotating) { 3059 - btrfs_info(fs_info, "detected SSD devices, enabling SSD mode"); 3060 - btrfs_set_opt(fs_info->mount_opt, SSD); 3063 + btrfs_set_and_info(fs_info, SSD, "enabling ssd optimizations"); 3061 3064 } 3062 3065 3063 3066 /* ··· 3314 3321 if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode)) 3315 3322 return -EINVAL; 3316 3323 3317 - bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); 3324 + bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE); 3318 3325 /* 3319 3326 * If we fail to read from the underlying devices, as of now 3320 3327 * the best option we have is to mark it EIO. ··· 3371 3378 } 3372 3379 3373 3380 /* 3374 - * this should be called twice, once with wait == 0 and 3375 - * once with wait == 1. When wait == 0 is done, all the buffer heads 3376 - * we write are pinned. 3381 + * Write superblock @sb to the @device. Do not wait for completion, all the 3382 + * buffer heads we write are pinned. 3377 3383 * 3378 - * They are released when wait == 1 is done. 3379 - * max_mirrors must be the same for both runs, and it indicates how 3380 - * many supers on this one device should be written. 3384 + * Write @max_mirrors copies of the superblock, where 0 means default that fit 3385 + * the expected device size at commit time. Note that max_mirrors must be 3386 + * same for write and wait phases. 3381 3387 * 3382 - * max_mirrors == 0 means to write them all. 3388 + * Return number of errors when buffer head is not found or submission fails. 
3383 3389 */ 3384 3390 static int write_dev_supers(struct btrfs_device *device, 3385 - struct btrfs_super_block *sb, 3386 - int wait, int max_mirrors) 3391 + struct btrfs_super_block *sb, int max_mirrors) 3387 3392 { 3388 3393 struct buffer_head *bh; 3389 3394 int i; ··· 3399 3408 device->commit_total_bytes) 3400 3409 break; 3401 3410 3402 - if (wait) { 3403 - bh = __find_get_block(device->bdev, bytenr / 4096, 3404 - BTRFS_SUPER_INFO_SIZE); 3405 - if (!bh) { 3406 - errors++; 3407 - continue; 3408 - } 3409 - wait_on_buffer(bh); 3410 - if (!buffer_uptodate(bh)) 3411 - errors++; 3411 + btrfs_set_super_bytenr(sb, bytenr); 3412 3412 3413 - /* drop our reference */ 3414 - brelse(bh); 3413 + crc = ~(u32)0; 3414 + crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, 3415 + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); 3416 + btrfs_csum_final(crc, sb->csum); 3415 3417 3416 - /* drop the reference from the wait == 0 run */ 3417 - brelse(bh); 3418 + /* One reference for us, and we leave it for the caller */ 3419 + bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, 3420 + BTRFS_SUPER_INFO_SIZE); 3421 + if (!bh) { 3422 + btrfs_err(device->fs_info, 3423 + "couldn't get super buffer head for bytenr %llu", 3424 + bytenr); 3425 + errors++; 3418 3426 continue; 3419 - } else { 3420 - btrfs_set_super_bytenr(sb, bytenr); 3421 - 3422 - crc = ~(u32)0; 3423 - crc = btrfs_csum_data((const char *)sb + 3424 - BTRFS_CSUM_SIZE, crc, 3425 - BTRFS_SUPER_INFO_SIZE - 3426 - BTRFS_CSUM_SIZE); 3427 - btrfs_csum_final(crc, sb->csum); 3428 - 3429 - /* 3430 - * one reference for us, and we leave it for the 3431 - * caller 3432 - */ 3433 - bh = __getblk(device->bdev, bytenr / 4096, 3434 - BTRFS_SUPER_INFO_SIZE); 3435 - if (!bh) { 3436 - btrfs_err(device->fs_info, 3437 - "couldn't get super buffer head for bytenr %llu", 3438 - bytenr); 3439 - errors++; 3440 - continue; 3441 - } 3442 - 3443 - memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 3444 - 3445 - /* one reference for submit_bh */ 3446 
- get_bh(bh); 3447 - 3448 - set_buffer_uptodate(bh); 3449 - lock_buffer(bh); 3450 - bh->b_end_io = btrfs_end_buffer_write_sync; 3451 - bh->b_private = device; 3452 3427 } 3428 + 3429 + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 3430 + 3431 + /* one reference for submit_bh */ 3432 + get_bh(bh); 3433 + 3434 + set_buffer_uptodate(bh); 3435 + lock_buffer(bh); 3436 + bh->b_end_io = btrfs_end_buffer_write_sync; 3437 + bh->b_private = device; 3453 3438 3454 3439 /* 3455 3440 * we fua the first super. The others we allow ··· 3433 3466 */ 3434 3467 if (i == 0) { 3435 3468 ret = btrfsic_submit_bh(REQ_OP_WRITE, 3436 - REQ_SYNC | REQ_FUA, bh); 3469 + REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh); 3437 3470 } else { 3438 - ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); 3471 + ret = btrfsic_submit_bh(REQ_OP_WRITE, 3472 + REQ_SYNC | REQ_META | REQ_PRIO, bh); 3439 3473 } 3440 3474 if (ret) 3441 3475 errors++; 3442 3476 } 3477 + return errors < i ? 0 : -1; 3478 + } 3479 + 3480 + /* 3481 + * Wait for write completion of superblocks done by write_dev_supers, 3482 + * @max_mirrors same for write and wait phases. 3483 + * 3484 + * Return number of errors when buffer head is not found or not marked up to 3485 + * date. 
3486 + */ 3487 + static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) 3488 + { 3489 + struct buffer_head *bh; 3490 + int i; 3491 + int errors = 0; 3492 + u64 bytenr; 3493 + 3494 + if (max_mirrors == 0) 3495 + max_mirrors = BTRFS_SUPER_MIRROR_MAX; 3496 + 3497 + for (i = 0; i < max_mirrors; i++) { 3498 + bytenr = btrfs_sb_offset(i); 3499 + if (bytenr + BTRFS_SUPER_INFO_SIZE >= 3500 + device->commit_total_bytes) 3501 + break; 3502 + 3503 + bh = __find_get_block(device->bdev, 3504 + bytenr / BTRFS_BDEV_BLOCKSIZE, 3505 + BTRFS_SUPER_INFO_SIZE); 3506 + if (!bh) { 3507 + errors++; 3508 + continue; 3509 + } 3510 + wait_on_buffer(bh); 3511 + if (!buffer_uptodate(bh)) 3512 + errors++; 3513 + 3514 + /* drop our reference */ 3515 + brelse(bh); 3516 + 3517 + /* drop the reference from the writing run */ 3518 + brelse(bh); 3519 + } 3520 + 3443 3521 return errors < i ? 0 : -1; 3444 3522 } 3445 3523 ··· 3516 3504 init_completion(&device->flush_wait); 3517 3505 bio->bi_private = &device->flush_wait; 3518 3506 3519 - submit_bio(bio); 3507 + btrfsic_submit_bio(bio); 3520 3508 device->flush_bio_sent = 1; 3521 3509 } 3522 3510 ··· 3536 3524 return bio->bi_status; 3537 3525 } 3538 3526 3539 - static int check_barrier_error(struct btrfs_fs_devices *fsdevs) 3527 + static int check_barrier_error(struct btrfs_fs_info *fs_info) 3540 3528 { 3541 - int dev_flush_error = 0; 3542 - struct btrfs_device *dev; 3543 - 3544 - list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) { 3545 - if (!dev->bdev || dev->last_flush_error) 3546 - dev_flush_error++; 3547 - } 3548 - 3549 - if (dev_flush_error > 3550 - fsdevs->fs_info->num_tolerated_disk_barrier_failures) 3529 + if (!btrfs_check_rw_degradable(fs_info)) 3551 3530 return -EIO; 3552 - 3553 3531 return 0; 3554 3532 } 3555 3533 ··· 3594 3592 * to arrive at the volume status. So error checking 3595 3593 * is being pushed to a separate loop. 
3596 3594 */ 3597 - return check_barrier_error(info->fs_devices); 3595 + return check_barrier_error(info); 3598 3596 } 3599 3597 return 0; 3600 3598 } ··· 3626 3624 } 3627 3625 3628 3626 return min_tolerated; 3629 - } 3630 - 3631 - int btrfs_calc_num_tolerated_disk_barrier_failures( 3632 - struct btrfs_fs_info *fs_info) 3633 - { 3634 - struct btrfs_ioctl_space_info space; 3635 - struct btrfs_space_info *sinfo; 3636 - u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 3637 - BTRFS_BLOCK_GROUP_SYSTEM, 3638 - BTRFS_BLOCK_GROUP_METADATA, 3639 - BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; 3640 - int i; 3641 - int c; 3642 - int num_tolerated_disk_barrier_failures = 3643 - (int)fs_info->fs_devices->num_devices; 3644 - 3645 - for (i = 0; i < ARRAY_SIZE(types); i++) { 3646 - struct btrfs_space_info *tmp; 3647 - 3648 - sinfo = NULL; 3649 - rcu_read_lock(); 3650 - list_for_each_entry_rcu(tmp, &fs_info->space_info, list) { 3651 - if (tmp->flags == types[i]) { 3652 - sinfo = tmp; 3653 - break; 3654 - } 3655 - } 3656 - rcu_read_unlock(); 3657 - 3658 - if (!sinfo) 3659 - continue; 3660 - 3661 - down_read(&sinfo->groups_sem); 3662 - for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3663 - u64 flags; 3664 - 3665 - if (list_empty(&sinfo->block_groups[c])) 3666 - continue; 3667 - 3668 - btrfs_get_block_group_info(&sinfo->block_groups[c], 3669 - &space); 3670 - if (space.total_bytes == 0 || space.used_bytes == 0) 3671 - continue; 3672 - flags = space.flags; 3673 - 3674 - num_tolerated_disk_barrier_failures = min( 3675 - num_tolerated_disk_barrier_failures, 3676 - btrfs_get_num_tolerated_disk_barrier_failures( 3677 - flags)); 3678 - } 3679 - up_read(&sinfo->groups_sem); 3680 - } 3681 - 3682 - return num_tolerated_disk_barrier_failures; 3683 3627 } 3684 3628 3685 3629 int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) ··· 3680 3732 btrfs_set_stack_device_io_width(dev_item, dev->io_width); 3681 3733 btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); 3682 3734 
memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); 3683 - memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); 3735 + memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_FSID_SIZE); 3684 3736 3685 3737 flags = btrfs_super_flags(sb); 3686 3738 btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); 3687 3739 3688 - ret = write_dev_supers(dev, sb, 0, max_mirrors); 3740 + ret = write_dev_supers(dev, sb, max_mirrors); 3689 3741 if (ret) 3690 3742 total_errors++; 3691 3743 } ··· 3708 3760 if (!dev->in_fs_metadata || !dev->writeable) 3709 3761 continue; 3710 3762 3711 - ret = write_dev_supers(dev, sb, 1, max_mirrors); 3763 + ret = wait_dev_supers(dev, max_mirrors); 3712 3764 if (ret) 3713 3765 total_errors++; 3714 3766 } ··· 3943 3995 __btrfs_free_block_rsv(root->orphan_block_rsv); 3944 3996 root->orphan_block_rsv = NULL; 3945 3997 3946 - mutex_lock(&fs_info->chunk_mutex); 3947 3998 while (!list_empty(&fs_info->pinned_chunks)) { 3948 3999 struct extent_map *em; 3949 4000 ··· 3951 4004 list_del_init(&em->list); 3952 4005 free_extent_map(em); 3953 4006 } 3954 - mutex_unlock(&fs_info->chunk_mutex); 3955 4007 } 3956 4008 3957 4009 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, ··· 3999 4053 fs_info->dirty_metadata_batch); 4000 4054 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 4001 4055 if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { 4002 - btrfs_print_leaf(fs_info, buf); 4056 + btrfs_print_leaf(buf); 4003 4057 ASSERT(0); 4004 4058 } 4005 4059 #endif ··· 4119 4173 ret = -EINVAL; 4120 4174 } 4121 4175 4122 - if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { 4176 + if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { 4123 4177 btrfs_err(fs_info, 4124 4178 "dev_item UUID does not match fsid: %pU != %pU", 4125 4179 fs_info->fsid, sb->dev_item.fsid);

+8 -2

fs/btrfs/disk-io.h

··· 25 25 #define BTRFS_SUPER_MIRROR_MAX 3 26 26 #define BTRFS_SUPER_MIRROR_SHIFT 12 27 27 28 + /* 29 + * Fixed blocksize for all devices, applies to specific ways of reading 30 + * metadata like superblock. Must meet the set_blocksize requirements. 31 + * 32 + * Do not change. 33 + */ 34 + #define BTRFS_BDEV_BLOCKSIZE (4096) 35 + 28 36 enum btrfs_wq_endio_type { 29 37 BTRFS_WQ_ENDIO_DATA = 0, 30 38 BTRFS_WQ_ENDIO_METADATA = 1, ··· 150 142 int btree_lock_page_hook(struct page *page, void *data, 151 143 void (*flush_fn)(void *)); 152 144 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); 153 - int btrfs_calc_num_tolerated_disk_barrier_failures( 154 - struct btrfs_fs_info *fs_info); 155 145 int __init btrfs_end_io_wq_init(void); 156 146 void btrfs_end_io_wq_exit(void); 157 147

+135 -65

fs/btrfs/extent-tree.c

··· 1148 1148 } 1149 1149 #endif 1150 1150 1151 + /* 1152 + * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required, 1153 + * is_data == BTRFS_REF_TYPE_DATA, data type is requried, 1154 + * is_data == BTRFS_REF_TYPE_ANY, either type is OK. 1155 + */ 1156 + int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, 1157 + struct btrfs_extent_inline_ref *iref, 1158 + enum btrfs_inline_ref_type is_data) 1159 + { 1160 + int type = btrfs_extent_inline_ref_type(eb, iref); 1161 + u64 offset = btrfs_extent_inline_ref_offset(eb, iref); 1162 + 1163 + if (type == BTRFS_TREE_BLOCK_REF_KEY || 1164 + type == BTRFS_SHARED_BLOCK_REF_KEY || 1165 + type == BTRFS_SHARED_DATA_REF_KEY || 1166 + type == BTRFS_EXTENT_DATA_REF_KEY) { 1167 + if (is_data == BTRFS_REF_TYPE_BLOCK) { 1168 + if (type == BTRFS_TREE_BLOCK_REF_KEY) 1169 + return type; 1170 + if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1171 + ASSERT(eb->fs_info); 1172 + /* 1173 + * Every shared one has parent tree 1174 + * block, which must be aligned to 1175 + * nodesize. 1176 + */ 1177 + if (offset && 1178 + IS_ALIGNED(offset, eb->fs_info->nodesize)) 1179 + return type; 1180 + } 1181 + } else if (is_data == BTRFS_REF_TYPE_DATA) { 1182 + if (type == BTRFS_EXTENT_DATA_REF_KEY) 1183 + return type; 1184 + if (type == BTRFS_SHARED_DATA_REF_KEY) { 1185 + ASSERT(eb->fs_info); 1186 + /* 1187 + * Every shared one has parent tree 1188 + * block, which must be aligned to 1189 + * nodesize. 
1190 + */ 1191 + if (offset && 1192 + IS_ALIGNED(offset, eb->fs_info->nodesize)) 1193 + return type; 1194 + } 1195 + } else { 1196 + ASSERT(is_data == BTRFS_REF_TYPE_ANY); 1197 + return type; 1198 + } 1199 + } 1200 + 1201 + btrfs_print_leaf((struct extent_buffer *)eb); 1202 + btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d", 1203 + eb->start, type); 1204 + WARN_ON(1); 1205 + 1206 + return BTRFS_REF_TYPE_INVALID; 1207 + } 1208 + 1151 1209 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 1152 1210 { 1153 1211 u32 high_crc = ~(u32)0; ··· 1475 1417 struct btrfs_extent_data_ref *ref1; 1476 1418 struct btrfs_shared_data_ref *ref2; 1477 1419 u32 num_refs = 0; 1420 + int type; 1478 1421 1479 1422 leaf = path->nodes[0]; 1480 1423 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1481 1424 if (iref) { 1482 - if (btrfs_extent_inline_ref_type(leaf, iref) == 1483 - BTRFS_EXTENT_DATA_REF_KEY) { 1425 + /* 1426 + * If type is invalid, we should have bailed out earlier than 1427 + * this call. 
1428 + */ 1429 + type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA); 1430 + ASSERT(type != BTRFS_REF_TYPE_INVALID); 1431 + if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1484 1432 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1485 1433 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1486 1434 } else { ··· 1647 1583 int ret; 1648 1584 int err = 0; 1649 1585 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); 1586 + int needed; 1650 1587 1651 1588 key.objectid = bytenr; 1652 1589 key.type = BTRFS_EXTENT_ITEM_KEY; ··· 1739 1674 BUG_ON(ptr > end); 1740 1675 } 1741 1676 1677 + if (owner >= BTRFS_FIRST_FREE_OBJECTID) 1678 + needed = BTRFS_REF_TYPE_DATA; 1679 + else 1680 + needed = BTRFS_REF_TYPE_BLOCK; 1681 + 1742 1682 err = -ENOENT; 1743 1683 while (1) { 1744 1684 if (ptr >= end) { ··· 1751 1681 break; 1752 1682 } 1753 1683 iref = (struct btrfs_extent_inline_ref *)ptr; 1754 - type = btrfs_extent_inline_ref_type(leaf, iref); 1684 + type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); 1685 + if (type == BTRFS_REF_TYPE_INVALID) { 1686 + err = -EINVAL; 1687 + goto out; 1688 + } 1689 + 1755 1690 if (want < type) 1756 1691 break; 1757 1692 if (want > type) { ··· 1948 1873 if (extent_op) 1949 1874 __run_delayed_extent_op(extent_op, leaf, ei); 1950 1875 1951 - type = btrfs_extent_inline_ref_type(leaf, iref); 1876 + /* 1877 + * If type is invalid, we should have bailed out after 1878 + * lookup_inline_extent_backref(). 
1879 + */ 1880 + type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY); 1881 + ASSERT(type != BTRFS_REF_TYPE_INVALID); 1952 1882 1953 1883 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1954 1884 dref = (struct btrfs_extent_data_ref *)(&iref->offset); ··· 3238 3158 struct btrfs_extent_item *ei; 3239 3159 struct btrfs_key key; 3240 3160 u32 item_size; 3161 + int type; 3241 3162 int ret; 3242 3163 3243 3164 key.objectid = bytenr; ··· 3280 3199 goto out; 3281 3200 3282 3201 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 3283 - if (btrfs_extent_inline_ref_type(leaf, iref) != 3284 - BTRFS_EXTENT_DATA_REF_KEY) 3202 + 3203 + type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA); 3204 + if (type != BTRFS_EXTENT_DATA_REF_KEY) 3285 3205 goto out; 3286 3206 3287 3207 ref = (struct btrfs_extent_data_ref *)(&iref->offset); ··· 4281 4199 4282 4200 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) 4283 4201 { 4284 - struct btrfs_space_info *data_sinfo; 4285 4202 struct btrfs_root *root = inode->root; 4286 4203 struct btrfs_fs_info *fs_info = root->fs_info; 4204 + struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; 4287 4205 u64 used; 4288 4206 int ret = 0; 4289 4207 int need_commit = 2; ··· 4296 4214 need_commit = 0; 4297 4215 ASSERT(current->journal_info); 4298 4216 } 4299 - 4300 - data_sinfo = fs_info->data_sinfo; 4301 - if (!data_sinfo) 4302 - goto alloc; 4303 4217 4304 4218 again: 4305 4219 /* make sure we have enough space to handle the data first */ ··· 4314 4236 4315 4237 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; 4316 4238 spin_unlock(&data_sinfo->lock); 4317 - alloc: 4239 + 4318 4240 alloc_target = btrfs_data_alloc_profile(fs_info); 4319 4241 /* 4320 4242 * It is ugly that we don't call nolock join ··· 4341 4263 goto commit_trans; 4342 4264 } 4343 4265 } 4344 - 4345 - if (!data_sinfo) 4346 - data_sinfo = fs_info->data_sinfo; 4347 4266 4348 4267 goto again; 4349 4268 } ··· 4500 4425 struct 
btrfs_space_info *sinfo, int force) 4501 4426 { 4502 4427 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; 4503 - u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 4504 - u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 4428 + u64 bytes_used = btrfs_space_info_used(sinfo, false); 4505 4429 u64 thresh; 4506 4430 4507 4431 if (force == CHUNK_ALLOC_FORCE) ··· 4512 4438 * global_rsv, it doesn't change except when the transaction commits. 4513 4439 */ 4514 4440 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) 4515 - num_allocated += calc_global_rsv_need_space(global_rsv); 4441 + bytes_used += calc_global_rsv_need_space(global_rsv); 4516 4442 4517 4443 /* 4518 4444 * in limited mode, we want to have some free space up to ··· 4522 4448 thresh = btrfs_super_total_bytes(fs_info->super_copy); 4523 4449 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1)); 4524 4450 4525 - if (num_bytes - num_allocated < thresh) 4451 + if (sinfo->total_bytes - bytes_used < thresh) 4526 4452 return 1; 4527 4453 } 4528 4454 4529 - if (num_allocated + SZ_2M < div_factor(num_bytes, 8)) 4455 + if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8)) 4530 4456 return 0; 4531 4457 return 1; 4532 4458 } ··· 4978 4904 wait_queue_head_t wait; 4979 4905 }; 4980 4906 4981 - static int flush_space(struct btrfs_fs_info *fs_info, 4907 + /* 4908 + * Try to flush some data based on policy set by @state. This is only advisory 4909 + * and may fail for various reasons. The caller is supposed to examine the 4910 + * state of @space_info to detect the outcome. 
4911 + */ 4912 + static void flush_space(struct btrfs_fs_info *fs_info, 4982 4913 struct btrfs_space_info *space_info, u64 num_bytes, 4983 - u64 orig_bytes, int state) 4914 + int state) 4984 4915 { 4985 4916 struct btrfs_root *root = fs_info->extent_root; 4986 4917 struct btrfs_trans_handle *trans; ··· 5010 4931 break; 5011 4932 case FLUSH_DELALLOC: 5012 4933 case FLUSH_DELALLOC_WAIT: 5013 - shrink_delalloc(fs_info, num_bytes * 2, orig_bytes, 4934 + shrink_delalloc(fs_info, num_bytes * 2, num_bytes, 5014 4935 state == FLUSH_DELALLOC_WAIT); 5015 4936 break; 5016 4937 case ALLOC_CHUNK: ··· 5028 4949 break; 5029 4950 case COMMIT_TRANS: 5030 4951 ret = may_commit_transaction(fs_info, space_info, 5031 - orig_bytes, 0); 4952 + num_bytes, 0); 5032 4953 break; 5033 4954 default: 5034 4955 ret = -ENOSPC; 5035 4956 break; 5036 4957 } 5037 4958 5038 - trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, 5039 - orig_bytes, state, ret); 5040 - return ret; 4959 + trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, 4960 + ret); 4961 + return; 5041 4962 } 5042 4963 5043 4964 static inline u64 ··· 5139 5060 5140 5061 flush_state = FLUSH_DELAYED_ITEMS_NR; 5141 5062 do { 5142 - struct reserve_ticket *ticket; 5143 - int ret; 5144 - 5145 - ret = flush_space(fs_info, space_info, to_reclaim, to_reclaim, 5146 - flush_state); 5063 + flush_space(fs_info, space_info, to_reclaim, flush_state); 5147 5064 spin_lock(&space_info->lock); 5148 5065 if (list_empty(&space_info->tickets)) { 5149 5066 space_info->flush = 0; ··· 5149 5074 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, 5150 5075 space_info, 5151 5076 false); 5152 - ticket = list_first_entry(&space_info->tickets, 5153 - struct reserve_ticket, list); 5154 5077 if (last_tickets_id == space_info->tickets_id) { 5155 5078 flush_state++; 5156 5079 } else { ··· 5193 5120 spin_unlock(&space_info->lock); 5194 5121 5195 5122 do { 5196 - flush_space(fs_info, space_info, to_reclaim, to_reclaim, 5197 - flush_state); 
5123 + flush_space(fs_info, space_info, to_reclaim, flush_state); 5198 5124 flush_state++; 5199 5125 spin_lock(&space_info->lock); 5200 5126 if (ticket->bytes == 0) { ··· 6736 6664 struct btrfs_space_info *space_info, u64 *empty_cluster) 6737 6665 { 6738 6666 struct btrfs_free_cluster *ret = NULL; 6739 - bool ssd = btrfs_test_opt(fs_info, SSD); 6740 6667 6741 6668 *empty_cluster = 0; 6742 6669 if (btrfs_mixed_space_info(space_info)) 6743 6670 return ret; 6744 6671 6745 - if (ssd) 6746 - *empty_cluster = SZ_2M; 6747 6672 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { 6748 6673 ret = &fs_info->meta_alloc_cluster; 6749 - if (!ssd) 6674 + if (btrfs_test_opt(fs_info, SSD)) 6675 + *empty_cluster = SZ_2M; 6676 + else 6750 6677 *empty_cluster = SZ_64K; 6751 - } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) { 6678 + } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && 6679 + btrfs_test_opt(fs_info, SSD_SPREAD)) { 6680 + *empty_cluster = SZ_2M; 6752 6681 ret = &fs_info->data_alloc_cluster; 6753 6682 } 6754 6683 ··· 6828 6755 if (!readonly && return_free_space && 6829 6756 global_rsv->space_info == space_info) { 6830 6757 u64 to_add = len; 6831 - WARN_ON(!return_free_space); 6758 + 6832 6759 spin_lock(&global_rsv->lock); 6833 6760 if (!global_rsv->full) { 6834 6761 to_add = min(len, global_rsv->size - ··· 6914 6841 if (ret) { 6915 6842 const char *errstr = btrfs_decode_error(ret); 6916 6843 btrfs_warn(fs_info, 6917 - "Discard failed while removing blockgroup: errno=%d %s\n", 6844 + "discard failed while removing blockgroup: errno=%d %s", 6918 6845 ret, errstr); 6919 6846 } 6920 6847 } ··· 7042 6969 "umm, got %d back from search, was looking for %llu", 7043 6970 ret, bytenr); 7044 6971 if (ret > 0) 7045 - btrfs_print_leaf(info, path->nodes[0]); 6972 + btrfs_print_leaf(path->nodes[0]); 7046 6973 } 7047 6974 if (ret < 0) { 7048 6975 btrfs_abort_transaction(trans, ret); ··· 7051 6978 extent_slot = path->slots[0]; 7052 6979 } 7053 6980 } else if 
(WARN_ON(ret == -ENOENT)) { 7054 - btrfs_print_leaf(info, path->nodes[0]); 6981 + btrfs_print_leaf(path->nodes[0]); 7055 6982 btrfs_err(info, 7056 6983 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", 7057 6984 bytenr, parent, root_objectid, owner_objectid, ··· 7088 7015 btrfs_err(info, 7089 7016 "umm, got %d back from search, was looking for %llu", 7090 7017 ret, bytenr); 7091 - btrfs_print_leaf(info, path->nodes[0]); 7018 + btrfs_print_leaf(path->nodes[0]); 7092 7019 } 7093 7020 if (ret < 0) { 7094 7021 btrfs_abort_transaction(trans, ret); ··· 9266 9193 if (err) 9267 9194 goto out_end_trans; 9268 9195 9269 - ret = btrfs_del_root(trans, tree_root, &root->root_key); 9196 + ret = btrfs_del_root(trans, fs_info, &root->root_key); 9270 9197 if (ret) { 9271 9198 btrfs_abort_transaction(trans, ret); 9272 9199 goto out_end_trans; ··· 10025 9952 cache->key.offset = size; 10026 9953 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 10027 9954 10028 - cache->sectorsize = fs_info->sectorsize; 10029 9955 cache->fs_info = fs_info; 10030 - cache->full_stripe_len = btrfs_full_stripe_len(fs_info, 10031 - &fs_info->mapping_tree, 10032 - start); 9956 + cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start); 10033 9957 set_free_space_tree_thresholds(cache); 10034 9958 10035 9959 atomic_set(&cache->count, 1); ··· 10262 10192 10263 10193 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 10264 10194 struct btrfs_fs_info *fs_info, u64 bytes_used, 10265 - u64 type, u64 chunk_objectid, u64 chunk_offset, 10266 - u64 size) 10195 + u64 type, u64 chunk_offset, u64 size) 10267 10196 { 10268 10197 struct btrfs_block_group_cache *cache; 10269 10198 int ret; ··· 10274 10205 return -ENOMEM; 10275 10206 10276 10207 btrfs_set_block_group_used(&cache->item, bytes_used); 10277 - btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 10208 + btrfs_set_block_group_chunk_objectid(&cache->item, 10209 + BTRFS_FIRST_CHUNK_TREE_OBJECTID); 10278 10210 
btrfs_set_block_group_flags(&cache->item, type); 10279 10211 10280 10212 cache->flags = type; ··· 11071 11001 } 11072 11002 11073 11003 /* 11074 - * btrfs_{start,end}_write_no_snapshoting() are similar to 11004 + * btrfs_{start,end}_write_no_snapshotting() are similar to 11075 11005 * mnt_{want,drop}_write(), they are used to prevent some tasks from writing 11076 11006 * data into the page cache through nocow before the subvolume is snapshoted, 11077 11007 * but flush the data into disk after the snapshot creation, or to prevent 11078 - * operations while snapshoting is ongoing and that cause the snapshot to be 11008 + * operations while snapshotting is ongoing and that cause the snapshot to be 11079 11009 * inconsistent (writes followed by expanding truncates for example). 11080 11010 */ 11081 - void btrfs_end_write_no_snapshoting(struct btrfs_root *root) 11011 + void btrfs_end_write_no_snapshotting(struct btrfs_root *root) 11082 11012 { 11083 11013 percpu_counter_dec(&root->subv_writers->counter); 11084 11014 /* ··· 11089 11019 wake_up(&root->subv_writers->wait); 11090 11020 } 11091 11021 11092 - int btrfs_start_write_no_snapshoting(struct btrfs_root *root) 11022 + int btrfs_start_write_no_snapshotting(struct btrfs_root *root) 11093 11023 { 11094 - if (atomic_read(&root->will_be_snapshoted)) 11024 + if (atomic_read(&root->will_be_snapshotted)) 11095 11025 return 0; 11096 11026 11097 11027 percpu_counter_inc(&root->subv_writers->counter); ··· 11099 11029 * Make sure counter is updated before we check for snapshot creation. 
11100 11030 */ 11101 11031 smp_mb(); 11102 - if (atomic_read(&root->will_be_snapshoted)) { 11103 - btrfs_end_write_no_snapshoting(root); 11032 + if (atomic_read(&root->will_be_snapshotted)) { 11033 + btrfs_end_write_no_snapshotting(root); 11104 11034 return 0; 11105 11035 } 11106 11036 return 1; 11107 11037 } 11108 11038 11109 - static int wait_snapshoting_atomic_t(atomic_t *a) 11039 + static int wait_snapshotting_atomic_t(atomic_t *a) 11110 11040 { 11111 11041 schedule(); 11112 11042 return 0; ··· 11117 11047 while (true) { 11118 11048 int ret; 11119 11049 11120 - ret = btrfs_start_write_no_snapshoting(root); 11050 + ret = btrfs_start_write_no_snapshotting(root); 11121 11051 if (ret) 11122 11052 break; 11123 - wait_on_atomic_t(&root->will_be_snapshoted, 11124 - wait_snapshoting_atomic_t, 11053 + wait_on_atomic_t(&root->will_be_snapshotted, 11054 + wait_snapshotting_atomic_t, 11125 11055 TASK_UNINTERRUPTIBLE); 11126 11056 } 11127 11057 }

+41 -51

fs/btrfs/extent_io.c

··· 20 20 #include "locking.h" 21 21 #include "rcu-string.h" 22 22 #include "backref.h" 23 - #include "transaction.h" 24 23 25 24 static struct kmem_cache *extent_state_cache; 26 25 static struct kmem_cache *extent_buffer_cache; ··· 1997 1998 * read repair operation. 1998 1999 */ 1999 2000 btrfs_bio_counter_inc_blocked(fs_info); 2000 - if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) { 2001 + if (btrfs_is_parity_mirror(fs_info, logical, length)) { 2001 2002 /* 2002 2003 * Note that we don't use BTRFS_MAP_WRITE because it's supposed 2003 2004 * to update all raid stripes, but here we just want to correct ··· 2756 2757 2757 2758 } 2758 2759 2759 - static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree, 2760 + /* 2761 + * @opf: bio REQ_OP_* and REQ_* flags as one value 2762 + */ 2763 + static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, 2760 2764 struct writeback_control *wbc, 2761 2765 struct page *page, sector_t sector, 2762 2766 size_t size, unsigned long offset, ··· 2806 2804 bio->bi_end_io = end_io_func; 2807 2805 bio->bi_private = tree; 2808 2806 bio->bi_write_hint = page->mapping->host->i_write_hint; 2809 - bio_set_op_attrs(bio, op, op_flags); 2807 + bio->bi_opf = opf; 2810 2808 if (wbc) { 2811 2809 wbc_init_bio(wbc, bio); 2812 2810 wbc_account_io(wbc, page, page_size); ··· 2880 2878 get_extent_t *get_extent, 2881 2879 struct extent_map **em_cached, 2882 2880 struct bio **bio, int mirror_num, 2883 - unsigned long *bio_flags, int read_flags, 2881 + unsigned long *bio_flags, unsigned int read_flags, 2884 2882 u64 *prev_em_start) 2885 2883 { 2886 2884 struct inode *inode = page->mapping->host; ··· 3061 3059 continue; 3062 3060 } 3063 3061 3064 - ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL, 3062 + ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL, 3065 3063 page, sector, disk_io_size, pg_offset, 3066 3064 bdev, bio, 3067 3065 end_bio_extent_readpage, mirror_num, ··· 3166 
3164 struct page *page, 3167 3165 get_extent_t *get_extent, 3168 3166 struct bio **bio, int mirror_num, 3169 - unsigned long *bio_flags, int read_flags) 3167 + unsigned long *bio_flags, 3168 + unsigned int read_flags) 3170 3169 { 3171 3170 struct inode *inode = page->mapping->host; 3172 3171 struct btrfs_ordered_extent *ordered; ··· 3314 3311 struct extent_page_data *epd, 3315 3312 loff_t i_size, 3316 3313 unsigned long nr_written, 3317 - int write_flags, int *nr_ret) 3314 + unsigned int write_flags, int *nr_ret) 3318 3315 { 3319 3316 struct extent_io_tree *tree = epd->tree; 3320 3317 u64 start = page_offset(page); ··· 3430 3427 page->index, cur, end); 3431 3428 } 3432 3429 3433 - ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, 3430 + ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, 3434 3431 page, sector, iosize, pg_offset, 3435 3432 bdev, &epd->bio, 3436 3433 end_bio_extent_writepage, ··· 3468 3465 size_t pg_offset = 0; 3469 3466 loff_t i_size = i_size_read(inode); 3470 3467 unsigned long end_index = i_size >> PAGE_SHIFT; 3471 - int write_flags = 0; 3468 + unsigned int write_flags = 0; 3472 3469 unsigned long nr_written = 0; 3473 3470 3474 3471 if (wbc->sync_mode == WB_SYNC_ALL) ··· 3718 3715 unsigned long i, num_pages; 3719 3716 unsigned long bio_flags = 0; 3720 3717 unsigned long start, end; 3721 - int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; 3718 + unsigned int write_flags = (epd->sync_io ? 
REQ_SYNC : 0) | REQ_META; 3722 3719 int ret = 0; 3723 3720 3724 3721 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); ··· 3748 3745 3749 3746 clear_page_dirty_for_io(p); 3750 3747 set_page_writeback(p); 3751 - ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc, 3748 + ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, 3752 3749 p, offset >> 9, PAGE_SIZE, 0, bdev, 3753 3750 &epd->bio, 3754 3751 end_bio_extent_buffer_writepage, ··· 4609 4606 flags |= (FIEMAP_EXTENT_DELALLOC | 4610 4607 FIEMAP_EXTENT_UNKNOWN); 4611 4608 } else if (fieinfo->fi_extents_max) { 4612 - struct btrfs_trans_handle *trans; 4613 - 4614 4609 u64 bytenr = em->block_start - 4615 4610 (em->start - em->orig_start); 4616 4611 4617 4612 disko = em->block_start + offset_in_extent; 4618 - 4619 - /* 4620 - * We need a trans handle to get delayed refs 4621 - */ 4622 - trans = btrfs_join_transaction(root); 4623 - /* 4624 - * It's OK if we can't start a trans we can still check 4625 - * from commit_root 4626 - */ 4627 - if (IS_ERR(trans)) 4628 - trans = NULL; 4629 4613 4630 4614 /* 4631 4615 * As btrfs supports shared space, this information ··· 4621 4631 * then we're just getting a count and we can skip the 4622 4632 * lookup stuff. 
4623 4633 */ 4624 - ret = btrfs_check_shared(trans, root->fs_info, 4625 - root->objectid, 4626 - btrfs_ino(BTRFS_I(inode)), bytenr); 4627 - if (trans) 4628 - btrfs_end_transaction(trans); 4634 + ret = btrfs_check_shared(root, 4635 + btrfs_ino(BTRFS_I(inode)), 4636 + bytenr); 4629 4637 if (ret < 0) 4630 4638 goto out_free; 4631 4639 if (ret) ··· 5393 5405 return ret; 5394 5406 } 5395 5407 5396 - void read_extent_buffer(struct extent_buffer *eb, void *dstv, 5397 - unsigned long start, 5398 - unsigned long len) 5408 + void read_extent_buffer(const struct extent_buffer *eb, void *dstv, 5409 + unsigned long start, unsigned long len) 5399 5410 { 5400 5411 size_t cur; 5401 5412 size_t offset; ··· 5404 5417 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1); 5405 5418 unsigned long i = (start_offset + start) >> PAGE_SHIFT; 5406 5419 5407 - WARN_ON(start > eb->len); 5408 - WARN_ON(start + len > eb->start + eb->len); 5420 + if (start + len > eb->len) { 5421 + WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", 5422 + eb->start, eb->len, start, len); 5423 + memset(dst, 0, len); 5424 + return; 5425 + } 5409 5426 5410 5427 offset = (start_offset + start) & (PAGE_SIZE - 1); 5411 5428 ··· 5427 5436 } 5428 5437 } 5429 5438 5430 - int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, 5431 - unsigned long start, 5432 - unsigned long len) 5439 + int read_extent_buffer_to_user(const struct extent_buffer *eb, 5440 + void __user *dstv, 5441 + unsigned long start, unsigned long len) 5433 5442 { 5434 5443 size_t cur; 5435 5444 size_t offset; ··· 5469 5478 * return 1 if the item spans two pages. 5470 5479 * return -EINVAL otherwise. 
5471 5480 */ 5472 - int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, 5473 - unsigned long min_len, char **map, 5474 - unsigned long *map_start, 5475 - unsigned long *map_len) 5481 + int map_private_extent_buffer(const struct extent_buffer *eb, 5482 + unsigned long start, unsigned long min_len, 5483 + char **map, unsigned long *map_start, 5484 + unsigned long *map_len) 5476 5485 { 5477 5486 size_t offset = start & (PAGE_SIZE - 1); 5478 5487 char *kaddr; ··· 5481 5490 unsigned long i = (start_offset + start) >> PAGE_SHIFT; 5482 5491 unsigned long end_i = (start_offset + start + min_len - 1) >> 5483 5492 PAGE_SHIFT; 5493 + 5494 + if (start + min_len > eb->len) { 5495 + WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", 5496 + eb->start, eb->len, start, min_len); 5497 + return -EINVAL; 5498 + } 5484 5499 5485 5500 if (i != end_i) 5486 5501 return 1; ··· 5499 5502 *map_start = ((u64)i << PAGE_SHIFT) - start_offset; 5500 5503 } 5501 5504 5502 - if (start + min_len > eb->len) { 5503 - WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", 5504 - eb->start, eb->len, start, min_len); 5505 - return -EINVAL; 5506 - } 5507 - 5508 5505 p = eb->pages[i]; 5509 5506 kaddr = page_address(p); 5510 5507 *map = kaddr + offset; ··· 5506 5515 return 0; 5507 5516 } 5508 5517 5509 - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 5510 - unsigned long start, 5511 - unsigned long len) 5518 + int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, 5519 + unsigned long start, unsigned long len) 5512 5520 { 5513 5521 size_t cur; 5514 5522 size_t offset;

+9 -10

fs/btrfs/extent_io.h

··· 449 449 atomic_inc(&eb->refs); 450 450 } 451 451 452 - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 453 - unsigned long start, 454 - unsigned long len); 455 - void read_extent_buffer(struct extent_buffer *eb, void *dst, 452 + int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, 453 + unsigned long start, unsigned long len); 454 + void read_extent_buffer(const struct extent_buffer *eb, void *dst, 456 455 unsigned long start, 457 456 unsigned long len); 458 - int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, 459 - unsigned long start, 457 + int read_extent_buffer_to_user(const struct extent_buffer *eb, 458 + void __user *dst, unsigned long start, 460 459 unsigned long len); 461 460 void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src); 462 461 void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb, ··· 485 486 void clear_extent_buffer_uptodate(struct extent_buffer *eb); 486 487 int extent_buffer_uptodate(struct extent_buffer *eb); 487 488 int extent_buffer_under_io(struct extent_buffer *eb); 488 - int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 489 - unsigned long min_len, char **map, 490 - unsigned long *map_start, 491 - unsigned long *map_len); 489 + int map_private_extent_buffer(const struct extent_buffer *eb, 490 + unsigned long offset, unsigned long min_len, 491 + char **map, unsigned long *map_start, 492 + unsigned long *map_len); 492 493 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); 493 494 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 494 495 void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,

+13 -6

fs/btrfs/file.c

··· 1536 1536 u64 num_bytes; 1537 1537 int ret; 1538 1538 1539 - ret = btrfs_start_write_no_snapshoting(root); 1539 + ret = btrfs_start_write_no_snapshotting(root); 1540 1540 if (!ret) 1541 1541 return -ENOSPC; 1542 1542 ··· 1561 1561 NULL, NULL, NULL); 1562 1562 if (ret <= 0) { 1563 1563 ret = 0; 1564 - btrfs_end_write_no_snapshoting(root); 1564 + btrfs_end_write_no_snapshotting(root); 1565 1565 } else { 1566 1566 *write_bytes = min_t(size_t, *write_bytes , 1567 1567 num_bytes - pos + lockstart); ··· 1664 1664 data_reserved, pos, 1665 1665 write_bytes); 1666 1666 else 1667 - btrfs_end_write_no_snapshoting(root); 1667 + btrfs_end_write_no_snapshotting(root); 1668 1668 break; 1669 1669 } 1670 1670 ··· 1767 1767 1768 1768 release_bytes = 0; 1769 1769 if (only_release_metadata) 1770 - btrfs_end_write_no_snapshoting(root); 1770 + btrfs_end_write_no_snapshotting(root); 1771 1771 1772 1772 if (only_release_metadata && copied > 0) { 1773 1773 lockstart = round_down(pos, ··· 1797 1797 1798 1798 if (release_bytes) { 1799 1799 if (only_release_metadata) { 1800 - btrfs_end_write_no_snapshoting(root); 1800 + btrfs_end_write_no_snapshotting(root); 1801 1801 btrfs_delalloc_release_metadata(BTRFS_I(inode), 1802 1802 release_bytes); 1803 1803 } else { ··· 1990 1990 1991 1991 int btrfs_release_file(struct inode *inode, struct file *filp) 1992 1992 { 1993 - if (filp->private_data) 1993 + struct btrfs_file_private *private = filp->private_data; 1994 + 1995 + if (private && private->trans) 1994 1996 btrfs_ioctl_trans_end(filp); 1997 + if (private && private->filldir_buf) 1998 + kfree(private->filldir_buf); 1999 + kfree(private); 2000 + filp->private_data = NULL; 2001 + 1995 2002 /* 1996 2003 * ordered_data_close is set by settattr when we are about to truncate 1997 2004 * a file from a non-zero size to a zero size. This tries to

+1 -1

fs/btrfs/free-space-cache.c

··· 709 709 710 710 if (!BTRFS_I(inode)->generation) { 711 711 btrfs_info(fs_info, 712 - "The free space cache file (%llu) is invalid. skip it\n", 712 + "the free space cache file (%llu) is invalid, skip it", 713 713 offset); 714 714 return 0; 715 715 }

+1 -1

fs/btrfs/free-space-tree.c

··· 1257 1257 if (ret) 1258 1258 goto abort; 1259 1259 1260 - ret = btrfs_del_root(trans, tree_root, &free_space_root->root_key); 1260 + ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key); 1261 1261 if (ret) 1262 1262 goto abort; 1263 1263

+2 -1

fs/btrfs/free-space-tree.h

··· 44 44 struct btrfs_fs_info *fs_info, 45 45 u64 start, u64 size); 46 46 47 - /* Exposed for testing. */ 47 + #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 48 48 struct btrfs_free_space_info * 49 49 search_free_space_info(struct btrfs_trans_handle *trans, 50 50 struct btrfs_fs_info *fs_info, ··· 68 68 struct btrfs_path *path); 69 69 int free_space_test_bit(struct btrfs_block_group_cache *block_group, 70 70 struct btrfs_path *path, u64 offset); 71 + #endif 71 72 72 73 #endif

+153 -67

fs/btrfs/inode.c

··· 392 392 return 0; 393 393 } 394 394 395 - static inline int inode_need_compress(struct inode *inode) 395 + static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) 396 396 { 397 397 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 398 398 399 399 /* force compress */ 400 400 if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) 401 401 return 1; 402 + /* defrag ioctl */ 403 + if (BTRFS_I(inode)->defrag_compress) 404 + return 1; 402 405 /* bad compression ratios */ 403 406 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) 404 407 return 0; 405 408 if (btrfs_test_opt(fs_info, COMPRESS) || 406 409 BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || 407 - BTRFS_I(inode)->force_compress) 408 - return 1; 410 + BTRFS_I(inode)->prop_compress) 411 + return btrfs_compress_heuristic(inode, start, end); 409 412 return 0; 410 413 } 411 414 ··· 506 503 * inode has not been flagged as nocompress. This flag can 507 504 * change at any time if we discover bad compression ratios. 508 505 */ 509 - if (inode_need_compress(inode)) { 506 + if (inode_need_compress(inode, start, end)) { 510 507 WARN_ON(pages); 511 508 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); 512 509 if (!pages) { ··· 514 511 goto cont; 515 512 } 516 513 517 - if (BTRFS_I(inode)->force_compress) 518 - compress_type = BTRFS_I(inode)->force_compress; 514 + if (BTRFS_I(inode)->defrag_compress) 515 + compress_type = BTRFS_I(inode)->defrag_compress; 516 + else if (BTRFS_I(inode)->prop_compress) 517 + compress_type = BTRFS_I(inode)->prop_compress; 519 518 520 519 /* 521 520 * we need to call clear_page_dirty_for_io on each ··· 650 645 651 646 /* flag the file so we don't compress in the future */ 652 647 if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && 653 - !(BTRFS_I(inode)->force_compress)) { 648 + !(BTRFS_I(inode)->prop_compress)) { 654 649 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 655 650 } 656 651 } ··· 1386 1381 * we fall into common COW way. 
1387 1382 */ 1388 1383 if (!nolock) { 1389 - err = btrfs_start_write_no_snapshoting(root); 1384 + err = btrfs_start_write_no_snapshotting(root); 1390 1385 if (!err) 1391 1386 goto out_check; 1392 1387 } ··· 1398 1393 if (csum_exist_in_range(fs_info, disk_bytenr, 1399 1394 num_bytes)) { 1400 1395 if (!nolock) 1401 - btrfs_end_write_no_snapshoting(root); 1396 + btrfs_end_write_no_snapshotting(root); 1402 1397 goto out_check; 1403 1398 } 1404 1399 if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) { 1405 1400 if (!nolock) 1406 - btrfs_end_write_no_snapshoting(root); 1401 + btrfs_end_write_no_snapshotting(root); 1407 1402 goto out_check; 1408 1403 } 1409 1404 nocow = 1; ··· 1420 1415 if (extent_end <= start) { 1421 1416 path->slots[0]++; 1422 1417 if (!nolock && nocow) 1423 - btrfs_end_write_no_snapshoting(root); 1418 + btrfs_end_write_no_snapshotting(root); 1424 1419 if (nocow) 1425 1420 btrfs_dec_nocow_writers(fs_info, disk_bytenr); 1426 1421 goto next_slot; ··· 1443 1438 NULL); 1444 1439 if (ret) { 1445 1440 if (!nolock && nocow) 1446 - btrfs_end_write_no_snapshoting(root); 1441 + btrfs_end_write_no_snapshotting(root); 1447 1442 if (nocow) 1448 1443 btrfs_dec_nocow_writers(fs_info, 1449 1444 disk_bytenr); ··· 1464 1459 BTRFS_ORDERED_PREALLOC); 1465 1460 if (IS_ERR(em)) { 1466 1461 if (!nolock && nocow) 1467 - btrfs_end_write_no_snapshoting(root); 1462 + btrfs_end_write_no_snapshotting(root); 1468 1463 if (nocow) 1469 1464 btrfs_dec_nocow_writers(fs_info, 1470 1465 disk_bytenr); ··· 1504 1499 PAGE_UNLOCK | PAGE_SET_PRIVATE2); 1505 1500 1506 1501 if (!nolock && nocow) 1507 - btrfs_end_write_no_snapshoting(root); 1502 + btrfs_end_write_no_snapshotting(root); 1508 1503 cur_offset = extent_end; 1509 1504 1510 1505 /* ··· 1581 1576 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { 1582 1577 ret = run_delalloc_nocow(inode, locked_page, start, end, 1583 1578 page_started, 0, nr_written); 1584 - } else if (!inode_need_compress(inode)) { 1579 + } else 
if (!inode_need_compress(inode, start, end)) { 1585 1580 ret = cow_file_range(inode, locked_page, start, end, end, 1586 1581 page_started, nr_written, 1, NULL); 1587 1582 } else { ··· 1801 1796 u64 len = state->end + 1 - state->start; 1802 1797 u32 num_extents = count_max_extents(len); 1803 1798 1804 - spin_lock(&inode->lock); 1805 - if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) 1799 + if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) { 1800 + spin_lock(&inode->lock); 1806 1801 inode->defrag_bytes -= len; 1807 - spin_unlock(&inode->lock); 1802 + spin_unlock(&inode->lock); 1803 + } 1808 1804 1809 1805 /* 1810 1806 * set_bit and clear bit hooks normally require _irqsave/restore ··· 3165 3159 memset(kaddr + pgoff, 1, len); 3166 3160 flush_dcache_page(page); 3167 3161 kunmap_atomic(kaddr); 3168 - if (csum_expected == 0) 3169 - return 0; 3170 3162 return -EIO; 3171 3163 } 3172 3164 ··· 5059 5055 5060 5056 if (newsize > oldsize) { 5061 5057 /* 5062 - * Don't do an expanding truncate while snapshoting is ongoing. 5058 + * Don't do an expanding truncate while snapshotting is ongoing. 
5063 5059 * This is to ensure the snapshot captures a fully consistent 5064 5060 * state of this file - if the snapshot captures this expanding 5065 5061 * truncation, it must capture all writes that happened before ··· 5068 5064 btrfs_wait_for_snapshot_creation(root); 5069 5065 ret = btrfs_cont_expand(inode, oldsize, newsize); 5070 5066 if (ret) { 5071 - btrfs_end_write_no_snapshoting(root); 5067 + btrfs_end_write_no_snapshotting(root); 5072 5068 return ret; 5073 5069 } 5074 5070 5075 5071 trans = btrfs_start_transaction(root, 1); 5076 5072 if (IS_ERR(trans)) { 5077 - btrfs_end_write_no_snapshoting(root); 5073 + btrfs_end_write_no_snapshotting(root); 5078 5074 return PTR_ERR(trans); 5079 5075 } 5080 5076 ··· 5082 5078 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); 5083 5079 pagecache_isize_extended(inode, oldsize, newsize); 5084 5080 ret = btrfs_update_inode(trans, root, inode); 5085 - btrfs_end_write_no_snapshoting(root); 5081 + btrfs_end_write_no_snapshotting(root); 5086 5082 btrfs_end_transaction(trans); 5087 5083 } else { 5088 5084 ··· 5877 5873 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 5878 5874 }; 5879 5875 5876 + /* 5877 + * All this infrastructure exists because dir_emit can fault, and we are holding 5878 + * the tree lock when doing readdir. For now just allocate a buffer and copy 5879 + * our information into that, and then dir_emit from the buffer. This is 5880 + * similar to what NFS does, only we don't keep the buffer around in pagecache 5881 + * because I'm afraid I'll mess that up. Long term we need to make filldir do 5882 + * copy_to_user_inatomic so we don't have to worry about page faulting under the 5883 + * tree lock. 
5884 + */ 5885 + static int btrfs_opendir(struct inode *inode, struct file *file) 5886 + { 5887 + struct btrfs_file_private *private; 5888 + 5889 + private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL); 5890 + if (!private) 5891 + return -ENOMEM; 5892 + private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL); 5893 + if (!private->filldir_buf) { 5894 + kfree(private); 5895 + return -ENOMEM; 5896 + } 5897 + file->private_data = private; 5898 + return 0; 5899 + } 5900 + 5901 + struct dir_entry { 5902 + u64 ino; 5903 + u64 offset; 5904 + unsigned type; 5905 + int name_len; 5906 + }; 5907 + 5908 + static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) 5909 + { 5910 + while (entries--) { 5911 + struct dir_entry *entry = addr; 5912 + char *name = (char *)(entry + 1); 5913 + 5914 + ctx->pos = entry->offset; 5915 + if (!dir_emit(ctx, name, entry->name_len, entry->ino, 5916 + entry->type)) 5917 + return 1; 5918 + addr += sizeof(struct dir_entry) + entry->name_len; 5919 + ctx->pos++; 5920 + } 5921 + return 0; 5922 + } 5923 + 5880 5924 static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) 5881 5925 { 5882 5926 struct inode *inode = file_inode(file); 5883 5927 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5884 5928 struct btrfs_root *root = BTRFS_I(inode)->root; 5929 + struct btrfs_file_private *private = file->private_data; 5885 5930 struct btrfs_dir_item *di; 5886 5931 struct btrfs_key key; 5887 5932 struct btrfs_key found_key; 5888 5933 struct btrfs_path *path; 5934 + void *addr; 5889 5935 struct list_head ins_list; 5890 5936 struct list_head del_list; 5891 5937 int ret; 5892 5938 struct extent_buffer *leaf; 5893 5939 int slot; 5894 - unsigned char d_type; 5895 - int over = 0; 5896 - char tmp_name[32]; 5897 5940 char *name_ptr; 5898 5941 int name_len; 5942 + int entries = 0; 5943 + int total_len = 0; 5899 5944 bool put = false; 5900 5945 struct btrfs_key location; 5901 5946 ··· 5955 5902 if (!path) 5956 5903 return 
-ENOMEM; 5957 5904 5905 + addr = private->filldir_buf; 5958 5906 path->reada = READA_FORWARD; 5959 5907 5960 5908 INIT_LIST_HEAD(&ins_list); 5961 5909 INIT_LIST_HEAD(&del_list); 5962 5910 put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list); 5963 5911 5912 + again: 5964 5913 key.type = BTRFS_DIR_INDEX_KEY; 5965 5914 key.offset = ctx->pos; 5966 5915 key.objectid = btrfs_ino(BTRFS_I(inode)); ··· 5972 5917 goto err; 5973 5918 5974 5919 while (1) { 5920 + struct dir_entry *entry; 5921 + 5975 5922 leaf = path->nodes[0]; 5976 5923 slot = path->slots[0]; 5977 5924 if (slot >= btrfs_header_nritems(leaf)) { ··· 5995 5938 goto next; 5996 5939 if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) 5997 5940 goto next; 5998 - 5999 - ctx->pos = found_key.offset; 6000 - 6001 5941 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 6002 5942 if (verify_dir_item(fs_info, leaf, slot, di)) 6003 5943 goto next; 6004 5944 6005 5945 name_len = btrfs_dir_name_len(leaf, di); 6006 - if (name_len <= sizeof(tmp_name)) { 6007 - name_ptr = tmp_name; 6008 - } else { 6009 - name_ptr = kmalloc(name_len, GFP_KERNEL); 6010 - if (!name_ptr) { 6011 - ret = -ENOMEM; 6012 - goto err; 6013 - } 5946 + if ((total_len + sizeof(struct dir_entry) + name_len) >= 5947 + PAGE_SIZE) { 5948 + btrfs_release_path(path); 5949 + ret = btrfs_filldir(private->filldir_buf, entries, ctx); 5950 + if (ret) 5951 + goto nopos; 5952 + addr = private->filldir_buf; 5953 + entries = 0; 5954 + total_len = 0; 5955 + goto again; 6014 5956 } 5957 + 5958 + entry = addr; 5959 + entry->name_len = name_len; 5960 + name_ptr = (char *)(entry + 1); 6015 5961 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), 6016 5962 name_len); 6017 - 6018 - d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; 5963 + entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; 6019 5964 btrfs_dir_item_key_to_cpu(leaf, di, &location); 6020 - 6021 - over = !dir_emit(ctx, name_ptr, name_len, location.objectid, 6022 - 
d_type); 6023 - 6024 - if (name_ptr != tmp_name) 6025 - kfree(name_ptr); 6026 - 6027 - if (over) 6028 - goto nopos; 6029 - ctx->pos++; 5965 + entry->ino = location.objectid; 5966 + entry->offset = found_key.offset; 5967 + entries++; 5968 + addr += sizeof(struct dir_entry) + name_len; 5969 + total_len += sizeof(struct dir_entry) + name_len; 6030 5970 next: 6031 5971 path->slots[0]++; 6032 5972 } 5973 + btrfs_release_path(path); 5974 + 5975 + ret = btrfs_filldir(private->filldir_buf, entries, ctx); 5976 + if (ret) 5977 + goto nopos; 6033 5978 6034 5979 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); 6035 5980 if (ret) ··· 6242 6183 return insert_inode_locked4(inode, 6243 6184 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), 6244 6185 btrfs_find_actor, &args); 6186 + } 6187 + 6188 + /* 6189 + * Inherit flags from the parent inode. 6190 + * 6191 + * Currently only the compression flags and the cow flags are inherited. 6192 + */ 6193 + static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) 6194 + { 6195 + unsigned int flags; 6196 + 6197 + if (!dir) 6198 + return; 6199 + 6200 + flags = BTRFS_I(dir)->flags; 6201 + 6202 + if (flags & BTRFS_INODE_NOCOMPRESS) { 6203 + BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 6204 + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 6205 + } else if (flags & BTRFS_INODE_COMPRESS) { 6206 + BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 6207 + BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 6208 + } 6209 + 6210 + if (flags & BTRFS_INODE_NODATACOW) { 6211 + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 6212 + if (S_ISREG(inode->i_mode)) 6213 + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 6214 + } 6215 + 6216 + btrfs_update_iflags(inode); 6245 6217 } 6246 6218 6247 6219 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, ··· 8081 7991 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; 8082 7992 struct bio *bio; 8083 7993 int isector; 8084 - int read_mode = 0; 7994 + 
unsigned int read_mode = 0; 8085 7995 int segs; 8086 7996 int ret; 8087 7997 blk_status_t status; ··· 8111 8021 bio_set_op_attrs(bio, REQ_OP_READ, read_mode); 8112 8022 8113 8023 btrfs_debug(BTRFS_I(inode)->root->fs_info, 8114 - "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", 8024 + "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d", 8115 8025 read_mode, failrec->this_mirror, failrec->in_validation); 8116 8026 8117 8027 status = submit_dio_repair_bio(inode, bio, failrec->this_mirror); ··· 8196 8106 goto next; 8197 8107 } 8198 8108 8199 - wait_for_completion(&done.done); 8109 + wait_for_completion_io(&done.done); 8200 8110 8201 8111 if (!done.uptodate) { 8202 8112 /* We might have another mirror, so try again */ ··· 8311 8221 goto next; 8312 8222 } 8313 8223 8314 - wait_for_completion(&done.done); 8224 + wait_for_completion_io(&done.done); 8315 8225 8316 8226 if (!done.uptodate) { 8317 8227 /* We might have another mirror, so try again */ ··· 8518 8428 8519 8429 static inline blk_status_t 8520 8430 __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, 8521 - int skip_sum, int async_submit) 8431 + int async_submit) 8522 8432 { 8523 8433 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 8524 8434 struct btrfs_dio_private *dip = bio->bi_private; ··· 8536 8446 goto err; 8537 8447 } 8538 8448 8539 - if (skip_sum) 8449 + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 8540 8450 goto map; 8541 8451 8542 8452 if (write && async_submit) { ··· 8566 8476 return ret; 8567 8477 } 8568 8478 8569 - static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, 8570 - int skip_sum) 8479 + static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) 8571 8480 { 8572 8481 struct inode *inode = dip->inode; 8573 8482 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); ··· 8630 8541 */ 8631 8542 atomic_inc(&dip->pending_bios); 8632 8543 8633 - status = 
__btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, 8544 + status = __btrfs_submit_dio_bio(bio, inode, file_offset, 8634 8545 async_submit); 8635 8546 if (status) { 8636 8547 bio_put(bio); ··· 8650 8561 } while (submit_len > 0); 8651 8562 8652 8563 submit: 8653 - status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum, 8654 - async_submit); 8564 + status = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit); 8655 8565 if (!status) 8656 8566 return 0; 8657 8567 ··· 8675 8587 struct btrfs_dio_private *dip = NULL; 8676 8588 struct bio *bio = NULL; 8677 8589 struct btrfs_io_bio *io_bio; 8678 - int skip_sum; 8679 8590 bool write = (bio_op(dio_bio) == REQ_OP_WRITE); 8680 8591 int ret = 0; 8681 - 8682 - skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 8683 8592 8684 8593 bio = btrfs_bio_clone(dio_bio); 8685 8594 ··· 8720 8635 dio_data->unsubmitted_oe_range_end; 8721 8636 } 8722 8637 8723 - ret = btrfs_submit_direct_hook(dip, skip_sum); 8638 + ret = btrfs_submit_direct_hook(dip); 8724 8639 if (!ret) 8725 8640 return; 8726 8641 ··· 8820 8735 return 0; 8821 8736 8822 8737 inode_dio_begin(inode); 8823 - smp_mb__after_atomic(); 8824 8738 8825 8739 /* 8826 8740 * The generic stuff only does filemap_write_and_wait_range, which ··· 9492 9408 ei->reserved_extents = 0; 9493 9409 9494 9410 ei->runtime_flags = 0; 9495 - ei->force_compress = BTRFS_COMPRESS_NONE; 9411 + ei->prop_compress = BTRFS_COMPRESS_NONE; 9412 + ei->defrag_compress = BTRFS_COMPRESS_NONE; 9496 9413 9497 9414 ei->delayed_node = NULL; 9498 9415 ··· 10833 10748 .llseek = generic_file_llseek, 10834 10749 .read = generic_read_dir, 10835 10750 .iterate_shared = btrfs_real_readdir, 10751 + .open = btrfs_opendir, 10836 10752 .unlocked_ioctl = btrfs_ioctl, 10837 10753 #ifdef CONFIG_COMPAT 10838 10754 .compat_ioctl = btrfs_compat_ioctl,

+50 -66

fs/btrfs/ioctl.c

··· 156 156 new_fl); 157 157 } 158 158 159 - /* 160 - * Inherit flags from the parent inode. 161 - * 162 - * Currently only the compression flags and the cow flags are inherited. 163 - */ 164 - void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) 165 - { 166 - unsigned int flags; 167 - 168 - if (!dir) 169 - return; 170 - 171 - flags = BTRFS_I(dir)->flags; 172 - 173 - if (flags & BTRFS_INODE_NOCOMPRESS) { 174 - BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 175 - BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 176 - } else if (flags & BTRFS_INODE_COMPRESS) { 177 - BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 178 - BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 179 - } 180 - 181 - if (flags & BTRFS_INODE_NODATACOW) { 182 - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 183 - if (S_ISREG(inode->i_mode)) 184 - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 185 - } 186 - 187 - btrfs_update_iflags(inode); 188 - } 189 - 190 159 static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 191 160 { 192 161 struct btrfs_inode *ip = BTRFS_I(file_inode(file)); ··· 607 638 return ret; 608 639 } 609 640 610 - static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) 641 + static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root) 611 642 { 612 643 s64 writers; 613 644 DEFINE_WAIT(wait); ··· 650 681 goto free_pending; 651 682 } 652 683 653 - atomic_inc(&root->will_be_snapshoted); 684 + atomic_inc(&root->will_be_snapshotted); 654 685 smp_mb__after_atomic(); 655 - btrfs_wait_for_no_snapshoting_writes(root); 686 + btrfs_wait_for_no_snapshotting_writes(root); 656 687 657 688 ret = btrfs_start_delalloc_inodes(root, 0); 658 689 if (ret) ··· 723 754 fail: 724 755 btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv); 725 756 dec_and_free: 726 - if (atomic_dec_and_test(&root->will_be_snapshoted)) 727 - wake_up_atomic_t(&root->will_be_snapshoted); 757 + if (atomic_dec_and_test(&root->will_be_snapshotted)) 758 
+ wake_up_atomic_t(&root->will_be_snapshotted); 728 759 free_pending: 729 760 kfree(pending_snapshot->root_item); 730 761 btrfs_free_path(pending_snapshot->path); ··· 1255 1286 unsigned long cluster = max_cluster; 1256 1287 u64 new_align = ~((u64)SZ_128K - 1); 1257 1288 struct page **pages = NULL; 1289 + bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS; 1258 1290 1259 1291 if (isize == 0) 1260 1292 return 0; ··· 1263 1293 if (range->start >= isize) 1264 1294 return -EINVAL; 1265 1295 1266 - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1296 + if (do_compress) { 1267 1297 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1268 1298 return -EINVAL; 1269 1299 if (range->compress_type) ··· 1274 1304 extent_thresh = SZ_256K; 1275 1305 1276 1306 /* 1277 - * if we were not given a file, allocate a readahead 1278 - * context 1307 + * If we were not given a file, allocate a readahead context. As 1308 + * readahead is just an optimization, defrag will work without it so 1309 + * we don't error out. 
1279 1310 */ 1280 1311 if (!file) { 1281 - ra = kzalloc(sizeof(*ra), GFP_NOFS); 1282 - if (!ra) 1283 - return -ENOMEM; 1284 - file_ra_state_init(ra, inode->i_mapping); 1312 + ra = kzalloc(sizeof(*ra), GFP_KERNEL); 1313 + if (ra) 1314 + file_ra_state_init(ra, inode->i_mapping); 1285 1315 } else { 1286 1316 ra = &file->f_ra; 1287 1317 } 1288 1318 1289 - pages = kmalloc_array(max_cluster, sizeof(struct page *), 1290 - GFP_NOFS); 1319 + pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL); 1291 1320 if (!pages) { 1292 1321 ret = -ENOMEM; 1293 1322 goto out_ra; ··· 1342 1373 1343 1374 if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT, 1344 1375 extent_thresh, &last_len, &skip, 1345 - &defrag_end, range->flags & 1346 - BTRFS_DEFRAG_RANGE_COMPRESS)) { 1376 + &defrag_end, do_compress)){ 1347 1377 unsigned long next; 1348 1378 /* 1349 1379 * the should_defrag function tells us how much to skip ··· 1363 1395 1364 1396 if (i + cluster > ra_index) { 1365 1397 ra_index = max(i, ra_index); 1366 - btrfs_force_ra(inode->i_mapping, ra, file, ra_index, 1367 - cluster); 1398 + if (ra) 1399 + page_cache_sync_readahead(inode->i_mapping, ra, 1400 + file, ra_index, cluster); 1368 1401 ra_index += cluster; 1369 1402 } 1370 1403 1371 1404 inode_lock(inode); 1372 - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1373 - BTRFS_I(inode)->force_compress = compress_type; 1405 + if (do_compress) 1406 + BTRFS_I(inode)->defrag_compress = compress_type; 1374 1407 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1375 1408 if (ret < 0) { 1376 1409 inode_unlock(inode); ··· 1418 1449 filemap_flush(inode->i_mapping); 1419 1450 } 1420 1451 1421 - if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 1452 + if (do_compress) { 1422 1453 /* the filemap_flush will queue IO into the worker threads, but 1423 1454 * we have to make sure the IO is actually started and that 1424 1455 * ordered extents get created before we return ··· 1440 1471 ret = defrag_count; 1441 1472 1442 1473 
out_ra: 1443 - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1474 + if (do_compress) { 1444 1475 inode_lock(inode); 1445 - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 1476 + BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE; 1446 1477 inode_unlock(inode); 1447 1478 } 1448 1479 if (!file) ··· 1569 1600 goto out_free; 1570 1601 } 1571 1602 1572 - new_size = div_u64(new_size, fs_info->sectorsize); 1573 - new_size *= fs_info->sectorsize; 1603 + new_size = round_down(new_size, fs_info->sectorsize); 1574 1604 1575 1605 btrfs_info_in_rcu(fs_info, "new size for %s is %llu", 1576 1606 rcu_str_deref(device->name), new_size); ··· 2168 2200 return -EFAULT; 2169 2201 2170 2202 buf_size = args.buf_size; 2171 - 2172 - if (buf_size < sizeof(struct btrfs_ioctl_search_header)) 2173 - return -EOVERFLOW; 2174 2203 2175 2204 /* limit result size to 16MB */ 2176 2205 if (buf_size > buf_limit) ··· 3963 3998 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3964 3999 struct btrfs_root *root = BTRFS_I(inode)->root; 3965 4000 struct btrfs_trans_handle *trans; 4001 + struct btrfs_file_private *private; 3966 4002 int ret; 4003 + static bool warned = false; 3967 4004 3968 4005 ret = -EPERM; 3969 4006 if (!capable(CAP_SYS_ADMIN)) 3970 4007 goto out; 3971 4008 4009 + if (!warned) { 4010 + btrfs_warn(fs_info, 4011 + "Userspace transaction mechanism is considered " 4012 + "deprecated and slated to be removed in 4.17. 
" 4013 + "If you have a valid use case please " 4014 + "speak up on the mailing list"); 4015 + WARN_ON(1); 4016 + warned = true; 4017 + } 4018 + 3972 4019 ret = -EINPROGRESS; 3973 - if (file->private_data) 4020 + private = file->private_data; 4021 + if (private && private->trans) 3974 4022 goto out; 4023 + if (!private) { 4024 + private = kzalloc(sizeof(struct btrfs_file_private), 4025 + GFP_KERNEL); 4026 + if (!private) 4027 + return -ENOMEM; 4028 + file->private_data = private; 4029 + } 3975 4030 3976 4031 ret = -EROFS; 3977 4032 if (btrfs_root_readonly(root)) ··· 4008 4023 if (IS_ERR(trans)) 4009 4024 goto out_drop; 4010 4025 4011 - file->private_data = trans; 4026 + private->trans = trans; 4012 4027 return 0; 4013 4028 4014 4029 out_drop: ··· 4263 4278 { 4264 4279 struct inode *inode = file_inode(file); 4265 4280 struct btrfs_root *root = BTRFS_I(inode)->root; 4266 - struct btrfs_trans_handle *trans; 4281 + struct btrfs_file_private *private = file->private_data; 4267 4282 4268 - trans = file->private_data; 4269 - if (!trans) 4283 + if (!private || !private->trans) 4270 4284 return -EINVAL; 4271 - file->private_data = NULL; 4272 4285 4273 - btrfs_end_transaction(trans); 4286 + btrfs_end_transaction(private->trans); 4287 + private->trans = NULL; 4274 4288 4275 4289 atomic_dec(&root->fs_info->open_ioctl_trans); 4276 4290

+31 -9

fs/btrfs/print-tree.c

··· 44 44 static void print_extent_data_ref(struct extent_buffer *eb, 45 45 struct btrfs_extent_data_ref *ref) 46 46 { 47 - pr_info("\t\textent data backref root %llu objectid %llu offset %llu count %u\n", 47 + pr_cont("extent data backref root %llu objectid %llu offset %llu count %u\n", 48 48 btrfs_extent_data_ref_root(eb, ref), 49 49 btrfs_extent_data_ref_objectid(eb, ref), 50 50 btrfs_extent_data_ref_offset(eb, ref), ··· 63 63 u32 item_size = btrfs_item_size_nr(eb, slot); 64 64 u64 flags; 65 65 u64 offset; 66 + int ref_index = 0; 66 67 67 68 if (item_size < sizeof(*ei)) { 68 69 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 ··· 105 104 iref = (struct btrfs_extent_inline_ref *)ptr; 106 105 type = btrfs_extent_inline_ref_type(eb, iref); 107 106 offset = btrfs_extent_inline_ref_offset(eb, iref); 107 + pr_info("\t\tref#%d: ", ref_index++); 108 108 switch (type) { 109 109 case BTRFS_TREE_BLOCK_REF_KEY: 110 - pr_info("\t\ttree block backref root %llu\n", offset); 110 + pr_cont("tree block backref root %llu\n", offset); 111 111 break; 112 112 case BTRFS_SHARED_BLOCK_REF_KEY: 113 - pr_info("\t\tshared block backref parent %llu\n", offset); 113 + pr_cont("shared block backref parent %llu\n", offset); 114 + /* 115 + * offset is supposed to be a tree block which 116 + * must be aligned to nodesize. 
117 + */ 118 + if (!IS_ALIGNED(offset, eb->fs_info->nodesize)) 119 + pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n", 120 + offset, (unsigned long long)eb->fs_info->nodesize); 114 121 break; 115 122 case BTRFS_EXTENT_DATA_REF_KEY: 116 123 dref = (struct btrfs_extent_data_ref *)(&iref->offset); ··· 126 117 break; 127 118 case BTRFS_SHARED_DATA_REF_KEY: 128 119 sref = (struct btrfs_shared_data_ref *)(iref + 1); 129 - pr_info("\t\tshared data backref parent %llu count %u\n", 120 + pr_cont("shared data backref parent %llu count %u\n", 130 121 offset, btrfs_shared_data_ref_count(eb, sref)); 122 + /* 123 + * offset is supposed to be a tree block which 124 + * must be aligned to nodesize. 125 + */ 126 + if (!IS_ALIGNED(offset, eb->fs_info->nodesize)) 127 + pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n", 128 + offset, (unsigned long long)eb->fs_info->nodesize); 131 129 break; 132 130 default: 133 - BUG(); 131 + pr_cont("(extent %llu has INVALID ref type %d)\n", 132 + eb->start, type); 133 + return; 134 134 } 135 135 ptr += btrfs_extent_inline_ref_size(type); 136 136 } ··· 179 161 } 180 162 } 181 163 182 - void btrfs_print_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *l) 164 + void btrfs_print_leaf(struct extent_buffer *l) 183 165 { 166 + struct btrfs_fs_info *fs_info; 184 167 int i; 185 168 u32 type, nr; 186 169 struct btrfs_item *item; ··· 199 180 if (!l) 200 181 return; 201 182 183 + fs_info = l->fs_info; 202 184 nr = btrfs_header_nritems(l); 203 185 204 186 btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d", ··· 338 318 } 339 319 } 340 320 341 - void btrfs_print_tree(struct btrfs_fs_info *fs_info, struct extent_buffer *c) 321 + void btrfs_print_tree(struct extent_buffer *c) 342 322 { 323 + struct btrfs_fs_info *fs_info; 343 324 int i; u32 nr; 344 325 struct btrfs_key key; 345 326 int level; 346 327 347 328 if (!c) 348 329 return; 330 + fs_info = c->fs_info; 349 331 nr = btrfs_header_nritems(c); 350 332 level = 
btrfs_header_level(c); 351 333 if (level == 0) { 352 - btrfs_print_leaf(fs_info, c); 334 + btrfs_print_leaf(c); 353 335 return; 354 336 } 355 337 btrfs_info(fs_info, ··· 381 359 if (btrfs_header_level(next) != 382 360 level - 1) 383 361 BUG(); 384 - btrfs_print_tree(fs_info, next); 362 + btrfs_print_tree(next); 385 363 free_extent_buffer(next); 386 364 } 387 365 }

+2 -2

fs/btrfs/print-tree.h

··· 18 18 19 19 #ifndef __PRINT_TREE_ 20 20 #define __PRINT_TREE_ 21 - void btrfs_print_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *l); 22 - void btrfs_print_tree(struct btrfs_fs_info *fs_info, struct extent_buffer *c); 21 + void btrfs_print_leaf(struct extent_buffer *l); 22 + void btrfs_print_tree(struct extent_buffer *c); 23 23 #endif

+5 -5

fs/btrfs/props.c

··· 403 403 if (len == 0) { 404 404 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 405 405 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 406 - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 406 + BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE; 407 407 408 408 return 0; 409 409 } 410 410 411 - if (!strncmp("lzo", value, len)) 411 + if (!strncmp("lzo", value, 3)) 412 412 type = BTRFS_COMPRESS_LZO; 413 - else if (!strncmp("zlib", value, len)) 413 + else if (!strncmp("zlib", value, 4)) 414 414 type = BTRFS_COMPRESS_ZLIB; 415 415 else 416 416 return -EINVAL; 417 417 418 418 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 419 419 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 420 - BTRFS_I(inode)->force_compress = type; 420 + BTRFS_I(inode)->prop_compress = type; 421 421 422 422 return 0; 423 423 } 424 424 425 425 static const char *prop_compression_extract(struct inode *inode) 426 426 { 427 - switch (BTRFS_I(inode)->force_compress) { 427 + switch (BTRFS_I(inode)->prop_compress) { 428 428 case BTRFS_COMPRESS_ZLIB: 429 429 return "zlib"; 430 430 case BTRFS_COMPRESS_LZO:

+3 -4

fs/btrfs/qgroup.c

··· 946 946 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 947 947 struct btrfs_fs_info *fs_info) 948 948 { 949 - struct btrfs_root *tree_root = fs_info->tree_root; 950 949 struct btrfs_root *quota_root; 951 950 int ret = 0; 952 951 ··· 967 968 if (ret) 968 969 goto out; 969 970 970 - ret = btrfs_del_root(trans, tree_root, &quota_root->root_key); 971 + ret = btrfs_del_root(trans, fs_info, &quota_root->root_key); 971 972 if (ret) 972 973 goto out; 973 974 ··· 1602 1603 struct extent_buffer *eb = root_eb; 1603 1604 struct btrfs_path *path = NULL; 1604 1605 1605 - BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); 1606 + BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL); 1606 1607 BUG_ON(root_eb == NULL); 1607 1608 1608 1609 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) ··· 2645 2646 if (IS_ERR(trans)) { 2646 2647 err = PTR_ERR(trans); 2647 2648 btrfs_err(fs_info, 2648 - "fail to start transaction for status update: %d\n", 2649 + "fail to start transaction for status update: %d", 2649 2650 err); 2650 2651 goto done; 2651 2652 }

+26 -6

fs/btrfs/relocation.c

··· 32 32 #include "free-space-cache.h" 33 33 #include "inode-map.h" 34 34 #include "qgroup.h" 35 + #include "print-tree.h" 35 36 36 37 /* 37 38 * backref_node, mapping_node and tree_block start with this ··· 800 799 if (ptr < end) { 801 800 /* update key for inline back ref */ 802 801 struct btrfs_extent_inline_ref *iref; 802 + int type; 803 803 iref = (struct btrfs_extent_inline_ref *)ptr; 804 - key.type = btrfs_extent_inline_ref_type(eb, iref); 804 + type = btrfs_get_extent_inline_ref_type(eb, iref, 805 + BTRFS_REF_TYPE_BLOCK); 806 + if (type == BTRFS_REF_TYPE_INVALID) { 807 + err = -EINVAL; 808 + goto out; 809 + } 810 + key.type = type; 805 811 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 812 + 806 813 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && 807 814 key.type != BTRFS_SHARED_BLOCK_REF_KEY); 808 815 } ··· 1317 1308 btrfs_panic(fs_info, -EEXIST, 1318 1309 "Duplicate root found for start=%llu while inserting into relocation tree", 1319 1310 node->bytenr); 1320 - kfree(node); 1321 - return -EEXIST; 1322 1311 } 1323 1312 1324 1313 list_add_tail(&root->root_list, &rc->reloc_roots); ··· 3484 3477 goto again; 3485 3478 } 3486 3479 } 3487 - BUG_ON(ret); 3480 + if (ret) { 3481 + ASSERT(ret == 1); 3482 + btrfs_print_leaf(path->nodes[0]); 3483 + btrfs_err(fs_info, 3484 + "tree block extent item (%llu) is not found in extent tree", 3485 + bytenr); 3486 + WARN_ON(1); 3487 + ret = -EINVAL; 3488 + goto out; 3489 + } 3488 3490 3489 3491 ret = add_tree_block(rc, &key, path, blocks); 3490 3492 out: ··· 3771 3755 3772 3756 while (ptr < end) { 3773 3757 iref = (struct btrfs_extent_inline_ref *)ptr; 3774 - key.type = btrfs_extent_inline_ref_type(eb, iref); 3758 + key.type = btrfs_get_extent_inline_ref_type(eb, iref, 3759 + BTRFS_REF_TYPE_DATA); 3775 3760 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3776 3761 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3777 3762 ret = __add_tree_block(rc, key.offset, blocksize, ··· 3782 3765 ret = find_data_references(rc, 
extent_key, 3783 3766 eb, dref, blocks); 3784 3767 } else { 3785 - BUG(); 3768 + ret = -EINVAL; 3769 + btrfs_err(rc->extent_root->fs_info, 3770 + "extent %llu slot %d has an invalid inline ref type", 3771 + eb->start, path->slots[0]); 3786 3772 } 3787 3773 if (ret) { 3788 3774 err = ret;

+5 -4

fs/btrfs/root-tree.c

··· 151 151 } 152 152 153 153 if (ret != 0) { 154 - btrfs_print_leaf(fs_info, path->nodes[0]); 154 + btrfs_print_leaf(path->nodes[0]); 155 155 btrfs_crit(fs_info, "unable to update root key %llu %u %llu", 156 156 key->objectid, key->type, key->offset); 157 157 BUG_ON(1); ··· 335 335 return err; 336 336 } 337 337 338 - /* drop the root item for 'key' from 'root' */ 339 - int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, 340 - const struct btrfs_key *key) 338 + /* drop the root item for 'key' from the tree root */ 339 + int btrfs_del_root(struct btrfs_trans_handle *trans, 340 + struct btrfs_fs_info *fs_info, const struct btrfs_key *key) 341 341 { 342 + struct btrfs_root *root = fs_info->tree_root; 342 343 struct btrfs_path *path; 343 344 int ret; 344 345

+22 -25

fs/btrfs/scrub.c

··· 182 182 struct scrub_bio *wr_curr_bio; 183 183 struct mutex wr_lock; 184 184 int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ 185 - atomic_t flush_all_writes; 186 185 struct btrfs_device *wr_tgtdev; 186 + bool flush_all_writes; 187 187 188 188 /* 189 189 * statistics ··· 717 717 WARN_ON(!fs_info->dev_replace.tgtdev); 718 718 sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; 719 719 sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; 720 - atomic_set(&sctx->flush_all_writes, 0); 720 + sctx->flush_all_writes = false; 721 721 } 722 722 723 723 return sctx; ··· 1704 1704 if (ret) 1705 1705 return ret; 1706 1706 1707 - wait_for_completion(&done.event); 1707 + wait_for_completion_io(&done.event); 1708 1708 if (done.status) 1709 1709 return -EIO; 1710 1710 ··· 1769 1769 struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices; 1770 1770 int ret; 1771 1771 1772 - ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE); 1772 + ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 1773 1773 return !ret; 1774 1774 } 1775 1775 ··· 2402 2402 2403 2403 scrub_block_put(sblock); 2404 2404 2405 - if (sctx->is_dev_replace && 2406 - atomic_read(&sctx->flush_all_writes)) { 2405 + if (sctx->is_dev_replace && sctx->flush_all_writes) { 2407 2406 mutex_lock(&sctx->wr_lock); 2408 2407 scrub_wr_submit(sctx); 2409 2408 mutex_unlock(&sctx->wr_lock); ··· 2606 2607 sctx->first_free = sbio->index; 2607 2608 spin_unlock(&sctx->list_lock); 2608 2609 2609 - if (sctx->is_dev_replace && 2610 - atomic_read(&sctx->flush_all_writes)) { 2610 + if (sctx->is_dev_replace && sctx->flush_all_writes) { 2611 2611 mutex_lock(&sctx->wr_lock); 2612 2612 scrub_wr_submit(sctx); 2613 2613 mutex_unlock(&sctx->wr_lock); ··· 2620 2622 u64 start, u64 len) 2621 2623 { 2622 2624 u64 offset; 2623 - int nsectors; 2625 + u64 nsectors64; 2626 + u32 nsectors; 2624 2627 int sectorsize = sparity->sctx->fs_info->sectorsize; 2625 2628 2626 2629 if (len >= sparity->stripe_len) { ··· 2632 2633 start -= sparity->logic_start; 
2633 2634 start = div64_u64_rem(start, sparity->stripe_len, &offset); 2634 2635 offset = div_u64(offset, sectorsize); 2635 - nsectors = (int)len / sectorsize; 2636 + nsectors64 = div_u64(len, sectorsize); 2637 + 2638 + ASSERT(nsectors64 < UINT_MAX); 2639 + nsectors = (u32)nsectors64; 2636 2640 2637 2641 if (offset + nsectors <= sparity->nsectors) { 2638 2642 bitmap_set(bitmap, offset, nsectors); ··· 2708 2706 if (!sum) 2709 2707 return 0; 2710 2708 2711 - index = ((u32)(logical - sum->bytenr)) / sctx->fs_info->sectorsize; 2709 + index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize); 2710 + ASSERT(index < UINT_MAX); 2711 + 2712 2712 num_sectors = sum->len / sctx->fs_info->sectorsize; 2713 2713 memcpy(csum, sum->sums + index, sctx->csum_size); 2714 2714 if (index == num_sectors - 1) { ··· 3444 3440 */ 3445 3441 if (atomic_read(&fs_info->scrub_pause_req)) { 3446 3442 /* push queued extents */ 3447 - atomic_set(&sctx->flush_all_writes, 1); 3443 + sctx->flush_all_writes = true; 3448 3444 scrub_submit(sctx); 3449 3445 mutex_lock(&sctx->wr_lock); 3450 3446 scrub_wr_submit(sctx); 3451 3447 mutex_unlock(&sctx->wr_lock); 3452 3448 wait_event(sctx->list_wait, 3453 3449 atomic_read(&sctx->bios_in_flight) == 0); 3454 - atomic_set(&sctx->flush_all_writes, 0); 3450 + sctx->flush_all_writes = false; 3455 3451 scrub_blocked_if_needed(fs_info); 3456 3452 } 3457 3453 ··· 3873 3869 ro_set = 0; 3874 3870 } else { 3875 3871 btrfs_warn(fs_info, 3876 - "failed setting block group ro, ret=%d\n", 3877 - ret); 3872 + "failed setting block group ro: %d", ret); 3878 3873 btrfs_put_block_group(cache); 3879 3874 break; 3880 3875 } ··· 3896 3893 * write requests are really completed when bios_in_flight 3897 3894 * changes to 0. 
3898 3895 */ 3899 - atomic_set(&sctx->flush_all_writes, 1); 3896 + sctx->flush_all_writes = true; 3900 3897 scrub_submit(sctx); 3901 3898 mutex_lock(&sctx->wr_lock); 3902 3899 scrub_wr_submit(sctx); ··· 3914 3911 */ 3915 3912 wait_event(sctx->list_wait, 3916 3913 atomic_read(&sctx->workers_pending) == 0); 3917 - atomic_set(&sctx->flush_all_writes, 0); 3914 + sctx->flush_all_writes = false; 3918 3915 3919 3916 scrub_pause_off(fs_info); 3920 3917 ··· 4015 4012 int max_active = fs_info->thread_pool_size; 4016 4013 4017 4014 if (fs_info->scrub_workers_refcnt == 0) { 4018 - if (is_dev_replace) 4019 - fs_info->scrub_workers = 4020 - btrfs_alloc_workqueue(fs_info, "scrub", flags, 4021 - 1, 4); 4022 - else 4023 - fs_info->scrub_workers = 4024 - btrfs_alloc_workqueue(fs_info, "scrub", flags, 4025 - max_active, 4); 4015 + fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", 4016 + flags, is_dev_replace ? 1 : max_active, 4); 4026 4017 if (!fs_info->scrub_workers) 4027 4018 goto fail_scrub_workers; 4028 4019

+20 -1

fs/btrfs/send.c

··· 4733 4733 /* initial readahead */ 4734 4734 memset(&sctx->ra, 0, sizeof(struct file_ra_state)); 4735 4735 file_ra_state_init(&sctx->ra, inode->i_mapping); 4736 - btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, 4736 + page_cache_sync_readahead(inode->i_mapping, &sctx->ra, NULL, index, 4737 4737 last_index - index + 1); 4738 4738 4739 4739 while (index <= last_index) { ··· 4991 4991 struct btrfs_path *path; 4992 4992 struct btrfs_key key; 4993 4993 int ret; 4994 + 4995 + /* 4996 + * Prevent cloning from a zero offset with a length matching the sector 4997 + * size because in some scenarios this will make the receiver fail. 4998 + * 4999 + * For example, if in the source filesystem the extent at offset 0 5000 + * has a length of sectorsize and it was written using direct IO, then 5001 + * it can never be an inline extent (even if compression is enabled). 5002 + * Then this extent can be cloned in the original filesystem to a non 5003 + * zero file offset, but it may not be possible to clone in the 5004 + * destination filesystem because it can be inlined due to compression 5005 + * on the destination filesystem (as the receiver's write operations are 5006 + * always done using buffered IO). The same happens when the original 5007 + * filesystem does not have compression enabled but the destination 5008 + * filesystem has. 5009 + */ 5010 + if (clone_root->offset == 0 && 5011 + len == sctx->send_root->fs_info->sectorsize) 5012 + return send_extent_data(sctx, offset, len); 4994 5013 4995 5014 path = alloc_path_for_send(); 4996 5015 if (!path)

+5 -4

fs/btrfs/struct-funcs.c

··· 50 50 */ 51 51 52 52 #define DEFINE_BTRFS_SETGET_BITS(bits) \ 53 - u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \ 54 - unsigned long off, \ 53 + u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ 54 + const void *ptr, unsigned long off, \ 55 55 struct btrfs_map_token *token) \ 56 56 { \ 57 57 unsigned long part_offset = (unsigned long)ptr; \ ··· 90 90 return res; \ 91 91 } \ 92 92 void btrfs_set_token_##bits(struct extent_buffer *eb, \ 93 - void *ptr, unsigned long off, u##bits val, \ 93 + const void *ptr, unsigned long off, \ 94 + u##bits val, \ 94 95 struct btrfs_map_token *token) \ 95 96 { \ 96 97 unsigned long part_offset = (unsigned long)ptr; \ ··· 134 133 DEFINE_BTRFS_SETGET_BITS(32) 135 134 DEFINE_BTRFS_SETGET_BITS(64) 136 135 137 - void btrfs_node_key(struct extent_buffer *eb, 136 + void btrfs_node_key(const struct extent_buffer *eb, 138 137 struct btrfs_disk_key *disk_key, int nr) 139 138 { 140 139 unsigned long ptr = btrfs_node_key_ptr_offset(nr);

+23 -18

fs/btrfs/super.c

··· 61 61 #include "tests/btrfs-tests.h" 62 62 63 63 #include "qgroup.h" 64 + #include "backref.h" 64 65 #define CREATE_TRACE_POINTS 65 66 #include <trace/events/btrfs.h> 66 67 ··· 426 425 * strsep changes the string, duplicate it because parse_options 427 426 * gets called twice 428 427 */ 429 - options = kstrdup(options, GFP_NOFS); 428 + options = kstrdup(options, GFP_KERNEL); 430 429 if (!options) 431 430 return -ENOMEM; 432 431 ··· 499 498 btrfs_test_opt(info, FORCE_COMPRESS); 500 499 if (token == Opt_compress || 501 500 token == Opt_compress_force || 502 - strcmp(args[0].from, "zlib") == 0) { 501 + strncmp(args[0].from, "zlib", 4) == 0) { 503 502 compress_type = "zlib"; 504 503 info->compress_type = BTRFS_COMPRESS_ZLIB; 505 504 btrfs_set_opt(info->mount_opt, COMPRESS); 506 505 btrfs_clear_opt(info->mount_opt, NODATACOW); 507 506 btrfs_clear_opt(info->mount_opt, NODATASUM); 508 507 no_compress = 0; 509 - } else if (strcmp(args[0].from, "lzo") == 0) { 508 + } else if (strncmp(args[0].from, "lzo", 3) == 0) { 510 509 compress_type = "lzo"; 511 510 info->compress_type = BTRFS_COMPRESS_LZO; 512 511 btrfs_set_opt(info->mount_opt, COMPRESS); ··· 549 548 break; 550 549 case Opt_ssd: 551 550 btrfs_set_and_info(info, SSD, 552 - "use ssd allocation scheme"); 551 + "enabling ssd optimizations"); 553 552 btrfs_clear_opt(info->mount_opt, NOSSD); 554 553 break; 555 554 case Opt_ssd_spread: 555 + btrfs_set_and_info(info, SSD, 556 + "enabling ssd optimizations"); 556 557 btrfs_set_and_info(info, SSD_SPREAD, 557 - "use spread ssd allocation scheme"); 558 - btrfs_set_opt(info->mount_opt, SSD); 558 + "using spread ssd allocation scheme"); 559 559 btrfs_clear_opt(info->mount_opt, NOSSD); 560 560 break; 561 561 case Opt_nossd: 562 - btrfs_set_and_info(info, NOSSD, 563 - "not using ssd allocation scheme"); 564 - btrfs_clear_opt(info->mount_opt, SSD); 565 - btrfs_clear_opt(info->mount_opt, SSD_SPREAD); 562 + btrfs_set_opt(info->mount_opt, NOSSD); 563 + btrfs_clear_and_info(info, SSD, 
564 + "not using ssd optimizations"); 565 + btrfs_clear_and_info(info, SSD_SPREAD, 566 + "not using spread ssd allocation scheme"); 566 567 break; 567 568 case Opt_barrier: 568 569 btrfs_clear_and_info(info, NOBARRIER, ··· 952 949 } 953 950 path->leave_spinning = 1; 954 951 955 - name = kmalloc(PATH_MAX, GFP_NOFS); 952 + name = kmalloc(PATH_MAX, GFP_KERNEL); 956 953 if (!name) { 957 954 ret = -ENOMEM; 958 955 goto err; ··· 1338 1335 char *buf, *dst, *sep; 1339 1336 1340 1337 if (!args) 1341 - return kstrdup("subvolid=0", GFP_NOFS); 1338 + return kstrdup("subvolid=0", GFP_KERNEL); 1342 1339 1343 1340 /* The worst case is that we add ",subvolid=0" to the end. */ 1344 - buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS); 1341 + buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, 1342 + GFP_KERNEL); 1345 1343 if (!buf) 1346 1344 return NULL; 1347 1345 ··· 1571 1567 * it for searching for existing supers, so this lets us do that and 1572 1568 * then open_ctree will properly initialize everything later. 
1573 1569 */ 1574 - fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); 1570 + fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); 1575 1571 if (!fs_info) { 1576 1572 error = -ENOMEM; 1577 1573 goto error_sec_opts; ··· 1579 1575 1580 1576 fs_info->fs_devices = fs_devices; 1581 1577 1582 - fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); 1583 - fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); 1578 + fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1579 + fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1584 1580 security_init_mnt_opts(&fs_info->security_opts); 1585 1581 if (!fs_info->super_copy || !fs_info->super_for_commit) { 1586 1582 error = -ENOMEM; ··· 1784 1780 goto restore; 1785 1781 } 1786 1782 1787 - if (fs_info->fs_devices->missing_devices > 1788 - fs_info->num_tolerated_disk_barrier_failures) { 1783 + if (!btrfs_check_rw_degradable(fs_info)) { 1789 1784 btrfs_warn(fs_info, 1790 1785 "too many missing devices, writeable remount is not allowed"); 1791 1786 ret = -EACCES; ··· 1816 1813 btrfs_warn(fs_info, "failed to resume dev_replace"); 1817 1814 goto restore; 1818 1815 } 1816 + 1817 + btrfs_qgroup_rescan_resume(fs_info); 1819 1818 1820 1819 if (!fs_info->uuid_root) { 1821 1820 btrfs_info(fs_info, "creating UUID tree");

-1

fs/btrfs/tests/btrfs-tests.c

··· 211 211 cache->key.objectid = 0; 212 212 cache->key.offset = length; 213 213 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 214 - cache->sectorsize = fs_info->sectorsize; 215 214 cache->full_stripe_len = fs_info->sectorsize; 216 215 cache->fs_info = fs_info; 217 216

+1 -1

fs/btrfs/tests/free-space-tree-tests.c

··· 81 81 i++; 82 82 } 83 83 prev_bit = bit; 84 - offset += cache->sectorsize; 84 + offset += fs_info->sectorsize; 85 85 } 86 86 } 87 87 if (prev_bit == 1) {

+5 -4

fs/btrfs/tree-log.c

··· 1143 1143 goto again; 1144 1144 } 1145 1145 kfree(victim_name); 1146 - if (ret) 1147 - return ret; 1148 1146 next: 1149 1147 cur_offset += victim_name_len + sizeof(*extref); 1150 1148 } ··· 3688 3690 3689 3691 src_offset = btrfs_item_ptr_offset(src, start_slot + i); 3690 3692 3691 - if ((i == (nr - 1))) 3693 + if (i == nr - 1) 3692 3694 last_key = ins_keys[i]; 3693 3695 3694 3696 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { ··· 4448 4450 len = btrfs_file_extent_inline_len(leaf, 4449 4451 path->slots[0], 4450 4452 extent); 4451 - ASSERT(len == i_size); 4453 + ASSERT(len == i_size || 4454 + (len == fs_info->sectorsize && 4455 + btrfs_file_extent_compression(leaf, extent) != 4456 + BTRFS_COMPRESS_NONE)); 4452 4457 return 0; 4453 4458 } 4454 4459

+149 -147

fs/btrfs/volumes.c

··· 152 152 return &fs_uuids; 153 153 } 154 154 155 - static struct btrfs_fs_devices *__alloc_fs_devices(void) 155 + /* 156 + * alloc_fs_devices - allocate struct btrfs_fs_devices 157 + * @fsid: if not NULL, copy the uuid to fs_devices::fsid 158 + * 159 + * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR(). 160 + * The returned struct is not linked onto any lists and can be destroyed with 161 + * kfree() right away. 162 + */ 163 + static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) 156 164 { 157 165 struct btrfs_fs_devices *fs_devs; 158 166 ··· 174 166 INIT_LIST_HEAD(&fs_devs->resized_devices); 175 167 INIT_LIST_HEAD(&fs_devs->alloc_list); 176 168 INIT_LIST_HEAD(&fs_devs->list); 177 - 178 - return fs_devs; 179 - } 180 - 181 - /** 182 - * alloc_fs_devices - allocate struct btrfs_fs_devices 183 - * @fsid: a pointer to UUID for this FS. If NULL a new UUID is 184 - * generated. 185 - * 186 - * Return: a pointer to a new &struct btrfs_fs_devices on success; 187 - * ERR_PTR() on error. Returned struct is not linked onto any lists and 188 - * can be destroyed with kfree() right away. 189 - */ 190 - static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) 191 - { 192 - struct btrfs_fs_devices *fs_devs; 193 - 194 - fs_devs = __alloc_fs_devices(); 195 - if (IS_ERR(fs_devs)) 196 - return fs_devs; 197 - 198 169 if (fsid) 199 170 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); 200 - else 201 - generate_random_uuid(fs_devs->fsid); 202 171 203 172 return fs_devs; 204 173 } ··· 254 269 return dev; 255 270 } 256 271 257 - static noinline struct btrfs_device *__find_device(struct list_head *head, 258 - u64 devid, u8 *uuid) 272 + /* 273 + * Find a device specified by @devid or @uuid in the list of @fs_devices, or 274 + * return NULL. 275 + * 276 + * If devid and uuid are both specified, the match must be exact, otherwise 277 + * only devid is used. 
278 + */ 279 + static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices, 280 + u64 devid, const u8 *uuid) 259 281 { 282 + struct list_head *head = &fs_devices->devices; 260 283 struct btrfs_device *dev; 261 284 262 285 list_for_each_entry(dev, head, dev_list) { ··· 303 310 304 311 if (flush) 305 312 filemap_write_and_wait((*bdev)->bd_inode->i_mapping); 306 - ret = set_blocksize(*bdev, 4096); 313 + ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE); 307 314 if (ret) { 308 315 blkdev_put(*bdev, flags); 309 316 goto error; ··· 629 636 630 637 device = NULL; 631 638 } else { 632 - device = __find_device(&fs_devices->devices, devid, 633 - disk_super->dev_item.uuid); 639 + device = find_device(fs_devices, devid, 640 + disk_super->dev_item.uuid); 634 641 } 635 642 636 643 if (!device) { ··· 1571 1578 1572 1579 static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, 1573 1580 struct btrfs_device *device, 1574 - u64 chunk_tree, u64 chunk_objectid, 1575 1581 u64 chunk_offset, u64 start, u64 num_bytes) 1576 1582 { 1577 1583 int ret; ··· 1598 1606 leaf = path->nodes[0]; 1599 1607 extent = btrfs_item_ptr(leaf, path->slots[0], 1600 1608 struct btrfs_dev_extent); 1601 - btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree); 1602 - btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); 1609 + btrfs_set_dev_extent_chunk_tree(leaf, extent, 1610 + BTRFS_CHUNK_TREE_OBJECTID); 1611 + btrfs_set_dev_extent_chunk_objectid(leaf, extent, 1612 + BTRFS_FIRST_CHUNK_TREE_OBJECTID); 1603 1613 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); 1604 - 1605 - write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); 1606 1614 1607 1615 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 1608 1616 btrfs_mark_buffer_dirty(leaf); ··· 1718 1726 ptr = btrfs_device_uuid(dev_item); 1719 1727 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 1720 1728 ptr = btrfs_device_fsid(dev_item); 1721 - write_extent_buffer(leaf, 
fs_info->fsid, ptr, BTRFS_UUID_SIZE); 1729 + write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE); 1722 1730 btrfs_mark_buffer_dirty(leaf); 1723 1731 1724 1732 ret = 0; ··· 1864 1872 struct btrfs_fs_devices *cur_devices; 1865 1873 u64 num_devices; 1866 1874 int ret = 0; 1867 - bool clear_super = false; 1868 1875 1869 1876 mutex_lock(&uuid_mutex); 1870 1877 ··· 1899 1908 list_del_init(&device->dev_alloc_list); 1900 1909 device->fs_devices->rw_devices--; 1901 1910 mutex_unlock(&fs_info->chunk_mutex); 1902 - clear_super = true; 1903 1911 } 1904 1912 1905 1913 mutex_unlock(&uuid_mutex); ··· 1976 1986 __btrfs_close_devices(cur_devices); 1977 1987 free_fs_devices(cur_devices); 1978 1988 } 1979 - 1980 - fs_info->num_tolerated_disk_barrier_failures = 1981 - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 1982 1989 1983 1990 out: 1984 1991 mutex_unlock(&uuid_mutex); ··· 2189 2202 if (!fs_devices->seeding) 2190 2203 return -EINVAL; 2191 2204 2192 - seed_devices = __alloc_fs_devices(); 2205 + seed_devices = alloc_fs_devices(NULL); 2193 2206 if (IS_ERR(seed_devices)) 2194 2207 return PTR_ERR(seed_devices); 2195 2208 ··· 2248 2261 struct btrfs_dev_item *dev_item; 2249 2262 struct btrfs_device *device; 2250 2263 struct btrfs_key key; 2251 - u8 fs_uuid[BTRFS_UUID_SIZE]; 2264 + u8 fs_uuid[BTRFS_FSID_SIZE]; 2252 2265 u8 dev_uuid[BTRFS_UUID_SIZE]; 2253 2266 u64 devid; 2254 2267 int ret; ··· 2291 2304 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), 2292 2305 BTRFS_UUID_SIZE); 2293 2306 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), 2294 - BTRFS_UUID_SIZE); 2307 + BTRFS_FSID_SIZE); 2295 2308 device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); 2296 2309 BUG_ON(!device); /* Logic error */ 2297 2310 ··· 2394 2407 device->is_tgtdev_for_dev_replace = 0; 2395 2408 device->mode = FMODE_EXCL; 2396 2409 device->dev_stats_valid = 1; 2397 - set_blocksize(device->bdev, 4096); 2410 + set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); 
2398 2411 2399 2412 if (seeding_dev) { 2400 2413 sb->s_flags &= ~MS_RDONLY; ··· 2474 2487 "sysfs: failed to create fsid for sprout"); 2475 2488 } 2476 2489 2477 - fs_info->num_tolerated_disk_barrier_failures = 2478 - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 2479 2490 ret = btrfs_commit_transaction(trans); 2480 2491 2481 2492 if (seeding_dev) { ··· 2597 2612 device->is_tgtdev_for_dev_replace = 1; 2598 2613 device->mode = FMODE_EXCL; 2599 2614 device->dev_stats_valid = 1; 2600 - set_blocksize(device->bdev, 4096); 2615 + set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); 2601 2616 device->fs_devices = fs_info->fs_devices; 2602 2617 list_add(&device->dev_list, &fs_info->fs_devices->devices); 2603 2618 fs_info->fs_devices->num_devices++; ··· 2713 2728 } 2714 2729 2715 2730 static int btrfs_free_chunk(struct btrfs_trans_handle *trans, 2716 - struct btrfs_fs_info *fs_info, u64 chunk_objectid, 2717 - u64 chunk_offset) 2731 + struct btrfs_fs_info *fs_info, u64 chunk_offset) 2718 2732 { 2719 2733 struct btrfs_root *root = fs_info->chunk_root; 2720 2734 int ret; ··· 2724 2740 if (!path) 2725 2741 return -ENOMEM; 2726 2742 2727 - key.objectid = chunk_objectid; 2743 + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2728 2744 key.offset = chunk_offset; 2729 2745 key.type = BTRFS_CHUNK_ITEM_KEY; 2730 2746 ··· 2747 2763 return ret; 2748 2764 } 2749 2765 2750 - static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, 2751 - u64 chunk_objectid, u64 chunk_offset) 2766 + static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) 2752 2767 { 2753 2768 struct btrfs_super_block *super_copy = fs_info->super_copy; 2754 2769 struct btrfs_disk_key *disk_key; ··· 2780 2797 ret = -EIO; 2781 2798 break; 2782 2799 } 2783 - if (key.objectid == chunk_objectid && 2800 + if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID && 2784 2801 key.offset == chunk_offset) { 2785 2802 memmove(ptr, ptr + len, array_size - (cur + len)); 2786 2803 array_size -= len; ··· 
2829 2846 struct extent_map *em; 2830 2847 struct map_lookup *map; 2831 2848 u64 dev_extent_len = 0; 2832 - u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2833 2849 int i, ret = 0; 2834 2850 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 2835 2851 ··· 2884 2902 } 2885 2903 mutex_unlock(&fs_devices->device_list_mutex); 2886 2904 2887 - ret = btrfs_free_chunk(trans, fs_info, chunk_objectid, chunk_offset); 2905 + ret = btrfs_free_chunk(trans, fs_info, chunk_offset); 2888 2906 if (ret) { 2889 2907 btrfs_abort_transaction(trans, ret); 2890 2908 goto out; ··· 2893 2911 trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len); 2894 2912 2895 2913 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { 2896 - ret = btrfs_del_sys_chunk(fs_info, chunk_objectid, 2897 - chunk_offset); 2914 + ret = btrfs_del_sys_chunk(fs_info, chunk_offset); 2898 2915 if (ret) { 2899 2916 btrfs_abort_transaction(trans, ret); 2900 2917 goto out; ··· 3293 3312 /* [pstart, pend) */ 3294 3313 static int chunk_drange_filter(struct extent_buffer *leaf, 3295 3314 struct btrfs_chunk *chunk, 3296 - u64 chunk_offset, 3297 3315 struct btrfs_balance_args *bargs) 3298 3316 { 3299 3317 struct btrfs_stripe *stripe; ··· 3419 3439 3420 3440 /* drange filter, makes sense only with devid filter */ 3421 3441 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) && 3422 - chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) { 3442 + chunk_drange_filter(leaf, chunk, bargs)) { 3423 3443 return 0; 3424 3444 } 3425 3445 ··· 3878 3898 meta_target, data_target); 3879 3899 } 3880 3900 3881 - if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { 3882 - fs_info->num_tolerated_disk_barrier_failures = min( 3883 - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info), 3884 - btrfs_get_num_tolerated_disk_barrier_failures( 3885 - bctl->sys.target)); 3886 - } 3887 - 3888 3901 ret = insert_balance_item(fs_info, bctl); 3889 3902 if (ret && ret != -EEXIST) 3890 3903 goto out; ··· 3899 3926 3900 3927 
mutex_lock(&fs_info->balance_mutex); 3901 3928 atomic_dec(&fs_info->balance_running); 3902 - 3903 - if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { 3904 - fs_info->num_tolerated_disk_barrier_failures = 3905 - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 3906 - } 3907 3929 3908 3930 if (bargs) { 3909 3931 memset(bargs, 0, sizeof(*bargs)); ··· 4095 4127 struct btrfs_fs_info *fs_info = data; 4096 4128 struct btrfs_root *root = fs_info->tree_root; 4097 4129 struct btrfs_key key; 4098 - struct btrfs_key max_key; 4099 4130 struct btrfs_path *path = NULL; 4100 4131 int ret = 0; 4101 4132 struct extent_buffer *eb; ··· 4112 4145 key.objectid = 0; 4113 4146 key.type = BTRFS_ROOT_ITEM_KEY; 4114 4147 key.offset = 0; 4115 - 4116 - max_key.objectid = (u64)-1; 4117 - max_key.type = BTRFS_ROOT_ITEM_KEY; 4118 - max_key.offset = (u64)-1; 4119 4148 4120 4149 while (1) { 4121 4150 ret = btrfs_search_forward(root, &key, path, 0); ··· 4564 4601 return 0; 4565 4602 } 4566 4603 4567 - static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) 4568 - { 4569 - /* TODO allow them to set a preferred stripe size */ 4570 - return SZ_64K; 4571 - } 4572 - 4573 4604 static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) 4574 4605 { 4575 4606 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) ··· 4586 4629 { 4587 4630 struct btrfs_fs_info *info = trans->fs_info; 4588 4631 struct btrfs_fs_devices *fs_devices = info->fs_devices; 4589 - struct list_head *cur; 4632 + struct btrfs_device *device; 4590 4633 struct map_lookup *map = NULL; 4591 4634 struct extent_map_tree *em_tree; 4592 4635 struct extent_map *em; ··· 4606 4649 u64 max_chunk_size; 4607 4650 u64 stripe_size; 4608 4651 u64 num_bytes; 4609 - u64 raid_stripe_len = BTRFS_STRIPE_LEN; 4610 4652 int ndevs; 4611 4653 int i; 4612 4654 int j; ··· 4659 4703 if (!devices_info) 4660 4704 return -ENOMEM; 4661 4705 4662 - cur = fs_devices->alloc_list.next; 4663 - 4664 4706 /* 4665 4707 * in the first pass 
through the devices list, we gather information 4666 4708 * about the available holes on each device. 4667 4709 */ 4668 4710 ndevs = 0; 4669 - while (cur != &fs_devices->alloc_list) { 4670 - struct btrfs_device *device; 4711 + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 4671 4712 u64 max_avail; 4672 4713 u64 dev_offset; 4673 - 4674 - device = list_entry(cur, struct btrfs_device, dev_alloc_list); 4675 - 4676 - cur = cur->next; 4677 4714 4678 4715 if (!device->writeable) { 4679 4716 WARN(1, KERN_ERR ··· 4718 4769 btrfs_cmp_device_info, NULL); 4719 4770 4720 4771 /* round down to number of usable stripes */ 4721 - ndevs -= ndevs % devs_increment; 4772 + ndevs = round_down(ndevs, devs_increment); 4722 4773 4723 4774 if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) { 4724 4775 ret = -ENOSPC; 4725 4776 goto error; 4726 4777 } 4727 4778 4728 - if (devs_max && ndevs > devs_max) 4729 - ndevs = devs_max; 4779 + ndevs = min(ndevs, devs_max); 4780 + 4730 4781 /* 4731 4782 * the primary goal is to maximize the number of stripes, so use as many 4732 4783 * devices as possible, even if the stripes are not maximum sized. 
··· 4740 4791 */ 4741 4792 data_stripes = num_stripes / ncopies; 4742 4793 4743 - if (type & BTRFS_BLOCK_GROUP_RAID5) { 4744 - raid_stripe_len = find_raid56_stripe_len(ndevs - 1, 4745 - info->stripesize); 4794 + if (type & BTRFS_BLOCK_GROUP_RAID5) 4746 4795 data_stripes = num_stripes - 1; 4747 - } 4748 - if (type & BTRFS_BLOCK_GROUP_RAID6) { 4749 - raid_stripe_len = find_raid56_stripe_len(ndevs - 2, 4750 - info->stripesize); 4796 + 4797 + if (type & BTRFS_BLOCK_GROUP_RAID6) 4751 4798 data_stripes = num_stripes - 2; 4752 - } 4753 4799 4754 4800 /* 4755 4801 * Use the number of data stripes to figure out how big this chunk ··· 4769 4825 stripe_size = div_u64(stripe_size, dev_stripes); 4770 4826 4771 4827 /* align to BTRFS_STRIPE_LEN */ 4772 - stripe_size = div64_u64(stripe_size, raid_stripe_len); 4773 - stripe_size *= raid_stripe_len; 4828 + stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN); 4774 4829 4775 4830 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 4776 4831 if (!map) { ··· 4786 4843 j * stripe_size; 4787 4844 } 4788 4845 } 4789 - map->sector_size = info->sectorsize; 4790 - map->stripe_len = raid_stripe_len; 4791 - map->io_align = raid_stripe_len; 4792 - map->io_width = raid_stripe_len; 4846 + map->stripe_len = BTRFS_STRIPE_LEN; 4847 + map->io_align = BTRFS_STRIPE_LEN; 4848 + map->io_width = BTRFS_STRIPE_LEN; 4793 4849 map->type = type; 4794 4850 map->sub_stripes = sub_stripes; 4795 4851 ··· 4823 4881 goto error; 4824 4882 } 4825 4883 4826 - ret = btrfs_make_block_group(trans, info, 0, type, 4827 - BTRFS_FIRST_CHUNK_TREE_OBJECTID, 4828 - start, num_bytes); 4884 + ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); 4829 4885 if (ret) 4830 4886 goto error_del_extent; 4831 4887 ··· 4903 4963 ret = btrfs_update_device(trans, device); 4904 4964 if (ret) 4905 4965 break; 4906 - ret = btrfs_alloc_dev_extent(trans, device, 4907 - chunk_root->root_key.objectid, 4908 - BTRFS_FIRST_CHUNK_TREE_OBJECTID, 4909 - chunk_offset, dev_offset, 
4910 - stripe_size); 4966 + ret = btrfs_alloc_dev_extent(trans, device, chunk_offset, 4967 + dev_offset, stripe_size); 4911 4968 if (ret) 4912 4969 break; 4913 4970 } ··· 5109 5172 } 5110 5173 5111 5174 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, 5112 - struct btrfs_mapping_tree *map_tree, 5113 5175 u64 logical) 5114 5176 { 5115 5177 struct extent_map *em; ··· 5116 5180 unsigned long len = fs_info->sectorsize; 5117 5181 5118 5182 em = get_chunk_map(fs_info, logical, len); 5119 - WARN_ON(IS_ERR(em)); 5120 5183 5121 - map = em->map_lookup; 5122 - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) 5123 - len = map->stripe_len * nr_data_stripes(map); 5124 - free_extent_map(em); 5184 + if (!WARN_ON(IS_ERR(em))) { 5185 + map = em->map_lookup; 5186 + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) 5187 + len = map->stripe_len * nr_data_stripes(map); 5188 + free_extent_map(em); 5189 + } 5125 5190 return len; 5126 5191 } 5127 5192 5128 - int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 5129 - u64 logical, u64 len, int mirror_num) 5193 + int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len) 5130 5194 { 5131 5195 struct extent_map *em; 5132 5196 struct map_lookup *map; 5133 5197 int ret = 0; 5134 5198 5135 5199 em = get_chunk_map(fs_info, logical, len); 5136 - WARN_ON(IS_ERR(em)); 5137 5200 5138 - map = em->map_lookup; 5139 - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) 5140 - ret = 1; 5141 - free_extent_map(em); 5201 + if(!WARN_ON(IS_ERR(em))) { 5202 + map = em->map_lookup; 5203 + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) 5204 + ret = 1; 5205 + free_extent_map(em); 5206 + } 5142 5207 return ret; 5143 5208 } 5144 5209 ··· 6232 6295 cur_devices = fs_info->fs_devices; 6233 6296 while (cur_devices) { 6234 6297 if (!fsid || 6235 - !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { 6236 - device = __find_device(&cur_devices->devices, 6237 - devid, uuid); 6298 + !memcmp(cur_devices->fsid, fsid, BTRFS_FSID_SIZE)) { 6299 + 
device = find_device(cur_devices, devid, uuid); 6238 6300 if (device) 6239 6301 return device; 6240 6302 } ··· 6386 6450 struct extent_map *em; 6387 6451 u64 logical; 6388 6452 u64 length; 6389 - u64 stripe_len; 6390 6453 u64 devid; 6391 6454 u8 uuid[BTRFS_UUID_SIZE]; 6392 6455 int num_stripes; ··· 6394 6459 6395 6460 logical = key->offset; 6396 6461 length = btrfs_chunk_length(leaf, chunk); 6397 - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 6398 6462 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 6399 6463 6400 6464 ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); ··· 6432 6498 map->num_stripes = num_stripes; 6433 6499 map->io_width = btrfs_chunk_io_width(leaf, chunk); 6434 6500 map->io_align = btrfs_chunk_io_align(leaf, chunk); 6435 - map->sector_size = btrfs_chunk_sector_size(leaf, chunk); 6436 6501 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 6437 6502 map->type = btrfs_chunk_type(leaf, chunk); 6438 6503 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); ··· 6447 6514 if (!map->stripes[i].dev && 6448 6515 !btrfs_test_opt(fs_info, DEGRADED)) { 6449 6516 free_extent_map(em); 6517 + btrfs_report_missing_device(fs_info, devid, uuid); 6450 6518 return -EIO; 6451 6519 } 6452 6520 if (!map->stripes[i].dev) { ··· 6458 6524 free_extent_map(em); 6459 6525 return -EIO; 6460 6526 } 6461 - btrfs_warn(fs_info, "devid %llu uuid %pU is missing", 6462 - devid, uuid); 6527 + btrfs_report_missing_device(fs_info, devid, uuid); 6463 6528 } 6464 6529 map->stripes[i].dev->in_fs_metadata = 1; 6465 6530 } ··· 6502 6569 int ret; 6503 6570 6504 6571 BUG_ON(!mutex_is_locked(&uuid_mutex)); 6572 + ASSERT(fsid); 6505 6573 6506 6574 fs_devices = fs_info->fs_devices->seed; 6507 6575 while (fs_devices) { 6508 - if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) 6576 + if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE)) 6509 6577 return fs_devices; 6510 6578 6511 6579 fs_devices = fs_devices->seed; ··· 6559 6625 struct btrfs_device *device; 6560 6626 
u64 devid; 6561 6627 int ret; 6562 - u8 fs_uuid[BTRFS_UUID_SIZE]; 6628 + u8 fs_uuid[BTRFS_FSID_SIZE]; 6563 6629 u8 dev_uuid[BTRFS_UUID_SIZE]; 6564 6630 6565 6631 devid = btrfs_device_id(leaf, dev_item); 6566 6632 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), 6567 6633 BTRFS_UUID_SIZE); 6568 6634 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), 6569 - BTRFS_UUID_SIZE); 6635 + BTRFS_FSID_SIZE); 6570 6636 6571 - if (memcmp(fs_uuid, fs_info->fsid, BTRFS_UUID_SIZE)) { 6637 + if (memcmp(fs_uuid, fs_info->fsid, BTRFS_FSID_SIZE)) { 6572 6638 fs_devices = open_seed_devices(fs_info, fs_uuid); 6573 6639 if (IS_ERR(fs_devices)) 6574 6640 return PTR_ERR(fs_devices); ··· 6576 6642 6577 6643 device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); 6578 6644 if (!device) { 6579 - if (!btrfs_test_opt(fs_info, DEGRADED)) 6645 + if (!btrfs_test_opt(fs_info, DEGRADED)) { 6646 + btrfs_report_missing_device(fs_info, devid, dev_uuid); 6580 6647 return -EIO; 6648 + } 6581 6649 6582 6650 device = add_missing_dev(fs_devices, devid, dev_uuid); 6583 6651 if (!device) 6584 6652 return -ENOMEM; 6585 - btrfs_warn(fs_info, "devid %llu uuid %pU missing", 6586 - devid, dev_uuid); 6653 + btrfs_report_missing_device(fs_info, devid, dev_uuid); 6587 6654 } else { 6588 - if (!device->bdev && !btrfs_test_opt(fs_info, DEGRADED)) 6589 - return -EIO; 6655 + if (!device->bdev) { 6656 + btrfs_report_missing_device(fs_info, devid, dev_uuid); 6657 + if (!btrfs_test_opt(fs_info, DEGRADED)) 6658 + return -EIO; 6659 + } 6590 6660 6591 6661 if(!device->bdev && !device->missing) { 6592 6662 /* ··· 6754 6816 clear_extent_buffer_uptodate(sb); 6755 6817 free_extent_buffer_stale(sb); 6756 6818 return -EIO; 6819 + } 6820 + 6821 + void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, 6822 + u8 *uuid) 6823 + { 6824 + btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", devid, uuid); 6825 + } 6826 + 6827 + /* 6828 + * Check if all chunks in the fs are OK for 
read-write degraded mount 6829 + * 6830 + * Return true if all chunks meet the minimal RW mount requirements. 6831 + * Return false if any chunk doesn't meet the minimal RW mount requirements. 6832 + */ 6833 + bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info) 6834 + { 6835 + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 6836 + struct extent_map *em; 6837 + u64 next_start = 0; 6838 + bool ret = true; 6839 + 6840 + read_lock(&map_tree->map_tree.lock); 6841 + em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1); 6842 + read_unlock(&map_tree->map_tree.lock); 6843 + /* No chunk at all? Return false anyway */ 6844 + if (!em) { 6845 + ret = false; 6846 + goto out; 6847 + } 6848 + while (em) { 6849 + struct map_lookup *map; 6850 + int missing = 0; 6851 + int max_tolerated; 6852 + int i; 6853 + 6854 + map = em->map_lookup; 6855 + max_tolerated = 6856 + btrfs_get_num_tolerated_disk_barrier_failures( 6857 + map->type); 6858 + for (i = 0; i < map->num_stripes; i++) { 6859 + struct btrfs_device *dev = map->stripes[i].dev; 6860 + 6861 + if (!dev || !dev->bdev || dev->missing || 6862 + dev->last_flush_error) 6863 + missing++; 6864 + } 6865 + if (missing > max_tolerated) { 6866 + btrfs_warn(fs_info, 6867 + "chunk %llu missing %d devices, max tolerance is %d for writeable mount", 6868 + em->start, missing, max_tolerated); 6869 + free_extent_map(em); 6870 + ret = false; 6871 + goto out; 6872 + } 6873 + next_start = extent_map_end(em); 6874 + free_extent_map(em); 6875 + 6876 + read_lock(&map_tree->map_tree.lock); 6877 + em = lookup_extent_mapping(&map_tree->map_tree, next_start, 6878 + (u64)(-1) - next_start); 6879 + read_unlock(&map_tree->map_tree.lock); 6880 + } 6881 + out: 6882 + return ret; 6757 6883 } 6758 6884 6759 6885 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)

+5 -3

fs/btrfs/volumes.h

··· 353 353 int io_align; 354 354 int io_width; 355 355 u64 stripe_len; 356 - int sector_size; 357 356 int num_stripes; 358 357 int sub_stripes; 359 358 struct btrfs_bio_stripe stripes[]; ··· 480 481 struct btrfs_device *tgtdev); 481 482 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); 482 483 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, 483 - u64 logical, u64 len, int mirror_num); 484 + u64 logical, u64 len); 484 485 unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, 485 - struct btrfs_mapping_tree *map_tree, 486 486 u64 logical); 487 487 int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, 488 488 struct btrfs_fs_info *fs_info, ··· 540 542 struct list_head *btrfs_get_fs_uuids(void); 541 543 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); 542 544 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); 545 + 546 + bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info); 547 + void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, 548 + u8 *uuid); 543 549 544 550 #endif

+200 -131

include/trace/events/btrfs.h

··· 26 26 struct __btrfs_workqueue; 27 27 struct btrfs_qgroup_extent_record; 28 28 struct btrfs_qgroup; 29 + struct prelim_ref; 29 30 30 31 #define show_ref_type(type) \ 31 32 __print_symbolic(type, \ ··· 74 73 { BTRFS_BLOCK_GROUP_RAID5, "RAID5"}, \ 75 74 { BTRFS_BLOCK_GROUP_RAID6, "RAID6"} 76 75 77 - #define BTRFS_UUID_SIZE 16 78 - #define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_UUID_SIZE) 76 + #define BTRFS_FSID_SIZE 16 77 + #define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_FSID_SIZE) 79 78 80 79 #define TP_fast_assign_fsid(fs_info) \ 81 - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE) 80 + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE) 82 81 83 82 #define TP_STRUCT__entry_btrfs(args...) \ 84 83 TP_STRUCT__entry( \ ··· 93 92 94 93 TRACE_EVENT(btrfs_transaction_commit, 95 94 96 - TP_PROTO(struct btrfs_root *root), 95 + TP_PROTO(const struct btrfs_root *root), 97 96 98 97 TP_ARGS(root), 99 98 ··· 114 113 115 114 DECLARE_EVENT_CLASS(btrfs__inode, 116 115 117 - TP_PROTO(struct inode *inode), 116 + TP_PROTO(const struct inode *inode), 118 117 119 118 TP_ARGS(inode), 120 119 ··· 152 151 153 152 DEFINE_EVENT(btrfs__inode, btrfs_inode_new, 154 153 155 - TP_PROTO(struct inode *inode), 154 + TP_PROTO(const struct inode *inode), 156 155 157 156 TP_ARGS(inode) 158 157 ); 159 158 160 159 DEFINE_EVENT(btrfs__inode, btrfs_inode_request, 161 160 162 - TP_PROTO(struct inode *inode), 161 + TP_PROTO(const struct inode *inode), 163 162 164 163 TP_ARGS(inode) 165 164 ); 166 165 167 166 DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, 168 167 169 - TP_PROTO(struct inode *inode), 168 + TP_PROTO(const struct inode *inode), 170 169 171 170 TP_ARGS(inode) 172 171 ); ··· 193 192 194 193 TRACE_EVENT_CONDITION(btrfs_get_extent, 195 194 196 - TP_PROTO(struct btrfs_root *root, struct btrfs_inode *inode, 197 - struct extent_map *map), 195 + TP_PROTO(const struct btrfs_root *root, const struct btrfs_inode *inode, 196 + const struct extent_map *map), 198 197 199 198 
TP_ARGS(root, inode, map), 200 199 ··· 389 388 390 389 DECLARE_EVENT_CLASS(btrfs__ordered_extent, 391 390 392 - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), 391 + TP_PROTO(const struct inode *inode, 392 + const struct btrfs_ordered_extent *ordered), 393 393 394 394 TP_ARGS(inode, ordered), 395 395 ··· 442 440 443 441 DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_add, 444 442 445 - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), 443 + TP_PROTO(const struct inode *inode, 444 + const struct btrfs_ordered_extent *ordered), 446 445 447 446 TP_ARGS(inode, ordered) 448 447 ); 449 448 450 449 DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_remove, 451 450 452 - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), 451 + TP_PROTO(const struct inode *inode, 452 + const struct btrfs_ordered_extent *ordered), 453 453 454 454 TP_ARGS(inode, ordered) 455 455 ); 456 456 457 457 DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_start, 458 458 459 - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), 459 + TP_PROTO(const struct inode *inode, 460 + const struct btrfs_ordered_extent *ordered), 460 461 461 462 TP_ARGS(inode, ordered) 462 463 ); 463 464 464 465 DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_put, 465 466 466 - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), 467 + TP_PROTO(const struct inode *inode, 468 + const struct btrfs_ordered_extent *ordered), 467 469 468 470 TP_ARGS(inode, ordered) 469 471 ); 470 472 471 473 DECLARE_EVENT_CLASS(btrfs__writepage, 472 474 473 - TP_PROTO(struct page *page, struct inode *inode, 474 - struct writeback_control *wbc), 475 + TP_PROTO(const struct page *page, const struct inode *inode, 476 + const struct writeback_control *wbc), 475 477 476 478 TP_ARGS(page, inode, wbc), 477 479 ··· 523 517 524 518 DEFINE_EVENT(btrfs__writepage, __extent_writepage, 525 519 526 - TP_PROTO(struct page *page, struct inode 
*inode, 527 - struct writeback_control *wbc), 520 + TP_PROTO(const struct page *page, const struct inode *inode, 521 + const struct writeback_control *wbc), 528 522 529 523 TP_ARGS(page, inode, wbc) 530 524 ); 531 525 532 526 TRACE_EVENT(btrfs_writepage_end_io_hook, 533 527 534 - TP_PROTO(struct page *page, u64 start, u64 end, int uptodate), 528 + TP_PROTO(const struct page *page, u64 start, u64 end, int uptodate), 535 529 536 530 TP_ARGS(page, start, end, uptodate), 537 531 ··· 564 558 565 559 TRACE_EVENT(btrfs_sync_file, 566 560 567 - TP_PROTO(struct file *file, int datasync), 561 + TP_PROTO(const struct file *file, int datasync), 568 562 569 563 TP_ARGS(file, datasync), 570 564 ··· 576 570 ), 577 571 578 572 TP_fast_assign( 579 - struct dentry *dentry = file->f_path.dentry; 580 - struct inode *inode = d_inode(dentry); 573 + const struct dentry *dentry = file->f_path.dentry; 574 + const struct inode *inode = d_inode(dentry); 581 575 582 576 TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb)); 583 577 __entry->ino = inode->i_ino; ··· 595 589 596 590 TRACE_EVENT(btrfs_sync_fs, 597 591 598 - TP_PROTO(struct btrfs_fs_info *fs_info, int wait), 592 + TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), 599 593 600 594 TP_ARGS(fs_info, wait), 601 595 ··· 612 606 613 607 TRACE_EVENT(btrfs_add_block_group, 614 608 615 - TP_PROTO(struct btrfs_fs_info *fs_info, 616 - struct btrfs_block_group_cache *block_group, int create), 609 + TP_PROTO(const struct btrfs_fs_info *fs_info, 610 + const struct btrfs_block_group_cache *block_group, int create), 617 611 618 612 TP_ARGS(fs_info, block_group, create), 619 613 620 614 TP_STRUCT__entry( 621 - __array( u8, fsid, BTRFS_UUID_SIZE ) 615 + __array( u8, fsid, BTRFS_FSID_SIZE ) 622 616 __field( u64, offset ) 623 617 __field( u64, size ) 624 618 __field( u64, flags ) ··· 628 622 ), 629 623 630 624 TP_fast_assign( 631 - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); 625 + memcpy(__entry->fsid, fs_info->fsid, 
BTRFS_FSID_SIZE); 632 626 __entry->offset = block_group->key.objectid; 633 627 __entry->size = block_group->key.offset; 634 628 __entry->flags = block_group->flags; ··· 660 654 661 655 DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, 662 656 663 - TP_PROTO(struct btrfs_fs_info *fs_info, 664 - struct btrfs_delayed_ref_node *ref, 665 - struct btrfs_delayed_tree_ref *full_ref, 657 + TP_PROTO(const struct btrfs_fs_info *fs_info, 658 + const struct btrfs_delayed_ref_node *ref, 659 + const struct btrfs_delayed_tree_ref *full_ref, 666 660 int action), 667 661 668 662 TP_ARGS(fs_info, ref, full_ref, action), ··· 703 697 704 698 DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref, 705 699 706 - TP_PROTO(struct btrfs_fs_info *fs_info, 707 - struct btrfs_delayed_ref_node *ref, 708 - struct btrfs_delayed_tree_ref *full_ref, 700 + TP_PROTO(const struct btrfs_fs_info *fs_info, 701 + const struct btrfs_delayed_ref_node *ref, 702 + const struct btrfs_delayed_tree_ref *full_ref, 709 703 int action), 710 704 711 705 TP_ARGS(fs_info, ref, full_ref, action) ··· 713 707 714 708 DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref, 715 709 716 - TP_PROTO(struct btrfs_fs_info *fs_info, 717 - struct btrfs_delayed_ref_node *ref, 718 - struct btrfs_delayed_tree_ref *full_ref, 710 + TP_PROTO(const struct btrfs_fs_info *fs_info, 711 + const struct btrfs_delayed_ref_node *ref, 712 + const struct btrfs_delayed_tree_ref *full_ref, 719 713 int action), 720 714 721 715 TP_ARGS(fs_info, ref, full_ref, action) ··· 723 717 724 718 DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, 725 719 726 - TP_PROTO(struct btrfs_fs_info *fs_info, 727 - struct btrfs_delayed_ref_node *ref, 728 - struct btrfs_delayed_data_ref *full_ref, 720 + TP_PROTO(const struct btrfs_fs_info *fs_info, 721 + const struct btrfs_delayed_ref_node *ref, 722 + const struct btrfs_delayed_data_ref *full_ref, 729 723 int action), 730 724 731 725 TP_ARGS(fs_info, ref, full_ref, action), ··· 770 764 771 765 
DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref, 772 766 773 - TP_PROTO(struct btrfs_fs_info *fs_info, 774 - struct btrfs_delayed_ref_node *ref, 775 - struct btrfs_delayed_data_ref *full_ref, 767 + TP_PROTO(const struct btrfs_fs_info *fs_info, 768 + const struct btrfs_delayed_ref_node *ref, 769 + const struct btrfs_delayed_data_ref *full_ref, 776 770 int action), 777 771 778 772 TP_ARGS(fs_info, ref, full_ref, action) ··· 780 774 781 775 DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, 782 776 783 - TP_PROTO(struct btrfs_fs_info *fs_info, 784 - struct btrfs_delayed_ref_node *ref, 785 - struct btrfs_delayed_data_ref *full_ref, 777 + TP_PROTO(const struct btrfs_fs_info *fs_info, 778 + const struct btrfs_delayed_ref_node *ref, 779 + const struct btrfs_delayed_data_ref *full_ref, 786 780 int action), 787 781 788 782 TP_ARGS(fs_info, ref, full_ref, action) ··· 790 784 791 785 DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, 792 786 793 - TP_PROTO(struct btrfs_fs_info *fs_info, 794 - struct btrfs_delayed_ref_node *ref, 795 - struct btrfs_delayed_ref_head *head_ref, 787 + TP_PROTO(const struct btrfs_fs_info *fs_info, 788 + const struct btrfs_delayed_ref_node *ref, 789 + const struct btrfs_delayed_ref_head *head_ref, 796 790 int action), 797 791 798 792 TP_ARGS(fs_info, ref, head_ref, action), ··· 820 814 821 815 DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, 822 816 823 - TP_PROTO(struct btrfs_fs_info *fs_info, 824 - struct btrfs_delayed_ref_node *ref, 825 - struct btrfs_delayed_ref_head *head_ref, 817 + TP_PROTO(const struct btrfs_fs_info *fs_info, 818 + const struct btrfs_delayed_ref_node *ref, 819 + const struct btrfs_delayed_ref_head *head_ref, 826 820 int action), 827 821 828 822 TP_ARGS(fs_info, ref, head_ref, action) ··· 830 824 831 825 DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, 832 826 833 - TP_PROTO(struct btrfs_fs_info *fs_info, 834 - struct btrfs_delayed_ref_node *ref, 835 - struct btrfs_delayed_ref_head *head_ref, 827 
+ TP_PROTO(const struct btrfs_fs_info *fs_info, 828 + const struct btrfs_delayed_ref_node *ref, 829 + const struct btrfs_delayed_ref_head *head_ref, 836 830 int action), 837 831 838 832 TP_ARGS(fs_info, ref, head_ref, action) ··· 852 846 853 847 DECLARE_EVENT_CLASS(btrfs__chunk, 854 848 855 - TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map, 856 - u64 offset, u64 size), 849 + TP_PROTO(const struct btrfs_fs_info *fs_info, 850 + const struct map_lookup *map, u64 offset, u64 size), 857 851 858 852 TP_ARGS(fs_info, map, offset, size), 859 853 ··· 886 880 887 881 DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc, 888 882 889 - TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map, 890 - u64 offset, u64 size), 883 + TP_PROTO(const struct btrfs_fs_info *fs_info, 884 + const struct map_lookup *map, u64 offset, u64 size), 891 885 892 886 TP_ARGS(fs_info, map, offset, size) 893 887 ); 894 888 895 889 DEFINE_EVENT(btrfs__chunk, btrfs_chunk_free, 896 890 897 - TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map, 898 - u64 offset, u64 size), 891 + TP_PROTO(const struct btrfs_fs_info *fs_info, 892 + const struct map_lookup *map, u64 offset, u64 size), 899 893 900 894 TP_ARGS(fs_info, map, offset, size) 901 895 ); 902 896 903 897 TRACE_EVENT(btrfs_cow_block, 904 898 905 - TP_PROTO(struct btrfs_root *root, struct extent_buffer *buf, 906 - struct extent_buffer *cow), 899 + TP_PROTO(const struct btrfs_root *root, const struct extent_buffer *buf, 900 + const struct extent_buffer *cow), 907 901 908 902 TP_ARGS(root, buf, cow), 909 903 ··· 937 931 938 932 TRACE_EVENT(btrfs_space_reservation, 939 933 940 - TP_PROTO(struct btrfs_fs_info *fs_info, char *type, u64 val, 934 + TP_PROTO(const struct btrfs_fs_info *fs_info, char *type, u64 val, 941 935 u64 bytes, int reserve), 942 936 943 937 TP_ARGS(fs_info, type, val, bytes, reserve), ··· 969 963 970 964 TRACE_EVENT(btrfs_trigger_flush, 971 965 972 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 bytes, 
966 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes, 973 967 int flush, char *reason), 974 968 975 969 TP_ARGS(fs_info, flags, bytes, flush, reason), 976 970 977 971 TP_STRUCT__entry( 978 - __array( u8, fsid, BTRFS_UUID_SIZE ) 972 + __array( u8, fsid, BTRFS_FSID_SIZE ) 979 973 __field( u64, flags ) 980 974 __field( u64, bytes ) 981 975 __field( int, flush ) ··· 983 977 ), 984 978 985 979 TP_fast_assign( 986 - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); 980 + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE); 987 981 __entry->flags = flags; 988 982 __entry->bytes = bytes; 989 983 __entry->flush = flush; ··· 1010 1004 1011 1005 TRACE_EVENT(btrfs_flush_space, 1012 1006 1013 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes, 1014 - u64 orig_bytes, int state, int ret), 1007 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes, 1008 + int state, int ret), 1015 1009 1016 - TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret), 1010 + TP_ARGS(fs_info, flags, num_bytes, state, ret), 1017 1011 1018 1012 TP_STRUCT__entry( 1019 - __array( u8, fsid, BTRFS_UUID_SIZE ) 1013 + __array( u8, fsid, BTRFS_FSID_SIZE ) 1020 1014 __field( u64, flags ) 1021 1015 __field( u64, num_bytes ) 1022 - __field( u64, orig_bytes ) 1023 1016 __field( int, state ) 1024 1017 __field( int, ret ) 1025 1018 ), 1026 1019 1027 1020 TP_fast_assign( 1028 - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); 1021 + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE); 1029 1022 __entry->flags = flags; 1030 1023 __entry->num_bytes = num_bytes; 1031 - __entry->orig_bytes = orig_bytes; 1032 1024 __entry->state = state; 1033 1025 __entry->ret = ret; 1034 1026 ), 1035 1027 1036 - TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu " 1037 - "orig_bytes=%llu ret=%d", __entry->fsid, __entry->state, 1028 + TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d", 1029 + __entry->fsid, __entry->state, 1038 1030 
show_flush_state(__entry->state), 1039 1031 (unsigned long long)__entry->flags, 1040 1032 __print_flags((unsigned long)__entry->flags, "|", 1041 1033 BTRFS_GROUP_FLAGS), 1042 - (unsigned long long)__entry->num_bytes, 1043 - (unsigned long long)__entry->orig_bytes, __entry->ret) 1034 + (unsigned long long)__entry->num_bytes, __entry->ret) 1044 1035 ); 1045 1036 1046 1037 DECLARE_EVENT_CLASS(btrfs__reserved_extent, 1047 1038 1048 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 start, u64 len), 1039 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 len), 1049 1040 1050 1041 TP_ARGS(fs_info, start, len), 1051 1042 ··· 1064 1061 1065 1062 DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_alloc, 1066 1063 1067 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 start, u64 len), 1064 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 len), 1068 1065 1069 1066 TP_ARGS(fs_info, start, len) 1070 1067 ); 1071 1068 1072 1069 DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, 1073 1070 1074 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 start, u64 len), 1071 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 len), 1075 1072 1076 1073 TP_ARGS(fs_info, start, len) 1077 1074 ); 1078 1075 1079 1076 TRACE_EVENT(find_free_extent, 1080 1077 1081 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 num_bytes, u64 empty_size, 1082 - u64 data), 1078 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 num_bytes, 1079 + u64 empty_size, u64 data), 1083 1080 1084 1081 TP_ARGS(fs_info, num_bytes, empty_size, data), 1085 1082 ··· 1104 1101 1105 1102 DECLARE_EVENT_CLASS(btrfs__reserve_extent, 1106 1103 1107 - TP_PROTO(struct btrfs_fs_info *fs_info, 1108 - struct btrfs_block_group_cache *block_group, u64 start, 1104 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1105 + const struct btrfs_block_group_cache *block_group, u64 start, 1109 1106 u64 len), 1110 1107 1111 1108 TP_ARGS(fs_info, block_group, start, len), ··· 1135 1132 1136 1133 
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent, 1137 1134 1138 - TP_PROTO(struct btrfs_fs_info *fs_info, 1139 - struct btrfs_block_group_cache *block_group, u64 start, 1135 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1136 + const struct btrfs_block_group_cache *block_group, u64 start, 1140 1137 u64 len), 1141 1138 1142 1139 TP_ARGS(fs_info, block_group, start, len) ··· 1144 1141 1145 1142 DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster, 1146 1143 1147 - TP_PROTO(struct btrfs_fs_info *fs_info, 1148 - struct btrfs_block_group_cache *block_group, u64 start, 1144 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1145 + const struct btrfs_block_group_cache *block_group, u64 start, 1149 1146 u64 len), 1150 1147 1151 1148 TP_ARGS(fs_info, block_group, start, len) ··· 1153 1150 1154 1151 TRACE_EVENT(btrfs_find_cluster, 1155 1152 1156 - TP_PROTO(struct btrfs_block_group_cache *block_group, u64 start, 1153 + TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start, 1157 1154 u64 bytes, u64 empty_size, u64 min_bytes), 1158 1155 1159 1156 TP_ARGS(block_group, start, bytes, empty_size, min_bytes), ··· 1186 1183 1187 1184 TRACE_EVENT(btrfs_failed_cluster_setup, 1188 1185 1189 - TP_PROTO(struct btrfs_block_group_cache *block_group), 1186 + TP_PROTO(const struct btrfs_block_group_cache *block_group), 1190 1187 1191 1188 TP_ARGS(block_group), 1192 1189 ··· 1203 1200 1204 1201 TRACE_EVENT(btrfs_setup_cluster, 1205 1202 1206 - TP_PROTO(struct btrfs_block_group_cache *block_group, 1207 - struct btrfs_free_cluster *cluster, u64 size, int bitmap), 1203 + TP_PROTO(const struct btrfs_block_group_cache *block_group, 1204 + const struct btrfs_free_cluster *cluster, 1205 + u64 size, int bitmap), 1208 1206 1209 1207 TP_ARGS(block_group, cluster, size, bitmap), 1210 1208 ··· 1239 1235 struct extent_state; 1240 1236 TRACE_EVENT(alloc_extent_state, 1241 1237 1242 - TP_PROTO(struct extent_state *state, gfp_t mask, unsigned long IP), 1238 + TP_PROTO(const 
struct extent_state *state, 1239 + gfp_t mask, unsigned long IP), 1243 1240 1244 1241 TP_ARGS(state, mask, IP), 1245 1242 1246 1243 TP_STRUCT__entry( 1247 - __field(struct extent_state *, state) 1244 + __field(const struct extent_state *, state) 1248 1245 __field(gfp_t, mask) 1249 1246 __field(unsigned long, ip) 1250 1247 ), ··· 1257 1252 ), 1258 1253 1259 1254 TP_printk("state=%p mask=%s caller=%pS", __entry->state, 1260 - show_gfp_flags(__entry->mask), (void *)__entry->ip) 1255 + show_gfp_flags(__entry->mask), (const void *)__entry->ip) 1261 1256 ); 1262 1257 1263 1258 TRACE_EVENT(free_extent_state, 1264 1259 1265 - TP_PROTO(struct extent_state *state, unsigned long IP), 1260 + TP_PROTO(const struct extent_state *state, unsigned long IP), 1266 1261 1267 1262 TP_ARGS(state, IP), 1268 1263 1269 1264 TP_STRUCT__entry( 1270 - __field(struct extent_state *, state) 1265 + __field(const struct extent_state *, state) 1271 1266 __field(unsigned long, ip) 1272 1267 ), 1273 1268 ··· 1277 1272 ), 1278 1273 1279 1274 TP_printk("state=%p caller=%pS", __entry->state, 1280 - (void *)__entry->ip) 1275 + (const void *)__entry->ip) 1281 1276 ); 1282 1277 1283 1278 DECLARE_EVENT_CLASS(btrfs__work, 1284 1279 1285 - TP_PROTO(struct btrfs_work *work), 1280 + TP_PROTO(const struct btrfs_work *work), 1286 1281 1287 1282 TP_ARGS(work), 1288 1283 1289 1284 TP_STRUCT__entry_btrfs( 1290 - __field( void *, work ) 1291 - __field( void *, wq ) 1292 - __field( void *, func ) 1293 - __field( void *, ordered_func ) 1294 - __field( void *, ordered_free ) 1295 - __field( void *, normal_work ) 1285 + __field( const void *, work ) 1286 + __field( const void *, wq ) 1287 + __field( const void *, func ) 1288 + __field( const void *, ordered_func ) 1289 + __field( const void *, ordered_free ) 1290 + __field( const void *, normal_work ) 1296 1291 ), 1297 1292 1298 1293 TP_fast_assign_btrfs(btrfs_work_owner(work), ··· 1317 1312 */ 1318 1313 DECLARE_EVENT_CLASS(btrfs__work__done, 1319 1314 1320 - 
TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), 1315 + TP_PROTO(const struct btrfs_fs_info *fs_info, const void *wtag), 1321 1316 1322 1317 TP_ARGS(fs_info, wtag), 1323 1318 1324 1319 TP_STRUCT__entry_btrfs( 1325 - __field( void *, wtag ) 1320 + __field( const void *, wtag ) 1326 1321 ), 1327 1322 1328 1323 TP_fast_assign_btrfs(fs_info, ··· 1334 1329 1335 1330 DEFINE_EVENT(btrfs__work, btrfs_work_queued, 1336 1331 1337 - TP_PROTO(struct btrfs_work *work), 1332 + TP_PROTO(const struct btrfs_work *work), 1338 1333 1339 1334 TP_ARGS(work) 1340 1335 ); 1341 1336 1342 1337 DEFINE_EVENT(btrfs__work, btrfs_work_sched, 1343 1338 1344 - TP_PROTO(struct btrfs_work *work), 1339 + TP_PROTO(const struct btrfs_work *work), 1345 1340 1346 1341 TP_ARGS(work) 1347 1342 ); 1348 1343 1349 1344 DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, 1350 1345 1351 - TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), 1346 + TP_PROTO(const struct btrfs_fs_info *fs_info, const void *wtag), 1352 1347 1353 1348 TP_ARGS(fs_info, wtag) 1354 1349 ); 1355 1350 1356 1351 DEFINE_EVENT(btrfs__work, btrfs_ordered_sched, 1357 1352 1358 - TP_PROTO(struct btrfs_work *work), 1353 + TP_PROTO(const struct btrfs_work *work), 1359 1354 1360 1355 TP_ARGS(work) 1361 1356 ); 1362 1357 1363 1358 DECLARE_EVENT_CLASS(btrfs__workqueue, 1364 1359 1365 - TP_PROTO(struct __btrfs_workqueue *wq, const char *name, int high), 1360 + TP_PROTO(const struct __btrfs_workqueue *wq, 1361 + const char *name, int high), 1366 1362 1367 1363 TP_ARGS(wq, name, high), 1368 1364 1369 1365 TP_STRUCT__entry_btrfs( 1370 - __field( void *, wq ) 1366 + __field( const void *, wq ) 1371 1367 __string( name, name ) 1372 1368 __field( int , high ) 1373 1369 ), ··· 1387 1381 1388 1382 DEFINE_EVENT(btrfs__workqueue, btrfs_workqueue_alloc, 1389 1383 1390 - TP_PROTO(struct __btrfs_workqueue *wq, const char *name, int high), 1384 + TP_PROTO(const struct __btrfs_workqueue *wq, 1385 + const char *name, int high), 1391 1386 1392 1387 
TP_ARGS(wq, name, high) 1393 1388 ); 1394 1389 1395 1390 DECLARE_EVENT_CLASS(btrfs__workqueue_done, 1396 1391 1397 - TP_PROTO(struct __btrfs_workqueue *wq), 1392 + TP_PROTO(const struct __btrfs_workqueue *wq), 1398 1393 1399 1394 TP_ARGS(wq), 1400 1395 1401 1396 TP_STRUCT__entry_btrfs( 1402 - __field( void *, wq ) 1397 + __field( const void *, wq ) 1403 1398 ), 1404 1399 1405 1400 TP_fast_assign_btrfs(btrfs_workqueue_owner(wq), ··· 1412 1405 1413 1406 DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, 1414 1407 1415 - TP_PROTO(struct __btrfs_workqueue *wq), 1408 + TP_PROTO(const struct __btrfs_workqueue *wq), 1416 1409 1417 1410 TP_ARGS(wq) 1418 1411 ); ··· 1424 1417 1425 1418 DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data, 1426 1419 1427 - TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op), 1420 + TP_PROTO(const struct inode *inode, u64 start, u64 len, 1421 + u64 reserved, int op), 1428 1422 1429 1423 TP_ARGS(inode, start, len, reserved, op), 1430 1424 ··· 1457 1449 1458 1450 DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_reserve_data, 1459 1451 1460 - TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op), 1452 + TP_PROTO(const struct inode *inode, u64 start, u64 len, 1453 + u64 reserved, int op), 1461 1454 1462 1455 TP_ARGS(inode, start, len, reserved, op) 1463 1456 ); 1464 1457 1465 1458 DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data, 1466 1459 1467 - TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op), 1460 + TP_PROTO(const struct inode *inode, u64 start, u64 len, 1461 + u64 reserved, int op), 1468 1462 1469 1463 TP_ARGS(inode, start, len, reserved, op) 1470 1464 ); 1471 1465 1472 1466 DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref, 1473 1467 1474 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 ref_root, u64 reserved), 1468 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1469 + u64 ref_root, u64 reserved), 1475 1470 1476 1471 TP_ARGS(fs_info, ref_root, reserved), 1477 1472 ··· 1494 
1483 1495 1484 DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref, 1496 1485 1497 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 ref_root, u64 reserved), 1486 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1487 + u64 ref_root, u64 reserved), 1498 1488 1499 1489 TP_ARGS(fs_info, ref_root, reserved) 1500 1490 ); 1501 1491 1502 1492 DECLARE_EVENT_CLASS(btrfs_qgroup_extent, 1503 - TP_PROTO(struct btrfs_fs_info *fs_info, 1504 - struct btrfs_qgroup_extent_record *rec), 1493 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1494 + const struct btrfs_qgroup_extent_record *rec), 1505 1495 1506 1496 TP_ARGS(fs_info, rec), 1507 1497 ··· 1523 1511 1524 1512 DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents, 1525 1513 1526 - TP_PROTO(struct btrfs_fs_info *fs_info, 1527 - struct btrfs_qgroup_extent_record *rec), 1514 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1515 + const struct btrfs_qgroup_extent_record *rec), 1528 1516 1529 1517 TP_ARGS(fs_info, rec) 1530 1518 ); 1531 1519 1532 1520 DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent, 1533 1521 1534 - TP_PROTO(struct btrfs_fs_info *fs_info, 1535 - struct btrfs_qgroup_extent_record *rec), 1522 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1523 + const struct btrfs_qgroup_extent_record *rec), 1536 1524 1537 1525 TP_ARGS(fs_info, rec) 1538 1526 ); 1539 1527 1540 1528 TRACE_EVENT(btrfs_qgroup_account_extent, 1541 1529 1542 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 bytenr, 1530 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 bytenr, 1543 1531 u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots), 1544 1532 1545 1533 TP_ARGS(fs_info, bytenr, num_bytes, nr_old_roots, nr_new_roots), ··· 1568 1556 1569 1557 TRACE_EVENT(qgroup_update_counters, 1570 1558 1571 - TP_PROTO(struct btrfs_fs_info *fs_info, u64 qgid, 1559 + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 qgid, 1572 1560 u64 cur_old_count, u64 cur_new_count), 1573 1561 1574 1562 TP_ARGS(fs_info, qgid, cur_old_count, 
cur_new_count), ··· 1632 1620 1633 1621 TP_printk_btrfs("refroot=%llu(%s) diff=%lld", 1634 1622 show_root_type(__entry->refroot), __entry->diff) 1623 + ); 1624 + 1625 + DECLARE_EVENT_CLASS(btrfs__prelim_ref, 1626 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1627 + const struct prelim_ref *oldref, 1628 + const struct prelim_ref *newref, u64 tree_size), 1629 + TP_ARGS(fs_info, newref, oldref, tree_size), 1630 + 1631 + TP_STRUCT__entry_btrfs( 1632 + __field( u64, root_id ) 1633 + __field( u64, objectid ) 1634 + __field( u8, type ) 1635 + __field( u64, offset ) 1636 + __field( int, level ) 1637 + __field( int, old_count ) 1638 + __field( u64, parent ) 1639 + __field( u64, bytenr ) 1640 + __field( int, mod_count ) 1641 + __field( u64, tree_size ) 1642 + ), 1643 + 1644 + TP_fast_assign_btrfs(fs_info, 1645 + __entry->root_id = oldref->root_id; 1646 + __entry->objectid = oldref->key_for_search.objectid; 1647 + __entry->type = oldref->key_for_search.type; 1648 + __entry->offset = oldref->key_for_search.offset; 1649 + __entry->level = oldref->level; 1650 + __entry->old_count = oldref->count; 1651 + __entry->parent = oldref->parent; 1652 + __entry->bytenr = oldref->wanted_disk_byte; 1653 + __entry->mod_count = newref ? 
newref->count : 0; 1654 + __entry->tree_size = tree_size; 1655 + ), 1656 + 1657 + TP_printk_btrfs("root_id=%llu key=[%llu,%u,%llu] level=%d count=[%d+%d=%d] parent=%llu wanted_disk_byte=%llu nodes=%llu", 1658 + (unsigned long long)__entry->root_id, 1659 + (unsigned long long)__entry->objectid, __entry->type, 1660 + (unsigned long long)__entry->offset, __entry->level, 1661 + __entry->old_count, __entry->mod_count, 1662 + __entry->old_count + __entry->mod_count, 1663 + (unsigned long long)__entry->parent, 1664 + (unsigned long long)__entry->bytenr, 1665 + (unsigned long long)__entry->tree_size) 1666 + ); 1667 + 1668 + DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_merge, 1669 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1670 + const struct prelim_ref *oldref, 1671 + const struct prelim_ref *newref, u64 tree_size), 1672 + TP_ARGS(fs_info, oldref, newref, tree_size) 1673 + ); 1674 + 1675 + DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert, 1676 + TP_PROTO(const struct btrfs_fs_info *fs_info, 1677 + const struct prelim_ref *oldref, 1678 + const struct prelim_ref *newref, u64 tree_size), 1679 + TP_ARGS(fs_info, oldref, newref, tree_size) 1635 1680 ); 1636 1681 1637 1682 #endif /* _TRACE_BTRFS_H */

Configure Feed

Configure Feed