Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6

+2

fs/ubifs/io.c

··· 581 581 ubifs_assert(wbuf->size % c->min_io_size == 0); 582 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 583 583 ubifs_assert(!c->ro_media && !c->ro_mount); 584 + ubifs_assert(!c->space_fixup); 584 585 if (c->leb_size - wbuf->offs >= c->max_write_size) 585 586 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); 586 587 ··· 760 759 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 761 760 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); 762 761 ubifs_assert(!c->ro_media && !c->ro_mount); 762 + ubifs_assert(!c->space_fixup); 763 763 764 764 if (c->ro_error) 765 765 return -EROFS;

+1

fs/ubifs/journal.c

··· 669 669 670 670 out_release: 671 671 release_head(c, BASEHD); 672 + kfree(dent); 672 673 out_ro: 673 674 ubifs_ro_mode(c, err); 674 675 if (last_reference)

+1 -1

fs/ubifs/orphan.c

··· 674 674 if (IS_ERR(sleb)) { 675 675 if (PTR_ERR(sleb) == -EUCLEAN) 676 676 sleb = ubifs_recover_leb(c, lnum, 0, 677 - c->sbuf, 0); 677 + c->sbuf, -1); 678 678 if (IS_ERR(sleb)) { 679 679 err = PTR_ERR(sleb); 680 680 break;

+94 -70

fs/ubifs/recovery.c

··· 564 564 } 565 565 566 566 /** 567 - * drop_last_node - drop the last node or group of nodes. 567 + * drop_last_group - drop the last group of nodes. 568 568 * @sleb: scanned LEB information 569 569 * @offs: offset of dropped nodes is returned here 570 - * @grouped: non-zero if whole group of nodes have to be dropped 571 570 * 572 571 * This is a helper function for 'ubifs_recover_leb()' which drops the last 573 - * node of the scanned LEB or the last group of nodes if @grouped is not zero. 574 - * This function returns %1 if a node was dropped and %0 otherwise. 572 + * group of nodes of the scanned LEB. 575 573 */ 576 - static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) 574 + static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) 577 575 { 578 - int dropped = 0; 579 - 580 576 while (!list_empty(&sleb->nodes)) { 581 577 struct ubifs_scan_node *snod; 582 578 struct ubifs_ch *ch; ··· 581 585 list); 582 586 ch = snod->node; 583 587 if (ch->group_type != UBIFS_IN_NODE_GROUP) 584 - return dropped; 585 - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); 588 + break; 589 + 590 + dbg_rcvry("dropping grouped node at %d:%d", 591 + sleb->lnum, snod->offs); 586 592 *offs = snod->offs; 587 593 list_del(&snod->list); 588 594 kfree(snod); 589 595 sleb->nodes_cnt -= 1; 590 - dropped = 1; 591 - if (!grouped) 592 - break; 593 596 } 594 - return dropped; 597 + } 598 + 599 + /** 600 + * drop_last_node - drop the last node. 601 + * @sleb: scanned LEB information 602 + * @offs: offset of dropped nodes is returned here 603 + * @grouped: non-zero if whole group of nodes have to be dropped 604 + * 605 + * This is a helper function for 'ubifs_recover_leb()' which drops the last 606 + * node of the scanned LEB. 607 + */ 608 + static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) 609 + { 610 + struct ubifs_scan_node *snod; 611 + 612 + if (!list_empty(&sleb->nodes)) { 613 + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, 614 + list); 615 + 616 + dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); 617 + *offs = snod->offs; 618 + list_del(&snod->list); 619 + kfree(snod); 620 + sleb->nodes_cnt -= 1; 621 + } 595 622 } 596 623 597 624 /** ··· 623 604 * @lnum: LEB number 624 605 * @offs: offset 625 606 * @sbuf: LEB-sized buffer to use 626 - * @grouped: nodes may be grouped for recovery 607 + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not 608 + * belong to any journal head) 627 609 * 628 610 * This function does a scan of a LEB, but caters for errors that might have 629 611 * been caused by the unclean unmount from which we are attempting to recover. ··· 632 612 * found, and a negative error code in case of failure. 633 613 */ 634 614 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 635 - int offs, void *sbuf, int grouped) 615 + int offs, void *sbuf, int jhead) 636 616 { 637 617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; 618 + int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; 638 619 struct ubifs_scan_leb *sleb; 639 620 void *buf = sbuf + offs; 640 621 641 - dbg_rcvry("%d:%d", lnum, offs); 622 + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); 642 623 643 624 sleb = ubifs_start_scan(c, lnum, offs, sbuf); 644 625 if (IS_ERR(sleb)) ··· 656 635 * Scan quietly until there is an error from which we cannot 657 636 * recover 658 637 */ 659 - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 638 + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); 660 639 if (ret == SCANNED_A_NODE) { 661 640 /* A valid node, and not a padding node */ 662 641 struct ubifs_ch *ch = buf; ··· 716 695 * If nodes are grouped, always drop the incomplete group at 717 696 * the end. 718 697 */ 719 - drop_last_node(sleb, &offs, 1); 698 + drop_last_group(sleb, &offs); 720 699 721 - /* 722 - * While we are in the middle of the same min. I/O unit keep dropping 723 - * nodes. So basically, what we want is to make sure that the last min. 724 - * I/O unit where we saw the corruption is dropped completely with all 725 - * the uncorrupted node which may possibly sit there. 726 - * 727 - * In other words, let's name the min. I/O unit where the corruption 728 - * starts B, and the previous min. I/O unit A. The below code tries to 729 - * deal with a situation when half of B contains valid nodes or the end 730 - * of a valid node, and the second half of B contains corrupted data or 731 - * garbage. This means that UBIFS had been writing to B just before the 732 - * power cut happened. I do not know how realistic is this scenario 733 - * that half of the min. I/O unit had been written successfully and the 734 - * other half not, but this is possible in our 'failure mode emulation' 735 - * infrastructure at least. 736 - * 737 - * So what is the problem, why we need to drop those nodes? Whey can't 738 - * we just clean-up the second half of B by putting a padding node 739 - * there? We can, and this works fine with one exception which was 740 - * reproduced with power cut emulation testing and happens extremely 741 - * rarely. The description follows, but it is worth noting that that is 742 - * only about the GC head, so we could do this trick only if the bud 743 - * belongs to the GC head, but it does not seem to be worth an 744 - * additional "if" statement. 745 - * 746 - * So, imagine the file-system is full, we run GC which is moving valid 747 - * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head 748 - * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X 749 - * and will try to continue. Imagine that LEB X is currently the 750 - * dirtiest LEB, and the amount of used space in LEB Y is exactly the 751 - * same as amount of free space in LEB X. 752 - * 753 - * And a power cut happens when nodes are moved from LEB X to LEB Y. We 754 - * are here trying to recover LEB Y which is the GC head LEB. We find 755 - * the min. I/O unit B as described above. Then we clean-up LEB Y by 756 - * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function 757 - * fails, because it cannot find a dirty LEB which could be GC'd into 758 - * LEB Y! Even LEB X does not match because the amount of valid nodes 759 - * there does not fit the free space in LEB Y any more! And this is 760 - * because of the padding node which we added to LEB Y. The 761 - * user-visible effect of this which I once observed and analysed is 762 - * that we cannot mount the file-system with -ENOSPC error. 763 - * 764 - * So obviously, to make sure that situation does not happen we should 765 - * free min. I/O unit B in LEB Y completely and the last used min. I/O 766 - * unit in LEB Y should be A. This is basically what the below code 767 - * tries to do. 768 - */ 769 - while (min_io_unit == round_down(offs, c->min_io_size) && 770 - min_io_unit != offs && 771 - drop_last_node(sleb, &offs, grouped)); 700 + if (jhead == GCHD) { 701 + /* 702 + * If this LEB belongs to the GC head then while we are in the 703 + * middle of the same min. I/O unit keep dropping nodes. So 704 + * basically, what we want is to make sure that the last min. 705 + * I/O unit where we saw the corruption is dropped completely 706 + * with all the uncorrupted nodes which may possibly sit there. 707 + * 708 + * In other words, let's name the min. I/O unit where the 709 + * corruption starts B, and the previous min. I/O unit A. The 710 + * below code tries to deal with a situation when half of B 711 + * contains valid nodes or the end of a valid node, and the 712 + * second half of B contains corrupted data or garbage. This 713 + * means that UBIFS had been writing to B just before the power 714 + * cut happened. I do not know how realistic is this scenario 715 + * that half of the min. I/O unit had been written successfully 716 + * and the other half not, but this is possible in our 'failure 717 + * mode emulation' infrastructure at least. 718 + * 719 + * So what is the problem, why we need to drop those nodes? Why 720 + * can't we just clean-up the second half of B by putting a 721 + * padding node there? We can, and this works fine with one 722 + * exception which was reproduced with power cut emulation 723 + * testing and happens extremely rarely. 724 + * 725 + * Imagine the file-system is full, we run GC which starts 726 + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is 727 + * the current GC head LEB). The @c->gc_lnum is -1, which means 728 + * that GC will retain LEB X and will try to continue. Imagine 729 + * that LEB X is currently the dirtiest LEB, and the amount of 730 + * used space in LEB Y is exactly the same as amount of free 731 + * space in LEB X. 732 + * 733 + * And a power cut happens when nodes are moved from LEB X to 734 + * LEB Y. We are here trying to recover LEB Y which is the GC 735 + * head LEB. We find the min. I/O unit B as described above. 736 + * Then we clean-up LEB Y by padding min. I/O unit. And later 737 + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot 738 + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X 739 + * does not match because the amount of valid nodes there does 740 + * not fit the free space in LEB Y any more! And this is 741 + * because of the padding node which we added to LEB Y. The 742 + * user-visible effect of this which I once observed and 743 + * analysed is that we cannot mount the file-system with 744 + * -ENOSPC error. 745 + * 746 + * So obviously, to make sure that situation does not happen we 747 + * should free min. I/O unit B in LEB Y completely and the last 748 + * used min. I/O unit in LEB Y should be A. This is basically 749 + * what the below code tries to do. 750 + */ 751 + while (offs > min_io_unit) 752 + drop_last_node(sleb, &offs); 753 + } 772 754 773 755 buf = sbuf + offs; 774 756 len = c->leb_size - offs; ··· 905 881 } 906 882 ubifs_scan_destroy(sleb); 907 883 } 908 - return ubifs_recover_leb(c, lnum, offs, sbuf, 0); 884 + return ubifs_recover_leb(c, lnum, offs, sbuf, -1); 909 885 } 910 886 911 887 /**

+1 -2

fs/ubifs/replay.c

··· 557 557 * these LEBs could possibly be written to at the power cut 558 558 * time. 559 559 */ 560 - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, 561 - b->bud->jhead != GCHD); 560 + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 562 561 else 563 562 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 564 563 if (IS_ERR(sleb))

+5 -1

fs/ubifs/shrinker.c

··· 284 284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 285 285 286 286 if (nr == 0) 287 - return clean_zn_cnt; 287 + /* 288 + * Due to the way UBIFS updates the clean znode counter it may 289 + * temporarily be negative. 290 + */ 291 + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; 288 292 289 293 if (!clean_zn_cnt) { 290 294 /*

+24 -18

fs/ubifs/super.c

··· 811 811 812 812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; 813 813 c->jheads[i].wbuf.jhead = i; 814 + c->jheads[i].grouped = 1; 814 815 } 815 816 816 817 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; 817 818 /* 818 819 * Garbage Collector head likely contains long-term data and 819 - * does not need to be synchronized by timer. 820 + * does not need to be synchronized by timer. Also GC head nodes are 821 + * not grouped. 820 822 */ 821 823 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 822 824 c->jheads[GCHD].wbuf.no_timer = 1; 825 + c->jheads[GCHD].grouped = 0; 823 826 824 827 return 0; 825 828 } ··· 1287 1284 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1288 1285 ubifs_msg("recovery needed"); 1289 1286 c->need_recovery = 1; 1290 - if (!c->ro_mount) { 1291 - err = ubifs_recover_inl_heads(c, c->sbuf); 1292 - if (err) 1293 - goto out_master; 1294 - } 1295 - } else if (!c->ro_mount) { 1287 + } 1288 + 1289 + if (c->need_recovery && !c->ro_mount) { 1290 + err = ubifs_recover_inl_heads(c, c->sbuf); 1291 + if (err) 1292 + goto out_master; 1293 + } 1294 + 1295 + err = ubifs_lpt_init(c, 1, !c->ro_mount); 1296 + if (err) 1297 + goto out_master; 1298 + 1299 + if (!c->ro_mount && c->space_fixup) { 1300 + err = ubifs_fixup_free_space(c); 1301 + if (err) 1302 + goto out_master; 1303 + } 1304 + 1305 + if (!c->ro_mount) { 1296 1306 /* 1297 1307 * Set the "dirty" flag so that if we reboot uncleanly we 1298 1308 * will notice this immediately on the next mount. ··· 1313 1297 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); 1314 1298 err = ubifs_write_master(c); 1315 1299 if (err) 1316 - goto out_master; 1300 + goto out_lpt; 1317 1301 } 1318 - 1319 - err = ubifs_lpt_init(c, 1, !c->ro_mount); 1320 - if (err) 1321 - goto out_lpt; 1322 1302 1323 1303 err = dbg_check_idx_size(c, c->bi.old_idx_sz); 1324 1304 if (err) ··· 1407 1395 } 1408 1396 } else 1409 1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1410 - 1411 - if (!c->ro_mount && c->space_fixup) { 1412 - err = ubifs_fixup_free_space(c); 1413 - if (err) 1414 - goto out_infos; 1415 - } 1416 1398 1417 1399 err = dbg_check_filesystem(c); 1418 1400 if (err)

+5 -4

fs/ubifs/tnc.c

··· 2876 2876 */ 2877 2877 void ubifs_tnc_close(struct ubifs_info *c) 2878 2878 { 2879 - long clean_freed; 2880 - 2881 2879 tnc_destroy_cnext(c); 2882 2880 if (c->zroot.znode) { 2883 - clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); 2884 - atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); 2881 + long n; 2882 + 2883 + ubifs_destroy_tnc_subtree(c->zroot.znode); 2884 + n = atomic_long_read(&c->clean_zn_cnt); 2885 + atomic_long_sub(n, &ubifs_clean_zn_cnt); 2885 2886 } 2886 2887 kfree(c->gap_lebs); 2887 2888 kfree(c->ilebs);

+3 -1

fs/ubifs/ubifs.h

··· 722 722 * struct ubifs_jhead - journal head. 723 723 * @wbuf: head's write-buffer 724 724 * @buds_list: list of bud LEBs belonging to this journal head 725 + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head 725 726 * 726 727 * Note, the @buds list is protected by the @c->buds_lock. 727 728 */ 728 729 struct ubifs_jhead { 729 730 struct ubifs_wbuf wbuf; 730 731 struct list_head buds_list; 732 + unsigned int grouped:1; 731 733 }; 732 734 733 735 /** ··· 1744 1742 int ubifs_recover_master_node(struct ubifs_info *c); 1745 1743 int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); 1746 1744 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 1747 - int offs, void *sbuf, int grouped); 1745 + int offs, void *sbuf, int jhead); 1748 1746 struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1749 1747 int offs, void *sbuf); 1750 1748 int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);

Configure Feed

Configure Feed