Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: xfs_bmap_add_extent_delay_real should init br_startblock
xfs: fix dquot shaker deadlock
xfs: handle CIL transaction commit failures correctly
xfs: limit extsize to size of AGs and/or MAXEXTLEN
xfs: prevent extsize alignment from exceeding maximum extent size
xfs: limit extent length for allocation to AG size
xfs: speculative delayed allocation uses rounddown_power_of_2 badly
xfs: fix efi item leak on forced shutdown
xfs: fix log ticket leak on forced shutdown.

+152 -71
+18 -2
fs/xfs/linux-2.6/xfs_ioctl.c
··· 985 985 986 986 /* 987 987 * Extent size must be a multiple of the appropriate block 988 - * size, if set at all. 988 + * size, if set at all. It must also be smaller than the 989 + * maximum extent size supported by the filesystem. 990 + * 991 + * Also, for non-realtime files, limit the extent size hint to 992 + * half the size of the AGs in the filesystem so alignment 993 + * doesn't result in extents larger than an AG. 989 994 */ 990 995 if (fa->fsx_extsize != 0) { 991 - xfs_extlen_t size; 996 + xfs_extlen_t size; 997 + xfs_fsblock_t extsize_fsb; 998 + 999 + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); 1000 + if (extsize_fsb > MAXEXTLEN) { 1001 + code = XFS_ERROR(EINVAL); 1002 + goto error_return; 1003 + } 992 1004 993 1005 if (XFS_IS_REALTIME_INODE(ip) || 994 1006 ((mask & FSX_XFLAGS) && ··· 1009 997 mp->m_sb.sb_blocklog; 1010 998 } else { 1011 999 size = mp->m_sb.sb_blocksize; 1000 + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { 1001 + code = XFS_ERROR(EINVAL); 1002 + goto error_return; 1003 + } 1012 1004 } 1013 1005 1014 1006 if (fa->fsx_extsize % size) {
+21 -25
fs/xfs/quota/xfs_qm.c
··· 1863 1863 xfs_dquot_t *dqpout; 1864 1864 xfs_dquot_t *dqp; 1865 1865 int restarts; 1866 + int startagain; 1866 1867 1867 1868 restarts = 0; 1868 1869 dqpout = NULL; 1869 1870 1870 1871 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ 1871 - startagain: 1872 + again: 1873 + startagain = 0; 1872 1874 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1873 1875 1874 1876 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { ··· 1887 1885 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); 1888 1886 1889 1887 trace_xfs_dqreclaim_want(dqp); 1890 - 1891 - xfs_dqunlock(dqp); 1892 - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1893 - if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1894 - return NULL; 1895 1888 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1896 - goto startagain; 1889 + restarts++; 1890 + startagain = 1; 1891 + goto dqunlock; 1897 1892 } 1898 1893 1899 1894 /* ··· 1905 1906 ASSERT(list_empty(&dqp->q_mplist)); 1906 1907 list_del_init(&dqp->q_freelist); 1907 1908 xfs_Gqm->qm_dqfrlist_cnt--; 1908 - xfs_dqunlock(dqp); 1909 1909 dqpout = dqp; 1910 1910 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); 1911 - break; 1911 + goto dqunlock; 1912 1912 } 1913 1913 1914 1914 ASSERT(dqp->q_hash); 1915 1915 ASSERT(!list_empty(&dqp->q_mplist)); 1916 1916 1917 1917 /* 1918 - * Try to grab the flush lock. If this dquot is in the process of 1919 - * getting flushed to disk, we don't want to reclaim it. 1918 + * Try to grab the flush lock. If this dquot is in the process 1919 + * of getting flushed to disk, we don't want to reclaim it. 
1920 1920 */ 1921 - if (!xfs_dqflock_nowait(dqp)) { 1922 - xfs_dqunlock(dqp); 1923 - continue; 1924 - } 1921 + if (!xfs_dqflock_nowait(dqp)) 1922 + goto dqunlock; 1925 1923 1926 1924 /* 1927 1925 * We have the flush lock so we know that this is not in the ··· 1940 1944 xfs_fs_cmn_err(CE_WARN, mp, 1941 1945 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 1942 1946 } 1943 - xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 1944 - continue; 1947 + goto dqunlock; 1945 1948 } 1946 1949 1947 1950 /* ··· 1962 1967 */ 1963 1968 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { 1964 1969 restarts++; 1965 - mutex_unlock(&dqp->q_hash->qh_lock); 1966 - xfs_dqfunlock(dqp); 1967 - xfs_dqunlock(dqp); 1968 - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1969 - if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) 1970 - return NULL; 1971 - goto startagain; 1970 + startagain = 1; 1971 + goto qhunlock; 1972 1972 } 1973 1973 1974 1974 ASSERT(dqp->q_nrefs == 0); ··· 1976 1986 xfs_Gqm->qm_dqfrlist_cnt--; 1977 1987 dqpout = dqp; 1978 1988 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 1989 + qhunlock: 1979 1990 mutex_unlock(&dqp->q_hash->qh_lock); 1980 1991 dqfunlock: 1981 1992 xfs_dqfunlock(dqp); 1993 + dqunlock: 1982 1994 xfs_dqunlock(dqp); 1983 1995 if (dqpout) 1984 1996 break; 1985 1997 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1986 - return NULL; 1998 + break; 1999 + if (startagain) { 2000 + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 2001 + goto again; 2002 + } 1987 2003 } 1988 2004 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1989 2005 return dqpout;
+16
fs/xfs/xfs_alloc.h
··· 75 75 #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) 76 76 77 77 /* 78 + * When deciding how much space to allocate out of an AG, we limit the 79 + * allocation maximum size to the size of the AG. However, we cannot use all the 80 + * blocks in the AG - some are permanently used by metadata. These 81 + * blocks are generally: 82 + * - the AG superblock, AGF, AGI and AGFL 83 + * - the AGF (bno and cnt) and AGI btree root blocks 84 + * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits 85 + * 86 + * The AG headers are sector sized, so the amount of space they take up is 87 + * dependent on filesystem geometry. The others are all single blocks. 88 + */ 89 + #define XFS_ALLOC_AG_MAX_USABLE(mp) \ 90 + ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) 91 + 92 + 93 + /* 78 94 * Argument structure for xfs_alloc routines. 79 95 * This is turned into a structure to avoid having 20 arguments passed 80 96 * down several levels of the stack.
+45 -16
fs/xfs/xfs_bmap.c
··· 1038 1038 * Filling in the middle part of a previous delayed allocation. 1039 1039 * Contiguity is impossible here. 1040 1040 * This case is avoided almost all the time. 1041 + * 1042 + * We start with a delayed allocation: 1043 + * 1044 + * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ 1045 + * PREV @ idx 1046 + * 1047 + * and we are allocating: 1048 + * +rrrrrrrrrrrrrrrrr+ 1049 + * new 1050 + * 1051 + * and we set it up for insertion as: 1052 + * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ 1053 + * new 1054 + * PREV @ idx LEFT RIGHT 1055 + * inserted at idx + 1 1041 1056 */ 1042 1057 temp = new->br_startoff - PREV.br_startoff; 1043 - trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); 1044 - xfs_bmbt_set_blockcount(ep, temp); 1045 - r[0] = *new; 1046 - r[1].br_state = PREV.br_state; 1047 - r[1].br_startblock = 0; 1048 - r[1].br_startoff = new_endoff; 1049 1058 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1050 - r[1].br_blockcount = temp2; 1051 - xfs_iext_insert(ip, idx + 1, 2, &r[0], state); 1059 + trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); 1060 + xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ 1061 + LEFT = *new; 1062 + RIGHT.br_state = PREV.br_state; 1063 + RIGHT.br_startblock = nullstartblock( 1064 + (int)xfs_bmap_worst_indlen(ip, temp2)); 1065 + RIGHT.br_startoff = new_endoff; 1066 + RIGHT.br_blockcount = temp2; 1067 + /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ 1068 + xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); 1052 1069 ip->i_df.if_lastex = idx + 1; 1053 1070 ip->i_d.di_nextents++; 1054 1071 if (cur == NULL) ··· 2447 2430 startag = ag = 0; 2448 2431 2449 2432 pag = xfs_perag_get(mp, ag); 2450 - while (*blen < ap->alen) { 2433 + while (*blen < args->maxlen) { 2451 2434 if (!pag->pagf_init) { 2452 2435 error = xfs_alloc_pagf_init(mp, args->tp, ag, 2453 2436 XFS_ALLOC_FLAG_TRYLOCK); ··· 2469 2452 notinit = 1; 2470 2453 2471 2454 if (xfs_inode_is_filestream(ap->ip)) { 2472 - if (*blen >= 
ap->alen) 2455 + if (*blen >= args->maxlen) 2456 2473 break; 2457 2474 2458 2475 if (ap->userdata) { ··· 2515 2498 * If the best seen length is less than the request 2516 2499 * length, use the best as the minimum. 2517 2500 */ 2518 - else if (*blen < ap->alen) 2501 + else if (*blen < args->maxlen) 2519 2502 args->minlen = *blen; 2520 2503 /* 2521 - * Otherwise we've seen an extent as big as alen, 2504 + * Otherwise we've seen an extent as big as maxlen, 2522 2505 * use that as the minimum. 2523 2506 */ 2524 2507 else 2525 - args->minlen = ap->alen; 2508 + args->minlen = args->maxlen; 2526 2509 2527 2510 /* 2528 2511 * set the failure fallback case to look in the selected ··· 2590 2573 args.tp = ap->tp; 2591 2574 args.mp = mp; 2592 2575 args.fsbno = ap->rval; 2593 - args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); 2576 + 2577 + /* Trim the allocation back to the maximum an AG can fit. */ 2578 + args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp)); 2594 2579 args.firstblock = ap->firstblock; 2595 2580 blen = 0; 2596 2581 if (nullfb) { ··· 2640 2621 /* 2641 2622 * Adjust for alignment 2642 2623 */ 2643 - if (blen > args.alignment && blen <= ap->alen) 2624 + if (blen > args.alignment && blen <= args.maxlen) 2644 2625 args.minlen = blen - args.alignment; 2645 2626 args.minalignslop = 0; 2646 2627 } else { ··· 2659 2640 * of minlen+alignment+slop doesn't go up 2660 2641 * between the calls. 2661 2642 */ 2662 - if (blen > mp->m_dalign && blen <= ap->alen) 2663 + if (blen > mp->m_dalign && blen <= args.maxlen) 2663 2644 nextminlen = blen - mp->m_dalign; 2664 2645 else 2665 2646 nextminlen = args.minlen; ··· 4504 4485 /* Figure out the extent size, adjust alen */ 4505 4486 extsz = xfs_get_extsz_hint(ip); 4506 4487 if (extsz) { 4488 + /* 4489 + * make sure we don't exceed a single 4490 + * extent length when we align the 4491 + * extent by reducing length we are 4492 + * going to allocate by the maximum 4493 + * amount extent size alignment may 4494 + * require. 
4495 + */ 4496 + alen = XFS_FILBLKS_MIN(len, 4497 + MAXEXTLEN - (2 * extsz - 1)); 4507 4498 error = xfs_bmap_extsize_align(mp, 4508 4499 &got, &prev, extsz, 4509 4500 rt, eof,
+7 -5
fs/xfs/xfs_buf_item.c
··· 427 427 428 428 if (remove) { 429 429 /* 430 - * We have to remove the log item from the transaction 431 - * as we are about to release our reference to the 432 - * buffer. If we don't, the unlock that occurs later 433 - * in xfs_trans_uncommit() will ry to reference the 430 + * If we are in a transaction context, we have to 431 + * remove the log item from the transaction as we are 432 + * about to release our reference to the buffer. If we 433 + * don't, the unlock that occurs later in 434 + * xfs_trans_uncommit() will try to reference the 434 435 * buffer which we no longer have a hold on. 435 436 */ 436 - xfs_trans_del_item(lip); 437 + if (lip->li_desc) 438 + xfs_trans_del_item(lip); 437 439 438 440 /* 439 441 * Since the transaction no longer refers to the buffer,
+2 -1
fs/xfs/xfs_extfree_item.c
··· 138 138 139 139 if (remove) { 140 140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); 141 - xfs_trans_del_item(lip); 141 + if (lip->li_desc) 142 + xfs_trans_del_item(lip); 142 143 xfs_efi_item_free(efip); 143 144 return; 144 145 }
+6 -1
fs/xfs/xfs_iomap.c
··· 337 337 int shift = 0; 338 338 int64_t freesp; 339 339 340 - alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); 340 + /* 341 + * rounddown_pow_of_two() returns an undefined result 342 + * if we pass in alloc_blocks = 0. Hence the "+ 1" to 343 + * ensure we always pass in a non-zero value. 344 + */ 345 + alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; 341 346 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 342 347 rounddown_pow_of_two(alloc_blocks)); 343 348
+1 -1
fs/xfs/xfs_log.h
··· 191 191 192 192 xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); 193 193 194 - int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 194 + void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 195 195 struct xfs_log_vec *log_vector, 196 196 xfs_lsn_t *commit_lsn, int flags); 197 197 bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
+6 -9
fs/xfs/xfs_log_cil.c
··· 543 543 544 544 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); 545 545 if (error) 546 - goto out_abort; 546 + goto out_abort_free_ticket; 547 547 548 548 /* 549 549 * now that we've written the checkpoint into the log, strictly ··· 569 569 } 570 570 spin_unlock(&cil->xc_cil_lock); 571 571 572 + /* xfs_log_done always frees the ticket on error. */ 572 573 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); 573 - if (error || commit_lsn == -1) 574 + if (commit_lsn == -1) 574 575 goto out_abort; 575 576 576 577 /* attach all the transactions w/ busy extents to iclog */ ··· 601 600 kmem_free(new_ctx); 602 601 return 0; 603 602 603 + out_abort_free_ticket: 604 + xfs_log_ticket_put(tic); 604 605 out_abort: 605 606 xlog_cil_committed(ctx, XFS_LI_ABORTED); 606 607 return XFS_ERROR(EIO); ··· 625 622 * background commit, returns without it held once background commits are 626 623 * allowed again. 627 624 */ 628 - int 625 + void 629 626 xfs_log_commit_cil( 630 627 struct xfs_mount *mp, 631 628 struct xfs_trans *tp, ··· 639 636 640 637 if (flags & XFS_TRANS_RELEASE_LOG_RES) 641 638 log_flags = XFS_LOG_REL_PERM_RESERV; 642 - 643 - if (XLOG_FORCED_SHUTDOWN(log)) { 644 - xlog_cil_free_logvec(log_vector); 645 - return XFS_ERROR(EIO); 646 - } 647 639 648 640 /* 649 641 * do all the hard work of formatting items (including memory ··· 699 701 */ 700 702 if (push) 701 703 xlog_cil_push(log, 0); 702 - return 0; 703 704 } 704 705 705 706 /*
+30 -11
fs/xfs/xfs_trans.c
··· 1446 1446 * Bulk operation version of xfs_trans_committed that takes a log vector of 1447 1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to 1448 1448 * minimise lock traffic. 1449 + * 1450 + * If we are called with the aborted flag set, it is because a log write during 1451 + * a CIL checkpoint commit has failed. In this case, all the items in the 1452 + * checkpoint have already gone through IOP_COMMITTED and IOP_UNLOCK, which 1453 + * means that checkpoint commit abort handling is treated exactly the same 1454 + * as an iclog write error even though we haven't started any IO yet. Hence in 1455 + * this case all we need to do is IOP_COMMITTED processing, followed by an 1456 + * IOP_UNPIN(aborted) call. 1449 1457 */ 1450 1458 void 1451 1459 xfs_trans_committed_bulk( ··· 1479 1471 /* item_lsn of -1 means the item was freed */ 1480 1472 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 1481 1473 continue; 1474 + 1475 + /* 1476 + * if we are aborting the operation, no point in inserting the 1477 + * object into the AIL as we are in a shutdown situation. 1478 + */ 1479 + if (aborted) { 1480 + ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); 1481 + IOP_UNPIN(lip, 1); 1482 + continue; 1483 + } 1482 1484 1483 1485 if (item_lsn != commit_lsn) { 1484 1486 ··· 1521 1503 } 1522 1504 1523 1505 /* 1524 - * Called from the trans_commit code when we notice that 1525 - * the filesystem is in the middle of a forced shutdown. 1506 + * Called from the trans_commit code when we notice that the filesystem is in 1507 + * the middle of a forced shutdown. 1508 + * 1509 + * When we are called here, we have already pinned all the items in the 1510 + * transaction. However, neither IOP_COMMITTING nor IOP_UNLOCK has been called 1511 + * so we can simply walk the items in the transaction, unpin them with an abort 1512 + * flag and then free the items. 
Note that unpinning the items can result in 1513 + * them being freed immediately, so we need to use a safe list traversal method 1514 + * here. 1526 1515 */ 1527 1516 STATIC void 1528 1517 xfs_trans_uncommit( 1529 1518 struct xfs_trans *tp, 1530 1519 uint flags) 1531 1520 { 1532 - struct xfs_log_item_desc *lidp; 1521 + struct xfs_log_item_desc *lidp, *n; 1533 1522 1534 - list_for_each_entry(lidp, &tp->t_items, lid_trans) { 1535 - /* 1536 - * Unpin all but those that aren't dirty. 1537 - */ 1523 + list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { 1538 1524 if (lidp->lid_flags & XFS_LID_DIRTY) 1539 1525 IOP_UNPIN(lidp->lid_item, 1); 1540 1526 } ··· 1755 1733 int flags) 1756 1734 { 1757 1735 struct xfs_log_vec *log_vector; 1758 - int error; 1759 1736 1760 1737 /* 1761 1738 * Get each log item to allocate a vector structure for ··· 1765 1744 if (!log_vector) 1766 1745 return ENOMEM; 1767 1746 1768 - error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); 1769 - if (error) 1770 - return error; 1747 + xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); 1771 1748 1772 1749 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1773 1750 xfs_trans_free(tp);