Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs

Pull xfs bugfixes from Ben Myers:

- fix for large transactions spanning multiple iclog buffers

- zero the allocation_args structure on the stack before using it to
determine whether to use a worker for allocation

- move allocation stack switch to xfs_bmapi_allocate in order to
prevent deadlock on AGF buffers

- growfs no longer reads in garbage for new secondary superblocks

- silence a build warning

- ensure that invalid buffers never get written to disk while on free
list

- don't vmap inode cluster buffers during free

- fix buffer shutdown reference count mismatch

- fix reading of wrapped log data

* tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs:
xfs: fix reading of wrapped log data
xfs: fix buffer shudown reference count mismatch
xfs: don't vmap inode cluster buffers during free
xfs: invalidate allocbt blocks moved to the free list
xfs: silence uninitialised f.file warning.
xfs: growfs: don't read garbage for new secondary superblocks
xfs: move allocation stack switch up to xfs_bmapi_allocate
xfs: introduce XFS_BMAPI_STACK_SWITCH
xfs: zero allocation_args on the kernel stack
xfs: only update the last_sync_lsn when a transaction completes

+127 -63
+2 -41
fs/xfs/xfs_alloc.c
··· 1866 1866 /* 1867 1867 * Initialize the args structure. 1868 1868 */ 1869 + memset(&targs, 0, sizeof(targs)); 1869 1870 targs.tp = tp; 1870 1871 targs.mp = mp; 1871 1872 targs.agbp = agbp; ··· 2208 2207 * group or loop over the allocation groups to find the result. 2209 2208 */ 2210 2209 int /* error */ 2211 - __xfs_alloc_vextent( 2210 + xfs_alloc_vextent( 2212 2211 xfs_alloc_arg_t *args) /* allocation argument structure */ 2213 2212 { 2214 2213 xfs_agblock_t agsize; /* allocation group size */ ··· 2416 2415 error0: 2417 2416 xfs_perag_put(args->pag); 2418 2417 return error; 2419 - } 2420 - 2421 - static void 2422 - xfs_alloc_vextent_worker( 2423 - struct work_struct *work) 2424 - { 2425 - struct xfs_alloc_arg *args = container_of(work, 2426 - struct xfs_alloc_arg, work); 2427 - unsigned long pflags; 2428 - 2429 - /* we are in a transaction context here */ 2430 - current_set_flags_nested(&pflags, PF_FSTRANS); 2431 - 2432 - args->result = __xfs_alloc_vextent(args); 2433 - complete(args->done); 2434 - 2435 - current_restore_flags_nested(&pflags, PF_FSTRANS); 2436 - } 2437 - 2438 - /* 2439 - * Data allocation requests often come in with little stack to work on. Push 2440 - * them off to a worker thread so there is lots of stack to use. Metadata 2441 - * requests, OTOH, are generally from low stack usage paths, so avoid the 2442 - * context switch overhead here. 2443 - */ 2444 - int 2445 - xfs_alloc_vextent( 2446 - struct xfs_alloc_arg *args) 2447 - { 2448 - DECLARE_COMPLETION_ONSTACK(done); 2449 - 2450 - if (!args->userdata) 2451 - return __xfs_alloc_vextent(args); 2452 - 2453 - 2454 - args->done = &done; 2455 - INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); 2456 - queue_work(xfs_alloc_wq, &args->work); 2457 - wait_for_completion(&done); 2458 - return args->result; 2459 2418 } 2460 2419 2461 2420 /*
-3
fs/xfs/xfs_alloc.h
··· 120 120 char isfl; /* set if is freelist blocks - !acctg */ 121 121 char userdata; /* set if this is user data */ 122 122 xfs_fsblock_t firstblock; /* io first block allocated */ 123 - struct completion *done; 124 - struct work_struct work; 125 - int result; 126 123 } xfs_alloc_arg_t; 127 124 128 125 /*
+2
fs/xfs/xfs_alloc_btree.c
··· 121 121 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, 122 122 XFS_EXTENT_BUSY_SKIP_DISCARD); 123 123 xfs_trans_agbtree_delta(cur->bc_tp, -1); 124 + 125 + xfs_trans_binval(cur->bc_tp, bp); 124 126 return 0; 125 127 } 126 128
+54 -9
fs/xfs/xfs_bmap.c
··· 2437 2437 * Normal allocation, done through xfs_alloc_vextent. 2438 2438 */ 2439 2439 tryagain = isaligned = 0; 2440 + memset(&args, 0, sizeof(args)); 2440 2441 args.tp = ap->tp; 2441 2442 args.mp = mp; 2442 2443 args.fsbno = ap->blkno; ··· 3083 3082 * Convert to a btree with two levels, one record in root. 3084 3083 */ 3085 3084 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 3085 + memset(&args, 0, sizeof(args)); 3086 3086 args.tp = tp; 3087 3087 args.mp = mp; 3088 3088 args.firstblock = *firstblock; ··· 3239 3237 xfs_buf_t *bp; /* buffer for extent block */ 3240 3238 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3241 3239 3240 + memset(&args, 0, sizeof(args)); 3242 3241 args.tp = tp; 3243 3242 args.mp = ip->i_mount; 3244 3243 args.firstblock = *firstblock; ··· 4619 4616 4620 4617 4621 4618 STATIC int 4622 - xfs_bmapi_allocate( 4623 - struct xfs_bmalloca *bma, 4624 - int flags) 4619 + __xfs_bmapi_allocate( 4620 + struct xfs_bmalloca *bma) 4625 4621 { 4626 4622 struct xfs_mount *mp = bma->ip->i_mount; 4627 - int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 4623 + int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ? 4628 4624 XFS_ATTR_FORK : XFS_DATA_FORK; 4629 4625 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4630 4626 int tmp_logflags = 0; ··· 4656 4654 * Indicate if this is the first user data in the file, or just any 4657 4655 * user data. 4658 4656 */ 4659 - if (!(flags & XFS_BMAPI_METADATA)) { 4657 + if (!(bma->flags & XFS_BMAPI_METADATA)) { 4660 4658 bma->userdata = (bma->offset == 0) ? 4661 4659 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; 4662 4660 } 4663 4661 4664 - bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4662 + bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4665 4663 4666 4664 /* 4667 4665 * Only want to do the alignment at the eof if it is userdata and 4668 4666 * allocation length is larger than a stripe unit. 
4669 4667 */ 4670 4668 if (mp->m_dalign && bma->length >= mp->m_dalign && 4671 - !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { 4669 + !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { 4672 4670 error = xfs_bmap_isaeof(bma, whichfork); 4673 4671 if (error) 4674 4672 return error; 4675 4673 } 4674 + 4675 + if (bma->flags & XFS_BMAPI_STACK_SWITCH) 4676 + bma->stack_switch = 1; 4676 4677 4677 4678 error = xfs_bmap_alloc(bma); 4678 4679 if (error) ··· 4711 4706 * A wasdelay extent has been initialized, so shouldn't be flagged 4712 4707 * as unwritten. 4713 4708 */ 4714 - if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) && 4709 + if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && 4715 4710 xfs_sb_version_hasextflgbit(&mp->m_sb)) 4716 4711 bma->got.br_state = XFS_EXT_UNWRITTEN; 4717 4712 ··· 4737 4732 ASSERT(bma->got.br_state == XFS_EXT_NORM || 4738 4733 bma->got.br_state == XFS_EXT_UNWRITTEN); 4739 4734 return 0; 4735 + } 4736 + 4737 + static void 4738 + xfs_bmapi_allocate_worker( 4739 + struct work_struct *work) 4740 + { 4741 + struct xfs_bmalloca *args = container_of(work, 4742 + struct xfs_bmalloca, work); 4743 + unsigned long pflags; 4744 + 4745 + /* we are in a transaction context here */ 4746 + current_set_flags_nested(&pflags, PF_FSTRANS); 4747 + 4748 + args->result = __xfs_bmapi_allocate(args); 4749 + complete(args->done); 4750 + 4751 + current_restore_flags_nested(&pflags, PF_FSTRANS); 4752 + } 4753 + 4754 + /* 4755 + * Some allocation requests often come in with little stack to work on. Push 4756 + * them off to a worker thread so there is lots of stack to use. Otherwise just 4757 + * call directly to avoid the context switch overhead here. 
4758 + */ 4759 + int 4760 + xfs_bmapi_allocate( 4761 + struct xfs_bmalloca *args) 4762 + { 4763 + DECLARE_COMPLETION_ONSTACK(done); 4764 + 4765 + if (!args->stack_switch) 4766 + return __xfs_bmapi_allocate(args); 4767 + 4768 + 4769 + args->done = &done; 4770 + INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); 4771 + queue_work(xfs_alloc_wq, &args->work); 4772 + wait_for_completion(&done); 4773 + return args->result; 4740 4774 } 4741 4775 4742 4776 STATIC int ··· 4963 4919 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4964 4920 bma.wasdel = wasdelay; 4965 4921 bma.offset = bno; 4922 + bma.flags = flags; 4966 4923 4967 4924 /* 4968 4925 * There's a 32/64 bit type mismatch between the ··· 4979 4934 4980 4935 ASSERT(len > 0); 4981 4936 ASSERT(bma.length > 0); 4982 - error = xfs_bmapi_allocate(&bma, flags); 4937 + error = xfs_bmapi_allocate(&bma); 4983 4938 if (error) 4984 4939 goto error0; 4985 4940 if (bma.blkno == NULLFSBLOCK)
+8 -1
fs/xfs/xfs_bmap.h
··· 77 77 * from written to unwritten, otherwise convert from unwritten to written. 78 78 */ 79 79 #define XFS_BMAPI_CONVERT 0x040 80 + #define XFS_BMAPI_STACK_SWITCH 0x080 80 81 81 82 #define XFS_BMAPI_FLAGS \ 82 83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ ··· 86 85 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 87 86 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 88 87 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 89 - { XFS_BMAPI_CONVERT, "CONVERT" } 88 + { XFS_BMAPI_CONVERT, "CONVERT" }, \ 89 + { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } 90 90 91 91 92 92 static inline int xfs_bmapi_aflag(int w) ··· 135 133 char userdata;/* set if is user data */ 136 134 char aeof; /* allocated space at eof */ 137 135 char conv; /* overwriting unwritten extents */ 136 + char stack_switch; 137 + int flags; 138 + struct completion *done; 139 + struct work_struct work; 140 + int result; 138 141 } xfs_bmalloca_t; 139 142 140 143 /*
+18
fs/xfs/xfs_buf_item.c
··· 526 526 } 527 527 xfs_buf_relse(bp); 528 528 } else if (freed && remove) { 529 + /* 530 + * There are currently two references to the buffer - the active 531 + * LRU reference and the buf log item. What we are about to do 532 + * here - simulate a failed IO completion - requires 3 533 + * references. 534 + * 535 + * The LRU reference is removed by the xfs_buf_stale() call. The 536 + * buf item reference is removed by the xfs_buf_iodone() 537 + * callback that is run by xfs_buf_do_callbacks() during ioend 538 + * processing (via the bp->b_iodone callback), and then finally 539 + * the ioend processing will drop the IO reference if the buffer 540 + * is marked XBF_ASYNC. 541 + * 542 + * Hence we need to take an additional reference here so that IO 543 + * completion processing doesn't free the buffer prematurely. 544 + */ 529 545 xfs_buf_lock(bp); 546 + xfs_buf_hold(bp); 547 + bp->b_flags |= XBF_ASYNC; 530 548 xfs_buf_ioerror(bp, EIO); 531 549 XFS_BUF_UNDONE(bp); 532 550 xfs_buf_stale(bp);
+19 -2
fs/xfs/xfs_fsops.c
··· 399 399 400 400 /* update secondary superblocks. */ 401 401 for (agno = 1; agno < nagcount; agno++) { 402 - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 402 + error = 0; 403 + /* 404 + * new secondary superblocks need to be zeroed, not read from 405 + * disk as the contents of the new area we are growing into is 406 + * completely unknown. 407 + */ 408 + if (agno < oagcount) { 409 + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 403 410 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 404 411 XFS_FSS_TO_BB(mp, 1), 0, &bp); 412 + } else { 413 + bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp, 414 + XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 415 + XFS_FSS_TO_BB(mp, 1), 0); 416 + if (bp) 417 + xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); 418 + else 419 + error = ENOMEM; 420 + } 421 + 405 422 if (error) { 406 423 xfs_warn(mp, 407 424 "error %d reading secondary superblock for ag %d", ··· 440 423 break; /* no point in continuing */ 441 424 } 442 425 } 443 - return 0; 426 + return error; 444 427 445 428 error0: 446 429 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+1
fs/xfs/xfs_ialloc.c
··· 250 250 /* boundary */ 251 251 struct xfs_perag *pag; 252 252 253 + memset(&args, 0, sizeof(args)); 253 254 args.tp = tp; 254 255 args.mp = tp->t_mountp; 255 256
+2 -1
fs/xfs/xfs_inode.c
··· 1509 1509 * to mark all the active inodes on the buffer stale. 1510 1510 */ 1511 1511 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 1512 - mp->m_bsize * blks_per_cluster, 0); 1512 + mp->m_bsize * blks_per_cluster, 1513 + XBF_UNMAPPED); 1513 1514 1514 1515 if (!bp) 1515 1516 return ENOMEM;
+1 -1
fs/xfs/xfs_ioctl.c
··· 70 70 int hsize; 71 71 xfs_handle_t handle; 72 72 struct inode *inode; 73 - struct fd f; 73 + struct fd f = {0}; 74 74 struct path path; 75 75 int error; 76 76 struct xfs_inode *ip;
+3 -1
fs/xfs/xfs_iomap.c
··· 584 584 * pointer that the caller gave to us. 585 585 */ 586 586 error = xfs_bmapi_write(tp, ip, map_start_fsb, 587 - count_fsb, 0, &first_block, 1, 587 + count_fsb, 588 + XFS_BMAPI_STACK_SWITCH, 589 + &first_block, 1, 588 590 imap, &nimaps, &free_list); 589 591 if (error) 590 592 goto trans_cancel;
+16 -3
fs/xfs/xfs_log.c
··· 2387 2387 2388 2388 2389 2389 /* 2390 - * update the last_sync_lsn before we drop the 2390 + * Completion of an iclog IO does not imply that 2391 + * a transaction has completed, as transactions 2392 + * can be large enough to span many iclogs. We 2393 + * cannot change the tail of the log half way 2394 + * through a transaction as this may be the only 2395 + * transaction in the log and moving the tail to 2396 + * point to the middle of it will prevent 2397 + * recovery from finding the start of the 2398 + * transaction. Hence we should only update the 2399 + * last_sync_lsn if this iclog contains 2400 + * transaction completion callbacks on it. 2401 + * 2402 + * We have to do this before we drop the 2391 2403 * icloglock to ensure we are the only one that 2392 2404 * can update it. 2393 2405 */ 2394 2406 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2395 2407 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2396 - atomic64_set(&log->l_last_sync_lsn, 2397 - be64_to_cpu(iclog->ic_header.h_lsn)); 2408 + if (iclog->ic_callback) 2409 + atomic64_set(&log->l_last_sync_lsn, 2410 + be64_to_cpu(iclog->ic_header.h_lsn)); 2398 2411 2399 2412 } else 2400 2413 ioerrors++;
+1 -1
fs/xfs/xfs_log_recover.c
··· 3541 3541 * - order is important. 3542 3542 */ 3543 3543 error = xlog_bread_offset(log, 0, 3544 - bblks - split_bblks, hbp, 3544 + bblks - split_bblks, dbp, 3545 3545 offset + BBTOB(split_bblks)); 3546 3546 if (error) 3547 3547 goto bread_err2;