Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
"I have some more fixes this week: better input validation, corruption
avoidance, build fixes, memory leak fixes, and a couple from Christoph
to avoid an ENOSPC failure.

Summary:
- Fix race conditions in the CoW code
- Fix some incorrect input validation checks
- Avoid crashing fs by running out of space when freeing inodes
- Fix TOCTOU race with respect to whether or not an inode has an attr
- Fix build error on arm
- Fix page refcount corruption when readahead fails
- Don't corrupt userspace in the bmap ioctl"

* tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: prevent quotacheck from overloading inode lru
xfs: fix bmv_count confusion w/ shared extents
xfs: clear _XBF_PAGES from buffers when readahead page
xfs: extsize hints are not unlikely in xfs_bmap_btalloc
xfs: remove racy hasattr check from attr ops
xfs: use per-AG reservations for the finobt
xfs: only update mount/resv fields on success in __xfs_ag_resv_init
xfs: verify dirblocklog correctly
xfs: fix COW writeback race

+220 -63
+55 -15
fs/xfs/libxfs/xfs_ag_resv.c
··· 39 39 #include "xfs_rmap_btree.h" 40 40 #include "xfs_btree.h" 41 41 #include "xfs_refcount_btree.h" 42 + #include "xfs_ialloc_btree.h" 42 43 43 44 /* 44 45 * Per-AG Block Reservations ··· 201 200 struct xfs_mount *mp = pag->pag_mount; 202 201 struct xfs_ag_resv *resv; 203 202 int error; 203 + xfs_extlen_t reserved; 204 204 205 - resv = xfs_perag_resv(pag, type); 206 205 if (used > ask) 207 206 ask = used; 208 - resv->ar_asked = ask; 209 - resv->ar_reserved = resv->ar_orig_reserved = ask - used; 210 - mp->m_ag_max_usable -= ask; 207 + reserved = ask - used; 211 208 212 - trace_xfs_ag_resv_init(pag, type, ask); 213 - 214 - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); 215 - if (error) 209 + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); 210 + if (error) { 216 211 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, 217 212 error, _RET_IP_); 213 + xfs_warn(mp, 214 + "Per-AG reservation for AG %u failed. Filesystem may run out of space.", 215 + pag->pag_agno); 216 + return error; 217 + } 218 218 219 - return error; 219 + mp->m_ag_max_usable -= ask; 220 + 221 + resv = xfs_perag_resv(pag, type); 222 + resv->ar_asked = ask; 223 + resv->ar_reserved = resv->ar_orig_reserved = reserved; 224 + 225 + trace_xfs_ag_resv_init(pag, type, ask); 226 + return 0; 220 227 } 221 228 222 229 /* Create a per-AG block reservation. 
*/ ··· 232 223 xfs_ag_resv_init( 233 224 struct xfs_perag *pag) 234 225 { 226 + struct xfs_mount *mp = pag->pag_mount; 227 + xfs_agnumber_t agno = pag->pag_agno; 235 228 xfs_extlen_t ask; 236 229 xfs_extlen_t used; 237 230 int error = 0; ··· 242 231 if (pag->pag_meta_resv.ar_asked == 0) { 243 232 ask = used = 0; 244 233 245 - error = xfs_refcountbt_calc_reserves(pag->pag_mount, 246 - pag->pag_agno, &ask, &used); 234 + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); 235 + if (error) 236 + goto out; 237 + 238 + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); 247 239 if (error) 248 240 goto out; 249 241 250 242 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 251 243 ask, used); 252 - if (error) 253 - goto out; 244 + if (error) { 245 + /* 246 + * Because we didn't have per-AG reservations when the 247 + * finobt feature was added we might not be able to 248 + * reserve all needed blocks. Warn and fall back to the 249 + * old and potentially buggy code in that case, but 250 + * ensure we do have the reservation for the refcountbt. 
251 + */ 252 + ask = used = 0; 253 + 254 + mp->m_inotbt_nores = true; 255 + 256 + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, 257 + &used); 258 + if (error) 259 + goto out; 260 + 261 + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 262 + ask, used); 263 + if (error) 264 + goto out; 265 + } 254 266 } 255 267 256 268 /* Create the AGFL metadata reservation */ 257 269 if (pag->pag_agfl_resv.ar_asked == 0) { 258 270 ask = used = 0; 259 271 260 - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, 261 - &ask, &used); 272 + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); 262 273 if (error) 263 274 goto out; 264 275 ··· 289 256 goto out; 290 257 } 291 258 259 + #ifdef DEBUG 260 + /* need to read in the AGF for the ASSERT below to work */ 261 + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); 262 + if (error) 263 + return error; 264 + 292 265 ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 293 266 xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= 294 267 pag->pagf_freeblks + pag->pagf_flcount); 268 + #endif 295 269 out: 296 270 return error; 297 271 }
-6
fs/xfs/libxfs/xfs_attr.c
··· 131 131 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 132 132 return -EIO; 133 133 134 - if (!xfs_inode_hasattr(ip)) 135 - return -ENOATTR; 136 - 137 134 error = xfs_attr_args_init(&args, ip, name, flags); 138 135 if (error) 139 136 return error; ··· 388 391 389 392 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 390 393 return -EIO; 391 - 392 - if (!xfs_inode_hasattr(dp)) 393 - return -ENOATTR; 394 394 395 395 error = xfs_attr_args_init(&args, dp, name, flags); 396 396 if (error)
+34 -14
fs/xfs/libxfs/xfs_bmap.c
··· 3629 3629 align = xfs_get_cowextsz_hint(ap->ip); 3630 3630 else if (xfs_alloc_is_userdata(ap->datatype)) 3631 3631 align = xfs_get_extsz_hint(ap->ip); 3632 - if (unlikely(align)) { 3632 + if (align) { 3633 3633 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 3634 3634 align, 0, ap->eof, 0, ap->conv, 3635 3635 &ap->offset, &ap->length); ··· 3701 3701 args.minlen = ap->minlen; 3702 3702 } 3703 3703 /* apply extent size hints if obtained earlier */ 3704 - if (unlikely(align)) { 3704 + if (align) { 3705 3705 args.prod = align; 3706 3706 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) 3707 3707 args.mod = (xfs_extlen_t)(args.prod - args.mod); ··· 4514 4514 int n; /* current extent index */ 4515 4515 xfs_fileoff_t obno; /* old block number (offset) */ 4516 4516 int whichfork; /* data or attr fork */ 4517 - char inhole; /* current location is hole in file */ 4518 - char wasdelay; /* old extent was delayed */ 4519 4517 4520 4518 #ifdef DEBUG 4521 4519 xfs_fileoff_t orig_bno; /* original block number value */ ··· 4601 4603 bma.firstblock = firstblock; 4602 4604 4603 4605 while (bno < end && n < *nmap) { 4604 - inhole = eof || bma.got.br_startoff > bno; 4605 - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 4606 + bool need_alloc = false, wasdelay = false; 4606 4607 4607 - /* 4608 - * Make sure we only reflink into a hole. 4609 - */ 4610 - if (flags & XFS_BMAPI_REMAP) 4611 - ASSERT(inhole); 4612 - if (flags & XFS_BMAPI_COWFORK) 4613 - ASSERT(!inhole); 4608 + /* in hole or beyoned EOF? */ 4609 + if (eof || bma.got.br_startoff > bno) { 4610 + if (flags & XFS_BMAPI_DELALLOC) { 4611 + /* 4612 + * For the COW fork we can reasonably get a 4613 + * request for converting an extent that races 4614 + * with other threads already having converted 4615 + * part of it, as there converting COW to 4616 + * regular blocks is not protected using the 4617 + * IOLOCK. 
4618 + */ 4619 + ASSERT(flags & XFS_BMAPI_COWFORK); 4620 + if (!(flags & XFS_BMAPI_COWFORK)) { 4621 + error = -EIO; 4622 + goto error0; 4623 + } 4624 + 4625 + if (eof || bno >= end) 4626 + break; 4627 + } else { 4628 + need_alloc = true; 4629 + } 4630 + } else { 4631 + /* 4632 + * Make sure we only reflink into a hole. 4633 + */ 4634 + ASSERT(!(flags & XFS_BMAPI_REMAP)); 4635 + if (isnullstartblock(bma.got.br_startblock)) 4636 + wasdelay = true; 4637 + } 4614 4638 4615 4639 /* 4616 4640 * First, deal with the hole before the allocated space 4617 4641 * that we found, if any. 4618 4642 */ 4619 - if (inhole || wasdelay) { 4643 + if (need_alloc || wasdelay) { 4620 4644 bma.eof = eof; 4621 4645 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4622 4646 bma.wasdel = wasdelay;
+5 -1
fs/xfs/libxfs/xfs_bmap.h
··· 110 110 /* Map something in the CoW fork. */ 111 111 #define XFS_BMAPI_COWFORK 0x200 112 112 113 + /* Only convert delalloc space, don't allocate entirely new extents */ 114 + #define XFS_BMAPI_DELALLOC 0x400 115 + 113 116 #define XFS_BMAPI_FLAGS \ 114 117 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 115 118 { XFS_BMAPI_METADATA, "METADATA" }, \ ··· 123 120 { XFS_BMAPI_CONVERT, "CONVERT" }, \ 124 121 { XFS_BMAPI_ZERO, "ZERO" }, \ 125 122 { XFS_BMAPI_REMAP, "REMAP" }, \ 126 - { XFS_BMAPI_COWFORK, "COWFORK" } 123 + { XFS_BMAPI_COWFORK, "COWFORK" }, \ 124 + { XFS_BMAPI_DELALLOC, "DELALLOC" } 127 125 128 126 129 127 static inline int xfs_bmapi_aflag(int w)
+87 -3
fs/xfs/libxfs/xfs_ialloc_btree.c
··· 82 82 } 83 83 84 84 STATIC int 85 - xfs_inobt_alloc_block( 85 + __xfs_inobt_alloc_block( 86 86 struct xfs_btree_cur *cur, 87 87 union xfs_btree_ptr *start, 88 88 union xfs_btree_ptr *new, 89 - int *stat) 89 + int *stat, 90 + enum xfs_ag_resv_type resv) 90 91 { 91 92 xfs_alloc_arg_t args; /* block allocation args */ 92 93 int error; /* error return value */ ··· 104 103 args.maxlen = 1; 105 104 args.prod = 1; 106 105 args.type = XFS_ALLOCTYPE_NEAR_BNO; 106 + args.resv = resv; 107 107 108 108 error = xfs_alloc_vextent(&args); 109 109 if (error) { ··· 122 120 new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); 123 121 *stat = 1; 124 122 return 0; 123 + } 124 + 125 + STATIC int 126 + xfs_inobt_alloc_block( 127 + struct xfs_btree_cur *cur, 128 + union xfs_btree_ptr *start, 129 + union xfs_btree_ptr *new, 130 + int *stat) 131 + { 132 + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); 133 + } 134 + 135 + STATIC int 136 + xfs_finobt_alloc_block( 137 + struct xfs_btree_cur *cur, 138 + union xfs_btree_ptr *start, 139 + union xfs_btree_ptr *new, 140 + int *stat) 141 + { 142 + return __xfs_inobt_alloc_block(cur, start, new, stat, 143 + XFS_AG_RESV_METADATA); 125 144 } 126 145 127 146 STATIC int ··· 351 328 352 329 .dup_cursor = xfs_inobt_dup_cursor, 353 330 .set_root = xfs_finobt_set_root, 354 - .alloc_block = xfs_inobt_alloc_block, 331 + .alloc_block = xfs_finobt_alloc_block, 355 332 .free_block = xfs_inobt_free_block, 356 333 .get_minrecs = xfs_inobt_get_minrecs, 357 334 .get_maxrecs = xfs_inobt_get_maxrecs, ··· 503 480 return 0; 504 481 } 505 482 #endif /* DEBUG */ 483 + 484 + static xfs_extlen_t 485 + xfs_inobt_max_size( 486 + struct xfs_mount *mp) 487 + { 488 + /* Bail out if we're uninitialized, which can happen in mkfs. 
*/ 489 + if (mp->m_inobt_mxr[0] == 0) 490 + return 0; 491 + 492 + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, 493 + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / 494 + XFS_INODES_PER_CHUNK); 495 + } 496 + 497 + static int 498 + xfs_inobt_count_blocks( 499 + struct xfs_mount *mp, 500 + xfs_agnumber_t agno, 501 + xfs_btnum_t btnum, 502 + xfs_extlen_t *tree_blocks) 503 + { 504 + struct xfs_buf *agbp; 505 + struct xfs_btree_cur *cur; 506 + int error; 507 + 508 + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 509 + if (error) 510 + return error; 511 + 512 + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); 513 + error = xfs_btree_count_blocks(cur, tree_blocks); 514 + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 515 + xfs_buf_relse(agbp); 516 + 517 + return error; 518 + } 519 + 520 + /* 521 + * Figure out how many blocks to reserve and how many are used by this btree. 522 + */ 523 + int 524 + xfs_finobt_calc_reserves( 525 + struct xfs_mount *mp, 526 + xfs_agnumber_t agno, 527 + xfs_extlen_t *ask, 528 + xfs_extlen_t *used) 529 + { 530 + xfs_extlen_t tree_len = 0; 531 + int error; 532 + 533 + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) 534 + return 0; 535 + 536 + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); 537 + if (error) 538 + return error; 539 + 540 + *ask += xfs_inobt_max_size(mp); 541 + *used += tree_len; 542 + return 0; 543 + }
+3
fs/xfs/libxfs/xfs_ialloc_btree.h
··· 72 72 #define xfs_inobt_rec_check_count(mp, rec) 0 73 73 #endif /* DEBUG */ 74 74 75 + int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, 76 + xfs_extlen_t *ask, xfs_extlen_t *used); 77 + 75 78 #endif /* __XFS_IALLOC_BTREE_H__ */
+1 -1
fs/xfs/libxfs/xfs_sb.c
··· 242 242 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 243 243 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 244 244 sbp->sb_blocksize != (1 << sbp->sb_blocklog) || 245 - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || 245 + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 246 246 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 247 247 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 248 248 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
+18 -10
fs/xfs/xfs_bmap_util.c
··· 528 528 xfs_bmbt_irec_t *map; /* buffer for user's data */ 529 529 xfs_mount_t *mp; /* file system mount point */ 530 530 int nex; /* # of user extents can do */ 531 - int nexleft; /* # of user extents left */ 532 531 int subnex; /* # of bmapi's can do */ 533 532 int nmap; /* number of map entries */ 534 533 struct getbmapx *out; /* output structure */ ··· 685 686 goto out_free_map; 686 687 } 687 688 688 - nexleft = nex; 689 - 690 689 do { 691 - nmap = (nexleft > subnex) ? subnex : nexleft; 690 + nmap = (nex> subnex) ? subnex : nex; 692 691 error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 693 692 XFS_BB_TO_FSB(mp, bmv->bmv_length), 694 693 map, &nmap, bmapi_flags); ··· 694 697 goto out_free_map; 695 698 ASSERT(nmap <= subnex); 696 699 697 - for (i = 0; i < nmap && nexleft && bmv->bmv_length && 698 - cur_ext < bmv->bmv_count; i++) { 700 + for (i = 0; i < nmap && bmv->bmv_length && 701 + cur_ext < bmv->bmv_count - 1; i++) { 699 702 out[cur_ext].bmv_oflags = 0; 700 703 if (map[i].br_state == XFS_EXT_UNWRITTEN) 701 704 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; ··· 757 760 continue; 758 761 } 759 762 763 + /* 764 + * In order to report shared extents accurately, 765 + * we report each distinct shared/unshared part 766 + * of a single bmbt record using multiple bmap 767 + * extents. To make that happen, we iterate the 768 + * same map array item multiple times, each 769 + * time trimming out the subextent that we just 770 + * reported. 771 + * 772 + * Because of this, we must check the out array 773 + * index (cur_ext) directly against bmv_count-1 774 + * to avoid overflows. 
775 + */ 760 776 if (inject_map.br_startblock != NULLFSBLOCK) { 761 777 map[i] = inject_map; 762 778 i--; 763 - } else 764 - nexleft--; 779 + } 765 780 bmv->bmv_entries++; 766 781 cur_ext++; 767 782 } 768 - } while (nmap && nexleft && bmv->bmv_length && 769 - cur_ext < bmv->bmv_count); 783 + } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); 770 784 771 785 out_free_map: 772 786 kmem_free(map);
+1
fs/xfs/xfs_buf.c
··· 422 422 out_free_pages: 423 423 for (i = 0; i < bp->b_page_count; i++) 424 424 __free_page(bp->b_pages[i]); 425 + bp->b_flags &= ~_XBF_PAGES; 425 426 return error; 426 427 } 427 428
+12 -11
fs/xfs/xfs_inode.c
··· 1792 1792 int error; 1793 1793 1794 1794 /* 1795 - * The ifree transaction might need to allocate blocks for record 1796 - * insertion to the finobt. We don't want to fail here at ENOSPC, so 1797 - * allow ifree to dip into the reserved block pool if necessary. 1798 - * 1799 - * Freeing large sets of inodes generally means freeing inode chunks, 1800 - * directory and file data blocks, so this should be relatively safe. 1801 - * Only under severe circumstances should it be possible to free enough 1802 - * inodes to exhaust the reserve block pool via finobt expansion while 1803 - * at the same time not creating free space in the filesystem. 1795 + * We try to use a per-AG reservation for any block needed by the finobt 1796 + * tree, but as the finobt feature predates the per-AG reservation 1797 + * support a degraded file system might not have enough space for the 1798 + * reservation at mount time. In that case try to dip into the reserved 1799 + * pool and pray. 1804 1800 * 1805 1801 * Send a warning if the reservation does happen to fail, as the inode 1806 1802 * now remains allocated and sits on the unlinked list until the fs is 1807 1803 * repaired. 1808 1804 */ 1809 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 1810 - XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); 1805 + if (unlikely(mp->m_inotbt_nores)) { 1806 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 1807 + XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, 1808 + &tp); 1809 + } else { 1810 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); 1811 + } 1811 1812 if (error) { 1812 1813 if (error == -ENOSPC) { 1813 1814 xfs_warn_ratelimited(mp,
+1 -1
fs/xfs/xfs_iomap.c
··· 681 681 xfs_trans_t *tp; 682 682 int nimaps; 683 683 int error = 0; 684 - int flags = 0; 684 + int flags = XFS_BMAPI_DELALLOC; 685 685 int nres; 686 686 687 687 if (whichfork == XFS_COW_FORK)
+1
fs/xfs/xfs_mount.h
··· 140 140 int m_fixedfsid[2]; /* unchanged for life of FS */ 141 141 uint m_dmevmask; /* DMI events for this FS */ 142 142 __uint64_t m_flags; /* global mount flags */ 143 + bool m_inotbt_nores; /* no per-AG finobt resv. */ 143 144 int m_ialloc_inos; /* inodes in inode allocation */ 144 145 int m_ialloc_blks; /* blocks in inode allocation */ 145 146 int m_ialloc_min_blks;/* min blocks in sparse inode
+2 -1
fs/xfs/xfs_qm.c
··· 1177 1177 * the case in all other instances. It's OK that we do this because 1178 1178 * quotacheck is done only at mount time. 1179 1179 */ 1180 - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); 1180 + error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, 1181 + &ip); 1181 1182 if (error) { 1182 1183 *res = BULKSTAT_RV_NOTHING; 1183 1184 return error;