Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
"This is a little larger than I'd like late in the release cycle, but
all the fixes are for regressions introduced in the 4.1-rc1 merge, or
are needed back in -stable kernels fairly quickly as they are
filesystem corruption or userspace visible correctness issues.

Changes in this update:

- regression fix for new rename whiteout code

- regression fixes for new superblock generic per-cpu counter code

- fix for incorrect error return sign introduced in 3.17

- metadata corruption fixes that need to go back to -stable kernels"

* tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
xfs: fix broken i_nlink accounting for whiteout tmpfile inode
xfs: xfs_iozero can return positive errno
xfs: xfs_attr_inactive leaves inconsistent attr fork state behind
xfs: extent size hints can round up extents past MAXEXTLEN
xfs: inode and free block counters need to use __percpu_counter_compare
percpu_counter: batch size aware __percpu_counter_compare()
xfs: use percpu_counter_read_positive for mp->m_icount

+129 -85
+4 -4
fs/xfs/libxfs/xfs_attr_leaf.c
··· 574 574 * After the last attribute is removed revert to original inode format, 575 575 * making all literal area available to the data fork once more. 576 576 */ 577 - STATIC void 578 - xfs_attr_fork_reset( 577 + void 578 + xfs_attr_fork_remove( 579 579 struct xfs_inode *ip, 580 580 struct xfs_trans *tp) 581 581 { ··· 641 641 (mp->m_flags & XFS_MOUNT_ATTR2) && 642 642 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && 643 643 !(args->op_flags & XFS_DA_OP_ADDNAME)) { 644 - xfs_attr_fork_reset(dp, args->trans); 644 + xfs_attr_fork_remove(dp, args->trans); 645 645 } else { 646 646 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); 647 647 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); ··· 905 905 if (forkoff == -1) { 906 906 ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); 907 907 ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); 908 - xfs_attr_fork_reset(dp, args->trans); 908 + xfs_attr_fork_remove(dp, args->trans); 909 909 goto out; 910 910 } 911 911
+1 -1
fs/xfs/libxfs/xfs_attr_leaf.h
··· 53 53 int xfs_attr_shortform_list(struct xfs_attr_list_context *context); 54 54 int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 55 55 int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); 56 - 56 + void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); 57 57 58 58 /* 59 59 * Internal routines when attribute fork size == XFS_LBSIZE(mp).
+20 -13
fs/xfs/libxfs/xfs_bmap.c
··· 3224 3224 align_alen += temp; 3225 3225 align_off -= temp; 3226 3226 } 3227 - /* 3228 - * Same adjustment for the end of the requested area. 3229 - */ 3230 - if ((temp = (align_alen % extsz))) { 3227 + 3228 + /* Same adjustment for the end of the requested area. */ 3229 + temp = (align_alen % extsz); 3230 + if (temp) 3231 3231 align_alen += extsz - temp; 3232 - } 3232 + 3233 + /* 3234 + * For large extent hint sizes, the aligned extent might be larger than 3235 + * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls 3236 + * the length back under MAXEXTLEN. The outer allocation loops handle 3237 + * short allocation just fine, so it is safe to do this. We only want to 3238 + * do it when we are forced to, though, because it means more allocation 3239 + * operations are required. 3240 + */ 3241 + while (align_alen > MAXEXTLEN) 3242 + align_alen -= extsz; 3243 + ASSERT(align_alen <= MAXEXTLEN); 3244 + 3233 3245 /* 3234 3246 * If the previous block overlaps with this proposed allocation 3235 3247 * then move the start forward without adjusting the length. ··· 3330 3318 return -EINVAL; 3331 3319 } else { 3332 3320 ASSERT(orig_off >= align_off); 3333 - ASSERT(orig_end <= align_off + align_alen); 3321 + /* see MAXEXTLEN handling above */ 3322 + ASSERT(orig_end <= align_off + align_alen || 3323 + align_alen + extsz > MAXEXTLEN); 3334 3324 } 3335 3325 3336 3326 #ifdef DEBUG ··· 4113 4099 /* Figure out the extent size, adjust alen */ 4114 4100 extsz = xfs_get_extsz_hint(ip); 4115 4101 if (extsz) { 4116 - /* 4117 - * Make sure we don't exceed a single extent length when we 4118 - * align the extent by reducing length we are going to 4119 - * allocate by the maximum amount extent size aligment may 4120 - * require. 4121 - */ 4122 - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); 4123 4102 error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 4124 4103 1, 0, &aoff, &alen); 4125 4104 ASSERT(!error);
+6 -3
fs/xfs/libxfs/xfs_ialloc.c
··· 376 376 */ 377 377 newlen = args.mp->m_ialloc_inos; 378 378 if (args.mp->m_maxicount && 379 - percpu_counter_read(&args.mp->m_icount) + newlen > 379 + percpu_counter_read_positive(&args.mp->m_icount) + newlen > 380 380 args.mp->m_maxicount) 381 381 return -ENOSPC; 382 382 args.minlen = args.maxlen = args.mp->m_ialloc_blks; ··· 1339 1339 * If we have already hit the ceiling of inode blocks then clear 1340 1340 * okalloc so we scan all available agi structures for a free 1341 1341 * inode. 1342 + * 1343 + * Read rough value of mp->m_icount by percpu_counter_read_positive, 1344 + * which will sacrifice the preciseness but improve the performance. 1342 1345 */ 1343 1346 if (mp->m_maxicount && 1344 - percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > 1345 - mp->m_maxicount) { 1347 + percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos 1348 + > mp->m_maxicount) { 1346 1349 noroom = 1; 1347 1350 okalloc = 0; 1348 1351 }
+50 -35
fs/xfs/xfs_attr_inactive.c
··· 380 380 return error; 381 381 } 382 382 383 + /* 384 + * xfs_attr_inactive kills all traces of an attribute fork on an inode. It 385 + * removes both the on-disk and in-memory inode fork. Note that this also has to 386 + * handle the condition of inodes without attributes but with an attribute fork 387 + * configured, so we can't use xfs_inode_hasattr() here. 388 + * 389 + * The in-memory attribute fork is removed even on error. 390 + */ 383 391 int 384 - xfs_attr_inactive(xfs_inode_t *dp) 392 + xfs_attr_inactive( 393 + struct xfs_inode *dp) 385 394 { 386 - xfs_trans_t *trans; 387 - xfs_mount_t *mp; 388 - int error; 395 + struct xfs_trans *trans; 396 + struct xfs_mount *mp; 397 + int cancel_flags = 0; 398 + int lock_mode = XFS_ILOCK_SHARED; 399 + int error = 0; 389 400 390 401 mp = dp->i_mount; 391 402 ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); 392 403 393 - xfs_ilock(dp, XFS_ILOCK_SHARED); 394 - if (!xfs_inode_hasattr(dp) || 395 - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { 396 - xfs_iunlock(dp, XFS_ILOCK_SHARED); 397 - return 0; 398 - } 399 - xfs_iunlock(dp, XFS_ILOCK_SHARED); 404 + xfs_ilock(dp, lock_mode); 405 + if (!XFS_IFORK_Q(dp)) 406 + goto out_destroy_fork; 407 + xfs_iunlock(dp, lock_mode); 400 408 401 409 /* 402 410 * Start our first transaction of the day. ··· 416 408 * the inode in every transaction to let it float upward through 417 409 * the log. 418 410 */ 411 + lock_mode = 0; 419 412 trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); 420 413 error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); 421 - if (error) { 422 - xfs_trans_cancel(trans, 0); 423 - return error; 424 - } 425 - xfs_ilock(dp, XFS_ILOCK_EXCL); 414 + if (error) 415 + goto out_cancel; 416 + 417 + lock_mode = XFS_ILOCK_EXCL; 418 + cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT; 419 + xfs_ilock(dp, lock_mode); 420 + 421 + if (!XFS_IFORK_Q(dp)) 422 + goto out_cancel; 426 423 427 424 /* 428 425 * No need to make quota reservations here. We expect to release some ··· 435 422 */ 436 423 xfs_trans_ijoin(trans, dp, 0); 437 424 438 - /* 439 - * Decide on what work routines to call based on the inode size. 440 - */ 441 - if (!xfs_inode_hasattr(dp) || 442 - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { 443 - error = 0; 444 - goto out; 445 - } 446 - error = xfs_attr3_root_inactive(&trans, dp); 447 - if (error) 448 - goto out; 425 + /* invalidate and truncate the attribute fork extents */ 426 + if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { 427 + error = xfs_attr3_root_inactive(&trans, dp); 428 + if (error) 429 + goto out_cancel; 449 430 450 - error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); 451 - if (error) 452 - goto out; 431 + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); 432 + if (error) 433 + goto out_cancel; 434 + } 435 + 436 + /* Reset the attribute fork - this also destroys the in-core fork */ 437 + xfs_attr_fork_remove(dp, trans); 453 438 454 439 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); 455 - xfs_iunlock(dp, XFS_ILOCK_EXCL); 456 - 440 + xfs_iunlock(dp, lock_mode); 457 441 return error; 458 442 459 - out: 460 - xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 461 - xfs_iunlock(dp, XFS_ILOCK_EXCL); 443 + out_cancel: 444 + xfs_trans_cancel(trans, cancel_flags); 445 + out_destroy_fork: 446 + /* kill the in-core attr fork before we drop the inode lock */ 447 + if (dp->i_afp) 448 + xfs_idestroy_fork(dp, XFS_ATTR_FORK); 449 + if (lock_mode) 450 + xfs_iunlock(dp, lock_mode); 462 451 return error; 463 452 }
+1 -1
fs/xfs/xfs_file.c
··· 124 124 status = 0; 125 125 } while (count); 126 126 127 - return (-status); 127 + return status; 128 128 } 129 129 130 130 int
+12 -10
fs/xfs/xfs_inode.c
··· 1946 1946 /* 1947 1947 * If there are attributes associated with the file then blow them away 1948 1948 * now. The code calls a routine that recursively deconstructs the 1949 - * attribute fork. We need to just commit the current transaction 1950 - * because we can't use it for xfs_attr_inactive(). 1949 + * attribute fork. If also blows away the in-core attribute fork. 1951 1950 */ 1952 - if (ip->i_d.di_anextents > 0) { 1953 - ASSERT(ip->i_d.di_forkoff != 0); 1954 - 1951 + if (XFS_IFORK_Q(ip)) { 1955 1952 error = xfs_attr_inactive(ip); 1956 1953 if (error) 1957 1954 return; 1958 1955 } 1959 1956 1960 - if (ip->i_afp) 1961 - xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1962 - 1957 + ASSERT(!ip->i_afp); 1963 1958 ASSERT(ip->i_d.di_anextents == 0); 1959 + ASSERT(ip->i_d.di_forkoff == 0); 1964 1960 1965 1961 /* 1966 1962 * Free the inode. ··· 2879 2883 if (error) 2880 2884 return error; 2881 2885 2882 - /* Satisfy xfs_bumplink that this is a real tmpfile */ 2886 + /* 2887 + * Prepare the tmpfile inode as if it were created through the VFS. 2888 + * Otherwise, the link increment paths will complain about nlink 0->1. 2889 + * Drop the link count as done by d_tmpfile(), complete the inode setup 2890 + * and flag it as linkable. 2891 + */ 2892 + drop_nlink(VFS_I(tmpfile)); 2883 2893 xfs_finish_inode_setup(tmpfile); 2884 2894 VFS_I(tmpfile)->i_state |= I_LINKABLE; 2885 2895 ··· 3153 3151 * intermediate state on disk. 3154 3152 */ 3155 3153 if (wip) { 3156 - ASSERT(wip->i_d.di_nlink == 0); 3154 + ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); 3157 3155 error = xfs_bumplink(tp, wip); 3158 3156 if (error) 3159 3157 goto out_trans_abort;
+20 -14
fs/xfs/xfs_mount.c
··· 1084 1084 return xfs_sync_sb(mp, true); 1085 1085 } 1086 1086 1087 + /* 1088 + * Deltas for the inode count are +/-64, hence we use a large batch size 1089 + * of 128 so we don't need to take the counter lock on every update. 1090 + */ 1091 + #define XFS_ICOUNT_BATCH 128 1087 1092 int 1088 1093 xfs_mod_icount( 1089 1094 struct xfs_mount *mp, 1090 1095 int64_t delta) 1091 1096 { 1092 - /* deltas are +/-64, hence the large batch size of 128. */ 1093 - __percpu_counter_add(&mp->m_icount, delta, 128); 1094 - if (percpu_counter_compare(&mp->m_icount, 0) < 0) { 1097 + __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); 1098 + if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { 1095 1099 ASSERT(0); 1096 1100 percpu_counter_add(&mp->m_icount, -delta); 1097 1101 return -EINVAL; ··· 1117 1113 return 0; 1118 1114 } 1119 1115 1116 + /* 1117 + * Deltas for the block count can vary from 1 to very large, but lock contention 1118 + * only occurs on frequent small block count updates such as in the delayed 1119 + * allocation path for buffered writes (page a time updates). Hence we set 1120 + * a large batch count (1024) to minimise global counter updates except when 1121 + * we get near to ENOSPC and we have to be very accurate with our updates. 1122 + */ 1123 + #define XFS_FDBLOCKS_BATCH 1024 1120 1124 int 1121 1125 xfs_mod_fdblocks( 1122 1126 struct xfs_mount *mp, ··· 1163 1151 * Taking blocks away, need to be more accurate the closer we 1164 1152 * are to zero. 1165 1153 * 1166 - * batch size is set to a maximum of 1024 blocks - if we are 1167 - * allocating of freeing extents larger than this then we aren't 1168 - * going to be hammering the counter lock so a lock per update 1169 - * is not a problem. 1170 - * 1171 1154 * If the counter has a value of less than 2 * max batch size, 1172 1155 * then make everything serialise as we are real close to 1173 1156 * ENOSPC. 1174 1157 */ 1175 - #define __BATCH 1024 1176 - if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0) 1158 + if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH, 1159 + XFS_FDBLOCKS_BATCH) < 0) 1177 1160 batch = 1; 1178 1161 else 1179 - batch = __BATCH; 1180 - #undef __BATCH 1162 + batch = XFS_FDBLOCKS_BATCH; 1181 1163 1182 1164 __percpu_counter_add(&mp->m_fdblocks, delta, batch); 1183 - if (percpu_counter_compare(&mp->m_fdblocks, 1184 - XFS_ALLOC_SET_ASIDE(mp)) >= 0) { 1165 + if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), 1166 + XFS_FDBLOCKS_BATCH) >= 0) { 1185 1167 /* we had space! */ 1186 1168 return 0; 1187 1169 }
+12 -1
include/linux/percpu_counter.h
··· 41 41 void percpu_counter_set(struct percpu_counter *fbc, s64 amount); 42 42 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); 43 43 s64 __percpu_counter_sum(struct percpu_counter *fbc); 44 - int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs); 44 + int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); 45 + 46 + static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) 47 + { 48 + return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); 49 + } 45 50 46 51 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) 47 52 { ··· 119 114 return -1; 120 115 else 121 116 return 0; 117 + } 118 + 119 + static inline int 120 + __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) 121 + { 122 + return percpu_counter_compare(fbc, rhs); 122 123 } 123 124 124 125 static inline void
+3 -3
lib/percpu_counter.c
··· 197 197 * Compare counter against given value. 198 198 * Return 1 if greater, 0 if equal and -1 if less 199 199 */ 200 - int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) 200 + int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) 201 201 { 202 202 s64 count; 203 203 204 204 count = percpu_counter_read(fbc); 205 205 /* Check to see if rough count will be sufficient for comparison */ 206 - if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { 206 + if (abs(count - rhs) > (batch * num_online_cpus())) { 207 207 if (count > rhs) 208 208 return 1; 209 209 else ··· 218 218 else 219 219 return 0; 220 220 } 221 - EXPORT_SYMBOL(percpu_counter_compare); 221 + EXPORT_SYMBOL(__percpu_counter_compare); 222 222 223 223 static int __init percpu_counter_startup(void) 224 224 {