Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
"This is a fix for a regression introduced in 4.5-rc1 by the new torn
log write detection code. The regression only affects people moving a
clean filesystem between machines/kernels of different architectures
(such as changing between 32-bit and 64-bit kernels), but this is the
recommended (and only!) safe way to migrate a filesystem between
architectures, so we really need to ensure it works.

The changes are larger than I'd prefer right at the end of the release
cycle, but the majority of the change is just factoring code to enable
the detection of a clean log at the correct time to avoid this issue.

Changes:

- Only perform torn log write detection on dirty logs. This prevents
spurious failures from being reported when a clean filesystem is moved
between machines or kernels of different architectures (e.g. 32-bit ->
64-bit, BE -> LE, etc.). This fixes a regression introduced by the
torn log write detection in 4.5-rc1."

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
xfs: only run torn log write detection on dirty logs
xfs: refactor in-core log state update to helper
xfs: refactor unmount record detection into helper
xfs: separate log head record discovery from verification

+169 -104
+169 -104
fs/xfs/xfs_log_recover.c
··· 1109 1109 bool tmp_wrapped; 1110 1110 1111 1111 /* 1112 - * Search backwards through the log looking for the log record header 1113 - * block. This wraps all the way back around to the head so something is 1114 - * seriously wrong if we can't find it. 1115 - */ 1116 - found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk, 1117 - rhead, wrapped); 1118 - if (found < 0) 1119 - return found; 1120 - if (!found) { 1121 - xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); 1122 - return -EIO; 1123 - } 1124 - 1125 - *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn)); 1126 - 1127 - /* 1128 - * Now that we have a tail block, check the head of the log for torn 1129 - * writes. Search again until we hit the tail or the maximum number of 1130 - * log record I/Os that could have been in flight at one time. Use a 1131 - * temporary buffer so we don't trash the rhead/bp pointer from the 1132 - * call above. 1112 + * Check the head of the log for torn writes. Search backwards from the 1113 + * head until we hit the tail or the maximum number of log record I/Os 1114 + * that could have been in flight at one time. Use a temporary buffer so 1115 + * we don't trash the rhead/bp pointers from the caller. 1133 1116 */ 1134 1117 tmp_bp = xlog_get_bp(log, 1); 1135 1118 if (!tmp_bp) ··· 1199 1216 } 1200 1217 1201 1218 /* 1219 + * Check whether the head of the log points to an unmount record. In other 1220 + * words, determine whether the log is clean. If so, update the in-core state 1221 + * appropriately. 
1222 + */ 1223 + static int 1224 + xlog_check_unmount_rec( 1225 + struct xlog *log, 1226 + xfs_daddr_t *head_blk, 1227 + xfs_daddr_t *tail_blk, 1228 + struct xlog_rec_header *rhead, 1229 + xfs_daddr_t rhead_blk, 1230 + struct xfs_buf *bp, 1231 + bool *clean) 1232 + { 1233 + struct xlog_op_header *op_head; 1234 + xfs_daddr_t umount_data_blk; 1235 + xfs_daddr_t after_umount_blk; 1236 + int hblks; 1237 + int error; 1238 + char *offset; 1239 + 1240 + *clean = false; 1241 + 1242 + /* 1243 + * Look for unmount record. If we find it, then we know there was a 1244 + * clean unmount. Since 'i' could be the last block in the physical 1245 + * log, we convert to a log block before comparing to the head_blk. 1246 + * 1247 + * Save the current tail lsn to use to pass to xlog_clear_stale_blocks() 1248 + * below. We won't want to clear the unmount record if there is one, so 1249 + * we pass the lsn of the unmount record rather than the block after it. 1250 + */ 1251 + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 1252 + int h_size = be32_to_cpu(rhead->h_size); 1253 + int h_version = be32_to_cpu(rhead->h_version); 1254 + 1255 + if ((h_version & XLOG_VERSION_2) && 1256 + (h_size > XLOG_HEADER_CYCLE_SIZE)) { 1257 + hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 1258 + if (h_size % XLOG_HEADER_CYCLE_SIZE) 1259 + hblks++; 1260 + } else { 1261 + hblks = 1; 1262 + } 1263 + } else { 1264 + hblks = 1; 1265 + } 1266 + after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)); 1267 + after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize); 1268 + if (*head_blk == after_umount_blk && 1269 + be32_to_cpu(rhead->h_num_logops) == 1) { 1270 + umount_data_blk = rhead_blk + hblks; 1271 + umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize); 1272 + error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1273 + if (error) 1274 + return error; 1275 + 1276 + op_head = (struct xlog_op_header *)offset; 1277 + if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 1278 + /* 1279 + * Set 
tail and last sync so that newly written log 1280 + * records will point recovery to after the current 1281 + * unmount record. 1282 + */ 1283 + xlog_assign_atomic_lsn(&log->l_tail_lsn, 1284 + log->l_curr_cycle, after_umount_blk); 1285 + xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1286 + log->l_curr_cycle, after_umount_blk); 1287 + *tail_blk = after_umount_blk; 1288 + 1289 + *clean = true; 1290 + } 1291 + } 1292 + 1293 + return 0; 1294 + } 1295 + 1296 + static void 1297 + xlog_set_state( 1298 + struct xlog *log, 1299 + xfs_daddr_t head_blk, 1300 + struct xlog_rec_header *rhead, 1301 + xfs_daddr_t rhead_blk, 1302 + bool bump_cycle) 1303 + { 1304 + /* 1305 + * Reset log values according to the state of the log when we 1306 + * crashed. In the case where head_blk == 0, we bump curr_cycle 1307 + * one because the next write starts a new cycle rather than 1308 + * continuing the cycle of the last good log record. At this 1309 + * point we have guaranteed that all partial log records have been 1310 + * accounted for. Therefore, we know that the last good log record 1311 + * written was complete and ended exactly on the end boundary 1312 + * of the physical log. 1313 + */ 1314 + log->l_prev_block = rhead_blk; 1315 + log->l_curr_block = (int)head_blk; 1316 + log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 1317 + if (bump_cycle) 1318 + log->l_curr_cycle++; 1319 + atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 1320 + atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 1321 + xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, 1322 + BBTOB(log->l_curr_block)); 1323 + xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, 1324 + BBTOB(log->l_curr_block)); 1325 + } 1326 + 1327 + /* 1202 1328 * Find the sync block number or the tail of the log. 
1203 1329 * 1204 1330 * This will be the block number of the last record to have its ··· 1330 1238 xfs_daddr_t *tail_blk) 1331 1239 { 1332 1240 xlog_rec_header_t *rhead; 1333 - xlog_op_header_t *op_head; 1334 1241 char *offset = NULL; 1335 1242 xfs_buf_t *bp; 1336 1243 int error; 1337 - xfs_daddr_t umount_data_blk; 1338 - xfs_daddr_t after_umount_blk; 1339 1244 xfs_daddr_t rhead_blk; 1340 1245 xfs_lsn_t tail_lsn; 1341 - int hblks; 1342 1246 bool wrapped = false; 1247 + bool clean = false; 1343 1248 1344 1249 /* 1345 1250 * Find previous log record 1346 1251 */ 1347 1252 if ((error = xlog_find_head(log, head_blk))) 1348 1253 return error; 1254 + ASSERT(*head_blk < INT_MAX); 1349 1255 1350 1256 bp = xlog_get_bp(log, 1); 1351 1257 if (!bp) ··· 1361 1271 } 1362 1272 1363 1273 /* 1364 - * Trim the head block back to skip over torn records. We can have 1365 - * multiple log I/Os in flight at any time, so we assume CRC failures 1366 - * back through the previous several records are torn writes and skip 1367 - * them. 1274 + * Search backwards through the log looking for the log record header 1275 + * block. This wraps all the way back around to the head so something is 1276 + * seriously wrong if we can't find it. 1368 1277 */ 1369 - ASSERT(*head_blk < INT_MAX); 1370 - error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, 1371 - &rhead, &wrapped); 1278 + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, 1279 + &rhead_blk, &rhead, &wrapped); 1280 + if (error < 0) 1281 + return error; 1282 + if (!error) { 1283 + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); 1284 + return -EIO; 1285 + } 1286 + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); 1287 + 1288 + /* 1289 + * Set the log state based on the current head record. 1290 + */ 1291 + xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped); 1292 + tail_lsn = atomic64_read(&log->l_tail_lsn); 1293 + 1294 + /* 1295 + * Look for an unmount record at the head of the log. 
This sets the log 1296 + * state to determine whether recovery is necessary. 1297 + */ 1298 + error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, 1299 + rhead_blk, bp, &clean); 1372 1300 if (error) 1373 1301 goto done; 1374 1302 1375 1303 /* 1376 - * Reset log values according to the state of the log when we 1377 - * crashed. In the case where head_blk == 0, we bump curr_cycle 1378 - * one because the next write starts a new cycle rather than 1379 - * continuing the cycle of the last good log record. At this 1380 - * point we have guaranteed that all partial log records have been 1381 - * accounted for. Therefore, we know that the last good log record 1382 - * written was complete and ended exactly on the end boundary 1383 - * of the physical log. 1384 - */ 1385 - log->l_prev_block = rhead_blk; 1386 - log->l_curr_block = (int)*head_blk; 1387 - log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 1388 - if (wrapped) 1389 - log->l_curr_cycle++; 1390 - atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 1391 - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 1392 - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, 1393 - BBTOB(log->l_curr_block)); 1394 - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, 1395 - BBTOB(log->l_curr_block)); 1396 - 1397 - /* 1398 - * Look for unmount record. If we find it, then we know there 1399 - * was a clean unmount. Since 'i' could be the last block in 1400 - * the physical log, we convert to a log block before comparing 1401 - * to the head_blk. 1304 + * Verify the log head if the log is not clean (e.g., we have anything 1305 + * but an unmount record at the head). This uses CRC verification to 1306 + * detect and trim torn writes. If discovered, CRC failures are 1307 + * considered torn writes and the log head is trimmed accordingly. 1402 1308 * 1403 - * Save the current tail lsn to use to pass to 1404 - * xlog_clear_stale_blocks() below. 
We won't want to clear the 1405 - * unmount record if there is one, so we pass the lsn of the 1406 - * unmount record rather than the block after it. 1309 + * Note that we can only run CRC verification when the log is dirty 1310 + * because there's no guarantee that the log data behind an unmount 1311 + * record is compatible with the current architecture. 1407 1312 */ 1408 - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 1409 - int h_size = be32_to_cpu(rhead->h_size); 1410 - int h_version = be32_to_cpu(rhead->h_version); 1313 + if (!clean) { 1314 + xfs_daddr_t orig_head = *head_blk; 1411 1315 1412 - if ((h_version & XLOG_VERSION_2) && 1413 - (h_size > XLOG_HEADER_CYCLE_SIZE)) { 1414 - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 1415 - if (h_size % XLOG_HEADER_CYCLE_SIZE) 1416 - hblks++; 1417 - } else { 1418 - hblks = 1; 1419 - } 1420 - } else { 1421 - hblks = 1; 1422 - } 1423 - after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)); 1424 - after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize); 1425 - tail_lsn = atomic64_read(&log->l_tail_lsn); 1426 - if (*head_blk == after_umount_blk && 1427 - be32_to_cpu(rhead->h_num_logops) == 1) { 1428 - umount_data_blk = rhead_blk + hblks; 1429 - umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize); 1430 - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1316 + error = xlog_verify_head(log, head_blk, tail_blk, bp, 1317 + &rhead_blk, &rhead, &wrapped); 1431 1318 if (error) 1432 1319 goto done; 1433 1320 1434 - op_head = (xlog_op_header_t *)offset; 1435 - if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 1436 - /* 1437 - * Set tail and last sync so that newly written 1438 - * log records will point recovery to after the 1439 - * current unmount record. 
1440 - */ 1441 - xlog_assign_atomic_lsn(&log->l_tail_lsn, 1442 - log->l_curr_cycle, after_umount_blk); 1443 - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1444 - log->l_curr_cycle, after_umount_blk); 1445 - *tail_blk = after_umount_blk; 1446 - 1447 - /* 1448 - * Note that the unmount was clean. If the unmount 1449 - * was not clean, we need to know this to rebuild the 1450 - * superblock counters from the perag headers if we 1451 - * have a filesystem using non-persistent counters. 1452 - */ 1453 - log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; 1321 + /* update in-core state again if the head changed */ 1322 + if (*head_blk != orig_head) { 1323 + xlog_set_state(log, *head_blk, rhead, rhead_blk, 1324 + wrapped); 1325 + tail_lsn = atomic64_read(&log->l_tail_lsn); 1326 + error = xlog_check_unmount_rec(log, head_blk, tail_blk, 1327 + rhead, rhead_blk, bp, 1328 + &clean); 1329 + if (error) 1330 + goto done; 1454 1331 } 1455 1332 } 1333 + 1334 + /* 1335 + * Note that the unmount was clean. If the unmount was not clean, we 1336 + * need to know this to rebuild the superblock counters from the perag 1337 + * headers if we have a filesystem using non-persistent counters. 1338 + */ 1339 + if (clean) 1340 + log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; 1456 1341 1457 1342 /* 1458 1343 * Make sure that there are no blocks in front of the head