Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'iomap-5.17-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull iomap fix from Darrick Wong:
"A single bugfix for iomap.

The fix should eliminate the occasional stall warnings that appear
when a lot of writeback IO completes all at once and we then have to
clear writeback status on a large number of folios.

Summary:

- Limit the length of ioend chains in writeback so that we don't trip
the softlockup watchdog and to limit long tail latency on clearing
PageWriteback"

* tag 'iomap-5.17-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs, iomap: limit individual ioend chain lengths in writeback

+65 -5
+48 -4
fs/iomap/buffered-io.c
··· 21 21 22 22 #include "../internal.h" 23 23 24 + #define IOEND_BATCH_SIZE 4096 25 + 24 26 /* 25 27 * Structure allocated for each folio when block size < folio size 26 28 * to track sub-folio uptodate status and I/O completions. ··· 1041 1039 * state, release holds on bios, and finally free up memory. Do not use the 1042 1040 * ioend after this. 1043 1041 */ 1044 - static void 1042 + static u32 1045 1043 iomap_finish_ioend(struct iomap_ioend *ioend, int error) 1046 1044 { 1047 1045 struct inode *inode = ioend->io_inode; ··· 1050 1048 u64 start = bio->bi_iter.bi_sector; 1051 1049 loff_t offset = ioend->io_offset; 1052 1050 bool quiet = bio_flagged(bio, BIO_QUIET); 1051 + u32 folio_count = 0; 1053 1052 1054 1053 for (bio = &ioend->io_inline_bio; bio; bio = next) { 1055 1054 struct folio_iter fi; ··· 1065 1062 next = bio->bi_private; 1066 1063 1067 1064 /* walk all folios in bio, ending page IO on them */ 1068 - bio_for_each_folio_all(fi, bio) 1065 + bio_for_each_folio_all(fi, bio) { 1069 1066 iomap_finish_folio_write(inode, fi.folio, fi.length, 1070 1067 error); 1068 + folio_count++; 1069 + } 1071 1070 bio_put(bio); 1072 1071 } 1073 1072 /* The ioend has been freed by bio_put() */ ··· 1079 1074 "%s: writeback error on inode %lu, offset %lld, sector %llu", 1080 1075 inode->i_sb->s_id, inode->i_ino, offset, start); 1081 1076 } 1077 + return folio_count; 1082 1078 } 1083 1079 1080 + /* 1081 + * Ioend completion routine for merged bios. This can only be called from task 1082 + * contexts as merged ioends can be of unbound length. Hence we have to break up 1083 + * the writeback completions into manageable chunks to avoid long scheduler 1084 + * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get 1085 + * good batch processing throughput without creating adverse scheduler latency 1086 + * conditions. 
1087 + */ 1084 1088 void 1085 1089 iomap_finish_ioends(struct iomap_ioend *ioend, int error) 1086 1090 { 1087 1091 struct list_head tmp; 1092 + u32 completions; 1093 + 1094 + might_sleep(); 1088 1095 1089 1096 list_replace_init(&ioend->io_list, &tmp); 1090 - iomap_finish_ioend(ioend, error); 1097 + completions = iomap_finish_ioend(ioend, error); 1091 1098 1092 1099 while (!list_empty(&tmp)) { 1100 + if (completions > IOEND_BATCH_SIZE * 8) { 1101 + cond_resched(); 1102 + completions = 0; 1103 + } 1093 1104 ioend = list_first_entry(&tmp, struct iomap_ioend, io_list); 1094 1105 list_del_init(&ioend->io_list); 1095 - iomap_finish_ioend(ioend, error); 1106 + completions += iomap_finish_ioend(ioend, error); 1096 1107 } 1097 1108 } 1098 1109 EXPORT_SYMBOL_GPL(iomap_finish_ioends); ··· 1128 1107 (next->io_type == IOMAP_UNWRITTEN)) 1129 1108 return false; 1130 1109 if (ioend->io_offset + ioend->io_size != next->io_offset) 1110 + return false; 1111 + /* 1112 + * Do not merge physically discontiguous ioends. The filesystem 1113 + * completion functions will have to iterate the physical 1114 + * discontiguities even if we merge the ioends at a logical level, so 1115 + * we don't gain anything by merging physical discontiguities here. 1116 + * 1117 + * We cannot use bio->bi_iter.bi_sector here as it is modified during 1118 + * submission so does not point to the start sector of the bio at 1119 + * completion. 
1120 + */ 1121 + if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector) 1131 1122 return false; 1132 1123 return true; 1133 1124 } ··· 1242 1209 ioend->io_flags = wpc->iomap.flags; 1243 1210 ioend->io_inode = inode; 1244 1211 ioend->io_size = 0; 1212 + ioend->io_folios = 0; 1245 1213 ioend->io_offset = offset; 1246 1214 ioend->io_bio = bio; 1215 + ioend->io_sector = sector; 1247 1216 return ioend; 1248 1217 } 1249 1218 ··· 1285 1250 if (offset != wpc->ioend->io_offset + wpc->ioend->io_size) 1286 1251 return false; 1287 1252 if (sector != bio_end_sector(wpc->ioend->io_bio)) 1253 + return false; 1254 + /* 1255 + * Limit ioend bio chain lengths to minimise IO completion latency. This 1256 + * also prevents long tight loops ending page writeback on all the 1257 + * folios in the ioend. 1258 + */ 1259 + if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE) 1288 1260 return false; 1289 1261 return true; 1290 1262 } ··· 1377 1335 &submit_list); 1378 1336 count++; 1379 1337 } 1338 + if (count) 1339 + wpc->ioend->io_folios++; 1380 1340 1381 1341 WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list)); 1382 1342 WARN_ON_ONCE(!folio_test_locked(folio));
+15 -1
fs/xfs/xfs_aops.c
··· 136 136 memalloc_nofs_restore(nofs_flag); 137 137 } 138 138 139 - /* Finish all pending io completions. */ 139 + /* 140 + * Finish all pending IO completions that require transactional modifications. 141 + * 142 + * We try to merge physically and logically contiguous ioends before completion to 143 + * minimise the number of transactions we need to perform during IO completion. 144 + * Both unwritten extent conversion and COW remapping need to iterate and modify 145 + * one physical extent at a time, so we gain nothing by merging physically 146 + * discontiguous extents here. 147 + * 148 + * The ioend chain length that we can be processing here is largely unbounded in 149 + * length and we may have to perform significant amounts of work on each ioend 150 + * to complete it. Hence we have to be careful about holding the CPU for too 151 + * long in this loop. 152 + */ 140 153 void 141 154 xfs_end_io( 142 155 struct work_struct *work) ··· 170 157 list_del_init(&ioend->io_list); 171 158 iomap_ioend_try_merge(ioend, &tmp); 172 159 xfs_end_ioend(ioend); 160 + cond_resched(); 173 161 } 174 162 } 175 163
+2
include/linux/iomap.h
··· 263 263 struct list_head io_list; /* next ioend in chain */ 264 264 u16 io_type; 265 265 u16 io_flags; /* IOMAP_F_* */ 266 + u32 io_folios; /* folios added to ioend */ 266 267 struct inode *io_inode; /* file being written to */ 267 268 size_t io_size; /* size of the extent */ 268 269 loff_t io_offset; /* offset in the file */ 270 + sector_t io_sector; /* start sector of ioend */ 269 271 struct bio *io_bio; /* bio being built */ 270 272 struct bio io_inline_bio; /* MUST BE LAST! */ 271 273 };