Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

block: align the bio after building it

Instead of ensuring each vector is block-size aligned while constructing
the bio, just ensure the entire size is aligned after it's built. This
makes getting bio pages more flexible: it can now accept device-valid I/O
vectors that would otherwise be rejected by the per-vector alignment checks.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Keith Busch and committed by Jens Axboe
20a0e627 743bf2e0

+40 -25
+40 -25
block/bio.c
···
1208 1208    * For a multi-segment *iter, this function only adds pages from the next
1209 1209    * non-empty segment of the iov iterator.
1210 1210    */
1211      - static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
1212      - 		unsigned len_align_mask)
     1211 + static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
1213 1212   {
1214 1213   	iov_iter_extraction_t extraction_flags = 0;
1215 1214   	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
···
1217 1218   	struct page **pages = (struct page **)bv;
1218 1219   	ssize_t size;
1219 1220   	unsigned int num_pages, i = 0;
1220      - 	size_t offset, folio_offset, left, len, trim;
     1221 + 	size_t offset, folio_offset, left, len;
1221 1222   	int ret = 0;
1222 1223
1223 1224   	/*
···
1231 1232   	if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
1232 1233   		extraction_flags |= ITER_ALLOW_P2PDMA;
1233 1234
1234      - 	/*
1235      - 	 * Each segment in the iov is required to be a block size multiple.
1236      - 	 * However, we may not be able to get the entire segment if it spans
1237      - 	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
1238      - 	 * result to ensure the bio's total size is correct. The remainder of
1239      - 	 * the iov data will be picked up in the next bio iteration.
1240      - 	 */
1241 1235   	size = iov_iter_extract_pages(iter, &pages,
1242 1236   				      UINT_MAX - bio->bi_iter.bi_size,
1243 1237   				      nr_pages, extraction_flags, &offset);
···
1238 1246   		return size ? size : -EFAULT;
1239 1247
1240 1248   	nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
1241      -
1242      - 	trim = size & len_align_mask;
1243      - 	if (trim) {
1244      - 		iov_iter_revert(iter, trim);
1245      - 		size -= trim;
1246      - 	}
1247      -
1248      - 	if (unlikely(!size)) {
1249      - 		ret = -EFAULT;
1250      - 		goto out;
1251      - 	}
1252      -
1253 1249   	for (left = size, i = 0; left > 0; left -= len, i += num_pages) {
1254 1250   		struct page *page = pages[i];
1255 1251   		struct folio *folio = page_folio(page);
···
1282 1302   	return ret;
1283 1303   }
1284 1304
     1305 + /*
     1306 +  * Aligns the bio size to the len_align_mask, releasing excessive bio vecs that
     1307 +  * __bio_iov_iter_get_pages may have inserted, and reverts the trimmed length
     1308 +  * for the next iteration.
     1309 +  */
     1310 + static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter,
     1311 + 			    unsigned len_align_mask)
     1312 + {
     1313 + 	size_t nbytes = bio->bi_iter.bi_size & len_align_mask;
     1314 +
     1315 + 	if (!nbytes)
     1316 + 		return 0;
     1317 +
     1318 + 	iov_iter_revert(iter, nbytes);
     1319 + 	bio->bi_iter.bi_size -= nbytes;
     1320 + 	do {
     1321 + 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
     1322 +
     1323 + 		if (nbytes < bv->bv_len) {
     1324 + 			bv->bv_len -= nbytes;
     1325 + 			break;
     1326 + 		}
     1327 +
     1328 + 		bio_release_page(bio, bv->bv_page);
     1329 + 		bio->bi_vcnt--;
     1330 + 		nbytes -= bv->bv_len;
     1331 + 	} while (nbytes);
     1332 +
     1333 + 	if (!bio->bi_vcnt)
     1334 + 		return -EFAULT;
     1335 + 	return 0;
     1336 + }
     1337 +
1285 1338   /**
1286 1339    * bio_iov_iter_get_pages_aligned - add user or kernel pages to a bio
1287 1340    * @bio: bio to add pages to
1288 1341    * @iter: iov iterator describing the region to be added
1289      -  * @len_align_mask: the mask to align each vector size to, 0 for any length
     1342 +  * @len_align_mask: the mask to align the total size to, 0 for any length
1290 1343    *
1291 1344    * This takes either an iterator pointing to user memory, or one pointing to
1292 1345    * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
···
1353 1340   	if (iov_iter_extract_will_pin(iter))
1354 1341   		bio_set_flag(bio, BIO_PAGE_PINNED);
1355 1342   	do {
1356      - 		ret = __bio_iov_iter_get_pages(bio, iter, len_align_mask);
     1343 + 		ret = __bio_iov_iter_get_pages(bio, iter);
1357 1344   	} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
1358 1345
1359      - 	return bio->bi_vcnt ? 0 : ret;
     1346 + 	if (bio->bi_vcnt)
     1347 + 		return bio_iov_iter_align_down(bio, iter, len_align_mask);
     1348 + 	return ret;
1360 1349   }
1361 1350   EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages_aligned);
1362 1351