Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

- Fix DM cache metadata to verify that a cache has blocks before trying
to continue with an operation that requires them.

- Fix bio-based DM core's dm_make_request() to properly impose device
limits on individual bios by making use of blk_queue_split().

- Fix long-standing race with how DM thinp notified userspace of
thin-pool mode state changes before they were actually made.

- Fix the zoned target's bio completion handling; this is a fairly
invasive fix at this stage but it is localized to the zoned target.
Any zoned target users will benefit from this fix.

* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm thin: bump target version
dm thin: send event about thin-pool state change _after_ making it
dm zoned: Fix target BIO completion handling
dm: call blk_queue_split() to impose device limits on bios
dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()

+81 -119
+4
drivers/md/dm-cache-metadata.c
··· 930 930 bool dirty_flag; 931 931 *result = true; 932 932 933 + if (from_cblock(cmd->cache_blocks) == 0) 934 + /* Nothing to do */ 935 + return 0; 936 + 933 937 r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, 934 938 from_cblock(cmd->cache_blocks), &cmd->dirty_cursor); 935 939 if (r) {
+37 -35
drivers/md/dm-thin.c
··· 195 195 struct dm_thin_new_mapping; 196 196 197 197 /* 198 - * The pool runs in 4 modes. Ordered in degraded order for comparisons. 198 + * The pool runs in various modes. Ordered in degraded order for comparisons. 199 199 */ 200 200 enum pool_mode { 201 201 PM_WRITE, /* metadata may be changed */ ··· 282 282 mempool_t mapping_pool; 283 283 }; 284 284 285 - static enum pool_mode get_pool_mode(struct pool *pool); 286 285 static void metadata_operation_failed(struct pool *pool, const char *op, int r); 286 + 287 + static enum pool_mode get_pool_mode(struct pool *pool) 288 + { 289 + return pool->pf.mode; 290 + } 291 + 292 + static void notify_of_pool_mode_change(struct pool *pool) 293 + { 294 + const char *descs[] = { 295 + "write", 296 + "out-of-data-space", 297 + "read-only", 298 + "read-only", 299 + "fail" 300 + }; 301 + const char *extra_desc = NULL; 302 + enum pool_mode mode = get_pool_mode(pool); 303 + 304 + if (mode == PM_OUT_OF_DATA_SPACE) { 305 + if (!pool->pf.error_if_no_space) 306 + extra_desc = " (queue IO)"; 307 + else 308 + extra_desc = " (error IO)"; 309 + } 310 + 311 + dm_table_event(pool->ti->table); 312 + DMINFO("%s: switching pool to %s%s mode", 313 + dm_device_name(pool->pool_md), 314 + descs[(int)mode], extra_desc ? : ""); 315 + } 287 316 288 317 /* 289 318 * Target context for a pool. ··· 2380 2351 queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); 2381 2352 } 2382 2353 2383 - static void notify_of_pool_mode_change_to_oods(struct pool *pool); 2384 - 2385 2354 /* 2386 2355 * We're holding onto IO to allow userland time to react. 
After the 2387 2356 * timeout either the pool will have been resized (and thus back in ··· 2392 2365 2393 2366 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { 2394 2367 pool->pf.error_if_no_space = true; 2395 - notify_of_pool_mode_change_to_oods(pool); 2368 + notify_of_pool_mode_change(pool); 2396 2369 error_retry_list_with_code(pool, BLK_STS_NOSPC); 2397 2370 } 2398 2371 } ··· 2460 2433 2461 2434 /*----------------------------------------------------------------*/ 2462 2435 2463 - static enum pool_mode get_pool_mode(struct pool *pool) 2464 - { 2465 - return pool->pf.mode; 2466 - } 2467 - 2468 - static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) 2469 - { 2470 - dm_table_event(pool->ti->table); 2471 - DMINFO("%s: switching pool to %s mode", 2472 - dm_device_name(pool->pool_md), new_mode); 2473 - } 2474 - 2475 - static void notify_of_pool_mode_change_to_oods(struct pool *pool) 2476 - { 2477 - if (!pool->pf.error_if_no_space) 2478 - notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)"); 2479 - else 2480 - notify_of_pool_mode_change(pool, "out-of-data-space (error IO)"); 2481 - } 2482 - 2483 2436 static bool passdown_enabled(struct pool_c *pt) 2484 2437 { 2485 2438 return pt->adjusted_pf.discard_passdown; ··· 2508 2501 2509 2502 switch (new_mode) { 2510 2503 case PM_FAIL: 2511 - if (old_mode != new_mode) 2512 - notify_of_pool_mode_change(pool, "failure"); 2513 2504 dm_pool_metadata_read_only(pool->pmd); 2514 2505 pool->process_bio = process_bio_fail; 2515 2506 pool->process_discard = process_bio_fail; ··· 2521 2516 2522 2517 case PM_OUT_OF_METADATA_SPACE: 2523 2518 case PM_READ_ONLY: 2524 - if (!is_read_only_pool_mode(old_mode)) 2525 - notify_of_pool_mode_change(pool, "read-only"); 2526 2519 dm_pool_metadata_read_only(pool->pmd); 2527 2520 pool->process_bio = process_bio_read_only; 2528 2521 pool->process_discard = process_bio_success; ··· 2541 2538 * alarming rate. 
Adjust your low water mark if you're 2542 2539 * frequently seeing this mode. 2543 2540 */ 2544 - if (old_mode != new_mode) 2545 - notify_of_pool_mode_change_to_oods(pool); 2546 2541 pool->out_of_data_space = true; 2547 2542 pool->process_bio = process_bio_read_only; 2548 2543 pool->process_discard = process_discard_bio; ··· 2553 2552 break; 2554 2553 2555 2554 case PM_WRITE: 2556 - if (old_mode != new_mode) 2557 - notify_of_pool_mode_change(pool, "write"); 2558 2555 if (old_mode == PM_OUT_OF_DATA_SPACE) 2559 2556 cancel_delayed_work_sync(&pool->no_space_timeout); 2560 2557 pool->out_of_data_space = false; ··· 2572 2573 * doesn't cause an unexpected mode transition on resume. 2573 2574 */ 2574 2575 pt->adjusted_pf.mode = new_mode; 2576 + 2577 + if (old_mode != new_mode) 2578 + notify_of_pool_mode_change(pool); 2575 2579 } 2576 2580 2577 2581 static void abort_transaction(struct pool *pool) ··· 4025 4023 .name = "thin-pool", 4026 4024 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 4027 4025 DM_TARGET_IMMUTABLE, 4028 - .version = {1, 20, 0}, 4026 + .version = {1, 21, 0}, 4029 4027 .module = THIS_MODULE, 4030 4028 .ctr = pool_ctr, 4031 4029 .dtr = pool_dtr, ··· 4399 4397 4400 4398 static struct target_type thin_target = { 4401 4399 .name = "thin", 4402 - .version = {1, 20, 0}, 4400 + .version = {1, 21, 0}, 4403 4401 .module = THIS_MODULE, 4404 4402 .ctr = thin_ctr, 4405 4403 .dtr = thin_dtr,
+38 -84
drivers/md/dm-zoned-target.c
··· 20 20 struct dm_zone *zone; 21 21 struct bio *bio; 22 22 refcount_t ref; 23 - blk_status_t status; 24 23 }; 25 24 26 25 /* ··· 77 78 { 78 79 struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); 79 80 80 - if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK) 81 - bioctx->status = status; 82 - bio_endio(bio); 81 + if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) 82 + bio->bi_status = status; 83 + 84 + if (refcount_dec_and_test(&bioctx->ref)) { 85 + struct dm_zone *zone = bioctx->zone; 86 + 87 + if (zone) { 88 + if (bio->bi_status != BLK_STS_OK && 89 + bio_op(bio) == REQ_OP_WRITE && 90 + dmz_is_seq(zone)) 91 + set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags); 92 + dmz_deactivate_zone(zone); 93 + } 94 + bio_endio(bio); 95 + } 83 96 } 84 97 85 98 /* 86 - * Partial clone read BIO completion callback. This terminates the 99 + * Completion callback for an internally cloned target BIO. This terminates the 87 100 * target BIO when there are no more references to its context. 88 101 */ 89 - static void dmz_read_bio_end_io(struct bio *bio) 102 + static void dmz_clone_endio(struct bio *clone) 90 103 { 91 - struct dmz_bioctx *bioctx = bio->bi_private; 92 - blk_status_t status = bio->bi_status; 104 + struct dmz_bioctx *bioctx = clone->bi_private; 105 + blk_status_t status = clone->bi_status; 93 106 94 - bio_put(bio); 107 + bio_put(clone); 95 108 dmz_bio_endio(bioctx->bio, status); 96 109 } 97 110 98 111 /* 99 - * Issue a BIO to a zone. The BIO may only partially process the 112 + * Issue a clone of a target BIO. The clone may only partially process the 100 113 * original target BIO. 
101 114 */ 102 - static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone, 103 - struct bio *bio, sector_t chunk_block, 104 - unsigned int nr_blocks) 115 + static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, 116 + struct bio *bio, sector_t chunk_block, 117 + unsigned int nr_blocks) 105 118 { 106 119 struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); 107 - sector_t sector; 108 120 struct bio *clone; 109 121 110 - /* BIO remap sector */ 111 - sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); 112 - 113 - /* If the read is not partial, there is no need to clone the BIO */ 114 - if (nr_blocks == dmz_bio_blocks(bio)) { 115 - /* Setup and submit the BIO */ 116 - bio->bi_iter.bi_sector = sector; 117 - refcount_inc(&bioctx->ref); 118 - generic_make_request(bio); 119 - return 0; 120 - } 121 - 122 - /* Partial BIO: we need to clone the BIO */ 123 122 clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set); 124 123 if (!clone) 125 124 return -ENOMEM; 126 125 127 - /* Setup the clone */ 128 - clone->bi_iter.bi_sector = sector; 126 + bio_set_dev(clone, dmz->dev->bdev); 127 + clone->bi_iter.bi_sector = 128 + dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); 129 129 clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT; 130 - clone->bi_end_io = dmz_read_bio_end_io; 130 + clone->bi_end_io = dmz_clone_endio; 131 131 clone->bi_private = bioctx; 132 132 133 133 bio_advance(bio, clone->bi_iter.bi_size); 134 134 135 - /* Submit the clone */ 136 135 refcount_inc(&bioctx->ref); 137 136 generic_make_request(clone); 137 + 138 + if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) 139 + zone->wp_block += nr_blocks; 138 140 139 141 return 0; 140 142 } ··· 214 214 if (nr_blocks) { 215 215 /* Valid blocks found: read them */ 216 216 nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block); 217 - ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks); 217 + ret = 
dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks); 218 218 if (ret) 219 219 return ret; 220 220 chunk_block += nr_blocks; ··· 226 226 } 227 227 228 228 return 0; 229 - } 230 - 231 - /* 232 - * Issue a write BIO to a zone. 233 - */ 234 - static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone, 235 - struct bio *bio, sector_t chunk_block, 236 - unsigned int nr_blocks) 237 - { 238 - struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); 239 - 240 - /* Setup and submit the BIO */ 241 - bio_set_dev(bio, dmz->dev->bdev); 242 - bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); 243 - refcount_inc(&bioctx->ref); 244 - generic_make_request(bio); 245 - 246 - if (dmz_is_seq(zone)) 247 - zone->wp_block += nr_blocks; 248 229 } 249 230 250 231 /* ··· 246 265 return -EROFS; 247 266 248 267 /* Submit write */ 249 - dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks); 268 + ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks); 269 + if (ret) 270 + return ret; 250 271 251 272 /* 252 273 * Validate the blocks in the data zone and invalidate ··· 284 301 return -EROFS; 285 302 286 303 /* Submit write */ 287 - dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks); 304 + ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks); 305 + if (ret) 306 + return ret; 288 307 289 308 /* 290 309 * Validate the blocks in the buffer zone ··· 585 600 bioctx->zone = NULL; 586 601 bioctx->bio = bio; 587 602 refcount_set(&bioctx->ref, 1); 588 - bioctx->status = BLK_STS_OK; 589 603 590 604 /* Set the BIO pending in the flush list */ 591 605 if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) { ··· 605 621 dmz_queue_chunk_work(dmz, bio); 606 622 607 623 return DM_MAPIO_SUBMITTED; 608 - } 609 - 610 - /* 611 - * Completed target BIO processing. 
612 - */ 613 - static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) 614 - { 615 - struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); 616 - 617 - if (bioctx->status == BLK_STS_OK && *error) 618 - bioctx->status = *error; 619 - 620 - if (!refcount_dec_and_test(&bioctx->ref)) 621 - return DM_ENDIO_INCOMPLETE; 622 - 623 - /* Done */ 624 - bio->bi_status = bioctx->status; 625 - 626 - if (bioctx->zone) { 627 - struct dm_zone *zone = bioctx->zone; 628 - 629 - if (*error && bio_op(bio) == REQ_OP_WRITE) { 630 - if (dmz_is_seq(zone)) 631 - set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags); 632 - } 633 - dmz_deactivate_zone(zone); 634 - } 635 - 636 - return DM_ENDIO_DONE; 637 624 } 638 625 639 626 /* ··· 901 946 .ctr = dmz_ctr, 902 947 .dtr = dmz_dtr, 903 948 .map = dmz_map, 904 - .end_io = dmz_end_io, 905 949 .io_hints = dmz_io_hints, 906 950 .prepare_ioctl = dmz_prepare_ioctl, 907 951 .postsuspend = dmz_suspend,
+2
drivers/md/dm.c
··· 1593 1593 return ret; 1594 1594 } 1595 1595 1596 + blk_queue_split(md->queue, &bio); 1597 + 1596 1598 init_clone_info(&ci, md, map, bio); 1597 1599 1598 1600 if (bio->bi_opf & REQ_PREFLUSH) {