Merge tag 'for-5.11/dm-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

+2

drivers/md/Kconfig

··· 605 605 select BLK_DEV_INTEGRITY 606 606 select DM_BUFIO 607 607 select CRYPTO 608 + select CRYPTO_SKCIPHER 608 609 select ASYNC_XOR 609 610 help 610 611 This device-mapper target emulates a block device that has ··· 623 622 tristate "Drive-managed zoned block device target support" 624 623 depends on BLK_DEV_DM 625 624 depends on BLK_DEV_ZONED 625 + select CRC32 626 626 help 627 627 This device-mapper target takes a host-managed or host-aware zoned 628 628 block device and exposes most of its capacity as a regular block

+6

drivers/md/dm-bufio.c

··· 1534 1534 } 1535 1535 EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); 1536 1536 1537 + struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c) 1538 + { 1539 + return c->dm_io; 1540 + } 1541 + EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client); 1542 + 1537 1543 sector_t dm_bufio_get_block_number(struct dm_buffer *b) 1538 1544 { 1539 1545 return b->block;

+152 -18

drivers/md/dm-crypt.c

··· 1454 1454 static void kcryptd_async_done(struct crypto_async_request *async_req, 1455 1455 int error); 1456 1456 1457 - static void crypt_alloc_req_skcipher(struct crypt_config *cc, 1457 + static int crypt_alloc_req_skcipher(struct crypt_config *cc, 1458 1458 struct convert_context *ctx) 1459 1459 { 1460 1460 unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); 1461 1461 1462 - if (!ctx->r.req) 1463 - ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO); 1462 + if (!ctx->r.req) { 1463 + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); 1464 + if (!ctx->r.req) 1465 + return -ENOMEM; 1466 + } 1464 1467 1465 1468 skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); 1466 1469 ··· 1474 1471 skcipher_request_set_callback(ctx->r.req, 1475 1472 CRYPTO_TFM_REQ_MAY_BACKLOG, 1476 1473 kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); 1474 + 1475 + return 0; 1477 1476 } 1478 1477 1479 - static void crypt_alloc_req_aead(struct crypt_config *cc, 1478 + static int crypt_alloc_req_aead(struct crypt_config *cc, 1480 1479 struct convert_context *ctx) 1481 1480 { 1482 - if (!ctx->r.req_aead) 1483 - ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO); 1481 + if (!ctx->r.req) { 1482 + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); 1483 + if (!ctx->r.req) 1484 + return -ENOMEM; 1485 + } 1484 1486 1485 1487 aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); 1486 1488 ··· 1496 1488 aead_request_set_callback(ctx->r.req_aead, 1497 1489 CRYPTO_TFM_REQ_MAY_BACKLOG, 1498 1490 kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); 1491 + 1492 + return 0; 1499 1493 } 1500 1494 1501 - static void crypt_alloc_req(struct crypt_config *cc, 1495 + static int crypt_alloc_req(struct crypt_config *cc, 1502 1496 struct convert_context *ctx) 1503 1497 { 1504 1498 if (crypt_integrity_aead(cc)) 1505 - crypt_alloc_req_aead(cc, ctx); 1499 + return crypt_alloc_req_aead(cc, ctx); 1506 1500 else 1507 - crypt_alloc_req_skcipher(cc, ctx); 1501 + return crypt_alloc_req_skcipher(cc, ctx); 1508 1502 } 1509 1503 1510 1504 static void crypt_free_req_skcipher(struct crypt_config *cc, ··· 1539 1529 * Encrypt / decrypt data from one bio to another one (can be the same one) 1540 1530 */ 1541 1531 static blk_status_t crypt_convert(struct crypt_config *cc, 1542 - struct convert_context *ctx, bool atomic) 1532 + struct convert_context *ctx, bool atomic, bool reset_pending) 1543 1533 { 1544 1534 unsigned int tag_offset = 0; 1545 1535 unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; 1546 1536 int r; 1547 1537 1548 - atomic_set(&ctx->cc_pending, 1); 1538 + /* 1539 + * if reset_pending is set we are dealing with the bio for the first time, 1540 + * else we're continuing to work on the previous bio, so don't mess with 1541 + * the cc_pending counter 1542 + */ 1543 + if (reset_pending) 1544 + atomic_set(&ctx->cc_pending, 1); 1549 1545 1550 1546 while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { 1551 1547 1552 - crypt_alloc_req(cc, ctx); 1548 + r = crypt_alloc_req(cc, ctx); 1549 + if (r) { 1550 + complete(&ctx->restart); 1551 + return BLK_STS_DEV_RESOURCE; 1552 + } 1553 + 1553 1554 atomic_inc(&ctx->cc_pending); 1554 1555 1555 1556 if (crypt_integrity_aead(cc)) ··· 1574 1553 * but the driver request queue is full, let's wait. 1575 1554 */ 1576 1555 case -EBUSY: 1577 - wait_for_completion(&ctx->restart); 1556 + if (in_interrupt()) { 1557 + if (try_wait_for_completion(&ctx->restart)) { 1558 + /* 1559 + * we don't have to block to wait for completion, 1560 + * so proceed 1561 + */ 1562 + } else { 1563 + /* 1564 + * we can't wait for completion without blocking 1565 + * exit and continue processing in a workqueue 1566 + */ 1567 + ctx->r.req = NULL; 1568 + ctx->cc_sector += sector_step; 1569 + tag_offset++; 1570 + return BLK_STS_DEV_RESOURCE; 1571 + } 1572 + } else { 1573 + wait_for_completion(&ctx->restart); 1574 + } 1578 1575 reinit_completion(&ctx->restart); 1579 1576 fallthrough; 1580 1577 /* ··· 1730 1691 atomic_inc(&io->io_pending); 1731 1692 } 1732 1693 1694 + static void kcryptd_io_bio_endio(struct work_struct *work) 1695 + { 1696 + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); 1697 + bio_endio(io->base_bio); 1698 + } 1699 + 1733 1700 /* 1734 1701 * One of the bios was finished. Check for completion of 1735 1702 * the whole request and correctly clean up the buffer. ··· 1758 1713 kfree(io->integrity_metadata); 1759 1714 1760 1715 base_bio->bi_status = error; 1761 - bio_endio(base_bio); 1716 + 1717 + /* 1718 + * If we are running this function from our tasklet, 1719 + * we can't call bio_endio() here, because it will call 1720 + * clone_endio() from dm.c, which in turn will 1721 + * free the current struct dm_crypt_io structure with 1722 + * our tasklet. In this case we need to delay bio_endio() 1723 + * execution to after the tasklet is done and dequeued. 1724 + */ 1725 + if (tasklet_trylock(&io->tasklet)) { 1726 + tasklet_unlock(&io->tasklet); 1727 + bio_endio(base_bio); 1728 + return; 1729 + } 1730 + 1731 + INIT_WORK(&io->work, kcryptd_io_bio_endio); 1732 + queue_work(cc->io_queue, &io->work); 1762 1733 } 1763 1734 1764 1735 /* ··· 2006 1945 } 2007 1946 } 2008 1947 1948 + static void kcryptd_crypt_write_continue(struct work_struct *work) 1949 + { 1950 + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); 1951 + struct crypt_config *cc = io->cc; 1952 + struct convert_context *ctx = &io->ctx; 1953 + int crypt_finished; 1954 + sector_t sector = io->sector; 1955 + blk_status_t r; 1956 + 1957 + wait_for_completion(&ctx->restart); 1958 + reinit_completion(&ctx->restart); 1959 + 1960 + r = crypt_convert(cc, &io->ctx, true, false); 1961 + if (r) 1962 + io->error = r; 1963 + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); 1964 + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { 1965 + /* Wait for completion signaled by kcryptd_async_done() */ 1966 + wait_for_completion(&ctx->restart); 1967 + crypt_finished = 1; 1968 + } 1969 + 1970 + /* Encryption was already finished, submit io now */ 1971 + if (crypt_finished) { 1972 + kcryptd_crypt_write_io_submit(io, 0); 1973 + io->sector = sector; 1974 + } 1975 + 1976 + crypt_dec_pending(io); 1977 + } 1978 + 2009 1979 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) 2010 1980 { 2011 1981 struct crypt_config *cc = io->cc; ··· 2065 1973 2066 1974 crypt_inc_pending(io); 2067 1975 r = crypt_convert(cc, ctx, 2068 - test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); 1976 + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true); 1977 + /* 1978 + * Crypto API backlogged the request, because its queue was full 1979 + * and we're in softirq context, so continue from a workqueue 1980 + * (TODO: is it actually possible to be in softirq in the write path?) 1981 + */ 1982 + if (r == BLK_STS_DEV_RESOURCE) { 1983 + INIT_WORK(&io->work, kcryptd_crypt_write_continue); 1984 + queue_work(cc->crypt_queue, &io->work); 1985 + return; 1986 + } 2069 1987 if (r) 2070 1988 io->error = r; 2071 1989 crypt_finished = atomic_dec_and_test(&ctx->cc_pending); ··· 2100 1998 crypt_dec_pending(io); 2101 1999 } 2102 2000 2001 + static void kcryptd_crypt_read_continue(struct work_struct *work) 2002 + { 2003 + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); 2004 + struct crypt_config *cc = io->cc; 2005 + blk_status_t r; 2006 + 2007 + wait_for_completion(&io->ctx.restart); 2008 + reinit_completion(&io->ctx.restart); 2009 + 2010 + r = crypt_convert(cc, &io->ctx, true, false); 2011 + if (r) 2012 + io->error = r; 2013 + 2014 + if (atomic_dec_and_test(&io->ctx.cc_pending)) 2015 + kcryptd_crypt_read_done(io); 2016 + 2017 + crypt_dec_pending(io); 2018 + } 2019 + 2103 2020 static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) 2104 2021 { 2105 2022 struct crypt_config *cc = io->cc; ··· 2130 2009 io->sector); 2131 2010 2132 2011 r = crypt_convert(cc, &io->ctx, 2133 - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); 2012 + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); 2013 + /* 2014 + * Crypto API backlogged the request, because its queue was full 2015 + * and we're in softirq context, so continue from a workqueue 2016 + */ 2017 + if (r == BLK_STS_DEV_RESOURCE) { 2018 + INIT_WORK(&io->work, kcryptd_crypt_read_continue); 2019 + queue_work(cc->crypt_queue, &io->work); 2020 + return; 2021 + } 2134 2022 if (r) 2135 2023 io->error = r; 2136 2024 ··· 2221 2091 2222 2092 if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || 2223 2093 (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { 2224 - if (in_irq()) { 2225 - /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ 2094 + /* 2095 + * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. 2096 + * irqs_disabled(): the kernel may run some IO completion from the idle thread, but 2097 + * it is being executed with irqs disabled. 2098 + */ 2099 + if (in_irq() || irqs_disabled()) { 2226 2100 tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); 2227 2101 tasklet_schedule(&io->tasklet); 2228 2102 return;

+50 -12

drivers/md/dm-integrity.c

··· 1379 1379 #undef MAY_BE_HASH 1380 1380 } 1381 1381 1382 - static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) 1382 + struct flush_request { 1383 + struct dm_io_request io_req; 1384 + struct dm_io_region io_reg; 1385 + struct dm_integrity_c *ic; 1386 + struct completion comp; 1387 + }; 1388 + 1389 + static void flush_notify(unsigned long error, void *fr_) 1390 + { 1391 + struct flush_request *fr = fr_; 1392 + if (unlikely(error != 0)) 1393 + dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO); 1394 + complete(&fr->comp); 1395 + } 1396 + 1397 + static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) 1383 1398 { 1384 1399 int r; 1400 + 1401 + struct flush_request fr; 1402 + 1403 + if (!ic->meta_dev) 1404 + flush_data = false; 1405 + if (flush_data) { 1406 + fr.io_req.bi_op = REQ_OP_WRITE, 1407 + fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, 1408 + fr.io_req.mem.type = DM_IO_KMEM, 1409 + fr.io_req.mem.ptr.addr = NULL, 1410 + fr.io_req.notify.fn = flush_notify, 1411 + fr.io_req.notify.context = &fr; 1412 + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), 1413 + fr.io_reg.bdev = ic->dev->bdev, 1414 + fr.io_reg.sector = 0, 1415 + fr.io_reg.count = 0, 1416 + fr.ic = ic; 1417 + init_completion(&fr.comp); 1418 + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); 1419 + BUG_ON(r); 1420 + } 1421 + 1385 1422 r = dm_bufio_write_dirty_buffers(ic->bufio); 1386 1423 if (unlikely(r)) 1387 1424 dm_integrity_io_error(ic, "writing tags", r); 1425 + 1426 + if (flush_data) 1427 + wait_for_completion(&fr.comp); 1388 1428 } 1389 1429 1390 1430 static void sleep_on_endio_wait(struct dm_integrity_c *ic) ··· 2150 2110 2151 2111 if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { 2152 2112 integrity_metadata(&dio->work); 2153 - dm_integrity_flush_buffers(ic); 2113 + dm_integrity_flush_buffers(ic, false); 2154 2114 2155 2115 dio->in_flight = (atomic_t)ATOMIC_INIT(1); 2156 2116 dio->completion = NULL; ··· 2235 2195 flushes = bio_list_get(&ic->flush_bio_list); 2236 2196 if (unlikely(ic->mode != 'J')) { 2237 2197 spin_unlock_irq(&ic->endio_wait.lock); 2238 - dm_integrity_flush_buffers(ic); 2198 + dm_integrity_flush_buffers(ic, true); 2239 2199 goto release_flush_bios; 2240 2200 } 2241 2201 ··· 2449 2409 complete_journal_op(&comp); 2450 2410 wait_for_completion_io(&comp.comp); 2451 2411 2452 - dm_integrity_flush_buffers(ic); 2412 + dm_integrity_flush_buffers(ic, true); 2453 2413 } 2454 2414 2455 2415 static void integrity_writer(struct work_struct *w) ··· 2491 2451 { 2492 2452 int r; 2493 2453 2494 - dm_integrity_flush_buffers(ic); 2454 + dm_integrity_flush_buffers(ic, false); 2495 2455 if (dm_integrity_failed(ic)) 2496 2456 return; 2497 2457 ··· 2694 2654 unsigned long limit; 2695 2655 struct bio *bio; 2696 2656 2697 - dm_integrity_flush_buffers(ic); 2657 + dm_integrity_flush_buffers(ic, false); 2698 2658 2699 2659 range.logical_sector = 0; 2700 2660 range.n_sectors = ic->provided_data_sectors; ··· 2703 2663 add_new_range_and_wait(ic, &range); 2704 2664 spin_unlock_irq(&ic->endio_wait.lock); 2705 2665 2706 - dm_integrity_flush_buffers(ic); 2707 - if (ic->meta_dev) 2708 - blkdev_issue_flush(ic->dev->bdev, GFP_NOIO); 2666 + dm_integrity_flush_buffers(ic, true); 2709 2667 2710 2668 limit = ic->provided_data_sectors; 2711 2669 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { ··· 2972 2934 if (ic->meta_dev) 2973 2935 queue_work(ic->writer_wq, &ic->writer_work); 2974 2936 drain_workqueue(ic->writer_wq); 2975 - dm_integrity_flush_buffers(ic); 2937 + dm_integrity_flush_buffers(ic, true); 2976 2938 } 2977 2939 2978 2940 if (ic->mode == 'B') { 2979 - dm_integrity_flush_buffers(ic); 2941 + dm_integrity_flush_buffers(ic, true); 2980 2942 #if 1 2981 2943 /* set to 0 to test bitmap replay code */ 2982 2944 init_journal(ic, 0, ic->journal_sections, 0); ··· 3792 3754 unsigned extra_args; 3793 3755 struct dm_arg_set as; 3794 3756 static const struct dm_arg _args[] = { 3795 - {0, 9, "Invalid number of feature args"}, 3757 + {0, 15, "Invalid number of feature args"}, 3796 3758 }; 3797 3759 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 3798 3760 bool should_write_sb;

+3 -3

drivers/md/dm-raid.c

··· 3729 3729 blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); 3730 3730 3731 3731 /* 3732 - * RAID1 and RAID10 personalities require bio splitting, 3733 - * RAID0/4/5/6 don't and process large discard bios properly. 3732 + * RAID0 and RAID10 personalities require bio splitting, 3733 + * RAID1/4/5/6 don't and process large discard bios properly. 3734 3734 */ 3735 - if (rs_is_raid1(rs) || rs_is_raid10(rs)) { 3735 + if (rs_is_raid0(rs) || rs_is_raid10(rs)) { 3736 3736 limits->discard_granularity = chunk_size_bytes; 3737 3737 limits->max_discard_sectors = rs->md.chunk_sectors; 3738 3738 }

+24

drivers/md/dm-snap.c

··· 141 141 * for them to be committed. 142 142 */ 143 143 struct bio_list bios_queued_during_merge; 144 + 145 + /* 146 + * Flush data after merge. 147 + */ 148 + struct bio flush_bio; 144 149 }; 145 150 146 151 /* ··· 1126 1121 1127 1122 static void error_bios(struct bio *bio); 1128 1123 1124 + static int flush_data(struct dm_snapshot *s) 1125 + { 1126 + struct bio *flush_bio = &s->flush_bio; 1127 + 1128 + bio_reset(flush_bio); 1129 + bio_set_dev(flush_bio, s->origin->bdev); 1130 + flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; 1131 + 1132 + return submit_bio_wait(flush_bio); 1133 + } 1134 + 1129 1135 static void merge_callback(int read_err, unsigned long write_err, void *context) 1130 1136 { 1131 1137 struct dm_snapshot *s = context; ··· 1147 1131 DMERR("Read error: shutting down merge."); 1148 1132 else 1149 1133 DMERR("Write error: shutting down merge."); 1134 + goto shut; 1135 + } 1136 + 1137 + if (flush_data(s) < 0) { 1138 + DMERR("Flush after merge failed: shutting down merge"); 1150 1139 goto shut; 1151 1140 } 1152 1141 ··· 1339 1318 s->first_merging_chunk = 0; 1340 1319 s->num_merging_chunks = 0; 1341 1320 bio_list_init(&s->bios_queued_during_merge); 1321 + bio_init(&s->flush_bio, NULL, 0); 1342 1322 1343 1323 /* Allocate hash table for COW data */ 1344 1324 if (init_hash_tables(s)) { ··· 1525 1503 mempool_exit(&s->pending_pool); 1526 1504 1527 1505 dm_exception_store_destroy(s->store); 1506 + 1507 + bio_uninit(&s->flush_bio); 1528 1508 1529 1509 dm_put_device(ti, s->cow); 1530 1510

+1 -1

drivers/md/dm.c

··· 562 562 * subset of the parent bdev; require extra privileges. 563 563 */ 564 564 if (!capable(CAP_SYS_RAWIO)) { 565 - DMWARN_LIMIT( 565 + DMDEBUG_LIMIT( 566 566 "%s: sending ioctl %x to DM device without required privilege.", 567 567 current->comm, cmd); 568 568 r = -ENOIOCTLCMD;

+1

include/linux/dm-bufio.h

··· 150 150 151 151 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); 152 152 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); 153 + struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); 153 154 sector_t dm_bufio_get_block_number(struct dm_buffer *b); 154 155 void *dm_bufio_get_block_data(struct dm_buffer *b); 155 156 void *dm_bufio_get_aux_data(struct dm_buffer *b);

Configure Feed

Configure Feed