Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-4.13/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

- a few DM integrity fixes that improve performance. One addresses
inefficiencies in the on-disk journal device layout. Another
makes use of the block layer's on-stack plugging when writing the
journal.

- a dm-bufio fix for the blk_status_t conversion that went in during
the merge window.

- a few DM raid fixes that address correctness when suspending the
device, and a fix for the redundancy validation that occurs during
device activation.

- a couple of DM zoned target fixes. The important one is the fix to not
use GFP_KERNEL in the I/O path due to concerns about deadlock in
low-memory conditions (e.g. swap over a DM zoned device).

- a DM DAX device fix to make sure dm_dax_flush() is called if the
underlying DAX device is operating as a write cache.

* tag 'for-4.13/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm, dax: Make sure dm_dax_flush() is called if device supports it
dm verity fec: fix GFP flags used with mempool_alloc()
dm zoned: use GFP_NOIO in I/O path
dm zoned: remove test for impossible REQ_OP_FLUSH conditions
dm raid: bump target version
dm raid: avoid mddev->suspended access
dm raid: fix activation check in validate_raid_redundancy()
dm raid: remove WARN_ON() in raid10_md_layout_to_format()
dm bufio: fix error code in dm_bufio_write_dirty_buffers()
dm integrity: test for corrupted disk format during table load
dm integrity: WARN_ON if variables representing journal usage get out of sync
dm integrity: use plugging when writing the journal
dm integrity: fix inefficient allocation of journal space

+94 -46
+1
Documentation/device-mapper/dm-raid.txt
··· 343 343 1.11.0 Fix table line argument order 344 344 (wrong raid10_copies/raid10_format sequence) 345 345 1.11.1 Add raid4/5/6 journal write-back support via journal_mode option 346 + 1.12.1 fix for MD deadlock between mddev_suspend() and md_write_start() available
+6
drivers/dax/super.c
··· 278 278 } 279 279 EXPORT_SYMBOL_GPL(dax_write_cache); 280 280 281 + bool dax_write_cache_enabled(struct dax_device *dax_dev) 282 + { 283 + return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); 284 + } 285 + EXPORT_SYMBOL_GPL(dax_write_cache_enabled); 286 + 281 287 bool dax_alive(struct dax_device *dax_dev) 282 288 { 283 289 lockdep_assert_held(&dax_srcu);
+1 -2
drivers/md/dm-bufio.c
··· 1258 1258 */ 1259 1259 int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) 1260 1260 { 1261 - blk_status_t a; 1262 - int f; 1261 + int a, f; 1263 1262 unsigned long buffers_processed = 0; 1264 1263 struct dm_buffer *b, *tmp; 1265 1264
+18 -4
drivers/md/dm-integrity.c
··· 1587 1587 if (likely(ic->mode == 'J')) { 1588 1588 if (dio->write) { 1589 1589 unsigned next_entry, i, pos; 1590 - unsigned ws, we; 1590 + unsigned ws, we, range_sectors; 1591 1591 1592 - dio->range.n_sectors = min(dio->range.n_sectors, ic->free_sectors); 1592 + dio->range.n_sectors = min(dio->range.n_sectors, 1593 + ic->free_sectors << ic->sb->log2_sectors_per_block); 1593 1594 if (unlikely(!dio->range.n_sectors)) 1594 1595 goto sleep; 1595 - ic->free_sectors -= dio->range.n_sectors; 1596 + range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 1597 + ic->free_sectors -= range_sectors; 1596 1598 journal_section = ic->free_section; 1597 1599 journal_entry = ic->free_section_entry; 1598 1600 1599 - next_entry = ic->free_section_entry + dio->range.n_sectors; 1601 + next_entry = ic->free_section_entry + range_sectors; 1600 1602 ic->free_section_entry = next_entry % ic->journal_section_entries; 1601 1603 ic->free_section += next_entry / ic->journal_section_entries; 1602 1604 ic->n_uncommitted_sections += next_entry / ic->journal_section_entries; ··· 1729 1727 wraparound_section(ic, &ic->free_section); 1730 1728 ic->n_uncommitted_sections++; 1731 1729 } 1730 + WARN_ON(ic->journal_sections * ic->journal_section_entries != 1731 + (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors); 1732 1732 } 1733 1733 1734 1734 static void integrity_commit(struct work_struct *w) ··· 1825 1821 { 1826 1822 unsigned i, j, n; 1827 1823 struct journal_completion comp; 1824 + struct blk_plug plug; 1825 + 1826 + blk_start_plug(&plug); 1828 1827 1829 1828 comp.ic = ic; 1830 1829 comp.in_flight = (atomic_t)ATOMIC_INIT(1); ··· 1951 1944 } 1952 1945 1953 1946 dm_bufio_write_dirty_buffers_async(ic->bufio); 1947 + 1948 + blk_finish_plug(&plug); 1954 1949 1955 1950 complete_journal_op(&comp); 1956 1951 wait_for_completion_io(&comp.comp); ··· 3026 3017 if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { 3027 
3018 r = -EINVAL; 3028 3019 ti->error = "Block size doesn't match the information in superblock"; 3020 + goto bad; 3021 + } 3022 + if (!le32_to_cpu(ic->sb->journal_sections)) { 3023 + r = -EINVAL; 3024 + ti->error = "Corrupted superblock, journal_sections is 0"; 3029 3025 goto bad; 3030 3026 } 3031 3027 /* make sure that ti->max_io_len doesn't overflow */
+16 -13
drivers/md/dm-raid.c
··· 208 208 #define RT_FLAG_RS_BITMAP_LOADED 2 209 209 #define RT_FLAG_UPDATE_SBS 3 210 210 #define RT_FLAG_RESHAPE_RS 4 211 + #define RT_FLAG_RS_SUSPENDED 5 211 212 212 213 /* Array elements of 64 bit needed for rebuild/failed disk bits */ 213 214 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) ··· 565 564 if (__raid10_near_copies(layout) > 1) 566 565 return "near"; 567 566 568 - WARN_ON(__raid10_far_copies(layout) < 2); 567 + if (__raid10_far_copies(layout) > 1) 568 + return "far"; 569 569 570 - return "far"; 570 + return "unknown"; 571 571 } 572 572 573 573 /* Return md raid10 algorithm for @name */ ··· 2542 2540 if (!freshest) 2543 2541 return 0; 2544 2542 2545 - if (validate_raid_redundancy(rs)) { 2546 - rs->ti->error = "Insufficient redundancy to activate array"; 2547 - return -EINVAL; 2548 - } 2549 - 2550 2543 /* 2551 2544 * Validation of the freshest device provides the source of 2552 2545 * validation for the remaining devices. 
··· 2549 2552 rs->ti->error = "Unable to assemble array: Invalid superblocks"; 2550 2553 if (super_validate(rs, freshest)) 2551 2554 return -EINVAL; 2555 + 2556 + if (validate_raid_redundancy(rs)) { 2557 + rs->ti->error = "Insufficient redundancy to activate array"; 2558 + return -EINVAL; 2559 + } 2552 2560 2553 2561 rdev_for_each(rdev, mddev) 2554 2562 if (!test_bit(Journal, &rdev->flags) && ··· 3170 3168 } 3171 3169 3172 3170 mddev_suspend(&rs->md); 3171 + set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags); 3173 3172 3174 3173 /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ 3175 3174 if (rs_is_raid456(rs)) { ··· 3628 3625 { 3629 3626 struct raid_set *rs = ti->private; 3630 3627 3631 - if (!rs->md.suspended) 3628 + if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) 3632 3629 mddev_suspend(&rs->md); 3633 3630 3634 3631 rs->md.ro = 1; ··· 3762 3759 return r; 3763 3760 3764 3761 /* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */ 3765 - if (mddev->suspended) 3762 + if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) 3766 3763 mddev_resume(mddev); 3767 3764 3768 3765 /* ··· 3789 3786 } 3790 3787 3791 3788 /* Suspend because a resume will happen in raid_resume() */ 3792 - if (!mddev->suspended) 3793 - mddev_suspend(mddev); 3789 + set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags); 3790 + mddev_suspend(mddev); 3794 3791 3795 3792 /* 3796 3793 * Now reshape got set up, update superblocks to ··· 3886 3883 if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) 3887 3884 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 3888 3885 3889 - if (mddev->suspended) 3886 + if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) 3890 3887 mddev_resume(mddev); 3891 3888 } 3892 3889 3893 3890 static struct target_type raid_target = { 3894 3891 .name = "raid", 3895 - .version = {1, 11, 1}, 3892 + .version = {1, 12, 1}, 3896 3893 .module = THIS_MODULE, 3897 3894 .ctr = raid_ctr, 3898 3895 
.dtr = raid_dtr,
+35
drivers/md/dm-table.c
··· 20 20 #include <linux/atomic.h> 21 21 #include <linux/blk-mq.h> 22 22 #include <linux/mount.h> 23 + #include <linux/dax.h> 23 24 24 25 #define DM_MSG_PREFIX "table" 25 26 ··· 1631 1630 return false; 1632 1631 } 1633 1632 1633 + static int device_dax_write_cache_enabled(struct dm_target *ti, 1634 + struct dm_dev *dev, sector_t start, 1635 + sector_t len, void *data) 1636 + { 1637 + struct dax_device *dax_dev = dev->dax_dev; 1638 + 1639 + if (!dax_dev) 1640 + return false; 1641 + 1642 + if (dax_write_cache_enabled(dax_dev)) 1643 + return true; 1644 + return false; 1645 + } 1646 + 1647 + static int dm_table_supports_dax_write_cache(struct dm_table *t) 1648 + { 1649 + struct dm_target *ti; 1650 + unsigned i; 1651 + 1652 + for (i = 0; i < dm_table_get_num_targets(t); i++) { 1653 + ti = dm_table_get_target(t, i); 1654 + 1655 + if (ti->type->iterate_devices && 1656 + ti->type->iterate_devices(ti, 1657 + device_dax_write_cache_enabled, NULL)) 1658 + return true; 1659 + } 1660 + 1661 + return false; 1662 + } 1663 + 1634 1664 static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, 1635 1665 sector_t start, sector_t len, void *data) 1636 1666 { ··· 1816 1784 fua = true; 1817 1785 } 1818 1786 blk_queue_write_cache(q, wc, fua); 1787 + 1788 + if (dm_table_supports_dax_write_cache(t)) 1789 + dax_write_cache(t->md->dax_dev, true); 1819 1790 1820 1791 /* Ensure that all underlying devices are non-rotational. */ 1821 1792 if (dm_table_all_devices_attribute(t, device_is_nonrot))
+5 -16
drivers/md/dm-verity-fec.c
··· 308 308 { 309 309 unsigned n; 310 310 311 - if (!fio->rs) { 312 - fio->rs = mempool_alloc(v->fec->rs_pool, 0); 313 - if (unlikely(!fio->rs)) { 314 - DMERR("failed to allocate RS"); 315 - return -ENOMEM; 316 - } 317 - } 311 + if (!fio->rs) 312 + fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO); 318 313 319 314 fec_for_each_prealloc_buffer(n) { 320 315 if (fio->bufs[n]) 321 316 continue; 322 317 323 - fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO); 318 + fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOWAIT); 324 319 if (unlikely(!fio->bufs[n])) { 325 320 DMERR("failed to allocate FEC buffer"); 326 321 return -ENOMEM; ··· 327 332 if (fio->bufs[n]) 328 333 continue; 329 334 330 - fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO); 335 + fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOWAIT); 331 336 /* we can manage with even one buffer if necessary */ 332 337 if (unlikely(!fio->bufs[n])) 333 338 break; 334 339 } 335 340 fio->nbufs = n; 336 341 337 - if (!fio->output) { 342 + if (!fio->output) 338 343 fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO); 339 - 340 - if (!fio->output) { 341 - DMERR("failed to allocate FEC page"); 342 - return -ENOMEM; 343 - } 344 - } 345 344 346 345 return 0; 347 346 }
+6 -6
drivers/md/dm-zoned-metadata.c
··· 624 624 625 625 ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, block, mblk->page); 626 626 if (ret == 0) 627 - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_KERNEL, NULL); 627 + ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); 628 628 629 629 return ret; 630 630 } ··· 658 658 659 659 /* Flush drive cache (this will also sync data) */ 660 660 if (ret == 0) 661 - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_KERNEL, NULL); 661 + ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); 662 662 663 663 return ret; 664 664 } ··· 722 722 723 723 /* If there are no dirty metadata blocks, just flush the device cache */ 724 724 if (list_empty(&write_list)) { 725 - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_KERNEL, NULL); 725 + ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); 726 726 goto out; 727 727 } 728 728 ··· 927 927 (zmd->nr_meta_zones << zmd->dev->zone_nr_blocks_shift); 928 928 } 929 929 930 - page = alloc_page(GFP_KERNEL); 930 + page = alloc_page(GFP_NOIO); 931 931 if (!page) 932 932 return -ENOMEM; 933 933 ··· 1183 1183 1184 1184 /* Get zone information from disk */ 1185 1185 ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), 1186 - &blkz, &nr_blkz, GFP_KERNEL); 1186 + &blkz, &nr_blkz, GFP_NOIO); 1187 1187 if (ret) { 1188 1188 dmz_dev_err(zmd->dev, "Get zone %u report failed", 1189 1189 dmz_id(zmd, zone)); ··· 1257 1257 1258 1258 ret = blkdev_reset_zones(dev->bdev, 1259 1259 dmz_start_sect(zmd, zone), 1260 - dev->zone_nr_sectors, GFP_KERNEL); 1260 + dev->zone_nr_sectors, GFP_NOIO); 1261 1261 if (ret) { 1262 1262 dmz_dev_err(dev, "Reset zone %u failed %d", 1263 1263 dmz_id(zmd, zone), ret);
+1 -1
drivers/md/dm-zoned-reclaim.c
··· 75 75 nr_blocks = block - wp_block; 76 76 ret = blkdev_issue_zeroout(zrc->dev->bdev, 77 77 dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block), 78 - dmz_blk2sect(nr_blocks), GFP_NOFS, false); 78 + dmz_blk2sect(nr_blocks), GFP_NOIO, 0); 79 79 if (ret) { 80 80 dmz_dev_err(zrc->dev, 81 81 "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
+4 -4
drivers/md/dm-zoned-target.c
··· 541 541 int ret; 542 542 543 543 /* Create a new chunk work */ 544 - cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOFS); 544 + cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); 545 545 if (!cw) 546 546 goto out; 547 547 ··· 588 588 589 589 bio->bi_bdev = dev->bdev; 590 590 591 - if (!nr_sectors && (bio_op(bio) != REQ_OP_FLUSH) && (bio_op(bio) != REQ_OP_WRITE)) 591 + if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE) 592 592 return DM_MAPIO_REMAPPED; 593 593 594 594 /* The BIO should be block aligned */ ··· 603 603 bioctx->status = BLK_STS_OK; 604 604 605 605 /* Set the BIO pending in the flush list */ 606 - if (bio_op(bio) == REQ_OP_FLUSH || (!nr_sectors && bio_op(bio) == REQ_OP_WRITE)) { 606 + if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) { 607 607 spin_lock(&dmz->flush_lock); 608 608 bio_list_add(&dmz->flush_list, bio); 609 609 spin_unlock(&dmz->flush_lock); ··· 785 785 786 786 /* Chunk BIO work */ 787 787 mutex_init(&dmz->chunk_lock); 788 - INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOFS); 788 + INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL); 789 789 dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND, 790 790 0, dev->name); 791 791 if (!dmz->chunk_wq) {
+1
include/linux/dax.h
··· 87 87 void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 88 88 size_t size); 89 89 void dax_write_cache(struct dax_device *dax_dev, bool wc); 90 + bool dax_write_cache_enabled(struct dax_device *dax_dev); 90 91 91 92 /* 92 93 * We use lowest available bit in exceptional entry for locking, one bit for