Merge tag 'for-5.7/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

- Document the DM integrity allow_discards feature that was added during
the 5.7 merge window.

- Fix potential for DM writecache data corruption during DM table
reloads.

- Fix the hash block number calculation in DM verity's FEC support
(verity_fec_decode()).

- Fix bio-based DM multipath crash due to use of stale copy of
MPATHF_QUEUE_IO flag state in __map_bio().

* tag 'for-5.7/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm multipath: use updated MPATHF_QUEUE_IO on mapping for bio-based mpath
dm verity fec: fix hash block number in verity_fec_decode
dm writecache: fix data corruption when reloading the target
dm integrity: document allow_discard option

Linus Torvalds 6 years ago c45e8bcc 39e16d93

+50 -23

4 changed files

expand all

Documentation

admin-guide

device-mapper

dm-integrity.rst

drivers

dm-mpath.c

dm-verity-fec.c

dm-writecache.c

+8 -5

Documentation/admin-guide/device-mapper/dm-integrity.rst

@@ -182,12 +182,15 @@
 space-efficient. If this option is not present, large padding is
 used - that is for compatibility with older kernels.
 
+allow_discards
+Allow block discard requests (a.k.a. TRIM) for the integrity device.
+Discards are only allowed to devices using internal hash.
 
-The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
-be changed when reloading the target (load an inactive table and swap the
-tables with suspend and resume). The other arguments should not be changed
-when reloading the target because the layout of disk data depend on them
-and the reloaded target would be non-functional.
+The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and
+allow_discards can be changed when reloading the target (load an inactive
+table and swap the tables with suspend and resume). The other arguments
+should not be changed when reloading the target because the layout of disk
+data depend on them and the reloaded target would be non-functional.
 
 
 The layout of the formatted block device:

+4 -2

drivers/md/dm-mpath.c

@@ -585,9 +585,11 @@
 
 	/* Do we need to select a new pgpath? */
 	pgpath = READ_ONCE(m->current_pgpath);
-	queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
-	if (!pgpath || !queue_io)
+	if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
 		pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
+
+	/* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */
+	queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
 
 	if ((pgpath && queue_io) ||
 	    (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {

+1 -1

drivers/md/dm-verity-fec.c

@@ -435,7 +435,7 @@
 	fio->level++;
 
 	if (type == DM_VERITY_BLOCK_TYPE_METADATA)
-		block += v->data_blocks;
+		block = block - v->hash_start + v->data_blocks;
 
 	/*
 	 * For RS(M, N), the continuous FEC data is divided into blocks of N

+37 -15

drivers/md/dm-writecache.c

@@ -931,6 +931,24 @@
 	return 0;
 }
 
+static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors)
+{
+	struct dm_io_region region;
+	struct dm_io_request req;
+
+	region.bdev = wc->ssd_dev->bdev;
+	region.sector = wc->start_sector;
+	region.count = n_sectors;
+	req.bi_op = REQ_OP_READ;
+	req.bi_op_flags = REQ_SYNC;
+	req.mem.type = DM_IO_VMA;
+	req.mem.ptr.vma = (char *)wc->memory_map;
+	req.client = wc->dm_io;
+	req.notify.fn = NULL;
+
+	return dm_io(&req, 1, &region, NULL);
+}
+
 static void writecache_resume(struct dm_target *ti)
 {
 	struct dm_writecache *wc = ti->private;
@@ -941,8 +959,18 @@
 
 	wc_lock(wc);
 
-	if (WC_MODE_PMEM(wc))
+	if (WC_MODE_PMEM(wc)) {
 		persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+	} else {
+		r = writecache_read_metadata(wc, wc->metadata_sectors);
+		if (r) {
+			size_t sb_entries_offset;
+			writecache_error(wc, r, "unable to read metadata: %d", r);
+			sb_entries_offset = offsetof(struct wc_memory_superblock, entries);
+			memset((char *)wc->memory_map + sb_entries_offset, -1,
+			       (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset);
+		}
+	}
 
 	wc->tree = RB_ROOT;
 	INIT_LIST_HEAD(&wc->lru);
@@ -2102,6 +2130,12 @@
 		ti->error = "Invalid block size";
 		goto bad;
 	}
+	if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) ||
+	    wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) {
+		r = -EINVAL;
+		ti->error = "Block size is smaller than device logical block size";
+		goto bad;
+	}
 	wc->block_size_bits = __ffs(wc->block_size);
 
 	wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
@@ -2200,8 +2234,6 @@
 			goto bad;
 		}
 	} else {
-		struct dm_io_region region;
-		struct dm_io_request req;
 		size_t n_blocks, n_metadata_blocks;
 		uint64_t n_bitmap_bits;
 
@@ -2258,19 +2290,9 @@
 			goto bad;
 		}
 
-		region.bdev = wc->ssd_dev->bdev;
-		region.sector = wc->start_sector;
-		region.count = wc->metadata_sectors;
-		req.bi_op = REQ_OP_READ;
-		req.bi_op_flags = REQ_SYNC;
-		req.mem.type = DM_IO_VMA;
-		req.mem.ptr.vma = (char *)wc->memory_map;
-		req.client = wc->dm_io;
-		req.notify.fn = NULL;
-
-		r = dm_io(&req, 1, &region, NULL);
+		r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT);
 		if (r) {
-			ti->error = "Unable to read metadata";
+			ti->error = "Unable to read first block of metadata";
 			goto bad;
 		}
 	}

Configure Feed

Configure Feed