Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull dm fixes from Alasdair G Kergon:
"A few fixes for problems discovered during the 3.6 cycle.

Of particular note are fixes to the thin target's discard support,
which I hope is finally working correctly, and fixes for multipath
ioctls and device limits when there are no paths."

* tag 'dm-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
dm verity: fix overflow check
dm thin: fix discard support for data devices
dm thin: tidy discard support
dm: retain table limits when swapping to new table with no devices
dm table: clear add_random unless all devices have it set
dm: handle requests beyond end of device instead of using BUG_ON
dm mpath: only retry ioctl when no paths if queue_if_no_path set
dm thin: do not set discard_zeroes_data

+209 -78
+7 -4
drivers/md/dm-mpath.c
··· 1555 1555 unsigned long arg) 1556 1556 { 1557 1557 struct multipath *m = ti->private; 1558 + struct pgpath *pgpath; 1558 1559 struct block_device *bdev; 1559 1560 fmode_t mode; 1560 1561 unsigned long flags; ··· 1571 1570 if (!m->current_pgpath) 1572 1571 __choose_pgpath(m, 0); 1573 1572 1574 - if (m->current_pgpath) { 1575 - bdev = m->current_pgpath->path.dev->bdev; 1576 - mode = m->current_pgpath->path.dev->mode; 1573 + pgpath = m->current_pgpath; 1574 + 1575 + if (pgpath) { 1576 + bdev = pgpath->path.dev->bdev; 1577 + mode = pgpath->path.dev->mode; 1577 1578 } 1578 1579 1579 - if (m->queue_io) 1580 + if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) 1580 1581 r = -EAGAIN; 1581 1582 else if (!bdev) 1582 1583 r = -EIO;
+57 -4
drivers/md/dm-table.c
··· 1212 1212 return &t->targets[(KEYS_PER_NODE * n) + k]; 1213 1213 } 1214 1214 1215 + static int count_device(struct dm_target *ti, struct dm_dev *dev, 1216 + sector_t start, sector_t len, void *data) 1217 + { 1218 + unsigned *num_devices = data; 1219 + 1220 + (*num_devices)++; 1221 + 1222 + return 0; 1223 + } 1224 + 1225 + /* 1226 + * Check whether a table has no data devices attached using each 1227 + * target's iterate_devices method. 1228 + * Returns false if the result is unknown because a target doesn't 1229 + * support iterate_devices. 1230 + */ 1231 + bool dm_table_has_no_data_devices(struct dm_table *table) 1232 + { 1233 + struct dm_target *uninitialized_var(ti); 1234 + unsigned i = 0, num_devices = 0; 1235 + 1236 + while (i < dm_table_get_num_targets(table)) { 1237 + ti = dm_table_get_target(table, i++); 1238 + 1239 + if (!ti->type->iterate_devices) 1240 + return false; 1241 + 1242 + ti->type->iterate_devices(ti, count_device, &num_devices); 1243 + if (num_devices) 1244 + return false; 1245 + } 1246 + 1247 + return true; 1248 + } 1249 + 1215 1250 /* 1216 1251 * Establish the new table's queue_limits and validate them. 1217 1252 */ ··· 1389 1354 return q && blk_queue_nonrot(q); 1390 1355 } 1391 1356 1392 - static bool dm_table_is_nonrot(struct dm_table *t) 1357 + static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, 1358 + sector_t start, sector_t len, void *data) 1359 + { 1360 + struct request_queue *q = bdev_get_queue(dev->bdev); 1361 + 1362 + return q && !blk_queue_add_random(q); 1363 + } 1364 + 1365 + static bool dm_table_all_devices_attribute(struct dm_table *t, 1366 + iterate_devices_callout_fn func) 1393 1367 { 1394 1368 struct dm_target *ti; 1395 1369 unsigned i = 0; 1396 1370 1397 - /* Ensure that all underlying device are non-rotational. 
*/ 1398 1371 while (i < dm_table_get_num_targets(t)) { 1399 1372 ti = dm_table_get_target(t, i++); 1400 1373 1401 1374 if (!ti->type->iterate_devices || 1402 - !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) 1375 + !ti->type->iterate_devices(ti, func, NULL)) 1403 1376 return 0; 1404 1377 } 1405 1378 ··· 1439 1396 if (!dm_table_discard_zeroes_data(t)) 1440 1397 q->limits.discard_zeroes_data = 0; 1441 1398 1442 - if (dm_table_is_nonrot(t)) 1399 + /* Ensure that all underlying devices are non-rotational. */ 1400 + if (dm_table_all_devices_attribute(t, device_is_nonrot)) 1443 1401 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 1444 1402 else 1445 1403 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); 1446 1404 1447 1405 dm_table_set_integrity(t); 1406 + 1407 + /* 1408 + * Determine whether or not this queue's I/O timings contribute 1409 + * to the entropy pool, Only request-based targets use this. 1410 + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not 1411 + * have it set. 1412 + */ 1413 + if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) 1414 + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); 1448 1415 1449 1416 /* 1450 1417 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
+88 -47
drivers/md/dm-thin.c
··· 509 509 struct pool_features { 510 510 enum pool_mode mode; 511 511 512 - unsigned zero_new_blocks:1; 513 - unsigned discard_enabled:1; 514 - unsigned discard_passdown:1; 512 + bool zero_new_blocks:1; 513 + bool discard_enabled:1; 514 + bool discard_passdown:1; 515 515 }; 516 516 517 517 struct thin_c; ··· 580 580 struct dm_target_callbacks callbacks; 581 581 582 582 dm_block_t low_water_blocks; 583 - struct pool_features pf; 583 + struct pool_features requested_pf; /* Features requested during table load */ 584 + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ 584 585 }; 585 586 586 587 /* ··· 1840 1839 /*---------------------------------------------------------------- 1841 1840 * Binding of control targets to a pool object 1842 1841 *--------------------------------------------------------------*/ 1842 + static bool data_dev_supports_discard(struct pool_c *pt) 1843 + { 1844 + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); 1845 + 1846 + return q && blk_queue_discard(q); 1847 + } 1848 + 1849 + /* 1850 + * If discard_passdown was enabled verify that the data device 1851 + * supports discards. Disable discard_passdown if not. 
1852 + */ 1853 + static void disable_passdown_if_not_supported(struct pool_c *pt) 1854 + { 1855 + struct pool *pool = pt->pool; 1856 + struct block_device *data_bdev = pt->data_dev->bdev; 1857 + struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; 1858 + sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; 1859 + const char *reason = NULL; 1860 + char buf[BDEVNAME_SIZE]; 1861 + 1862 + if (!pt->adjusted_pf.discard_passdown) 1863 + return; 1864 + 1865 + if (!data_dev_supports_discard(pt)) 1866 + reason = "discard unsupported"; 1867 + 1868 + else if (data_limits->max_discard_sectors < pool->sectors_per_block) 1869 + reason = "max discard sectors smaller than a block"; 1870 + 1871 + else if (data_limits->discard_granularity > block_size) 1872 + reason = "discard granularity larger than a block"; 1873 + 1874 + else if (block_size & (data_limits->discard_granularity - 1)) 1875 + reason = "discard granularity not a factor of block size"; 1876 + 1877 + if (reason) { 1878 + DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); 1879 + pt->adjusted_pf.discard_passdown = false; 1880 + } 1881 + } 1882 + 1843 1883 static int bind_control_target(struct pool *pool, struct dm_target *ti) 1844 1884 { 1845 1885 struct pool_c *pt = ti->private; ··· 1889 1847 * We want to make sure that degraded pools are never upgraded. 1890 1848 */ 1891 1849 enum pool_mode old_mode = pool->pf.mode; 1892 - enum pool_mode new_mode = pt->pf.mode; 1850 + enum pool_mode new_mode = pt->adjusted_pf.mode; 1893 1851 1894 1852 if (old_mode > new_mode) 1895 1853 new_mode = old_mode; 1896 1854 1897 1855 pool->ti = ti; 1898 1856 pool->low_water_blocks = pt->low_water_blocks; 1899 - pool->pf = pt->pf; 1900 - set_pool_mode(pool, new_mode); 1857 + pool->pf = pt->adjusted_pf; 1901 1858 1902 - /* 1903 - * If discard_passdown was enabled verify that the data device 1904 - * supports discards. 
Disable discard_passdown if not; otherwise 1905 - * -EOPNOTSUPP will be returned. 1906 - */ 1907 - /* FIXME: pull this out into a sep fn. */ 1908 - if (pt->pf.discard_passdown) { 1909 - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); 1910 - if (!q || !blk_queue_discard(q)) { 1911 - char buf[BDEVNAME_SIZE]; 1912 - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", 1913 - bdevname(pt->data_dev->bdev, buf)); 1914 - pool->pf.discard_passdown = 0; 1915 - } 1916 - } 1859 + set_pool_mode(pool, new_mode); 1917 1860 1918 1861 return 0; 1919 1862 } ··· 1916 1889 static void pool_features_init(struct pool_features *pf) 1917 1890 { 1918 1891 pf->mode = PM_WRITE; 1919 - pf->zero_new_blocks = 1; 1920 - pf->discard_enabled = 1; 1921 - pf->discard_passdown = 1; 1892 + pf->zero_new_blocks = true; 1893 + pf->discard_enabled = true; 1894 + pf->discard_passdown = true; 1922 1895 } 1923 1896 1924 1897 static void __pool_destroy(struct pool *pool) ··· 2146 2119 argc--; 2147 2120 2148 2121 if (!strcasecmp(arg_name, "skip_block_zeroing")) 2149 - pf->zero_new_blocks = 0; 2122 + pf->zero_new_blocks = false; 2150 2123 2151 2124 else if (!strcasecmp(arg_name, "ignore_discard")) 2152 - pf->discard_enabled = 0; 2125 + pf->discard_enabled = false; 2153 2126 2154 2127 else if (!strcasecmp(arg_name, "no_discard_passdown")) 2155 - pf->discard_passdown = 0; 2128 + pf->discard_passdown = false; 2156 2129 2157 2130 else if (!strcasecmp(arg_name, "read_only")) 2158 2131 pf->mode = PM_READ_ONLY; ··· 2286 2259 pt->metadata_dev = metadata_dev; 2287 2260 pt->data_dev = data_dev; 2288 2261 pt->low_water_blocks = low_water_blocks; 2289 - pt->pf = pf; 2262 + pt->adjusted_pf = pt->requested_pf = pf; 2290 2263 ti->num_flush_requests = 1; 2264 + 2291 2265 /* 2292 2266 * Only need to enable discards if the pool should pass 2293 2267 * them down to the data device. 
The thin device's discard ··· 2296 2268 */ 2297 2269 if (pf.discard_enabled && pf.discard_passdown) { 2298 2270 ti->num_discard_requests = 1; 2271 + 2299 2272 /* 2300 2273 * Setting 'discards_supported' circumvents the normal 2301 2274 * stacking of discard limits (this keeps the pool and 2302 2275 * thin devices' discard limits consistent). 2303 2276 */ 2304 2277 ti->discards_supported = true; 2278 + ti->discard_zeroes_data_unsupported = true; 2305 2279 } 2306 2280 ti->private = pt; 2307 2281 ··· 2733 2703 format_dev_t(buf2, pt->data_dev->bdev->bd_dev), 2734 2704 (unsigned long)pool->sectors_per_block, 2735 2705 (unsigned long long)pt->low_water_blocks); 2736 - emit_flags(&pt->pf, result, sz, maxlen); 2706 + emit_flags(&pt->requested_pf, result, sz, maxlen); 2737 2707 break; 2738 2708 } 2739 2709 ··· 2762 2732 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 2763 2733 } 2764 2734 2765 - static void set_discard_limits(struct pool *pool, struct queue_limits *limits) 2735 + static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) 2766 2736 { 2767 - /* 2768 - * FIXME: these limits may be incompatible with the pool's data device 2769 - */ 2737 + struct pool *pool = pt->pool; 2738 + struct queue_limits *data_limits; 2739 + 2770 2740 limits->max_discard_sectors = pool->sectors_per_block; 2771 2741 2772 2742 /* 2773 - * This is just a hint, and not enforced. We have to cope with 2774 - * bios that cover a block partially. A discard that spans a block 2775 - * boundary is not sent to this target. 2743 + * discard_granularity is just a hint, and not enforced. 
2776 2744 */ 2777 - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; 2778 - limits->discard_zeroes_data = pool->pf.zero_new_blocks; 2745 + if (pt->adjusted_pf.discard_passdown) { 2746 + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; 2747 + limits->discard_granularity = data_limits->discard_granularity; 2748 + } else 2749 + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; 2779 2750 } 2780 2751 2781 2752 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) ··· 2786 2755 2787 2756 blk_limits_io_min(limits, 0); 2788 2757 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); 2789 - if (pool->pf.discard_enabled) 2790 - set_discard_limits(pool, limits); 2758 + 2759 + /* 2760 + * pt->adjusted_pf is a staging area for the actual features to use. 2761 + * They get transferred to the live pool in bind_control_target() 2762 + * called from pool_preresume(). 2763 + */ 2764 + if (!pt->adjusted_pf.discard_enabled) 2765 + return; 2766 + 2767 + disable_passdown_if_not_supported(pt); 2768 + 2769 + set_discard_limits(pt, limits); 2791 2770 } 2792 2771 2793 2772 static struct target_type pool_target = { 2794 2773 .name = "thin-pool", 2795 2774 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 2796 2775 DM_TARGET_IMMUTABLE, 2797 - .version = {1, 3, 0}, 2776 + .version = {1, 4, 0}, 2798 2777 .module = THIS_MODULE, 2799 2778 .ctr = pool_ctr, 2800 2779 .dtr = pool_dtr, ··· 3083 3042 return 0; 3084 3043 } 3085 3044 3045 + /* 3046 + * A thin device always inherits its queue limits from its pool. 
3047 + */ 3086 3048 static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) 3087 3049 { 3088 3050 struct thin_c *tc = ti->private; 3089 - struct pool *pool = tc->pool; 3090 3051 3091 - blk_limits_io_min(limits, 0); 3092 - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); 3093 - set_discard_limits(pool, limits); 3052 + *limits = bdev_get_queue(tc->pool_dev->bdev)->limits; 3094 3053 } 3095 3054 3096 3055 static struct target_type thin_target = { 3097 3056 .name = "thin", 3098 - .version = {1, 3, 0}, 3057 + .version = {1, 4, 0}, 3099 3058 .module = THIS_MODULE, 3100 3059 .ctr = thin_ctr, 3101 3060 .dtr = thin_dtr,
+4 -4
drivers/md/dm-verity.c
··· 718 718 v->hash_dev_block_bits = ffs(num) - 1; 719 719 720 720 if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || 721 - num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != 722 - (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { 721 + (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) 722 + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { 723 723 ti->error = "Invalid data blocks"; 724 724 r = -EINVAL; 725 725 goto bad; ··· 733 733 } 734 734 735 735 if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || 736 - num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != 737 - (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { 736 + (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) 737 + >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { 738 738 ti->error = "Invalid hash start"; 739 739 r = -EINVAL; 740 740 goto bad;
+52 -19
drivers/md/dm.c
··· 865 865 { 866 866 int r = error; 867 867 struct dm_rq_target_io *tio = clone->end_io_data; 868 - dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; 868 + dm_request_endio_fn rq_end_io = NULL; 869 869 870 - if (mapped && rq_end_io) 871 - r = rq_end_io(tio->ti, clone, error, &tio->info); 870 + if (tio->ti) { 871 + rq_end_io = tio->ti->type->rq_end_io; 872 + 873 + if (mapped && rq_end_io) 874 + r = rq_end_io(tio->ti, clone, error, &tio->info); 875 + } 872 876 873 877 if (r <= 0) 874 878 /* The target wants to complete the I/O */ ··· 1592 1588 int r, requeued = 0; 1593 1589 struct dm_rq_target_io *tio = clone->end_io_data; 1594 1590 1595 - /* 1596 - * Hold the md reference here for the in-flight I/O. 1597 - * We can't rely on the reference count by device opener, 1598 - * because the device may be closed during the request completion 1599 - * when all bios are completed. 1600 - * See the comment in rq_completed() too. 1601 - */ 1602 - dm_get(md); 1603 - 1604 1591 tio->ti = ti; 1605 1592 r = ti->type->map_rq(ti, clone, &tio->info); 1606 1593 switch (r) { ··· 1623 1628 return requeued; 1624 1629 } 1625 1630 1631 + static struct request *dm_start_request(struct mapped_device *md, struct request *orig) 1632 + { 1633 + struct request *clone; 1634 + 1635 + blk_start_request(orig); 1636 + clone = orig->special; 1637 + atomic_inc(&md->pending[rq_data_dir(clone)]); 1638 + 1639 + /* 1640 + * Hold the md reference here for the in-flight I/O. 1641 + * We can't rely on the reference count by device opener, 1642 + * because the device may be closed during the request completion 1643 + * when all bios are completed. 1644 + * See the comment in rq_completed() too. 1645 + */ 1646 + dm_get(md); 1647 + 1648 + return clone; 1649 + } 1650 + 1626 1651 /* 1627 1652 * q->request_fn for request-based dm. 1628 1653 * Called with the queue lock held. 
··· 1672 1657 pos = blk_rq_pos(rq); 1673 1658 1674 1659 ti = dm_table_find_target(map, pos); 1675 - BUG_ON(!dm_target_is_valid(ti)); 1660 + if (!dm_target_is_valid(ti)) { 1661 + /* 1662 + * Must perform setup, that dm_done() requires, 1663 + * before calling dm_kill_unmapped_request 1664 + */ 1665 + DMERR_LIMIT("request attempted access beyond the end of device"); 1666 + clone = dm_start_request(md, rq); 1667 + dm_kill_unmapped_request(clone, -EIO); 1668 + continue; 1669 + } 1676 1670 1677 1671 if (ti->type->busy && ti->type->busy(ti)) 1678 1672 goto delay_and_out; 1679 1673 1680 - blk_start_request(rq); 1681 - clone = rq->special; 1682 - atomic_inc(&md->pending[rq_data_dir(clone)]); 1674 + clone = dm_start_request(md, rq); 1683 1675 1684 1676 spin_unlock(q->queue_lock); 1685 1677 if (map_request(ti, clone, md)) ··· 1706 1684 blk_delay_queue(q, HZ / 10); 1707 1685 out: 1708 1686 dm_table_put(map); 1709 - 1710 - return; 1711 1687 } 1712 1688 1713 1689 int dm_underlying_device_busy(struct request_queue *q) ··· 2429 2409 */ 2430 2410 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) 2431 2411 { 2432 - struct dm_table *map = ERR_PTR(-EINVAL); 2412 + struct dm_table *live_map, *map = ERR_PTR(-EINVAL); 2433 2413 struct queue_limits limits; 2434 2414 int r; 2435 2415 ··· 2438 2418 /* device must be suspended */ 2439 2419 if (!dm_suspended_md(md)) 2440 2420 goto out; 2421 + 2422 + /* 2423 + * If the new table has no data devices, retain the existing limits. 2424 + * This helps multipath with queue_if_no_path if all paths disappear, 2425 + * then new I/O is queued based on these limits, and then some paths 2426 + * reappear. 2427 + */ 2428 + if (dm_table_has_no_data_devices(table)) { 2429 + live_map = dm_get_live_table(md); 2430 + if (live_map) 2431 + limits = md->queue->limits; 2432 + dm_table_put(live_map); 2433 + } 2441 2434 2442 2435 r = dm_calculate_queue_limits(table, &limits); 2443 2436 if (r) {
+1
drivers/md/dm.h
··· 54 54 void (*fn)(void *), void *context); 55 55 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); 56 56 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); 57 + bool dm_table_has_no_data_devices(struct dm_table *table); 57 58 int dm_calculate_queue_limits(struct dm_table *table, 58 59 struct queue_limits *limits); 59 60 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,