Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'dm-3.10-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper updates from Alasdair Kergon:
"Allow devices that hold metadata for the device-mapper thin
provisioning target to be extended easily; allow WRITE SAME on
multipath devices; an assortment of little fixes and clean-ups."

* tag 'dm-3.10-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (21 commits)
dm cache: set config value
dm cache: move config fns
dm thin: generate event when metadata threshold passed
dm persistent metadata: add space map threshold callback
dm persistent data: add threshold callback to space map
dm thin: detect metadata device resizing
dm persistent data: support space map resizing
dm thin: open dev read only when possible
dm thin: refactor data dev resize
dm cache: replace memcpy with struct assignment
dm cache: fix typos in comments
dm cache policy: fix description of lookup fn
dm: document iterate_devices
dm persistent data: fix error message typos
dm cache: tune migration throttling
dm mpath: enable WRITE SAME support
dm table: fix write same support
dm bufio: avoid a possible __vmalloc deadlock
dm snapshot: fix error return code in snapshot_ctr
dm cache: fix error return code in cache_create
...

+445 -119
+23 -1
drivers/md/dm-bufio.c
··· 319 319 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, 320 320 enum data_mode *data_mode) 321 321 { 322 + unsigned noio_flag; 323 + void *ptr; 324 + 322 325 if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { 323 326 *data_mode = DATA_MODE_SLAB; 324 327 return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); ··· 335 332 } 336 333 337 334 *data_mode = DATA_MODE_VMALLOC; 338 - return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); 335 + 336 + /* 337 + * __vmalloc allocates the data pages and auxiliary structures with 338 + * gfp_flags that were specified, but pagetables are always allocated 339 + * with GFP_KERNEL, no matter what was specified as gfp_mask. 340 + * 341 + * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that 342 + * all allocations done by this process (including pagetables) are done 343 + * as if GFP_NOIO was specified. 344 + */ 345 + 346 + if (gfp_mask & __GFP_NORETRY) 347 + noio_flag = memalloc_noio_save(); 348 + 349 + ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); 350 + 351 + if (gfp_mask & __GFP_NORETRY) 352 + memalloc_noio_restore(noio_flag); 353 + 354 + return ptr; 339 355 } 340 356 341 357 /*
+2 -2
drivers/md/dm-cache-metadata.c
··· 1044 1044 struct dm_cache_statistics *stats) 1045 1045 { 1046 1046 down_read(&cmd->root_lock); 1047 - memcpy(stats, &cmd->stats, sizeof(*stats)); 1047 + *stats = cmd->stats; 1048 1048 up_read(&cmd->root_lock); 1049 1049 } 1050 1050 ··· 1052 1052 struct dm_cache_statistics *stats) 1053 1053 { 1054 1054 down_write(&cmd->root_lock); 1055 - memcpy(&cmd->stats, stats, sizeof(*stats)); 1055 + cmd->stats = *stats; 1056 1056 up_write(&cmd->root_lock); 1057 1057 } 1058 1058
+2 -2
drivers/md/dm-cache-policy.h
··· 130 130 * 131 131 * Must not block. 132 132 * 133 - * Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK 134 - * would be typical). 133 + * Returns 0 if in cache, -ENOENT if not, < 0 for other errors 134 + * (-EWOULDBLOCK would be typical). 135 135 */ 136 136 int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); 137 137
+53 -47
drivers/md/dm-cache-target.c
··· 205 205 /* 206 206 * writethrough fields. These MUST remain at the end of this 207 207 * structure and the 'cache' member must be the first as it 208 - * is used to determine the offsetof the writethrough fields. 208 + * is used to determine the offset of the writethrough fields. 209 209 */ 210 210 struct cache *cache; 211 211 dm_cblock_t cblock; ··· 393 393 return r; 394 394 } 395 395 396 - /*----------------------------------------------------------------*/ 396 + /*----------------------------------------------------------------*/ 397 397 398 398 static bool is_dirty(struct cache *cache, dm_cblock_t b) 399 399 { ··· 419 419 } 420 420 421 421 /*----------------------------------------------------------------*/ 422 + 422 423 static bool block_size_is_power_of_two(struct cache *cache) 423 424 { 424 425 return cache->sectors_per_block_shift >= 0; ··· 668 667 669 668 /* 670 669 * We can't issue this bio directly, since we're in interrupt 671 - * context. So it get's put on a bio list for processing by the 670 + * context. So it gets put on a bio list for processing by the 672 671 * worker thread. 673 672 */ 674 673 defer_writethrough_bio(pb->cache, bio); ··· 1446 1445 static void do_waker(struct work_struct *ws) 1447 1446 { 1448 1447 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 1448 + policy_tick(cache->policy); 1449 1449 wake_worker(cache); 1450 1450 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 1451 1451 } ··· 1811 1809 1812 1810 static struct kmem_cache *migration_cache; 1813 1811 1814 - static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv) 1812 + #define NOT_CORE_OPTION 1 1813 + 1814 + static int process_config_option(struct cache *cache, const char *key, const char *value) 1815 + { 1816 + unsigned long tmp; 1817 + 1818 + if (!strcasecmp(key, "migration_threshold")) { 1819 + if (kstrtoul(value, 10, &tmp)) 1820 + return -EINVAL; 1821 + 1822 + cache->migration_threshold = tmp; 1823 + return 0; 1824 + } 1825 + 1826 + return NOT_CORE_OPTION; 1827 + } 1828 + 1829 + static int set_config_value(struct cache *cache, const char *key, const char *value) 1830 + { 1831 + int r = process_config_option(cache, key, value); 1832 + 1833 + if (r == NOT_CORE_OPTION) 1834 + r = policy_set_config_value(cache->policy, key, value); 1835 + 1836 + if (r) 1837 + DMWARN("bad config value for %s: %s", key, value); 1838 + 1839 + return r; 1840 + } 1841 + 1842 + static int set_config_values(struct cache *cache, int argc, const char **argv) 1815 1843 { 1816 1844 int r = 0; 1817 1845 ··· 1851 1819 } 1852 1820 1853 1821 while (argc) { 1854 - r = policy_set_config_value(p, argv[0], argv[1]); 1855 - if (r) { 1856 - DMWARN("policy_set_config_value failed: key = '%s', value = '%s'", 1857 - argv[0], argv[1]); 1858 - return r; 1859 - } 1822 + r = set_config_value(cache, argv[0], argv[1]); 1823 + if (r) 1824 + break; 1860 1825 1861 1826 argc -= 2; 1862 1827 argv += 2; ··· 1865 1836 static int create_cache_policy(struct cache *cache, struct cache_args *ca, 1866 1837 char **error) 1867 1838 { 1868 - int r; 1869 - 1870 1839 cache->policy = dm_cache_policy_create(ca->policy_name, 1871 1840 cache->cache_size, 1872 1841 cache->origin_sectors, ··· 1874 1847 return -ENOMEM; 1875 1848 } 1876 1849 1877 - r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); 1878 - if (r) { 1879 - *error = "Error setting cache policy's config values"; 1880 - dm_cache_policy_destroy(cache->policy); 1881 - cache->policy = NULL; 1882 - } 1883 - 1884 - return r; 1850 + return 0; 1885 1851 } 1886 1852 1887 1853 /* ··· 1906 1886 return discard_block_size; 1907 1887 } 1908 1888 1909 - #define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) 1889 + #define DEFAULT_MIGRATION_THRESHOLD 2048 1910 1890 1911 1891 static int cache_create(struct cache_args *ca, struct cache **result) 1912 1892 { ··· 1931 1911 ti->discards_supported = true; 1932 1912 ti->discard_zeroes_data_unsupported = true; 1933 1913 1934 - memcpy(&cache->features, &ca->features, sizeof(cache->features)); 1914 + cache->features = ca->features; 1935 1915 ti->per_bio_data_size = get_per_bio_data_size(cache); 1936 1916 1937 1917 cache->callbacks.congested_fn = cache_is_congested; ··· 1968 1948 r = create_cache_policy(cache, ca, error); 1969 1949 if (r) 1970 1950 goto bad; 1951 + 1971 1952 cache->policy_nr_args = ca->policy_argc; 1953 + cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 1954 + 1955 + r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 1956 + if (r) { 1957 + *error = "Error setting cache policy's config values"; 1958 + goto bad; 1959 + } 1972 1960 1973 1961 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 1974 1962 ca->block_size, may_format, ··· 1995 1967 INIT_LIST_HEAD(&cache->quiesced_migrations); 1996 1968 INIT_LIST_HEAD(&cache->completed_migrations); 1997 1969 INIT_LIST_HEAD(&cache->need_commit_migrations); 1998 - cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 1999 1970 atomic_set(&cache->nr_migrations, 0); 2000 1971 init_waitqueue_head(&cache->migration_wait); 2001 1972 1973 + r = -ENOMEM; 2002 1974 cache->nr_dirty = 0; 2003 1975 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2004 1976 if (!cache->dirty_bitset) { ··· 2545 2517 DMEMIT("Error"); 2546 2518 } 2547 2519 2548 - #define NOT_CORE_OPTION 1 2549 - 2550 - static int process_config_option(struct cache *cache, char **argv) 2551 - { 2552 - unsigned long tmp; 2553 - 2554 - if (!strcasecmp(argv[0], "migration_threshold")) { 2555 - if (kstrtoul(argv[1], 10, &tmp)) 2556 - return -EINVAL; 2557 - 2558 - cache->migration_threshold = tmp; 2559 - return 0; 2560 - } 2561 - 2562 - return NOT_CORE_OPTION; 2563 - } 2564 - 2565 2520 /* 2566 2521 * Supports <key> <value>. 2567 2522 * ··· 2552 2541 */ 2553 2542 static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 2554 2543 { 2555 - int r; 2556 2544 struct cache *cache = ti->private; 2557 2545 2558 2546 if (argc != 2) 2559 2547 return -EINVAL; 2560 2548 2561 - r = process_config_option(cache, argv); 2562 - if (r == NOT_CORE_OPTION) 2563 - return policy_set_config_value(cache->policy, argv[0], argv[1]); 2564 - 2565 - return r; 2549 + return set_config_value(cache, argv[0], argv[1]); 2566 2550 } 2567 2551 2568 2552 static int cache_iterate_devices(struct dm_target *ti, ··· 2615 2609 2616 2610 static struct target_type cache_target = { 2617 2611 .name = "cache", 2618 - .version = {1, 1, 0}, 2612 + .version = {1, 1, 1}, 2619 2613 .module = THIS_MODULE, 2620 2614 .ctr = cache_ctr, 2621 2615 .dtr = cache_dtr,
+1
drivers/md/dm-mpath.c
··· 907 907 908 908 ti->num_flush_bios = 1; 909 909 ti->num_discard_bios = 1; 910 + ti->num_write_same_bios = 1; 910 911 911 912 return 0; 912 913
+1
drivers/md/dm-snap.c
··· 1121 1121 s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 1122 1122 if (!s->pending_pool) { 1123 1123 ti->error = "Could not allocate mempool for pending exceptions"; 1124 + r = -ENOMEM; 1124 1125 goto bad_pending_pool; 1125 1126 } 1126 1127
+8 -7
drivers/md/dm-stripe.c
··· 94 94 static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) 95 95 { 96 96 struct stripe_c *sc; 97 - sector_t width; 97 + sector_t width, tmp_len; 98 98 uint32_t stripes; 99 99 uint32_t chunk_size; 100 100 int r; ··· 116 116 } 117 117 118 118 width = ti->len; 119 - if (sector_div(width, chunk_size)) { 120 - ti->error = "Target length not divisible by " 121 - "chunk size"; 122 - return -EINVAL; 123 - } 124 - 125 119 if (sector_div(width, stripes)) { 126 120 ti->error = "Target length not divisible by " 127 121 "number of stripes"; 122 + return -EINVAL; 123 + } 124 + 125 + tmp_len = width; 126 + if (sector_div(tmp_len, chunk_size)) { 127 + ti->error = "Target length not divisible by " 128 + "chunk size"; 128 129 return -EINVAL; 129 130 } 130 131
+1 -1
drivers/md/dm-table.c
··· 1442 1442 return false; 1443 1443 1444 1444 if (!ti->type->iterate_devices || 1445 - !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) 1445 + ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) 1446 1446 return false; 1447 1447 } 1448 1448
+31 -5
drivers/md/dm-thin-metadata.c
··· 1645 1645 return r; 1646 1646 } 1647 1647 1648 - static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1648 + static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count) 1649 1649 { 1650 1650 int r; 1651 1651 dm_block_t old_count; 1652 1652 1653 - r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count); 1653 + r = dm_sm_get_nr_blocks(sm, &old_count); 1654 1654 if (r) 1655 1655 return r; 1656 1656 ··· 1658 1658 return 0; 1659 1659 1660 1660 if (new_count < old_count) { 1661 - DMERR("cannot reduce size of data device"); 1661 + DMERR("cannot reduce size of space map"); 1662 1662 return -EINVAL; 1663 1663 } 1664 1664 1665 - return dm_sm_extend(pmd->data_sm, new_count - old_count); 1665 + return dm_sm_extend(sm, new_count - old_count); 1666 1666 } 1667 1667 1668 1668 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) ··· 1671 1671 1672 1672 down_write(&pmd->root_lock); 1673 1673 if (!pmd->fail_io) 1674 - r = __resize_data_dev(pmd, new_count); 1674 + r = __resize_space_map(pmd->data_sm, new_count); 1675 + up_write(&pmd->root_lock); 1676 + 1677 + return r; 1678 + } 1679 + 1680 + int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1681 + { 1682 + int r = -EINVAL; 1683 + 1684 + down_write(&pmd->root_lock); 1685 + if (!pmd->fail_io) 1686 + r = __resize_space_map(pmd->metadata_sm, new_count); 1675 1687 up_write(&pmd->root_lock); 1676 1688 1677 1689 return r; ··· 1695 1683 pmd->read_only = true; 1696 1684 dm_bm_set_read_only(pmd->bm); 1697 1685 up_write(&pmd->root_lock); 1686 + } 1687 + 1688 + int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, 1689 + dm_block_t threshold, 1690 + dm_sm_threshold_fn fn, 1691 + void *context) 1692 + { 1693 + int r; 1694 + 1695 + down_write(&pmd->root_lock); 1696 + r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); 1697 + up_write(&pmd->root_lock); 1698 + 1699 + return r; 1698 1700 }
+7
drivers/md/dm-thin-metadata.h
··· 8 8 #define DM_THIN_METADATA_H 9 9 10 10 #include "persistent-data/dm-block-manager.h" 11 + #include "persistent-data/dm-space-map.h" 11 12 12 13 #define THIN_METADATA_BLOCK_SIZE 4096 13 14 ··· 186 185 * blocks would be lost. 187 186 */ 188 187 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); 188 + int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); 189 189 190 190 /* 191 191 * Flicks the underlying block manager into read only mode, so you know 192 192 * that nothing is changing. 193 193 */ 194 194 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); 195 + 196 + int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, 197 + dm_block_t threshold, 198 + dm_sm_threshold_fn fn, 199 + void *context); 195 200 196 201 /*----------------------------------------------------------------*/ 197 202
+161 -41
drivers/md/dm-thin.c
··· 922 922 return r; 923 923 924 924 if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) { 925 - DMWARN("%s: reached low water mark, sending event.", 925 + DMWARN("%s: reached low water mark for data device: sending event.", 926 926 dm_device_name(pool->pool_md)); 927 927 spin_lock_irqsave(&pool->lock, flags); 928 928 pool->low_water_triggered = 1; ··· 1281 1281 bio_io_error(bio); 1282 1282 } 1283 1283 1284 + /* 1285 + * FIXME: should we also commit due to size of transaction, measured in 1286 + * metadata blocks? 1287 + */ 1284 1288 static int need_commit_due_to_time(struct pool *pool) 1285 1289 { 1286 1290 return jiffies < pool->last_commit_jiffies || ··· 1913 1909 return r; 1914 1910 } 1915 1911 1912 + static void metadata_low_callback(void *context) 1913 + { 1914 + struct pool *pool = context; 1915 + 1916 + DMWARN("%s: reached low water mark for metadata device: sending event.", 1917 + dm_device_name(pool->pool_md)); 1918 + 1919 + dm_table_event(pool->ti->table); 1920 + } 1921 + 1922 + static sector_t get_metadata_dev_size(struct block_device *bdev) 1923 + { 1924 + sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; 1925 + char buffer[BDEVNAME_SIZE]; 1926 + 1927 + if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) { 1928 + DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", 1929 + bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS); 1930 + metadata_dev_size = THIN_METADATA_MAX_SECTORS_WARNING; 1931 + } 1932 + 1933 + return metadata_dev_size; 1934 + } 1935 + 1936 + static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev) 1937 + { 1938 + sector_t metadata_dev_size = get_metadata_dev_size(bdev); 1939 + 1940 + sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 1941 + 1942 + return metadata_dev_size; 1943 + } 1944 + 1945 + /* 1946 + * When a metadata threshold is crossed a dm event is triggered, and 1947 + * userland should respond by growing the metadata device. We could let 1948 + * userland set the threshold, like we do with the data threshold, but I'm 1949 + * not sure they know enough to do this well. 1950 + */ 1951 + static dm_block_t calc_metadata_threshold(struct pool_c *pt) 1952 + { 1953 + /* 1954 + * 4M is ample for all ops with the possible exception of thin 1955 + * device deletion which is harmless if it fails (just retry the 1956 + * delete after you've grown the device). 1957 + */ 1958 + dm_block_t quarter = get_metadata_dev_size_in_blocks(pt->metadata_dev->bdev) / 4; 1959 + return min((dm_block_t)1024ULL /* 4M */, quarter); 1960 + } 1961 + 1916 1962 /* 1917 1963 * thin-pool <metadata dev> <data dev> 1918 1964 * <data block size (sectors)> ··· 1985 1931 unsigned long block_size; 1986 1932 dm_block_t low_water_blocks; 1987 1933 struct dm_dev *metadata_dev; 1988 - sector_t metadata_dev_size; 1989 - char b[BDEVNAME_SIZE]; 1934 + fmode_t metadata_mode; 1990 1935 1991 1936 /* 1992 1937 * FIXME Remove validation from scope of lock. ··· 1997 1944 r = -EINVAL; 1998 1945 goto out_unlock; 1999 1946 } 1947 + 2000 1948 as.argc = argc; 2001 1949 as.argv = argv; 2002 1950 2003 - r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev); 1951 + /* 1952 + * Set default pool features. 1953 + */ 1954 + pool_features_init(&pf); 1955 + 1956 + dm_consume_args(&as, 4); 1957 + r = parse_pool_features(&as, &pf, ti); 1958 + if (r) 1959 + goto out_unlock; 1960 + 1961 + metadata_mode = FMODE_READ | ((pf.mode == PM_READ_ONLY) ? 0 : FMODE_WRITE); 1962 + r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev); 2004 1963 if (r) { 2005 1964 ti->error = "Error opening metadata block device"; 2006 1965 goto out_unlock; 2007 1966 } 2008 1967 2009 - metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; 2010 - if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) 2011 - DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", 2012 - bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); 1968 + /* 1969 + * Run for the side-effect of possibly issuing a warning if the 1970 + * device is too big. 1971 + */ 1972 + (void) get_metadata_dev_size(metadata_dev->bdev); 2013 1973 2014 1974 r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); 2015 1975 if (r) { ··· 2044 1978 r = -EINVAL; 2045 1979 goto out; 2046 1980 } 2047 - 2048 - /* 2049 - * Set default pool features. 2050 - */ 2051 - pool_features_init(&pf); 2052 - 2053 - dm_consume_args(&as, 4); 2054 - r = parse_pool_features(&as, &pf, ti); 2055 - if (r) 2056 - goto out; 2057 1981 2058 1982 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 2059 1983 if (!pt) { ··· 2096 2040 } 2097 2041 ti->private = pt; 2098 2042 2043 + r = dm_pool_register_metadata_threshold(pt->pool->pmd, 2044 + calc_metadata_threshold(pt), 2045 + metadata_low_callback, 2046 + pool); 2047 + if (r) 2048 + goto out_free_pt; 2049 + 2099 2050 pt->callbacks.congested_fn = pool_is_congested; 2100 2051 dm_table_add_target_callbacks(ti->table, &pt->callbacks); 2101 2052 ··· 2142 2079 return r; 2143 2080 } 2144 2081 2082 + static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) 2083 + { 2084 + int r; 2085 + struct pool_c *pt = ti->private; 2086 + struct pool *pool = pt->pool; 2087 + sector_t data_size = ti->len; 2088 + dm_block_t sb_data_size; 2089 + 2090 + *need_commit = false; 2091 + 2092 + (void) sector_div(data_size, pool->sectors_per_block); 2093 + 2094 + r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); 2095 + if (r) { 2096 + DMERR("failed to retrieve data device size"); 2097 + return r; 2098 + } 2099 + 2100 + if (data_size < sb_data_size) { 2101 + DMERR("pool target (%llu blocks) too small: expected %llu", 2102 + (unsigned long long)data_size, sb_data_size); 2103 + return -EINVAL; 2104 + 2105 + } else if (data_size > sb_data_size) { 2106 + r = dm_pool_resize_data_dev(pool->pmd, data_size); 2107 + if (r) { 2108 + DMERR("failed to resize data device"); 2109 + set_pool_mode(pool, PM_READ_ONLY); 2110 + return r; 2111 + } 2112 + 2113 + *need_commit = true; 2114 + } 2115 + 2116 + return 0; 2117 + } 2118 + 2119 + static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) 2120 + { 2121 + int r; 2122 + struct pool_c *pt = ti->private; 2123 + struct pool *pool = pt->pool; 2124 + dm_block_t metadata_dev_size, sb_metadata_dev_size; 2125 + 2126 + *need_commit = false; 2127 + 2128 + metadata_dev_size = get_metadata_dev_size(pool->md_dev); 2129 + 2130 + r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size); 2131 + if (r) { 2132 + DMERR("failed to retrieve data device size"); 2133 + return r; 2134 + } 2135 + 2136 + if (metadata_dev_size < sb_metadata_dev_size) { 2137 + DMERR("metadata device (%llu sectors) too small: expected %llu", 2138 + metadata_dev_size, sb_metadata_dev_size); 2139 + return -EINVAL; 2140 + 2141 + } else if (metadata_dev_size > sb_metadata_dev_size) { 2142 + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); 2143 + if (r) { 2144 + DMERR("failed to resize metadata device"); 2145 + return r; 2146 + } 2147 + 2148 + *need_commit = true; 2149 + } 2150 + 2151 + return 0; 2152 + } 2153 + 2145 2154 /* 2146 2155 * Retrieves the number of blocks of the data device from 2147 2156 * the superblock and compares it to the actual device size, ··· 2228 2093 static int pool_preresume(struct dm_target *ti) 2229 2094 { 2230 2095 int r; 2096 + bool need_commit1, need_commit2; 2231 2097 struct pool_c *pt = ti->private; 2232 2098 struct pool *pool = pt->pool; 2233 - sector_t data_size = ti->len; 2234 - dm_block_t sb_data_size; 2235 2099 2236 2100 /* 2237 2101 * Take control of the pool object. ··· 2239 2105 if (r) 2240 2106 return r; 2241 2107 2242 - (void) sector_div(data_size, pool->sectors_per_block); 2243 - 2244 - r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); 2245 - if (r) { 2246 - DMERR("failed to retrieve data device size"); 2108 + r = maybe_resize_data_dev(ti, &need_commit1); 2109 + if (r) 2247 2110 return r; 2248 - } 2249 2111 2250 - if (data_size < sb_data_size) { 2251 - DMERR("pool target too small, is %llu blocks (expected %llu)", 2252 - (unsigned long long)data_size, sb_data_size); 2253 - return -EINVAL; 2112 + r = maybe_resize_metadata_dev(ti, &need_commit2); 2113 + if (r) 2114 + return r; 2254 2115 2255 - } else if (data_size > sb_data_size) { 2256 - r = dm_pool_resize_data_dev(pool->pmd, data_size); 2257 - if (r) { 2258 - DMERR("failed to resize data device"); 2259 - /* FIXME Stricter than necessary: Rollback transaction instead here */ 2260 - set_pool_mode(pool, PM_READ_ONLY); 2261 - return r; 2262 - } 2263 - 2116 + if (need_commit1 || need_commit2) 2264 2117 (void) commit_or_fallback(pool); 2265 - } 2266 2118 2267 2119 return 0; 2268 2120 } ··· 2669 2549 .name = "thin-pool", 2670 2550 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 2671 2551 DM_TARGET_IMMUTABLE, 2672 - .version = {1, 7, 0}, 2552 + .version = {1, 8, 0}, 2673 2553 .module = THIS_MODULE, 2674 2554 .ctr = pool_ctr, 2675 2555 .dtr = pool_dtr,
+2 -1
drivers/md/persistent-data/dm-space-map-disk.c
··· 248 248 .new_block = sm_disk_new_block, 249 249 .commit = sm_disk_commit, 250 250 .root_size = sm_disk_root_size, 251 - .copy_root = sm_disk_copy_root 251 + .copy_root = sm_disk_copy_root, 252 + .register_threshold_callback = NULL 252 253 }; 253 254 254 255 struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
+115 -12
drivers/md/persistent-data/dm-space-map-metadata.c
··· 17 17 /*----------------------------------------------------------------*/ 18 18 19 19 /* 20 + * An edge triggered threshold. 21 + */ 22 + struct threshold { 23 + bool threshold_set; 24 + bool value_set; 25 + dm_block_t threshold; 26 + dm_block_t current_value; 27 + dm_sm_threshold_fn fn; 28 + void *context; 29 + }; 30 + 31 + static void threshold_init(struct threshold *t) 32 + { 33 + t->threshold_set = false; 34 + t->value_set = false; 35 + } 36 + 37 + static void set_threshold(struct threshold *t, dm_block_t value, 38 + dm_sm_threshold_fn fn, void *context) 39 + { 40 + t->threshold_set = true; 41 + t->threshold = value; 42 + t->fn = fn; 43 + t->context = context; 44 + } 45 + 46 + static bool below_threshold(struct threshold *t, dm_block_t value) 47 + { 48 + return t->threshold_set && value <= t->threshold; 49 + } 50 + 51 + static bool threshold_already_triggered(struct threshold *t) 52 + { 53 + return t->value_set && below_threshold(t, t->current_value); 54 + } 55 + 56 + static void check_threshold(struct threshold *t, dm_block_t value) 57 + { 58 + if (below_threshold(t, value) && 59 + !threshold_already_triggered(t)) 60 + t->fn(t->context); 61 + 62 + t->value_set = true; 63 + t->current_value = value; 64 + } 65 + 66 + /*----------------------------------------------------------------*/ 67 + 68 + /* 20 69 * Space map interface. 21 70 * 22 71 * The low level disk format is written using the standard btree and ··· 103 54 unsigned allocated_this_transaction; 104 55 unsigned nr_uncommitted; 105 56 struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; 57 + 58 + struct threshold threshold; 106 59 }; 107 60 108 61 static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) ··· 193 142 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 194 143 195 144 kfree(smm); 196 - } 197 - 198 - static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 199 - { 200 - DMERR("doesn't support extend"); 201 - return -EINVAL; 202 145 } 203 146 204 147 static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) ··· 380 335 381 336 static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 382 337 { 338 + dm_block_t count; 339 + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 340 + 383 341 int r = sm_metadata_new_block_(sm, b); 384 342 if (r) 385 343 DMERR("unable to allocate new metadata block"); 344 + 345 + r = sm_metadata_get_nr_free(sm, &count); 346 + if (r) 347 + DMERR("couldn't get free block count"); 348 + 349 + check_threshold(&smm->threshold, count); 350 + 386 351 return r; 387 352 } 388 353 ··· 408 353 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 409 354 smm->begin = 0; 410 355 smm->allocated_this_transaction = 0; 356 + 357 + return 0; 358 + } 359 + 360 + static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, 361 + dm_block_t threshold, 362 + dm_sm_threshold_fn fn, 363 + void *context) 364 + { 365 + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 366 + 367 + set_threshold(&smm->threshold, threshold, fn, context); 411 368 412 369 return 0; 413 370 } ··· 449 382 return 0; 450 383 } 451 384 385 + static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); 386 + 452 387 static struct dm_space_map ops = { 453 388 .destroy = sm_metadata_destroy, 454 389 .extend = sm_metadata_extend, ··· 464 395 .new_block = sm_metadata_new_block, 465 396 .commit = sm_metadata_commit, 466 397 .root_size = sm_metadata_root_size, 467 - .copy_root = sm_metadata_copy_root 398 + .copy_root = sm_metadata_copy_root, 399 + .register_threshold_callback = sm_metadata_register_threshold_callback 468 400 }; 469 401 470 402 /*----------------------------------------------------------------*/ ··· 480 410 481 411 static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 482 412 { 483 - DMERR("boostrap doesn't support extend"); 413 + DMERR("bootstrap doesn't support extend"); 484 414 485 415 return -EINVAL; 486 416 } ··· 520 450 static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 521 451 uint32_t count) 522 452 { 523 - DMERR("boostrap doesn't support set_count"); 453 + DMERR("bootstrap doesn't support set_count"); 524 454 525 455 return -EINVAL; 526 456 } ··· 561 491 562 492 static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 563 493 { 564 - DMERR("boostrap doesn't support root_size"); 494 + DMERR("bootstrap doesn't support root_size"); 565 495 566 496 return -EINVAL; 567 497 } ··· 569 499 static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 570 500 size_t max) 571 501 { 572 - DMERR("boostrap doesn't support copy_root"); 502 + DMERR("bootstrap doesn't support copy_root"); 573 503 574 504 return -EINVAL; 575 505 } ··· 587 517 .new_block = sm_bootstrap_new_block, 588 518 .commit = sm_bootstrap_commit, 589 519 .root_size = sm_bootstrap_root_size, 590 - .copy_root = sm_bootstrap_copy_root 520 + .copy_root = sm_bootstrap_copy_root, 521 + .register_threshold_callback = NULL 591 522 }; 523 + 524 + /*----------------------------------------------------------------*/ 525 + 526 + static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 527 + { 528 + int r, i; 529 + enum allocation_event ev; 530 + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 531 + dm_block_t old_len = smm->ll.nr_blocks; 532 + 533 + /* 534 + * Flick into a mode where all blocks get allocated in the new area. 535 + */ 536 + smm->begin = old_len; 537 + memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 538 + 539 + /* 540 + * Extend. 541 + */ 542 + r = sm_ll_extend(&smm->ll, extra_blocks); 543 + 544 + /* 545 + * Switch back to normal behaviour. 546 + */ 547 + memcpy(&smm->sm, &ops, sizeof(smm->sm)); 548 + for (i = old_len; !r && i < smm->begin; i++) 549 + r = sm_ll_inc(&smm->ll, i, &ev); 550 + 551 + return r; 552 + } 592 553 593 554 /*----------------------------------------------------------------*/ 594 555 ··· 650 549 smm->recursion_count = 0; 651 550 smm->allocated_this_transaction = 0; 652 551 smm->nr_uncommitted = 0; 552 + threshold_init(&smm->threshold); 653 553 654 554 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 655 555 ··· 692 590 smm->recursion_count = 0; 693 591 smm->allocated_this_transaction = 0; 694 592 smm->nr_uncommitted = 0; 593 + threshold_init(&smm->threshold); 695 594 696 595 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 697 596 return 0;
+23
drivers/md/persistent-data/dm-space-map.h
··· 9 9 10 10 #include "dm-block-manager.h" 11 11 12 + typedef void (*dm_sm_threshold_fn)(void *context); 13 + 12 14 /* 13 15 * struct dm_space_map keeps a record of how many times each block in a device 14 16 * is referenced. It needs to be fixed on disk as part of the transaction. ··· 61 59 */ 62 60 int (*root_size)(struct dm_space_map *sm, size_t *result); 63 61 int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len); 62 + 63 + /* 64 + * You can register one threshold callback which is edge-triggered 65 + * when the free space in the space map drops below the threshold. 66 + */ 67 + int (*register_threshold_callback)(struct dm_space_map *sm, 68 + dm_block_t threshold, 69 + dm_sm_threshold_fn fn, 70 + void *context); 64 71 }; 65 72 66 73 /*----------------------------------------------------------------*/ ··· 141 130 { 142 131 return sm->copy_root(sm, copy_to_here_le, len); 143 132 } 133 + 134 + static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm, 135 + dm_block_t threshold, 136 + dm_sm_threshold_fn fn, 137 + void *context) 138 + { 139 + if (sm->register_threshold_callback) 140 + return sm->register_threshold_callback(sm, threshold, fn, context); 141 + 142 + return -EINVAL; 143 + } 144 + 144 145 145 146 #endif /* _LINUX_DM_SPACE_MAP_H */
+15
include/linux/device-mapper.h
··· 79 79 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, 80 80 struct bio_vec *biovec, int max_size); 81 81 82 + /* 83 + * These iteration functions are typically used to check (and combine) 84 + * properties of underlying devices. 85 + * E.g. Does at least one underlying device support flush? 86 + * Does any underlying device not support WRITE_SAME? 87 + * 88 + * The callout function is called once for each contiguous section of 89 + * an underlying device. State can be maintained in *data. 90 + * Return non-zero to stop iterating through any further devices. 91 + */ 82 92 typedef int (*iterate_devices_callout_fn) (struct dm_target *ti, 83 93 struct dm_dev *dev, 84 94 sector_t start, sector_t len, 85 95 void *data); 86 96 97 + /* 98 + * This function must iterate through each section of device used by the 99 + * target until it encounters a non-zero return code, which it then returns. 100 + * Returns zero if no callout returned non-zero. 101 + */ 87 102 typedef int (*dm_iterate_devices_fn) (struct dm_target *ti, 88 103 iterate_devices_callout_fn fn, 89 104 void *data);