Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'dm-4.2-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

- revert a request-based DM core change that caused IO latency to
increase and adversely impact both throughput and system load

- fix a use-after-free bug in DM core's device cleanup

- a couple of DM btree removal fixes (used by dm-thinp)

- a DM thinp fix for order-5 allocation failure

- a DM thinp fix to not degrade to read-only metadata mode when in
out-of-data-space mode for longer than the 'no_space_timeout'

- fix a long-standing oversight in both dm-thinp and dm-cache by now
exporting 'needs_check' in status if it was set in metadata

- fix an embarrassing dm-cache busy-loop that caused worker threads to
eat CPU even if no IO was actively being issued to the cache device

* tag 'dm-4.2-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm cache: avoid calls to prealloc_free_structs() if possible
dm cache: avoid preallocation if no work in writeback_some_dirty_blocks()
dm cache: do not wake_worker() in free_migration()
dm cache: display 'needs_check' in status if it is set
dm thin: display 'needs_check' in status if it is set
dm thin: stay in out-of-data-space mode once no_space_timeout expires
dm: fix use after free crash due to incorrect cleanup sequence
Revert "dm: only run the queue on completion if congested or no requests pending"
dm btree: silence lockdep lock inversion in dm_btree_del()
dm thin: allocate the cell_sort_array dynamically
dm btree remove: fix bug in redistribute3

+82 -35
+6
Documentation/device-mapper/cache.txt
··· 258 258 no further I/O will be permitted and the status will just 259 259 contain the string 'Fail'. The userspace recovery tools 260 260 should then be used. 261 + needs_check : 'needs_check' if set, '-' if not set 262 + A metadata operation has failed, resulting in the needs_check 263 + flag being set in the metadata's superblock. The metadata 264 + device must be deactivated and checked/repaired before the 265 + cache can be made fully operational again. '-' indicates 266 + needs_check is not set. 261 267 262 268 Messages 263 269 --------
+8 -1
Documentation/device-mapper/thin-provisioning.txt
··· 296 296 underlying device. When this is enabled when loading the table, 297 297 it can get disabled if the underlying device doesn't support it. 298 298 299 - ro|rw 299 + ro|rw|out_of_data_space 300 300 If the pool encounters certain types of device failures it will 301 301 drop into a read-only metadata mode in which no changes to 302 302 the pool metadata (like allocating new blocks) are permitted. ··· 313 313 'no_space_timeout' expires. The 'no_space_timeout' dm-thin-pool 314 314 module parameter can be used to change this timeout -- it 315 315 defaults to 60 seconds but may be disabled using a value of 0. 316 + 317 + needs_check 318 + A metadata operation has failed, resulting in the needs_check 319 + flag being set in the metadata's superblock. The metadata 320 + device must be deactivated and checked/repaired before the 321 + thin-pool can be made fully operational again. '-' indicates 322 + needs_check is not set. 316 323 317 324 iii) Messages 318 325
+23 -15
drivers/md/dm-cache-target.c
··· 424 424 wake_up(&cache->migration_wait); 425 425 426 426 mempool_free(mg, cache->migration_pool); 427 - wake_worker(cache); 428 427 } 429 428 430 429 static int prealloc_data_structs(struct cache *cache, struct prealloc *p) ··· 1946 1947 1947 1948 static void process_deferred_bios(struct cache *cache) 1948 1949 { 1950 + bool prealloc_used = false; 1949 1951 unsigned long flags; 1950 1952 struct bio_list bios; 1951 1953 struct bio *bio; ··· 1981 1981 process_discard_bio(cache, &structs, bio); 1982 1982 else 1983 1983 process_bio(cache, &structs, bio); 1984 + prealloc_used = true; 1984 1985 } 1985 1986 1986 - prealloc_free_structs(cache, &structs); 1987 + if (prealloc_used) 1988 + prealloc_free_structs(cache, &structs); 1987 1989 } 1988 1990 1989 1991 static void process_deferred_cells(struct cache *cache) 1990 1992 { 1993 + bool prealloc_used = false; 1991 1994 unsigned long flags; 1992 1995 struct dm_bio_prison_cell *cell, *tmp; 1993 1996 struct list_head cells; ··· 2018 2015 } 2019 2016 2020 2017 process_cell(cache, &structs, cell); 2018 + prealloc_used = true; 2021 2019 } 2022 2020 2023 - prealloc_free_structs(cache, &structs); 2021 + if (prealloc_used) 2022 + prealloc_free_structs(cache, &structs); 2024 2023 } 2025 2024 2026 2025 static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) ··· 2067 2062 2068 2063 static void writeback_some_dirty_blocks(struct cache *cache) 2069 2064 { 2070 - int r = 0; 2065 + bool prealloc_used = false; 2071 2066 dm_oblock_t oblock; 2072 2067 dm_cblock_t cblock; 2073 2068 struct prealloc structs; ··· 2077 2072 memset(&structs, 0, sizeof(structs)); 2078 2073 2079 2074 while (spare_migration_bandwidth(cache)) { 2080 - if (prealloc_data_structs(cache, &structs)) 2081 - break; 2075 + if (policy_writeback_work(cache->policy, &oblock, &cblock, busy)) 2076 + break; /* no work to do */ 2082 2077 2083 - r = policy_writeback_work(cache->policy, &oblock, &cblock, busy); 2084 - if (r) 2085 - break; 2086 - 2087 - r = 
get_cell(cache, oblock, &structs, &old_ocell); 2088 - if (r) { 2078 + if (prealloc_data_structs(cache, &structs) || 2079 + get_cell(cache, oblock, &structs, &old_ocell)) { 2089 2080 policy_set_dirty(cache->policy, oblock); 2090 2081 break; 2091 2082 } 2092 2083 2093 2084 writeback(cache, &structs, oblock, cblock, old_ocell); 2085 + prealloc_used = true; 2094 2086 } 2095 2087 2096 - prealloc_free_structs(cache, &structs); 2088 + if (prealloc_used) 2089 + prealloc_free_structs(cache, &structs); 2097 2090 } 2098 2091 2099 2092 /*---------------------------------------------------------------- ··· 3499 3496 * <#demotions> <#promotions> <#dirty> 3500 3497 * <#features> <features>* 3501 3498 * <#core args> <core args> 3502 - * <policy name> <#policy args> <policy args>* <cache metadata mode> 3499 + * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check> 3503 3500 */ 3504 3501 static void cache_status(struct dm_target *ti, status_type_t type, 3505 3502 unsigned status_flags, char *result, unsigned maxlen) ··· 3584 3581 DMEMIT("ro "); 3585 3582 else 3586 3583 DMEMIT("rw "); 3584 + 3585 + if (dm_cache_metadata_needs_check(cache->cmd)) 3586 + DMEMIT("needs_check "); 3587 + else 3588 + DMEMIT("- "); 3587 3589 3588 3590 break; 3589 3591 ··· 3828 3820 3829 3821 static struct target_type cache_target = { 3830 3822 .name = "cache", 3831 - .version = {1, 7, 0}, 3823 + .version = {1, 8, 0}, 3832 3824 .module = THIS_MODULE, 3833 3825 .ctr = cache_ctr, 3834 3826 .dtr = cache_dtr,
+37 -7
drivers/md/dm-thin.c
··· 18 18 #include <linux/init.h> 19 19 #include <linux/module.h> 20 20 #include <linux/slab.h> 21 + #include <linux/vmalloc.h> 21 22 #include <linux/sort.h> 22 23 #include <linux/rbtree.h> 23 24 ··· 269 268 process_mapping_fn process_prepared_mapping; 270 269 process_mapping_fn process_prepared_discard; 271 270 272 - struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE]; 271 + struct dm_bio_prison_cell **cell_sort_array; 273 272 }; 274 273 275 274 static enum pool_mode get_pool_mode(struct pool *pool); ··· 2282 2281 queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); 2283 2282 } 2284 2283 2284 + static void notify_of_pool_mode_change_to_oods(struct pool *pool); 2285 + 2285 2286 /* 2286 2287 * We're holding onto IO to allow userland time to react. After the 2287 2288 * timeout either the pool will have been resized (and thus back in 2288 - * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO. 2289 + * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space. 
2289 2290 */ 2290 2291 static void do_no_space_timeout(struct work_struct *ws) 2291 2292 { 2292 2293 struct pool *pool = container_of(to_delayed_work(ws), struct pool, 2293 2294 no_space_timeout); 2294 2295 2295 - if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) 2296 - set_pool_mode(pool, PM_READ_ONLY); 2296 + if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { 2297 + pool->pf.error_if_no_space = true; 2298 + notify_of_pool_mode_change_to_oods(pool); 2299 + error_retry_list(pool); 2300 + } 2297 2301 } 2298 2302 2299 2303 /*----------------------------------------------------------------*/ ··· 2374 2368 dm_table_event(pool->ti->table); 2375 2369 DMINFO("%s: switching pool to %s mode", 2376 2370 dm_device_name(pool->pool_md), new_mode); 2371 + } 2372 + 2373 + static void notify_of_pool_mode_change_to_oods(struct pool *pool) 2374 + { 2375 + if (!pool->pf.error_if_no_space) 2376 + notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)"); 2377 + else 2378 + notify_of_pool_mode_change(pool, "out-of-data-space (error IO)"); 2377 2379 } 2378 2380 2379 2381 static bool passdown_enabled(struct pool_c *pt) ··· 2468 2454 * frequently seeing this mode. 
2469 2455 */ 2470 2456 if (old_mode != new_mode) 2471 - notify_of_pool_mode_change(pool, "out-of-data-space"); 2457 + notify_of_pool_mode_change_to_oods(pool); 2472 2458 pool->process_bio = process_bio_read_only; 2473 2459 pool->process_discard = process_discard_bio; 2474 2460 pool->process_cell = process_cell_read_only; ··· 2791 2777 { 2792 2778 __pool_table_remove(pool); 2793 2779 2780 + vfree(pool->cell_sort_array); 2794 2781 if (dm_pool_metadata_close(pool->pmd) < 0) 2795 2782 DMWARN("%s: dm_pool_metadata_close() failed.", __func__); 2796 2783 ··· 2904 2889 goto bad_mapping_pool; 2905 2890 } 2906 2891 2892 + pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE); 2893 + if (!pool->cell_sort_array) { 2894 + *error = "Error allocating cell sort array"; 2895 + err_p = ERR_PTR(-ENOMEM); 2896 + goto bad_sort_array; 2897 + } 2898 + 2907 2899 pool->ref_count = 1; 2908 2900 pool->last_commit_jiffies = jiffies; 2909 2901 pool->pool_md = pool_md; ··· 2919 2897 2920 2898 return pool; 2921 2899 2900 + bad_sort_array: 2901 + mempool_destroy(pool->mapping_pool); 2922 2902 bad_mapping_pool: 2923 2903 dm_deferred_set_destroy(pool->all_io_ds); 2924 2904 bad_all_io_ds: ··· 3738 3714 * Status line is: 3739 3715 * <transaction id> <used metadata sectors>/<total metadata sectors> 3740 3716 * <used data sectors>/<total data sectors> <held metadata root> 3717 + * <pool mode> <discard config> <no space config> <needs_check> 3741 3718 */ 3742 3719 static void pool_status(struct dm_target *ti, status_type_t type, 3743 3720 unsigned status_flags, char *result, unsigned maxlen) ··· 3839 3814 DMEMIT("error_if_no_space "); 3840 3815 else 3841 3816 DMEMIT("queue_if_no_space "); 3817 + 3818 + if (dm_pool_metadata_needs_check(pool->pmd)) 3819 + DMEMIT("needs_check "); 3820 + else 3821 + DMEMIT("- "); 3842 3822 3843 3823 break; 3844 3824 ··· 3948 3918 .name = "thin-pool", 3949 3919 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 3950 3920 
DM_TARGET_IMMUTABLE, 3951 - .version = {1, 15, 0}, 3921 + .version = {1, 16, 0}, 3952 3922 .module = THIS_MODULE, 3953 3923 .ctr = pool_ctr, 3954 3924 .dtr = pool_dtr, ··· 4335 4305 4336 4306 static struct target_type thin_target = { 4337 4307 .name = "thin", 4338 - .version = {1, 15, 0}, 4308 + .version = {1, 16, 0}, 4339 4309 .module = THIS_MODULE, 4340 4310 .ctr = thin_ctr, 4341 4311 .dtr = thin_dtr,
+4 -8
drivers/md/dm.c
··· 1067 1067 */ 1068 1068 static void rq_completed(struct mapped_device *md, int rw, bool run_queue) 1069 1069 { 1070 - int nr_requests_pending; 1071 - 1072 1070 atomic_dec(&md->pending[rw]); 1073 1071 1074 1072 /* nudge anyone waiting on suspend queue */ 1075 - nr_requests_pending = md_in_flight(md); 1076 - if (!nr_requests_pending) 1073 + if (!md_in_flight(md)) 1077 1074 wake_up(&md->wait); 1078 1075 1079 1076 /* ··· 1082 1085 if (run_queue) { 1083 1086 if (md->queue->mq_ops) 1084 1087 blk_mq_run_hw_queues(md->queue, true); 1085 - else if (!nr_requests_pending || 1086 - (nr_requests_pending >= md->queue->nr_congestion_on)) 1088 + else 1087 1089 blk_run_queue_async(md->queue); 1088 1090 } 1089 1091 ··· 2277 2281 2278 2282 static void cleanup_mapped_device(struct mapped_device *md) 2279 2283 { 2280 - cleanup_srcu_struct(&md->io_barrier); 2281 - 2282 2284 if (md->wq) 2283 2285 destroy_workqueue(md->wq); 2284 2286 if (md->kworker_task) ··· 2287 2293 mempool_destroy(md->rq_pool); 2288 2294 if (md->bs) 2289 2295 bioset_free(md->bs); 2296 + 2297 + cleanup_srcu_struct(&md->io_barrier); 2290 2298 2291 2299 if (md->disk) { 2292 2300 spin_lock(&_minor_lock);
+3 -3
drivers/md/persistent-data/dm-btree-remove.c
··· 309 309 310 310 if (s < 0 && nr_center < -s) { 311 311 /* not enough in central node */ 312 - shift(left, center, nr_center); 313 - s = nr_center - target; 312 + shift(left, center, -nr_center); 313 + s += nr_center; 314 314 shift(left, right, s); 315 315 nr_right += s; 316 316 } else ··· 323 323 if (s > 0 && nr_center < s) { 324 324 /* not enough in central node */ 325 325 shift(center, right, nr_center); 326 - s = target - nr_center; 326 + s -= nr_center; 327 327 shift(left, right, s); 328 328 nr_left -= s; 329 329 } else
+1 -1
drivers/md/persistent-data/dm-btree.c
··· 255 255 int r; 256 256 struct del_stack *s; 257 257 258 - s = kmalloc(sizeof(*s), GFP_KERNEL); 258 + s = kmalloc(sizeof(*s), GFP_NOIO); 259 259 if (!s) 260 260 return -ENOMEM; 261 261 s->info = info;