Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

- Fix DM snapshot deadlock that can occur due to COW throttling
preventing locks from being released.

- Fix DM cache's GFP_NOWAIT allocation failure error paths by switching
to GFP_NOIO.

- Make __hash_find() static in the DM clone target.

* tag 'for-5.4/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm cache: fix bugs when a GFP_NOWAIT allocation fails
dm snapshot: rework COW throttling to fix deadlock
dm snapshot: introduce account_start_copy() and account_end_copy()
dm clone: Make __hash_find static

+81 -45
+2 -26
drivers/md/dm-cache-target.c
···
 542  542
 543  543   static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
 544  544   {
 545      - 	return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
      545 + 	return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
 546  546   }
 547  547
 548  548   static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
···
 554  554   {
 555  555   	struct dm_cache_migration *mg;
 556  556
 557      - 	mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
 558      - 	if (!mg)
 559      - 		return NULL;
      557 + 	mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
 560  558
 561  559   	memset(mg, 0, sizeof(*mg));
 562  560
···
 662  664   	struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
 663  665
 664  666   	cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
 665      - 	if (!cell_prealloc) {
 666      - 		defer_bio(cache, bio);
 667      - 		return false;
 668      - 	}
 669  667
 670  668   	build_key(oblock, end, &key);
 671  669   	r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
···
1487 1493   	struct dm_bio_prison_cell_v2 *prealloc;
1488 1494
1489 1495   	prealloc = alloc_prison_cell(cache);
1490      - 	if (!prealloc) {
1491      - 		DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
1492      - 		mg_complete(mg, false);
1493      - 		return -ENOMEM;
1494      - 	}
1495 1496
1496 1497   	/*
1497 1498   	 * Prevent writes to the block, but allow reads to continue.
···
1524 1535   	}
1525 1536
1526 1537   	mg = alloc_migration(cache);
1527      - 	if (!mg) {
1528      - 		policy_complete_background_work(cache->policy, op, false);
1529      - 		background_work_end(cache);
1530      - 		return -ENOMEM;
1531      - 	}
1532 1538
1533 1539   	mg->op = op;
1534 1540   	mg->overwrite_bio = bio;
···
1612 1628   	struct dm_bio_prison_cell_v2 *prealloc;
1613 1629
1614 1630   	prealloc = alloc_prison_cell(cache);
1615      - 	if (!prealloc) {
1616      - 		invalidate_complete(mg, false);
1617      - 		return -ENOMEM;
1618      - 	}
1619 1631
1620 1632   	build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
1621 1633   	r = dm_cell_lock_v2(cache->prison, &key,
···
1649 1669   		return -EPERM;
1650 1670
1651 1671   	mg = alloc_migration(cache);
1652      - 	if (!mg) {
1653      - 		background_work_end(cache);
1654      - 		return -ENOMEM;
1655      - 	}
1656 1672
1657 1673   	mg->overwrite_bio = bio;
1658 1674   	mg->invalidate_cblock = cblock;
+2 -2
drivers/md/dm-clone-target.c
···
 591  591    *
 592  592    * NOTE: Must be called with the bucket lock held
 593  593    */
 594      - struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
 595      - 					      unsigned long region_nr)
      594 + static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
      595 + 						     unsigned long region_nr)
 596  596   {
 597  597   	struct dm_clone_region_hydration *hd;
 598  598
+77 -17
drivers/md/dm-snap.c
···
  18   18   #include <linux/vmalloc.h>
  19   19   #include <linux/log2.h>
  20   20   #include <linux/dm-kcopyd.h>
  21      - #include <linux/semaphore.h>
  22   21
  23   22   #include "dm.h"
  24   23
···
 106  107   	/* The on disk metadata handler */
 107  108   	struct dm_exception_store *store;
 108  109
 109      - 	/* Maximum number of in-flight COW jobs. */
 110      - 	struct semaphore cow_count;
      110 + 	unsigned in_progress;
      111 + 	struct wait_queue_head in_progress_wait;
 111  112
 112  113   	struct dm_kcopyd_client *kcopyd_client;
 113  114
···
 161  162    */
 162  163   #define DEFAULT_COW_THRESHOLD 2048
 163  164
 164      - static int cow_threshold = DEFAULT_COW_THRESHOLD;
 165      - module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
      165 + static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
      166 + module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
 166  167   MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
 167  168
 168  169   DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
···
1326 1327   		goto bad_hash_tables;
1327 1328   	}
1328 1329
1329      - 	sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
     1330 + 	init_waitqueue_head(&s->in_progress_wait);
1330 1331
1331 1332   	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
1332 1333   	if (IS_ERR(s->kcopyd_client)) {
···
1508 1509
1509 1510   	dm_put_device(ti, s->origin);
1510 1511
     1512 + 	WARN_ON(s->in_progress);
     1513 +
1511 1514   	kfree(s);
     1515 + }
     1516 +
     1517 + static void account_start_copy(struct dm_snapshot *s)
     1518 + {
     1519 + 	spin_lock(&s->in_progress_wait.lock);
     1520 + 	s->in_progress++;
     1521 + 	spin_unlock(&s->in_progress_wait.lock);
     1522 + }
     1523 +
     1524 + static void account_end_copy(struct dm_snapshot *s)
     1525 + {
     1526 + 	spin_lock(&s->in_progress_wait.lock);
     1527 + 	BUG_ON(!s->in_progress);
     1528 + 	s->in_progress--;
     1529 + 	if (likely(s->in_progress <= cow_threshold) &&
     1530 + 	    unlikely(waitqueue_active(&s->in_progress_wait)))
     1531 + 		wake_up_locked(&s->in_progress_wait);
     1532 + 	spin_unlock(&s->in_progress_wait.lock);
     1533 + }
     1534 +
     1535 + static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
     1536 + {
     1537 + 	if (unlikely(s->in_progress > cow_threshold)) {
     1538 + 		spin_lock(&s->in_progress_wait.lock);
     1539 + 		if (likely(s->in_progress > cow_threshold)) {
     1540 + 			/*
     1541 + 			 * NOTE: this throttle doesn't account for whether
     1542 + 			 * the caller is servicing an IO that will trigger a COW
     1543 + 			 * so excess throttling may result for chunks not required
     1544 + 			 * to be COW'd. But if cow_threshold was reached, extra
     1545 + 			 * throttling is unlikely to negatively impact performance.
     1546 + 			 */
     1547 + 			DECLARE_WAITQUEUE(wait, current);
     1548 + 			__add_wait_queue(&s->in_progress_wait, &wait);
     1549 + 			__set_current_state(TASK_UNINTERRUPTIBLE);
     1550 + 			spin_unlock(&s->in_progress_wait.lock);
     1551 + 			if (unlock_origins)
     1552 + 				up_read(&_origins_lock);
     1553 + 			io_schedule();
     1554 + 			remove_wait_queue(&s->in_progress_wait, &wait);
     1555 + 			return false;
     1556 + 		}
     1557 + 		spin_unlock(&s->in_progress_wait.lock);
     1558 + 	}
     1559 + 	return true;
1512 1560   }
1513 1561
1514 1562   /*
···
1573 1527   	}
1574 1528   }
1575 1529
1576      - static int do_origin(struct dm_dev *origin, struct bio *bio);
     1530 + static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
1577 1531
1578 1532   /*
1579 1533    * Flush a list of buffers.
···
1586 1540   	while (bio) {
1587 1541   		n = bio->bi_next;
1588 1542   		bio->bi_next = NULL;
1589      - 		r = do_origin(s->origin, bio);
     1543 + 		r = do_origin(s->origin, bio, false);
1590 1544   		if (r == DM_MAPIO_REMAPPED)
1591 1545   			generic_make_request(bio);
1592 1546   		bio = n;
···
1778 1732   		rb_link_node(&pe->out_of_order_node, parent, p);
1779 1733   		rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1780 1734   	}
1781      - 	up(&s->cow_count);
     1735 + 	account_end_copy(s);
1782 1736   }
1783 1737
1784 1738   /*
···
1802 1756   	dest.count = src.count;
1803 1757
1804 1758   	/* Hand over to kcopyd */
1805      - 	down(&s->cow_count);
     1759 + 	account_start_copy(s);
1806 1760   	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
1807 1761   }
1808 1762
···
1822 1776   	pe->full_bio = bio;
1823 1777   	pe->full_bio_end_io = bio->bi_end_io;
1824 1778
1825      - 	down(&s->cow_count);
     1779 + 	account_start_copy(s);
1826 1780   	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
1827 1781   						   copy_callback, pe);
1828 1782
···
1912 1866   	struct bio *bio = context;
1913 1867   	struct dm_snapshot *s = bio->bi_private;
1914 1868
1915      - 	up(&s->cow_count);
     1869 + 	account_end_copy(s);
1916 1870   	bio->bi_status = write_err ? BLK_STS_IOERR : 0;
1917 1871   	bio_endio(bio);
1918 1872   }
···
1926 1880   	dest.sector = bio->bi_iter.bi_sector;
1927 1881   	dest.count = s->store->chunk_size;
1928 1882
1929      - 	down(&s->cow_count);
     1883 + 	account_start_copy(s);
1930 1884   	WARN_ON_ONCE(bio->bi_private);
1931 1885   	bio->bi_private = s;
1932 1886   	dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
···
1961 1915   	/* To get here the table must be live so s->active is always set. */
1962 1916   	if (!s->valid)
1963 1917   		return DM_MAPIO_KILL;
     1918 +
     1919 + 	if (bio_data_dir(bio) == WRITE) {
     1920 + 		while (unlikely(!wait_for_in_progress(s, false)))
     1921 + 			; /* wait_for_in_progress() has slept */
     1922 + 	}
1964 1923
1965 1924   	down_read(&s->lock);
1966 1925   	dm_exception_table_lock(&lock);
···
2163 2112
2164 2113   	if (bio_data_dir(bio) == WRITE) {
2165 2114   		up_write(&s->lock);
2166      - 		return do_origin(s->origin, bio);
     2115 + 		return do_origin(s->origin, bio, false);
2167 2116   	}
2168 2117
2169 2118   out_unlock:
···
2538 2487   /*
2539 2488    * Called on a write from the origin driver.
2540 2489    */
2541      - static int do_origin(struct dm_dev *origin, struct bio *bio)
     2490 + static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
2542 2491   {
2543 2492   	struct origin *o;
2544 2493   	int r = DM_MAPIO_REMAPPED;
2545 2494
     2495 + again:
2546 2496   	down_read(&_origins_lock);
2547 2497   	o = __lookup_origin(origin->bdev);
2548      - 	if (o)
     2498 + 	if (o) {
     2499 + 		if (limit) {
     2500 + 			struct dm_snapshot *s;
     2501 + 			list_for_each_entry(s, &o->snapshots, list)
     2502 + 				if (unlikely(!wait_for_in_progress(s, true)))
     2503 + 					goto again;
     2504 + 		}
     2505 +
2549 2506   		r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
     2507 + 	}
2550 2508   	up_read(&_origins_lock);
2551 2509
2552 2510   	return r;
···
2668 2608   		dm_accept_partial_bio(bio, available_sectors);
2669 2609
2670 2610   	/* Only tell snapshots if this is a write */
2671      - 	return do_origin(o->dev, bio);
     2611 + 	return do_origin(o->dev, bio, true);
2672 2612   }
2673 2613
2674 2614   /*