Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ocfs2/heartbeat: fix slot mapping rollback leaks on error paths

o2hb_map_slot_data() allocates hr_tmp_block, hr_slots, hr_slot_data, and
pages in stages. If a later allocation fails, the current code returns
without unwinding the earlier allocations.

o2hb_region_dev_store() also leaves slot mapping resources behind when
setup aborts, and it keeps hr_aborted_start/hr_node_deleted set across
retries. That leaves stale state behind after a failed start.

Factor the slot cleanup into o2hb_unmap_slot_data(), use it from both
o2hb_map_slot_data() and o2hb_region_release(), and call it from the
o2hb_region_dev_store() rollback after stopping the heartbeat thread, if
one was started. While freeing pages, clear each hr_slot_data entry as it
is released, and reset the start state (hr_aborted_start and
hr_node_deleted) before each new setup attempt.

This closes the slot mapping leak on allocation/setup failure paths and
keeps failed setup attempts retryable.

Link: https://lkml.kernel.org/r/20260330153428.19586-1-yufan.chen@linux.dev
Signed-off-by: Yufan Chen <ericterminal@gmail.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Cc: Heming Zhao <heming.zhao@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Yufan Chen and committed by Andrew Morton
56864594 e3a84be1

+56 -27
+56 -27
fs/ocfs2/cluster/heartbeat.c
··· 1488 1488 return item ? container_of(item, struct o2hb_region, hr_item) : NULL; 1489 1489 } 1490 1490 1491 - /* drop_item only drops its ref after killing the thread, nothing should 1492 - * be using the region anymore. this has to clean up any state that 1493 - * attributes might have built up. */ 1494 - static void o2hb_region_release(struct config_item *item) 1491 + static void o2hb_unmap_slot_data(struct o2hb_region *reg) 1495 1492 { 1496 1493 int i; 1497 1494 struct page *page; 1498 - struct o2hb_region *reg = to_o2hb_region(item); 1499 - 1500 - mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg)); 1501 - 1502 - kfree(reg->hr_tmp_block); 1503 1495 1504 1496 if (reg->hr_slot_data) { 1505 1497 for (i = 0; i < reg->hr_num_pages; i++) { 1506 1498 page = reg->hr_slot_data[i]; 1507 - if (page) 1499 + if (page) { 1508 1500 __free_page(page); 1501 + reg->hr_slot_data[i] = NULL; 1502 + } 1509 1503 } 1510 1504 kfree(reg->hr_slot_data); 1505 + reg->hr_slot_data = NULL; 1511 1506 } 1507 + 1508 + kfree(reg->hr_slots); 1509 + reg->hr_slots = NULL; 1510 + 1511 + kfree(reg->hr_tmp_block); 1512 + reg->hr_tmp_block = NULL; 1513 + } 1514 + 1515 + /* drop_item only drops its ref after killing the thread, nothing should 1516 + * be using the region anymore. this has to clean up any state that 1517 + * attributes might have built up. 
1518 + */ 1519 + static void o2hb_region_release(struct config_item *item) 1520 + { 1521 + struct o2hb_region *reg = to_o2hb_region(item); 1522 + 1523 + mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg)); 1524 + 1525 + o2hb_unmap_slot_data(reg); 1512 1526 1513 1527 if (reg->hr_bdev_file) 1514 1528 fput(reg->hr_bdev_file); 1515 - 1516 - kfree(reg->hr_slots); 1517 1529 1518 1530 debugfs_remove_recursive(reg->hr_debug_dir); 1519 1531 kfree(reg->hr_db_livenodes); ··· 1679 1667 static int o2hb_map_slot_data(struct o2hb_region *reg) 1680 1668 { 1681 1669 int i, j; 1670 + int ret = -ENOMEM; 1682 1671 unsigned int last_slot; 1683 1672 unsigned int spp = reg->hr_slots_per_page; 1684 1673 struct page *page; ··· 1687 1674 struct o2hb_disk_slot *slot; 1688 1675 1689 1676 reg->hr_tmp_block = kmalloc(reg->hr_block_bytes, GFP_KERNEL); 1690 - if (reg->hr_tmp_block == NULL) 1691 - return -ENOMEM; 1677 + if (!reg->hr_tmp_block) 1678 + goto out; 1692 1679 1693 1680 reg->hr_slots = kzalloc_objs(struct o2hb_disk_slot, reg->hr_blocks); 1694 - if (reg->hr_slots == NULL) 1695 - return -ENOMEM; 1681 + if (!reg->hr_slots) 1682 + goto out; 1696 1683 1697 - for(i = 0; i < reg->hr_blocks; i++) { 1684 + for (i = 0; i < reg->hr_blocks; i++) { 1698 1685 slot = &reg->hr_slots[i]; 1699 1686 slot->ds_node_num = i; 1700 1687 INIT_LIST_HEAD(&slot->ds_live_item); ··· 1708 1695 1709 1696 reg->hr_slot_data = kzalloc_objs(struct page *, reg->hr_num_pages); 1710 1697 if (!reg->hr_slot_data) 1711 - return -ENOMEM; 1698 + goto out; 1712 1699 1713 - for(i = 0; i < reg->hr_num_pages; i++) { 1700 + for (i = 0; i < reg->hr_num_pages; i++) { 1714 1701 page = alloc_page(GFP_KERNEL); 1715 1702 if (!page) 1716 - return -ENOMEM; 1703 + goto out; 1717 1704 1718 1705 reg->hr_slot_data[i] = page; 1719 1706 ··· 1733 1720 } 1734 1721 1735 1722 return 0; 1723 + 1724 + out: 1725 + o2hb_unmap_slot_data(reg); 1726 + return ret; 1736 1727 } 1737 1728 1738 1729 /* Read in all the slots available and populate the 
tracking ··· 1826 1809 "blocksize %u incorrect for device, expected %d", 1827 1810 reg->hr_block_bytes, sectsize); 1828 1811 ret = -EINVAL; 1829 - goto out3; 1812 + goto out; 1830 1813 } 1831 1814 1815 + reg->hr_aborted_start = 0; 1816 + reg->hr_node_deleted = 0; 1832 1817 o2hb_init_region_params(reg); 1833 1818 1834 1819 /* Generation of zero is invalid */ ··· 1842 1823 ret = o2hb_map_slot_data(reg); 1843 1824 if (ret) { 1844 1825 mlog_errno(ret); 1845 - goto out3; 1826 + goto out; 1846 1827 } 1847 1828 1848 1829 ret = o2hb_populate_slot_data(reg); 1849 1830 if (ret) { 1850 1831 mlog_errno(ret); 1851 - goto out3; 1832 + goto out; 1852 1833 } 1853 1834 1854 1835 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); ··· 1879 1860 if (IS_ERR(hb_task)) { 1880 1861 ret = PTR_ERR(hb_task); 1881 1862 mlog_errno(ret); 1882 - goto out3; 1863 + goto out; 1883 1864 } 1884 1865 1885 1866 spin_lock(&o2hb_live_lock); ··· 1896 1877 1897 1878 if (reg->hr_aborted_start) { 1898 1879 ret = -EIO; 1899 - goto out3; 1880 + goto out; 1900 1881 } 1901 1882 1902 1883 if (reg->hr_node_deleted) { 1903 1884 ret = -EINVAL; 1904 - goto out3; 1885 + goto out; 1905 1886 } 1906 1887 1907 1888 /* Ok, we were woken. Make sure it wasn't by drop_item() */ ··· 1920 1901 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n", 1921 1902 config_item_name(&reg->hr_item), reg_bdev(reg)); 1922 1903 1923 - out3: 1904 + out: 1924 1905 if (ret < 0) { 1906 + spin_lock(&o2hb_live_lock); 1907 + hb_task = reg->hr_task; 1908 + reg->hr_task = NULL; 1909 + spin_unlock(&o2hb_live_lock); 1910 + 1911 + if (hb_task) 1912 + kthread_stop(hb_task); 1913 + 1914 + o2hb_unmap_slot_data(reg); 1915 + 1925 1916 fput(reg->hr_bdev_file); 1926 1917 reg->hr_bdev_file = NULL; 1927 1918 }