Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

arm64/mm: Reject memory removal that splits a kernel leaf mapping

Linear and vmemmap mappings that get torn down during a memory hot remove
operation might contain leaf level entries on any page table level. If the
requested memory range's linear or vmemmap mappings fall within such leaf
entries, new mappings need to be created for the remaining memory mapped on
the leaf entry earlier, following standard break-before-make (BBM) rules.
But the kernel cannot tolerate BBM and hence remapping to fine grained leaves
would not be possible on systems without BBML2_NOABORT.

Currently the memory hot remove operation does not perform such restructuring,
and so removing memory ranges that could split a kernel leaf level mapping
needs to be rejected.

While memory_hotplug.c does appear to permit hot removing arbitrary ranges
of memory, the higher layers that drive memory_hotplug (e.g. ACPI, virtio,
...) all appear to treat memory as fixed size devices. So it is impossible
to hot unplug a different amount than was previously hot plugged, and hence
we should never see a rejection in practice, but adding the check makes us
robust against a future change.

Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/all/aWZYXhrT6D2M-7-N@willie-the-truck/
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

authored by

Anshuman Khandual and committed by
Catalin Marinas
95a58852 48478b9f

+114 -6
+114 -6
arch/arm64/mm/mmu.c
···
	__remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size);
}

/*
 * Return true if @addr falls strictly inside a leaf (block or contiguous)
 * entry of the kernel page tables, i.e. tearing down the mapping at @addr
 * would require splitting that leaf. Walks the kernel page table (via
 * pgd_offset_k()) level by level; at each level, an @addr aligned to the
 * leaf size possible at that level, or a non-present entry, means no leaf
 * can be split there.
 */
static bool addr_splits_kernel_leaf(unsigned long addr)
{
	pgd_t *pgdp, pgd;
	p4d_t *p4dp, p4d;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;

	/*
	 * If the given address points at the start address of a possible
	 * leaf, we certainly won't split. Otherwise, check if we would
	 * actually split a leaf by traversing the page tables further.
	 */
	if (IS_ALIGNED(addr, PGDIR_SIZE))
		return false;

	pgdp = pgd_offset_k(addr);
	pgd = pgdp_get(pgdp);
	if (!pgd_present(pgd))
		return false;

	if (IS_ALIGNED(addr, P4D_SIZE))
		return false;

	p4dp = p4d_offset(pgdp, addr);
	p4d = p4dp_get(p4dp);
	if (!p4d_present(p4d))
		return false;

	if (IS_ALIGNED(addr, PUD_SIZE))
		return false;

	pudp = pud_offset(p4dp, addr);
	pud = pudp_get(pudp);
	if (!pud_present(pud))
		return false;

	/* PUD-level block mapping and @addr is not PUD-aligned: would split */
	if (pud_leaf(pud))
		return true;

	if (IS_ALIGNED(addr, CONT_PMD_SIZE))
		return false;

	pmdp = pmd_offset(pudp, addr);
	pmd = pmdp_get(pmdp);
	if (!pmd_present(pmd))
		return false;

	/* Contiguous PMD run and @addr is not CONT_PMD-aligned: would split */
	if (pmd_cont(pmd))
		return true;

	if (IS_ALIGNED(addr, PMD_SIZE))
		return false;

	/* PMD-level block mapping and @addr is not PMD-aligned: would split */
	if (pmd_leaf(pmd))
		return true;

	if (IS_ALIGNED(addr, CONT_PTE_SIZE))
		return false;

	ptep = pte_offset_kernel(pmdp, addr);
	pte = __ptep_get(ptep);
	if (!pte_present(pte))
		return false;

	/* Contiguous PTE run and @addr is not CONT_PTE-aligned: would split */
	if (pte_cont(pte))
		return true;

	/* A present base-page PTE only splits if @addr is sub-page (can't happen in practice) */
	return !IS_ALIGNED(addr, PAGE_SIZE);
}

/*
 * Return true if unmapping @nr_pages worth of memory starting at @pfn can
 * be done without splitting any leaf entry, i.e. both edge addresses of
 * the range's linear map and vmemmap mappings coincide with leaf entry
 * boundaries. Warns (with the physical range) and returns false otherwise.
 */
static bool can_unmap_without_split(unsigned long pfn, unsigned long nr_pages)
{
	unsigned long phys_start, phys_end, start, end;

	phys_start = PFN_PHYS(pfn);
	phys_end = phys_start + nr_pages * PAGE_SIZE;

	/* PFN range's linear map edges are leaf entry aligned */
	start = __phys_to_virt(phys_start);
	end = __phys_to_virt(phys_end);
	if (addr_splits_kernel_leaf(start) || addr_splits_kernel_leaf(end)) {
		pr_warn("[%lx %lx] splits a leaf entry in linear map\n",
			phys_start, phys_end);
		return false;
	}

	/* PFN range's vmemmap edges are leaf entry aligned */
	BUILD_BUG_ON(!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP));
	start = (unsigned long)pfn_to_page(pfn);
	end = (unsigned long)pfn_to_page(pfn + nr_pages);
	if (addr_splits_kernel_leaf(start) || addr_splits_kernel_leaf(end)) {
		pr_warn("[%lx %lx] splits a leaf entry in vmemmap\n",
			phys_start, phys_end);
		return false;
	}
	return true;
}

/*
 * This memory hotplug notifier helps prevent boot memory from being
 * inadvertently removed as it blocks pfn range offlining process in
···
 * In future if and when boot memory could be removed, this notifier
 * should be dropped and free_hotplug_page_range() should handle any
 * reserved pages allocated during boot.
 *
 * This also blocks any memory remove that would have caused a split
 * in leaf entry in kernel linear or vmemmap mapping.
2126 2028 */ 2127 - static int prevent_bootmem_remove_notifier(struct notifier_block *nb, 2029 + static int prevent_memory_remove_notifier(struct notifier_block *nb, 2128 2030 unsigned long action, void *data) 2129 2031 { 2130 2032 struct mem_section *ms; ··· 2173 2069 return NOTIFY_DONE; 2174 2070 } 2175 2071 } 2072 + 2073 + if (!can_unmap_without_split(pfn, arg->nr_pages)) 2074 + return NOTIFY_BAD; 2075 + 2176 2076 return NOTIFY_OK; 2177 2077 } 2178 2078 2179 - static struct notifier_block prevent_bootmem_remove_nb = { 2180 - .notifier_call = prevent_bootmem_remove_notifier, 2079 + static struct notifier_block prevent_memory_remove_nb = { 2080 + .notifier_call = prevent_memory_remove_notifier, 2181 2081 }; 2182 2082 2183 2083 /* ··· 2231 2123 } 2232 2124 } 2233 2125 2234 - static int __init prevent_bootmem_remove_init(void) 2126 + static int __init prevent_memory_remove_init(void) 2235 2127 { 2236 2128 int ret = 0; 2237 2129 ··· 2239 2131 return ret; 2240 2132 2241 2133 validate_bootmem_online(); 2242 - ret = register_memory_notifier(&prevent_bootmem_remove_nb); 2134 + ret = register_memory_notifier(&prevent_memory_remove_nb); 2243 2135 if (ret) 2244 2136 pr_err("%s: Notifier registration failed %d\n", __func__, ret); 2245 2137 2246 2138 return ret; 2247 2139 } 2248 - early_initcall(prevent_bootmem_remove_init); 2140 + early_initcall(prevent_memory_remove_init); 2249 2141 #endif 2250 2142 2251 2143 pte_t modify_prot_start_ptes(struct vm_area_struct *vma, unsigned long addr,