Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

powerpc/mm: support page table check

On creation and clearing of a page table mapping, instrument such calls by
invoking page_table_check_ptes_set() and page_table_check_pte_clear()
respectively, along with the corresponding pmd/pud set and clear variants
for huge mappings. These calls serve as a sanity check against illegal
mappings.

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK on powerpc, except when HUGETLB_PAGE
is enabled (powerpc has some weirdness in how it implements
set_huge_pte_at(), which may require some further work).

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

[ajd@linux.ibm.com: rebase, add additional instrumentation, misc fixes]
Link: https://lkml.kernel.org/r/20251219-pgtable_check_v18rebase-v18-12-755bc151a50b@linux.ibm.com
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Acked-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: "Christophe Leroy (CS GROUP)" <chleroy@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Guo Weikang <guoweikang.kernel@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Miehlbradt <nicholas@linux.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Cc: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Cc: Thomas Huth <thuth@redhat.com>
Cc: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Rohan McLure and committed by
Andrew Morton
641d47d4 2360f523

+73 -18
+1
arch/powerpc/Kconfig
··· 172 172 select ARCH_STACKWALK 173 173 select ARCH_SUPPORTS_ATOMIC_RMW 174 174 select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx 175 + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if !HUGETLB_PAGE 175 176 select ARCH_SUPPORTS_SCHED_MC if SMP 176 177 select ARCH_SUPPORTS_SCHED_SMT if PPC64 && SMP 177 178 select SCHED_MC if ARCH_SUPPORTS_SCHED_MC
+6 -1
arch/powerpc/include/asm/book3s/32/pgtable.h
··· 202 202 #ifndef __ASSEMBLER__ 203 203 #include <linux/sched.h> 204 204 #include <linux/threads.h> 205 + #include <linux/page_table_check.h> 205 206 206 207 /* Bits to mask out from a PGD to get to the PUD page */ 207 208 #define PGD_MASKED_BITS 0 ··· 316 315 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 317 316 pte_t *ptep) 318 317 { 319 - return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); 318 + pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); 319 + 320 + page_table_check_pte_clear(mm, addr, old_pte); 321 + 322 + return old_pte; 320 323 } 321 324 322 325 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
+35 -10
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 144 144 #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) 145 145 146 146 #ifndef __ASSEMBLER__ 147 + #include <linux/page_table_check.h> 148 + 147 149 /* 148 150 * page table defines 149 151 */ ··· 418 416 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, 419 417 unsigned long addr, pte_t *ptep) 420 418 { 421 - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); 422 - return __pte(old); 419 + pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0)); 420 + 421 + page_table_check_pte_clear(mm, addr, old_pte); 422 + 423 + return old_pte; 423 424 } 424 425 425 426 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL ··· 431 426 pte_t *ptep, int full) 432 427 { 433 428 if (full && radix_enabled()) { 429 + pte_t old_pte; 430 + 434 431 /* 435 432 * We know that this is a full mm pte clear and 436 433 * hence can be sure there is no parallel set_pte. 437 434 */ 438 - return radix__ptep_get_and_clear_full(mm, addr, ptep, full); 435 + old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full); 436 + page_table_check_pte_clear(mm, addr, old_pte); 437 + 438 + return old_pte; 439 439 } 440 440 return ptep_get_and_clear(mm, addr, ptep); 441 441 } ··· 1311 1301 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, 1312 1302 unsigned long addr, pmd_t *pmdp) 1313 1303 { 1314 - if (radix_enabled()) 1315 - return radix__pmdp_huge_get_and_clear(mm, addr, pmdp); 1316 - return hash__pmdp_huge_get_and_clear(mm, addr, pmdp); 1304 + pmd_t old_pmd; 1305 + 1306 + if (radix_enabled()) { 1307 + old_pmd = radix__pmdp_huge_get_and_clear(mm, addr, pmdp); 1308 + } else { 1309 + old_pmd = hash__pmdp_huge_get_and_clear(mm, addr, pmdp); 1310 + } 1311 + 1312 + page_table_check_pmd_clear(mm, addr, old_pmd); 1313 + 1314 + return old_pmd; 1317 1315 } 1318 1316 1319 1317 #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR 1320 1318 static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, 1321 1319 unsigned long addr, pud_t *pudp) 1322 1320 { 1323 - if 
(radix_enabled()) 1324 - return radix__pudp_huge_get_and_clear(mm, addr, pudp); 1325 - BUG(); 1326 - return *pudp; 1321 + pud_t old_pud; 1322 + 1323 + if (radix_enabled()) { 1324 + old_pud = radix__pudp_huge_get_and_clear(mm, addr, pudp); 1325 + } else { 1326 + BUG(); 1327 + } 1328 + 1329 + page_table_check_pud_clear(mm, addr, old_pud); 1330 + 1331 + return old_pud; 1327 1332 } 1328 1333 1329 1334 static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+7 -1
arch/powerpc/include/asm/nohash/pgtable.h
··· 29 29 30 30 #ifndef __ASSEMBLER__ 31 31 32 + #include <linux/page_table_check.h> 33 + 32 34 extern int icache_44x_need_flush; 33 35 34 36 #ifndef pte_huge_size ··· 124 122 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 125 123 pte_t *ptep) 126 124 { 127 - return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0)); 125 + pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0)); 126 + 127 + page_table_check_pte_clear(mm, addr, old_pte); 128 + 129 + return old_pte; 128 130 } 129 131 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR 130 132
+4
arch/powerpc/mm/book3s64/hash_pgtable.c
··· 8 8 #include <linux/sched.h> 9 9 #include <linux/mm_types.h> 10 10 #include <linux/mm.h> 11 + #include <linux/page_table_check.h> 11 12 #include <linux/stop_machine.h> 12 13 13 14 #include <asm/sections.h> ··· 231 230 232 231 pmd = *pmdp; 233 232 pmd_clear(pmdp); 233 + 234 + page_table_check_pmd_clear(vma->vm_mm, address, pmd); 235 + 234 236 /* 235 237 * Wait for all pending hash_page to finish. This is needed 236 238 * in case of subpage collapse. When we collapse normal pages
+13 -6
arch/powerpc/mm/book3s64/pgtable.c
··· 10 10 #include <linux/pkeys.h> 11 11 #include <linux/debugfs.h> 12 12 #include <linux/proc_fs.h> 13 + #include <linux/page_table_check.h> 13 14 14 15 #include <asm/pgalloc.h> 15 16 #include <asm/tlb.h> ··· 128 127 WARN_ON(!(pmd_leaf(pmd))); 129 128 #endif 130 129 trace_hugepage_set_pmd(addr, pmd_val(pmd)); 130 + page_table_check_pmd_set(mm, addr, pmdp, pmd); 131 131 return set_pte_at_unchecked(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); 132 132 } 133 133 ··· 146 144 WARN_ON(!(pud_leaf(pud))); 147 145 #endif 148 146 trace_hugepage_set_pud(addr, pud_val(pud)); 147 + page_table_check_pud_set(mm, addr, pudp, pud); 149 148 return set_pte_at_unchecked(mm, addr, pudp_ptep(pudp), pud_pte(pud)); 150 149 } 151 150 ··· 182 179 pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, 183 180 pmd_t *pmdp) 184 181 { 185 - unsigned long old_pmd; 182 + pmd_t old_pmd; 186 183 187 184 VM_WARN_ON_ONCE(!pmd_present(*pmdp)); 188 - old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); 185 + old_pmd = __pmd(pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID)); 189 186 flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); 190 - return __pmd(old_pmd); 187 + page_table_check_pmd_clear(vma->vm_mm, address, old_pmd); 188 + 189 + return old_pmd; 191 190 } 192 191 193 192 pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, 194 193 pud_t *pudp) 195 194 { 196 - unsigned long old_pud; 195 + pud_t old_pud; 197 196 198 197 VM_WARN_ON_ONCE(!pud_present(*pudp)); 199 - old_pud = pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID); 198 + old_pud = __pud(pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID)); 200 199 flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); 201 - return __pud(old_pud); 200 + page_table_check_pud_clear(vma->vm_mm, address, old_pud); 201 + 202 + return old_pud; 202 203 } 203 204 204 205 pmd_t 
pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+3
arch/powerpc/mm/book3s64/radix_pgtable.c
··· 14 14 #include <linux/of.h> 15 15 #include <linux/of_fdt.h> 16 16 #include <linux/mm.h> 17 + #include <linux/page_table_check.h> 17 18 #include <linux/hugetlb.h> 18 19 #include <linux/string_helpers.h> 19 20 #include <linux/memory.h> ··· 1474 1473 */ 1475 1474 pmd = *pmdp; 1476 1475 pmd_clear(pmdp); 1476 + 1477 + page_table_check_pmd_clear(vma->vm_mm, address, pmd); 1477 1478 1478 1479 radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); 1479 1480
+4
arch/powerpc/mm/pgtable.c
··· 22 22 #include <linux/mm.h> 23 23 #include <linux/percpu.h> 24 24 #include <linux/hardirq.h> 25 + #include <linux/page_table_check.h> 25 26 #include <linux/hugetlb.h> 26 27 #include <asm/tlbflush.h> 27 28 #include <asm/tlb.h> ··· 207 206 * and not hw_valid ptes. Hence there is no translation cache flush 208 207 * involved that need to be batched. 209 208 */ 209 + 210 + page_table_check_ptes_set(mm, addr, ptep, pte, nr); 211 + 210 212 for (;;) { 211 213 212 214 /*