mm/pagewalk: split walk_page_range_novma() into kernel/user parts

+1 -1

arch/loongarch/mm/pageattr.c

··· 118 118 return 0; 119 119 120 120 mmap_write_lock(&init_mm); 121 - ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); 121 + ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL, &masks); 122 122 mmap_write_unlock(&init_mm); 123 123 124 124 flush_tlb_kernel_range(start, end);

+2 -2

arch/openrisc/kernel/dma.c

··· 72 72 * them and setting the cache-inhibit bit. 73 73 */ 74 74 mmap_write_lock(&init_mm); 75 - error = walk_page_range_novma(&init_mm, va, va + size, 75 + error = walk_kernel_page_table_range(va, va + size, 76 76 &set_nocache_walk_ops, NULL, NULL); 77 77 mmap_write_unlock(&init_mm); 78 78 ··· 87 87 88 88 mmap_write_lock(&init_mm); 89 89 /* walk_page_range shouldn't be able to fail here */ 90 - WARN_ON(walk_page_range_novma(&init_mm, va, va + size, 90 + WARN_ON(walk_kernel_page_table_range(va, va + size, 91 91 &clear_nocache_walk_ops, NULL, NULL)); 92 92 mmap_write_unlock(&init_mm); 93 93 }

+4 -4

arch/riscv/mm/pageattr.c

··· 299 299 if (ret) 300 300 goto unlock; 301 301 302 - ret = walk_page_range_novma(&init_mm, lm_start, lm_end, 302 + ret = walk_kernel_page_table_range(lm_start, lm_end, 303 303 &pageattr_ops, NULL, &masks); 304 304 if (ret) 305 305 goto unlock; ··· 317 317 if (ret) 318 318 goto unlock; 319 319 320 - ret = walk_page_range_novma(&init_mm, lm_start, lm_end, 320 + ret = walk_kernel_page_table_range(lm_start, lm_end, 321 321 &pageattr_ops, NULL, &masks); 322 322 if (ret) 323 323 goto unlock; 324 324 } 325 325 326 - ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, 326 + ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL, 327 327 &masks); 328 328 329 329 unlock: ··· 335 335 */ 336 336 flush_tlb_all(); 337 337 #else 338 - ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, 338 + ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL, 339 339 &masks); 340 340 341 341 mmap_write_unlock(&init_mm);

+3 -4

include/linux/pagewalk.h

··· 129 129 int walk_page_range(struct mm_struct *mm, unsigned long start, 130 130 unsigned long end, const struct mm_walk_ops *ops, 131 131 void *private); 132 - int walk_page_range_novma(struct mm_struct *mm, unsigned long start, 133 - unsigned long end, const struct mm_walk_ops *ops, 134 - pgd_t *pgd, 135 - void *private); 132 + int walk_kernel_page_table_range(unsigned long start, 133 + unsigned long end, const struct mm_walk_ops *ops, 134 + pgd_t *pgd, void *private); 136 135 int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, 137 136 unsigned long end, const struct mm_walk_ops *ops, 138 137 void *private);

+1 -1

mm/hugetlb_vmemmap.c

··· 166 166 VM_BUG_ON(!PAGE_ALIGNED(start | end)); 167 167 168 168 mmap_read_lock(&init_mm); 169 - ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops, 169 + ret = walk_kernel_page_table_range(start, end, &vmemmap_remap_ops, 170 170 NULL, walk); 171 171 mmap_read_unlock(&init_mm); 172 172 if (ret)

+3

mm/internal.h

··· 1604 1604 int walk_page_range_mm(struct mm_struct *mm, unsigned long start, 1605 1605 unsigned long end, const struct mm_walk_ops *ops, 1606 1606 void *private); 1607 + int walk_page_range_debug(struct mm_struct *mm, unsigned long start, 1608 + unsigned long end, const struct mm_walk_ops *ops, 1609 + pgd_t *pgd, void *private); 1607 1610 1608 1611 /* pt_reclaim.c */ 1609 1612 bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval);

+55 -22

mm/pagewalk.c

··· 585 585 } 586 586 587 587 /** 588 - * walk_page_range_novma - walk a range of pagetables not backed by a vma 589 - * @mm: mm_struct representing the target process of page table walk 588 + * walk_kernel_page_table_range - walk a range of kernel pagetables. 590 589 * @start: start address of the virtual address range 591 590 * @end: end address of the virtual address range 592 591 * @ops: operation to call during the walk ··· 595 596 * Similar to walk_page_range() but can walk any page tables even if they are 596 597 * not backed by VMAs. Because 'unusual' entries may be walked this function 597 598 * will also not lock the PTEs for the pte_entry() callback. This is useful for 598 - * walking the kernel pages tables or page tables for firmware. 599 + * walking kernel page tables or page tables for firmware. 599 600 * 600 601 * Note: Be careful to walk the kernel pages tables, the caller may be need to 601 602 * take other effective approaches (mmap lock may be insufficient) to prevent 602 603 * the intermediate kernel page tables belonging to the specified address range 603 604 * from being freed (e.g. memory hot-remove). 604 605 */ 605 - int walk_page_range_novma(struct mm_struct *mm, unsigned long start, 606 + int walk_kernel_page_table_range(unsigned long start, unsigned long end, 607 + const struct mm_walk_ops *ops, pgd_t *pgd, void *private) 608 + { 609 + struct mm_struct *mm = &init_mm; 610 + struct mm_walk walk = { 611 + .ops = ops, 612 + .mm = mm, 613 + .pgd = pgd, 614 + .private = private, 615 + .no_vma = true 616 + }; 617 + 618 + if (start >= end) 619 + return -EINVAL; 620 + if (!check_ops_valid(ops)) 621 + return -EINVAL; 622 + 623 + /* 624 + * Kernel intermediate page tables are usually not freed, so the mmap 625 + * read lock is sufficient. But there are some exceptions. 626 + * E.g. memory hot-remove. 
In which case, the mmap lock is insufficient 627 + * to prevent the intermediate kernel page tables belonging to the 628 + * specified address range from being freed. The caller should take 629 + * other actions to prevent this race. 630 + */ 631 + mmap_assert_locked(mm); 632 + 633 + return walk_pgd_range(start, end, &walk); 634 + } 635 + 636 + /** 637 + * walk_page_range_debug - walk a range of pagetables not backed by a vma 638 + * @mm: mm_struct representing the target process of page table walk 639 + * @start: start address of the virtual address range 640 + * @end: end address of the virtual address range 641 + * @ops: operation to call during the walk 642 + * @pgd: pgd to walk if different from mm->pgd 643 + * @private: private data for callbacks' usage 644 + * 645 + * Similar to walk_page_range() but can walk any page tables even if they are 646 + * not backed by VMAs. Because 'unusual' entries may be walked this function 647 + * will also not lock the PTEs for the pte_entry() callback. 648 + * 649 + * This is for debugging purposes ONLY. 650 + */ 651 + int walk_page_range_debug(struct mm_struct *mm, unsigned long start, 606 652 unsigned long end, const struct mm_walk_ops *ops, 607 - pgd_t *pgd, 608 - void *private) 653 + pgd_t *pgd, void *private) 609 654 { 610 655 struct mm_walk walk = { 611 656 .ops = ops, ··· 659 616 .no_vma = true 660 617 }; 661 618 619 + /* For convenience, we allow traversal of kernel mappings. */ 620 + if (mm == &init_mm) 621 + return walk_kernel_page_table_range(start, end, ops, 622 + pgd, private); 662 623 if (start >= end || !walk.mm) 663 624 return -EINVAL; 664 625 if (!check_ops_valid(ops)) 665 626 return -EINVAL; 666 627 667 628 /* 668 - * 1) For walking the user virtual address space: 669 - * 670 629 * The mmap lock protects the page walker from changes to the page 671 630 * tables during the walk. 
However a read lock is insufficient to 672 631 * protect those areas which don't have a VMA as munmap() detaches 673 632 * the VMAs before downgrading to a read lock and actually tearing 674 633 * down PTEs/page tables. In which case, the mmap write lock should 675 - * be hold. 676 - * 677 - * 2) For walking the kernel virtual address space: 678 - * 679 - * The kernel intermediate page tables usually do not be freed, so 680 - * the mmap map read lock is sufficient. But there are some exceptions. 681 - * E.g. memory hot-remove. In which case, the mmap lock is insufficient 682 - * to prevent the intermediate kernel pages tables belonging to the 683 - * specified address range from being freed. The caller should take 684 - * other actions to prevent this race. 634 + * be held. 685 635 */ 686 - if (mm == &init_mm) 687 - mmap_assert_locked(walk.mm); 688 - else 689 - mmap_assert_write_locked(walk.mm); 636 + mmap_assert_write_locked(mm); 690 637 691 638 return walk_pgd_range(start, end, &walk); 692 639 }

+2 -1

mm/ptdump.c

··· 4 4 #include <linux/debugfs.h> 5 5 #include <linux/ptdump.h> 6 6 #include <linux/kasan.h> 7 + #include "internal.h" 7 8 8 9 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) 9 10 /* ··· 178 177 179 178 mmap_write_lock(mm); 180 179 while (range->start != range->end) { 181 - walk_page_range_novma(mm, range->start, range->end, 180 + walk_page_range_debug(mm, range->start, range->end, 182 181 &ptdump_ops, pgd, st); 183 182 range++; 184 183 }

Configure Feed

Configure Feed