Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: move pte table reclaim code to memory.c

Some cleanups for PT table reclaim code, triggered by a false-positive
warning we might start to see soon after we unlocked pt-reclaim on
architectures besides x86-64.


This patch (of 2):

The pte-table reclaim code is only called from memory.c, while zapping
pages, and it had better also stay that way in the long run. If we ever have
to call it from other files, we should expose proper high-level helpers
for zapping if the existing helpers are not good enough.

So, let's move the code over (it's not a lot) and clean it up a bit by:
- Renaming the functions.
- Dropping the "Check if it is empty PTE page" comment, which is now
self-explaining given the function name.
- Making zap_pte_table_if_empty() return whether zapping worked so the
caller can free it.
- Adding a comment in pte_table_reclaim_possible().
- Inlining free_pte() in the last remaining user.
- In zap_empty_pte_table(), switch from pmdp_get_lockless() to
  pmdp_get(), as we are holding the PMD PT lock.

By moving the code over, compilers can also easily figure out when
zap_empty_pte_table() does not initialize the pmdval variable, avoiding
false-positive warnings about the variable possibly not being initialized.

Link: https://lkml.kernel.org/r/20260119220708.3438514-1-david@kernel.org
Link: https://lkml.kernel.org/r/20260119220708.3438514-2-david@kernel.org
Signed-off-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

David Hildenbrand (Red Hat) and committed by
Andrew Morton
4c640eb4 9c8c02df

+62 -98
-1
MAINTAINERS
··· 16696 16696 R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> 16697 16697 L: linux-mm@kvack.org 16698 16698 S: Maintained 16699 - F: mm/pt_reclaim.c 16700 16699 F: mm/vmscan.c 16701 16700 F: mm/workingset.c 16702 16701
-1
mm/Makefile
··· 146 146 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o 147 147 obj-$(CONFIG_EXECMEM) += execmem.o 148 148 obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o 149 - obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o 150 149 obj-$(CONFIG_LAZY_MMU_MODE_KUNIT_TEST) += tests/lazy_mmu_mode_kunit.o
-18
mm/internal.h
··· 1743 1743 unsigned long end, const struct mm_walk_ops *ops, 1744 1744 pgd_t *pgd, void *private); 1745 1745 1746 - /* pt_reclaim.c */ 1747 - bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval); 1748 - void free_pte(struct mm_struct *mm, unsigned long addr, struct mmu_gather *tlb, 1749 - pmd_t pmdval); 1750 - void try_to_free_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, 1751 - struct mmu_gather *tlb); 1752 - 1753 - #ifdef CONFIG_PT_RECLAIM 1754 - bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, 1755 - struct zap_details *details); 1756 - #else 1757 - static inline bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, 1758 - struct zap_details *details) 1759 - { 1760 - return false; 1761 - } 1762 - #endif /* CONFIG_PT_RECLAIM */ 1763 - 1764 1746 void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm); 1765 1747 int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm); 1766 1748
+62 -6
mm/memory.c
··· 1821 1821 return nr; 1822 1822 } 1823 1823 1824 + static bool pte_table_reclaim_possible(unsigned long start, unsigned long end, 1825 + struct zap_details *details) 1826 + { 1827 + if (!IS_ENABLED(CONFIG_PT_RECLAIM)) 1828 + return false; 1829 + /* Only zap if we are allowed to and cover the full page table. */ 1830 + return details && details->reclaim_pt && (end - start >= PMD_SIZE); 1831 + } 1832 + 1833 + static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval) 1834 + { 1835 + spinlock_t *pml = pmd_lockptr(mm, pmd); 1836 + 1837 + if (!spin_trylock(pml)) 1838 + return false; 1839 + 1840 + *pmdval = pmdp_get(pmd); 1841 + pmd_clear(pmd); 1842 + spin_unlock(pml); 1843 + return true; 1844 + } 1845 + 1846 + static bool zap_pte_table_if_empty(struct mm_struct *mm, pmd_t *pmd, 1847 + unsigned long addr, pmd_t *pmdval) 1848 + { 1849 + spinlock_t *pml, *ptl = NULL; 1850 + pte_t *start_pte, *pte; 1851 + int i; 1852 + 1853 + pml = pmd_lock(mm, pmd); 1854 + start_pte = pte_offset_map_rw_nolock(mm, pmd, addr, pmdval, &ptl); 1855 + if (!start_pte) 1856 + goto out_ptl; 1857 + if (ptl != pml) 1858 + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); 1859 + 1860 + for (i = 0, pte = start_pte; i < PTRS_PER_PTE; i++, pte++) { 1861 + if (!pte_none(ptep_get(pte))) 1862 + goto out_ptl; 1863 + } 1864 + pte_unmap(start_pte); 1865 + 1866 + pmd_clear(pmd); 1867 + 1868 + if (ptl != pml) 1869 + spin_unlock(ptl); 1870 + spin_unlock(pml); 1871 + return true; 1872 + out_ptl: 1873 + if (start_pte) 1874 + pte_unmap_unlock(start_pte, ptl); 1875 + if (ptl != pml) 1876 + spin_unlock(pml); 1877 + return false; 1878 + } 1879 + 1824 1880 static unsigned long zap_pte_range(struct mmu_gather *tlb, 1825 1881 struct vm_area_struct *vma, pmd_t *pmd, 1826 1882 unsigned long addr, unsigned long end, 1827 1883 struct zap_details *details) 1828 1884 { 1885 + bool can_reclaim_pt = pte_table_reclaim_possible(addr, end, details); 1829 1886 bool force_flush = false, force_break = false; 1830 
1887 struct mm_struct *mm = tlb->mm; 1831 1888 int rss[NR_MM_COUNTERS]; ··· 1891 1834 pte_t *pte; 1892 1835 pmd_t pmdval; 1893 1836 unsigned long start = addr; 1894 - bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details); 1895 1837 bool direct_reclaim = true; 1896 1838 int nr; 1897 1839 ··· 1931 1875 * from being repopulated by another thread. 1932 1876 */ 1933 1877 if (can_reclaim_pt && direct_reclaim && addr == end) 1934 - direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval); 1878 + direct_reclaim = zap_empty_pte_table(mm, pmd, &pmdval); 1935 1879 1936 1880 add_mm_rss_vec(mm, rss); 1937 1881 lazy_mmu_mode_disable(); ··· 1960 1904 } 1961 1905 1962 1906 if (can_reclaim_pt) { 1963 - if (direct_reclaim) 1964 - free_pte(mm, start, tlb, pmdval); 1965 - else 1966 - try_to_free_pte(mm, pmd, start, tlb); 1907 + if (direct_reclaim || zap_pte_table_if_empty(mm, pmd, start, &pmdval)) { 1908 + pte_free_tlb(tlb, pmd_pgtable(pmdval), addr); 1909 + mm_dec_nr_ptes(mm); 1910 + } 1967 1911 } 1968 1912 1969 1913 return addr;
-72
mm/pt_reclaim.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <linux/hugetlb.h> 3 - #include <linux/pgalloc.h> 4 - 5 - #include <asm/tlb.h> 6 - 7 - #include "internal.h" 8 - 9 - bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, 10 - struct zap_details *details) 11 - { 12 - return details && details->reclaim_pt && (end - start >= PMD_SIZE); 13 - } 14 - 15 - bool try_get_and_clear_pmd(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval) 16 - { 17 - spinlock_t *pml = pmd_lockptr(mm, pmd); 18 - 19 - if (!spin_trylock(pml)) 20 - return false; 21 - 22 - *pmdval = pmdp_get_lockless(pmd); 23 - pmd_clear(pmd); 24 - spin_unlock(pml); 25 - 26 - return true; 27 - } 28 - 29 - void free_pte(struct mm_struct *mm, unsigned long addr, struct mmu_gather *tlb, 30 - pmd_t pmdval) 31 - { 32 - pte_free_tlb(tlb, pmd_pgtable(pmdval), addr); 33 - mm_dec_nr_ptes(mm); 34 - } 35 - 36 - void try_to_free_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, 37 - struct mmu_gather *tlb) 38 - { 39 - pmd_t pmdval; 40 - spinlock_t *pml, *ptl = NULL; 41 - pte_t *start_pte, *pte; 42 - int i; 43 - 44 - pml = pmd_lock(mm, pmd); 45 - start_pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pmdval, &ptl); 46 - if (!start_pte) 47 - goto out_ptl; 48 - if (ptl != pml) 49 - spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); 50 - 51 - /* Check if it is empty PTE page */ 52 - for (i = 0, pte = start_pte; i < PTRS_PER_PTE; i++, pte++) { 53 - if (!pte_none(ptep_get(pte))) 54 - goto out_ptl; 55 - } 56 - pte_unmap(start_pte); 57 - 58 - pmd_clear(pmd); 59 - 60 - if (ptl != pml) 61 - spin_unlock(ptl); 62 - spin_unlock(pml); 63 - 64 - free_pte(mm, addr, tlb, pmdval); 65 - 66 - return; 67 - out_ptl: 68 - if (start_pte) 69 - pte_unmap_unlock(start_pte, ptl); 70 - if (ptl != pml) 71 - spin_unlock(pml); 72 - }