Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Dave Hansen:
"New Feature:

- Randomize the per-cpu entry areas

Cleanups:

- Have CR3_ADDR_MASK use PHYSICAL_PAGE_MASK instead of open coding it

- Move to "native" set_memory_rox() helper

- Clean up pmd_get_atomic() and i386-PAE

- Remove some unused page table size macros"

* tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits)
x86/mm: Ensure forced page table splitting
x86/kasan: Populate shadow for shared chunk of the CPU entry area
x86/kasan: Add helpers to align shadow addresses up and down
x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names
x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area
x86/mm: Recompute physical address for every page of per-CPU CEA mapping
x86/mm: Rename __change_page_attr_set_clr(.checkalias)
x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias()
x86/mm: Untangle __change_page_attr_set_clr(.checkalias)
x86/mm: Add a few comments
x86/mm: Fix CR3_ADDR_MASK
x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros
mm: Convert __HAVE_ARCH_P..P_GET to the new style
mm: Remove pointless barrier() after pmdp_get_lockless()
x86/mm/pae: Get rid of set_64bit()
x86_64: Remove pointless set_64bit() usage
x86/mm/pae: Be consistent with pXXp_get_and_clear()
x86/mm/pae: Use WRITE_ONCE()
x86/mm/pae: Don't (ab)use atomic64
mm/gup: Fix the lockless PMD access
...

+358 -397
+3 -5
arch/arm/mach-omap1/sram-init.c
··· 10 10 #include <linux/kernel.h> 11 11 #include <linux/init.h> 12 12 #include <linux/io.h> 13 + #include <linux/set_memory.h> 13 14 14 15 #include <asm/fncpy.h> 15 16 #include <asm/tlb.h> 16 17 #include <asm/cacheflush.h> 17 - #include <asm/set_memory.h> 18 18 19 19 #include <asm/mach/map.h> 20 20 ··· 74 74 75 75 dst = fncpy(sram, funcp, size); 76 76 77 - set_memory_ro(base, pages); 78 - set_memory_x(base, pages); 77 + set_memory_rox(base, pages); 79 78 80 79 return dst; 81 80 } ··· 125 126 base = (unsigned long)omap_sram_base; 126 127 pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE; 127 128 128 - set_memory_ro(base, pages); 129 - set_memory_x(base, pages); 129 + set_memory_rox(base, pages); 130 130 } 131 131 132 132 static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
+3 -5
arch/arm/mach-omap2/sram.c
··· 14 14 #include <linux/kernel.h> 15 15 #include <linux/init.h> 16 16 #include <linux/io.h> 17 + #include <linux/set_memory.h> 17 18 18 19 #include <asm/fncpy.h> 19 20 #include <asm/tlb.h> 20 21 #include <asm/cacheflush.h> 21 - #include <asm/set_memory.h> 22 22 23 23 #include <asm/mach/map.h> 24 24 ··· 96 96 97 97 dst = fncpy(sram, funcp, size); 98 98 99 - set_memory_ro(base, pages); 100 - set_memory_x(base, pages); 99 + set_memory_rox(base, pages); 101 100 102 101 return dst; 103 102 } ··· 216 217 base = (unsigned long)omap_sram_base; 217 218 pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE; 218 219 219 - set_memory_ro(base, pages); 220 - set_memory_x(base, pages); 220 + set_memory_rox(base, pages); 221 221 } 222 222 223 223 static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
+1 -1
arch/mips/Kconfig
··· 46 46 select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC 47 47 select GENERIC_SMP_IDLE_THREAD 48 48 select GENERIC_TIME_VSYSCALL 49 - select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT 49 + select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT 50 50 select HAVE_ARCH_COMPILER_H 51 51 select HAVE_ARCH_JUMP_LABEL 52 52 select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT
+1 -1
arch/powerpc/include/asm/nohash/32/pgtable.h
··· 263 263 } 264 264 265 265 #ifdef CONFIG_PPC_16K_PAGES 266 - #define __HAVE_ARCH_PTEP_GET 266 + #define ptep_get ptep_get 267 267 static inline pte_t ptep_get(pte_t *ptep) 268 268 { 269 269 pte_basic_t val = READ_ONCE(ptep->pte);
+4 -5
arch/powerpc/kernel/kprobes.c
··· 20 20 #include <linux/kdebug.h> 21 21 #include <linux/slab.h> 22 22 #include <linux/moduleloader.h> 23 + #include <linux/set_memory.h> 23 24 #include <asm/code-patching.h> 24 25 #include <asm/cacheflush.h> 25 26 #include <asm/sstep.h> 26 27 #include <asm/sections.h> 27 28 #include <asm/inst.h> 28 - #include <asm/set_memory.h> 29 29 #include <linux/uaccess.h> 30 30 31 31 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; ··· 134 134 if (!page) 135 135 return NULL; 136 136 137 - if (strict_module_rwx_enabled()) { 138 - set_memory_ro((unsigned long)page, 1); 139 - set_memory_x((unsigned long)page, 1); 140 - } 137 + if (strict_module_rwx_enabled()) 138 + set_memory_rox((unsigned long)page, 1); 139 + 141 140 return page; 142 141 } 143 142
+1 -1
arch/sh/Kconfig
··· 24 24 select GENERIC_PCI_IOMAP if PCI 25 25 select GENERIC_SCHED_CLOCK 26 26 select GENERIC_SMP_IDLE_THREAD 27 - select GUP_GET_PTE_LOW_HIGH if X2TLB 27 + select GUP_GET_PXX_LOW_HIGH if X2TLB 28 28 select HAVE_ARCH_AUDITSYSCALL 29 29 select HAVE_ARCH_KGDB 30 30 select HAVE_ARCH_SECCOMP_FILTER
+8 -2
arch/sh/include/asm/pgtable-3level.h
··· 28 28 #define pmd_ERROR(e) \ 29 29 printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) 30 30 31 - typedef struct { unsigned long long pmd; } pmd_t; 31 + typedef struct { 32 + struct { 33 + unsigned long pmd_low; 34 + unsigned long pmd_high; 35 + }; 36 + unsigned long long pmd; 37 + } pmd_t; 32 38 #define pmd_val(x) ((x).pmd) 33 - #define __pmd(x) ((pmd_t) { (x) } ) 39 + #define __pmd(x) ((pmd_t) { .pmd = (x) } ) 34 40 35 41 static inline pmd_t *pud_pgtable(pud_t pud) 36 42 {
-8
arch/um/include/asm/pgtable-3level.h
··· 58 58 #define pud_populate(mm, pud, pmd) \ 59 59 set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd))) 60 60 61 - #ifdef CONFIG_64BIT 62 - #define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval)) 63 - #else 64 61 #define set_pud(pudptr, pudval) (*(pudptr) = (pudval)) 65 - #endif 66 62 67 63 static inline int pgd_newpage(pgd_t pgd) 68 64 { ··· 67 71 68 72 static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } 69 73 70 - #ifdef CONFIG_64BIT 71 - #define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval)) 72 - #else 73 74 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) 74 - #endif 75 75 76 76 static inline void pud_clear (pud_t *pud) 77 77 {
+1 -1
arch/x86/Kconfig
··· 159 159 select GENERIC_TIME_VSYSCALL 160 160 select GENERIC_GETTIMEOFDAY 161 161 select GENERIC_VDSO_TIME_NS 162 - select GUP_GET_PTE_LOW_HIGH if X86_PAE 162 + select GUP_GET_PXX_LOW_HIGH if X86_PAE 163 163 select HARDIRQS_SW_RESEND 164 164 select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 165 165 select HAVE_ACPI_APEI if ACPI
-28
arch/x86/include/asm/cmpxchg_32.h
··· 7 7 * you need to test for the feature in boot_cpu_data. 8 8 */ 9 9 10 - /* 11 - * CMPXCHG8B only writes to the target if we had the previous 12 - * value in registers, otherwise it acts as a read and gives us the 13 - * "new previous" value. That is why there is a loop. Preloading 14 - * EDX:EAX is a performance optimization: in the common case it means 15 - * we need only one locked operation. 16 - * 17 - * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very 18 - * least an FPU save and/or %cr0.ts manipulation. 19 - * 20 - * cmpxchg8b must be used with the lock prefix here to allow the 21 - * instruction to be executed atomically. We need to have the reader 22 - * side to see the coherent 64bit value. 23 - */ 24 - static inline void set_64bit(volatile u64 *ptr, u64 value) 25 - { 26 - u32 low = value; 27 - u32 high = value >> 32; 28 - u64 prev = *ptr; 29 - 30 - asm volatile("\n1:\t" 31 - LOCK_PREFIX "cmpxchg8b %0\n\t" 32 - "jnz 1b" 33 - : "=m" (*ptr), "+A" (prev) 34 - : "b" (low), "c" (high) 35 - : "memory"); 36 - } 37 - 38 10 #ifdef CONFIG_X86_CMPXCHG64 39 11 #define arch_cmpxchg64(ptr, o, n) \ 40 12 ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
-5
arch/x86/include/asm/cmpxchg_64.h
··· 2 2 #ifndef _ASM_X86_CMPXCHG_64_H 3 3 #define _ASM_X86_CMPXCHG_64_H 4 4 5 - static inline void set_64bit(volatile u64 *ptr, u64 val) 6 - { 7 - *ptr = val; 8 - } 9 - 10 5 #define arch_cmpxchg64(ptr, o, n) \ 11 6 ({ \ 12 7 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
-4
arch/x86/include/asm/cpu_entry_area.h
··· 130 130 }; 131 131 132 132 #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) 133 - #define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) 134 - 135 - /* Total size includes the readonly IDT mapping page as well: */ 136 - #define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) 137 133 138 134 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); 139 135 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
+3
arch/x86/include/asm/kasan.h
··· 28 28 #ifdef CONFIG_KASAN 29 29 void __init kasan_early_init(void); 30 30 void __init kasan_init(void); 31 + void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); 31 32 #else 32 33 static inline void kasan_early_init(void) { } 33 34 static inline void kasan_init(void) { } 35 + static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, 36 + int nid) { } 34 37 #endif 35 38 36 39 #endif
+3 -9
arch/x86/include/asm/page_types.h
··· 11 11 #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) 12 12 #define PAGE_MASK (~(PAGE_SIZE-1)) 13 13 14 - #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) 15 - #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) 16 - 17 - #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) 18 - #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) 19 - 20 14 #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) 21 15 22 - /* Cast *PAGE_MASK to a signed type so that it is sign-extended if 16 + /* Cast P*D_MASK to a signed type so that it is sign-extended if 23 17 virtual addresses are 32-bits but physical addresses are larger 24 18 (ie, 32-bit PAE). */ 25 19 #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) 26 - #define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) 27 - #define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) 20 + #define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) 21 + #define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) 28 22 29 23 #define HPAGE_SHIFT PMD_SHIFT 30 24 #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
+36 -139
arch/x86/include/asm/pgtable-3level.h
··· 2 2 #ifndef _ASM_X86_PGTABLE_3LEVEL_H 3 3 #define _ASM_X86_PGTABLE_3LEVEL_H 4 4 5 - #include <asm/atomic64_32.h> 6 - 7 5 /* 8 6 * Intel Physical Address Extension (PAE) Mode - three-level page 9 7 * tables on PPro+ CPUs. ··· 19 21 pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ 20 22 __FILE__, __LINE__, &(e), pgd_val(e)) 21 23 22 - /* Rules for using set_pte: the pte being assigned *must* be 24 + #define pxx_xchg64(_pxx, _ptr, _val) ({ \ 25 + _pxx##val_t *_p = (_pxx##val_t *)_ptr; \ 26 + _pxx##val_t _o = *_p; \ 27 + do { } while (!try_cmpxchg64(_p, &_o, (_val))); \ 28 + native_make_##_pxx(_o); \ 29 + }) 30 + 31 + /* 32 + * Rules for using set_pte: the pte being assigned *must* be 23 33 * either not present or in a state where the hardware will 24 34 * not attempt to update the pte. In places where this is 25 35 * not possible, use pte_get_and_clear to obtain the old pte ··· 35 29 */ 36 30 static inline void native_set_pte(pte_t *ptep, pte_t pte) 37 31 { 38 - ptep->pte_high = pte.pte_high; 32 + WRITE_ONCE(ptep->pte_high, pte.pte_high); 39 33 smp_wmb(); 40 - ptep->pte_low = pte.pte_low; 41 - } 42 - 43 - #define pmd_read_atomic pmd_read_atomic 44 - /* 45 - * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with 46 - * a "*pmdp" dereference done by GCC. Problem is, in certain places 47 - * where pte_offset_map_lock() is called, concurrent page faults are 48 - * allowed, if the mmap_lock is hold for reading. An example is mincore 49 - * vs page faults vs MADV_DONTNEED. On the page fault side 50 - * pmd_populate() rightfully does a set_64bit(), but if we're reading the 51 - * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen 52 - * because GCC will not read the 64-bit value of the pmd atomically. 
53 - * 54 - * To fix this all places running pte_offset_map_lock() while holding the 55 - * mmap_lock in read mode, shall read the pmdp pointer using this 56 - * function to know if the pmd is null or not, and in turn to know if 57 - * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd 58 - * operations. 59 - * 60 - * Without THP if the mmap_lock is held for reading, the pmd can only 61 - * transition from null to not null while pmd_read_atomic() runs. So 62 - * we can always return atomic pmd values with this function. 63 - * 64 - * With THP if the mmap_lock is held for reading, the pmd can become 65 - * trans_huge or none or point to a pte (and in turn become "stable") 66 - * at any time under pmd_read_atomic(). We could read it truly 67 - * atomically here with an atomic64_read() for the THP enabled case (and 68 - * it would be a whole lot simpler), but to avoid using cmpxchg8b we 69 - * only return an atomic pmdval if the low part of the pmdval is later 70 - * found to be stable (i.e. pointing to a pte). We are also returning a 71 - * 'none' (zero) pmdval if the low part of the pmd is zero. 72 - * 73 - * In some cases the high and low part of the pmdval returned may not be 74 - * consistent if THP is enabled (the low part may point to previously 75 - * mapped hugepage, while the high part may point to a more recently 76 - * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only 77 - * needs the low part of the pmd to be read atomically to decide if the 78 - * pmd is unstable or not, with the only exception when the low part 79 - * of the pmd is zero, in which case we return a 'none' pmd. 80 - */ 81 - static inline pmd_t pmd_read_atomic(pmd_t *pmdp) 82 - { 83 - pmdval_t ret; 84 - u32 *tmp = (u32 *)pmdp; 85 - 86 - ret = (pmdval_t) (*tmp); 87 - if (ret) { 88 - /* 89 - * If the low part is null, we must not read the high part 90 - * or we can end up with a partial pmd. 
91 - */ 92 - smp_rmb(); 93 - ret |= ((pmdval_t)*(tmp + 1)) << 32; 94 - } 95 - 96 - return (pmd_t) { ret }; 34 + WRITE_ONCE(ptep->pte_low, pte.pte_low); 97 35 } 98 36 99 37 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) 100 38 { 101 - set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); 39 + pxx_xchg64(pte, ptep, native_pte_val(pte)); 102 40 } 103 41 104 42 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) 105 43 { 106 - set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); 44 + pxx_xchg64(pmd, pmdp, native_pmd_val(pmd)); 107 45 } 108 46 109 47 static inline void native_set_pud(pud_t *pudp, pud_t pud) ··· 55 105 #ifdef CONFIG_PAGE_TABLE_ISOLATION 56 106 pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd); 57 107 #endif 58 - set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); 108 + pxx_xchg64(pud, pudp, native_pud_val(pud)); 59 109 } 60 110 61 111 /* ··· 66 116 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, 67 117 pte_t *ptep) 68 118 { 69 - ptep->pte_low = 0; 119 + WRITE_ONCE(ptep->pte_low, 0); 70 120 smp_wmb(); 71 - ptep->pte_high = 0; 121 + WRITE_ONCE(ptep->pte_high, 0); 72 122 } 73 123 74 - static inline void native_pmd_clear(pmd_t *pmd) 124 + static inline void native_pmd_clear(pmd_t *pmdp) 75 125 { 76 - u32 *tmp = (u32 *)pmd; 77 - *tmp = 0; 126 + WRITE_ONCE(pmdp->pmd_low, 0); 78 127 smp_wmb(); 79 - *(tmp + 1) = 0; 128 + WRITE_ONCE(pmdp->pmd_high, 0); 80 129 } 81 130 82 131 static inline void native_pud_clear(pud_t *pudp) ··· 98 149 */ 99 150 } 100 151 152 + 101 153 #ifdef CONFIG_SMP 102 154 static inline pte_t native_ptep_get_and_clear(pte_t *ptep) 103 155 { 104 - pte_t res; 156 + return pxx_xchg64(pte, ptep, 0ULL); 157 + } 105 158 106 - res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0); 159 + static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) 160 + { 161 + return pxx_xchg64(pmd, pmdp, 0ULL); 162 + } 107 163 108 - return res; 164 + static inline 
pud_t native_pudp_get_and_clear(pud_t *pudp) 165 + { 166 + return pxx_xchg64(pud, pudp, 0ULL); 109 167 } 110 168 #else 111 169 #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) 112 - #endif 113 - 114 - union split_pmd { 115 - struct { 116 - u32 pmd_low; 117 - u32 pmd_high; 118 - }; 119 - pmd_t pmd; 120 - }; 121 - 122 - #ifdef CONFIG_SMP 123 - static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) 124 - { 125 - union split_pmd res, *orig = (union split_pmd *)pmdp; 126 - 127 - /* xchg acts as a barrier before setting of the high bits */ 128 - res.pmd_low = xchg(&orig->pmd_low, 0); 129 - res.pmd_high = orig->pmd_high; 130 - orig->pmd_high = 0; 131 - 132 - return res.pmd; 133 - } 134 - #else 135 170 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) 171 + #define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) 136 172 #endif 137 173 138 174 #ifndef pmdp_establish ··· 133 199 * anybody. 134 200 */ 135 201 if (!(pmd_val(pmd) & _PAGE_PRESENT)) { 136 - union split_pmd old, new, *ptr; 137 - 138 - ptr = (union split_pmd *)pmdp; 139 - 140 - new.pmd = pmd; 141 - 142 202 /* xchg acts as a barrier before setting of the high bits */ 143 - old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low); 144 - old.pmd_high = ptr->pmd_high; 145 - ptr->pmd_high = new.pmd_high; 146 - return old.pmd; 203 + old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low); 204 + old.pmd_high = READ_ONCE(pmdp->pmd_high); 205 + WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high); 206 + 207 + return old; 147 208 } 148 209 149 - do { 150 - old = *pmdp; 151 - } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); 152 - 153 - return old; 210 + return pxx_xchg64(pmd, pmdp, pmd.pmd); 154 211 } 155 - #endif 156 - 157 - #ifdef CONFIG_SMP 158 - union split_pud { 159 - struct { 160 - u32 pud_low; 161 - u32 pud_high; 162 - }; 163 - pud_t pud; 164 - }; 165 - 166 - static inline pud_t native_pudp_get_and_clear(pud_t *pudp) 167 - { 168 - union split_pud res, *orig = (union 
split_pud *)pudp; 169 - 170 - #ifdef CONFIG_PAGE_TABLE_ISOLATION 171 - pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0)); 172 - #endif 173 - 174 - /* xchg acts as a barrier before setting of the high bits */ 175 - res.pud_low = xchg(&orig->pud_low, 0); 176 - res.pud_high = orig->pud_high; 177 - orig->pud_high = 0; 178 - 179 - return res.pud; 180 - } 181 - #else 182 - #define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) 183 212 #endif 184 213 185 214 /* Encode and de-code a swap entry */
+7
arch/x86/include/asm/pgtable-3level_types.h
··· 18 18 }; 19 19 pteval_t pte; 20 20 } pte_t; 21 + 22 + typedef union { 23 + struct { 24 + unsigned long pmd_low, pmd_high; 25 + }; 26 + pmdval_t pmd; 27 + } pmd_t; 21 28 #endif /* !__ASSEMBLY__ */ 22 29 23 30 #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
+1
arch/x86/include/asm/pgtable_64_types.h
··· 19 19 typedef unsigned long pgprotval_t; 20 20 21 21 typedef struct { pteval_t pte; } pte_t; 22 + typedef struct { pmdval_t pmd; } pmd_t; 22 23 23 24 #ifdef CONFIG_X86_5LEVEL 24 25 extern unsigned int __pgtable_l5_enabled;
+7 -1
arch/x86/include/asm/pgtable_areas.h
··· 11 11 12 12 #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) 13 13 14 - #define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) 14 + #ifdef CONFIG_X86_32 15 + #define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ 16 + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ 17 + CPU_ENTRY_AREA_BASE) 18 + #else 19 + #define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE 20 + #endif 15 21 16 22 #endif /* _ASM_X86_PGTABLE_AREAS_H */
+1 -3
arch/x86/include/asm/pgtable_types.h
··· 361 361 #endif 362 362 363 363 #if CONFIG_PGTABLE_LEVELS > 2 364 - typedef struct { pmdval_t pmd; } pmd_t; 365 - 366 364 static inline pmd_t native_make_pmd(pmdval_t val) 367 365 { 368 - return (pmd_t) { val }; 366 + return (pmd_t) { .pmd = val }; 369 367 } 370 368 371 369 static inline pmdval_t native_pmd_val(pmd_t pmd)
+1 -1
arch/x86/include/asm/processor-flags.h
··· 35 35 */ 36 36 #ifdef CONFIG_X86_64 37 37 /* Mask off the address space ID and SME encryption bits. */ 38 - #define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) 38 + #define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK) 39 39 #define CR3_PCID_MASK 0xFFFull 40 40 #define CR3_NOFLUSH BIT_ULL(63) 41 41
+3
arch/x86/include/asm/set_memory.h
··· 6 6 #include <asm/page.h> 7 7 #include <asm-generic/set_memory.h> 8 8 9 + #define set_memory_rox set_memory_rox 10 + int set_memory_rox(unsigned long addr, int numpages); 11 + 9 12 /* 10 13 * The set_memory_* API can be used to change various attributes of a virtual 11 14 * address range. The attributes include:
-10
arch/x86/kernel/alternative.c
··· 2142 2142 { 2143 2143 struct text_poke_loc *tp; 2144 2144 2145 - if (unlikely(system_state == SYSTEM_BOOTING)) { 2146 - text_poke_early(addr, opcode, len); 2147 - return; 2148 - } 2149 - 2150 2145 text_poke_flush(addr); 2151 2146 2152 2147 tp = &tp_vec[tp_vec_nr++]; ··· 2162 2167 void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) 2163 2168 { 2164 2169 struct text_poke_loc tp; 2165 - 2166 - if (unlikely(system_state == SYSTEM_BOOTING)) { 2167 - text_poke_early(addr, opcode, len); 2168 - return; 2169 - } 2170 2170 2171 2171 text_poke_loc_init(&tp, addr, opcode, len, emulate); 2172 2172 text_poke_bp_batch(&tp, 1);
+1 -1
arch/x86/kernel/amd_gart_64.c
··· 504 504 } 505 505 506 506 a = aper + iommu_size; 507 - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; 507 + iommu_size -= round_up(a, PMD_SIZE) - a; 508 508 509 509 if (iommu_size < 64*1024*1024) { 510 510 pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
+2 -4
arch/x86/kernel/ftrace.c
··· 24 24 #include <linux/module.h> 25 25 #include <linux/memory.h> 26 26 #include <linux/vmalloc.h> 27 + #include <linux/set_memory.h> 27 28 28 29 #include <trace/syscall.h> 29 30 30 - #include <asm/set_memory.h> 31 31 #include <asm/kprobes.h> 32 32 #include <asm/ftrace.h> 33 33 #include <asm/nops.h> ··· 423 423 /* ALLOC_TRAMP flags lets us know we created it */ 424 424 ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; 425 425 426 - if (likely(system_state != SYSTEM_BOOTING)) 427 - set_memory_ro((unsigned long)trampoline, npages); 428 - set_memory_x((unsigned long)trampoline, npages); 426 + set_memory_rox((unsigned long)trampoline, npages); 429 427 return (unsigned long)trampoline; 430 428 fail: 431 429 tramp_free(trampoline);
+1 -1
arch/x86/kernel/head64.c
··· 203 203 load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); 204 204 205 205 /* Is the address not 2M aligned? */ 206 - if (load_delta & ~PMD_PAGE_MASK) 206 + if (load_delta & ~PMD_MASK) 207 207 for (;;); 208 208 209 209 /* Include the SME encryption mask in the fixup value */
+1 -1
arch/x86/kernel/hw_breakpoint.c
··· 266 266 267 267 /* CPU entry area is always used for CPU entry */ 268 268 if (within_area(addr, end, CPU_ENTRY_AREA_BASE, 269 - CPU_ENTRY_AREA_TOTAL_SIZE)) 269 + CPU_ENTRY_AREA_MAP_SIZE)) 270 270 return true; 271 271 272 272 /*
+2 -8
arch/x86/kernel/kprobes/core.c
··· 43 43 #include <linux/objtool.h> 44 44 #include <linux/vmalloc.h> 45 45 #include <linux/pgtable.h> 46 + #include <linux/set_memory.h> 46 47 47 48 #include <asm/text-patching.h> 48 49 #include <asm/cacheflush.h> ··· 52 51 #include <asm/alternative.h> 53 52 #include <asm/insn.h> 54 53 #include <asm/debugreg.h> 55 - #include <asm/set_memory.h> 56 54 #include <asm/ibt.h> 57 55 58 56 #include "common.h" ··· 415 415 return NULL; 416 416 417 417 /* 418 - * First make the page read-only, and only then make it executable to 419 - * prevent it from being W+X in between. 420 - */ 421 - set_memory_ro((unsigned long)page, 1); 422 - 423 - /* 424 418 * TODO: Once additional kernel code protection mechanisms are set, ensure 425 419 * that the page was not maliciously altered and it is still zeroed. 426 420 */ 427 - set_memory_x((unsigned long)page, 1); 421 + set_memory_rox((unsigned long)page, 1); 428 422 429 423 return page; 430 424 }
+46 -4
arch/x86/mm/cpu_entry_area.c
··· 9 9 #include <asm/cpu_entry_area.h> 10 10 #include <asm/fixmap.h> 11 11 #include <asm/desc.h> 12 + #include <asm/kasan.h> 12 13 13 14 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); 14 15 15 16 #ifdef CONFIG_X86_64 16 17 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); 17 18 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); 18 - #endif 19 19 20 - #ifdef CONFIG_X86_32 20 + static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); 21 + 22 + static __always_inline unsigned int cea_offset(unsigned int cpu) 23 + { 24 + return per_cpu(_cea_offset, cpu); 25 + } 26 + 27 + static __init void init_cea_offsets(void) 28 + { 29 + unsigned int max_cea; 30 + unsigned int i, j; 31 + 32 + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; 33 + 34 + /* O(sodding terrible) */ 35 + for_each_possible_cpu(i) { 36 + unsigned int cea; 37 + 38 + again: 39 + cea = prandom_u32_max(max_cea); 40 + 41 + for_each_possible_cpu(j) { 42 + if (cea_offset(j) == cea) 43 + goto again; 44 + 45 + if (i == j) 46 + break; 47 + } 48 + 49 + per_cpu(_cea_offset, i) = cea; 50 + } 51 + } 52 + #else /* !X86_64 */ 21 53 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); 54 + 55 + static __always_inline unsigned int cea_offset(unsigned int cpu) 56 + { 57 + return cpu; 58 + } 59 + static inline void init_cea_offsets(void) { } 22 60 #endif 23 61 24 62 /* Is called from entry code, so must be noinstr */ 25 63 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) 26 64 { 27 - unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; 65 + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; 28 66 BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); 29 67 30 68 return (struct cpu_entry_area *) va; ··· 186 148 pgprot_t tss_prot = PAGE_KERNEL; 187 149 #endif 188 150 151 + kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, 152 
+ early_cpu_to_node(cpu)); 153 + 189 154 cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); 190 155 191 156 cea_map_percpu_pages(&cea->entry_stack_page, ··· 242 201 243 202 /* The +1 is for the readonly IDT: */ 244 203 BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); 245 - BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); 246 204 BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); 247 205 248 206 start = CPU_ENTRY_AREA_BASE; ··· 256 216 void __init setup_cpu_entry_areas(void) 257 217 { 258 218 unsigned int cpu; 219 + 220 + init_cea_offsets(); 259 221 260 222 setup_cpu_entry_area_ptes(); 261 223
+1 -1
arch/x86/mm/init.c
··· 801 801 spinlock_t *ptl; 802 802 pte_t *ptep; 803 803 804 - poking_mm = copy_init_mm(); 804 + poking_mm = mm_alloc(); 805 805 BUG_ON(!poking_mm); 806 806 807 807 /*
+38 -15
arch/x86/mm/kasan_init_64.c
··· 316 316 kasan_map_early_shadow(init_top_pgt); 317 317 } 318 318 319 + static unsigned long kasan_mem_to_shadow_align_down(unsigned long va) 320 + { 321 + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); 322 + 323 + return round_down(shadow, PAGE_SIZE); 324 + } 325 + 326 + static unsigned long kasan_mem_to_shadow_align_up(unsigned long va) 327 + { 328 + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); 329 + 330 + return round_up(shadow, PAGE_SIZE); 331 + } 332 + 333 + void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) 334 + { 335 + unsigned long shadow_start, shadow_end; 336 + 337 + shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va); 338 + shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size); 339 + kasan_populate_shadow(shadow_start, shadow_end, nid); 340 + } 341 + 319 342 void __init kasan_init(void) 320 343 { 344 + unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end; 321 345 int i; 322 - void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; 323 346 324 347 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); 325 348 ··· 383 360 map_range(&pfn_mapped[i]); 384 361 } 385 362 386 - shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; 387 - shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); 388 - shadow_cpu_entry_begin = (void *)round_down( 389 - (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); 390 - 391 - shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + 392 - CPU_ENTRY_AREA_MAP_SIZE); 393 - shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); 394 - shadow_cpu_entry_end = (void *)round_up( 395 - (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); 363 + shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); 364 + shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU); 365 + shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + 366 + 
CPU_ENTRY_AREA_MAP_SIZE); 396 367 397 368 kasan_populate_early_shadow( 398 369 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), ··· 408 391 409 392 kasan_populate_early_shadow( 410 393 kasan_mem_to_shadow((void *)VMALLOC_END + 1), 411 - shadow_cpu_entry_begin); 394 + (void *)shadow_cea_begin); 412 395 413 - kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, 414 - (unsigned long)shadow_cpu_entry_end, 0); 396 + /* 397 + * Populate the shadow for the shared portion of the CPU entry area. 398 + * Shadows for the per-CPU areas are mapped on-demand, as each CPU's 399 + * area is randomly placed somewhere in the 512GiB range and mapping 400 + * the entire 512GiB range is prohibitively expensive. 401 + */ 402 + kasan_populate_shadow(shadow_cea_begin, 403 + shadow_cea_per_cpu_begin, 0); 415 404 416 - kasan_populate_early_shadow(shadow_cpu_entry_end, 405 + kasan_populate_early_shadow((void *)shadow_cea_end, 417 406 kasan_mem_to_shadow((void *)__START_KERNEL_map)); 418 407 419 408 kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
+2 -2
arch/x86/mm/mem_encrypt_boot.S
··· 26 26 * RCX - virtual address of the encryption workarea, including: 27 27 * - stack page (PAGE_SIZE) 28 28 * - encryption routine page (PAGE_SIZE) 29 - * - intermediate copy buffer (PMD_PAGE_SIZE) 29 + * - intermediate copy buffer (PMD_SIZE) 30 30 * R8 - physical address of the pagetables to use for encryption 31 31 */ 32 32 ··· 123 123 wbinvd /* Invalidate any cache entries */ 124 124 125 125 /* Copy/encrypt up to 2MB at a time */ 126 - movq $PMD_PAGE_SIZE, %r12 126 + movq $PMD_SIZE, %r12 127 127 1: 128 128 cmpq %r12, %r9 129 129 jnb 2f
+9 -9
arch/x86/mm/mem_encrypt_identity.c
··· 93 93 * section is 2MB aligned to allow for simple pagetable setup using only 94 94 * PMD entries (see vmlinux.lds.S). 95 95 */ 96 - static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); 96 + static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); 97 97 98 98 static char sme_cmdline_arg[] __initdata = "mem_encrypt"; 99 99 static char sme_cmdline_on[] __initdata = "on"; ··· 198 198 while (ppd->vaddr < ppd->vaddr_end) { 199 199 sme_populate_pgd_large(ppd); 200 200 201 - ppd->vaddr += PMD_PAGE_SIZE; 202 - ppd->paddr += PMD_PAGE_SIZE; 201 + ppd->vaddr += PMD_SIZE; 202 + ppd->paddr += PMD_SIZE; 203 203 } 204 204 } 205 205 ··· 225 225 vaddr_end = ppd->vaddr_end; 226 226 227 227 /* If start is not 2MB aligned, create PTE entries */ 228 - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); 228 + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE); 229 229 __sme_map_range_pte(ppd); 230 230 231 231 /* Create PMD entries */ 232 - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; 232 + ppd->vaddr_end = vaddr_end & PMD_MASK; 233 233 __sme_map_range_pmd(ppd); 234 234 235 235 /* If end is not 2MB aligned, create PTE entries */ ··· 325 325 326 326 /* Physical addresses gives us the identity mapped virtual addresses */ 327 327 kernel_start = __pa_symbol(_text); 328 - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); 328 + kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); 329 329 kernel_len = kernel_end - kernel_start; 330 330 331 331 initrd_start = 0; ··· 355 355 * executable encryption area size: 356 356 * stack page (PAGE_SIZE) 357 357 * encryption routine page (PAGE_SIZE) 358 - * intermediate copy buffer (PMD_PAGE_SIZE) 358 + * intermediate copy buffer (PMD_SIZE) 359 359 * pagetable structures for the encryption of the kernel 360 360 * pagetable structures for workarea (in case not currently mapped) 361 361 */ 362 362 execute_start = workarea_start; 363 - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; 363 + execute_end = execute_start + 
(PAGE_SIZE * 2) + PMD_SIZE; 364 364 execute_len = execute_end - execute_start; 365 365 366 366 /* ··· 383 383 * before it is mapped. 384 384 */ 385 385 workarea_len = execute_len + pgtable_area_len; 386 - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); 386 + workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE); 387 387 388 388 /* 389 389 * Set the address to the start of where newly created pagetable
+71 -34
arch/x86/mm/pat/set_memory.c
··· 220 220 221 221 #ifdef CONFIG_X86_64 222 222 223 + /* 224 + * The kernel image is mapped into two places in the virtual address space 225 + * (addresses without KASLR, of course): 226 + * 227 + * 1. The kernel direct map (0xffff880000000000) 228 + * 2. The "high kernel map" (0xffffffff81000000) 229 + * 230 + * We actually execute out of #2. If we get the address of a kernel symbol, it 231 + * points to #2, but almost all physical-to-virtual translations point to #1. 232 + * 233 + * This is so that we can have both a directmap of all physical memory *and* 234 + * take full advantage of the limited (s32) immediate addressing range (2G) 235 + * of x86_64. 236 + * 237 + * See Documentation/x86/x86_64/mm.rst for more detail. 238 + */ 239 + 223 240 static inline unsigned long highmap_start_pfn(void) 224 241 { 225 242 return __pa_symbol(_text) >> PAGE_SHIFT; ··· 622 605 { 623 606 unsigned long end; 624 607 625 - /* Kernel text is rw at boot up */ 626 - if (system_state == SYSTEM_BOOTING) 627 - return new; 628 - 629 608 /* 630 609 * 32-bit has some unfixable W+X issues, like EFI code 631 610 * and writeable data being in the same page. 
Disable ··· 778 765 switch (level) { 779 766 case PG_LEVEL_1G: 780 767 phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; 781 - offset = virt_addr & ~PUD_PAGE_MASK; 768 + offset = virt_addr & ~PUD_MASK; 782 769 break; 783 770 case PG_LEVEL_2M: 784 771 phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; 785 - offset = virt_addr & ~PMD_PAGE_MASK; 772 + offset = virt_addr & ~PMD_MASK; 786 773 break; 787 774 default: 788 775 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; ··· 1072 1059 case PG_LEVEL_1G: 1073 1060 ref_prot = pud_pgprot(*(pud_t *)kpte); 1074 1061 ref_pfn = pud_pfn(*(pud_t *)kpte); 1075 - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; 1062 + pfninc = PMD_SIZE >> PAGE_SHIFT; 1076 1063 lpaddr = address & PUD_MASK; 1077 1064 lpinc = PMD_SIZE; 1078 1065 /* ··· 1659 1646 return err; 1660 1647 } 1661 1648 1662 - static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); 1649 + static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary); 1663 1650 1651 + /* 1652 + * Check the directmap and "high kernel map" 'aliases'. 1653 + */ 1664 1654 static int cpa_process_alias(struct cpa_data *cpa) 1665 1655 { 1666 1656 struct cpa_data alias_cpa; ··· 1687 1671 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); 1688 1672 alias_cpa.curpage = 0; 1689 1673 1674 + /* Directmap always has NX set, do not modify. */ 1675 + if (__supported_pte_mask & _PAGE_NX) { 1676 + alias_cpa.mask_clr.pgprot &= ~_PAGE_NX; 1677 + alias_cpa.mask_set.pgprot &= ~_PAGE_NX; 1678 + } 1679 + 1690 1680 cpa->force_flush_all = 1; 1691 1681 1692 1682 ret = __change_page_attr_set_clr(&alias_cpa, 0); ··· 1715 1693 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); 1716 1694 alias_cpa.curpage = 0; 1717 1695 1696 + /* 1697 + * [_text, _brk_end) also covers data, do not modify NX except 1698 + * in cases where the highmap is the primary target. 
1699 + */ 1700 + if (__supported_pte_mask & _PAGE_NX) { 1701 + alias_cpa.mask_clr.pgprot &= ~_PAGE_NX; 1702 + alias_cpa.mask_set.pgprot &= ~_PAGE_NX; 1703 + } 1704 + 1718 1705 cpa->force_flush_all = 1; 1719 1706 /* 1720 1707 * The high mapping range is imprecise, so ignore the ··· 1736 1705 return 0; 1737 1706 } 1738 1707 1739 - static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) 1708 + static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary) 1740 1709 { 1741 1710 unsigned long numpages = cpa->numpages; 1742 1711 unsigned long rempages = numpages; 1743 1712 int ret = 0; 1713 + 1714 + /* 1715 + * No changes, easy! 1716 + */ 1717 + if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) && 1718 + !cpa->force_split) 1719 + return ret; 1744 1720 1745 1721 while (rempages) { 1746 1722 /* ··· 1761 1723 1762 1724 if (!debug_pagealloc_enabled()) 1763 1725 spin_lock(&cpa_lock); 1764 - ret = __change_page_attr(cpa, checkalias); 1726 + ret = __change_page_attr(cpa, primary); 1765 1727 if (!debug_pagealloc_enabled()) 1766 1728 spin_unlock(&cpa_lock); 1767 1729 if (ret) 1768 1730 goto out; 1769 1731 1770 - if (checkalias) { 1732 + if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { 1771 1733 ret = cpa_process_alias(cpa); 1772 1734 if (ret) 1773 1735 goto out; ··· 1795 1757 struct page **pages) 1796 1758 { 1797 1759 struct cpa_data cpa; 1798 - int ret, cache, checkalias; 1760 + int ret, cache; 1799 1761 1800 1762 memset(&cpa, 0, sizeof(cpa)); 1801 1763 ··· 1841 1803 cpa.numpages = numpages; 1842 1804 cpa.mask_set = mask_set; 1843 1805 cpa.mask_clr = mask_clr; 1844 - cpa.flags = 0; 1806 + cpa.flags = in_flag; 1845 1807 cpa.curpage = 0; 1846 1808 cpa.force_split = force_split; 1847 1809 1848 - if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) 1849 - cpa.flags |= in_flag; 1850 - 1851 - /* No alias checking for _NX bit modifications */ 1852 - checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 1853 - /* Has caller 
explicitly disabled alias checking? */ 1854 - if (in_flag & CPA_NO_CHECK_ALIAS) 1855 - checkalias = 0; 1856 - 1857 - ret = __change_page_attr_set_clr(&cpa, checkalias); 1810 + ret = __change_page_attr_set_clr(&cpa, 1); 1858 1811 1859 1812 /* 1860 1813 * Check whether we really changed something: ··· 2076 2047 return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); 2077 2048 } 2078 2049 2050 + int set_memory_rox(unsigned long addr, int numpages) 2051 + { 2052 + pgprot_t clr = __pgprot(_PAGE_RW); 2053 + 2054 + if (__supported_pte_mask & _PAGE_NX) 2055 + clr.pgprot |= _PAGE_NX; 2056 + 2057 + return change_page_attr_clear(&addr, numpages, clr, 0); 2058 + } 2059 + 2079 2060 int set_memory_rw(unsigned long addr, int numpages) 2080 2061 { 2081 2062 return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); ··· 2098 2059 2099 2060 int set_memory_np_noalias(unsigned long addr, int numpages) 2100 2061 { 2101 - int cpa_flags = CPA_NO_CHECK_ALIAS; 2102 - 2103 2062 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 2104 2063 __pgprot(_PAGE_PRESENT), 0, 2105 - cpa_flags, NULL); 2064 + CPA_NO_CHECK_ALIAS, NULL); 2106 2065 } 2107 2066 2108 2067 int set_memory_4k(unsigned long addr, int numpages) ··· 2317 2280 .numpages = numpages, 2318 2281 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), 2319 2282 .mask_clr = __pgprot(0), 2320 - .flags = 0}; 2283 + .flags = CPA_NO_CHECK_ALIAS }; 2321 2284 2322 2285 /* 2323 2286 * No alias checking needed for setting present flag. otherwise, ··· 2325 2288 * mappings (this adds to complexity if we want to do this from 2326 2289 * atomic context especially). Let's keep it simple! 
2327 2290 */ 2328 - return __change_page_attr_set_clr(&cpa, 0); 2291 + return __change_page_attr_set_clr(&cpa, 1); 2329 2292 } 2330 2293 2331 2294 static int __set_pages_np(struct page *page, int numpages) ··· 2336 2299 .numpages = numpages, 2337 2300 .mask_set = __pgprot(0), 2338 2301 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), 2339 - .flags = 0}; 2302 + .flags = CPA_NO_CHECK_ALIAS }; 2340 2303 2341 2304 /* 2342 2305 * No alias checking needed for setting not present flag. otherwise, ··· 2344 2307 * mappings (this adds to complexity if we want to do this from 2345 2308 * atomic context especially). Let's keep it simple! 2346 2309 */ 2347 - return __change_page_attr_set_clr(&cpa, 0); 2310 + return __change_page_attr_set_clr(&cpa, 1); 2348 2311 } 2349 2312 2350 2313 int set_direct_map_invalid_noflush(struct page *page) ··· 2415 2378 .numpages = numpages, 2416 2379 .mask_set = __pgprot(0), 2417 2380 .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), 2418 - .flags = 0, 2381 + .flags = CPA_NO_CHECK_ALIAS, 2419 2382 }; 2420 2383 2421 2384 WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); ··· 2428 2391 2429 2392 cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); 2430 2393 2431 - retval = __change_page_attr_set_clr(&cpa, 0); 2394 + retval = __change_page_attr_set_clr(&cpa, 1); 2432 2395 __flush_tlb_all(); 2433 2396 2434 2397 out: ··· 2458 2421 .numpages = numpages, 2459 2422 .mask_set = __pgprot(0), 2460 2423 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), 2461 - .flags = 0, 2424 + .flags = CPA_NO_CHECK_ALIAS, 2462 2425 }; 2463 2426 2464 2427 WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); 2465 2428 2466 - retval = __change_page_attr_set_clr(&cpa, 0); 2429 + retval = __change_page_attr_set_clr(&cpa, 1); 2467 2430 __flush_tlb_all(); 2468 2431 2469 2432 return retval;
+1 -1
arch/x86/mm/pti.c
··· 592 592 * of the image. 593 593 */ 594 594 unsigned long start = PFN_ALIGN(_text); 595 - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); 595 + unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE); 596 596 597 597 /* 598 598 * This clears _PAGE_GLOBAL from the entire kernel image.
+5 -8
drivers/iommu/intel/irq_remapping.c
··· 174 174 index = irq_iommu->irte_index + irq_iommu->sub_handle; 175 175 irte = &iommu->ir_table->base[index]; 176 176 177 - #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) 178 177 if ((irte->pst == 1) || (irte_modified->pst == 1)) { 179 178 bool ret; 180 179 ··· 187 188 * same as the old value. 188 189 */ 189 190 WARN_ON(!ret); 190 - } else 191 - #endif 192 - { 193 - set_64bit(&irte->low, irte_modified->low); 194 - set_64bit(&irte->high, irte_modified->high); 191 + } else { 192 + WRITE_ONCE(irte->low, irte_modified->low); 193 + WRITE_ONCE(irte->high, irte_modified->high); 195 194 } 196 195 __iommu_flush_cache(iommu, irte, sizeof(*irte)); 197 196 ··· 247 250 end = start + (1 << irq_iommu->irte_mask); 248 251 249 252 for (entry = start; entry < end; entry++) { 250 - set_64bit(&entry->low, 0); 251 - set_64bit(&entry->high, 0); 253 + WRITE_ONCE(entry->low, 0); 254 + WRITE_ONCE(entry->high, 0); 252 255 } 253 256 bitmap_release_region(iommu->ir_table->bitmap, index, 254 257 irq_iommu->irte_mask);
+2 -5
drivers/misc/sram-exec.c
··· 10 10 #include <linux/genalloc.h> 11 11 #include <linux/mm.h> 12 12 #include <linux/sram.h> 13 + #include <linux/set_memory.h> 13 14 14 15 #include <asm/fncpy.h> 15 - #include <asm/set_memory.h> 16 16 17 17 #include "sram.h" 18 18 ··· 106 106 107 107 dst_cpy = fncpy(dst, src, size); 108 108 109 - ret = set_memory_ro((unsigned long)base, pages); 110 - if (ret) 111 - goto error_out; 112 - ret = set_memory_x((unsigned long)base, pages); 109 + ret = set_memory_rox((unsigned long)base, pages); 113 110 if (ret) 114 111 goto error_out; 115 112
+1 -2
include/linux/filter.h
··· 860 860 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) 861 861 { 862 862 set_vm_flush_reset_perms(hdr); 863 - set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT); 864 - set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT); 863 + set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); 865 864 } 866 865 867 866 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
+46 -27
include/linux/pgtable.h
··· 309 309 ptep_get_and_clear(mm, addr, ptep); 310 310 } 311 311 312 - #ifndef __HAVE_ARCH_PTEP_GET 312 + #ifndef ptep_get 313 313 static inline pte_t ptep_get(pte_t *ptep) 314 314 { 315 315 return READ_ONCE(*ptep); 316 316 } 317 317 #endif 318 318 319 - #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH 319 + #ifndef pmdp_get 320 + static inline pmd_t pmdp_get(pmd_t *pmdp) 321 + { 322 + return READ_ONCE(*pmdp); 323 + } 324 + #endif 325 + 326 + #ifdef CONFIG_GUP_GET_PXX_LOW_HIGH 320 327 /* 321 - * WARNING: only to be used in the get_user_pages_fast() implementation. 322 - * 323 - * With get_user_pages_fast(), we walk down the pagetables without taking any 324 - * locks. For this we would like to load the pointers atomically, but sometimes 325 - * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What 326 - * we do have is the guarantee that a PTE will only either go from not present 327 - * to present, or present to not present or both -- it will not switch to a 328 - * completely different present page without a TLB flush in between; something 329 - * that we are blocking by holding interrupts off. 328 + * For walking the pagetables without holding any locks. Some architectures 329 + * (e.g. x86-32 PAE) cannot load the entries atomically without using expensive 330 + * instructions. We are guaranteed that a PTE will only either go from not 331 + * present to present, or present to not present -- it will not switch to a 332 + * completely different present page without a TLB flush in between; which we 333 + * are blocking by holding interrupts off. 
330 334 * 331 335 * Setting ptes from not present to present goes: 332 336 * ··· 365 361 366 362 return pte; 367 363 } 368 - #else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ 364 + #define ptep_get_lockless ptep_get_lockless 365 + 366 + #if CONFIG_PGTABLE_LEVELS > 2 367 + static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) 368 + { 369 + pmd_t pmd; 370 + 371 + do { 372 + pmd.pmd_low = pmdp->pmd_low; 373 + smp_rmb(); 374 + pmd.pmd_high = pmdp->pmd_high; 375 + smp_rmb(); 376 + } while (unlikely(pmd.pmd_low != pmdp->pmd_low)); 377 + 378 + return pmd; 379 + } 380 + #define pmdp_get_lockless pmdp_get_lockless 381 + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ 382 + #endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ 383 + 369 384 /* 370 385 * We require that the PTE can be read atomically. 371 386 */ 387 + #ifndef ptep_get_lockless 372 388 static inline pte_t ptep_get_lockless(pte_t *ptep) 373 389 { 374 390 return ptep_get(ptep); 375 391 } 376 - #endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ 392 + #endif 393 + 394 + #ifndef pmdp_get_lockless 395 + static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) 396 + { 397 + return pmdp_get(pmdp); 398 + } 399 + #endif 377 400 378 401 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 379 402 #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR ··· 1344 1313 #endif 1345 1314 } 1346 1315 1347 - #ifndef pmd_read_atomic 1348 - static inline pmd_t pmd_read_atomic(pmd_t *pmdp) 1349 - { 1350 - /* 1351 - * Depend on compiler for an atomic pmd read. NOTE: this is 1352 - * only going to work, if the pmdval_t isn't larger than 1353 - * an unsigned long. 
1354 - */ 1355 - return *pmdp; 1356 - } 1357 - #endif 1358 - 1359 1316 #ifndef arch_needs_pgtable_deposit 1360 1317 #define arch_needs_pgtable_deposit() (false) 1361 1318 #endif ··· 1370 1351 */ 1371 1352 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) 1372 1353 { 1373 - pmd_t pmdval = pmd_read_atomic(pmd); 1354 + pmd_t pmdval = pmdp_get_lockless(pmd); 1374 1355 /* 1375 1356 * The barrier will stabilize the pmdval in a register or on 1376 1357 * the stack so that it will stop changing under the code. 1377 1358 * 1378 1359 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE, 1379 - * pmd_read_atomic is allowed to return a not atomic pmdval 1360 + * pmdp_get_lockless is allowed to return a not atomic pmdval 1380 1361 * (for example pointing to an hugepage that has never been 1381 1362 * mapped in the pmd). The below checks will only care about 1382 1363 * the low part of the pmd with 32bit PAE x86 anyway, with the
+1 -1
include/linux/sched/task.h
··· 65 65 void __noreturn do_task_dead(void); 66 66 void __noreturn make_task_dead(int signr); 67 67 68 + extern void mm_cache_init(void); 68 69 extern void proc_caches_init(void); 69 70 70 71 extern void fork_init(void); ··· 91 90 extern pid_t kernel_clone(struct kernel_clone_args *kargs); 92 91 struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); 93 92 struct task_struct *fork_idle(int); 94 - struct mm_struct *copy_init_mm(void); 95 93 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); 96 94 extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags); 97 95 extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
+10
include/linux/set_memory.h
··· 14 14 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } 15 15 #endif 16 16 17 + #ifndef set_memory_rox 18 + static inline int set_memory_rox(unsigned long addr, int numpages) 19 + { 20 + int ret = set_memory_ro(addr, numpages); 21 + if (ret) 22 + return ret; 23 + return set_memory_x(addr, numpages); 24 + } 25 + #endif 26 + 17 27 #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP 18 28 static inline int set_direct_map_invalid_noflush(struct page *page) 19 29 {
+2 -2
init/main.c
··· 863 863 /* Should be run after espfix64 is set up. */ 864 864 pti_init(); 865 865 kmsan_init_runtime(); 866 + mm_cache_init(); 866 867 } 867 868 868 869 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET ··· 999 998 sort_main_extable(); 1000 999 trap_init(); 1001 1000 mm_init(); 1002 - 1001 + poking_init(); 1003 1002 ftrace_init(); 1004 1003 1005 1004 /* trace_printk can be enabled here */ ··· 1138 1137 taskstats_init_early(); 1139 1138 delayacct_init(); 1140 1139 1141 - poking_init(); 1142 1140 check_bugs(); 1143 1141 1144 1142 acpi_subsystem_init();
+1 -2
kernel/bpf/bpf_struct_ops.c
··· 494 494 refcount_set(&kvalue->refcnt, 1); 495 495 bpf_map_inc(map); 496 496 497 - set_memory_ro((long)st_map->image, 1); 498 - set_memory_x((long)st_map->image, 1); 497 + set_memory_rox((long)st_map->image, 1); 499 498 err = st_ops->reg(kdata); 500 499 if (likely(!err)) { 501 500 /* Pair with smp_load_acquire() during lookup_elem().
+2 -4
kernel/bpf/core.c
··· 868 868 list_add_tail(&pack->list, &pack_list); 869 869 870 870 set_vm_flush_reset_perms(pack->ptr); 871 - set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); 872 - set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); 871 + set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); 873 872 return pack; 874 873 } 875 874 ··· 886 887 if (ptr) { 887 888 bpf_fill_ill_insns(ptr, size); 888 889 set_vm_flush_reset_perms(ptr); 889 - set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); 890 - set_memory_x((unsigned long)ptr, size / PAGE_SIZE); 890 + set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); 891 891 } 892 892 goto out; 893 893 }
+1 -2
kernel/bpf/trampoline.c
··· 468 468 if (err < 0) 469 469 goto out; 470 470 471 - set_memory_ro((long)im->image, 1); 472 - set_memory_x((long)im->image, 1); 471 + set_memory_rox((long)im->image, 1); 473 472 474 473 WARN_ON(tr->cur_image && tr->selector == 0); 475 474 WARN_ON(!tr->cur_image && tr->selector);
+1 -1
kernel/events/core.c
··· 7493 7493 return pud_leaf_size(pud); 7494 7494 7495 7495 pmdp = pmd_offset_lockless(pudp, pud, addr); 7496 - pmd = READ_ONCE(*pmdp); 7496 + pmd = pmdp_get_lockless(pmdp); 7497 7497 if (!pmd_present(pmd)) 7498 7498 return 0; 7499 7499
+18 -19
kernel/fork.c
··· 2607 2607 return task; 2608 2608 } 2609 2609 2610 - struct mm_struct *copy_init_mm(void) 2611 - { 2612 - return dup_mm(NULL, &init_mm); 2613 - } 2614 - 2615 2610 /* 2616 2611 * This is like kernel_clone(), but shaved down and tailored to just 2617 2612 * creating io_uring workers. It returns a created task, or an error pointer. ··· 3025 3030 init_waitqueue_head(&sighand->signalfd_wqh); 3026 3031 } 3027 3032 3028 - void __init proc_caches_init(void) 3033 + void __init mm_cache_init(void) 3029 3034 { 3030 3035 unsigned int mm_size; 3031 3036 3037 + /* 3038 + * The mm_cpumask is located at the end of mm_struct, and is 3039 + * dynamically sized based on the maximum CPU number this system 3040 + * can have, taking hotplug into account (nr_cpu_ids). 3041 + */ 3042 + mm_size = sizeof(struct mm_struct) + cpumask_size(); 3043 + 3044 + mm_cachep = kmem_cache_create_usercopy("mm_struct", 3045 + mm_size, ARCH_MIN_MMSTRUCT_ALIGN, 3046 + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 3047 + offsetof(struct mm_struct, saved_auxv), 3048 + sizeof_field(struct mm_struct, saved_auxv), 3049 + NULL); 3050 + } 3051 + 3052 + void __init proc_caches_init(void) 3053 + { 3032 3054 sighand_cachep = kmem_cache_create("sighand_cache", 3033 3055 sizeof(struct sighand_struct), 0, 3034 3056 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| ··· 3063 3051 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 3064 3052 NULL); 3065 3053 3066 - /* 3067 - * The mm_cpumask is located at the end of mm_struct, and is 3068 - * dynamically sized based on the maximum CPU number this system 3069 - * can have, taking hotplug into account (nr_cpu_ids). 
3070 - */ 3071 - mm_size = sizeof(struct mm_struct) + cpumask_size(); 3072 - 3073 - mm_cachep = kmem_cache_create_usercopy("mm_struct", 3074 - mm_size, ARCH_MIN_MMSTRUCT_ALIGN, 3075 - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 3076 - offsetof(struct mm_struct, saved_auxv), 3077 - sizeof_field(struct mm_struct, saved_auxv), 3078 - NULL); 3079 3054 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); 3080 3055 mmap_init(); 3081 3056 nsproxy_cache_init();
+1 -1
mm/Kconfig
··· 1078 1078 comment "GUP_TEST needs to have DEBUG_FS enabled" 1079 1079 depends on !GUP_TEST && !DEBUG_FS 1080 1080 1081 - config GUP_GET_PTE_LOW_HIGH 1081 + config GUP_GET_PXX_LOW_HIGH 1082 1082 bool 1083 1083 1084 1084 config ARCH_HAS_PTE_SPECIAL
+1 -1
mm/gup.c
··· 2721 2721 2722 2722 pmdp = pmd_offset_lockless(pudp, pud, addr); 2723 2723 do { 2724 - pmd_t pmd = READ_ONCE(*pmdp); 2724 + pmd_t pmd = pmdp_get_lockless(pmdp); 2725 2725 2726 2726 next = pmd_addr_end(addr, end); 2727 2727 if (!pmd_present(pmd))
+1 -2
mm/hmm.c
··· 361 361 * huge or device mapping one and compute corresponding pfn 362 362 * values. 363 363 */ 364 - pmd = pmd_read_atomic(pmdp); 365 - barrier(); 364 + pmd = pmdp_get_lockless(pmdp); 366 365 if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) 367 366 goto again; 368 367
+1 -1
mm/khugepaged.c
··· 857 857 if (!*pmd) 858 858 return SCAN_PMD_NULL; 859 859 860 - pmde = pmd_read_atomic(*pmd); 860 + pmde = pmdp_get_lockless(*pmd); 861 861 862 862 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 863 863 /* See comments in pmd_none_or_trans_huge_or_clear_bad() */
+1 -1
mm/mapping_dirty_helpers.c
··· 126 126 static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end, 127 127 struct mm_walk *walk) 128 128 { 129 - pmd_t pmdval = pmd_read_atomic(pmd); 129 + pmd_t pmdval = pmdp_get_lockless(pmd); 130 130 131 131 if (!pmd_trans_unstable(&pmdval)) 132 132 return 0;
+1 -1
mm/mprotect.c
··· 297 297 */ 298 298 static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) 299 299 { 300 - pmd_t pmdval = pmd_read_atomic(pmd); 300 + pmd_t pmdval = pmdp_get_lockless(pmd); 301 301 302 302 /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ 303 303 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+1 -1
mm/userfaultfd.c
··· 632 632 break; 633 633 } 634 634 635 - dst_pmdval = pmd_read_atomic(dst_pmd); 635 + dst_pmdval = pmdp_get_lockless(dst_pmd); 636 636 /* 637 637 * If the dst_pmd is mapped as THP don't 638 638 * override it and just be strict.
+1 -4
mm/vmscan.c
··· 4084 4084 /* walk_pte_range() may call get_next_vma() */ 4085 4085 vma = args->vma; 4086 4086 for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { 4087 - pmd_t val = pmd_read_atomic(pmd + i); 4088 - 4089 - /* for pmd_read_atomic() */ 4090 - barrier(); 4087 + pmd_t val = pmdp_get_lockless(pmd + i); 4091 4088 4092 4089 next = pmd_addr_end(addr, end); 4093 4090
+1 -2
net/bpf/bpf_dummy_struct_ops.c
··· 124 124 if (err < 0) 125 125 goto out; 126 126 127 - set_memory_ro((long)image, 1); 128 - set_memory_x((long)image, 1); 127 + set_memory_rox((long)image, 1); 129 128 prog_ret = dummy_ops_call_op(image, args); 130 129 131 130 err = dummy_ops_copy_args(args);