Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/mprotect: special-case small folios when applying permissions

The common order-0 case is important enough to warrant its own branch, which
avoids the hairy, large loop logic that the CPU does not seem to handle
particularly well.

While at it, encourage the compiler to inline batch PTE logic and resolve
constant branches by adding __always_inline strategically.

Link: https://lore.kernel.org/20260402141628.3367596-3-pfalcato@suse.de
Signed-off-by: Pedro Falcato <pfalcato@suse.de>
Suggested-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Tested-by: Luke Yang <luyang@redhat.com>
Reviewed-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jiri Hladky <jhladky@redhat.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Pedro Falcato and committed by
Andrew Morton
89e613bc 3bc181c1

+57 -34
+57 -34
mm/mprotect.c
··· 117 117 } 118 118 119 119 /* Set nr_ptes number of ptes, starting from idx */ 120 - static void prot_commit_flush_ptes(struct vm_area_struct *vma, unsigned long addr, 121 - pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, 122 - int idx, bool set_write, struct mmu_gather *tlb) 120 + static __always_inline void prot_commit_flush_ptes(struct vm_area_struct *vma, 121 + unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, 122 + int nr_ptes, int idx, bool set_write, struct mmu_gather *tlb) 123 123 { 124 124 /* 125 125 * Advance the position in the batch by idx; note that if idx > 0, ··· 143 143 * !PageAnonExclusive() pages, starting from start_idx. Caller must enforce 144 144 * that the ptes point to consecutive pages of the same anon large folio. 145 145 */ 146 - static int page_anon_exclusive_sub_batch(int start_idx, int max_len, 146 + static __always_inline int page_anon_exclusive_sub_batch(int start_idx, int max_len, 147 147 struct page *first_page, bool expected_anon_exclusive) 148 148 { 149 149 int idx; ··· 169 169 * pte of the batch. Therefore, we must individually check all pages and 170 170 * retrieve sub-batches. 
171 171 */ 172 - static void commit_anon_folio_batch(struct vm_area_struct *vma, 172 + static __always_inline void commit_anon_folio_batch(struct vm_area_struct *vma, 173 173 struct folio *folio, struct page *first_page, unsigned long addr, pte_t *ptep, 174 174 pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) 175 175 { ··· 188 188 } 189 189 } 190 190 191 - static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, 191 + static __always_inline void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, 192 192 struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep, 193 193 pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) 194 194 { ··· 277 277 return 0; 278 278 } 279 279 280 + static __always_inline void change_present_ptes(struct mmu_gather *tlb, 281 + struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, 282 + int nr_ptes, unsigned long end, pgprot_t newprot, 283 + struct folio *folio, struct page *page, unsigned long cp_flags) 284 + { 285 + const bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; 286 + const bool uffd_wp = cp_flags & MM_CP_UFFD_WP; 287 + pte_t ptent, oldpte; 288 + 289 + oldpte = modify_prot_start_ptes(vma, addr, ptep, nr_ptes); 290 + ptent = pte_modify(oldpte, newprot); 291 + 292 + if (uffd_wp) 293 + ptent = pte_mkuffd_wp(ptent); 294 + else if (uffd_wp_resolve) 295 + ptent = pte_clear_uffd_wp(ptent); 296 + 297 + /* 298 + * In some writable, shared mappings, we might want 299 + * to catch actual write access -- see 300 + * vma_wants_writenotify(). 301 + * 302 + * In all writable, private mappings, we have to 303 + * properly handle COW. 304 + * 305 + * In both cases, we can sometimes still change PTEs 306 + * writable and avoid the write-fault handler, for 307 + * example, if a PTE is already dirty and no other 308 + * COW or special handling is required. 
309 + */ 310 + if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && 311 + !pte_write(ptent)) 312 + set_write_prot_commit_flush_ptes(vma, folio, page, 313 + addr, ptep, oldpte, ptent, nr_ptes, tlb); 314 + else 315 + prot_commit_flush_ptes(vma, addr, ptep, oldpte, ptent, 316 + nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb); 317 + } 318 + 280 319 static long change_pte_range(struct mmu_gather *tlb, 281 320 struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, 282 321 unsigned long end, pgprot_t newprot, unsigned long cp_flags) ··· 326 287 bool is_private_single_threaded; 327 288 bool prot_numa = cp_flags & MM_CP_PROT_NUMA; 328 289 bool uffd_wp = cp_flags & MM_CP_UFFD_WP; 329 - bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; 330 290 int nr_ptes; 331 291 332 292 tlb_change_page_size(tlb, PAGE_SIZE); ··· 346 308 int max_nr_ptes = (end - addr) >> PAGE_SHIFT; 347 309 struct folio *folio = NULL; 348 310 struct page *page; 349 - pte_t ptent; 350 311 351 312 /* Already in the desired state. */ 352 313 if (prot_numa && pte_protnone(oldpte)) ··· 371 334 372 335 nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags); 373 336 374 - oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes); 375 - ptent = pte_modify(oldpte, newprot); 376 - 377 - if (uffd_wp) 378 - ptent = pte_mkuffd_wp(ptent); 379 - else if (uffd_wp_resolve) 380 - ptent = pte_clear_uffd_wp(ptent); 381 - 382 337 /* 383 - * In some writable, shared mappings, we might want 384 - * to catch actual write access -- see 385 - * vma_wants_writenotify(). 386 - * 387 - * In all writable, private mappings, we have to 388 - * properly handle COW. 389 - * 390 - * In both cases, we can sometimes still change PTEs 391 - * writable and avoid the write-fault handler, for 392 - * example, if a PTE is already dirty and no other 393 - * COW or special handling is required. 338 + * Optimize for the small-folio common case by 339 + * special-casing it here. 
Compiler constant propagation 340 + * plus copious amounts of __always_inline does wonders. 394 341 */ 395 - if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && 396 - !pte_write(ptent)) 397 - set_write_prot_commit_flush_ptes(vma, folio, page, 398 - addr, pte, oldpte, ptent, nr_ptes, tlb); 399 - else 400 - prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent, 401 - nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb); 342 + if (likely(nr_ptes == 1)) { 343 + change_present_ptes(tlb, vma, addr, pte, 1, 344 + end, newprot, folio, page, cp_flags); 345 + } else { 346 + change_present_ptes(tlb, vma, addr, pte, 347 + nr_ptes, end, newprot, folio, page, 348 + cp_flags); 349 + } 350 + 402 351 pages += nr_ptes; 403 352 } else if (pte_none(oldpte)) { 404 353 /*