Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

hugetlb: convert hugetlb_fault() to use struct vm_fault

Patch series "Hugetlb fault path to use struct vm_fault", v2.

This patchset converts the hugetlb fault path to use struct vm_fault.
This makes the code more readable and reduces stack usage by allowing
us to consolidate many fault-related variables into a single pointer.


This patch (of 3):

Now that hugetlb_fault() has a vm_fault available for fault tracking, use
it throughout. This cleans up the code by removing the local ptep, entry,
ptl and haddr variables, and prepares hugetlb_fault() to take in a struct
vm_fault argument.

Link: https://lkml.kernel.org/r/20240401202651.31440-1-vishal.moola@gmail.com
Link: https://lkml.kernel.org/r/20240401202651.31440-2-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Vishal Moola (Oracle) and committed by
Andrew Morton
9b42fa16 7edea4c6

+41 -43
+41 -43
mm/hugetlb.c
··· 6427 6427 vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 6428 6428 unsigned long address, unsigned int flags) 6429 6429 { 6430 - pte_t *ptep, entry; 6431 - spinlock_t *ptl; 6432 6430 vm_fault_t ret; 6433 6431 u32 hash; 6434 6432 struct folio *folio = NULL; ··· 6434 6436 struct hstate *h = hstate_vma(vma); 6435 6437 struct address_space *mapping; 6436 6438 int need_wait_lock = 0; 6437 - unsigned long haddr = address & huge_page_mask(h); 6438 6439 struct vm_fault vmf = { 6439 6440 .vma = vma, 6440 - .address = haddr, 6441 + .address = address & huge_page_mask(h), 6441 6442 .real_address = address, 6442 6443 .flags = flags, 6443 - .pgoff = vma_hugecache_offset(h, vma, haddr), 6444 + .pgoff = vma_hugecache_offset(h, vma, 6445 + address & huge_page_mask(h)), 6444 6446 /* TODO: Track hugetlb faults using vm_fault */ 6445 6447 6446 6448 /* ··· 6460 6462 6461 6463 /* 6462 6464 * Acquire vma lock before calling huge_pte_alloc and hold 6463 - * until finished with ptep. This prevents huge_pmd_unshare from 6464 - * being called elsewhere and making the ptep no longer valid. 6465 + * until finished with vmf.pte. This prevents huge_pmd_unshare from 6466 + * being called elsewhere and making the vmf.pte no longer valid. 
6465 6467 */ 6466 6468 hugetlb_vma_lock_read(vma); 6467 - ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h)); 6468 - if (!ptep) { 6469 + vmf.pte = huge_pte_alloc(mm, vma, vmf.address, huge_page_size(h)); 6470 + if (!vmf.pte) { 6469 6471 hugetlb_vma_unlock_read(vma); 6470 6472 mutex_unlock(&hugetlb_fault_mutex_table[hash]); 6471 6473 return VM_FAULT_OOM; 6472 6474 } 6473 6475 6474 - entry = huge_ptep_get(ptep); 6475 - if (huge_pte_none_mostly(entry)) { 6476 - if (is_pte_marker(entry)) { 6476 + vmf.orig_pte = huge_ptep_get(vmf.pte); 6477 + if (huge_pte_none_mostly(vmf.orig_pte)) { 6478 + if (is_pte_marker(vmf.orig_pte)) { 6477 6479 pte_marker marker = 6478 - pte_marker_get(pte_to_swp_entry(entry)); 6480 + pte_marker_get(pte_to_swp_entry(vmf.orig_pte)); 6479 6481 6480 6482 if (marker & PTE_MARKER_POISONED) { 6481 6483 ret = VM_FAULT_HWPOISON_LARGE; ··· 6490 6492 * mutex internally, which make us return immediately. 6491 6493 */ 6492 6494 return hugetlb_no_page(mm, vma, mapping, vmf.pgoff, address, 6493 - ptep, entry, flags, &vmf); 6495 + vmf.pte, vmf.orig_pte, flags, &vmf); 6494 6496 } 6495 6497 6496 6498 ret = 0; 6497 6499 6498 6500 /* 6499 - * entry could be a migration/hwpoison entry at this point, so this 6500 - * check prevents the kernel from going below assuming that we have 6501 - * an active hugepage in pagecache. This goto expects the 2nd page 6502 - * fault, and is_hugetlb_entry_(migration|hwpoisoned) check will 6503 - * properly handle it. 6501 + * vmf.orig_pte could be a migration/hwpoison vmf.orig_pte at this 6502 + * point, so this check prevents the kernel from going below assuming 6503 + * that we have an active hugepage in pagecache. This goto expects 6504 + * the 2nd page fault, and is_hugetlb_entry_(migration|hwpoisoned) 6505 + * check will properly handle it. 
6504 6506 */ 6505 - if (!pte_present(entry)) { 6506 - if (unlikely(is_hugetlb_entry_migration(entry))) { 6507 + if (!pte_present(vmf.orig_pte)) { 6508 + if (unlikely(is_hugetlb_entry_migration(vmf.orig_pte))) { 6507 6509 /* 6508 6510 * Release the hugetlb fault lock now, but retain 6509 6511 * the vma lock, because it is needed to guard the ··· 6512 6514 * be released there. 6513 6515 */ 6514 6516 mutex_unlock(&hugetlb_fault_mutex_table[hash]); 6515 - migration_entry_wait_huge(vma, ptep); 6517 + migration_entry_wait_huge(vma, vmf.pte); 6516 6518 return 0; 6517 - } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) 6519 + } else if (unlikely(is_hugetlb_entry_hwpoisoned(vmf.orig_pte))) 6518 6520 ret = VM_FAULT_HWPOISON_LARGE | 6519 6521 VM_FAULT_SET_HINDEX(hstate_index(h)); 6520 6522 goto out_mutex; ··· 6528 6530 * determine if a reservation has been consumed. 6529 6531 */ 6530 6532 if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && 6531 - !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) { 6532 - if (vma_needs_reservation(h, vma, haddr) < 0) { 6533 + !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(vmf.orig_pte)) { 6534 + if (vma_needs_reservation(h, vma, vmf.address) < 0) { 6533 6535 ret = VM_FAULT_OOM; 6534 6536 goto out_mutex; 6535 6537 } 6536 6538 /* Just decrements count, does not deallocate */ 6537 - vma_end_reservation(h, vma, haddr); 6539 + vma_end_reservation(h, vma, vmf.address); 6538 6540 6539 6541 pagecache_folio = filemap_lock_hugetlb_folio(h, mapping, 6540 6542 vmf.pgoff); ··· 6542 6544 pagecache_folio = NULL; 6543 6545 } 6544 6546 6545 - ptl = huge_pte_lock(h, mm, ptep); 6547 + vmf.ptl = huge_pte_lock(h, mm, vmf.pte); 6546 6548 6547 6549 /* Check for a racing update before calling hugetlb_wp() */ 6548 - if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) 6550 + if (unlikely(!pte_same(vmf.orig_pte, huge_ptep_get(vmf.pte)))) 6549 6551 goto out_ptl; 6550 6552 6551 6553 /* Handle userfault-wp first, before trying to lock more pages */ 
6552 - if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(ptep)) && 6553 - (flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) { 6554 + if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(vmf.pte)) && 6555 + (flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) { 6554 6556 if (!userfaultfd_wp_async(vma)) { 6555 - spin_unlock(ptl); 6557 + spin_unlock(vmf.ptl); 6556 6558 if (pagecache_folio) { 6557 6559 folio_unlock(pagecache_folio); 6558 6560 folio_put(pagecache_folio); ··· 6562 6564 return handle_userfault(&vmf, VM_UFFD_WP); 6563 6565 } 6564 6566 6565 - entry = huge_pte_clear_uffd_wp(entry); 6566 - set_huge_pte_at(mm, haddr, ptep, entry, 6567 + vmf.orig_pte = huge_pte_clear_uffd_wp(vmf.orig_pte); 6568 + set_huge_pte_at(mm, vmf.address, vmf.pte, vmf.orig_pte, 6567 6569 huge_page_size(hstate_vma(vma))); 6568 6570 /* Fallthrough to CoW */ 6569 6571 } 6570 6572 6571 6573 /* 6572 - * hugetlb_wp() requires page locks of pte_page(entry) and 6574 + * hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) and 6573 6575 * pagecache_folio, so here we need take the former one 6574 6576 * when folio != pagecache_folio or !pagecache_folio. 
6575 6577 */ 6576 - folio = page_folio(pte_page(entry)); 6578 + folio = page_folio(pte_page(vmf.orig_pte)); 6577 6579 if (folio != pagecache_folio) 6578 6580 if (!folio_trylock(folio)) { 6579 6581 need_wait_lock = 1; ··· 6583 6585 folio_get(folio); 6584 6586 6585 6587 if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { 6586 - if (!huge_pte_write(entry)) { 6587 - ret = hugetlb_wp(mm, vma, address, ptep, flags, 6588 - pagecache_folio, ptl, &vmf); 6588 + if (!huge_pte_write(vmf.orig_pte)) { 6589 + ret = hugetlb_wp(mm, vma, address, vmf.pte, flags, 6590 + pagecache_folio, vmf.ptl, &vmf); 6589 6591 goto out_put_page; 6590 6592 } else if (likely(flags & FAULT_FLAG_WRITE)) { 6591 - entry = huge_pte_mkdirty(entry); 6593 + vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte); 6592 6594 } 6593 6595 } 6594 - entry = pte_mkyoung(entry); 6595 - if (huge_ptep_set_access_flags(vma, haddr, ptep, entry, 6596 + vmf.orig_pte = pte_mkyoung(vmf.orig_pte); 6597 + if (huge_ptep_set_access_flags(vma, vmf.address, vmf.pte, vmf.orig_pte, 6596 6598 flags & FAULT_FLAG_WRITE)) 6597 - update_mmu_cache(vma, haddr, ptep); 6599 + update_mmu_cache(vma, vmf.address, vmf.pte); 6598 6600 out_put_page: 6599 6601 if (folio != pagecache_folio) 6600 6602 folio_unlock(folio); 6601 6603 folio_put(folio); 6602 6604 out_ptl: 6603 - spin_unlock(ptl); 6605 + spin_unlock(vmf.ptl); 6604 6606 6605 6607 if (pagecache_folio) { 6606 6608 folio_unlock(pagecache_folio);