Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fs/proc/task_mmu: execute PROCMAP_QUERY ioctl under per-vma locks

Utilize per-vma locks to stabilize the vma after lookup without taking
mmap_lock during PROCMAP_QUERY ioctl execution. If the vma lock is
contended, we fall back to mmap_lock, but hold it only momentarily: just
long enough to lock the vma, after which mmap_lock is released. In the
very unlikely case of a vm_refcnt overflow, this fallback path will also
fail and the ioctl is done under mmap_lock protection.

This change is designed to reduce mmap_lock contention and prevent
PROCMAP_QUERY ioctl calls from blocking address space updates.

Link: https://lkml.kernel.org/r/20250808152850.2580887-4-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: T.J. Mercier <tjmercier@google.com>
Cc: Ye Bin <yebin10@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Suren Baghdasaryan and committed by
Andrew Morton
d9d1c2d8 ee737a5a

+85 -18
+85 -18
fs/proc/task_mmu.c
··· 132 132 133 133 #ifdef CONFIG_PER_VMA_LOCK 134 134 135 + static void reset_lock_ctx(struct proc_maps_locking_ctx *lock_ctx) 136 + { 137 + lock_ctx->locked_vma = NULL; 138 + lock_ctx->mmap_locked = false; 139 + } 140 + 135 141 static void unlock_ctx_vma(struct proc_maps_locking_ctx *lock_ctx) 136 142 { 137 143 if (lock_ctx->locked_vma) { ··· 163 157 lock_ctx->mmap_locked = true; 164 158 } else { 165 159 rcu_read_lock(); 166 - lock_ctx->locked_vma = NULL; 167 - lock_ctx->mmap_locked = false; 160 + reset_lock_ctx(lock_ctx); 168 161 } 169 162 170 163 return true; ··· 527 522 PROCMAP_QUERY_VMA_FLAGS \ 528 523 ) 529 524 530 - static int query_vma_setup(struct mm_struct *mm) 525 + #ifdef CONFIG_PER_VMA_LOCK 526 + 527 + static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx) 531 528 { 532 - return mmap_read_lock_killable(mm); 529 + reset_lock_ctx(lock_ctx); 530 + 531 + return 0; 533 532 } 534 533 535 - static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma) 534 + static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx) 536 535 { 537 - mmap_read_unlock(mm); 536 + if (lock_ctx->mmap_locked) { 537 + mmap_read_unlock(lock_ctx->mm); 538 + lock_ctx->mmap_locked = false; 539 + } else { 540 + unlock_ctx_vma(lock_ctx); 541 + } 538 542 } 539 543 540 - static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr) 544 + static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx, 545 + unsigned long addr) 541 546 { 542 - return find_vma(mm, addr); 547 + struct mm_struct *mm = lock_ctx->mm; 548 + struct vm_area_struct *vma; 549 + struct vma_iterator vmi; 550 + 551 + if (lock_ctx->mmap_locked) 552 + return find_vma(mm, addr); 553 + 554 + /* Unlock previously locked VMA and find the next one under RCU */ 555 + unlock_ctx_vma(lock_ctx); 556 + rcu_read_lock(); 557 + vma_iter_init(&vmi, mm, addr); 558 + vma = lock_next_vma(mm, &vmi, addr); 559 + rcu_read_unlock(); 560 + 561 
+ if (!vma) 562 + return NULL; 563 + 564 + if (!IS_ERR(vma)) { 565 + lock_ctx->locked_vma = vma; 566 + return vma; 567 + } 568 + 569 + if (PTR_ERR(vma) == -EAGAIN) { 570 + /* Fallback to mmap_lock on vma->vm_refcnt overflow */ 571 + mmap_read_lock(mm); 572 + vma = find_vma(mm, addr); 573 + lock_ctx->mmap_locked = true; 574 + } 575 + 576 + return vma; 543 577 } 544 578 545 - static struct vm_area_struct *query_matching_vma(struct mm_struct *mm, 579 + #else /* CONFIG_PER_VMA_LOCK */ 580 + 581 + static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx) 582 + { 583 + return mmap_read_lock_killable(lock_ctx->mm); 584 + } 585 + 586 + static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx) 587 + { 588 + mmap_read_unlock(lock_ctx->mm); 589 + } 590 + 591 + static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx, 592 + unsigned long addr) 593 + { 594 + return find_vma(lock_ctx->mm, addr); 595 + } 596 + 597 + #endif /* CONFIG_PER_VMA_LOCK */ 598 + 599 + static struct vm_area_struct *query_matching_vma(struct proc_maps_locking_ctx *lock_ctx, 546 600 unsigned long addr, u32 flags) 547 601 { 548 602 struct vm_area_struct *vma; 549 603 550 604 next_vma: 551 - vma = query_vma_find_by_addr(mm, addr); 605 + vma = query_vma_find_by_addr(lock_ctx, addr); 606 + if (IS_ERR(vma)) 607 + return vma; 608 + 552 609 if (!vma) 553 610 goto no_vma; 554 611 ··· 651 584 return ERR_PTR(-ENOENT); 652 585 } 653 586 654 - static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg) 587 + static int do_procmap_query(struct mm_struct *mm, void __user *uarg) 655 588 { 589 + struct proc_maps_locking_ctx lock_ctx = { .mm = mm }; 656 590 struct procmap_query karg; 657 591 struct vm_area_struct *vma; 658 - struct mm_struct *mm; 659 592 const char *name = NULL; 660 593 char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL; 661 594 __u64 usize; ··· 682 615 if (!!karg.build_id_size != !!karg.build_id_addr) 683 616 return -EINVAL; 
684 617 685 - mm = priv->lock_ctx.mm; 686 618 if (!mm || !mmget_not_zero(mm)) 687 619 return -ESRCH; 688 620 689 - err = query_vma_setup(mm); 621 + err = query_vma_setup(&lock_ctx); 690 622 if (err) { 691 623 mmput(mm); 692 624 return err; 693 625 } 694 626 695 - vma = query_matching_vma(mm, karg.query_addr, karg.query_flags); 627 + vma = query_matching_vma(&lock_ctx, karg.query_addr, karg.query_flags); 696 628 if (IS_ERR(vma)) { 697 629 err = PTR_ERR(vma); 698 630 vma = NULL; ··· 776 710 } 777 711 778 712 /* unlock vma or mmap_lock, and put mm_struct before copying data to user */ 779 - query_vma_teardown(mm, vma); 713 + query_vma_teardown(&lock_ctx); 780 714 mmput(mm); 781 715 782 716 if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr), ··· 796 730 return 0; 797 731 798 732 out: 799 - query_vma_teardown(mm, vma); 733 + query_vma_teardown(&lock_ctx); 800 734 mmput(mm); 801 735 kfree(name_buf); 802 736 return err; ··· 809 743 810 744 switch (cmd) { 811 745 case PROCMAP_QUERY: 812 - return do_procmap_query(priv, (void __user *)arg); 746 + /* priv->lock_ctx.mm is set during file open operation */ 747 + return do_procmap_query(priv->lock_ctx.mm, (void __user *)arg); 813 748 default: 814 749 return -ENOIOCTLCMD; 815 750 }