Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: move vma_shrink(), vma_expand() to internal header

The vma_shrink() and vma_expand() functions are internal VMA manipulation
functions which we ought to abstract away, rather than expose directly for
use outside of memory management code.

To achieve this, we replace shift_arg_pages() in fs/exec.c with an
invocation of a new relocate_vma_down() function implemented in mm/mmap.c,
which enables us to also move move_page_tables() and vma_iter_prev_range()
to internal.h.

The purpose of doing this is to isolate key VMA manipulation functions in
order that we can both abstract them and later render them easily
testable.

Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Gow <davidgow@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Kees Cook <kees@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Rae Moar <rmoar@google.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Lorenzo Stoakes and committed by
Andrew Morton
d61f0d59 fa04c08f

+106 -91
+6 -75
fs/exec.c
··· 712 712 #ifdef CONFIG_MMU 713 713 714 714 /* 715 - * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once 716 - * the binfmt code determines where the new stack should reside, we shift it to 717 - * its final location. The process proceeds as follows: 718 - * 719 - * 1) Use shift to calculate the new vma endpoints. 720 - * 2) Extend vma to cover both the old and new ranges. This ensures the 721 - * arguments passed to subsequent functions are consistent. 722 - * 3) Move vma's page tables to the new range. 723 - * 4) Free up any cleared pgd range. 724 - * 5) Shrink the vma to cover only the new range. 725 - */ 726 - static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) 727 - { 728 - struct mm_struct *mm = vma->vm_mm; 729 - unsigned long old_start = vma->vm_start; 730 - unsigned long old_end = vma->vm_end; 731 - unsigned long length = old_end - old_start; 732 - unsigned long new_start = old_start - shift; 733 - unsigned long new_end = old_end - shift; 734 - VMA_ITERATOR(vmi, mm, new_start); 735 - struct vm_area_struct *next; 736 - struct mmu_gather tlb; 737 - 738 - BUG_ON(new_start > new_end); 739 - 740 - /* 741 - * ensure there are no vmas between where we want to go 742 - * and where we are 743 - */ 744 - if (vma != vma_next(&vmi)) 745 - return -EFAULT; 746 - 747 - vma_iter_prev_range(&vmi); 748 - /* 749 - * cover the whole range: [new_start, old_end) 750 - */ 751 - if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) 752 - return -ENOMEM; 753 - 754 - /* 755 - * move the page tables downwards, on failure we rely on 756 - * process cleanup to remove whatever mess we made. 757 - */ 758 - if (length != move_page_tables(vma, old_start, 759 - vma, new_start, length, false, true)) 760 - return -ENOMEM; 761 - 762 - lru_add_drain(); 763 - tlb_gather_mmu(&tlb, mm); 764 - next = vma_next(&vmi); 765 - if (new_end > old_start) { 766 - /* 767 - * when the old and new regions overlap clear from new_end. 
768 - */ 769 - free_pgd_range(&tlb, new_end, old_end, new_end, 770 - next ? next->vm_start : USER_PGTABLES_CEILING); 771 - } else { 772 - /* 773 - * otherwise, clean from old_start; this is done to not touch 774 - * the address space in [new_end, old_start) some architectures 775 - * have constraints on va-space that make this illegal (IA64) - 776 - * for the others its just a little faster. 777 - */ 778 - free_pgd_range(&tlb, old_start, old_end, new_end, 779 - next ? next->vm_start : USER_PGTABLES_CEILING); 780 - } 781 - tlb_finish_mmu(&tlb); 782 - 783 - vma_prev(&vmi); 784 - /* Shrink the vma to just the new range */ 785 - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); 786 - } 787 - 788 - /* 789 715 * Finalizes the stack vm_area_struct. The flags and permissions are updated, 790 716 * the stack is optionally relocated, and some extra space is added. 791 717 */ ··· 803 877 804 878 /* Move stack pages down in memory. */ 805 879 if (stack_shift) { 806 - ret = shift_arg_pages(vma, stack_shift); 880 + /* 881 + * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once 882 + * the binfmt code determines where the new stack should reside, we shift it to 883 + * its final location. 884 + */ 885 + ret = relocate_vma_down(vma, stack_shift); 807 886 if (ret) 808 887 goto out_unlock; 809 888 }
+1 -16
include/linux/mm.h
··· 1005 1005 return mas_prev(&vmi->mas, 0); 1006 1006 } 1007 1007 1008 - static inline 1009 - struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) 1010 - { 1011 - return mas_prev_range(&vmi->mas, 0); 1012 - } 1013 - 1014 1008 static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) 1015 1009 { 1016 1010 return vmi->mas.index; ··· 2514 2520 2515 2521 int get_cmdline(struct task_struct *task, char *buffer, int buflen); 2516 2522 2517 - extern unsigned long move_page_tables(struct vm_area_struct *vma, 2518 - unsigned long old_addr, struct vm_area_struct *new_vma, 2519 - unsigned long new_addr, unsigned long len, 2520 - bool need_rmap_locks, bool for_stack); 2521 - 2522 2523 /* 2523 2524 * Flags used by change_protection(). For now we make it a bitmap so 2524 2525 * that we can pass in multiple flags just like parameters. However ··· 3256 3267 3257 3268 /* mmap.c */ 3258 3269 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); 3259 - extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, 3260 - unsigned long start, unsigned long end, pgoff_t pgoff, 3261 - struct vm_area_struct *next); 3262 - extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, 3263 - unsigned long start, unsigned long end, pgoff_t pgoff); 3264 3270 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); 3265 3271 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); 3266 3272 extern void unlink_file_vma(struct vm_area_struct *); ··· 3263 3279 unsigned long addr, unsigned long len, pgoff_t pgoff, 3264 3280 bool *need_rmap_locks); 3265 3281 extern void exit_mmap(struct mm_struct *); 3282 + int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); 3266 3283 3267 3284 static inline int check_data_rlimit(unsigned long rlim, 3268 3285 unsigned long new,
+18
mm/internal.h
··· 1305 1305 vma_policy(vma), new_ctx, anon_vma_name(vma)); 1306 1306 } 1307 1307 1308 + int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, 1309 + unsigned long start, unsigned long end, pgoff_t pgoff, 1310 + struct vm_area_struct *next); 1311 + int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, 1312 + unsigned long start, unsigned long end, pgoff_t pgoff); 1313 + 1308 1314 enum { 1309 1315 /* mark page accessed */ 1310 1316 FOLL_TOUCH = 1 << 16, ··· 1534 1528 return 0; 1535 1529 } 1536 1530 1531 + static inline 1532 + struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) 1533 + { 1534 + return mas_prev_range(&vmi->mas, 0); 1535 + } 1536 + 1537 1537 /* 1538 1538 * VMA lock generalization 1539 1539 */ ··· 1650 1638 void unlink_file_vma_batch_init(struct unlink_vma_file_batch *); 1651 1639 void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *); 1652 1640 void unlink_file_vma_batch_final(struct unlink_vma_file_batch *); 1641 + 1642 + /* mremap.c */ 1643 + unsigned long move_page_tables(struct vm_area_struct *vma, 1644 + unsigned long old_addr, struct vm_area_struct *new_vma, 1645 + unsigned long new_addr, unsigned long len, 1646 + bool need_rmap_locks, bool for_stack); 1653 1647 1654 1648 #endif /* __MM_INTERNAL_H */
+81
mm/mmap.c
··· 4088 4088 return 0; 4089 4089 } 4090 4090 subsys_initcall(init_reserve_notifier); 4091 + 4092 + /* 4093 + * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between 4094 + * this VMA and its relocated range, which will now reside at [vma->vm_start - 4095 + * shift, vma->vm_end - shift). 4096 + * 4097 + * This function is almost certainly NOT what you want for anything other than 4098 + * early executable temporary stack relocation. 4099 + */ 4100 + int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) 4101 + { 4102 + /* 4103 + * The process proceeds as follows: 4104 + * 4105 + * 1) Use shift to calculate the new vma endpoints. 4106 + * 2) Extend vma to cover both the old and new ranges. This ensures the 4107 + * arguments passed to subsequent functions are consistent. 4108 + * 3) Move vma's page tables to the new range. 4109 + * 4) Free up any cleared pgd range. 4110 + * 5) Shrink the vma to cover only the new range. 4111 + */ 4112 + 4113 + struct mm_struct *mm = vma->vm_mm; 4114 + unsigned long old_start = vma->vm_start; 4115 + unsigned long old_end = vma->vm_end; 4116 + unsigned long length = old_end - old_start; 4117 + unsigned long new_start = old_start - shift; 4118 + unsigned long new_end = old_end - shift; 4119 + VMA_ITERATOR(vmi, mm, new_start); 4120 + struct vm_area_struct *next; 4121 + struct mmu_gather tlb; 4122 + 4123 + BUG_ON(new_start > new_end); 4124 + 4125 + /* 4126 + * ensure there are no vmas between where we want to go 4127 + * and where we are 4128 + */ 4129 + if (vma != vma_next(&vmi)) 4130 + return -EFAULT; 4131 + 4132 + vma_iter_prev_range(&vmi); 4133 + /* 4134 + * cover the whole range: [new_start, old_end) 4135 + */ 4136 + if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) 4137 + return -ENOMEM; 4138 + 4139 + /* 4140 + * move the page tables downwards, on failure we rely on 4141 + * process cleanup to remove whatever mess we made. 
4142 + */ 4143 + if (length != move_page_tables(vma, old_start, 4144 + vma, new_start, length, false, true)) 4145 + return -ENOMEM; 4146 + 4147 + lru_add_drain(); 4148 + tlb_gather_mmu(&tlb, mm); 4149 + next = vma_next(&vmi); 4150 + if (new_end > old_start) { 4151 + /* 4152 + * when the old and new regions overlap clear from new_end. 4153 + */ 4154 + free_pgd_range(&tlb, new_end, old_end, new_end, 4155 + next ? next->vm_start : USER_PGTABLES_CEILING); 4156 + } else { 4157 + /* 4158 + * otherwise, clean from old_start; this is done to not touch 4159 + * the address space in [new_end, old_start) some architectures 4160 + * have constraints on va-space that make this illegal (IA64) - 4161 + * for the others its just a little faster. 4162 + */ 4163 + free_pgd_range(&tlb, old_start, old_end, new_end, 4164 + next ? next->vm_start : USER_PGTABLES_CEILING); 4165 + } 4166 + tlb_finish_mmu(&tlb); 4167 + 4168 + vma_prev(&vmi); 4169 + /* Shrink the vma to just the new range */ 4170 + return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); 4171 + }