Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tools: add skeleton code for userland testing of VMA logic

Establish a new userland VMA unit testing implementation under
tools/testing. It builds on the existing userland maple tree support,
using the now-shared code that was previously exclusive to radix tree
testing.

This provides fundamental VMA operations whose API is defined in mm/vma.h,
while stubbing out superfluous functionality.

This exists as a proof-of-concept, with the test implementation functional
and sufficient to allow userland compilation of vma.c, but containing only
cursory tests to demonstrate basic functionality.

Link: https://lkml.kernel.org/r/533ffa2eec771cbe6b387dd049a7f128a53eb616.1722251717.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Tested-by: SeongJae Park <sj@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Gow <davidgow@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Kees Cook <kees@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Rae Moar <rmoar@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Lorenzo Stoakes and committed by
Andrew Morton
9325b8b5 74579d8d

+1163
+1
MAINTAINERS
··· 24423 24423 F: mm/vma.c 24424 24424 F: mm/vma.h 24425 24425 F: mm/vma_internal.h 24426 + F: tools/testing/vma/ 24426 24427 24427 24428 VMALLOC 24428 24429 M: Andrew Morton <akpm@linux-foundation.org>
+7
tools/testing/vma/.gitignore
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + generated/bit-length.h 3 + generated/map-shift.h 4 + generated/autoconf.h 5 + idr.c 6 + radix-tree.c 7 + vma
+16
tools/testing/vma/Makefile
# SPDX-License-Identifier: GPL-2.0-or-later

# Neither target names a file that a rule here is expected to produce, so mark
# both as phony; otherwise a stray file called "clean" or "default" would
# silently disable the rule.
.PHONY: default clean

default: vma

include ../shared/shared.mk

OFILES = $(SHARED_OFILES) vma.o maple-shim.o
TARGETS = vma

# vma.c textually #includes ../../../mm/vma.c (and picks up vma_internal.h and
# mm/vma.h), so these are prerequisites of the *object*, not of the link step.
# Hanging them off the link target would relink a stale vma.o whenever the
# kernel sources change, without recompiling it.
vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h

vma: $(OFILES)
	$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)

clean:
	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h generated/bit-length.h generated/autoconf.h
+12
tools/testing/vma/linux/atomic.h
/* SPDX-License-Identifier: GPL-2.0-or-later */

/*
 * Userland stand-in for <linux/atomic.h>.
 *
 * The kernel atomic_*() helpers used by the code under test are mapped onto
 * the uatomic_*() primitives; the includer is expected to have those (and
 * int32_t / UCHAR_MAX) in scope already.
 */

#ifndef _LINUX_ATOMIC_H
#define _LINUX_ATOMIC_H

#define atomic_t int32_t
#define atomic_inc(x) uatomic_inc(x)
#define atomic_read(x) uatomic_read(x)
/*
 * Must actually store the value: a no-op here leaves counters at whatever
 * they previously held, so a later atomic_read() observes stale data.
 */
#define atomic_set(x, y) uatomic_set(x, y)
#define U8_MAX UCHAR_MAX

#endif	/* _LINUX_ATOMIC_H */
+38
tools/testing/vma/linux/mmzone.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + 3 + #ifndef _LINUX_MMZONE_H 4 + #define _LINUX_MMZONE_H 5 + 6 + #include <linux/atomic.h> 7 + 8 + struct pglist_data *first_online_pgdat(void); 9 + struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); 10 + 11 + #define for_each_online_pgdat(pgdat) \ 12 + for (pgdat = first_online_pgdat(); \ 13 + pgdat; \ 14 + pgdat = next_online_pgdat(pgdat)) 15 + 16 + enum zone_type { 17 + __MAX_NR_ZONES 18 + }; 19 + 20 + #define MAX_NR_ZONES __MAX_NR_ZONES 21 + #define MAX_PAGE_ORDER 10 22 + #define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) 23 + 24 + #define pageblock_order MAX_PAGE_ORDER 25 + #define pageblock_nr_pages BIT(pageblock_order) 26 + #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) 27 + #define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) 28 + 29 + struct zone { 30 + atomic_long_t managed_pages; 31 + }; 32 + 33 + typedef struct pglist_data { 34 + struct zone node_zones[MAX_NR_ZONES]; 35 + 36 + } pg_data_t; 37 + 38 + #endif /* _LINUX_MMZONE_H */
+207
tools/testing/vma/vma.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + #include <stdbool.h> 4 + #include <stdio.h> 5 + #include <stdlib.h> 6 + 7 + #include "maple-shared.h" 8 + #include "vma_internal.h" 9 + 10 + /* 11 + * Directly import the VMA implementation here. Our vma_internal.h wrapper 12 + * provides userland-equivalent functionality for everything vma.c uses. 13 + */ 14 + #include "../../../mm/vma.c" 15 + 16 + const struct vm_operations_struct vma_dummy_vm_ops; 17 + 18 + #define ASSERT_TRUE(_expr) \ 19 + do { \ 20 + if (!(_expr)) { \ 21 + fprintf(stderr, \ 22 + "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \ 23 + __FILE__, __LINE__, __FUNCTION__, #_expr); \ 24 + return false; \ 25 + } \ 26 + } while (0) 27 + #define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr)) 28 + #define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2)) 29 + #define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2)) 30 + 31 + static struct vm_area_struct *alloc_vma(struct mm_struct *mm, 32 + unsigned long start, 33 + unsigned long end, 34 + pgoff_t pgoff, 35 + vm_flags_t flags) 36 + { 37 + struct vm_area_struct *ret = vm_area_alloc(mm); 38 + 39 + if (ret == NULL) 40 + return NULL; 41 + 42 + ret->vm_start = start; 43 + ret->vm_end = end; 44 + ret->vm_pgoff = pgoff; 45 + ret->__vm_flags = flags; 46 + 47 + return ret; 48 + } 49 + 50 + static bool test_simple_merge(void) 51 + { 52 + struct vm_area_struct *vma; 53 + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; 54 + struct mm_struct mm = {}; 55 + struct vm_area_struct *vma_left = alloc_vma(&mm, 0, 0x1000, 0, flags); 56 + struct vm_area_struct *vma_middle = alloc_vma(&mm, 0x1000, 0x2000, 1, flags); 57 + struct vm_area_struct *vma_right = alloc_vma(&mm, 0x2000, 0x3000, 2, flags); 58 + VMA_ITERATOR(vmi, &mm, 0x1000); 59 + 60 + ASSERT_FALSE(vma_link(&mm, vma_left)); 61 + ASSERT_FALSE(vma_link(&mm, vma_middle)); 62 + ASSERT_FALSE(vma_link(&mm, vma_right)); 63 + 64 + vma = vma_merge_new_vma(&vmi, vma_left, vma_middle, 0x1000, 
65 + 0x2000, 1); 66 + ASSERT_NE(vma, NULL); 67 + 68 + ASSERT_EQ(vma->vm_start, 0); 69 + ASSERT_EQ(vma->vm_end, 0x3000); 70 + ASSERT_EQ(vma->vm_pgoff, 0); 71 + ASSERT_EQ(vma->vm_flags, flags); 72 + 73 + vm_area_free(vma); 74 + mtree_destroy(&mm.mm_mt); 75 + 76 + return true; 77 + } 78 + 79 + static bool test_simple_modify(void) 80 + { 81 + struct vm_area_struct *vma; 82 + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; 83 + struct mm_struct mm = {}; 84 + struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, flags); 85 + VMA_ITERATOR(vmi, &mm, 0x1000); 86 + 87 + ASSERT_FALSE(vma_link(&mm, init_vma)); 88 + 89 + /* 90 + * The flags will not be changed, the vma_modify_flags() function 91 + * performs the merge/split only. 92 + */ 93 + vma = vma_modify_flags(&vmi, init_vma, init_vma, 94 + 0x1000, 0x2000, VM_READ | VM_MAYREAD); 95 + ASSERT_NE(vma, NULL); 96 + /* We modify the provided VMA, and on split allocate new VMAs. */ 97 + ASSERT_EQ(vma, init_vma); 98 + 99 + ASSERT_EQ(vma->vm_start, 0x1000); 100 + ASSERT_EQ(vma->vm_end, 0x2000); 101 + ASSERT_EQ(vma->vm_pgoff, 1); 102 + 103 + /* 104 + * Now walk through the three split VMAs and make sure they are as 105 + * expected. 
106 + */ 107 + 108 + vma_iter_set(&vmi, 0); 109 + vma = vma_iter_load(&vmi); 110 + 111 + ASSERT_EQ(vma->vm_start, 0); 112 + ASSERT_EQ(vma->vm_end, 0x1000); 113 + ASSERT_EQ(vma->vm_pgoff, 0); 114 + 115 + vm_area_free(vma); 116 + vma_iter_clear(&vmi); 117 + 118 + vma = vma_next(&vmi); 119 + 120 + ASSERT_EQ(vma->vm_start, 0x1000); 121 + ASSERT_EQ(vma->vm_end, 0x2000); 122 + ASSERT_EQ(vma->vm_pgoff, 1); 123 + 124 + vm_area_free(vma); 125 + vma_iter_clear(&vmi); 126 + 127 + vma = vma_next(&vmi); 128 + 129 + ASSERT_EQ(vma->vm_start, 0x2000); 130 + ASSERT_EQ(vma->vm_end, 0x3000); 131 + ASSERT_EQ(vma->vm_pgoff, 2); 132 + 133 + vm_area_free(vma); 134 + mtree_destroy(&mm.mm_mt); 135 + 136 + return true; 137 + } 138 + 139 + static bool test_simple_expand(void) 140 + { 141 + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; 142 + struct mm_struct mm = {}; 143 + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, flags); 144 + VMA_ITERATOR(vmi, &mm, 0); 145 + 146 + ASSERT_FALSE(vma_link(&mm, vma)); 147 + 148 + ASSERT_FALSE(vma_expand(&vmi, vma, 0, 0x3000, 0, NULL)); 149 + 150 + ASSERT_EQ(vma->vm_start, 0); 151 + ASSERT_EQ(vma->vm_end, 0x3000); 152 + ASSERT_EQ(vma->vm_pgoff, 0); 153 + 154 + vm_area_free(vma); 155 + mtree_destroy(&mm.mm_mt); 156 + 157 + return true; 158 + } 159 + 160 + static bool test_simple_shrink(void) 161 + { 162 + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; 163 + struct mm_struct mm = {}; 164 + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags); 165 + VMA_ITERATOR(vmi, &mm, 0); 166 + 167 + ASSERT_FALSE(vma_link(&mm, vma)); 168 + 169 + ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); 170 + 171 + ASSERT_EQ(vma->vm_start, 0); 172 + ASSERT_EQ(vma->vm_end, 0x1000); 173 + ASSERT_EQ(vma->vm_pgoff, 0); 174 + 175 + vm_area_free(vma); 176 + mtree_destroy(&mm.mm_mt); 177 + 178 + return true; 179 + } 180 + 181 + int main(void) 182 + { 183 + int num_tests = 0, num_fail = 0; 184 + 185 + maple_tree_init(); 
186 + 187 + #define TEST(name) \ 188 + do { \ 189 + num_tests++; \ 190 + if (!test_##name()) { \ 191 + num_fail++; \ 192 + fprintf(stderr, "Test " #name " FAILED\n"); \ 193 + } \ 194 + } while (0) 195 + 196 + TEST(simple_merge); 197 + TEST(simple_modify); 198 + TEST(simple_expand); 199 + TEST(simple_shrink); 200 + 201 + #undef TEST 202 + 203 + printf("%d tests run, %d passed, %d failed.\n", 204 + num_tests, num_tests - num_fail, num_fail); 205 + 206 + return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE; 207 + }
+882
tools/testing/vma/vma_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + /* 3 + * vma_internal.h 4 + * 5 + * Header providing userland wrappers and shims for the functionality provided 6 + * by mm/vma_internal.h. 7 + * 8 + * We make the header guard the same as mm/vma_internal.h, so if this shim 9 + * header is included, it precludes the inclusion of the kernel one. 10 + */ 11 + 12 + #ifndef __MM_VMA_INTERNAL_H 13 + #define __MM_VMA_INTERNAL_H 14 + 15 + #define __private 16 + #define __bitwise 17 + #define __randomize_layout 18 + 19 + #define CONFIG_MMU 20 + #define CONFIG_PER_VMA_LOCK 21 + 22 + #include <stdlib.h> 23 + 24 + #include <linux/list.h> 25 + #include <linux/maple_tree.h> 26 + #include <linux/mm.h> 27 + #include <linux/rbtree.h> 28 + #include <linux/rwsem.h> 29 + 30 + #define VM_WARN_ON(_expr) (WARN_ON(_expr)) 31 + #define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) 32 + #define VM_BUG_ON(_expr) (BUG_ON(_expr)) 33 + #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) 34 + 35 + #define VM_NONE 0x00000000 36 + #define VM_READ 0x00000001 37 + #define VM_WRITE 0x00000002 38 + #define VM_EXEC 0x00000004 39 + #define VM_SHARED 0x00000008 40 + #define VM_MAYREAD 0x00000010 41 + #define VM_MAYWRITE 0x00000020 42 + #define VM_GROWSDOWN 0x00000100 43 + #define VM_PFNMAP 0x00000400 44 + #define VM_LOCKED 0x00002000 45 + #define VM_IO 0x00004000 46 + #define VM_DONTEXPAND 0x00040000 47 + #define VM_ACCOUNT 0x00100000 48 + #define VM_MIXEDMAP 0x10000000 49 + #define VM_STACK VM_GROWSDOWN 50 + #define VM_SHADOW_STACK VM_NONE 51 + #define VM_SOFTDIRTY 0 52 + 53 + #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) 54 + #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) 55 + 56 + #define FIRST_USER_ADDRESS 0UL 57 + #define USER_PGTABLES_CEILING 0UL 58 + 59 + #define vma_policy(vma) NULL 60 + 61 + #define down_write_nest_lock(sem, nest_lock) 62 + 63 + #define pgprot_val(x) ((x).pgprot) 64 + #define __pgprot(x) ((pgprot_t) { (x) } ) 65 + 66 + #define for_each_vma(__vmi, 
__vma) \ 67 + while (((__vma) = vma_next(&(__vmi))) != NULL) 68 + 69 + /* The MM code likes to work with exclusive end addresses */ 70 + #define for_each_vma_range(__vmi, __vma, __end) \ 71 + while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) 72 + 73 + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) 74 + 75 + #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) 76 + 77 + #define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr) 78 + #define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr) 79 + 80 + #define TASK_SIZE ((1ul << 47)-PAGE_SIZE) 81 + 82 + #define AS_MM_ALL_LOCKS 2 83 + 84 + #define current NULL 85 + 86 + /* We hardcode this for now. */ 87 + #define sysctl_max_map_count 0x1000000UL 88 + 89 + #define pgoff_t unsigned long 90 + typedef unsigned long pgprotval_t; 91 + typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; 92 + typedef unsigned long vm_flags_t; 93 + typedef __bitwise unsigned int vm_fault_t; 94 + 95 + typedef struct refcount_struct { 96 + atomic_t refs; 97 + } refcount_t; 98 + 99 + struct kref { 100 + refcount_t refcount; 101 + }; 102 + 103 + struct anon_vma { 104 + struct anon_vma *root; 105 + struct rb_root_cached rb_root; 106 + }; 107 + 108 + struct anon_vma_chain { 109 + struct anon_vma *anon_vma; 110 + struct list_head same_vma; 111 + }; 112 + 113 + struct anon_vma_name { 114 + struct kref kref; 115 + /* The name needs to be at the end because it is dynamically sized. 
*/ 116 + char name[]; 117 + }; 118 + 119 + struct vma_iterator { 120 + struct ma_state mas; 121 + }; 122 + 123 + #define VMA_ITERATOR(name, __mm, __addr) \ 124 + struct vma_iterator name = { \ 125 + .mas = { \ 126 + .tree = &(__mm)->mm_mt, \ 127 + .index = __addr, \ 128 + .node = NULL, \ 129 + .status = ma_start, \ 130 + }, \ 131 + } 132 + 133 + struct address_space { 134 + struct rb_root_cached i_mmap; 135 + unsigned long flags; 136 + atomic_t i_mmap_writable; 137 + }; 138 + 139 + struct vm_userfaultfd_ctx {}; 140 + struct mempolicy {}; 141 + struct mmu_gather {}; 142 + struct mutex {}; 143 + #define DEFINE_MUTEX(mutexname) \ 144 + struct mutex mutexname = {} 145 + 146 + struct mm_struct { 147 + struct maple_tree mm_mt; 148 + int map_count; /* number of VMAs */ 149 + unsigned long total_vm; /* Total pages mapped */ 150 + unsigned long locked_vm; /* Pages that have PG_mlocked set */ 151 + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ 152 + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ 153 + unsigned long stack_vm; /* VM_STACK */ 154 + }; 155 + 156 + struct vma_lock { 157 + struct rw_semaphore lock; 158 + }; 159 + 160 + 161 + struct file { 162 + struct address_space *f_mapping; 163 + }; 164 + 165 + struct vm_area_struct { 166 + /* The first cache line has the info for VMA tree walking. */ 167 + 168 + union { 169 + struct { 170 + /* VMA covers [vm_start; vm_end) addresses within mm */ 171 + unsigned long vm_start; 172 + unsigned long vm_end; 173 + }; 174 + #ifdef CONFIG_PER_VMA_LOCK 175 + struct rcu_head vm_rcu; /* Used for deferred freeing. */ 176 + #endif 177 + }; 178 + 179 + struct mm_struct *vm_mm; /* The address space we belong to. */ 180 + pgprot_t vm_page_prot; /* Access permissions of this VMA. */ 181 + 182 + /* 183 + * Flags, see mm.h. 184 + * To modify use vm_flags_{init|reset|set|clear|mod} functions. 
185 + */ 186 + union { 187 + const vm_flags_t vm_flags; 188 + vm_flags_t __private __vm_flags; 189 + }; 190 + 191 + #ifdef CONFIG_PER_VMA_LOCK 192 + /* Flag to indicate areas detached from the mm->mm_mt tree */ 193 + bool detached; 194 + 195 + /* 196 + * Can only be written (using WRITE_ONCE()) while holding both: 197 + * - mmap_lock (in write mode) 198 + * - vm_lock->lock (in write mode) 199 + * Can be read reliably while holding one of: 200 + * - mmap_lock (in read or write mode) 201 + * - vm_lock->lock (in read or write mode) 202 + * Can be read unreliably (using READ_ONCE()) for pessimistic bailout 203 + * while holding nothing (except RCU to keep the VMA struct allocated). 204 + * 205 + * This sequence counter is explicitly allowed to overflow; sequence 206 + * counter reuse can only lead to occasional unnecessary use of the 207 + * slowpath. 208 + */ 209 + int vm_lock_seq; 210 + struct vma_lock *vm_lock; 211 + #endif 212 + 213 + /* 214 + * For areas with an address space and backing store, 215 + * linkage into the address_space->i_mmap interval tree. 216 + * 217 + */ 218 + struct { 219 + struct rb_node rb; 220 + unsigned long rb_subtree_last; 221 + } shared; 222 + 223 + /* 224 + * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma 225 + * list, after a COW of one of the file pages. A MAP_SHARED vma 226 + * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack 227 + * or brk vma (with NULL file) can only be in an anon_vma list. 228 + */ 229 + struct list_head anon_vma_chain; /* Serialized by mmap_lock & 230 + * page_table_lock */ 231 + struct anon_vma *anon_vma; /* Serialized by page_table_lock */ 232 + 233 + /* Function pointers to deal with this struct. */ 234 + const struct vm_operations_struct *vm_ops; 235 + 236 + /* Information about our backing store: */ 237 + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE 238 + units */ 239 + struct file * vm_file; /* File we map to (can be NULL). 
*/ 240 + void * vm_private_data; /* was vm_pte (shared mem) */ 241 + 242 + #ifdef CONFIG_ANON_VMA_NAME 243 + /* 244 + * For private and shared anonymous mappings, a pointer to a null 245 + * terminated string containing the name given to the vma, or NULL if 246 + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. 247 + */ 248 + struct anon_vma_name *anon_name; 249 + #endif 250 + #ifdef CONFIG_SWAP 251 + atomic_long_t swap_readahead_info; 252 + #endif 253 + #ifndef CONFIG_MMU 254 + struct vm_region *vm_region; /* NOMMU mapping region */ 255 + #endif 256 + #ifdef CONFIG_NUMA 257 + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ 258 + #endif 259 + #ifdef CONFIG_NUMA_BALANCING 260 + struct vma_numab_state *numab_state; /* NUMA Balancing state */ 261 + #endif 262 + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; 263 + } __randomize_layout; 264 + 265 + struct vm_fault {}; 266 + 267 + struct vm_operations_struct { 268 + void (*open)(struct vm_area_struct * area); 269 + /** 270 + * @close: Called when the VMA is being removed from the MM. 271 + * Context: User context. May sleep. Caller holds mmap_lock. 272 + */ 273 + void (*close)(struct vm_area_struct * area); 274 + /* Called any time before splitting to check if it's allowed */ 275 + int (*may_split)(struct vm_area_struct *area, unsigned long addr); 276 + int (*mremap)(struct vm_area_struct *area); 277 + /* 278 + * Called by mprotect() to make driver-specific permission 279 + * checks before mprotect() is finalised. The VMA must not 280 + * be modified. Returns 0 if mprotect() can proceed. 
281 + */ 282 + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, 283 + unsigned long end, unsigned long newflags); 284 + vm_fault_t (*fault)(struct vm_fault *vmf); 285 + vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); 286 + vm_fault_t (*map_pages)(struct vm_fault *vmf, 287 + pgoff_t start_pgoff, pgoff_t end_pgoff); 288 + unsigned long (*pagesize)(struct vm_area_struct * area); 289 + 290 + /* notification that a previously read-only page is about to become 291 + * writable, if an error is returned it will cause a SIGBUS */ 292 + vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); 293 + 294 + /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ 295 + vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); 296 + 297 + /* called by access_process_vm when get_user_pages() fails, typically 298 + * for use by special VMAs. See also generic_access_phys() for a generic 299 + * implementation useful for any iomem mapping. 300 + */ 301 + int (*access)(struct vm_area_struct *vma, unsigned long addr, 302 + void *buf, int len, int write); 303 + 304 + /* Called by the /proc/PID/maps code to ask the vma whether it 305 + * has a special name. Returning non-NULL will also cause this 306 + * vma to be dumped unconditionally. */ 307 + const char *(*name)(struct vm_area_struct *vma); 308 + 309 + #ifdef CONFIG_NUMA 310 + /* 311 + * set_policy() op must add a reference to any non-NULL @new mempolicy 312 + * to hold the policy upon return. Caller should pass NULL @new to 313 + * remove a policy and fall back to surrounding context--i.e. do not 314 + * install a MPOL_DEFAULT policy, nor the task or system default 315 + * mempolicy. 316 + */ 317 + int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); 318 + 319 + /* 320 + * get_policy() op must add reference [mpol_get()] to any policy at 321 + * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure 322 + * in mm/mempolicy.c will do this automatically. 
323 + * get_policy() must NOT add a ref if the policy at (vma,addr) is not 324 + * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. 325 + * If no [shared/vma] mempolicy exists at the addr, get_policy() op 326 + * must return NULL--i.e., do not "fallback" to task or system default 327 + * policy. 328 + */ 329 + struct mempolicy *(*get_policy)(struct vm_area_struct *vma, 330 + unsigned long addr, pgoff_t *ilx); 331 + #endif 332 + /* 333 + * Called by vm_normal_page() for special PTEs to find the 334 + * page for @addr. This is useful if the default behavior 335 + * (using pte_page()) would not find the correct page. 336 + */ 337 + struct page *(*find_special_page)(struct vm_area_struct *vma, 338 + unsigned long addr); 339 + }; 340 + 341 + static inline void vma_iter_invalidate(struct vma_iterator *vmi) 342 + { 343 + mas_pause(&vmi->mas); 344 + } 345 + 346 + static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) 347 + { 348 + return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); 349 + } 350 + 351 + static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) 352 + { 353 + return __pgprot(vm_flags); 354 + } 355 + 356 + static inline bool is_shared_maywrite(vm_flags_t vm_flags) 357 + { 358 + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == 359 + (VM_SHARED | VM_MAYWRITE); 360 + } 361 + 362 + static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) 363 + { 364 + return is_shared_maywrite(vma->vm_flags); 365 + } 366 + 367 + static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) 368 + { 369 + /* 370 + * Uses mas_find() to get the first VMA when the iterator starts. 371 + * Calling mas_next() could skip the first entry. 
372 + */ 373 + return mas_find(&vmi->mas, ULONG_MAX); 374 + } 375 + 376 + static inline bool vma_lock_alloc(struct vm_area_struct *vma) 377 + { 378 + vma->vm_lock = calloc(1, sizeof(struct vma_lock)); 379 + 380 + if (!vma->vm_lock) 381 + return false; 382 + 383 + init_rwsem(&vma->vm_lock->lock); 384 + vma->vm_lock_seq = -1; 385 + 386 + return true; 387 + } 388 + 389 + static inline void vma_assert_write_locked(struct vm_area_struct *); 390 + static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) 391 + { 392 + /* When detaching vma should be write-locked */ 393 + if (detached) 394 + vma_assert_write_locked(vma); 395 + vma->detached = detached; 396 + } 397 + 398 + extern const struct vm_operations_struct vma_dummy_vm_ops; 399 + 400 + static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) 401 + { 402 + memset(vma, 0, sizeof(*vma)); 403 + vma->vm_mm = mm; 404 + vma->vm_ops = &vma_dummy_vm_ops; 405 + INIT_LIST_HEAD(&vma->anon_vma_chain); 406 + vma_mark_detached(vma, false); 407 + } 408 + 409 + static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) 410 + { 411 + struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct)); 412 + 413 + if (!vma) 414 + return NULL; 415 + 416 + vma_init(vma, mm); 417 + if (!vma_lock_alloc(vma)) { 418 + free(vma); 419 + return NULL; 420 + } 421 + 422 + return vma; 423 + } 424 + 425 + static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) 426 + { 427 + struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct)); 428 + 429 + if (!new) 430 + return NULL; 431 + 432 + memcpy(new, orig, sizeof(*new)); 433 + if (!vma_lock_alloc(new)) { 434 + free(new); 435 + return NULL; 436 + } 437 + INIT_LIST_HEAD(&new->anon_vma_chain); 438 + 439 + return new; 440 + } 441 + 442 + /* 443 + * These are defined in vma.h, but sadly vm_stat_account() is referenced by 444 + * kernel/fork.c, so we have to these broadly available there, and temporarily 445 + * 
define them here to resolve the dependency cycle. 446 + */ 447 + 448 + #define is_exec_mapping(flags) \ 449 + ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC) 450 + 451 + #define is_stack_mapping(flags) \ 452 + (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK)) 453 + 454 + #define is_data_mapping(flags) \ 455 + ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE) 456 + 457 + static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, 458 + long npages) 459 + { 460 + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); 461 + 462 + if (is_exec_mapping(flags)) 463 + mm->exec_vm += npages; 464 + else if (is_stack_mapping(flags)) 465 + mm->stack_vm += npages; 466 + else if (is_data_mapping(flags)) 467 + mm->data_vm += npages; 468 + } 469 + 470 + #undef is_exec_mapping 471 + #undef is_stack_mapping 472 + #undef is_data_mapping 473 + 474 + /* Currently stubbed but we may later wish to un-stub. */ 475 + static inline void vm_acct_memory(long pages); 476 + static inline void vm_unacct_memory(long pages) 477 + { 478 + vm_acct_memory(-pages); 479 + } 480 + 481 + static inline void mapping_allow_writable(struct address_space *mapping) 482 + { 483 + atomic_inc(&mapping->i_mmap_writable); 484 + } 485 + 486 + static inline void vma_set_range(struct vm_area_struct *vma, 487 + unsigned long start, unsigned long end, 488 + pgoff_t pgoff) 489 + { 490 + vma->vm_start = start; 491 + vma->vm_end = end; 492 + vma->vm_pgoff = pgoff; 493 + } 494 + 495 + static inline 496 + struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) 497 + { 498 + return mas_find(&vmi->mas, max - 1); 499 + } 500 + 501 + static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, 502 + unsigned long start, unsigned long end, gfp_t gfp) 503 + { 504 + __mas_set_range(&vmi->mas, start, end - 1); 505 + mas_store_gfp(&vmi->mas, NULL, gfp); 506 + if (unlikely(mas_is_err(&vmi->mas))) 507 + return -ENOMEM; 508 + 509 + return 0; 510 + } 511 + 512 + static 
inline void mmap_assert_locked(struct mm_struct *); 513 + static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, 514 + unsigned long start_addr, 515 + unsigned long end_addr) 516 + { 517 + unsigned long index = start_addr; 518 + 519 + mmap_assert_locked(mm); 520 + return mt_find(&mm->mm_mt, &index, end_addr - 1); 521 + } 522 + 523 + static inline 524 + struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) 525 + { 526 + return mtree_load(&mm->mm_mt, addr); 527 + } 528 + 529 + static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) 530 + { 531 + return mas_prev(&vmi->mas, 0); 532 + } 533 + 534 + static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) 535 + { 536 + mas_set(&vmi->mas, addr); 537 + } 538 + 539 + static inline bool vma_is_anonymous(struct vm_area_struct *vma) 540 + { 541 + return !vma->vm_ops; 542 + } 543 + 544 + /* Defined in vma.h, so temporarily define here to avoid circular dependency. */ 545 + #define vma_iter_load(vmi) \ 546 + mas_walk(&(vmi)->mas) 547 + 548 + static inline struct vm_area_struct * 549 + find_vma_prev(struct mm_struct *mm, unsigned long addr, 550 + struct vm_area_struct **pprev) 551 + { 552 + struct vm_area_struct *vma; 553 + VMA_ITERATOR(vmi, mm, addr); 554 + 555 + vma = vma_iter_load(&vmi); 556 + *pprev = vma_prev(&vmi); 557 + if (!vma) 558 + vma = vma_next(&vmi); 559 + return vma; 560 + } 561 + 562 + #undef vma_iter_load 563 + 564 + static inline void vma_iter_init(struct vma_iterator *vmi, 565 + struct mm_struct *mm, unsigned long addr) 566 + { 567 + mas_init(&vmi->mas, &mm->mm_mt, addr); 568 + } 569 + 570 + /* Stubbed functions. 
*/ 571 + 572 + static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) 573 + { 574 + return NULL; 575 + } 576 + 577 + static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, 578 + struct vm_userfaultfd_ctx vm_ctx) 579 + { 580 + return true; 581 + } 582 + 583 + static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, 584 + struct anon_vma_name *anon_name2) 585 + { 586 + return true; 587 + } 588 + 589 + static inline void might_sleep(void) 590 + { 591 + } 592 + 593 + static inline unsigned long vma_pages(struct vm_area_struct *vma) 594 + { 595 + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 596 + } 597 + 598 + static inline void fput(struct file *) 599 + { 600 + } 601 + 602 + static inline void mpol_put(struct mempolicy *) 603 + { 604 + } 605 + 606 + static inline void vma_lock_free(struct vm_area_struct *vma) 607 + { 608 + free(vma->vm_lock); 609 + } 610 + 611 + static inline void __vm_area_free(struct vm_area_struct *vma) 612 + { 613 + vma_lock_free(vma); 614 + free(vma); 615 + } 616 + 617 + static inline void vm_area_free(struct vm_area_struct *vma) 618 + { 619 + __vm_area_free(vma); 620 + } 621 + 622 + static inline void lru_add_drain(void) 623 + { 624 + } 625 + 626 + static inline void tlb_gather_mmu(struct mmu_gather *, struct mm_struct *) 627 + { 628 + } 629 + 630 + static inline void update_hiwater_rss(struct mm_struct *) 631 + { 632 + } 633 + 634 + static inline void update_hiwater_vm(struct mm_struct *) 635 + { 636 + } 637 + 638 + static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, 639 + struct vm_area_struct *vma, unsigned long start_addr, 640 + unsigned long end_addr, unsigned long tree_end, 641 + bool mm_wr_locked) 642 + { 643 + (void)tlb; 644 + (void)mas; 645 + (void)vma; 646 + (void)start_addr; 647 + (void)end_addr; 648 + (void)tree_end; 649 + (void)mm_wr_locked; 650 + } 651 + 652 + static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, 653 + 
struct vm_area_struct *vma, unsigned long floor, 654 + unsigned long ceiling, bool mm_wr_locked) 655 + { 656 + (void)tlb; 657 + (void)mas; 658 + (void)vma; 659 + (void)floor; 660 + (void)ceiling; 661 + (void)mm_wr_locked; 662 + } 663 + 664 + static inline void mapping_unmap_writable(struct address_space *) 665 + { 666 + } 667 + 668 + static inline void flush_dcache_mmap_lock(struct address_space *) 669 + { 670 + } 671 + 672 + static inline void tlb_finish_mmu(struct mmu_gather *) 673 + { 674 + } 675 + 676 + static inline void get_file(struct file *) 677 + { 678 + } 679 + 680 + static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) 681 + { 682 + return 0; 683 + } 684 + 685 + static inline int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *) 686 + { 687 + return 0; 688 + } 689 + 690 + static inline void vma_start_write(struct vm_area_struct *) 691 + { 692 + } 693 + 694 + static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, 695 + unsigned long start, 696 + unsigned long end, 697 + long adjust_next) 698 + { 699 + (void)vma; 700 + (void)start; 701 + (void)end; 702 + (void)adjust_next; 703 + } 704 + 705 + static inline void vma_iter_free(struct vma_iterator *vmi) 706 + { 707 + mas_destroy(&vmi->mas); 708 + } 709 + 710 + static inline void vm_acct_memory(long pages) 711 + { 712 + } 713 + 714 + static inline void vma_interval_tree_insert(struct vm_area_struct *, 715 + struct rb_root_cached *) 716 + { 717 + } 718 + 719 + static inline void vma_interval_tree_remove(struct vm_area_struct *, 720 + struct rb_root_cached *) 721 + { 722 + } 723 + 724 + static inline void flush_dcache_mmap_unlock(struct address_space *) 725 + { 726 + } 727 + 728 + static inline void anon_vma_interval_tree_insert(struct anon_vma_chain*, 729 + struct rb_root_cached *) 730 + { 731 + } 732 + 733 + static inline void anon_vma_interval_tree_remove(struct anon_vma_chain*, 734 + struct rb_root_cached *) 735 + { 736 + } 737 + 738 + static inline 
void uprobe_mmap(struct vm_area_struct *) 739 + { 740 + } 741 + 742 + static inline void uprobe_munmap(struct vm_area_struct *vma, 743 + unsigned long start, unsigned long end) 744 + { 745 + (void)vma; 746 + (void)start; 747 + (void)end; 748 + } 749 + 750 + static inline void i_mmap_lock_write(struct address_space *) 751 + { 752 + } 753 + 754 + static inline void anon_vma_lock_write(struct anon_vma *) 755 + { 756 + } 757 + 758 + static inline void vma_assert_write_locked(struct vm_area_struct *) 759 + { 760 + } 761 + 762 + static inline void unlink_anon_vmas(struct vm_area_struct *) 763 + { 764 + } 765 + 766 + static inline void anon_vma_unlock_write(struct anon_vma *) 767 + { 768 + } 769 + 770 + static inline void i_mmap_unlock_write(struct address_space *) 771 + { 772 + } 773 + 774 + static inline void anon_vma_merge(struct vm_area_struct *, 775 + struct vm_area_struct *) 776 + { 777 + } 778 + 779 + static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, 780 + unsigned long start, 781 + unsigned long end, 782 + struct list_head *unmaps) 783 + { 784 + (void)vma; 785 + (void)start; 786 + (void)end; 787 + (void)unmaps; 788 + 789 + return 0; 790 + } 791 + 792 + static inline void mmap_write_downgrade(struct mm_struct *) 793 + { 794 + } 795 + 796 + static inline void mmap_read_unlock(struct mm_struct *) 797 + { 798 + } 799 + 800 + static inline void mmap_write_unlock(struct mm_struct *) 801 + { 802 + } 803 + 804 + static inline bool can_modify_mm(struct mm_struct *mm, 805 + unsigned long start, 806 + unsigned long end) 807 + { 808 + (void)mm; 809 + (void)start; 810 + (void)end; 811 + 812 + return true; 813 + } 814 + 815 + static inline void arch_unmap(struct mm_struct *mm, 816 + unsigned long start, 817 + unsigned long end) 818 + { 819 + (void)mm; 820 + (void)start; 821 + (void)end; 822 + } 823 + 824 + static inline void mmap_assert_locked(struct mm_struct *) 825 + { 826 + } 827 + 828 + static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) 
829 + { 830 + return true; 831 + } 832 + 833 + static inline void khugepaged_enter_vma(struct vm_area_struct *vma, 834 + unsigned long vm_flags) 835 + { 836 + (void)vma; 837 + (void)vm_flags; 838 + } 839 + 840 + static inline bool mapping_can_writeback(struct address_space *) 841 + { 842 + return true; 843 + } 844 + 845 + static inline bool is_vm_hugetlb_page(struct vm_area_struct *) 846 + { 847 + return false; 848 + } 849 + 850 + static inline bool vma_soft_dirty_enabled(struct vm_area_struct *) 851 + { 852 + return false; 853 + } 854 + 855 + static inline bool userfaultfd_wp(struct vm_area_struct *) 856 + { 857 + return false; 858 + } 859 + 860 + static inline void mmap_assert_write_locked(struct mm_struct *) 861 + { 862 + } 863 + 864 + static inline void mutex_lock(struct mutex *) 865 + { 866 + } 867 + 868 + static inline void mutex_unlock(struct mutex *) 869 + { 870 + } 871 + 872 + static inline bool mutex_is_locked(struct mutex *) 873 + { 874 + return true; 875 + } 876 + 877 + static inline bool signal_pending(void *) 878 + { 879 + return false; 880 + } 881 + 882 + #endif /* __MM_VMA_INTERNAL_H */