Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
mm: larger stack guard gap, between vmas

Stack guard page is a useful feature to reduce the risk of stack smashing
into a different mapping. We have been using a single-page gap, which
is sufficient to prevent the stack from ending up adjacent to a different
mapping. But this seems to be insufficient in light of the stack usage in
userspace: e.g. glibc uses alloca() allocations as large as 64kB in many
commonly used functions; others use constructs like gid_t buffer[NGROUPS_MAX],
which is 256kB, or stack strings sized by MAX_ARG_STRLEN.
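
For illustration, here is a minimal user-space sketch (a hypothetical
function, not taken from glibc or the kernel tree) of the kind of frame
described above; a single call like this moves the stack pointer far past a
one-page guard before a byte of the buffers is touched:

	#include <alloca.h>
	#include <limits.h>	/* NGROUPS_MAX (65536 on Linux) */
	#include <string.h>
	#include <sys/types.h>

	static void large_frame(const char *src)
	{
		gid_t buffer[NGROUPS_MAX];		/* 4 * 65536 = 256kB of stack */
		char *scratch = alloca(64 * 1024);	/* 64kB, as in some glibc paths */

		memset(buffer, 0, sizeof(buffer));	/* touch the whole array */
		strcpy(scratch, src);
	}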

This becomes especially dangerous for suid binaries run with the default
unlimited stack size limit, because such applications can be tricked into
consuming a large portion of the stack, so that a single glibc call jumps
over the guard page. These attacks are not theoretical, unfortunately.

Make those attacks less probable by increasing the stack guard gap
to 1MB (on systems with 4k pages; but make it depend on the page size,
because systems with larger base pages might cap stack allocations in
PAGE_SIZE units), which should cover larger alloca() and VLA stack
allocations. It is obviously not a full fix, because the problem is
somewhat inherent, but it should shrink the attack surface a lot.
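
(With the default of 256 pages, that works out to 256 * 4kB = 1MB on
4k-page systems, and scales with the base page size elsewhere, e.g.
256 * 64kB = 16MB on 64k-page configurations.)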

One could argue that the gap size should be configurable from userspace,
but that can be done later, when somebody finds that the new 1MB is wrong
for some special-case applications. For now, add a kernel command line
option (stack_guard_gap) to specify the stack gap size (in page units).
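
For example, booting with stack_guard_gap=1 restores the old single-page
behaviour, while stack_guard_gap=512 doubles the default gap to 2MB on
systems with 4k pages.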

Implementation-wise, first delete all the old code for the stack guard page:
although we could get away with accounting one extra page in a stack vma,
accounting a larger gap can break userspace - case in point, a program run
with "ulimit -S -v 20000" failed when the 1MB gap was counted towards
RLIMIT_AS; similar problems could come up with RLIMIT_MLOCK and strict
non-overcommit mode.
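
(For scale: "ulimit -S -v 20000" caps the address space at 20000kB, i.e.
exactly 5000 4k pages, so a 256-page gap charged per stack vma would eat
more than 5% of that budget by itself.)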

Instead of keeping the gap inside the stack vma, maintain the stack guard
gap as a gap between vmas: using vm_start_gap() in place of vm_start
(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
places which need to respect the gap - mainly arch_get_unmapped_area(),
and the vma tree's subtree_gap support for it.
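
The resulting check, as it appears in the generic arch_get_unmapped_area()
hunk in mm/mmap.c below (the per-arch hunks follow the same pattern):

	vma = find_vma_prev(mm, addr, &prev);
	if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
	    (!vma || addr + len <= vm_start_gap(vma)) &&
	    (!prev || addr >= vm_end_gap(prev)))
		return addr;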

Original-patch-by: Oleg Nesterov <oleg@redhat.com>
Original-patch-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Hugh Dickins, committed by Linus Torvalds
1be7107f 1132d5e7

+152 -163
+7
Documentation/admin-guide/kernel-parameters.txt
···
 			expediting.  Set to zero to disable automatic
 			expediting.
 
+	stack_guard_gap=	[MM]
+			override the default stack gap protection. The value
+			is in page units and it defines how many pages prior
+			to (for stacks growing down) resp. after (for stacks
+			growing up) the main stack are reserved for no other
+			mapping. Default value is 256 pages.
+
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
+1 -1
arch/arc/mm/mmap.c
···
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+2 -2
arch/arm/mm/mmap.c
···
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+1 -1
arch/frv/mm/elf-fdpic.c
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(current->mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto success;
 	}
 
+1 -1
arch/mips/mm/mmap.c
···
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+9 -6
arch/parisc/kernel/sys_parisc.c
···
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	unsigned long task_size = TASK_SIZE;
 	int do_color_align, last_mmap;
 	struct vm_unmapped_area_info info;
···
 	else
 		addr = PAGE_ALIGN(addr);
 
-	vma = find_vma(mm, addr);
+	vma = find_vma_prev(mm, addr, &prev);
 	if (task_size - len >= addr &&
-	    (!vma || addr + len <= vma->vm_start))
+	    (!vma || addr + len <= vm_start_gap(vma)) &&
+	    (!prev || addr >= vm_end_gap(prev)))
 		goto found_addr;
 	}
···
 		const unsigned long len, const unsigned long pgoff,
 		const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	int do_color_align, last_mmap;
···
 		addr = COLOR_ALIGN(addr, last_mmap, pgoff);
 	else
 		addr = PAGE_ALIGN(addr);
-	vma = find_vma(mm, addr);
+
+	vma = find_vma_prev(mm, addr, &prev);
 	if (TASK_SIZE - len >= addr &&
-	    (!vma || addr + len <= vma->vm_start))
+	    (!vma || addr + len <= vm_start_gap(vma)) &&
+	    (!prev || addr >= vm_end_gap(prev)))
 		goto found_addr;
 	}
+1 -1
arch/powerpc/mm/hugetlbpage-radix.c
···
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	/*
+2 -2
arch/powerpc/mm/mmap.c
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+1 -1
arch/powerpc/mm/slice.c
···
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	return (!vma || (addr + len) <= vm_start_gap(vma));
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+2 -2
arch/s390/mm/mmap.c
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			goto check_asce_limit;
 	}
 
+2 -2
arch/sh/mm/mmap.c
···
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
···
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+2 -2
arch/sparc/kernel/sys_sparc_64.c
···
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
···
 
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+1 -1
arch/sparc/mm/hugetlbpage.c
···
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
+1 -1
arch/tile/mm/hugetlbpage.c
···
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
+2 -2
arch/x86/kernel/sys_x86_64.c
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (end - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
···
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
+1 -1
arch/x86/mm/hugetlbpage.c
···
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
+1 -1
arch/xtensa/kernel/syscall.c
···
 	/* At this point:  (!vmm || addr < vmm->vm_end). */
 	if (TASK_SIZE - len < addr)
 		return -ENOMEM;
-	if (!vmm || addr + len <= vmm->vm_start)
+	if (!vmm || addr + len <= vm_start_gap(vmm))
 		return addr;
 	addr = vmm->vm_end;
 	if (flags & MAP_SHARED)
+1 -1
fs/hugetlbfs/inode.c
···
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
 
-4
fs/proc/task_mmu.c
···
 
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
-	if (stack_guard_page_start(vma, start))
-		start += PAGE_SIZE;
 	end = vma->vm_end;
-	if (stack_guard_page_end(vma, end))
-		end -= PAGE_SIZE;
 
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
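
(Since the guard region now lives outside the vma, there is no longer a
guard page inside the stack vma for /proc/<pid>/maps to hide; the true
vm_start/vm_end can be printed directly.)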
+25 -28
include/linux/mm.h
···
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
 	return !vma->vm_ops;
···
 #else
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
 #endif
-
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
-					 unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSDOWN) &&
-		(vma->vm_start == addr) &&
-		!vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
-				       unsigned long addr)
-{
-	return (vma->vm_flags & VM_GROWSUP) &&
-		(vma->vm_end == addr) &&
-		!vma_growsup(vma->vm_next, addr);
-}
 
 int vma_is_stack_for_current(struct vm_area_struct *vma);
···
 				pgoff_t offset,
 				unsigned long size);
 
+extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
···
 	if (vma && end_addr <= vma->vm_start)
 		vma = NULL;
 	return vma;
+}
+
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_start = vma->vm_start;
+
+	if (vma->vm_flags & VM_GROWSDOWN) {
+		vm_start -= stack_guard_gap;
+		if (vm_start > vma->vm_start)
+			vm_start = 0;
+	}
+	return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_end = vma->vm_end;
+
+	if (vma->vm_flags & VM_GROWSUP) {
+		vm_end += stack_guard_gap;
+		if (vm_end < vma->vm_end)
+			vm_end = -PAGE_SIZE;
+	}
+	return vm_end;
 }
 
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
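
Note the unsigned-wrap handling in the two new helpers: if subtracting the
gap wraps below address zero, vm_start_gap() clamps to 0; if adding it wraps
past the top of the address space, vm_end_gap() clamps to -PAGE_SIZE. A
stand-alone user-space rendition of the underflow case (illustrative sketch
only, with PAGE_SHIFT assumed to be 12):

	#include <stdio.h>

	int main(void)
	{
		unsigned long stack_guard_gap = 256UL << 12;	/* 1MB with 4k pages */
		unsigned long vma_start = 0x1000;	/* a stack vma very near NULL */
		unsigned long start = vma_start - stack_guard_gap;

		if (start > vma_start)	/* the subtraction wrapped around */
			start = 0;
		printf("effective start with gap: %#lx\n", start);	/* prints 0 */
		return 0;
	}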
-5
mm/gup.c
···
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
-	/* For mm_populate(), just skip the stack guard page. */
-	if ((*flags & FOLL_POPULATE) &&
-			(stack_guard_page_start(vma, address) ||
-			 stack_guard_page_end(vma, address + PAGE_SIZE)))
-		return -ENOENT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
-38
mm/memory.c
···
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
···
 	/* File mapping without ->vm_ops ? */
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
-
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, vmf->address) < 0)
-		return VM_FAULT_SIGSEGV;
 
 	/*
 	 * Use pte_alloc() instead of pte_alloc_map(). We can't run
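
(With the gap now enforced in expand_upwards()/expand_downwards() themselves,
the anonymous fault path no longer needs to special-case the last stack page,
so check_stack_guard_page() and its VM_FAULT_SIGSEGV return disappear
entirely.)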
+89 -60
mm/mmap.c
···
 	unsigned long retval;
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *next;
 	unsigned long min_brk;
 	bool populate;
 	LIST_HEAD(uf);
···
 	}
 
 	/* Check against existing mmap mappings. */
-	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+	next = find_vma(mm, oldbrk);
+	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
···
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, subtree_gap;
-	max = vma->vm_start;
-	if (vma->vm_prev)
-		max -= vma->vm_prev->vm_end;
+	unsigned long max, prev_end, subtree_gap;
+
+	/*
+	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+	 * allow two stack_guard_gaps between them here, and when choosing
+	 * an unmapped area; whereas when expanding we only require one.
+	 * That's a little inconsistent, but keeps the code here simpler.
+	 */
+	max = vm_start_gap(vma);
+	if (vma->vm_prev) {
+		prev_end = vm_end_gap(vma->vm_prev);
+		if (max > prev_end)
+			max -= prev_end;
+		else
+			max = 0;
+	}
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
···
 			anon_vma_unlock_read(anon_vma);
 		}
 
-		highest_address = vma->vm_end;
+		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
···
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
 	else
-		mm->highest_vm_end = vma->vm_end;
+		mm->highest_vm_end = vm_end_gap(vma);
 
 	/*
 	 * vma->vm_prev wasn't known when we followed the rbtree to find the
···
 			vma_gap_update(vma);
 		if (end_changed) {
 			if (!next)
-				mm->highest_vm_end = end;
+				mm->highest_vm_end = vm_end_gap(vma);
 			else if (!adjust_next)
 				vma_gap_update(next);
 		}
···
 			 * mm->highest_vm_end doesn't need any update
 			 * in remove_next == 1 case.
 			 */
-			VM_WARN_ON(mm->highest_vm_end != end);
+			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
 	}
 	if (insert && file)
···
 
 	while (true) {
 		/* Visit left subtree if it looks promising */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
 			struct vm_area_struct *left =
 				rb_entry(vma->vm_rb.rb_left,
···
 			}
 		}
 
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
 		/* Check if current node has a suitable gap */
 		if (gap_start > high_limit)
···
 			vma = rb_entry(rb_parent(prev),
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_left) {
-				gap_start = vma->vm_prev->vm_end;
-				gap_end = vma->vm_start;
+				gap_start = vm_end_gap(vma->vm_prev);
+				gap_end = vm_start_gap(vma);
 				goto check_current;
 			}
 		}
···
 
 	while (true) {
 		/* Visit right subtree if it looks promising */
-		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
 			struct vm_area_struct *right =
 				rb_entry(vma->vm_rb.rb_right,
···
 
 check_current:
 		/* Check if current node has a suitable gap */
-		gap_end = vma->vm_start;
+		gap_end = vm_start_gap(vma);
 		if (gap_end < low_limit)
 			return -ENOMEM;
 		if (gap_start <= high_limit && gap_end - gap_start >= length)
···
 				       struct vm_area_struct, vm_rb);
 			if (prev == vma->vm_rb.rb_right) {
 				gap_start = vma->vm_prev ?
-					vma->vm_prev->vm_end : 0;
+					vm_end_gap(vma->vm_prev) : 0;
 				goto check_current;
 			}
 		}
···
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE - mmap_min_addr)
···
 
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
···
 		const unsigned long len, const unsigned long pgoff,
 		const unsigned long flags)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
···
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
+		vma = find_vma_prev(mm, addr, &prev);
 		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
 			return addr;
 	}
···
 	 * update accounting. This is shared with both the
 	 * grow-up and grow-down cases.
 	 */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start, actual_size;
+	unsigned long new_start;
 
 	/* address space limit tests */
 	if (!may_expand_vm(mm, vma->vm_flags, grow))
 		return -ENOMEM;
 
 	/* Stack limit test */
-	actual_size = size;
-	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-		actual_size -= PAGE_SIZE;
-	if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
···
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next;
+	unsigned long gap_addr;
 	int error = 0;
 
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
 	/* Guard against wrapping around to address 0. */
-	if (address < PAGE_ALIGN(address+4))
-		address = PAGE_ALIGN(address+4);
-	else
+	address &= PAGE_MASK;
+	address += PAGE_SIZE;
+	if (!address)
 		return -ENOMEM;
+
+	/* Enforce stack_guard_gap */
+	gap_addr = address + stack_guard_gap;
+	if (gap_addr < address)
+		return -ENOMEM;
+	next = vma->vm_next;
+	if (next && next->vm_start < gap_addr) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
 
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
···
 				if (vma->vm_next)
 					vma_gap_update(vma->vm_next);
 				else
-					mm->highest_vm_end = address;
+					mm->highest_vm_end = vm_end_gap(vma);
 				spin_unlock(&mm->page_table_lock);
 
 				perf_event_mmap(vma);
···
 				   unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *prev;
+	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
 	error = security_mmap_addr(address);
 	if (error)
 		return error;
+
+	/* Enforce stack_guard_gap */
+	gap_addr = address - stack_guard_gap;
+	if (gap_addr > address)
+		return -ENOMEM;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end > gap_addr) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+		/* Check that both stack segments have the same anon_vma? */
+	}
 
 	/* We must make sure the anon_vma is allocated. */
 	if (unlikely(anon_vma_prepare(vma)))
···
 	return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping.  We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = simple_strtoul(p, &endptr, 10);
+	if (!*endptr)
+		stack_guard_gap = val << PAGE_SHIFT;
+
+	return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *next;
-
-	address &= PAGE_MASK;
-	next = vma->vm_next;
-	if (next && next->vm_start == address + PAGE_SIZE) {
-		if (!(next->vm_flags & VM_GROWSUP))
-			return -ENOMEM;
-	}
 	return expand_upwards(vma, address);
 }
···
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-	struct vm_area_struct *prev;
-
-	address &= PAGE_MASK;
-	prev = vma->vm_prev;
-	if (prev && prev->vm_end == address) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
-			return -ENOMEM;
-	}
 	return expand_downwards(vma, address);
 }
···
 		vma->vm_prev = prev;
 		vma_gap_update(vma);
 	} else
-		mm->highest_vm_end = prev ? prev->vm_end : 0;
+		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
 	tail_vma->vm_next = NULL;
 
 	/* Kill the cache */
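
(Note that cmdline_parse_stack_guard_gap() only updates stack_guard_gap when
simple_strtoul() consumes the entire argument, so a malformed value such as
"stack_guard_gap=8foo" is silently ignored and the 256-page default kept.)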