Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm, docs: update memory.stat description with workingset* entries
mm: vmscan: scan until it finds eligible pages
mm, thp: copying user pages must schedule on collapse
dax: fix PMD data corruption when fault races with write
dax: fix data corruption when fault races with write
ext4: return to starting transaction in ext4_dax_huge_fault()
mm: fix data corruption due to stale mmap reads
dax: prevent invalidation of mapped DAX entries
Tigran has moved
mm, vmalloc: fix vmalloc users tracking properly
mm/khugepaged: add missed tracepoint for collapse_huge_page_swapin
gcov: support GCC 7.1
mm, vmstat: Remove spurious WARN() during zoneinfo print
time: delete current_fs_time()
hwpoison, memcg: forcibly uncharge LRU pages

23 files changed, +147 -128
+12
Documentation/cgroup-v2.txt
···
 
 	  Number of major page faults incurred
 
+	  workingset_refault
+
+		Number of refaults of previously evicted pages
+
+	  workingset_activate
+
+		Number of refaulted pages that were immediately activated
+
+	  workingset_nodereclaim
+
+		Number of times a shadow node has been reclaimed
+
   memory.swap.current
 
 	A read-only single value file which exists on non-root
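The new counters appear in memory.stat as plain "name value" lines. As a quick illustration only (not part of this patch set), a minimal userspace sketch that prints just the workingset_* lines; the cgroup path below is an assumption and must be adjusted to a real cgroup v2 group on the system:

/*
 * Hypothetical example, not kernel code: dump the workingset_* entries
 * documented above from a cgroup v2 memory.stat file.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Assumed path; point this at an actual cgroup v2 group. */
        const char *path = "/sys/fs/cgroup/example/memory.stat";
        char line[256];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        while (fgets(line, sizeof(line), f)) {
                /* Lines look like "workingset_refault 12345". */
                if (!strncmp(line, "workingset_", strlen("workingset_")))
                        fputs(line, stdout);
        }
        fclose(f);
        return 0;
}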
+1 -1
Documentation/filesystems/bfs.txt
···
 If you have any patches, questions or suggestions regarding this BFS
 implementation please contact the author:
 
-Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+Tigran Aivazian <aivazian.tigran@gmail.com>
+1 -1
MAINTAINERS
···
 F:      drivers/net/ethernet/ec_bhf.c
 
 BFS FILE SYSTEM
-M:      "Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
+M:      "Tigran A. Aivazian" <aivazian.tigran@gmail.com>
 S:      Maintained
 F:      Documentation/filesystems/bfs.txt
 F:      fs/bfs/
+1 -1
arch/x86/kernel/cpu/microcode/amd.c
···
  * Author: Peter Oruba <peter.oruba@amd.com>
  *
  * Based on work by:
- * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Tigran Aivazian <aivazian.tigran@gmail.com>
  *
  * early loader:
  * Copyright (C) 2013 Advanced Micro Devices, Inc.
+1 -1
arch/x86/kernel/cpu/microcode/core.c
···
 /*
  * CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *               2006      Shaohua Li <shaohua.li@intel.com>
  *               2013-2016 Borislav Petkov <bp@alien8.de>
  *
+1 -1
arch/x86/kernel/cpu/microcode/intel.c
···
 /*
  * Intel CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *               2006 Shaohua Li <shaohua.li@intel.com>
  *
  * Intel CPU microcode early update for Linux
+2 -2
fs/bfs/inode.c
···
 /*
  *      fs/bfs/inode.c
  *      BFS superblock and inode operations.
- *      Copyright (C) 1999-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *      Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *      From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
  *
  *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
···
 #include <linux/uaccess.h>
 #include "bfs.h"
 
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_AUTHOR("Tigran Aivazian <aivazian.tigran@gmail.com>");
 MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux");
 MODULE_LICENSE("GPL");
 
+33 -62
fs/dax.c
···
 }
 
 /*
- * Invalidate exceptional DAX entry if easily possible. This handles DAX
- * entries for invalidate_inode_pages() so we evict the entry only if we can
- * do so without blocking.
- */
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-        int ret = 0;
-        void *entry, **slot;
-        struct radix_tree_root *page_tree = &mapping->page_tree;
-
-        spin_lock_irq(&mapping->tree_lock);
-        entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
-        if (!entry || !radix_tree_exceptional_entry(entry) ||
-            slot_locked(mapping, slot))
-                goto out;
-        if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-            radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-                goto out;
-        radix_tree_delete(page_tree, index);
-        mapping->nrexceptional--;
-        ret = 1;
-out:
-        spin_unlock_irq(&mapping->tree_lock);
-        if (ret)
-                dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-        return ret;
-}
-
-/*
  * Invalidate exceptional DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
···
          * into page tables. We have to tear down these mappings so that data
          * written by write(2) is visible in mmap.
          */
-        if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+        if (iomap->flags & IOMAP_F_NEW) {
                 invalidate_inode_pages2_range(inode->i_mapping,
                                               pos >> PAGE_SHIFT,
                                               (end - 1) >> PAGE_SHIFT);
···
         if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
                 flags |= IOMAP_WRITE;
 
+        entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+        if (IS_ERR(entry)) {
+                vmf_ret = dax_fault_return(PTR_ERR(entry));
+                goto out;
+        }
+
         /*
          * Note that we don't bother to use iomap_apply here: DAX required
          * the file system block size to be equal the page size, which means
···
         error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
         if (error) {
                 vmf_ret = dax_fault_return(error);
-                goto out;
+                goto unlock_entry;
         }
         if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-                vmf_ret = dax_fault_return(-EIO);       /* fs corruption? */
-                goto finish_iomap;
-        }
-
-        entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-        if (IS_ERR(entry)) {
-                vmf_ret = dax_fault_return(PTR_ERR(entry));
-                goto finish_iomap;
+                error = -EIO;   /* fs corruption? */
+                goto error_finish_iomap;
         }
 
         sector = dax_iomap_sector(&iomap, pos);
···
                 }
 
                 if (error)
-                        goto error_unlock_entry;
+                        goto error_finish_iomap;
 
                 __SetPageUptodate(vmf->cow_page);
                 vmf_ret = finish_fault(vmf);
                 if (!vmf_ret)
                         vmf_ret = VM_FAULT_DONE_COW;
-                goto unlock_entry;
+                goto finish_iomap;
         }
 
         switch (iomap.type) {
···
         case IOMAP_HOLE:
                 if (!(vmf->flags & FAULT_FLAG_WRITE)) {
                         vmf_ret = dax_load_hole(mapping, &entry, vmf);
-                        goto unlock_entry;
+                        goto finish_iomap;
                 }
                 /*FALLTHRU*/
         default:
···
                 break;
         }
 
- error_unlock_entry:
+ error_finish_iomap:
         vmf_ret = dax_fault_return(error) | major;
- unlock_entry:
-        put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  finish_iomap:
         if (ops->iomap_end) {
                 int copied = PAGE_SIZE;
···
                  */
                 ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
         }
- out:
+ unlock_entry:
+        put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
         trace_dax_pte_fault_done(inode, vmf, vmf_ret);
         return vmf_ret;
 }
···
                 goto fallback;
 
         /*
-         * Note that we don't use iomap_apply here. We aren't doing I/O, only
-         * setting up a mapping, so really we're using iomap_begin() as a way
-         * to look up our filesystem block.
-         */
-        pos = (loff_t)pgoff << PAGE_SHIFT;
-        error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
-        if (error)
-                goto fallback;
-
-        if (iomap.offset + iomap.length < pos + PMD_SIZE)
-                goto finish_iomap;
-
-        /*
          * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
          * PMD or a HZP entry. If it can't (because a 4k page is already in
          * the tree, for instance), it will return -EEXIST and we just fall
···
          */
         entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
         if (IS_ERR(entry))
+                goto fallback;
+
+        /*
+         * Note that we don't use iomap_apply here. We aren't doing I/O, only
+         * setting up a mapping, so really we're using iomap_begin() as a way
+         * to look up our filesystem block.
+         */
+        pos = (loff_t)pgoff << PAGE_SHIFT;
+        error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+        if (error)
+                goto unlock_entry;
+
+        if (iomap.offset + iomap.length < pos + PMD_SIZE)
                 goto finish_iomap;
 
         switch (iomap.type) {
···
         case IOMAP_UNWRITTEN:
         case IOMAP_HOLE:
                 if (WARN_ON_ONCE(write))
-                        goto unlock_entry;
+                        break;
                 result = dax_pmd_load_hole(vmf, &iomap, &entry);
                 break;
         default:
···
                 break;
         }
 
- unlock_entry:
-        put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
         if (ops->iomap_end) {
                 int copied = PMD_SIZE;
···
                 ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
                                 &iomap);
         }
+ unlock_entry:
+        put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
         if (result == VM_FAULT_FALLBACK) {
                 split_huge_pmd(vma, vmf->pmd, vmf->address);
+17 -4
fs/ext4/file.c
···
                                 enum page_entry_size pe_size)
 {
         int result;
+        handle_t *handle = NULL;
         struct inode *inode = file_inode(vmf->vma->vm_file);
         struct super_block *sb = inode->i_sb;
         bool write = vmf->flags & FAULT_FLAG_WRITE;
···
         if (write) {
                 sb_start_pagefault(sb);
                 file_update_time(vmf->vma->vm_file);
+                down_read(&EXT4_I(inode)->i_mmap_sem);
+                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+                                               EXT4_DATA_TRANS_BLOCKS(sb));
+        } else {
+                down_read(&EXT4_I(inode)->i_mmap_sem);
         }
-        down_read(&EXT4_I(inode)->i_mmap_sem);
-        result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
-        up_read(&EXT4_I(inode)->i_mmap_sem);
-        if (write)
+        if (!IS_ERR(handle))
+                result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
+        else
+                result = VM_FAULT_SIGBUS;
+        if (write) {
+                if (!IS_ERR(handle))
+                        ext4_journal_stop(handle);
+                up_read(&EXT4_I(inode)->i_mmap_sem);
                 sb_end_pagefault(sb);
+        } else {
+                up_read(&EXT4_I(inode)->i_mmap_sem);
+        }
 
         return result;
 }
-1
include/linux/dax.h
···
 int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
                     const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
                                       pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-1
include/linux/fs.h
···
         inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
-extern struct timespec current_fs_time(struct super_block *sb);
 extern struct timespec current_time(struct inode *inode);
 
 /*
+6 -15
include/linux/vmalloc.h
···
 #include <linux/list.h>
 #include <linux/llist.h>
 #include <asm/page.h>           /* pgprot_t */
-#include <asm/pgtable.h>        /* PAGE_KERNEL */
 #include <linux/rbtree.h>
 
 struct vm_area_struct;          /* vma defining user mapping in mm_types.h */
···
                         const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
-#else
-extern void *__vmalloc_node(unsigned long size, unsigned long align,
-                            gfp_t gfp_mask, pgprot_t prot,
-                            int node, const void *caller);
-
-/*
- * We really want to have this inlined due to caller tracking. This
- * function is used by the highlevel vmalloc apis and so we want to track
- * their callers and inlining will achieve that.
- */
-static inline void *__vmalloc_node_flags(unsigned long size,
-                                         int node, gfp_t flags)
+static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
+                                                gfp_t flags, void *caller)
 {
-        return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
-                              node, __builtin_return_address(0));
+        return __vmalloc_node_flags(size, node, flags);
 }
+#else
+extern void *__vmalloc_node_flags_caller(unsigned long size,
+                                         int node, gfp_t flags, void *caller);
 #endif
 
 extern void vfree(const void *addr);
+6
kernel/gcov/base.c
···
 }
 EXPORT_SYMBOL(__gcov_merge_icall_topn);
 
+void __gcov_exit(void)
+{
+        /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *
+3 -1
kernel/gcov/gcc_4_7.c
···
 #include <linux/vmalloc.h>
 #include "gcov.h"
 
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS                   9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS                   10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS                   9
-14
kernel/time/time.c
···
         return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
 }
 
-/**
- * current_fs_time - Return FS time
- * @sb: Superblock.
- *
- * Return the current time truncated to the time granularity supported by
- * the fs.
- */
-struct timespec current_fs_time(struct super_block *sb)
-{
-        struct timespec now = current_kernel_time();
-        return timespec_trunc(now, sb->s_time_gran);
-}
-EXPORT_SYMBOL(current_fs_time);
-
 /*
  * Convert jiffies to milliseconds and back.
  *
+6 -5
mm/khugepaged.c
···
                                       spinlock_t *ptl)
 {
         pte_t *_pte;
-        for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+        for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+             _pte++, page++, address += PAGE_SIZE) {
                 pte_t pteval = *_pte;
                 struct page *src_page;
···
                         spin_unlock(ptl);
                         free_page_and_swap_cache(src_page);
                 }
-
-                address += PAGE_SIZE;
-                page++;
+                cond_resched();
         }
 }
···
                         return false;
                 }
                 /* check if the pmd is still valid */
-                if (mm_find_pmd(mm, address) != pmd)
+                if (mm_find_pmd(mm, address) != pmd) {
+                        trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
                         return false;
+                }
         }
         if (ret & VM_FAULT_ERROR) {
                 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+1 -1
mm/memcontrol.c
···
                 next = page->lru.next;
 
                 VM_BUG_ON_PAGE(PageLRU(page), page);
-                VM_BUG_ON_PAGE(page_count(page), page);
+                VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
 
                 if (!page->mem_cgroup)
                         continue;
+7
mm/memory-failure.c
···
          */
         ClearPageActive(p);
         ClearPageUnevictable(p);
+
+        /*
+         * Poisoned page might never drop its ref count to 0 so we have
+         * to uncharge it manually from its memcg.
+         */
+        mem_cgroup_uncharge(p);
+
         /*
          * drop the page count elevated by isolate_lru_page()
          */
+14 -7
mm/truncate.c
···
 
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
  */
 static int invalidate_exceptional_entry(struct address_space *mapping,
                                         pgoff_t index, void *entry)
 {
-        /* Handled by shmem itself */
-        if (shmem_mapping(mapping))
+        /* Handled by shmem itself, or for DAX we do nothing. */
+        if (shmem_mapping(mapping) || dax_mapping(mapping))
                 return 1;
-        if (dax_mapping(mapping))
-                return dax_invalidate_mapping_entry(mapping, index);
         clear_shadow_entry(mapping, index, entry);
         return 1;
 }
···
                 cond_resched();
                 index++;
         }
-
+        /*
+         * For DAX we invalidate page tables after invalidating radix tree. We
+         * could invalidate page tables while invalidating each entry however
+         * that would be expensive. And doing range unmapping before doesn't
+         * work as we have no cheap way to find whether radix tree entry didn't
+         * get remapped later.
+         */
+        if (dax_mapping(mapping)) {
+                unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+                                    (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+        }
 out:
         cleancache_invalidate_inode(mapping);
         return ret;
+2 -1
mm/util.c
···
         if (ret || size <= PAGE_SIZE)
                 return ret;
 
-        return __vmalloc_node_flags(size, node, flags);
+        return __vmalloc_node_flags_caller(size, node, flags,
+                        __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
 
+18 -1
mm/vmalloc.c
···
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+                            gfp_t gfp_mask, pgprot_t prot,
+                            int node, const void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                                  pgprot_t prot, int node)
 {
···
  *      with mm people.
  *
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
                             gfp_t gfp_mask, pgprot_t prot,
                             int node, const void *caller)
 {
···
                                 __builtin_return_address(0));
 }
 EXPORT_SYMBOL(__vmalloc);
+
+static inline void *__vmalloc_node_flags(unsigned long size,
+                                         int node, gfp_t flags)
+{
+        return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+                              node, __builtin_return_address(0));
+}
+
+
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+                                  void *caller)
+{
+        return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
 
 /**
  * vmalloc - allocate virtually contiguous memory
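The point of the new __vmalloc_node_flags_caller() variant is that kvmalloc_node() (mm/util.c above) can attribute its vmalloc fallback to its own caller rather than to mm/util.c. For illustration only, a self-contained userspace sketch of the same caller-tracking pattern; the names are hypothetical, and it relies on the GCC/Clang __builtin_return_address() builtin, not on any kernel API:

/*
 * Userspace sketch: an inline wrapper captures __builtin_return_address(0)
 * and hands it to a *_caller variant, so the allocation is attributed to the
 * real call site.  All identifiers here are made up for the example.
 */
#include <stdio.h>
#include <stdlib.h>

static void *alloc_caller(size_t size, const void *caller)
{
        /* A real implementation would record "caller" for allocation tracking. */
        printf("allocating %zu bytes on behalf of caller %p\n", size, caller);
        return malloc(size);
}

/* Must stay inlined so the builtin names the user, not this wrapper. */
static inline void *tracked_alloc(size_t size)
{
        return alloc_caller(size, __builtin_return_address(0));
}

int main(void)
{
        void *p = tracked_alloc(64);

        free(p);
        return 0;
}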
+15 -6
mm/vmscan.c
···
  *
  * Appropriate locks must be held before calling this function.
  *
- * @nr_to_scan: The number of pages to look through on the list.
+ * @nr_to_scan: The number of eligible pages to look through on the list.
  * @lruvec:     The LRU vector to pull pages from.
  * @dst:        The temp list to put pages on to.
  * @nr_scanned: The number of pages that were scanned.
···
         unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
         unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
         unsigned long skipped = 0;
-        unsigned long scan, nr_pages;
+        unsigned long scan, total_scan, nr_pages;
         LIST_HEAD(pages_skipped);
 
-        for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-                                        !list_empty(src); scan++) {
+        scan = 0;
+        for (total_scan = 0;
+             scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+             total_scan++) {
                 struct page *page;
 
                 page = lru_to_page(src);
···
                         continue;
                 }
 
+                /*
+                 * Do not count skipped pages because that makes the function
+                 * return with no isolated pages if the LRU mostly contains
+                 * ineligible pages. This causes the VM to not reclaim any
+                 * pages, triggering a premature OOM.
+                 */
+                scan++;
                 switch (__isolate_lru_page(page, mode)) {
                 case 0:
                         nr_pages = hpage_nr_pages(page);
···
                         skipped += nr_skipped[zid];
                 }
         }
-        *nr_scanned = scan;
+        *nr_scanned = total_scan;
         trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-                                    scan, skipped, nr_taken, mode, lru);
+                                    total_scan, skipped, nr_taken, mode, lru);
         update_lru_sizes(lruvec, lru, nr_zone_taken);
         return nr_taken;
 }
-2
mm/vmstat.c
···
                 return zone == compare;
         }
 
-        /* The zone must be somewhere! */
-        WARN_ON_ONCE(1);
         return false;
 }
 