Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm, docs: update memory.stat description with workingset* entries
mm: vmscan: scan until it finds eligible pages
mm, thp: copying user pages must schedule on collapse
dax: fix PMD data corruption when fault races with write
dax: fix data corruption when fault races with write
ext4: return to starting transaction in ext4_dax_huge_fault()
mm: fix data corruption due to stale mmap reads
dax: prevent invalidation of mapped DAX entries
Tigran has moved
mm, vmalloc: fix vmalloc users tracking properly
mm/khugepaged: add missed tracepoint for collapse_huge_page_swapin
gcov: support GCC 7.1
mm, vmstat: Remove spurious WARN() during zoneinfo print
time: delete current_fs_time()
hwpoison, memcg: forcibly uncharge LRU pages

23 files changed, +147 -128
+12
Documentation/cgroup-v2.txt
···
 
 	  Number of major page faults incurred
 
+	  workingset_refault
+
+		Number of refaults of previously evicted pages
+
+	  workingset_activate
+
+		Number of refaulted pages that were immediately activated
+
+	  workingset_nodereclaim
+
+		Number of times a shadow node has been reclaimed
+
   memory.swap.current
 
 	A read-only single value file which exists on non-root
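The new counters appear in memory.stat as plain "name value" lines. As a quick illustration only (not part of this patch set), a minimal userspace sketch that prints just the workingset_* lines; the cgroup path below is an assumption and must be adjusted to a real cgroup v2 group on the system:

/*
 * Hypothetical example, not kernel code: dump the workingset_* entries
 * documented above from a cgroup v2 memory.stat file.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Assumed path; point this at an actual cgroup v2 group. */
        const char *path = "/sys/fs/cgroup/example/memory.stat";
        char line[256];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        while (fgets(line, sizeof(line), f)) {
                /* Lines look like "workingset_refault 12345". */
                if (!strncmp(line, "workingset_", strlen("workingset_")))
                        fputs(line, stdout);
        }
        fclose(f);
        return 0;
}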
+1 -1
Documentation/filesystems/bfs.txt
···
 If you have any patches, questions or suggestions regarding this BFS
 implementation please contact the author:
 
-Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+Tigran Aivazian <aivazian.tigran@gmail.com>
+1 -1
MAINTAINERS
···
 F:      drivers/net/ethernet/ec_bhf.c
 
 BFS FILE SYSTEM
-M:      "Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
+M:      "Tigran A. Aivazian" <aivazian.tigran@gmail.com>
 S:      Maintained
 F:      Documentation/filesystems/bfs.txt
 F:      fs/bfs/
+1 -1
arch/x86/kernel/cpu/microcode/amd.c
···
  * Author: Peter Oruba <peter.oruba@amd.com>
  *
  * Based on work by:
- * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Tigran Aivazian <aivazian.tigran@gmail.com>
  *
  * early loader:
  * Copyright (C) 2013 Advanced Micro Devices, Inc.
+1 -1
arch/x86/kernel/cpu/microcode/core.c
···
 /*
  * CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *               2006      Shaohua Li <shaohua.li@intel.com>
  *               2013-2016 Borislav Petkov <bp@alien8.de>
  *
+1 -1
arch/x86/kernel/cpu/microcode/intel.c
···
 /*
  * Intel CPU Microcode Update Driver for Linux
  *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *               2006 Shaohua Li <shaohua.li@intel.com>
  *
  * Intel CPU microcode early update for Linux
+2 -2
fs/bfs/inode.c
···
 /*
  *      fs/bfs/inode.c
  *      BFS superblock and inode operations.
- *      Copyright (C) 1999-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *      Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
  *      From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
  *
  *      Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
···
 #include <linux/uaccess.h>
 #include "bfs.h"
 
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_AUTHOR("Tigran Aivazian <aivazian.tigran@gmail.com>");
 MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux");
 MODULE_LICENSE("GPL");
 
+33 -62
fs/dax.c
···
 }
 
 /*
- * Invalidate exceptional DAX entry if easily possible. This handles DAX
- * entries for invalidate_inode_pages() so we evict the entry only if we can
- * do so without blocking.
- */
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-        int ret = 0;
-        void *entry, **slot;
-        struct radix_tree_root *page_tree = &mapping->page_tree;
-
-        spin_lock_irq(&mapping->tree_lock);
-        entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
-        if (!entry || !radix_tree_exceptional_entry(entry) ||
-            slot_locked(mapping, slot))
-                goto out;
-        if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-            radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-                goto out;
-        radix_tree_delete(page_tree, index);
-        mapping->nrexceptional--;
-        ret = 1;
-out:
-        spin_unlock_irq(&mapping->tree_lock);
-        if (ret)
-                dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-        return ret;
-}
-
-/*
  * Invalidate exceptional DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
···
          * into page tables. We have to tear down these mappings so that data
          * written by write(2) is visible in mmap.
          */
-        if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+        if (iomap->flags & IOMAP_F_NEW) {
                 invalidate_inode_pages2_range(inode->i_mapping,
                                               pos >> PAGE_SHIFT,
                                               (end - 1) >> PAGE_SHIFT);
···
         if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
                 flags |= IOMAP_WRITE;
 
+        entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+        if (IS_ERR(entry)) {
+                vmf_ret = dax_fault_return(PTR_ERR(entry));
+                goto out;
+        }
+
         /*
          * Note that we don't bother to use iomap_apply here: DAX required
          * the file system block size to be equal the page size, which means
···
         error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
         if (error) {
                 vmf_ret = dax_fault_return(error);
-                goto out;
+                goto unlock_entry;
         }
         if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-                vmf_ret = dax_fault_return(-EIO);       /* fs corruption? */
-                goto finish_iomap;
-        }
-
-        entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-        if (IS_ERR(entry)) {
-                vmf_ret = dax_fault_return(PTR_ERR(entry));
-                goto finish_iomap;
+                error = -EIO;   /* fs corruption? */
+                goto error_finish_iomap;
         }
 
         sector = dax_iomap_sector(&iomap, pos);
···
                 }
 
                 if (error)
-                        goto error_unlock_entry;
+                        goto error_finish_iomap;
 
                 __SetPageUptodate(vmf->cow_page);
                 vmf_ret = finish_fault(vmf);
                 if (!vmf_ret)
                         vmf_ret = VM_FAULT_DONE_COW;
-                goto unlock_entry;
+                goto finish_iomap;
         }
 
         switch (iomap.type) {
···
         case IOMAP_HOLE:
                 if (!(vmf->flags & FAULT_FLAG_WRITE)) {
                         vmf_ret = dax_load_hole(mapping, &entry, vmf);
-                        goto unlock_entry;
+                        goto finish_iomap;
                 }
                 /*FALLTHRU*/
         default:
···
                 break;
         }
 
- error_unlock_entry:
+ error_finish_iomap:
         vmf_ret = dax_fault_return(error) | major;
- unlock_entry:
-        put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  finish_iomap:
         if (ops->iomap_end) {
                 int copied = PAGE_SIZE;
···
                  */
                 ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
         }
- out:
+ unlock_entry:
+        put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
         trace_dax_pte_fault_done(inode, vmf, vmf_ret);
         return vmf_ret;
 }
···
                 goto fallback;
 
         /*
-         * Note that we don't use iomap_apply here. We aren't doing I/O, only
-         * setting up a mapping, so really we're using iomap_begin() as a way
-         * to look up our filesystem block.
-         */
-        pos = (loff_t)pgoff << PAGE_SHIFT;
-        error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
-        if (error)
-                goto fallback;
-
-        if (iomap.offset + iomap.length < pos + PMD_SIZE)
-                goto finish_iomap;
-
-        /*
          * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
          * PMD or a HZP entry. If it can't (because a 4k page is already in
          * the tree, for instance), it will return -EEXIST and we just fall
···
          */
         entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
         if (IS_ERR(entry))
+                goto fallback;
+
+        /*
+         * Note that we don't use iomap_apply here. We aren't doing I/O, only
+         * setting up a mapping, so really we're using iomap_begin() as a way
+         * to look up our filesystem block.
+         */
+        pos = (loff_t)pgoff << PAGE_SHIFT;
+        error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+        if (error)
+                goto unlock_entry;
+
+        if (iomap.offset + iomap.length < pos + PMD_SIZE)
                 goto finish_iomap;
 
         switch (iomap.type) {
···
         case IOMAP_UNWRITTEN:
         case IOMAP_HOLE:
                 if (WARN_ON_ONCE(write))
-                        goto unlock_entry;
+                        break;
                 result = dax_pmd_load_hole(vmf, &iomap, &entry);
                 break;
         default:
···
                 break;
         }
 
- unlock_entry:
-        put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
         if (ops->iomap_end) {
                 int copied = PMD_SIZE;
···
                 ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
                                 &iomap);
         }
+ unlock_entry:
+        put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
         if (result == VM_FAULT_FALLBACK) {
                 split_huge_pmd(vma, vmf->pmd, vmf->address);
+17 -4
fs/ext4/file.c
···
                                 enum page_entry_size pe_size)
 {
         int result;
+        handle_t *handle = NULL;
         struct inode *inode = file_inode(vmf->vma->vm_file);
         struct super_block *sb = inode->i_sb;
         bool write = vmf->flags & FAULT_FLAG_WRITE;
···
         if (write) {
                 sb_start_pagefault(sb);
                 file_update_time(vmf->vma->vm_file);
+                down_read(&EXT4_I(inode)->i_mmap_sem);
+                handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+                                               EXT4_DATA_TRANS_BLOCKS(sb));
+        } else {
+                down_read(&EXT4_I(inode)->i_mmap_sem);
         }
-        down_read(&EXT4_I(inode)->i_mmap_sem);
-        result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
-        up_read(&EXT4_I(inode)->i_mmap_sem);
-        if (write)
+        if (!IS_ERR(handle))
+                result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
+        else
+                result = VM_FAULT_SIGBUS;
+        if (write) {
+                if (!IS_ERR(handle))
+                        ext4_journal_stop(handle);
+                up_read(&EXT4_I(inode)->i_mmap_sem);
                 sb_end_pagefault(sb);
+        } else {
+                up_read(&EXT4_I(inode)->i_mmap_sem);
+        }
 
         return result;
 }
-1
include/linux/dax.h
···
 int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
                     const struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
                                       pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
-1
include/linux/fs.h
···
         inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
-extern struct timespec current_fs_time(struct super_block *sb);
 extern struct timespec current_time(struct inode *inode);
 
 /*
+6 -15
include/linux/vmalloc.h
···
 #include <linux/list.h>
 #include <linux/llist.h>
 #include <asm/page.h>           /* pgprot_t */
-#include <asm/pgtable.h>        /* PAGE_KERNEL */
 #include <linux/rbtree.h>
 
 struct vm_area_struct;          /* vma defining user mapping in mm_types.h */
···
                         const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
-#else
-extern void *__vmalloc_node(unsigned long size, unsigned long align,
-                            gfp_t gfp_mask, pgprot_t prot,
-                            int node, const void *caller);
-
-/*
- * We really want to have this inlined due to caller tracking. This
- * function is used by the highlevel vmalloc apis and so we want to track
- * their callers and inlining will achieve that.
- */
-static inline void *__vmalloc_node_flags(unsigned long size,
-                                         int node, gfp_t flags)
+static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
+                                                gfp_t flags, void *caller)
 {
-        return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
-                              node, __builtin_return_address(0));
+        return __vmalloc_node_flags(size, node, flags);
 }
+#else
+extern void *__vmalloc_node_flags_caller(unsigned long size,
+                                         int node, gfp_t flags, void *caller);
 #endif
 
 extern void vfree(const void *addr);
+6
kernel/gcov/base.c
···
 }
 EXPORT_SYMBOL(__gcov_merge_icall_topn);
 
+void __gcov_exit(void)
+{
+        /* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *
+3 -1
kernel/gcov/gcc_4_7.c
···
 #include <linux/vmalloc.h>
 #include "gcov.h"
 
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS                   9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS                   10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS                   9
-14
kernel/time/time.c
···
         return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
 }
 
-/**
- * current_fs_time - Return FS time
- * @sb: Superblock.
- *
- * Return the current time truncated to the time granularity supported by
- * the fs.
- */
-struct timespec current_fs_time(struct super_block *sb)
-{
-        struct timespec now = current_kernel_time();
-        return timespec_trunc(now, sb->s_time_gran);
-}
-EXPORT_SYMBOL(current_fs_time);
-
 /*
  * Convert jiffies to milliseconds and back.
  *
+6 -5
mm/khugepaged.c
···
                                       spinlock_t *ptl)
 {
         pte_t *_pte;
-        for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+        for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+             _pte++, page++, address += PAGE_SIZE) {
                 pte_t pteval = *_pte;
                 struct page *src_page;
···
                         spin_unlock(ptl);
                         free_page_and_swap_cache(src_page);
                 }
-
-                address += PAGE_SIZE;
-                page++;
+                cond_resched();
         }
 }
···
                         return false;
                 }
                 /* check if the pmd is still valid */
-                if (mm_find_pmd(mm, address) != pmd)
+                if (mm_find_pmd(mm, address) != pmd) {
+                        trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
                         return false;
+                }
         }
         if (ret & VM_FAULT_ERROR) {
                 trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+1 -1
mm/memcontrol.c
···
                 next = page->lru.next;
 
                 VM_BUG_ON_PAGE(PageLRU(page), page);
-                VM_BUG_ON_PAGE(page_count(page), page);
+                VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
 
                 if (!page->mem_cgroup)
                         continue;
+7
mm/memory-failure.c
···
          */
         ClearPageActive(p);
         ClearPageUnevictable(p);
+
+        /*
+         * Poisoned page might never drop its ref count to 0 so we have
+         * to uncharge it manually from its memcg.
+         */
+        mem_cgroup_uncharge(p);
+
         /*
          * drop the page count elevated by isolate_lru_page()
          */
+14 -7
mm/truncate.c
···
 
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
  */
 static int invalidate_exceptional_entry(struct address_space *mapping,
                                         pgoff_t index, void *entry)
 {
-        /* Handled by shmem itself */
-        if (shmem_mapping(mapping))
+        /* Handled by shmem itself, or for DAX we do nothing. */
+        if (shmem_mapping(mapping) || dax_mapping(mapping))
                 return 1;
-        if (dax_mapping(mapping))
-                return dax_invalidate_mapping_entry(mapping, index);
         clear_shadow_entry(mapping, index, entry);
         return 1;
 }
···
                 cond_resched();
                 index++;
         }
-
+        /*
+         * For DAX we invalidate page tables after invalidating radix tree. We
+         * could invalidate page tables while invalidating each entry however
+         * that would be expensive. And doing range unmapping before doesn't
+         * work as we have no cheap way to find whether radix tree entry didn't
+         * get remapped later.
+         */
+        if (dax_mapping(mapping)) {
+                unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+                                    (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+        }
 out:
         cleancache_invalidate_inode(mapping);
         return ret;
+2 -1
mm/util.c
···
         if (ret || size <= PAGE_SIZE)
                 return ret;
 
-        return __vmalloc_node_flags(size, node, flags);
+        return __vmalloc_node_flags_caller(size, node, flags,
+                        __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kvmalloc_node);
 
+18 -1
mm/vmalloc.c
···
 }
 EXPORT_SYMBOL(vmap);
 
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+                            gfp_t gfp_mask, pgprot_t prot,
+                            int node, const void *caller);
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                                  pgprot_t prot, int node)
 {
···
  *      with mm people.
  *
  */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
                             gfp_t gfp_mask, pgprot_t prot,
                             int node, const void *caller)
 {
···
                                 __builtin_return_address(0));
 }
 EXPORT_SYMBOL(__vmalloc);
+
+static inline void *__vmalloc_node_flags(unsigned long size,
+                                         int node, gfp_t flags)
+{
+        return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+                              node, __builtin_return_address(0));
+}
+
+
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+                                  void *caller)
+{
+        return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
 
 /**
  * vmalloc - allocate virtually contiguous memory
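The point of the new __vmalloc_node_flags_caller() variant is that kvmalloc_node() (mm/util.c above) can attribute its vmalloc fallback to its own caller rather than to mm/util.c. For illustration only, a self-contained userspace sketch of the same caller-tracking pattern; the names are hypothetical, and it relies on the GCC/Clang __builtin_return_address() builtin, not on any kernel API:

/*
 * Userspace sketch: an inline wrapper captures __builtin_return_address(0)
 * and hands it to a *_caller variant, so the allocation is attributed to the
 * real call site.  All identifiers here are made up for the example.
 */
#include <stdio.h>
#include <stdlib.h>

static void *alloc_caller(size_t size, const void *caller)
{
        /* A real implementation would record "caller" for allocation tracking. */
        printf("allocating %zu bytes on behalf of caller %p\n", size, caller);
        return malloc(size);
}

/* Must stay inlined so the builtin names the user, not this wrapper. */
static inline void *tracked_alloc(size_t size)
{
        return alloc_caller(size, __builtin_return_address(0));
}

int main(void)
{
        void *p = tracked_alloc(64);

        free(p);
        return 0;
}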
+15 -6
mm/vmscan.c
···
  *
  * Appropriate locks must be held before calling this function.
  *
- * @nr_to_scan: The number of pages to look through on the list.
+ * @nr_to_scan: The number of eligible pages to look through on the list.
  * @lruvec:     The LRU vector to pull pages from.
  * @dst:        The temp list to put pages on to.
  * @nr_scanned: The number of pages that were scanned.
···
         unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
         unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
         unsigned long skipped = 0;
-        unsigned long scan, nr_pages;
+        unsigned long scan, total_scan, nr_pages;
         LIST_HEAD(pages_skipped);
 
-        for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-                                        !list_empty(src); scan++) {
+        scan = 0;
+        for (total_scan = 0;
+             scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+             total_scan++) {
                 struct page *page;
 
                 page = lru_to_page(src);
···
                         continue;
                 }
 
+                /*
+                 * Do not count skipped pages because that makes the function
+                 * return with no isolated pages if the LRU mostly contains
+                 * ineligible pages. This causes the VM to not reclaim any
+                 * pages, triggering a premature OOM.
+                 */
+                scan++;
                 switch (__isolate_lru_page(page, mode)) {
                 case 0:
                         nr_pages = hpage_nr_pages(page);
···
                         skipped += nr_skipped[zid];
                 }
         }
-        *nr_scanned = scan;
+        *nr_scanned = total_scan;
         trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-                                    scan, skipped, nr_taken, mode, lru);
+                                    total_scan, skipped, nr_taken, mode, lru);
         update_lru_sizes(lruvec, lru, nr_zone_taken);
         return nr_taken;
 }
-2
mm/vmstat.c
···
                 return zone == compare;
         }
 
-        /* The zone must be somewhere! */
-        WARN_ON_ONCE(1);
         return false;
 }
 