Merge branch 'akpm' (incoming from Andrew)

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (incoming from Andrew)

Merge patches from Andrew Morton:
"23 fixes and a MAINTAINERS update"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits)
mm/hugetlb: check for pte NULL pointer in __page_check_address()
fix build with make 3.80
mm/mempolicy: fix !vma in new_vma_page()
MAINTAINERS: add Davidlohr as GPT maintainer
mm/memory-failure.c: recheck PageHuge() after hugetlb page migrate successfully
mm/compaction: respect ignore_skip_hint in update_pageblock_skip
mm/mempolicy: correct putback method for isolate pages if failed
mm: add missing dependency in Kconfig
sh: always link in helper functions extracted from libgcc
mm: page_alloc: exclude unreclaimable allocations from zone fairness policy
mm: numa: defer TLB flush for THP migration as long as possible
mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates
mm: fix TLB flush race between migration, and change_protection_range
mm: numa: avoid unnecessary disruption of NUMA hinting during migration
mm: numa: clear numa hinting information on mprotect
sched: numa: skip inaccessible VMAs
mm: numa: avoid unnecessary work on the failure path
mm: numa: ensure anon_vma is locked to prevent parallel THP splits
mm: numa: do not clear PTE for pte_numa update
mm: numa: do not clear PMD during PTE update scan
...

Linus Torvalds 12 years ago 86fbf161 a36c160c

+250 -58

24 changed files

expand all collapse all

MAINTAINERS

Makefile

arch

lib

Makefile

sparc

include

asm

pgtable_64.h

x86

include

asm

pgtable.h

gup.c

include

asm-generic

pgtable.h

linux

migrate.h

mm_types.h

reboot.h

kernel

fork.c

kexec.c

reboot.c

sched

fair.c

Kconfig

compaction.c

huge_memory.c

memory-failure.c

mempolicy.c

migrate.c

mprotect.c

page_alloc.c

pgtable-generic.c

rmap.c

MAINTAINERS

reviewed

···
3833 3833   S: Maintained
3834 3834   F: drivers/media/usb/gspca/
3835 3835
3836 3836 + GUID PARTITION TABLE (GPT)
3837 3837 + M: Davidlohr Bueso <davidlohr@hp.com>
3838 3838 + L: linux-efi@vger.kernel.org
3839 3839 + S: Maintained
3840 3840 + F: block/partitions/efi.*
3841 3841 +
3836 3842   STK1160 USB VIDEO CAPTURE DRIVER
3837 3843   M: Ezequiel Garcia <elezegarcia@gmail.com>
3838 3844   L: linux-media@vger.kernel.org

+7 -13

Makefile

reviewed

···
732 732   # Select initial ramdisk compression format, default is gzip(1).
733 733   # This shall be used by the dracut(8) tool while creating an initramfs image.
734 734   #
735 735 - INITRD_COMPRESS=gzip
736 736 - ifeq ($(CONFIG_RD_BZIP2), y)
737 737 - INITRD_COMPRESS=bzip2
738 738 - else ifeq ($(CONFIG_RD_LZMA), y)
739 739 - INITRD_COMPRESS=lzma
740 740 - else ifeq ($(CONFIG_RD_XZ), y)
741 741 - INITRD_COMPRESS=xz
742 742 - else ifeq ($(CONFIG_RD_LZO), y)
743 743 - INITRD_COMPRESS=lzo
744 744 - else ifeq ($(CONFIG_RD_LZ4), y)
745 745 - INITRD_COMPRESS=lz4
746 746 - endif
747 747 - export INITRD_COMPRESS
735 735 + INITRD_COMPRESS-y := gzip
736 736 + INITRD_COMPRESS-$(CONFIG_RD_BZIP2) := bzip2
737 737 + INITRD_COMPRESS-$(CONFIG_RD_LZMA) := lzma
738 738 + INITRD_COMPRESS-$(CONFIG_RD_XZ) := xz
739 739 + INITRD_COMPRESS-$(CONFIG_RD_LZO) := lzo
740 740 + INITRD_COMPRESS-$(CONFIG_RD_LZ4) := lz4
741 741 + export INITRD_COMPRESS := $(INITRD_COMPRESS-y)
748 742
749 743   ifdef CONFIG_MODULE_SIG_ALL
750 744   MODSECKEY = ./signing_key.priv

+1 -1

arch/sh/lib/Makefile

reviewed

···
6 6   checksum.o strlen.o div64.o div64-generic.o
7 7
8 8   # Extracted from libgcc
9 9 - lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
9 9 + obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \
10 10   ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \
11 11   udiv_qrnnd.o
12 12

+2 -2

arch/sparc/include/asm/pgtable_64.h

reviewed

···
619 619   }
620 620
621 621   #define pte_accessible pte_accessible
622 622 - static inline unsigned long pte_accessible(pte_t a)
622 622 + static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
623 623   {
624 624   return pte_val(a) & _PAGE_VALID;
625 625   }
···
847 847   * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
848 848   * and SUN4V pte layout, so this inline test is fine.
849 849   */
850 850 - if (likely(mm != &init_mm) && pte_accessible(orig))
850 850 + if (likely(mm != &init_mm) && pte_accessible(mm, orig))
851 851   tlb_batch_add(mm, addr, ptep, orig, fullmm);
852 852   }
853 853

+9 -2

arch/x86/include/asm/pgtable.h

reviewed

···
452 452   }
453 453
454 454   #define pte_accessible pte_accessible
455 455 - static inline int pte_accessible(pte_t a)
455 455 + static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
456 456   {
457 457 - return pte_flags(a) & _PAGE_PRESENT;
457 457 + if (pte_flags(a) & _PAGE_PRESENT)
458 458 + return true;
459 459 +
460 460 + if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
461 461 + mm_tlb_flush_pending(mm))
462 462 + return true;
463 463 +
464 464 + return false;
458 465   }
459 466
460 467   static inline int pte_hidden(pte_t pte)

+13

arch/x86/mm/gup.c

reviewed

··· 83 83 pte_t pte = gup_get_pte(ptep); 84 84 struct page *page; 85 85 86 86 + /* Similar to the PMD case, NUMA hinting must take slow path */ 87 87 + if (pte_numa(pte)) { 88 88 + pte_unmap(ptep); 89 89 + return 0; 90 90 + } 91 91 + 86 92 if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { 87 93 pte_unmap(ptep); 88 94 return 0; ··· 173 167 if (pmd_none(pmd) || pmd_trans_splitting(pmd)) 174 168 return 0; 175 169 if (unlikely(pmd_large(pmd))) { 170 170 + /* 171 171 + * NUMA hinting faults need to be handled in the GUP 172 172 + * slowpath for accounting purposes and so that they 173 173 + * can be serialised against THP migration. 174 174 + */ 175 175 + if (pmd_numa(pmd)) 176 176 + return 0; 176 177 if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) 177 178 return 0; 178 179 } else {

+1 -1

include/asm-generic/pgtable.h

reviewed

···
217 217   #endif
218 218
219 219   #ifndef pte_accessible
220 220 - # define pte_accessible(pte) ((void)(pte),1)
220 220 + # define pte_accessible(mm, pte) ((void)(pte), 1)
221 221   #endif
222 222
223 223   #ifndef flush_tlb_fix_spurious_fault

include/linux/migrate.h

reviewed

···
90 90   #endif /* CONFIG_MIGRATION */
91 91
92 92   #ifdef CONFIG_NUMA_BALANCING
93 93 + extern bool pmd_trans_migrating(pmd_t pmd);
94 94 + extern void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd);
93 95   extern int migrate_misplaced_page(struct page *page,
94 96   struct vm_area_struct *vma, int node);
95 97   extern bool migrate_ratelimited(int node);
96 98   #else
99 99 + static inline bool pmd_trans_migrating(pmd_t pmd)
100 100 + {
101 101 + return false;
102 102 + }
103 103 + static inline void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
104 104 + {
105 105 + }
97 106   static inline int migrate_misplaced_page(struct page *page,
98 107   struct vm_area_struct *vma, int node)
99 108   {

+49

include/linux/mm_types.h

reviewed

··· 443 443 /* numa_scan_seq prevents two threads setting pte_numa */ 444 444 int numa_scan_seq; 445 445 #endif 446 446 + #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) 447 447 + /* 448 448 + * An operation with batched TLB flushing is going on. Anything that 449 449 + * can move process memory needs to flush the TLB when moving a 450 450 + * PROT_NONE or PROT_NUMA mapped page. 451 451 + */ 452 452 + bool tlb_flush_pending; 453 453 + #endif 446 454 struct uprobes_state uprobes_state; 447 455 }; 448 456 ··· 466 458 { 467 459 return mm->cpu_vm_mask_var; 468 460 } 461 461 + 462 462 + #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) 463 463 + /* 464 464 + * Memory barriers to keep this state in sync are graciously provided by 465 465 + * the page table locks, outside of which no page table modifications happen. 466 466 + * The barriers below prevent the compiler from re-ordering the instructions 467 467 + * around the memory barriers that are already present in the code. 468 468 + */ 469 469 + static inline bool mm_tlb_flush_pending(struct mm_struct *mm) 470 470 + { 471 471 + barrier(); 472 472 + return mm->tlb_flush_pending; 473 473 + } 474 474 + static inline void set_tlb_flush_pending(struct mm_struct *mm) 475 475 + { 476 476 + mm->tlb_flush_pending = true; 477 477 + 478 478 + /* 479 479 + * Guarantee that the tlb_flush_pending store does not leak into the 480 480 + * critical section updating the page tables 481 481 + */ 482 482 + smp_mb__before_spinlock(); 483 483 + } 484 484 + /* Clearing is done after a TLB flush, which also provides a barrier. 
*/ 485 485 + static inline void clear_tlb_flush_pending(struct mm_struct *mm) 486 486 + { 487 487 + barrier(); 488 488 + mm->tlb_flush_pending = false; 489 489 + } 490 490 + #else 491 491 + static inline bool mm_tlb_flush_pending(struct mm_struct *mm) 492 492 + { 493 493 + return false; 494 494 + } 495 495 + static inline void set_tlb_flush_pending(struct mm_struct *mm) 496 496 + { 497 497 + } 498 498 + static inline void clear_tlb_flush_pending(struct mm_struct *mm) 499 499 + { 500 500 + } 501 501 + #endif 469 502 470 503 #endif /* _LINUX_MM_TYPES_H */

include/linux/reboot.h

reviewed

···
43 43   * Architecture-specific implementations of sys_reboot commands.
44 44   */
45 45
46 46 + extern void migrate_to_reboot_cpu(void);
46 47   extern void machine_restart(char *cmd);
47 48   extern void machine_halt(void);
48 49   extern void machine_power_off(void);

kernel/fork.c

reviewed

···
537 537   spin_lock_init(&mm->page_table_lock);
538 538   mm_init_aio(mm);
539 539   mm_init_owner(mm, p);
540 540 + clear_tlb_flush_pending(mm);
540 541
541 542   if (likely(!mm_alloc_pgd(mm))) {
542 543   mm->def_flags = 0;

kernel/kexec.c

reviewed

···
1680 1680   {
1681 1681   kexec_in_progress = true;
1682 1682   kernel_restart_prepare(NULL);
1683 1683 + migrate_to_reboot_cpu();
1683 1684   printk(KERN_EMERG "Starting new kernel\n");
1684 1685   machine_shutdown();
1685 1686   }

+1 -1

kernel/reboot.c

reviewed

···
104 104   }
105 105   EXPORT_SYMBOL(unregister_reboot_notifier);
106 106
107 107 - static void migrate_to_reboot_cpu(void)
107 107 + void migrate_to_reboot_cpu(void)
108 108   {
109 109   /* The boot cpu is always logical cpu 0 */
110 110   int cpu = reboot_cpu;

kernel/sched/fair.c

reviewed

···
1738 1738   (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
1739 1739   continue;
1740 1740
1741 1741 + /*
1742 1742 + * Skip inaccessible VMAs to avoid any confusion between
1743 1743 + * PROT_NONE and NUMA hinting ptes
1744 1744 + */
1745 1745 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
1746 1746 + continue;
1747 1747 +
1741 1748   do {
1742 1749   start = max(start, vma->vm_start);
1743 1750   end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);

+1 -1

mm/Kconfig

reviewed

···
543 543
544 544   config MEM_SOFT_DIRTY
545 545   bool "Track memory changes"
546 546 - depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY
546 546 + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
547 547   select PROC_PAGE_MONITOR
548 548   help
549 549   This option enables memory changes tracking by introducing a

mm/compaction.c

reviewed

···
134 134   bool migrate_scanner)
135 135   {
136 136   struct zone *zone = cc->zone;
137 137 +
138 138 + if (cc->ignore_skip_hint)
139 139 + return;
140 140 +
137 141   if (!page)
138 142   return;
139 143

+36 -9

mm/huge_memory.c

reviewed

··· 882 882 ret = 0; 883 883 goto out_unlock; 884 884 } 885 885 + 886 886 + /* mmap_sem prevents this happening but warn if that changes */ 887 887 + WARN_ON(pmd_trans_migrating(pmd)); 888 888 + 885 889 if (unlikely(pmd_trans_splitting(pmd))) { 886 890 /* split huge page running from under us */ 887 891 spin_unlock(src_ptl); ··· 1247 1243 if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) 1248 1244 return ERR_PTR(-EFAULT); 1249 1245 1246 1246 + /* Full NUMA hinting faults to serialise migration in fault paths */ 1247 1247 + if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) 1248 1248 + goto out; 1249 1249 + 1250 1250 page = pmd_page(*pmd); 1251 1251 VM_BUG_ON(!PageHead(page)); 1252 1252 if (flags & FOLL_TOUCH) { ··· 1303 1295 if (unlikely(!pmd_same(pmd, *pmdp))) 1304 1296 goto out_unlock; 1305 1297 1298 1298 + /* 1299 1299 + * If there are potential migrations, wait for completion and retry 1300 1300 + * without disrupting NUMA hinting information. Do not relock and 1301 1301 + * check_same as the page may no longer be mapped. 1302 1302 + */ 1303 1303 + if (unlikely(pmd_trans_migrating(*pmdp))) { 1304 1304 + spin_unlock(ptl); 1305 1305 + wait_migrate_huge_page(vma->anon_vma, pmdp); 1306 1306 + goto out; 1307 1307 + } 1308 1308 + 1306 1309 page = pmd_page(pmd); 1307 1310 BUG_ON(is_huge_zero_page(page)); 1308 1311 page_nid = page_to_nid(page); ··· 1342 1323 /* If the page was locked, there are no parallel migrations */ 1343 1324 if (page_locked) 1344 1325 goto clear_pmdnuma; 1326 1326 + } 1345 1327 1346 1346 - /* 1347 1347 - * Otherwise wait for potential migrations and retry. We do 1348 1348 - * relock and check_same as the page may no longer be mapped. 1349 1349 - * As the fault is being retried, do not account for it. 
1350 1350 - */ 1328 1328 + /* Migration could have started since the pmd_trans_migrating check */ 1329 1329 + if (!page_locked) { 1351 1330 spin_unlock(ptl); 1352 1331 wait_on_page_locked(page); 1353 1332 page_nid = -1; 1354 1333 goto out; 1355 1334 } 1356 1335 1357 1357 - /* Page is misplaced, serialise migrations and parallel THP splits */ 1336 1336 + /* 1337 1337 + * Page is misplaced. Page lock serialises migrations. Acquire anon_vma 1338 1338 + * to serialises splits 1339 1339 + */ 1358 1340 get_page(page); 1359 1341 spin_unlock(ptl); 1360 1360 - if (!page_locked) 1361 1361 - lock_page(page); 1362 1342 anon_vma = page_lock_anon_vma_read(page); 1363 1343 1364 1344 /* Confirm the PMD did not change while page_table_lock was released */ ··· 1367 1349 put_page(page); 1368 1350 page_nid = -1; 1369 1351 goto out_unlock; 1352 1352 + } 1353 1353 + 1354 1354 + /* Bail if we fail to protect against THP splits for any reason */ 1355 1355 + if (unlikely(!anon_vma)) { 1356 1356 + put_page(page); 1357 1357 + page_nid = -1; 1358 1358 + goto clear_pmdnuma; 1370 1359 } 1371 1360 1372 1361 /* ··· 1542 1517 ret = 1; 1543 1518 if (!prot_numa) { 1544 1519 entry = pmdp_get_and_clear(mm, addr, pmd); 1520 1520 + if (pmd_numa(entry)) 1521 1521 + entry = pmd_mknonnuma(entry); 1545 1522 entry = pmd_modify(entry, newprot); 1546 1523 ret = HPAGE_PMD_NR; 1547 1524 BUG_ON(pmd_write(entry)); ··· 1558 1531 */ 1559 1532 if (!is_huge_zero_page(page) && 1560 1533 !pmd_numa(*pmd)) { 1561 1561 - entry = pmdp_get_and_clear(mm, addr, pmd); 1534 1534 + entry = *pmd; 1562 1535 entry = pmd_mknuma(entry); 1563 1536 ret = HPAGE_PMD_NR; 1564 1537 }

+10 -4

mm/memory-failure.c

reviewed

··· 1505 1505 if (ret > 0) 1506 1506 ret = -EIO; 1507 1507 } else { 1508 1508 - set_page_hwpoison_huge_page(hpage); 1509 1509 - dequeue_hwpoisoned_huge_page(hpage); 1510 1510 - atomic_long_add(1 << compound_order(hpage), 1511 1511 - &num_poisoned_pages); 1508 1508 + /* overcommit hugetlb page will be freed to buddy */ 1509 1509 + if (PageHuge(page)) { 1510 1510 + set_page_hwpoison_huge_page(hpage); 1511 1511 + dequeue_hwpoisoned_huge_page(hpage); 1512 1512 + atomic_long_add(1 << compound_order(hpage), 1513 1513 + &num_poisoned_pages); 1514 1514 + } else { 1515 1515 + SetPageHWPoison(page); 1516 1516 + atomic_long_inc(&num_poisoned_pages); 1517 1517 + } 1512 1518 } 1513 1519 return ret; 1514 1520 }

+10 -8

mm/mempolicy.c

reviewed

··· 1197 1197 break; 1198 1198 vma = vma->vm_next; 1199 1199 } 1200 1200 - /* 1201 1201 - * queue_pages_range() confirms that @page belongs to some vma, 1202 1202 - * so vma shouldn't be NULL. 1203 1203 - */ 1204 1204 - BUG_ON(!vma); 1205 1200 1206 1206 - if (PageHuge(page)) 1207 1207 - return alloc_huge_page_noerr(vma, address, 1); 1201 1201 + if (PageHuge(page)) { 1202 1202 + if (vma) 1203 1203 + return alloc_huge_page_noerr(vma, address, 1); 1204 1204 + else 1205 1205 + return NULL; 1206 1206 + } 1207 1207 + /* 1208 1208 + * if !vma, alloc_page_vma() will use task or system default policy 1209 1209 + */ 1208 1210 return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 1209 1211 } 1210 1212 #else ··· 1320 1318 if (nr_failed && (flags & MPOL_MF_STRICT)) 1321 1319 err = -EIO; 1322 1320 } else 1323 1323 - putback_lru_pages(&pagelist); 1321 1321 + putback_movable_pages(&pagelist); 1324 1322 1325 1323 up_write(&mm->mmap_sem); 1326 1324 mpol_out:

+58 -11

mm/migrate.c

reviewed

··· 36 36 #include <linux/hugetlb_cgroup.h> 37 37 #include <linux/gfp.h> 38 38 #include <linux/balloon_compaction.h> 39 39 + #include <linux/mmu_notifier.h> 39 40 40 41 #include <asm/tlbflush.h> 41 42 ··· 1655 1654 return 1; 1656 1655 } 1657 1656 1657 1657 + bool pmd_trans_migrating(pmd_t pmd) 1658 1658 + { 1659 1659 + struct page *page = pmd_page(pmd); 1660 1660 + return PageLocked(page); 1661 1661 + } 1662 1662 + 1663 1663 + void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd) 1664 1664 + { 1665 1665 + struct page *page = pmd_page(*pmd); 1666 1666 + wait_on_page_locked(page); 1667 1667 + } 1668 1668 + 1658 1669 /* 1659 1670 * Attempt to migrate a misplaced page to the specified destination 1660 1671 * node. Caller is expected to have an elevated reference count on ··· 1729 1716 struct page *page, int node) 1730 1717 { 1731 1718 spinlock_t *ptl; 1732 1732 - unsigned long haddr = address & HPAGE_PMD_MASK; 1733 1719 pg_data_t *pgdat = NODE_DATA(node); 1734 1720 int isolated = 0; 1735 1721 struct page *new_page = NULL; 1736 1722 struct mem_cgroup *memcg = NULL; 1737 1723 int page_lru = page_is_file_cache(page); 1724 1724 + unsigned long mmun_start = address & HPAGE_PMD_MASK; 1725 1725 + unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; 1726 1726 + pmd_t orig_entry; 1738 1727 1739 1728 /* 1740 1729 * Rate-limit the amount of data that is being migrated to a node. 
··· 1759 1744 goto out_fail; 1760 1745 } 1761 1746 1747 1747 + if (mm_tlb_flush_pending(mm)) 1748 1748 + flush_tlb_range(vma, mmun_start, mmun_end); 1749 1749 + 1762 1750 /* Prepare a page as a migration target */ 1763 1751 __set_page_locked(new_page); 1764 1752 SetPageSwapBacked(new_page); ··· 1773 1755 WARN_ON(PageLRU(new_page)); 1774 1756 1775 1757 /* Recheck the target PMD */ 1758 1758 + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 1776 1759 ptl = pmd_lock(mm, pmd); 1777 1777 - if (unlikely(!pmd_same(*pmd, entry))) { 1760 1760 + if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) { 1761 1761 + fail_putback: 1778 1762 spin_unlock(ptl); 1763 1763 + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 1779 1764 1780 1765 /* Reverse changes made by migrate_page_copy() */ 1781 1766 if (TestClearPageActive(new_page)) ··· 1795 1774 putback_lru_page(page); 1796 1775 mod_zone_page_state(page_zone(page), 1797 1776 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); 1798 1798 - goto out_fail; 1777 1777 + 1778 1778 + goto out_unlock; 1799 1779 } 1800 1780 1801 1781 /* ··· 1808 1786 */ 1809 1787 mem_cgroup_prepare_migration(page, new_page, &memcg); 1810 1788 1789 1789 + orig_entry = *pmd; 1811 1790 entry = mk_pmd(new_page, vma->vm_page_prot); 1812 1812 - entry = pmd_mknonnuma(entry); 1813 1813 - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); 1814 1791 entry = pmd_mkhuge(entry); 1792 1792 + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); 1815 1793 1816 1816 - pmdp_clear_flush(vma, haddr, pmd); 1817 1817 - set_pmd_at(mm, haddr, pmd, entry); 1818 1818 - page_add_new_anon_rmap(new_page, vma, haddr); 1794 1794 + /* 1795 1795 + * Clear the old entry under pagetable lock and establish the new PTE. 1796 1796 + * Any parallel GUP will either observe the old page blocking on the 1797 1797 + * page lock, block on the page table lock or observe the new page. 
1798 1798 + * The SetPageUptodate on the new page and page_add_new_anon_rmap 1799 1799 + * guarantee the copy is visible before the pagetable update. 1800 1800 + */ 1801 1801 + flush_cache_range(vma, mmun_start, mmun_end); 1802 1802 + page_add_new_anon_rmap(new_page, vma, mmun_start); 1803 1803 + pmdp_clear_flush(vma, mmun_start, pmd); 1804 1804 + set_pmd_at(mm, mmun_start, pmd, entry); 1805 1805 + flush_tlb_range(vma, mmun_start, mmun_end); 1819 1806 update_mmu_cache_pmd(vma, address, &entry); 1807 1807 + 1808 1808 + if (page_count(page) != 2) { 1809 1809 + set_pmd_at(mm, mmun_start, pmd, orig_entry); 1810 1810 + flush_tlb_range(vma, mmun_start, mmun_end); 1811 1811 + update_mmu_cache_pmd(vma, address, &entry); 1812 1812 + page_remove_rmap(new_page); 1813 1813 + goto fail_putback; 1814 1814 + } 1815 1815 + 1820 1816 page_remove_rmap(page); 1817 1817 + 1821 1818 /* 1822 1819 * Finish the charge transaction under the page table lock to 1823 1820 * prevent split_huge_page() from dividing up the charge ··· 1844 1803 */ 1845 1804 mem_cgroup_end_migration(memcg, page, new_page, true); 1846 1805 spin_unlock(ptl); 1806 1806 + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 1847 1807 1848 1808 unlock_page(new_page); 1849 1809 unlock_page(page); ··· 1862 1820 out_fail: 1863 1821 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); 1864 1822 out_dropref: 1865 1865 - entry = pmd_mknonnuma(entry); 1866 1866 - set_pmd_at(mm, haddr, pmd, entry); 1867 1867 - update_mmu_cache_pmd(vma, address, &entry); 1823 1823 + ptl = pmd_lock(mm, pmd); 1824 1824 + if (pmd_same(*pmd, entry)) { 1825 1825 + entry = pmd_mknonnuma(entry); 1826 1826 + set_pmd_at(mm, mmun_start, pmd, entry); 1827 1827 + update_mmu_cache_pmd(vma, address, &entry); 1828 1828 + } 1829 1829 + spin_unlock(ptl); 1868 1830 1831 1831 + out_unlock: 1869 1832 unlock_page(page); 1870 1833 put_page(page); 1871 1834 return 0;

+11 -2

mm/mprotect.c

reviewed

··· 52 52 pte_t ptent; 53 53 bool updated = false; 54 54 55 55 - ptent = ptep_modify_prot_start(mm, addr, pte); 56 55 if (!prot_numa) { 56 56 + ptent = ptep_modify_prot_start(mm, addr, pte); 57 57 + if (pte_numa(ptent)) 58 58 + ptent = pte_mknonnuma(ptent); 57 59 ptent = pte_modify(ptent, newprot); 58 60 updated = true; 59 61 } else { 60 62 struct page *page; 61 63 64 64 + ptent = *pte; 62 65 page = vm_normal_page(vma, addr, oldpte); 63 66 if (page) { 64 67 if (!pte_numa(oldpte)) { 65 68 ptent = pte_mknuma(ptent); 69 69 + set_pte_at(mm, addr, pte, ptent); 66 70 updated = true; 67 71 } 68 72 } ··· 83 79 84 80 if (updated) 85 81 pages++; 86 86 - ptep_modify_prot_commit(mm, addr, pte, ptent); 82 82 + 83 83 + /* Only !prot_numa always clears the pte */ 84 84 + if (!prot_numa) 85 85 + ptep_modify_prot_commit(mm, addr, pte, ptent); 87 86 } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { 88 87 swp_entry_t entry = pte_to_swp_entry(oldpte); 89 88 ··· 188 181 BUG_ON(addr >= end); 189 182 pgd = pgd_offset(mm, addr); 190 183 flush_cache_range(vma, addr, end); 184 184 + set_tlb_flush_pending(mm); 191 185 do { 192 186 next = pgd_addr_end(addr, end); 193 187 if (pgd_none_or_clear_bad(pgd)) ··· 200 192 /* Only flush the TLB if we actually modified any entries: */ 201 193 if (pages) 202 194 flush_tlb_range(vma, start, end); 195 195 + clear_tlb_flush_pending(mm); 203 196 204 197 return pages; 205 198 }

+2 -1

mm/page_alloc.c

reviewed

···
1920 1920   * back to remote zones that do not partake in the
1921 1921   * fairness round-robin cycle of this zonelist.
1922 1922   */
1923 1923 - if (alloc_flags & ALLOC_WMARK_LOW) {
1923 1923 + if ((alloc_flags & ALLOC_WMARK_LOW) &&
1924 1924 + (gfp_mask & GFP_MOVABLE_MASK)) {
1924 1925   if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
1925 1926   continue;
1926 1927   if (zone_reclaim_mode &&

+6 -2

mm/pgtable-generic.c

reviewed

···
110 110   pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
111 111   pte_t *ptep)
112 112   {
113 113 + struct mm_struct *mm = (vma)->vm_mm;
113 114   pte_t pte;
114 114 - pte = ptep_get_and_clear((vma)->vm_mm, address, ptep);
115 115 - if (pte_accessible(pte))
115 115 + pte = ptep_get_and_clear(mm, address, ptep);
116 116 + if (pte_accessible(mm, pte))
116 117   flush_tlb_page(vma, address);
117 118   return pte;
118 119   }
···
192 191   void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
193 192   pmd_t *pmdp)
194 193   {
194 194 + pmd_t entry = *pmdp;
195 195 + if (pmd_numa(entry))
196 196 + entry = pmd_mknonnuma(entry);
195 197   set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp));
196 198   flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
197 199   }

mm/rmap.c

reviewed

···
600 600   spinlock_t *ptl;
601 601
602 602   if (unlikely(PageHuge(page))) {
603 603 + /* when pud is not present, pte will be NULL */
603 604   pte = huge_pte_offset(mm, address);
605 605 + if (!pte)
606 606 + return NULL;
607 607 +
604 608   ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
605 609   goto check;
606 610   }