Merge branch 'akpm' (patches from Andrew)

+4

.mailmap

··· 48 48 Felix Moeller <felix@derklecks.de> 49 49 Filipe Lautert <filipe@icewall.org> 50 50 Franck Bui-Huu <vagabon.xyz@gmail.com> 51 + Frank Rowand <frowand.list@gmail.com> <frowand@mvista.com> 52 + Frank Rowand <frowand.list@gmail.com> <frank.rowand@am.sony.com> 53 + Frank Rowand <frowand.list@gmail.com> <frank.rowand@sonymobile.com> 51 54 Frank Zago <fzago@systemfabricworks.com> 52 55 Greg Kroah-Hartman <greg@echidna.(none)> 53 56 Greg Kroah-Hartman <gregkh@suse.de> ··· 82 79 Kenneth W Chen <kenneth.w.chen@intel.com> 83 80 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com> 84 81 Koushik <raghavendra.koushik@neterion.com> 82 + Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com> 85 83 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> 86 84 Leonid I Ananiev <leonid.i.ananiev@intel.com> 87 85 Linas Vepstas <linas@austin.ibm.com>

+9 -8

Documentation/sysctl/vm.txt

··· 581 581 "Zone Order" orders the zonelists by zone type, then by node within each 582 582 zone. Specify "[Zz]one" for zone order. 583 583 584 - Specify "[Dd]efault" to request automatic configuration. Autoconfiguration 585 - will select "node" order in following case. 586 - (1) if the DMA zone does not exist or 587 - (2) if the DMA zone comprises greater than 50% of the available memory or 588 - (3) if any node's DMA zone comprises greater than 70% of its local memory and 589 - the amount of local memory is big enough. 584 + Specify "[Dd]efault" to request automatic configuration. 590 585 591 - Otherwise, "zone" order will be selected. Default order is recommended unless 592 - this is causing problems for your system/application. 586 + On 32-bit, the Normal zone needs to be preserved for allocations accessible 587 + by the kernel, so "zone" order will be selected. 588 + 589 + On 64-bit, devices that require DMA32/DMA are relatively rare, so "node" 590 + order will be selected. 591 + 592 + Default order is recommended unless this is causing problems for your 593 + system/application. 593 594 594 595 ============================================================== 595 596

+1 -1

MAINTAINERS

··· 6400 6400 F: mm/kmemleak-test.c 6401 6401 6402 6402 KPROBES 6403 - M: Ananth N Mavinakayanahalli <ananth@in.ibm.com> 6403 + M: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com> 6404 6404 M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 6405 6405 M: "David S. Miller" <davem@davemloft.net> 6406 6406 M: Masami Hiramatsu <mhiramat@kernel.org>

+2 -2

drivers/rapidio/devices/rio_mport_cdev.c

··· 2669 2669 2670 2670 /* Create device class needed by udev */ 2671 2671 dev_class = class_create(THIS_MODULE, DRV_NAME); 2672 - if (!dev_class) { 2672 + if (IS_ERR(dev_class)) { 2673 2673 rmcd_error("Unable to create " DRV_NAME " class"); 2674 - return -EINVAL; 2674 + return PTR_ERR(dev_class); 2675 2675 } 2676 2676 2677 2677 ret = alloc_chrdev_region(&dev_number, 0, RIO_MAX_MPORTS, DRV_NAME);

+2

fs/ocfs2/dlm/dlmmaster.c

··· 2455 2455 2456 2456 spin_unlock(&dlm->spinlock); 2457 2457 2458 + ret = 0; 2459 + 2458 2460 done: 2459 2461 dlm_put(dlm); 2460 2462 return ret;

+30 -3

fs/proc/task_mmu.c

··· 1518 1518 return page; 1519 1519 } 1520 1520 1521 + #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1522 + static struct page *can_gather_numa_stats_pmd(pmd_t pmd, 1523 + struct vm_area_struct *vma, 1524 + unsigned long addr) 1525 + { 1526 + struct page *page; 1527 + int nid; 1528 + 1529 + if (!pmd_present(pmd)) 1530 + return NULL; 1531 + 1532 + page = vm_normal_page_pmd(vma, addr, pmd); 1533 + if (!page) 1534 + return NULL; 1535 + 1536 + if (PageReserved(page)) 1537 + return NULL; 1538 + 1539 + nid = page_to_nid(page); 1540 + if (!node_isset(nid, node_states[N_MEMORY])) 1541 + return NULL; 1542 + 1543 + return page; 1544 + } 1545 + #endif 1546 + 1521 1547 static int gather_pte_stats(pmd_t *pmd, unsigned long addr, 1522 1548 unsigned long end, struct mm_walk *walk) 1523 1549 { ··· 1553 1527 pte_t *orig_pte; 1554 1528 pte_t *pte; 1555 1529 1530 + #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1556 1531 ptl = pmd_trans_huge_lock(pmd, vma); 1557 1532 if (ptl) { 1558 - pte_t huge_pte = *(pte_t *)pmd; 1559 1533 struct page *page; 1560 1534 1561 - page = can_gather_numa_stats(huge_pte, vma, addr); 1535 + page = can_gather_numa_stats_pmd(*pmd, vma, addr); 1562 1536 if (page) 1563 - gather_stats(page, md, pte_dirty(huge_pte), 1537 + gather_stats(page, md, pmd_dirty(*pmd), 1564 1538 HPAGE_PMD_SIZE/PAGE_SIZE); 1565 1539 spin_unlock(ptl); 1566 1540 return 0; ··· 1568 1542 1569 1543 if (pmd_trans_unstable(pmd)) 1570 1544 return 0; 1545 + #endif 1571 1546 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 1572 1547 do { 1573 1548 struct page *page = can_gather_numa_stats(*pte, vma, addr);

+5

include/linux/huge_mm.h

··· 152 152 } 153 153 154 154 struct page *get_huge_zero_page(void); 155 + void put_huge_zero_page(void); 155 156 156 157 #else /* CONFIG_TRANSPARENT_HUGEPAGE */ 157 158 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) ··· 209 208 return false; 210 209 } 211 210 211 + static inline void put_huge_zero_page(void) 212 + { 213 + BUILD_BUG(); 214 + } 212 215 213 216 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, 214 217 unsigned long addr, pmd_t *pmd, int flags)

+4

include/linux/mm.h

··· 1031 1031 page = compound_head(page); 1032 1032 if (atomic_read(compound_mapcount_ptr(page)) >= 0) 1033 1033 return true; 1034 + if (PageHuge(page)) 1035 + return false; 1034 1036 for (i = 0; i < hpage_nr_pages(page); i++) { 1035 1037 if (atomic_read(&page[i]._mapcount) >= 0) 1036 1038 return true; ··· 1140 1138 1141 1139 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, 1142 1140 pte_t pte); 1141 + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, 1142 + pmd_t pmd); 1143 1143 1144 1144 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, 1145 1145 unsigned long size);

+2 -1

kernel/kcov.c

··· 1 1 #define pr_fmt(fmt) "kcov: " fmt 2 2 3 + #define DISABLE_BRANCH_PROFILING 3 4 #include <linux/compiler.h> 4 5 #include <linux/types.h> 5 6 #include <linux/file.h> ··· 44 43 * Entry point from instrumented code. 45 44 * This is called once per basic-block/edge. 46 45 */ 47 - void __sanitizer_cov_trace_pc(void) 46 + void notrace __sanitizer_cov_trace_pc(void) 48 47 { 49 48 struct task_struct *t; 50 49 enum kcov_mode mode;

+5 -2

kernel/kexec_core.c

··· 1415 1415 VMCOREINFO_OFFSET(page, lru); 1416 1416 VMCOREINFO_OFFSET(page, _mapcount); 1417 1417 VMCOREINFO_OFFSET(page, private); 1418 + VMCOREINFO_OFFSET(page, compound_dtor); 1419 + VMCOREINFO_OFFSET(page, compound_order); 1420 + VMCOREINFO_OFFSET(page, compound_head); 1418 1421 VMCOREINFO_OFFSET(pglist_data, node_zones); 1419 1422 VMCOREINFO_OFFSET(pglist_data, nr_zones); 1420 1423 #ifdef CONFIG_FLAT_NODE_MEM_MAP ··· 1450 1447 #ifdef CONFIG_X86 1451 1448 VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); 1452 1449 #endif 1453 - #ifdef CONFIG_HUGETLBFS 1454 - VMCOREINFO_SYMBOL(free_huge_page); 1450 + #ifdef CONFIG_HUGETLB_PAGE 1451 + VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); 1455 1452 #endif 1456 1453 1457 1454 arch_crash_save_vmcoreinfo();

-4

lib/stackdepot.c

··· 210 210 goto fast_exit; 211 211 212 212 hash = hash_stack(trace->entries, trace->nr_entries); 213 - /* Bad luck, we won't store this stack. */ 214 - if (hash == 0) 215 - goto exit; 216 - 217 213 bucket = &stack_table[hash & STACK_HASH_MASK]; 218 214 219 215 /*

+5 -7

mm/huge_memory.c

··· 232 232 return READ_ONCE(huge_zero_page); 233 233 } 234 234 235 - static void put_huge_zero_page(void) 235 + void put_huge_zero_page(void) 236 236 { 237 237 /* 238 238 * Counter should never go to zero here. Only shrinker can put ··· 1684 1684 if (vma_is_dax(vma)) { 1685 1685 spin_unlock(ptl); 1686 1686 if (is_huge_zero_pmd(orig_pmd)) 1687 - put_huge_zero_page(); 1687 + tlb_remove_page(tlb, pmd_page(orig_pmd)); 1688 1688 } else if (is_huge_zero_pmd(orig_pmd)) { 1689 1689 pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd)); 1690 1690 atomic_long_dec(&tlb->mm->nr_ptes); 1691 1691 spin_unlock(ptl); 1692 - put_huge_zero_page(); 1692 + tlb_remove_page(tlb, pmd_page(orig_pmd)); 1693 1693 } else { 1694 1694 struct page *page = pmd_page(orig_pmd); 1695 1695 page_remove_rmap(page, true); ··· 1960 1960 * page fault if needed. 1961 1961 */ 1962 1962 return 0; 1963 - if (vma->vm_ops) 1963 + if (vma->vm_ops || (vm_flags & VM_NO_THP)) 1964 1964 /* khugepaged not yet working on file or special mappings */ 1965 1965 return 0; 1966 - VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); 1967 1966 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; 1968 1967 hend = vma->vm_end & HPAGE_PMD_MASK; 1969 1968 if (hstart < hend) ··· 2351 2352 return false; 2352 2353 if (is_vma_temporary_stack(vma)) 2353 2354 return false; 2354 - VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); 2355 - return true; 2355 + return !(vma->vm_flags & VM_NO_THP); 2356 2356 } 2357 2357 2358 2358 static void collapse_huge_page(struct mm_struct *mm,

+9 -1

mm/memory-failure.c

··· 888 888 } 889 889 } 890 890 891 - return get_page_unless_zero(head); 891 + if (get_page_unless_zero(head)) { 892 + if (head == compound_head(page)) 893 + return 1; 894 + 895 + pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page)); 896 + put_page(head); 897 + } 898 + 899 + return 0; 892 900 } 893 901 EXPORT_SYMBOL_GPL(get_hwpoison_page); 894 902

+40

mm/memory.c

··· 789 789 return pfn_to_page(pfn); 790 790 } 791 791 792 + #ifdef CONFIG_TRANSPARENT_HUGEPAGE 793 + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, 794 + pmd_t pmd) 795 + { 796 + unsigned long pfn = pmd_pfn(pmd); 797 + 798 + /* 799 + * There is no pmd_special() but there may be special pmds, e.g. 800 + * in a direct-access (dax) mapping, so let's just replicate the 801 + * !HAVE_PTE_SPECIAL case from vm_normal_page() here. 802 + */ 803 + if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { 804 + if (vma->vm_flags & VM_MIXEDMAP) { 805 + if (!pfn_valid(pfn)) 806 + return NULL; 807 + goto out; 808 + } else { 809 + unsigned long off; 810 + off = (addr - vma->vm_start) >> PAGE_SHIFT; 811 + if (pfn == vma->vm_pgoff + off) 812 + return NULL; 813 + if (!is_cow_mapping(vma->vm_flags)) 814 + return NULL; 815 + } 816 + } 817 + 818 + if (is_zero_pfn(pfn)) 819 + return NULL; 820 + if (unlikely(pfn > highest_memmap_pfn)) 821 + return NULL; 822 + 823 + /* 824 + * NOTE! We still have PageReserved() pages in the page tables. 825 + * eg. VDSO mappings can cause them to exist. 826 + */ 827 + out: 828 + return pfn_to_page(pfn); 829 + } 830 + #endif 831 + 792 832 /* 793 833 * copy one vm_area from one task to the other. Assumes the page tables 794 834 * already present in the new task to be cleared in the whole range

+7 -1

mm/migrate.c

··· 975 975 dec_zone_page_state(page, NR_ISOLATED_ANON + 976 976 page_is_file_cache(page)); 977 977 /* Soft-offlined page shouldn't go through lru cache list */ 978 - if (reason == MR_MEMORY_FAILURE) { 978 + if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) { 979 + /* 980 + * With this release, we free successfully migrated 981 + * page and set PG_HWPoison on just freed page 982 + * intentionally. Although it's rather weird, it's how 983 + * HWPoison flag works at the moment. 984 + */ 979 985 put_page(page); 980 986 if (!test_set_page_hwpoison(page)) 981 987 num_poisoned_pages_inc();

+5 -1

mm/page_io.c

··· 353 353 354 354 ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); 355 355 if (!ret) { 356 - swap_slot_free_notify(page); 356 + if (trylock_page(page)) { 357 + swap_slot_free_notify(page); 358 + unlock_page(page); 359 + } 360 + 357 361 count_vm_event(PSWPIN); 358 362 return 0; 359 363 }

+5

mm/swap.c

··· 728 728 zone = NULL; 729 729 } 730 730 731 + if (is_huge_zero_page(page)) { 732 + put_huge_zero_page(); 733 + continue; 734 + } 735 + 731 736 page = compound_head(page); 732 737 if (!put_page_testzero(page)) 733 738 continue;

+15 -15

mm/vmscan.c

··· 2553 2553 sc->gfp_mask |= __GFP_HIGHMEM; 2554 2554 2555 2555 for_each_zone_zonelist_nodemask(zone, z, zonelist, 2556 - requested_highidx, sc->nodemask) { 2556 + gfp_zone(sc->gfp_mask), sc->nodemask) { 2557 2557 enum zone_type classzone_idx; 2558 2558 2559 2559 if (!populated_zone(zone)) ··· 3318 3318 /* Try to sleep for a short interval */ 3319 3319 if (prepare_kswapd_sleep(pgdat, order, remaining, 3320 3320 balanced_classzone_idx)) { 3321 + /* 3322 + * Compaction records what page blocks it recently failed to 3323 + * isolate pages from and skips them in the future scanning. 3324 + * When kswapd is going to sleep, it is reasonable to assume 3325 + * that pages and compaction may succeed so reset the cache. 3326 + */ 3327 + reset_isolation_suitable(pgdat); 3328 + 3329 + /* 3330 + * We have freed the memory, now we should compact it to make 3331 + * allocation of the requested order possible. 3332 + */ 3333 + wakeup_kcompactd(pgdat, order, classzone_idx); 3334 + 3321 3335 remaining = schedule_timeout(HZ/10); 3322 3336 finish_wait(&pgdat->kswapd_wait, &wait); 3323 3337 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); ··· 3354 3340 * them before going back to sleep. 3355 3341 */ 3356 3342 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); 3357 - 3358 - /* 3359 - * Compaction records what page blocks it recently failed to 3360 - * isolate pages from and skips them in the future scanning. 3361 - * When kswapd is going to sleep, it is reasonable to assume 3362 - * that pages and compaction may succeed so reset the cache. 3363 - */ 3364 - reset_isolation_suitable(pgdat); 3365 - 3366 - /* 3367 - * We have freed the memory, now we should compact it to make 3368 - * allocation of the requested order possible. 3369 - */ 3370 - wakeup_kcompactd(pgdat, order, classzone_idx); 3371 3343 3372 3344 if (!kthread_should_stop()) 3373 3345 schedule();

Configure Feed

Configure Feed