Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"11 patches.

Subsystems affected by this patch series: mm (memcg, memory-failure,
oom-kill, secretmem, vmalloc, hugetlb, damon, and tools), and ocfs2"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer
mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()'
mm: khugepaged: skip huge page collapse for special files
mm, thp: bail out early in collapse_file for writeback page
mm/vmalloc: fix numa spreading for large hash tables
mm/secretmem: avoid letting secretmem_users drop to zero
ocfs2: fix race between searching chunks and release journal_head from buffer_head
mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap
mm: filemap: check if THP has hwpoisoned subpage for PMD page fault
mm: hwpoison: remove the unnecessary THP check
memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT

+110 -54
+13 -9
fs/ocfs2/suballoc.c
··· 1251 1251 { 1252 1252 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 1253 1253 struct journal_head *jh; 1254 - int ret; 1254 + int ret = 1; 1255 1255 1256 1256 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 1257 1257 return 0; ··· 1259 1259 if (!buffer_jbd(bg_bh)) 1260 1260 return 1; 1261 1261 1262 - jh = bh2jh(bg_bh); 1263 - spin_lock(&jh->b_state_lock); 1264 - bg = (struct ocfs2_group_desc *) jh->b_committed_data; 1265 - if (bg) 1266 - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 1267 - else 1268 - ret = 1; 1269 - spin_unlock(&jh->b_state_lock); 1262 + jbd_lock_bh_journal_head(bg_bh); 1263 + if (buffer_jbd(bg_bh)) { 1264 + jh = bh2jh(bg_bh); 1265 + spin_lock(&jh->b_state_lock); 1266 + bg = (struct ocfs2_group_desc *) jh->b_committed_data; 1267 + if (bg) 1268 + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 1269 + else 1270 + ret = 1; 1271 + spin_unlock(&jh->b_state_lock); 1272 + } 1273 + jbd_unlock_bh_journal_head(bg_bh); 1270 1274 1271 1275 return ret; 1272 1276 }
+23
include/linux/page-flags.h
··· 171 171 /* Compound pages. Stored in first tail page's flags */ 172 172 PG_double_map = PG_workingset, 173 173 174 + #ifdef CONFIG_MEMORY_FAILURE 175 + /* 176 + * Compound pages. Stored in first tail page's flags. 177 + * Indicates that at least one subpage is hwpoisoned in the 178 + * THP. 179 + */ 180 + PG_has_hwpoisoned = PG_mappedtodisk, 181 + #endif 182 + 174 183 /* non-lru isolated movable page */ 175 184 PG_isolated = PG_reclaim, 176 185 ··· 675 666 TESTPAGEFLAG_FALSE(TransTail) 676 667 PAGEFLAG_FALSE(DoubleMap) 677 668 TESTSCFLAG_FALSE(DoubleMap) 669 + #endif 670 + 671 + #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) 672 + /* 673 + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the 674 + * compound page. 675 + * 676 + * This flag is set by hwpoison handler. Cleared by THP split or free page. 677 + */ 678 + PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) 679 + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) 680 + #else 681 + PAGEFLAG_FALSE(HasHWPoisoned) 682 + TESTSCFLAG_FALSE(HasHWPoisoned) 678 683 #endif 679 684 680 685 /*
+2 -2
mm/damon/core-test.h
··· 219 219 r = damon_new_region(0, 22); 220 220 damon_add_region(r, t); 221 221 damon_split_regions_of(c, t, 2); 222 - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u); 222 + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); 223 223 damon_free_target(t); 224 224 225 225 t = damon_new_target(42); 226 226 r = damon_new_region(0, 220); 227 227 damon_add_region(r, t); 228 228 damon_split_regions_of(c, t, 4); 229 - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u); 229 + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); 230 230 damon_free_target(t); 231 231 damon_destroy_ctx(c); 232 232 }
+2
mm/huge_memory.c
··· 2426 2426 /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ 2427 2427 lruvec = lock_page_lruvec(head); 2428 2428 2429 + ClearPageHasHWPoisoned(head); 2430 + 2429 2431 for (i = nr - 1; i >= 1; i--) { 2430 2432 __split_huge_page_tail(head, i, lruvec, list); 2431 2433 /* Some pages can be beyond EOF: drop them from page cache */
+17 -9
mm/khugepaged.c
··· 445 445 if (!transhuge_vma_enabled(vma, vm_flags)) 446 446 return false; 447 447 448 + if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - 449 + vma->vm_pgoff, HPAGE_PMD_NR)) 450 + return false; 451 + 448 452 /* Enabled via shmem mount options or sysfs settings. */ 449 - if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { 450 - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, 451 - HPAGE_PMD_NR); 452 - } 453 + if (shmem_file(vma->vm_file)) 454 + return shmem_huge_enabled(vma); 453 455 454 456 /* THP settings require madvise. */ 455 457 if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) 456 458 return false; 457 459 458 - /* Read-only file mappings need to be aligned for THP to work. */ 460 + /* Only regular file is valid */ 459 461 if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && 460 - !inode_is_open_for_write(vma->vm_file->f_inode) && 461 462 (vm_flags & VM_EXEC)) { 462 - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, 463 - HPAGE_PMD_NR); 463 + struct inode *inode = vma->vm_file->f_inode; 464 + 465 + return !inode_is_open_for_write(inode) && 466 + S_ISREG(inode->i_mode); 464 467 } 465 468 466 469 if (!vma->anon_vma || vma->vm_ops) ··· 1766 1763 filemap_flush(mapping); 1767 1764 result = SCAN_FAIL; 1768 1765 goto xa_unlocked; 1766 + } else if (PageWriteback(page)) { 1767 + xas_unlock_irq(&xas); 1768 + result = SCAN_FAIL; 1769 + goto xa_unlocked; 1769 1770 } else if (trylock_page(page)) { 1770 1771 get_page(page); 1771 1772 xas_unlock_irq(&xas); ··· 1805 1798 goto out_unlock; 1806 1799 } 1807 1800 1808 - if (!is_shmem && PageDirty(page)) { 1801 + if (!is_shmem && (PageDirty(page) || 1802 + PageWriteback(page))) { 1809 1803 /* 1810 1804 * khugepaged only works on read-only fd, so this 1811 1805 * page is dirty because it hasn't been flushed
+14 -14
mm/memory-failure.c
··· 1147 1147 if (!HWPoisonHandlable(head)) 1148 1148 return -EBUSY; 1149 1149 1150 - if (PageTransHuge(head)) { 1151 - /* 1152 - * Non anonymous thp exists only in allocation/free time. We 1153 - * can't handle such a case correctly, so let's give it up. 1154 - * This should be better than triggering BUG_ON when kernel 1155 - * tries to touch the "partially handled" page. 1156 - */ 1157 - if (!PageAnon(head)) { 1158 - pr_err("Memory failure: %#lx: non anonymous thp\n", 1159 - page_to_pfn(page)); 1160 - return 0; 1161 - } 1162 - } 1163 - 1164 1150 if (get_page_unless_zero(head)) { 1165 1151 if (head == compound_head(page)) 1166 1152 return 1; ··· 1694 1708 } 1695 1709 1696 1710 if (PageTransHuge(hpage)) { 1711 + /* 1712 + * The flag must be set after the refcount is bumped 1713 + * otherwise it may race with THP split. 1714 + * And the flag can't be set in get_hwpoison_page() since 1715 + * it is called by soft offline too and it is just called 1716 + * for !MF_COUNT_INCREASE. So here seems to be the best 1717 + * place. 1718 + * 1719 + * No need to care about the above error handling paths for 1720 + * get_hwpoison_page() since they handle either a free page 1721 + * or an unhandlable page. The refcount is bumped iff the 1722 + * page is a valid handlable page. 1723 + */ 1724 + SetPageHasHWPoisoned(hpage); 1697 1725 if (try_to_split_thp_page(p, "Memory Failure") < 0) { 1698 1726 action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); 1699 1727 res = -EBUSY;
+9
mm/memory.c
··· 3907 3907 return ret; 3908 3908 3909 3909 /* 3910 + * Just back off if any subpage of a THP is corrupted, otherwise 3911 + * the corrupted page may be mapped by PMD silently to escape the 3912 + * check. This kind of THP can only be PTE mapped. Access to 3913 + * the corrupted subpage should trigger SIGBUS as expected. 3914 + */ 3915 + if (unlikely(PageHasHWPoisoned(page))) 3916 + return ret; 3917 + 3918 + /* 3910 3919 * Archs like ppc64 need additional space to store information 3911 3920 * related to pte entry. Use the preallocated table for that. 3912 3921 */
+12 -11
mm/oom_kill.c
··· 1150 1150 struct task_struct *task; 1151 1151 struct task_struct *p; 1152 1152 unsigned int f_flags; 1153 - bool reap = true; 1153 + bool reap = false; 1154 1154 struct pid *pid; 1155 1155 long ret = 0; 1156 1156 ··· 1177 1177 goto put_task; 1178 1178 } 1179 1179 1180 - mm = p->mm; 1181 - mmgrab(mm); 1182 - 1183 - /* If the work has been done already, just exit with success */ 1184 - if (test_bit(MMF_OOM_SKIP, &mm->flags)) 1185 - reap = false; 1186 - else if (!task_will_free_mem(p)) { 1187 - reap = false; 1188 - ret = -EINVAL; 1180 + if (mmget_not_zero(p->mm)) { 1181 + mm = p->mm; 1182 + if (task_will_free_mem(p)) 1183 + reap = true; 1184 + else { 1185 + /* Error only if the work has not been done already */ 1186 + if (!test_bit(MMF_OOM_SKIP, &mm->flags)) 1187 + ret = -EINVAL; 1188 + } 1189 1189 } 1190 1190 task_unlock(p); 1191 1191 ··· 1201 1201 mmap_read_unlock(mm); 1202 1202 1203 1203 drop_mm: 1204 - mmdrop(mm); 1204 + if (mm) 1205 + mmput(mm); 1205 1206 put_task: 1206 1207 put_task_struct(task); 1207 1208 put_pid:
+7 -1
mm/page_alloc.c
··· 1312 1312 1313 1313 VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); 1314 1314 1315 - if (compound) 1315 + if (compound) { 1316 1316 ClearPageDoubleMap(page); 1317 + ClearPageHasHWPoisoned(page); 1318 + } 1317 1319 for (i = 1; i < (1 << order); i++) { 1318 1320 if (compound) 1319 1321 bad += free_tail_pages_check(page, page + i); ··· 5224 5222 /* Already populated array? */ 5225 5223 if (unlikely(page_array && nr_pages - nr_populated == 0)) 5226 5224 goto out; 5225 + 5226 + /* Bulk allocator does not support memcg accounting. */ 5227 + if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT)) 5228 + goto failed; 5227 5229 5228 5230 /* Use the single page allocator for one page. */ 5229 5231 if (nr_pages - nr_populated == 1)
+1 -1
mm/secretmem.c
··· 218 218 219 219 file->f_flags |= O_LARGEFILE; 220 220 221 - fd_install(fd, file); 222 221 atomic_inc(&secretmem_users); 222 + fd_install(fd, file); 223 223 return fd; 224 224 225 225 err_put_fd:
+9 -6
mm/vmalloc.c
··· 2816 2816 unsigned int order, unsigned int nr_pages, struct page **pages) 2817 2817 { 2818 2818 unsigned int nr_allocated = 0; 2819 + struct page *page; 2820 + int i; 2819 2821 2820 2822 /* 2821 2823 * For order-0 pages we make use of bulk allocator, if ··· 2825 2823 * to fails, fallback to a single page allocator that is 2826 2824 * more permissive. 2827 2825 */ 2828 - if (!order) { 2826 + if (!order && nid != NUMA_NO_NODE) { 2829 2827 while (nr_allocated < nr_pages) { 2830 2828 unsigned int nr, nr_pages_request; 2831 2829 ··· 2850 2848 if (nr != nr_pages_request) 2851 2849 break; 2852 2850 } 2853 - } else 2851 + } else if (order) 2854 2852 /* 2855 2853 * Compound pages required for remap_vmalloc_page if 2856 2854 * high-order pages. ··· 2858 2856 gfp |= __GFP_COMP; 2859 2857 2860 2858 /* High-order pages or fallback path if "bulk" fails. */ 2861 - while (nr_allocated < nr_pages) { 2862 - struct page *page; 2863 - int i; 2864 2859 2865 - page = alloc_pages_node(nid, gfp, order); 2860 + while (nr_allocated < nr_pages) { 2861 + if (nid == NUMA_NO_NODE) 2862 + page = alloc_pages(gfp, order); 2863 + else 2864 + page = alloc_pages_node(nid, gfp, order); 2866 2865 if (unlikely(!page)) 2867 2866 break; 2868 2867
+1 -1
tools/testing/selftests/vm/split_huge_page_test.c
··· 341 341 } 342 342 343 343 /* write something to the file, so a file-backed THP can be allocated */ 344 - num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); 344 + num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); 345 345 close(fd); 346 346 347 347 if (num_written < 1) {