Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Revert "mm: page cache: store only head pages in i_pages"

This reverts commit 5fd4ca2d84b249f0858ce28cf637cf25b61a398f.

Mikhail Gavrilov reports that it causes the VM_BUG_ON_PAGE() in
__delete_from_swap_cache() to trigger:

page:ffffd6d34dff0000 refcount:1 mapcount:1 mapping:ffff97812323a689 index:0xfecec363
anon
flags: 0x17fffe00080034(uptodate|lru|active|swapbacked)
raw: 0017fffe00080034 ffffd6d34c67c508 ffffd6d3504b8d48 ffff97812323a689
raw: 00000000fecec363 0000000000000000 0000000100000000 ffff978433ace000
page dumped because: VM_BUG_ON_PAGE(entry != page)
page->mem_cgroup:ffff978433ace000
------------[ cut here ]------------
kernel BUG at mm/swap_state.c:170!
invalid opcode: 0000 [#1] SMP NOPTI
CPU: 1 PID: 221 Comm: kswapd0 Not tainted 5.2.0-0.rc2.git0.1.fc31.x86_64 #1
Hardware name: System manufacturer System Product Name/ROG STRIX X470-I GAMING, BIOS 2202 04/11/2019
RIP: 0010:__delete_from_swap_cache+0x20d/0x240
Code: 30 65 48 33 04 25 28 00 00 00 75 4a 48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 c7 c6 2f dc 0f 8a 48 89 c7 e8 93 1b fd ff <0f> 0b 48 c7 c6 a8 74 0f 8a e8 85 1b fd ff 0f 0b 48 c7 c6 a8 7d 0f
RSP: 0018:ffffa982036e7980 EFLAGS: 00010046
RAX: 0000000000000021 RBX: 0000000000000040 RCX: 0000000000000006
RDX: 0000000000000000 RSI: 0000000000000086 RDI: ffff97843d657900
RBP: 0000000000000001 R08: ffffa982036e7835 R09: 0000000000000535
R10: ffff97845e21a46c R11: ffffa982036e7835 R12: ffff978426387120
R13: 0000000000000000 R14: ffffd6d34dff0040 R15: ffffd6d34dff0000
FS: 0000000000000000(0000) GS:ffff97843d640000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00002cba88ef5000 CR3: 000000078a97c000 CR4: 00000000003406e0
Call Trace:
delete_from_swap_cache+0x46/0xa0
try_to_free_swap+0xbc/0x110
swap_writepage+0x13/0x70
pageout.isra.0+0x13c/0x350
shrink_page_list+0xc14/0xdf0
shrink_inactive_list+0x1e5/0x3c0
shrink_node_memcg+0x202/0x760
shrink_node+0xe0/0x470
balance_pgdat+0x2d1/0x510
kswapd+0x220/0x420
kthread+0xfb/0x130
ret_from_fork+0x22/0x40

and it's not immediately obvious why it happens. It's too late in the
rc cycle to do anything but revert for now.

Link: https://lore.kernel.org/lkml/CABXGCsN9mYmBD-4GaaeW_NrDu+FDXLzr_6x+XNxfmFV6QkYCDg@mail.gmail.com/
Reported-and-bisected-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Suggested-by: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Kirill Shutemov <kirill@shutemov.name>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+94 -82 (total)
-13
include/linux/pagemap.h
···
                        mapping_gfp_mask(mapping));
 }
 
-static inline struct page *find_subpage(struct page *page, pgoff_t offset)
-{
-        unsigned long mask;
-
-        if (PageHuge(page))
-                return page;
-
-        VM_BUG_ON_PAGE(PageTail(page), page);
-
-        mask = (1UL << compound_order(page)) - 1;
-        return page + (offset & mask);
-}
-
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
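For readers unfamiliar with the removed helper: find_subpage() mapped a page-cache offset within a compound page to the subpage that backs it. Below is a minimal userspace model of that arithmetic, assuming a hypothetical PMD-sized compound page and made-up pfn values (a sketch, not kernel code):

#include <assert.h>
#include <stdio.h>

/* Stand-in for compound_order(page) on a PMD-sized THP: 2^9 = 512 subpages. */
#define COMPOUND_ORDER 9UL

/* Models the removed find_subpage(): pick the subpage that backs 'offset'. */
static unsigned long find_subpage(unsigned long head_pfn, unsigned long offset)
{
        unsigned long mask = (1UL << COMPOUND_ORDER) - 1;
        return head_pfn + (offset & mask);
}

int main(void)
{
        unsigned long head_pfn = 0x1000;        /* hypothetical head page */

        /* Offset 515 falls 3 pages into the THP (515 & 511 == 3). */
        assert(find_subpage(head_pfn, 515) == head_pfn + 3);
        printf("offset 515 -> subpage %lu\n",
               find_subpage(head_pfn, 515) - head_pfn);
        return 0;
}

After the revert, i_pages once again stores one entry per subpage, so a lookup returns the right subpage directly and no such translation step is needed.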
+88 -58
mm/filemap.c
···
  * @pvec: pagevec with pages to delete
  *
  * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
+ * from the mapping. The function expects @pvec to be sorted by page index.
  * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
  *
  * The function expects the i_pages lock to be held.
  */
···
 {
         XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
         int total_pages = 0;
-        int i = 0;
+        int i = 0, tail_pages = 0;
         struct page *page;
 
         mapping_set_update(&xas, mapping);
         xas_for_each(&xas, page, ULONG_MAX) {
-                if (i >= pagevec_count(pvec))
+                if (i >= pagevec_count(pvec) && !tail_pages)
                         break;
-
-                /* A swap/dax/shadow entry got inserted? Skip it. */
                 if (xa_is_value(page))
                         continue;
-                /*
-                 * A page got inserted in our range? Skip it. We have our
-                 * pages locked so they are protected from being removed.
-                 * If we see a page whose index is higher than ours, it
-                 * means our page has been removed, which shouldn't be
-                 * possible because we're holding the PageLock.
-                 */
-                if (page != pvec->pages[i]) {
-                        VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
-                                        page);
-                        continue;
-                }
-
-                WARN_ON_ONCE(!PageLocked(page));
-
-                if (page->index == xas.xa_index)
+                if (!tail_pages) {
+                        /*
+                         * Some page got inserted in our range? Skip it. We
+                         * have our pages locked so they are protected from
+                         * being removed.
+                         */
+                        if (page != pvec->pages[i]) {
+                                VM_BUG_ON_PAGE(page->index >
+                                                pvec->pages[i]->index, page);
+                                continue;
+                        }
+                        WARN_ON_ONCE(!PageLocked(page));
+                        if (PageTransHuge(page) && !PageHuge(page))
+                                tail_pages = HPAGE_PMD_NR - 1;
                         page->mapping = NULL;
-                /* Leave page->index set: truncation lookup relies on it */
-
-                /*
-                 * Move to the next page in the vector if this is a regular
-                 * page or the index is of the last sub-page of this compound
-                 * page.
-                 */
-                if (page->index + (1UL << compound_order(page)) - 1 ==
-                                xas.xa_index)
+                        /*
+                         * Leave page->index set: truncation lookup relies
+                         * upon it
+                         */
                         i++;
+                } else {
+                        VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
+                                        != pvec->pages[i]->index, page);
+                        tail_pages--;
+                }
                 xas_store(&xas, NULL);
                 total_pages++;
         }
···
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
         XA_STATE(xas, &mapping->i_pages, offset);
-        struct page *page;
+        struct page *head, *page;
 
         rcu_read_lock();
 repeat:
···
         if (!page || xa_is_value(page))
                 goto out;
 
-        if (!page_cache_get_speculative(page))
+        head = compound_head(page);
+        if (!page_cache_get_speculative(head))
                 goto repeat;
 
+        /* The page was split under us? */
+        if (compound_head(page) != head) {
+                put_page(head);
+                goto repeat;
+        }
+
         /*
-         * Has the page moved or been split?
+         * Has the page moved?
          * This is part of the lockless pagecache protocol. See
          * include/linux/pagemap.h for details.
          */
         if (unlikely(page != xas_reload(&xas))) {
-                put_page(page);
+                put_page(head);
                 goto repeat;
         }
-        page = find_subpage(page, offset);
 out:
         rcu_read_unlock();
···
 
         rcu_read_lock();
         xas_for_each(&xas, page, ULONG_MAX) {
+                struct page *head;
                 if (xas_retry(&xas, page))
                         continue;
                 /*
···
                 if (xa_is_value(page))
                         goto export;
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto retry;
 
-                /* Has the page moved or been split? */
+                /* The page was split under us? */
+                if (compound_head(page) != head)
+                        goto put_page;
+
+                /* Has the page moved? */
                 if (unlikely(page != xas_reload(&xas)))
                         goto put_page;
-                page = find_subpage(page, xas.xa_index);
 
 export:
                 indices[ret] = xas.xa_index;
···
                         break;
                 continue;
 put_page:
-                put_page(page);
+                put_page(head);
 retry:
                 xas_reset(&xas);
         }
···
 
         rcu_read_lock();
         xas_for_each(&xas, page, end) {
+                struct page *head;
                 if (xas_retry(&xas, page))
                         continue;
                 /* Skip over shadow, swap and DAX entries */
                 if (xa_is_value(page))
                         continue;
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto retry;
 
-                /* Has the page moved or been split? */
+                /* The page was split under us? */
+                if (compound_head(page) != head)
+                        goto put_page;
+
+                /* Has the page moved? */
                 if (unlikely(page != xas_reload(&xas)))
                         goto put_page;
 
-                pages[ret] = find_subpage(page, xas.xa_index);
+                pages[ret] = page;
                 if (++ret == nr_pages) {
                         *start = xas.xa_index + 1;
                         goto out;
                 }
                 continue;
 put_page:
-                put_page(page);
+                put_page(head);
 retry:
                 xas_reset(&xas);
         }
···
 
         rcu_read_lock();
         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+                struct page *head;
                 if (xas_retry(&xas, page))
                         continue;
                 /*
···
                 if (xa_is_value(page))
                         break;
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto retry;
 
-                /* Has the page moved or been split? */
+                /* The page was split under us? */
+                if (compound_head(page) != head)
+                        goto put_page;
+
+                /* Has the page moved? */
                 if (unlikely(page != xas_reload(&xas)))
                         goto put_page;
 
-                pages[ret] = find_subpage(page, xas.xa_index);
+                pages[ret] = page;
                 if (++ret == nr_pages)
                         break;
                 continue;
 put_page:
-                put_page(page);
+                put_page(head);
 retry:
                 xas_reset(&xas);
         }
···
 
         rcu_read_lock();
         xas_for_each_marked(&xas, page, end, tag) {
+                struct page *head;
                 if (xas_retry(&xas, page))
                         continue;
                 /*
···
                 if (xa_is_value(page))
                         continue;
 
-                if (!page_cache_get_speculative(page))
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
                         goto retry;
 
-                /* Has the page moved or been split? */
+                /* The page was split under us? */
+                if (compound_head(page) != head)
+                        goto put_page;
+
+                /* Has the page moved? */
                 if (unlikely(page != xas_reload(&xas)))
                         goto put_page;
 
-                pages[ret] = find_subpage(page, xas.xa_index);
+                pages[ret] = page;
                 if (++ret == nr_pages) {
                         *index = xas.xa_index + 1;
                         goto out;
                 }
                 continue;
 put_page:
-                put_page(page);
+                put_page(head);
 retry:
                 xas_reset(&xas);
         }
···
         pgoff_t last_pgoff = start_pgoff;
         unsigned long max_idx;
         XA_STATE(xas, &mapping->i_pages, start_pgoff);
-        struct page *page;
+        struct page *head, *page;
 
         rcu_read_lock();
         xas_for_each(&xas, page, end_pgoff) {
···
                 if (xa_is_value(page))
                         goto next;
 
+                head = compound_head(page);
+
                 /*
                  * Check for a locked page first, as a speculative
                  * reference may adversely influence page migration.
                  */
-                if (PageLocked(page))
+                if (PageLocked(head))
                         goto next;
-                if (!page_cache_get_speculative(page))
+                if (!page_cache_get_speculative(head))
                         goto next;
 
-                /* Has the page moved or been split? */
+                /* The page was split under us? */
+                if (compound_head(page) != head)
+                        goto skip;
+
+                /* Has the page moved? */
                 if (unlikely(page != xas_reload(&xas)))
                         goto skip;
-                page = find_subpage(page, xas.xa_index);
 
                 if (!PageUptodate(page) ||
                     PageReadahead(page) ||
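All of the mm/filemap.c hunks above restore one pattern: take the speculative reference on the compound head, then confirm the page was neither split nor moved before using it. The following is a compilable userspace distillation of that retry loop; struct page, get_speculative() and the slot reload here are simplified stand-ins, not the real kernel API:

#include <stdbool.h>
#include <stdio.h>

struct page {
        struct page *head;      /* compound head; points to itself for heads */
        int refcount;           /* refcounts live on the head page */
};

static struct page *compound_head(struct page *p) { return p->head; }

/* Stand-in for page_cache_get_speculative(): fails once refcount is zero. */
static bool get_speculative(struct page *p)
{
        if (p->refcount == 0)
                return false;
        p->refcount++;
        return true;
}

static void put_page(struct page *p) { p->refcount--; }

/* Stand-in for the lockless lookup loop restored in find_get_entry(). */
static struct page *lookup(struct page * volatile *slot)
{
        struct page *page, *head;
repeat:
        page = *slot;
        if (!page)
                return NULL;
        head = compound_head(page);
        if (!get_speculative(head))
                goto repeat;
        /* The page was split under us? */
        if (compound_head(page) != head) {
                put_page(head);
                goto repeat;
        }
        /* Has the page moved? (xas_reload() in the kernel) */
        if (page != *slot) {
                put_page(head);
                goto repeat;
        }
        return page;
}

int main(void)
{
        struct page head = { .refcount = 1 };
        struct page tail = { .head = &head };
        struct page * volatile slot = &tail;

        head.head = &head;      /* a head page is its own compound head */
        struct page *p = lookup(&slot);
        printf("pinned %s, head refcount now %d\n",
               p == &tail ? "tail page" : "head page",
               compound_head(p)->refcount);
        return 0;
}

Note the ordering: the split and reload checks only become meaningful after the reference on the head is taken, since the pin is what keeps the compound page stable while they run.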
-3
mm/huge_memory.c
···
                         if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
                                 shmem_uncharge(head->mapping->host, 1);
                         put_page(head + i);
-                } else if (!PageAnon(page)) {
-                        __xa_store(&head->mapping->i_pages, head[i].index,
-                                        head + i, 0);
                 }
         }
+2 -2
mm/khugepaged.c
···
                         result = SCAN_FAIL;
                         goto xa_locked;
                 }
-                xas_store(&xas, new_page);
+                xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
                 nr_none++;
                 continue;
         }
···
                 list_add_tail(&page->lru, &pagelist);
 
                 /* Finally, replace with the new page. */
-                xas_store(&xas, new_page);
+                xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
                 continue;
 out_unlock:
                 unlock_page(page);
-2
mm/memfd.c
···
         xas_for_each(xas, page, ULONG_MAX) {
                 if (xa_is_value(page))
                         continue;
-                page = find_subpage(page, xas->xa_index);
                 if (page_count(page) - page_mapcount(page) > 1)
                         xas_set_mark(xas, MEMFD_TAG_PINNED);
 
···
                 bool clear = true;
                 if (xa_is_value(page))
                         continue;
-                page = find_subpage(page, xas.xa_index);
                 if (page_count(page) - page_mapcount(page) != 1) {
                         /*
                          * On the last scan, we clean up all those tags
+1 -1
mm/migrate.c
···
 
                 for (i = 1; i < HPAGE_PMD_NR; i++) {
                         xas_next(&xas);
-                        xas_store(&xas, newpage);
+                        xas_store(&xas, newpage + i);
                 }
         }
+1 -1
mm/shmem.c
···
                 if (xas_error(&xas))
                         goto unlock;
 next:
-                xas_store(&xas, page);
+                xas_store(&xas, page + i);
                 if (++i < nr) {
                         xas_next(&xas);
                         goto next;
+2 -2
mm/swap_state.c
···
         for (i = 0; i < nr; i++) {
                 VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
                 set_page_private(page + i, entry.val + i);
-                xas_store(&xas, page);
+                xas_store(&xas, page + i);
                 xas_next(&xas);
         }
         address_space->nrpages += nr;
···
 
         for (i = 0; i < nr; i++) {
                 void *entry = xas_store(&xas, NULL);
-                VM_BUG_ON_PAGE(entry != page, entry);
+                VM_BUG_ON_PAGE(entry != page + i, entry);
                 set_page_private(page + i, 0);
                 xas_next(&xas);
         }
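The xas_store() changes in mm/khugepaged.c, mm/migrate.c, mm/shmem.c and mm/swap_state.c all restore the same invariant: slot idx + i of the tree holds subpage page + i, rather than the head page at every index. A small userspace model of the swap-cache add/delete pair above, with an ordinary array standing in for the XArray and made-up values (a sketch, not kernel code):

#include <assert.h>
#include <stdio.h>

#define NR 4                            /* pretend compound page: 4 subpages */

int main(void)
{
        long page = 100;                /* stand-in for the head page pointer */
        long slots[NR] = { 0 };         /* stand-in for the i_pages span */

        /* add_to_swap_cache() after the revert: one subpage per slot. */
        for (int i = 0; i < NR; i++)
                slots[i] = page + i;    /* xas_store(&xas, page + i) */

        /* __delete_from_swap_cache(): every slot must hold its own subpage,
         * which is what the adjusted VM_BUG_ON_PAGE() now asserts. */
        for (int i = 0; i < NR; i++) {
                long entry = slots[i];  /* old value returned by xas_store() */
                assert(entry == page + i);
                slots[i] = 0;
        }
        printf("all %d subpage entries matched\n", NR);
        return 0;
}

Under the reverted scheme, every slot held the head page and the delete path used VM_BUG_ON_PAGE(entry != page); the splat quoted in the commit message is that older form of the check firing at mm/swap_state.c:170.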