Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'folio-5.17' of git://git.infradead.org/users/willy/pagecache

Pull folio conversion updates from Matthew Wilcox:
"Convert much of the page cache to use folios

This stops just short of actually enabling large folios. It converts
everything that I noticed needs to be converted, but there may still
be places I've overlooked which still have page size assumptions.

The big change here is using large entries in the page cache XArray
instead of many small entries. That only affects shmem for now, but
it's a pretty big change for shmem since it changes where memory needs
to be allocated (at split time instead of insertion)"

* tag 'folio-5.17' of git://git.infradead.org/users/willy/pagecache: (49 commits)
mm: Use multi-index entries in the page cache
XArray: Add xas_advance()
truncate,shmem: Handle truncates that split large folios
truncate: Convert invalidate_inode_pages2_range to folios
fs: Convert vfs_dedupe_file_range_compare to folios
mm: Remove pagevec_remove_exceptionals()
mm: Convert find_lock_entries() to use a folio_batch
filemap: Return only folios from find_get_entries()
filemap: Convert filemap_get_read_batch() to use a folio_batch
filemap: Convert filemap_read() to use a folio
truncate: Add invalidate_complete_folio2()
truncate: Convert invalidate_inode_pages2_range() to use a folio
truncate: Skip known-truncated indices
truncate,shmem: Add truncate_inode_folio()
shmem: Convert part of shmem_undo_range() to use a folio
mm: Add unmap_mapping_folio()
truncate: Add truncate_cleanup_folio()
filemap: Add filemap_release_folio()
filemap: Use a folio in filemap_page_mkwrite
filemap: Use a folio in filemap_map_pages
...

+1113 -1012
+2
fs/f2fs/f2fs.h
··· 28 28 #include <linux/fscrypt.h> 29 29 #include <linux/fsverity.h> 30 30 31 + struct pagevec; 32 + 31 33 #ifdef CONFIG_F2FS_CHECK_FS 32 34 #define f2fs_bug_on(sbi, condition) BUG_ON(condition) 33 35 #else
+13 -11
fs/fs-writeback.c
··· 372 372 { 373 373 struct address_space *mapping = inode->i_mapping; 374 374 XA_STATE(xas, &mapping->i_pages, 0); 375 - struct page *page; 375 + struct folio *folio; 376 376 bool switched = false; 377 377 378 378 spin_lock(&inode->i_lock); ··· 389 389 390 390 /* 391 391 * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points 392 - * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to 393 - * pages actually under writeback. 392 + * to possibly dirty folios while PAGECACHE_TAG_WRITEBACK points to 393 + * folios actually under writeback. 394 394 */ 395 - xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) { 396 - if (PageDirty(page)) { 397 - dec_wb_stat(old_wb, WB_RECLAIMABLE); 398 - inc_wb_stat(new_wb, WB_RECLAIMABLE); 395 + xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_DIRTY) { 396 + if (folio_test_dirty(folio)) { 397 + long nr = folio_nr_pages(folio); 398 + wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr); 399 + wb_stat_mod(new_wb, WB_RECLAIMABLE, nr); 399 400 } 400 401 } 401 402 402 403 xas_set(&xas, 0); 403 - xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { 404 - WARN_ON_ONCE(!PageWriteback(page)); 405 - dec_wb_stat(old_wb, WB_WRITEBACK); 406 - inc_wb_stat(new_wb, WB_WRITEBACK); 404 + xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { 405 + long nr = folio_nr_pages(folio); 406 + WARN_ON_ONCE(!folio_test_writeback(folio)); 407 + wb_stat_mod(old_wb, WB_WRITEBACK, -nr); 408 + wb_stat_mod(new_wb, WB_WRITEBACK, nr); 407 409 } 408 410 409 411 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+54 -60
fs/remap_range.c
··· 146 146 } 147 147 148 148 /* Read a page's worth of file data into the page cache. */ 149 - static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) 149 + static struct folio *vfs_dedupe_get_folio(struct inode *inode, loff_t pos) 150 150 { 151 - struct page *page; 151 + struct folio *folio; 152 152 153 - page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL); 154 - if (IS_ERR(page)) 155 - return page; 156 - if (!PageUptodate(page)) { 157 - put_page(page); 153 + folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, NULL); 154 + if (IS_ERR(folio)) 155 + return folio; 156 + if (!folio_test_uptodate(folio)) { 157 + folio_put(folio); 158 158 return ERR_PTR(-EIO); 159 159 } 160 - return page; 160 + return folio; 161 161 } 162 162 163 163 /* 164 - * Lock two pages, ensuring that we lock in offset order if the pages are from 165 - * the same file. 164 + * Lock two folios, ensuring that we lock in offset order if the folios 165 + * are from the same file. 166 166 */ 167 - static void vfs_lock_two_pages(struct page *page1, struct page *page2) 167 + static void vfs_lock_two_folios(struct folio *folio1, struct folio *folio2) 168 168 { 169 169 /* Always lock in order of increasing index. */ 170 - if (page1->index > page2->index) 171 - swap(page1, page2); 170 + if (folio1->index > folio2->index) 171 + swap(folio1, folio2); 172 172 173 - lock_page(page1); 174 - if (page1 != page2) 175 - lock_page(page2); 173 + folio_lock(folio1); 174 + if (folio1 != folio2) 175 + folio_lock(folio2); 176 176 } 177 177 178 - /* Unlock two pages, being careful not to unlock the same page twice. */ 179 - static void vfs_unlock_two_pages(struct page *page1, struct page *page2) 178 + /* Unlock two folios, being careful not to unlock the same folio twice. */ 179 + static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2) 180 180 { 181 - unlock_page(page1); 182 - if (page1 != page2) 183 - unlock_page(page2); 181 + folio_unlock(folio1); 182 + if (folio1 != folio2) 183 + folio_unlock(folio2); 184 184 } 185 185 186 186 /* ··· 188 188 * Caller must have locked both inodes to prevent write races. 189 189 */ 190 190 static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, 191 - struct inode *dest, loff_t destoff, 191 + struct inode *dest, loff_t dstoff, 192 192 loff_t len, bool *is_same) 193 193 { 194 - loff_t src_poff; 195 - loff_t dest_poff; 196 - void *src_addr; 197 - void *dest_addr; 198 - struct page *src_page; 199 - struct page *dest_page; 200 - loff_t cmp_len; 201 - bool same; 202 - int error; 194 + bool same = true; 195 + int error = -EINVAL; 203 196 204 - error = -EINVAL; 205 - same = true; 206 197 while (len) { 207 - src_poff = srcoff & (PAGE_SIZE - 1); 208 - dest_poff = destoff & (PAGE_SIZE - 1); 209 - cmp_len = min(PAGE_SIZE - src_poff, 210 - PAGE_SIZE - dest_poff); 198 + struct folio *src_folio, *dst_folio; 199 + void *src_addr, *dst_addr; 200 + loff_t cmp_len = min(PAGE_SIZE - offset_in_page(srcoff), 201 + PAGE_SIZE - offset_in_page(dstoff)); 202 + 211 203 cmp_len = min(cmp_len, len); 212 204 if (cmp_len <= 0) 213 205 goto out_error; 214 206 215 - src_page = vfs_dedupe_get_page(src, srcoff); 216 - if (IS_ERR(src_page)) { 217 - error = PTR_ERR(src_page); 207 + src_folio = vfs_dedupe_get_folio(src, srcoff); 208 + if (IS_ERR(src_folio)) { 209 + error = PTR_ERR(src_folio); 218 210 goto out_error; 219 211 } 220 - dest_page = vfs_dedupe_get_page(dest, destoff); 221 - if (IS_ERR(dest_page)) { 222 - error = PTR_ERR(dest_page); 223 - put_page(src_page); 212 + dst_folio = vfs_dedupe_get_folio(dest, dstoff); 213 + if (IS_ERR(dst_folio)) { 214 + error = PTR_ERR(dst_folio); 215 + folio_put(src_folio); 224 216 goto out_error; 225 217 } 226 218 227 - vfs_lock_two_pages(src_page, dest_page); 219 + vfs_lock_two_folios(src_folio, dst_folio); 228 220 229 221 /* 230 - * Now that we've locked both pages, make sure they're still 222 + * Now that we've locked both folios, make sure they're still 231 223 * mapped to the file data we're interested in. If not, 232 224 * someone is invalidating pages on us and we lose. 233 225 */ 234 - if (!PageUptodate(src_page) || !PageUptodate(dest_page) || 235 - src_page->mapping != src->i_mapping || 236 - dest_page->mapping != dest->i_mapping) { 226 + if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) || 227 + src_folio->mapping != src->i_mapping || 228 + dst_folio->mapping != dest->i_mapping) { 237 229 same = false; 238 230 goto unlock; 239 231 } 240 232 241 - src_addr = kmap_atomic(src_page); 242 - dest_addr = kmap_atomic(dest_page); 233 + src_addr = kmap_local_folio(src_folio, 234 + offset_in_folio(src_folio, srcoff)); 235 + dst_addr = kmap_local_folio(dst_folio, 236 + offset_in_folio(dst_folio, dstoff)); 243 237 244 - flush_dcache_page(src_page); 245 - flush_dcache_page(dest_page); 238 + flush_dcache_folio(src_folio); 239 + flush_dcache_folio(dst_folio); 246 240 247 - if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) 241 + if (memcmp(src_addr, dst_addr, cmp_len)) 248 242 same = false; 249 243 250 - kunmap_atomic(dest_addr); 251 - kunmap_atomic(src_addr); 244 + kunmap_local(dst_addr); 245 + kunmap_local(src_addr); 252 246 unlock: 253 - vfs_unlock_two_pages(src_page, dest_page); 254 - put_page(dest_page); 255 - put_page(src_page); 247 + vfs_unlock_two_folios(src_folio, dst_folio); 248 + folio_put(dst_folio); 249 + folio_put(src_folio); 256 250 257 251 if (!same) 258 252 break; 259 253 260 254 srcoff += cmp_len; 261 - destoff += cmp_len; 255 + dstoff += cmp_len; 262 256 len -= cmp_len; 263 257 } 264 258
+14
include/linux/huge_mm.h
··· 274 274 return 1; 275 275 } 276 276 277 + /** 278 + * folio_test_pmd_mappable - Can we map this folio with a PMD? 279 + * @folio: The folio to test 280 + */ 281 + static inline bool folio_test_pmd_mappable(struct folio *folio) 282 + { 283 + return folio_order(folio) >= HPAGE_PMD_ORDER; 284 + } 285 + 277 286 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, 278 287 pmd_t *pmd, int flags, struct dev_pagemap **pgmap); 279 288 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, ··· 346 337 { 347 338 VM_BUG_ON_PGFLAGS(PageTail(page), page); 348 339 return 1; 340 + } 341 + 342 + static inline bool folio_test_pmd_mappable(struct folio *folio) 343 + { 344 + return false; 349 345 } 350 346 351 347 static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
+21 -47
include/linux/mm.h
··· 714 714 struct mmu_gather; 715 715 struct inode; 716 716 717 + static inline unsigned int compound_order(struct page *page) 718 + { 719 + if (!PageHead(page)) 720 + return 0; 721 + return page[1].compound_order; 722 + } 723 + 724 + /** 725 + * folio_order - The allocation order of a folio. 726 + * @folio: The folio. 727 + * 728 + * A folio is composed of 2^order pages. See get_order() for the definition 729 + * of order. 730 + * 731 + * Return: The order of the folio. 732 + */ 733 + static inline unsigned int folio_order(struct folio *folio) 734 + { 735 + return compound_order(&folio->page); 736 + } 737 + 717 738 #include <linux/huge_mm.h> 718 739 719 740 /* ··· 932 911 { 933 912 VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); 934 913 compound_page_dtors[page[1].compound_dtor](page); 935 - } 936 - 937 - static inline unsigned int compound_order(struct page *page) 938 - { 939 - if (!PageHead(page)) 940 - return 0; 941 - return page[1].compound_order; 942 - } 943 - 944 - /** 945 - * folio_order - The allocation order of a folio. 946 - * @folio: The folio. 947 - * 948 - * A folio is composed of 2^order pages. See get_order() for the definition 949 - * of order. 950 - * 951 - * Return: The order of the folio. 952 - */ 953 - static inline unsigned int folio_order(struct folio *folio) 954 - { 955 - return compound_order(&folio->page); 956 914 } 957 915 958 916 static inline bool hpage_pincount_available(struct page *page) ··· 1837 1837 extern int user_shm_lock(size_t, struct ucounts *); 1838 1838 extern void user_shm_unlock(size_t, struct ucounts *); 1839 1839 1840 - /* 1841 - * Parameter block passed down to zap_pte_range in exceptional cases. 1842 - */ 1843 - struct zap_details { 1844 - struct address_space *zap_mapping; /* Check page->mapping if set */ 1845 - struct page *single_page; /* Locked page to be unmapped */ 1846 - }; 1847 - 1848 - /* 1849 - * We set details->zap_mappings when we want to unmap shared but keep private 1850 - * pages. Return true if skip zapping this page, false otherwise. 1851 - */ 1852 - static inline bool 1853 - zap_skip_check_mapping(struct zap_details *details, struct page *page) 1854 - { 1855 - if (!details || !page) 1856 - return false; 1857 - 1858 - return details->zap_mapping && 1859 - (details->zap_mapping != page_rmapping(page)); 1860 - } 1861 - 1862 1840 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, 1863 1841 pte_t pte); 1864 1842 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, ··· 1871 1893 extern void truncate_setsize(struct inode *inode, loff_t newsize); 1872 1894 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); 1873 1895 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); 1874 - int truncate_inode_page(struct address_space *mapping, struct page *page); 1875 1896 int generic_error_remove_page(struct address_space *mapping, struct page *page); 1876 1897 int invalidate_inode_page(struct page *page); 1877 1898 ··· 1881 1904 extern int fixup_user_fault(struct mm_struct *mm, 1882 1905 unsigned long address, unsigned int fault_flags, 1883 1906 bool *unlocked); 1884 - void unmap_mapping_page(struct page *page); 1885 1907 void unmap_mapping_pages(struct address_space *mapping, 1886 1908 pgoff_t start, pgoff_t nr, bool even_cows); 1887 1909 void unmap_mapping_range(struct address_space *mapping, ··· 1901 1925 BUG(); 1902 1926 return -EFAULT; 1903 1927 } 1904 - static inline void unmap_mapping_page(struct page *page) { } 1905 1928 static inline void unmap_mapping_pages(struct address_space *mapping, 1906 1929 pgoff_t start, pgoff_t nr, bool even_cows) { } 1907 1930 static inline void unmap_mapping_range(struct address_space *mapping, ··· 1957 1982 struct page **pages); 1958 1983 struct page *get_dump_page(unsigned long addr); 1959 1984 1960 - extern int try_to_release_page(struct page * page, gfp_t gfp_mask); 1961 1985 extern void do_invalidatepage(struct page *page, unsigned int offset, 1962 1986 unsigned int length); 1963 1987
+10 -3
include/linux/page-flags.h
··· 68 68 * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as 69 69 * a result of MADV_FREE). 70 70 * 71 - * PG_uptodate tells whether the page's contents is valid. When a read 72 - * completes, the page becomes uptodate, unless a disk I/O error happened. 73 - * 74 71 * PG_referenced, PG_reclaim are used for page reclaim for anonymous and 75 72 * file-backed pagecache (see mm/vmscan.c). 76 73 * ··· 612 615 613 616 u64 stable_page_flags(struct page *page); 614 617 618 + /** 619 + * folio_test_uptodate - Is this folio up to date? 620 + * @folio: The folio. 621 + * 622 + * The uptodate flag is set on a folio when every byte in the folio is 623 + * at least as new as the corresponding bytes on storage. Anonymous 624 + * and CoW folios are always uptodate. If the folio is not uptodate, 625 + * some of the bytes in it may be; see the is_partially_uptodate() 626 + * address_space operation. 627 + */ 615 628 static inline bool folio_test_uptodate(struct folio *folio) 616 629 { 617 630 bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
+24 -35
include/linux/pagemap.h
··· 16 16 #include <linux/hardirq.h> /* for in_interrupt() */ 17 17 #include <linux/hugetlb_inline.h> 18 18 19 - struct pagevec; 19 + struct folio_batch; 20 20 21 21 static inline bool mapping_empty(struct address_space *mapping) 22 22 { ··· 511 511 mapping_gfp_mask(mapping)); 512 512 } 513 513 514 - /* Does this page contain this index? */ 515 - static inline bool thp_contains(struct page *head, pgoff_t index) 516 - { 517 - /* HugeTLBfs indexes the page cache in units of hpage_size */ 518 - if (PageHuge(head)) 519 - return head->index == index; 520 - return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); 521 - } 522 - 523 514 #define swapcache_index(folio) __page_file_index(&(folio)->page) 524 515 525 516 /** ··· 591 600 return head + (index & (thp_nr_pages(head) - 1)); 592 601 } 593 602 594 - unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 595 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); 596 603 unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, 597 604 pgoff_t end, unsigned int nr_pages, 598 605 struct page **pages); ··· 626 637 return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); 627 638 } 628 639 629 - extern struct page * read_cache_page(struct address_space *mapping, 630 - pgoff_t index, filler_t *filler, void *data); 640 + struct folio *read_cache_folio(struct address_space *, pgoff_t index, 641 + filler_t *filler, void *data); 642 + struct page *read_cache_page(struct address_space *, pgoff_t index, 643 + filler_t *filler, void *data); 631 644 extern struct page * read_cache_page_gfp(struct address_space *mapping, 632 645 pgoff_t index, gfp_t gfp_mask); 633 646 extern int read_cache_pages(struct address_space *mapping, ··· 639 648 pgoff_t index, void *data) 640 649 { 641 650 return read_cache_page(mapping, index, NULL, data); 651 + } 652 + 653 + static inline struct folio *read_mapping_folio(struct address_space *mapping, 654 + pgoff_t index, void *data) 655 + { 656 + return read_cache_folio(mapping, index, NULL, data); 642 657 } 643 658 644 659 /* ··· 864 867 return folio_wait_locked_killable(page_folio(page)); 865 868 } 866 869 867 - int put_and_wait_on_page_locked(struct page *page, int state); 870 + int folio_put_wait_locked(struct folio *folio, int state); 868 871 void wait_on_page_writeback(struct page *page); 869 872 void folio_wait_writeback(struct folio *folio); 870 873 int folio_wait_writeback_killable(struct folio *folio); ··· 880 883 } 881 884 void folio_account_cleaned(struct folio *folio, struct address_space *mapping, 882 885 struct bdi_writeback *wb); 883 - static inline void account_page_cleaned(struct page *page, 884 - struct address_space *mapping, struct bdi_writeback *wb) 885 - { 886 - return folio_account_cleaned(page_folio(page), mapping, wb); 887 - } 888 886 void __folio_cancel_dirty(struct folio *folio); 889 887 static inline void folio_cancel_dirty(struct folio *folio) 890 888 { ··· 926 934 pgoff_t index, gfp_t gfp); 927 935 int filemap_add_folio(struct address_space *mapping, struct folio *folio, 928 936 pgoff_t index, gfp_t gfp); 929 - extern void delete_from_page_cache(struct page *page); 930 - extern void __delete_from_page_cache(struct page *page, void *shadow); 937 + void filemap_remove_folio(struct folio *folio); 938 + void delete_from_page_cache(struct page *page); 939 + void __filemap_remove_folio(struct folio *folio, void *shadow); 940 + static inline void __delete_from_page_cache(struct page *page, void *shadow) 941 + { 942 + __filemap_remove_folio(page_folio(page), shadow); 943 + } 931 944 void replace_page_cache_page(struct page *old, struct page *new); 932 945 void delete_from_page_cache_batch(struct address_space *mapping, 933 - struct pagevec *pvec); 946 + struct folio_batch *fbatch); 947 + int try_to_release_page(struct page *page, gfp_t gfp); 948 + bool filemap_release_folio(struct folio *folio, gfp_t gfp); 934 949 loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, 935 950 int whence); 936 951 ··· 1029 1030 void page_cache_ra_unbounded(struct readahead_control *, 1030 1031 unsigned long nr_to_read, unsigned long lookahead_count); 1031 1032 void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); 1032 - void page_cache_async_ra(struct readahead_control *, struct page *, 1033 + void page_cache_async_ra(struct readahead_control *, struct folio *, 1033 1034 unsigned long req_count); 1034 1035 void readahead_expand(struct readahead_control *ractl, 1035 1036 loff_t new_start, size_t new_len); ··· 1076 1077 struct page *page, pgoff_t index, unsigned long req_count) 1077 1078 { 1078 1079 DEFINE_READAHEAD(ractl, file, ra, mapping, index); 1079 - page_cache_async_ra(&ractl, page, req_count); 1080 + page_cache_async_ra(&ractl, page_folio(page), req_count); 1080 1081 } 1081 1082 1082 1083 static inline struct folio *__readahead_folio(struct readahead_control *ractl) ··· 1153 1154 VM_BUG_ON_PAGE(PageTail(page), page); 1154 1155 array[i++] = page; 1155 1156 rac->_batch_count += thp_nr_pages(page); 1156 - 1157 - /* 1158 - * The page cache isn't using multi-index entries yet, 1159 - * so the xas cursor needs to be manually moved to the 1160 - * next index. This can be removed once the page cache 1161 - * is converted. 1162 - */ 1163 - if (PageHead(page)) 1164 - xas_set(&xas, rac->_index + rac->_batch_count); 1165 - 1166 1157 if (i == array_sz) 1167 1158 break; 1168 1159 }
+66 -1
include/linux/pagevec.h
··· 15 15 #define PAGEVEC_SIZE 15 16 16 17 17 struct page; 18 + struct folio; 18 19 struct address_space; 19 20 21 + /* Layout must match folio_batch */ 20 22 struct pagevec { 21 23 unsigned char nr; 22 24 bool percpu_pvec_drained; ··· 27 25 28 26 void __pagevec_release(struct pagevec *pvec); 29 27 void __pagevec_lru_add(struct pagevec *pvec); 30 - void pagevec_remove_exceptionals(struct pagevec *pvec); 31 28 unsigned pagevec_lookup_range(struct pagevec *pvec, 32 29 struct address_space *mapping, 33 30 pgoff_t *start, pgoff_t end); ··· 82 81 __pagevec_release(pvec); 83 82 } 84 83 84 + /** 85 + * struct folio_batch - A collection of folios. 86 + * 87 + * The folio_batch is used to amortise the cost of retrieving and 88 + * operating on a set of folios. The order of folios in the batch may be 89 + * significant (eg delete_from_page_cache_batch()). Some users of the 90 + * folio_batch store "exceptional" entries in it which can be removed 91 + * by calling folio_batch_remove_exceptionals(). 92 + */ 93 + struct folio_batch { 94 + unsigned char nr; 95 + bool percpu_pvec_drained; 96 + struct folio *folios[PAGEVEC_SIZE]; 97 + }; 98 + 99 + /* Layout must match pagevec */ 100 + static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch)); 101 + static_assert(offsetof(struct pagevec, pages) == 102 + offsetof(struct folio_batch, folios)); 103 + 104 + /** 105 + * folio_batch_init() - Initialise a batch of folios 106 + * @fbatch: The folio batch. 107 + * 108 + * A freshly initialised folio_batch contains zero folios. 109 + */ 110 + static inline void folio_batch_init(struct folio_batch *fbatch) 111 + { 112 + fbatch->nr = 0; 113 + } 114 + 115 + static inline unsigned int folio_batch_count(struct folio_batch *fbatch) 116 + { 117 + return fbatch->nr; 118 + } 119 + 120 + static inline unsigned int fbatch_space(struct folio_batch *fbatch) 121 + { 122 + return PAGEVEC_SIZE - fbatch->nr; 123 + } 124 + 125 + /** 126 + * folio_batch_add() - Add a folio to a batch. 127 + * @fbatch: The folio batch. 128 + * @folio: The folio to add. 129 + * 130 + * The folio is added to the end of the batch. 131 + * The batch must have previously been initialised using folio_batch_init(). 132 + * 133 + * Return: The number of slots still available. 134 + */ 135 + static inline unsigned folio_batch_add(struct folio_batch *fbatch, 136 + struct folio *folio) 137 + { 138 + fbatch->folios[fbatch->nr++] = folio; 139 + return fbatch_space(fbatch); 140 + } 141 + 142 + static inline void folio_batch_release(struct folio_batch *fbatch) 143 + { 144 + pagevec_release((struct pagevec *)fbatch); 145 + } 146 + 147 + void folio_batch_remove_exceptionals(struct folio_batch *fbatch); 85 148 #endif /* _LINUX_PAGEVEC_H */
+7
include/linux/uio.h
··· 7 7 8 8 #include <linux/kernel.h> 9 9 #include <linux/thread_info.h> 10 + #include <linux/mm_types.h> 10 11 #include <uapi/linux/uio.h> 11 12 12 13 struct page; ··· 146 145 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); 147 146 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); 148 147 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); 148 + 149 + static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, 150 + size_t bytes, struct iov_iter *i) 151 + { 152 + return copy_page_to_iter(&folio->page, offset, bytes, i); 153 + } 149 154 150 155 static __always_inline __must_check 151 156 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+18
include/linux/xarray.h
··· 1581 1581 } 1582 1582 1583 1583 /** 1584 + * xas_advance() - Skip over sibling entries. 1585 + * @xas: XArray operation state. 1586 + * @index: Index of last sibling entry. 1587 + * 1588 + * Move the operation state to refer to the last sibling entry. 1589 + * This is useful for loops that normally want to see sibling 1590 + * entries but sometimes want to skip them. Use xas_set() if you 1591 + * want to move to an index which is not part of this entry. 1592 + */ 1593 + static inline void xas_advance(struct xa_state *xas, unsigned long index) 1594 + { 1595 + unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0; 1596 + 1597 + xas->xa_index = index; 1598 + xas->xa_offset = (index >> shift) & XA_CHUNK_MASK; 1599 + } 1600 + 1601 + /** 1584 1602 * xas_set_order() - Set up XArray operation state for a multislot entry. 1585 1603 * @xas: XArray operation state. 1586 1604 * @index: Target of the operation.
+17 -15
include/trace/events/filemap.h
··· 15 15 16 16 DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, 17 17 18 - TP_PROTO(struct page *page), 18 + TP_PROTO(struct folio *folio), 19 19 20 - TP_ARGS(page), 20 + TP_ARGS(folio), 21 21 22 22 TP_STRUCT__entry( 23 23 __field(unsigned long, pfn) 24 24 __field(unsigned long, i_ino) 25 25 __field(unsigned long, index) 26 26 __field(dev_t, s_dev) 27 + __field(unsigned char, order) 27 28 ), 28 29 29 30 TP_fast_assign( 30 - __entry->pfn = page_to_pfn(page); 31 - __entry->i_ino = page->mapping->host->i_ino; 32 - __entry->index = page->index; 33 - if (page->mapping->host->i_sb) 34 - __entry->s_dev = page->mapping->host->i_sb->s_dev; 31 + __entry->pfn = folio_pfn(folio); 32 + __entry->i_ino = folio->mapping->host->i_ino; 33 + __entry->index = folio->index; 34 + if (folio->mapping->host->i_sb) 35 + __entry->s_dev = folio->mapping->host->i_sb->s_dev; 35 36 else 36 - __entry->s_dev = page->mapping->host->i_rdev; 37 + __entry->s_dev = folio->mapping->host->i_rdev; 38 + __entry->order = folio_order(folio); 37 39 ), 38 40 39 - TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu", 41 + TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u", 40 42 MAJOR(__entry->s_dev), MINOR(__entry->s_dev), 41 43 __entry->i_ino, 42 - pfn_to_page(__entry->pfn), 43 44 __entry->pfn, 44 - __entry->index << PAGE_SHIFT) 45 + __entry->index << PAGE_SHIFT, 46 + __entry->order) 45 47 ); 46 48 47 49 DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, 48 - TP_PROTO(struct page *page), 49 - TP_ARGS(page) 50 + TP_PROTO(struct folio *folio), 51 + TP_ARGS(folio) 50 52 ); 51 53 52 54 DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, 53 - TP_PROTO(struct page *page), 54 - TP_ARGS(page) 55 + TP_PROTO(struct folio *folio), 56 + TP_ARGS(folio) 55 57 ); 56 58 57 59 TRACE_EVENT(filemap_set_wb_err,
+14 -16
lib/iov_iter.c
··· 69 69 #define iterate_xarray(i, n, base, len, __off, STEP) { \ 70 70 __label__ __out; \ 71 71 size_t __off = 0; \ 72 - struct page *head = NULL; \ 72 + struct folio *folio; \ 73 73 loff_t start = i->xarray_start + i->iov_offset; \ 74 - unsigned offset = start % PAGE_SIZE; \ 75 74 pgoff_t index = start / PAGE_SIZE; \ 76 - int j; \ 77 - \ 78 75 XA_STATE(xas, i->xarray, index); \ 79 76 \ 77 + len = PAGE_SIZE - offset_in_page(start); \ 80 78 rcu_read_lock(); \ 81 - xas_for_each(&xas, head, ULONG_MAX) { \ 79 + xas_for_each(&xas, folio, ULONG_MAX) { \ 82 80 unsigned left; \ 83 - if (xas_retry(&xas, head)) \ 81 + size_t offset; \ 82 + if (xas_retry(&xas, folio)) \ 84 83 continue; \ 85 - if (WARN_ON(xa_is_value(head))) \ 84 + if (WARN_ON(xa_is_value(folio))) \ 86 85 break; \ 87 - if (WARN_ON(PageHuge(head))) \ 86 + if (WARN_ON(folio_test_hugetlb(folio))) \ 88 87 break; \ 89 - for (j = (head->index < index) ? index - head->index : 0; \ 90 - j < thp_nr_pages(head); j++) { \ 91 - void *kaddr = kmap_local_page(head + j); \ 92 - base = kaddr + offset; \ 93 - len = PAGE_SIZE - offset; \ 88 + offset = offset_in_folio(folio, start + __off); \ 89 + while (offset < folio_size(folio)) { \ 90 + base = kmap_local_folio(folio, offset); \ 94 91 len = min(n, len); \ 95 92 left = (STEP); \ 96 - kunmap_local(kaddr); \ 93 + kunmap_local(base); \ 97 94 len -= left; \ 98 95 __off += len; \ 99 96 n -= len; \ 100 97 if (left || n == 0) \ 101 98 goto __out; \ 102 - offset = 0; \ 99 + offset += len; \ 100 + len = PAGE_SIZE; \ 103 101 } \ 104 102 } \ 105 103 __out: \ 106 104 rcu_read_unlock(); \ 107 - i->iov_offset += __off; \ 105 + i->iov_offset += __off; \ 108 106 n = __off; \ 109 107 } 110 108
+3 -3
lib/xarray.c
··· 157 157 xas->xa_index += offset << shift; 158 158 } 159 159 160 - static void xas_advance(struct xa_state *xas) 160 + static void xas_next_offset(struct xa_state *xas) 161 161 { 162 162 xas->xa_offset++; 163 163 xas_move_index(xas, xas->xa_offset); ··· 1250 1250 xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; 1251 1251 } 1252 1252 1253 - xas_advance(xas); 1253 + xas_next_offset(xas); 1254 1254 1255 1255 while (xas->xa_node && (xas->xa_index <= max)) { 1256 1256 if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { ··· 1268 1268 if (entry && !xa_is_sibling(entry)) 1269 1269 return entry; 1270 1270 1271 - xas_advance(xas); 1271 + xas_next_offset(xas); 1272 1272 } 1273 1273 1274 1274 if (!xas->xa_node)
+481 -523
mm/filemap.c
··· 121 121 */ 122 122 123 123 static void page_cache_delete(struct address_space *mapping, 124 - struct page *page, void *shadow) 124 + struct folio *folio, void *shadow) 125 125 { 126 - XA_STATE(xas, &mapping->i_pages, page->index); 127 - unsigned int nr = 1; 126 + XA_STATE(xas, &mapping->i_pages, folio->index); 127 + long nr = 1; 128 128 129 129 mapping_set_update(&xas, mapping); 130 130 131 131 /* hugetlb pages are represented by a single entry in the xarray */ 132 - if (!PageHuge(page)) { 133 - xas_set_order(&xas, page->index, compound_order(page)); 134 - nr = compound_nr(page); 132 + if (!folio_test_hugetlb(folio)) { 133 + xas_set_order(&xas, folio->index, folio_order(folio)); 134 + nr = folio_nr_pages(folio); 135 135 } 136 136 137 - VM_BUG_ON_PAGE(!PageLocked(page), page); 138 - VM_BUG_ON_PAGE(PageTail(page), page); 139 - VM_BUG_ON_PAGE(nr != 1 && shadow, page); 137 + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 140 138 141 139 xas_store(&xas, shadow); 142 140 xas_init_marks(&xas); 143 141 144 - page->mapping = NULL; 142 + folio->mapping = NULL; 145 143 /* Leave page->index set: truncation lookup relies upon it */ 146 144 mapping->nrpages -= nr; 147 145 } 148 146 149 - static void unaccount_page_cache_page(struct address_space *mapping, 150 - struct page *page) 147 + static void filemap_unaccount_folio(struct address_space *mapping, 148 + struct folio *folio) 151 149 { 152 - int nr; 150 + long nr; 153 151 154 152 /* 155 153 * if we're uptodate, flush out into the cleancache, otherwise 156 154 * invalidate any existing cleancache entries. We can't leave 157 155 * stale data around in the cleancache once our page is gone 158 156 */ 159 - if (PageUptodate(page) && PageMappedToDisk(page)) 160 - cleancache_put_page(page); 157 + if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio)) 158 + cleancache_put_page(&folio->page); 161 159 else 162 - cleancache_invalidate_page(mapping, page); 160 + cleancache_invalidate_page(mapping, &folio->page); 163 161 164 - VM_BUG_ON_PAGE(PageTail(page), page); 165 - VM_BUG_ON_PAGE(page_mapped(page), page); 166 - if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { 162 + VM_BUG_ON_FOLIO(folio_mapped(folio), folio); 163 + if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { 167 164 int mapcount; 168 165 169 166 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", 170 - current->comm, page_to_pfn(page)); 171 - dump_page(page, "still mapped when deleted"); 167 + current->comm, folio_pfn(folio)); 168 + dump_page(&folio->page, "still mapped when deleted"); 172 169 dump_stack(); 173 170 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); 174 171 175 - mapcount = page_mapcount(page); 172 + mapcount = page_mapcount(&folio->page); 176 173 if (mapping_exiting(mapping) && 177 - page_count(page) >= mapcount + 2) { 174 + folio_ref_count(folio) >= mapcount + 2) { 178 175 /* 179 176 * All vmas have already been torn down, so it's 180 - * a good bet that actually the page is unmapped, 177 + * a good bet that actually the folio is unmapped, 181 178 * and we'd prefer not to leak it: if we're wrong, 182 179 * some other bad page check should catch it later. 183 180 */ 184 - page_mapcount_reset(page); 185 - page_ref_sub(page, mapcount); 181 + page_mapcount_reset(&folio->page); 182 + folio_ref_sub(folio, mapcount); 186 183 } 187 184 } 188 185 189 - /* hugetlb pages do not participate in page cache accounting. */ 190 - if (PageHuge(page)) 186 + /* hugetlb folios do not participate in page cache accounting. */ 187 + if (folio_test_hugetlb(folio)) 191 188 return; 192 189 193 - nr = thp_nr_pages(page); 190 + nr = folio_nr_pages(folio); 194 191 195 - __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); 196 - if (PageSwapBacked(page)) { 197 - __mod_lruvec_page_state(page, NR_SHMEM, -nr); 198 - if (PageTransHuge(page)) 199 - __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr); 200 - } else if (PageTransHuge(page)) { 201 - __mod_lruvec_page_state(page, NR_FILE_THPS, -nr); 192 + __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); 193 + if (folio_test_swapbacked(folio)) { 194 + __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); 195 + if (folio_test_pmd_mappable(folio)) 196 + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); 197 + } else if (folio_test_pmd_mappable(folio)) { 198 + __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); 202 199 filemap_nr_thps_dec(mapping); 203 200 } 204 201 205 202 /* 206 - * At this point page must be either written or cleaned by 207 - * truncate. Dirty page here signals a bug and loss of 203 + * At this point folio must be either written or cleaned by 204 + * truncate. Dirty folio here signals a bug and loss of 208 205 * unwritten data. 209 206 * 210 - * This fixes dirty accounting after removing the page entirely 211 - * but leaves PageDirty set: it has no effect for truncated 212 - * page and anyway will be cleared before returning page into 207 + * This fixes dirty accounting after removing the folio entirely 208 + * but leaves the dirty flag set: it has no effect for truncated 209 + * folio and anyway will be cleared before returning folio to 213 210 * buddy allocator. 214 211 */ 215 - if (WARN_ON_ONCE(PageDirty(page))) 216 - account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); 212 + if (WARN_ON_ONCE(folio_test_dirty(folio))) 213 + folio_account_cleaned(folio, mapping, 214 + inode_to_wb(mapping->host)); 217 215 } 218 216 219 217 /* ··· 219 221 * sure the page is locked and that nobody else uses it - or that usage 220 222 * is safe. The caller must hold the i_pages lock. 221 223 */ 222 - void __delete_from_page_cache(struct page *page, void *shadow) 224 + void __filemap_remove_folio(struct folio *folio, void *shadow) 223 225 { 224 - struct address_space *mapping = page->mapping; 226 + struct address_space *mapping = folio->mapping; 225 227 226 - trace_mm_filemap_delete_from_page_cache(page); 227 - 228 - unaccount_page_cache_page(mapping, page); 229 - page_cache_delete(mapping, page, shadow); 228 + trace_mm_filemap_delete_from_page_cache(folio); 229 + filemap_unaccount_folio(mapping, folio); 230 + page_cache_delete(mapping, folio, shadow); 230 231 } 231 232 232 - static void page_cache_free_page(struct address_space *mapping, 233 - struct page *page) 233 + void filemap_free_folio(struct address_space *mapping, struct folio *folio) 234 234 { 235 235 void (*freepage)(struct page *); 236 236 237 237 freepage = mapping->a_ops->freepage; 238 238 if (freepage) 239 - freepage(page); 239 + freepage(&folio->page); 240 240 241 - if (PageTransHuge(page) && !PageHuge(page)) { 242 - page_ref_sub(page, thp_nr_pages(page)); 243 - VM_BUG_ON_PAGE(page_count(page) <= 0, page); 241 + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) { 242 + folio_ref_sub(folio, folio_nr_pages(folio)); 243 + VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio); 244 244 } else { 245 - put_page(page); 245 + folio_put(folio); 246 246 } 247 247 } 248 248 249 249 /** 250 - * delete_from_page_cache - delete page from page cache 251 - * @page: the page which the kernel is trying to remove from page cache 250 + * filemap_remove_folio - Remove folio from page cache. 251 + * @folio: The folio. 252 252 * 253 - * This must be called only on pages that have been verified to be in the page 254 - * cache and locked. It will never put the page into the free list, the caller 255 - * has a reference on the page. 253 + * This must be called only on folios that are locked and have been 254 + * verified to be in the page cache. It will never put the folio into 255 + * the free list because the caller has a reference on the page. 256 256 */ 257 - void delete_from_page_cache(struct page *page) 257 + void filemap_remove_folio(struct folio *folio) 258 258 { 259 - struct address_space *mapping = page_mapping(page); 259 + struct address_space *mapping = folio->mapping; 260 260 261 - BUG_ON(!PageLocked(page)); 261 + BUG_ON(!folio_test_locked(folio)); 262 262 spin_lock(&mapping->host->i_lock); 263 263 xa_lock_irq(&mapping->i_pages); 264 - __delete_from_page_cache(page, NULL); 264 + __filemap_remove_folio(folio, NULL); 265 265 xa_unlock_irq(&mapping->i_pages); 266 266 if (mapping_shrinkable(mapping)) 267 267 inode_add_lru(mapping->host); 268 268 spin_unlock(&mapping->host->i_lock); 269 269 270 - page_cache_free_page(mapping, page); 270 + filemap_free_folio(mapping, folio); 271 271 } 272 - EXPORT_SYMBOL(delete_from_page_cache); 273 272 274 273 /* 275 - * page_cache_delete_batch - delete several pages from page cache 276 - * @mapping: the mapping to which pages belong 277 - * @pvec: pagevec with pages to delete 274 + * page_cache_delete_batch - delete several folios from page cache 275 + * @mapping: the mapping to which folios belong 276 + * @fbatch: batch of folios to delete 278 277 * 279 - * The function walks over mapping->i_pages and removes pages passed in @pvec 280 - * from the mapping. The function expects @pvec to be sorted by page index 281 - * and is optimised for it to be dense. 282 - * It tolerates holes in @pvec (mapping entries at those indices are not 283 - * modified). The function expects only THP head pages to be present in the 284 - * @pvec. 278 + * The function walks over mapping->i_pages and removes folios passed in 279 + * @fbatch from the mapping. The function expects @fbatch to be sorted 280 + * by page index and is optimised for it to be dense. 281 + * It tolerates holes in @fbatch (mapping entries at those indices are not 282 + * modified). 285 283 * 286 284 * The function expects the i_pages lock to be held. 287 285 */ 288 286 static void page_cache_delete_batch(struct address_space *mapping, 289 - struct pagevec *pvec) 287 + struct folio_batch *fbatch) 290 288 { 291 - XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); 292 - int total_pages = 0; 289 + XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index); 290 + long total_pages = 0; 293 291 int i = 0; 294 - struct page *page; 292 + struct folio *folio; 295 293 296 294 mapping_set_update(&xas, mapping); 297 - xas_for_each(&xas, page, ULONG_MAX) { 298 - if (i >= pagevec_count(pvec)) 295 + xas_for_each(&xas, folio, ULONG_MAX) { 296 + if (i >= folio_batch_count(fbatch)) 299 297 break; 300 298 301 299 /* A swap/dax/shadow entry got inserted? Skip it. */ 302 - if (xa_is_value(page)) 300 + if (xa_is_value(folio)) 303 301 continue; 304 302 /* 305 303 * A page got inserted in our range? Skip it. We have our ··· 304 310 * means our page has been removed, which shouldn't be 305 311 * possible because we're holding the PageLock. 306 312 */ 307 - if (page != pvec->pages[i]) { 308 - VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, 309 - page); 313 + if (folio != fbatch->folios[i]) { 314 + VM_BUG_ON_FOLIO(folio->index > 315 + fbatch->folios[i]->index, folio); 310 316 continue; 311 317 } 312 318 313 - WARN_ON_ONCE(!PageLocked(page)); 319 + WARN_ON_ONCE(!folio_test_locked(folio)); 314 320 315 - if (page->index == xas.xa_index) 316 - page->mapping = NULL; 317 - /* Leave page->index set: truncation lookup relies on it */ 321 + folio->mapping = NULL; 322 + /* Leave folio->index set: truncation lookup relies on it */ 318 323 319 - /* 320 - * Move to the next page in the vector if this is a regular 321 - * page or the index is of the last sub-page of this compound 322 - * page. 323 - */ 324 - if (page->index + compound_nr(page) - 1 == xas.xa_index) 325 - i++; 324 + i++; 326 325 xas_store(&xas, NULL); 327 - total_pages++; 326 + total_pages += folio_nr_pages(folio); 328 327 } 329 328 mapping->nrpages -= total_pages; 330 329 } 331 330 332 331 void delete_from_page_cache_batch(struct address_space *mapping, 333 - struct pagevec *pvec) 332 + struct folio_batch *fbatch) 334 333 { 335 334 int i; 336 335 337 - if (!pagevec_count(pvec)) 336 + if (!folio_batch_count(fbatch)) 338 337 return; 339 338 340 339 spin_lock(&mapping->host->i_lock); 341 340 xa_lock_irq(&mapping->i_pages); 342 - for (i = 0; i < pagevec_count(pvec); i++) { 343 - trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); 341 + for (i = 0; i < folio_batch_count(fbatch); i++) { 342 + struct folio *folio = fbatch->folios[i]; 344 343 345 - unaccount_page_cache_page(mapping, pvec->pages[i]); 344 + trace_mm_filemap_delete_from_page_cache(folio); 345 + filemap_unaccount_folio(mapping, folio); 346 346 } 347 - page_cache_delete_batch(mapping, pvec); 347 + page_cache_delete_batch(mapping, fbatch); 348 348 xa_unlock_irq(&mapping->i_pages); 349 349 if (mapping_shrinkable(mapping)) 350 350 inode_add_lru(mapping->host); 351 351 spin_unlock(&mapping->host->i_lock); 352 352 353 - for (i = 0; i < pagevec_count(pvec); i++) 354 - page_cache_free_page(mapping, pvec->pages[i]); 353 + for (i = 0; i < folio_batch_count(fbatch); i++) 354 + filemap_free_folio(mapping, fbatch->folios[i]); 355 355 } 356 356 357 357 int filemap_check_errors(struct address_space *mapping) ··· 921 933 goto error; 922 934 } 923 935 924 - trace_mm_filemap_add_to_page_cache(&folio->page); 936 + trace_mm_filemap_add_to_page_cache(folio); 925 937 return 0; 926 938 error: 927 939 folio->mapping = NULL; ··· 1221 1233 * __folio_lock() waiting on then setting PG_locked. 1222 1234 */ 1223 1235 SHARED, /* Hold ref to page and check the bit when woken, like 1224 - * wait_on_page_writeback() waiting on PG_writeback. 1236 + * folio_wait_writeback() waiting on PG_writeback. 1225 1237 */ 1226 1238 DROP, /* Drop ref to page before wait, no check when woken, 1227 - * like put_and_wait_on_page_locked() on PG_locked. 1239 + * like folio_put_wait_locked() on PG_locked. 1228 1240 */ 1229 1241 }; 1230 1242 ··· 1401 1413 EXPORT_SYMBOL(folio_wait_bit_killable); 1402 1414 1403 1415 /** 1404 - * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked 1405 - * @page: The page to wait for. 1416 + * folio_put_wait_locked - Drop a reference and wait for it to be unlocked 1417 + * @folio: The folio to wait for. 1406 1418 * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc). 1407 1419 * 1408 - * The caller should hold a reference on @page. They expect the page to 1420 + * The caller should hold a reference on @folio. They expect the page to 1409 1421 * become unlocked relatively soon, but do not wish to hold up migration 1410 - * (for example) by holding the reference while waiting for the page to 1422 + * (for example) by holding the reference while waiting for the folio to 1411 1423 * come unlocked. After this function returns, the caller should not 1412 - * dereference @page. 1424 + * dereference @folio. 1413 1425 * 1414 - * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal. 1426 + * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal. 1415 1427 */ 1416 - int put_and_wait_on_page_locked(struct page *page, int state) 1428 + int folio_put_wait_locked(struct folio *folio, int state) 1417 1429 { 1418 - return folio_wait_bit_common(page_folio(page), PG_locked, state, 1419 - DROP); 1430 + return folio_wait_bit_common(folio, PG_locked, state, DROP); 1420 1431 } 1421 1432 1422 1433 /** ··· 1940 1953 } 1941 1954 EXPORT_SYMBOL(__filemap_get_folio); 1942 1955 1943 - static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max, 1956 + static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, 1944 1957 xa_mark_t mark) 1945 1958 { 1946 - struct page *page; 1959 + struct folio *folio; 1947 1960 1948 1961 retry: 1949 1962 if (mark == XA_PRESENT) 1950 - page = xas_find(xas, max); 1963 + folio = xas_find(xas, max); 1951 1964 else 1952 - page = xas_find_marked(xas, max, mark); 1965 + folio = xas_find_marked(xas, max, mark); 1953 1966 1954 - if (xas_retry(xas, page)) 1967 + if (xas_retry(xas, folio)) 1955 1968 goto retry; 1956 1969 /* 1957 1970 * A shadow entry of a recently evicted page, a swap 1958 1971 * entry from shmem/tmpfs or a DAX entry. Return it 1959 1972 * without attempting to raise page count. 1960 1973 */ 1961 - if (!page || xa_is_value(page)) 1962 - return page; 1974 + if (!folio || xa_is_value(folio)) 1975 + return folio; 1963 1976 1964 - if (!page_cache_get_speculative(page)) 1977 + if (!folio_try_get_rcu(folio)) 1965 1978 goto reset; 1966 1979 1967 - /* Has the page moved or been split? */ 1968 - if (unlikely(page != xas_reload(xas))) { 1969 - put_page(page); 1980 + if (unlikely(folio != xas_reload(xas))) { 1981 + folio_put(folio); 1970 1982 goto reset; 1971 1983 } 1972 1984 1973 - return page; 1985 + return folio; 1974 1986 reset: 1975 1987 xas_reset(xas); 1976 1988 goto retry; ··· 1980 1994 * @mapping: The address_space to search 1981 1995 * @start: The starting page cache index 1982 1996 * @end: The final page index (inclusive). 1983 - * @pvec: Where the resulting entries are placed. 1997 + * @fbatch: Where the resulting entries are placed. 1984 1998 * @indices: The cache indices corresponding to the entries in @entries 1985 1999 * 1986 2000 * find_get_entries() will search for and return a batch of entries in 1987 - * the mapping. The entries are placed in @pvec. find_get_entries() 1988 - * takes a reference on any actual pages it returns. 2001 + * the mapping. The entries are placed in @fbatch. find_get_entries() 2002 + * takes a reference on any actual folios it returns. 1989 2003 * 1990 - * The search returns a group of mapping-contiguous page cache entries 1991 - * with ascending indexes. There may be holes in the indices due to 1992 - * not-present pages. 2004 + * The entries have ascending indexes. The indices may not be consecutive 2005 + * due to not-present entries or large folios. 1993 2006 * 1994 - * Any shadow entries of evicted pages, or swap entries from 2007 + * Any shadow entries of evicted folios, or swap entries from 1995 2008 * shmem/tmpfs, are included in the returned array. 1996 2009 * 1997 - * If it finds a Transparent Huge Page, head or tail, find_get_entries() 1998 - * stops at that page: the caller is likely to have a better way to handle 1999 - * the compound page as a whole, and then skip its extent, than repeatedly 2000 - * calling find_get_entries() to return all its tails. 2001 - * 2002 - * Return: the number of pages and shadow entries which were found. 2010 + * Return: The number of entries which were found. 2003 2011 */ 2004 2012 unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 2005 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices) 2013 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) 2006 2014 { 2007 2015 XA_STATE(xas, &mapping->i_pages, start); 2008 - struct page *page; 2009 - unsigned int ret = 0; 2010 - unsigned nr_entries = PAGEVEC_SIZE; 2016 + struct folio *folio; 2011 2017 2012 2018 rcu_read_lock(); 2013 - while ((page = find_get_entry(&xas, end, XA_PRESENT))) { 2014 - /* 2015 - * Terminate early on finding a THP, to allow the caller to 2016 - * handle it all at once; but continue if this is hugetlbfs. 2017 - */ 2018 - if (!xa_is_value(page) && PageTransHuge(page) && 2019 - !PageHuge(page)) { 2020 - page = find_subpage(page, xas.xa_index); 2021 - nr_entries = ret + 1; 2022 - } 2023 - 2024 - indices[ret] = xas.xa_index; 2025 - pvec->pages[ret] = page; 2026 - if (++ret == nr_entries) 2019 + while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { 2020 + indices[fbatch->nr] = xas.xa_index; 2021 + if (!folio_batch_add(fbatch, folio)) 2027 2022 break; 2028 2023 } 2029 2024 rcu_read_unlock(); 2030 2025 2031 - pvec->nr = ret; 2032 - return ret; 2026 + return folio_batch_count(fbatch); 2033 2027 } 2034 2028 2035 2029 /** ··· 2017 2051 * @mapping: The address_space to search. 2018 2052 * @start: The starting page cache index. 2019 2053 * @end: The final page index (inclusive). 2020 - * @pvec: Where the resulting entries are placed. 2021 - * @indices: The cache indices of the entries in @pvec. 2054 + * @fbatch: Where the resulting entries are placed. 2055 + * @indices: The cache indices of the entries in @fbatch. 2022 2056 * 2023 2057 * find_lock_entries() will return a batch of entries from @mapping. 2024 - * Swap, shadow and DAX entries are included. Pages are returned 2025 - * locked and with an incremented refcount. Pages which are locked by 2026 - * somebody else or under writeback are skipped. Only the head page of 2027 - * a THP is returned. Pages which are partially outside the range are 2028 - * not returned. 2058 + * Swap, shadow and DAX entries are included. Folios are returned 2059 + * locked and with an incremented refcount. Folios which are locked 2060 + * by somebody else or under writeback are skipped. Folios which are 2061 + * partially outside the range are not returned. 2029 2062 * 2030 2063 * The entries have ascending indexes. The indices may not be consecutive 2031 - * due to not-present entries, THP pages, pages which could not be locked 2032 - * or pages under writeback. 2064 + * due to not-present entries, large folios, folios which could not be 2065 + * locked or folios under writeback. 2033 2066 * 2034 2067 * Return: The number of entries which were found. 2035 2068 */ 2036 2069 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, 2037 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices) 2070 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) 2038 2071 { 2039 2072 XA_STATE(xas, &mapping->i_pages, start); 2040 - struct page *page; 2073 + struct folio *folio; 2041 2074 2042 2075 rcu_read_lock(); 2043 - while ((page = find_get_entry(&xas, end, XA_PRESENT))) { 2044 - if (!xa_is_value(page)) { 2045 - if (page->index < start) 2076 + while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { 2077 + if (!xa_is_value(folio)) { 2078 + if (folio->index < start) 2046 2079 goto put; 2047 - if (page->index + thp_nr_pages(page) - 1 > end) 2080 + if (folio->index + folio_nr_pages(folio) - 1 > end) 2048 2081 goto put; 2049 - if (!trylock_page(page)) 2082 + if (!folio_trylock(folio)) 2050 2083 goto put; 2051 - if (page->mapping != mapping || PageWriteback(page)) 2084 + if (folio->mapping != mapping || 2085 + folio_test_writeback(folio)) 2052 2086 goto unlock; 2053 - VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index), 2054 - page); 2087 + VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index), 2088 + folio); 2055 2089 } 2056 - indices[pvec->nr] = xas.xa_index; 2057 - if (!pagevec_add(pvec, page)) 2090 + indices[fbatch->nr] = xas.xa_index; 2091 + if (!folio_batch_add(fbatch, folio)) 2058 2092 break; 2059 - goto next; 2093 + continue; 2060 2094 unlock: 2061 - unlock_page(page); 2095 + folio_unlock(folio); 2062 2096 put: 2063 - put_page(page); 2064 - next: 2065 - if (!xa_is_value(page) && PageTransHuge(page)) { 2066 - unsigned int nr_pages = thp_nr_pages(page); 2067 - 2068 - /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ 2069 - xas_set(&xas, page->index + nr_pages); 2070 - if (xas.xa_index < nr_pages) 2071 - break; 2072 - } 2097 + folio_put(folio); 2073 2098 } 2074 2099 rcu_read_unlock(); 2075 2100 2076 - return pagevec_count(pvec); 2101 + return folio_batch_count(fbatch); 2102 + } 2103 + 2104 + static inline 2105 + bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) 2106 + { 2107 + if (!folio_test_large(folio) || folio_test_hugetlb(folio)) 2108 + return false; 2109 + if (index >= max) 2110 + return false; 2111 + return index < folio->index + folio_nr_pages(folio) - 1; 2077 2112 } 2078 2113 2079 2114 /** ··· 2103 2136 struct page **pages) 2104 2137 { 2105 2138 XA_STATE(xas, &mapping->i_pages, *start); 2106 - struct page *page; 2139 + struct folio *folio; 2107 2140 unsigned ret = 0; 2108 2141 2109 2142 if (unlikely(!nr_pages)) 2110 2143 return 0; 2111 2144 2112 2145 rcu_read_lock(); 2113 - while ((page = find_get_entry(&xas, end, XA_PRESENT))) { 2146 + while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { 2114 2147 /* Skip over shadow, swap and DAX entries */ 2115 - if (xa_is_value(page)) 2148 + if (xa_is_value(folio)) 2116 2149 continue; 2117 2150 2118 - pages[ret] = find_subpage(page, xas.xa_index); 2151 + again: 2152 + pages[ret] = folio_file_page(folio, xas.xa_index); 2119 2153 if (++ret == nr_pages) { 2120 2154 *start = xas.xa_index + 1; 2121 2155 goto out; 2156 + } 2157 + if (folio_more_pages(folio, xas.xa_index, end)) { 2158 + xas.xa_index++; 2159 + folio_ref_inc(folio); 2160 + goto again; 2122 2161 } 2123 2162 } 2124 2163 ··· 2160 2187 unsigned int nr_pages, struct page **pages) 2161 2188 { 2162 2189 XA_STATE(xas, &mapping->i_pages, index); 2163 - struct page *page; 2190 + struct folio *folio; 2164 2191 unsigned int ret = 0; 2165 2192 2166 2193 if (unlikely(!nr_pages)) 2167 2194 return 0; 2168 2195 2169 2196 rcu_read_lock(); 2170 - for (page = xas_load(&xas); page; page = xas_next(&xas)) { 2171 - if (xas_retry(&xas, page)) 2197 + for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { 2198 + if (xas_retry(&xas, folio)) 2172 2199 continue; 2173 2200 /* 2174 2201 * If the entry has been swapped out, we can stop looking. 2175 2202 * No current caller is looking for DAX entries. 2176 2203 */ 2177 - if (xa_is_value(page)) 2204 + if (xa_is_value(folio)) 2178 2205 break; 2179 2206 2180 - if (!page_cache_get_speculative(page)) 2207 + if (!folio_try_get_rcu(folio)) 2181 2208 goto retry; 2182 2209 2183 - /* Has the page moved or been split? */ 2184 - if (unlikely(page != xas_reload(&xas))) 2210 + if (unlikely(folio != xas_reload(&xas))) 2185 2211 goto put_page; 2186 2212 2187 - pages[ret] = find_subpage(page, xas.xa_index); 2213 + again: 2214 + pages[ret] = folio_file_page(folio, xas.xa_index); 2188 2215 if (++ret == nr_pages) 2189 2216 break; 2217 + if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) { 2218 + xas.xa_index++; 2219 + folio_ref_inc(folio); 2220 + goto again; 2221 + } 2190 2222 continue; 2191 2223 put_page: 2192 - put_page(page); 2224 + folio_put(folio); 2193 2225 retry: 2194 2226 xas_reset(&xas); 2195 2227 } ··· 2223 2245 struct page **pages) 2224 2246 { 2225 2247 XA_STATE(xas, &mapping->i_pages, *index); 2226 - struct page *page; 2248 + struct folio *folio; 2227 2249 unsigned ret = 0; 2228 2250 2229 2251 if (unlikely(!nr_pages)) 2230 2252 return 0; 2231 2253 2232 2254 rcu_read_lock(); 2233 - while ((page = find_get_entry(&xas, end, tag))) { 2255 + while ((folio = find_get_entry(&xas, end, tag))) { 2234 2256 /* 2235 2257 * Shadow entries should never be tagged, but this iteration 2236 2258 * is lockless so there is a window for page reclaim to evict 2237 2259 * a page we saw tagged. Skip over it. 2238 2260 */ 2239 - if (xa_is_value(page)) 2261 + if (xa_is_value(folio)) 2240 2262 continue; 2241 2263 2242 - pages[ret] = page; 2264 + pages[ret] = &folio->page; 2243 2265 if (++ret == nr_pages) { 2244 - *index = page->index + thp_nr_pages(page); 2266 + *index = folio->index + folio_nr_pages(folio); 2245 2267 goto out; 2246 2268 } 2247 2269 } ··· 2284 2306 } 2285 2307 2286 2308 /* 2287 - * filemap_get_read_batch - Get a batch of pages for read 2309 + * filemap_get_read_batch - Get a batch of folios for read 2288 2310 * 2289 - * Get a batch of pages which represent a contiguous range of bytes 2290 - * in the file. No tail pages will be returned. If @index is in the 2291 - * middle of a THP, the entire THP will be returned. The last page in 2292 - * the batch may have Readahead set or be not Uptodate so that the 2293 - * caller can take the appropriate action. 2311 + * Get a batch of folios which represent a contiguous range of bytes in 2312 + * the file. No exceptional entries will be returned. If @index is in 2313 + * the middle of a folio, the entire folio will be returned. The last 2314 + * folio in the batch may have the readahead flag set or the uptodate flag 2315 + * clear so that the caller can take the appropriate action. 2294 2316 */ 2295 2317 static void filemap_get_read_batch(struct address_space *mapping, 2296 - pgoff_t index, pgoff_t max, struct pagevec *pvec) 2318 + pgoff_t index, pgoff_t max, struct folio_batch *fbatch) 2297 2319 { 2298 2320 XA_STATE(xas, &mapping->i_pages, index); 2299 - struct page *head; 2321 + struct folio *folio; 2300 2322 2301 2323 rcu_read_lock(); 2302 - for (head = xas_load(&xas); head; head = xas_next(&xas)) { 2303 - if (xas_retry(&xas, head)) 2324 + for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { 2325 + if (xas_retry(&xas, folio)) 2304 2326 continue; 2305 - if (xas.xa_index > max || xa_is_value(head)) 2327 + if (xas.xa_index > max || xa_is_value(folio)) 2306 2328 break; 2307 - if (!page_cache_get_speculative(head)) 2329 + if (!folio_try_get_rcu(folio)) 2308 2330 goto retry; 2309 2331 2310 - /* Has the page moved or been split? */ 2311 - if (unlikely(head != xas_reload(&xas))) 2312 - goto put_page; 2332 + if (unlikely(folio != xas_reload(&xas))) 2333 + goto put_folio; 2313 2334 2314 - if (!pagevec_add(pvec, head)) 2335 + if (!folio_batch_add(fbatch, folio)) 2315 2336 break; 2316 - if (!PageUptodate(head)) 2337 + if (!folio_test_uptodate(folio)) 2317 2338 break; 2318 - if (PageReadahead(head)) 2339 + if (folio_test_readahead(folio)) 2319 2340 break; 2320 - xas.xa_index = head->index + thp_nr_pages(head) - 1; 2321 - xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK; 2341 + xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1); 2322 2342 continue; 2323 - put_page: 2324 - put_page(head); 2343 + put_folio: 2344 + folio_put(folio); 2325 2345 retry: 2326 2346 xas_reset(&xas); 2327 2347 } 2328 2348 rcu_read_unlock(); 2329 2349 } 2330 2350 2331 - static int filemap_read_page(struct file *file, struct address_space *mapping, 2332 - struct page *page) 2351 + static int filemap_read_folio(struct file *file, struct address_space *mapping, 2352 + struct folio *folio) 2333 2353 { 2334 2354 int error; 2335 2355 ··· 2336 2360 * eg. multipath errors. PG_error will be set again if readpage 2337 2361 * fails. 2338 2362 */ 2339 - ClearPageError(page); 2363 + folio_clear_error(folio); 2340 2364 /* Start the actual read. The read will unlock the page. */ 2341 - error = mapping->a_ops->readpage(file, page); 2365 + error = mapping->a_ops->readpage(file, &folio->page); 2342 2366 if (error) 2343 2367 return error; 2344 2368 2345 - error = wait_on_page_locked_killable(page); 2369 + error = folio_wait_locked_killable(folio); 2346 2370 if (error) 2347 2371 return error; 2348 - if (PageUptodate(page)) 2372 + if (folio_test_uptodate(folio)) 2349 2373 return 0; 2350 2374 shrink_readahead_size_eio(&file->f_ra); 2351 2375 return -EIO; 2352 2376 } 2353 2377 2354 2378 static bool filemap_range_uptodate(struct address_space *mapping, 2355 - loff_t pos, struct iov_iter *iter, struct page *page) 2379 + loff_t pos, struct iov_iter *iter, struct folio *folio) 2356 2380 { 2357 2381 int count; 2358 2382 2359 - if (PageUptodate(page)) 2383 + if (folio_test_uptodate(folio)) 2360 2384 return true; 2361 2385 /* pipes can't handle partially uptodate pages */ 2362 2386 if (iov_iter_is_pipe(iter)) 2363 2387 return false; 2364 2388 if (!mapping->a_ops->is_partially_uptodate) 2365 2389 return false; 2366 - if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page))) 2390 + if (mapping->host->i_blkbits >= folio_shift(folio)) 2367 2391 return false; 2368 2392 2369 2393 count = iter->count; 2370 - if (page_offset(page) > pos) { 2371 - count -= page_offset(page) - pos; 2394 + if (folio_pos(folio) > pos) { 2395 + count -= folio_pos(folio) - pos; 2372 2396 pos = 0; 2373 2397 } else { 2374 - pos -= page_offset(page); 2398 + pos -= folio_pos(folio); 2375 2399 } 2376 2400 2377 - return mapping->a_ops->is_partially_uptodate(page, pos, count); 2401 + return mapping->a_ops->is_partially_uptodate(&folio->page, pos, count); 2378 2402 } 2379 2403 2380 2404 static int filemap_update_page(struct kiocb *iocb, 2381 2405 struct address_space *mapping, struct iov_iter *iter, 2382 - struct page *page) 2406 + struct folio *folio) 2383 2407 { 2384 - struct folio *folio = page_folio(page); 2385 2408 int error; 2386 2409 2387 2410 if (iocb->ki_flags & IOCB_NOWAIT) { ··· 2396 2421 goto unlock_mapping; 2397 2422 if (!(iocb->ki_flags & IOCB_WAITQ)) { 2398 2423 filemap_invalidate_unlock_shared(mapping); 2399 - put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE); 2424 + /* 2425 + * This is where we usually end up waiting for a 2426 + * previously submitted readahead to finish. 2427 + */ 2428 + folio_put_wait_locked(folio, TASK_KILLABLE); 2400 2429 return AOP_TRUNCATED_PAGE; 2401 2430 } 2402 2431 error = __folio_lock_async(folio, iocb->ki_waitq); ··· 2413 2434 goto unlock; 2414 2435 2415 2436 error = 0; 2416 - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page)) 2437 + if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio)) 2417 2438 goto unlock; 2418 2439 2419 2440 error = -EAGAIN; 2420 2441 if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) 2421 2442 goto unlock; 2422 2443 2423 - error = filemap_read_page(iocb->ki_filp, mapping, &folio->page); 2444 + error = filemap_read_folio(iocb->ki_filp, mapping, folio); 2424 2445 goto unlock_mapping; 2425 2446 unlock: 2426 2447 folio_unlock(folio); ··· 2431 2452 return error; 2432 2453 } 2433 2454 2434 - static int filemap_create_page(struct file *file, 2455 + static int filemap_create_folio(struct file *file, 2435 2456 struct address_space *mapping, pgoff_t index, 2436 - struct pagevec *pvec) 2457 + struct folio_batch *fbatch) 2437 2458 { 2438 - struct page *page; 2459 + struct folio *folio; 2439 2460 int error; 2440 2461 2441 - page = page_cache_alloc(mapping); 2442 - if (!page) 2462 + folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0); 2463 + if (!folio) 2443 2464 return -ENOMEM; 2444 2465 2445 2466 /* 2446 - * Protect against truncate / hole punch. Grabbing invalidate_lock here 2447 - * assures we cannot instantiate and bring uptodate new pagecache pages 2448 - * after evicting page cache during truncate and before actually 2449 - * freeing blocks. Note that we could release invalidate_lock after 2450 - * inserting the page into page cache as the locked page would then be 2451 - * enough to synchronize with hole punching. But there are code paths 2452 - * such as filemap_update_page() filling in partially uptodate pages or 2453 - * ->readpages() that need to hold invalidate_lock while mapping blocks 2454 - * for IO so let's hold the lock here as well to keep locking rules 2455 - * simple. 2467 + * Protect against truncate / hole punch. Grabbing invalidate_lock 2468 + * here assures we cannot instantiate and bring uptodate new 2469 + * pagecache folios after evicting page cache during truncate 2470 + * and before actually freeing blocks. Note that we could 2471 + * release invalidate_lock after inserting the folio into 2472 + * the page cache as the locked folio would then be enough to 2473 + * synchronize with hole punching. But there are code paths 2474 + * such as filemap_update_page() filling in partially uptodate 2475 + * pages or ->readpages() that need to hold invalidate_lock 2476 + * while mapping blocks for IO so let's hold the lock here as 2477 + * well to keep locking rules simple. 2456 2478 */ 2457 2479 filemap_invalidate_lock_shared(mapping); 2458 - error = add_to_page_cache_lru(page, mapping, index, 2480 + error = filemap_add_folio(mapping, folio, index, 2459 2481 mapping_gfp_constraint(mapping, GFP_KERNEL)); 2460 2482 if (error == -EEXIST) 2461 2483 error = AOP_TRUNCATED_PAGE; 2462 2484 if (error) 2463 2485 goto error; 2464 2486 2465 - error = filemap_read_page(file, mapping, page); 2487 + error = filemap_read_folio(file, mapping, folio); 2466 2488 if (error) 2467 2489 goto error; 2468 2490 2469 2491 filemap_invalidate_unlock_shared(mapping); 2470 - pagevec_add(pvec, page); 2492 + folio_batch_add(fbatch, folio); 2471 2493 return 0; 2472 2494 error: 2473 2495 filemap_invalidate_unlock_shared(mapping); 2474 - put_page(page); 2496 + folio_put(folio); 2475 2497 return error; 2476 2498 } 2477 2499 2478 2500 static int filemap_readahead(struct kiocb *iocb, struct file *file, 2479 - struct address_space *mapping, struct page *page, 2501 + struct address_space *mapping, struct folio *folio, 2480 2502 pgoff_t last_index) 2481 2503 { 2504 + DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index); 2505 + 2482 2506 if (iocb->ki_flags & IOCB_NOIO) 2483 2507 return -EAGAIN; 2484 - page_cache_async_readahead(mapping, &file->f_ra, file, page, 2485 - page->index, last_index - page->index); 2508 + page_cache_async_ra(&ractl, folio, last_index - folio->index); 2486 2509 return 0; 2487 2510 } 2488 2511 2489 2512 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, 2490 - struct pagevec *pvec) 2513 + struct folio_batch *fbatch) 2491 2514 { 2492 2515 struct file *filp = iocb->ki_filp; 2493 2516 struct address_space *mapping = filp->f_mapping; 2494 2517 struct file_ra_state *ra = &filp->f_ra; 2495 2518 pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; 2496 2519 pgoff_t last_index; 2497 - struct page *page; 2520 + struct folio *folio; 2498 2521 int err = 0; 2499 2522 2500 2523 last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE); ··· 2504 2523 if (fatal_signal_pending(current)) 2505 2524 return -EINTR; 2506 2525 2507 - filemap_get_read_batch(mapping, index, last_index, pvec); 2508 - if (!pagevec_count(pvec)) { 2526 + filemap_get_read_batch(mapping, index, last_index, fbatch); 2527 + if (!folio_batch_count(fbatch)) { 2509 2528 if (iocb->ki_flags & IOCB_NOIO) 2510 2529 return -EAGAIN; 2511 2530 page_cache_sync_readahead(mapping, ra, filp, index, 2512 2531 last_index - index); 2513 - filemap_get_read_batch(mapping, index, last_index, pvec); 2532 + filemap_get_read_batch(mapping, index, last_index, fbatch); 2514 2533 } 2515 - if (!pagevec_count(pvec)) { 2534 + if (!folio_batch_count(fbatch)) { 2516 2535 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) 2517 2536 return -EAGAIN; 2518 - err = filemap_create_page(filp, mapping, 2519 - iocb->ki_pos >> PAGE_SHIFT, pvec); 2537 + err = filemap_create_folio(filp, mapping, 2538 + iocb->ki_pos >> PAGE_SHIFT, fbatch); 2520 2539 if (err == AOP_TRUNCATED_PAGE) 2521 2540 goto retry; 2522 2541 return err; 2523 2542 } 2524 2543 2525 - page = pvec->pages[pagevec_count(pvec) - 1]; 2526 - if (PageReadahead(page)) { 2527 - err = filemap_readahead(iocb, filp, mapping, page, last_index); 2544 + folio = fbatch->folios[folio_batch_count(fbatch) - 1]; 2545 + if (folio_test_readahead(folio)) { 2546 + err = filemap_readahead(iocb, filp, mapping, folio, last_index); 2528 2547 if (err) 2529 2548 goto err; 2530 2549 } 2531 - if (!PageUptodate(page)) { 2532 - if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1) 2550 + if (!folio_test_uptodate(folio)) { 2551 + if ((iocb->ki_flags & IOCB_WAITQ) && 2552 + folio_batch_count(fbatch) > 1) 2533 2553 iocb->ki_flags |= IOCB_NOWAIT; 2534 - err = filemap_update_page(iocb, mapping, iter, page); 2554 + err = filemap_update_page(iocb, mapping, iter, folio); 2535 2555 if (err) 2536 2556 goto err; 2537 2557 } ··· 2540 2558 return 0; 2541 2559 err: 2542 2560 if (err < 0) 2543 - put_page(page); 2544 - if (likely(--pvec->nr)) 2561 + folio_put(folio); 2562 + if (likely(--fbatch->nr)) 2545 2563 return 0; 2546 2564 if (err == AOP_TRUNCATED_PAGE) 2547 2565 goto retry; ··· 2568 2586 struct file_ra_state *ra = &filp->f_ra; 2569 2587 struct address_space *mapping = filp->f_mapping; 2570 2588 struct inode *inode = mapping->host; 2571 - struct pagevec pvec; 2589 + struct folio_batch fbatch; 2572 2590 int i, error = 0; 2573 2591 bool writably_mapped; 2574 2592 loff_t isize, end_offset; ··· 2579 2597 return 0; 2580 2598 2581 2599 iov_iter_truncate(iter, inode->i_sb->s_maxbytes); 2582 - pagevec_init(&pvec); 2600 + folio_batch_init(&fbatch); 2583 2601 2584 2602 do { 2585 2603 cond_resched(); ··· 2595 2613 if (unlikely(iocb->ki_pos >= i_size_read(inode))) 2596 2614 break; 2597 2615 2598 - error = filemap_get_pages(iocb, iter, &pvec); 2616 + error = filemap_get_pages(iocb, iter, &fbatch); 2599 2617 if (error < 0) 2600 2618 break; 2601 2619 ··· 2609 2627 */ 2610 2628 isize = i_size_read(inode); 2611 2629 if (unlikely(iocb->ki_pos >= isize)) 2612 - goto put_pages; 2630 + goto put_folios; 2613 2631 end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); 2614 2632 2615 2633 /* ··· 2624 2642 */ 2625 2643 if (iocb->ki_pos >> PAGE_SHIFT != 2626 2644 ra->prev_pos >> PAGE_SHIFT) 2627 - mark_page_accessed(pvec.pages[0]); 2645 + folio_mark_accessed(fbatch.folios[0]); 2628 2646 2629 - for (i = 0; i < pagevec_count(&pvec); i++) { 2630 - struct page *page = pvec.pages[i]; 2631 - size_t page_size = thp_size(page); 2632 - size_t offset = iocb->ki_pos & (page_size - 1); 2647 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 2648 + struct folio *folio = fbatch.folios[i]; 2649 + size_t fsize = folio_size(folio); 2650 + size_t offset = iocb->ki_pos & (fsize - 1); 2633 2651 size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos, 2634 - page_size - offset); 2652 + fsize - offset); 2635 2653 size_t copied; 2636 2654 2637 - if (end_offset < page_offset(page)) 2655 + if (end_offset < folio_pos(folio)) 2638 2656 break; 2639 2657 if (i > 0) 2640 - mark_page_accessed(page); 2658 + folio_mark_accessed(folio); 2641 2659 /* 2642 - * If users can be writing to this page using arbitrary 2643 - * virtual addresses, take care about potential aliasing 2644 - * before reading the page on the kernel side. 2660 + * If users can be writing to this folio using arbitrary 2661 + * virtual addresses, take care of potential aliasing 2662 + * before reading the folio on the kernel side. 2645 2663 */ 2646 - if (writably_mapped) { 2647 - int j; 2664 + if (writably_mapped) 2665 + flush_dcache_folio(folio); 2648 2666 2649 - for (j = 0; j < thp_nr_pages(page); j++) 2650 - flush_dcache_page(page + j); 2651 - } 2652 - 2653 - copied = copy_page_to_iter(page, offset, bytes, iter); 2667 + copied = copy_folio_to_iter(folio, offset, bytes, iter); 2654 2668 2655 2669 already_read += copied; 2656 2670 iocb->ki_pos += copied; ··· 2657 2679 break; 2658 2680 } 2659 2681 } 2660 - put_pages: 2661 - for (i = 0; i < pagevec_count(&pvec); i++) 2662 - put_page(pvec.pages[i]); 2663 - pagevec_reinit(&pvec); 2682 + put_folios: 2683 + for (i = 0; i < folio_batch_count(&fbatch); i++) 2684 + folio_put(fbatch.folios[i]); 2685 + folio_batch_init(&fbatch); 2664 2686 } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); 2665 2687 2666 2688 file_accessed(filp); ··· 2745 2767 } 2746 2768 EXPORT_SYMBOL(generic_file_read_iter); 2747 2769 2748 - static inline loff_t page_seek_hole_data(struct xa_state *xas, 2749 - struct address_space *mapping, struct page *page, 2770 + static inline loff_t folio_seek_hole_data(struct xa_state *xas, 2771 + struct address_space *mapping, struct folio *folio, 2750 2772 loff_t start, loff_t end, bool seek_data) 2751 2773 { 2752 2774 const struct address_space_operations *ops = mapping->a_ops; 2753 2775 size_t offset, bsz = i_blocksize(mapping->host); 2754 2776 2755 - if (xa_is_value(page) || PageUptodate(page)) 2777 + if (xa_is_value(folio) || folio_test_uptodate(folio)) 2756 2778 return seek_data ? start : end; 2757 2779 if (!ops->is_partially_uptodate) 2758 2780 return seek_data ? end : start; 2759 2781 2760 2782 xas_pause(xas); 2761 2783 rcu_read_unlock(); 2762 - lock_page(page); 2763 - if (unlikely(page->mapping != mapping)) 2784 + folio_lock(folio); 2785 + if (unlikely(folio->mapping != mapping)) 2764 2786 goto unlock; 2765 2787 2766 - offset = offset_in_thp(page, start) & ~(bsz - 1); 2788 + offset = offset_in_folio(folio, start) & ~(bsz - 1); 2767 2789 2768 2790 do { 2769 - if (ops->is_partially_uptodate(page, offset, bsz) == seek_data) 2791 + if (ops->is_partially_uptodate(&folio->page, offset, bsz) == 2792 + seek_data) 2770 2793 break; 2771 2794 start = (start + bsz) & ~(bsz - 1); 2772 2795 offset += bsz; 2773 - } while (offset < thp_size(page)); 2796 + } while (offset < folio_size(folio)); 2774 2797 unlock: 2775 - unlock_page(page); 2798 + folio_unlock(folio); 2776 2799 rcu_read_lock(); 2777 2800 return start; 2778 2801 } 2779 2802 2780 - static inline 2781 - unsigned int seek_page_size(struct xa_state *xas, struct page *page) 2803 + static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio) 2782 2804 { 2783 - if (xa_is_value(page)) 2805 + if (xa_is_value(folio)) 2784 2806 return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index); 2785 - return thp_size(page); 2807 + return folio_size(folio); 2786 2808 } 2787 2809 2788 2810 /** ··· 2809 2831 XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); 2810 2832 pgoff_t max = (end - 1) >> PAGE_SHIFT; 2811 2833 bool seek_data = (whence == SEEK_DATA); 2812 - struct page *page; 2834 + struct folio *folio; 2813 2835 2814 2836 if (end <= start) 2815 2837 return -ENXIO; 2816 2838 2817 2839 rcu_read_lock(); 2818 - while ((page = find_get_entry(&xas, max, XA_PRESENT))) { 2840 + while ((folio = find_get_entry(&xas, max, XA_PRESENT))) { 2819 2841 loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; 2820 - unsigned int seek_size; 2842 + size_t seek_size; 2821 2843 2822 2844 if (start < pos) { 2823 2845 if (!seek_data) ··· 2825 2847 start = pos; 2826 2848 } 2827 2849 2828 - seek_size = seek_page_size(&xas, page); 2829 - pos = round_up(pos + 1, seek_size); 2830 - start = page_seek_hole_data(&xas, mapping, page, start, pos, 2850 + seek_size = seek_folio_size(&xas, folio); 2851 + pos = round_up((u64)pos + 1, seek_size); 2852 + start = folio_seek_hole_data(&xas, mapping, folio, start, pos, 2831 2853 seek_data); 2832 2854 if (start < pos) 2833 2855 goto unlock; ··· 2835 2857 break; 2836 2858 if (seek_size > PAGE_SIZE) 2837 2859 xas_set(&xas, pos >> PAGE_SHIFT); 2838 - if (!xa_is_value(page)) 2839 - put_page(page); 2860 + if (!xa_is_value(folio)) 2861 + folio_put(folio); 2840 2862 } 2841 2863 if (seek_data) 2842 2864 start = -ENXIO; 2843 2865 unlock: 2844 2866 rcu_read_unlock(); 2845 - if (page && !xa_is_value(page)) 2846 - put_page(page); 2867 + if (folio && !xa_is_value(folio)) 2868 + folio_put(folio); 2847 2869 if (start > end) 2848 2870 return end; 2849 2871 return start; ··· 2852 2874 #ifdef CONFIG_MMU 2853 2875 #define MMAP_LOTSAMISS (100) 2854 2876 /* 2855 - * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock 2877 + * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock 2856 2878 * @vmf - the vm_fault for this fault. 2857 - * @page - the page to lock. 2879 + * @folio - the folio to lock. 2858 2880 * @fpin - the pointer to the file we may pin (or is already pinned). 2859 2881 * 2860 - * This works similar to lock_page_or_retry in that it can drop the mmap_lock. 2861 - * It differs in that it actually returns the page locked if it returns 1 and 0 2862 - * if it couldn't lock the page. If we did have to drop the mmap_lock then fpin 2863 - * will point to the pinned file and needs to be fput()'ed at a later point. 2882 + * This works similar to lock_folio_or_retry in that it can drop the 2883 + * mmap_lock. It differs in that it actually returns the folio locked 2884 + * if it returns 1 and 0 if it couldn't lock the folio. If we did have 2885 + * to drop the mmap_lock then fpin will point to the pinned file and 2886 + * needs to be fput()'ed at a later point. 2864 2887 */ 2865 - static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, 2888 + static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, 2866 2889 struct file **fpin) 2867 2890 { 2868 - struct folio *folio = page_folio(page); 2869 - 2870 2891 if (folio_trylock(folio)) 2871 2892 return 1; 2872 2893 ··· 2954 2977 * was pinned if we have to drop the mmap_lock in order to do IO. 2955 2978 */ 2956 2979 static struct file *do_async_mmap_readahead(struct vm_fault *vmf, 2957 - struct page *page) 2980 + struct folio *folio) 2958 2981 { 2959 2982 struct file *file = vmf->vma->vm_file; 2960 2983 struct file_ra_state *ra = &file->f_ra; 2961 - struct address_space *mapping = file->f_mapping; 2984 + DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff); 2962 2985 struct file *fpin = NULL; 2963 2986 unsigned int mmap_miss; 2964 - pgoff_t offset = vmf->pgoff; 2965 2987 2966 2988 /* If we don't want any read-ahead, don't bother */ 2967 2989 if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) 2968 2990 return fpin; 2991 + 2969 2992 mmap_miss = READ_ONCE(ra->mmap_miss); 2970 2993 if (mmap_miss) 2971 2994 WRITE_ONCE(ra->mmap_miss, --mmap_miss); 2972 - if (PageReadahead(page)) { 2995 + 2996 + if (folio_test_readahead(folio)) { 2973 2997 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 2974 - page_cache_async_readahead(mapping, ra, file, 2975 - page, offset, ra->ra_pages); 2998 + page_cache_async_ra(&ractl, folio, ra->ra_pages); 2976 2999 } 2977 3000 return fpin; 2978 3001 } ··· 2991 3014 * vma->vm_mm->mmap_lock must be held on entry. 2992 3015 * 2993 3016 * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock 2994 - * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). 3017 + * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap(). 2995 3018 * 2996 3019 * If our return value does not have VM_FAULT_RETRY set, the mmap_lock 2997 3020 * has not been released. ··· 3007 3030 struct file *fpin = NULL; 3008 3031 struct address_space *mapping = file->f_mapping; 3009 3032 struct inode *inode = mapping->host; 3010 - pgoff_t offset = vmf->pgoff; 3011 - pgoff_t max_off; 3012 - struct page *page; 3033 + pgoff_t max_idx, index = vmf->pgoff; 3034 + struct folio *folio; 3013 3035 vm_fault_t ret = 0; 3014 3036 bool mapping_locked = false; 3015 3037 3016 - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3017 - if (unlikely(offset >= max_off)) 3038 + max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3039 + if (unlikely(index >= max_idx)) 3018 3040 return VM_FAULT_SIGBUS; 3019 3041 3020 3042 /* 3021 3043 * Do we have something in the page cache already? 3022 3044 */ 3023 - page = find_get_page(mapping, offset); 3024 - if (likely(page)) { 3045 + folio = filemap_get_folio(mapping, index); 3046 + if (likely(folio)) { 3025 3047 /* 3026 3048 * We found the page, so try async readahead before waiting for 3027 3049 * the lock. 3028 3050 */ 3029 3051 if (!(vmf->flags & FAULT_FLAG_TRIED)) 3030 - fpin = do_async_mmap_readahead(vmf, page); 3031 - if (unlikely(!PageUptodate(page))) { 3052 + fpin = do_async_mmap_readahead(vmf, folio); 3053 + if (unlikely(!folio_test_uptodate(folio))) { 3032 3054 filemap_invalidate_lock_shared(mapping); 3033 3055 mapping_locked = true; 3034 3056 } ··· 3039 3063 fpin = do_sync_mmap_readahead(vmf); 3040 3064 retry_find: 3041 3065 /* 3042 - * See comment in filemap_create_page() why we need 3066 + * See comment in filemap_create_folio() why we need 3043 3067 * invalidate_lock 3044 3068 */ 3045 3069 if (!mapping_locked) { 3046 3070 filemap_invalidate_lock_shared(mapping); 3047 3071 mapping_locked = true; 3048 3072 } 3049 - page = pagecache_get_page(mapping, offset, 3073 + folio = __filemap_get_folio(mapping, index, 3050 3074 FGP_CREAT|FGP_FOR_MMAP, 3051 3075 vmf->gfp_mask); 3052 - if (!page) { 3076 + if (!folio) { 3053 3077 if (fpin) 3054 3078 goto out_retry; 3055 3079 filemap_invalidate_unlock_shared(mapping); ··· 3057 3081 } 3058 3082 } 3059 3083 3060 - if (!lock_page_maybe_drop_mmap(vmf, page, &fpin)) 3084 + if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin)) 3061 3085 goto out_retry; 3062 3086 3063 3087 /* Did it get truncated? */ 3064 - if (unlikely(compound_head(page)->mapping != mapping)) { 3065 - unlock_page(page); 3066 - put_page(page); 3088 + if (unlikely(folio->mapping != mapping)) { 3089 + folio_unlock(folio); 3090 + folio_put(folio); 3067 3091 goto retry_find; 3068 3092 } 3069 - VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); 3093 + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); 3070 3094 3071 3095 /* 3072 3096 * We have a locked page in the page cache, now we need to check 3073 3097 * that it's up-to-date. If not, it is going to be due to an error. 3074 3098 */ 3075 - if (unlikely(!PageUptodate(page))) { 3099 + if (unlikely(!folio_test_uptodate(folio))) { 3076 3100 /* 3077 3101 * The page was in cache and uptodate and now it is not. 3078 3102 * Strange but possible since we didn't hold the page lock all ··· 3080 3104 * try again. 3081 3105 */ 3082 3106 if (!mapping_locked) { 3083 - unlock_page(page); 3084 - put_page(page); 3107 + folio_unlock(folio); 3108 + folio_put(folio); 3085 3109 goto retry_find; 3086 3110 } 3087 3111 goto page_not_uptodate; ··· 3093 3117 * redo the fault. 3094 3118 */ 3095 3119 if (fpin) { 3096 - unlock_page(page); 3120 + folio_unlock(folio); 3097 3121 goto out_retry; 3098 3122 } 3099 3123 if (mapping_locked) ··· 3103 3127 * Found the page and have a reference on it. 3104 3128 * We must recheck i_size under page lock. 3105 3129 */ 3106 - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3107 - if (unlikely(offset >= max_off)) { 3108 - unlock_page(page); 3109 - put_page(page); 3130 + max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3131 + if (unlikely(index >= max_idx)) { 3132 + folio_unlock(folio); 3133 + folio_put(folio); 3110 3134 return VM_FAULT_SIGBUS; 3111 3135 } 3112 3136 3113 - vmf->page = page; 3137 + vmf->page = folio_file_page(folio, index); 3114 3138 return ret | VM_FAULT_LOCKED; 3115 3139 3116 3140 page_not_uptodate: ··· 3121 3145 * and we need to check for errors. 3122 3146 */ 3123 3147 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3124 - error = filemap_read_page(file, mapping, page); 3148 + error = filemap_read_folio(file, mapping, folio); 3125 3149 if (fpin) 3126 3150 goto out_retry; 3127 - put_page(page); 3151 + folio_put(folio); 3128 3152 3129 3153 if (!error || error == AOP_TRUNCATED_PAGE) 3130 3154 goto retry_find; ··· 3138 3162 * re-find the vma and come back and find our hopefully still populated 3139 3163 * page. 3140 3164 */ 3141 - if (page) 3142 - put_page(page); 3165 + if (folio) 3166 + folio_put(folio); 3143 3167 if (mapping_locked) 3144 3168 filemap_invalidate_unlock_shared(mapping); 3145 3169 if (fpin) ··· 3181 3205 return false; 3182 3206 } 3183 3207 3184 - static struct page *next_uptodate_page(struct page *page, 3208 + static struct folio *next_uptodate_page(struct folio *folio, 3185 3209 struct address_space *mapping, 3186 3210 struct xa_state *xas, pgoff_t end_pgoff) 3187 3211 { 3188 3212 unsigned long max_idx; 3189 3213 3190 3214 do { 3191 - if (!page) 3215 + if (!folio) 3192 3216 return NULL; 3193 - if (xas_retry(xas, page)) 3217 + if (xas_retry(xas, folio)) 3194 3218 continue; 3195 - if (xa_is_value(page)) 3219 + if (xa_is_value(folio)) 3196 3220 continue; 3197 - if (PageLocked(page)) 3221 + if (folio_test_locked(folio)) 3198 3222 continue; 3199 - if (!page_cache_get_speculative(page)) 3223 + if (!folio_try_get_rcu(folio)) 3200 3224 continue; 3201 3225 /* Has the page moved or been split? */ 3202 - if (unlikely(page != xas_reload(xas))) 3226 + if (unlikely(folio != xas_reload(xas))) 3203 3227 goto skip; 3204 - if (!PageUptodate(page) || PageReadahead(page)) 3228 + if (!folio_test_uptodate(folio) || folio_test_readahead(folio)) 3205 3229 goto skip; 3206 - if (!trylock_page(page)) 3230 + if (!folio_trylock(folio)) 3207 3231 goto skip; 3208 - if (page->mapping != mapping) 3232 + if (folio->mapping != mapping) 3209 3233 goto unlock; 3210 - if (!PageUptodate(page)) 3234 + if (!folio_test_uptodate(folio)) 3211 3235 goto unlock; 3212 3236 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); 3213 3237 if (xas->xa_index >= max_idx) 3214 3238 goto unlock; 3215 - return page; 3239 + return folio; 3216 3240 unlock: 3217 - unlock_page(page); 3241 + folio_unlock(folio); 3218 3242 skip: 3219 - put_page(page); 3220 - } while ((page = xas_next_entry(xas, end_pgoff)) != NULL); 3243 + folio_put(folio); 3244 + } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL); 3221 3245 3222 3246 return NULL; 3223 3247 } 3224 3248 3225 - static inline struct page *first_map_page(struct address_space *mapping, 3249 + static inline struct folio *first_map_page(struct address_space *mapping, 3226 3250 struct xa_state *xas, 3227 3251 pgoff_t end_pgoff) 3228 3252 { ··· 3230 3254 mapping, xas, end_pgoff); 3231 3255 } 3232 3256 3233 - static inline struct page *next_map_page(struct address_space *mapping, 3257 + static inline struct folio *next_map_page(struct address_space *mapping, 3234 3258 struct xa_state *xas, 3235 3259 pgoff_t end_pgoff) 3236 3260 { ··· 3247 3271 pgoff_t last_pgoff = start_pgoff; 3248 3272 unsigned long addr; 3249 3273 XA_STATE(xas, &mapping->i_pages, start_pgoff); 3250 - struct page *head, *page; 3274 + struct folio *folio; 3275 + struct page *page; 3251 3276 unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); 3252 3277 vm_fault_t ret = 0; 3253 3278 3254 3279 rcu_read_lock(); 3255 - head = first_map_page(mapping, &xas, end_pgoff); 3256 - if (!head) 3280 + folio = first_map_page(mapping, &xas, end_pgoff); 3281 + if (!folio) 3257 3282 goto out; 3258 3283 3259 - if (filemap_map_pmd(vmf, head)) { 3284 + if (filemap_map_pmd(vmf, &folio->page)) { 3260 3285 ret = VM_FAULT_NOPAGE; 3261 3286 goto out; 3262 3287 } ··· 3265 3288 addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); 3266 3289 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); 3267 3290 do { 3268 - page = find_subpage(head, xas.xa_index); 3291 + again: 3292 + page = folio_file_page(folio, xas.xa_index); 3269 3293 if (PageHWPoison(page)) 3270 3294 goto unlock; 3271 3295 ··· 3287 3309 do_set_pte(vmf, page, addr); 3288 3310 /* no need to invalidate: a not-present page won't be cached */ 3289 3311 update_mmu_cache(vma, addr, vmf->pte); 3290 - unlock_page(head); 3312 + if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { 3313 + xas.xa_index++; 3314 + folio_ref_inc(folio); 3315 + goto again; 3316 + } 3317 + folio_unlock(folio); 3291 3318 continue; 3292 3319 unlock: 3293 - unlock_page(head); 3294 - put_page(head); 3295 - } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL); 3320 + if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { 3321 + xas.xa_index++; 3322 + goto again; 3323 + } 3324 + folio_unlock(folio); 3325 + folio_put(folio); 3326 + } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL); 3296 3327 pte_unmap_unlock(vmf->pte, vmf->ptl); 3297 3328 out: 3298 3329 rcu_read_unlock(); ··· 3313 3326 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) 3314 3327 { 3315 3328 struct address_space *mapping = vmf->vma->vm_file->f_mapping; 3316 - struct page *page = vmf->page; 3329 + struct folio *folio = page_folio(vmf->page); 3317 3330 vm_fault_t ret = VM_FAULT_LOCKED; 3318 3331 3319 3332 sb_start_pagefault(mapping->host->i_sb); 3320 3333 file_update_time(vmf->vma->vm_file); 3321 - lock_page(page); 3322 - if (page->mapping != mapping) { 3323 - unlock_page(page); 3334 + folio_lock(folio); 3335 + if (folio->mapping != mapping) { 3336 + folio_unlock(folio); 3324 3337 ret = VM_FAULT_NOPAGE; 3325 3338 goto out; 3326 3339 } 3327 3340 /* 3328 - * We mark the page dirty already here so that when freeze is in 3341 + * We mark the folio dirty already here so that when freeze is in 3329 3342 * progress, we are guaranteed that writeback during freezing will 3330 - * see the dirty page and writeprotect it again. 3343 + * see the dirty folio and writeprotect it again. 3331 3344 */ 3332 - set_page_dirty(page); 3333 - wait_for_stable_page(page); 3345 + folio_mark_dirty(folio); 3346 + folio_wait_stable(folio); 3334 3347 out: 3335 3348 sb_end_pagefault(mapping->host->i_sb); 3336 3349 return ret; ··· 3383 3396 EXPORT_SYMBOL(generic_file_mmap); 3384 3397 EXPORT_SYMBOL(generic_file_readonly_mmap); 3385 3398 3386 - static struct page *wait_on_page_read(struct page *page) 3399 + static struct folio *do_read_cache_folio(struct address_space *mapping, 3400 + pgoff_t index, filler_t filler, void *data, gfp_t gfp) 3387 3401 { 3388 - if (!IS_ERR(page)) { 3389 - wait_on_page_locked(page); 3390 - if (!PageUptodate(page)) { 3391 - put_page(page); 3392 - page = ERR_PTR(-EIO); 3393 - } 3394 - } 3395 - return page; 3396 - } 3397 - 3398 - static struct page *do_read_cache_page(struct address_space *mapping, 3399 - pgoff_t index, 3400 - int (*filler)(void *, struct page *), 3401 - void *data, 3402 - gfp_t gfp) 3403 - { 3404 - struct page *page; 3402 + struct folio *folio; 3405 3403 int err; 3406 3404 repeat: 3407 - page = find_get_page(mapping, index); 3408 - if (!page) { 3409 - page = __page_cache_alloc(gfp); 3410 - if (!page) 3405 + folio = filemap_get_folio(mapping, index); 3406 + if (!folio) { 3407 + folio = filemap_alloc_folio(gfp, 0); 3408 + if (!folio) 3411 3409 return ERR_PTR(-ENOMEM); 3412 - err = add_to_page_cache_lru(page, mapping, index, gfp); 3410 + err = filemap_add_folio(mapping, folio, index, gfp); 3413 3411 if (unlikely(err)) { 3414 - put_page(page); 3412 + folio_put(folio); 3415 3413 if (err == -EEXIST) 3416 3414 goto repeat; 3417 3415 /* Presumably ENOMEM for xarray node */ ··· 3405 3433 3406 3434 filler: 3407 3435 if (filler) 3408 - err = filler(data, page); 3436 + err = filler(data, &folio->page); 3409 3437 else 3410 - err = mapping->a_ops->readpage(data, page); 3438 + err = mapping->a_ops->readpage(data, &folio->page); 3411 3439 3412 3440 if (err < 0) { 3413 - put_page(page); 3441 + folio_put(folio); 3414 3442 return ERR_PTR(err); 3415 3443 } 3416 3444 3417 - page = wait_on_page_read(page); 3418 - if (IS_ERR(page)) 3419 - return page; 3445 + folio_wait_locked(folio); 3446 + if (!folio_test_uptodate(folio)) { 3447 + folio_put(folio); 3448 + return ERR_PTR(-EIO); 3449 + } 3450 + 3420 3451 goto out; 3421 3452 } 3422 - if (PageUptodate(page)) 3453 + if (folio_test_uptodate(folio)) 3423 3454 goto out; 3424 3455 3425 - /* 3426 - * Page is not up to date and may be locked due to one of the following 3427 - * case a: Page is being filled and the page lock is held 3428 - * case b: Read/write error clearing the page uptodate status 3429 - * case c: Truncation in progress (page locked) 3430 - * case d: Reclaim in progress 3431 - * 3432 - * Case a, the page will be up to date when the page is unlocked. 3433 - * There is no need to serialise on the page lock here as the page 3434 - * is pinned so the lock gives no additional protection. Even if the 3435 - * page is truncated, the data is still valid if PageUptodate as 3436 - * it's a race vs truncate race. 3437 - * Case b, the page will not be up to date 3438 - * Case c, the page may be truncated but in itself, the data may still 3439 - * be valid after IO completes as it's a read vs truncate race. The 3440 - * operation must restart if the page is not uptodate on unlock but 3441 - * otherwise serialising on page lock to stabilise the mapping gives 3442 - * no additional guarantees to the caller as the page lock is 3443 - * released before return. 3444 - * Case d, similar to truncation. If reclaim holds the page lock, it 3445 - * will be a race with remove_mapping that determines if the mapping 3446 - * is valid on unlock but otherwise the data is valid and there is 3447 - * no need to serialise with page lock. 3448 - * 3449 - * As the page lock gives no additional guarantee, we optimistically 3450 - * wait on the page to be unlocked and check if it's up to date and 3451 - * use the page if it is. Otherwise, the page lock is required to 3452 - * distinguish between the different cases. The motivation is that we 3453 - * avoid spurious serialisations and wakeups when multiple processes 3454 - * wait on the same page for IO to complete. 3455 - */ 3456 - wait_on_page_locked(page); 3457 - if (PageUptodate(page)) 3458 - goto out; 3456 + if (!folio_trylock(folio)) { 3457 + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); 3458 + goto repeat; 3459 + } 3459 3460 3460 - /* Distinguish between all the cases under the safety of the lock */ 3461 - lock_page(page); 3462 - 3463 - /* Case c or d, restart the operation */ 3464 - if (!page->mapping) { 3465 - unlock_page(page); 3466 - put_page(page); 3461 + /* Folio was truncated from mapping */ 3462 + if (!folio->mapping) { 3463 + folio_unlock(folio); 3464 + folio_put(folio); 3467 3465 goto repeat; 3468 3466 } 3469 3467 3470 3468 /* Someone else locked and filled the page in a very small window */ 3471 - if (PageUptodate(page)) { 3472 - unlock_page(page); 3469 + if (folio_test_uptodate(folio)) { 3470 + folio_unlock(folio); 3473 3471 goto out; 3474 3472 } 3475 3473 ··· 3449 3507 * Clear page error before actual read, PG_error will be 3450 3508 * set again if read page fails. 3451 3509 */ 3452 - ClearPageError(page); 3510 + folio_clear_error(folio); 3453 3511 goto filler; 3454 3512 3455 3513 out: 3456 - mark_page_accessed(page); 3457 - return page; 3514 + folio_mark_accessed(folio); 3515 + return folio; 3458 3516 } 3459 3517 3460 3518 /** 3461 - * read_cache_page - read into page cache, fill it if needed 3519 + * read_cache_folio - read into page cache, fill it if needed 3462 3520 * @mapping: the page's address_space 3463 3521 * @index: the page index 3464 3522 * @filler: function to perform the read ··· 3473 3531 * 3474 3532 * Return: up to date page on success, ERR_PTR() on failure. 3475 3533 */ 3534 + struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index, 3535 + filler_t filler, void *data) 3536 + { 3537 + return do_read_cache_folio(mapping, index, filler, data, 3538 + mapping_gfp_mask(mapping)); 3539 + } 3540 + EXPORT_SYMBOL(read_cache_folio); 3541 + 3542 + static struct page *do_read_cache_page(struct address_space *mapping, 3543 + pgoff_t index, filler_t *filler, void *data, gfp_t gfp) 3544 + { 3545 + struct folio *folio; 3546 + 3547 + folio = do_read_cache_folio(mapping, index, filler, data, gfp); 3548 + if (IS_ERR(folio)) 3549 + return &folio->page; 3550 + return folio_file_page(folio, index); 3551 + } 3552 + 3476 3553 struct page *read_cache_page(struct address_space *mapping, 3477 - pgoff_t index, 3478 - int (*filler)(void *, struct page *), 3479 - void *data) 3554 + pgoff_t index, filler_t *filler, void *data) 3480 3555 { 3481 3556 return do_read_cache_page(mapping, index, filler, data, 3482 3557 mapping_gfp_mask(mapping)); ··· 3853 3894 EXPORT_SYMBOL(generic_file_write_iter); 3854 3895 3855 3896 /** 3856 - * try_to_release_page() - release old fs-specific metadata on a page 3897 + * filemap_release_folio() - Release fs-specific metadata on a folio. 3898 + * @folio: The folio which the kernel is trying to free. 3899 + * @gfp: Memory allocation flags (and I/O mode). 3857 3900 * 3858 - * @page: the page which the kernel is trying to free 3859 - * @gfp_mask: memory allocation flags (and I/O mode) 3901 + * The address_space is trying to release any data attached to a folio 3902 + * (presumably at folio->private). 3860 3903 * 3861 - * The address_space is to try to release any data against the page 3862 - * (presumably at page->private). 3904 + * This will also be called if the private_2 flag is set on a page, 3905 + * indicating that the folio has other metadata associated with it. 3863 3906 * 3864 - * This may also be called if PG_fscache is set on a page, indicating that the 3865 - * page is known to the local caching routines. 3907 + * The @gfp argument specifies whether I/O may be performed to release 3908 + * this page (__GFP_IO), and whether the call may block 3909 + * (__GFP_RECLAIM & __GFP_FS). 3866 3910 * 3867 - * The @gfp_mask argument specifies whether I/O may be performed to release 3868 - * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). 3869 - * 3870 - * Return: %1 if the release was successful, otherwise return zero. 3911 + * Return: %true if the release was successful, otherwise %false. 3871 3912 */ 3872 - int try_to_release_page(struct page *page, gfp_t gfp_mask) 3913 + bool filemap_release_folio(struct folio *folio, gfp_t gfp) 3873 3914 { 3874 - struct address_space * const mapping = page->mapping; 3915 + struct address_space * const mapping = folio->mapping; 3875 3916 3876 - BUG_ON(!PageLocked(page)); 3877 - if (PageWriteback(page)) 3878 - return 0; 3917 + BUG_ON(!folio_test_locked(folio)); 3918 + if (folio_test_writeback(folio)) 3919 + return false; 3879 3920 3880 3921 if (mapping && mapping->a_ops->releasepage) 3881 - return mapping->a_ops->releasepage(page, gfp_mask); 3882 - return try_to_free_buffers(page); 3922 + return mapping->a_ops->releasepage(&folio->page, gfp); 3923 + return try_to_free_buffers(&folio->page); 3883 3924 } 3884 - 3885 - EXPORT_SYMBOL(try_to_release_page); 3925 + EXPORT_SYMBOL(filemap_release_folio);
+11
mm/folio-compat.c
··· 140 140 mapping_gfp_mask(mapping)); 141 141 } 142 142 EXPORT_SYMBOL(grab_cache_page_write_begin); 143 + 144 + void delete_from_page_cache(struct page *page) 145 + { 146 + return filemap_remove_folio(page_folio(page)); 147 + } 148 + 149 + int try_to_release_page(struct page *page, gfp_t gfp) 150 + { 151 + return filemap_release_folio(page_folio(page), gfp); 152 + } 153 + EXPORT_SYMBOL(try_to_release_page);
+14 -4
mm/huge_memory.c
··· 2614 2614 { 2615 2615 struct page *head = compound_head(page); 2616 2616 struct deferred_split *ds_queue = get_deferred_split_queue(head); 2617 + XA_STATE(xas, &head->mapping->i_pages, head->index); 2617 2618 struct anon_vma *anon_vma = NULL; 2618 2619 struct address_space *mapping = NULL; 2619 2620 int extra_pins, ret; ··· 2653 2652 goto out; 2654 2653 } 2655 2654 2655 + xas_split_alloc(&xas, head, compound_order(head), 2656 + mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK); 2657 + if (xas_error(&xas)) { 2658 + ret = xas_error(&xas); 2659 + goto out; 2660 + } 2661 + 2656 2662 anon_vma = NULL; 2657 2663 i_mmap_lock_read(mapping); 2658 2664 ··· 2689 2681 /* block interrupt reentry in xa_lock and spinlock */ 2690 2682 local_irq_disable(); 2691 2683 if (mapping) { 2692 - XA_STATE(xas, &mapping->i_pages, page_index(head)); 2693 - 2694 2684 /* 2695 2685 * Check if the head page is present in page cache. 2696 2686 * We assume all tail are present too, if head is there. 2697 2687 */ 2698 - xa_lock(&mapping->i_pages); 2688 + xas_lock(&xas); 2689 + xas_reset(&xas); 2699 2690 if (xas_load(&xas) != head) 2700 2691 goto fail; 2701 2692 } ··· 2710 2703 if (mapping) { 2711 2704 int nr = thp_nr_pages(head); 2712 2705 2706 + xas_split(&xas, head, thp_order(head)); 2713 2707 if (PageSwapBacked(head)) { 2714 2708 __mod_lruvec_page_state(head, NR_SHMEM_THPS, 2715 2709 -nr); ··· 2727 2719 spin_unlock(&ds_queue->split_queue_lock); 2728 2720 fail: 2729 2721 if (mapping) 2730 - xa_unlock(&mapping->i_pages); 2722 + xas_unlock(&xas); 2731 2723 local_irq_enable(); 2732 2724 remap_page(head, thp_nr_pages(head)); 2733 2725 ret = -EBUSY; ··· 2741 2733 if (mapping) 2742 2734 i_mmap_unlock_read(mapping); 2743 2735 out: 2736 + /* Free any memory we didn't use */ 2737 + xas_nomem(&xas, 0); 2744 2738 count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); 2745 2739 return ret; 2746 2740 }
+12 -2
mm/internal.h
··· 12 12 #include <linux/pagemap.h> 13 13 #include <linux/tracepoint-defs.h> 14 14 15 + struct folio_batch; 16 + 15 17 /* 16 18 * The set of flags that only affect watermark checking and reclaim 17 19 * behaviour. This is used by the MM to obey the caller constraints ··· 76 74 return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)); 77 75 } 78 76 77 + struct zap_details; 79 78 void unmap_page_range(struct mmu_gather *tlb, 80 79 struct vm_area_struct *vma, 81 80 unsigned long addr, unsigned long end, ··· 93 90 } 94 91 95 92 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, 96 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); 93 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); 94 + unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 95 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); 96 + void filemap_free_folio(struct address_space *mapping, struct folio *folio); 97 + int truncate_inode_folio(struct address_space *mapping, struct folio *folio); 98 + bool truncate_inode_partial_folio(struct folio *folio, loff_t start, 99 + loff_t end); 97 100 98 101 /** 99 102 * folio_evictable - Test whether a folio is evictable. ··· 397 388 void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma); 398 389 399 390 #ifdef CONFIG_MMU 391 + void unmap_mapping_folio(struct folio *folio); 400 392 extern long populate_vma_page_range(struct vm_area_struct *vma, 401 393 unsigned long start, unsigned long end, int *locked); 402 394 extern long faultin_vma_page_range(struct vm_area_struct *vma, ··· 501 491 } 502 492 return fpin; 503 493 } 504 - 505 494 #else /* !CONFIG_MMU */ 495 + static inline void unmap_mapping_folio(struct folio *folio) { } 506 496 static inline void clear_page_mlock(struct page *page) { } 507 497 static inline void mlock_vma_page(struct page *page) { } 508 498 static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
+11 -1
mm/khugepaged.c
··· 1667 1667 } 1668 1668 count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); 1669 1669 1670 - /* This will be less messy when we use multi-index entries */ 1670 + /* 1671 + * Ensure we have slots for all the pages in the range. This is 1672 + * almost certainly a no-op because most of the pages must be present 1673 + */ 1671 1674 do { 1672 1675 xas_lock_irq(&xas); 1673 1676 xas_create_range(&xas); ··· 1895 1892 __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); 1896 1893 } 1897 1894 1895 + /* Join all the small entries into a single multi-index entry */ 1896 + xas_set_order(&xas, start, HPAGE_PMD_ORDER); 1897 + xas_store(&xas, new_page); 1898 1898 xa_locked: 1899 1899 xas_unlock_irq(&xas); 1900 1900 xa_unlocked: ··· 2019 2013 continue; 2020 2014 } 2021 2015 2016 + /* 2017 + * XXX: khugepaged should compact smaller compound pages 2018 + * into a PMD sized page 2019 + */ 2022 2020 if (PageTransCompound(page)) { 2023 2021 result = SCAN_PAGE_COMPOUND; 2024 2022 break;
+35 -14
mm/memory.c
··· 1304 1304 return ret; 1305 1305 } 1306 1306 1307 + /* 1308 + * Parameter block passed down to zap_pte_range in exceptional cases. 1309 + */ 1310 + struct zap_details { 1311 + struct address_space *zap_mapping; /* Check page->mapping if set */ 1312 + struct folio *single_folio; /* Locked folio to be unmapped */ 1313 + }; 1314 + 1315 + /* 1316 + * We set details->zap_mapping when we want to unmap shared but keep private 1317 + * pages. Return true if skip zapping this page, false otherwise. 1318 + */ 1319 + static inline bool 1320 + zap_skip_check_mapping(struct zap_details *details, struct page *page) 1321 + { 1322 + if (!details || !page) 1323 + return false; 1324 + 1325 + return details->zap_mapping && 1326 + (details->zap_mapping != page_rmapping(page)); 1327 + } 1328 + 1307 1329 static unsigned long zap_pte_range(struct mmu_gather *tlb, 1308 1330 struct vm_area_struct *vma, pmd_t *pmd, 1309 1331 unsigned long addr, unsigned long end, ··· 1465 1443 else if (zap_huge_pmd(tlb, vma, pmd, addr)) 1466 1444 goto next; 1467 1445 /* fall through */ 1468 - } else if (details && details->single_page && 1469 - PageTransCompound(details->single_page) && 1446 + } else if (details && details->single_folio && 1447 + folio_test_pmd_mappable(details->single_folio) && 1470 1448 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { 1471 1449 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); 1472 1450 /* ··· 3354 3332 } 3355 3333 3356 3334 /** 3357 - * unmap_mapping_page() - Unmap single page from processes. 3358 - * @page: The locked page to be unmapped. 3335 + * unmap_mapping_folio() - Unmap single folio from processes. 3336 + * @folio: The locked folio to be unmapped. 3359 3337 * 3360 - * Unmap this page from any userspace process which still has it mmaped. 3338 + * Unmap this folio from any userspace process which still has it mmaped. 3361 3339 * Typically, for efficiency, the range of nearby pages has already been 3362 3340 * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once 3363 - * truncation or invalidation holds the lock on a page, it may find that 3364 - * the page has been remapped again: and then uses unmap_mapping_page() 3341 + * truncation or invalidation holds the lock on a folio, it may find that 3342 + * the page has been remapped again: and then uses unmap_mapping_folio() 3365 3343 * to unmap it finally. 3366 3344 */ 3367 - void unmap_mapping_page(struct page *page) 3345 + void unmap_mapping_folio(struct folio *folio) 3368 3346 { 3369 - struct address_space *mapping = page->mapping; 3347 + struct address_space *mapping = folio->mapping; 3370 3348 struct zap_details details = { }; 3371 3349 pgoff_t first_index; 3372 3350 pgoff_t last_index; 3373 3351 3374 - VM_BUG_ON(!PageLocked(page)); 3375 - VM_BUG_ON(PageTail(page)); 3352 + VM_BUG_ON(!folio_test_locked(folio)); 3376 3353 3377 - first_index = page->index; 3378 - last_index = page->index + thp_nr_pages(page) - 1; 3354 + first_index = folio->index; 3355 + last_index = folio->index + folio_nr_pages(folio) - 1; 3379 3356 3380 3357 details.zap_mapping = mapping; 3381 - details.single_page = page; 3358 + details.single_folio = folio; 3382 3359 3383 3360 i_mmap_lock_write(mapping); 3384 3361 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+10 -19
mm/migrate.c
··· 291 291 { 292 292 pte_t pte; 293 293 swp_entry_t entry; 294 - struct page *page; 294 + struct folio *folio; 295 295 296 296 spin_lock(ptl); 297 297 pte = *ptep; ··· 302 302 if (!is_migration_entry(entry)) 303 303 goto out; 304 304 305 - page = pfn_swap_entry_to_page(entry); 306 - page = compound_head(page); 305 + folio = page_folio(pfn_swap_entry_to_page(entry)); 307 306 308 307 /* 309 308 * Once page cache replacement of page migration started, page_count 310 - * is zero; but we must not call put_and_wait_on_page_locked() without 311 - * a ref. Use get_page_unless_zero(), and just fault again if it fails. 309 + * is zero; but we must not call folio_put_wait_locked() without 310 + * a ref. Use folio_try_get(), and just fault again if it fails. 312 311 */ 313 - if (!get_page_unless_zero(page)) 312 + if (!folio_try_get(folio)) 314 313 goto out; 315 314 pte_unmap_unlock(ptep, ptl); 316 - put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); 315 + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); 317 316 return; 318 317 out: 319 318 pte_unmap_unlock(ptep, ptl); ··· 337 338 void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) 338 339 { 339 340 spinlock_t *ptl; 340 - struct page *page; 341 + struct folio *folio; 341 342 342 343 ptl = pmd_lock(mm, pmd); 343 344 if (!is_pmd_migration_entry(*pmd)) 344 345 goto unlock; 345 - page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)); 346 - if (!get_page_unless_zero(page)) 346 + folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd))); 347 + if (!folio_try_get(folio)) 347 348 goto unlock; 348 349 spin_unlock(ptl); 349 - put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); 350 + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); 350 351 return; 351 352 unlock: 352 353 spin_unlock(ptl); ··· 433 434 } 434 435 435 436 xas_store(&xas, newfolio); 436 - if (nr > 1) { 437 - int i; 438 - 439 - for (i = 1; i < nr; i++) { 440 - xas_next(&xas); 441 - xas_store(&xas, newfolio); 442 - } 443 - } 444 437 445 438 /* 446 439 * Drop cache reference from old page by unfreezing
+5 -1
mm/page-writeback.c
··· 2496 2496 * If warn is true, then emit a warning if the folio is not uptodate and has 2497 2497 * not been truncated. 2498 2498 * 2499 - * The caller must hold lock_page_memcg(). 2499 + * The caller must hold lock_page_memcg(). Most callers have the folio 2500 + * locked. A few have the folio blocked from truncation through other 2501 + * means (eg zap_page_range() has it mapped and is holding the page table 2502 + * lock). This can also be called from mark_buffer_dirty(), which I 2503 + * cannot prove is always protected against truncate. 2500 2504 */ 2501 2505 void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, 2502 2506 int warn)
+12 -12
mm/readahead.c
··· 196 196 * Preallocate as many pages as we will need. 197 197 */ 198 198 for (i = 0; i < nr_to_read; i++) { 199 - struct page *page = xa_load(&mapping->i_pages, index + i); 199 + struct folio *folio = xa_load(&mapping->i_pages, index + i); 200 200 201 - if (page && !xa_is_value(page)) { 201 + if (folio && !xa_is_value(folio)) { 202 202 /* 203 203 * Page already present? Kick off the current batch 204 204 * of contiguous pages before continuing with the ··· 212 212 continue; 213 213 } 214 214 215 - page = __page_cache_alloc(gfp_mask); 216 - if (!page) 215 + folio = filemap_alloc_folio(gfp_mask, 0); 216 + if (!folio) 217 217 break; 218 218 if (mapping->a_ops->readpages) { 219 - page->index = index + i; 220 - list_add(&page->lru, &page_pool); 221 - } else if (add_to_page_cache_lru(page, mapping, index + i, 219 + folio->index = index + i; 220 + list_add(&folio->lru, &page_pool); 221 + } else if (filemap_add_folio(mapping, folio, index + i, 222 222 gfp_mask) < 0) { 223 - put_page(page); 223 + folio_put(folio); 224 224 read_pages(ractl, &page_pool, true); 225 225 i = ractl->_index + ractl->_nr_pages - index - 1; 226 226 continue; 227 227 } 228 228 if (i == nr_to_read - lookahead_size) 229 - SetPageReadahead(page); 229 + folio_set_readahead(folio); 230 230 ractl->_nr_pages++; 231 231 } 232 232 ··· 581 581 EXPORT_SYMBOL_GPL(page_cache_sync_ra); 582 582 583 583 void page_cache_async_ra(struct readahead_control *ractl, 584 - struct page *page, unsigned long req_count) 584 + struct folio *folio, unsigned long req_count) 585 585 { 586 586 /* no read-ahead */ 587 587 if (!ractl->ra->ra_pages) ··· 590 590 /* 591 591 * Same bit is used for PG_readahead and PG_reclaim. 592 592 */ 593 - if (PageWriteback(page)) 593 + if (folio_test_writeback(folio)) 594 594 return; 595 595 596 - ClearPageReadahead(page); 596 + folio_clear_readahead(folio); 597 597 598 598 /* 599 599 * Defer asynchronous read-ahead on IO congestion.
+78 -96
mm/shmem.c
··· 694 694 struct mm_struct *charge_mm) 695 695 { 696 696 XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); 697 - unsigned long i = 0; 698 697 unsigned long nr = compound_nr(page); 699 698 int error; 700 699 ··· 720 721 cgroup_throttle_swaprate(page, gfp); 721 722 722 723 do { 723 - void *entry; 724 724 xas_lock_irq(&xas); 725 - entry = xas_find_conflict(&xas); 726 - if (entry != expected) 725 + if (expected != xas_find_conflict(&xas)) { 727 726 xas_set_err(&xas, -EEXIST); 728 - xas_create_range(&xas); 727 + goto unlock; 728 + } 729 + if (expected && xas_find_conflict(&xas)) { 730 + xas_set_err(&xas, -EEXIST); 731 + goto unlock; 732 + } 733 + xas_store(&xas, page); 729 734 if (xas_error(&xas)) 730 735 goto unlock; 731 - next: 732 - xas_store(&xas, page); 733 - if (++i < nr) { 734 - xas_next(&xas); 735 - goto next; 736 - } 737 736 if (PageTransHuge(page)) { 738 737 count_vm_event(THP_FILE_ALLOC); 739 738 __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr); ··· 877 880 } 878 881 } 879 882 880 - /* 881 - * Check whether a hole-punch or truncation needs to split a huge page, 882 - * returning true if no split was required, or the split has been successful. 883 - * 884 - * Eviction (or truncation to 0 size) should never need to split a huge page; 885 - * but in rare cases might do so, if shmem_undo_range() failed to trylock on 886 - * head, and then succeeded to trylock on tail. 887 - * 888 - * A split can only succeed when there are no additional references on the 889 - * huge page: so the split below relies upon find_get_entries() having stopped 890 - * when it found a subpage of the huge page, without getting further references. 891 - */ 892 - static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end) 883 + static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) 893 884 { 894 - if (!PageTransCompound(page)) 895 - return true; 885 + struct folio *folio; 886 + struct page *page; 896 887 897 - /* Just proceed to delete a huge page wholly within the range punched */ 898 - if (PageHead(page) && 899 - page->index >= start && page->index + HPAGE_PMD_NR <= end) 900 - return true; 901 - 902 - /* Try to split huge page, so we can truly punch the hole or truncate */ 903 - return split_huge_page(page) >= 0; 888 + /* 889 + * At first avoid shmem_getpage(,,,SGP_READ): that fails 890 + * beyond i_size, and reports fallocated pages as holes. 891 + */ 892 + folio = __filemap_get_folio(inode->i_mapping, index, 893 + FGP_ENTRY | FGP_LOCK, 0); 894 + if (!xa_is_value(folio)) 895 + return folio; 896 + /* 897 + * But read a page back from swap if any of it is within i_size 898 + * (although in some cases this is just a waste of time). 899 + */ 900 + page = NULL; 901 + shmem_getpage(inode, index, &page, SGP_READ); 902 + return page ? page_folio(page) : NULL; 904 903 } 905 904 906 905 /* ··· 910 917 struct shmem_inode_info *info = SHMEM_I(inode); 911 918 pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; 912 919 pgoff_t end = (lend + 1) >> PAGE_SHIFT; 913 - unsigned int partial_start = lstart & (PAGE_SIZE - 1); 914 - unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); 915 - struct pagevec pvec; 920 + struct folio_batch fbatch; 916 921 pgoff_t indices[PAGEVEC_SIZE]; 922 + struct folio *folio; 923 + bool same_folio; 917 924 long nr_swaps_freed = 0; 918 925 pgoff_t index; 919 926 int i; ··· 924 931 if (info->fallocend > start && info->fallocend <= end && !unfalloc) 925 932 info->fallocend = start; 926 933 927 - pagevec_init(&pvec); 934 + folio_batch_init(&fbatch); 928 935 index = start; 929 936 while (index < end && find_lock_entries(mapping, index, end - 1, 930 - &pvec, indices)) { 931 - for (i = 0; i < pagevec_count(&pvec); i++) { 932 - struct page *page = pvec.pages[i]; 937 + &fbatch, indices)) { 938 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 939 + folio = fbatch.folios[i]; 933 940 934 941 index = indices[i]; 935 942 936 - if (xa_is_value(page)) { 943 + if (xa_is_value(folio)) { 937 944 if (unfalloc) 938 945 continue; 939 946 nr_swaps_freed += !shmem_free_swap(mapping, 940 - index, page); 947 + index, folio); 941 948 continue; 942 949 } 943 - index += thp_nr_pages(page) - 1; 950 + index += folio_nr_pages(folio) - 1; 944 951 945 - if (!unfalloc || !PageUptodate(page)) 946 - truncate_inode_page(mapping, page); 947 - unlock_page(page); 952 + if (!unfalloc || !folio_test_uptodate(folio)) 953 + truncate_inode_folio(mapping, folio); 954 + folio_unlock(folio); 948 955 } 949 - pagevec_remove_exceptionals(&pvec); 950 - pagevec_release(&pvec); 956 + folio_batch_remove_exceptionals(&fbatch); 957 + folio_batch_release(&fbatch); 951 958 cond_resched(); 952 959 index++; 953 960 } 954 961 955 - if (partial_start) { 956 - struct page *page = NULL; 957 - shmem_getpage(inode, start - 1, &page, SGP_READ); 958 - if (page) { 959 - unsigned int top = PAGE_SIZE; 960 - if (start > end) { 961 - top = partial_end; 962 - partial_end = 0; 963 - } 964 - zero_user_segment(page, partial_start, top); 965 - set_page_dirty(page); 966 - unlock_page(page); 967 - put_page(page); 962 + same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 963 + folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); 964 + if (folio) { 965 + same_folio = lend < folio_pos(folio) + folio_size(folio); 966 + folio_mark_dirty(folio); 967 + if (!truncate_inode_partial_folio(folio, lstart, lend)) { 968 + start = folio->index + folio_nr_pages(folio); 969 + if (same_folio) 970 + end = folio->index; 968 971 } 972 + folio_unlock(folio); 973 + folio_put(folio); 974 + folio = NULL; 969 975 } 970 - if (partial_end) { 971 - struct page *page = NULL; 972 - shmem_getpage(inode, end, &page, SGP_READ); 973 - if (page) { 974 - zero_user_segment(page, 0, partial_end); 975 - set_page_dirty(page); 976 - unlock_page(page); 977 - put_page(page); 978 - } 976 + 977 + if (!same_folio) 978 + folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT); 979 + if (folio) { 980 + folio_mark_dirty(folio); 981 + if (!truncate_inode_partial_folio(folio, lstart, lend)) 982 + end = folio->index; 983 + folio_unlock(folio); 984 + folio_put(folio); 979 985 } 980 - if (start >= end) 981 - return; 982 986 983 987 index = start; 984 988 while (index < end) { 985 989 cond_resched(); 986 990 987 - if (!find_get_entries(mapping, index, end - 1, &pvec, 991 + if (!find_get_entries(mapping, index, end - 1, &fbatch, 988 992 indices)) { 989 993 /* If all gone or hole-punch or unfalloc, we're done */ 990 994 if (index == start || end != -1) ··· 990 1000 index = start; 991 1001 continue; 992 1002 } 993 - for (i = 0; i < pagevec_count(&pvec); i++) { 994 - struct page *page = pvec.pages[i]; 1003 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 1004 + folio = fbatch.folios[i]; 995 1005 996 1006 index = indices[i]; 997 - if (xa_is_value(page)) { 1007 + if (xa_is_value(folio)) { 998 1008 if (unfalloc) 999 1009 continue; 1000 - if (shmem_free_swap(mapping, index, page)) { 1010 + if (shmem_free_swap(mapping, index, folio)) { 1001 1011 /* Swap was replaced by page: retry */ 1002 1012 index--; 1003 1013 break; ··· 1006 1016 continue; 1007 1017 } 1008 1018 1009 - lock_page(page); 1019 + folio_lock(folio); 1010 1020 1011 - if (!unfalloc || !PageUptodate(page)) { 1012 - if (page_mapping(page) != mapping) { 1021 + if (!unfalloc || !folio_test_uptodate(folio)) { 1022 + if (folio_mapping(folio) != mapping) { 1013 1023 /* Page was replaced by swap: retry */ 1014 - unlock_page(page); 1024 + folio_unlock(folio); 1015 1025 index--; 1016 1026 break; 1017 1027 } 1018 - VM_BUG_ON_PAGE(PageWriteback(page), page); 1019 - if (shmem_punch_compound(page, start, end)) 1020 - truncate_inode_page(mapping, page); 1021 - else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 1022 - /* Wipe the page and don't get stuck */ 1023 - clear_highpage(page); 1024 - flush_dcache_page(page); 1025 - set_page_dirty(page); 1026 - if (index < 1027 - round_up(start, HPAGE_PMD_NR)) 1028 - start = index + 1; 1029 - } 1028 + VM_BUG_ON_FOLIO(folio_test_writeback(folio), 1029 + folio); 1030 + truncate_inode_folio(mapping, folio); 1030 1031 } 1031 - unlock_page(page); 1032 + index = folio->index + folio_nr_pages(folio) - 1; 1033 + folio_unlock(folio); 1032 1034 } 1033 - pagevec_remove_exceptionals(&pvec); 1034 - pagevec_release(&pvec); 1035 + folio_batch_remove_exceptionals(&fbatch); 1036 + folio_batch_release(&fbatch); 1035 1037 index++; 1036 1038 } 1037 1039
+13 -13
mm/swap.c
··· 1077 1077 } 1078 1078 1079 1079 /** 1080 - * pagevec_remove_exceptionals - pagevec exceptionals pruning 1081 - * @pvec: The pagevec to prune 1080 + * folio_batch_remove_exceptionals() - Prune non-folios from a batch. 1081 + * @fbatch: The batch to prune 1082 1082 * 1083 - * find_get_entries() fills both pages and XArray value entries (aka 1084 - * exceptional entries) into the pagevec. This function prunes all 1085 - * exceptionals from @pvec without leaving holes, so that it can be 1086 - * passed on to page-only pagevec operations. 1083 + * find_get_entries() fills a batch with both folios and shadow/swap/DAX 1084 + * entries. This function prunes all the non-folio entries from @fbatch 1085 + * without leaving holes, so that it can be passed on to folio-only batch 1086 + * operations. 1087 1087 */ 1088 - void pagevec_remove_exceptionals(struct pagevec *pvec) 1088 + void folio_batch_remove_exceptionals(struct folio_batch *fbatch) 1089 1089 { 1090 - int i, j; 1090 + unsigned int i, j; 1091 1091 1092 - for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 1093 - struct page *page = pvec->pages[i]; 1094 - if (!xa_is_value(page)) 1095 - pvec->pages[j++] = page; 1092 + for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { 1093 + struct folio *folio = fbatch->folios[i]; 1094 + if (!xa_is_value(folio)) 1095 + fbatch->folios[j++] = folio; 1096 1096 } 1097 - pvec->nr = j; 1097 + fbatch->nr = j; 1098 1098 } 1099 1099 1100 1100 /**
+168 -136
mm/truncate.c
··· 56 56 57 57 /* 58 58 * Unconditionally remove exceptional entries. Usually called from truncate 59 - * path. Note that the pagevec may be altered by this function by removing 60 - * exceptional entries similar to what pagevec_remove_exceptionals does. 59 + * path. Note that the folio_batch may be altered by this function by removing 60 + * exceptional entries similar to what folio_batch_remove_exceptionals() does. 61 61 */ 62 - static void truncate_exceptional_pvec_entries(struct address_space *mapping, 63 - struct pagevec *pvec, pgoff_t *indices) 62 + static void truncate_folio_batch_exceptionals(struct address_space *mapping, 63 + struct folio_batch *fbatch, pgoff_t *indices) 64 64 { 65 65 int i, j; 66 66 bool dax; ··· 69 69 if (shmem_mapping(mapping)) 70 70 return; 71 71 72 - for (j = 0; j < pagevec_count(pvec); j++) 73 - if (xa_is_value(pvec->pages[j])) 72 + for (j = 0; j < folio_batch_count(fbatch); j++) 73 + if (xa_is_value(fbatch->folios[j])) 74 74 break; 75 75 76 - if (j == pagevec_count(pvec)) 76 + if (j == folio_batch_count(fbatch)) 77 77 return; 78 78 79 79 dax = dax_mapping(mapping); ··· 82 82 xa_lock_irq(&mapping->i_pages); 83 83 } 84 84 85 - for (i = j; i < pagevec_count(pvec); i++) { 86 - struct page *page = pvec->pages[i]; 85 + for (i = j; i < folio_batch_count(fbatch); i++) { 86 + struct folio *folio = fbatch->folios[i]; 87 87 pgoff_t index = indices[i]; 88 88 89 - if (!xa_is_value(page)) { 90 - pvec->pages[j++] = page; 89 + if (!xa_is_value(folio)) { 90 + fbatch->folios[j++] = folio; 91 91 continue; 92 92 } 93 93 ··· 96 96 continue; 97 97 } 98 98 99 - __clear_shadow_entry(mapping, index, page); 99 + __clear_shadow_entry(mapping, index, folio); 100 100 } 101 101 102 102 if (!dax) { ··· 105 105 inode_add_lru(mapping->host); 106 106 spin_unlock(&mapping->host->i_lock); 107 107 } 108 - pvec->nr = j; 108 + fbatch->nr = j; 109 109 } 110 110 111 111 /* ··· 177 177 * its lock, b) when a concurrent invalidate_mapping_pages got there first and 178 178 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 179 179 */ 180 - static void truncate_cleanup_page(struct page *page) 180 + static void truncate_cleanup_folio(struct folio *folio) 181 181 { 182 - if (page_mapped(page)) 183 - unmap_mapping_page(page); 182 + if (folio_mapped(folio)) 183 + unmap_mapping_folio(folio); 184 184 185 - if (page_has_private(page)) 186 - do_invalidatepage(page, 0, thp_size(page)); 185 + if (folio_has_private(folio)) 186 + do_invalidatepage(&folio->page, 0, folio_size(folio)); 187 187 188 188 /* 189 189 * Some filesystems seem to re-dirty the page even after 190 190 * the VM has canceled the dirty bit (eg ext3 journaling). 191 191 * Hence dirty accounting check is placed after invalidation. 192 192 */ 193 - cancel_dirty_page(page); 194 - ClearPageMappedToDisk(page); 193 + folio_cancel_dirty(folio); 194 + folio_clear_mappedtodisk(folio); 195 195 } 196 196 197 197 /* ··· 218 218 return ret; 219 219 } 220 220 221 - int truncate_inode_page(struct address_space *mapping, struct page *page) 221 + int truncate_inode_folio(struct address_space *mapping, struct folio *folio) 222 222 { 223 - VM_BUG_ON_PAGE(PageTail(page), page); 224 - 225 - if (page->mapping != mapping) 223 + if (folio->mapping != mapping) 226 224 return -EIO; 227 225 228 - truncate_cleanup_page(page); 229 - delete_from_page_cache(page); 226 + truncate_cleanup_folio(folio); 227 + filemap_remove_folio(folio); 230 228 return 0; 229 + } 230 + 231 + /* 232 + * Handle partial folios. The folio may be entirely within the 233 + * range if a split has raced with us. If not, we zero the part of the 234 + * folio that's within the [start, end] range, and then split the folio if 235 + * it's large. split_page_range() will discard pages which now lie beyond 236 + * i_size, and we rely on the caller to discard pages which lie within a 237 + * newly created hole. 238 + * 239 + * Returns false if splitting failed so the caller can avoid 240 + * discarding the entire folio which is stubbornly unsplit. 241 + */ 242 + bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) 243 + { 244 + loff_t pos = folio_pos(folio); 245 + unsigned int offset, length; 246 + 247 + if (pos < start) 248 + offset = start - pos; 249 + else 250 + offset = 0; 251 + length = folio_size(folio); 252 + if (pos + length <= (u64)end) 253 + length = length - offset; 254 + else 255 + length = end + 1 - pos - offset; 256 + 257 + folio_wait_writeback(folio); 258 + if (length == folio_size(folio)) { 259 + truncate_inode_folio(folio->mapping, folio); 260 + return true; 261 + } 262 + 263 + /* 264 + * We may be zeroing pages we're about to discard, but it avoids 265 + * doing a complex calculation here, and then doing the zeroing 266 + * anyway if the page split fails. 267 + */ 268 + folio_zero_range(folio, offset, length); 269 + 270 + cleancache_invalidate_page(folio->mapping, &folio->page); 271 + if (folio_has_private(folio)) 272 + do_invalidatepage(&folio->page, offset, length); 273 + if (!folio_test_large(folio)) 274 + return true; 275 + if (split_huge_page(&folio->page) == 0) 276 + return true; 277 + if (folio_test_dirty(folio)) 278 + return false; 279 + truncate_inode_folio(folio->mapping, folio); 280 + return true; 231 281 } 232 282 233 283 /* ··· 285 235 */ 286 236 int generic_error_remove_page(struct address_space *mapping, struct page *page) 287 237 { 238 + VM_BUG_ON_PAGE(PageTail(page), page); 239 + 288 240 if (!mapping) 289 241 return -EINVAL; 290 242 /* ··· 295 243 */ 296 244 if (!S_ISREG(mapping->host->i_mode)) 297 245 return -EIO; 298 - return truncate_inode_page(mapping, page); 246 + return truncate_inode_folio(mapping, page_folio(page)); 299 247 } 300 248 EXPORT_SYMBOL(generic_error_remove_page); 301 249 ··· 346 294 { 347 295 pgoff_t start; /* inclusive */ 348 296 pgoff_t end; /* exclusive */ 349 - unsigned int partial_start; /* inclusive */ 350 - unsigned int partial_end; /* exclusive */ 351 - struct pagevec pvec; 297 + struct folio_batch fbatch; 352 298 pgoff_t indices[PAGEVEC_SIZE]; 353 299 pgoff_t index; 354 300 int i; 301 + struct folio *folio; 302 + bool same_folio; 355 303 356 304 if (mapping_empty(mapping)) 357 305 goto out; 358 - 359 - /* Offsets within partial pages */ 360 - partial_start = lstart & (PAGE_SIZE - 1); 361 - partial_end = (lend + 1) & (PAGE_SIZE - 1); 362 306 363 307 /* 364 308 * 'start' and 'end' always covers the range of pages to be fully ··· 373 325 else 374 326 end = (lend + 1) >> PAGE_SHIFT; 375 327 376 - pagevec_init(&pvec); 328 + folio_batch_init(&fbatch); 377 329 index = start; 378 330 while (index < end && find_lock_entries(mapping, index, end - 1, 379 - &pvec, indices)) { 380 - index = indices[pagevec_count(&pvec) - 1] + 1; 381 - truncate_exceptional_pvec_entries(mapping, &pvec, indices); 382 - for (i = 0; i < pagevec_count(&pvec); i++) 383 - truncate_cleanup_page(pvec.pages[i]); 384 - delete_from_page_cache_batch(mapping, &pvec); 385 - for (i = 0; i < pagevec_count(&pvec); i++) 386 - unlock_page(pvec.pages[i]); 387 - pagevec_release(&pvec); 331 + &fbatch, indices)) { 332 + index = indices[folio_batch_count(&fbatch) - 1] + 1; 333 + truncate_folio_batch_exceptionals(mapping, &fbatch, indices); 334 + for (i = 0; i < folio_batch_count(&fbatch); i++) 335 + truncate_cleanup_folio(fbatch.folios[i]); 336 + delete_from_page_cache_batch(mapping, &fbatch); 337 + for (i = 0; i < folio_batch_count(&fbatch); i++) 338 + folio_unlock(fbatch.folios[i]); 339 + folio_batch_release(&fbatch); 388 340 cond_resched(); 389 341 } 390 342 391 - if (partial_start) { 392 - struct page *page = find_lock_page(mapping, start - 1); 393 - if (page) { 394 - unsigned int top = PAGE_SIZE; 395 - if (start > end) { 396 - /* Truncation within a single page */ 397 - top = partial_end; 398 - partial_end = 0; 399 - } 400 - wait_on_page_writeback(page); 401 - zero_user_segment(page, partial_start, top); 402 - cleancache_invalidate_page(mapping, page); 403 - if (page_has_private(page)) 404 - do_invalidatepage(page, partial_start, 405 - top - partial_start); 406 - unlock_page(page); 407 - put_page(page); 343 + same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 344 + folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); 345 + if (folio) { 346 + same_folio = lend < folio_pos(folio) + folio_size(folio); 347 + if (!truncate_inode_partial_folio(folio, lstart, lend)) { 348 + start = folio->index + folio_nr_pages(folio); 349 + if (same_folio) 350 + end = folio->index; 408 351 } 352 + folio_unlock(folio); 353 + folio_put(folio); 354 + folio = NULL; 409 355 } 410 - if (partial_end) { 411 - struct page *page = find_lock_page(mapping, end); 412 - if (page) { 413 - wait_on_page_writeback(page); 414 - zero_user_segment(page, 0, partial_end); 415 - cleancache_invalidate_page(mapping, page); 416 - if (page_has_private(page)) 417 - do_invalidatepage(page, 0, 418 - partial_end); 419 - unlock_page(page); 420 - put_page(page); 421 - } 356 + 357 + if (!same_folio) 358 + folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT, 359 + FGP_LOCK, 0); 360 + if (folio) { 361 + if (!truncate_inode_partial_folio(folio, lstart, lend)) 362 + end = folio->index; 363 + folio_unlock(folio); 364 + folio_put(folio); 422 365 } 423 - /* 424 - * If the truncation happened within a single page no pages 425 - * will be released, just zeroed, so we can bail out now. 426 - */ 427 - if (start >= end) 428 - goto out; 429 366 430 367 index = start; 431 - for ( ; ; ) { 368 + while (index < end) { 432 369 cond_resched(); 433 - if (!find_get_entries(mapping, index, end - 1, &pvec, 370 + if (!find_get_entries(mapping, index, end - 1, &fbatch, 434 371 indices)) { 435 372 /* If all gone from start onwards, we're done */ 436 373 if (index == start) ··· 425 392 continue; 426 393 } 427 394 428 - for (i = 0; i < pagevec_count(&pvec); i++) { 429 - struct page *page = pvec.pages[i]; 395 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 396 + struct folio *folio = fbatch.folios[i]; 430 397 431 398 /* We rely upon deletion not changing page->index */ 432 399 index = indices[i]; 433 400 434 - if (xa_is_value(page)) 401 + if (xa_is_value(folio)) 435 402 continue; 436 403 437 - lock_page(page); 438 - WARN_ON(page_to_index(page) != index); 439 - wait_on_page_writeback(page); 440 - truncate_inode_page(mapping, page); 441 - unlock_page(page); 404 + folio_lock(folio); 405 + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); 406 + folio_wait_writeback(folio); 407 + truncate_inode_folio(mapping, folio); 408 + folio_unlock(folio); 409 + index = folio_index(folio) + folio_nr_pages(folio) - 1; 442 410 } 443 - truncate_exceptional_pvec_entries(mapping, &pvec, indices); 444 - pagevec_release(&pvec); 411 + truncate_folio_batch_exceptionals(mapping, &fbatch, indices); 412 + folio_batch_release(&fbatch); 445 413 index++; 446 414 } 447 415 ··· 513 479 pgoff_t start, pgoff_t end, unsigned long *nr_pagevec) 514 480 { 515 481 pgoff_t indices[PAGEVEC_SIZE]; 516 - struct pagevec pvec; 482 + struct folio_batch fbatch; 517 483 pgoff_t index = start; 518 484 unsigned long ret; 519 485 unsigned long count = 0; 520 486 int i; 521 487 522 - pagevec_init(&pvec); 523 - while (find_lock_entries(mapping, index, end, &pvec, indices)) { 524 - for (i = 0; i < pagevec_count(&pvec); i++) { 525 - struct page *page = pvec.pages[i]; 488 + folio_batch_init(&fbatch); 489 + while (find_lock_entries(mapping, index, end, &fbatch, indices)) { 490 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 491 + struct page *page = &fbatch.folios[i]->page; 526 492 527 493 /* We rely upon deletion not changing page->index */ 528 494 index = indices[i]; ··· 549 515 } 550 516 count += ret; 551 517 } 552 - pagevec_remove_exceptionals(&pvec); 553 - pagevec_release(&pvec); 518 + folio_batch_remove_exceptionals(&fbatch); 519 + folio_batch_release(&fbatch); 554 520 cond_resched(); 555 521 index++; 556 522 } ··· 602 568 * shrink_page_list() has a temp ref on them, or because they're transiently 603 569 * sitting in the lru_cache_add() pagevecs. 604 570 */ 605 - static int 606 - invalidate_complete_page2(struct address_space *mapping, struct page *page) 571 + static int invalidate_complete_folio2(struct address_space *mapping, 572 + struct folio *folio) 607 573 { 608 - if (page->mapping != mapping) 574 + if (folio->mapping != mapping) 609 575 return 0; 610 576 611 - if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) 577 + if (folio_has_private(folio) && 578 + !filemap_release_folio(folio, GFP_KERNEL)) 612 579 return 0; 613 580 614 581 spin_lock(&mapping->host->i_lock); 615 582 xa_lock_irq(&mapping->i_pages); 616 - if (PageDirty(page)) 583 + if (folio_test_dirty(folio)) 617 584 goto failed; 618 585 619 - BUG_ON(page_has_private(page)); 620 - __delete_from_page_cache(page, NULL); 586 + BUG_ON(folio_has_private(folio)); 587 + __filemap_remove_folio(folio, NULL); 621 588 xa_unlock_irq(&mapping->i_pages); 622 589 if (mapping_shrinkable(mapping)) 623 590 inode_add_lru(mapping->host); 624 591 spin_unlock(&mapping->host->i_lock); 625 592 626 - if (mapping->a_ops->freepage) 627 - mapping->a_ops->freepage(page); 628 - 629 - put_page(page); /* pagecache ref */ 593 + filemap_free_folio(mapping, folio); 630 594 return 1; 631 595 failed: 632 596 xa_unlock_irq(&mapping->i_pages); ··· 632 600 return 0; 633 601 } 634 602 635 - static int do_launder_page(struct address_space *mapping, struct page *page) 603 + static int do_launder_folio(struct address_space *mapping, struct folio *folio) 636 604 { 637 - if (!PageDirty(page)) 605 + if (!folio_test_dirty(folio)) 638 606 return 0; 639 - if (page->mapping != mapping || mapping->a_ops->launder_page == NULL) 607 + if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL) 640 608 return 0; 641 - return mapping->a_ops->launder_page(page); 609 + return mapping->a_ops->launder_page(&folio->page); 642 610 } 643 611 644 612 /** ··· 656 624 pgoff_t start, pgoff_t end) 657 625 { 658 626 pgoff_t indices[PAGEVEC_SIZE]; 659 - struct pagevec pvec; 627 + struct folio_batch fbatch; 660 628 pgoff_t index; 661 629 int i; 662 630 int ret = 0; ··· 666 634 if (mapping_empty(mapping)) 667 635 goto out; 668 636 669 - pagevec_init(&pvec); 637 + folio_batch_init(&fbatch); 670 638 index = start; 671 - while (find_get_entries(mapping, index, end, &pvec, indices)) { 672 - for (i = 0; i < pagevec_count(&pvec); i++) { 673 - struct page *page = pvec.pages[i]; 639 + while (find_get_entries(mapping, index, end, &fbatch, indices)) { 640 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 641 + struct folio *folio = fbatch.folios[i]; 674 642 675 - /* We rely upon deletion not changing page->index */ 643 + /* We rely upon deletion not changing folio->index */ 676 644 index = indices[i]; 677 645 678 - if (xa_is_value(page)) { 646 + if (xa_is_value(folio)) { 679 647 if (!invalidate_exceptional_entry2(mapping, 680 - index, page)) 648 + index, folio)) 681 649 ret = -EBUSY; 682 650 continue; 683 651 } 684 652 685 - if (!did_range_unmap && page_mapped(page)) { 653 + if (!did_range_unmap && folio_mapped(folio)) { 686 654 /* 687 - * If page is mapped, before taking its lock, 655 + * If folio is mapped, before taking its lock, 688 656 * zap the rest of the file in one hit. 689 657 */ 690 658 unmap_mapping_pages(mapping, index, ··· 692 660 did_range_unmap = 1; 693 661 } 694 662 695 - lock_page(page); 696 - WARN_ON(page_to_index(page) != index); 697 - if (page->mapping != mapping) { 698 - unlock_page(page); 663 + folio_lock(folio); 664 + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); 665 + if (folio->mapping != mapping) { 666 + folio_unlock(folio); 699 667 continue; 700 668 } 701 - wait_on_page_writeback(page); 669 + folio_wait_writeback(folio); 702 670 703 - if (page_mapped(page)) 704 - unmap_mapping_page(page); 705 - BUG_ON(page_mapped(page)); 671 + if (folio_mapped(folio)) 672 + unmap_mapping_folio(folio); 673 + BUG_ON(folio_mapped(folio)); 706 674 707 - ret2 = do_launder_page(mapping, page); 675 + ret2 = do_launder_folio(mapping, folio); 708 676 if (ret2 == 0) { 709 - if (!invalidate_complete_page2(mapping, page)) 677 + if (!invalidate_complete_folio2(mapping, folio)) 710 678 ret2 = -EBUSY; 711 679 } 712 680 if (ret2 < 0) 713 681 ret = ret2; 714 - unlock_page(page); 682 + folio_unlock(folio); 715 683 } 716 - pagevec_remove_exceptionals(&pvec); 717 - pagevec_release(&pvec); 684 + folio_batch_remove_exceptionals(&fbatch); 685 + folio_batch_release(&fbatch); 718 686 cond_resched(); 719 687 index++; 720 688 }