Merge tag 'folio-5.17' of git://git.infradead.org/users/willy/pagecache

+2

fs/f2fs/f2fs.h

··· 28 28 #include <linux/fscrypt.h> 29 29 #include <linux/fsverity.h> 30 30 31 + struct pagevec; 32 + 31 33 #ifdef CONFIG_F2FS_CHECK_FS 32 34 #define f2fs_bug_on(sbi, condition) BUG_ON(condition) 33 35 #else

+13 -11

fs/fs-writeback.c

··· 372 372 { 373 373 struct address_space *mapping = inode->i_mapping; 374 374 XA_STATE(xas, &mapping->i_pages, 0); 375 - struct page *page; 375 + struct folio *folio; 376 376 bool switched = false; 377 377 378 378 spin_lock(&inode->i_lock); ··· 389 389 390 390 /* 391 391 * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points 392 - * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to 393 - * pages actually under writeback. 392 + * to possibly dirty folios while PAGECACHE_TAG_WRITEBACK points to 393 + * folios actually under writeback. 394 394 */ 395 - xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) { 396 - if (PageDirty(page)) { 397 - dec_wb_stat(old_wb, WB_RECLAIMABLE); 398 - inc_wb_stat(new_wb, WB_RECLAIMABLE); 395 + xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_DIRTY) { 396 + if (folio_test_dirty(folio)) { 397 + long nr = folio_nr_pages(folio); 398 + wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr); 399 + wb_stat_mod(new_wb, WB_RECLAIMABLE, nr); 399 400 } 400 401 } 401 402 402 403 xas_set(&xas, 0); 403 - xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { 404 - WARN_ON_ONCE(!PageWriteback(page)); 405 - dec_wb_stat(old_wb, WB_WRITEBACK); 406 - inc_wb_stat(new_wb, WB_WRITEBACK); 404 + xas_for_each_marked(&xas, folio, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) { 405 + long nr = folio_nr_pages(folio); 406 + WARN_ON_ONCE(!folio_test_writeback(folio)); 407 + wb_stat_mod(old_wb, WB_WRITEBACK, -nr); 408 + wb_stat_mod(new_wb, WB_WRITEBACK, nr); 407 409 } 408 410 409 411 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {

+54 -60

fs/remap_range.c

··· 146 146 } 147 147 148 148 /* Read a page's worth of file data into the page cache. */ 149 - static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) 149 + static struct folio *vfs_dedupe_get_folio(struct inode *inode, loff_t pos) 150 150 { 151 - struct page *page; 151 + struct folio *folio; 152 152 153 - page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL); 154 - if (IS_ERR(page)) 155 - return page; 156 - if (!PageUptodate(page)) { 157 - put_page(page); 153 + folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT, NULL); 154 + if (IS_ERR(folio)) 155 + return folio; 156 + if (!folio_test_uptodate(folio)) { 157 + folio_put(folio); 158 158 return ERR_PTR(-EIO); 159 159 } 160 - return page; 160 + return folio; 161 161 } 162 162 163 163 /* 164 - * Lock two pages, ensuring that we lock in offset order if the pages are from 165 - * the same file. 164 + * Lock two folios, ensuring that we lock in offset order if the folios 165 + * are from the same file. 166 166 */ 167 - static void vfs_lock_two_pages(struct page *page1, struct page *page2) 167 + static void vfs_lock_two_folios(struct folio *folio1, struct folio *folio2) 168 168 { 169 169 /* Always lock in order of increasing index. */ 170 - if (page1->index > page2->index) 171 - swap(page1, page2); 170 + if (folio1->index > folio2->index) 171 + swap(folio1, folio2); 172 172 173 - lock_page(page1); 174 - if (page1 != page2) 175 - lock_page(page2); 173 + folio_lock(folio1); 174 + if (folio1 != folio2) 175 + folio_lock(folio2); 176 176 } 177 177 178 - /* Unlock two pages, being careful not to unlock the same page twice. */ 179 - static void vfs_unlock_two_pages(struct page *page1, struct page *page2) 178 + /* Unlock two folios, being careful not to unlock the same folio twice. 
*/ 179 + static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2) 180 180 { 181 - unlock_page(page1); 182 - if (page1 != page2) 183 - unlock_page(page2); 181 + folio_unlock(folio1); 182 + if (folio1 != folio2) 183 + folio_unlock(folio2); 184 184 } 185 185 186 186 /* ··· 188 188 * Caller must have locked both inodes to prevent write races. 189 189 */ 190 190 static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, 191 - struct inode *dest, loff_t destoff, 191 + struct inode *dest, loff_t dstoff, 192 192 loff_t len, bool *is_same) 193 193 { 194 - loff_t src_poff; 195 - loff_t dest_poff; 196 - void *src_addr; 197 - void *dest_addr; 198 - struct page *src_page; 199 - struct page *dest_page; 200 - loff_t cmp_len; 201 - bool same; 202 - int error; 194 + bool same = true; 195 + int error = -EINVAL; 203 196 204 - error = -EINVAL; 205 - same = true; 206 197 while (len) { 207 - src_poff = srcoff & (PAGE_SIZE - 1); 208 - dest_poff = destoff & (PAGE_SIZE - 1); 209 - cmp_len = min(PAGE_SIZE - src_poff, 210 - PAGE_SIZE - dest_poff); 198 + struct folio *src_folio, *dst_folio; 199 + void *src_addr, *dst_addr; 200 + loff_t cmp_len = min(PAGE_SIZE - offset_in_page(srcoff), 201 + PAGE_SIZE - offset_in_page(dstoff)); 202 + 211 203 cmp_len = min(cmp_len, len); 212 204 if (cmp_len <= 0) 213 205 goto out_error; 214 206 215 - src_page = vfs_dedupe_get_page(src, srcoff); 216 - if (IS_ERR(src_page)) { 217 - error = PTR_ERR(src_page); 207 + src_folio = vfs_dedupe_get_folio(src, srcoff); 208 + if (IS_ERR(src_folio)) { 209 + error = PTR_ERR(src_folio); 218 210 goto out_error; 219 211 } 220 - dest_page = vfs_dedupe_get_page(dest, destoff); 221 - if (IS_ERR(dest_page)) { 222 - error = PTR_ERR(dest_page); 223 - put_page(src_page); 212 + dst_folio = vfs_dedupe_get_folio(dest, dstoff); 213 + if (IS_ERR(dst_folio)) { 214 + error = PTR_ERR(dst_folio); 215 + folio_put(src_folio); 224 216 goto out_error; 225 217 } 226 218 227 - vfs_lock_two_pages(src_page, 
dest_page); 219 + vfs_lock_two_folios(src_folio, dst_folio); 228 220 229 221 /* 230 - * Now that we've locked both pages, make sure they're still 222 + * Now that we've locked both folios, make sure they're still 231 223 * mapped to the file data we're interested in. If not, 232 224 * someone is invalidating pages on us and we lose. 233 225 */ 234 - if (!PageUptodate(src_page) || !PageUptodate(dest_page) || 235 - src_page->mapping != src->i_mapping || 236 - dest_page->mapping != dest->i_mapping) { 226 + if (!folio_test_uptodate(src_folio) || !folio_test_uptodate(dst_folio) || 227 + src_folio->mapping != src->i_mapping || 228 + dst_folio->mapping != dest->i_mapping) { 237 229 same = false; 238 230 goto unlock; 239 231 } 240 232 241 - src_addr = kmap_atomic(src_page); 242 - dest_addr = kmap_atomic(dest_page); 233 + src_addr = kmap_local_folio(src_folio, 234 + offset_in_folio(src_folio, srcoff)); 235 + dst_addr = kmap_local_folio(dst_folio, 236 + offset_in_folio(dst_folio, dstoff)); 243 237 244 - flush_dcache_page(src_page); 245 - flush_dcache_page(dest_page); 238 + flush_dcache_folio(src_folio); 239 + flush_dcache_folio(dst_folio); 246 240 247 - if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) 241 + if (memcmp(src_addr, dst_addr, cmp_len)) 248 242 same = false; 249 243 250 - kunmap_atomic(dest_addr); 251 - kunmap_atomic(src_addr); 244 + kunmap_local(dst_addr); 245 + kunmap_local(src_addr); 252 246 unlock: 253 - vfs_unlock_two_pages(src_page, dest_page); 254 - put_page(dest_page); 255 - put_page(src_page); 247 + vfs_unlock_two_folios(src_folio, dst_folio); 248 + folio_put(dst_folio); 249 + folio_put(src_folio); 256 250 257 251 if (!same) 258 252 break; 259 253 260 254 srcoff += cmp_len; 261 - destoff += cmp_len; 255 + dstoff += cmp_len; 262 256 len -= cmp_len; 263 257 } 264 258

+14

include/linux/huge_mm.h

··· 274 274 return 1; 275 275 } 276 276 277 + /** 278 + * folio_test_pmd_mappable - Can we map this folio with a PMD? 279 + * @folio: The folio to test 280 + */ 281 + static inline bool folio_test_pmd_mappable(struct folio *folio) 282 + { 283 + return folio_order(folio) >= HPAGE_PMD_ORDER; 284 + } 285 + 277 286 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, 278 287 pmd_t *pmd, int flags, struct dev_pagemap **pgmap); 279 288 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, ··· 346 337 { 347 338 VM_BUG_ON_PGFLAGS(PageTail(page), page); 348 339 return 1; 340 + } 341 + 342 + static inline bool folio_test_pmd_mappable(struct folio *folio) 343 + { 344 + return false; 349 345 } 350 346 351 347 static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)

+21 -47

include/linux/mm.h

··· 714 714 struct mmu_gather; 715 715 struct inode; 716 716 717 + static inline unsigned int compound_order(struct page *page) 718 + { 719 + if (!PageHead(page)) 720 + return 0; 721 + return page[1].compound_order; 722 + } 723 + 724 + /** 725 + * folio_order - The allocation order of a folio. 726 + * @folio: The folio. 727 + * 728 + * A folio is composed of 2^order pages. See get_order() for the definition 729 + * of order. 730 + * 731 + * Return: The order of the folio. 732 + */ 733 + static inline unsigned int folio_order(struct folio *folio) 734 + { 735 + return compound_order(&folio->page); 736 + } 737 + 717 738 #include <linux/huge_mm.h> 718 739 719 740 /* ··· 932 911 { 933 912 VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); 934 913 compound_page_dtors[page[1].compound_dtor](page); 935 - } 936 - 937 - static inline unsigned int compound_order(struct page *page) 938 - { 939 - if (!PageHead(page)) 940 - return 0; 941 - return page[1].compound_order; 942 - } 943 - 944 - /** 945 - * folio_order - The allocation order of a folio. 946 - * @folio: The folio. 947 - * 948 - * A folio is composed of 2^order pages. See get_order() for the definition 949 - * of order. 950 - * 951 - * Return: The order of the folio. 952 - */ 953 - static inline unsigned int folio_order(struct folio *folio) 954 - { 955 - return compound_order(&folio->page); 956 914 } 957 915 958 916 static inline bool hpage_pincount_available(struct page *page) ··· 1837 1837 extern int user_shm_lock(size_t, struct ucounts *); 1838 1838 extern void user_shm_unlock(size_t, struct ucounts *); 1839 1839 1840 - /* 1841 - * Parameter block passed down to zap_pte_range in exceptional cases. 1842 - */ 1843 - struct zap_details { 1844 - struct address_space *zap_mapping; /* Check page->mapping if set */ 1845 - struct page *single_page; /* Locked page to be unmapped */ 1846 - }; 1847 - 1848 - /* 1849 - * We set details->zap_mappings when we want to unmap shared but keep private 1850 - * pages. 
Return true if skip zapping this page, false otherwise. 1851 - */ 1852 - static inline bool 1853 - zap_skip_check_mapping(struct zap_details *details, struct page *page) 1854 - { 1855 - if (!details || !page) 1856 - return false; 1857 - 1858 - return details->zap_mapping && 1859 - (details->zap_mapping != page_rmapping(page)); 1860 - } 1861 - 1862 1840 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, 1863 1841 pte_t pte); 1864 1842 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, ··· 1871 1893 extern void truncate_setsize(struct inode *inode, loff_t newsize); 1872 1894 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); 1873 1895 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); 1874 - int truncate_inode_page(struct address_space *mapping, struct page *page); 1875 1896 int generic_error_remove_page(struct address_space *mapping, struct page *page); 1876 1897 int invalidate_inode_page(struct page *page); 1877 1898 ··· 1881 1904 extern int fixup_user_fault(struct mm_struct *mm, 1882 1905 unsigned long address, unsigned int fault_flags, 1883 1906 bool *unlocked); 1884 - void unmap_mapping_page(struct page *page); 1885 1907 void unmap_mapping_pages(struct address_space *mapping, 1886 1908 pgoff_t start, pgoff_t nr, bool even_cows); 1887 1909 void unmap_mapping_range(struct address_space *mapping, ··· 1901 1925 BUG(); 1902 1926 return -EFAULT; 1903 1927 } 1904 - static inline void unmap_mapping_page(struct page *page) { } 1905 1928 static inline void unmap_mapping_pages(struct address_space *mapping, 1906 1929 pgoff_t start, pgoff_t nr, bool even_cows) { } 1907 1930 static inline void unmap_mapping_range(struct address_space *mapping, ··· 1957 1982 struct page **pages); 1958 1983 struct page *get_dump_page(unsigned long addr); 1959 1984 1960 - extern int try_to_release_page(struct page * page, gfp_t gfp_mask); 1961 1985 extern void do_invalidatepage(struct page 
*page, unsigned int offset, 1962 1986 unsigned int length); 1963 1987

+10 -3

include/linux/page-flags.h

··· 68 68 * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as 69 69 * a result of MADV_FREE). 70 70 * 71 - * PG_uptodate tells whether the page's contents is valid. When a read 72 - * completes, the page becomes uptodate, unless a disk I/O error happened. 73 - * 74 71 * PG_referenced, PG_reclaim are used for page reclaim for anonymous and 75 72 * file-backed pagecache (see mm/vmscan.c). 76 73 * ··· 612 615 613 616 u64 stable_page_flags(struct page *page); 614 617 618 + /** 619 + * folio_test_uptodate - Is this folio up to date? 620 + * @folio: The folio. 621 + * 622 + * The uptodate flag is set on a folio when every byte in the folio is 623 + * at least as new as the corresponding bytes on storage. Anonymous 624 + * and CoW folios are always uptodate. If the folio is not uptodate, 625 + * some of the bytes in it may be; see the is_partially_uptodate() 626 + * address_space operation. 627 + */ 615 628 static inline bool folio_test_uptodate(struct folio *folio) 616 629 { 617 630 bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));

+24 -35

include/linux/pagemap.h

··· 16 16 #include <linux/hardirq.h> /* for in_interrupt() */ 17 17 #include <linux/hugetlb_inline.h> 18 18 19 - struct pagevec; 19 + struct folio_batch; 20 20 21 21 static inline bool mapping_empty(struct address_space *mapping) 22 22 { ··· 511 511 mapping_gfp_mask(mapping)); 512 512 } 513 513 514 - /* Does this page contain this index? */ 515 - static inline bool thp_contains(struct page *head, pgoff_t index) 516 - { 517 - /* HugeTLBfs indexes the page cache in units of hpage_size */ 518 - if (PageHuge(head)) 519 - return head->index == index; 520 - return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); 521 - } 522 - 523 514 #define swapcache_index(folio) __page_file_index(&(folio)->page) 524 515 525 516 /** ··· 591 600 return head + (index & (thp_nr_pages(head) - 1)); 592 601 } 593 602 594 - unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 595 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); 596 603 unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, 597 604 pgoff_t end, unsigned int nr_pages, 598 605 struct page **pages); ··· 626 637 return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); 627 638 } 628 639 629 - extern struct page * read_cache_page(struct address_space *mapping, 630 - pgoff_t index, filler_t *filler, void *data); 640 + struct folio *read_cache_folio(struct address_space *, pgoff_t index, 641 + filler_t *filler, void *data); 642 + struct page *read_cache_page(struct address_space *, pgoff_t index, 643 + filler_t *filler, void *data); 631 644 extern struct page * read_cache_page_gfp(struct address_space *mapping, 632 645 pgoff_t index, gfp_t gfp_mask); 633 646 extern int read_cache_pages(struct address_space *mapping, ··· 639 648 pgoff_t index, void *data) 640 649 { 641 650 return read_cache_page(mapping, index, NULL, data); 651 + } 652 + 653 + static inline struct folio *read_mapping_folio(struct address_space *mapping, 654 + pgoff_t index, void *data) 655 + { 656 + 
return read_cache_folio(mapping, index, NULL, data); 642 657 } 643 658 644 659 /* ··· 864 867 return folio_wait_locked_killable(page_folio(page)); 865 868 } 866 869 867 - int put_and_wait_on_page_locked(struct page *page, int state); 870 + int folio_put_wait_locked(struct folio *folio, int state); 868 871 void wait_on_page_writeback(struct page *page); 869 872 void folio_wait_writeback(struct folio *folio); 870 873 int folio_wait_writeback_killable(struct folio *folio); ··· 880 883 } 881 884 void folio_account_cleaned(struct folio *folio, struct address_space *mapping, 882 885 struct bdi_writeback *wb); 883 - static inline void account_page_cleaned(struct page *page, 884 - struct address_space *mapping, struct bdi_writeback *wb) 885 - { 886 - return folio_account_cleaned(page_folio(page), mapping, wb); 887 - } 888 886 void __folio_cancel_dirty(struct folio *folio); 889 887 static inline void folio_cancel_dirty(struct folio *folio) 890 888 { ··· 926 934 pgoff_t index, gfp_t gfp); 927 935 int filemap_add_folio(struct address_space *mapping, struct folio *folio, 928 936 pgoff_t index, gfp_t gfp); 929 - extern void delete_from_page_cache(struct page *page); 930 - extern void __delete_from_page_cache(struct page *page, void *shadow); 937 + void filemap_remove_folio(struct folio *folio); 938 + void delete_from_page_cache(struct page *page); 939 + void __filemap_remove_folio(struct folio *folio, void *shadow); 940 + static inline void __delete_from_page_cache(struct page *page, void *shadow) 941 + { 942 + __filemap_remove_folio(page_folio(page), shadow); 943 + } 931 944 void replace_page_cache_page(struct page *old, struct page *new); 932 945 void delete_from_page_cache_batch(struct address_space *mapping, 933 - struct pagevec *pvec); 946 + struct folio_batch *fbatch); 947 + int try_to_release_page(struct page *page, gfp_t gfp); 948 + bool filemap_release_folio(struct folio *folio, gfp_t gfp); 934 949 loff_t mapping_seek_hole_data(struct address_space *, loff_t start, 
loff_t end, 935 950 int whence); 936 951 ··· 1029 1030 void page_cache_ra_unbounded(struct readahead_control *, 1030 1031 unsigned long nr_to_read, unsigned long lookahead_count); 1031 1032 void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); 1032 - void page_cache_async_ra(struct readahead_control *, struct page *, 1033 + void page_cache_async_ra(struct readahead_control *, struct folio *, 1033 1034 unsigned long req_count); 1034 1035 void readahead_expand(struct readahead_control *ractl, 1035 1036 loff_t new_start, size_t new_len); ··· 1076 1077 struct page *page, pgoff_t index, unsigned long req_count) 1077 1078 { 1078 1079 DEFINE_READAHEAD(ractl, file, ra, mapping, index); 1079 - page_cache_async_ra(&ractl, page, req_count); 1080 + page_cache_async_ra(&ractl, page_folio(page), req_count); 1080 1081 } 1081 1082 1082 1083 static inline struct folio *__readahead_folio(struct readahead_control *ractl) ··· 1153 1154 VM_BUG_ON_PAGE(PageTail(page), page); 1154 1155 array[i++] = page; 1155 1156 rac->_batch_count += thp_nr_pages(page); 1156 - 1157 - /* 1158 - * The page cache isn't using multi-index entries yet, 1159 - * so the xas cursor needs to be manually moved to the 1160 - * next index. This can be removed once the page cache 1161 - * is converted. 1162 - */ 1163 - if (PageHead(page)) 1164 - xas_set(&xas, rac->_index + rac->_batch_count); 1165 - 1166 1157 if (i == array_sz) 1167 1158 break; 1168 1159 }

+66 -1

include/linux/pagevec.h

··· 15 15 #define PAGEVEC_SIZE 15 16 16 17 17 struct page; 18 + struct folio; 18 19 struct address_space; 19 20 21 + /* Layout must match folio_batch */ 20 22 struct pagevec { 21 23 unsigned char nr; 22 24 bool percpu_pvec_drained; ··· 27 25 28 26 void __pagevec_release(struct pagevec *pvec); 29 27 void __pagevec_lru_add(struct pagevec *pvec); 30 - void pagevec_remove_exceptionals(struct pagevec *pvec); 31 28 unsigned pagevec_lookup_range(struct pagevec *pvec, 32 29 struct address_space *mapping, 33 30 pgoff_t *start, pgoff_t end); ··· 82 81 __pagevec_release(pvec); 83 82 } 84 83 84 + /** 85 + * struct folio_batch - A collection of folios. 86 + * 87 + * The folio_batch is used to amortise the cost of retrieving and 88 + * operating on a set of folios. The order of folios in the batch may be 89 + * significant (eg delete_from_page_cache_batch()). Some users of the 90 + * folio_batch store "exceptional" entries in it which can be removed 91 + * by calling folio_batch_remove_exceptionals(). 92 + */ 93 + struct folio_batch { 94 + unsigned char nr; 95 + bool percpu_pvec_drained; 96 + struct folio *folios[PAGEVEC_SIZE]; 97 + }; 98 + 99 + /* Layout must match pagevec */ 100 + static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch)); 101 + static_assert(offsetof(struct pagevec, pages) == 102 + offsetof(struct folio_batch, folios)); 103 + 104 + /** 105 + * folio_batch_init() - Initialise a batch of folios 106 + * @fbatch: The folio batch. 107 + * 108 + * A freshly initialised folio_batch contains zero folios. 109 + */ 110 + static inline void folio_batch_init(struct folio_batch *fbatch) 111 + { 112 + fbatch->nr = 0; 113 + } 114 + 115 + static inline unsigned int folio_batch_count(struct folio_batch *fbatch) 116 + { 117 + return fbatch->nr; 118 + } 119 + 120 + static inline unsigned int fbatch_space(struct folio_batch *fbatch) 121 + { 122 + return PAGEVEC_SIZE - fbatch->nr; 123 + } 124 + 125 + /** 126 + * folio_batch_add() - Add a folio to a batch. 
127 + * @fbatch: The folio batch. 128 + * @folio: The folio to add. 129 + * 130 + * The folio is added to the end of the batch. 131 + * The batch must have previously been initialised using folio_batch_init(). 132 + * 133 + * Return: The number of slots still available. 134 + */ 135 + static inline unsigned folio_batch_add(struct folio_batch *fbatch, 136 + struct folio *folio) 137 + { 138 + fbatch->folios[fbatch->nr++] = folio; 139 + return fbatch_space(fbatch); 140 + } 141 + 142 + static inline void folio_batch_release(struct folio_batch *fbatch) 143 + { 144 + pagevec_release((struct pagevec *)fbatch); 145 + } 146 + 147 + void folio_batch_remove_exceptionals(struct folio_batch *fbatch); 85 148 #endif /* _LINUX_PAGEVEC_H */

+7

include/linux/uio.h

··· 7 7 8 8 #include <linux/kernel.h> 9 9 #include <linux/thread_info.h> 10 + #include <linux/mm_types.h> 10 11 #include <uapi/linux/uio.h> 11 12 12 13 struct page; ··· 146 145 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); 147 146 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); 148 147 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); 148 + 149 + static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, 150 + size_t bytes, struct iov_iter *i) 151 + { 152 + return copy_page_to_iter(&folio->page, offset, bytes, i); 153 + } 149 154 150 155 static __always_inline __must_check 151 156 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)

+18

include/linux/xarray.h

··· 1581 1581 } 1582 1582 1583 1583 /** 1584 + * xas_advance() - Skip over sibling entries. 1585 + * @xas: XArray operation state. 1586 + * @index: Index of last sibling entry. 1587 + * 1588 + * Move the operation state to refer to the last sibling entry. 1589 + * This is useful for loops that normally want to see sibling 1590 + * entries but sometimes want to skip them. Use xas_set() if you 1591 + * want to move to an index which is not part of this entry. 1592 + */ 1593 + static inline void xas_advance(struct xa_state *xas, unsigned long index) 1594 + { 1595 + unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0; 1596 + 1597 + xas->xa_index = index; 1598 + xas->xa_offset = (index >> shift) & XA_CHUNK_MASK; 1599 + } 1600 + 1601 + /** 1584 1602 * xas_set_order() - Set up XArray operation state for a multislot entry. 1585 1603 * @xas: XArray operation state. 1586 1604 * @index: Target of the operation.

+17 -15

include/trace/events/filemap.h

··· 15 15 16 16 DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, 17 17 18 - TP_PROTO(struct page *page), 18 + TP_PROTO(struct folio *folio), 19 19 20 - TP_ARGS(page), 20 + TP_ARGS(folio), 21 21 22 22 TP_STRUCT__entry( 23 23 __field(unsigned long, pfn) 24 24 __field(unsigned long, i_ino) 25 25 __field(unsigned long, index) 26 26 __field(dev_t, s_dev) 27 + __field(unsigned char, order) 27 28 ), 28 29 29 30 TP_fast_assign( 30 - __entry->pfn = page_to_pfn(page); 31 - __entry->i_ino = page->mapping->host->i_ino; 32 - __entry->index = page->index; 33 - if (page->mapping->host->i_sb) 34 - __entry->s_dev = page->mapping->host->i_sb->s_dev; 31 + __entry->pfn = folio_pfn(folio); 32 + __entry->i_ino = folio->mapping->host->i_ino; 33 + __entry->index = folio->index; 34 + if (folio->mapping->host->i_sb) 35 + __entry->s_dev = folio->mapping->host->i_sb->s_dev; 35 36 else 36 - __entry->s_dev = page->mapping->host->i_rdev; 37 + __entry->s_dev = folio->mapping->host->i_rdev; 38 + __entry->order = folio_order(folio); 37 39 ), 38 40 39 - TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu", 41 + TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u", 40 42 MAJOR(__entry->s_dev), MINOR(__entry->s_dev), 41 43 __entry->i_ino, 42 - pfn_to_page(__entry->pfn), 43 44 __entry->pfn, 44 - __entry->index << PAGE_SHIFT) 45 + __entry->index << PAGE_SHIFT, 46 + __entry->order) 45 47 ); 46 48 47 49 DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, 48 - TP_PROTO(struct page *page), 49 - TP_ARGS(page) 50 + TP_PROTO(struct folio *folio), 51 + TP_ARGS(folio) 50 52 ); 51 53 52 54 DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, 53 - TP_PROTO(struct page *page), 54 - TP_ARGS(page) 55 + TP_PROTO(struct folio *folio), 56 + TP_ARGS(folio) 55 57 ); 56 58 57 59 TRACE_EVENT(filemap_set_wb_err,

+14 -16

lib/iov_iter.c

··· 69 69 #define iterate_xarray(i, n, base, len, __off, STEP) { \ 70 70 __label__ __out; \ 71 71 size_t __off = 0; \ 72 - struct page *head = NULL; \ 72 + struct folio *folio; \ 73 73 loff_t start = i->xarray_start + i->iov_offset; \ 74 - unsigned offset = start % PAGE_SIZE; \ 75 74 pgoff_t index = start / PAGE_SIZE; \ 76 - int j; \ 77 - \ 78 75 XA_STATE(xas, i->xarray, index); \ 79 76 \ 77 + len = PAGE_SIZE - offset_in_page(start); \ 80 78 rcu_read_lock(); \ 81 - xas_for_each(&xas, head, ULONG_MAX) { \ 79 + xas_for_each(&xas, folio, ULONG_MAX) { \ 82 80 unsigned left; \ 83 - if (xas_retry(&xas, head)) \ 81 + size_t offset; \ 82 + if (xas_retry(&xas, folio)) \ 84 83 continue; \ 85 - if (WARN_ON(xa_is_value(head))) \ 84 + if (WARN_ON(xa_is_value(folio))) \ 86 85 break; \ 87 - if (WARN_ON(PageHuge(head))) \ 86 + if (WARN_ON(folio_test_hugetlb(folio))) \ 88 87 break; \ 89 - for (j = (head->index < index) ? index - head->index : 0; \ 90 - j < thp_nr_pages(head); j++) { \ 91 - void *kaddr = kmap_local_page(head + j); \ 92 - base = kaddr + offset; \ 93 - len = PAGE_SIZE - offset; \ 88 + offset = offset_in_folio(folio, start + __off); \ 89 + while (offset < folio_size(folio)) { \ 90 + base = kmap_local_folio(folio, offset); \ 94 91 len = min(n, len); \ 95 92 left = (STEP); \ 96 - kunmap_local(kaddr); \ 93 + kunmap_local(base); \ 97 94 len -= left; \ 98 95 __off += len; \ 99 96 n -= len; \ 100 97 if (left || n == 0) \ 101 98 goto __out; \ 102 - offset = 0; \ 99 + offset += len; \ 100 + len = PAGE_SIZE; \ 103 101 } \ 104 102 } \ 105 103 __out: \ 106 104 rcu_read_unlock(); \ 107 - i->iov_offset += __off; \ 105 + i->iov_offset += __off; \ 108 106 n = __off; \ 109 107 } 110 108

+3 -3

lib/xarray.c

··· 157 157 xas->xa_index += offset << shift; 158 158 } 159 159 160 - static void xas_advance(struct xa_state *xas) 160 + static void xas_next_offset(struct xa_state *xas) 161 161 { 162 162 xas->xa_offset++; 163 163 xas_move_index(xas, xas->xa_offset); ··· 1250 1250 xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; 1251 1251 } 1252 1252 1253 - xas_advance(xas); 1253 + xas_next_offset(xas); 1254 1254 1255 1255 while (xas->xa_node && (xas->xa_index <= max)) { 1256 1256 if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { ··· 1268 1268 if (entry && !xa_is_sibling(entry)) 1269 1269 return entry; 1270 1270 1271 - xas_advance(xas); 1271 + xas_next_offset(xas); 1272 1272 } 1273 1273 1274 1274 if (!xas->xa_node)

+481 -523

mm/filemap.c

··· 121 121 */ 122 122 123 123 static void page_cache_delete(struct address_space *mapping, 124 - struct page *page, void *shadow) 124 + struct folio *folio, void *shadow) 125 125 { 126 - XA_STATE(xas, &mapping->i_pages, page->index); 127 - unsigned int nr = 1; 126 + XA_STATE(xas, &mapping->i_pages, folio->index); 127 + long nr = 1; 128 128 129 129 mapping_set_update(&xas, mapping); 130 130 131 131 /* hugetlb pages are represented by a single entry in the xarray */ 132 - if (!PageHuge(page)) { 133 - xas_set_order(&xas, page->index, compound_order(page)); 134 - nr = compound_nr(page); 132 + if (!folio_test_hugetlb(folio)) { 133 + xas_set_order(&xas, folio->index, folio_order(folio)); 134 + nr = folio_nr_pages(folio); 135 135 } 136 136 137 - VM_BUG_ON_PAGE(!PageLocked(page), page); 138 - VM_BUG_ON_PAGE(PageTail(page), page); 139 - VM_BUG_ON_PAGE(nr != 1 && shadow, page); 137 + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); 140 138 141 139 xas_store(&xas, shadow); 142 140 xas_init_marks(&xas); 143 141 144 - page->mapping = NULL; 142 + folio->mapping = NULL; 145 143 /* Leave page->index set: truncation lookup relies upon it */ 146 144 mapping->nrpages -= nr; 147 145 } 148 146 149 - static void unaccount_page_cache_page(struct address_space *mapping, 150 - struct page *page) 147 + static void filemap_unaccount_folio(struct address_space *mapping, 148 + struct folio *folio) 151 149 { 152 - int nr; 150 + long nr; 153 151 154 152 /* 155 153 * if we're uptodate, flush out into the cleancache, otherwise 156 154 * invalidate any existing cleancache entries. 
We can't leave 157 155 * stale data around in the cleancache once our page is gone 158 156 */ 159 - if (PageUptodate(page) && PageMappedToDisk(page)) 160 - cleancache_put_page(page); 157 + if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio)) 158 + cleancache_put_page(&folio->page); 161 159 else 162 - cleancache_invalidate_page(mapping, page); 160 + cleancache_invalidate_page(mapping, &folio->page); 163 161 164 - VM_BUG_ON_PAGE(PageTail(page), page); 165 - VM_BUG_ON_PAGE(page_mapped(page), page); 166 - if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { 162 + VM_BUG_ON_FOLIO(folio_mapped(folio), folio); 163 + if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { 167 164 int mapcount; 168 165 169 166 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", 170 - current->comm, page_to_pfn(page)); 171 - dump_page(page, "still mapped when deleted"); 167 + current->comm, folio_pfn(folio)); 168 + dump_page(&folio->page, "still mapped when deleted"); 172 169 dump_stack(); 173 170 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); 174 171 175 - mapcount = page_mapcount(page); 172 + mapcount = page_mapcount(&folio->page); 176 173 if (mapping_exiting(mapping) && 177 - page_count(page) >= mapcount + 2) { 174 + folio_ref_count(folio) >= mapcount + 2) { 178 175 /* 179 176 * All vmas have already been torn down, so it's 180 - * a good bet that actually the page is unmapped, 177 + * a good bet that actually the folio is unmapped, 181 178 * and we'd prefer not to leak it: if we're wrong, 182 179 * some other bad page check should catch it later. 183 180 */ 184 - page_mapcount_reset(page); 185 - page_ref_sub(page, mapcount); 181 + page_mapcount_reset(&folio->page); 182 + folio_ref_sub(folio, mapcount); 186 183 } 187 184 } 188 185 189 - /* hugetlb pages do not participate in page cache accounting. */ 190 - if (PageHuge(page)) 186 + /* hugetlb folios do not participate in page cache accounting. 
*/ 187 + if (folio_test_hugetlb(folio)) 191 188 return; 192 189 193 - nr = thp_nr_pages(page); 190 + nr = folio_nr_pages(folio); 194 191 195 - __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); 196 - if (PageSwapBacked(page)) { 197 - __mod_lruvec_page_state(page, NR_SHMEM, -nr); 198 - if (PageTransHuge(page)) 199 - __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr); 200 - } else if (PageTransHuge(page)) { 201 - __mod_lruvec_page_state(page, NR_FILE_THPS, -nr); 192 + __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); 193 + if (folio_test_swapbacked(folio)) { 194 + __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); 195 + if (folio_test_pmd_mappable(folio)) 196 + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); 197 + } else if (folio_test_pmd_mappable(folio)) { 198 + __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); 202 199 filemap_nr_thps_dec(mapping); 203 200 } 204 201 205 202 /* 206 - * At this point page must be either written or cleaned by 207 - * truncate. Dirty page here signals a bug and loss of 203 + * At this point folio must be either written or cleaned by 204 + * truncate. Dirty folio here signals a bug and loss of 208 205 * unwritten data. 209 206 * 210 - * This fixes dirty accounting after removing the page entirely 211 - * but leaves PageDirty set: it has no effect for truncated 212 - * page and anyway will be cleared before returning page into 207 + * This fixes dirty accounting after removing the folio entirely 208 + * but leaves the dirty flag set: it has no effect for truncated 209 + * folio and anyway will be cleared before returning folio to 213 210 * buddy allocator. 214 211 */ 215 - if (WARN_ON_ONCE(PageDirty(page))) 216 - account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); 212 + if (WARN_ON_ONCE(folio_test_dirty(folio))) 213 + folio_account_cleaned(folio, mapping, 214 + inode_to_wb(mapping->host)); 217 215 } 218 216 219 217 /* ··· 219 221 * sure the page is locked and that nobody else uses it - or that usage 220 222 * is safe. 
The caller must hold the i_pages lock. 221 223 */ 222 - void __delete_from_page_cache(struct page *page, void *shadow) 224 + void __filemap_remove_folio(struct folio *folio, void *shadow) 223 225 { 224 - struct address_space *mapping = page->mapping; 226 + struct address_space *mapping = folio->mapping; 225 227 226 - trace_mm_filemap_delete_from_page_cache(page); 227 - 228 - unaccount_page_cache_page(mapping, page); 229 - page_cache_delete(mapping, page, shadow); 228 + trace_mm_filemap_delete_from_page_cache(folio); 229 + filemap_unaccount_folio(mapping, folio); 230 + page_cache_delete(mapping, folio, shadow); 230 231 } 231 232 232 - static void page_cache_free_page(struct address_space *mapping, 233 - struct page *page) 233 + void filemap_free_folio(struct address_space *mapping, struct folio *folio) 234 234 { 235 235 void (*freepage)(struct page *); 236 236 237 237 freepage = mapping->a_ops->freepage; 238 238 if (freepage) 239 - freepage(page); 239 + freepage(&folio->page); 240 240 241 - if (PageTransHuge(page) && !PageHuge(page)) { 242 - page_ref_sub(page, thp_nr_pages(page)); 243 - VM_BUG_ON_PAGE(page_count(page) <= 0, page); 241 + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) { 242 + folio_ref_sub(folio, folio_nr_pages(folio)); 243 + VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio); 244 244 } else { 245 - put_page(page); 245 + folio_put(folio); 246 246 } 247 247 } 248 248 249 249 /** 250 - * delete_from_page_cache - delete page from page cache 251 - * @page: the page which the kernel is trying to remove from page cache 250 + * filemap_remove_folio - Remove folio from page cache. 251 + * @folio: The folio. 252 252 * 253 - * This must be called only on pages that have been verified to be in the page 254 - * cache and locked. It will never put the page into the free list, the caller 255 - * has a reference on the page. 253 + * This must be called only on folios that are locked and have been 254 + * verified to be in the page cache. 
It will never put the folio into 255 + * the free list because the caller has a reference on the page. 256 256 */ 257 - void delete_from_page_cache(struct page *page) 257 + void filemap_remove_folio(struct folio *folio) 258 258 { 259 - struct address_space *mapping = page_mapping(page); 259 + struct address_space *mapping = folio->mapping; 260 260 261 - BUG_ON(!PageLocked(page)); 261 + BUG_ON(!folio_test_locked(folio)); 262 262 spin_lock(&mapping->host->i_lock); 263 263 xa_lock_irq(&mapping->i_pages); 264 - __delete_from_page_cache(page, NULL); 264 + __filemap_remove_folio(folio, NULL); 265 265 xa_unlock_irq(&mapping->i_pages); 266 266 if (mapping_shrinkable(mapping)) 267 267 inode_add_lru(mapping->host); 268 268 spin_unlock(&mapping->host->i_lock); 269 269 270 - page_cache_free_page(mapping, page); 270 + filemap_free_folio(mapping, folio); 271 271 } 272 - EXPORT_SYMBOL(delete_from_page_cache); 273 272 274 273 /* 275 - * page_cache_delete_batch - delete several pages from page cache 276 - * @mapping: the mapping to which pages belong 277 - * @pvec: pagevec with pages to delete 274 + * page_cache_delete_batch - delete several folios from page cache 275 + * @mapping: the mapping to which folios belong 276 + * @fbatch: batch of folios to delete 278 277 * 279 - * The function walks over mapping->i_pages and removes pages passed in @pvec 280 - * from the mapping. The function expects @pvec to be sorted by page index 281 - * and is optimised for it to be dense. 282 - * It tolerates holes in @pvec (mapping entries at those indices are not 283 - * modified). The function expects only THP head pages to be present in the 284 - * @pvec. 278 + * The function walks over mapping->i_pages and removes folios passed in 279 + * @fbatch from the mapping. The function expects @fbatch to be sorted 280 + * by page index and is optimised for it to be dense. 281 + * It tolerates holes in @fbatch (mapping entries at those indices are not 282 + * modified). 
285 283 * 286 284 * The function expects the i_pages lock to be held. 287 285 */ 288 286 static void page_cache_delete_batch(struct address_space *mapping, 289 - struct pagevec *pvec) 287 + struct folio_batch *fbatch) 290 288 { 291 - XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); 292 - int total_pages = 0; 289 + XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index); 290 + long total_pages = 0; 293 291 int i = 0; 294 - struct page *page; 292 + struct folio *folio; 295 293 296 294 mapping_set_update(&xas, mapping); 297 - xas_for_each(&xas, page, ULONG_MAX) { 298 - if (i >= pagevec_count(pvec)) 295 + xas_for_each(&xas, folio, ULONG_MAX) { 296 + if (i >= folio_batch_count(fbatch)) 299 297 break; 300 298 301 299 /* A swap/dax/shadow entry got inserted? Skip it. */ 302 - if (xa_is_value(page)) 300 + if (xa_is_value(folio)) 303 301 continue; 304 302 /* 305 303 * A page got inserted in our range? Skip it. We have our ··· 304 310 * means our page has been removed, which shouldn't be 305 311 * possible because we're holding the PageLock. 306 312 */ 307 - if (page != pvec->pages[i]) { 308 - VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index, 309 - page); 313 + if (folio != fbatch->folios[i]) { 314 + VM_BUG_ON_FOLIO(folio->index > 315 + fbatch->folios[i]->index, folio); 310 316 continue; 311 317 } 312 318 313 - WARN_ON_ONCE(!PageLocked(page)); 319 + WARN_ON_ONCE(!folio_test_locked(folio)); 314 320 315 - if (page->index == xas.xa_index) 316 - page->mapping = NULL; 317 - /* Leave page->index set: truncation lookup relies on it */ 321 + folio->mapping = NULL; 322 + /* Leave folio->index set: truncation lookup relies on it */ 318 323 319 - /* 320 - * Move to the next page in the vector if this is a regular 321 - * page or the index is of the last sub-page of this compound 322 - * page. 
323 - */ 324 - if (page->index + compound_nr(page) - 1 == xas.xa_index) 325 - i++; 324 + i++; 326 325 xas_store(&xas, NULL); 327 - total_pages++; 326 + total_pages += folio_nr_pages(folio); 328 327 } 329 328 mapping->nrpages -= total_pages; 330 329 } 331 330 332 331 void delete_from_page_cache_batch(struct address_space *mapping, 333 - struct pagevec *pvec) 332 + struct folio_batch *fbatch) 334 333 { 335 334 int i; 336 335 337 - if (!pagevec_count(pvec)) 336 + if (!folio_batch_count(fbatch)) 338 337 return; 339 338 340 339 spin_lock(&mapping->host->i_lock); 341 340 xa_lock_irq(&mapping->i_pages); 342 - for (i = 0; i < pagevec_count(pvec); i++) { 343 - trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); 341 + for (i = 0; i < folio_batch_count(fbatch); i++) { 342 + struct folio *folio = fbatch->folios[i]; 344 343 345 - unaccount_page_cache_page(mapping, pvec->pages[i]); 344 + trace_mm_filemap_delete_from_page_cache(folio); 345 + filemap_unaccount_folio(mapping, folio); 346 346 } 347 - page_cache_delete_batch(mapping, pvec); 347 + page_cache_delete_batch(mapping, fbatch); 348 348 xa_unlock_irq(&mapping->i_pages); 349 349 if (mapping_shrinkable(mapping)) 350 350 inode_add_lru(mapping->host); 351 351 spin_unlock(&mapping->host->i_lock); 352 352 353 - for (i = 0; i < pagevec_count(pvec); i++) 354 - page_cache_free_page(mapping, pvec->pages[i]); 353 + for (i = 0; i < folio_batch_count(fbatch); i++) 354 + filemap_free_folio(mapping, fbatch->folios[i]); 355 355 } 356 356 357 357 int filemap_check_errors(struct address_space *mapping) ··· 921 933 goto error; 922 934 } 923 935 924 - trace_mm_filemap_add_to_page_cache(&folio->page); 936 + trace_mm_filemap_add_to_page_cache(folio); 925 937 return 0; 926 938 error: 927 939 folio->mapping = NULL; ··· 1221 1233 * __folio_lock() waiting on then setting PG_locked. 1222 1234 */ 1223 1235 SHARED, /* Hold ref to page and check the bit when woken, like 1224 - * wait_on_page_writeback() waiting on PG_writeback. 
1236 + * folio_wait_writeback() waiting on PG_writeback. 1225 1237 */ 1226 1238 DROP, /* Drop ref to page before wait, no check when woken, 1227 - * like put_and_wait_on_page_locked() on PG_locked. 1239 + * like folio_put_wait_locked() on PG_locked. 1228 1240 */ 1229 1241 }; 1230 1242 ··· 1401 1413 EXPORT_SYMBOL(folio_wait_bit_killable); 1402 1414 1403 1415 /** 1404 - * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked 1405 - * @page: The page to wait for. 1416 + * folio_put_wait_locked - Drop a reference and wait for it to be unlocked 1417 + * @folio: The folio to wait for. 1406 1418 * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc). 1407 1419 * 1408 - * The caller should hold a reference on @page. They expect the page to 1420 + * The caller should hold a reference on @folio. They expect the page to 1409 1421 * become unlocked relatively soon, but do not wish to hold up migration 1410 - * (for example) by holding the reference while waiting for the page to 1422 + * (for example) by holding the reference while waiting for the folio to 1411 1423 * come unlocked. After this function returns, the caller should not 1412 - * dereference @page. 1424 + * dereference @folio. 1413 1425 * 1414 - * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal. 1426 + * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal. 
1415 1427 */ 1416 - int put_and_wait_on_page_locked(struct page *page, int state) 1428 + int folio_put_wait_locked(struct folio *folio, int state) 1417 1429 { 1418 - return folio_wait_bit_common(page_folio(page), PG_locked, state, 1419 - DROP); 1430 + return folio_wait_bit_common(folio, PG_locked, state, DROP); 1420 1431 } 1421 1432 1422 1433 /** ··· 1940 1953 } 1941 1954 EXPORT_SYMBOL(__filemap_get_folio); 1942 1955 1943 - static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max, 1956 + static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, 1944 1957 xa_mark_t mark) 1945 1958 { 1946 - struct page *page; 1959 + struct folio *folio; 1947 1960 1948 1961 retry: 1949 1962 if (mark == XA_PRESENT) 1950 - page = xas_find(xas, max); 1963 + folio = xas_find(xas, max); 1951 1964 else 1952 - page = xas_find_marked(xas, max, mark); 1965 + folio = xas_find_marked(xas, max, mark); 1953 1966 1954 - if (xas_retry(xas, page)) 1967 + if (xas_retry(xas, folio)) 1955 1968 goto retry; 1956 1969 /* 1957 1970 * A shadow entry of a recently evicted page, a swap 1958 1971 * entry from shmem/tmpfs or a DAX entry. Return it 1959 1972 * without attempting to raise page count. 1960 1973 */ 1961 - if (!page || xa_is_value(page)) 1962 - return page; 1974 + if (!folio || xa_is_value(folio)) 1975 + return folio; 1963 1976 1964 - if (!page_cache_get_speculative(page)) 1977 + if (!folio_try_get_rcu(folio)) 1965 1978 goto reset; 1966 1979 1967 - /* Has the page moved or been split? */ 1968 - if (unlikely(page != xas_reload(xas))) { 1969 - put_page(page); 1980 + if (unlikely(folio != xas_reload(xas))) { 1981 + folio_put(folio); 1970 1982 goto reset; 1971 1983 } 1972 1984 1973 - return page; 1985 + return folio; 1974 1986 reset: 1975 1987 xas_reset(xas); 1976 1988 goto retry; ··· 1980 1994 * @mapping: The address_space to search 1981 1995 * @start: The starting page cache index 1982 1996 * @end: The final page index (inclusive). 
1983 - * @pvec: Where the resulting entries are placed. 1997 + * @fbatch: Where the resulting entries are placed. 1984 1998 * @indices: The cache indices corresponding to the entries in @entries 1985 1999 * 1986 2000 * find_get_entries() will search for and return a batch of entries in 1987 - * the mapping. The entries are placed in @pvec. find_get_entries() 1988 - * takes a reference on any actual pages it returns. 2001 + * the mapping. The entries are placed in @fbatch. find_get_entries() 2002 + * takes a reference on any actual folios it returns. 1989 2003 * 1990 - * The search returns a group of mapping-contiguous page cache entries 1991 - * with ascending indexes. There may be holes in the indices due to 1992 - * not-present pages. 2004 + * The entries have ascending indexes. The indices may not be consecutive 2005 + * due to not-present entries or large folios. 1993 2006 * 1994 - * Any shadow entries of evicted pages, or swap entries from 2007 + * Any shadow entries of evicted folios, or swap entries from 1995 2008 * shmem/tmpfs, are included in the returned array. 1996 2009 * 1997 - * If it finds a Transparent Huge Page, head or tail, find_get_entries() 1998 - * stops at that page: the caller is likely to have a better way to handle 1999 - * the compound page as a whole, and then skip its extent, than repeatedly 2000 - * calling find_get_entries() to return all its tails. 2001 - * 2002 - * Return: the number of pages and shadow entries which were found. 2010 + * Return: The number of entries which were found. 
2003 2011 */ 2004 2012 unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 2005 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices) 2013 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) 2006 2014 { 2007 2015 XA_STATE(xas, &mapping->i_pages, start); 2008 - struct page *page; 2009 - unsigned int ret = 0; 2010 - unsigned nr_entries = PAGEVEC_SIZE; 2016 + struct folio *folio; 2011 2017 2012 2018 rcu_read_lock(); 2013 - while ((page = find_get_entry(&xas, end, XA_PRESENT))) { 2014 - /* 2015 - * Terminate early on finding a THP, to allow the caller to 2016 - * handle it all at once; but continue if this is hugetlbfs. 2017 - */ 2018 - if (!xa_is_value(page) && PageTransHuge(page) && 2019 - !PageHuge(page)) { 2020 - page = find_subpage(page, xas.xa_index); 2021 - nr_entries = ret + 1; 2022 - } 2023 - 2024 - indices[ret] = xas.xa_index; 2025 - pvec->pages[ret] = page; 2026 - if (++ret == nr_entries) 2019 + while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { 2020 + indices[fbatch->nr] = xas.xa_index; 2021 + if (!folio_batch_add(fbatch, folio)) 2027 2022 break; 2028 2023 } 2029 2024 rcu_read_unlock(); 2030 2025 2031 - pvec->nr = ret; 2032 - return ret; 2026 + return folio_batch_count(fbatch); 2033 2027 } 2034 2028 2035 2029 /** ··· 2017 2051 * @mapping: The address_space to search. 2018 2052 * @start: The starting page cache index. 2019 2053 * @end: The final page index (inclusive). 2020 - * @pvec: Where the resulting entries are placed. 2021 - * @indices: The cache indices of the entries in @pvec. 2054 + * @fbatch: Where the resulting entries are placed. 2055 + * @indices: The cache indices of the entries in @fbatch. 2022 2056 * 2023 2057 * find_lock_entries() will return a batch of entries from @mapping. 2024 - * Swap, shadow and DAX entries are included. Pages are returned 2025 - * locked and with an incremented refcount. Pages which are locked by 2026 - * somebody else or under writeback are skipped. 
Only the head page of 2027 - * a THP is returned. Pages which are partially outside the range are 2028 - * not returned. 2058 + * Swap, shadow and DAX entries are included. Folios are returned 2059 + * locked and with an incremented refcount. Folios which are locked 2060 + * by somebody else or under writeback are skipped. Folios which are 2061 + * partially outside the range are not returned. 2029 2062 * 2030 2063 * The entries have ascending indexes. The indices may not be consecutive 2031 - * due to not-present entries, THP pages, pages which could not be locked 2032 - * or pages under writeback. 2064 + * due to not-present entries, large folios, folios which could not be 2065 + * locked or folios under writeback. 2033 2066 * 2034 2067 * Return: The number of entries which were found. 2035 2068 */ 2036 2069 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, 2037 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices) 2070 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) 2038 2071 { 2039 2072 XA_STATE(xas, &mapping->i_pages, start); 2040 - struct page *page; 2073 + struct folio *folio; 2041 2074 2042 2075 rcu_read_lock(); 2043 - while ((page = find_get_entry(&xas, end, XA_PRESENT))) { 2044 - if (!xa_is_value(page)) { 2045 - if (page->index < start) 2076 + while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { 2077 + if (!xa_is_value(folio)) { 2078 + if (folio->index < start) 2046 2079 goto put; 2047 - if (page->index + thp_nr_pages(page) - 1 > end) 2080 + if (folio->index + folio_nr_pages(folio) - 1 > end) 2048 2081 goto put; 2049 - if (!trylock_page(page)) 2082 + if (!folio_trylock(folio)) 2050 2083 goto put; 2051 - if (page->mapping != mapping || PageWriteback(page)) 2084 + if (folio->mapping != mapping || 2085 + folio_test_writeback(folio)) 2052 2086 goto unlock; 2053 - VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index), 2054 - page); 2087 + VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index), 2088 + folio); 2055 2089 } 2056 
- indices[pvec->nr] = xas.xa_index; 2057 - if (!pagevec_add(pvec, page)) 2090 + indices[fbatch->nr] = xas.xa_index; 2091 + if (!folio_batch_add(fbatch, folio)) 2058 2092 break; 2059 - goto next; 2093 + continue; 2060 2094 unlock: 2061 - unlock_page(page); 2095 + folio_unlock(folio); 2062 2096 put: 2063 - put_page(page); 2064 - next: 2065 - if (!xa_is_value(page) && PageTransHuge(page)) { 2066 - unsigned int nr_pages = thp_nr_pages(page); 2067 - 2068 - /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ 2069 - xas_set(&xas, page->index + nr_pages); 2070 - if (xas.xa_index < nr_pages) 2071 - break; 2072 - } 2097 + folio_put(folio); 2073 2098 } 2074 2099 rcu_read_unlock(); 2075 2100 2076 - return pagevec_count(pvec); 2101 + return folio_batch_count(fbatch); 2102 + } 2103 + 2104 + static inline 2105 + bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) 2106 + { 2107 + if (!folio_test_large(folio) || folio_test_hugetlb(folio)) 2108 + return false; 2109 + if (index >= max) 2110 + return false; 2111 + return index < folio->index + folio_nr_pages(folio) - 1; 2077 2112 } 2078 2113 2079 2114 /** ··· 2103 2136 struct page **pages) 2104 2137 { 2105 2138 XA_STATE(xas, &mapping->i_pages, *start); 2106 - struct page *page; 2139 + struct folio *folio; 2107 2140 unsigned ret = 0; 2108 2141 2109 2142 if (unlikely(!nr_pages)) 2110 2143 return 0; 2111 2144 2112 2145 rcu_read_lock(); 2113 - while ((page = find_get_entry(&xas, end, XA_PRESENT))) { 2146 + while ((folio = find_get_entry(&xas, end, XA_PRESENT))) { 2114 2147 /* Skip over shadow, swap and DAX entries */ 2115 - if (xa_is_value(page)) 2148 + if (xa_is_value(folio)) 2116 2149 continue; 2117 2150 2118 - pages[ret] = find_subpage(page, xas.xa_index); 2151 + again: 2152 + pages[ret] = folio_file_page(folio, xas.xa_index); 2119 2153 if (++ret == nr_pages) { 2120 2154 *start = xas.xa_index + 1; 2121 2155 goto out; 2156 + } 2157 + if (folio_more_pages(folio, xas.xa_index, end)) { 2158 + xas.xa_index++; 2159 + 
folio_ref_inc(folio); 2160 + goto again; 2122 2161 } 2123 2162 } 2124 2163 ··· 2160 2187 unsigned int nr_pages, struct page **pages) 2161 2188 { 2162 2189 XA_STATE(xas, &mapping->i_pages, index); 2163 - struct page *page; 2190 + struct folio *folio; 2164 2191 unsigned int ret = 0; 2165 2192 2166 2193 if (unlikely(!nr_pages)) 2167 2194 return 0; 2168 2195 2169 2196 rcu_read_lock(); 2170 - for (page = xas_load(&xas); page; page = xas_next(&xas)) { 2171 - if (xas_retry(&xas, page)) 2197 + for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { 2198 + if (xas_retry(&xas, folio)) 2172 2199 continue; 2173 2200 /* 2174 2201 * If the entry has been swapped out, we can stop looking. 2175 2202 * No current caller is looking for DAX entries. 2176 2203 */ 2177 - if (xa_is_value(page)) 2204 + if (xa_is_value(folio)) 2178 2205 break; 2179 2206 2180 - if (!page_cache_get_speculative(page)) 2207 + if (!folio_try_get_rcu(folio)) 2181 2208 goto retry; 2182 2209 2183 - /* Has the page moved or been split? 
*/ 2184 - if (unlikely(page != xas_reload(&xas))) 2210 + if (unlikely(folio != xas_reload(&xas))) 2185 2211 goto put_page; 2186 2212 2187 - pages[ret] = find_subpage(page, xas.xa_index); 2213 + again: 2214 + pages[ret] = folio_file_page(folio, xas.xa_index); 2188 2215 if (++ret == nr_pages) 2189 2216 break; 2217 + if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) { 2218 + xas.xa_index++; 2219 + folio_ref_inc(folio); 2220 + goto again; 2221 + } 2190 2222 continue; 2191 2223 put_page: 2192 - put_page(page); 2224 + folio_put(folio); 2193 2225 retry: 2194 2226 xas_reset(&xas); 2195 2227 } ··· 2223 2245 struct page **pages) 2224 2246 { 2225 2247 XA_STATE(xas, &mapping->i_pages, *index); 2226 - struct page *page; 2248 + struct folio *folio; 2227 2249 unsigned ret = 0; 2228 2250 2229 2251 if (unlikely(!nr_pages)) 2230 2252 return 0; 2231 2253 2232 2254 rcu_read_lock(); 2233 - while ((page = find_get_entry(&xas, end, tag))) { 2255 + while ((folio = find_get_entry(&xas, end, tag))) { 2234 2256 /* 2235 2257 * Shadow entries should never be tagged, but this iteration 2236 2258 * is lockless so there is a window for page reclaim to evict 2237 2259 * a page we saw tagged. Skip over it. 2238 2260 */ 2239 - if (xa_is_value(page)) 2261 + if (xa_is_value(folio)) 2240 2262 continue; 2241 2263 2242 - pages[ret] = page; 2264 + pages[ret] = &folio->page; 2243 2265 if (++ret == nr_pages) { 2244 - *index = page->index + thp_nr_pages(page); 2266 + *index = folio->index + folio_nr_pages(folio); 2245 2267 goto out; 2246 2268 } 2247 2269 } ··· 2284 2306 } 2285 2307 2286 2308 /* 2287 - * filemap_get_read_batch - Get a batch of pages for read 2309 + * filemap_get_read_batch - Get a batch of folios for read 2288 2310 * 2289 - * Get a batch of pages which represent a contiguous range of bytes 2290 - * in the file. No tail pages will be returned. If @index is in the 2291 - * middle of a THP, the entire THP will be returned. 
The last page in 2292 - * the batch may have Readahead set or be not Uptodate so that the 2293 - * caller can take the appropriate action. 2311 + * Get a batch of folios which represent a contiguous range of bytes in 2312 + * the file. No exceptional entries will be returned. If @index is in 2313 + * the middle of a folio, the entire folio will be returned. The last 2314 + * folio in the batch may have the readahead flag set or the uptodate flag 2315 + * clear so that the caller can take the appropriate action. 2294 2316 */ 2295 2317 static void filemap_get_read_batch(struct address_space *mapping, 2296 - pgoff_t index, pgoff_t max, struct pagevec *pvec) 2318 + pgoff_t index, pgoff_t max, struct folio_batch *fbatch) 2297 2319 { 2298 2320 XA_STATE(xas, &mapping->i_pages, index); 2299 - struct page *head; 2321 + struct folio *folio; 2300 2322 2301 2323 rcu_read_lock(); 2302 - for (head = xas_load(&xas); head; head = xas_next(&xas)) { 2303 - if (xas_retry(&xas, head)) 2324 + for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { 2325 + if (xas_retry(&xas, folio)) 2304 2326 continue; 2305 - if (xas.xa_index > max || xa_is_value(head)) 2327 + if (xas.xa_index > max || xa_is_value(folio)) 2306 2328 break; 2307 - if (!page_cache_get_speculative(head)) 2329 + if (!folio_try_get_rcu(folio)) 2308 2330 goto retry; 2309 2331 2310 - /* Has the page moved or been split? 
*/ 2311 - if (unlikely(head != xas_reload(&xas))) 2312 - goto put_page; 2332 + if (unlikely(folio != xas_reload(&xas))) 2333 + goto put_folio; 2313 2334 2314 - if (!pagevec_add(pvec, head)) 2335 + if (!folio_batch_add(fbatch, folio)) 2315 2336 break; 2316 - if (!PageUptodate(head)) 2337 + if (!folio_test_uptodate(folio)) 2317 2338 break; 2318 - if (PageReadahead(head)) 2339 + if (folio_test_readahead(folio)) 2319 2340 break; 2320 - xas.xa_index = head->index + thp_nr_pages(head) - 1; 2321 - xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK; 2341 + xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1); 2322 2342 continue; 2323 - put_page: 2324 - put_page(head); 2343 + put_folio: 2344 + folio_put(folio); 2325 2345 retry: 2326 2346 xas_reset(&xas); 2327 2347 } 2328 2348 rcu_read_unlock(); 2329 2349 } 2330 2350 2331 - static int filemap_read_page(struct file *file, struct address_space *mapping, 2332 - struct page *page) 2351 + static int filemap_read_folio(struct file *file, struct address_space *mapping, 2352 + struct folio *folio) 2333 2353 { 2334 2354 int error; 2335 2355 ··· 2336 2360 * eg. multipath errors. PG_error will be set again if readpage 2337 2361 * fails. 2338 2362 */ 2339 - ClearPageError(page); 2363 + folio_clear_error(folio); 2340 2364 /* Start the actual read. The read will unlock the page. 
*/ 2341 - error = mapping->a_ops->readpage(file, page); 2365 + error = mapping->a_ops->readpage(file, &folio->page); 2342 2366 if (error) 2343 2367 return error; 2344 2368 2345 - error = wait_on_page_locked_killable(page); 2369 + error = folio_wait_locked_killable(folio); 2346 2370 if (error) 2347 2371 return error; 2348 - if (PageUptodate(page)) 2372 + if (folio_test_uptodate(folio)) 2349 2373 return 0; 2350 2374 shrink_readahead_size_eio(&file->f_ra); 2351 2375 return -EIO; 2352 2376 } 2353 2377 2354 2378 static bool filemap_range_uptodate(struct address_space *mapping, 2355 - loff_t pos, struct iov_iter *iter, struct page *page) 2379 + loff_t pos, struct iov_iter *iter, struct folio *folio) 2356 2380 { 2357 2381 int count; 2358 2382 2359 - if (PageUptodate(page)) 2383 + if (folio_test_uptodate(folio)) 2360 2384 return true; 2361 2385 /* pipes can't handle partially uptodate pages */ 2362 2386 if (iov_iter_is_pipe(iter)) 2363 2387 return false; 2364 2388 if (!mapping->a_ops->is_partially_uptodate) 2365 2389 return false; 2366 - if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page))) 2390 + if (mapping->host->i_blkbits >= folio_shift(folio)) 2367 2391 return false; 2368 2392 2369 2393 count = iter->count; 2370 - if (page_offset(page) > pos) { 2371 - count -= page_offset(page) - pos; 2394 + if (folio_pos(folio) > pos) { 2395 + count -= folio_pos(folio) - pos; 2372 2396 pos = 0; 2373 2397 } else { 2374 - pos -= page_offset(page); 2398 + pos -= folio_pos(folio); 2375 2399 } 2376 2400 2377 - return mapping->a_ops->is_partially_uptodate(page, pos, count); 2401 + return mapping->a_ops->is_partially_uptodate(&folio->page, pos, count); 2378 2402 } 2379 2403 2380 2404 static int filemap_update_page(struct kiocb *iocb, 2381 2405 struct address_space *mapping, struct iov_iter *iter, 2382 - struct page *page) 2406 + struct folio *folio) 2383 2407 { 2384 - struct folio *folio = page_folio(page); 2385 2408 int error; 2386 2409 2387 2410 if (iocb->ki_flags & IOCB_NOWAIT) 
{ ··· 2396 2421 goto unlock_mapping; 2397 2422 if (!(iocb->ki_flags & IOCB_WAITQ)) { 2398 2423 filemap_invalidate_unlock_shared(mapping); 2399 - put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE); 2424 + /* 2425 + * This is where we usually end up waiting for a 2426 + * previously submitted readahead to finish. 2427 + */ 2428 + folio_put_wait_locked(folio, TASK_KILLABLE); 2400 2429 return AOP_TRUNCATED_PAGE; 2401 2430 } 2402 2431 error = __folio_lock_async(folio, iocb->ki_waitq); ··· 2413 2434 goto unlock; 2414 2435 2415 2436 error = 0; 2416 - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page)) 2437 + if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio)) 2417 2438 goto unlock; 2418 2439 2419 2440 error = -EAGAIN; 2420 2441 if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) 2421 2442 goto unlock; 2422 2443 2423 - error = filemap_read_page(iocb->ki_filp, mapping, &folio->page); 2444 + error = filemap_read_folio(iocb->ki_filp, mapping, folio); 2424 2445 goto unlock_mapping; 2425 2446 unlock: 2426 2447 folio_unlock(folio); ··· 2431 2452 return error; 2432 2453 } 2433 2454 2434 - static int filemap_create_page(struct file *file, 2455 + static int filemap_create_folio(struct file *file, 2435 2456 struct address_space *mapping, pgoff_t index, 2436 - struct pagevec *pvec) 2457 + struct folio_batch *fbatch) 2437 2458 { 2438 - struct page *page; 2459 + struct folio *folio; 2439 2460 int error; 2440 2461 2441 - page = page_cache_alloc(mapping); 2442 - if (!page) 2462 + folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0); 2463 + if (!folio) 2443 2464 return -ENOMEM; 2444 2465 2445 2466 /* 2446 - * Protect against truncate / hole punch. Grabbing invalidate_lock here 2447 - * assures we cannot instantiate and bring uptodate new pagecache pages 2448 - * after evicting page cache during truncate and before actually 2449 - * freeing blocks. 
Note that we could release invalidate_lock after 2450 - * inserting the page into page cache as the locked page would then be 2451 - * enough to synchronize with hole punching. But there are code paths 2452 - * such as filemap_update_page() filling in partially uptodate pages or 2453 - * ->readpages() that need to hold invalidate_lock while mapping blocks 2454 - * for IO so let's hold the lock here as well to keep locking rules 2455 - * simple. 2467 + * Protect against truncate / hole punch. Grabbing invalidate_lock 2468 + * here assures we cannot instantiate and bring uptodate new 2469 + * pagecache folios after evicting page cache during truncate 2470 + * and before actually freeing blocks. Note that we could 2471 + * release invalidate_lock after inserting the folio into 2472 + * the page cache as the locked folio would then be enough to 2473 + * synchronize with hole punching. But there are code paths 2474 + * such as filemap_update_page() filling in partially uptodate 2475 + * pages or ->readpages() that need to hold invalidate_lock 2476 + * while mapping blocks for IO so let's hold the lock here as 2477 + * well to keep locking rules simple. 
2456 2478 */ 2457 2479 filemap_invalidate_lock_shared(mapping); 2458 - error = add_to_page_cache_lru(page, mapping, index, 2480 + error = filemap_add_folio(mapping, folio, index, 2459 2481 mapping_gfp_constraint(mapping, GFP_KERNEL)); 2460 2482 if (error == -EEXIST) 2461 2483 error = AOP_TRUNCATED_PAGE; 2462 2484 if (error) 2463 2485 goto error; 2464 2486 2465 - error = filemap_read_page(file, mapping, page); 2487 + error = filemap_read_folio(file, mapping, folio); 2466 2488 if (error) 2467 2489 goto error; 2468 2490 2469 2491 filemap_invalidate_unlock_shared(mapping); 2470 - pagevec_add(pvec, page); 2492 + folio_batch_add(fbatch, folio); 2471 2493 return 0; 2472 2494 error: 2473 2495 filemap_invalidate_unlock_shared(mapping); 2474 - put_page(page); 2496 + folio_put(folio); 2475 2497 return error; 2476 2498 } 2477 2499 2478 2500 static int filemap_readahead(struct kiocb *iocb, struct file *file, 2479 - struct address_space *mapping, struct page *page, 2501 + struct address_space *mapping, struct folio *folio, 2480 2502 pgoff_t last_index) 2481 2503 { 2504 + DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index); 2505 + 2482 2506 if (iocb->ki_flags & IOCB_NOIO) 2483 2507 return -EAGAIN; 2484 - page_cache_async_readahead(mapping, &file->f_ra, file, page, 2485 - page->index, last_index - page->index); 2508 + page_cache_async_ra(&ractl, folio, last_index - folio->index); 2486 2509 return 0; 2487 2510 } 2488 2511 2489 2512 static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, 2490 - struct pagevec *pvec) 2513 + struct folio_batch *fbatch) 2491 2514 { 2492 2515 struct file *filp = iocb->ki_filp; 2493 2516 struct address_space *mapping = filp->f_mapping; 2494 2517 struct file_ra_state *ra = &filp->f_ra; 2495 2518 pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; 2496 2519 pgoff_t last_index; 2497 - struct page *page; 2520 + struct folio *folio; 2498 2521 int err = 0; 2499 2522 2500 2523 last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, 
PAGE_SIZE); ··· 2504 2523 if (fatal_signal_pending(current)) 2505 2524 return -EINTR; 2506 2525 2507 - filemap_get_read_batch(mapping, index, last_index, pvec); 2508 - if (!pagevec_count(pvec)) { 2526 + filemap_get_read_batch(mapping, index, last_index, fbatch); 2527 + if (!folio_batch_count(fbatch)) { 2509 2528 if (iocb->ki_flags & IOCB_NOIO) 2510 2529 return -EAGAIN; 2511 2530 page_cache_sync_readahead(mapping, ra, filp, index, 2512 2531 last_index - index); 2513 - filemap_get_read_batch(mapping, index, last_index, pvec); 2532 + filemap_get_read_batch(mapping, index, last_index, fbatch); 2514 2533 } 2515 - if (!pagevec_count(pvec)) { 2534 + if (!folio_batch_count(fbatch)) { 2516 2535 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) 2517 2536 return -EAGAIN; 2518 - err = filemap_create_page(filp, mapping, 2519 - iocb->ki_pos >> PAGE_SHIFT, pvec); 2537 + err = filemap_create_folio(filp, mapping, 2538 + iocb->ki_pos >> PAGE_SHIFT, fbatch); 2520 2539 if (err == AOP_TRUNCATED_PAGE) 2521 2540 goto retry; 2522 2541 return err; 2523 2542 } 2524 2543 2525 - page = pvec->pages[pagevec_count(pvec) - 1]; 2526 - if (PageReadahead(page)) { 2527 - err = filemap_readahead(iocb, filp, mapping, page, last_index); 2544 + folio = fbatch->folios[folio_batch_count(fbatch) - 1]; 2545 + if (folio_test_readahead(folio)) { 2546 + err = filemap_readahead(iocb, filp, mapping, folio, last_index); 2528 2547 if (err) 2529 2548 goto err; 2530 2549 } 2531 - if (!PageUptodate(page)) { 2532 - if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1) 2550 + if (!folio_test_uptodate(folio)) { 2551 + if ((iocb->ki_flags & IOCB_WAITQ) && 2552 + folio_batch_count(fbatch) > 1) 2533 2553 iocb->ki_flags |= IOCB_NOWAIT; 2534 - err = filemap_update_page(iocb, mapping, iter, page); 2554 + err = filemap_update_page(iocb, mapping, iter, folio); 2535 2555 if (err) 2536 2556 goto err; 2537 2557 } ··· 2540 2558 return 0; 2541 2559 err: 2542 2560 if (err < 0) 2543 - put_page(page); 2544 - if 
(likely(--pvec->nr)) 2561 + folio_put(folio); 2562 + if (likely(--fbatch->nr)) 2545 2563 return 0; 2546 2564 if (err == AOP_TRUNCATED_PAGE) 2547 2565 goto retry; ··· 2568 2586 struct file_ra_state *ra = &filp->f_ra; 2569 2587 struct address_space *mapping = filp->f_mapping; 2570 2588 struct inode *inode = mapping->host; 2571 - struct pagevec pvec; 2589 + struct folio_batch fbatch; 2572 2590 int i, error = 0; 2573 2591 bool writably_mapped; 2574 2592 loff_t isize, end_offset; ··· 2579 2597 return 0; 2580 2598 2581 2599 iov_iter_truncate(iter, inode->i_sb->s_maxbytes); 2582 - pagevec_init(&pvec); 2600 + folio_batch_init(&fbatch); 2583 2601 2584 2602 do { 2585 2603 cond_resched(); ··· 2595 2613 if (unlikely(iocb->ki_pos >= i_size_read(inode))) 2596 2614 break; 2597 2615 2598 - error = filemap_get_pages(iocb, iter, &pvec); 2616 + error = filemap_get_pages(iocb, iter, &fbatch); 2599 2617 if (error < 0) 2600 2618 break; 2601 2619 ··· 2609 2627 */ 2610 2628 isize = i_size_read(inode); 2611 2629 if (unlikely(iocb->ki_pos >= isize)) 2612 - goto put_pages; 2630 + goto put_folios; 2613 2631 end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); 2614 2632 2615 2633 /* ··· 2624 2642 */ 2625 2643 if (iocb->ki_pos >> PAGE_SHIFT != 2626 2644 ra->prev_pos >> PAGE_SHIFT) 2627 - mark_page_accessed(pvec.pages[0]); 2645 + folio_mark_accessed(fbatch.folios[0]); 2628 2646 2629 - for (i = 0; i < pagevec_count(&pvec); i++) { 2630 - struct page *page = pvec.pages[i]; 2631 - size_t page_size = thp_size(page); 2632 - size_t offset = iocb->ki_pos & (page_size - 1); 2647 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 2648 + struct folio *folio = fbatch.folios[i]; 2649 + size_t fsize = folio_size(folio); 2650 + size_t offset = iocb->ki_pos & (fsize - 1); 2633 2651 size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos, 2634 - page_size - offset); 2652 + fsize - offset); 2635 2653 size_t copied; 2636 2654 2637 - if (end_offset < page_offset(page)) 2655 + if (end_offset < 
folio_pos(folio)) 2638 2656 break; 2639 2657 if (i > 0) 2640 - mark_page_accessed(page); 2658 + folio_mark_accessed(folio); 2641 2659 /* 2642 - * If users can be writing to this page using arbitrary 2643 - * virtual addresses, take care about potential aliasing 2644 - * before reading the page on the kernel side. 2660 + * If users can be writing to this folio using arbitrary 2661 + * virtual addresses, take care of potential aliasing 2662 + * before reading the folio on the kernel side. 2645 2663 */ 2646 - if (writably_mapped) { 2647 - int j; 2664 + if (writably_mapped) 2665 + flush_dcache_folio(folio); 2648 2666 2649 - for (j = 0; j < thp_nr_pages(page); j++) 2650 - flush_dcache_page(page + j); 2651 - } 2652 - 2653 - copied = copy_page_to_iter(page, offset, bytes, iter); 2667 + copied = copy_folio_to_iter(folio, offset, bytes, iter); 2654 2668 2655 2669 already_read += copied; 2656 2670 iocb->ki_pos += copied; ··· 2657 2679 break; 2658 2680 } 2659 2681 } 2660 - put_pages: 2661 - for (i = 0; i < pagevec_count(&pvec); i++) 2662 - put_page(pvec.pages[i]); 2663 - pagevec_reinit(&pvec); 2682 + put_folios: 2683 + for (i = 0; i < folio_batch_count(&fbatch); i++) 2684 + folio_put(fbatch.folios[i]); 2685 + folio_batch_init(&fbatch); 2664 2686 } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); 2665 2687 2666 2688 file_accessed(filp); ··· 2745 2767 } 2746 2768 EXPORT_SYMBOL(generic_file_read_iter); 2747 2769 2748 - static inline loff_t page_seek_hole_data(struct xa_state *xas, 2749 - struct address_space *mapping, struct page *page, 2770 + static inline loff_t folio_seek_hole_data(struct xa_state *xas, 2771 + struct address_space *mapping, struct folio *folio, 2750 2772 loff_t start, loff_t end, bool seek_data) 2751 2773 { 2752 2774 const struct address_space_operations *ops = mapping->a_ops; 2753 2775 size_t offset, bsz = i_blocksize(mapping->host); 2754 2776 2755 - if (xa_is_value(page) || PageUptodate(page)) 2777 + if (xa_is_value(folio) || 
folio_test_uptodate(folio)) 2756 2778 return seek_data ? start : end; 2757 2779 if (!ops->is_partially_uptodate) 2758 2780 return seek_data ? end : start; 2759 2781 2760 2782 xas_pause(xas); 2761 2783 rcu_read_unlock(); 2762 - lock_page(page); 2763 - if (unlikely(page->mapping != mapping)) 2784 + folio_lock(folio); 2785 + if (unlikely(folio->mapping != mapping)) 2764 2786 goto unlock; 2765 2787 2766 - offset = offset_in_thp(page, start) & ~(bsz - 1); 2788 + offset = offset_in_folio(folio, start) & ~(bsz - 1); 2767 2789 2768 2790 do { 2769 - if (ops->is_partially_uptodate(page, offset, bsz) == seek_data) 2791 + if (ops->is_partially_uptodate(&folio->page, offset, bsz) == 2792 + seek_data) 2770 2793 break; 2771 2794 start = (start + bsz) & ~(bsz - 1); 2772 2795 offset += bsz; 2773 - } while (offset < thp_size(page)); 2796 + } while (offset < folio_size(folio)); 2774 2797 unlock: 2775 - unlock_page(page); 2798 + folio_unlock(folio); 2776 2799 rcu_read_lock(); 2777 2800 return start; 2778 2801 } 2779 2802 2780 - static inline 2781 - unsigned int seek_page_size(struct xa_state *xas, struct page *page) 2803 + static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio) 2782 2804 { 2783 - if (xa_is_value(page)) 2805 + if (xa_is_value(folio)) 2784 2806 return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index); 2785 - return thp_size(page); 2807 + return folio_size(folio); 2786 2808 } 2787 2809 2788 2810 /** ··· 2809 2831 XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); 2810 2832 pgoff_t max = (end - 1) >> PAGE_SHIFT; 2811 2833 bool seek_data = (whence == SEEK_DATA); 2812 - struct page *page; 2834 + struct folio *folio; 2813 2835 2814 2836 if (end <= start) 2815 2837 return -ENXIO; 2816 2838 2817 2839 rcu_read_lock(); 2818 - while ((page = find_get_entry(&xas, max, XA_PRESENT))) { 2840 + while ((folio = find_get_entry(&xas, max, XA_PRESENT))) { 2819 2841 loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; 2820 - unsigned int seek_size; 2842 + size_t 
seek_size; 2821 2843 2822 2844 if (start < pos) { 2823 2845 if (!seek_data) ··· 2825 2847 start = pos; 2826 2848 } 2827 2849 2828 - seek_size = seek_page_size(&xas, page); 2829 - pos = round_up(pos + 1, seek_size); 2830 - start = page_seek_hole_data(&xas, mapping, page, start, pos, 2850 + seek_size = seek_folio_size(&xas, folio); 2851 + pos = round_up((u64)pos + 1, seek_size); 2852 + start = folio_seek_hole_data(&xas, mapping, folio, start, pos, 2831 2853 seek_data); 2832 2854 if (start < pos) 2833 2855 goto unlock; ··· 2835 2857 break; 2836 2858 if (seek_size > PAGE_SIZE) 2837 2859 xas_set(&xas, pos >> PAGE_SHIFT); 2838 - if (!xa_is_value(page)) 2839 - put_page(page); 2860 + if (!xa_is_value(folio)) 2861 + folio_put(folio); 2840 2862 } 2841 2863 if (seek_data) 2842 2864 start = -ENXIO; 2843 2865 unlock: 2844 2866 rcu_read_unlock(); 2845 - if (page && !xa_is_value(page)) 2846 - put_page(page); 2867 + if (folio && !xa_is_value(folio)) 2868 + folio_put(folio); 2847 2869 if (start > end) 2848 2870 return end; 2849 2871 return start; ··· 2852 2874 #ifdef CONFIG_MMU 2853 2875 #define MMAP_LOTSAMISS (100) 2854 2876 /* 2855 - * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock 2877 + * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock 2856 2878 * @vmf - the vm_fault for this fault. 2857 - * @page - the page to lock. 2879 + * @folio - the folio to lock. 2858 2880 * @fpin - the pointer to the file we may pin (or is already pinned). 2859 2881 * 2860 - * This works similar to lock_page_or_retry in that it can drop the mmap_lock. 2861 - * It differs in that it actually returns the page locked if it returns 1 and 0 2862 - * if it couldn't lock the page. If we did have to drop the mmap_lock then fpin 2863 - * will point to the pinned file and needs to be fput()'ed at a later point. 2882 + * This works similar to lock_folio_or_retry in that it can drop the 2883 + * mmap_lock. 
It differs in that it actually returns the folio locked 2884 + * if it returns 1 and 0 if it couldn't lock the folio. If we did have 2885 + * to drop the mmap_lock then fpin will point to the pinned file and 2886 + * needs to be fput()'ed at a later point. 2864 2887 */ 2865 - static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, 2888 + static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio, 2866 2889 struct file **fpin) 2867 2890 { 2868 - struct folio *folio = page_folio(page); 2869 - 2870 2891 if (folio_trylock(folio)) 2871 2892 return 1; 2872 2893 ··· 2954 2977 * was pinned if we have to drop the mmap_lock in order to do IO. 2955 2978 */ 2956 2979 static struct file *do_async_mmap_readahead(struct vm_fault *vmf, 2957 - struct page *page) 2980 + struct folio *folio) 2958 2981 { 2959 2982 struct file *file = vmf->vma->vm_file; 2960 2983 struct file_ra_state *ra = &file->f_ra; 2961 - struct address_space *mapping = file->f_mapping; 2984 + DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff); 2962 2985 struct file *fpin = NULL; 2963 2986 unsigned int mmap_miss; 2964 - pgoff_t offset = vmf->pgoff; 2965 2987 2966 2988 /* If we don't want any read-ahead, don't bother */ 2967 2989 if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) 2968 2990 return fpin; 2991 + 2969 2992 mmap_miss = READ_ONCE(ra->mmap_miss); 2970 2993 if (mmap_miss) 2971 2994 WRITE_ONCE(ra->mmap_miss, --mmap_miss); 2972 - if (PageReadahead(page)) { 2995 + 2996 + if (folio_test_readahead(folio)) { 2973 2997 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 2974 - page_cache_async_readahead(mapping, ra, file, 2975 - page, offset, ra->ra_pages); 2998 + page_cache_async_ra(&ractl, folio, ra->ra_pages); 2976 2999 } 2977 3000 return fpin; 2978 3001 } ··· 2991 3014 * vma->vm_mm->mmap_lock must be held on entry. 
2992 3015 * 2993 3016 * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock 2994 - * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). 3017 + * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap(). 2995 3018 * 2996 3019 * If our return value does not have VM_FAULT_RETRY set, the mmap_lock 2997 3020 * has not been released. ··· 3007 3030 struct file *fpin = NULL; 3008 3031 struct address_space *mapping = file->f_mapping; 3009 3032 struct inode *inode = mapping->host; 3010 - pgoff_t offset = vmf->pgoff; 3011 - pgoff_t max_off; 3012 - struct page *page; 3033 + pgoff_t max_idx, index = vmf->pgoff; 3034 + struct folio *folio; 3013 3035 vm_fault_t ret = 0; 3014 3036 bool mapping_locked = false; 3015 3037 3016 - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3017 - if (unlikely(offset >= max_off)) 3038 + max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3039 + if (unlikely(index >= max_idx)) 3018 3040 return VM_FAULT_SIGBUS; 3019 3041 3020 3042 /* 3021 3043 * Do we have something in the page cache already? 3022 3044 */ 3023 - page = find_get_page(mapping, offset); 3024 - if (likely(page)) { 3045 + folio = filemap_get_folio(mapping, index); 3046 + if (likely(folio)) { 3025 3047 /* 3026 3048 * We found the page, so try async readahead before waiting for 3027 3049 * the lock. 
3028 3050 */ 3029 3051 if (!(vmf->flags & FAULT_FLAG_TRIED)) 3030 - fpin = do_async_mmap_readahead(vmf, page); 3031 - if (unlikely(!PageUptodate(page))) { 3052 + fpin = do_async_mmap_readahead(vmf, folio); 3053 + if (unlikely(!folio_test_uptodate(folio))) { 3032 3054 filemap_invalidate_lock_shared(mapping); 3033 3055 mapping_locked = true; 3034 3056 } ··· 3039 3063 fpin = do_sync_mmap_readahead(vmf); 3040 3064 retry_find: 3041 3065 /* 3042 - * See comment in filemap_create_page() why we need 3066 + * See comment in filemap_create_folio() why we need 3043 3067 * invalidate_lock 3044 3068 */ 3045 3069 if (!mapping_locked) { 3046 3070 filemap_invalidate_lock_shared(mapping); 3047 3071 mapping_locked = true; 3048 3072 } 3049 - page = pagecache_get_page(mapping, offset, 3073 + folio = __filemap_get_folio(mapping, index, 3050 3074 FGP_CREAT|FGP_FOR_MMAP, 3051 3075 vmf->gfp_mask); 3052 - if (!page) { 3076 + if (!folio) { 3053 3077 if (fpin) 3054 3078 goto out_retry; 3055 3079 filemap_invalidate_unlock_shared(mapping); ··· 3057 3081 } 3058 3082 } 3059 3083 3060 - if (!lock_page_maybe_drop_mmap(vmf, page, &fpin)) 3084 + if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin)) 3061 3085 goto out_retry; 3062 3086 3063 3087 /* Did it get truncated? */ 3064 - if (unlikely(compound_head(page)->mapping != mapping)) { 3065 - unlock_page(page); 3066 - put_page(page); 3088 + if (unlikely(folio->mapping != mapping)) { 3089 + folio_unlock(folio); 3090 + folio_put(folio); 3067 3091 goto retry_find; 3068 3092 } 3069 - VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); 3093 + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); 3070 3094 3071 3095 /* 3072 3096 * We have a locked page in the page cache, now we need to check 3073 3097 * that it's up-to-date. If not, it is going to be due to an error. 3074 3098 */ 3075 - if (unlikely(!PageUptodate(page))) { 3099 + if (unlikely(!folio_test_uptodate(folio))) { 3076 3100 /* 3077 3101 * The page was in cache and uptodate and now it is not. 
3078 3102 * Strange but possible since we didn't hold the page lock all ··· 3080 3104 * try again. 3081 3105 */ 3082 3106 if (!mapping_locked) { 3083 - unlock_page(page); 3084 - put_page(page); 3107 + folio_unlock(folio); 3108 + folio_put(folio); 3085 3109 goto retry_find; 3086 3110 } 3087 3111 goto page_not_uptodate; ··· 3093 3117 * redo the fault. 3094 3118 */ 3095 3119 if (fpin) { 3096 - unlock_page(page); 3120 + folio_unlock(folio); 3097 3121 goto out_retry; 3098 3122 } 3099 3123 if (mapping_locked) ··· 3103 3127 * Found the page and have a reference on it. 3104 3128 * We must recheck i_size under page lock. 3105 3129 */ 3106 - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3107 - if (unlikely(offset >= max_off)) { 3108 - unlock_page(page); 3109 - put_page(page); 3130 + max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 3131 + if (unlikely(index >= max_idx)) { 3132 + folio_unlock(folio); 3133 + folio_put(folio); 3110 3134 return VM_FAULT_SIGBUS; 3111 3135 } 3112 3136 3113 - vmf->page = page; 3137 + vmf->page = folio_file_page(folio, index); 3114 3138 return ret | VM_FAULT_LOCKED; 3115 3139 3116 3140 page_not_uptodate: ··· 3121 3145 * and we need to check for errors. 3122 3146 */ 3123 3147 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3124 - error = filemap_read_page(file, mapping, page); 3148 + error = filemap_read_folio(file, mapping, folio); 3125 3149 if (fpin) 3126 3150 goto out_retry; 3127 - put_page(page); 3151 + folio_put(folio); 3128 3152 3129 3153 if (!error || error == AOP_TRUNCATED_PAGE) 3130 3154 goto retry_find; ··· 3138 3162 * re-find the vma and come back and find our hopefully still populated 3139 3163 * page. 
3140 3164 */ 3141 - if (page) 3142 - put_page(page); 3165 + if (folio) 3166 + folio_put(folio); 3143 3167 if (mapping_locked) 3144 3168 filemap_invalidate_unlock_shared(mapping); 3145 3169 if (fpin) ··· 3181 3205 return false; 3182 3206 } 3183 3207 3184 - static struct page *next_uptodate_page(struct page *page, 3208 + static struct folio *next_uptodate_page(struct folio *folio, 3185 3209 struct address_space *mapping, 3186 3210 struct xa_state *xas, pgoff_t end_pgoff) 3187 3211 { 3188 3212 unsigned long max_idx; 3189 3213 3190 3214 do { 3191 - if (!page) 3215 + if (!folio) 3192 3216 return NULL; 3193 - if (xas_retry(xas, page)) 3217 + if (xas_retry(xas, folio)) 3194 3218 continue; 3195 - if (xa_is_value(page)) 3219 + if (xa_is_value(folio)) 3196 3220 continue; 3197 - if (PageLocked(page)) 3221 + if (folio_test_locked(folio)) 3198 3222 continue; 3199 - if (!page_cache_get_speculative(page)) 3223 + if (!folio_try_get_rcu(folio)) 3200 3224 continue; 3201 3225 /* Has the page moved or been split? 
*/ 3202 - if (unlikely(page != xas_reload(xas))) 3226 + if (unlikely(folio != xas_reload(xas))) 3203 3227 goto skip; 3204 - if (!PageUptodate(page) || PageReadahead(page)) 3228 + if (!folio_test_uptodate(folio) || folio_test_readahead(folio)) 3205 3229 goto skip; 3206 - if (!trylock_page(page)) 3230 + if (!folio_trylock(folio)) 3207 3231 goto skip; 3208 - if (page->mapping != mapping) 3232 + if (folio->mapping != mapping) 3209 3233 goto unlock; 3210 - if (!PageUptodate(page)) 3234 + if (!folio_test_uptodate(folio)) 3211 3235 goto unlock; 3212 3236 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); 3213 3237 if (xas->xa_index >= max_idx) 3214 3238 goto unlock; 3215 - return page; 3239 + return folio; 3216 3240 unlock: 3217 - unlock_page(page); 3241 + folio_unlock(folio); 3218 3242 skip: 3219 - put_page(page); 3220 - } while ((page = xas_next_entry(xas, end_pgoff)) != NULL); 3243 + folio_put(folio); 3244 + } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL); 3221 3245 3222 3246 return NULL; 3223 3247 } 3224 3248 3225 - static inline struct page *first_map_page(struct address_space *mapping, 3249 + static inline struct folio *first_map_page(struct address_space *mapping, 3226 3250 struct xa_state *xas, 3227 3251 pgoff_t end_pgoff) 3228 3252 { ··· 3230 3254 mapping, xas, end_pgoff); 3231 3255 } 3232 3256 3233 - static inline struct page *next_map_page(struct address_space *mapping, 3257 + static inline struct folio *next_map_page(struct address_space *mapping, 3234 3258 struct xa_state *xas, 3235 3259 pgoff_t end_pgoff) 3236 3260 { ··· 3247 3271 pgoff_t last_pgoff = start_pgoff; 3248 3272 unsigned long addr; 3249 3273 XA_STATE(xas, &mapping->i_pages, start_pgoff); 3250 - struct page *head, *page; 3274 + struct folio *folio; 3275 + struct page *page; 3251 3276 unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); 3252 3277 vm_fault_t ret = 0; 3253 3278 3254 3279 rcu_read_lock(); 3255 - head = first_map_page(mapping, &xas, end_pgoff); 3256 - if 
(!head) 3280 + folio = first_map_page(mapping, &xas, end_pgoff); 3281 + if (!folio) 3257 3282 goto out; 3258 3283 3259 - if (filemap_map_pmd(vmf, head)) { 3284 + if (filemap_map_pmd(vmf, &folio->page)) { 3260 3285 ret = VM_FAULT_NOPAGE; 3261 3286 goto out; 3262 3287 } ··· 3265 3288 addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); 3266 3289 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); 3267 3290 do { 3268 - page = find_subpage(head, xas.xa_index); 3291 + again: 3292 + page = folio_file_page(folio, xas.xa_index); 3269 3293 if (PageHWPoison(page)) 3270 3294 goto unlock; 3271 3295 ··· 3287 3309 do_set_pte(vmf, page, addr); 3288 3310 /* no need to invalidate: a not-present page won't be cached */ 3289 3311 update_mmu_cache(vma, addr, vmf->pte); 3290 - unlock_page(head); 3312 + if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { 3313 + xas.xa_index++; 3314 + folio_ref_inc(folio); 3315 + goto again; 3316 + } 3317 + folio_unlock(folio); 3291 3318 continue; 3292 3319 unlock: 3293 - unlock_page(head); 3294 - put_page(head); 3295 - } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL); 3320 + if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { 3321 + xas.xa_index++; 3322 + goto again; 3323 + } 3324 + folio_unlock(folio); 3325 + folio_put(folio); 3326 + } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL); 3296 3327 pte_unmap_unlock(vmf->pte, vmf->ptl); 3297 3328 out: 3298 3329 rcu_read_unlock(); ··· 3313 3326 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) 3314 3327 { 3315 3328 struct address_space *mapping = vmf->vma->vm_file->f_mapping; 3316 - struct page *page = vmf->page; 3329 + struct folio *folio = page_folio(vmf->page); 3317 3330 vm_fault_t ret = VM_FAULT_LOCKED; 3318 3331 3319 3332 sb_start_pagefault(mapping->host->i_sb); 3320 3333 file_update_time(vmf->vma->vm_file); 3321 - lock_page(page); 3322 - if (page->mapping != mapping) { 3323 - unlock_page(page); 3334 + folio_lock(folio); 
3335 + if (folio->mapping != mapping) { 3336 + folio_unlock(folio); 3324 3337 ret = VM_FAULT_NOPAGE; 3325 3338 goto out; 3326 3339 } 3327 3340 /* 3328 - * We mark the page dirty already here so that when freeze is in 3341 + * We mark the folio dirty already here so that when freeze is in 3329 3342 * progress, we are guaranteed that writeback during freezing will 3330 - * see the dirty page and writeprotect it again. 3343 + * see the dirty folio and writeprotect it again. 3331 3344 */ 3332 - set_page_dirty(page); 3333 - wait_for_stable_page(page); 3345 + folio_mark_dirty(folio); 3346 + folio_wait_stable(folio); 3334 3347 out: 3335 3348 sb_end_pagefault(mapping->host->i_sb); 3336 3349 return ret; ··· 3383 3396 EXPORT_SYMBOL(generic_file_mmap); 3384 3397 EXPORT_SYMBOL(generic_file_readonly_mmap); 3385 3398 3386 - static struct page *wait_on_page_read(struct page *page) 3399 + static struct folio *do_read_cache_folio(struct address_space *mapping, 3400 + pgoff_t index, filler_t filler, void *data, gfp_t gfp) 3387 3401 { 3388 - if (!IS_ERR(page)) { 3389 - wait_on_page_locked(page); 3390 - if (!PageUptodate(page)) { 3391 - put_page(page); 3392 - page = ERR_PTR(-EIO); 3393 - } 3394 - } 3395 - return page; 3396 - } 3397 - 3398 - static struct page *do_read_cache_page(struct address_space *mapping, 3399 - pgoff_t index, 3400 - int (*filler)(void *, struct page *), 3401 - void *data, 3402 - gfp_t gfp) 3403 - { 3404 - struct page *page; 3402 + struct folio *folio; 3405 3403 int err; 3406 3404 repeat: 3407 - page = find_get_page(mapping, index); 3408 - if (!page) { 3409 - page = __page_cache_alloc(gfp); 3410 - if (!page) 3405 + folio = filemap_get_folio(mapping, index); 3406 + if (!folio) { 3407 + folio = filemap_alloc_folio(gfp, 0); 3408 + if (!folio) 3411 3409 return ERR_PTR(-ENOMEM); 3412 - err = add_to_page_cache_lru(page, mapping, index, gfp); 3410 + err = filemap_add_folio(mapping, folio, index, gfp); 3413 3411 if (unlikely(err)) { 3414 - put_page(page); 3412 + 
folio_put(folio); 3415 3413 if (err == -EEXIST) 3416 3414 goto repeat; 3417 3415 /* Presumably ENOMEM for xarray node */ ··· 3405 3433 3406 3434 filler: 3407 3435 if (filler) 3408 - err = filler(data, page); 3436 + err = filler(data, &folio->page); 3409 3437 else 3410 - err = mapping->a_ops->readpage(data, page); 3438 + err = mapping->a_ops->readpage(data, &folio->page); 3411 3439 3412 3440 if (err < 0) { 3413 - put_page(page); 3441 + folio_put(folio); 3414 3442 return ERR_PTR(err); 3415 3443 } 3416 3444 3417 - page = wait_on_page_read(page); 3418 - if (IS_ERR(page)) 3419 - return page; 3445 + folio_wait_locked(folio); 3446 + if (!folio_test_uptodate(folio)) { 3447 + folio_put(folio); 3448 + return ERR_PTR(-EIO); 3449 + } 3450 + 3420 3451 goto out; 3421 3452 } 3422 - if (PageUptodate(page)) 3453 + if (folio_test_uptodate(folio)) 3423 3454 goto out; 3424 3455 3425 - /* 3426 - * Page is not up to date and may be locked due to one of the following 3427 - * case a: Page is being filled and the page lock is held 3428 - * case b: Read/write error clearing the page uptodate status 3429 - * case c: Truncation in progress (page locked) 3430 - * case d: Reclaim in progress 3431 - * 3432 - * Case a, the page will be up to date when the page is unlocked. 3433 - * There is no need to serialise on the page lock here as the page 3434 - * is pinned so the lock gives no additional protection. Even if the 3435 - * page is truncated, the data is still valid if PageUptodate as 3436 - * it's a race vs truncate race. 3437 - * Case b, the page will not be up to date 3438 - * Case c, the page may be truncated but in itself, the data may still 3439 - * be valid after IO completes as it's a read vs truncate race. The 3440 - * operation must restart if the page is not uptodate on unlock but 3441 - * otherwise serialising on page lock to stabilise the mapping gives 3442 - * no additional guarantees to the caller as the page lock is 3443 - * released before return. 
3444 - * Case d, similar to truncation. If reclaim holds the page lock, it 3445 - * will be a race with remove_mapping that determines if the mapping 3446 - * is valid on unlock but otherwise the data is valid and there is 3447 - * no need to serialise with page lock. 3448 - * 3449 - * As the page lock gives no additional guarantee, we optimistically 3450 - * wait on the page to be unlocked and check if it's up to date and 3451 - * use the page if it is. Otherwise, the page lock is required to 3452 - * distinguish between the different cases. The motivation is that we 3453 - * avoid spurious serialisations and wakeups when multiple processes 3454 - * wait on the same page for IO to complete. 3455 - */ 3456 - wait_on_page_locked(page); 3457 - if (PageUptodate(page)) 3458 - goto out; 3456 + if (!folio_trylock(folio)) { 3457 + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); 3458 + goto repeat; 3459 + } 3459 3460 3460 - /* Distinguish between all the cases under the safety of the lock */ 3461 - lock_page(page); 3462 - 3463 - /* Case c or d, restart the operation */ 3464 - if (!page->mapping) { 3465 - unlock_page(page); 3466 - put_page(page); 3461 + /* Folio was truncated from mapping */ 3462 + if (!folio->mapping) { 3463 + folio_unlock(folio); 3464 + folio_put(folio); 3467 3465 goto repeat; 3468 3466 } 3469 3467 3470 3468 /* Someone else locked and filled the page in a very small window */ 3471 - if (PageUptodate(page)) { 3472 - unlock_page(page); 3469 + if (folio_test_uptodate(folio)) { 3470 + folio_unlock(folio); 3473 3471 goto out; 3474 3472 } 3475 3473 ··· 3449 3507 * Clear page error before actual read, PG_error will be 3450 3508 * set again if read page fails. 
3451 3509 */ 3452 - ClearPageError(page); 3510 + folio_clear_error(folio); 3453 3511 goto filler; 3454 3512 3455 3513 out: 3456 - mark_page_accessed(page); 3457 - return page; 3514 + folio_mark_accessed(folio); 3515 + return folio; 3458 3516 } 3459 3517 3460 3518 /** 3461 - * read_cache_page - read into page cache, fill it if needed 3519 + * read_cache_folio - read into page cache, fill it if needed 3462 3520 * @mapping: the page's address_space 3463 3521 * @index: the page index 3464 3522 * @filler: function to perform the read ··· 3473 3531 * 3474 3532 * Return: up to date page on success, ERR_PTR() on failure. 3475 3533 */ 3534 + struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index, 3535 + filler_t filler, void *data) 3536 + { 3537 + return do_read_cache_folio(mapping, index, filler, data, 3538 + mapping_gfp_mask(mapping)); 3539 + } 3540 + EXPORT_SYMBOL(read_cache_folio); 3541 + 3542 + static struct page *do_read_cache_page(struct address_space *mapping, 3543 + pgoff_t index, filler_t *filler, void *data, gfp_t gfp) 3544 + { 3545 + struct folio *folio; 3546 + 3547 + folio = do_read_cache_folio(mapping, index, filler, data, gfp); 3548 + if (IS_ERR(folio)) 3549 + return &folio->page; 3550 + return folio_file_page(folio, index); 3551 + } 3552 + 3476 3553 struct page *read_cache_page(struct address_space *mapping, 3477 - pgoff_t index, 3478 - int (*filler)(void *, struct page *), 3479 - void *data) 3554 + pgoff_t index, filler_t *filler, void *data) 3480 3555 { 3481 3556 return do_read_cache_page(mapping, index, filler, data, 3482 3557 mapping_gfp_mask(mapping)); ··· 3853 3894 EXPORT_SYMBOL(generic_file_write_iter); 3854 3895 3855 3896 /** 3856 - * try_to_release_page() - release old fs-specific metadata on a page 3897 + * filemap_release_folio() - Release fs-specific metadata on a folio. 3898 + * @folio: The folio which the kernel is trying to free. 3899 + * @gfp: Memory allocation flags (and I/O mode). 
3857 3900 * 3858 - * @page: the page which the kernel is trying to free 3859 - * @gfp_mask: memory allocation flags (and I/O mode) 3901 + * The address_space is trying to release any data attached to a folio 3902 + * (presumably at folio->private). 3860 3903 * 3861 - * The address_space is to try to release any data against the page 3862 - * (presumably at page->private). 3904 + * This will also be called if the private_2 flag is set on a page, 3905 + * indicating that the folio has other metadata associated with it. 3863 3906 * 3864 - * This may also be called if PG_fscache is set on a page, indicating that the 3865 - * page is known to the local caching routines. 3907 + * The @gfp argument specifies whether I/O may be performed to release 3908 + * this page (__GFP_IO), and whether the call may block 3909 + * (__GFP_RECLAIM & __GFP_FS). 3866 3910 * 3867 - * The @gfp_mask argument specifies whether I/O may be performed to release 3868 - * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). 3869 - * 3870 - * Return: %1 if the release was successful, otherwise return zero. 3911 + * Return: %true if the release was successful, otherwise %false. 
3871 3912 */ 3872 - int try_to_release_page(struct page *page, gfp_t gfp_mask) 3913 + bool filemap_release_folio(struct folio *folio, gfp_t gfp) 3873 3914 { 3874 - struct address_space * const mapping = page->mapping; 3915 + struct address_space * const mapping = folio->mapping; 3875 3916 3876 - BUG_ON(!PageLocked(page)); 3877 - if (PageWriteback(page)) 3878 - return 0; 3917 + BUG_ON(!folio_test_locked(folio)); 3918 + if (folio_test_writeback(folio)) 3919 + return false; 3879 3920 3880 3921 if (mapping && mapping->a_ops->releasepage) 3881 - return mapping->a_ops->releasepage(page, gfp_mask); 3882 - return try_to_free_buffers(page); 3922 + return mapping->a_ops->releasepage(&folio->page, gfp); 3923 + return try_to_free_buffers(&folio->page); 3883 3924 } 3884 - 3885 - EXPORT_SYMBOL(try_to_release_page); 3925 + EXPORT_SYMBOL(filemap_release_folio);

+11

mm/folio-compat.c

··· 140 140 mapping_gfp_mask(mapping)); 141 141 } 142 142 EXPORT_SYMBOL(grab_cache_page_write_begin); 143 + 144 + void delete_from_page_cache(struct page *page) 145 + { 146 + return filemap_remove_folio(page_folio(page)); 147 + } 148 + 149 + int try_to_release_page(struct page *page, gfp_t gfp) 150 + { 151 + return filemap_release_folio(page_folio(page), gfp); 152 + } 153 + EXPORT_SYMBOL(try_to_release_page);

+14 -4

mm/huge_memory.c

··· 2614 2614 { 2615 2615 struct page *head = compound_head(page); 2616 2616 struct deferred_split *ds_queue = get_deferred_split_queue(head); 2617 + XA_STATE(xas, &head->mapping->i_pages, head->index); 2617 2618 struct anon_vma *anon_vma = NULL; 2618 2619 struct address_space *mapping = NULL; 2619 2620 int extra_pins, ret; ··· 2653 2652 goto out; 2654 2653 } 2655 2654 2655 + xas_split_alloc(&xas, head, compound_order(head), 2656 + mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK); 2657 + if (xas_error(&xas)) { 2658 + ret = xas_error(&xas); 2659 + goto out; 2660 + } 2661 + 2656 2662 anon_vma = NULL; 2657 2663 i_mmap_lock_read(mapping); 2658 2664 ··· 2689 2681 /* block interrupt reentry in xa_lock and spinlock */ 2690 2682 local_irq_disable(); 2691 2683 if (mapping) { 2692 - XA_STATE(xas, &mapping->i_pages, page_index(head)); 2693 - 2694 2684 /* 2695 2685 * Check if the head page is present in page cache. 2696 2686 * We assume all tail are present too, if head is there. 2697 2687 */ 2698 - xa_lock(&mapping->i_pages); 2688 + xas_lock(&xas); 2689 + xas_reset(&xas); 2699 2690 if (xas_load(&xas) != head) 2700 2691 goto fail; 2701 2692 } ··· 2710 2703 if (mapping) { 2711 2704 int nr = thp_nr_pages(head); 2712 2705 2706 + xas_split(&xas, head, thp_order(head)); 2713 2707 if (PageSwapBacked(head)) { 2714 2708 __mod_lruvec_page_state(head, NR_SHMEM_THPS, 2715 2709 -nr); ··· 2727 2719 spin_unlock(&ds_queue->split_queue_lock); 2728 2720 fail: 2729 2721 if (mapping) 2730 - xa_unlock(&mapping->i_pages); 2722 + xas_unlock(&xas); 2731 2723 local_irq_enable(); 2732 2724 remap_page(head, thp_nr_pages(head)); 2733 2725 ret = -EBUSY; ··· 2741 2733 if (mapping) 2742 2734 i_mmap_unlock_read(mapping); 2743 2735 out: 2736 + /* Free any memory we didn't use */ 2737 + xas_nomem(&xas, 0); 2744 2738 count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); 2745 2739 return ret; 2746 2740 }

+12 -2

mm/internal.h

··· 12 12 #include <linux/pagemap.h> 13 13 #include <linux/tracepoint-defs.h> 14 14 15 + struct folio_batch; 16 + 15 17 /* 16 18 * The set of flags that only affect watermark checking and reclaim 17 19 * behaviour. This is used by the MM to obey the caller constraints ··· 76 74 return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)); 77 75 } 78 76 77 + struct zap_details; 79 78 void unmap_page_range(struct mmu_gather *tlb, 80 79 struct vm_area_struct *vma, 81 80 unsigned long addr, unsigned long end, ··· 93 90 } 94 91 95 92 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, 96 - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); 93 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); 94 + unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 95 + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); 96 + void filemap_free_folio(struct address_space *mapping, struct folio *folio); 97 + int truncate_inode_folio(struct address_space *mapping, struct folio *folio); 98 + bool truncate_inode_partial_folio(struct folio *folio, loff_t start, 99 + loff_t end); 97 100 98 101 /** 99 102 * folio_evictable - Test whether a folio is evictable. ··· 397 388 void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma); 398 389 399 390 #ifdef CONFIG_MMU 391 + void unmap_mapping_folio(struct folio *folio); 400 392 extern long populate_vma_page_range(struct vm_area_struct *vma, 401 393 unsigned long start, unsigned long end, int *locked); 402 394 extern long faultin_vma_page_range(struct vm_area_struct *vma, ··· 501 491 } 502 492 return fpin; 503 493 } 504 - 505 494 #else /* !CONFIG_MMU */ 495 + static inline void unmap_mapping_folio(struct folio *folio) { } 506 496 static inline void clear_page_mlock(struct page *page) { } 507 497 static inline void mlock_vma_page(struct page *page) { } 508 498 static inline void vunmap_range_noflush(unsigned long start, unsigned long end)

+11 -1

mm/khugepaged.c

··· 1667 1667 } 1668 1668 count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); 1669 1669 1670 - /* This will be less messy when we use multi-index entries */ 1670 + /* 1671 + * Ensure we have slots for all the pages in the range. This is 1672 + * almost certainly a no-op because most of the pages must be present 1673 + */ 1671 1674 do { 1672 1675 xas_lock_irq(&xas); 1673 1676 xas_create_range(&xas); ··· 1895 1892 __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); 1896 1893 } 1897 1894 1895 + /* Join all the small entries into a single multi-index entry */ 1896 + xas_set_order(&xas, start, HPAGE_PMD_ORDER); 1897 + xas_store(&xas, new_page); 1898 1898 xa_locked: 1899 1899 xas_unlock_irq(&xas); 1900 1900 xa_unlocked: ··· 2019 2013 continue; 2020 2014 } 2021 2015 2016 + /* 2017 + * XXX: khugepaged should compact smaller compound pages 2018 + * into a PMD sized page 2019 + */ 2022 2020 if (PageTransCompound(page)) { 2023 2021 result = SCAN_PAGE_COMPOUND; 2024 2022 break;

+35 -14

mm/memory.c

··· 1304 1304 return ret; 1305 1305 } 1306 1306 1307 + /* 1308 + * Parameter block passed down to zap_pte_range in exceptional cases. 1309 + */ 1310 + struct zap_details { 1311 + struct address_space *zap_mapping; /* Check page->mapping if set */ 1312 + struct folio *single_folio; /* Locked folio to be unmapped */ 1313 + }; 1314 + 1315 + /* 1316 + * We set details->zap_mapping when we want to unmap shared but keep private 1317 + * pages. Return true if skip zapping this page, false otherwise. 1318 + */ 1319 + static inline bool 1320 + zap_skip_check_mapping(struct zap_details *details, struct page *page) 1321 + { 1322 + if (!details || !page) 1323 + return false; 1324 + 1325 + return details->zap_mapping && 1326 + (details->zap_mapping != page_rmapping(page)); 1327 + } 1328 + 1307 1329 static unsigned long zap_pte_range(struct mmu_gather *tlb, 1308 1330 struct vm_area_struct *vma, pmd_t *pmd, 1309 1331 unsigned long addr, unsigned long end, ··· 1465 1443 else if (zap_huge_pmd(tlb, vma, pmd, addr)) 1466 1444 goto next; 1467 1445 /* fall through */ 1468 - } else if (details && details->single_page && 1469 - PageTransCompound(details->single_page) && 1446 + } else if (details && details->single_folio && 1447 + folio_test_pmd_mappable(details->single_folio) && 1470 1448 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { 1471 1449 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); 1472 1450 /* ··· 3354 3332 } 3355 3333 3356 3334 /** 3357 - * unmap_mapping_page() - Unmap single page from processes. 3358 - * @page: The locked page to be unmapped. 3335 + * unmap_mapping_folio() - Unmap single folio from processes. 3336 + * @folio: The locked folio to be unmapped. 3359 3337 * 3360 - * Unmap this page from any userspace process which still has it mmaped. 3338 + * Unmap this folio from any userspace process which still has it mmaped. 3361 3339 * Typically, for efficiency, the range of nearby pages has already been 3362 3340 * unmapped by unmap_mapping_pages() or unmap_mapping_range(). 
But once 3363 - * truncation or invalidation holds the lock on a page, it may find that 3364 - * the page has been remapped again: and then uses unmap_mapping_page() 3341 + * truncation or invalidation holds the lock on a folio, it may find that 3342 + * the folio has been remapped again: and then uses unmap_mapping_folio() 3365 3343 * to unmap it finally. 3366 3344 */ 3367 - void unmap_mapping_page(struct page *page) 3345 + void unmap_mapping_folio(struct folio *folio) 3368 3346 { 3369 - struct address_space *mapping = page->mapping; 3347 + struct address_space *mapping = folio->mapping; 3370 3348 struct zap_details details = { }; 3371 3349 pgoff_t first_index; 3372 3350 pgoff_t last_index; 3373 3351 3374 - VM_BUG_ON(!PageLocked(page)); 3375 - VM_BUG_ON(PageTail(page)); 3352 + VM_BUG_ON(!folio_test_locked(folio)); 3376 3353 3377 - first_index = page->index; 3378 - last_index = page->index + thp_nr_pages(page) - 1; 3354 + first_index = folio->index; 3355 + last_index = folio->index + folio_nr_pages(folio) - 1; 3379 3356 3380 3357 details.zap_mapping = mapping; 3381 - details.single_page = page; 3358 + details.single_folio = folio; 3382 3359 3383 3360 i_mmap_lock_write(mapping); 3384 3361 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))

+10 -19

mm/migrate.c

··· 291 291 { 292 292 pte_t pte; 293 293 swp_entry_t entry; 294 - struct page *page; 294 + struct folio *folio; 295 295 296 296 spin_lock(ptl); 297 297 pte = *ptep; ··· 302 302 if (!is_migration_entry(entry)) 303 303 goto out; 304 304 305 - page = pfn_swap_entry_to_page(entry); 306 - page = compound_head(page); 305 + folio = page_folio(pfn_swap_entry_to_page(entry)); 307 306 308 307 /* 309 308 * Once page cache replacement of page migration started, page_count 310 - * is zero; but we must not call put_and_wait_on_page_locked() without 311 - * a ref. Use get_page_unless_zero(), and just fault again if it fails. 309 + * is zero; but we must not call folio_put_wait_locked() without 310 + * a ref. Use folio_try_get(), and just fault again if it fails. 312 311 */ 313 - if (!get_page_unless_zero(page)) 312 + if (!folio_try_get(folio)) 314 313 goto out; 315 314 pte_unmap_unlock(ptep, ptl); 316 - put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); 315 + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); 317 316 return; 318 317 out: 319 318 pte_unmap_unlock(ptep, ptl); ··· 337 338 void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) 338 339 { 339 340 spinlock_t *ptl; 340 - struct page *page; 341 + struct folio *folio; 341 342 342 343 ptl = pmd_lock(mm, pmd); 343 344 if (!is_pmd_migration_entry(*pmd)) 344 345 goto unlock; 345 - page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)); 346 - if (!get_page_unless_zero(page)) 346 + folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd))); 347 + if (!folio_try_get(folio)) 347 348 goto unlock; 348 349 spin_unlock(ptl); 349 - put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); 350 + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); 350 351 return; 351 352 unlock: 352 353 spin_unlock(ptl); ··· 433 434 } 434 435 435 436 xas_store(&xas, newfolio); 436 - if (nr > 1) { 437 - int i; 438 - 439 - for (i = 1; i < nr; i++) { 440 - xas_next(&xas); 441 - xas_store(&xas, newfolio); 442 - } 443 - } 444 437 445 438 
/* 446 439 * Drop cache reference from old page by unfreezing

+5 -1

mm/page-writeback.c

··· 2496 2496 * If warn is true, then emit a warning if the folio is not uptodate and has 2497 2497 * not been truncated. 2498 2498 * 2499 - * The caller must hold lock_page_memcg(). 2499 + * The caller must hold lock_page_memcg(). Most callers have the folio 2500 + * locked. A few have the folio blocked from truncation through other 2501 + * means (eg zap_page_range() has it mapped and is holding the page table 2502 + * lock). This can also be called from mark_buffer_dirty(), which I 2503 + * cannot prove is always protected against truncate. 2500 2504 */ 2501 2505 void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, 2502 2506 int warn)

+12 -12

mm/readahead.c

··· 196 196 * Preallocate as many pages as we will need. 197 197 */ 198 198 for (i = 0; i < nr_to_read; i++) { 199 - struct page *page = xa_load(&mapping->i_pages, index + i); 199 + struct folio *folio = xa_load(&mapping->i_pages, index + i); 200 200 201 - if (page && !xa_is_value(page)) { 201 + if (folio && !xa_is_value(folio)) { 202 202 /* 203 203 * Page already present? Kick off the current batch 204 204 * of contiguous pages before continuing with the ··· 212 212 continue; 213 213 } 214 214 215 - page = __page_cache_alloc(gfp_mask); 216 - if (!page) 215 + folio = filemap_alloc_folio(gfp_mask, 0); 216 + if (!folio) 217 217 break; 218 218 if (mapping->a_ops->readpages) { 219 - page->index = index + i; 220 - list_add(&page->lru, &page_pool); 221 - } else if (add_to_page_cache_lru(page, mapping, index + i, 219 + folio->index = index + i; 220 + list_add(&folio->lru, &page_pool); 221 + } else if (filemap_add_folio(mapping, folio, index + i, 222 222 gfp_mask) < 0) { 223 - put_page(page); 223 + folio_put(folio); 224 224 read_pages(ractl, &page_pool, true); 225 225 i = ractl->_index + ractl->_nr_pages - index - 1; 226 226 continue; 227 227 } 228 228 if (i == nr_to_read - lookahead_size) 229 - SetPageReadahead(page); 229 + folio_set_readahead(folio); 230 230 ractl->_nr_pages++; 231 231 } 232 232 ··· 581 581 EXPORT_SYMBOL_GPL(page_cache_sync_ra); 582 582 583 583 void page_cache_async_ra(struct readahead_control *ractl, 584 - struct page *page, unsigned long req_count) 584 + struct folio *folio, unsigned long req_count) 585 585 { 586 586 /* no read-ahead */ 587 587 if (!ractl->ra->ra_pages) ··· 590 590 /* 591 591 * Same bit is used for PG_readahead and PG_reclaim. 592 592 */ 593 - if (PageWriteback(page)) 593 + if (folio_test_writeback(folio)) 594 594 return; 595 595 596 - ClearPageReadahead(page); 596 + folio_clear_readahead(folio); 597 597 598 598 /* 599 599 * Defer asynchronous read-ahead on IO congestion.

+78 -96

mm/shmem.c

··· 694 694 struct mm_struct *charge_mm) 695 695 { 696 696 XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); 697 - unsigned long i = 0; 698 697 unsigned long nr = compound_nr(page); 699 698 int error; 700 699 ··· 720 721 cgroup_throttle_swaprate(page, gfp); 721 722 722 723 do { 723 - void *entry; 724 724 xas_lock_irq(&xas); 725 - entry = xas_find_conflict(&xas); 726 - if (entry != expected) 725 + if (expected != xas_find_conflict(&xas)) { 727 726 xas_set_err(&xas, -EEXIST); 728 - xas_create_range(&xas); 727 + goto unlock; 728 + } 729 + if (expected && xas_find_conflict(&xas)) { 730 + xas_set_err(&xas, -EEXIST); 731 + goto unlock; 732 + } 733 + xas_store(&xas, page); 729 734 if (xas_error(&xas)) 730 735 goto unlock; 731 - next: 732 - xas_store(&xas, page); 733 - if (++i < nr) { 734 - xas_next(&xas); 735 - goto next; 736 - } 737 736 if (PageTransHuge(page)) { 738 737 count_vm_event(THP_FILE_ALLOC); 739 738 __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr); ··· 877 880 } 878 881 } 879 882 880 - /* 881 - * Check whether a hole-punch or truncation needs to split a huge page, 882 - * returning true if no split was required, or the split has been successful. 883 - * 884 - * Eviction (or truncation to 0 size) should never need to split a huge page; 885 - * but in rare cases might do so, if shmem_undo_range() failed to trylock on 886 - * head, and then succeeded to trylock on tail. 887 - * 888 - * A split can only succeed when there are no additional references on the 889 - * huge page: so the split below relies upon find_get_entries() having stopped 890 - * when it found a subpage of the huge page, without getting further references. 
891 - */ 892 - static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end) 883 + static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) 893 884 { 894 - if (!PageTransCompound(page)) 895 - return true; 885 + struct folio *folio; 886 + struct page *page; 896 887 897 - /* Just proceed to delete a huge page wholly within the range punched */ 898 - if (PageHead(page) && 899 - page->index >= start && page->index + HPAGE_PMD_NR <= end) 900 - return true; 901 - 902 - /* Try to split huge page, so we can truly punch the hole or truncate */ 903 - return split_huge_page(page) >= 0; 888 + /* 889 + * At first avoid shmem_getpage(,,,SGP_READ): that fails 890 + * beyond i_size, and reports fallocated pages as holes. 891 + */ 892 + folio = __filemap_get_folio(inode->i_mapping, index, 893 + FGP_ENTRY | FGP_LOCK, 0); 894 + if (!xa_is_value(folio)) 895 + return folio; 896 + /* 897 + * But read a page back from swap if any of it is within i_size 898 + * (although in some cases this is just a waste of time). 899 + */ 900 + page = NULL; 901 + shmem_getpage(inode, index, &page, SGP_READ); 902 + return page ? 
page_folio(page) : NULL; 904 903 } 905 904 906 905 /* ··· 910 917 struct shmem_inode_info *info = SHMEM_I(inode); 911 918 pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; 912 919 pgoff_t end = (lend + 1) >> PAGE_SHIFT; 913 - unsigned int partial_start = lstart & (PAGE_SIZE - 1); 914 - unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); 915 - struct pagevec pvec; 920 + struct folio_batch fbatch; 916 921 pgoff_t indices[PAGEVEC_SIZE]; 922 + struct folio *folio; 923 + bool same_folio; 917 924 long nr_swaps_freed = 0; 918 925 pgoff_t index; 919 926 int i; ··· 924 931 if (info->fallocend > start && info->fallocend <= end && !unfalloc) 925 932 info->fallocend = start; 926 933 927 - pagevec_init(&pvec); 934 + folio_batch_init(&fbatch); 928 935 index = start; 929 936 while (index < end && find_lock_entries(mapping, index, end - 1, 930 - &pvec, indices)) { 931 - for (i = 0; i < pagevec_count(&pvec); i++) { 932 - struct page *page = pvec.pages[i]; 937 + &fbatch, indices)) { 938 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 939 + folio = fbatch.folios[i]; 933 940 934 941 index = indices[i]; 935 942 936 - if (xa_is_value(page)) { 943 + if (xa_is_value(folio)) { 937 944 if (unfalloc) 938 945 continue; 939 946 nr_swaps_freed += !shmem_free_swap(mapping, 940 - index, page); 947 + index, folio); 941 948 continue; 942 949 } 943 - index += thp_nr_pages(page) - 1; 950 + index += folio_nr_pages(folio) - 1; 944 951 945 - if (!unfalloc || !PageUptodate(page)) 946 - truncate_inode_page(mapping, page); 947 - unlock_page(page); 952 + if (!unfalloc || !folio_test_uptodate(folio)) 953 + truncate_inode_folio(mapping, folio); 954 + folio_unlock(folio); 948 955 } 949 - pagevec_remove_exceptionals(&pvec); 950 - pagevec_release(&pvec); 956 + folio_batch_remove_exceptionals(&fbatch); 957 + folio_batch_release(&fbatch); 951 958 cond_resched(); 952 959 index++; 953 960 } 954 961 955 - if (partial_start) { 956 - struct page *page = NULL; 957 - shmem_getpage(inode, start - 1, &page, 
SGP_READ); 958 - if (page) { 959 - unsigned int top = PAGE_SIZE; 960 - if (start > end) { 961 - top = partial_end; 962 - partial_end = 0; 963 - } 964 - zero_user_segment(page, partial_start, top); 965 - set_page_dirty(page); 966 - unlock_page(page); 967 - put_page(page); 962 + same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 963 + folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); 964 + if (folio) { 965 + same_folio = lend < folio_pos(folio) + folio_size(folio); 966 + folio_mark_dirty(folio); 967 + if (!truncate_inode_partial_folio(folio, lstart, lend)) { 968 + start = folio->index + folio_nr_pages(folio); 969 + if (same_folio) 970 + end = folio->index; 968 971 } 972 + folio_unlock(folio); 973 + folio_put(folio); 974 + folio = NULL; 969 975 } 970 - if (partial_end) { 971 - struct page *page = NULL; 972 - shmem_getpage(inode, end, &page, SGP_READ); 973 - if (page) { 974 - zero_user_segment(page, 0, partial_end); 975 - set_page_dirty(page); 976 - unlock_page(page); 977 - put_page(page); 978 - } 976 + 977 + if (!same_folio) 978 + folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT); 979 + if (folio) { 980 + folio_mark_dirty(folio); 981 + if (!truncate_inode_partial_folio(folio, lstart, lend)) 982 + end = folio->index; 983 + folio_unlock(folio); 984 + folio_put(folio); 979 985 } 980 - if (start >= end) 981 - return; 982 986 983 987 index = start; 984 988 while (index < end) { 985 989 cond_resched(); 986 990 987 - if (!find_get_entries(mapping, index, end - 1, &pvec, 991 + if (!find_get_entries(mapping, index, end - 1, &fbatch, 988 992 indices)) { 989 993 /* If all gone or hole-punch or unfalloc, we're done */ 990 994 if (index == start || end != -1) ··· 990 1000 index = start; 991 1001 continue; 992 1002 } 993 - for (i = 0; i < pagevec_count(&pvec); i++) { 994 - struct page *page = pvec.pages[i]; 1003 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 1004 + folio = fbatch.folios[i]; 995 1005 996 1006 index = indices[i]; 997 - if 
(xa_is_value(page)) { 1007 + if (xa_is_value(folio)) { 998 1008 if (unfalloc) 999 1009 continue; 1000 - if (shmem_free_swap(mapping, index, page)) { 1010 + if (shmem_free_swap(mapping, index, folio)) { 1001 1011 /* Swap was replaced by page: retry */ 1002 1012 index--; 1003 1013 break; ··· 1006 1016 continue; 1007 1017 } 1008 1018 1009 - lock_page(page); 1019 + folio_lock(folio); 1010 1020 1011 - if (!unfalloc || !PageUptodate(page)) { 1012 - if (page_mapping(page) != mapping) { 1021 + if (!unfalloc || !folio_test_uptodate(folio)) { 1022 + if (folio_mapping(folio) != mapping) { 1013 1023 /* Page was replaced by swap: retry */ 1014 - unlock_page(page); 1024 + folio_unlock(folio); 1015 1025 index--; 1016 1026 break; 1017 1027 } 1018 - VM_BUG_ON_PAGE(PageWriteback(page), page); 1019 - if (shmem_punch_compound(page, start, end)) 1020 - truncate_inode_page(mapping, page); 1021 - else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 1022 - /* Wipe the page and don't get stuck */ 1023 - clear_highpage(page); 1024 - flush_dcache_page(page); 1025 - set_page_dirty(page); 1026 - if (index < 1027 - round_up(start, HPAGE_PMD_NR)) 1028 - start = index + 1; 1029 - } 1028 + VM_BUG_ON_FOLIO(folio_test_writeback(folio), 1029 + folio); 1030 + truncate_inode_folio(mapping, folio); 1030 1031 } 1031 - unlock_page(page); 1032 + index = folio->index + folio_nr_pages(folio) - 1; 1033 + folio_unlock(folio); 1032 1034 } 1033 - pagevec_remove_exceptionals(&pvec); 1034 - pagevec_release(&pvec); 1035 + folio_batch_remove_exceptionals(&fbatch); 1036 + folio_batch_release(&fbatch); 1035 1037 index++; 1036 1038 } 1037 1039

+13 -13

mm/swap.c

··· 1077 1077 } 1078 1078 1079 1079 /** 1080 - * pagevec_remove_exceptionals - pagevec exceptionals pruning 1081 - * @pvec: The pagevec to prune 1080 + * folio_batch_remove_exceptionals() - Prune non-folios from a batch. 1081 + * @fbatch: The batch to prune 1082 1082 * 1083 - * find_get_entries() fills both pages and XArray value entries (aka 1084 - * exceptional entries) into the pagevec. This function prunes all 1085 - * exceptionals from @pvec without leaving holes, so that it can be 1086 - * passed on to page-only pagevec operations. 1083 + * find_get_entries() fills a batch with both folios and shadow/swap/DAX 1084 + * entries. This function prunes all the non-folio entries from @fbatch 1085 + * without leaving holes, so that it can be passed on to folio-only batch 1086 + * operations. 1087 1087 */ 1088 - void pagevec_remove_exceptionals(struct pagevec *pvec) 1088 + void folio_batch_remove_exceptionals(struct folio_batch *fbatch) 1089 1089 { 1090 - int i, j; 1090 + unsigned int i, j; 1091 1091 1092 - for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 1093 - struct page *page = pvec->pages[i]; 1094 - if (!xa_is_value(page)) 1095 - pvec->pages[j++] = page; 1092 + for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { 1093 + struct folio *folio = fbatch->folios[i]; 1094 + if (!xa_is_value(folio)) 1095 + fbatch->folios[j++] = folio; 1096 1096 } 1097 - pvec->nr = j; 1097 + fbatch->nr = j; 1098 1098 } 1099 1099 1100 1100 /**

+168 -136

mm/truncate.c

··· 56 56 57 57 /* 58 58 * Unconditionally remove exceptional entries. Usually called from truncate 59 - * path. Note that the pagevec may be altered by this function by removing 60 - * exceptional entries similar to what pagevec_remove_exceptionals does. 59 + * path. Note that the folio_batch may be altered by this function by removing 60 + * exceptional entries similar to what folio_batch_remove_exceptionals() does. 61 61 */ 62 - static void truncate_exceptional_pvec_entries(struct address_space *mapping, 63 - struct pagevec *pvec, pgoff_t *indices) 62 + static void truncate_folio_batch_exceptionals(struct address_space *mapping, 63 + struct folio_batch *fbatch, pgoff_t *indices) 64 64 { 65 65 int i, j; 66 66 bool dax; ··· 69 69 if (shmem_mapping(mapping)) 70 70 return; 71 71 72 - for (j = 0; j < pagevec_count(pvec); j++) 73 - if (xa_is_value(pvec->pages[j])) 72 + for (j = 0; j < folio_batch_count(fbatch); j++) 73 + if (xa_is_value(fbatch->folios[j])) 74 74 break; 75 75 76 - if (j == pagevec_count(pvec)) 76 + if (j == folio_batch_count(fbatch)) 77 77 return; 78 78 79 79 dax = dax_mapping(mapping); ··· 82 82 xa_lock_irq(&mapping->i_pages); 83 83 } 84 84 85 - for (i = j; i < pagevec_count(pvec); i++) { 86 - struct page *page = pvec->pages[i]; 85 + for (i = j; i < folio_batch_count(fbatch); i++) { 86 + struct folio *folio = fbatch->folios[i]; 87 87 pgoff_t index = indices[i]; 88 88 89 - if (!xa_is_value(page)) { 90 - pvec->pages[j++] = page; 89 + if (!xa_is_value(folio)) { 90 + fbatch->folios[j++] = folio; 91 91 continue; 92 92 } 93 93 ··· 96 96 continue; 97 97 } 98 98 99 - __clear_shadow_entry(mapping, index, page); 99 + __clear_shadow_entry(mapping, index, folio); 100 100 } 101 101 102 102 if (!dax) { ··· 105 105 inode_add_lru(mapping->host); 106 106 spin_unlock(&mapping->host->i_lock); 107 107 } 108 - pvec->nr = j; 108 + fbatch->nr = j; 109 109 } 110 110 111 111 /* ··· 177 177 * its lock, b) when a concurrent invalidate_mapping_pages got there first and 178 178 * 
c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 179 179 */ 180 - static void truncate_cleanup_page(struct page *page) 180 + static void truncate_cleanup_folio(struct folio *folio) 181 181 { 182 - if (page_mapped(page)) 183 - unmap_mapping_page(page); 182 + if (folio_mapped(folio)) 183 + unmap_mapping_folio(folio); 184 184 185 - if (page_has_private(page)) 186 - do_invalidatepage(page, 0, thp_size(page)); 185 + if (folio_has_private(folio)) 186 + do_invalidatepage(&folio->page, 0, folio_size(folio)); 187 187 188 188 /* 189 189 * Some filesystems seem to re-dirty the page even after 190 190 * the VM has canceled the dirty bit (eg ext3 journaling). 191 191 * Hence dirty accounting check is placed after invalidation. 192 192 */ 193 - cancel_dirty_page(page); 194 - ClearPageMappedToDisk(page); 193 + folio_cancel_dirty(folio); 194 + folio_clear_mappedtodisk(folio); 195 195 } 196 196 197 197 /* ··· 218 218 return ret; 219 219 } 220 220 221 - int truncate_inode_page(struct address_space *mapping, struct page *page) 221 + int truncate_inode_folio(struct address_space *mapping, struct folio *folio) 222 222 { 223 - VM_BUG_ON_PAGE(PageTail(page), page); 224 - 225 - if (page->mapping != mapping) 223 + if (folio->mapping != mapping) 226 224 return -EIO; 227 225 228 - truncate_cleanup_page(page); 229 - delete_from_page_cache(page); 226 + truncate_cleanup_folio(folio); 227 + filemap_remove_folio(folio); 230 228 return 0; 229 + } 230 + 231 + /* 232 + * Handle partial folios. The folio may be entirely within the 233 + * range if a split has raced with us. If not, we zero the part of the 234 + * folio that's within the [start, end] range, and then split the folio if 235 + * it's large. split_huge_page() will discard pages which now lie beyond 236 + * i_size, and we rely on the caller to discard pages which lie within a 237 + * newly created hole. 
238 + * 239 + * Returns false if splitting failed so the caller can avoid 240 + * discarding the entire folio which is stubbornly unsplit. 241 + */ 242 + bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) 243 + { 244 + loff_t pos = folio_pos(folio); 245 + unsigned int offset, length; 246 + 247 + if (pos < start) 248 + offset = start - pos; 249 + else 250 + offset = 0; 251 + length = folio_size(folio); 252 + if (pos + length <= (u64)end) 253 + length = length - offset; 254 + else 255 + length = end + 1 - pos - offset; 256 + 257 + folio_wait_writeback(folio); 258 + if (length == folio_size(folio)) { 259 + truncate_inode_folio(folio->mapping, folio); 260 + return true; 261 + } 262 + 263 + /* 264 + * We may be zeroing pages we're about to discard, but it avoids 265 + * doing a complex calculation here, and then doing the zeroing 266 + * anyway if the page split fails. 267 + */ 268 + folio_zero_range(folio, offset, length); 269 + 270 + cleancache_invalidate_page(folio->mapping, &folio->page); 271 + if (folio_has_private(folio)) 272 + do_invalidatepage(&folio->page, offset, length); 273 + if (!folio_test_large(folio)) 274 + return true; 275 + if (split_huge_page(&folio->page) == 0) 276 + return true; 277 + if (folio_test_dirty(folio)) 278 + return false; 279 + truncate_inode_folio(folio->mapping, folio); 280 + return true; 231 281 } 232 282 233 283 /* ··· 285 235 */ 286 236 int generic_error_remove_page(struct address_space *mapping, struct page *page) 287 237 { 238 + VM_BUG_ON_PAGE(PageTail(page), page); 239 + 288 240 if (!mapping) 289 241 return -EINVAL; 290 242 /* ··· 295 243 */ 296 244 if (!S_ISREG(mapping->host->i_mode)) 297 245 return -EIO; 298 - return truncate_inode_page(mapping, page); 246 + return truncate_inode_folio(mapping, page_folio(page)); 299 247 } 300 248 EXPORT_SYMBOL(generic_error_remove_page); 301 249 ··· 346 294 { 347 295 pgoff_t start; /* inclusive */ 348 296 pgoff_t end; /* exclusive */ 349 - unsigned int 
partial_start; /* inclusive */ 350 - unsigned int partial_end; /* exclusive */ 351 - struct pagevec pvec; 297 + struct folio_batch fbatch; 352 298 pgoff_t indices[PAGEVEC_SIZE]; 353 299 pgoff_t index; 354 300 int i; 301 + struct folio *folio; 302 + bool same_folio; 355 303 356 304 if (mapping_empty(mapping)) 357 305 goto out; 358 - 359 - /* Offsets within partial pages */ 360 - partial_start = lstart & (PAGE_SIZE - 1); 361 - partial_end = (lend + 1) & (PAGE_SIZE - 1); 362 306 363 307 /* 364 308 * 'start' and 'end' always covers the range of pages to be fully ··· 373 325 else 374 326 end = (lend + 1) >> PAGE_SHIFT; 375 327 376 - pagevec_init(&pvec); 328 + folio_batch_init(&fbatch); 377 329 index = start; 378 330 while (index < end && find_lock_entries(mapping, index, end - 1, 379 - &pvec, indices)) { 380 - index = indices[pagevec_count(&pvec) - 1] + 1; 381 - truncate_exceptional_pvec_entries(mapping, &pvec, indices); 382 - for (i = 0; i < pagevec_count(&pvec); i++) 383 - truncate_cleanup_page(pvec.pages[i]); 384 - delete_from_page_cache_batch(mapping, &pvec); 385 - for (i = 0; i < pagevec_count(&pvec); i++) 386 - unlock_page(pvec.pages[i]); 387 - pagevec_release(&pvec); 331 + &fbatch, indices)) { 332 + index = indices[folio_batch_count(&fbatch) - 1] + 1; 333 + truncate_folio_batch_exceptionals(mapping, &fbatch, indices); 334 + for (i = 0; i < folio_batch_count(&fbatch); i++) 335 + truncate_cleanup_folio(fbatch.folios[i]); 336 + delete_from_page_cache_batch(mapping, &fbatch); 337 + for (i = 0; i < folio_batch_count(&fbatch); i++) 338 + folio_unlock(fbatch.folios[i]); 339 + folio_batch_release(&fbatch); 388 340 cond_resched(); 389 341 } 390 342 391 - if (partial_start) { 392 - struct page *page = find_lock_page(mapping, start - 1); 393 - if (page) { 394 - unsigned int top = PAGE_SIZE; 395 - if (start > end) { 396 - /* Truncation within a single page */ 397 - top = partial_end; 398 - partial_end = 0; 399 - } 400 - wait_on_page_writeback(page); 401 - 
zero_user_segment(page, partial_start, top); 402 - cleancache_invalidate_page(mapping, page); 403 - if (page_has_private(page)) 404 - do_invalidatepage(page, partial_start, 405 - top - partial_start); 406 - unlock_page(page); 407 - put_page(page); 343 + same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); 344 + folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); 345 + if (folio) { 346 + same_folio = lend < folio_pos(folio) + folio_size(folio); 347 + if (!truncate_inode_partial_folio(folio, lstart, lend)) { 348 + start = folio->index + folio_nr_pages(folio); 349 + if (same_folio) 350 + end = folio->index; 408 351 } 352 + folio_unlock(folio); 353 + folio_put(folio); 354 + folio = NULL; 409 355 } 410 - if (partial_end) { 411 - struct page *page = find_lock_page(mapping, end); 412 - if (page) { 413 - wait_on_page_writeback(page); 414 - zero_user_segment(page, 0, partial_end); 415 - cleancache_invalidate_page(mapping, page); 416 - if (page_has_private(page)) 417 - do_invalidatepage(page, 0, 418 - partial_end); 419 - unlock_page(page); 420 - put_page(page); 421 - } 356 + 357 + if (!same_folio) 358 + folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT, 359 + FGP_LOCK, 0); 360 + if (folio) { 361 + if (!truncate_inode_partial_folio(folio, lstart, lend)) 362 + end = folio->index; 363 + folio_unlock(folio); 364 + folio_put(folio); 422 365 } 423 - /* 424 - * If the truncation happened within a single page no pages 425 - * will be released, just zeroed, so we can bail out now. 
426 - */ 427 - if (start >= end) 428 - goto out; 429 366 430 367 index = start; 431 - for ( ; ; ) { 368 + while (index < end) { 432 369 cond_resched(); 433 - if (!find_get_entries(mapping, index, end - 1, &pvec, 370 + if (!find_get_entries(mapping, index, end - 1, &fbatch, 434 371 indices)) { 435 372 /* If all gone from start onwards, we're done */ 436 373 if (index == start) ··· 425 392 continue; 426 393 } 427 394 428 - for (i = 0; i < pagevec_count(&pvec); i++) { 429 - struct page *page = pvec.pages[i]; 395 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 396 + struct folio *folio = fbatch.folios[i]; 430 397 431 398 /* We rely upon deletion not changing page->index */ 432 399 index = indices[i]; 433 400 434 - if (xa_is_value(page)) 401 + if (xa_is_value(folio)) 435 402 continue; 436 403 437 - lock_page(page); 438 - WARN_ON(page_to_index(page) != index); 439 - wait_on_page_writeback(page); 440 - truncate_inode_page(mapping, page); 441 - unlock_page(page); 404 + folio_lock(folio); 405 + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); 406 + folio_wait_writeback(folio); 407 + truncate_inode_folio(mapping, folio); 408 + folio_unlock(folio); 409 + index = folio_index(folio) + folio_nr_pages(folio) - 1; 442 410 } 443 - truncate_exceptional_pvec_entries(mapping, &pvec, indices); 444 - pagevec_release(&pvec); 411 + truncate_folio_batch_exceptionals(mapping, &fbatch, indices); 412 + folio_batch_release(&fbatch); 445 413 index++; 446 414 } 447 415 ··· 513 479 pgoff_t start, pgoff_t end, unsigned long *nr_pagevec) 514 480 { 515 481 pgoff_t indices[PAGEVEC_SIZE]; 516 - struct pagevec pvec; 482 + struct folio_batch fbatch; 517 483 pgoff_t index = start; 518 484 unsigned long ret; 519 485 unsigned long count = 0; 520 486 int i; 521 487 522 - pagevec_init(&pvec); 523 - while (find_lock_entries(mapping, index, end, &pvec, indices)) { 524 - for (i = 0; i < pagevec_count(&pvec); i++) { 525 - struct page *page = pvec.pages[i]; 488 + folio_batch_init(&fbatch); 489 + while 
(find_lock_entries(mapping, index, end, &fbatch, indices)) { 490 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 491 + struct page *page = &fbatch.folios[i]->page; 526 492 527 493 /* We rely upon deletion not changing page->index */ 528 494 index = indices[i]; ··· 549 515 } 550 516 count += ret; 551 517 } 552 - pagevec_remove_exceptionals(&pvec); 553 - pagevec_release(&pvec); 518 + folio_batch_remove_exceptionals(&fbatch); 519 + folio_batch_release(&fbatch); 554 520 cond_resched(); 555 521 index++; 556 522 } ··· 602 568 * shrink_page_list() has a temp ref on them, or because they're transiently 603 569 * sitting in the lru_cache_add() pagevecs. 604 570 */ 605 - static int 606 - invalidate_complete_page2(struct address_space *mapping, struct page *page) 571 + static int invalidate_complete_folio2(struct address_space *mapping, 572 + struct folio *folio) 607 573 { 608 - if (page->mapping != mapping) 574 + if (folio->mapping != mapping) 609 575 return 0; 610 576 611 - if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) 577 + if (folio_has_private(folio) && 578 + !filemap_release_folio(folio, GFP_KERNEL)) 612 579 return 0; 613 580 614 581 spin_lock(&mapping->host->i_lock); 615 582 xa_lock_irq(&mapping->i_pages); 616 - if (PageDirty(page)) 583 + if (folio_test_dirty(folio)) 617 584 goto failed; 618 585 619 - BUG_ON(page_has_private(page)); 620 - __delete_from_page_cache(page, NULL); 586 + BUG_ON(folio_has_private(folio)); 587 + __filemap_remove_folio(folio, NULL); 621 588 xa_unlock_irq(&mapping->i_pages); 622 589 if (mapping_shrinkable(mapping)) 623 590 inode_add_lru(mapping->host); 624 591 spin_unlock(&mapping->host->i_lock); 625 592 626 - if (mapping->a_ops->freepage) 627 - mapping->a_ops->freepage(page); 628 - 629 - put_page(page); /* pagecache ref */ 593 + filemap_free_folio(mapping, folio); 630 594 return 1; 631 595 failed: 632 596 xa_unlock_irq(&mapping->i_pages); ··· 632 600 return 0; 633 601 } 634 602 635 - static int 
do_launder_page(struct address_space *mapping, struct page *page) 603 + static int do_launder_folio(struct address_space *mapping, struct folio *folio) 636 604 { 637 - if (!PageDirty(page)) 605 + if (!folio_test_dirty(folio)) 638 606 return 0; 639 - if (page->mapping != mapping || mapping->a_ops->launder_page == NULL) 607 + if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL) 640 608 return 0; 641 - return mapping->a_ops->launder_page(page); 609 + return mapping->a_ops->launder_page(&folio->page); 642 610 } 643 611 644 612 /** ··· 656 624 pgoff_t start, pgoff_t end) 657 625 { 658 626 pgoff_t indices[PAGEVEC_SIZE]; 659 - struct pagevec pvec; 627 + struct folio_batch fbatch; 660 628 pgoff_t index; 661 629 int i; 662 630 int ret = 0; ··· 666 634 if (mapping_empty(mapping)) 667 635 goto out; 668 636 669 - pagevec_init(&pvec); 637 + folio_batch_init(&fbatch); 670 638 index = start; 671 - while (find_get_entries(mapping, index, end, &pvec, indices)) { 672 - for (i = 0; i < pagevec_count(&pvec); i++) { 673 - struct page *page = pvec.pages[i]; 639 + while (find_get_entries(mapping, index, end, &fbatch, indices)) { 640 + for (i = 0; i < folio_batch_count(&fbatch); i++) { 641 + struct folio *folio = fbatch.folios[i]; 674 642 675 - /* We rely upon deletion not changing page->index */ 643 + /* We rely upon deletion not changing folio->index */ 676 644 index = indices[i]; 677 645 678 - if (xa_is_value(page)) { 646 + if (xa_is_value(folio)) { 679 647 if (!invalidate_exceptional_entry2(mapping, 680 - index, page)) 648 + index, folio)) 681 649 ret = -EBUSY; 682 650 continue; 683 651 } 684 652 685 - if (!did_range_unmap && page_mapped(page)) { 653 + if (!did_range_unmap && folio_mapped(folio)) { 686 654 /* 687 - * If page is mapped, before taking its lock, 655 + * If folio is mapped, before taking its lock, 688 656 * zap the rest of the file in one hit. 
689 657 */ 690 658 unmap_mapping_pages(mapping, index, ··· 692 660 did_range_unmap = 1; 693 661 } 694 662 695 - lock_page(page); 696 - WARN_ON(page_to_index(page) != index); 697 - if (page->mapping != mapping) { 698 - unlock_page(page); 663 + folio_lock(folio); 664 + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); 665 + if (folio->mapping != mapping) { 666 + folio_unlock(folio); 699 667 continue; 700 668 } 701 - wait_on_page_writeback(page); 669 + folio_wait_writeback(folio); 702 670 703 - if (page_mapped(page)) 704 - unmap_mapping_page(page); 705 - BUG_ON(page_mapped(page)); 671 + if (folio_mapped(folio)) 672 + unmap_mapping_folio(folio); 673 + BUG_ON(folio_mapped(folio)); 706 674 707 - ret2 = do_launder_page(mapping, page); 675 + ret2 = do_launder_folio(mapping, folio); 708 676 if (ret2 == 0) { 709 - if (!invalidate_complete_page2(mapping, page)) 677 + if (!invalidate_complete_folio2(mapping, folio)) 710 678 ret2 = -EBUSY; 711 679 } 712 680 if (ret2 < 0) 713 681 ret = ret2; 714 - unlock_page(page); 682 + folio_unlock(folio); 715 683 } 716 - pagevec_remove_exceptionals(&pvec); 717 - pagevec_release(&pvec); 684 + folio_batch_remove_exceptionals(&fbatch); 685 + folio_batch_release(&fbatch); 718 686 cond_resched(); 719 687 index++; 720 688 }

Configure Feed

Configure Feed