Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: support only one page_type per page

By using a few values in the top byte, users of page_type can store up to
24 bits of additional data in page_type. It also reduces the code size
because, once READ_ONCE() is replaced with data_race(), the kernel can
check just a single byte, e.g.:

ffffffff811e3a79: 8b 47 30 mov 0x30(%rdi),%eax
ffffffff811e3a7c: 55 push %rbp
ffffffff811e3a7d: 48 89 e5 mov %rsp,%rbp
ffffffff811e3a80: 25 00 00 00 82 and $0x82000000,%eax
ffffffff811e3a85: 3d 00 00 00 80 cmp $0x80000000,%eax
ffffffff811e3a8a: 74 4d je ffffffff811e3ad9 <folio_mapping+0x69>

becomes:

ffffffff811e3a69: 80 7f 33 f5 cmpb $0xf5,0x33(%rdi)
ffffffff811e3a6d: 55 push %rbp
ffffffff811e3a6e: 48 89 e5 mov %rsp,%rbp
ffffffff811e3a71: 74 4d je ffffffff811e3ac0 <folio_mapping+0x60>

replacing three instructions with one.

[wangkefeng.wang@huawei.com: fix ubsan warnings]
Link: https://lkml.kernel.org/r/2d19c48a-c550-4345-bf36-d05cd303c5de@huawei.com
Link: https://lkml.kernel.org/r/20240821173914.2270383-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Matthew Wilcox (Oracle) and committed by
Andrew Morton
4ffca5a9 e880034c

+55 -50
+27 -39
include/linux/page-flags.h
··· 923 923 #endif 924 924 925 925 /* 926 - * For pages that are never mapped to userspace, 927 - * page_type may be used. Because it is initialised to -1, we invert the 928 - * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and 929 - * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and 930 - * low bits so that an underflow or overflow of _mapcount won't be 931 - * mistaken for a page type value. 926 + * For pages that do not use mapcount, page_type may be used. 927 + * The low 24 bits of pagetype may be used for your own purposes, as long 928 + * as you are careful to not affect the top 8 bits. The low bits of 929 + * pagetype will be overwritten when you clear the page_type from the page. 932 930 */ 933 - 934 931 enum pagetype { 935 - PG_buddy = 0x40000000, 936 - PG_offline = 0x20000000, 937 - PG_table = 0x10000000, 938 - PG_guard = 0x08000000, 939 - PG_hugetlb = 0x04000000, 940 - PG_slab = 0x02000000, 941 - PG_zsmalloc = 0x01000000, 942 - PG_unaccepted = 0x00800000, 932 + /* 0x00-0x7f are positive numbers, ie mapcount */ 933 + /* Reserve 0x80-0xef for mapcount overflow. */ 934 + PGTY_buddy = 0xf0, 935 + PGTY_offline = 0xf1, 936 + PGTY_table = 0xf2, 937 + PGTY_guard = 0xf3, 938 + PGTY_hugetlb = 0xf4, 939 + PGTY_slab = 0xf5, 940 + PGTY_zsmalloc = 0xf6, 941 + PGTY_unaccepted = 0xf7, 943 942 944 - PAGE_TYPE_BASE = 0x80000000, 945 - 946 - /* 947 - * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and 948 - * allow owners that set a type to reuse the lower 16 bit for their own 949 - * purposes. 
950 - */ 951 - PAGE_MAPCOUNT_RESERVE = ~0x0000ffff, 943 + PGTY_mapcount_underflow = 0xff 952 944 }; 953 - 954 - #define PageType(page, flag) \ 955 - ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) 956 - #define folio_test_type(folio, flag) \ 957 - ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) 958 945 959 946 static inline bool page_type_has_type(int page_type) 960 947 { 961 - return page_type < PAGE_MAPCOUNT_RESERVE; 948 + return page_type < (PGTY_mapcount_underflow << 24); 962 949 } 963 950 964 951 /* This takes a mapcount which is one more than page->_mapcount */ ··· 956 969 957 970 static inline bool page_has_type(const struct page *page) 958 971 { 959 - return page_type_has_type(READ_ONCE(page->page_type)); 972 + return page_mapcount_is_type(data_race(page->page_type)); 960 973 } 961 974 962 975 #define FOLIO_TYPE_OPS(lname, fname) \ 963 - static __always_inline bool folio_test_##fname(const struct folio *folio)\ 976 + static __always_inline bool folio_test_##fname(const struct folio *folio) \ 964 977 { \ 965 - return folio_test_type(folio, PG_##lname); \ 978 + return data_race(folio->page.page_type >> 24) == PGTY_##lname; \ 966 979 } \ 967 980 static __always_inline void __folio_set_##fname(struct folio *folio) \ 968 981 { \ 969 - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ 970 - folio->page.page_type &= ~PG_##lname; \ 982 + VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ 983 + folio); \ 984 + folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ 971 985 } \ 972 986 static __always_inline void __folio_clear_##fname(struct folio *folio) \ 973 987 { \ 974 988 VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ 975 - folio->page.page_type |= PG_##lname; \ 989 + folio->page.page_type = UINT_MAX; \ 976 990 } 977 991 978 992 #define PAGE_TYPE_OPS(uname, lname, fname) \ 979 993 FOLIO_TYPE_OPS(lname, fname) \ 980 994 static __always_inline int Page##uname(const struct page 
*page) \ 981 995 { \ 982 - return PageType(page, PG_##lname); \ 996 + return data_race(page->page_type >> 24) == PGTY_##lname; \ 983 997 } \ 984 998 static __always_inline void __SetPage##uname(struct page *page) \ 985 999 { \ 986 - VM_BUG_ON_PAGE(!PageType(page, 0), page); \ 987 - page->page_type &= ~PG_##lname; \ 1000 + VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ 1001 + page->page_type = (unsigned int)PGTY_##lname << 24; \ 988 1002 } \ 989 1003 static __always_inline void __ClearPage##uname(struct page *page) \ 990 1004 { \ 991 1005 VM_BUG_ON_PAGE(!Page##uname(page), page); \ 992 - page->page_type |= PG_##lname; \ 1006 + page->page_type = UINT_MAX; \ 993 1007 } 994 1008 995 1009 /*
+4 -4
kernel/vmcore_info.c
··· 198 198 VMCOREINFO_NUMBER(PG_private); 199 199 VMCOREINFO_NUMBER(PG_swapcache); 200 200 VMCOREINFO_NUMBER(PG_swapbacked); 201 - #define PAGE_SLAB_MAPCOUNT_VALUE (~PG_slab) 201 + #define PAGE_SLAB_MAPCOUNT_VALUE (PGTY_slab << 24) 202 202 VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE); 203 203 #ifdef CONFIG_MEMORY_FAILURE 204 204 VMCOREINFO_NUMBER(PG_hwpoison); 205 205 #endif 206 206 VMCOREINFO_NUMBER(PG_head_mask); 207 - #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) 207 + #define PAGE_BUDDY_MAPCOUNT_VALUE (PGTY_buddy << 24) 208 208 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 209 - #define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) 209 + #define PAGE_HUGETLB_MAPCOUNT_VALUE (PGTY_hugetlb << 24) 210 210 VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); 211 - #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) 211 + #define PAGE_OFFLINE_MAPCOUNT_VALUE (PGTY_offline << 24) 212 212 VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); 213 213 214 214 #ifdef CONFIG_KALLSYMS
+24 -7
mm/debug.c
··· 36 36 {0, NULL} 37 37 }; 38 38 39 - const struct trace_print_flags pagetype_names[] = { 40 - __def_pagetype_names, 41 - {0, NULL} 42 - }; 43 - 44 39 const struct trace_print_flags gfpflag_names[] = { 45 40 __def_gfpflag_names, 46 41 {0, NULL} ··· 46 51 {0, NULL} 47 52 }; 48 53 54 + #define DEF_PAGETYPE_NAME(_name) [PGTY_##_name - 0xf0] = __stringify(_name) 55 + 56 + static const char *page_type_names[] = { 57 + DEF_PAGETYPE_NAME(slab), 58 + DEF_PAGETYPE_NAME(hugetlb), 59 + DEF_PAGETYPE_NAME(offline), 60 + DEF_PAGETYPE_NAME(guard), 61 + DEF_PAGETYPE_NAME(table), 62 + DEF_PAGETYPE_NAME(buddy), 63 + DEF_PAGETYPE_NAME(unaccepted), 64 + }; 65 + 66 + static const char *page_type_name(unsigned int page_type) 67 + { 68 + unsigned i = (page_type >> 24) - 0xf0; 69 + 70 + if (i >= ARRAY_SIZE(page_type_names)) 71 + return "unknown"; 72 + return page_type_names[i]; 73 + } 74 + 49 75 static void __dump_folio(struct folio *folio, struct page *page, 50 76 unsigned long pfn, unsigned long idx) 51 77 { ··· 74 58 int mapcount = atomic_read(&page->_mapcount); 75 59 char *type = ""; 76 60 77 - mapcount = page_type_has_type(mapcount) ? 0 : mapcount + 1; 61 + mapcount = page_mapcount_is_type(mapcount) ? 0 : mapcount + 1; 78 62 pr_warn("page: refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n", 79 63 folio_ref_count(folio), mapcount, mapping, 80 64 folio->index + idx, pfn); ··· 108 92 pr_warn("%sflags: %pGp%s\n", type, &folio->flags, 109 93 is_migrate_cma_folio(folio, pfn) ? " CMA" : ""); 110 94 if (page_has_type(&folio->page)) 111 - pr_warn("page_type: %x\n", folio->page.page_type); 95 + pr_warn("page_type: %x(%s)\n", folio->page.page_type >> 24, 96 + page_type_name(folio->page.page_type)); 112 97 113 98 print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32, 114 99 sizeof(unsigned long), page,