Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

kho: add support for preserving vmalloc allocations

A vmalloc allocation is preserved using a binary structure similar to the
global KHO memory tracker. It is a linked list of pages, where each page
holds an array of the physical addresses of pages in the vmalloc area.

kho_preserve_vmalloc() hands out the physical address of the head page to
the caller, stored in a struct kho_vmalloc. That metadata is used as the
argument to kho_restore_vmalloc() to restore the mapping in the vmalloc
address space and populate it with the preserved pages.

[pasha.tatashin@soleen.com: free chunks using free_page() not kfree()]
Link: https://lkml.kernel.org/r/mafs0a52idbeg.fsf@kernel.org
[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20250921054458.4043761-4-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
a667300b 8375b765

+309
+28
include/linux/kexec_handover.h
··· 39 39 40 40 struct kho_serialization; 41 41 42 + struct kho_vmalloc_chunk; 43 + struct kho_vmalloc { 44 + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); 45 + unsigned int total_pages; 46 + unsigned short flags; 47 + unsigned short order; 48 + }; 49 + 42 50 #ifdef CONFIG_KEXEC_HANDOVER 43 51 bool kho_is_enabled(void); 44 52 bool is_kho_boot(void); 45 53 46 54 int kho_preserve_folio(struct folio *folio); 47 55 int kho_preserve_pages(struct page *page, unsigned int nr_pages); 56 + int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); 48 57 struct folio *kho_restore_folio(phys_addr_t phys); 58 + struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); 59 + void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); 49 60 int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); 50 61 int kho_retrieve_subtree(const char *name, phys_addr_t *phys); 51 62 ··· 88 77 return -EOPNOTSUPP; 89 78 } 90 79 80 + static inline int kho_preserve_vmalloc(void *ptr, 81 + struct kho_vmalloc *preservation) 82 + { 83 + return -EOPNOTSUPP; 84 + } 85 + 91 86 static inline struct folio *kho_restore_folio(phys_addr_t phys) 87 + { 88 + return NULL; 89 + } 90 + 91 + static inline struct page *kho_restore_pages(phys_addr_t phys, 92 + unsigned int nr_pages) 93 + { 94 + return NULL; 95 + } 96 + 97 + static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) 92 98 { 93 99 return NULL; 94 100 }
+281
kernel/kexec_handover.c
··· 18 18 #include <linux/memblock.h> 19 19 #include <linux/notifier.h> 20 20 #include <linux/page-isolation.h> 21 + #include <linux/vmalloc.h> 21 22 22 23 #include <asm/early_ioremap.h> 23 24 ··· 274 273 return page ? page_folio(page) : NULL; 275 274 } 276 275 EXPORT_SYMBOL_GPL(kho_restore_folio); 276 + 277 + /** 278 + * kho_restore_pages - restore list of contiguous order 0 pages. 279 + * @phys: physical address of the first page. 280 + * @nr_pages: number of pages. 281 + * 282 + * Restore a contiguous list of order 0 pages that was preserved with 283 + * kho_preserve_pages(). 284 + * 285 + * Return: 0 on success, error code on failure 286 + */ 287 + struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages) 288 + { 289 + const unsigned long start_pfn = PHYS_PFN(phys); 290 + const unsigned long end_pfn = start_pfn + nr_pages; 291 + unsigned long pfn = start_pfn; 292 + 293 + while (pfn < end_pfn) { 294 + const unsigned int order = 295 + min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); 296 + struct page *page = kho_restore_page(PFN_PHYS(pfn)); 297 + 298 + if (!page) 299 + return NULL; 300 + split_page(page, order); 301 + pfn += 1 << order; 302 + } 303 + 304 + return pfn_to_page(start_pfn); 305 + } 306 + EXPORT_SYMBOL_GPL(kho_restore_pages); 277 307 278 308 /* Serialize and deserialize struct kho_mem_phys across kexec 279 309 * ··· 794 762 return err; 795 763 } 796 764 EXPORT_SYMBOL_GPL(kho_preserve_pages); 765 + 766 + struct kho_vmalloc_hdr { 767 + DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *); 768 + }; 769 + 770 + #define KHO_VMALLOC_SIZE \ 771 + ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \ 772 + sizeof(phys_addr_t)) 773 + 774 + struct kho_vmalloc_chunk { 775 + struct kho_vmalloc_hdr hdr; 776 + phys_addr_t phys[KHO_VMALLOC_SIZE]; 777 + }; 778 + 779 + static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE); 780 + 781 + /* vmalloc flags KHO supports */ 782 + #define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP) 783 + 
784 + /* KHO internal flags for vmalloc preservations */ 785 + #define KHO_VMALLOC_ALLOC 0x0001 786 + #define KHO_VMALLOC_HUGE_VMAP 0x0002 787 + 788 + static unsigned short vmalloc_flags_to_kho(unsigned int vm_flags) 789 + { 790 + unsigned short kho_flags = 0; 791 + 792 + if (vm_flags & VM_ALLOC) 793 + kho_flags |= KHO_VMALLOC_ALLOC; 794 + if (vm_flags & VM_ALLOW_HUGE_VMAP) 795 + kho_flags |= KHO_VMALLOC_HUGE_VMAP; 796 + 797 + return kho_flags; 798 + } 799 + 800 + static unsigned int kho_flags_to_vmalloc(unsigned short kho_flags) 801 + { 802 + unsigned int vm_flags = 0; 803 + 804 + if (kho_flags & KHO_VMALLOC_ALLOC) 805 + vm_flags |= VM_ALLOC; 806 + if (kho_flags & KHO_VMALLOC_HUGE_VMAP) 807 + vm_flags |= VM_ALLOW_HUGE_VMAP; 808 + 809 + return vm_flags; 810 + } 811 + 812 + static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur) 813 + { 814 + struct kho_vmalloc_chunk *chunk; 815 + int err; 816 + 817 + chunk = (struct kho_vmalloc_chunk *)get_zeroed_page(GFP_KERNEL); 818 + if (!chunk) 819 + return NULL; 820 + 821 + err = kho_preserve_pages(virt_to_page(chunk), 1); 822 + if (err) 823 + goto err_free; 824 + if (cur) 825 + KHOSER_STORE_PTR(cur->hdr.next, chunk); 826 + return chunk; 827 + 828 + err_free: 829 + free_page((unsigned long)chunk); 830 + return NULL; 831 + } 832 + 833 + static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) 834 + { 835 + struct kho_mem_track *track = &kho_out.ser.track; 836 + unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); 837 + 838 + __kho_unpreserve(track, pfn, pfn + 1); 839 + 840 + for (int i = 0; chunk->phys[i]; i++) { 841 + pfn = PHYS_PFN(chunk->phys[i]); 842 + __kho_unpreserve(track, pfn, pfn + 1); 843 + } 844 + } 845 + 846 + static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc) 847 + { 848 + struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(kho_vmalloc->first); 849 + 850 + while (chunk) { 851 + struct kho_vmalloc_chunk *tmp = chunk; 852 + 853 + 
kho_vmalloc_unpreserve_chunk(chunk); 854 + 855 + chunk = KHOSER_LOAD_PTR(chunk->hdr.next); 856 + free_page((unsigned long)tmp); 857 + } 858 + } 859 + 860 + /** 861 + * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec 862 + * @ptr: pointer to the area in vmalloc address space 863 + * @preservation: placeholder for preservation metadata 864 + * 865 + * Instructs KHO to preserve the area in vmalloc address space at @ptr. The 866 + * physical pages mapped at @ptr will be preserved and on successful return 867 + * @preservation will hold the physical address of a structure that describes 868 + * the preservation. 869 + * 870 + * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably 871 + * restored on the same node 872 + * 873 + * Return: 0 on success, error code on failure 874 + */ 875 + int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation) 876 + { 877 + struct kho_vmalloc_chunk *chunk; 878 + struct vm_struct *vm = find_vm_area(ptr); 879 + unsigned int order, flags, nr_contig_pages; 880 + unsigned int idx = 0; 881 + int err; 882 + 883 + if (!vm) 884 + return -EINVAL; 885 + 886 + if (vm->flags & ~KHO_VMALLOC_SUPPORTED_FLAGS) 887 + return -EOPNOTSUPP; 888 + 889 + flags = vmalloc_flags_to_kho(vm->flags); 890 + order = get_vm_area_page_order(vm); 891 + 892 + chunk = new_vmalloc_chunk(NULL); 893 + if (!chunk) 894 + return -ENOMEM; 895 + KHOSER_STORE_PTR(preservation->first, chunk); 896 + 897 + nr_contig_pages = (1 << order); 898 + for (int i = 0; i < vm->nr_pages; i += nr_contig_pages) { 899 + phys_addr_t phys = page_to_phys(vm->pages[i]); 900 + 901 + err = kho_preserve_pages(vm->pages[i], nr_contig_pages); 902 + if (err) 903 + goto err_free; 904 + 905 + chunk->phys[idx++] = phys; 906 + if (idx == ARRAY_SIZE(chunk->phys)) { 907 + chunk = new_vmalloc_chunk(chunk); 908 + if (!chunk) 909 + goto err_free; 910 + idx = 0; 911 + } 912 + } 913 + 914 + preservation->total_pages = vm->nr_pages; 915 + preservation->flags 
= flags; 916 + preservation->order = order; 917 + 918 + return 0; 919 + 920 + err_free: 921 + kho_vmalloc_free_chunks(preservation); 922 + return err; 923 + } 924 + EXPORT_SYMBOL_GPL(kho_preserve_vmalloc); 925 + 926 + /** 927 + * kho_restore_vmalloc - recreates and populates an area in vmalloc address 928 + * space from the preserved memory. 929 + * @preservation: preservation metadata. 930 + * 931 + * Recreates an area in vmalloc address space and populates it with memory that 932 + * was preserved using kho_preserve_vmalloc(). 933 + * 934 + * Return: pointer to the area in the vmalloc address space, NULL on failure. 935 + */ 936 + void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) 937 + { 938 + struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(preservation->first); 939 + unsigned int align, order, shift, vm_flags; 940 + unsigned long total_pages, contig_pages; 941 + unsigned long addr, size; 942 + struct vm_struct *area; 943 + struct page **pages; 944 + unsigned int idx = 0; 945 + int err; 946 + 947 + vm_flags = kho_flags_to_vmalloc(preservation->flags); 948 + if (vm_flags & ~KHO_VMALLOC_SUPPORTED_FLAGS) 949 + return NULL; 950 + 951 + total_pages = preservation->total_pages; 952 + pages = kvmalloc_array(total_pages, sizeof(*pages), GFP_KERNEL); 953 + if (!pages) 954 + return NULL; 955 + order = preservation->order; 956 + contig_pages = (1 << order); 957 + shift = PAGE_SHIFT + order; 958 + align = 1 << shift; 959 + 960 + while (chunk) { 961 + struct page *page; 962 + 963 + for (int i = 0; chunk->phys[i]; i++) { 964 + phys_addr_t phys = chunk->phys[i]; 965 + 966 + if (idx + contig_pages > total_pages) 967 + goto err_free_pages_array; 968 + 969 + page = kho_restore_pages(phys, contig_pages); 970 + if (!page) 971 + goto err_free_pages_array; 972 + 973 + for (int j = 0; j < contig_pages; j++) 974 + pages[idx++] = page; 975 + 976 + phys += contig_pages * PAGE_SIZE; 977 + } 978 + 979 + page = kho_restore_pages(virt_to_phys(chunk), 1); 980 + if (!page) 981 
+ goto err_free_pages_array; 982 + chunk = KHOSER_LOAD_PTR(chunk->hdr.next); 983 + __free_page(page); 984 + } 985 + 986 + if (idx != total_pages) 987 + goto err_free_pages_array; 988 + 989 + area = __get_vm_area_node(total_pages * PAGE_SIZE, align, shift, 990 + vm_flags, VMALLOC_START, VMALLOC_END, 991 + NUMA_NO_NODE, GFP_KERNEL, 992 + __builtin_return_address(0)); 993 + if (!area) 994 + goto err_free_pages_array; 995 + 996 + addr = (unsigned long)area->addr; 997 + size = get_vm_area_size(area); 998 + err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift); 999 + if (err) 1000 + goto err_free_vm_area; 1001 + 1002 + area->nr_pages = total_pages; 1003 + area->pages = pages; 1004 + 1005 + return area->addr; 1006 + 1007 + err_free_vm_area: 1008 + free_vm_area(area); 1009 + err_free_pages_array: 1010 + kvfree(pages); 1011 + return NULL; 1012 + } 1013 + EXPORT_SYMBOL_GPL(kho_restore_vmalloc); 797 1014 798 1015 /* Handling for debug/kho/out */ 799 1016