// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/execmem.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/pgalloc_tag.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/string_choices.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>

#define ALLOCINFO_FILE_NAME "allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
#define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME)
#define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME)

#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
static bool mem_profiling_support = true;
#else
static bool mem_profiling_support;
#endif

static struct codetag_type *alloc_tag_cttype;

#ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU
DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
EXPORT_SYMBOL(_shared_alloc_tag);
#endif

DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
			mem_alloc_profiling_key);
EXPORT_SYMBOL(mem_alloc_profiling_key);

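/*
 * When enabled, tag references are compressed into otherwise unused page
 * flag bits instead of being stored in page_ext; see alloc_tag_sec_init()
 * and need_page_alloc_tagging() below.
 */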
DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);

struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
unsigned long alloc_tag_ref_mask;
int alloc_tag_ref_offs;

struct allocinfo_private {
	struct codetag_iterator iter;
	bool print_header;
};

static void *allocinfo_start(struct seq_file *m, loff_t *pos)
{
	struct allocinfo_private *priv;
	loff_t node = *pos;

	priv = (struct allocinfo_private *)m->private;
	codetag_lock_module_list(alloc_tag_cttype, true);
	if (node == 0) {
		priv->print_header = true;
		priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
		codetag_next_ct(&priv->iter);
	}
	return priv->iter.ct ? priv : NULL;
}

static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	struct codetag *ct = codetag_next_ct(&priv->iter);

	(*pos)++;
	if (!ct)
		return NULL;

	return priv;
}

static void allocinfo_stop(struct seq_file *m, void *arg)
{
	codetag_lock_module_list(alloc_tag_cttype, false);
}

static void print_allocinfo_header(struct seq_buf *buf)
{
	/* Output format version, so we can change it. */
	seq_buf_printf(buf, "allocinfo - version: 2.0\n");
	seq_buf_printf(buf, "# <size> <calls> <tag info>\n");
}
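
/*
 * Illustrative /proc/allocinfo output under the format above (the counts
 * and the tag location are hypothetical, not taken from a real kernel):
 *
 *	allocinfo - version: 2.0
 *	# <size> <calls> <tag info>
 *	        4096        1 fs/file_table.c:126 func:alloc_empty_file
 */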

static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
{
	struct alloc_tag *tag = ct_to_alloc_tag(ct);
	struct alloc_tag_counters counter = alloc_tag_read(tag);
	s64 bytes = counter.bytes;

	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
	codetag_to_text(out, ct);
	if (unlikely(alloc_tag_is_inaccurate(tag)))
		seq_buf_printf(out, " accurate:no");
	seq_buf_putc(out, ' ');
	seq_buf_putc(out, '\n');
}

static int allocinfo_show(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	char *bufp;
	size_t n = seq_get_buf(m, &bufp);
	struct seq_buf buf;

	seq_buf_init(&buf, bufp, n);
	if (priv->print_header) {
		print_allocinfo_header(&buf);
		priv->print_header = false;
	}
	alloc_tag_to_text(&buf, priv->iter.ct);
	seq_commit(m, seq_buf_used(&buf));
	return 0;
}

static const struct seq_operations allocinfo_seq_op = {
	.start = allocinfo_start,
	.next = allocinfo_next,
	.stop = allocinfo_stop,
	.show = allocinfo_show,
};

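/*
 * Collect up to @count of the largest tags by current byte count into @tags,
 * sorted in descending order: each tag is insertion-sorted into the caller's
 * array. When @can_sleep is false, only a trylock of the module list is
 * attempted and 0 is returned if it cannot be taken.
 */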
size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
{
	struct codetag_iterator iter;
	struct codetag *ct;
	struct codetag_bytes n;
	unsigned int i, nr = 0;

	if (IS_ERR_OR_NULL(alloc_tag_cttype))
		return 0;

	if (can_sleep)
		codetag_lock_module_list(alloc_tag_cttype, true);
	else if (!codetag_trylock_module_list(alloc_tag_cttype))
		return 0;

	iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&iter))) {
		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));

		n.ct = ct;
		n.bytes = counter.bytes;

		for (i = 0; i < nr; i++)
			if (n.bytes > tags[i].bytes)
				break;

		if (i < count) {
			nr -= nr == count;
			memmove(&tags[i + 1],
				&tags[i],
				sizeof(tags[0]) * (nr - i));
			nr++;
			tags[i] = n;
		}
	}

	codetag_lock_module_list(alloc_tag_cttype, false);

	return nr;
}

void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
{
	int i;
	struct alloc_tag *tag;
	unsigned int nr_pages = 1 << new_order;

	if (!mem_alloc_profiling_enabled())
		return;

	tag = __pgalloc_tag_get(&folio->page);
	if (!tag)
		return;

	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
		union pgtag_ref_handle handle;
		union codetag_ref ref;

		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
			/* Set new reference to point to the original tag */
			alloc_tag_ref_set(&ref, tag);
			update_page_tag_ref(handle, &ref);
			put_page_tag_ref(handle);
		}
	}
}

void pgalloc_tag_swap(struct folio *new, struct folio *old)
{
	union pgtag_ref_handle handle_old, handle_new;
	union codetag_ref ref_old, ref_new;
	struct alloc_tag *tag_old, *tag_new;

	if (!mem_alloc_profiling_enabled())
		return;

	tag_old = __pgalloc_tag_get(&old->page);
	if (!tag_old)
		return;
	tag_new = __pgalloc_tag_get(&new->page);
	if (!tag_new)
		return;

	if (!get_page_tag_ref(&old->page, &ref_old, &handle_old))
		return;
	if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) {
		put_page_tag_ref(handle_old);
		return;
	}

	/*
	 * Clear tag references to avoid debug warning when using
	 * __alloc_tag_ref_set() with non-empty reference.
	 */
	set_codetag_empty(&ref_old);
	set_codetag_empty(&ref_new);

	/* swap tags */
	__alloc_tag_ref_set(&ref_old, tag_new);
	update_page_tag_ref(handle_old, &ref_old);
	__alloc_tag_ref_set(&ref_new, tag_old);
	update_page_tag_ref(handle_new, &ref_new);

	put_page_tag_ref(handle_old);
	put_page_tag_ref(handle_new);
}

static void shutdown_mem_profiling(bool remove_file)
{
	if (mem_alloc_profiling_enabled())
		static_branch_disable(&mem_alloc_profiling_key);

	if (!mem_profiling_support)
		return;

	if (remove_file)
		remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
	mem_profiling_support = false;
}

void __init alloc_tag_sec_init(void)
{
	struct alloc_tag *last_codetag;

	if (!mem_profiling_support)
		return;

	if (!static_key_enabled(&mem_profiling_compressed))
		return;

	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_START(ALLOC_TAG_SECTION_NAME));
	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
	kernel_tags.count = last_codetag - kernel_tags.first_tag;

	/* Check if kernel tags fit into page flags */
	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
		pr_err("%lu allocation tags cannot be referenced using %d available page flag bits. Memory allocation profiling is disabled!\n",
		       kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
		return;
	}

	alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
	alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
	pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
		 NR_UNUSED_PAGEFLAG_BITS);
}

#ifdef CONFIG_MODULES

static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
static struct vm_struct *vm_module_tags;
/* A dummy object used to indicate an unloaded module */
static struct module unloaded_mod;
/* A dummy object used to indicate a module prepended area */
static struct module prepend_mod;

struct alloc_tag_module_section module_tags;

static inline unsigned long alloc_tag_align(unsigned long val)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return val;
	}

	if (val % sizeof(struct alloc_tag) == 0)
		return val;
	return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
}
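
/*
 * Illustrative example (sizes are hypothetical): if sizeof(struct alloc_tag)
 * were 40 bytes, alloc_tag_align(100) would return 120, the next multiple of
 * the tag size, keeping compressed references indexable by tag position.
 */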

static bool ensure_alignment(unsigned long align, unsigned int *prepend)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return true;
	}

	/*
	 * If alloc_tag size is not a multiple of required alignment, tag
	 * indexing does not work.
	 */
	if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
		return false;

	/* Ensure prepend consumes multiple of alloc_tag-sized blocks */
	if (*prepend)
		*prepend = alloc_tag_align(*prepend);

	return true;
}

static inline bool tags_addressable(void)
{
	unsigned long tag_idx_count;

	if (!static_key_enabled(&mem_profiling_compressed))
		return true; /* with page_ext tags are always addressable */

	tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
			module_tags.size / sizeof(struct alloc_tag);

	return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
}

static bool needs_section_mem(struct module *mod, unsigned long size)
{
	if (!mem_profiling_support)
		return false;

	return size >= sizeof(struct alloc_tag);
}

static bool clean_unused_counters(struct alloc_tag *start_tag,
				  struct alloc_tag *end_tag)
{
	struct alloc_tag *tag;
	bool ret = true;

	for (tag = start_tag; tag <= end_tag; tag++) {
		struct alloc_tag_counters counter;

		if (!tag->counters)
			continue;

		counter = alloc_tag_read(tag);
		if (!counter.bytes) {
			free_percpu(tag->counters);
			tag->counters = NULL;
		} else {
			ret = false;
		}
	}

	return ret;
}

/* Called with mod_area_mt locked */
static void clean_unused_module_areas_locked(void)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_for_each(&mas, val, module_tags.size) {
		struct alloc_tag *start_tag;
		struct alloc_tag *end_tag;

		if (val != &unloaded_mod)
			continue;

		/* Release area if all tags are unused */
		start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
		end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
		if (clean_unused_counters(start_tag, end_tag))
			mas_erase(&mas);
	}
}

/* Called with mod_area_mt locked */
static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
			      unsigned long size, unsigned int prepend, unsigned long align)
{
	bool cleanup_done = false;

repeat:
	/* Try finding exact size and hope the start is aligned */
	if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) {
		if (IS_ALIGNED(mas->index + prepend, align))
			return true;

		/* Try finding larger area to align later */
		mas_reset(mas);
		if (!mas_empty_area(mas, 0, section_size - 1,
				    size + prepend + align - 1))
			return true;
	}

	/* No free area; try cleaning up stale data and repeat the search once */
	if (!cleanup_done) {
		clean_unused_module_areas_locked();
		cleanup_done = true;
		mas_reset(mas);
		goto repeat;
	}

	return false;
}

static int vm_module_tags_populate(void)
{
	unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
				 (vm_module_tags->nr_pages << PAGE_SHIFT);
	unsigned long new_end = module_tags.start_addr + module_tags.size;

	if (phys_end < new_end) {
		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
		unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
		unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);
		unsigned long more_pages;
		unsigned long nr = 0;

		more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT;
		while (nr < more_pages) {
			unsigned long allocated;

			allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN,
					NUMA_NO_NODE, more_pages - nr, next_page + nr);

			if (!allocated)
				break;
			nr += allocated;
		}

		if (nr < more_pages ||
		    vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL,
				     next_page, PAGE_SHIFT) < 0) {
			release_pages_arg arg = { .pages = next_page };

			/* Clean up and error out */
			release_pages(arg, nr);
			return -ENOMEM;
		}

		vm_module_tags->nr_pages += nr;

		/*
		 * KASAN allocates 1 byte of shadow for every 8 bytes of data.
		 * When kasan_alloc_module_shadow() allocates shadow memory,
		 * its unit of allocation is a page.
		 * Therefore, here we need to align to MODULE_ALIGN.
		 */
		if (old_shadow_end < new_shadow_end)
			kasan_alloc_module_shadow((void *)old_shadow_end,
						  new_shadow_end - old_shadow_end,
						  GFP_KERNEL);
	}

	/*
	 * Mark the pages as accessible, now that they are mapped.
	 * With hardware tag-based KASAN, marking is skipped for
	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
	 */
	kasan_unpoison_vmalloc((void *)module_tags.start_addr,
			       new_end - module_tags.start_addr,
			       KASAN_VMALLOC_PROT_NORMAL);

	return 0;
}

static void *reserve_module_tags(struct module *mod, unsigned long size,
				 unsigned int prepend, unsigned long align)
{
	unsigned long section_size = module_tags.end_addr - module_tags.start_addr;
	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
	unsigned long offset;
	void *ret = NULL;

	/* If the area cannot hold even one tag, return an error */
	if (size < sizeof(struct alloc_tag))
		return ERR_PTR(-EINVAL);

	/*
	 * align is always power of 2, so we can use IS_ALIGNED and ALIGN.
	 * align 0 or 1 means no alignment, to simplify set to 1.
	 */
	if (!align)
		align = 1;

	if (!ensure_alignment(align, &prepend)) {
		shutdown_mem_profiling(true);
		pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
		       mod->name, align);
		return ERR_PTR(-EINVAL);
	}

	mas_lock(&mas);
	if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
		ret = ERR_PTR(-ENOMEM);
		goto unlock;
	}

	/* Mark found area as reserved */
	offset = mas.index;
	offset += prepend;
	offset = ALIGN(offset, align);
	if (offset != mas.index) {
		unsigned long pad_start = mas.index;

		mas.last = offset - 1;
		mas_store(&mas, &prepend_mod);
		if (mas_is_err(&mas)) {
			ret = ERR_PTR(xa_err(mas.node));
			goto unlock;
		}
		mas.index = offset;
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas)) {
			mas.index = pad_start;
			mas_erase(&mas);
			ret = ERR_PTR(xa_err(mas.node));
		}
	} else {
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas))
			ret = ERR_PTR(xa_err(mas.node));
	}
unlock:
	mas_unlock(&mas);

	if (IS_ERR(ret))
		return ret;

	if (module_tags.size < offset + size) {
		int grow_res;

		module_tags.size = offset + size;
		if (mem_alloc_profiling_enabled() && !tags_addressable()) {
			shutdown_mem_profiling(true);
			pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
				mod->name, NR_UNUSED_PAGEFLAG_BITS);
		}

		grow_res = vm_module_tags_populate();
		if (grow_res) {
			shutdown_mem_profiling(true);
			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
			       mod->name);
			return ERR_PTR(grow_res);
		}
	}

	return (struct alloc_tag *)(module_tags.start_addr + offset);
}

static void release_module_tags(struct module *mod, bool used)
{
	MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
	struct alloc_tag *start_tag;
	struct alloc_tag *end_tag;
	struct module *val;

	mas_lock(&mas);
	mas_for_each_rev(&mas, val, 0)
		if (val == mod)
			break;

	if (!val) /* module not found */
		goto out;

	if (!used)
		goto release_area;

	start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
	end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
	if (!clean_unused_counters(start_tag, end_tag)) {
		struct alloc_tag *tag;

		for (tag = start_tag; tag <= end_tag; tag++) {
			struct alloc_tag_counters counter;

			if (!tag->counters)
				continue;

			counter = alloc_tag_read(tag);
			pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
				tag->ct.filename, tag->ct.lineno, tag->ct.modname,
				tag->ct.function, counter.bytes);
		}
	} else {
		used = false;
	}
release_area:
	mas_store(&mas, used ? &unloaded_mod : NULL);
	val = mas_prev_range(&mas, 0);
	if (val == &prepend_mod)
		mas_store(&mas, NULL);
out:
	mas_unlock(&mas);
}

static int load_module(struct module *mod, struct codetag *start, struct codetag *stop)
{
	/* Allocate module alloc_tag percpu counters */
	struct alloc_tag *start_tag;
	struct alloc_tag *stop_tag;
	struct alloc_tag *tag;

	/* percpu counters for core allocations are already statically allocated */
	if (!mod)
		return 0;

	start_tag = ct_to_alloc_tag(start);
	stop_tag = ct_to_alloc_tag(stop);
	for (tag = start_tag; tag < stop_tag; tag++) {
		WARN_ON(tag->counters);
		tag->counters = alloc_percpu(struct alloc_tag_counters);
		if (!tag->counters) {
			while (--tag >= start_tag) {
				free_percpu(tag->counters);
				tag->counters = NULL;
			}
			pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n",
			       mod->name);
			return -ENOMEM;
		}

		/*
		 * Avoid a kmemleak false positive. The pointer to the counters is stored
		 * in the alloc_tag section of the module and cannot be directly accessed.
		 */
		kmemleak_ignore_percpu(tag->counters);
	}
	return 0;
}

static void replace_module(struct module *mod, struct module *new_mod)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_lock(&mas);
	mas_for_each(&mas, val, module_tags.size) {
		if (val != mod)
			continue;

		mas_store_gfp(&mas, new_mod, GFP_KERNEL);
		break;
	}
	mas_unlock(&mas);
}

static int __init alloc_mod_tags_mem(void)
{
	/* Map space to copy allocation tags */
	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
	if (!vm_module_tags) {
		pr_err("Failed to map %lu bytes for module allocation tags\n",
		       MODULE_ALLOC_TAG_VMAP_SIZE);
		module_tags.start_addr = 0;
		return -ENOMEM;
	}

	vm_module_tags->pages = kmalloc_objs(struct page *,
			get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
			GFP_KERNEL | __GFP_ZERO);
	if (!vm_module_tags->pages) {
		free_vm_area(vm_module_tags);
		return -ENOMEM;
	}

	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
	/* Ensure the base is alloc_tag aligned when required for indexing */
	module_tags.start_addr = alloc_tag_align(module_tags.start_addr);

	return 0;
}

static void __init free_mod_tags_mem(void)
{
	release_pages_arg arg = { .pages = vm_module_tags->pages };

	module_tags.start_addr = 0;
	release_pages(arg, vm_module_tags->nr_pages);
	kfree(vm_module_tags->pages);
	free_vm_area(vm_module_tags);
}

#else /* CONFIG_MODULES */

static inline int alloc_mod_tags_mem(void) { return 0; }
static inline void free_mod_tags_mem(void) {}

#endif /* CONFIG_MODULES */

/* See: Documentation/mm/allocation-profiling.rst */
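/*
 * Accepted values (derived from the parsing below):
 *   sysctl.vm.mem_profiling=never        - no support, no overhead
 *   sysctl.vm.mem_profiling=0            - supported but initially off
 *   sysctl.vm.mem_profiling=1            - supported and initially on
 *   sysctl.vm.mem_profiling=1,compressed - on, with compressed tag refs
 */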
static int __init setup_early_mem_profiling(char *str)
{
	bool compressed = false;
	bool enable;

	if (!str || !str[0])
		return -EINVAL;

	if (!strncmp(str, "never", 5)) {
		enable = false;
		mem_profiling_support = false;
		pr_info("Memory allocation profiling is disabled!\n");
	} else {
		char *token = strsep(&str, ",");

		if (kstrtobool(token, &enable))
			return -EINVAL;

		if (str) {
			if (strcmp(str, "compressed"))
				return -EINVAL;

			compressed = true;
		}
		mem_profiling_support = true;
		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
			compressed ? "with" : "without", str_on_off(enable));
	}

	if (enable != mem_alloc_profiling_enabled()) {
		if (enable)
			static_branch_enable(&mem_alloc_profiling_key);
		else
			static_branch_disable(&mem_alloc_profiling_key);
	}
	if (compressed != static_key_enabled(&mem_profiling_compressed)) {
		if (compressed)
			static_branch_enable(&mem_profiling_compressed);
		else
			static_branch_disable(&mem_profiling_compressed);
	}

	return 0;
}
early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);

static __init bool need_page_alloc_tagging(void)
{
	if (static_key_enabled(&mem_profiling_compressed))
		return false;

	return mem_profiling_support;
}

#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
/*
 * Track page allocations before page_ext is initialized.
 * Some pages are allocated before page_ext becomes available, leaving
 * their codetag uninitialized. Track these early PFNs so we can clear
 * their codetag refs later to avoid warnings when they are freed.
 *
 * Early allocations include:
 * - Base allocations independent of CPU count
 * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
 *   such as trace ring buffers, scheduler per-cpu data)
 *
 * For simplicity, we fix the size to 8192.
 * If insufficient, a warning will be triggered to alert the user.
 *
 * TODO: Replace fixed-size array with dynamic allocation using
 * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
 */
#define EARLY_ALLOC_PFN_MAX 8192

static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);

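/*
 * Reserve a slot index lock-free with atomic_try_cmpxchg() so that
 * concurrent early allocations never record into the same slot.
 */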
static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
{
	int old_idx, new_idx;

	do {
		old_idx = atomic_read(&early_pfn_count);
		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
				     EARLY_ALLOC_PFN_MAX);
			return;
		}
		new_idx = old_idx + 1;
	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));

	early_pfns[old_idx] = pfn;
}

typedef void alloc_tag_add_func(unsigned long pfn);
static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
	RCU_INITIALIZER(__alloc_tag_add_early_pfn);

void alloc_tag_add_early_pfn(unsigned long pfn)
{
	alloc_tag_add_func *alloc_tag_add;

	if (static_key_enabled(&mem_profiling_compressed))
		return;

	rcu_read_lock();
	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
	if (alloc_tag_add)
		alloc_tag_add(pfn);
	rcu_read_unlock();
}

static void __init clear_early_alloc_pfn_tag_refs(void)
{
	unsigned int i;

	if (static_key_enabled(&mem_profiling_compressed))
		return;

	rcu_assign_pointer(alloc_tag_add_early_pfn_ptr, NULL);
	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
	synchronize_rcu();

	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
		unsigned long pfn = early_pfns[i];

		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);
			union pgtag_ref_handle handle;
			union codetag_ref ref;

			if (get_page_tag_ref(page, &ref, &handle)) {
				/*
				 * An early-allocated page could be freed and reallocated
				 * after its page_ext is initialized but before we clear it.
				 * In that case, it already has a valid tag set.
				 * We should not overwrite that valid tag with CODETAG_EMPTY.
				 *
				 * Note: there is still a small race window between checking
				 * ref.ct and calling set_codetag_empty(). We accept this
				 * race as it's unlikely and the extra complexity of atomic
				 * cmpxchg is not worth it for this debug-only code path.
				 */
				if (ref.ct) {
					put_page_tag_ref(handle);
					continue;
				}

				set_codetag_empty(&ref);
				update_page_tag_ref(handle, &ref);
				put_page_tag_ref(handle);
			}
		}
	}
}
#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
static inline void __init clear_early_alloc_pfn_tag_refs(void) {}
#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */

static __init void init_page_alloc_tagging(void)
{
	clear_early_alloc_pfn_tag_refs();
}

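/*
 * page_ext operations for allocation tagging: reserve one codetag_ref per
 * page. Unused when references are compressed into page flags, in which
 * case need_page_alloc_tagging() returns false.
 */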
struct page_ext_operations page_alloc_tagging_ops = {
	.size = sizeof(union codetag_ref),
	.need = need_page_alloc_tagging,
	.init = init_page_alloc_tagging,
};
EXPORT_SYMBOL(page_alloc_tagging_ops);

#ifdef CONFIG_SYSCTL
/*
 * Not using proc_do_static_key() directly to prevent enabling profiling
 * after it was shut down.
 */
static int proc_mem_profiling_handler(const struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		/*
		 * A write from do_sysctl_args() (i.e. no current->mm) is a
		 * no-op, since the same value was already set by
		 * setup_early_mem_profiling(). Return success to avoid
		 * warnings from do_sysctl_args().
		 */
		if (!current->mm)
			return 0;

#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
		/* User can't toggle profiling while debugging */
		return -EACCES;
#endif
		if (!mem_profiling_support)
			return -EINVAL;
	}

	return proc_do_static_key(table, write, buffer, lenp, ppos);
}

static const struct ctl_table memory_allocation_profiling_sysctls[] = {
	{
		.procname = "mem_profiling",
		.data = &mem_alloc_profiling_key,
		.mode = 0644,
		.proc_handler = proc_mem_profiling_handler,
	},
};

static void __init sysctl_init(void)
{
	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
}
#else /* CONFIG_SYSCTL */
static inline void sysctl_init(void) {}
#endif /* CONFIG_SYSCTL */

static int __init alloc_tag_init(void)
{
	const struct codetag_type_desc desc = {
		.section = ALLOC_TAG_SECTION_NAME,
		.tag_size = sizeof(struct alloc_tag),
#ifdef CONFIG_MODULES
		.needs_section_mem = needs_section_mem,
		.alloc_section_mem = reserve_module_tags,
		.free_section_mem = release_module_tags,
		.module_load = load_module,
		.module_replaced = replace_module,
#endif
	};
	int res;

	sysctl_init();

	if (!mem_profiling_support) {
		pr_info("Memory allocation profiling is not supported!\n");
		return 0;
	}

	if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
				     sizeof(struct allocinfo_private), NULL)) {
		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
		shutdown_mem_profiling(false);
		return -ENOMEM;
	}

	res = alloc_mod_tags_mem();
	if (res) {
		pr_err("Failed to reserve address space for module tags, errno = %d\n", res);
		shutdown_mem_profiling(true);
		return res;
	}

	alloc_tag_cttype = codetag_register_type(&desc);
	if (IS_ERR(alloc_tag_cttype)) {
		pr_err("Allocation tags registration failed, errno = %pe\n", alloc_tag_cttype);
		free_mod_tags_mem();
		shutdown_mem_profiling(true);
		return PTR_ERR(alloc_tag_cttype);
	}

	return 0;
}
module_init(alloc_tag_init);