Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'trace-ringbuffer-v6.15-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull ring-buffer updates from Steven Rostedt:

- Restructure the persistent memory to have a "scratch" area

Instead of hard coding the KASLR offset in the persistent memory by
the ring buffer, push that work up to the callers of the persistent
memory as they are the ones that need this information. The offsets
and such are not important to the ring buffer logic and should not
be part of it.

A scratch pad is now created when the caller allocates a ring buffer
from persistent memory by stating how much memory it needs to save.

- Allow where modules are loaded to be saved in the new scratch pad

When modules are loaded, save their addresses in the persistent
memory scratch pad.

- A new module_for_each_mod() helper function was created

With the acknowledgement of the module maintainers a new module
helper function was created to iterate over all the currently loaded
modules. This has a callback to be called for each module. This is
needed for when tracing is started in the persistent buffer and the
currently loaded modules need to be saved in the scratch area.

- Expose the last boot information where the kernel and modules were
loaded

The last_boot_info file is updated to print out the address where
the kernel "_text" location was loaded in the previous boot, as
well as where the modules were loaded. If the buffer is recording the
current boot, it only prints "# Current" so that it does not expose
the KASLR offset of the currently running kernel.

- Allow the persistent ring buffer to be released (freed)

To use this in production environments, where the kernel command
line cannot be changed easily, the ring buffer needs to be freed
when it is not going to be used. The memory for the buffer will
always be allocated at boot up, but if the system isn't going to
enable tracing, the memory needs to be freed. Allow it to be freed
and added back to the kernel memory pool.

- Allow stack traces to print the function names in the persistent
buffer

Now that the modules are saved in the persistent ring buffer, if the
same modules are loaded, the printing of the function names will
examine the saved modules. If the module is found in the scratch area
and is also loaded, then it will do the offset shift and use kallsyms
to display the function name. If the address is not found, it simply
displays the address from the previous boot in hex.

* tag 'trace-ringbuffer-v6.15-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
tracing: Use _text and the kernel offset in last_boot_info
tracing: Show last module text symbols in the stacktrace
ring-buffer: Remove the unused variable bmeta
tracing: Skip update_last_data() if cleared and remove active check for save_mod()
tracing: Initialize scratch_size to zero to prevent UB
tracing: Fix a compilation error without CONFIG_MODULES
tracing: Freeable reserved ring buffer
mm/memblock: Add reserved memory release function
tracing: Update modules to persistent instances when loaded
tracing: Show module names and addresses of last boot
tracing: Have persistent trace instances save module addresses
module: Add module_for_each_mod() function
tracing: Have persistent trace instances save KASLR offset
ring-buffer: Add ring_buffer_meta_scratch()
ring-buffer: Add buffer meta data for persistent ring buffer
ring-buffer: Use kaslr address instead of text delta
ring-buffer: Fix bytes_dropped calculation issue

+639 -159
+1
include/linux/mm.h
··· 4110 4110 void vma_pgtable_walk_end(struct vm_area_struct *vma); 4111 4111 4112 4112 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); 4113 + int reserve_mem_release_by_name(const char *name); 4113 4114 4114 4115 #ifdef CONFIG_64BIT 4115 4116 int do_mseal(unsigned long start, size_t len_in, unsigned long flags);
+6
include/linux/module.h
··· 771 771 772 772 void set_module_sig_enforced(void); 773 773 774 + void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data); 775 + 774 776 #else /* !CONFIG_MODULES... */ 775 777 776 778 static inline struct module *__module_address(unsigned long addr) ··· 879 877 static inline bool module_is_coming(struct module *mod) 880 878 { 881 879 return false; 880 + } 881 + 882 + static inline void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data) 883 + { 882 884 } 883 885 #endif /* CONFIG_MODULES */ 884 886
+4 -4
include/linux/ring_buffer.h
··· 92 92 struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, 93 93 int order, unsigned long start, 94 94 unsigned long range_size, 95 + unsigned long scratch_size, 95 96 struct lock_class_key *key); 96 97 97 - bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, 98 - long *data); 98 + void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size); 99 99 100 100 /* 101 101 * Because the ring buffer is generic, if other users of the ring buffer get ··· 113 113 * traced by ftrace, it can produce lockdep warnings. We need to keep each 114 114 * ring buffer's lock class separate. 115 115 */ 116 - #define ring_buffer_alloc_range(size, flags, order, start, range_size) \ 116 + #define ring_buffer_alloc_range(size, flags, order, start, range_size, s_size) \ 117 117 ({ \ 118 118 static struct lock_class_key __key; \ 119 119 __ring_buffer_alloc_range((size), (flags), (order), (start), \ 120 - (range_size), &__key); \ 120 + (range_size), (s_size), &__key); \ 121 121 }) 122 122 123 123 typedef bool (*ring_buffer_cond_fn)(void *data);
+13
kernel/module/main.c
··· 3744 3744 return __module_text_address(addr) != NULL; 3745 3745 } 3746 3746 3747 + void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data) 3748 + { 3749 + struct module *mod; 3750 + 3751 + guard(rcu)(); 3752 + list_for_each_entry_rcu(mod, &modules, list) { 3753 + if (mod->state == MODULE_STATE_UNFORMED) 3754 + continue; 3755 + if (func(mod, data)) 3756 + break; 3757 + } 3758 + } 3759 + 3747 3760 /** 3748 3761 * __module_text_address() - get the module whose code contains an address. 3749 3762 * @addr: the address.
+144 -103
kernel/trace/ring_buffer.c
··· 31 31 32 32 #include <asm/local64.h> 33 33 #include <asm/local.h> 34 + #include <asm/setup.h> 34 35 35 36 #include "trace.h" 36 37 ··· 49 48 50 49 struct ring_buffer_meta { 51 50 int magic; 52 - int struct_size; 53 - unsigned long text_addr; 54 - unsigned long data_addr; 51 + int struct_sizes; 52 + unsigned long total_size; 53 + unsigned long buffers_offset; 54 + }; 55 + 56 + struct ring_buffer_cpu_meta { 55 57 unsigned long first_buffer; 56 58 unsigned long head_buffer; 57 59 unsigned long commit_buffer; ··· 521 517 struct mutex mapping_lock; 522 518 unsigned long *subbuf_ids; /* ID to subbuf VA */ 523 519 struct trace_buffer_meta *meta_page; 524 - struct ring_buffer_meta *ring_meta; 520 + struct ring_buffer_cpu_meta *ring_meta; 525 521 526 522 /* ring buffer pages to update, > 0 to add, < 0 to remove */ 527 523 long nr_pages_to_update; ··· 554 550 unsigned long range_addr_start; 555 551 unsigned long range_addr_end; 556 552 557 - long last_text_delta; 558 - long last_data_delta; 553 + struct ring_buffer_meta *meta; 559 554 560 555 unsigned int subbuf_size; 561 556 unsigned int subbuf_order; ··· 1274 1271 rb_set_list_to_head(head->list.prev); 1275 1272 1276 1273 if (cpu_buffer->ring_meta) { 1277 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 1274 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 1278 1275 meta->head_buffer = (unsigned long)head->page; 1279 1276 } 1280 1277 } ··· 1572 1569 static unsigned long 1573 1570 rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) 1574 1571 { 1575 - addr += sizeof(struct ring_buffer_meta) + 1572 + addr += sizeof(struct ring_buffer_cpu_meta) + 1576 1573 sizeof(int) * nr_subbufs; 1577 1574 return ALIGN(addr, subbuf_size); 1578 1575 } ··· 1583 1580 static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) 1584 1581 { 1585 1582 int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE; 1586 - unsigned long ptr = buffer->range_addr_start; 1587 - struct 
ring_buffer_meta *meta; 1583 + struct ring_buffer_cpu_meta *meta; 1584 + struct ring_buffer_meta *bmeta; 1585 + unsigned long ptr; 1588 1586 int nr_subbufs; 1589 1587 1590 - if (!ptr) 1588 + bmeta = buffer->meta; 1589 + if (!bmeta) 1591 1590 return NULL; 1591 + 1592 + ptr = (unsigned long)bmeta + bmeta->buffers_offset; 1593 + meta = (struct ring_buffer_cpu_meta *)ptr; 1592 1594 1593 1595 /* When nr_pages passed in is zero, the first meta has already been initialized */ 1594 1596 if (!nr_pages) { 1595 - meta = (struct ring_buffer_meta *)ptr; 1596 1597 nr_subbufs = meta->nr_subbufs; 1597 1598 } else { 1598 - meta = NULL; 1599 1599 /* Include the reader page */ 1600 1600 nr_subbufs = nr_pages + 1; 1601 1601 } ··· 1630 1624 } 1631 1625 1632 1626 /* Return the start of subbufs given the meta pointer */ 1633 - static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta) 1627 + static void *rb_subbufs_from_meta(struct ring_buffer_cpu_meta *meta) 1634 1628 { 1635 1629 int subbuf_size = meta->subbuf_size; 1636 1630 unsigned long ptr; ··· 1646 1640 */ 1647 1641 static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) 1648 1642 { 1649 - struct ring_buffer_meta *meta; 1643 + struct ring_buffer_cpu_meta *meta; 1650 1644 unsigned long ptr; 1651 1645 int subbuf_size; 1652 1646 ··· 1672 1666 } 1673 1667 1674 1668 /* 1669 + * See if the existing memory contains a valid meta section. 1670 + * if so, use that, otherwise initialize it. 
1671 + */ 1672 + static bool rb_meta_init(struct trace_buffer *buffer, int scratch_size) 1673 + { 1674 + unsigned long ptr = buffer->range_addr_start; 1675 + struct ring_buffer_meta *bmeta; 1676 + unsigned long total_size; 1677 + int struct_sizes; 1678 + 1679 + bmeta = (struct ring_buffer_meta *)ptr; 1680 + buffer->meta = bmeta; 1681 + 1682 + total_size = buffer->range_addr_end - buffer->range_addr_start; 1683 + 1684 + struct_sizes = sizeof(struct ring_buffer_cpu_meta); 1685 + struct_sizes |= sizeof(*bmeta) << 16; 1686 + 1687 + /* The first buffer will start word size after the meta page */ 1688 + ptr += sizeof(*bmeta); 1689 + ptr = ALIGN(ptr, sizeof(long)); 1690 + ptr += scratch_size; 1691 + 1692 + if (bmeta->magic != RING_BUFFER_META_MAGIC) { 1693 + pr_info("Ring buffer boot meta mismatch of magic\n"); 1694 + goto init; 1695 + } 1696 + 1697 + if (bmeta->struct_sizes != struct_sizes) { 1698 + pr_info("Ring buffer boot meta mismatch of struct size\n"); 1699 + goto init; 1700 + } 1701 + 1702 + if (bmeta->total_size != total_size) { 1703 + pr_info("Ring buffer boot meta mismatch of total size\n"); 1704 + goto init; 1705 + } 1706 + 1707 + if (bmeta->buffers_offset > bmeta->total_size) { 1708 + pr_info("Ring buffer boot meta mismatch of offset outside of total size\n"); 1709 + goto init; 1710 + } 1711 + 1712 + if (bmeta->buffers_offset != (void *)ptr - (void *)bmeta) { 1713 + pr_info("Ring buffer boot meta mismatch of first buffer offset\n"); 1714 + goto init; 1715 + } 1716 + 1717 + return true; 1718 + 1719 + init: 1720 + bmeta->magic = RING_BUFFER_META_MAGIC; 1721 + bmeta->struct_sizes = struct_sizes; 1722 + bmeta->total_size = total_size; 1723 + bmeta->buffers_offset = (void *)ptr - (void *)bmeta; 1724 + 1725 + /* Zero out the scatch pad */ 1726 + memset((void *)bmeta + sizeof(*bmeta), 0, bmeta->buffers_offset - sizeof(*bmeta)); 1727 + 1728 + return false; 1729 + } 1730 + 1731 + /* 1675 1732 * See if the existing memory contains valid ring buffer data. 
1676 1733 * As the previous kernel must be the same as this kernel, all 1677 1734 * the calculations (size of buffers and number of buffers) 1678 1735 * must be the same. 1679 1736 */ 1680 - static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, 1681 - struct trace_buffer *buffer, int nr_pages, 1682 - unsigned long *subbuf_mask) 1737 + static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu, 1738 + struct trace_buffer *buffer, int nr_pages, 1739 + unsigned long *subbuf_mask) 1683 1740 { 1684 1741 int subbuf_size = PAGE_SIZE; 1685 1742 struct buffer_data_page *subbuf; ··· 1752 1683 1753 1684 if (!subbuf_mask) 1754 1685 return false; 1755 - 1756 - /* Check the meta magic and meta struct size */ 1757 - if (meta->magic != RING_BUFFER_META_MAGIC || 1758 - meta->struct_size != sizeof(*meta)) { 1759 - pr_info("Ring buffer boot meta[%d] mismatch of magic or struct size\n", cpu); 1760 - return false; 1761 - } 1762 - 1763 - /* The subbuffer's size and number of subbuffers must match */ 1764 - if (meta->subbuf_size != subbuf_size || 1765 - meta->nr_subbufs != nr_pages + 1) { 1766 - pr_info("Ring buffer boot meta [%d] mismatch of subbuf_size/nr_pages\n", cpu); 1767 - return false; 1768 - } 1769 1686 1770 1687 buffers_start = meta->first_buffer; 1771 1688 buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs); ··· 1798 1743 return true; 1799 1744 } 1800 1745 1801 - static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf); 1746 + static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf); 1802 1747 1803 1748 static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu, 1804 1749 unsigned long long *timestamp, u64 *delta_ptr) ··· 1865 1810 /* If the meta data has been validated, now validate the events */ 1866 1811 static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) 1867 1812 { 1868 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 1813 + struct 
ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 1869 1814 struct buffer_page *head_page; 1870 1815 unsigned long entry_bytes = 0; 1871 1816 unsigned long entries = 0; ··· 1946 1891 } 1947 1892 } 1948 1893 1949 - /* Used to calculate data delta */ 1950 - static char rb_data_ptr[] = ""; 1951 - 1952 - #define THIS_TEXT_PTR ((unsigned long)rb_meta_init_text_addr) 1953 - #define THIS_DATA_PTR ((unsigned long)rb_data_ptr) 1954 - 1955 - static void rb_meta_init_text_addr(struct ring_buffer_meta *meta) 1894 + static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages, int scratch_size) 1956 1895 { 1957 - meta->text_addr = THIS_TEXT_PTR; 1958 - meta->data_addr = THIS_DATA_PTR; 1959 - } 1960 - 1961 - static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) 1962 - { 1963 - struct ring_buffer_meta *meta; 1896 + struct ring_buffer_cpu_meta *meta; 1964 1897 unsigned long *subbuf_mask; 1965 1898 unsigned long delta; 1966 1899 void *subbuf; 1900 + bool valid = false; 1967 1901 int cpu; 1968 1902 int i; 1969 1903 ··· 1960 1916 subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL); 1961 1917 /* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */ 1962 1918 1919 + if (rb_meta_init(buffer, scratch_size)) 1920 + valid = true; 1921 + 1963 1922 for (cpu = 0; cpu < nr_cpu_ids; cpu++) { 1964 1923 void *next_meta; 1965 1924 1966 1925 meta = rb_range_meta(buffer, nr_pages, cpu); 1967 1926 1968 - if (rb_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { 1927 + if (valid && rb_cpu_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { 1969 1928 /* Make the mappings match the current address */ 1970 1929 subbuf = rb_subbufs_from_meta(meta); 1971 1930 delta = (unsigned long)subbuf - meta->first_buffer; 1972 1931 meta->first_buffer += delta; 1973 1932 meta->head_buffer += delta; 1974 1933 meta->commit_buffer += delta; 1975 - buffer->last_text_delta = THIS_TEXT_PTR - meta->text_addr; 1976 - buffer->last_data_delta = THIS_DATA_PTR - 
meta->data_addr; 1977 1934 continue; 1978 1935 } 1979 1936 ··· 1985 1940 1986 1941 memset(meta, 0, next_meta - (void *)meta); 1987 1942 1988 - meta->magic = RING_BUFFER_META_MAGIC; 1989 - meta->struct_size = sizeof(*meta); 1990 - 1991 1943 meta->nr_subbufs = nr_pages + 1; 1992 1944 meta->subbuf_size = PAGE_SIZE; 1993 1945 1994 1946 subbuf = rb_subbufs_from_meta(meta); 1995 1947 1996 1948 meta->first_buffer = (unsigned long)subbuf; 1997 - rb_meta_init_text_addr(meta); 1998 1949 1999 1950 /* 2000 1951 * The buffers[] array holds the order of the sub-buffers ··· 2012 1971 static void *rbm_start(struct seq_file *m, loff_t *pos) 2013 1972 { 2014 1973 struct ring_buffer_per_cpu *cpu_buffer = m->private; 2015 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 1974 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 2016 1975 unsigned long val; 2017 1976 2018 1977 if (!meta) ··· 2037 1996 static int rbm_show(struct seq_file *m, void *v) 2038 1997 { 2039 1998 struct ring_buffer_per_cpu *cpu_buffer = m->private; 2040 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 1999 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 2041 2000 unsigned long val = (unsigned long)v; 2042 2001 2043 2002 if (val == 1) { ··· 2086 2045 static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer, 2087 2046 struct buffer_page *bpage) 2088 2047 { 2089 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 2048 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 2090 2049 2091 2050 if (meta->head_buffer == (unsigned long)bpage->page) 2092 2051 cpu_buffer->head_page = bpage; ··· 2101 2060 long nr_pages, struct list_head *pages) 2102 2061 { 2103 2062 struct trace_buffer *buffer = cpu_buffer->buffer; 2104 - struct ring_buffer_meta *meta = NULL; 2063 + struct ring_buffer_cpu_meta *meta = NULL; 2105 2064 struct buffer_page *bpage, *tmp; 2106 2065 bool user_thread = current->mm != NULL; 2107 2066 gfp_t mflags; ··· 2225 2184 
rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) 2226 2185 { 2227 2186 struct ring_buffer_per_cpu *cpu_buffer; 2228 - struct ring_buffer_meta *meta; 2187 + struct ring_buffer_cpu_meta *meta; 2229 2188 struct buffer_page *bpage; 2230 2189 struct page *page; 2231 2190 int ret; ··· 2354 2313 static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, 2355 2314 int order, unsigned long start, 2356 2315 unsigned long end, 2316 + unsigned long scratch_size, 2357 2317 struct lock_class_key *key) 2358 2318 { 2359 2319 struct trace_buffer *buffer; ··· 2397 2355 2398 2356 /* If start/end are specified, then that overrides size */ 2399 2357 if (start && end) { 2358 + unsigned long buffers_start; 2400 2359 unsigned long ptr; 2401 2360 int n; 2402 2361 2403 - size = end - start; 2362 + /* Make sure that start is word aligned */ 2363 + start = ALIGN(start, sizeof(long)); 2364 + 2365 + /* scratch_size needs to be aligned too */ 2366 + scratch_size = ALIGN(scratch_size, sizeof(long)); 2367 + 2368 + /* Subtract the buffer meta data and word aligned */ 2369 + buffers_start = start + sizeof(struct ring_buffer_cpu_meta); 2370 + buffers_start = ALIGN(buffers_start, sizeof(long)); 2371 + buffers_start += scratch_size; 2372 + 2373 + /* Calculate the size for the per CPU data */ 2374 + size = end - buffers_start; 2404 2375 size = size / nr_cpu_ids; 2405 2376 2406 2377 /* ··· 2423 2368 * needed, plus account for the integer array index that 2424 2369 * will be appended to the meta data. 
2425 2370 */ 2426 - nr_pages = (size - sizeof(struct ring_buffer_meta)) / 2371 + nr_pages = (size - sizeof(struct ring_buffer_cpu_meta)) / 2427 2372 (subbuf_size + sizeof(int)); 2428 2373 /* Need at least two pages plus the reader page */ 2429 2374 if (nr_pages < 3) ··· 2431 2376 2432 2377 again: 2433 2378 /* Make sure that the size fits aligned */ 2434 - for (n = 0, ptr = start; n < nr_cpu_ids; n++) { 2435 - ptr += sizeof(struct ring_buffer_meta) + 2379 + for (n = 0, ptr = buffers_start; n < nr_cpu_ids; n++) { 2380 + ptr += sizeof(struct ring_buffer_cpu_meta) + 2436 2381 sizeof(int) * nr_pages; 2437 2382 ptr = ALIGN(ptr, subbuf_size); 2438 2383 ptr += subbuf_size * nr_pages; ··· 2449 2394 buffer->range_addr_start = start; 2450 2395 buffer->range_addr_end = end; 2451 2396 2452 - rb_range_meta_init(buffer, nr_pages); 2397 + rb_range_meta_init(buffer, nr_pages, scratch_size); 2453 2398 } else { 2454 2399 2455 2400 /* need at least two pages */ ··· 2502 2447 struct lock_class_key *key) 2503 2448 { 2504 2449 /* Default buffer page size - one system page */ 2505 - return alloc_buffer(size, flags, 0, 0, 0,key); 2450 + return alloc_buffer(size, flags, 0, 0, 0, 0, key); 2506 2451 2507 2452 } 2508 2453 EXPORT_SYMBOL_GPL(__ring_buffer_alloc); ··· 2514 2459 * @order: sub-buffer order 2515 2460 * @start: start of allocated range 2516 2461 * @range_size: size of allocated range 2462 + * @scratch_size: size of scratch area (for preallocated memory buffers) 2517 2463 * @key: ring buffer reader_lock_key. 
2518 2464 * 2519 2465 * Currently the only flag that is available is the RB_FL_OVERWRITE ··· 2525 2469 struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, 2526 2470 int order, unsigned long start, 2527 2471 unsigned long range_size, 2472 + unsigned long scratch_size, 2528 2473 struct lock_class_key *key) 2529 2474 { 2530 - return alloc_buffer(size, flags, order, start, start + range_size, key); 2475 + return alloc_buffer(size, flags, order, start, start + range_size, 2476 + scratch_size, key); 2531 2477 } 2532 2478 2533 - /** 2534 - * ring_buffer_last_boot_delta - return the delta offset from last boot 2535 - * @buffer: The buffer to return the delta from 2536 - * @text: Return text delta 2537 - * @data: Return data delta 2538 - * 2539 - * Returns: The true if the delta is non zero 2540 - */ 2541 - bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, 2542 - long *data) 2479 + void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size) 2543 2480 { 2544 - if (!buffer) 2545 - return false; 2481 + struct ring_buffer_meta *meta; 2482 + void *ptr; 2546 2483 2547 - if (!buffer->last_text_delta) 2548 - return false; 2484 + if (!buffer || !buffer->meta) 2485 + return NULL; 2549 2486 2550 - *text = buffer->last_text_delta; 2551 - *data = buffer->last_data_delta; 2487 + meta = buffer->meta; 2552 2488 2553 - return true; 2489 + ptr = (void *)ALIGN((unsigned long)meta + sizeof(*meta), sizeof(long)); 2490 + 2491 + if (size) 2492 + *size = (void *)meta + meta->buffers_offset - ptr; 2493 + 2494 + return ptr; 2554 2495 } 2555 2496 2556 2497 /** ··· 3158 3105 } 3159 3106 3160 3107 /* Return the index into the sub-buffers for a given sub-buffer */ 3161 - static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf) 3108 + static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf) 3162 3109 { 3163 3110 void *subbuf_array; 3164 3111 ··· 3170 3117 static void 
rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, 3171 3118 struct buffer_page *next_page) 3172 3119 { 3173 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 3120 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 3174 3121 unsigned long old_head = (unsigned long)next_page->page; 3175 3122 unsigned long new_head; 3176 3123 ··· 3187 3134 static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer, 3188 3135 struct buffer_page *reader) 3189 3136 { 3190 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 3137 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 3191 3138 void *old_reader = cpu_buffer->reader_page->page; 3192 3139 void *new_reader = reader->page; 3193 3140 int id; ··· 3816 3763 rb_page_write(cpu_buffer->commit_page)); 3817 3764 rb_inc_page(&cpu_buffer->commit_page); 3818 3765 if (cpu_buffer->ring_meta) { 3819 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 3766 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 3820 3767 meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page; 3821 3768 } 3822 3769 /* add barrier to keep gcc from optimizing too much */ ··· 6069 6016 if (cpu_buffer->mapped) { 6070 6017 rb_update_meta_page(cpu_buffer); 6071 6018 if (cpu_buffer->ring_meta) { 6072 - struct ring_buffer_meta *meta = cpu_buffer->ring_meta; 6019 + struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; 6073 6020 meta->commit_buffer = meta->head_buffer; 6074 6021 } 6075 6022 } ··· 6103 6050 void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) 6104 6051 { 6105 6052 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 6106 - struct ring_buffer_meta *meta; 6107 6053 6108 6054 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 6109 6055 return; ··· 6121 6069 atomic_dec(&cpu_buffer->record_disabled); 6122 6070 atomic_dec(&cpu_buffer->resize_disabled); 6123 6071 6124 - /* Make sure persistent meta now uses this buffer's addresses */ 6125 - meta = 
rb_range_meta(buffer, 0, cpu_buffer->cpu); 6126 - if (meta) 6127 - rb_meta_init_text_addr(meta); 6128 - 6129 6072 mutex_unlock(&buffer->mutex); 6130 6073 } 6131 6074 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); ··· 6135 6088 void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) 6136 6089 { 6137 6090 struct ring_buffer_per_cpu *cpu_buffer; 6138 - struct ring_buffer_meta *meta; 6139 6091 int cpu; 6140 6092 6141 6093 /* prevent another thread from changing buffer sizes */ ··· 6161 6115 continue; 6162 6116 6163 6117 reset_disabled_cpu_buffer(cpu_buffer); 6164 - 6165 - /* Make sure persistent meta now uses this buffer's addresses */ 6166 - meta = rb_range_meta(buffer, 0, cpu_buffer->cpu); 6167 - if (meta) 6168 - rb_meta_init_text_addr(meta); 6169 6118 6170 6119 atomic_dec(&cpu_buffer->record_disabled); 6171 6120 atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled); ··· 7452 7411 /* Ignore dropped events before test starts. */ 7453 7412 if (started) { 7454 7413 if (nested) 7455 - data->bytes_dropped += len; 7456 - else 7457 7414 data->bytes_dropped_nested += len; 7415 + else 7416 + data->bytes_dropped += len; 7458 7417 } 7459 7418 return len; 7460 7419 }
+368 -21
kernel/trace/trace.c
··· 49 49 #include <linux/fsnotify.h> 50 50 #include <linux/irq_work.h> 51 51 #include <linux/workqueue.h> 52 + #include <linux/sort.h> 52 53 53 54 #include <asm/setup.h> /* COMMAND_LINE_SIZE */ 54 55 ··· 4207 4206 * safe to use if the array has delta offsets 4208 4207 * Force printing via the fields. 4209 4208 */ 4210 - if ((tr->text_delta || tr->data_delta) && 4209 + if ((tr->text_delta) && 4211 4210 event->type > __TRACE_LAST_TYPE) 4212 4211 return print_event_fields(iter, event); 4213 4212 ··· 6002 6001 return __tracing_resize_ring_buffer(tr, size, cpu_id); 6003 6002 } 6004 6003 6004 + struct trace_mod_entry { 6005 + unsigned long mod_addr; 6006 + char mod_name[MODULE_NAME_LEN]; 6007 + }; 6008 + 6009 + struct trace_scratch { 6010 + unsigned long text_addr; 6011 + unsigned long nr_entries; 6012 + struct trace_mod_entry entries[]; 6013 + }; 6014 + 6015 + static DEFINE_MUTEX(scratch_mutex); 6016 + 6017 + static int cmp_mod_entry(const void *key, const void *pivot) 6018 + { 6019 + unsigned long addr = (unsigned long)key; 6020 + const struct trace_mod_entry *ent = pivot; 6021 + 6022 + if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr) 6023 + return 0; 6024 + else 6025 + return addr - ent->mod_addr; 6026 + } 6027 + 6028 + /** 6029 + * trace_adjust_address() - Adjust prev boot address to current address. 6030 + * @tr: Persistent ring buffer's trace_array. 6031 + * @addr: Address in @tr which is adjusted. 6032 + */ 6033 + unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 6034 + { 6035 + struct trace_module_delta *module_delta; 6036 + struct trace_scratch *tscratch; 6037 + struct trace_mod_entry *entry; 6038 + int idx = 0, nr_entries; 6039 + 6040 + /* If we don't have last boot delta, return the address */ 6041 + if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6042 + return addr; 6043 + 6044 + /* tr->module_delta must be protected by rcu. 
*/ 6045 + guard(rcu)(); 6046 + tscratch = tr->scratch; 6047 + /* if there is no tscrach, module_delta must be NULL. */ 6048 + module_delta = READ_ONCE(tr->module_delta); 6049 + if (!module_delta || tscratch->entries[0].mod_addr > addr) 6050 + return addr + tr->text_delta; 6051 + 6052 + /* Note that entries must be sorted. */ 6053 + nr_entries = tscratch->nr_entries; 6054 + if (nr_entries == 1 || 6055 + tscratch->entries[nr_entries - 1].mod_addr < addr) 6056 + idx = nr_entries - 1; 6057 + else { 6058 + entry = __inline_bsearch((void *)addr, 6059 + tscratch->entries, 6060 + nr_entries - 1, 6061 + sizeof(tscratch->entries[0]), 6062 + cmp_mod_entry); 6063 + if (entry) 6064 + idx = entry - tscratch->entries; 6065 + } 6066 + 6067 + return addr + module_delta->delta[idx]; 6068 + } 6069 + 6070 + #ifdef CONFIG_MODULES 6071 + static int save_mod(struct module *mod, void *data) 6072 + { 6073 + struct trace_array *tr = data; 6074 + struct trace_scratch *tscratch; 6075 + struct trace_mod_entry *entry; 6076 + unsigned int size; 6077 + 6078 + tscratch = tr->scratch; 6079 + if (!tscratch) 6080 + return -1; 6081 + size = tr->scratch_size; 6082 + 6083 + if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 6084 + return -1; 6085 + 6086 + entry = &tscratch->entries[tscratch->nr_entries]; 6087 + 6088 + tscratch->nr_entries++; 6089 + 6090 + entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 6091 + strscpy(entry->mod_name, mod->name); 6092 + 6093 + return 0; 6094 + } 6095 + #else 6096 + static int save_mod(struct module *mod, void *data) 6097 + { 6098 + return 0; 6099 + } 6100 + #endif 6101 + 6005 6102 static void update_last_data(struct trace_array *tr) 6006 6103 { 6007 - if (!tr->text_delta && !tr->data_delta) 6104 + struct trace_module_delta *module_delta; 6105 + struct trace_scratch *tscratch; 6106 + 6107 + if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 6008 6108 return; 6109 + 6110 + if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6111 + return; 6112 + 6113 + /* Only if 
the buffer has previous boot data clear and update it. */ 6114 + tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 6115 + 6116 + /* Reset the module list and reload them */ 6117 + if (tr->scratch) { 6118 + struct trace_scratch *tscratch = tr->scratch; 6119 + 6120 + memset(tscratch->entries, 0, 6121 + flex_array_size(tscratch, entries, tscratch->nr_entries)); 6122 + tscratch->nr_entries = 0; 6123 + 6124 + guard(mutex)(&scratch_mutex); 6125 + module_for_each_mod(save_mod, tr); 6126 + } 6009 6127 6010 6128 /* 6011 6129 * Need to clear all CPU buffers as there cannot be events ··· 6136 6016 6137 6017 /* Using current data now */ 6138 6018 tr->text_delta = 0; 6139 - tr->data_delta = 0; 6019 + 6020 + if (!tr->scratch) 6021 + return; 6022 + 6023 + tscratch = tr->scratch; 6024 + module_delta = READ_ONCE(tr->module_delta); 6025 + WRITE_ONCE(tr->module_delta, NULL); 6026 + kfree_rcu(module_delta, rcu); 6027 + 6028 + /* Set the persistent ring buffer meta data to this address */ 6029 + tscratch->text_addr = (unsigned long)_text; 6140 6030 } 6141 6031 6142 6032 /** ··· 6955 6825 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6956 6826 } 6957 6827 6958 - static ssize_t 6959 - tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 6828 + #define LAST_BOOT_HEADER ((void *)1) 6829 + 6830 + static void *l_next(struct seq_file *m, void *v, loff_t *pos) 6960 6831 { 6961 - struct trace_array *tr = filp->private_data; 6962 - struct seq_buf seq; 6963 - char buf[64]; 6832 + struct trace_array *tr = m->private; 6833 + struct trace_scratch *tscratch = tr->scratch; 6834 + unsigned int index = *pos; 6964 6835 6965 - seq_buf_init(&seq, buf, 64); 6836 + (*pos)++; 6966 6837 6967 - seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta); 6968 - seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta); 6838 + if (*pos == 1) 6839 + return LAST_BOOT_HEADER; 6969 6840 6970 - return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq)); 6841 + /* Only 
show offsets of the last boot data */ 6842 + if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6843 + return NULL; 6844 + 6845 + /* *pos 0 is for the header, 1 is for the first module */ 6846 + index--; 6847 + 6848 + if (index >= tscratch->nr_entries) 6849 + return NULL; 6850 + 6851 + return &tscratch->entries[index]; 6852 + } 6853 + 6854 + static void *l_start(struct seq_file *m, loff_t *pos) 6855 + { 6856 + mutex_lock(&scratch_mutex); 6857 + 6858 + return l_next(m, NULL, pos); 6859 + } 6860 + 6861 + static void l_stop(struct seq_file *m, void *p) 6862 + { 6863 + mutex_unlock(&scratch_mutex); 6864 + } 6865 + 6866 + static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 6867 + { 6868 + struct trace_scratch *tscratch = tr->scratch; 6869 + 6870 + /* 6871 + * Do not leak KASLR address. This only shows the KASLR address of 6872 + * the last boot. When the ring buffer is started, the LAST_BOOT 6873 + * flag gets cleared, and this should only report "current". 6874 + * Otherwise it shows the KASLR address from the previous boot which 6875 + * should not be the same as the current boot. 
6876 + */ 6877 + if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6878 + seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 6879 + else 6880 + seq_puts(m, "# Current\n"); 6881 + } 6882 + 6883 + static int l_show(struct seq_file *m, void *v) 6884 + { 6885 + struct trace_array *tr = m->private; 6886 + struct trace_mod_entry *entry = v; 6887 + 6888 + if (v == LAST_BOOT_HEADER) { 6889 + show_last_boot_header(m, tr); 6890 + return 0; 6891 + } 6892 + 6893 + seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 6894 + return 0; 6895 + } 6896 + 6897 + static const struct seq_operations last_boot_seq_ops = { 6898 + .start = l_start, 6899 + .next = l_next, 6900 + .stop = l_stop, 6901 + .show = l_show, 6902 + }; 6903 + 6904 + static int tracing_last_boot_open(struct inode *inode, struct file *file) 6905 + { 6906 + struct trace_array *tr = inode->i_private; 6907 + struct seq_file *m; 6908 + int ret; 6909 + 6910 + ret = tracing_check_open_get_tr(tr); 6911 + if (ret) 6912 + return ret; 6913 + 6914 + ret = seq_open(file, &last_boot_seq_ops); 6915 + if (ret) { 6916 + trace_array_put(tr); 6917 + return ret; 6918 + } 6919 + 6920 + m = file->private_data; 6921 + m->private = tr; 6922 + 6923 + return 0; 6971 6924 } 6972 6925 6973 6926 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) ··· 7679 7466 }; 7680 7467 7681 7468 static const struct file_operations last_boot_fops = { 7682 - .open = tracing_open_generic_tr, 7683 - .read = tracing_last_boot_read, 7684 - .llseek = generic_file_llseek, 7685 - .release = tracing_release_generic_tr, 7469 + .open = tracing_last_boot_open, 7470 + .read = seq_read, 7471 + .llseek = seq_lseek, 7472 + .release = tracing_seq_release, 7686 7473 }; 7687 7474 7688 7475 #ifdef CONFIG_TRACER_SNAPSHOT ··· 9422 9209 static void 9423 9210 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 9424 9211 9212 + #ifdef CONFIG_MODULES 9213 + static int make_mod_delta(struct module *mod, void *data) 9214 + { 
9215 + struct trace_module_delta *module_delta; 9216 + struct trace_scratch *tscratch; 9217 + struct trace_mod_entry *entry; 9218 + struct trace_array *tr = data; 9219 + int i; 9220 + 9221 + tscratch = tr->scratch; 9222 + module_delta = READ_ONCE(tr->module_delta); 9223 + for (i = 0; i < tscratch->nr_entries; i++) { 9224 + entry = &tscratch->entries[i]; 9225 + if (strcmp(mod->name, entry->mod_name)) 9226 + continue; 9227 + if (mod->state == MODULE_STATE_GOING) 9228 + module_delta->delta[i] = 0; 9229 + else 9230 + module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 9231 + - entry->mod_addr; 9232 + break; 9233 + } 9234 + return 0; 9235 + } 9236 + #else 9237 + static int make_mod_delta(struct module *mod, void *data) 9238 + { 9239 + return 0; 9240 + } 9241 + #endif 9242 + 9243 + static int mod_addr_comp(const void *a, const void *b, const void *data) 9244 + { 9245 + const struct trace_mod_entry *e1 = a; 9246 + const struct trace_mod_entry *e2 = b; 9247 + 9248 + return e1->mod_addr > e2->mod_addr ? 
1 : -1; 9249 + } 9250 + 9251 + static void setup_trace_scratch(struct trace_array *tr, 9252 + struct trace_scratch *tscratch, unsigned int size) 9253 + { 9254 + struct trace_module_delta *module_delta; 9255 + struct trace_mod_entry *entry; 9256 + int i, nr_entries; 9257 + 9258 + if (!tscratch) 9259 + return; 9260 + 9261 + tr->scratch = tscratch; 9262 + tr->scratch_size = size; 9263 + 9264 + if (tscratch->text_addr) 9265 + tr->text_delta = (unsigned long)_text - tscratch->text_addr; 9266 + 9267 + if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 9268 + goto reset; 9269 + 9270 + /* Check if each module name is a valid string */ 9271 + for (i = 0; i < tscratch->nr_entries; i++) { 9272 + int n; 9273 + 9274 + entry = &tscratch->entries[i]; 9275 + 9276 + for (n = 0; n < MODULE_NAME_LEN; n++) { 9277 + if (entry->mod_name[n] == '\0') 9278 + break; 9279 + if (!isprint(entry->mod_name[n])) 9280 + goto reset; 9281 + } 9282 + if (n == MODULE_NAME_LEN) 9283 + goto reset; 9284 + } 9285 + 9286 + /* Sort the entries so that we can find appropriate module from address. */ 9287 + nr_entries = tscratch->nr_entries; 9288 + sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 9289 + mod_addr_comp, NULL, NULL); 9290 + 9291 + if (IS_ENABLED(CONFIG_MODULES)) { 9292 + module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); 9293 + if (!module_delta) { 9294 + pr_info("module_delta allocation failed. Not able to decode module address."); 9295 + goto reset; 9296 + } 9297 + init_rcu_head(&module_delta->rcu); 9298 + } else 9299 + module_delta = NULL; 9300 + WRITE_ONCE(tr->module_delta, module_delta); 9301 + 9302 + /* Scan modules to make text delta for modules. 
*/ 9303 + module_for_each_mod(make_mod_delta, tr); 9304 + return; 9305 + reset: 9306 + /* Invalid trace modules */ 9307 + memset(tscratch, 0, size); 9308 + } 9309 + 9425 9310 static int 9426 9311 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 9427 9312 { 9428 9313 enum ring_buffer_flags rb_flags; 9314 + struct trace_scratch *tscratch; 9315 + unsigned int scratch_size = 0; 9429 9316 9430 9317 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; 9431 9318 9432 9319 buf->tr = tr; 9433 9320 9434 9321 if (tr->range_addr_start && tr->range_addr_size) { 9322 + /* Add scratch buffer to handle 128 modules */ 9435 9323 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 9436 9324 tr->range_addr_start, 9437 - tr->range_addr_size); 9325 + tr->range_addr_size, 9326 + struct_size(tscratch, entries, 128)); 9438 9327 9439 - ring_buffer_last_boot_delta(buf->buffer, 9440 - &tr->text_delta, &tr->data_delta); 9328 + tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 9329 + setup_trace_scratch(tr, tscratch, scratch_size); 9330 + 9441 9331 /* 9442 9332 * This is basically the same as a mapped buffer, 9443 9333 * with the same restrictions. 
··· 9613 9297 #ifdef CONFIG_TRACER_MAX_TRACE 9614 9298 free_trace_buffer(&tr->max_buffer); 9615 9299 #endif 9300 + 9301 + if (tr->range_addr_start) 9302 + vunmap((void *)tr->range_addr_start); 9616 9303 } 9617 9304 9618 9305 static void init_trace_flags_index(struct trace_array *tr) ··· 9777 9458 free_cpumask_var(tr->pipe_cpumask); 9778 9459 free_cpumask_var(tr->tracing_cpumask); 9779 9460 kfree_const(tr->system_names); 9461 + kfree(tr->range_name); 9780 9462 kfree(tr->name); 9781 9463 kfree(tr); 9782 9464 ··· 9903 9583 free_percpu(tr->last_func_repeats); 9904 9584 free_trace_buffers(tr); 9905 9585 clear_tracing_err_log(tr); 9586 + 9587 + if (tr->range_name) { 9588 + reserve_mem_release_by_name(tr->range_name); 9589 + kfree(tr->range_name); 9590 + } 9906 9591 9907 9592 for (i = 0; i < tr->nr_topts; i++) { 9908 9593 kfree(tr->topts[i].topts); ··· 10230 9905 static inline void trace_module_remove_evals(struct module *mod) { } 10231 9906 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 10232 9907 9908 + static void trace_module_record(struct module *mod, bool add) 9909 + { 9910 + struct trace_array *tr; 9911 + unsigned long flags; 9912 + 9913 + list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9914 + flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 9915 + /* Update any persistent trace array that has already been started */ 9916 + if (flags == TRACE_ARRAY_FL_BOOT && add) { 9917 + guard(mutex)(&scratch_mutex); 9918 + save_mod(mod, tr); 9919 + } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 9920 + /* Update delta if the module loaded in previous boot */ 9921 + make_mod_delta(mod, tr); 9922 + } 9923 + } 9924 + } 9925 + 10233 9926 static int trace_module_notify(struct notifier_block *self, 10234 9927 unsigned long val, void *data) 10235 9928 { ··· 10256 9913 switch (val) { 10257 9914 case MODULE_STATE_COMING: 10258 9915 trace_module_add_evals(mod); 9916 + trace_module_record(mod, true); 10259 9917 break; 10260 9918 case MODULE_STATE_GOING: 10261 9919 
trace_module_remove_evals(mod); 9920 + trace_module_record(mod, false); 10262 9921 break; 10263 9922 } 10264 9923 ··· 10726 10381 bool traceoff = false; 10727 10382 char *flag_delim; 10728 10383 char *addr_delim; 10384 + char *rname __free(kfree) = NULL; 10729 10385 10730 10386 tok = strsep(&curr_str, ","); 10731 10387 ··· 10783 10437 pr_warn("Failed to map boot instance %s to %s\n", name, tok); 10784 10438 continue; 10785 10439 } 10440 + rname = kstrdup(tok, GFP_KERNEL); 10786 10441 } 10787 10442 10788 10443 if (start) { ··· 10819 10472 * to it. 10820 10473 */ 10821 10474 if (start) { 10822 - tr->flags |= TRACE_ARRAY_FL_BOOT; 10823 - tr->ref++; 10475 + tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; 10476 + tr->range_name = no_free_ptr(rname); 10824 10477 } 10825 10478 10826 10479 while ((tok = strsep(&curr_str, ","))) {
+19 -5
kernel/trace/trace.h
··· 313 313 u64 ts_last_call; 314 314 }; 315 315 316 + struct trace_module_delta { 317 + struct rcu_head rcu; 318 + long delta[]; 319 + }; 320 + 316 321 /* 317 322 * The trace array - an array of per-CPU trace arrays. This is the 318 323 * highest level data structure that individual tracers deal with. ··· 354 349 unsigned int mapped; 355 350 unsigned long range_addr_start; 356 351 unsigned long range_addr_size; 352 + char *range_name; 357 353 long text_delta; 358 - long data_delta; 354 + struct trace_module_delta *module_delta; 355 + void *scratch; /* pointer in persistent memory */ 356 + int scratch_size; 357 + 358 + int buffer_disabled; 359 359 360 360 struct trace_pid_list __rcu *filtered_pids; 361 361 struct trace_pid_list __rcu *filtered_no_pids; ··· 378 368 * CONFIG_TRACER_MAX_TRACE. 379 369 */ 380 370 arch_spinlock_t max_lock; 381 - int buffer_disabled; 382 371 #ifdef CONFIG_FTRACE_SYSCALLS 383 372 int sys_refcount_enter; 384 373 int sys_refcount_exit; ··· 443 434 }; 444 435 445 436 enum { 446 - TRACE_ARRAY_FL_GLOBAL = BIT(0), 447 - TRACE_ARRAY_FL_BOOT = BIT(1), 448 - TRACE_ARRAY_FL_MOD_INIT = BIT(2), 437 + TRACE_ARRAY_FL_GLOBAL = BIT(0), 438 + TRACE_ARRAY_FL_BOOT = BIT(1), 439 + TRACE_ARRAY_FL_LAST_BOOT = BIT(2), 440 + TRACE_ARRAY_FL_MOD_INIT = BIT(3), 449 441 }; 450 442 451 443 #ifdef CONFIG_MODULES ··· 472 462 extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); 473 463 474 464 extern bool trace_clock_in_ns(struct trace_array *tr); 465 + 466 + extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr); 475 467 476 468 /* 477 469 * The global tracer (top) should be the first trace array added, ··· 796 784 extern void trace_find_cmdline(int pid, char comm[]); 797 785 extern int trace_find_tgid(int pid); 798 786 extern void trace_event_follow_fork(struct trace_array *tr, bool enable); 787 + 788 + extern int trace_events_enabled(struct trace_array *tr, const char *system); 799 789 800 790 #ifdef 
CONFIG_DYNAMIC_FTRACE 801 791 extern unsigned long ftrace_update_tot_cnt;
+28 -12
kernel/trace/trace_events.c
··· 1820 1820 return cnt; 1821 1821 } 1822 1822 1823 - static ssize_t 1824 - system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1825 - loff_t *ppos) 1823 + /* 1824 + * Returns: 1825 + * 0 : no events exist? 1826 + * 1 : all events are disabled 1827 + * 2 : all events are enabled 1828 + * 3 : some events are enabled and some are disabled 1829 + */ 1830 + int trace_events_enabled(struct trace_array *tr, const char *system) 1826 1831 { 1827 - const char set_to_char[4] = { '?', '0', '1', 'X' }; 1828 - struct trace_subsystem_dir *dir = filp->private_data; 1829 - struct event_subsystem *system = dir->subsystem; 1830 1832 struct trace_event_call *call; 1831 1833 struct trace_event_file *file; 1832 - struct trace_array *tr = dir->tr; 1833 - char buf[2]; 1834 1834 int set = 0; 1835 - int ret; 1836 1835 1837 - mutex_lock(&event_mutex); 1836 + guard(mutex)(&event_mutex); 1837 + 1838 1838 list_for_each_entry(file, &tr->events, list) { 1839 1839 call = file->event_call; 1840 1840 if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || 1841 1841 !trace_event_name(call) || !call->class || !call->class->reg) 1842 1842 continue; 1843 1843 1844 - if (system && strcmp(call->class->system, system->name) != 0) 1844 + if (system && strcmp(call->class->system, system) != 0) 1845 1845 continue; 1846 1846 1847 1847 /* ··· 1857 1857 if (set == 3) 1858 1858 break; 1859 1859 } 1860 - mutex_unlock(&event_mutex); 1860 + 1861 + return set; 1862 + } 1863 + 1864 + static ssize_t 1865 + system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1866 + loff_t *ppos) 1867 + { 1868 + const char set_to_char[4] = { '?', '0', '1', 'X' }; 1869 + struct trace_subsystem_dir *dir = filp->private_data; 1870 + struct event_subsystem *system = dir->subsystem; 1871 + struct trace_array *tr = dir->tr; 1872 + char buf[2]; 1873 + int set; 1874 + int ret; 1875 + 1876 + set = trace_events_enabled(tr, system ? 
system->name : NULL); 1861 1877 1862 1878 buf[0] = set_to_char[set]; 1863 1879 buf[1] = '\n';
+2 -2
kernel/trace/trace_output.c
··· 5 5 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> 6 6 * 7 7 */ 8 + #include "trace.h" 8 9 #include <linux/module.h> 9 10 #include <linux/mutex.h> 10 11 #include <linux/ftrace.h> ··· 1341 1340 struct trace_seq *s = &iter->seq; 1342 1341 unsigned long *p; 1343 1342 unsigned long *end; 1344 - long delta = iter->tr->text_delta; 1345 1343 1346 1344 trace_assign_type(field, iter->ent); 1347 1345 end = (unsigned long *)((long)iter->ent + iter->ent_size); ··· 1357 1357 trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n"); 1358 1358 continue; 1359 1359 } 1360 - seq_print_ip_sym(s, (*p) + delta, flags); 1360 + seq_print_ip_sym(s, trace_adjust_address(iter->tr, *p), flags); 1361 1361 trace_seq_putc(s, '\n'); 1362 1362 } 1363 1363
+54 -12
mm/memblock.c
··· 16 16 #include <linux/kmemleak.h> 17 17 #include <linux/seq_file.h> 18 18 #include <linux/memblock.h> 19 + #include <linux/mutex.h> 20 20 21 21 #include <asm/sections.h> 22 22 #include <linux/io.h> ··· 2284 2283 }; 2285 2284 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES]; 2286 2285 static int reserved_mem_count; 2286 + static DEFINE_MUTEX(reserve_mem_lock); 2287 2287 2288 2288 /* Add wildcard region with a lookup name */ 2289 2289 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size, ··· 2296 2294 map->start = start; 2297 2295 map->size = size; 2298 2296 strscpy(map->name, name); 2297 + } 2298 + 2299 + static struct reserve_mem_table *reserve_mem_find_by_name_nolock(const char *name) 2300 + { 2301 + struct reserve_mem_table *map; 2302 + int i; 2303 + 2304 + for (i = 0; i < reserved_mem_count; i++) { 2305 + map = &reserved_mem_table[i]; 2306 + if (!map->size) 2307 + continue; 2308 + if (strcmp(name, map->name) == 0) 2309 + return map; 2310 + } 2311 + return NULL; 2299 2312 } 2300 2313 2301 2314 /** ··· 2326 2309 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size) 2327 2310 { 2328 2311 struct reserve_mem_table *map; 2329 - int i; 2330 2312 2331 - for (i = 0; i < reserved_mem_count; i++) { 2332 - map = &reserved_mem_table[i]; 2333 - if (!map->size) 2334 - continue; 2335 - if (strcmp(name, map->name) == 0) { 2336 - *start = map->start; 2337 - *size = map->size; 2338 - return 1; 2339 - } 2340 - } 2341 - return 0; 2313 + guard(mutex)(&reserve_mem_lock); 2314 + map = reserve_mem_find_by_name_nolock(name); 2315 + if (!map) 2316 + return 0; 2317 + 2318 + *start = map->start; 2319 + *size = map->size; 2320 + return 1; 2342 2321 } 2343 2322 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name); 2323 + 2324 + /** 2325 + * reserve_mem_release_by_name - Release reserved memory region with a given name 2326 + * @name: The name that is attached to a reserved memory region 2327 + * 2328 + * Forcibly 
release the pages in the reserved memory region so that the memory 2329 + * can be used as free memory. After release, the reserved region size becomes 0. 2330 + * 2331 + * Returns: 1 if released or 0 if not found. 2332 + */ 2333 + int reserve_mem_release_by_name(const char *name) 2334 + { 2335 + char buf[RESERVE_MEM_NAME_SIZE + 12]; 2336 + struct reserve_mem_table *map; 2337 + void *start, *end; 2338 + 2339 + guard(mutex)(&reserve_mem_lock); 2340 + map = reserve_mem_find_by_name_nolock(name); 2341 + if (!map) 2342 + return 0; 2343 + 2344 + start = phys_to_virt(map->start); 2345 + end = start + map->size - 1; 2346 + snprintf(buf, sizeof(buf), "reserve_mem:%s", name); 2347 + free_reserved_area(start, end, 0, buf); 2348 + map->size = 0; 2349 + 2350 + return 1; 2351 + } 2344 2352 2345 2353 /* 2346 2354 * Parse reserve_mem=nn:align:name