Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git

Pull perf fixes from Thomas Gleixner:
"Perf fixes for perf_mmap() reference counting to prevent potential
reference count leaks which are caused by:

- VMA splits, which change the offset or size of a mapping and thereby
cause perf_mmap_close() to either ignore the unmap or unmap the wrong
buffer.

- Several internal issues of perf_mmap(), which can cause reference
count leaks in the perf mmap, corrupt accounting or cause leaks in
perf drivers.

The main fix is to prevent VMA splits by implementing the
[may_]split() callback for vm operations.

The other issues are addressed by rearranging code, early returns on
failure and invocation of cleanups.

Also provide a selftest to validate the fixes.

The reference counting should be converted to refcount_t, but that
requires larger refactoring of the code and will be done once these
fixes are upstream"

* tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git:
selftests/perf_events: Add a mmap() correctness test
perf/core: Prevent VMA split of buffer mappings
perf/core: Handle buffer mapping fail correctly in perf_mmap()
perf/core: Exit early on perf_mmap() fail
perf/core: Don't leak AUX buffer refcount on allocation failure
perf/core: Preserve AUX buffer allocation failure result

+266 -9
+28 -8
kernel/events/core.c
··· 6842 6842 return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS; 6843 6843 } 6844 6844 6845 + static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr) 6846 + { 6847 + /* 6848 + * Forbid splitting perf mappings to prevent refcount leaks due to 6849 + * the resulting non-matching offsets and sizes. See open()/close(). 6850 + */ 6851 + return -EINVAL; 6852 + } 6853 + 6845 6854 static const struct vm_operations_struct perf_mmap_vmops = { 6846 6855 .open = perf_mmap_open, 6847 6856 .close = perf_mmap_close, /* non mergeable */ 6848 6857 .pfn_mkwrite = perf_mmap_pfn_mkwrite, 6858 + .may_split = perf_mmap_may_split, 6849 6859 }; 6850 6860 6851 6861 static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma) ··· 7061 7051 ret = 0; 7062 7052 goto unlock; 7063 7053 } 7064 - 7065 - atomic_set(&rb->aux_mmap_count, 1); 7066 7054 } 7067 7055 7068 7056 user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); ··· 7123 7115 perf_event_update_time(event); 7124 7116 perf_event_init_userpage(event); 7125 7117 perf_event_update_userpage(event); 7118 + ret = 0; 7126 7119 } else { 7127 7120 ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, 7128 7121 event->attr.aux_watermark, flags); 7129 - if (!ret) 7122 + if (!ret) { 7123 + atomic_set(&rb->aux_mmap_count, 1); 7130 7124 rb->aux_mmap_locked = extra; 7125 + } 7131 7126 } 7132 - 7133 - ret = 0; 7134 7127 7135 7128 unlock: 7136 7129 if (!ret) { ··· 7140 7131 7141 7132 atomic_inc(&event->mmap_count); 7142 7133 } else if (rb) { 7134 + /* AUX allocation failed */ 7143 7135 atomic_dec(&rb->mmap_count); 7144 7136 } 7145 7137 aux_unlock: 7146 7138 if (aux_mutex) 7147 7139 mutex_unlock(aux_mutex); 7148 7140 mutex_unlock(&event->mmap_mutex); 7141 + 7142 + if (ret) 7143 + return ret; 7149 7144 7150 7145 /* 7151 7146 * Since pinned accounting is per vm we cannot allow fork() to copy our ··· 7158 7145 vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); 7159 7146 vma->vm_ops = &perf_mmap_vmops; 7160 
7147 7161 - if (!ret) 7162 - ret = map_range(rb, vma); 7163 - 7164 7148 mapped = get_mapped(event, event_mapped); 7165 7149 if (mapped) 7166 7150 mapped(event, vma->vm_mm); 7151 + 7152 + /* 7153 + * Try to map it into the page table. On fail, invoke 7154 + * perf_mmap_close() to undo the above, as the callsite expects 7155 + * full cleanup in this case and therefore does not invoke 7156 + * vmops::close(). 7157 + */ 7158 + ret = map_range(rb, vma); 7159 + if (ret) 7160 + perf_mmap_close(vma); 7167 7161 7168 7162 return ret; 7169 7163 }
+1
tools/testing/selftests/perf_events/.gitignore
··· 2 2 sigtrap_threads 3 3 remove_on_exec 4 4 watermark_signal 5 + mmap
+1 -1
tools/testing/selftests/perf_events/Makefile
··· 2 2 CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) 3 3 LDFLAGS += -lpthread 4 4 5 - TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal 5 + TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap 6 6 include ../lib.mk
+236
tools/testing/selftests/perf_events/mmap.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #define _GNU_SOURCE 3 + 4 + #include <dirent.h> 5 + #include <sched.h> 6 + #include <stdbool.h> 7 + #include <stdio.h> 8 + #include <unistd.h> 9 + 10 + #include <sys/ioctl.h> 11 + #include <sys/mman.h> 12 + #include <sys/syscall.h> 13 + #include <sys/types.h> 14 + 15 + #include <linux/perf_event.h> 16 + 17 + #include "../kselftest_harness.h" 18 + 19 + #define RB_SIZE 0x3000 20 + #define AUX_SIZE 0x10000 21 + #define AUX_OFFS 0x4000 22 + 23 + #define HOLE_SIZE 0x1000 24 + 25 + /* Reserve space for rb, aux with space for shrink-beyond-vma testing. */ 26 + #define REGION_SIZE (2 * RB_SIZE + 2 * AUX_SIZE) 27 + #define REGION_AUX_OFFS (2 * RB_SIZE) 28 + 29 + #define MAP_BASE 1 30 + #define MAP_AUX 2 31 + 32 + #define EVENT_SRC_DIR "/sys/bus/event_source/devices" 33 + 34 + FIXTURE(perf_mmap) 35 + { 36 + int fd; 37 + void *ptr; 38 + void *region; 39 + }; 40 + 41 + FIXTURE_VARIANT(perf_mmap) 42 + { 43 + bool aux; 44 + unsigned long ptr_size; 45 + }; 46 + 47 + FIXTURE_VARIANT_ADD(perf_mmap, rb) 48 + { 49 + .aux = false, 50 + .ptr_size = RB_SIZE, 51 + }; 52 + 53 + FIXTURE_VARIANT_ADD(perf_mmap, aux) 54 + { 55 + .aux = true, 56 + .ptr_size = AUX_SIZE, 57 + }; 58 + 59 + static bool read_event_type(struct dirent *dent, __u32 *type) 60 + { 61 + char typefn[512]; 62 + FILE *fp; 63 + int res; 64 + 65 + snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name); 66 + fp = fopen(typefn, "r"); 67 + if (!fp) 68 + return false; 69 + 70 + res = fscanf(fp, "%u", type); 71 + fclose(fp); 72 + return res > 0; 73 + } 74 + 75 + FIXTURE_SETUP(perf_mmap) 76 + { 77 + struct perf_event_attr attr = { 78 + .size = sizeof(attr), 79 + .disabled = 1, 80 + .exclude_kernel = 1, 81 + .exclude_hv = 1, 82 + }; 83 + struct perf_event_attr attr_ok = {}; 84 + unsigned int eacces = 0, map = 0; 85 + struct perf_event_mmap_page *rb; 86 + struct dirent *dent; 87 + void *aux, *region; 88 + DIR *dir; 89 + 90 + self->ptr = NULL; 91 + 92 + dir = 
opendir(EVENT_SRC_DIR); 93 + if (!dir) 94 + SKIP(return, "perf not available."); 95 + 96 + region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); 97 + ASSERT_NE(region, MAP_FAILED); 98 + self->region = region; 99 + 100 + // Try to find a suitable event on this system 101 + while ((dent = readdir(dir))) { 102 + int fd; 103 + 104 + if (!read_event_type(dent, &attr.type)) 105 + continue; 106 + 107 + fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); 108 + if (fd < 0) { 109 + if (errno == EACCES) 110 + eacces++; 111 + continue; 112 + } 113 + 114 + // Check whether the event supports mmap() 115 + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); 116 + if (rb == MAP_FAILED) { 117 + close(fd); 118 + continue; 119 + } 120 + 121 + if (!map) { 122 + // Save the event in case that no AUX capable event is found 123 + attr_ok = attr; 124 + map = MAP_BASE; 125 + } 126 + 127 + if (!variant->aux) 128 + continue; 129 + 130 + rb->aux_offset = AUX_OFFS; 131 + rb->aux_size = AUX_SIZE; 132 + 133 + // Check whether it supports a AUX buffer 134 + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, 135 + MAP_SHARED | MAP_FIXED, fd, AUX_OFFS); 136 + if (aux == MAP_FAILED) { 137 + munmap(rb, RB_SIZE); 138 + close(fd); 139 + continue; 140 + } 141 + 142 + attr_ok = attr; 143 + map = MAP_AUX; 144 + munmap(aux, AUX_SIZE); 145 + munmap(rb, RB_SIZE); 146 + close(fd); 147 + break; 148 + } 149 + closedir(dir); 150 + 151 + if (!map) { 152 + if (!eacces) 153 + SKIP(return, "No mappable perf event found."); 154 + else 155 + SKIP(return, "No permissions for perf_event_open()"); 156 + } 157 + 158 + self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0); 159 + ASSERT_NE(self->fd, -1); 160 + 161 + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0); 162 + ASSERT_NE(rb, MAP_FAILED); 163 + 164 + if (!variant->aux) { 165 + self->ptr = rb; 166 + return; 167 + } 168 + 169 + if (map != 
MAP_AUX) 170 + SKIP(return, "No AUX event found."); 171 + 172 + rb->aux_offset = AUX_OFFS; 173 + rb->aux_size = AUX_SIZE; 174 + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, 175 + MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS); 176 + ASSERT_NE(aux, MAP_FAILED); 177 + self->ptr = aux; 178 + } 179 + 180 + FIXTURE_TEARDOWN(perf_mmap) 181 + { 182 + ASSERT_EQ(munmap(self->region, REGION_SIZE), 0); 183 + if (self->fd != -1) 184 + ASSERT_EQ(close(self->fd), 0); 185 + } 186 + 187 + TEST_F(perf_mmap, remap) 188 + { 189 + void *tmp, *ptr = self->ptr; 190 + unsigned long size = variant->ptr_size; 191 + 192 + // Test the invalid remaps 193 + ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); 194 + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); 195 + ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED); 196 + // Shrink the end of the mapping such that we only unmap past end of the VMA, 197 + // which should succeed and poke a hole into the PROT_NONE region 198 + ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); 199 + 200 + // Remap the whole buffer to a new address 201 + tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 202 + ASSERT_NE(tmp, MAP_FAILED); 203 + 204 + // Try splitting offset 1 hole size into VMA, this should fail 205 + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE, 206 + MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED); 207 + // Remapping the whole thing should succeed fine 208 + ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp); 209 + ASSERT_EQ(ptr, tmp); 210 + ASSERT_EQ(munmap(tmp, size), 0); 211 + } 212 + 213 + TEST_F(perf_mmap, unmap) 214 + { 215 + unsigned long size = variant->ptr_size; 216 + 217 + // Try to poke holes into the mappings 218 + ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0); 219 + ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 
0); 220 + ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0); 221 + } 222 + 223 + TEST_F(perf_mmap, map) 224 + { 225 + unsigned long size = variant->ptr_size; 226 + 227 + // Try to poke holes into the mappings by mapping anonymous memory over it 228 + ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE, 229 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); 230 + ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, 231 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); 232 + ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, 233 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); 234 + } 235 + 236 + TEST_HARNESS_MAIN