Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tracing/user_events: Add ioctl for disabling addresses

Enablements are now tracked by the lifetime of the task/mm. User
processes need to be able to disable their addresses if tracing is
requested to be turned off. Previously, unmapping the page would suffice.
However, we now need a stronger contract. Add an ioctl to enable this.

A new flag bit, freeing, is added to user_event_enabler to ensure that
if removal of the event is attempted while a fault is being handled,
the removal is delayed until after the fault is reattempted.

Link: https://lkml.kernel.org/r/20230328235219.203-6-beaub@linux.microsoft.com

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Beau Belgrave and committed by
Steven Rostedt (Google)
dcb8177c 81f8fb65

+119 -2
+24
include/uapi/linux/user_events.h
··· 46 46 __u32 write_index; 47 47 } __attribute__((__packed__)); 48 48 49 + /* 50 + * Describes an event unregister, callers must set the size, address and bit. 51 + * This structure is passed to the DIAG_IOCSUNREG ioctl to disable bit updates. 52 + */ 53 + struct user_unreg { 54 + /* Input: Size of the user_unreg structure being used */ 55 + __u32 size; 56 + 57 + /* Input: Bit to unregister */ 58 + __u8 disable_bit; 59 + 60 + /* Input: Reserved, set to 0 */ 61 + __u8 __reserved; 62 + 63 + /* Input: Reserved, set to 0 */ 64 + __u16 __reserved2; 65 + 66 + /* Input: Address to unregister */ 67 + __u64 disable_addr; 68 + } __attribute__((__packed__)); 69 + 49 70 #define DIAG_IOC_MAGIC '*' 50 71 51 72 /* Request to register a user_event */ ··· 74 53 75 54 /* Request to delete a user_event */ 76 55 #define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char *) 56 + 57 + /* Requests to unregister a user_event */ 58 + #define DIAG_IOCSUNREG _IOW(DIAG_IOC_MAGIC, 2, struct user_unreg*) 77 59 78 60 #endif /* _UAPI_LINUX_USER_EVENTS_H */
+95 -2
kernel/trace/trace_events_user.c
··· 102 102 /* Bit 6 is for faulting status of enablement */ 103 103 #define ENABLE_VAL_FAULTING_BIT 6 104 104 105 + /* Bit 7 is for freeing status of enablement */ 106 + #define ENABLE_VAL_FREEING_BIT 7 107 + 105 108 /* Only duplicate the bit value */ 106 109 #define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK 107 110 ··· 304 301 /* Prevent state changes from racing */ 305 302 mutex_lock(&event_mutex); 306 303 304 + /* User asked for enabler to be removed during fault */ 305 + if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) { 306 + user_event_enabler_destroy(enabler); 307 + goto out; 308 + } 309 + 307 310 /* 308 311 * If we managed to get the page, re-issue the write. We do not 309 312 * want to get into a possible infinite loop, which is why we only ··· 324 315 user_event_enabler_write(mm, enabler, true); 325 316 mmap_read_unlock(mm->mm); 326 317 } 327 - 318 + out: 328 319 mutex_unlock(&event_mutex); 329 320 330 321 /* In all cases we no longer need the mm or fault */ ··· 379 370 if (refcount_read(&mm->tasks) == 0) 380 371 return -ENOENT; 381 372 382 - if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))) 373 + if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)) || 374 + test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)))) 383 375 return -EBUSY; 384 376 385 377 ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT, ··· 437 427 struct user_event_mm *mm) 438 428 { 439 429 struct user_event_enabler *enabler; 430 + 431 + /* Skip pending frees */ 432 + if (unlikely(test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(orig)))) 433 + return true; 440 434 441 435 enabler = kzalloc(sizeof(*enabler), GFP_NOWAIT); 442 436 ··· 2100 2086 return ret; 2101 2087 } 2102 2088 2089 + static long user_unreg_get(struct user_unreg __user *ureg, 2090 + struct user_unreg *kreg) 2091 + { 2092 + u32 size; 2093 + long ret; 2094 + 2095 + ret = get_user(size, &ureg->size); 2096 + 2097 + if (ret) 2098 + return ret; 2099 + 2100 
+ if (size > PAGE_SIZE) 2101 + return -E2BIG; 2102 + 2103 + if (size < offsetofend(struct user_unreg, disable_addr)) 2104 + return -EINVAL; 2105 + 2106 + ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size); 2107 + 2108 + /* Ensure no reserved values, since we don't support any yet */ 2109 + if (kreg->__reserved || kreg->__reserved2) 2110 + return -EINVAL; 2111 + 2112 + return ret; 2113 + } 2114 + 2115 + /* 2116 + * Unregisters an enablement address/bit within a task/user mm. 2117 + */ 2118 + static long user_events_ioctl_unreg(unsigned long uarg) 2119 + { 2120 + struct user_unreg __user *ureg = (struct user_unreg __user *)uarg; 2121 + struct user_event_mm *mm = current->user_event_mm; 2122 + struct user_event_enabler *enabler, *next; 2123 + struct user_unreg reg; 2124 + long ret; 2125 + 2126 + ret = user_unreg_get(ureg, &reg); 2127 + 2128 + if (ret) 2129 + return ret; 2130 + 2131 + if (!mm) 2132 + return -ENOENT; 2133 + 2134 + ret = -ENOENT; 2135 + 2136 + /* 2137 + * Flags freeing and faulting are used to indicate if the enabler is in 2138 + * use at all. When faulting is set a page-fault is occurring asyncly. 2139 + * During async fault if freeing is set, the enabler will be destroyed. 2140 + * If no async fault is happening, we can destroy it now since we hold 2141 + * the event_mutex during these checks. 2142 + */ 2143 + mutex_lock(&event_mutex); 2144 + 2145 + list_for_each_entry_safe(enabler, next, &mm->enablers, link) 2146 + if (enabler->addr == reg.disable_addr && 2147 + (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) { 2148 + set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)); 2149 + 2150 + if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))) 2151 + user_event_enabler_destroy(enabler); 2152 + 2153 + /* Removed at least one */ 2154 + ret = 0; 2155 + } 2156 + 2157 + mutex_unlock(&event_mutex); 2158 + 2159 + return ret; 2160 + } 2161 + 2103 2162 /* 2104 2163 * Handles the ioctl from user mode to register or alter operations. 
2105 2164 */ ··· 2193 2106 case DIAG_IOCSDEL: 2194 2107 mutex_lock(&group->reg_mutex); 2195 2108 ret = user_events_ioctl_del(info, uarg); 2109 + mutex_unlock(&group->reg_mutex); 2110 + break; 2111 + 2112 + case DIAG_IOCSUNREG: 2113 + mutex_lock(&group->reg_mutex); 2114 + ret = user_events_ioctl_unreg(uarg); 2196 2115 mutex_unlock(&group->reg_mutex); 2197 2116 break; 2198 2117 }