Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tracing/user_events: Introduce multi-format events

Currently user_events supports only one event per name, and that event
must have the exact same format when referenced by multiple programs.
This opens an opportunity for malicious or poorly thought-through
programs to create events that others use with different formats.
Another scenario is user programs wishing to use the same event name but
add more fields later when the software updates. Various versions of a
program may be running side-by-side, which is prevented by the current
single-format requirement.

Add a new register flag (USER_EVENT_REG_MULTI_FORMAT) which indicates
the user program wishes to use the same user_event name, but may have
several different formats of the event. When this flag is used, create
the underlying tracepoint backing the user_event with a unique name
per-version of the format. It's important that existing ABI users do
not get this logic automatically, even if one of the multi format
events matches the format. This ensures existing programs that create
events and assume the tracepoint name will match exactly continue to
work as expected. During find, compare multi-format events only against
other multi-format events, and single-format events only against other
single-format events.

Change system name of the multi-format event tracepoint to ensure that
multi-format events are isolated completely from single-format events.
This prevents single-format names from conflicting with multi-format
events if they end with the same suffix as the multi-format events.

Add a register_name (reg_name) to the user_event struct which allows for
split naming of events. We now have the name that was used to register
within user_events as well as the unique name for the tracepoint. Upon
registering events ensure matches based on first the reg_name, followed
by the fields and format of the event. This allows for multiple events
with the same registered name to have different formats. The underlying
tracepoint will have a unique name in the format of {reg_name}.{unique_id}.

For example, if both "test u32 count" and "test u64 count" are registered
with the USER_EVENT_REG_MULTI_FORMAT flag, the system would have 2 unique
tracepoints. The dynamic_events file would then show the following:
u:test u64 count
u:test u32 count

The actual tracepoint names look like this:
test.0
test.1

Both would be under the new user_events_multi system name to prevent the
older ABI from being used to squat on multi-formatted events and block
their use.

Deleting events via "!u:test u64 count" would only delete the first
tracepoint that matched that format. When the delete ABI is used, deletion
is attempted for all events with the same name. If per-version deletion is
required, user programs should either not use persistent events or delete
them via dynamic_events.

Link: https://lore.kernel.org/linux-trace-kernel/20240222001807.1463-3-beaub@linux.microsoft.com

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Beau Belgrave and committed by
Steven Rostedt (Google)
64805e40 1e953de9

+95 -13
+5 -1
include/uapi/linux/user_events.h
··· 12 12 #include <linux/ioctl.h> 13 13 14 14 #define USER_EVENTS_SYSTEM "user_events" 15 + #define USER_EVENTS_MULTI_SYSTEM "user_events_multi" 15 16 #define USER_EVENTS_PREFIX "u:" 16 17 17 18 /* Create dynamic location entry within a 32-bit value */ ··· 23 22 /* Event will not delete upon last reference closing */ 24 23 USER_EVENT_REG_PERSIST = 1U << 0, 25 24 25 + /* Event will be allowed to have multiple formats */ 26 + USER_EVENT_REG_MULTI_FORMAT = 1U << 1, 27 + 26 28 /* This value or above is currently non-ABI */ 27 - USER_EVENT_REG_MAX = 1U << 1, 29 + USER_EVENT_REG_MAX = 1U << 2, 28 30 }; 29 31 30 32 /*
+90 -12
kernel/trace/trace_events_user.c
··· 34 34 35 35 /* Limit how long of an event name plus args within the subsystem. */ 36 36 #define MAX_EVENT_DESC 512 37 - #define EVENT_NAME(user_event) ((user_event)->tracepoint.name) 37 + #define EVENT_NAME(user_event) ((user_event)->reg_name) 38 + #define EVENT_TP_NAME(user_event) ((user_event)->tracepoint.name) 38 39 #define MAX_FIELD_ARRAY_SIZE 1024 39 40 40 41 /* ··· 55 54 * allows isolation for events by various means. 56 55 */ 57 56 struct user_event_group { 58 - char *system_name; 59 - struct hlist_node node; 60 - struct mutex reg_mutex; 57 + char *system_name; 58 + char *system_multi_name; 59 + struct hlist_node node; 60 + struct mutex reg_mutex; 61 61 DECLARE_HASHTABLE(register_table, 8); 62 + /* ID that moves forward within the group for multi-event names */ 63 + u64 multi_id; 62 64 }; 63 65 64 66 /* Group for init_user_ns mapping, top-most group */ ··· 82 78 */ 83 79 struct user_event { 84 80 struct user_event_group *group; 81 + char *reg_name; 85 82 struct tracepoint tracepoint; 86 83 struct trace_event_call call; 87 84 struct trace_event_class class; ··· 131 126 #define ENABLE_BITOPS(e) (&(e)->values) 132 127 133 128 #define ENABLE_BIT(e) ((int)((e)->values & ENABLE_VAL_BIT_MASK)) 129 + 130 + #define EVENT_MULTI_FORMAT(f) ((f) & USER_EVENT_REG_MULTI_FORMAT) 134 131 135 132 /* Used for asynchronous faulting in of pages */ 136 133 struct user_event_enabler_fault { ··· 337 330 static void user_event_group_destroy(struct user_event_group *group) 338 331 { 339 332 kfree(group->system_name); 333 + kfree(group->system_multi_name); 340 334 kfree(group); 341 335 } 342 336 ··· 354 346 snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM); 355 347 356 348 return system_name; 349 + } 350 + 351 + static char *user_event_group_system_multi_name(void) 352 + { 353 + return kstrdup(USER_EVENTS_MULTI_SYSTEM, GFP_KERNEL); 357 354 } 358 355 359 356 static struct user_event_group *current_user_event_group(void) ··· 378 365 group->system_name = 
user_event_group_system_name(); 379 366 380 367 if (!group->system_name) 368 + goto error; 369 + 370 + group->system_multi_name = user_event_group_system_multi_name(); 371 + 372 + if (!group->system_multi_name) 381 373 goto error; 382 374 383 375 mutex_init(&group->reg_mutex); ··· 1500 1482 hash_del(&user->node); 1501 1483 1502 1484 user_event_destroy_validators(user); 1485 + 1486 + /* If we have different names, both must be freed */ 1487 + if (EVENT_NAME(user) != EVENT_TP_NAME(user)) 1488 + kfree(EVENT_TP_NAME(user)); 1489 + 1503 1490 kfree(user->call.print_fmt); 1504 1491 kfree(EVENT_NAME(user)); 1505 1492 kfree(user); ··· 1527 1504 *outkey = key; 1528 1505 1529 1506 hash_for_each_possible(group->register_table, user, node, key) { 1507 + /* 1508 + * Single-format events shouldn't return multi-format 1509 + * events. Callers expect the underlying tracepoint to match 1510 + * the name exactly in these cases. Only check like-formats. 1511 + */ 1512 + if (EVENT_MULTI_FORMAT(flags) != EVENT_MULTI_FORMAT(user->reg_flags)) 1513 + continue; 1514 + 1530 1515 if (strcmp(EVENT_NAME(user), name)) 1531 1516 continue; 1532 1517 1533 1518 if (user_fields_match(user, argc, argv)) 1534 1519 return user_event_get(user); 1520 + 1521 + /* Scan others if this is a multi-format event */ 1522 + if (EVENT_MULTI_FORMAT(flags)) 1523 + continue; 1535 1524 1536 1525 return ERR_PTR(-EADDRINUSE); 1537 1526 } ··· 1924 1889 struct user_event *user = container_of(ev, struct user_event, devent); 1925 1890 bool match; 1926 1891 1927 - match = strcmp(EVENT_NAME(user), event) == 0 && 1928 - (!system || strcmp(system, USER_EVENTS_SYSTEM) == 0); 1892 + match = strcmp(EVENT_NAME(user), event) == 0; 1893 + 1894 + if (match && system) { 1895 + match = strcmp(system, user->group->system_name) == 0 || 1896 + strcmp(system, user->group->system_multi_name) == 0; 1897 + } 1929 1898 1930 1899 if (match) 1931 1900 match = user_fields_match(user, argc, argv); ··· 1960 1921 
unregister_trace_event(&user->call.event); 1961 1922 1962 1923 return ret; 1924 + } 1925 + 1926 + static int user_event_set_tp_name(struct user_event *user) 1927 + { 1928 + lockdep_assert_held(&user->group->reg_mutex); 1929 + 1930 + if (EVENT_MULTI_FORMAT(user->reg_flags)) { 1931 + char *multi_name; 1932 + 1933 + multi_name = kasprintf(GFP_KERNEL_ACCOUNT, "%s.%llx", 1934 + user->reg_name, user->group->multi_id); 1935 + 1936 + if (!multi_name) 1937 + return -ENOMEM; 1938 + 1939 + user->call.name = multi_name; 1940 + user->tracepoint.name = multi_name; 1941 + 1942 + /* Inc to ensure unique multi-event name next time */ 1943 + user->group->multi_id++; 1944 + } else { 1945 + /* Non Multi-format uses register name */ 1946 + user->call.name = user->reg_name; 1947 + user->tracepoint.name = user->reg_name; 1948 + } 1949 + 1950 + return 0; 1963 1951 } 1964 1952 1965 1953 /* ··· 2051 1985 INIT_LIST_HEAD(&user->validators); 2052 1986 2053 1987 user->group = group; 2054 - user->tracepoint.name = name; 1988 + user->reg_name = name; 1989 + user->reg_flags = reg_flags; 1990 + 1991 + ret = user_event_set_tp_name(user); 1992 + 1993 + if (ret) 1994 + goto put_user; 2055 1995 2056 1996 ret = user_event_parse_fields(user, args); 2057 1997 ··· 2071 1999 2072 2000 user->call.data = user; 2073 2001 user->call.class = &user->class; 2074 - user->call.name = name; 2075 2002 user->call.flags = TRACE_EVENT_FL_TRACEPOINT; 2076 2003 user->call.tp = &user->tracepoint; 2077 2004 user->call.event.funcs = &user_event_funcs; 2078 - user->class.system = group->system_name; 2005 + 2006 + if (EVENT_MULTI_FORMAT(user->reg_flags)) 2007 + user->class.system = group->system_multi_name; 2008 + else 2009 + user->class.system = group->system_name; 2079 2010 2080 2011 user->class.fields_array = user_event_fields_array; 2081 2012 user->class.get_fields = user_event_get_fields; ··· 2099 2024 2100 2025 if (ret) 2101 2026 goto put_user_lock; 2102 - 2103 - user->reg_flags = reg_flags; 2104 2027 2105 2028 if 
(user->reg_flags & USER_EVENT_REG_PERSIST) { 2106 2029 /* Ensure we track self ref and caller ref (2) */ ··· 2123 2050 user_event_destroy_fields(user); 2124 2051 user_event_destroy_validators(user); 2125 2052 kfree(user->call.print_fmt); 2053 + 2054 + /* Caller frees reg_name on error, but not multi-name */ 2055 + if (EVENT_NAME(user) != EVENT_TP_NAME(user)) 2056 + kfree(EVENT_TP_NAME(user)); 2057 + 2126 2058 kfree(user); 2127 2059 return ret; 2128 2060 } ··· 2717 2639 hash_for_each(group->register_table, i, user, node) { 2718 2640 status = user->status; 2719 2641 2720 - seq_printf(m, "%s", EVENT_NAME(user)); 2642 + seq_printf(m, "%s", EVENT_TP_NAME(user)); 2721 2643 2722 2644 if (status != 0) 2723 2645 seq_puts(m, " #");