Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2#ifndef __TASK_LOCAL_DATA_H
3#define __TASK_LOCAL_DATA_H
4
5#include <errno.h>
6#include <fcntl.h>
7#include <sched.h>
8#include <stdatomic.h>
9#include <stddef.h>
10#include <stdlib.h>
11#include <string.h>
12#include <unistd.h>
13#include <sys/syscall.h>
14#include <sys/types.h>
15
16#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
17#include <pthread.h>
18#endif
19
20#include <bpf/bpf.h>
21
22/*
23 * OPTIONS
24 *
25 * Define the option before including the header. Using different options in
26 * different translation units is strongly discouraged.
27 *
28 * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
29 *
30 * Thread-specific memory for storing TLD is allocated lazily on the first call to
31 * tld_get_data(). The thread that calls it must also call tld_free() on thread exit
32 * to prevent memory leak. Pthread will be included if the option is defined. A pthread
33 * key will be registered with a destructor that calls tld_free(). Enabled only when
34 * the option is defined and TLD_DEFINE_KEY/tld_create_key() is called in the same
35 * translation unit.
36 *
37 *
38 * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
39 * (default: 64 bytes)
40 *
41 * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
42 * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be
43 * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated
 * for these TLDs. This additional memory is allocated for every thread that calls
 * tld_get_data() even if tld_create_key() is never called, so be mindful of
46 * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory
47 * will be allocated for TLDs created with it.
48 *
49 *
50 * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
51 *
52 * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
53 * TLD_MAX_DATA_CNT. Must be consistent with task_local_data.bpf.h.
54 *
55 *
56 * TLD_DONT_ROUND_UP_DATA_SIZE - Don't round up memory size allocated for data if
57 * the memory allocator has low overhead aligned_alloc() implementation.
58 *
59 * For some memory allocators, when calling aligned_alloc(alignment, size), size
60 * does not need to be an integral multiple of alignment and it can be fulfilled
61 * without using round_up(size, alignment) bytes of memory. Enable this option to
62 * reduce memory usage.
63 */
64
/* The page size is queried at run time: both macros expand to a getpagesize() call. */
#define TLD_PAGE_SIZE getpagesize()
#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))

/* Round x up to the next multiple of y. y must be a power of two. */
#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)

/*
 * Round x up to the next power of two (x itself if it already is one).
 *
 * __builtin_clzl() operates on unsigned long, so the bit width is taken from
 * unsigned long rather than from the type of x, and x is converted explicitly.
 * x must be at least 2 because __builtin_clzl(0) is undefined.
 */
#define TLD_ROUND_UP_POWER_OF_TWO(x) \
	(1UL << (sizeof(unsigned long) * 8 - __builtin_clzl((unsigned long)(x) - 1)))

/* Bytes reserved in every thread's buffer for TLDs created with tld_create_key() */
#ifndef TLD_DYN_DATA_SIZE
#define TLD_DYN_DATA_SIZE 64
#endif

/* Number of struct tld_metadata entries that fit in a page, minus one */
#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)

/* Size of the name buffer in struct tld_metadata */
#ifndef TLD_NAME_LEN
#define TLD_NAME_LEN 62
#endif
82
83#ifdef __cplusplus
84extern "C" {
85#endif
86
87typedef struct {
88 __s16 off;
89} tld_key_t;
90
91struct tld_metadata {
92 char name[TLD_NAME_LEN];
93 _Atomic __u16 size; /* size of tld_data_u->data */
94};
95
96struct tld_meta_u {
97 _Atomic __u16 cnt;
98 __u16 size;
99 struct tld_metadata metadata[];
100};
101
102/*
103 * The unused field ensures map_val.start > 0. On the BPF side, __tld_fetch_key()
104 * calculates off by summing map_val.start and tld_key_t.off and treats off == 0
105 * as key not cached.
106 */
107struct tld_data_u {
108 __u64 unused;
109 char data[] __attribute__((aligned(8)));
110};
111
112struct tld_map_value {
113 void *data;
114 struct tld_meta_u *meta;
115 __u16 start; /* offset of tld_data_u->data in a page */
116};
117
/*
 * Both objects are defined weak, presumably so that every translation unit
 * including this header ends up sharing a single definition.
 */

/* Process-wide metadata page; NULL until __tld_init_meta_p() publishes one */
struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));

/* The calling thread's TLD buffer; NULL until __tld_init_data_p() succeeds */
__thread struct tld_data_u *tld_data_p __attribute__((weak));
120
#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
/* Set by the __tld_create_key() caller that takes charge of creating tld_pthread_key */
bool _Atomic tld_pthread_key_init __attribute__((weak));
/* Key registered with __tld_thread_exit_handler() as its destructor */
pthread_key_t tld_pthread_key __attribute__((weak));

static void tld_free(void);

/*
 * Destructor of tld_pthread_key: releases the exiting thread's TLD buffer.
 * The value stored under the key is not used.
 */
static void __tld_thread_exit_handler(void *unused __attribute__((unused)))
{
	tld_free();
}
#endif
133
134static int __tld_init_meta_p(void)
135{
136 struct tld_meta_u *meta, *uninit = NULL;
137 int err = 0;
138
139 meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
140 if (!meta) {
141 err = -ENOMEM;
142 goto out;
143 }
144
145 memset(meta, 0, TLD_PAGE_SIZE);
146 meta->size = TLD_DYN_DATA_SIZE;
147
148 if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
149 free(meta);
150 goto out;
151 }
152
153out:
154 return err;
155}
156
157static int __tld_init_data_p(int map_fd)
158{
159 struct tld_map_value map_val;
160 struct tld_data_u *data;
161 int err, tid_fd = -1;
162 size_t size, size_pot;
163
164 tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
165 if (tid_fd < 0) {
166 err = -errno;
167 goto out;
168 }
169
170 /*
171 * tld_meta_p->size = TLD_DYN_DATA_SIZE +
172 * total size of TLDs defined via TLD_DEFINE_KEY()
173 */
174 size = tld_meta_p->size + sizeof(struct tld_data_u);
175 size_pot = TLD_ROUND_UP_POWER_OF_TWO(size);
176#ifdef TLD_DONT_ROUND_UP_DATA_SIZE
177 data = (struct tld_data_u *)aligned_alloc(size_pot, size);
178#else
179 data = (struct tld_data_u *)aligned_alloc(size_pot, size_pot);
180#endif
181 if (!data) {
182 err = -ENOMEM;
183 goto out;
184 }
185
186 /*
187 * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
188 * is a page in BTF.
189 */
190 map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data);
191 map_val.start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u);
192 map_val.meta = tld_meta_p;
193
194 err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
195 if (err) {
196 free(data);
197 goto out;
198 }
199
200 tld_data_p = data;
201#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
202 pthread_setspecific(tld_pthread_key, (void *)1);
203#endif
204out:
205 if (tid_fd >= 0)
206 close(tid_fd);
207 return err;
208}
209
210static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
211{
212 int err, i, sz, off = 0;
213 bool uninit = false;
214 __u16 cnt;
215
216 if (!tld_meta_p) {
217 err = __tld_init_meta_p();
218 if (err)
219 return (tld_key_t){(__s16)err};
220 }
221
222#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
223 if (atomic_compare_exchange_strong(&tld_pthread_key_init, &uninit, true)) {
224 err = pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
225 if (err)
226 return (tld_key_t){(__s16)err};
227 }
228#endif
229
230 for (i = 0; i < (int)TLD_MAX_DATA_CNT; i++) {
231retry:
232 cnt = atomic_load(&tld_meta_p->cnt);
233 if (i < cnt) {
234 /* A metadata is not ready until size is updated with a non-zero value */
235 while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
236 sched_yield();
237
238 if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
239 return (tld_key_t){-EEXIST};
240
241 off += TLD_ROUND_UP(sz, 8);
242 continue;
243 }
244
245 /*
246 * TLD_DEFINE_KEY() is given memory upto a page while at most
247 * TLD_DYN_DATA_SIZE is allocated for tld_create_key()
248 */
249 if (dyn_data) {
250 if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size ||
251 tld_meta_p->size > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
252 return (tld_key_t){-E2BIG};
253 } else {
254 if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
255 return (tld_key_t){-E2BIG};
256 tld_meta_p->size += TLD_ROUND_UP(size, 8);
257 }
258
259 /*
260 * Only one tld_create_key() can increase the current cnt by one and
261 * takes the latest available slot. Other threads will check again if a new
262 * TLD can still be added, and then compete for the new slot after the
263 * succeeding thread update the size.
264 */
265 if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
266 goto retry;
267
268 strscpy(tld_meta_p->metadata[i].name, name);
269 atomic_store(&tld_meta_p->metadata[i].size, size);
270 return (tld_key_t){(__s16)off};
271 }
272
273 return (tld_key_t){-ENOSPC};
274}
275
/**
 * TLD_DEFINE_KEY() - Define a TLD together with a global key variable for it.
 *
 * @key: The variable name of the key
 * @name: The name of the TLD. Cannot exceed TLD_NAME_LEN
 * @size: The size of the TLD
 *
 * The macro can only be used in file scope.
 *
 * It declares a global variable @key of the opaque type tld_key_t and a
 * constructor that initializes it before main() starts. Check later with
 * tld_key_is_err() or tld_key_err_or_zero() whether the key was created, and
 * pass the key to tld_get_data() to obtain a pointer to the TLD. bpf programs
 * can fetch the same key by name.
 *
 * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page.
 * Each thread gets just enough memory for them on its first call to
 * tld_get_data().
 */
#define TLD_DEFINE_KEY(key, name, size)				\
tld_key_t key;							\
								\
__attribute__((constructor(101)))				\
void __tld_define_key_##key(void)				\
{								\
	key = __tld_create_key(name, size, false);		\
}
301
302/**
303 * tld_create_key() - Create a TLD and return a key associated with the TLD.
304 *
305 * @name: The name the TLD
306 * @size: The size of the TLD
307 *
308 * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check
309 * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
310 * locate the TLD. bpf programs can also fetch the same key by name.
311 *
312 * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
313 * not known statically or a TLD needs to be created conditionally)
314 *
315 * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs
316 * created dynamically with tld_create_key(). Since only a user page is pinned to the
317 * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE -
318 * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page.
319 */
320__attribute__((unused))
321static tld_key_t tld_create_key(const char *name, size_t size)
322{
323 return __tld_create_key(name, size, true);
324}
325
326__attribute__((unused))
327static inline bool tld_key_is_err(tld_key_t key)
328{
329 return key.off < 0;
330}
331
332__attribute__((unused))
333static inline int tld_key_err_or_zero(tld_key_t key)
334{
335 return tld_key_is_err(key) ? key.off : 0;
336}
337
338/**
339 * tld_get_data() - Get a pointer to the TLD associated with the given key of the
340 * calling thread.
341 *
342 * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
343 * of task local data.
344 * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
345 *
346 * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD
347 * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte
348 * aligned.
349 *
350 * Threads that call tld_get_data() must call tld_free() on exit to prevent
351 * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
352 */
353__attribute__((unused))
354static void *tld_get_data(int map_fd, tld_key_t key)
355{
356 if (!tld_meta_p)
357 return NULL;
358
359 /* tld_data_p is allocated on the first invocation of tld_get_data() */
360 if (!tld_data_p && __tld_init_data_p(map_fd))
361 return NULL;
362
363 return tld_data_p->data + key.off;
364}
365
366/**
367 * tld_free() - Free task local data memory of the calling thread
368 *
369 * For the calling thread, all pointers to TLDs acquired before will become invalid.
370 *
371 * Users must call tld_free() on thread exit to prevent memory leak. Alternatively,
372 * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
373 * to free the memory automatically. Calling tld_free() before thread exit is
374 * undefined behavior, which may lead to null-pointer dereference.
375 */
376__attribute__((unused))
377static void tld_free(void)
378{
379 if (tld_data_p) {
380 free(tld_data_p);
381 tld_data_p = NULL;
382 }
383}
384
385#ifdef __cplusplus
386} /* extern "C" */
387#endif
388
389#endif /* __TASK_LOCAL_DATA_H */