Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'task-local-data-bug-fixes-and-improvement'

Amery Hung says:

====================
Task local data bug fixes and improvement

This patchset fixed three task local data bugs, improved the
memory allocation code, and dropped unnecessary TLD_READ_ONCE. Please
find the detail in each patch's commit msg.

One thing worth mentioning is that Patch 3 allows us to renable task
local data selftests as the library now always calls aligned_alloc()
with size matching alignment under default configuration.

v1 -> v2
- Fix potential memory leak
- Drop TLD_READ_ONCE()
Link: https://lore.kernel.org/bpf/20260326052437.590158-1-ameryhung@gmail.com/
====================

Link: https://patch.msgid.link/20260331213555.1993883-1-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+48 -53
+46 -51
tools/testing/selftests/bpf/prog_tests/task_local_data.h
··· 22 22 /* 23 23 * OPTIONS 24 24 * 25 - * Define the option before including the header 25 + * Define the option before including the header. Using different options in 26 + * different translation units is strongly discouraged. 26 27 * 27 28 * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically 28 29 * 29 30 * Thread-specific memory for storing TLD is allocated lazily on the first call to 30 31 * tld_get_data(). The thread that calls it must also call tld_free() on thread exit 31 32 * to prevent memory leak. Pthread will be included if the option is defined. A pthread 32 - * key will be registered with a destructor that calls tld_free(). 33 + * key will be registered with a destructor that calls tld_free(). Enabled only when 34 + * the option is defined and TLD_DEFINE_KEY/tld_create_key() is called in the same 35 + * translation unit. 33 36 * 34 37 * 35 38 * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically ··· 50 47 * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) 51 48 * 52 49 * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, 53 - * TLD_MAX_DATA_CNT. 50 + * TLD_MAX_DATA_CNT. Must be consistent with task_local_data.bpf.h. 54 51 * 55 52 * 56 - * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc() 53 + * TLD_DONT_ROUND_UP_DATA_SIZE - Don't round up memory size allocated for data if 54 + * the memory allocator has low overhead aligned_alloc() implementation. 57 55 * 58 - * When allocating the memory for storing TLDs, we need to make sure there is a memory 59 - * region of the X bytes within a page. This is due to the limit posed by UPTR: memory 60 - * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The 61 - * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses 62 - * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage 63 - * as not all memory allocator can use the exact amount of memory requested to fulfill 64 - * aligned_alloc(). For example, some may round the size up to the alignment. Enable the 65 - * option to always use aligned_alloc() if the implementation has low memory overhead. 56 + * For some memory allocators, when calling aligned_alloc(alignment, size), size 57 + * does not need to be an integral multiple of alignment and it can be fulfilled 58 + * without using round_up(size, alignment) bytes of memory. Enable this option to 59 + * reduce memory usage. 66 60 */ 67 61 68 62 #define TLD_PAGE_SIZE getpagesize() ··· 68 68 #define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) 69 69 #define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) 70 70 71 - #define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x)) 71 + #define TLD_ROUND_UP_POWER_OF_TWO(x) (1UL << (sizeof(x) * 8 - __builtin_clzl(x - 1))) 72 72 73 73 #ifndef TLD_DYN_DATA_SIZE 74 74 #define TLD_DYN_DATA_SIZE 64 ··· 90 90 91 91 struct tld_metadata { 92 92 char name[TLD_NAME_LEN]; 93 - _Atomic __u16 size; 93 + _Atomic __u16 size; /* size of tld_data_u->data */ 94 94 }; 95 95 96 96 struct tld_meta_u { ··· 101 101 102 102 struct tld_data_u { 103 103 __u64 start; /* offset of tld_data_u->data in a page */ 104 - char data[]; 104 + char data[] __attribute__((aligned(8))); 105 105 }; 106 106 107 107 struct tld_map_value { ··· 111 111 112 112 struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); 113 113 __thread struct tld_data_u *tld_data_p __attribute__((weak)); 114 - __thread void *tld_data_alloc_p __attribute__((weak)); 115 114 116 115 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 116 + bool _Atomic tld_pthread_key_init __attribute__((weak)); 117 117 pthread_key_t tld_pthread_key __attribute__((weak)); 118 118 119 119 static void tld_free(void); ··· 144 144 goto out; 145 145 } 146 146 147 - #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 148 - pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); 149 - #endif 150 147 out: 151 148 return err; 152 149 } 153 150 154 151 static int __tld_init_data_p(int map_fd) 155 152 { 156 - bool use_aligned_alloc = false; 157 153 struct tld_map_value map_val; 158 154 struct tld_data_u *data; 159 - void *data_alloc = NULL; 160 155 int err, tid_fd = -1; 156 + size_t size, size_pot; 161 157 162 158 tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL); 163 159 if (tid_fd < 0) { ··· 161 165 goto out; 162 166 } 163 167 164 - #ifdef TLD_DATA_USE_ALIGNED_ALLOC 165 - use_aligned_alloc = true; 166 - #endif 167 - 168 168 /* 169 169 * tld_meta_p->size = TLD_DYN_DATA_SIZE + 170 170 * total size of TLDs defined via TLD_DEFINE_KEY() 171 171 */ 172 - data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ? 173 - aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) : 174 - malloc(tld_meta_p->size * 2); 175 - if (!data_alloc) { 172 + size = tld_meta_p->size + sizeof(struct tld_data_u); 173 + size_pot = TLD_ROUND_UP_POWER_OF_TWO(size); 174 + #ifdef TLD_DONT_ROUND_UP_DATA_SIZE 175 + data = (struct tld_data_u *)aligned_alloc(size_pot, size); 176 + #else 177 + data = (struct tld_data_u *)aligned_alloc(size_pot, size_pot); 178 + #endif 179 + if (!data) { 176 180 err = -ENOMEM; 177 181 goto out; 178 182 } 179 183 180 184 /* 181 185 * Always pass a page-aligned address to UPTR since the size of tld_map_value::data 182 - * is a page in BTF. If data_alloc spans across two pages, use the page that contains large 183 - * enough memory. 186 + * is a page in BTF. 184 187 */ 185 - if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) { 186 - map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc); 187 - data = data_alloc; 188 - data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) + 189 - offsetof(struct tld_data_u, data); 190 - } else { 191 - map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); 192 - data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); 193 - data->start = offsetof(struct tld_data_u, data); 194 - } 195 - map_val.meta = TLD_READ_ONCE(tld_meta_p); 188 + map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data); 189 + data->start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u); 190 + map_val.meta = tld_meta_p; 196 191 197 192 err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); 198 193 if (err) { 199 - free(data_alloc); 194 + free(data); 200 195 goto out; 201 196 } 202 197 203 198 tld_data_p = data; 204 - tld_data_alloc_p = data_alloc; 205 199 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 206 200 pthread_setspecific(tld_pthread_key, (void *)1); 207 201 #endif ··· 204 218 static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) 205 219 { 206 220 int err, i, sz, off = 0; 221 + bool uninit = false; 207 222 __u16 cnt; 208 223 209 - if (!TLD_READ_ONCE(tld_meta_p)) { 224 + if (!tld_meta_p) { 210 225 err = __tld_init_meta_p(); 211 226 if (err) 212 227 return (tld_key_t){(__s16)err}; 213 228 } 229 + 230 + #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 231 + if (atomic_compare_exchange_strong(&tld_pthread_key_init, &uninit, true)) { 232 + err = pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); 233 + if (err) 234 + return (tld_key_t){(__s16)err}; 235 + } 236 + #endif 214 237 215 238 for (i = 0; i < (int)TLD_MAX_DATA_CNT; i++) { 216 239 retry: ··· 286 291 #define TLD_DEFINE_KEY(key, name, size) \ 287 292 tld_key_t key; \ 288 293 \ 289 - __attribute__((constructor)) \ 294 + __attribute__((constructor(101))) \ 290 295 void __tld_define_key_##key(void) \ 291 296 { \ 292 297 key = __tld_create_key(name, size, false); \ ··· 346 351 __attribute__((unused)) 347 352 static void *tld_get_data(int map_fd, tld_key_t key) 348 353 { 349 - if (!TLD_READ_ONCE(tld_meta_p)) 354 + if (!tld_meta_p) 350 355 return NULL; 351 356 352 357 /* tld_data_p is allocated on the first invocation of tld_get_data() */ ··· 363 368 * 364 369 * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, 365 370 * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered 366 - * to free the memory automatically. 371 + * to free the memory automatically. Calling tld_free() before thread exit is 372 + * undefined behavior, which may lead to null-pointer dereference. 367 373 */ 368 374 __attribute__((unused)) 369 375 static void tld_free(void) 370 376 { 371 - if (tld_data_alloc_p) { 372 - free(tld_data_alloc_p); 373 - tld_data_alloc_p = NULL; 377 + if (tld_data_p) { 378 + free(tld_data_p); 374 379 tld_data_p = NULL; 375 380 } 376 381 }
+1 -1
tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
··· 26 26 */ 27 27 static void reset_tld(void) 28 28 { 29 - if (TLD_READ_ONCE(tld_meta_p)) { 29 + if (tld_meta_p) { 30 30 /* Remove TLDs created by tld_create_key() */ 31 31 tld_meta_p->cnt = 1; 32 32 tld_meta_p->size = TLD_DYN_DATA_SIZE;
+1 -1
tools/testing/selftests/bpf/progs/task_local_data.bpf.h
··· 87 87 88 88 struct tld_data_u { 89 89 __u64 start; /* offset of tld_data_u->data in a page */ 90 - char data[__PAGE_SIZE - sizeof(__u64)]; 90 + char data[__PAGE_SIZE - sizeof(__u64)] __attribute__((aligned(8))); 91 91 }; 92 92 93 93 struct tld_map_value {