// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <test_progs.h>
#include <bpf/btf.h>
#include "task_local_storage_helpers.h"
#include "task_local_storage.skel.h"
#include "task_local_storage_exit_creds.skel.h"
#include "task_ls_recursion.skel.h"
#include "task_storage_nodeadlock.skel.h"
#include "uptr_test_common.h"
#include "task_ls_uptr.skel.h"
#include "uptr_update_failure.skel.h"
#include "uptr_failure.skel.h"
#include "uptr_map_failure.skel.h"

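/* Count sys_enter/sys_exit events in task local storage: two gettid()
 * calls made while target_pid is set must yield exactly two enter and
 * two exit hits with no storage mismatches.
 */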
static void test_sys_enter_exit(void)
{
	struct task_local_storage *skel;
	pid_t pid = sys_gettid();
	int err;

	skel = task_local_storage__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	err = task_local_storage__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* Set target_pid after attach so that syscalls made during
	 * attach are not counted.
	 */
	skel->bss->target_pid = pid;

	sys_gettid();
	sys_gettid();

	skel->bss->target_pid = 0;

	/* 2x gettid syscalls */
	ASSERT_EQ(skel->bss->enter_cnt, 2, "enter_cnt");
	ASSERT_EQ(skel->bss->exit_cnt, 2, "exit_cnt");
	ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
out:
	task_local_storage__destroy(skel);
}

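/* Trigger at least one exit_creds() by running a short-lived command,
 * wait (bounded synchronize_rcu loop) until the program has run, and
 * verify it only ever observed NULL task storage pointers.
 */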
static void test_exit_creds(void)
{
	struct task_local_storage_exit_creds *skel;
	int err, run_count, sync_rcu_calls = 0;
	const int MAX_SYNC_RCU_CALLS = 1000;

	skel = task_local_storage_exit_creds__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	err = task_local_storage_exit_creds__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* trigger at least one exit_creds() */
	if (CHECK_FAIL(system("ls > /dev/null")))
		goto out;

	/* kern_sync_rcu is not enough on its own as the read section we want
	 * to wait for may start after we enter synchronize_rcu, so our call
	 * won't wait for the section to finish. Loop on the run counter
	 * as well to ensure the program has run.
	 */
	do {
		kern_sync_rcu();
		run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST);
	} while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS);

	ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS,
		   "sync_rcu count too high");
	ASSERT_NEQ(run_count, 0, "run_count");
	ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
	ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
out:
	task_local_storage_exit_creds__destroy(skel);
}

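/* Recursive storage updates triggered from within the BPF programs
 * must be rejected rather than deadlock: map_a/map_b end up with the
 * expected values (200 and 1, see BPF_PROG(on_update)) and on_update
 * records exactly two recursion misses.
 */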
static void test_recursion(void)
{
	int err, map_fd, prog_fd, task_fd;
	struct task_ls_recursion *skel;
	struct bpf_prog_info info;
	__u32 info_len = sizeof(info);
	long value;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
		return;

	skel = task_ls_recursion__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	err = task_ls_recursion__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* trigger sys_enter, make sure it does not cause deadlock */
	skel->bss->test_pid = getpid();
	sys_gettid();
	skel->bss->test_pid = 0;
	task_ls_recursion__detach(skel);

	/* Refer to the comment in BPF_PROG(on_update) for
	 * the explanation of the values 200 and 1.
	 */
	map_fd = bpf_map__fd(skel->maps.map_a);
	err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
	ASSERT_OK(err, "lookup map_a");
	ASSERT_EQ(value, 200, "map_a value");
	ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");

	map_fd = bpf_map__fd(skel->maps.map_b);
	err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
	ASSERT_OK(err, "lookup map_b");
	ASSERT_EQ(value, 1, "map_b value");

	prog_fd = bpf_program__fd(skel->progs.on_update);
	memset(&info, 0, sizeof(info));
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion");

	prog_fd = bpf_program__fd(skel->progs.on_enter);
	memset(&info, 0, sizeof(info));
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");

out:
	close(task_fd);
	task_ls_recursion__destroy(skel);
}

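/* Helpers for test_nodeadlock: worker threads create and close
 * sockets in a loop until told to stop, or until the BPF side reports
 * a bpf_task_storage_get/delete error.
 */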
static bool stop;

static void waitall(const pthread_t *tids, int nr)
{
	int i;

	stop = true;
	for (i = 0; i < nr; i++)
		pthread_join(tids[i], NULL);
}

static void *sock_create_loop(void *arg)
{
	struct task_storage_nodeadlock *skel = arg;
	int fd;

	while (!stop) {
		fd = socket(AF_INET, SOCK_STREAM, 0);
		close(fd);
		if (skel->bss->nr_get_errs || skel->bss->nr_del_errs)
			stop = true;
	}

	return NULL;
}

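/* Hammer task storage from 32 threads pinned to a single CPU so a
 * sleepable program is likely to be preempted mid-run, then verify
 * that no recursion misses or "busy" errors were recorded.
 */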
static void test_nodeadlock(void)
{
	struct task_storage_nodeadlock *skel;
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	const int nr_threads = 32;
	pthread_t tids[nr_threads];
	int i, prog_fd, err;
	cpu_set_t old, new;

	/* Pin all threads to one cpu to increase the chance of preemption
	 * in a sleepable bpf prog.
	 */
	CPU_ZERO(&new);
	CPU_SET(0, &new);
	err = sched_getaffinity(getpid(), sizeof(old), &old);
	if (!ASSERT_OK(err, "getaffinity"))
		return;
	err = sched_setaffinity(getpid(), sizeof(new), &new);
	if (!ASSERT_OK(err, "setaffinity"))
		return;

	skel = task_storage_nodeadlock__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		goto done;

	/* Unnecessary recursion and deadlock detection are reproducible
	 * in the preemptible kernel.
	 */
	if (!skel->kconfig->CONFIG_PREEMPTION) {
		test__skip();
		goto done;
	}

	err = task_storage_nodeadlock__attach(skel);
	ASSERT_OK(err, "attach prog");

	for (i = 0; i < nr_threads; i++) {
		err = pthread_create(&tids[i], NULL, sock_create_loop, skel);
		if (err) {
			/* Only assert once here to avoid excessive
			 * PASS printing during test failure.
			 */
			ASSERT_OK(err, "pthread_create");
			waitall(tids, i);
			goto done;
		}
	}

	/* With 32 threads, 1s is enough to reproduce the issue */
	sleep(1);
	waitall(tids, nr_threads);

	info_len = sizeof(info);
	prog_fd = bpf_program__fd(skel->progs.socket_post_create);
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "prog recursion");

	ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy");
	ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");

done:
	task_storage_nodeadlock__destroy(skel);
	sched_setaffinity(getpid(), sizeof(old), &old);
}

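/* User-space fixtures for the uptr tests. The BPF side writes results
 * back into these structs through __uptr fields kept in task local
 * storage; the 16-byte alignment likely keeps each struct from
 * straddling a page boundary.
 */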
static struct user_data udata __attribute__((aligned(16))) = {
	.a = 1,
	.b = 2,
};

static struct user_data udata2 __attribute__((aligned(16))) = {
	.a = 3,
	.b = 4,
};

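/* Reset udata2's result fields and make a syscall to trigger the BPF
 * program, then check that both the top-level and nested uptr writes
 * match the expected value (0 when no uptr is installed).
 */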
static void check_udata2(int expected)
{
	udata2.result = udata2.nested_result = 0;
	usleep(1);
	ASSERT_EQ(udata2.result, expected, "udata2.result");
	ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result");
}

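/* End-to-end uptr flow: install uptrs for the parent task, have a
 * child's syscall drive the BPF program to write through the parent's
 * memory, then exercise update_elem() swapping uptrs
 * (udata -> udata2 -> NULL -> udata2), lookup_elem() hiding kernel
 * pointers, delete_elem(), and map_free with a uptr still installed.
 */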
static void test_uptr_basic(void)
{
	int map_fd, parent_task_fd, ev_fd;
	struct value_type value = {};
	struct task_ls_uptr *skel;
	pid_t child_pid, my_tid;
	__u64 ev_dummy_data = 1;
	int err;

	my_tid = sys_gettid();
	parent_task_fd = sys_pidfd_open(my_tid, 0);
	if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd"))
		return;

	ev_fd = eventfd(0, 0);
	if (!ASSERT_OK_FD(ev_fd, "ev_fd")) {
		close(parent_task_fd);
		return;
	}

	skel = task_ls_uptr__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);
	value.udata = &udata;
	value.nested.udata = &udata;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_OK(err, "update_elem(udata)"))
		goto out;

	err = task_ls_uptr__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	child_pid = fork();
	if (!ASSERT_NEQ(child_pid, -1, "fork"))
		goto out;

	/* Make a syscall in the child process, but access the map value of
	 * the parent process in the BPF program to check if the user kptr
	 * is translated/mapped correctly.
	 */
	if (child_pid == 0) {
		/* child */

		/* Overwrite the user_data in the child process to check if
		 * the BPF program accesses the user_data of the parent.
		 */
		udata.a = 0;
		udata.b = 0;

		/* Wait for the parent to set child_pid */
		read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));
		exit(0);
	}

	skel->bss->parent_pid = my_tid;
	skel->bss->target_pid = child_pid;

	write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));

	err = waitpid(child_pid, NULL, 0);
	ASSERT_EQ(err, child_pid, "waitpid");
	ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result");
	ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result");

	skel->bss->target_pid = my_tid;

	/* update_elem: uptr changes from udata to udata2 */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(MAGIC_VALUE + udata2.a + udata2.b);

	/* update_elem: uptr changes from udata2 to NULL */
	memset(&value, 0, sizeof(value));
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(0);

	/* update_elem: uptr changes from NULL to udata2 */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(MAGIC_VALUE + udata2.a + udata2.b);

	/* Check if user programs can access the value of user kptrs
	 * through bpf_map_lookup_elem(). Make sure the kernel value is not
	 * leaked.
	 */
	err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value);
	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
		goto out;
	ASSERT_EQ(value.udata, NULL, "value.udata");
	ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata");

	/* delete_elem */
	err = bpf_map_delete_elem(map_fd, &parent_task_fd);
	ASSERT_OK(err, "delete_elem(udata2)");
	check_udata2(0);

	/* update_elem: add uptr back to test map_free */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
	ASSERT_OK(err, "update_elem(udata2)");

out:
	task_ls_uptr__destroy(skel);
	close(ev_fd);
	close(parent_task_fd);
}

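/* A uptr whose struct straddles a page boundary must be rejected with
 * EOPNOTSUPP; one that ends exactly at the page boundary is accepted.
 */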
static void test_uptr_across_pages(void)
{
	int page_size = getpagesize();
	struct value_type value = {};
	struct task_ls_uptr *skel;
	int err, task_fd, map_fd;
	void *mem;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_OK_FD(task_fd, "task_fd"))
		return;

	mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) {
		close(task_fd);
		return;
	}

	skel = task_ls_uptr__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);
	value.udata = mem + page_size - offsetof(struct user_data, b);
	err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
	if (!ASSERT_ERR(err, "update_elem(udata)"))
		goto out;
	ASSERT_EQ(errno, EOPNOTSUPP, "errno");

	value.udata = mem + page_size - sizeof(struct user_data);
	err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
	ASSERT_OK(err, "update_elem(udata)");

out:
	task_ls_uptr__destroy(skel);
	close(task_fd);
	munmap(mem, page_size * 2);
}

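/* update_elem() flag handling for uptr maps: BPF_F_LOCK is not
 * supported (EOPNOTSUPP), BPF_EXIST without an existing element fails
 * with ENOENT, and BPF_NOEXIST on an existing element fails with
 * EEXIST.
 */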
static void test_uptr_update_failure(void)
{
	struct value_lock_type value = {};
	struct uptr_update_failure *skel;
	int err, task_fd, map_fd;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_OK_FD(task_fd, "task_fd"))
		return;

	skel = uptr_update_failure__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);

	value.udata = &udata;
	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK);
	if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)"))
		goto out;
	ASSERT_EQ(errno, EOPNOTSUPP, "errno");

	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST);
	if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)"))
		goto out;
	ASSERT_EQ(errno, ENOENT, "errno");

	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)"))
		goto out;

	value.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)"))
		goto out;
	ASSERT_EQ(errno, EEXIST, "errno");

out:
	uptr_update_failure__destroy(skel);
	close(task_fd);
}

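/* Re-create a map by hand from the skeleton's BTF and verify that map
 * creation fails with the expected errno; callers pass maps whose
 * value type is too large, zero-sized, or a kernel struct behind a
 * uptr.
 */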
static void test_uptr_map_failure(const char *map_name, int expected_errno)
{
	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
	struct uptr_map_failure *skel;
	struct bpf_map *map;
	struct btf *btf;
	int map_fd, err;

	skel = uptr_map_failure__open();
	if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open"))
		return;

	map = bpf_object__find_map_by_name(skel->obj, map_name);
	btf = bpf_object__btf(skel->obj);
	err = btf__load_into_kernel(btf);
	if (!ASSERT_OK(err, "btf__load_into_kernel"))
		goto done;

	create_attr.map_flags = bpf_map__map_flags(map);
	create_attr.btf_fd = btf__fd(btf);
	create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map);
	create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map);
	map_fd = bpf_map_create(bpf_map__type(map), map_name,
				bpf_map__key_size(map), bpf_map__value_size(map),
				0, &create_attr);
	if (ASSERT_ERR_FD(map_fd, "map_create"))
		ASSERT_EQ(errno, expected_errno, "errno");
	else
		close(map_fd);

done:
	uptr_map_failure__destroy(skel);
}

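/* Test entry point: run each scenario as its own subtest; the uptr
 * failure programs are loaded and checked via RUN_TESTS.
 */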
void test_task_local_storage(void)
{
	if (test__start_subtest("sys_enter_exit"))
		test_sys_enter_exit();
	if (test__start_subtest("exit_creds"))
		test_exit_creds();
	if (test__start_subtest("recursion"))
		test_recursion();
	if (test__start_subtest("nodeadlock"))
		test_nodeadlock();
	if (test__start_subtest("uptr_basic"))
		test_uptr_basic();
	if (test__start_subtest("uptr_across_pages"))
		test_uptr_across_pages();
	if (test__start_subtest("uptr_update_failure"))
		test_uptr_update_failure();
	if (test__start_subtest("uptr_map_failure_e2big")) {
		if (getpagesize() == PAGE_SIZE)
			test_uptr_map_failure("large_uptr_map", E2BIG);
		else
			test__skip();
	}
	if (test__start_subtest("uptr_map_failure_size0"))
		test_uptr_map_failure("empty_uptr_map", EINVAL);
	if (test__start_subtest("uptr_map_failure_kstruct"))
		test_uptr_map_failure("kstruct_uptr_map", EINVAL);
	RUN_TESTS(uptr_failure);
}