Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'Use this_cpu_xxx for preemption-safety'

Hou Tao says:

====================

From: Hou Tao <houtao1@huawei.com>

Hi,

This patchset makes the updates of the per-cpu prog->active and the per-cpu
bpf_task_storage_busy preemption-safe. The problem is that on some
architectures (e.g. arm64), __this_cpu_{inc|dec|inc_return} are neither
preemption-safe nor IRQ-safe, so under a fully preemptible kernel the
concurrent updates of these per-cpu variables may be interleaved and the
final values of these variables may not be zero.

Patch 1 & 2 use the preemption-safe per-cpu helpers to manipulate
prog->active and bpf_task_storage_busy. Patch 3 & 4 add a test case in
map_tests to show that concurrent updates of the per-cpu
bpf_task_storage_busy using __this_cpu_{inc|dec} are not atomic.

Comments are always welcome.

Regards,
Tao

Change Log:
v2:
* Patch 1: update the commit message to indicate that the problem is only
possible on a fully preemptible kernel
* Patch 2: a new patch which fixes the problem for prog->active
* Patch 3 & 4: move the test to test_maps and make it depend on CONFIG_PREEMPT

v1: https://lore.kernel.org/bpf/20220829142752.330094-1-houtao@huaweicloud.com/
====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>

+191 -28
+2 -2
kernel/bpf/bpf_local_storage.c
··· 555 555 struct bpf_local_storage_elem, map_node))) { 556 556 if (busy_counter) { 557 557 migrate_disable(); 558 - __this_cpu_inc(*busy_counter); 558 + this_cpu_inc(*busy_counter); 559 559 } 560 560 bpf_selem_unlink(selem, false); 561 561 if (busy_counter) { 562 - __this_cpu_dec(*busy_counter); 562 + this_cpu_dec(*busy_counter); 563 563 migrate_enable(); 564 564 } 565 565 cond_resched_rcu();
+4 -4
kernel/bpf/bpf_task_storage.c
··· 26 26 static void bpf_task_storage_lock(void) 27 27 { 28 28 migrate_disable(); 29 - __this_cpu_inc(bpf_task_storage_busy); 29 + this_cpu_inc(bpf_task_storage_busy); 30 30 } 31 31 32 32 static void bpf_task_storage_unlock(void) 33 33 { 34 - __this_cpu_dec(bpf_task_storage_busy); 34 + this_cpu_dec(bpf_task_storage_busy); 35 35 migrate_enable(); 36 36 } 37 37 38 38 static bool bpf_task_storage_trylock(void) 39 39 { 40 40 migrate_disable(); 41 - if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) { 42 - __this_cpu_dec(bpf_task_storage_busy); 41 + if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { 42 + this_cpu_dec(bpf_task_storage_busy); 43 43 migrate_enable(); 44 44 return false; 45 45 }
+4 -4
kernel/bpf/trampoline.c
··· 895 895 896 896 run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 897 897 898 - if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { 898 + if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { 899 899 inc_misses_counter(prog); 900 900 return 0; 901 901 } ··· 930 930 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 931 931 932 932 update_prog_stats(prog, start); 933 - __this_cpu_dec(*(prog->active)); 933 + this_cpu_dec(*(prog->active)); 934 934 migrate_enable(); 935 935 rcu_read_unlock(); 936 936 } ··· 966 966 migrate_disable(); 967 967 might_fault(); 968 968 969 - if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { 969 + if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { 970 970 inc_misses_counter(prog); 971 971 return 0; 972 972 } ··· 982 982 bpf_reset_run_ctx(run_ctx->saved_run_ctx); 983 983 984 984 update_prog_stats(prog, start); 985 - __this_cpu_dec(*(prog->active)); 985 + this_cpu_dec(*(prog->active)); 986 986 migrate_enable(); 987 987 rcu_read_unlock_trace(); 988 988 }
+122
tools/testing/selftests/bpf/map_tests/task_storage_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2022. Huawei Technologies Co., Ltd */ 3 + #define _GNU_SOURCE 4 + #include <sched.h> 5 + #include <unistd.h> 6 + #include <stdlib.h> 7 + #include <stdbool.h> 8 + #include <errno.h> 9 + #include <string.h> 10 + #include <pthread.h> 11 + 12 + #include <bpf/bpf.h> 13 + #include <bpf/libbpf.h> 14 + 15 + #include "test_maps.h" 16 + #include "task_local_storage_helpers.h" 17 + #include "read_bpf_task_storage_busy.skel.h" 18 + 19 + struct lookup_ctx { 20 + bool start; 21 + bool stop; 22 + int pid_fd; 23 + int map_fd; 24 + int loop; 25 + }; 26 + 27 + static void *lookup_fn(void *arg) 28 + { 29 + struct lookup_ctx *ctx = arg; 30 + long value; 31 + int i = 0; 32 + 33 + while (!ctx->start) 34 + usleep(1); 35 + 36 + while (!ctx->stop && i++ < ctx->loop) 37 + bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value); 38 + return NULL; 39 + } 40 + 41 + static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr) 42 + { 43 + unsigned int i; 44 + 45 + ctx->stop = true; 46 + ctx->start = true; 47 + for (i = 0; i < nr; i++) 48 + pthread_join(tids[i], NULL); 49 + } 50 + 51 + void test_task_storage_map_stress_lookup(void) 52 + { 53 + #define MAX_NR_THREAD 4096 54 + unsigned int i, nr = 256, loop = 8192, cpu = 0; 55 + struct read_bpf_task_storage_busy *skel; 56 + pthread_t tids[MAX_NR_THREAD]; 57 + struct lookup_ctx ctx; 58 + cpu_set_t old, new; 59 + const char *cfg; 60 + int err; 61 + 62 + cfg = getenv("TASK_STORAGE_MAP_NR_THREAD"); 63 + if (cfg) { 64 + nr = atoi(cfg); 65 + if (nr > MAX_NR_THREAD) 66 + nr = MAX_NR_THREAD; 67 + } 68 + cfg = getenv("TASK_STORAGE_MAP_NR_LOOP"); 69 + if (cfg) 70 + loop = atoi(cfg); 71 + cfg = getenv("TASK_STORAGE_MAP_PIN_CPU"); 72 + if (cfg) 73 + cpu = atoi(cfg); 74 + 75 + skel = read_bpf_task_storage_busy__open_and_load(); 76 + err = libbpf_get_error(skel); 77 + CHECK(err, "open_and_load", "error %d\n", err); 78 + 79 + /* Only for a fully preemptible kernel */ 80 + if 
(!skel->kconfig->CONFIG_PREEMPT) 81 + return; 82 + 83 + /* Save the old affinity setting */ 84 + sched_getaffinity(getpid(), sizeof(old), &old); 85 + 86 + /* Pinned on a specific CPU */ 87 + CPU_ZERO(&new); 88 + CPU_SET(cpu, &new); 89 + sched_setaffinity(getpid(), sizeof(new), &new); 90 + 91 + ctx.start = false; 92 + ctx.stop = false; 93 + ctx.pid_fd = sys_pidfd_open(getpid(), 0); 94 + ctx.map_fd = bpf_map__fd(skel->maps.task); 95 + ctx.loop = loop; 96 + for (i = 0; i < nr; i++) { 97 + err = pthread_create(&tids[i], NULL, lookup_fn, &ctx); 98 + if (err) { 99 + abort_lookup(&ctx, tids, i); 100 + CHECK(err, "pthread_create", "error %d\n", err); 101 + goto out; 102 + } 103 + } 104 + 105 + ctx.start = true; 106 + for (i = 0; i < nr; i++) 107 + pthread_join(tids[i], NULL); 108 + 109 + skel->bss->pid = getpid(); 110 + err = read_bpf_task_storage_busy__attach(skel); 111 + CHECK(err, "attach", "error %d\n", err); 112 + 113 + /* Trigger program */ 114 + syscall(SYS_gettid); 115 + skel->bss->pid = 0; 116 + 117 + CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); 118 + out: 119 + read_bpf_task_storage_busy__destroy(skel); 120 + /* Restore affinity setting */ 121 + sched_setaffinity(getpid(), sizeof(old), &old); 122 + }
+1 -9
tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
··· 9 9 10 10 #include "bprm_opts.skel.h" 11 11 #include "network_helpers.h" 12 - 13 - #ifndef __NR_pidfd_open 14 - #define __NR_pidfd_open 434 15 - #endif 12 + #include "task_local_storage_helpers.h" 16 13 17 14 static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL }; 18 - 19 - static inline int sys_pidfd_open(pid_t pid, unsigned int flags) 20 - { 21 - return syscall(__NR_pidfd_open, pid, flags); 22 - } 23 15 24 16 static int update_storage(int map_fd, int secureexec) 25 17 {
+1 -9
tools/testing/selftests/bpf/prog_tests/test_local_storage.c
··· 11 11 12 12 #include "local_storage.skel.h" 13 13 #include "network_helpers.h" 14 - 15 - #ifndef __NR_pidfd_open 16 - #define __NR_pidfd_open 434 17 - #endif 18 - 19 - static inline int sys_pidfd_open(pid_t pid, unsigned int flags) 20 - { 21 - return syscall(__NR_pidfd_open, pid, flags); 22 - } 14 + #include "task_local_storage_helpers.h" 23 15 24 16 static unsigned int duration; 25 17
+39
tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2022. Huawei Technologies Co., Ltd */ 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_tracing.h> 6 + 7 + extern bool CONFIG_PREEMPT __kconfig __weak; 8 + extern const int bpf_task_storage_busy __ksym; 9 + 10 + char _license[] SEC("license") = "GPL"; 11 + 12 + int pid = 0; 13 + int busy = 0; 14 + 15 + struct { 16 + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); 17 + __uint(map_flags, BPF_F_NO_PREALLOC); 18 + __type(key, int); 19 + __type(value, long); 20 + } task SEC(".maps"); 21 + 22 + SEC("raw_tp/sys_enter") 23 + int BPF_PROG(read_bpf_task_storage_busy) 24 + { 25 + int *value; 26 + int key; 27 + 28 + if (!CONFIG_PREEMPT) 29 + return 0; 30 + 31 + if (bpf_get_current_pid_tgid() >> 32 != pid) 32 + return 0; 33 + 34 + value = bpf_this_cpu_ptr(&bpf_task_storage_busy); 35 + if (value) 36 + busy = *value; 37 + 38 + return 0; 39 + }
+18
tools/testing/selftests/bpf/task_local_storage_helpers.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __TASK_LOCAL_STORAGE_HELPER_H 3 + #define __TASK_LOCAL_STORAGE_HELPER_H 4 + 5 + #include <unistd.h> 6 + #include <sys/syscall.h> 7 + #include <sys/types.h> 8 + 9 + #ifndef __NR_pidfd_open 10 + #define __NR_pidfd_open 434 11 + #endif 12 + 13 + static inline int sys_pidfd_open(pid_t pid, unsigned int flags) 14 + { 15 + return syscall(__NR_pidfd_open, pid, flags); 16 + } 17 + 18 + #endif