Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper

Jiri Olsa reported a bug ([1]) in kernel where cgroup local
storage pointer may be NULL in bpf_get_local_storage() helper.
There are two issues uncovered by this bug:
(1). kprobe or tracepoint prog incorrectly sets cgroup local storage
before prog run,
(2). due to change from preempt_disable to migrate_disable,
preemption is possible and percpu storage might be overwritten
by other tasks.

This issue (1) is fixed in [2]. This patch tried to address issue (2).
The following shows how things can go wrong:
task 1: bpf_cgroup_storage_set() for percpu local storage
preemption happens
task 2: bpf_cgroup_storage_set() for percpu local storage
preemption happens
task 1: run bpf program

task 1 will effectively use the percpu local storage setting by task 2
which will be either NULL or incorrect ones.

Instead of just one common local storage per cpu, this patch fixed
the issue by permitting 8 local storages per cpu and each local
storage is identified by a task_struct pointer. This way, we
allow at most 8 nested preemption between bpf_cgroup_storage_set()
and bpf_cgroup_storage_unset(). The percpu local storage slot
is released (calling bpf_cgroup_storage_unset()) by the same task
after bpf program finished running.
bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
interface.

The patch is tested on top of [2] with reproducer in [1].
Without this patch, kernel will emit error in 2-3 minutes.
With this patch, after one hour, still no error.

[1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
[2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com

authored by

Yonghong Song and committed by
Alexei Starovoitov
b910eaaa a46410d5

+86 -19
+49 -8
include/linux/bpf-cgroup.h
··· 20 20 struct bpf_cgroup_storage; 21 21 struct ctl_table; 22 22 struct ctl_table_header; 23 + struct task_struct; 23 24 24 25 #ifdef CONFIG_CGROUP_BPF 25 26 26 27 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; 27 28 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) 28 29 29 - DECLARE_PER_CPU(struct bpf_cgroup_storage*, 30 - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 30 + #define BPF_CGROUP_STORAGE_NEST_MAX 8 31 + 32 + struct bpf_cgroup_storage_info { 33 + struct task_struct *task; 34 + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; 35 + }; 36 + 37 + /* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks 38 + * to use bpf cgroup storage simultaneously. 39 + */ 40 + DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 41 + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 31 42 32 43 #define for_each_cgroup_storage_type(stype) \ 33 44 for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) ··· 172 161 return BPF_CGROUP_STORAGE_SHARED; 173 162 } 174 163 175 - static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage 176 - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) 164 + static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage 165 + *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) 177 166 { 178 167 enum bpf_cgroup_storage_type stype; 168 + int i, err = 0; 179 169 180 - for_each_cgroup_storage_type(stype) 181 - this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); 170 + preempt_disable(); 171 + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 172 + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) 173 + continue; 174 + 175 + this_cpu_write(bpf_cgroup_storage_info[i].task, current); 176 + for_each_cgroup_storage_type(stype) 177 + this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], 178 + storage[stype]); 179 + goto out; 180 + } 181 + err = -EBUSY; 182 + WARN_ON_ONCE(1); 183 + 184 + out: 185 + preempt_enable(); 186 + return err; 187 + } 188 + 189 + static inline void bpf_cgroup_storage_unset(void) 190 + { 191 + int i; 192 + 193 + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 194 + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 195 + continue; 196 + 197 + this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); 198 + return; 199 + } 182 200 } 183 201 184 202 struct bpf_cgroup_storage * ··· 488 448 return -EINVAL; 489 449 } 490 450 491 - static inline void bpf_cgroup_storage_set( 492 - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} 451 + static inline int bpf_cgroup_storage_set( 452 + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } 453 + static inline void bpf_cgroup_storage_unset(void) {} 493 454 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, 494 455 struct bpf_map *map) { return 0; } 495 456 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+18 -4
include/linux/bpf.h
··· 1106 1106 /* BPF program asks to set CN on the packet. */ 1107 1107 #define BPF_RET_SET_CN (1 << 0) 1108 1108 1109 + /* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, 1110 + * if bpf_cgroup_storage_set() failed, the rest of programs 1111 + * will not execute. This should be a really rare scenario 1112 + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of 1113 + * preemptions all between bpf_cgroup_storage_set() and 1114 + * bpf_cgroup_storage_unset() on the same cpu. 1115 + */ 1109 1116 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ 1110 1117 ({ \ 1111 1118 struct bpf_prog_array_item *_item; \ ··· 1125 1118 _array = rcu_dereference(array); \ 1126 1119 _item = &_array->items[0]; \ 1127 1120 while ((_prog = READ_ONCE(_item->prog))) { \ 1128 - bpf_cgroup_storage_set(_item->cgroup_storage); \ 1121 + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ 1122 + break; \ 1129 1123 func_ret = func(_prog, ctx); \ 1130 1124 _ret &= (func_ret & 1); \ 1131 1125 *(ret_flags) |= (func_ret >> 1); \ 1126 + bpf_cgroup_storage_unset(); \ 1132 1127 _item++; \ 1133 1128 } \ 1134 1129 rcu_read_unlock(); \ ··· 1151 1142 goto _out; \ 1152 1143 _item = &_array->items[0]; \ 1153 1144 while ((_prog = READ_ONCE(_item->prog))) { \ 1154 - if (set_cg_storage) \ 1155 - bpf_cgroup_storage_set(_item->cgroup_storage); \ 1156 - _ret &= func(_prog, ctx); \ 1145 + if (!set_cg_storage) { \ 1146 + _ret &= func(_prog, ctx); \ 1147 + } else { \ 1148 + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ 1149 + break; \ 1150 + _ret &= func(_prog, ctx); \ 1151 + bpf_cgroup_storage_unset(); \ 1152 + } \ 1157 1153 _item++; \ 1158 1154 } \ 1159 1155 _out: \
+11 -4
kernel/bpf/helpers.c
··· 382 382 }; 383 383 384 384 #ifdef CONFIG_CGROUP_BPF 385 - DECLARE_PER_CPU(struct bpf_cgroup_storage*, 386 - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 385 + DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 386 + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 387 387 388 388 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) 389 389 { ··· 392 392 * verifier checks that its value is correct. 393 393 */ 394 394 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 395 - struct bpf_cgroup_storage *storage; 395 + struct bpf_cgroup_storage *storage = NULL; 396 396 void *ptr; 397 + int i; 397 398 398 - storage = this_cpu_read(bpf_cgroup_storage[stype]); 399 + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 400 + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 401 + continue; 402 + 403 + storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); 404 + break; 405 + } 399 406 400 407 if (stype == BPF_CGROUP_STORAGE_SHARED) 401 408 ptr = &READ_ONCE(storage->buf)->data[0];
+3 -2
kernel/bpf/local_storage.c
··· 9 9 #include <linux/slab.h> 10 10 #include <uapi/linux/btf.h> 11 11 12 - DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 13 - 14 12 #ifdef CONFIG_CGROUP_BPF 13 + 14 + DEFINE_PER_CPU(struct bpf_cgroup_storage_info, 15 + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 15 16 16 17 #include "../cgroup/cgroup-internal.h" 17 18
+5 -1
net/bpf/test_run.c
··· 106 106 107 107 bpf_test_timer_enter(&t); 108 108 do { 109 - bpf_cgroup_storage_set(storage); 109 + ret = bpf_cgroup_storage_set(storage); 110 + if (ret) 111 + break; 110 112 111 113 if (xdp) 112 114 *retval = bpf_prog_run_xdp(prog, ctx); 113 115 else 114 116 *retval = BPF_PROG_RUN(prog, ctx); 117 + 118 + bpf_cgroup_storage_unset(); 115 119 } while (bpf_test_timer_continue(&t, repeat, &ret, time)); 116 120 bpf_test_timer_leave(&t); 117 121