Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Remove task local storage percpu counter

The percpu counter in task local storage is no longer needed, as the
underlying bpf_local_storage can now handle deadlocks with the help of
rqspinlock. Remove the percpu counter and the related
migrate_{disable,enable}.

Since the percpu counter is removed, merge back bpf_task_storage_get()
and bpf_task_storage_get_recur(). This will allow the bpf syscalls and
helpers to run concurrently on the same CPU, removing the spurious
-EBUSY error. bpf_task_storage_get(..., F_CREATE) will now always
succeed when enough free memory is available, unless it is called
recursively.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260205222916.1788211-7-ameryhung@gmail.com

authored by

Amery Hung and committed by
Martin KaFai Lau
4a98c2ef 8dabe34b

+19 -137
+19 -133
kernel/bpf/bpf_task_storage.c
··· 20 20 21 21 DEFINE_BPF_STORAGE_CACHE(task_cache); 22 22 23 - static DEFINE_PER_CPU(int, bpf_task_storage_busy); 24 - 25 - static void bpf_task_storage_lock(void) 26 - { 27 - cant_migrate(); 28 - this_cpu_inc(bpf_task_storage_busy); 29 - } 30 - 31 - static void bpf_task_storage_unlock(void) 32 - { 33 - this_cpu_dec(bpf_task_storage_busy); 34 - } 35 - 36 - static bool bpf_task_storage_trylock(void) 37 - { 38 - cant_migrate(); 39 - if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { 40 - this_cpu_dec(bpf_task_storage_busy); 41 - return false; 42 - } 43 - return true; 44 - } 45 - 46 23 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) 47 24 { 48 25 struct task_struct *task = owner; ··· 47 70 { 48 71 struct bpf_local_storage *local_storage; 49 72 50 - rcu_read_lock_dont_migrate(); 73 + rcu_read_lock(); 51 74 52 75 local_storage = rcu_dereference(task->bpf_storage); 53 76 if (!local_storage) 54 77 goto out; 55 78 56 - bpf_task_storage_lock(); 57 79 bpf_local_storage_destroy(local_storage); 58 - bpf_task_storage_unlock(); 59 80 out: 60 - rcu_read_unlock_migrate(); 81 + rcu_read_unlock(); 61 82 } 62 83 63 84 static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) ··· 81 106 goto out; 82 107 } 83 108 84 - bpf_task_storage_lock(); 85 109 sdata = task_storage_lookup(task, map, true); 86 - bpf_task_storage_unlock(); 87 110 put_pid(pid); 88 111 return sdata ? 
sdata->data : NULL; 89 112 out: ··· 116 143 goto out; 117 144 } 118 145 119 - bpf_task_storage_lock(); 120 146 sdata = bpf_local_storage_update( 121 147 task, (struct bpf_local_storage_map *)map, value, map_flags, 122 148 true, GFP_ATOMIC); 123 - bpf_task_storage_unlock(); 124 149 125 150 err = PTR_ERR_OR_ZERO(sdata); 126 151 out: ··· 126 155 return err; 127 156 } 128 157 129 - static int task_storage_delete(struct task_struct *task, struct bpf_map *map, 130 - bool nobusy) 158 + static int task_storage_delete(struct task_struct *task, struct bpf_map *map) 131 159 { 132 160 struct bpf_local_storage_data *sdata; 133 161 134 162 sdata = task_storage_lookup(task, map, false); 135 163 if (!sdata) 136 164 return -ENOENT; 137 - 138 - if (!nobusy) 139 - return -EBUSY; 140 165 141 166 return bpf_selem_unlink(SELEM(sdata), false); 142 167 } ··· 159 192 goto out; 160 193 } 161 194 162 - bpf_task_storage_lock(); 163 - err = task_storage_delete(task, map, true); 164 - bpf_task_storage_unlock(); 195 + err = task_storage_delete(task, map); 165 196 out: 166 197 put_pid(pid); 167 198 return err; 168 - } 169 - 170 - /* Called by bpf_task_storage_get*() helpers */ 171 - static void *__bpf_task_storage_get(struct bpf_map *map, 172 - struct task_struct *task, void *value, 173 - u64 flags, gfp_t gfp_flags, bool nobusy) 174 - { 175 - struct bpf_local_storage_data *sdata; 176 - 177 - sdata = task_storage_lookup(task, map, nobusy); 178 - if (sdata) 179 - return sdata->data; 180 - 181 - /* only allocate new storage, when the task is refcounted */ 182 - if (refcount_read(&task->usage) && 183 - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { 184 - sdata = bpf_local_storage_update( 185 - task, (struct bpf_local_storage_map *)map, value, 186 - BPF_NOEXIST, false, gfp_flags); 187 - return IS_ERR(sdata) ? 
NULL : sdata->data; 188 - } 189 - 190 - return NULL; 191 - } 192 - 193 - /* *gfp_flags* is a hidden argument provided by the verifier */ 194 - BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, 195 - task, void *, value, u64, flags, gfp_t, gfp_flags) 196 - { 197 - bool nobusy; 198 - void *data; 199 - 200 - WARN_ON_ONCE(!bpf_rcu_lock_held()); 201 - if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) 202 - return (unsigned long)NULL; 203 - 204 - nobusy = bpf_task_storage_trylock(); 205 - data = __bpf_task_storage_get(map, task, value, flags, 206 - gfp_flags, nobusy); 207 - if (nobusy) 208 - bpf_task_storage_unlock(); 209 - return (unsigned long)data; 210 199 } 211 200 212 201 /* *gfp_flags* is a hidden argument provided by the verifier */ 213 202 BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, 214 203 task, void *, value, u64, flags, gfp_t, gfp_flags) 215 204 { 216 - void *data; 205 + struct bpf_local_storage_data *sdata; 217 206 218 207 WARN_ON_ONCE(!bpf_rcu_lock_held()); 219 208 if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) 220 209 return (unsigned long)NULL; 221 210 222 - bpf_task_storage_lock(); 223 - data = __bpf_task_storage_get(map, task, value, flags, 224 - gfp_flags, true); 225 - bpf_task_storage_unlock(); 226 - return (unsigned long)data; 227 - } 211 + sdata = task_storage_lookup(task, map, true); 212 + if (sdata) 213 + return (unsigned long)sdata->data; 228 214 229 - BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, 230 - task) 231 - { 232 - bool nobusy; 233 - int ret; 215 + /* only allocate new storage, when the task is refcounted */ 216 + if (refcount_read(&task->usage) && 217 + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) { 218 + sdata = bpf_local_storage_update( 219 + task, (struct bpf_local_storage_map *)map, value, 220 + BPF_NOEXIST, false, gfp_flags); 221 + return IS_ERR(sdata) ? 
(unsigned long)NULL : (unsigned long)sdata->data; 222 + } 234 223 235 - WARN_ON_ONCE(!bpf_rcu_lock_held()); 236 - if (!task) 237 - return -EINVAL; 238 - 239 - nobusy = bpf_task_storage_trylock(); 240 - /* This helper must only be called from places where the lifetime of the task 241 - * is guaranteed. Either by being refcounted or by being protected 242 - * by an RCU read-side critical section. 243 - */ 244 - ret = task_storage_delete(task, map, nobusy); 245 - if (nobusy) 246 - bpf_task_storage_unlock(); 247 - return ret; 224 + return (unsigned long)NULL; 248 225 } 249 226 250 227 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, 251 228 task) 252 229 { 253 - int ret; 254 - 255 230 WARN_ON_ONCE(!bpf_rcu_lock_held()); 256 231 if (!task) 257 232 return -EINVAL; 258 233 259 - bpf_task_storage_lock(); 260 234 /* This helper must only be called from places where the lifetime of the task 261 235 * is guaranteed. Either by being refcounted or by being protected 262 236 * by an RCU read-side critical section. 
263 237 */ 264 - ret = task_storage_delete(task, map, true); 265 - bpf_task_storage_unlock(); 266 - return ret; 238 + return task_storage_delete(task, map); 267 239 } 268 240 269 241 static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) ··· 217 311 218 312 static void task_storage_map_free(struct bpf_map *map) 219 313 { 220 - bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); 314 + bpf_local_storage_map_free(map, &task_cache, NULL); 221 315 } 222 316 223 317 BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map) ··· 236 330 .map_owner_storage_ptr = task_storage_ptr, 237 331 }; 238 332 239 - const struct bpf_func_proto bpf_task_storage_get_recur_proto = { 240 - .func = bpf_task_storage_get_recur, 241 - .gpl_only = false, 242 - .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 243 - .arg1_type = ARG_CONST_MAP_PTR, 244 - .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, 245 - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 246 - .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, 247 - .arg4_type = ARG_ANYTHING, 248 - }; 249 - 250 333 const struct bpf_func_proto bpf_task_storage_get_proto = { 251 334 .func = bpf_task_storage_get, 252 335 .gpl_only = false, ··· 245 350 .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 246 351 .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, 247 352 .arg4_type = ARG_ANYTHING, 248 - }; 249 - 250 - const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { 251 - .func = bpf_task_storage_delete_recur, 252 - .gpl_only = false, 253 - .ret_type = RET_INTEGER, 254 - .arg1_type = ARG_CONST_MAP_PTR, 255 - .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, 256 - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 257 353 }; 258 354 259 355 const struct bpf_func_proto bpf_task_storage_delete_proto = {
-4
kernel/bpf/helpers.c
··· 2167 2167 return &bpf_get_cgroup_classid_curr_proto; 2168 2168 #endif 2169 2169 case BPF_FUNC_task_storage_get: 2170 - if (bpf_prog_check_recur(prog)) 2171 - return &bpf_task_storage_get_recur_proto; 2172 2170 return &bpf_task_storage_get_proto; 2173 2171 case BPF_FUNC_task_storage_delete: 2174 - if (bpf_prog_check_recur(prog)) 2175 - return &bpf_task_storage_delete_recur_proto; 2176 2172 return &bpf_task_storage_delete_proto; 2177 2173 default: 2178 2174 break;