Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'Sleepable local storage'

KP Singh says:

====================

Local storage is currently unusable in sleepable helpers. One of the
important use cases of local_storage is to attach security (or
performance) contextual information to kernel objects in LSM / tracing
programs to be used later in the life-cycle of the object.

Sometimes this context can only be gathered from sleepable programs
(because it needs to access __user pointers or helpers like
bpf_ima_inode_hash). Allowing local storage to be used from sleepable
programs allows such context to be managed with the benefits of
local_storage.

# v2 -> v3

* Fixed some RCU issues pointed out by Martin
* Added Martin's ack

# v1 -> v2

* Generalize RCU checks (will send a separate patch for updating
non local storage code where this can be used).
* Add missing RCU lock checks from v1
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+73 -49
+5
include/linux/bpf_local_storage.h
··· 17 17 18 18 #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 19 19 20 + #define bpf_rcu_lock_held() \ 21 + (rcu_read_lock_held() || rcu_read_lock_trace_held() || \ 22 + rcu_read_lock_bh_held()) 20 23 struct bpf_local_storage_map_bucket { 21 24 struct hlist_head list; 22 25 raw_spinlock_t lock; ··· 164 161 struct bpf_local_storage_data * 165 162 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, 166 163 void *value, u64 map_flags); 164 + 165 + void bpf_local_storage_free_rcu(struct rcu_head *rcu); 167 166 168 167 #endif /* _BPF_LOCAL_STORAGE_H */
+5 -1
kernel/bpf/bpf_inode_storage.c
··· 17 17 #include <linux/bpf_lsm.h> 18 18 #include <linux/btf_ids.h> 19 19 #include <linux/fdtable.h> 20 + #include <linux/rcupdate_trace.h> 20 21 21 22 DEFINE_BPF_STORAGE_CACHE(inode_cache); 22 23 ··· 45 44 if (!bsb) 46 45 return NULL; 47 46 48 - inode_storage = rcu_dereference(bsb->storage); 47 + inode_storage = 48 + rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); 49 49 if (!inode_storage) 50 50 return NULL; 51 51 ··· 174 172 { 175 173 struct bpf_local_storage_data *sdata; 176 174 175 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 177 176 if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) 178 177 return (unsigned long)NULL; 179 178 ··· 207 204 BPF_CALL_2(bpf_inode_storage_delete, 208 205 struct bpf_map *, map, struct inode *, inode) 209 206 { 207 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 210 208 if (!inode) 211 209 return -EINVAL; 212 210
+37 -13
kernel/bpf/bpf_local_storage.c
··· 11 11 #include <net/sock.h> 12 12 #include <uapi/linux/sock_diag.h> 13 13 #include <uapi/linux/btf.h> 14 + #include <linux/rcupdate.h> 15 + #include <linux/rcupdate_trace.h> 16 + #include <linux/rcupdate_wait.h> 14 17 15 18 #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) 16 19 ··· 84 81 return NULL; 85 82 } 86 83 84 + void bpf_local_storage_free_rcu(struct rcu_head *rcu) 85 + { 86 + struct bpf_local_storage *local_storage; 87 + 88 + local_storage = container_of(rcu, struct bpf_local_storage, rcu); 89 + kfree_rcu(local_storage, rcu); 90 + } 91 + 92 + static void bpf_selem_free_rcu(struct rcu_head *rcu) 93 + { 94 + struct bpf_local_storage_elem *selem; 95 + 96 + selem = container_of(rcu, struct bpf_local_storage_elem, rcu); 97 + kfree_rcu(selem, rcu); 98 + } 99 + 87 100 /* local_storage->lock must be held and selem->local_storage == local_storage. 88 101 * The caller must ensure selem->smap is still valid to be 89 102 * dereferenced for its smap->elem_size and smap->cache_idx. ··· 112 93 bool free_local_storage; 113 94 void *owner; 114 95 115 - smap = rcu_dereference(SDATA(selem)->smap); 96 + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); 116 97 owner = local_storage->owner; 117 98 118 99 /* All uncharging on the owner must be done first. ··· 137 118 * 138 119 * Although the unlock will be done under 139 120 * rcu_read_lock(), it is more intutivie to 140 - * read if kfree_rcu(local_storage, rcu) is done 121 + * read if the freeing of the storage is done 141 122 * after the raw_spin_unlock_bh(&local_storage->lock). 142 123 * 143 124 * Hence, a "bool free_local_storage" is returned 144 - * to the caller which then calls the kfree_rcu() 145 - * after unlock. 125 + * to the caller which then calls then frees the storage after 126 + * all the RCU grace periods have expired. 
146 127 */ 147 128 } 148 129 hlist_del_init_rcu(&selem->snode); ··· 150 131 SDATA(selem)) 151 132 RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); 152 133 153 - kfree_rcu(selem, rcu); 154 - 134 + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); 155 135 return free_local_storage; 156 136 } 157 137 ··· 164 146 /* selem has already been unlinked from sk */ 165 147 return; 166 148 167 - local_storage = rcu_dereference(selem->local_storage); 149 + local_storage = rcu_dereference_check(selem->local_storage, 150 + bpf_rcu_lock_held()); 168 151 raw_spin_lock_irqsave(&local_storage->lock, flags); 169 152 if (likely(selem_linked_to_storage(selem))) 170 153 free_local_storage = bpf_selem_unlink_storage_nolock( ··· 173 154 raw_spin_unlock_irqrestore(&local_storage->lock, flags); 174 155 175 156 if (free_local_storage) 176 - kfree_rcu(local_storage, rcu); 157 + call_rcu_tasks_trace(&local_storage->rcu, 158 + bpf_local_storage_free_rcu); 177 159 } 178 160 179 161 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, ··· 194 174 /* selem has already be unlinked from smap */ 195 175 return; 196 176 197 - smap = rcu_dereference(SDATA(selem)->smap); 177 + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); 198 178 b = select_bucket(smap, selem); 199 179 raw_spin_lock_irqsave(&b->lock, flags); 200 180 if (likely(selem_linked_to_map(selem))) ··· 233 213 struct bpf_local_storage_elem *selem; 234 214 235 215 /* Fast path (cache hit) */ 236 - sdata = rcu_dereference(local_storage->cache[smap->cache_idx]); 216 + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], 217 + bpf_rcu_lock_held()); 237 218 if (sdata && rcu_access_pointer(sdata->smap) == smap) 238 219 return sdata; 239 220 240 221 /* Slow path (cache miss) */ 241 - hlist_for_each_entry_rcu(selem, &local_storage->list, snode) 222 + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, 223 + rcu_read_lock_trace_held()) 242 224 if 
(rcu_access_pointer(SDATA(selem)->smap) == smap) 243 225 break; 244 226 ··· 328 306 * bucket->list, first_selem can be freed immediately 329 307 * (instead of kfree_rcu) because 330 308 * bpf_local_storage_map_free() does a 331 - * synchronize_rcu() before walking the bucket->list. 309 + * synchronize_rcu_mult (waiting for both sleepable and 310 + * normal programs) before walking the bucket->list. 332 311 * Hence, no one is accessing selem from the 333 312 * bucket->list under rcu_read_lock(). 334 313 */ ··· 365 342 !map_value_has_spin_lock(&smap->map))) 366 343 return ERR_PTR(-EINVAL); 367 344 368 - local_storage = rcu_dereference(*owner_storage(smap, owner)); 345 + local_storage = rcu_dereference_check(*owner_storage(smap, owner), 346 + bpf_rcu_lock_held()); 369 347 if (!local_storage || hlist_empty(&local_storage->list)) { 370 348 /* Very first elem for the owner */ 371 349 err = check_flags(NULL, map_flags);
+5 -1
kernel/bpf/bpf_task_storage.c
··· 17 17 #include <uapi/linux/btf.h> 18 18 #include <linux/btf_ids.h> 19 19 #include <linux/fdtable.h> 20 + #include <linux/rcupdate_trace.h> 20 21 21 22 DEFINE_BPF_STORAGE_CACHE(task_cache); 22 23 ··· 60 59 struct bpf_local_storage *task_storage; 61 60 struct bpf_local_storage_map *smap; 62 61 63 - task_storage = rcu_dereference(task->bpf_storage); 62 + task_storage = 63 + rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); 64 64 if (!task_storage) 65 65 return NULL; 66 66 ··· 231 229 { 232 230 struct bpf_local_storage_data *sdata; 233 231 232 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 234 233 if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) 235 234 return (unsigned long)NULL; 236 235 ··· 263 260 { 264 261 int ret; 265 262 263 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 266 264 if (!task) 267 265 return -EINVAL; 268 266
+3
kernel/bpf/verifier.c
··· 11874 11874 } 11875 11875 break; 11876 11876 case BPF_MAP_TYPE_RINGBUF: 11877 + case BPF_MAP_TYPE_INODE_STORAGE: 11878 + case BPF_MAP_TYPE_SK_STORAGE: 11879 + case BPF_MAP_TYPE_TASK_STORAGE: 11877 11880 break; 11878 11881 default: 11879 11882 verbose(env,
+7 -1
net/core/bpf_sk_storage.c
··· 13 13 #include <net/sock.h> 14 14 #include <uapi/linux/sock_diag.h> 15 15 #include <uapi/linux/btf.h> 16 + #include <linux/rcupdate_trace.h> 16 17 17 18 DEFINE_BPF_STORAGE_CACHE(sk_cache); 18 19 ··· 23 22 struct bpf_local_storage *sk_storage; 24 23 struct bpf_local_storage_map *smap; 25 24 26 - sk_storage = rcu_dereference(sk->sk_bpf_storage); 25 + sk_storage = 26 + rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held()); 27 27 if (!sk_storage) 28 28 return NULL; 29 29 ··· 260 258 { 261 259 struct bpf_local_storage_data *sdata; 262 260 261 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 263 262 if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) 264 263 return (unsigned long)NULL; 265 264 ··· 291 288 292 289 BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) 293 290 { 291 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 294 292 if (!sk || !sk_fullsock(sk)) 295 293 return -EINVAL; 296 294 ··· 420 416 BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, 421 417 void *, value, u64, flags) 422 418 { 419 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 423 420 if (in_hardirq() || in_nmi()) 424 421 return (unsigned long)NULL; 425 422 ··· 430 425 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, 431 426 struct sock *, sk) 432 427 { 428 + WARN_ON_ONCE(!bpf_rcu_lock_held()); 433 429 if (in_hardirq() || in_nmi()) 434 430 return -EPERM; 435 431
+6 -14
tools/testing/selftests/bpf/prog_tests/test_local_storage.c
··· 28 28 struct storage { 29 29 void *inode; 30 30 unsigned int value; 31 - /* Lock ensures that spin locked versions of local stoage operations 32 - * also work, most operations in this tests are still single threaded 33 - */ 34 - struct bpf_spin_lock lock; 35 31 }; 36 32 37 33 /* Fork and exec the provided rm binary and return the exit code of the ··· 62 66 63 67 static bool check_syscall_operations(int map_fd, int obj_fd) 64 68 { 65 - struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } }, 66 - lookup_val = { .value = 0, .lock = { 0 } }; 69 + struct storage val = { .value = TEST_STORAGE_VALUE }, 70 + lookup_val = { .value = 0 }; 67 71 int err; 68 72 69 73 /* Looking up an existing element should fail initially */ 70 - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 71 - BPF_F_LOCK); 74 + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); 72 75 if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", 73 76 "err:%d errno:%d\n", err, errno)) 74 77 return false; 75 78 76 79 /* Create a new element */ 77 - err = bpf_map_update_elem(map_fd, &obj_fd, &val, 78 - BPF_NOEXIST | BPF_F_LOCK); 80 + err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST); 79 81 if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, 80 82 errno)) 81 83 return false; 82 84 83 85 /* Lookup the newly created element */ 84 - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 85 - BPF_F_LOCK); 86 + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); 86 87 if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, 87 88 errno)) 88 89 return false; ··· 95 102 return false; 96 103 97 104 /* The lookup should fail, now that the element has been deleted */ 98 - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 99 - BPF_F_LOCK); 105 + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); 100 106 if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", 101 107 "err:%d errno:%d\n", err, 
errno)) 102 108 return false;
+5 -19
tools/testing/selftests/bpf/progs/local_storage.c
··· 20 20 struct local_storage { 21 21 struct inode *exec_inode; 22 22 __u32 value; 23 - struct bpf_spin_lock lock; 24 23 }; 25 24 26 25 struct { ··· 57 58 bpf_get_current_task_btf(), 0, 0); 58 59 if (storage) { 59 60 /* Don't let an executable delete itself */ 60 - bpf_spin_lock(&storage->lock); 61 61 is_self_unlink = storage->exec_inode == victim->d_inode; 62 - bpf_spin_unlock(&storage->lock); 63 62 if (is_self_unlink) 64 63 return -EPERM; 65 64 } ··· 65 68 return 0; 66 69 } 67 70 68 - SEC("lsm/inode_rename") 71 + SEC("lsm.s/inode_rename") 69 72 int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, 70 73 struct inode *new_dir, struct dentry *new_dentry, 71 74 unsigned int flags) ··· 86 89 if (!storage) 87 90 return 0; 88 91 89 - bpf_spin_lock(&storage->lock); 90 92 if (storage->value != DUMMY_STORAGE_VALUE) 91 93 inode_storage_result = -1; 92 - bpf_spin_unlock(&storage->lock); 93 94 94 95 err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); 95 96 if (!err) ··· 96 101 return 0; 97 102 } 98 103 99 - SEC("lsm/socket_bind") 104 + SEC("lsm.s/socket_bind") 100 105 int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, 101 106 int addrlen) 102 107 { ··· 112 117 if (!storage) 113 118 return 0; 114 119 115 - bpf_spin_lock(&storage->lock); 116 120 if (storage->value != DUMMY_STORAGE_VALUE) 117 121 sk_storage_result = -1; 118 - bpf_spin_unlock(&storage->lock); 119 122 120 123 err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); 121 124 if (!err) ··· 122 129 return 0; 123 130 } 124 131 125 - SEC("lsm/socket_post_create") 132 + SEC("lsm.s/socket_post_create") 126 133 int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, 127 134 int protocol, int kern) 128 135 { ··· 137 144 if (!storage) 138 145 return 0; 139 146 140 - bpf_spin_lock(&storage->lock); 141 147 storage->value = DUMMY_STORAGE_VALUE; 142 - bpf_spin_unlock(&storage->lock); 143 148 144 149 return 0; 145 150 } ··· 145 154 /* This uses 
the local storage to remember the inode of the binary that a 146 155 * process was originally executing. 147 156 */ 148 - SEC("lsm/bprm_committed_creds") 157 + SEC("lsm.s/bprm_committed_creds") 149 158 void BPF_PROG(exec, struct linux_binprm *bprm) 150 159 { 151 160 __u32 pid = bpf_get_current_pid_tgid() >> 32; ··· 157 166 storage = bpf_task_storage_get(&task_storage_map, 158 167 bpf_get_current_task_btf(), 0, 159 168 BPF_LOCAL_STORAGE_GET_F_CREATE); 160 - if (storage) { 161 - bpf_spin_lock(&storage->lock); 169 + if (storage) 162 170 storage->exec_inode = bprm->file->f_inode; 163 - bpf_spin_unlock(&storage->lock); 164 - } 165 171 166 172 storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, 167 173 0, BPF_LOCAL_STORAGE_GET_F_CREATE); 168 174 if (!storage) 169 175 return; 170 176 171 - bpf_spin_lock(&storage->lock); 172 177 storage->value = DUMMY_STORAGE_VALUE; 173 - bpf_spin_unlock(&storage->lock); 174 178 }