Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Switch to bpf_selem_unlink_nofail in bpf_local_storage_{map_free, destroy}

Handle rqspinlock errors in bpf_local_storage_{map_free, destroy}()
properly by switching to bpf_selem_unlink_nofail().

Both functions iterate their own RCU-protected list of selems and call
bpf_selem_unlink_nofail(). In map_free(), to prevent an infinite loop when
both map_free() and destroy() fail to remove a selem from b->list
(extremely unlikely), switch to hlist_for_each_entry_rcu(). In destroy(),
also switch to hlist_for_each_entry_rcu() since we no longer iterate
local_storage->list under local_storage->lock.

bpf_selem_unlink() now becomes dedicated to the helper and syscall paths,
so reuse_now should always be false. Remove the reuse_now argument and
hardcode its value to false.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Co-developed-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Amery Hung <ameryhung@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20260205222916.1788211-12-ameryhung@gmail.com

authored by

Amery Hung and committed by
Martin KaFai Lau
0be08389 5d800f87

+39 -41
+2 -2
include/linux/bpf_local_storage.h
··· 171 171 return SDATA(selem); 172 172 } 173 173 174 - void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); 174 + u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); 175 175 176 176 void bpf_local_storage_map_free(struct bpf_map *map, 177 177 struct bpf_local_storage_cache *cache); ··· 184 184 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, 185 185 struct bpf_local_storage_elem *selem); 186 186 187 - int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); 187 + int bpf_selem_unlink(struct bpf_local_storage_elem *selem); 188 188 189 189 int bpf_selem_link_map(struct bpf_local_storage_map *smap, 190 190 struct bpf_local_storage *local_storage,
+1 -1
kernel/bpf/bpf_cgrp_storage.c
··· 89 89 if (!sdata) 90 90 return -ENOENT; 91 91 92 - return bpf_selem_unlink(SELEM(sdata), false); 92 + return bpf_selem_unlink(SELEM(sdata)); 93 93 } 94 94 95 95 static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
+1 -1
kernel/bpf/bpf_inode_storage.c
··· 110 110 if (!sdata) 111 111 return -ENOENT; 112 112 113 - return bpf_selem_unlink(SELEM(sdata), false); 113 + return bpf_selem_unlink(SELEM(sdata)); 114 114 } 115 115 116 116 static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
+29 -34
kernel/bpf/bpf_local_storage.c
··· 377 377 hlist_add_head_rcu(&selem->map_node, &b->list); 378 378 } 379 379 380 - int bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) 380 + /* 381 + * Unlink an selem from map and local storage with lock held. 382 + * This is the common path used by local storages to delete an selem. 383 + */ 384 + int bpf_selem_unlink(struct bpf_local_storage_elem *selem) 381 385 { 382 386 struct bpf_local_storage *local_storage; 383 387 bool free_local_storage = false; ··· 415 411 out: 416 412 raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); 417 413 418 - bpf_selem_free_list(&selem_free_list, reuse_now); 414 + bpf_selem_free_list(&selem_free_list, false); 419 415 420 416 if (free_local_storage) 421 - bpf_local_storage_free(local_storage, reuse_now); 417 + bpf_local_storage_free(local_storage, false); 422 418 423 419 return err; 424 420 } ··· 808 804 return 0; 809 805 } 810 806 811 - void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) 807 + /* 808 + * Destroy local storage when the owner is going away. Caller must uncharge memory 809 + * if memory charging is used. 810 + */ 811 + u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage) 812 812 { 813 813 struct bpf_local_storage_elem *selem; 814 - bool free_storage = false; 815 - HLIST_HEAD(free_selem_list); 816 - struct hlist_node *n; 817 - unsigned long flags; 818 814 819 815 /* Neither the bpf_prog nor the bpf_map's syscall 820 816 * could be modifying the local_storage->list now. ··· 825 821 * when unlinking elem from the local_storage->list and 826 822 * the map's bucket->list. 827 823 */ 828 - raw_res_spin_lock_irqsave(&local_storage->lock, flags); 829 - hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { 830 - /* Always unlink from map before unlinking from 831 - * local_storage. 832 - */ 833 - bpf_selem_unlink_map(selem); 834 - /* If local_storage list has only one element, the 835 - * bpf_selem_unlink_storage_nolock() will return true. 
836 - * Otherwise, it will return false. The current loop iteration 837 - * intends to remove all local storage. So the last iteration 838 - * of the loop will set the free_cgroup_storage to true. 839 - */ 840 - free_storage = bpf_selem_unlink_storage_nolock( 841 - local_storage, selem, &free_selem_list); 842 - } 843 - raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); 844 - 845 - bpf_selem_free_list(&free_selem_list, true); 846 - 847 - if (free_storage) 848 - bpf_local_storage_free(local_storage, true); 824 + hlist_for_each_entry_rcu(selem, &local_storage->list, snode) 825 + bpf_selem_unlink_nofail(selem, NULL); 849 826 850 827 if (!refcount_dec_and_test(&local_storage->owner_refcnt)) { 851 828 while (refcount_read(&local_storage->owner_refcnt)) 852 829 cpu_relax(); 830 + /* 831 + * Paired with refcount_dec() in bpf_selem_unlink_nofail() 832 + * to make sure destroy() sees the correct local_storage->mem_charge. 833 + */ 834 + smp_mb(); 853 835 } 836 + 837 + return local_storage->mem_charge; 854 838 } 855 839 856 840 u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) ··· 932 940 933 941 rcu_read_lock(); 934 942 /* No one is adding to b->list now */ 935 - while ((selem = hlist_entry_safe( 936 - rcu_dereference_raw(hlist_first_rcu(&b->list)), 937 - struct bpf_local_storage_elem, map_node))) { 938 - bpf_selem_unlink(selem, true); 939 - cond_resched_rcu(); 943 + restart: 944 + hlist_for_each_entry_rcu(selem, &b->list, map_node) { 945 + bpf_selem_unlink_nofail(selem, b); 946 + 947 + if (need_resched()) { 948 + cond_resched_rcu(); 949 + goto restart; 950 + } 940 951 } 941 952 rcu_read_unlock(); 942 953 }
+1 -1
kernel/bpf/bpf_task_storage.c
··· 134 134 if (!sdata) 135 135 return -ENOENT; 136 136 137 - return bpf_selem_unlink(SELEM(sdata), false); 137 + return bpf_selem_unlink(SELEM(sdata)); 138 138 } 139 139 140 140 static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+5 -2
net/core/bpf_sk_storage.c
··· 40 40 if (!sdata) 41 41 return -ENOENT; 42 42 43 - return bpf_selem_unlink(SELEM(sdata), false); 43 + return bpf_selem_unlink(SELEM(sdata)); 44 44 } 45 45 46 46 /* Called by __sk_destruct() & bpf_sk_storage_clone() */ 47 47 void bpf_sk_storage_free(struct sock *sk) 48 48 { 49 49 struct bpf_local_storage *sk_storage; 50 + u32 uncharge; 50 51 51 52 rcu_read_lock_dont_migrate(); 52 53 sk_storage = rcu_dereference(sk->sk_bpf_storage); 53 54 if (!sk_storage) 54 55 goto out; 55 56 56 - bpf_local_storage_destroy(sk_storage); 57 + uncharge = bpf_local_storage_destroy(sk_storage); 58 + if (uncharge) 59 + atomic_sub(uncharge, &sk->sk_omem_alloc); 57 60 out: 58 61 rcu_read_unlock_migrate(); 59 62 }