Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Add BPF_F_CPU and BPF_F_ALL_CPUS flags support for percpu_hash and lru_percpu_hash maps

Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash
maps, so that a single user-supplied value can be written to every CPU's
copy of an element through both the update_elem and update_batch APIs.

Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash
maps to allow:

* updating the value for a specified CPU through both the update_elem and
update_batch APIs.
* looking up the value for a specified CPU through both the lookup_elem and
lookup_batch APIs.

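The BPF_F_CPU flag is passed via:

* map_flags (update_elem and lookup_elem), with the target CPU number
embedded in the upper 32 bits.
* elem_flags (update_batch and lookup_batch), with the target CPU number
embedded in the upper 32 bits.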

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
Link: https://lore.kernel.org/r/20260107022022.12843-4-leon.hwang@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Leon Hwang and committed by
Alexei Starovoitov
c6936161 8eb76cb0

+68 -32
+3 -1
include/linux/bpf.h
··· 2847 2847 struct bpf_func_state *caller, 2848 2848 struct bpf_func_state *callee); 2849 2849 2850 - int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); 2850 + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 flags); 2851 2851 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags); 2852 2852 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, 2853 2853 u64 flags); ··· 3919 3919 { 3920 3920 switch (map_type) { 3921 3921 case BPF_MAP_TYPE_PERCPU_ARRAY: 3922 + case BPF_MAP_TYPE_PERCPU_HASH: 3923 + case BPF_MAP_TYPE_LRU_PERCPU_HASH: 3922 3924 return true; 3923 3925 default: 3924 3926 return false;
+64 -30
kernel/bpf/hashtab.c
··· 932 932 } 933 933 934 934 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 935 - void *value, bool onallcpus) 935 + void *value, bool onallcpus, u64 map_flags) 936 936 { 937 937 void *ptr; 938 938 ··· 943 943 bpf_obj_free_fields(htab->map.record, ptr); 944 944 } else { 945 945 u32 size = round_up(htab->map.value_size, 8); 946 - int off = 0, cpu; 946 + void *val; 947 + int cpu; 948 + 949 + if (map_flags & BPF_F_CPU) { 950 + cpu = map_flags >> 32; 951 + ptr = per_cpu_ptr(pptr, cpu); 952 + copy_map_value(&htab->map, ptr, value); 953 + bpf_obj_free_fields(htab->map.record, ptr); 954 + return; 955 + } 947 956 948 957 for_each_possible_cpu(cpu) { 949 958 ptr = per_cpu_ptr(pptr, cpu); 950 - copy_map_value_long(&htab->map, ptr, value + off); 959 + val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu; 960 + copy_map_value(&htab->map, ptr, val); 951 961 bpf_obj_free_fields(htab->map.record, ptr); 952 - off += size; 953 962 } 954 963 } 955 964 } 956 965 957 966 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 958 - void *value, bool onallcpus) 967 + void *value, bool onallcpus, u64 map_flags) 959 968 { 960 969 /* When not setting the initial value on all cpus, zero-fill element 961 970 * values for other cpus. 
Otherwise, bpf program has no way to ensure ··· 982 973 zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 983 974 } 984 975 } else { 985 - pcpu_copy_value(htab, pptr, value, onallcpus); 976 + pcpu_copy_value(htab, pptr, value, onallcpus, map_flags); 986 977 } 987 978 } 988 979 ··· 994 985 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 995 986 void *value, u32 key_size, u32 hash, 996 987 bool percpu, bool onallcpus, 997 - struct htab_elem *old_elem) 988 + struct htab_elem *old_elem, u64 map_flags) 998 989 { 999 990 u32 size = htab->map.value_size; 1000 991 bool prealloc = htab_is_prealloc(htab); ··· 1052 1043 pptr = *(void __percpu **)ptr; 1053 1044 } 1054 1045 1055 - pcpu_init_value(htab, pptr, value, onallcpus); 1046 + pcpu_init_value(htab, pptr, value, onallcpus, map_flags); 1056 1047 1057 1048 if (!prealloc) 1058 1049 htab_elem_set_ptr(l_new, key_size, pptr); ··· 1156 1147 } 1157 1148 1158 1149 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, 1159 - l_old); 1150 + l_old, map_flags); 1160 1151 if (IS_ERR(l_new)) { 1161 1152 /* all pre-allocated elements are in use or memory exhausted */ 1162 1153 ret = PTR_ERR(l_new); ··· 1258 1249 return ret; 1259 1250 } 1260 1251 1252 + static int htab_map_check_update_flags(bool onallcpus, u64 map_flags) 1253 + { 1254 + if (unlikely(!onallcpus && map_flags > BPF_EXIST)) 1255 + return -EINVAL; 1256 + if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS))) 1257 + return -EINVAL; 1258 + return 0; 1259 + } 1260 + 1261 1261 static long htab_map_update_elem_in_place(struct bpf_map *map, void *key, 1262 1262 void *value, u64 map_flags, 1263 1263 bool percpu, bool onallcpus) ··· 1280 1262 u32 key_size, hash; 1281 1263 int ret; 1282 1264 1283 - if (unlikely(map_flags > BPF_EXIST)) 1284 - /* unknown flags */ 1285 - return -EINVAL; 1265 + ret = htab_map_check_update_flags(onallcpus, map_flags); 1266 + if (unlikely(ret)) 1267 + return ret; 1286 1268 
1287 1269 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1288 1270 ··· 1307 1289 /* Update value in-place */ 1308 1290 if (percpu) { 1309 1291 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1310 - value, onallcpus); 1292 + value, onallcpus, map_flags); 1311 1293 } else { 1312 1294 void **inner_map_pptr = htab_elem_value(l_old, key_size); 1313 1295 ··· 1316 1298 } 1317 1299 } else { 1318 1300 l_new = alloc_htab_elem(htab, key, value, key_size, 1319 - hash, percpu, onallcpus, NULL); 1301 + hash, percpu, onallcpus, NULL, map_flags); 1320 1302 if (IS_ERR(l_new)) { 1321 1303 ret = PTR_ERR(l_new); 1322 1304 goto err; ··· 1342 1324 u32 key_size, hash; 1343 1325 int ret; 1344 1326 1345 - if (unlikely(map_flags > BPF_EXIST)) 1346 - /* unknown flags */ 1347 - return -EINVAL; 1327 + ret = htab_map_check_update_flags(onallcpus, map_flags); 1328 + if (unlikely(ret)) 1329 + return ret; 1348 1330 1349 1331 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1350 1332 ··· 1381 1363 1382 1364 /* per-cpu hash map can update value in-place */ 1383 1365 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1384 - value, onallcpus); 1366 + value, onallcpus, map_flags); 1385 1367 } else { 1386 1368 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), 1387 - value, onallcpus); 1369 + value, onallcpus, map_flags); 1388 1370 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1389 1371 l_new = NULL; 1390 1372 } ··· 1696 1678 void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1697 1679 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1698 1680 u32 batch, max_count, size, bucket_size, map_id; 1681 + u64 elem_map_flags, map_flags, allowed_flags; 1699 1682 u32 bucket_cnt, total, key_size, value_size; 1700 1683 struct htab_elem *node_to_free = NULL; 1701 - u64 elem_map_flags, map_flags; 1702 1684 struct hlist_nulls_head *head; 1703 1685 struct hlist_nulls_node *n; 1704 1686 unsigned long flags = 0; ··· 1708 1690 int ret = 0; 1709 1691 1710 1692 elem_map_flags = attr->batch.elem_flags; 
1711 - if ((elem_map_flags & ~BPF_F_LOCK) || 1712 - ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1713 - return -EINVAL; 1693 + allowed_flags = BPF_F_LOCK; 1694 + if (!do_delete && is_percpu) 1695 + allowed_flags |= BPF_F_CPU; 1696 + ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags); 1697 + if (ret) 1698 + return ret; 1714 1699 1715 1700 map_flags = attr->batch.flags; 1716 1701 if (map_flags) ··· 1736 1715 key_size = htab->map.key_size; 1737 1716 value_size = htab->map.value_size; 1738 1717 size = round_up(value_size, 8); 1739 - if (is_percpu) 1718 + if (is_percpu && !(elem_map_flags & BPF_F_CPU)) 1740 1719 value_size = size * num_possible_cpus(); 1741 1720 total = 0; 1742 1721 /* while experimenting with hash tables with sizes ranging from 10 to ··· 1819 1798 void __percpu *pptr; 1820 1799 1821 1800 pptr = htab_elem_get_ptr(l, map->key_size); 1822 - for_each_possible_cpu(cpu) { 1823 - copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); 1824 - check_and_init_map_value(&htab->map, dst_val + off); 1825 - off += size; 1801 + if (elem_map_flags & BPF_F_CPU) { 1802 + cpu = elem_map_flags >> 32; 1803 + copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu)); 1804 + check_and_init_map_value(&htab->map, dst_val); 1805 + } else { 1806 + for_each_possible_cpu(cpu) { 1807 + copy_map_value_long(&htab->map, dst_val + off, 1808 + per_cpu_ptr(pptr, cpu)); 1809 + check_and_init_map_value(&htab->map, dst_val + off); 1810 + off += size; 1811 + } 1826 1812 } 1827 1813 } else { 1828 1814 value = htab_elem_value(l, key_size); ··· 2385 2357 return NULL; 2386 2358 } 2387 2359 2388 - int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) 2360 + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags) 2389 2361 { 2390 2362 struct htab_elem *l; 2391 2363 void __percpu *pptr; ··· 2402 2374 l = __htab_map_lookup_elem(map, key); 2403 2375 if (!l) 2404 2376 goto out; 2377 + 
ret = 0; 2405 2378 /* We do not mark LRU map element here in order to not mess up 2406 2379 * eviction heuristics when user space does a map walk. 2407 2380 */ 2408 2381 pptr = htab_elem_get_ptr(l, map->key_size); 2382 + if (map_flags & BPF_F_CPU) { 2383 + cpu = map_flags >> 32; 2384 + copy_map_value(map, value, per_cpu_ptr(pptr, cpu)); 2385 + check_and_init_map_value(map, value); 2386 + goto out; 2387 + } 2409 2388 for_each_possible_cpu(cpu) { 2410 2389 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2411 2390 check_and_init_map_value(map, value + off); 2412 2391 off += size; 2413 2392 } 2414 - ret = 0; 2415 2393 out: 2416 2394 rcu_read_unlock(); 2417 2395 return ret;
+1 -1
kernel/bpf/syscall.c
··· 316 316 bpf_disable_instrumentation(); 317 317 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 318 318 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 319 - err = bpf_percpu_hash_copy(map, key, value); 319 + err = bpf_percpu_hash_copy(map, key, value, flags); 320 320 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 321 321 err = bpf_percpu_array_copy(map, key, value, flags); 322 322 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {