Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'selftests/bpf: Fixes for map_percpu_stats test'

Hou Tao says:

====================
From: Hou Tao <houtao1@huawei.com>

Hi,

BPF CI failed due to map_percpu_stats_percpu_hash from time to time [1].
It seems that the failure occurs because the per-cpu bpf memory allocator
may fail to allocate a per-cpu pointer and cannot refill its free llist
in time, so bpf_map_update_elem() returns -ENOMEM.

Patch #1 fixes the size of the value passed to the per-cpu map update API.
The problem was found while fixing the ENOMEM problem, so it is also posted
in this patchset. Patches #2 & #3 mitigate the ENOMEM problem by retrying
the update operation for non-preallocated per-cpu maps.

Please see the individual patches for more details. Comments are always
welcome.

Regards,
Tao

[1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909
====================

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Andrii Nakryiko and committed by
Alexei Starovoitov
e3499962 cd9c1270

+53 -8
+36 -3
tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
··· 131 131 map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 132 132 } 133 133 134 + static bool is_percpu(__u32 map_type) 135 + { 136 + return map_type == BPF_MAP_TYPE_PERCPU_HASH || 137 + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 138 + } 139 + 134 140 struct upsert_opts { 135 141 __u32 map_type; 136 142 int map_fd; 137 143 __u32 n; 144 + bool retry_for_nomem; 138 145 }; 139 146 140 147 static int create_small_hash(void) ··· 155 148 return map_fd; 156 149 } 157 150 151 + static bool retry_for_nomem_fn(int err) 152 + { 153 + return err == ENOMEM; 154 + } 155 + 158 156 static void *patch_map_thread(void *arg) 159 157 { 158 + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ 159 + static __u8 blob[8 << 10]; 160 160 struct upsert_opts *opts = arg; 161 + void *val_ptr; 161 162 int val; 162 163 int ret; 163 164 int i; 164 165 165 166 for (i = 0; i < opts->n; i++) { 166 - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 167 + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 167 168 val = create_small_hash(); 168 - else 169 + val_ptr = &val; 170 + } else if (is_percpu(opts->map_type)) { 171 + val_ptr = blob; 172 + } else { 169 173 val = rand(); 170 - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); 174 + val_ptr = &val; 175 + } 176 + 177 + /* 2 seconds may be enough ? */ 178 + if (opts->retry_for_nomem) 179 + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, 180 + 40, retry_for_nomem_fn); 181 + else 182 + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); 171 183 CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); 172 184 173 185 if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) ··· 306 280 opts.n -= 512; 307 281 else 308 282 opts.n /= 2; 283 + 284 + /* per-cpu bpf memory allocator may not be able to allocate per-cpu 285 + * pointer successfully and it can not refill free llist timely, and 286 + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate 287 + * the problem temporarily. 
288 + */ 289 + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); 309 290 310 291 /* 311 292 * Upsert keys [0, n) under some competition: with random values from
+12 -5
tools/testing/selftests/bpf/test_maps.c
··· 1396 1396 #define MAX_DELAY_US 50000 1397 1397 #define MIN_DELAY_RANGE_US 5000 1398 1398 1399 - static int map_update_retriable(int map_fd, const void *key, const void *value, 1400 - int flags, int attempts) 1399 + static bool retry_for_again_or_busy(int err) 1400 + { 1401 + return (err == EAGAIN || err == EBUSY); 1402 + } 1403 + 1404 + int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, 1405 + retry_for_error_fn need_retry) 1401 1406 { 1402 1407 int delay = rand() % MIN_DELAY_RANGE_US; 1403 1408 1404 1409 while (bpf_map_update_elem(map_fd, key, value, flags)) { 1405 - if (!attempts || (errno != EAGAIN && errno != EBUSY)) 1410 + if (!attempts || !need_retry(errno)) 1406 1411 return -errno; 1407 1412 1408 1413 if (delay <= MAX_DELAY_US / 2) ··· 1450 1445 key = value = i; 1451 1446 1452 1447 if (do_update) { 1453 - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); 1448 + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, 1449 + retry_for_again_or_busy); 1454 1450 if (err) 1455 1451 printf("error %d %d\n", err, errno); 1456 1452 assert(err == 0); 1457 - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); 1453 + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, 1454 + retry_for_again_or_busy); 1458 1455 if (err) 1459 1456 printf("error %d %d\n", err, errno); 1460 1457 assert(err == 0);
+5
tools/testing/selftests/bpf/test_maps.h
··· 4 4 5 5 #include <stdio.h> 6 6 #include <stdlib.h> 7 + #include <stdbool.h> 7 8 8 9 #define CHECK(condition, tag, format...) ({ \ 9 10 int __ret = !!(condition); \ ··· 16 15 }) 17 16 18 17 extern int skips; 18 + 19 + typedef bool (*retry_for_error_fn)(int err); 20 + int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, 21 + retry_for_error_fn need_retry); 19 22 20 23 #endif