Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-fix-torn-writes-in-non-prealloc-htab-with-bpf_f_lock'

Mykyta Yatsenko says:

====================
bpf: Fix torn writes in non-prealloc htab with BPF_F_LOCK

A torn write issue was reported in htab_map_update_elem() with
BPF_F_LOCK on non-preallocated hash maps. The BPF_F_LOCK fast path
performs a lockless lookup and copies the value under the element's
embedded spin_lock. A concurrent delete can free the element via
bpf_mem_cache_free(), which allows the memory to be reused immediately.
When alloc_htab_elem() recycles the same memory, it writes the value
with plain copy_map_value() without taking the spin_lock, racing with
the fast-path updater that still holds the lock on the stale element
and producing torn writes.

Patch 1 fixes alloc_htab_elem() to use copy_map_value_locked() when
BPF_F_LOCK is set.

Patch 2 adds a selftest that reliably detects the torn writes on an
unpatched kernel.

Reported-by: Aaron Esau <aaron1esau@gmail.com>
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
====================

Link: https://patch.msgid.link/20260401-bpf_map_torn_writes-v1-0-782d071c55e7@meta.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+188 -1
+4
kernel/bpf/hashtab.c
··· 1138 1138 } else if (fd_htab_map_needs_adjust(htab)) { 1139 1139 size = round_up(size, 8); 1140 1140 memcpy(htab_elem_value(l_new, key_size), value, size); 1141 + } else if (map_flags & BPF_F_LOCK) { 1142 + copy_map_value_locked(&htab->map, 1143 + htab_elem_value(l_new, key_size), 1144 + value, false); 1141 1145 } else { 1142 1146 copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value); 1143 1147 }
+168 -1
tools/testing/selftests/bpf/prog_tests/htab_reuse.c
··· 59 59 return NULL; 60 60 } 61 61 62 - void test_htab_reuse(void) 62 + static void test_htab_reuse_basic(void) 63 63 { 64 64 unsigned int i, wr_nr = 1, rd_nr = 4; 65 65 pthread_t tids[wr_nr + rd_nr]; ··· 98 98 pthread_join(tids[i], NULL); 99 99 } 100 100 htab_reuse__destroy(skel); 101 + } 102 + 103 + /* 104 + * Writes consistency test for BPF_F_LOCK update 105 + * 106 + * The race: 107 + * 1. Thread A: BPF_F_LOCK|BPF_EXIST update 108 + * 2. Thread B: delete element then update it with BPF_ANY 109 + */ 110 + 111 + struct htab_val_large { 112 + struct bpf_spin_lock lock; 113 + __u32 seq; 114 + __u64 data[256]; 115 + }; 116 + 117 + struct consistency_ctx { 118 + int fd; 119 + int start_fd; 120 + int loop; 121 + volatile bool torn_write; 122 + }; 123 + 124 + static void wait_for_start(int fd) 125 + { 126 + char buf; 127 + 128 + read(fd, &buf, 1); 129 + } 130 + 131 + static void *locked_update_fn(void *arg) 132 + { 133 + struct consistency_ctx *ctx = arg; 134 + struct htab_val_large value; 135 + unsigned int key = 1; 136 + int i; 137 + 138 + memset(&value, 0xAA, sizeof(value)); 139 + wait_for_start(ctx->start_fd); 140 + 141 + for (i = 0; i < ctx->loop; i++) { 142 + value.seq = i; 143 + bpf_map_update_elem(ctx->fd, &key, &value, 144 + BPF_F_LOCK | BPF_EXIST); 145 + } 146 + 147 + return NULL; 148 + } 149 + 150 + /* Delete + update: removes the element then re-creates it with BPF_ANY. 
*/ 151 + static void *delete_update_fn(void *arg) 152 + { 153 + struct consistency_ctx *ctx = arg; 154 + struct htab_val_large value; 155 + unsigned int key = 1; 156 + int i; 157 + 158 + memset(&value, 0xBB, sizeof(value)); 159 + 160 + wait_for_start(ctx->start_fd); 161 + 162 + for (i = 0; i < ctx->loop; i++) { 163 + value.seq = i; 164 + bpf_map_delete_elem(ctx->fd, &key); 165 + bpf_map_update_elem(ctx->fd, &key, &value, BPF_ANY | BPF_F_LOCK); 166 + } 167 + 168 + return NULL; 169 + } 170 + 171 + static void *locked_lookup_fn(void *arg) 172 + { 173 + struct consistency_ctx *ctx = arg; 174 + struct htab_val_large value; 175 + unsigned int key = 1; 176 + int i, j; 177 + 178 + wait_for_start(ctx->start_fd); 179 + 180 + for (i = 0; i < ctx->loop && !ctx->torn_write; i++) { 181 + if (bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK)) 182 + continue; 183 + 184 + for (j = 0; j < 256; j++) { 185 + if (value.data[j] != value.data[0]) { 186 + ctx->torn_write = true; 187 + return NULL; 188 + } 189 + } 190 + } 191 + 192 + return NULL; 193 + } 194 + 195 + static void test_htab_reuse_consistency(void) 196 + { 197 + int threads_total = 6, threads = 2; 198 + pthread_t tids[threads_total]; 199 + struct consistency_ctx ctx; 200 + struct htab_val_large seed; 201 + struct htab_reuse *skel; 202 + unsigned int key = 1, i; 203 + int pipefd[2]; 204 + int err; 205 + 206 + skel = htab_reuse__open_and_load(); 207 + if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load")) 208 + return; 209 + 210 + if (!ASSERT_OK(pipe(pipefd), "pipe")) 211 + goto out; 212 + 213 + ctx.fd = bpf_map__fd(skel->maps.htab_lock_consistency); 214 + ctx.start_fd = pipefd[0]; 215 + ctx.loop = 100000; 216 + ctx.torn_write = false; 217 + 218 + /* Seed the element so locked updaters have something to find */ 219 + memset(&seed, 0xBB, sizeof(seed)); 220 + err = bpf_map_update_elem(ctx.fd, &key, &seed, BPF_ANY); 221 + if (!ASSERT_OK(err, "seed_element")) 222 + goto close_pipe; 223 + 224 + memset(tids, 0, 
sizeof(tids)); 225 + for (i = 0; i < threads; i++) { 226 + err = pthread_create(&tids[i], NULL, locked_update_fn, &ctx); 227 + if (!ASSERT_OK(err, "pthread_create")) 228 + goto stop; 229 + } 230 + for (i = 0; i < threads; i++) { 231 + err = pthread_create(&tids[threads + i], NULL, delete_update_fn, &ctx); 232 + if (!ASSERT_OK(err, "pthread_create")) 233 + goto stop; 234 + } 235 + for (i = 0; i < threads; i++) { 236 + err = pthread_create(&tids[threads * 2 + i], NULL, locked_lookup_fn, &ctx); 237 + if (!ASSERT_OK(err, "pthread_create")) 238 + goto stop; 239 + } 240 + 241 + /* Release all threads simultaneously */ 242 + close(pipefd[1]); 243 + pipefd[1] = -1; 244 + 245 + stop: 246 + for (i = 0; i < threads_total; i++) { 247 + if (!tids[i]) 248 + continue; 249 + pthread_join(tids[i], NULL); 250 + } 251 + 252 + ASSERT_FALSE(ctx.torn_write, "no torn writes detected"); 253 + 254 + close_pipe: 255 + if (pipefd[1] >= 0) 256 + close(pipefd[1]); 257 + close(pipefd[0]); 258 + out: 259 + htab_reuse__destroy(skel); 260 + } 261 + 262 + void test_htab_reuse(void) 263 + { 264 + if (test__start_subtest("basic")) 265 + test_htab_reuse_basic(); 266 + if (test__start_subtest("consistency")) 267 + test_htab_reuse_consistency(); 101 268 }
+16
tools/testing/selftests/bpf/progs/htab_reuse.c
··· 17 17 __type(value, struct htab_val); 18 18 __uint(map_flags, BPF_F_NO_PREALLOC); 19 19 } htab SEC(".maps"); 20 + 21 + #define HTAB_NDATA 256 22 + 23 + struct htab_val_large { 24 + struct bpf_spin_lock lock; 25 + __u32 seq; 26 + __u64 data[HTAB_NDATA]; 27 + }; 28 + 29 + struct { 30 + __uint(type, BPF_MAP_TYPE_HASH); 31 + __uint(max_entries, 8); 32 + __type(key, unsigned int); 33 + __type(value, struct htab_val_large); 34 + __uint(map_flags, BPF_F_NO_PREALLOC); 35 + } htab_lock_consistency SEC(".maps");