Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

landlock: Multithreading support for landlock_restrict_self()

Introduce the LANDLOCK_RESTRICT_SELF_TSYNC flag. With this flag, a
given Landlock ruleset is applied to all threads of the calling
process, instead of only the current one.

Without this flag, multithreaded userspace programs currently resort
to using the nptl(7)/libpsx hack for multithreaded policy enforcement,
which is also used by libcap and for setuid(2). Using this
userspace-based scheme, the threads of a process enforce the same
Landlock policy, but the resulting Landlock domains are still
separate. The domains being separate causes multiple problems:

* When using Landlock's "scoped" access rights, the domain identity is
used to determine whether an operation is permitted. As a result,
when using LANDLOCK_SCOPE_SIGNAL, signaling between sibling threads
stops working. This is a problem for programming languages and
frameworks which are inherently multithreaded (e.g. Go).

* In audit logging, the domains of separate threads in a process will
get logged with different domain IDs, even when they are based on
the same ruleset FD, which might confuse users.

Cc: Andrew G. Morgan <morgan@kernel.org>
Cc: John Johansen <john.johansen@canonical.com>
Cc: Paul Moore <paul@paul-moore.com>
Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Günther Noack <gnoack@google.com>
Link: https://lore.kernel.org/r/20251127115136.3064948-2-gnoack@google.com
[mic: Fix restrict_self_flags test, clean up Makefile, align comments,
reduce local variable scope, add missing includes]
Closes: https://github.com/landlock-lsm/linux/issues/2
Signed-off-by: Mickaël Salaün <mic@digikod.net>

authored by

Günther Noack and committed by
Mickaël Salaün
42fc7e65 24d479d2

+654 -34
+13
include/uapi/linux/landlock.h
··· 117 117 * future nested domains, not the one being created. It can also be used 118 118 * with a @ruleset_fd value of -1 to mute subdomain logs without creating a 119 119 * domain. 120 + * 121 + * The following flag supports policy enforcement in multithreaded processes: 122 + * 123 + * %LANDLOCK_RESTRICT_SELF_TSYNC 124 + * Applies the new Landlock configuration atomically to all threads of the 125 + * current process, including the Landlock domain and logging 126 + * configuration. This overrides the Landlock configuration of sibling 127 + * threads, irrespective of previously established Landlock domains and 128 + * logging configurations on these threads. 129 + * 130 + * If the calling thread is running with no_new_privs, this operation 131 + * enables no_new_privs on the sibling threads as well. 120 132 */ 121 133 /* clang-format off */ 122 134 #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) 123 135 #define LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON (1U << 1) 124 136 #define LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF (1U << 2) 137 + #define LANDLOCK_RESTRICT_SELF_TSYNC (1U << 3) 125 138 /* clang-format on */ 126 139 127 140 /**
+9 -2
security/landlock/Makefile
··· 1 1 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o 2 2 3 - landlock-y := setup.o syscalls.o object.o ruleset.o \ 4 - cred.o task.o fs.o 3 + landlock-y := \ 4 + setup.o \ 5 + syscalls.o \ 6 + object.o \ 7 + ruleset.o \ 8 + cred.o \ 9 + task.o \ 10 + fs.o \ 11 + tsync.o 5 12 6 13 landlock-$(CONFIG_INET) += net.o 7 14
+12
security/landlock/cred.h
··· 26 26 * This structure is packed to minimize the size of struct 27 27 * landlock_file_security. However, it is always aligned in the LSM cred blob, 28 28 * see lsm_set_blob_size(). 29 + * 30 + * When updating this, also update landlock_cred_copy() if needed. 29 31 */ 30 32 struct landlock_cred_security { 31 33 /** ··· 65 63 landlock_cred(const struct cred *cred) 66 64 { 67 65 return cred->security + landlock_blob_sizes.lbs_cred; 66 + } 67 + 68 + static inline void landlock_cred_copy(struct landlock_cred_security *dst, 69 + const struct landlock_cred_security *src) 70 + { 71 + landlock_put_ruleset(dst->domain); 72 + 73 + *dst = *src; 74 + 75 + landlock_get_ruleset(src->domain); 68 76 } 69 77 70 78 static inline struct landlock_ruleset *landlock_get_current_domain(void)
+1 -1
security/landlock/limits.h
··· 31 31 #define LANDLOCK_MASK_SCOPE ((LANDLOCK_LAST_SCOPE << 1) - 1) 32 32 #define LANDLOCK_NUM_SCOPE __const_hweight64(LANDLOCK_MASK_SCOPE) 33 33 34 - #define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF 34 + #define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_TSYNC 35 35 #define LANDLOCK_MASK_RESTRICT_SELF ((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1) 36 36 37 37 /* clang-format on */
+40 -29
security/landlock/syscalls.c
··· 36 36 #include "net.h" 37 37 #include "ruleset.h" 38 38 #include "setup.h" 39 + #include "tsync.h" 39 40 40 41 static bool is_initialized(void) 41 42 { ··· 162 161 * Documentation/userspace-api/landlock.rst should be updated to reflect the 163 162 * UAPI change. 164 163 */ 165 - const int landlock_abi_version = 7; 164 + const int landlock_abi_version = 8; 166 165 167 166 /** 168 167 * sys_landlock_create_ruleset - Create a new ruleset ··· 455 454 * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF 456 455 * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON 457 456 * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF 457 + * - %LANDLOCK_RESTRICT_SELF_TSYNC 458 458 * 459 - * This system call enables to enforce a Landlock ruleset on the current 460 - * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its 459 + * This system call enforces a Landlock ruleset on the current thread. 460 + * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its 461 461 * namespace or is running with no_new_privs. This avoids scenarios where 462 462 * unprivileged tasks can affect the behavior of privileged children. 463 463 * ··· 480 478 SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, 481 479 flags) 482 480 { 483 - struct landlock_ruleset *new_dom, 484 - *ruleset __free(landlock_put_ruleset) = NULL; 481 + struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL; 485 482 struct cred *new_cred; 486 483 struct landlock_cred_security *new_llcred; 487 484 bool __maybe_unused log_same_exec, log_new_exec, log_subdomains, ··· 539 538 * We could optimize this case by not calling commit_creds() if this flag 540 539 * was already set, but it is not worth the complexity. 541 540 */ 542 - if (!ruleset) 543 - return commit_creds(new_cred); 541 + if (ruleset) { 542 + /* 543 + * There is no possible race condition while copying and 544 + * manipulating the current credentials because they are 545 + * dedicated per thread. 
546 + */ 547 + struct landlock_ruleset *const new_dom = 548 + landlock_merge_ruleset(new_llcred->domain, ruleset); 549 + if (IS_ERR(new_dom)) { 550 + abort_creds(new_cred); 551 + return PTR_ERR(new_dom); 552 + } 544 553 545 - /* 546 - * There is no possible race condition while copying and manipulating 547 - * the current credentials because they are dedicated per thread. 548 - */ 549 - new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset); 550 - if (IS_ERR(new_dom)) { 551 - abort_creds(new_cred); 552 - return PTR_ERR(new_dom); 554 + #ifdef CONFIG_AUDIT 555 + new_dom->hierarchy->log_same_exec = log_same_exec; 556 + new_dom->hierarchy->log_new_exec = log_new_exec; 557 + if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains) 558 + new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED; 559 + #endif /* CONFIG_AUDIT */ 560 + 561 + /* Replaces the old (prepared) domain. */ 562 + landlock_put_ruleset(new_llcred->domain); 563 + new_llcred->domain = new_dom; 564 + 565 + #ifdef CONFIG_AUDIT 566 + new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); 567 + #endif /* CONFIG_AUDIT */ 553 568 } 554 569 555 - #ifdef CONFIG_AUDIT 556 - new_dom->hierarchy->log_same_exec = log_same_exec; 557 - new_dom->hierarchy->log_new_exec = log_new_exec; 558 - if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains) 559 - new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED; 560 - #endif /* CONFIG_AUDIT */ 561 - 562 - /* Replaces the old (prepared) domain. */ 563 - landlock_put_ruleset(new_llcred->domain); 564 - new_llcred->domain = new_dom; 565 - 566 - #ifdef CONFIG_AUDIT 567 - new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); 568 - #endif /* CONFIG_AUDIT */ 570 + if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) { 571 + const int err = landlock_restrict_sibling_threads( 572 + current_cred(), new_cred); 573 + if (err) { 574 + abort_creds(new_cred); 575 + return err; 576 + } 577 + } 569 578 570 579 return commit_creds(new_cred); 571 580 }
+561
security/landlock/tsync.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Landlock - Cross-thread ruleset enforcement 4 + * 5 + * Copyright © 2025 Google LLC 6 + */ 7 + 8 + #include <linux/atomic.h> 9 + #include <linux/cleanup.h> 10 + #include <linux/completion.h> 11 + #include <linux/cred.h> 12 + #include <linux/errno.h> 13 + #include <linux/overflow.h> 14 + #include <linux/rcupdate.h> 15 + #include <linux/sched.h> 16 + #include <linux/sched/signal.h> 17 + #include <linux/sched/task.h> 18 + #include <linux/slab.h> 19 + #include <linux/task_work.h> 20 + 21 + #include "cred.h" 22 + #include "tsync.h" 23 + 24 + /* 25 + * Shared state between multiple threads which are enforcing Landlock rulesets 26 + * in lockstep with each other. 27 + */ 28 + struct tsync_shared_context { 29 + /* The old and tentative new creds of the calling thread. */ 30 + const struct cred *old_cred; 31 + const struct cred *new_cred; 32 + 33 + /* True if sibling tasks need to set the no_new_privs flag. */ 34 + bool set_no_new_privs; 35 + 36 + /* An error encountered in preparation step, or 0. */ 37 + atomic_t preparation_error; 38 + 39 + /* 40 + * Barrier after preparation step in restrict_one_thread. 41 + * The calling thread waits for completion. 42 + * 43 + * Re-initialized on every round of looking for newly spawned threads. 44 + */ 45 + atomic_t num_preparing; 46 + struct completion all_prepared; 47 + 48 + /* Sibling threads wait for completion. */ 49 + struct completion ready_to_commit; 50 + 51 + /* 52 + * Barrier after commit step (used by syscall impl to wait for 53 + * completion). 
54 + */ 55 + atomic_t num_unfinished; 56 + struct completion all_finished; 57 + }; 58 + 59 + struct tsync_work { 60 + struct callback_head work; 61 + struct task_struct *task; 62 + struct tsync_shared_context *shared_ctx; 63 + }; 64 + 65 + /* 66 + * restrict_one_thread - update a thread's Landlock domain in lockstep with the 67 + * other threads in the same process 68 + * 69 + * When this is run, the same function gets run in all other threads in the same 70 + * process (except for the calling thread which called landlock_restrict_self). 71 + * The concurrently running invocations of restrict_one_thread coordinate 72 + * through the shared ctx object to do their work in lockstep to implement 73 + * all-or-nothing semantics for enforcing the new Landlock domain. 74 + * 75 + * Afterwards, depending on the presence of an error, all threads either commit 76 + * or abort the prepared credentials. The commit operation can not fail any 77 + * more. 78 + */ 79 + static void restrict_one_thread(struct tsync_shared_context *ctx) 80 + { 81 + int err; 82 + struct cred *cred = NULL; 83 + 84 + if (current_cred() == ctx->old_cred) { 85 + /* 86 + * Switch out old_cred with new_cred, if possible. 87 + * 88 + * In the common case, where all threads initially point to the same 89 + * struct cred, this optimization avoids creating separate redundant 90 + * credentials objects for each, which would all have the same contents. 91 + * 92 + * Note: We are intentionally dropping the const qualifier here, because 93 + * it is required by commit_creds() and abort_creds(). 94 + */ 95 + cred = (struct cred *)get_cred(ctx->new_cred); 96 + } else { 97 + /* Else, prepare new creds and populate them. */ 98 + cred = prepare_creds(); 99 + 100 + if (!cred) { 101 + atomic_set(&ctx->preparation_error, -ENOMEM); 102 + 103 + /* 104 + * Even on error, we need to adhere to the protocol and coordinate 105 + * with concurrently running invocations. 
106 + */ 107 + if (atomic_dec_return(&ctx->num_preparing) == 0) 108 + complete_all(&ctx->all_prepared); 109 + 110 + goto out; 111 + } 112 + 113 + landlock_cred_copy(landlock_cred(cred), 114 + landlock_cred(ctx->new_cred)); 115 + } 116 + 117 + /* 118 + * Barrier: Wait until all threads are done preparing. 119 + * After this point, we can have no more failures. 120 + */ 121 + if (atomic_dec_return(&ctx->num_preparing) == 0) 122 + complete_all(&ctx->all_prepared); 123 + 124 + /* 125 + * Wait for signal from calling thread that it's safe to read the 126 + * preparation error now and we are ready to commit (or abort). 127 + */ 128 + wait_for_completion(&ctx->ready_to_commit); 129 + 130 + /* Abort the commit if any of the other threads had an error. */ 131 + err = atomic_read(&ctx->preparation_error); 132 + if (err) { 133 + abort_creds(cred); 134 + goto out; 135 + } 136 + 137 + /* 138 + * Make sure that all sibling tasks fulfill the no_new_privs prerequisite. 139 + * (This is in line with Seccomp's SECCOMP_FILTER_FLAG_TSYNC logic in 140 + * kernel/seccomp.c) 141 + */ 142 + if (ctx->set_no_new_privs) 143 + task_set_no_new_privs(current); 144 + 145 + commit_creds(cred); 146 + 147 + out: 148 + /* Notify the calling thread once all threads are done */ 149 + if (atomic_dec_return(&ctx->num_unfinished) == 0) 150 + complete_all(&ctx->all_finished); 151 + } 152 + 153 + /* 154 + * restrict_one_thread_callback - task_work callback for restricting a thread 155 + * 156 + * Calls restrict_one_thread with the struct landlock_shared_tsync_context. 157 + */ 158 + static void restrict_one_thread_callback(struct callback_head *work) 159 + { 160 + struct tsync_work *ctx = container_of(work, struct tsync_work, work); 161 + 162 + restrict_one_thread(ctx->shared_ctx); 163 + } 164 + 165 + /* 166 + * struct tsync_works - a growable array of per-task contexts 167 + * 168 + * The zero-initialized struct represents the empty array. 
169 + */ 170 + struct tsync_works { 171 + struct tsync_work **works; 172 + size_t size; 173 + size_t capacity; 174 + }; 175 + 176 + /* 177 + * tsync_works_provide - provides a preallocated tsync_work for the given task 178 + * 179 + * This also stores a task pointer in the context and increments the reference 180 + * count of the task. 181 + * 182 + * This function may fail in the case where we did not preallocate sufficient 183 + * capacity. This can legitimately happen if new threads get started after we 184 + * grew the capacity. 185 + * 186 + * Returns: 187 + * A pointer to the preallocated context struct, with task filled in. 188 + * 189 + * NULL, if we ran out of preallocated context structs. 190 + */ 191 + static struct tsync_work *tsync_works_provide(struct tsync_works *s, 192 + struct task_struct *task) 193 + { 194 + struct tsync_work *ctx; 195 + 196 + if (s->size >= s->capacity) 197 + return NULL; 198 + 199 + ctx = s->works[s->size]; 200 + s->size++; 201 + 202 + ctx->task = get_task_struct(task); 203 + return ctx; 204 + } 205 + 206 + /* 207 + * tsync_works_grow_by - preallocates space for n more contexts in s 208 + * 209 + * On a successful return, the subsequent n calls to tsync_works_provide() are 210 + * guaranteed to succeed. (size + n <= capacity) 211 + * 212 + * Returns: 213 + * -ENOMEM if the (re)allocation fails 214 + 215 + * 0 if the allocation succeeds, partially succeeds, or no reallocation 216 + * was needed 217 + */ 218 + static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) 219 + { 220 + size_t i; 221 + size_t new_capacity; 222 + struct tsync_work **works; 223 + struct tsync_work *work; 224 + 225 + if (check_add_overflow(s->size, n, &new_capacity)) 226 + return -EOVERFLOW; 227 + 228 + /* No need to reallocate if s already has sufficient capacity. 
*/ 229 + if (new_capacity <= s->capacity) 230 + return 0; 231 + 232 + works = krealloc_array(s->works, new_capacity, sizeof(s->works[0]), 233 + flags); 234 + if (!works) 235 + return -ENOMEM; 236 + 237 + s->works = works; 238 + 239 + for (i = s->capacity; i < new_capacity; i++) { 240 + work = kzalloc(sizeof(*work), flags); 241 + if (!work) { 242 + /* 243 + * Leave the object in a consistent state, 244 + * but return an error. 245 + */ 246 + s->capacity = i; 247 + return -ENOMEM; 248 + } 249 + s->works[i] = work; 250 + } 251 + s->capacity = new_capacity; 252 + return 0; 253 + } 254 + 255 + /* 256 + * tsync_works_contains - checks for presence of task in s 257 + */ 258 + static bool tsync_works_contains_task(const struct tsync_works *s, 259 + struct task_struct *task) 260 + { 261 + size_t i; 262 + 263 + for (i = 0; i < s->size; i++) 264 + if (s->works[i]->task == task) 265 + return true; 266 + return false; 267 + } 268 + 269 + /* 270 + * tsync_works_release - frees memory held by s and drops all task references 271 + * 272 + * This does not free s itself, only the data structures held by it. 273 + */ 274 + static void tsync_works_release(struct tsync_works *s) 275 + { 276 + size_t i; 277 + 278 + for (i = 0; i < s->size; i++) { 279 + if (!s->works[i]->task) 280 + continue; 281 + 282 + put_task_struct(s->works[i]->task); 283 + } 284 + 285 + for (i = 0; i < s->capacity; i++) 286 + kfree(s->works[i]); 287 + kfree(s->works); 288 + s->works = NULL; 289 + s->size = 0; 290 + s->capacity = 0; 291 + } 292 + 293 + /* 294 + * count_additional_threads - counts the sibling threads that are not in works 295 + */ 296 + static size_t count_additional_threads(const struct tsync_works *works) 297 + { 298 + struct task_struct *thread, *caller; 299 + size_t n = 0; 300 + 301 + caller = current; 302 + 303 + guard(rcu)(); 304 + 305 + for_each_thread(caller, thread) { 306 + /* Skip current, since it is initiating the sync. 
*/ 307 + if (thread == caller) 308 + continue; 309 + 310 + /* Skip exited threads. */ 311 + if (thread->flags & PF_EXITING) 312 + continue; 313 + 314 + /* Skip threads that we have already seen. */ 315 + if (tsync_works_contains_task(works, thread)) 316 + continue; 317 + 318 + n++; 319 + } 320 + return n; 321 + } 322 + 323 + /* 324 + * schedule_task_work - adds task_work for all eligible sibling threads 325 + * which have not been scheduled yet 326 + * 327 + * For each added task_work, atomically increments shared_ctx->num_preparing and 328 + * shared_ctx->num_unfinished. 329 + * 330 + * Returns: 331 + * true, if at least one eligible sibling thread was found 332 + */ 333 + static bool schedule_task_work(struct tsync_works *works, 334 + struct tsync_shared_context *shared_ctx) 335 + { 336 + int err; 337 + struct task_struct *thread, *caller; 338 + struct tsync_work *ctx; 339 + bool found_more_threads = false; 340 + 341 + caller = current; 342 + 343 + guard(rcu)(); 344 + 345 + for_each_thread(caller, thread) { 346 + /* Skip current, since it is initiating the sync. */ 347 + if (thread == caller) 348 + continue; 349 + 350 + /* Skip exited threads. */ 351 + if (thread->flags & PF_EXITING) 352 + continue; 353 + 354 + /* Skip threads that we already looked at. */ 355 + if (tsync_works_contains_task(works, thread)) 356 + continue; 357 + 358 + /* 359 + * We found a sibling thread that is not doing its task_work yet, and 360 + * which might spawn new threads before our task work runs, so we need 361 + * at least one more round in the outer loop. 362 + */ 363 + found_more_threads = true; 364 + 365 + ctx = tsync_works_provide(works, thread); 366 + if (!ctx) { 367 + /* 368 + * We ran out of preallocated contexts -- we need to try again with 369 + * this thread at a later time! 370 + * found_more_threads is already true at this point. 
371 + */ 372 + break; 373 + } 374 + 375 + ctx->shared_ctx = shared_ctx; 376 + 377 + atomic_inc(&shared_ctx->num_preparing); 378 + atomic_inc(&shared_ctx->num_unfinished); 379 + 380 + init_task_work(&ctx->work, restrict_one_thread_callback); 381 + err = task_work_add(thread, &ctx->work, TWA_SIGNAL); 382 + if (err) { 383 + /* 384 + * task_work_add() only fails if the task is about to exit. We 385 + * checked that earlier, but it can happen as a race. Resume 386 + * without setting an error, as the task is probably gone in the 387 + * next loop iteration. For consistency, remove the task from ctx 388 + * so that it does not look like we handed it a task_work. 389 + */ 390 + put_task_struct(ctx->task); 391 + ctx->task = NULL; 392 + 393 + atomic_dec(&shared_ctx->num_preparing); 394 + atomic_dec(&shared_ctx->num_unfinished); 395 + } 396 + } 397 + 398 + return found_more_threads; 399 + } 400 + 401 + /* 402 + * cancel_tsync_works - cancel all task works where it is possible 403 + * 404 + * Task works can be canceled as long as they are still queued and have not 405 + * started running. If they get canceled, we decrement 406 + * shared_ctx->num_preparing and shared_ctx->num_unfished and mark the two 407 + * completions if needed, as if the task was never scheduled. 408 + */ 409 + static void cancel_tsync_works(struct tsync_works *works, 410 + struct tsync_shared_context *shared_ctx) 411 + { 412 + int i; 413 + 414 + for (i = 0; i < works->size; i++) { 415 + if (!task_work_cancel(works->works[i]->task, 416 + &works->works[i]->work)) 417 + continue; 418 + 419 + /* After dequeueing, act as if the task work had executed. 
*/ 420 + 421 + if (atomic_dec_return(&shared_ctx->num_preparing) == 0) 422 + complete_all(&shared_ctx->all_prepared); 423 + 424 + if (atomic_dec_return(&shared_ctx->num_unfinished) == 0) 425 + complete_all(&shared_ctx->all_finished); 426 + } 427 + } 428 + 429 + /* 430 + * restrict_sibling_threads - enables a Landlock policy for all sibling threads 431 + */ 432 + int landlock_restrict_sibling_threads(const struct cred *old_cred, 433 + const struct cred *new_cred) 434 + { 435 + int err; 436 + struct tsync_shared_context shared_ctx; 437 + struct tsync_works works = {}; 438 + size_t newly_discovered_threads; 439 + bool found_more_threads; 440 + 441 + atomic_set(&shared_ctx.preparation_error, 0); 442 + init_completion(&shared_ctx.all_prepared); 443 + init_completion(&shared_ctx.ready_to_commit); 444 + atomic_set(&shared_ctx.num_unfinished, 1); 445 + init_completion(&shared_ctx.all_finished); 446 + shared_ctx.old_cred = old_cred; 447 + shared_ctx.new_cred = new_cred; 448 + shared_ctx.set_no_new_privs = task_no_new_privs(current); 449 + 450 + /* 451 + * We schedule a pseudo-signal task_work for each of the calling task's 452 + * sibling threads. In the task work, each thread: 453 + * 454 + * 1) runs prepare_creds() and writes back the error to 455 + * shared_ctx.preparation_error, if needed. 456 + * 457 + * 2) signals that it's done with prepare_creds() to the calling task. 458 + * (completion "all_prepared"). 459 + * 460 + * 3) waits for the completion "ready_to_commit". This is sent by the 461 + * calling task after ensuring that all sibling threads have done 462 + * with the "preparation" stage. 463 + * 464 + * After this barrier is reached, it's safe to read 465 + * shared_ctx.preparation_error. 466 + * 467 + * 4) reads shared_ctx.preparation_error and then either does commit_creds() 468 + * or abort_creds(). 
469 + * 470 + * 5) signals that it's done altogether (barrier synchronization 471 + * "all_finished") 472 + * 473 + * Unlike seccomp, which modifies sibling tasks directly, we do not need to 474 + * acquire the cred_guard_mutex and sighand->siglock: 475 + * 476 + * - As in our case, all threads are themselves exchanging their own struct 477 + * cred through the credentials API, no locks are needed for that. 478 + * - Our for_each_thread() loops are protected by RCU. 479 + * - We do not acquire a lock to keep the list of sibling threads stable 480 + * between our for_each_thread loops. If the list of available sibling 481 + * threads changes between these for_each_thread loops, we make up for 482 + * that by continuing to look for threads until they are all discovered 483 + * and have entered their task_work, where they are unable to spawn new 484 + * threads. 485 + */ 486 + do { 487 + /* In RCU read-lock, count the threads we need. */ 488 + newly_discovered_threads = count_additional_threads(&works); 489 + 490 + if (newly_discovered_threads == 0) 491 + break; /* done */ 492 + 493 + err = tsync_works_grow_by(&works, newly_discovered_threads, 494 + GFP_KERNEL_ACCOUNT); 495 + if (err) { 496 + atomic_set(&shared_ctx.preparation_error, err); 497 + break; 498 + } 499 + 500 + /* 501 + * The "all_prepared" barrier is used locally to the loop body, this use 502 + * of for_each_thread(). We can reset it on each loop iteration because 503 + * all previous loop iterations are done with it already. 504 + * 505 + * num_preparing is initialized to 1 so that the counter can not go to 0 506 + * and mark the completion as done before all task works are registered. 507 + * We decrement it at the end of the loop body. 508 + */ 509 + atomic_set(&shared_ctx.num_preparing, 1); 510 + reinit_completion(&shared_ctx.all_prepared); 511 + 512 + /* 513 + * In RCU read-lock, schedule task work on newly discovered sibling 514 + * tasks. 
515 + */ 516 + found_more_threads = schedule_task_work(&works, &shared_ctx); 517 + 518 + /* 519 + * Decrement num_preparing for current, to undo that we initialized it 520 + * to 1 a few lines above. 521 + */ 522 + if (atomic_dec_return(&shared_ctx.num_preparing) > 0) { 523 + if (wait_for_completion_interruptible( 524 + &shared_ctx.all_prepared)) { 525 + /* In case of interruption, we need to retry the system call. */ 526 + atomic_set(&shared_ctx.preparation_error, 527 + -ERESTARTNOINTR); 528 + 529 + /* 530 + * Cancel task works for tasks that did not start running yet, 531 + * and decrement all_prepared and num_unfinished accordingly. 532 + */ 533 + cancel_tsync_works(&works, &shared_ctx); 534 + 535 + /* 536 + * The remaining task works have started running, so waiting for 537 + * their completion will finish. 538 + */ 539 + wait_for_completion(&shared_ctx.all_prepared); 540 + } 541 + } 542 + } while (found_more_threads && 543 + !atomic_read(&shared_ctx.preparation_error)); 544 + 545 + /* 546 + * We now have all sibling threads blocking and in "prepared" state in the 547 + * task work. Ask all threads to commit. 548 + */ 549 + complete_all(&shared_ctx.ready_to_commit); 550 + 551 + /* 552 + * Decrement num_unfinished for current, to undo that we initialized it to 1 553 + * at the beginning. 554 + */ 555 + if (atomic_dec_return(&shared_ctx.num_unfinished) > 0) 556 + wait_for_completion(&shared_ctx.all_finished); 557 + 558 + tsync_works_release(&works); 559 + 560 + return atomic_read(&shared_ctx.preparation_error); 561 + }
+16
security/landlock/tsync.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Landlock - Cross-thread ruleset enforcement 4 + * 5 + * Copyright © 2025 Google LLC 6 + */ 7 + 8 + #ifndef _SECURITY_LANDLOCK_TSYNC_H 9 + #define _SECURITY_LANDLOCK_TSYNC_H 10 + 11 + #include <linux/cred.h> 12 + 13 + int landlock_restrict_sibling_threads(const struct cred *old_cred, 14 + const struct cred *new_cred); 15 + 16 + #endif /* _SECURITY_LANDLOCK_TSYNC_H */
+2 -2
tools/testing/selftests/landlock/base_test.c
··· 76 76 const struct landlock_ruleset_attr ruleset_attr = { 77 77 .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, 78 78 }; 79 - ASSERT_EQ(7, landlock_create_ruleset(NULL, 0, 79 + ASSERT_EQ(8, landlock_create_ruleset(NULL, 0, 80 80 LANDLOCK_CREATE_RULESET_VERSION)); 81 81 82 82 ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, ··· 306 306 307 307 TEST(restrict_self_flags) 308 308 { 309 - const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF; 309 + const __u32 last_flag = LANDLOCK_RESTRICT_SELF_TSYNC; 310 310 311 311 /* Tests invalid flag combinations. */ 312 312