Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull ucounts fixes from Eric Biederman:
"There has been one very hard to track down bug in the ucount code that
we have been tracking since roughly v5.14 was released. Alex managed
to find a reliable reproducer a few days ago and then I was able to
instrument the code and figure out what the issue was.

It turns out the sigqueue_alloc single atomic operation optimization
did not play nicely with ucounts' multi-level rlimits. It turned out
that either sigqueue_alloc or sigqueue_free could be operating on
multiple levels and trigger the conditions for the optimization on
more than one level at the same time.

To deal with that situation I have introduced inc_rlimit_get_ucounts
and dec_rlimit_put_ucounts, which focus just on the optimization and
the rlimit and ucount changes.

While looking into the big bug I found a couple of other little issues
so I am including those fixes here as well.

When I have time I would very much like to dig into process ownership
of the shared signal queue and see if we could pick a single owner for
the entire queue so that all of the rlimits can count to that owner.
That should entirely remove the need to call get_ucounts and
put_ucounts in sigqueue_alloc and sigqueue_free. It is difficult
because Linux, unlike POSIX, supports setuid that works on a single
thread."

* 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring
ucounts: Proper error handling in set_cred_ucounts
ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucounts in commit_creds
ucounts: Fix signal ucount refcounting

+69 -24
+2
include/linux/user_namespace.h
··· 127 127 128 128 long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); 129 129 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); 130 + long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type); 131 + void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type); 130 132 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); 131 133 132 134 static inline void set_rlimit_ucount_max(struct user_namespace *ns,
+4 -5
kernel/cred.c
··· 225 225 #ifdef CONFIG_DEBUG_CREDENTIALS 226 226 new->magic = CRED_MAGIC; 227 227 #endif 228 - new->ucounts = get_ucounts(&init_ucounts); 229 - 230 228 if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) 231 229 goto error; 232 230 ··· 499 501 inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); 500 502 rcu_assign_pointer(task->real_cred, new); 501 503 rcu_assign_pointer(task->cred, new); 502 - if (new->user != old->user) 504 + if (new->user != old->user || new->user_ns != old->user_ns) 503 505 dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); 504 506 alter_cred_subscribers(old, -2); 505 507 ··· 667 669 { 668 670 struct task_struct *task = current; 669 671 const struct cred *old = task->real_cred; 670 - struct ucounts *old_ucounts = new->ucounts; 672 + struct ucounts *new_ucounts, *old_ucounts = new->ucounts; 671 673 672 674 if (new->user == old->user && new->user_ns == old->user_ns) 673 675 return 0; ··· 679 681 if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) 680 682 return 0; 681 683 682 - if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid))) 684 + if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid))) 683 685 return -EAGAIN; 684 686 687 + new->ucounts = new_ucounts; 685 688 if (old_ucounts) 686 689 put_ucounts(old_ucounts); 687 690
+6 -19
kernel/signal.c
··· 426 426 */ 427 427 rcu_read_lock(); 428 428 ucounts = task_ucounts(t); 429 - sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); 430 - switch (sigpending) { 431 - case 1: 432 - if (likely(get_ucounts(ucounts))) 433 - break; 434 - fallthrough; 435 - case LONG_MAX: 436 - /* 437 - * we need to decrease the ucount in the userns tree on any 438 - * failure to avoid counts leaking. 439 - */ 440 - dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); 441 - rcu_read_unlock(); 442 - return NULL; 443 - } 429 + sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); 444 430 rcu_read_unlock(); 431 + if (!sigpending) 432 + return NULL; 445 433 446 434 if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { 447 435 q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); ··· 438 450 } 439 451 440 452 if (unlikely(q == NULL)) { 441 - if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) 442 - put_ucounts(ucounts); 453 + dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); 443 454 } else { 444 455 INIT_LIST_HEAD(&q->list); 445 456 q->flags = sigqueue_flags; ··· 451 464 { 452 465 if (q->flags & SIGQUEUE_PREALLOC) 453 466 return; 454 - if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) { 455 - put_ucounts(q->ucounts); 467 + if (q->ucounts) { 468 + dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); 456 469 q->ucounts = NULL; 457 470 } 458 471 kmem_cache_free(sigqueue_cachep, q);
+49
kernel/ucount.c
··· 284 284 return (new == 0); 285 285 } 286 286 287 + static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, 288 + struct ucounts *last, enum ucount_type type) 289 + { 290 + struct ucounts *iter, *next; 291 + for (iter = ucounts; iter != last; iter = next) { 292 + long dec = atomic_long_add_return(-1, &iter->ucount[type]); 293 + WARN_ON_ONCE(dec < 0); 294 + next = iter->ns->ucounts; 295 + if (dec == 0) 296 + put_ucounts(iter); 297 + } 298 + } 299 + 300 + void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type) 301 + { 302 + do_dec_rlimit_put_ucounts(ucounts, NULL, type); 303 + } 304 + 305 + long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) 306 + { 307 + /* Caller must hold a reference to ucounts */ 308 + struct ucounts *iter; 309 + long dec, ret = 0; 310 + 311 + for (iter = ucounts; iter; iter = iter->ns->ucounts) { 312 + long max = READ_ONCE(iter->ns->ucount_max[type]); 313 + long new = atomic_long_add_return(1, &iter->ucount[type]); 314 + if (new < 0 || new > max) 315 + goto unwind; 316 + if (iter == ucounts) 317 + ret = new; 318 + /* 319 + * Grab an extra ucount reference for the caller when 320 + * the rlimit count was previously 0. 321 + */ 322 + if (new != 1) 323 + continue; 324 + if (!get_ucounts(iter)) 325 + goto dec_unwind; 326 + } 327 + return ret; 328 + dec_unwind: 329 + dec = atomic_long_add_return(-1, &iter->ucount[type]); 330 + WARN_ON_ONCE(dec < 0); 331 + unwind: 332 + do_dec_rlimit_put_ucounts(ucounts, iter, type); 333 + return 0; 334 + } 335 + 287 336 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) 288 337 { 289 338 struct ucounts *iter;
+8
security/keys/process_keys.c
··· 918 918 return; 919 919 } 920 920 921 + /* If get_ucounts fails more bits are needed in the refcount */ 922 + if (unlikely(!get_ucounts(old->ucounts))) { 923 + WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); 924 + put_cred(new); 925 + return; 926 + } 927 + 921 928 new-> uid = old-> uid; 922 929 new-> euid = old-> euid; 923 930 new-> suid = old-> suid; ··· 934 927 new-> sgid = old-> sgid; 935 928 new->fsgid = old->fsgid; 936 929 new->user = get_uid(old->user); 930 + new->ucounts = old->ucounts; 937 931 new->user_ns = get_user_ns(old->user_ns); 938 932 new->group_info = get_group_info(old->group_info); 939 933