Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'always-allow-sleepable-and-fmod_ret-programs-on-syscalls'

Viktor Malik says:

====================
Always allow sleepable and fmod_ret programs on syscalls

Both sleepable and fmod_ret programs are only allowed on selected
functions. For convenience, the error injection list was originally
used as the allowlist.

When error injection is disabled, that list is empty and sleepable
tracing programs, as well as fmod_ret programs, are effectively
unavailable.

This patch series addresses the issue by enabling sleepable and
fmod_ret programs at least on syscalls when error injection is disabled.
More details on why syscalls are used can be found in [1].

[1] https://lore.kernel.org/bpf/CAADnVQK6qP8izg+k9yV0vdcT-+=axtFQ2fKw7D-2Ei-V6WS5Dw@mail.gmail.com/

Changes in v3:
- Handle LoongArch (Leon)
- Add Kumar's and Leon's acks

Changes in v2:
- Check "sys_" prefix instead of "sys" for powerpc syscalls (AI review)
- Add link to the original discussion (Kumar)
- Add explanation why arch syscall prefixes are hard-coded (Leon)
====================

Link: https://patch.msgid.link/cover.1773055375.git.vmalik@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+74 -17
+71 -14
kernel/bpf/verifier.c
··· 24952 24952 } 24953 24953 #define SECURITY_PREFIX "security_" 24954 24954 24955 - static int check_attach_modify_return(unsigned long addr, const char *func_name) 24956 - { 24957 - if (within_error_injection_list(addr) || 24958 - !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) 24959 - return 0; 24960 - 24961 - return -EINVAL; 24962 - } 24955 + #ifdef CONFIG_FUNCTION_ERROR_INJECTION 24963 24956 24964 24957 /* list of non-sleepable functions that are otherwise on 24965 24958 * ALLOW_ERROR_INJECTION list ··· 24974 24981 { 24975 24982 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); 24976 24983 } 24984 + 24985 + static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) 24986 + { 24987 + /* fentry/fexit/fmod_ret progs can be sleepable if they are 24988 + * attached to ALLOW_ERROR_INJECTION and are not in denylist. 24989 + */ 24990 + if (!check_non_sleepable_error_inject(btf_id) && 24991 + within_error_injection_list(addr)) 24992 + return 0; 24993 + 24994 + return -EINVAL; 24995 + } 24996 + 24997 + static int check_attach_modify_return(unsigned long addr, const char *func_name) 24998 + { 24999 + if (within_error_injection_list(addr) || 25000 + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) 25001 + return 0; 25002 + 25003 + return -EINVAL; 25004 + } 25005 + 25006 + #else 25007 + 25008 + /* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code 25009 + * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name() 25010 + * but that just compares two concrete function names. 
25011 + */ 25012 + static bool has_arch_syscall_prefix(const char *func_name) 25013 + { 25014 + #if defined(__x86_64__) 25015 + return !strncmp(func_name, "__x64_", 6); 25016 + #elif defined(__i386__) 25017 + return !strncmp(func_name, "__ia32_", 7); 25018 + #elif defined(__s390x__) 25019 + return !strncmp(func_name, "__s390x_", 8); 25020 + #elif defined(__aarch64__) 25021 + return !strncmp(func_name, "__arm64_", 8); 25022 + #elif defined(__riscv) 25023 + return !strncmp(func_name, "__riscv_", 8); 25024 + #elif defined(__powerpc__) || defined(__powerpc64__) 25025 + return !strncmp(func_name, "sys_", 4); 25026 + #elif defined(__loongarch__) 25027 + return !strncmp(func_name, "sys_", 4); 25028 + #else 25029 + return false; 25030 + #endif 25031 + } 25032 + 25033 + /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */ 25034 + 25035 + static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) 25036 + { 25037 + if (has_arch_syscall_prefix(func_name)) 25038 + return 0; 25039 + 25040 + return -EINVAL; 25041 + } 25042 + 25043 + static int check_attach_modify_return(unsigned long addr, const char *func_name) 25044 + { 25045 + if (has_arch_syscall_prefix(func_name) || 25046 + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) 25047 + return 0; 25048 + 25049 + return -EINVAL; 25050 + } 25051 + 25052 + #endif /* CONFIG_FUNCTION_ERROR_INJECTION */ 24977 25053 24978 25054 int bpf_check_attach_target(struct bpf_verifier_log *log, 24979 25055 const struct bpf_prog *prog, ··· 25323 25261 ret = -EINVAL; 25324 25262 switch (prog->type) { 25325 25263 case BPF_PROG_TYPE_TRACING: 25326 - 25327 - /* fentry/fexit/fmod_ret progs can be sleepable if they are 25328 - * attached to ALLOW_ERROR_INJECTION and are not in denylist. 
25329 - */ 25330 - if (!check_non_sleepable_error_inject(btf_id) && 25331 - within_error_injection_list(addr)) 25264 + if (!check_attach_sleepable(btf_id, addr, tname)) 25332 25265 ret = 0; 25333 25266 /* fentry/fexit/fmod_ret progs can also be sleepable if they are 25334 25267 * in the fmodret id set with the KF_SLEEPABLE flag.
+2 -2
tools/testing/selftests/bpf/progs/refcounted_kptr.c
··· 500 500 return 0; 501 501 } 502 502 503 - SEC("?fentry.s/bpf_testmod_test_read") 503 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 504 504 __success 505 505 int BPF_PROG(rbtree_sleepable_rcu, 506 506 struct file *file, struct kobject *kobj, ··· 534 534 return 0; 535 535 } 536 536 537 - SEC("?fentry.s/bpf_testmod_test_read") 537 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 538 538 __success 539 539 int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock, 540 540 struct file *file, struct kobject *kobj,
+1 -1
tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
··· 93 93 return 0; 94 94 } 95 95 96 - SEC("?fentry.s/bpf_testmod_test_read") 96 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 97 97 __failure __msg("function calls are not allowed while holding a lock") 98 98 int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, 99 99 struct file *file, struct kobject *kobj,