Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-allow-utf-8-literals-in-bpf_bprintf_prepare'

Yihan Ding says:

====================
bpf: allow UTF-8 literals in bpf_bprintf_prepare()

bpf_bprintf_prepare() currently rejects any non-ASCII byte in format
strings, so helpers such as bpf_trace_printk() fail to emit UTF-8
literal text even when those bytes are not part of a format specifier.

Keep plain text permissive while continuing to parse '%' sequences as
ASCII-only. Patch 1 also updates test_snprintf_negative() so the
selftests stay consistent during bisection. Patch 2 then extends
trace_printk coverage for both the valid UTF-8 literal case and the
invalid non-ASCII-after-'%' case.

Changes in v3:
- drop Suggested-by trailers and move review credit into this changelog
- update test_snprintf_negative() in patch 1/2 so plain non-ASCII text is
  accepted while non-ASCII after '%' is still rejected, keeping
  ./test_progs -t snprintf aligned with the new behavior
- clarify the trace_printk negative case with an explicit invalid format
  string and comment
- address Paul Chaignon's review feedback and keep the negative coverage
  requested earlier by Alan Maguire

Changes in v2:
- split the core change and selftest updates into two patches
- drop unnecessary isspace()/ispunct() casts
- add comments to clarify plain-text vs format-specifier handling
- add a negative selftest for non-ASCII bytes inside '%' sequences

Testing:
- Reproduced on x86_64 without the core fix: ASCII trace output works,
  while UTF-8 literal text in bpf_trace_printk() is rejected and
  produces no trace output
- Verified with tools/testing/selftests/bpf: ./test_progs -t trace_printk
- Verified with tools/testing/selftests/bpf: ./test_progs -t snprintf
====================

Link: https://patch.msgid.link/20260416120142.1420646-1-dingyihan@uniontech.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+50 -8
+16 -1
kernel/bpf/helpers.c
··· 845 845 data->buf = buffers->buf; 846 846 847 847 for (i = 0; i < fmt_size; i++) { 848 - if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { 848 + unsigned char c = fmt[i]; 849 + 850 + /* 851 + * Permit bytes >= 0x80 in plain text so UTF-8 literals can pass 852 + * through unchanged, while still rejecting ASCII control bytes. 853 + */ 854 + if (isascii(c) && !isprint(c) && !isspace(c)) { 849 855 err = -EINVAL; 850 856 goto out; 851 857 } ··· 873 867 * always access fmt[i + 1], in the worst case it will be a 0 874 868 */ 875 869 i++; 870 + c = fmt[i]; 871 + /* 872 + * The format parser below only understands ASCII conversion 873 + * specifiers and modifiers, so reject non-ASCII after '%'. 874 + */ 875 + if (!isascii(c)) { 876 + err = -EINVAL; 877 + goto out; 878 + } 876 879 877 880 /* skip optional "[0 +-][num]" width formatting field */ 878 881 while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+2 -1
tools/testing/selftests/bpf/prog_tests/snprintf.c
··· 114 114 ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5"); 115 115 ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6"); 116 116 ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); 117 - ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); 117 + ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text"); 118 + ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier"); 118 119 ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); 119 120 ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); 120 121 ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
+22 -6
tools/testing/selftests/bpf/prog_tests/trace_printk.c
··· 6 6 #include "trace_printk.lskel.h" 7 7 8 8 #define SEARCHMSG "testing,testing" 9 + #define SEARCHMSG_UTF8 "中文,测试" 9 10 10 11 static void trace_pipe_cb(const char *str, void *data) 11 12 { 12 13 if (strstr(str, SEARCHMSG) != NULL) 13 - (*(int *)data)++; 14 + ((int *)data)[0]++; 15 + if (strstr(str, SEARCHMSG_UTF8)) 16 + ((int *)data)[1]++; 14 17 } 15 18 16 19 void serial_test_trace_printk(void) 17 20 { 18 21 struct trace_printk_lskel__bss *bss; 19 22 struct trace_printk_lskel *skel; 20 - int err = 0, found = 0; 23 + int err = 0, found[2] = {}; 21 24 22 25 skel = trace_printk_lskel__open(); 23 26 if (!ASSERT_OK_PTR(skel, "trace_printk__open")) ··· 49 46 if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret")) 50 47 goto cleanup; 51 48 52 - /* verify our search string is in the trace buffer */ 53 - ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000), 54 - "read_trace_pipe_iter"); 49 + if (!ASSERT_GT(bss->trace_printk_utf8_ran, 0, "bss->trace_printk_utf8_ran")) 50 + goto cleanup; 55 51 56 - if (!ASSERT_EQ(found, bss->trace_printk_ran, "found")) 52 + if (!ASSERT_GT(bss->trace_printk_utf8_ret, 0, "bss->trace_printk_utf8_ret")) 53 + goto cleanup; 54 + 55 + if (!ASSERT_LT(bss->trace_printk_invalid_spec_ret, 0, 56 + "bss->trace_printk_invalid_spec_ret")) 57 + goto cleanup; 58 + 59 + /* verify our search strings are in the trace buffer */ 60 + ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, found, 1000), 61 + "read_trace_pipe_iter"); 62 + 63 + if (!ASSERT_EQ(found[0], bss->trace_printk_ran, "found")) 64 + goto cleanup; 65 + 66 + if (!ASSERT_EQ(found[1], bss->trace_printk_utf8_ran, "found_utf8")) 57 67 goto cleanup; 58 68 59 69 cleanup:
+10
tools/testing/selftests/bpf/progs/trace_printk.c
··· 10 10 11 11 int trace_printk_ret = 0; 12 12 int trace_printk_ran = 0; 13 + int trace_printk_invalid_spec_ret = 0; 14 + int trace_printk_utf8_ret = 0; 15 + int trace_printk_utf8_ran = 0; 13 16 14 17 const char fmt[] = "Testing,testing %d\n"; 18 + static const char utf8_fmt[] = "中文,测试 %d\n"; 19 + /* Non-ASCII bytes after '%' must still be rejected. */ 20 + static const char invalid_spec_fmt[] = "%\x80\n"; 15 21 16 22 SEC("fentry/" SYS_PREFIX "sys_nanosleep") 17 23 int sys_enter(void *ctx) 18 24 { 19 25 trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), 20 26 ++trace_printk_ran); 27 + trace_printk_utf8_ret = bpf_trace_printk(utf8_fmt, sizeof(utf8_fmt), 28 + ++trace_printk_utf8_ran); 29 + trace_printk_invalid_spec_ret = bpf_trace_printk(invalid_spec_fmt, 30 + sizeof(invalid_spec_fmt)); 21 31 return 0; 22 32 }