Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86_64/bug: Implement __WARN_printf()

The basic idea is to have __WARN_printf() call a vararg function
(__WARN_trap()) such that the compiler can do the optimal calling
convention for us. The body of that function is just a #UD-raising
instruction; the exception handler then sets up a va_list from pt_regs.

But because the trap will be in a called function, the bug_entry must
be passed in. Have that be the first argument, with the format tucked
away inside the bug_entry.

The comments should clarify the real fun details.

The big downside is that all WARNs will now show:

RIP: 0010:__WARN_trap:+0

One possible solution is to simply discard the top frame when
unwinding. A follow-up patch takes care of this slightly differently
by abusing the x86 static_call implementation.

This changes (with the next patches):

WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
"corrupted preempt_count: %s/%d/0x%x\n",
current->comm, current->pid, preempt_count())

from:

cmpl $2, %ecx #, _7
jne .L1472
...

.L1472:
cmpb $0, __already_done.11(%rip)
je .L1513
...

.L1513:
movb $1, __already_done.11(%rip)
movl 1424(%r14), %edx # _15->pid, _15->pid
leaq 1912(%r14), %rsi #, _17
movq $.LC43, %rdi #,
call __warn_printk #
ud2
.pushsection __bug_table,"aw"
2:
.long 1b - . # bug_entry::bug_addr
.long .LC1 - . # bug_entry::file
.word 5093 # bug_entry::line
.word 2313 # bug_entry::flags
.org 2b + 12
.popsection
.pushsection .discard.annotate_insn,"M", @progbits, 8
.long 1b - .
.long 8 # ANNOTYPE_REACHABLE
.popsection

into:

cmpl $2, %ecx #, _7
jne .L1442 #,
...

.L1442:
lea (2f)(%rip), %rdi
1:
.pushsection __bug_table,"aw"
2:
.long 1b - . # bug_entry::bug_addr
.long .LC43 - . # bug_entry::format
.long .LC1 - . # bug_entry::file
.word 5093 # bug_entry::line
.word 2323 # bug_entry::flags
.org 2b + 16
.popsection
movl 1424(%r14), %edx # _19->pid, _19->pid
leaq 1912(%r14), %rsi #, _13
ud1 (%edx), %rdi

Notably, by pushing everything into the exception handler, the handler
can also take care of the ONCE logic: the __already_done check no longer
appears at the call site.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251110115758.213813530@infradead.org

+171 -16
+8
arch/x86/entry/entry.S
··· 32 32 /* For KVM */ 33 33 EXPORT_SYMBOL_GPL(write_ibpb); 34 34 35 + SYM_FUNC_START(__WARN_trap) 36 + ANNOTATE_NOENDBR 37 + ANNOTATE_REACHABLE 38 + ud1 (%edx), %_ASM_ARG1 39 + RET 40 + SYM_FUNC_END(__WARN_trap) 41 + EXPORT_SYMBOL(__WARN_trap) 42 + 35 43 .popsection 36 44 37 45 /*
+58 -4
arch/x86/include/asm/bug.h
··· 7 7 #include <linux/objtool.h> 8 8 #include <asm/asm.h> 9 9 10 + #ifndef __ASSEMBLY__ 11 + struct bug_entry; 12 + extern void __WARN_trap(struct bug_entry *bug, ...); 13 + #endif 14 + 10 15 /* 11 16 * Despite that some emulators terminate on UD2, we use it for WARN(). 12 17 */ ··· 36 31 #define BUG_UD2 0xfffe 37 32 #define BUG_UD1 0xfffd 38 33 #define BUG_UD1_UBSAN 0xfffc 34 + #define BUG_UD1_WARN 0xfffb 39 35 #define BUG_UDB 0xffd6 40 36 #define BUG_LOCK 0xfff0 41 37 ··· 64 58 #define __BUG_ENTRY_FORMAT(format) 65 59 #endif 66 60 61 + #ifdef CONFIG_X86_64 62 + #define HAVE_ARCH_BUG_FORMAT_ARGS 63 + #endif 64 + 67 65 #define __BUG_ENTRY(format, file, line, flags) \ 68 66 __BUG_REL("1b") "\t# bug_entry::bug_addr\n" \ 69 67 __BUG_ENTRY_FORMAT(format) \ 70 68 __BUG_ENTRY_VERBOSE(file, line) \ 71 69 "\t.word " flags "\t# bug_entry::flags\n" 72 70 73 - #define _BUG_FLAGS_ASM(ins, format, file, line, flags, size, extra) \ 74 - "1:\t" ins "\n" \ 71 + #define _BUG_FLAGS_ASM(format, file, line, flags, size, extra) \ 75 72 ".pushsection __bug_table,\"aw\"\n\t" \ 76 73 ANNOTATE_DATA_SPECIAL \ 77 74 "2:\n\t" \ ··· 91 82 92 83 #define _BUG_FLAGS(cond_str, ins, flags, extra) \ 93 84 do { \ 94 - asm_inline volatile(_BUG_FLAGS_ASM(ins, "%c[fmt]", "%c[file]", \ 85 + asm_inline volatile("1:\t" ins "\n" \ 86 + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ 95 87 "%c[line]", "%c[fl]", \ 96 88 "%c[size]", extra) \ 97 89 : : [fmt] "i" (WARN_CONDITION_STR(cond_str)), \ ··· 103 93 } while (0) 104 94 105 95 #define ARCH_WARN_ASM(file, line, flags, size) \ 106 - _BUG_FLAGS_ASM(ASM_UD2, "0", file, line, flags, size, "") 96 + "1:\t " ASM_UD2 "\n" \ 97 + _BUG_FLAGS_ASM("0", file, line, flags, size, "") 107 98 108 99 #else 109 100 ··· 136 125 _BUG_FLAGS(cond_str, ASM_UD2, __flags, ARCH_WARN_REACHABLE); \ 137 126 instrumentation_end(); \ 138 127 } while (0) 128 + 129 + #ifdef HAVE_ARCH_BUG_FORMAT_ARGS 130 + 131 + #ifndef __ASSEMBLY__ 132 + struct pt_regs; 133 + struct sysv_va_list { /* from AMD64 
System V ABI */ 134 + unsigned int gp_offset; 135 + unsigned int fp_offset; 136 + void *overflow_arg_area; 137 + void *reg_save_area; 138 + }; 139 + struct arch_va_list { 140 + unsigned long regs[6]; 141 + struct sysv_va_list args; 142 + }; 143 + extern void *__warn_args(struct arch_va_list *args, struct pt_regs *regs); 144 + #endif /* __ASSEMBLY__ */ 145 + 146 + #define __WARN_bug_entry(flags, format) ({ \ 147 + struct bug_entry *bug; \ 148 + asm_inline volatile("lea (2f)(%%rip), %[addr]\n1:\n" \ 149 + _BUG_FLAGS_ASM("%c[fmt]", "%c[file]", \ 150 + "%c[line]", "%c[fl]", \ 151 + "%c[size]", "") \ 152 + : [addr] "=r" (bug) \ 153 + : [fmt] "i" (format), \ 154 + [file] "i" (__FILE__), \ 155 + [line] "i" (__LINE__), \ 156 + [fl] "i" (flags), \ 157 + [size] "i" (sizeof(struct bug_entry))); \ 158 + bug; }) 159 + 160 + #define __WARN_print_arg(flags, format, arg...) \ 161 + do { \ 162 + int __flags = (flags) | BUGFLAG_WARNING | BUGFLAG_ARGS ; \ 163 + __WARN_trap(__WARN_bug_entry(__flags, format), ## arg); \ 164 + asm (""); /* inhibit tail-call optimization */ \ 165 + } while (0) 166 + 167 + #define __WARN_printf(taint, fmt, arg...) \ 168 + __WARN_print_arg(BUGFLAG_TAINT(taint), fmt, ## arg) 169 + 170 + #endif /* HAVE_ARCH_BUG_FORMAT_ARGS */ 139 171 140 172 #include <asm-generic/bug.h> 141 173
+105 -12
arch/x86/kernel/traps.c
··· 102 102 * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax 103 103 * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax 104 104 * static_call: 0f b9 cc ud1 %esp,%ecx 105 + * __WARN_trap: 67 48 0f b9 3a ud1 (%edx),%reg 105 106 * 106 - * Notably UBSAN uses EAX, static_call uses ECX. 107 + * Notable, since __WARN_trap can use all registers, the distinction between 108 + * UD1 users is through R/M. 107 109 */ 108 110 __always_inline int decode_bug(unsigned long addr, s32 *imm, int *len) 109 111 { 110 112 unsigned long start = addr; 113 + u8 v, reg, rm, rex = 0; 114 + int type = BUG_UD1; 111 115 bool lock = false; 112 - u8 v; 113 116 114 117 if (addr < TASK_SIZE_MAX) 115 118 return BUG_NONE; 116 119 117 - v = *(u8 *)(addr++); 118 - if (v == INSN_ASOP) 120 + for (;;) { 119 121 v = *(u8 *)(addr++); 122 + if (v == INSN_ASOP) 123 + continue; 120 124 121 - if (v == INSN_LOCK) { 122 - lock = true; 123 - v = *(u8 *)(addr++); 125 + if (v == INSN_LOCK) { 126 + lock = true; 127 + continue; 128 + } 129 + 130 + if ((v & 0xf0) == 0x40) { 131 + rex = v; 132 + continue; 133 + } 134 + 135 + break; 124 136 } 125 137 126 138 switch (v) { ··· 168 156 if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4) 169 157 addr++; /* SIB */ 170 158 159 + reg = X86_MODRM_REG(v) + 8*!!X86_REX_R(rex); 160 + rm = X86_MODRM_RM(v) + 8*!!X86_REX_B(rex); 161 + 171 162 /* Decode immediate, if present */ 172 163 switch (X86_MODRM_MOD(v)) { 173 164 case 0: if (X86_MODRM_RM(v) == 5) 174 - addr += 4; /* RIP + disp32 */ 165 + addr += 4; /* RIP + disp32 */ 166 + 167 + if (rm == 0) /* (%eax) */ 168 + type = BUG_UD1_UBSAN; 169 + 170 + if (rm == 2) { /* (%edx) */ 171 + *imm = reg; 172 + type = BUG_UD1_WARN; 173 + } 175 174 break; 176 175 177 176 case 1: *imm = *(s8 *)addr; 178 177 addr += 1; 178 + if (rm == 0) /* (%eax) */ 179 + type = BUG_UD1_UBSAN; 179 180 break; 180 181 181 182 case 2: *imm = *(s32 *)addr; 182 183 addr += 4; 184 + if (rm == 0) /* (%eax) */ 185 + type = BUG_UD1_UBSAN; 183 186 break; 184 187 185 188 case 3: break; 
··· 203 176 /* record instruction length */ 204 177 *len = addr - start; 205 178 206 - if (X86_MODRM_REG(v) == 0) /* EAX */ 207 - return BUG_UD1_UBSAN; 208 - 209 - return BUG_UD1; 179 + return type; 210 180 } 211 181 182 + static inline unsigned long pt_regs_val(struct pt_regs *regs, int nr) 183 + { 184 + int offset = pt_regs_offset(regs, nr); 185 + if (WARN_ON_ONCE(offset < -0)) 186 + return 0; 187 + return *((unsigned long *)((void *)regs + offset)); 188 + } 189 + 190 + #ifdef HAVE_ARCH_BUG_FORMAT_ARGS 191 + /* 192 + * Create a va_list from an exception context. 193 + */ 194 + void *__warn_args(struct arch_va_list *args, struct pt_regs *regs) 195 + { 196 + /* 197 + * Register save area; populate with function call argument registers 198 + */ 199 + args->regs[0] = regs->di; 200 + args->regs[1] = regs->si; 201 + args->regs[2] = regs->dx; 202 + args->regs[3] = regs->cx; 203 + args->regs[4] = regs->r8; 204 + args->regs[5] = regs->r9; 205 + 206 + /* 207 + * From the ABI document: 208 + * 209 + * @gp_offset - the element holds the offset in bytes from 210 + * reg_save_area to the place where the next available general purpose 211 + * argument register is saved. In case all argument registers have 212 + * been exhausted, it is set to the value 48 (6*8). 213 + * 214 + * @fp_offset - the element holds the offset in bytes from 215 + * reg_save_area to the place where the next available floating point 216 + * argument is saved. In case all argument registers have been 217 + * exhausted, it is set to the value 176 (6*8 + 8*16) 218 + * 219 + * @overflow_arg_area - this pointer is used to fetch arguments passed 220 + * on the stack. It is initialized with the address of the first 221 + * argument passed on the stack, if any, and then always updated to 222 + * point to the start of the next argument on the stack. 223 + * 224 + * @reg_save_area - the element points to the start of the register 225 + * save area. 
226 + * 227 + * Notably the vararg starts with the second argument and there are no 228 + * floating point arguments in the kernel. 229 + */ 230 + args->args.gp_offset = 1*8; 231 + args->args.fp_offset = 6*8 + 8*16; 232 + args->args.reg_save_area = &args->regs; 233 + args->args.overflow_arg_area = (void *)regs->sp; 234 + 235 + /* 236 + * If the exception came from __WARN_trap, there is a return 237 + * address on the stack, skip that. This is why any __WARN_trap() 238 + * caller must inhibit tail-call optimization. 239 + */ 240 + if ((void *)regs->ip == &__WARN_trap) 241 + args->args.overflow_arg_area += 8; 242 + 243 + return &args->args; 244 + } 245 + #endif /* HAVE_ARCH_BUG_FORMAT */ 212 246 213 247 static nokprobe_inline int 214 248 do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, ··· 422 334 raw_local_irq_enable(); 423 335 424 336 switch (ud_type) { 337 + case BUG_UD1_WARN: 338 + if (report_bug_entry((void *)pt_regs_val(regs, ud_imm), regs) == BUG_TRAP_TYPE_WARN) 339 + handled = true; 340 + break; 341 + 425 342 case BUG_UD2: 426 343 if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { 427 344 handled = true;