Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 855 lines 24 kB view raw
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace 4 * Copyright (c) 2014-2015 Andrew Lutomirski 5 * 6 * This is a series of tests that exercises the sigreturn(2) syscall and 7 * the IRET / SYSRET paths in the kernel. 8 * 9 * For now, this focuses on the effects of unusual CS and SS values, 10 * and it has a bunch of tests to make sure that ESP/RSP is restored 11 * properly. 12 * 13 * The basic idea behind these tests is to raise(SIGUSR1) to create a 14 * sigcontext frame, plug in the values to be tested, and then return, 15 * which implicitly invokes sigreturn(2) and programs the user context 16 * as desired. 17 * 18 * For tests for which we expect sigreturn and the subsequent return to 19 * user mode to succeed, we return to a short trampoline that generates 20 * SIGTRAP so that the meat of the tests can be ordinary C code in a 21 * SIGTRAP handler. 22 * 23 * The inner workings of each test is documented below. 24 * 25 * Do not run on outdated, unpatched kernels at risk of nasty crashes. 26 */ 27 28#define _GNU_SOURCE 29 30#include <sys/time.h> 31#include <time.h> 32#include <stdlib.h> 33#include <sys/syscall.h> 34#include <unistd.h> 35#include <stdio.h> 36#include <string.h> 37#include <inttypes.h> 38#include <sys/mman.h> 39#include <sys/signal.h> 40#include <sys/ucontext.h> 41#include <asm/ldt.h> 42#include <err.h> 43#include <setjmp.h> 44#include <stddef.h> 45#include <stdbool.h> 46#include <sys/ptrace.h> 47#include <sys/user.h> 48 49#include "helpers.h" 50 51/* Pull in AR_xyz defines. */ 52typedef unsigned int u32; 53typedef unsigned short u16; 54#include "../../../../arch/x86/include/asm/desc_defs.h" 55 56/* 57 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc 58 * headers. 59 */ 60#ifdef __x86_64__ 61/* 62 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on 63 * kernels that save SS in the sigcontext. All kernels that set 64 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp 65 * regardless of SS (i.e. they implement espfix). 66 * 67 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS 68 * when delivering a signal that came from 64-bit code. 69 * 70 * Sigreturn restores SS as follows: 71 * 72 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || 73 * saved CS is not 64-bit) 74 * new SS = saved SS (will fail IRET and signal if invalid) 75 * else 76 * new SS = a flat 32-bit data segment 77 */ 78#define UC_SIGCONTEXT_SS 0x2 79#define UC_STRICT_RESTORE_SS 0x4 80#endif 81 82/* 83 * In principle, this test can run on Linux emulation layers (e.g. 84 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT 85 * entries 0-5 for their own internal purposes, so start our LDT 86 * allocations above that reservation. (The tests don't pass on LX 87 * branded zones, but at least this lets them run.) 88 */ 89#define LDT_OFFSET 6 90 91/* An aligned stack accessible through some of our segments. */ 92static unsigned char stack16[65536] __attribute__((aligned(4096))); 93 94/* 95 * An aligned int3 instruction used as a trampoline. Some of the tests 96 * want to fish out their ss values, so this trampoline copies ss to eax 97 * before the int3. 98 */ 99asm (".pushsection .text\n\t" 100 ".type int3, @function\n\t" 101 ".align 4096\n\t" 102 "int3:\n\t" 103 "mov %ss,%ecx\n\t" 104 "int3\n\t" 105 ".size int3, . - int3\n\t" 106 ".align 4096, 0xcc\n\t" 107 ".popsection"); 108extern char int3[4096]; 109 110/* 111 * At startup, we prepapre: 112 * 113 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero 114 * descriptor or out of bounds). 115 * - code16_sel: A 16-bit LDT code segment pointing to int3. 116 * - data16_sel: A 16-bit LDT data segment pointing to stack16. 117 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. 118 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. 119 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. 120 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to 121 * stack16. 122 * 123 * For no particularly good reason, xyz_sel is a selector value with the 124 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the 125 * descriptor table. These variables will be zero if their respective 126 * segments could not be allocated. 127 */ 128static unsigned short ldt_nonexistent_sel; 129static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; 130 131static unsigned short gdt_data16_idx, gdt_npdata32_idx; 132 133static unsigned short GDT3(int idx) 134{ 135 return (idx << 3) | 3; 136} 137 138static unsigned short LDT3(int idx) 139{ 140 return (idx << 3) | 7; 141} 142 143static void add_ldt(const struct user_desc *desc, unsigned short *var, 144 const char *name) 145{ 146 if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { 147 *var = LDT3(desc->entry_number); 148 } else { 149 printf("[NOTE]\tFailed to create %s segment\n", name); 150 *var = 0; 151 } 152} 153 154static void setup_ldt(void) 155{ 156 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) 157 errx(1, "stack16 is too high\n"); 158 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) 159 errx(1, "int3 is too high\n"); 160 161 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); 162 163 const struct user_desc code16_desc = { 164 .entry_number = LDT_OFFSET + 0, 165 .base_addr = (unsigned long)int3, 166 .limit = 4095, 167 .seg_32bit = 0, 168 .contents = 2, /* Code, not conforming */ 169 .read_exec_only = 0, 170 .limit_in_pages = 0, 171 .seg_not_present = 0, 172 .useable = 0 173 }; 174 add_ldt(&code16_desc, &code16_sel, "code16"); 175 176 const struct user_desc data16_desc = { 177 .entry_number = LDT_OFFSET + 1, 178 .base_addr = (unsigned long)stack16, 179 .limit = 0xffff, 180 .seg_32bit = 0, 181 .contents = 0, /* Data, grow-up */ 182 .read_exec_only = 0, 183 .limit_in_pages = 0, 184 .seg_not_present = 0, 185 .useable = 0 186 }; 187 add_ldt(&data16_desc, &data16_sel, "data16"); 188 189 const struct user_desc npcode32_desc = { 190 .entry_number = LDT_OFFSET + 3, 191 .base_addr = (unsigned long)int3, 192 .limit = 4095, 193 .seg_32bit = 1, 194 .contents = 2, /* Code, not conforming */ 195 .read_exec_only = 0, 196 .limit_in_pages = 0, 197 .seg_not_present = 1, 198 .useable = 0 199 }; 200 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); 201 202 const struct user_desc npdata32_desc = { 203 .entry_number = LDT_OFFSET + 4, 204 .base_addr = (unsigned long)stack16, 205 .limit = 0xffff, 206 .seg_32bit = 1, 207 .contents = 0, /* Data, grow-up */ 208 .read_exec_only = 0, 209 .limit_in_pages = 0, 210 .seg_not_present = 1, 211 .useable = 0 212 }; 213 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); 214 215 struct user_desc gdt_data16_desc = { 216 .entry_number = -1, 217 .base_addr = (unsigned long)stack16, 218 .limit = 0xffff, 219 .seg_32bit = 0, 220 .contents = 0, /* Data, grow-up */ 221 .read_exec_only = 0, 222 .limit_in_pages = 0, 223 .seg_not_present = 0, 224 .useable = 0 225 }; 226 227 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { 228 /* 229 * This probably indicates vulnerability to CVE-2014-8133. 230 * Merely getting here isn't definitive, though, and we'll 231 * diagnose the problem for real later on. 232 */ 233 printf("[WARN]\tset_thread_area allocated data16 at index %d\n", 234 gdt_data16_desc.entry_number); 235 gdt_data16_idx = gdt_data16_desc.entry_number; 236 } else { 237 printf("[OK]\tset_thread_area refused 16-bit data\n"); 238 } 239 240 struct user_desc gdt_npdata32_desc = { 241 .entry_number = -1, 242 .base_addr = (unsigned long)stack16, 243 .limit = 0xffff, 244 .seg_32bit = 1, 245 .contents = 0, /* Data, grow-up */ 246 .read_exec_only = 0, 247 .limit_in_pages = 0, 248 .seg_not_present = 1, 249 .useable = 0 250 }; 251 252 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { 253 /* 254 * As a hardening measure, newer kernels don't allow this. 255 */ 256 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", 257 gdt_npdata32_desc.entry_number); 258 gdt_npdata32_idx = gdt_npdata32_desc.entry_number; 259 } else { 260 printf("[OK]\tset_thread_area refused 16-bit data\n"); 261 } 262} 263 264/* State used by our signal handlers. */ 265static gregset_t initial_regs, requested_regs, resulting_regs; 266 267/* Instructions for the SIGUSR1 handler. */ 268static volatile unsigned short sig_cs, sig_ss; 269static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; 270#ifdef __x86_64__ 271static volatile sig_atomic_t sig_corrupt_final_ss; 272#endif 273 274/* Abstractions for some 32-bit vs 64-bit differences. */ 275#ifdef __x86_64__ 276# define REG_IP REG_RIP 277# define REG_SP REG_RSP 278# define REG_CX REG_RCX 279 280struct selectors { 281 unsigned short cs, gs, fs, ss; 282}; 283 284static unsigned short *ssptr(ucontext_t *ctx) 285{ 286 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 287 return &sels->ss; 288} 289 290static unsigned short *csptr(ucontext_t *ctx) 291{ 292 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; 293 return &sels->cs; 294} 295#else 296# define REG_IP REG_EIP 297# define REG_SP REG_ESP 298# define REG_CX REG_ECX 299 300static greg_t *ssptr(ucontext_t *ctx) 301{ 302 return &ctx->uc_mcontext.gregs[REG_SS]; 303} 304 305static greg_t *csptr(ucontext_t *ctx) 306{ 307 return &ctx->uc_mcontext.gregs[REG_CS]; 308} 309#endif 310 311/* 312 * Checks a given selector for its code bitness or returns -1 if it's not 313 * a usable code segment selector. 314 */ 315int cs_bitness(unsigned short cs) 316{ 317 uint32_t valid = 0, ar; 318 asm ("lar %[cs], %[ar]\n\t" 319 "jnz 1f\n\t" 320 "mov $1, %[valid]\n\t" 321 "1:" 322 : [ar] "=r" (ar), [valid] "+rm" (valid) 323 : [cs] "r" (cs)); 324 325 if (!valid) 326 return -1; 327 328 bool db = (ar & (1 << 22)); 329 bool l = (ar & (1 << 21)); 330 331 if (!(ar & (1<<11))) 332 return -1; /* Not code. */ 333 334 if (l && !db) 335 return 64; 336 else if (!l && db) 337 return 32; 338 else if (!l && !db) 339 return 16; 340 else 341 return -1; /* Unknown bitness. */ 342} 343 344/* 345 * Checks a given selector for its code bitness or returns -1 if it's not 346 * a usable code segment selector. 347 */ 348bool is_valid_ss(unsigned short cs) 349{ 350 uint32_t valid = 0, ar; 351 asm ("lar %[cs], %[ar]\n\t" 352 "jnz 1f\n\t" 353 "mov $1, %[valid]\n\t" 354 "1:" 355 : [ar] "=r" (ar), [valid] "+rm" (valid) 356 : [cs] "r" (cs)); 357 358 if (!valid) 359 return false; 360 361 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && 362 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) 363 return false; 364 365 return (ar & AR_P); 366} 367 368/* Number of errors in the current test case. */ 369static volatile sig_atomic_t nerrs; 370 371static void validate_signal_ss(int sig, ucontext_t *ctx) 372{ 373#ifdef __x86_64__ 374 bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); 375 376 if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { 377 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); 378 nerrs++; 379 380 /* 381 * This happens on Linux 4.1. The rest will fail, too, so 382 * return now to reduce the noise. 383 */ 384 return; 385 } 386 387 /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ 388 if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { 389 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", 390 sig); 391 nerrs++; 392 } 393 394 if (is_valid_ss(*ssptr(ctx))) { 395 /* 396 * DOSEMU was written before 64-bit sigcontext had SS, and 397 * it tries to figure out the signal source SS by looking at 398 * the physical register. Make sure that keeps working. 399 */ 400 unsigned short hw_ss; 401 asm ("mov %%ss, %0" : "=rm" (hw_ss)); 402 if (hw_ss != *ssptr(ctx)) { 403 printf("[FAIL]\tHW SS didn't match saved SS\n"); 404 nerrs++; 405 } 406 } 407#endif 408} 409 410/* 411 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the 412 * int3 trampoline. Sets SP to a large known value so that we can see 413 * whether the value round-trips back to user mode correctly. 414 */ 415static void sigusr1(int sig, siginfo_t *info, void *ctx_void) 416{ 417 ucontext_t *ctx = (ucontext_t*)ctx_void; 418 419 validate_signal_ss(sig, ctx); 420 421 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 422 423 *csptr(ctx) = sig_cs; 424 *ssptr(ctx) = sig_ss; 425 426 ctx->uc_mcontext.gregs[REG_IP] = 427 sig_cs == code16_sel ? 0 : (unsigned long)&int3; 428 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; 429 ctx->uc_mcontext.gregs[REG_CX] = 0; 430 431#ifdef __i386__ 432 /* 433 * Make sure the kernel doesn't inadvertently use DS or ES-relative 434 * accesses in a region where user DS or ES is loaded. 435 * 436 * Skip this for 64-bit builds because long mode doesn't care about 437 * DS and ES and skipping it increases test coverage a little bit, 438 * since 64-bit kernels can still run the 32-bit build. 439 */ 440 ctx->uc_mcontext.gregs[REG_DS] = 0; 441 ctx->uc_mcontext.gregs[REG_ES] = 0; 442#endif 443 444 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 445 requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ 446 447 return; 448} 449 450/* 451 * Called after a successful sigreturn (via int3) or from a failed 452 * sigreturn (directly by kernel). Restores our state so that the 453 * original raise(SIGUSR1) returns. 454 */ 455static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 456{ 457 ucontext_t *ctx = (ucontext_t*)ctx_void; 458 459 validate_signal_ss(sig, ctx); 460 461 sig_err = ctx->uc_mcontext.gregs[REG_ERR]; 462 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; 463 464 unsigned short ss; 465 asm ("mov %%ss,%0" : "=r" (ss)); 466 467 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; 468 if (asm_ss != sig_ss && sig == SIGTRAP) { 469 /* Sanity check failure. */ 470 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %x, ax = %llx\n", 471 ss, *ssptr(ctx), (unsigned long long)asm_ss); 472 nerrs++; 473 } 474 475 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); 476 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); 477 478#ifdef __x86_64__ 479 if (sig_corrupt_final_ss) { 480 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { 481 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); 482 nerrs++; 483 } else { 484 /* 485 * DOSEMU transitions from 32-bit to 64-bit mode by 486 * adjusting sigcontext, and it requires that this work 487 * even if the saved SS is bogus. 488 */ 489 printf("\tCorrupting SS on return to 64-bit mode\n"); 490 *ssptr(ctx) = 0; 491 } 492 } 493#endif 494 495 sig_trapped = sig; 496} 497 498#ifdef __x86_64__ 499/* Tests recovery if !UC_STRICT_RESTORE_SS */ 500static void sigusr2(int sig, siginfo_t *info, void *ctx_void) 501{ 502 ucontext_t *ctx = (ucontext_t*)ctx_void; 503 504 if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { 505 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); 506 nerrs++; 507 return; /* We can't do the rest. */ 508 } 509 510 ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; 511 *ssptr(ctx) = 0; 512 513 /* Return. The kernel should recover without sending another signal. */ 514} 515 516static int test_nonstrict_ss(void) 517{ 518 clearhandler(SIGUSR1); 519 clearhandler(SIGTRAP); 520 clearhandler(SIGSEGV); 521 clearhandler(SIGILL); 522 sethandler(SIGUSR2, sigusr2, 0); 523 524 nerrs = 0; 525 526 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); 527 raise(SIGUSR2); 528 if (!nerrs) 529 printf("[OK]\tIt worked\n"); 530 531 return nerrs; 532} 533#endif 534 535/* Finds a usable code segment of the requested bitness. */ 536int find_cs(int bitness) 537{ 538 unsigned short my_cs; 539 540 asm ("mov %%cs,%0" : "=r" (my_cs)); 541 542 if (cs_bitness(my_cs) == bitness) 543 return my_cs; 544 if (cs_bitness(my_cs + (2 << 3)) == bitness) 545 return my_cs + (2 << 3); 546 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) 547 return my_cs - (2 << 3); 548 if (cs_bitness(code16_sel) == bitness) 549 return code16_sel; 550 551 printf("[WARN]\tCould not find %d-bit CS\n", bitness); 552 return -1; 553} 554 555static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) 556{ 557 int cs = find_cs(cs_bits); 558 if (cs == -1) { 559 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", 560 cs_bits, use_16bit_ss ? 16 : 32); 561 return 0; 562 } 563 564 if (force_ss != -1) { 565 sig_ss = force_ss; 566 } else { 567 if (use_16bit_ss) { 568 if (!data16_sel) { 569 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", 570 cs_bits); 571 return 0; 572 } 573 sig_ss = data16_sel; 574 } else { 575 asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); 576 } 577 } 578 579 sig_cs = cs; 580 581 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", 582 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, 583 (sig_ss & 4) ? "" : ", GDT"); 584 585 raise(SIGUSR1); 586 587 nerrs = 0; 588 589 /* 590 * Check that each register had an acceptable value when the 591 * int3 trampoline was invoked. 592 */ 593 for (int i = 0; i < NGREG; i++) { 594 greg_t req = requested_regs[i], res = resulting_regs[i]; 595 596 if (i == REG_TRAPNO || i == REG_IP) 597 continue; /* don't care */ 598 599 if (i == REG_SP) { 600 /* 601 * If we were using a 16-bit stack segment, then 602 * the kernel is a bit stuck: IRET only restores 603 * the low 16 bits of ESP/RSP if SS is 16-bit. 604 * The kernel uses a hack to restore bits 31:16, 605 * but that hack doesn't help with bits 63:32. 606 * On Intel CPUs, bits 63:32 end up zeroed, and, on 607 * AMD CPUs, they leak the high bits of the kernel 608 * espfix64 stack pointer. There's very little that 609 * the kernel can do about it. 610 * 611 * Similarly, if we are returning to a 32-bit context, 612 * the CPU will often lose the high 32 bits of RSP. 613 */ 614 615 if (res == req) 616 continue; 617 618 if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { 619 printf("[NOTE]\tSP: %llx -> %llx\n", 620 (unsigned long long)req, 621 (unsigned long long)res); 622 continue; 623 } 624 625 printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", 626 (unsigned long long)requested_regs[i], 627 (unsigned long long)resulting_regs[i]); 628 nerrs++; 629 continue; 630 } 631 632 bool ignore_reg = false; 633#if __i386__ 634 if (i == REG_UESP) 635 ignore_reg = true; 636#else 637 if (i == REG_CSGSFS) { 638 struct selectors *req_sels = 639 (void *)&requested_regs[REG_CSGSFS]; 640 struct selectors *res_sels = 641 (void *)&resulting_regs[REG_CSGSFS]; 642 if (req_sels->cs != res_sels->cs) { 643 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", 644 req_sels->cs, res_sels->cs); 645 nerrs++; 646 } 647 648 if (req_sels->ss != res_sels->ss) { 649 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", 650 req_sels->ss, res_sels->ss); 651 nerrs++; 652 } 653 654 continue; 655 } 656#endif 657 658 /* Sanity check on the kernel */ 659 if (i == REG_CX && req != res) { 660 printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", 661 (unsigned long long)req, 662 (unsigned long long)res); 663 nerrs++; 664 continue; 665 } 666 667 if (req != res && !ignore_reg) { 668 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", 669 i, (unsigned long long)req, 670 (unsigned long long)res); 671 nerrs++; 672 } 673 } 674 675 if (nerrs == 0) 676 printf("[OK]\tall registers okay\n"); 677 678 return nerrs; 679} 680 681static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) 682{ 683 int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; 684 if (cs == -1) 685 return 0; 686 687 sig_cs = cs; 688 sig_ss = ss; 689 690 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", 691 cs_bits, sig_cs, sig_ss); 692 693 sig_trapped = 0; 694 raise(SIGUSR1); 695 if (sig_trapped) { 696 char errdesc[32] = ""; 697 if (sig_err) { 698 const char *src = (sig_err & 1) ? " EXT" : ""; 699 const char *table; 700 if ((sig_err & 0x6) == 0x0) 701 table = "GDT"; 702 else if ((sig_err & 0x6) == 0x4) 703 table = "LDT"; 704 else if ((sig_err & 0x6) == 0x2) 705 table = "IDT"; 706 else 707 table = "???"; 708 709 sprintf(errdesc, "%s%s index %d, ", 710 table, src, sig_err >> 3); 711 } 712 713 char trapname[32]; 714 if (sig_trapno == 13) 715 strcpy(trapname, "GP"); 716 else if (sig_trapno == 11) 717 strcpy(trapname, "NP"); 718 else if (sig_trapno == 12) 719 strcpy(trapname, "SS"); 720 else if (sig_trapno == 32) 721 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ 722 else 723 sprintf(trapname, "%d", sig_trapno); 724 725 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", 726 trapname, (unsigned long)sig_err, 727 errdesc, strsignal(sig_trapped)); 728 return 0; 729 } else { 730 /* 731 * This also implicitly tests UC_STRICT_RESTORE_SS: 732 * We check that these signals set UC_STRICT_RESTORE_SS and, 733 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, 734 * then we won't get SIGSEGV. 735 */ 736 printf("[FAIL]\tDid not get SIGSEGV\n"); 737 return 1; 738 } 739} 740 741int main() 742{ 743 int total_nerrs = 0; 744 unsigned short my_cs, my_ss; 745 746 asm volatile ("mov %%cs,%0" : "=r" (my_cs)); 747 asm volatile ("mov %%ss,%0" : "=r" (my_ss)); 748 setup_ldt(); 749 750 stack_t stack = { 751 /* Our sigaltstack scratch space. */ 752 .ss_sp = malloc(sizeof(char) * SIGSTKSZ), 753 .ss_size = SIGSTKSZ, 754 }; 755 if (sigaltstack(&stack, NULL) != 0) 756 err(1, "sigaltstack"); 757 758 sethandler(SIGUSR1, sigusr1, 0); 759 sethandler(SIGTRAP, sigtrap, SA_ONSTACK); 760 761 /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ 762 total_nerrs += test_valid_sigreturn(64, false, -1); 763 total_nerrs += test_valid_sigreturn(32, false, -1); 764 total_nerrs += test_valid_sigreturn(16, false, -1); 765 766 /* 767 * Test easy espfix cases: return to a 16-bit LDT SS in each possible 768 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. 769 * 770 * This catches the original missing-espfix-on-64-bit-kernels issue 771 * as well as CVE-2014-8134. 772 */ 773 total_nerrs += test_valid_sigreturn(64, true, -1); 774 total_nerrs += test_valid_sigreturn(32, true, -1); 775 total_nerrs += test_valid_sigreturn(16, true, -1); 776 777 if (gdt_data16_idx) { 778 /* 779 * For performance reasons, Linux skips espfix if SS points 780 * to the GDT. If we were able to allocate a 16-bit SS in 781 * the GDT, see if it leaks parts of the kernel stack pointer. 782 * 783 * This tests for CVE-2014-8133. 784 */ 785 total_nerrs += test_valid_sigreturn(64, true, 786 GDT3(gdt_data16_idx)); 787 total_nerrs += test_valid_sigreturn(32, true, 788 GDT3(gdt_data16_idx)); 789 total_nerrs += test_valid_sigreturn(16, true, 790 GDT3(gdt_data16_idx)); 791 } 792 793#ifdef __x86_64__ 794 /* Nasty ABI case: check SS corruption handling. */ 795 sig_corrupt_final_ss = 1; 796 total_nerrs += test_valid_sigreturn(32, false, -1); 797 total_nerrs += test_valid_sigreturn(32, true, -1); 798 sig_corrupt_final_ss = 0; 799#endif 800 801 /* 802 * We're done testing valid sigreturn cases. Now we test states 803 * for which sigreturn itself will succeed but the subsequent 804 * entry to user mode will fail. 805 * 806 * Depending on the failure mode and the kernel bitness, these 807 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. 808 */ 809 clearhandler(SIGTRAP); 810 sethandler(SIGSEGV, sigtrap, SA_ONSTACK); 811 sethandler(SIGBUS, sigtrap, SA_ONSTACK); 812 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ 813 814 /* Easy failures: invalid SS, resulting in #GP(0) */ 815 test_bad_iret(64, ldt_nonexistent_sel, -1); 816 test_bad_iret(32, ldt_nonexistent_sel, -1); 817 test_bad_iret(16, ldt_nonexistent_sel, -1); 818 819 /* These fail because SS isn't a data segment, resulting in #GP(SS) */ 820 test_bad_iret(64, my_cs, -1); 821 test_bad_iret(32, my_cs, -1); 822 test_bad_iret(16, my_cs, -1); 823 824 /* Try to return to a not-present code segment, triggering #NP(SS). */ 825 test_bad_iret(32, my_ss, npcode32_sel); 826 827 /* 828 * Try to return to a not-present but otherwise valid data segment. 829 * This will cause IRET to fail with #SS on the espfix stack. This 830 * exercises CVE-2014-9322. 831 * 832 * Note that, if espfix is enabled, 64-bit Linux will lose track 833 * of the actual cause of failure and report #GP(0) instead. 834 * This would be very difficult for Linux to avoid, because 835 * espfix64 causes IRET failures to be promoted to #DF, so the 836 * original exception frame is never pushed onto the stack. 837 */ 838 test_bad_iret(32, npdata32_sel, -1); 839 840 /* 841 * Try to return to a not-present but otherwise valid data 842 * segment without invoking espfix. Newer kernels don't allow 843 * this to happen in the first place. On older kernels, though, 844 * this can trigger CVE-2014-9322. 845 */ 846 if (gdt_npdata32_idx) 847 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); 848 849#ifdef __x86_64__ 850 total_nerrs += test_nonstrict_ss(); 851#endif 852 853 free(stack.ss_sp); 854 return total_nerrs ? 1 : 0; 855}