Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

coredump: allow for flexible coredump handling

Extend the coredump socket to allow the coredump server to tell the
kernel how to process individual coredumps.

When the crashing task connects to the coredump socket the kernel will
send a struct coredump_req to the coredump server. The kernel will set
the size member of struct coredump_req, telling the coredump server how
much data can be read.

The coredump server uses MSG_PEEK to peek the size of struct
coredump_req. If the kernel uses a newer struct coredump_req the
coredump server just reads the size it knows and discards any remaining
bytes in the buffer. If the kernel uses an older struct coredump_req
the coredump server just reads the size the kernel knows.

The returned struct coredump_req will inform the coredump server what
features the kernel supports. The coredump_req->mask member is set to
the currently known features.

The coredump server may only use features whose bits were raised by the
kernel in coredump_req->mask.

In response to a coredump_req from the kernel the coredump server sends
a struct coredump_ack to the kernel. The kernel informs the coredump
server what version of struct coredump_ack it supports by setting struct
coredump_req->size_ack to the size it knows about. The coredump server
may only send as many bytes as coredump_req->size_ack indicates (a
smaller size is fine of course). The coredump server must set
coredump_ack->size accordingly.

The coredump server sets the features it wants to use in struct
coredump_ack->mask. Only bits returned in struct coredump_req->mask may
be used.

In case an invalid struct coredump_ack is sent to the kernel a non-zero
u32 integer is sent indicating the reason for the failure. If it was
successful a zero u32 integer is sent.

In the initial version the following features are supported in
coredump_{req,ack}->mask:

* COREDUMP_KERNEL
The kernel will write the coredump data to the socket.

* COREDUMP_USERSPACE
The kernel will not write coredump data but will indicate to the
parent that a coredump has been generated. This is used when userspace
generates its own coredumps.

* COREDUMP_REJECT
The kernel will skip generating a coredump for this task.

* COREDUMP_WAIT
The kernel will prevent the task from exiting until the coredump
server has shut down the socket connection.

The flexible coredump socket can be enabled by using the "@@" prefix
instead of the single "@" prefix for the regular coredump socket:

@@/run/systemd/coredump.socket

will enable flexible coredump handling. Current kernels already enforce
that "@" must be followed by "/" and will reject anything else. So
extending this is backward and forward compatible.

Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-1-05a5f0c18ecc@kernel.org
Acked-by: Lennart Poettering <lennart@poettering.net>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

+272 -27
+168 -27
fs/coredump.c
··· 51 51 #include <net/sock.h> 52 52 #include <uapi/linux/pidfd.h> 53 53 #include <uapi/linux/un.h> 54 + #include <uapi/linux/coredump.h> 54 55 55 56 #include <linux/uaccess.h> 56 57 #include <asm/mmu_context.h> ··· 84 83 unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; 85 84 86 85 enum coredump_type_t { 87 - COREDUMP_FILE = 1, 88 - COREDUMP_PIPE = 2, 89 - COREDUMP_SOCK = 3, 86 + COREDUMP_FILE = 1, 87 + COREDUMP_PIPE = 2, 88 + COREDUMP_SOCK = 3, 89 + COREDUMP_SOCK_REQ = 4, 90 90 }; 91 91 92 92 struct core_name { 93 93 char *corename; 94 94 int used, size; 95 95 enum coredump_type_t core_type; 96 + u64 mask; 96 97 }; 97 98 98 99 static int expand_corename(struct core_name *cn, int size) ··· 238 235 int pid_in_pattern = 0; 239 236 int err = 0; 240 237 238 + cn->mask = COREDUMP_KERNEL; 239 + if (core_pipe_limit) 240 + cn->mask |= COREDUMP_WAIT; 241 241 cn->used = 0; 242 242 cn->corename = NULL; 243 243 if (*pat_ptr == '|') ··· 270 264 pat_ptr++; 271 265 if (!(*pat_ptr)) 272 266 return -ENOMEM; 267 + if (*pat_ptr == '@') { 268 + pat_ptr++; 269 + if (!(*pat_ptr)) 270 + return -ENOMEM; 271 + 272 + cn->core_type = COREDUMP_SOCK_REQ; 273 + } 273 274 274 275 err = cn_printf(cn, "%s", pat_ptr); 275 276 if (err) ··· 645 632 return 0; 646 633 } 647 634 635 + #ifdef CONFIG_UNIX 636 + static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags) 637 + { 638 + struct msghdr msg = {}; 639 + struct kvec iov = { .iov_base = ack, .iov_len = size }; 640 + ssize_t ret; 641 + 642 + memset(ack, 0, size); 643 + ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags); 644 + return ret == size; 645 + } 646 + 647 + static inline bool coredump_sock_send(struct file *file, struct coredump_req *req) 648 + { 649 + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; 650 + struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) }; 651 + ssize_t ret; 652 + 653 + ret = kernel_sendmsg(sock_from_file(file), &msg, 
&iov, 1, sizeof(*req)); 654 + return ret == sizeof(*req); 655 + } 656 + 657 + static_assert(sizeof(enum coredump_mark) == sizeof(__u32)); 658 + 659 + static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark) 660 + { 661 + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; 662 + struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) }; 663 + ssize_t ret; 664 + 665 + ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark)); 666 + return ret == sizeof(mark); 667 + } 668 + 669 + static inline void coredump_sock_wait(struct file *file) 670 + { 671 + ssize_t n; 672 + 673 + /* 674 + * We use a simple read to wait for the coredump processing to 675 + * finish. Either the socket is closed or we get sent unexpected 676 + * data. In both cases, we're done. 677 + */ 678 + n = __kernel_read(file, &(char){ 0 }, 1, NULL); 679 + if (n > 0) 680 + coredump_report_failure("Coredump socket had unexpected data"); 681 + else if (n < 0) 682 + coredump_report_failure("Coredump socket failed"); 683 + } 684 + 685 + static inline void coredump_sock_shutdown(struct file *file) 686 + { 687 + struct socket *socket; 688 + 689 + socket = sock_from_file(file); 690 + if (!socket) 691 + return; 692 + 693 + /* Let userspace know we're done processing the coredump. */ 694 + kernel_sock_shutdown(socket, SHUT_WR); 695 + } 696 + 697 + static bool coredump_request(struct core_name *cn, struct coredump_params *cprm) 698 + { 699 + struct coredump_req req = { 700 + .size = sizeof(struct coredump_req), 701 + .mask = COREDUMP_KERNEL | COREDUMP_USERSPACE | 702 + COREDUMP_REJECT | COREDUMP_WAIT, 703 + .size_ack = sizeof(struct coredump_ack), 704 + }; 705 + struct coredump_ack ack = {}; 706 + ssize_t usize; 707 + 708 + if (cn->core_type != COREDUMP_SOCK_REQ) 709 + return true; 710 + 711 + /* Let userspace know what we support. */ 712 + if (!coredump_sock_send(cprm->file, &req)) 713 + return false; 714 + 715 + /* Peek the size of the coredump_ack. 
*/ 716 + if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size), 717 + MSG_PEEK | MSG_WAITALL)) 718 + return false; 719 + 720 + /* Refuse unknown coredump_ack sizes. */ 721 + usize = ack.size; 722 + if (usize < COREDUMP_ACK_SIZE_VER0) { 723 + coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE); 724 + return false; 725 + } 726 + 727 + if (usize > sizeof(ack)) { 728 + coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE); 729 + return false; 730 + } 731 + 732 + /* Now retrieve the coredump_ack. */ 733 + if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL)) 734 + return false; 735 + if (ack.size != usize) 736 + return false; 737 + 738 + /* Refuse unknown coredump_ack flags. */ 739 + if (ack.mask & ~req.mask) { 740 + coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED); 741 + return false; 742 + } 743 + 744 + /* Refuse mutually exclusive options. */ 745 + if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL | 746 + COREDUMP_REJECT)) != 1) { 747 + coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING); 748 + return false; 749 + } 750 + 751 + if (ack.spare) { 752 + coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED); 753 + return false; 754 + } 755 + 756 + cn->mask = ack.mask; 757 + return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK); 758 + } 759 + #else 760 + static inline void coredump_sock_wait(struct file *file) { } 761 + static inline void coredump_sock_shutdown(struct file *file) { } 762 + #endif 763 + 648 764 void do_coredump(const kernel_siginfo_t *siginfo) 649 765 { 650 766 struct core_state core_state; ··· 992 850 } 993 851 break; 994 852 } 853 + case COREDUMP_SOCK_REQ: 854 + fallthrough; 995 855 case COREDUMP_SOCK: { 996 856 #ifdef CONFIG_UNIX 997 857 struct file *file __free(fput) = NULL; ··· 1062 918 1063 919 cprm.limit = RLIM_INFINITY; 1064 920 cprm.file = no_free_ptr(file); 921 + 922 + if (!coredump_request(&cn, &cprm)) 923 + goto close_fail; 1065 924 #else 1066 925 coredump_report_failure("Core dump socket 
support %s disabled", cn.corename); 1067 926 goto close_fail; ··· 1076 929 goto close_fail; 1077 930 } 1078 931 932 + /* Don't even generate the coredump. */ 933 + if (cn.mask & COREDUMP_REJECT) 934 + goto close_fail; 935 + 1079 936 /* get us an unshared descriptor table; almost always a no-op */ 1080 937 /* The cell spufs coredump code reads the file descriptor tables */ 1081 938 retval = unshare_files(); 1082 939 if (retval) 1083 940 goto close_fail; 1084 - if (!dump_interrupted()) { 941 + 942 + if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) { 1085 943 /* 1086 944 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would 1087 945 * have this set to NULL. ··· 1114 962 free_vma_snapshot(&cprm); 1115 963 } 1116 964 1117 - #ifdef CONFIG_UNIX 1118 - /* Let userspace know we're done processing the coredump. */ 1119 - if (sock_from_file(cprm.file)) 1120 - kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR); 1121 - #endif 965 + coredump_sock_shutdown(cprm.file); 966 + 967 + /* Let the parent know that a coredump was generated. */ 968 + if (cn.mask & COREDUMP_USERSPACE) 969 + core_dumped = true; 1122 970 1123 971 /* 1124 972 * When core_pipe_limit is set we wait for the coredump server 1125 973 * or usermodehelper to finish before exiting so it can e.g., 1126 974 * inspect /proc/<pid>. 1127 975 */ 1128 - if (core_pipe_limit) { 976 + if (cn.mask & COREDUMP_WAIT) { 1129 977 switch (cn.core_type) { 1130 978 case COREDUMP_PIPE: 1131 979 wait_for_dump_helpers(cprm.file); 1132 980 break; 1133 - #ifdef CONFIG_UNIX 1134 - case COREDUMP_SOCK: { 1135 - ssize_t n; 1136 - 1137 - /* 1138 - * We use a simple read to wait for the coredump 1139 - * processing to finish. Either the socket is 1140 - * closed or we get sent unexpected data. In 1141 - * both cases, we're done. 
1142 - */ 1143 - n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL); 1144 - if (n != 0) 1145 - coredump_report_failure("Unexpected data on coredump socket"); 981 + case COREDUMP_SOCK_REQ: 982 + fallthrough; 983 + case COREDUMP_SOCK: 984 + coredump_sock_wait(cprm.file); 1146 985 break; 1147 - } 1148 - #endif 1149 986 default: 1150 987 break; 1151 988 } ··· 1390 1249 if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) 1391 1250 return false; 1392 1251 1393 - /* Must be an absolute path. */ 1394 - if (*(core_pattern + 1) != '/') 1252 + /* Must be an absolute path or the socket request. */ 1253 + if (*(core_pattern + 1) != '/' && *(core_pattern + 1) != '@') 1395 1254 return false; 1396 1255 1397 1256 return true;
+104
include/uapi/linux/coredump.h
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */

#ifndef _UAPI_LINUX_COREDUMP_H
#define _UAPI_LINUX_COREDUMP_H

#include <linux/types.h>

/**
 * coredump_{req,ack} flags
 * @COREDUMP_KERNEL: kernel writes coredump
 * @COREDUMP_USERSPACE: userspace writes coredump
 * @COREDUMP_REJECT: don't generate coredump
 * @COREDUMP_WAIT: wait for coredump server
 */
enum {
	COREDUMP_KERNEL		= (1ULL << 0),
	COREDUMP_USERSPACE	= (1ULL << 1),
	COREDUMP_REJECT		= (1ULL << 2),
	COREDUMP_WAIT		= (1ULL << 3),
};

/**
 * struct coredump_req - message kernel sends to userspace
 * @size: size of struct coredump_req
 * @size_ack: known size of struct coredump_ack on this kernel
 * @mask: supported features
 *
 * When a coredump happens the kernel will connect to the coredump
 * socket and send a coredump request to the coredump server. The @size
 * member is set to the size of struct coredump_req and provides a hint
 * to userspace how much data can be read. Userspace may use MSG_PEEK to
 * peek the size of struct coredump_req and then choose to consume it in
 * one go. Userspace may also simply read a COREDUMP_REQ_SIZE_VER0 sized
 * request. If the size the kernel sends is larger userspace simply
 * discards any remaining data.
 *
 * The coredump_req->mask member is set to the currently known features.
 * Userspace may only set coredump_ack->mask to the bits raised by the
 * kernel in coredump_req->mask.
 *
 * The coredump_req->size_ack member is set by the kernel to the size of
 * struct coredump_ack the kernel knows. Userspace may only send up to
 * coredump_req->size_ack bytes to the kernel and must set
 * coredump_ack->size accordingly.
 */
struct coredump_req {
	__u32 size;
	__u32 size_ack;
	__u64 mask;
};

enum {
	COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */
};

/**
 * struct coredump_ack - message userspace sends to kernel
 * @size: size of the struct
 * @spare: unused
 * @mask: features kernel is supposed to use
 *
 * The @size member must be set to the size of struct coredump_ack. It
 * may never exceed what the kernel returned in coredump_req->size_ack
 * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <=
 * coredump_req->size_ack).
 *
 * The @mask member must be set to the features the coredump server
 * wants the kernel to use. Only bits the kernel returned in
 * coredump_req->mask may be set.
 */
struct coredump_ack {
	__u32 size;
	__u32 spare;
	__u64 mask;
};

enum {
	COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */
};

/**
 * enum coredump_mark - Markers for the coredump socket
 *
 * The kernel will send a single mark, a 32-bit unsigned integer, on the
 * coredump socket. The mark notifies userspace whether the coredump ack
 * succeeded or failed.
 *
 * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small
 * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big
 * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid
 * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options
 * @COREDUMP_MARK_REQACK: the coredump request and ack was successful
 * @__COREDUMP_MARK_MAX: the maximum coredump mark value
 */
enum coredump_mark {
	COREDUMP_MARK_REQACK		= 0U,
	COREDUMP_MARK_MINSIZE		= 1U,
	COREDUMP_MARK_MAXSIZE		= 2U,
	COREDUMP_MARK_UNSUPPORTED	= 3U,
	COREDUMP_MARK_CONFLICTING	= 4U,
	__COREDUMP_MARK_MAX		= (1U << 31),
};

#endif /* _UAPI_LINUX_COREDUMP_H */