Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-netconsole-convert-to-nbcon-console-infrastructure'

Breno Leitao says:

====================
net: netconsole: convert to NBCON console infrastructure

This series adds support for the nbcon (new buffer console) infrastructure
to netconsole, enabling lock-free, priority-based console operations that
are safer in crash scenarios.

The implementation is introduced in three steps:

0) Extend printk to expose the CPU and task name (task->comm) from which
the printk originated. (Thanks John and Petr for the support in
getting this done)
1) Refactor the message fragmentation logic into a reusable helper function
2) Extend nbcon support to non-extended (basic) consoles using the same
infrastructure.

The initial discussion about this appeared a while ago in [1], in order to
solve Mike's HARDIRQ-safe -> HARDIRQ-unsafe lock order warning. The root
cause was that some hosts were taking IRQ-unsafe locks from inside the
console lock.

At that time, we didn't have the CON_NBCON_ATOMIC_UNSAFE yet. John
kindly implemented CON_NBCON_ATOMIC_UNSAFE in 187de7c212e5 ("printk:
nbcon: Allow unsafe write_atomic() for panic"), and now we can
implement netconsole on top of nbcon.

It is important to note that netconsole continues to call netpoll and the
network TX helpers with interrupts disabled, given that the TX helpers are
called with target_list_lock held.
====================

Link: https://patch.msgid.link/20260206-nbcon-v7-0-62bda69b1b41@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+197 -60
+1
drivers/net/Kconfig
··· 341 341 bool "Dynamic reconfiguration of logging targets" 342 342 depends on NETCONSOLE && SYSFS && CONFIGFS_FS && \ 343 343 !(NETCONSOLE=y && CONFIGFS_FS=m) 344 + select PRINTK_EXECUTION_CTX 344 345 help 345 346 This option enables the ability to dynamically reconfigure target 346 347 parameters (interface, IP addresses, port numbers, MAC addresses)
+94 -59
drivers/net/netconsole.c
··· 1490 1490 init_target_config_group(nt, target_name); 1491 1491 } 1492 1492 1493 - static int sysdata_append_cpu_nr(struct netconsole_target *nt, int offset) 1493 + static int sysdata_append_cpu_nr(struct netconsole_target *nt, int offset, 1494 + struct nbcon_write_context *wctxt) 1494 1495 { 1495 1496 return scnprintf(&nt->sysdata[offset], 1496 1497 MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n", 1497 - raw_smp_processor_id()); 1498 + wctxt->cpu); 1498 1499 } 1499 1500 1500 - static int sysdata_append_taskname(struct netconsole_target *nt, int offset) 1501 + static int sysdata_append_taskname(struct netconsole_target *nt, int offset, 1502 + struct nbcon_write_context *wctxt) 1501 1503 { 1502 1504 return scnprintf(&nt->sysdata[offset], 1503 1505 MAX_EXTRADATA_ENTRY_LEN, " taskname=%s\n", 1504 - current->comm); 1506 + wctxt->comm); 1505 1507 } 1506 1508 1507 1509 static int sysdata_append_release(struct netconsole_target *nt, int offset) ··· 1524 1522 /* 1525 1523 * prepare_sysdata - append sysdata in runtime 1526 1524 * @nt: target to send message to 1525 + * @wctxt: nbcon write context containing message metadata 1527 1526 */ 1528 - static int prepare_sysdata(struct netconsole_target *nt) 1527 + static int prepare_sysdata(struct netconsole_target *nt, 1528 + struct nbcon_write_context *wctxt) 1529 1529 { 1530 1530 int sysdata_len = 0; 1531 1531 ··· 1535 1531 goto out; 1536 1532 1537 1533 if (nt->sysdata_fields & SYSDATA_CPU_NR) 1538 - sysdata_len += sysdata_append_cpu_nr(nt, sysdata_len); 1534 + sysdata_len += sysdata_append_cpu_nr(nt, sysdata_len, wctxt); 1539 1535 if (nt->sysdata_fields & SYSDATA_TASKNAME) 1540 - sysdata_len += sysdata_append_taskname(nt, sysdata_len); 1536 + sysdata_len += sysdata_append_taskname(nt, sysdata_len, wctxt); 1541 1537 if (nt->sysdata_fields & SYSDATA_RELEASE) 1542 1538 sysdata_len += sysdata_append_release(nt, sysdata_len); 1543 1539 if (nt->sysdata_fields & SYSDATA_MSGID) ··· 1835 1831 /** 1836 1832 * send_ext_msg_udp - send extended log 
message to target 1837 1833 * @nt: target to send message to 1838 - * @msg: extended log message to send 1839 - * @msg_len: length of message 1834 + * @wctxt: nbcon write context containing message and metadata 1840 1835 * 1841 - * Transfer extended log @msg to @nt. If @msg is longer than 1836 + * Transfer extended log message to @nt. If message is longer than 1842 1837 * MAX_PRINT_CHUNK, it'll be split and transmitted in multiple chunks with 1843 1838 * ncfrag header field added to identify them. 1844 1839 */ 1845 - static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, 1846 - int msg_len) 1840 + static void send_ext_msg_udp(struct netconsole_target *nt, 1841 + struct nbcon_write_context *wctxt) 1847 1842 { 1848 1843 int userdata_len = 0; 1849 1844 int release_len = 0; 1850 1845 int sysdata_len = 0; 1846 + int len; 1851 1847 1852 1848 #ifdef CONFIG_NETCONSOLE_DYNAMIC 1853 - sysdata_len = prepare_sysdata(nt); 1849 + sysdata_len = prepare_sysdata(nt, wctxt); 1854 1850 userdata_len = nt->userdata_length; 1855 1851 #endif 1856 1852 if (nt->release) 1857 1853 release_len = strlen(init_utsname()->release) + 1; 1858 1854 1859 - if (msg_len + release_len + sysdata_len + userdata_len <= MAX_PRINT_CHUNK) 1860 - return send_msg_no_fragmentation(nt, msg, msg_len, release_len); 1855 + len = wctxt->len + release_len + sysdata_len + userdata_len; 1856 + if (len <= MAX_PRINT_CHUNK) 1857 + return send_msg_no_fragmentation(nt, wctxt->outbuf, 1858 + wctxt->len, release_len); 1861 1859 1862 - return send_msg_fragmented(nt, msg, msg_len, release_len, 1860 + return send_msg_fragmented(nt, wctxt->outbuf, wctxt->len, release_len, 1863 1861 sysdata_len); 1864 1862 } 1865 1863 1866 - static void write_ext_msg(struct console *con, const char *msg, 1867 - unsigned int len) 1864 + static void send_msg_udp(struct netconsole_target *nt, const char *msg, 1865 + unsigned int len) 1868 1866 { 1869 - struct netconsole_target *nt; 1870 - unsigned long flags; 1867 + const char 
*tmp = msg; 1868 + int frag, left = len; 1871 1869 1872 - if ((oops_only && !oops_in_progress) || list_empty(&target_list)) 1873 - return; 1874 - 1875 - spin_lock_irqsave(&target_list_lock, flags); 1876 - list_for_each_entry(nt, &target_list, list) 1877 - if (nt->extended && nt->state == STATE_ENABLED && 1878 - netif_running(nt->np.dev)) 1879 - send_ext_msg_udp(nt, msg, len); 1880 - spin_unlock_irqrestore(&target_list_lock, flags); 1870 + while (left > 0) { 1871 + frag = min(left, MAX_PRINT_CHUNK); 1872 + send_udp(nt, tmp, frag); 1873 + tmp += frag; 1874 + left -= frag; 1875 + } 1881 1876 } 1882 1877 1883 - static void write_msg(struct console *con, const char *msg, unsigned int len) 1878 + /** 1879 + * netconsole_write - Generic function to send a msg to all targets 1880 + * @wctxt: nbcon write context 1881 + * @extended: "true" for extended console mode 1882 + * 1883 + * Given an nbcon write context, send the message to the netconsole targets 1884 + */ 1885 + static void netconsole_write(struct nbcon_write_context *wctxt, bool extended) 1884 1886 { 1885 - int frag, left; 1886 - unsigned long flags; 1887 1887 struct netconsole_target *nt; 1888 - const char *tmp; 1889 1888 1890 1889 if (oops_only && !oops_in_progress) 1891 1890 return; 1892 - /* Avoid taking lock and disabling interrupts unnecessarily */ 1893 - if (list_empty(&target_list)) 1894 - return; 1895 1891 1896 - spin_lock_irqsave(&target_list_lock, flags); 1897 1892 list_for_each_entry(nt, &target_list, list) { 1898 - if (!nt->extended && nt->state == STATE_ENABLED && 1899 - netif_running(nt->np.dev)) { 1900 - /* 1901 - * We nest this inside the for-each-target loop above 1902 - * so that we're able to get as much logging out to 1903 - * at least one target if we die inside here, instead 1904 - * of unnecessarily keeping all targets in lock-step. 
1905 - */ 1906 - tmp = msg; 1907 - for (left = len; left;) { 1908 - frag = min(left, MAX_PRINT_CHUNK); 1909 - send_udp(nt, tmp, frag); 1910 - tmp += frag; 1911 - left -= frag; 1912 - } 1913 - } 1893 + if (nt->extended != extended || nt->state != STATE_ENABLED || 1894 + !netif_running(nt->np.dev)) 1895 + continue; 1896 + 1897 + /* If nbcon_enter_unsafe() fails, just return given netconsole 1898 + * lost the ownership, and iterating over the targets will not 1899 + * be able to re-acquire. 1900 + */ 1901 + if (!nbcon_enter_unsafe(wctxt)) 1902 + return; 1903 + 1904 + if (extended) 1905 + send_ext_msg_udp(nt, wctxt); 1906 + else 1907 + send_msg_udp(nt, wctxt->outbuf, wctxt->len); 1908 + 1909 + nbcon_exit_unsafe(wctxt); 1914 1910 } 1911 + } 1912 + 1913 + static void netconsole_write_ext(struct console *con __always_unused, 1914 + struct nbcon_write_context *wctxt) 1915 + { 1916 + netconsole_write(wctxt, true); 1917 + } 1918 + 1919 + static void netconsole_write_basic(struct console *con __always_unused, 1920 + struct nbcon_write_context *wctxt) 1921 + { 1922 + netconsole_write(wctxt, false); 1923 + } 1924 + 1925 + static void netconsole_device_lock(struct console *con __always_unused, 1926 + unsigned long *flags) 1927 + __acquires(&target_list_lock) 1928 + { 1929 + spin_lock_irqsave(&target_list_lock, *flags); 1930 + } 1931 + 1932 + static void netconsole_device_unlock(struct console *con __always_unused, 1933 + unsigned long flags) 1934 + __releases(&target_list_lock) 1935 + { 1915 1936 spin_unlock_irqrestore(&target_list_lock, flags); 1916 1937 } 1917 1938 ··· 2100 2071 } 2101 2072 2102 2073 static struct console netconsole_ext = { 2103 - .name = "netcon_ext", 2104 - .flags = CON_ENABLED | CON_EXTENDED, 2105 - .write = write_ext_msg, 2074 + .name = "netcon_ext", 2075 + .flags = CON_ENABLED | CON_EXTENDED | CON_NBCON | CON_NBCON_ATOMIC_UNSAFE, 2076 + .write_thread = netconsole_write_ext, 2077 + .write_atomic = netconsole_write_ext, 2078 + .device_lock = 
netconsole_device_lock, 2079 + .device_unlock = netconsole_device_unlock, 2106 2080 }; 2107 2081 2108 2082 static struct console netconsole = { 2109 - .name = "netcon", 2110 - .flags = CON_ENABLED, 2111 - .write = write_msg, 2083 + .name = "netcon", 2084 + .flags = CON_ENABLED | CON_NBCON | CON_NBCON_ATOMIC_UNSAFE, 2085 + .write_thread = netconsole_write_basic, 2086 + .write_atomic = netconsole_write_basic, 2087 + .device_lock = netconsole_device_lock, 2088 + .device_unlock = netconsole_device_unlock, 2112 2089 }; 2113 2090 2114 2091 static int __init init_netconsole(void)
+8
include/linux/console.h
··· 298 298 * @outbuf: Pointer to the text buffer for output 299 299 * @len: Length to write 300 300 * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred 301 + * @cpu: CPU on which the message was generated 302 + * @pid: PID of the task that generated the message 303 + * @comm: Name of the task that generated the message 301 304 */ 302 305 struct nbcon_write_context { 303 306 struct nbcon_context __private ctxt; 304 307 char *outbuf; 305 308 unsigned int len; 306 309 bool unsafe_takeover; 310 + #ifdef CONFIG_PRINTK_EXECUTION_CTX 311 + int cpu; 312 + pid_t pid; 313 + char comm[TASK_COMM_LEN]; 314 + #endif 307 315 }; 308 316 309 317 /**
+8
kernel/printk/internal.h
··· 281 281 * nothing to output and this record should be skipped. 282 282 * @seq: The sequence number of the record used for @pbufs->outbuf. 283 283 * @dropped: The number of dropped records from reading @seq. 284 + * @cpu: CPU on which the message was generated. 285 + * @pid: PID of the task that generated the message 286 + * @comm: Name of the task that generated the message. 284 287 */ 285 288 struct printk_message { 286 289 struct printk_buffers *pbufs; 287 290 unsigned int outbuf_len; 288 291 u64 seq; 289 292 unsigned long dropped; 293 + #ifdef CONFIG_PRINTK_EXECUTION_CTX 294 + int cpu; 295 + pid_t pid; 296 + char comm[TASK_COMM_LEN]; 297 + #endif 290 298 }; 291 299 292 300 bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
+16
kernel/printk/nbcon.c
··· 946 946 } 947 947 EXPORT_SYMBOL_GPL(nbcon_reacquire_nobuf); 948 948 949 + #ifdef CONFIG_PRINTK_EXECUTION_CTX 950 + static void wctxt_load_execution_ctx(struct nbcon_write_context *wctxt, 951 + struct printk_message *pmsg) 952 + { 953 + wctxt->cpu = pmsg->cpu; 954 + wctxt->pid = pmsg->pid; 955 + memcpy(wctxt->comm, pmsg->comm, sizeof(wctxt->comm)); 956 + static_assert(sizeof(wctxt->comm) == sizeof(pmsg->comm)); 957 + } 958 + #else 959 + static void wctxt_load_execution_ctx(struct nbcon_write_context *wctxt, 960 + struct printk_message *pmsg) {} 961 + #endif 962 + 949 963 /** 950 964 * nbcon_emit_next_record - Emit a record in the acquired context 951 965 * @wctxt: The write context that will be handed to the write function ··· 1061 1047 1062 1048 /* Initialize the write context for driver callbacks. */ 1063 1049 nbcon_write_context_set_buf(wctxt, &pmsg.pbufs->outbuf[0], pmsg.outbuf_len); 1050 + 1051 + wctxt_load_execution_ctx(wctxt, &pmsg); 1064 1052 1065 1053 if (use_atomic) 1066 1054 con->write_atomic(con, wctxt);
+53 -1
kernel/printk/printk.c
··· 2131 2131 } 2132 2132 } 2133 2133 2134 + #define CALLER_ID_MASK 0x80000000 2135 + 2134 2136 static inline u32 printk_caller_id(void) 2135 2137 { 2136 2138 return in_task() ? task_pid_nr(current) : 2137 - 0x80000000 + smp_processor_id(); 2139 + CALLER_ID_MASK + smp_processor_id(); 2138 2140 } 2141 + 2142 + #ifdef CONFIG_PRINTK_EXECUTION_CTX 2143 + /* Store the opposite info than caller_id. */ 2144 + static u32 printk_caller_id2(void) 2145 + { 2146 + return !in_task() ? task_pid_nr(current) : 2147 + CALLER_ID_MASK + smp_processor_id(); 2148 + } 2149 + 2150 + static pid_t printk_info_get_pid(const struct printk_info *info) 2151 + { 2152 + u32 caller_id = info->caller_id; 2153 + u32 caller_id2 = info->caller_id2; 2154 + 2155 + return caller_id & CALLER_ID_MASK ? caller_id2 : caller_id; 2156 + } 2157 + 2158 + static int printk_info_get_cpu(const struct printk_info *info) 2159 + { 2160 + u32 caller_id = info->caller_id; 2161 + u32 caller_id2 = info->caller_id2; 2162 + 2163 + return ((caller_id & CALLER_ID_MASK ? 2164 + caller_id : caller_id2) & ~CALLER_ID_MASK); 2165 + } 2166 + #endif 2139 2167 2140 2168 /** 2141 2169 * printk_parse_prefix - Parse level and control flags. 
··· 2240 2212 2241 2213 return text_len; 2242 2214 } 2215 + 2216 + #ifdef CONFIG_PRINTK_EXECUTION_CTX 2217 + static void printk_store_execution_ctx(struct printk_info *info) 2218 + { 2219 + info->caller_id2 = printk_caller_id2(); 2220 + get_task_comm(info->comm, current); 2221 + } 2222 + 2223 + static void pmsg_load_execution_ctx(struct printk_message *pmsg, 2224 + const struct printk_info *info) 2225 + { 2226 + pmsg->cpu = printk_info_get_cpu(info); 2227 + pmsg->pid = printk_info_get_pid(info); 2228 + memcpy(pmsg->comm, info->comm, sizeof(pmsg->comm)); 2229 + static_assert(sizeof(pmsg->comm) == sizeof(info->comm)); 2230 + } 2231 + #else 2232 + static void printk_store_execution_ctx(struct printk_info *info) {} 2233 + 2234 + static void pmsg_load_execution_ctx(struct printk_message *pmsg, 2235 + const struct printk_info *info) {} 2236 + #endif 2243 2237 2244 2238 __printf(4, 0) 2245 2239 int vprintk_store(int facility, int level, ··· 2370 2320 r.info->caller_id = caller_id; 2371 2321 if (dev_info) 2372 2322 memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); 2323 + printk_store_execution_ctx(r.info); 2373 2324 2374 2325 /* A message without a trailing newline can be continued. */ 2375 2326 if (!(flags & LOG_NEWLINE)) ··· 3053 3002 pmsg->seq = r.info->seq; 3054 3003 pmsg->dropped = r.info->seq - seq; 3055 3004 force_con = r.info->flags & LOG_FORCE_CON; 3005 + pmsg_load_execution_ctx(pmsg, r.info); 3056 3006 3057 3007 /* 3058 3008 * Skip records that are not forced to be printed on consoles and that
+5
kernel/printk/printk_ringbuffer.h
··· 23 23 u8 flags:5; /* internal record flags */ 24 24 u8 level:3; /* syslog level */ 25 25 u32 caller_id; /* thread id or processor id */ 26 + #ifdef CONFIG_PRINTK_EXECUTION_CTX 27 + u32 caller_id2; /* caller_id complement */ 28 + /* name of the task that generated the message */ 29 + char comm[TASK_COMM_LEN]; 30 + #endif 26 31 27 32 struct dev_printk_info dev_info; 28 33 };
+12
lib/Kconfig.debug
··· 35 35 no option to enable/disable at the kernel command line parameter or 36 36 sysfs interface. 37 37 38 + config PRINTK_EXECUTION_CTX 39 + bool 40 + depends on PRINTK 41 + help 42 + This option extends struct printk_info to include extra execution 43 + context in printk, such as task name and CPU number from where the 44 + message originated. This is useful for correlating printk messages 45 + with specific execution contexts. 46 + 47 + This is automatically enabled when a console driver that supports 48 + execution context is selected. 49 + 38 50 config STACKTRACE_BUILD_ID 39 51 bool "Show build ID information in stacktraces" 40 52 depends on PRINTK