Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue fixes from Tejun Heo:

- Improve workqueue stall diagnostics: dump all busy workers (not just
running ones), show wall-clock duration of in-flight work items, and
add a sample module for reproducing stalls

- Fix POOL_BH vs WQ_BH flag namespace mismatch in pr_cont_worker_id()

- Rename pool->watchdog_ts to pool->last_progress_ts and related
functions for clarity

* tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
workqueue: Rename show_cpu_pool{s,}_hog{s,}() to reflect broadened scope
workqueue: Add stall detector sample module
workqueue: Show all busy workers in stall diagnostics
workqueue: Show in-flight work item duration in stall diagnostics
workqueue: Rename pool->watchdog_ts to pool->last_progress_ts
workqueue: Use POOL_BH instead of WQ_BH when checking pool flags

+128 -27
+28 -27
kernel/workqueue.c
··· 190 190 int id; /* I: pool ID */ 191 191 unsigned int flags; /* L: flags */ 192 192 193 - unsigned long watchdog_ts; /* L: watchdog timestamp */ 193 + unsigned long last_progress_ts; /* L: last forward progress timestamp */ 194 194 bool cpu_stall; /* WD: stalled cpu bound pool */ 195 195 196 196 /* ··· 1697 1697 WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE)); 1698 1698 trace_workqueue_activate_work(work); 1699 1699 if (list_empty(&pwq->pool->worklist)) 1700 - pwq->pool->watchdog_ts = jiffies; 1700 + pwq->pool->last_progress_ts = jiffies; 1701 1701 move_linked_works(work, &pwq->pool->worklist, NULL); 1702 1702 __clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb); 1703 1703 } ··· 2348 2348 */ 2349 2349 if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { 2350 2350 if (list_empty(&pool->worklist)) 2351 - pool->watchdog_ts = jiffies; 2351 + pool->last_progress_ts = jiffies; 2352 2352 2353 2353 trace_workqueue_activate_work(work); 2354 2354 insert_work(pwq, work, &pool->worklist, work_flags); ··· 3204 3204 worker->current_pwq = pwq; 3205 3205 if (worker->task) 3206 3206 worker->current_at = worker->task->se.sum_exec_runtime; 3207 + worker->current_start = jiffies; 3207 3208 work_data = *work_data_bits(work); 3208 3209 worker->current_color = get_work_color(work_data); 3209 3210 ··· 3353 3352 while ((work = list_first_entry_or_null(&worker->scheduled, 3354 3353 struct work_struct, entry))) { 3355 3354 if (first) { 3356 - worker->pool->watchdog_ts = jiffies; 3355 + worker->pool->last_progress_ts = jiffies; 3357 3356 first = false; 3358 3357 } 3359 3358 process_one_work(worker, work); ··· 4851 4850 pool->cpu = -1; 4852 4851 pool->node = NUMA_NO_NODE; 4853 4852 pool->flags |= POOL_DISASSOCIATED; 4854 - pool->watchdog_ts = jiffies; 4853 + pool->last_progress_ts = jiffies; 4855 4854 INIT_LIST_HEAD(&pool->worklist); 4856 4855 INIT_LIST_HEAD(&pool->idle_list); 4857 4856 hash_init(pool->busy_hash); ··· 6275 6274 { 6276 6275 struct worker_pool *pool = worker->pool; 6277 6276 6278 - if (pool->flags & WQ_BH) 6277 + if (pool->flags & POOL_BH) 6279 6278 pr_cont("bh%s", 6280 6279 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); 6281 6280 else ··· 6360 6359 pr_cont(" %s", comma ? "," : ""); 6361 6360 pr_cont_worker_id(worker); 6362 6361 pr_cont(":%ps", worker->current_func); 6362 + pr_cont(" for %us", 6363 + jiffies_to_msecs(jiffies - worker->current_start) / 1000); 6363 6364 list_for_each_entry(work, &worker->scheduled, entry) 6364 6365 pr_cont_work(false, work, &pcws); 6365 6366 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); ··· 6465 6462 6466 6463 /* How long the first pending work is waiting for a worker. */ 6467 6464 if (!list_empty(&pool->worklist)) 6468 - hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; 6465 + hung = jiffies_to_msecs(jiffies - pool->last_progress_ts) / 1000; 6469 6466 6470 6467 /* 6471 6468 * Defer printing to avoid deadlocks in console drivers that ··· 7583 7580 7584 7581 /* 7585 7582 * Show workers that might prevent the processing of pending work items. 7586 - * The only candidates are CPU-bound workers in the running state. 7587 - * Pending work items should be handled by another idle worker 7588 - * in all other situations. 7583 + * A busy worker that is not running on the CPU (e.g. sleeping in 7584 + * wait_event_idle() with PF_WQ_WORKER cleared) can stall the pool just as 7585 + * effectively as a CPU-bound one, so dump every in-flight worker. 7589 7586 */ 7590 - static void show_cpu_pool_hog(struct worker_pool *pool) 7587 + static void show_cpu_pool_busy_workers(struct worker_pool *pool) 7591 7588 { 7592 7589 struct worker *worker; 7593 7590 unsigned long irq_flags; ··· 7596 7593 raw_spin_lock_irqsave(&pool->lock, irq_flags); 7597 7594 7598 7595 hash_for_each(pool->busy_hash, bkt, worker, hentry) { 7599 - if (task_is_running(worker->task)) { 7600 - /* 7601 - * Defer printing to avoid deadlocks in console 7602 - * drivers that queue work while holding locks 7603 - * also taken in their write paths. 7604 - */ 7605 - printk_deferred_enter(); 7596 + /* 7597 + * Defer printing to avoid deadlocks in console 7598 + * drivers that queue work while holding locks 7599 + * also taken in their write paths. 7600 + */ 7601 + printk_deferred_enter(); 7606 7602 7607 - pr_info("pool %d:\n", pool->id); 7608 - sched_show_task(worker->task); 7603 + pr_info("pool %d:\n", pool->id); 7604 + sched_show_task(worker->task); 7609 7605 7610 - printk_deferred_exit(); 7611 - } 7606 + printk_deferred_exit(); 7612 7607 } 7613 7608 7614 7609 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); 7615 7610 } 7616 7611 7617 - static void show_cpu_pools_hogs(void) 7612 + static void show_cpu_pools_busy_workers(void) 7618 7613 { 7619 7614 struct worker_pool *pool; 7620 7615 int pi; 7621 7616 7622 - pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); 7617 + pr_info("Showing backtraces of busy workers in stalled worker pools:\n"); 7623 7618 7624 7619 rcu_read_lock(); 7625 7620 7626 7621 for_each_pool(pool, pi) { 7627 7622 if (pool->cpu_stall) 7628 - show_cpu_pool_hog(pool); 7623 + show_cpu_pool_busy_workers(pool); 7629 7624 7630 7625 } 7631 7626 ··· 7692 7691 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); 7693 7692 else 7694 7693 touched = READ_ONCE(wq_watchdog_touched); 7695 - pool_ts = READ_ONCE(pool->watchdog_ts); 7694 + pool_ts = READ_ONCE(pool->last_progress_ts); 7696 7695 7697 7696 if (time_after(pool_ts, touched)) 7698 7697 ts = pool_ts; ··· 7720 7719 show_all_workqueues(); 7721 7720 7722 7721 if (cpu_pool_stall) 7723 - show_cpu_pools_hogs(); 7722 + show_cpu_pools_busy_workers(); 7724 7723 7725 7724 if (lockup_detected) 7726 7725 panic_on_wq_watchdog(max_stall_time);
+1
kernel/workqueue_internal.h
··· 32 32 work_func_t current_func; /* K: function */ 33 33 struct pool_workqueue *current_pwq; /* K: pwq */ 34 34 u64 current_at; /* K: runtime at start or last wakeup */ 35 + unsigned long current_start; /* K: start time of current work item */ 35 36 unsigned int current_color; /* K: color */ 36 37 37 38 int sleeping; /* S: is worker sleeping? */
+1
samples/workqueue/stall_detector/Makefile
··· 1 + obj-m += wq_stall.o
+98
samples/workqueue/stall_detector/wq_stall.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * wq_stall - Test module for the workqueue stall detector. 4 + * 5 + * Deliberately creates a workqueue stall so the watchdog fires and 6 + * prints diagnostic output. Useful for verifying that the stall 7 + * detector correctly identifies stuck workers and produces useful 8 + * backtraces. 9 + * 10 + * The stall is triggered by clearing PF_WQ_WORKER before sleeping, 11 + * which hides the worker from the concurrency manager. A second 12 + * work item queued on the same pool then sits in the worklist with 13 + * no worker available to process it. 14 + * 15 + * After ~30s the workqueue watchdog fires: 16 + * BUG: workqueue lockup - pool cpus=N ... 17 + * 18 + * Build: 19 + * make -C <kernel tree> M=samples/workqueue/stall_detector modules 20 + * 21 + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 22 + * Copyright (c) 2026 Breno Leitao <leitao@debian.org> 23 + */ 24 + 25 + #include <linux/module.h> 26 + #include <linux/workqueue.h> 27 + #include <linux/wait.h> 28 + #include <linux/atomic.h> 29 + #include <linux/sched.h> 30 + 31 + static DECLARE_WAIT_QUEUE_HEAD(stall_wq_head); 32 + static atomic_t wake_condition = ATOMIC_INIT(0); 33 + static struct work_struct stall_work1; 34 + static struct work_struct stall_work2; 35 + 36 + static void stall_work2_fn(struct work_struct *work) 37 + { 38 + pr_info("wq_stall: second work item finally ran\n"); 39 + } 40 + 41 + static void stall_work1_fn(struct work_struct *work) 42 + { 43 + pr_info("wq_stall: first work item running on cpu %d\n", 44 + raw_smp_processor_id()); 45 + 46 + /* 47 + * Queue second item while we're still counted as running 48 + * (pool->nr_running > 0). Since schedule_work() on a per-CPU 49 + * workqueue targets raw_smp_processor_id(), item 2 lands on the 50 + * same pool. __queue_work -> kick_pool -> need_more_worker() 51 + * sees nr_running > 0 and does NOT wake a new worker. 52 + */ 53 + schedule_work(&stall_work2); 54 + 55 + /* 56 + * Hide from the workqueue concurrency manager. Without 57 + * PF_WQ_WORKER, schedule() won't call wq_worker_sleeping(), 58 + * so nr_running is never decremented and no replacement 59 + * worker is created. Item 2 stays stuck in pool->worklist. 60 + */ 61 + current->flags &= ~PF_WQ_WORKER; 62 + 63 + pr_info("wq_stall: entering wait_event_idle (PF_WQ_WORKER cleared)\n"); 64 + pr_info("wq_stall: expect 'BUG: workqueue lockup' in ~30-60s\n"); 65 + wait_event_idle(stall_wq_head, atomic_read(&wake_condition) != 0); 66 + 67 + /* Restore so process_one_work() cleanup works correctly */ 68 + current->flags |= PF_WQ_WORKER; 69 + pr_info("wq_stall: woke up, PF_WQ_WORKER restored\n"); 70 + } 71 + 72 + static int __init wq_stall_init(void) 73 + { 74 + pr_info("wq_stall: loading\n"); 75 + 76 + INIT_WORK(&stall_work1, stall_work1_fn); 77 + INIT_WORK(&stall_work2, stall_work2_fn); 78 + schedule_work(&stall_work1); 79 + 80 + return 0; 81 + } 82 + 83 + static void __exit wq_stall_exit(void) 84 + { 85 + pr_info("wq_stall: unloading\n"); 86 + atomic_set(&wake_condition, 1); 87 + wake_up(&stall_wq_head); 88 + flush_work(&stall_work1); 89 + flush_work(&stall_work2); 90 + pr_info("wq_stall: all work flushed, module unloaded\n"); 91 + } 92 + 93 + module_init(wq_stall_init); 94 + module_exit(wq_stall_exit); 95 + 96 + MODULE_LICENSE("GPL"); 97 + MODULE_DESCRIPTION("Reproduce workqueue stall caused by PF_WQ_WORKER misuse"); 98 + MODULE_AUTHOR("Breno Leitao <leitao@debian.org>");