Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'wq-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:

- WQ_PERCPU was added to the remaining alloc_workqueue() users, and
system_wq usage was replaced with system_percpu_wq and
system_unbound_wq usage with system_dfl_wq.

These are equivalent conversions with no functional changes,
preparing to switch the default from percpu to unbound workqueues.

- A handshake mechanism was added for canceling BH workers to avoid
live lock scenarios under PREEMPT_RT.

- Unnecessary rcu_read_lock/unlock() calls were dropped in
wq_watchdog_timer_fn() and workqueue_congested().

- Documentation was fixed to resolve texinfodocs warnings.

* tag 'wq-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
workqueue: fix texinfodocs warning for WQ_* flags reference
workqueue: WQ_PERCPU added to alloc_workqueue users
workqueue: replace use of system_wq with system_percpu_wq
workqueue: replace use of system_unbound_wq with system_dfl_wq
workqueue: Provide a handshake for canceling BH workers
workqueue: Remove rcu_read_lock/unlock() in wq_watchdog_timer_fn()
workqueue: Remove redundant rcu_read_lock/unlock() in workqueue_congested()

+69 -43
+16 -16
include/linux/workqueue.h
··· 410 410 __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ 411 411 412 412 /* BH wq only allows the following flags */ 413 - __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, 413 + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU, 414 414 }; 415 415 416 416 enum wq_consts { ··· 434 434 * short queue flush time. Don't queue works which can run for too 435 435 * long. 436 436 * 437 - * system_highpri_wq is similar to system_wq but for work items which 437 + * system_highpri_wq is similar to system_percpu_wq but for work items which 438 438 * require WQ_HIGHPRI. 439 439 * 440 - * system_long_wq is similar to system_wq but may host long running 440 + * system_long_wq is similar to system_percpu_wq but may host long running 441 441 * works. Queue flushing might take relatively long. 442 442 * 443 443 * system_dfl_wq is unbound workqueue. Workers are not bound to ··· 445 445 * executed immediately as long as max_active limit is not reached and 446 446 * resources are available. 447 447 * 448 - * system_freezable_wq is equivalent to system_wq except that it's 448 + * system_freezable_wq is equivalent to system_percpu_wq except that it's 449 449 * freezable. 450 450 * 451 451 * *_power_efficient_wq are inclined towards saving power and converted 452 452 * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, 453 453 * they are same as their non-power-efficient counterparts - e.g. 454 - * system_power_efficient_wq is identical to system_wq if 454 + * system_power_efficient_wq is identical to system_percpu_wq if 455 455 * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. 456 456 * 457 457 * system_bh[_highpri]_wq are convenience interface to softirq. BH work items ··· 502 502 * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means 503 503 * that the sum of per-node max_active's may be larger than @max_active. 
504 504 * 505 - * For detailed information on %WQ_* flags, please refer to 505 + * For detailed information on %WQ_\* flags, please refer to 506 506 * Documentation/core-api/workqueue.rst. 507 507 * 508 508 * RETURNS: ··· 570 570 alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) 571 571 572 572 #define create_workqueue(name) \ 573 - alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) 573 + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name)) 574 574 #define create_freezable_workqueue(name) \ 575 575 alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ 576 576 WQ_MEM_RECLAIM, 1, (name)) ··· 708 708 */ 709 709 static inline bool schedule_work_on(int cpu, struct work_struct *work) 710 710 { 711 - return queue_work_on(cpu, system_wq, work); 711 + return queue_work_on(cpu, system_percpu_wq, work); 712 712 } 713 713 714 714 /** ··· 727 727 */ 728 728 static inline bool schedule_work(struct work_struct *work) 729 729 { 730 - return queue_work(system_wq, work); 730 + return queue_work(system_percpu_wq, work); 731 731 } 732 732 733 733 /** ··· 770 770 #define flush_scheduled_work() \ 771 771 ({ \ 772 772 __warn_flushing_systemwide_wq(); \ 773 - __flush_workqueue(system_wq); \ 773 + __flush_workqueue(system_percpu_wq); \ 774 774 }) 775 775 776 776 #define flush_workqueue(wq) \ 777 777 ({ \ 778 778 struct workqueue_struct *_wq = (wq); \ 779 779 \ 780 - if ((__builtin_constant_p(_wq == system_wq) && \ 781 - _wq == system_wq) || \ 780 + if ((__builtin_constant_p(_wq == system_percpu_wq) && \ 781 + _wq == system_percpu_wq) || \ 782 782 (__builtin_constant_p(_wq == system_highpri_wq) && \ 783 783 _wq == system_highpri_wq) || \ 784 784 (__builtin_constant_p(_wq == system_long_wq) && \ 785 785 _wq == system_long_wq) || \ 786 - (__builtin_constant_p(_wq == system_unbound_wq) && \ 787 - _wq == system_unbound_wq) || \ 786 + (__builtin_constant_p(_wq == system_dfl_wq) && \ 787 + _wq == system_dfl_wq) || \ 788 788 
(__builtin_constant_p(_wq == system_freezable_wq) && \ 789 789 _wq == system_freezable_wq) || \ 790 790 (__builtin_constant_p(_wq == system_power_efficient_wq) && \ ··· 807 807 static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, 808 808 unsigned long delay) 809 809 { 810 - return queue_delayed_work_on(cpu, system_wq, dwork, delay); 810 + return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay); 811 811 } 812 812 813 813 /** ··· 821 821 static inline bool schedule_delayed_work(struct delayed_work *dwork, 822 822 unsigned long delay) 823 823 { 824 - return queue_delayed_work(system_wq, dwork, delay); 824 + return queue_delayed_work(system_percpu_wq, dwork, delay); 825 825 } 826 826 827 827 #ifndef CONFIG_SMP
+53 -27
kernel/workqueue.c
··· 222 222 struct workqueue_attrs *attrs; /* I: worker attributes */ 223 223 struct hlist_node hash_node; /* PL: unbound_pool_hash node */ 224 224 int refcnt; /* PL: refcnt for unbound pools */ 225 - 225 + #ifdef CONFIG_PREEMPT_RT 226 + spinlock_t cb_lock; /* BH worker cancel lock */ 227 + #endif 226 228 /* 227 229 * Destruction of pool is RCU protected to allow dereferences 228 230 * from get_work_pool(). ··· 2932 2930 raw_spin_unlock_irq(&pool->lock); 2933 2931 2934 2932 if (do_cull) 2935 - queue_work(system_unbound_wq, &pool->idle_cull_work); 2933 + queue_work(system_dfl_wq, &pool->idle_cull_work); 2936 2934 } 2937 2935 2938 2936 /** ··· 3079 3077 if (need_to_create_worker(pool)) 3080 3078 goto restart; 3081 3079 } 3080 + 3081 + #ifdef CONFIG_PREEMPT_RT 3082 + static void worker_lock_callback(struct worker_pool *pool) 3083 + { 3084 + spin_lock(&pool->cb_lock); 3085 + } 3086 + 3087 + static void worker_unlock_callback(struct worker_pool *pool) 3088 + { 3089 + spin_unlock(&pool->cb_lock); 3090 + } 3091 + 3092 + static void workqueue_callback_cancel_wait_running(struct worker_pool *pool) 3093 + { 3094 + spin_lock(&pool->cb_lock); 3095 + spin_unlock(&pool->cb_lock); 3096 + } 3097 + 3098 + #else 3099 + 3100 + static void worker_lock_callback(struct worker_pool *pool) { } 3101 + static void worker_unlock_callback(struct worker_pool *pool) { } 3102 + static void workqueue_callback_cancel_wait_running(struct worker_pool *pool) { } 3103 + 3104 + #endif 3082 3105 3083 3106 /** 3084 3107 * manage_workers - manage worker pool ··· 3584 3557 int nr_restarts = BH_WORKER_RESTARTS; 3585 3558 unsigned long end = jiffies + BH_WORKER_JIFFIES; 3586 3559 3560 + worker_lock_callback(pool); 3587 3561 raw_spin_lock_irq(&pool->lock); 3588 3562 worker_leave_idle(worker); 3589 3563 ··· 3613 3585 worker_enter_idle(worker); 3614 3586 kick_pool(pool); 3615 3587 raw_spin_unlock_irq(&pool->lock); 3588 + worker_unlock_callback(pool); 3616 3589 } 3617 3590 3618 3591 /* ··· 4251 4222 (data & 
WORK_OFFQ_BH)) { 4252 4223 /* 4253 4224 * On RT, prevent a live lock when %current preempted 4254 - * soft interrupt processing or prevents ksoftirqd from 4255 - * running by keeping flipping BH. If the BH work item 4256 - * runs on a different CPU then this has no effect other 4257 - * than doing the BH disable/enable dance for nothing. 4258 - * This is copied from 4259 - * kernel/softirq.c::tasklet_unlock_spin_wait(). 4225 + * soft interrupt processing by blocking on lock which 4226 + * is owned by the thread invoking the callback. 4260 4227 */ 4261 4228 while (!try_wait_for_completion(&barr.done)) { 4262 4229 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 4263 - local_bh_disable(); 4264 - local_bh_enable(); 4230 + struct worker_pool *pool; 4231 + 4232 + guard(rcu)(); 4233 + pool = get_work_pool(work); 4234 + if (pool) 4235 + workqueue_callback_cancel_wait_running(pool); 4265 4236 } else { 4266 4237 cpu_relax(); 4267 4238 } ··· 4811 4782 ida_init(&pool->worker_ida); 4812 4783 INIT_HLIST_NODE(&pool->hash_node); 4813 4784 pool->refcnt = 1; 4785 + #ifdef CONFIG_PREEMPT_RT 4786 + spin_lock_init(&pool->cb_lock); 4787 + #endif 4814 4788 4815 4789 /* shouldn't fail above this point */ 4816 4790 pool->attrs = alloc_workqueue_attrs(); ··· 6078 6046 struct pool_workqueue *pwq; 6079 6047 bool ret; 6080 6048 6081 - rcu_read_lock(); 6082 6049 preempt_disable(); 6083 6050 6084 6051 if (cpu == WORK_CPU_UNBOUND) ··· 6087 6056 ret = !list_empty(&pwq->inactive_works); 6088 6057 6089 6058 preempt_enable(); 6090 - rcu_read_unlock(); 6091 6059 6092 6060 return ret; 6093 6061 } ··· 7576 7546 if (!thresh) 7577 7547 return; 7578 7548 7579 - rcu_read_lock(); 7580 - 7581 7549 for_each_pool(pool, pi) { 7582 7550 unsigned long pool_ts, touched, ts; 7583 7551 ··· 7616 7588 7617 7589 7618 7590 } 7619 - 7620 - rcu_read_unlock(); 7621 7591 7622 7592 if (lockup_detected) 7623 7593 show_all_workqueues(); ··· 7668 7642 if (ret) 7669 7643 return ret; 7670 7644 7671 - if (system_wq) 7645 + if 
(system_percpu_wq) 7672 7646 wq_watchdog_set_thresh(thresh); 7673 7647 else 7674 7648 wq_watchdog_thresh = thresh; ··· 7828 7802 ordered_wq_attrs[i] = attrs; 7829 7803 } 7830 7804 7831 - system_wq = alloc_workqueue("events", 0, 0); 7832 - system_percpu_wq = alloc_workqueue("events", 0, 0); 7833 - system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0); 7834 - system_long_wq = alloc_workqueue("events_long", 0, 0); 7805 + system_wq = alloc_workqueue("events", WQ_PERCPU, 0); 7806 + system_percpu_wq = alloc_workqueue("events", WQ_PERCPU, 0); 7807 + system_highpri_wq = alloc_workqueue("events_highpri", 7808 + WQ_HIGHPRI | WQ_PERCPU, 0); 7809 + system_long_wq = alloc_workqueue("events_long", WQ_PERCPU, 0); 7835 7810 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE); 7836 7811 system_dfl_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE); 7837 7812 system_freezable_wq = alloc_workqueue("events_freezable", 7838 - WQ_FREEZABLE, 0); 7813 + WQ_FREEZABLE | WQ_PERCPU, 0); 7839 7814 system_power_efficient_wq = alloc_workqueue("events_power_efficient", 7840 - WQ_POWER_EFFICIENT, 0); 7815 + WQ_POWER_EFFICIENT | WQ_PERCPU, 0); 7841 7816 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient", 7842 - WQ_FREEZABLE | WQ_POWER_EFFICIENT, 7843 - 0); 7844 - system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0); 7817 + WQ_FREEZABLE | WQ_POWER_EFFICIENT | WQ_PERCPU, 0); 7818 + system_bh_wq = alloc_workqueue("events_bh", WQ_BH | WQ_PERCPU, 0); 7845 7819 system_bh_highpri_wq = alloc_workqueue("events_bh_highpri", 7846 - WQ_BH | WQ_HIGHPRI, 0); 7820 + WQ_BH | WQ_HIGHPRI | WQ_PERCPU, 0); 7847 7821 BUG_ON(!system_wq || !system_percpu_wq|| !system_highpri_wq || !system_long_wq || 7848 7822 !system_unbound_wq || !system_freezable_wq || !system_dfl_wq || 7849 7823 !system_power_efficient_wq ||