Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.14-2021-08-07' of git://git.kernel.dk/linux-block

Pull io_uring from Jens Axboe:
"A few io-wq related fixes:

- Fix potential nr_worker race and missing max_workers check from one
path (Hao)

- Fix race between worker exiting and new work queue (me)"

* tag 'io_uring-5.14-2021-08-07' of git://git.kernel.dk/linux-block:
io-wq: fix lack of acct->nr_workers < acct->max_workers judgement
io-wq: fix no lock protection of acct->nr_worker
io-wq: fix race between worker exiting and activating free worker

+45 -26
+45 -26
fs/io-wq.c
@@ -130,6 +130,7 @@
 };
 
 static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void io_wqe_dec_running(struct io_worker *worker);
 
 static bool io_worker_get(struct io_worker *worker)
 {
@@ -168,26 +169,21 @@
 {
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-	unsigned flags;
 
 	if (refcount_dec_and_test(&worker->ref))
 		complete(&worker->ref_done);
 	wait_for_completion(&worker->ref_done);
 
-	preempt_disable();
-	current->flags &= ~PF_IO_WORKER;
-	flags = worker->flags;
-	worker->flags = 0;
-	if (flags & IO_WORKER_F_RUNNING)
-		atomic_dec(&acct->nr_running);
-	worker->flags = 0;
-	preempt_enable();
-
 	raw_spin_lock_irq(&wqe->lock);
-	if (flags & IO_WORKER_F_FREE)
+	if (worker->flags & IO_WORKER_F_FREE)
 		hlist_nulls_del_rcu(&worker->nulls_node);
 	list_del_rcu(&worker->all_list);
 	acct->nr_workers--;
+	preempt_disable();
+	io_wqe_dec_running(worker);
+	worker->flags = 0;
+	current->flags &= ~PF_IO_WORKER;
+	preempt_enable();
 	raw_spin_unlock_irq(&wqe->lock);
 
 	kfree_rcu(worker, rcu);
@@ -214,15 +210,19 @@
 	struct hlist_nulls_node *n;
 	struct io_worker *worker;
 
-	n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
-	if (is_a_nulls(n))
-		return false;
-
-	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
-	if (io_worker_get(worker)) {
-		wake_up_process(worker->task);
+	/*
+	 * Iterate free_list and see if we can find an idle worker to
+	 * activate. If a given worker is on the free_list but in the process
+	 * of exiting, keep trying.
+	 */
+	hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+		if (!io_worker_get(worker))
+			continue;
+		if (wake_up_process(worker->task)) {
+			io_worker_release(worker);
+			return true;
+		}
 		io_worker_release(worker);
-		return true;
 	}
 
 	return false;
@@ -247,10 +247,19 @@
 	ret = io_wqe_activate_free_worker(wqe);
 	rcu_read_unlock();
 
-	if (!ret && acct->nr_workers < acct->max_workers) {
-		atomic_inc(&acct->nr_running);
-		atomic_inc(&wqe->wq->worker_refs);
-		create_io_worker(wqe->wq, wqe, acct->index);
+	if (!ret) {
+		bool do_create = false;
+
+		raw_spin_lock_irq(&wqe->lock);
+		if (acct->nr_workers < acct->max_workers) {
+			atomic_inc(&acct->nr_running);
+			atomic_inc(&wqe->wq->worker_refs);
+			acct->nr_workers++;
+			do_create = true;
+		}
+		raw_spin_unlock_irq(&wqe->lock);
+		if (do_create)
+			create_io_worker(wqe->wq, wqe, acct->index);
 	}
 }
 
@@ -271,9 +280,17 @@
 {
 	struct create_worker_data *cwd;
 	struct io_wq *wq;
+	struct io_wqe *wqe;
+	struct io_wqe_acct *acct;
 
 	cwd = container_of(cb, struct create_worker_data, work);
-	wq = cwd->wqe->wq;
+	wqe = cwd->wqe;
+	wq = wqe->wq;
+	acct = &wqe->acct[cwd->index];
+	raw_spin_lock_irq(&wqe->lock);
+	if (acct->nr_workers < acct->max_workers)
+		acct->nr_workers++;
+	raw_spin_unlock_irq(&wqe->lock);
 	create_io_worker(wq, cwd->wqe, cwd->index);
 	kfree(cwd);
 }
@@ -635,6 +652,9 @@
 		kfree(worker);
 fail:
 		atomic_dec(&acct->nr_running);
+		raw_spin_lock_irq(&wqe->lock);
+		acct->nr_workers--;
+		raw_spin_unlock_irq(&wqe->lock);
 		io_worker_ref_put(wq);
 		return;
 	}
@@ -650,9 +670,8 @@
 	worker->flags |= IO_WORKER_F_FREE;
 	if (index == IO_WQ_ACCT_BOUND)
 		worker->flags |= IO_WORKER_F_BOUND;
-	if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+	if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
 		worker->flags |= IO_WORKER_F_FIXED;
-	acct->nr_workers++;
 	raw_spin_unlock_irq(&wqe->lock);
 	wake_up_new_task(tsk);
 }