Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

padata: Replace delayed timer with immediate workqueue in padata_reorder

The function padata_reorder will use a timer when it cannot progress
while completed jobs are outstanding (pd->reorder_objects > 0). This
is suboptimal: if we do end up using the timer, it introduces a
gratuitous delay of one second.

In fact, we can easily distinguish between completed jobs being
outstanding and being able to make progress. All we have to do is
look at the next pqueue list.

This patch does that by replacing pd->processed with pd->cpu so
that the next pqueue is more accessible.

A work queue is used instead of the original try_again to avoid
hogging the CPU.

Note that we don't bother removing the work queue in
padata_flush_queues because the whole premise is broken. You
cannot flush async crypto requests, so it makes no sense to even
try. A subsequent patch will fix this by replacing the flush with
a ref counting scheme.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

+22 -88
+4 -9
include/linux/padata.h
··· 12 12 #include <linux/workqueue.h> 13 13 #include <linux/spinlock.h> 14 14 #include <linux/list.h> 15 - #include <linux/timer.h> 16 15 #include <linux/notifier.h> 17 16 #include <linux/kobject.h> 18 17 ··· 72 73 * @serial: List to wait for serialization after reordering. 73 74 * @pwork: work struct for parallelization. 74 75 * @swork: work struct for serialization. 75 - * @pd: Backpointer to the internal control structure. 76 76 * @work: work struct for parallelization. 77 - * @reorder_work: work struct for reordering. 78 77 * @num_obj: Number of objects that are processed by this cpu. 79 78 * @cpu_index: Index of the cpu. 80 79 */ 81 80 struct padata_parallel_queue { 82 81 struct padata_list parallel; 83 82 struct padata_list reorder; 84 - struct parallel_data *pd; 85 83 struct work_struct work; 86 - struct work_struct reorder_work; 87 84 atomic_t num_obj; 88 85 int cpu_index; 89 86 }; ··· 105 110 * @reorder_objects: Number of objects waiting in the reorder queues. 106 111 * @refcnt: Number of objects holding a reference on this parallel_data. 107 112 * @max_seq_nr: Maximal used sequence number. 113 + * @cpu: Next CPU to be processed. 108 114 * @cpumask: The cpumasks in use for parallel and serial workers. 115 + * @reorder_work: work struct for reordering. 109 116 * @lock: Reorder lock. 110 - * @processed: Number of already processed objects. 111 - * @timer: Reorder timer. 112 117 */ 113 118 struct parallel_data { 114 119 struct padata_instance *pinst; ··· 117 122 atomic_t reorder_objects; 118 123 atomic_t refcnt; 119 124 atomic_t seq_nr; 125 + int cpu; 120 126 struct padata_cpumask cpumask; 127 + struct work_struct reorder_work; 121 128 spinlock_t lock ____cacheline_aligned; 122 - unsigned int processed; 123 - struct timer_list timer; 124 129 }; 125 130 126 131 /**
+18 -79
kernel/padata.c
··· 165 165 */ 166 166 static struct padata_priv *padata_get_next(struct parallel_data *pd) 167 167 { 168 - int cpu, num_cpus; 169 - unsigned int next_nr, next_index; 170 168 struct padata_parallel_queue *next_queue; 171 169 struct padata_priv *padata; 172 170 struct padata_list *reorder; 171 + int cpu = pd->cpu; 173 172 174 - num_cpus = cpumask_weight(pd->cpumask.pcpu); 175 - 176 - /* 177 - * Calculate the percpu reorder queue and the sequence 178 - * number of the next object. 179 - */ 180 - next_nr = pd->processed; 181 - next_index = next_nr % num_cpus; 182 - cpu = padata_index_to_cpu(pd, next_index); 183 173 next_queue = per_cpu_ptr(pd->pqueue, cpu); 184 - 185 174 reorder = &next_queue->reorder; 186 175 187 176 spin_lock(&reorder->lock); ··· 181 192 list_del_init(&padata->list); 182 193 atomic_dec(&pd->reorder_objects); 183 194 184 - pd->processed++; 195 + pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, 196 + false); 185 197 186 198 spin_unlock(&reorder->lock); 187 199 goto out; ··· 205 215 struct padata_priv *padata; 206 216 struct padata_serial_queue *squeue; 207 217 struct padata_instance *pinst = pd->pinst; 218 + struct padata_parallel_queue *next_queue; 208 219 209 220 /* 210 221 * We need to ensure that only one cpu can work on dequeueing of ··· 237 246 * so exit immediately. 238 247 */ 239 248 if (PTR_ERR(padata) == -ENODATA) { 240 - del_timer(&pd->timer); 241 249 spin_unlock_bh(&pd->lock); 242 250 return; 243 251 } ··· 255 265 256 266 /* 257 267 * The next object that needs serialization might have arrived to 258 - * the reorder queues in the meantime, we will be called again 259 - * from the timer function if no one else cares for it. 268 + * the reorder queues in the meantime. 260 269 * 261 - * Ensure reorder_objects is read after pd->lock is dropped so we see 262 - * an increment from another task in padata_do_serial. 
Pairs with 270 + * Ensure reorder queue is read after pd->lock is dropped so we see 271 + * new objects from another task in padata_do_serial. Pairs with 263 272 * smp_mb__after_atomic in padata_do_serial. 264 273 */ 265 274 smp_mb(); 266 - if (atomic_read(&pd->reorder_objects) 267 - && !(pinst->flags & PADATA_RESET)) 268 - mod_timer(&pd->timer, jiffies + HZ); 269 - else 270 - del_timer(&pd->timer); 271 275 272 - return; 276 + next_queue = per_cpu_ptr(pd->pqueue, pd->cpu); 277 + if (!list_empty(&next_queue->reorder.list)) 278 + queue_work(pinst->wq, &pd->reorder_work); 273 279 } 274 280 275 281 static void invoke_padata_reorder(struct work_struct *work) 276 282 { 277 - struct padata_parallel_queue *pqueue; 278 283 struct parallel_data *pd; 279 284 280 285 local_bh_disable(); 281 - pqueue = container_of(work, struct padata_parallel_queue, reorder_work); 282 - pd = pqueue->pd; 286 + pd = container_of(work, struct parallel_data, reorder_work); 283 287 padata_reorder(pd); 284 288 local_bh_enable(); 285 - } 286 - 287 - static void padata_reorder_timer(struct timer_list *t) 288 - { 289 - struct parallel_data *pd = from_timer(pd, t, timer); 290 - unsigned int weight; 291 - int target_cpu, cpu; 292 - 293 - cpu = get_cpu(); 294 - 295 - /* We don't lock pd here to not interfere with parallel processing 296 - * padata_reorder() calls on other CPUs. We just need any CPU out of 297 - * the cpumask.pcpu set. It would be nice if it's the right one but 298 - * it doesn't matter if we're off to the next one by using an outdated 299 - * pd->processed value. 300 - */ 301 - weight = cpumask_weight(pd->cpumask.pcpu); 302 - target_cpu = padata_index_to_cpu(pd, pd->processed % weight); 303 - 304 - /* ensure to call the reorder callback on the correct CPU */ 305 - if (cpu != target_cpu) { 306 - struct padata_parallel_queue *pqueue; 307 - struct padata_instance *pinst; 308 - 309 - /* The timer function is serialized wrt itself -- no locking 310 - * needed. 
311 - */ 312 - pinst = pd->pinst; 313 - pqueue = per_cpu_ptr(pd->pqueue, target_cpu); 314 - queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work); 315 - } else { 316 - padata_reorder(pd); 317 - } 318 - 319 - put_cpu(); 320 289 } 321 290 322 291 static void padata_serial_worker(struct work_struct *serial_work) ··· 325 376 326 377 cpu = get_cpu(); 327 378 328 - /* We need to run on the same CPU padata_do_parallel(.., padata, ..) 329 - * was called on -- or, at least, enqueue the padata object into the 330 - * correct per-cpu queue. 379 + /* We need to enqueue the padata object into the correct 380 + * per-cpu queue. 331 381 */ 332 382 if (cpu != padata->cpu) { 333 383 reorder_via_wq = 1; ··· 336 388 pqueue = per_cpu_ptr(pd->pqueue, cpu); 337 389 338 390 spin_lock(&pqueue->reorder.lock); 339 - atomic_inc(&pd->reorder_objects); 340 391 list_add_tail(&padata->list, &pqueue->reorder.list); 392 + atomic_inc(&pd->reorder_objects); 341 393 spin_unlock(&pqueue->reorder.lock); 342 394 343 395 /* 344 - * Ensure the atomic_inc of reorder_objects above is ordered correctly 396 + * Ensure the addition to the reorder list is ordered correctly 345 397 * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb 346 398 * in padata_reorder. 347 399 */ ··· 349 401 350 402 put_cpu(); 351 403 352 - /* If we're running on the wrong CPU, call padata_reorder() via a 353 - * kernel worker. 
354 - */ 355 - if (reorder_via_wq) 356 - queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work); 357 - else 358 - padata_reorder(pd); 404 + padata_reorder(pd); 359 405 } 360 406 EXPORT_SYMBOL(padata_do_serial); 361 407 ··· 405 463 continue; 406 464 } 407 465 408 - pqueue->pd = pd; 409 466 pqueue->cpu_index = cpu_index; 410 467 cpu_index++; 411 468 412 469 __padata_list_init(&pqueue->reorder); 413 470 __padata_list_init(&pqueue->parallel); 414 471 INIT_WORK(&pqueue->work, padata_parallel_worker); 415 - INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder); 416 472 atomic_set(&pqueue->num_obj, 0); 417 473 } 418 474 } ··· 438 498 439 499 padata_init_pqueues(pd); 440 500 padata_init_squeues(pd); 441 - timer_setup(&pd->timer, padata_reorder_timer, 0); 442 501 atomic_set(&pd->seq_nr, -1); 443 502 atomic_set(&pd->reorder_objects, 0); 444 503 atomic_set(&pd->refcnt, 0); 445 504 pd->pinst = pinst; 446 505 spin_lock_init(&pd->lock); 506 + pd->cpu = cpumask_first(pcpumask); 507 + INIT_WORK(&pd->reorder_work, invoke_padata_reorder); 447 508 448 509 return pd; 449 510 ··· 478 537 pqueue = per_cpu_ptr(pd->pqueue, cpu); 479 538 flush_work(&pqueue->work); 480 539 } 481 - 482 - del_timer_sync(&pd->timer); 483 540 484 541 if (atomic_read(&pd->reorder_objects)) 485 542 padata_reorder(pd);