Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'percpu_ref-rcu-audit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc

Pull percpu_ref rcu fixes from Tejun Heo:
"Jann Horn found that aio was depending on the internal RCU grace
periods of percpu-ref and that it's broken because aio uses regular
RCU while percpu_ref uses sched-RCU.

Depending on percpu_ref's internal grace periods isn't a good idea
because

- The RCU type might not match.

- percpu_ref's grace periods are used to switch to atomic mode. They
  do not fall between the last put and the invocation of the release
  callback. This is easy to get confused about and can lead to subtle
  bugs.

- percpu_ref might not have grace periods at all depending on its
current operation mode.

This patchset audits percpu_ref users' RCU usage and fixes the problems found"

[ There's a continuation of this series that clarifies in the percpu_ref
documentation that the internal grace periods must not be depended
upon, and that introduces rcu_work to simplify bouncing to a workqueue
after an RCU grace period.

That will go in for 4.17 - this is just the minimal set with the fixes
that are tagged for -stable ]

* 'percpu_ref-rcu-audit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc:
RDMAVT: Fix synchronization around percpu_ref
fs/aio: Use RCU accessors for kioctx_table->table[]
fs/aio: Add explicit RCU grace period when freeing kioctx

+36 -18
+6 -4
drivers/infiniband/sw/rdmavt/mr.c
··· 489 489 unsigned long timeout; 490 490 struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); 491 491 492 - if (percpu_ref_is_zero(&mr->refcount)) 493 - return 0; 494 - /* avoid dma mr */ 495 - if (mr->lkey) 492 + if (mr->lkey) { 493 + /* avoid dma mr */ 496 494 rvt_dereg_clean_qps(mr); 495 + /* @mr was indexed on rcu protected @lkey_table */ 496 + synchronize_rcu(); 497 + } 498 + 497 499 timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ); 498 500 if (!timeout) { 499 501 rvt_pr_err(rdi,
+30 -14
fs/aio.c
··· 68 68 #define AIO_RING_PAGES 8 69 69 70 70 struct kioctx_table { 71 - struct rcu_head rcu; 72 - unsigned nr; 73 - struct kioctx *table[]; 71 + struct rcu_head rcu; 72 + unsigned nr; 73 + struct kioctx __rcu *table[]; 74 74 }; 75 75 76 76 struct kioctx_cpu { ··· 115 115 struct page **ring_pages; 116 116 long nr_pages; 117 117 118 - struct work_struct free_work; 118 + struct rcu_head free_rcu; 119 + struct work_struct free_work; /* see free_ioctx() */ 119 120 120 121 /* 121 122 * signals when all in-flight requests are done ··· 330 329 for (i = 0; i < table->nr; i++) { 331 330 struct kioctx *ctx; 332 331 333 - ctx = table->table[i]; 332 + ctx = rcu_dereference(table->table[i]); 334 333 if (ctx && ctx->aio_ring_file == file) { 335 334 if (!atomic_read(&ctx->dead)) { 336 335 ctx->user_id = ctx->mmap_base = vma->vm_start; ··· 589 588 return cancel(&kiocb->common); 590 589 } 591 590 591 + /* 592 + * free_ioctx() should be RCU delayed to synchronize against the RCU 593 + * protected lookup_ioctx() and also needs process context to call 594 + * aio_free_ring(), so the double bouncing through kioctx->free_rcu and 595 + * ->free_work. 
596 + */ 592 597 static void free_ioctx(struct work_struct *work) 593 598 { 594 599 struct kioctx *ctx = container_of(work, struct kioctx, free_work); ··· 608 601 kmem_cache_free(kioctx_cachep, ctx); 609 602 } 610 603 604 + static void free_ioctx_rcufn(struct rcu_head *head) 605 + { 606 + struct kioctx *ctx = container_of(head, struct kioctx, free_rcu); 607 + 608 + INIT_WORK(&ctx->free_work, free_ioctx); 609 + schedule_work(&ctx->free_work); 610 + } 611 + 611 612 static void free_ioctx_reqs(struct percpu_ref *ref) 612 613 { 613 614 struct kioctx *ctx = container_of(ref, struct kioctx, reqs); ··· 624 609 if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) 625 610 complete(&ctx->rq_wait->comp); 626 611 627 - INIT_WORK(&ctx->free_work, free_ioctx); 628 - schedule_work(&ctx->free_work); 612 + /* Synchronize against RCU protected table->table[] dereferences */ 613 + call_rcu(&ctx->free_rcu, free_ioctx_rcufn); 629 614 } 630 615 631 616 /* ··· 666 651 while (1) { 667 652 if (table) 668 653 for (i = 0; i < table->nr; i++) 669 - if (!table->table[i]) { 654 + if (!rcu_access_pointer(table->table[i])) { 670 655 ctx->id = i; 671 - table->table[i] = ctx; 656 + rcu_assign_pointer(table->table[i], ctx); 672 657 spin_unlock(&mm->ioctx_lock); 673 658 674 659 /* While kioctx setup is in progress, ··· 849 834 } 850 835 851 836 table = rcu_dereference_raw(mm->ioctx_table); 852 - WARN_ON(ctx != table->table[ctx->id]); 853 - table->table[ctx->id] = NULL; 837 + WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id])); 838 + RCU_INIT_POINTER(table->table[ctx->id], NULL); 854 839 spin_unlock(&mm->ioctx_lock); 855 840 856 - /* percpu_ref_kill() will do the necessary call_rcu() */ 841 + /* free_ioctx_reqs() will do the necessary RCU synchronization */ 857 842 wake_up_all(&ctx->wait); 858 843 859 844 /* ··· 895 880 896 881 skipped = 0; 897 882 for (i = 0; i < table->nr; ++i) { 898 - struct kioctx *ctx = table->table[i]; 883 + struct kioctx *ctx = 884 + 
rcu_dereference_protected(table->table[i], true); 899 885 900 886 if (!ctx) { 901 887 skipped++; ··· 1085 1069 if (!table || id >= table->nr) 1086 1070 goto out; 1087 1071 1088 - ctx = table->table[id]; 1072 + ctx = rcu_dereference(table->table[id]); 1089 1073 if (ctx && ctx->user_id == ctx_id) { 1090 1074 percpu_ref_get(&ctx->users); 1091 1075 ret = ctx;