Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

arm64: mm: More flags for __flush_tlb_range()

Refactor the "_nosync", "_local" and "_nonotify" function variants into
a single __always_inline implementation that takes flags, and rely on
constant folding to select, based on the flags provided, only the parts
that are actually needed at any given callsite.

Flags all live in the tlbf_t (TLB flags) type; TLBF_NONE (0) continues
to provide the strongest semantics (i.e. evict from walk cache,
broadcast, synchronise and notify). Each flag reduces the strength in
some way; TLBF_NONOTIFY, TLBF_NOSYNC and TLBF_NOBROADCAST are added to
complement the existing TLBF_NOWALKCACHE.

There are no users that require TLBF_NOBROADCAST without
TLBF_NOWALKCACHE, so implement that combination as BUILD_BUG() to avoid
introducing dead code for vae1 invalidations.

The result is a clearer, simpler, more powerful API.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

authored by

Ryan Roberts and committed by
Catalin Marinas
0477fc56 11f6dd8d

+62 -42
+56 -39
arch/arm64/include/asm/tlbflush.h
··· 295 295 * no invalidation may take place. In the case where the level 296 296 * cannot be easily determined, the value TLBI_TTL_UNKNOWN will 297 297 * perform a non-hinted invalidation. flags may be TLBF_NONE (0) or 298 - * TLBF_NOWALKCACHE (elide eviction of walk cache entries). 298 + * any combination of TLBF_NOWALKCACHE (elide eviction of walk 299 + * cache entries), TLBF_NONOTIFY (don't call mmu notifiers), 300 + * TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST 301 + * (only perform the invalidation for the local cpu). 299 302 * 300 303 * local_flush_tlb_page(vma, addr) 301 304 * Local variant of flush_tlb_page(). Stale TLB entries may ··· 307 304 * local_flush_tlb_page_nonotify(vma, addr) 308 305 * Same as local_flush_tlb_page() except MMU notifier will not be 309 306 * called. 310 - * 311 - * local_flush_tlb_contpte(vma, addr) 312 - * Invalidate the virtual-address range 313 - * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU 314 - * for the user address space corresponding to 'vma->mm'. Stale 315 - * TLB entries may remain in remote CPUs. 316 307 * 317 308 * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented 318 309 * on top of these routines, since that is our interface to the mmu_gather ··· 549 552 /* Invalidate tlb entries only, leaving the page table walk cache intact. */ 550 553 #define TLBF_NOWALKCACHE ((__force tlbf_t)BIT(0)) 551 554 552 - static inline void __flush_tlb_range_nosync(struct mm_struct *mm, 553 - unsigned long start, unsigned long end, 554 - unsigned long stride, int tlb_level, 555 - tlbf_t flags) 555 + /* Skip the trailing dsb after issuing tlbi. */ 556 + #define TLBF_NOSYNC ((__force tlbf_t)BIT(1)) 557 + 558 + /* Suppress tlb notifier callbacks for this flush operation. */ 559 + #define TLBF_NONOTIFY ((__force tlbf_t)BIT(2)) 560 + 561 + /* Perform the tlbi locally without broadcasting to other CPUs. 
*/ 562 + #define TLBF_NOBROADCAST ((__force tlbf_t)BIT(3)) 563 + 564 + static __always_inline void __do_flush_tlb_range(struct vm_area_struct *vma, 565 + unsigned long start, unsigned long end, 566 + unsigned long stride, int tlb_level, 567 + tlbf_t flags) 556 568 { 569 + struct mm_struct *mm = vma->vm_mm; 557 570 unsigned long asid, pages; 558 571 559 - start = round_down(start, stride); 560 - end = round_up(end, stride); 561 572 pages = (end - start) >> PAGE_SHIFT; 562 573 563 574 if (__flush_tlb_range_limit_excess(pages, stride)) { ··· 573 568 return; 574 569 } 575 570 576 - dsb(ishst); 571 + if (!(flags & TLBF_NOBROADCAST)) 572 + dsb(ishst); 573 + else 574 + dsb(nshst); 575 + 577 576 asid = ASID(mm); 578 577 579 - if (flags & TLBF_NOWALKCACHE) 580 - __flush_s1_tlb_range_op(vale1is, start, pages, stride, 581 - asid, tlb_level); 582 - else 578 + switch (flags & (TLBF_NOWALKCACHE | TLBF_NOBROADCAST)) { 579 + case TLBF_NONE: 583 580 __flush_s1_tlb_range_op(vae1is, start, pages, stride, 584 - asid, tlb_level); 581 + asid, tlb_level); 582 + break; 583 + case TLBF_NOWALKCACHE: 584 + __flush_s1_tlb_range_op(vale1is, start, pages, stride, 585 + asid, tlb_level); 586 + break; 587 + case TLBF_NOBROADCAST: 588 + /* Combination unused */ 589 + BUG(); 590 + break; 591 + case TLBF_NOWALKCACHE | TLBF_NOBROADCAST: 592 + __flush_s1_tlb_range_op(vale1, start, pages, stride, 593 + asid, tlb_level); 594 + break; 595 + } 585 596 586 - mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); 597 + if (!(flags & TLBF_NONOTIFY)) 598 + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); 599 + 600 + if (!(flags & TLBF_NOSYNC)) { 601 + if (!(flags & TLBF_NOBROADCAST)) 602 + __tlbi_sync_s1ish(); 603 + else 604 + dsb(nsh); 605 + } 587 606 } 588 607 589 608 static inline void __flush_tlb_range(struct vm_area_struct *vma, ··· 615 586 unsigned long stride, int tlb_level, 616 587 tlbf_t flags) 617 588 { 618 - __flush_tlb_range_nosync(vma->vm_mm, start, end, stride, 619 - tlb_level, 
flags); 620 - __tlbi_sync_s1ish(); 621 - } 622 - 623 - static inline void local_flush_tlb_contpte(struct vm_area_struct *vma, 624 - unsigned long addr) 625 - { 626 - unsigned long asid; 627 - 628 - addr = round_down(addr, CONT_PTE_SIZE); 629 - 630 - dsb(nshst); 631 - asid = ASID(vma->vm_mm); 632 - __flush_s1_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, 3); 633 - mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr, 634 - addr + CONT_PTE_SIZE); 635 - dsb(nsh); 589 + start = round_down(start, stride); 590 + end = round_up(end, stride); 591 + __do_flush_tlb_range(vma, start, end, stride, tlb_level, flags); 636 592 } 637 593 638 594 static inline void flush_tlb_range(struct vm_area_struct *vma, ··· 670 656 static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, 671 657 struct mm_struct *mm, unsigned long start, unsigned long end) 672 658 { 673 - __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, 3, TLBF_NOWALKCACHE); 659 + struct vm_area_struct vma = { .vm_mm = mm, .vm_flags = 0 }; 660 + 661 + __flush_tlb_range(&vma, start, end, PAGE_SIZE, 3, 662 + TLBF_NOWALKCACHE | TLBF_NOSYNC); 674 663 } 675 664 676 665 static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
+6 -3
arch/arm64/mm/contpte.c
··· 552 552 * See comment in __ptep_clear_flush_young(); same rationale for 553 553 * eliding the trailing DSB applies here. 554 554 */ 555 - __flush_tlb_range_nosync(vma->vm_mm, addr, end, 556 - PAGE_SIZE, 3, TLBF_NOWALKCACHE); 555 + __flush_tlb_range(vma, addr, end, PAGE_SIZE, 3, 556 + TLBF_NOWALKCACHE | TLBF_NOSYNC); 557 557 } 558 558 559 559 return young; ··· 686 686 __ptep_set_access_flags(vma, addr, ptep, entry, 0); 687 687 688 688 if (dirty) 689 - local_flush_tlb_contpte(vma, start_addr); 689 + __flush_tlb_range(vma, start_addr, 690 + start_addr + CONT_PTE_SIZE, 691 + PAGE_SIZE, 3, 692 + TLBF_NOWALKCACHE | TLBF_NOBROADCAST); 690 693 } else { 691 694 __contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte); 692 695 __ptep_set_access_flags(vma, addr, ptep, entry, dirty);