Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

uprobes/x86: Add mapping for optimized uprobe trampolines

Add support for a special mapping for the user space trampoline, with the
following functions:

get_uprobe_trampoline - find or add uprobe_trampoline
destroy_uprobe_trampoline - remove and free uprobe_trampoline

The user space trampoline is exported as an arch-specific user space special
mapping through tramp_mapping, which is initialized in the following changes
with the new uprobe syscall.

The uprobe trampoline needs to be callable/reachable from the probed address,
so while searching for an available address we use the is_reachable_by_call()
function to decide whether the trampoline is callable from the probed address.

All uprobe_trampoline objects are stored in the uprobes_state object and are
cleaned up when the process mm_struct goes down. New arch hooks are added
for that, because this change is x86_64 specific.

Locking is provided by callers in the following changes.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20250720112133.244369-9-jolsa@kernel.org

authored by

Jiri Olsa and committed by
Peter Zijlstra
91440ff4 18a11125

+161
+144
arch/x86/kernel/uprobes.c
··· 608 608 *sr = utask->autask.saved_scratch_register; 609 609 } 610 610 } 611 + 612 + static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) 613 + { 614 + return -EPERM; 615 + } 616 + 617 + static struct page *tramp_mapping_pages[2] __ro_after_init; 618 + 619 + static struct vm_special_mapping tramp_mapping = { 620 + .name = "[uprobes-trampoline]", 621 + .mremap = tramp_mremap, 622 + .pages = tramp_mapping_pages, 623 + }; 624 + 625 + struct uprobe_trampoline { 626 + struct hlist_node node; 627 + unsigned long vaddr; 628 + }; 629 + 630 + static bool is_reachable_by_call(unsigned long vtramp, unsigned long vaddr) 631 + { 632 + long delta = (long)(vaddr + 5 - vtramp); 633 + 634 + return delta >= INT_MIN && delta <= INT_MAX; 635 + } 636 + 637 + static unsigned long find_nearest_trampoline(unsigned long vaddr) 638 + { 639 + struct vm_unmapped_area_info info = { 640 + .length = PAGE_SIZE, 641 + .align_mask = ~PAGE_MASK, 642 + }; 643 + unsigned long low_limit, high_limit; 644 + unsigned long low_tramp, high_tramp; 645 + unsigned long call_end = vaddr + 5; 646 + 647 + if (check_add_overflow(call_end, INT_MIN, &low_limit)) 648 + low_limit = PAGE_SIZE; 649 + 650 + high_limit = call_end + INT_MAX; 651 + 652 + /* Search up from the caller address. */ 653 + info.low_limit = call_end; 654 + info.high_limit = min(high_limit, TASK_SIZE); 655 + high_tramp = vm_unmapped_area(&info); 656 + 657 + /* Search down from the caller address. */ 658 + info.low_limit = max(low_limit, PAGE_SIZE); 659 + info.high_limit = call_end; 660 + info.flags = VM_UNMAPPED_AREA_TOPDOWN; 661 + low_tramp = vm_unmapped_area(&info); 662 + 663 + if (IS_ERR_VALUE(high_tramp) && IS_ERR_VALUE(low_tramp)) 664 + return -ENOMEM; 665 + if (IS_ERR_VALUE(high_tramp)) 666 + return low_tramp; 667 + if (IS_ERR_VALUE(low_tramp)) 668 + return high_tramp; 669 + 670 + /* Return address that's closest to the caller address. 
*/ 671 + if (call_end - low_tramp < high_tramp - call_end) 672 + return low_tramp; 673 + return high_tramp; 674 + } 675 + 676 + static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr) 677 + { 678 + struct pt_regs *regs = task_pt_regs(current); 679 + struct mm_struct *mm = current->mm; 680 + struct uprobe_trampoline *tramp; 681 + struct vm_area_struct *vma; 682 + 683 + if (!user_64bit_mode(regs)) 684 + return NULL; 685 + 686 + vaddr = find_nearest_trampoline(vaddr); 687 + if (IS_ERR_VALUE(vaddr)) 688 + return NULL; 689 + 690 + tramp = kzalloc(sizeof(*tramp), GFP_KERNEL); 691 + if (unlikely(!tramp)) 692 + return NULL; 693 + 694 + tramp->vaddr = vaddr; 695 + vma = _install_special_mapping(mm, tramp->vaddr, PAGE_SIZE, 696 + VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO, 697 + &tramp_mapping); 698 + if (IS_ERR(vma)) { 699 + kfree(tramp); 700 + return NULL; 701 + } 702 + return tramp; 703 + } 704 + 705 + __maybe_unused 706 + static struct uprobe_trampoline *get_uprobe_trampoline(unsigned long vaddr, bool *new) 707 + { 708 + struct uprobes_state *state = &current->mm->uprobes_state; 709 + struct uprobe_trampoline *tramp = NULL; 710 + 711 + if (vaddr > TASK_SIZE || vaddr < PAGE_SIZE) 712 + return NULL; 713 + 714 + hlist_for_each_entry(tramp, &state->head_tramps, node) { 715 + if (is_reachable_by_call(tramp->vaddr, vaddr)) { 716 + *new = false; 717 + return tramp; 718 + } 719 + } 720 + 721 + tramp = create_uprobe_trampoline(vaddr); 722 + if (!tramp) 723 + return NULL; 724 + 725 + *new = true; 726 + hlist_add_head(&tramp->node, &state->head_tramps); 727 + return tramp; 728 + } 729 + 730 + static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp) 731 + { 732 + /* 733 + * We do not unmap and release uprobe trampoline page itself, 734 + * because there's no easy way to make sure none of the threads 735 + * is still inside the trampoline. 
736 + */ 737 + hlist_del(&tramp->node); 738 + kfree(tramp); 739 + } 740 + 741 + void arch_uprobe_init_state(struct mm_struct *mm) 742 + { 743 + INIT_HLIST_HEAD(&mm->uprobes_state.head_tramps); 744 + } 745 + 746 + void arch_uprobe_clear_state(struct mm_struct *mm) 747 + { 748 + struct uprobes_state *state = &mm->uprobes_state; 749 + struct uprobe_trampoline *tramp; 750 + struct hlist_node *n; 751 + 752 + hlist_for_each_entry_safe(tramp, n, &state->head_tramps, node) 753 + destroy_uprobe_trampoline(tramp); 754 + } 611 755 #else /* 32-bit: */ 612 756 /* 613 757 * No RIP-relative addressing on 32-bit
+6
include/linux/uprobes.h
··· 17 17 #include <linux/wait.h> 18 18 #include <linux/timer.h> 19 19 #include <linux/seqlock.h> 20 + #include <linux/mutex.h> 20 21 21 22 struct uprobe; 22 23 struct vm_area_struct; ··· 186 185 187 186 struct uprobes_state { 188 187 struct xol_area *xol_area; 188 + #ifdef CONFIG_X86_64 189 + struct hlist_head head_tramps; 190 + #endif 189 191 }; 190 192 191 193 typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, ··· 237 233 extern void *arch_uretprobe_trampoline(unsigned long *psize); 238 234 extern unsigned long uprobe_get_trampoline_vaddr(void); 239 235 extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); 236 + extern void arch_uprobe_clear_state(struct mm_struct *mm); 237 + extern void arch_uprobe_init_state(struct mm_struct *mm); 240 238 #else /* !CONFIG_UPROBES */ 241 239 struct uprobes_state { 242 240 };
+10
kernel/events/uprobes.c
··· 1801 1801 return area; 1802 1802 } 1803 1803 1804 + void __weak arch_uprobe_clear_state(struct mm_struct *mm) 1805 + { 1806 + } 1807 + 1808 + void __weak arch_uprobe_init_state(struct mm_struct *mm) 1809 + { 1810 + } 1811 + 1804 1812 /* 1805 1813 * uprobe_clear_state - Free the area allocated for slots. 1806 1814 */ ··· 1819 1811 mutex_lock(&delayed_uprobe_lock); 1820 1812 delayed_uprobe_remove(NULL, mm); 1821 1813 mutex_unlock(&delayed_uprobe_lock); 1814 + 1815 + arch_uprobe_clear_state(mm); 1822 1816 1823 1817 if (!area) 1824 1818 return;
+1
kernel/fork.c
··· 1015 1015 { 1016 1016 #ifdef CONFIG_UPROBES 1017 1017 mm->uprobes_state.xol_area = NULL; 1018 + arch_uprobe_init_state(mm); 1018 1019 #endif 1019 1020 } 1020 1021