Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'memcg-accounting-for-bpf-arena'

Puranjay Mohan says:

====================
memcg accounting for BPF arena

v4: https://lore.kernel.org/all/20260102181333.3033679-1-puranjay@kernel.org/
Changes in v4->v5:
- Remove unused variables from bpf_map_alloc_pages() (CI)

v3: https://lore.kernel.org/all/20260102151852.570285-1-puranjay@kernel.org/
Changes in v3->v4:
- Do memcg set/recover in arena_reserve_pages() rather than
bpf_arena_reserve_pages() for symmetry with other kfuncs (Alexei)

v2: https://lore.kernel.org/all/20251231141434.3416822-1-puranjay@kernel.org/
Changes in v2->v3:
- Remove memcg accounting from bpf_map_alloc_pages() as the caller does
it already. (Alexei)
- Do memcg set/recover in arena_alloc/free_pages() rather than
bpf_arena_alloc/free_pages(); this reduces copy-pasting in the
sleepable/non_sleepable functions.

v1: https://lore.kernel.org/all/20251230153006.1347742-1-puranjay@kernel.org/
Changes in v1->v2:
- Return both pointers through arguments from bpf_map_memcg_enter() and
make it return void. (Alexei)
- Add memcg accounting in arena_free_worker (AI)

This series adds memcg accounting to the arena kfuncs and to the other
places in arena.c that perform allocations.
====================

Link: https://patch.msgid.link/20260102200230.25168-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+68 -34
+15
include/linux/bpf.h
··· 2608 2608 int bpf_map_alloc_pages(const struct bpf_map *map, int nid, 2609 2609 unsigned long nr_pages, struct page **page_array); 2610 2610 #ifdef CONFIG_MEMCG 2611 + void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, 2612 + struct mem_cgroup **new_memcg); 2613 + void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, 2614 + struct mem_cgroup *memcg); 2611 2615 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 2612 2616 int node); 2613 2617 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, ··· 2636 2632 kvcalloc(_n, _size, _flags) 2637 2633 #define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ 2638 2634 __alloc_percpu_gfp(_size, _align, _flags) 2635 + static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, 2636 + struct mem_cgroup **new_memcg) 2637 + { 2638 + *new_memcg = NULL; 2639 + *old_memcg = NULL; 2640 + } 2641 + 2642 + static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, 2643 + struct mem_cgroup *memcg) 2644 + { 2645 + } 2639 2646 #endif 2640 2647 2641 2648 static inline int
+26 -3
kernel/bpf/arena.c
··· 360 360 { 361 361 struct bpf_map *map = vmf->vma->vm_file->private_data; 362 362 struct bpf_arena *arena = container_of(map, struct bpf_arena, map); 363 + struct mem_cgroup *new_memcg, *old_memcg; 363 364 struct page *page; 364 365 long kbase, kaddr; 365 366 unsigned long flags; ··· 377 376 if (page) 378 377 /* already have a page vmap-ed */ 379 378 goto out; 379 + 380 + bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg); 380 381 381 382 if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT) 382 383 /* User space requested to segfault when page is not allocated by bpf prog */ ··· 403 400 goto out_unlock_sigsegv; 404 401 } 405 402 flush_vmap_cache(kaddr, PAGE_SIZE); 403 + bpf_map_memcg_exit(old_memcg, new_memcg); 406 404 out: 407 405 page_ref_add(page, 1); 408 406 raw_res_spin_unlock_irqrestore(&arena->spinlock, flags); 409 407 vmf->page = page; 410 408 return 0; 411 409 out_unlock_sigsegv: 410 + bpf_map_memcg_exit(old_memcg, new_memcg); 412 411 raw_res_spin_unlock_irqrestore(&arena->spinlock, flags); 413 412 return VM_FAULT_SIGSEGV; 414 413 } ··· 539 534 /* user_vm_end/start are fixed before bpf prog runs */ 540 535 long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT; 541 536 u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena); 537 + struct mem_cgroup *new_memcg, *old_memcg; 542 538 struct apply_range_data data; 543 539 struct page **pages = NULL; 544 540 long remaining, mapped = 0; ··· 561 555 return 0; 562 556 } 563 557 558 + bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg); 564 559 /* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. 
*/ 565 560 alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *)); 566 - pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE); 567 - if (!pages) 561 + pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE); 562 + if (!pages) { 563 + bpf_map_memcg_exit(old_memcg, new_memcg); 568 564 return 0; 565 + } 569 566 data.pages = pages; 570 567 571 568 if (raw_res_spin_lock_irqsave(&arena->spinlock, flags)) ··· 626 617 flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT); 627 618 raw_res_spin_unlock_irqrestore(&arena->spinlock, flags); 628 619 kfree_nolock(pages); 620 + bpf_map_memcg_exit(old_memcg, new_memcg); 629 621 return clear_lo32(arena->user_vm_start) + uaddr32; 630 622 out: 631 623 range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped); ··· 640 630 raw_res_spin_unlock_irqrestore(&arena->spinlock, flags); 641 631 out_free_pages: 642 632 kfree_nolock(pages); 633 + bpf_map_memcg_exit(old_memcg, new_memcg); 643 634 return 0; 644 635 } 645 636 ··· 662 651 663 652 static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable) 664 653 { 654 + struct mem_cgroup *new_memcg, *old_memcg; 665 655 u64 full_uaddr, uaddr_end; 666 656 long kaddr, pgoff; 667 657 struct page *page; ··· 683 671 684 672 page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT; 685 673 pgoff = compute_pgoff(arena, uaddr); 674 + bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg); 686 675 687 676 if (!sleepable) 688 677 goto defer; ··· 722 709 zap_pages(arena, full_uaddr, 1); 723 710 __free_page(page); 724 711 } 712 + bpf_map_memcg_exit(old_memcg, new_memcg); 725 713 726 714 return; 727 715 728 716 defer: 729 - s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1); 717 + s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1); 718 + bpf_map_memcg_exit(old_memcg, new_memcg); 730 719 if (!s) 731 720 /* 732 721 * If allocation fails in non-sleepable context, pages 
are intentionally left ··· 750 735 static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt) 751 736 { 752 737 long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT; 738 + struct mem_cgroup *new_memcg, *old_memcg; 753 739 unsigned long flags; 754 740 long pgoff; 755 741 int ret; ··· 773 757 } 774 758 775 759 /* "Allocate" the region to prevent it from being allocated. */ 760 + bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg); 776 761 ret = range_tree_clear(&arena->rt, pgoff, page_cnt); 762 + bpf_map_memcg_exit(old_memcg, new_memcg); 777 763 out: 778 764 raw_res_spin_unlock_irqrestore(&arena->spinlock, flags); 779 765 return ret; ··· 784 766 static void arena_free_worker(struct work_struct *work) 785 767 { 786 768 struct bpf_arena *arena = container_of(work, struct bpf_arena, free_work); 769 + struct mem_cgroup *new_memcg, *old_memcg; 787 770 struct llist_node *list, *pos, *t; 788 771 struct arena_free_span *s; 789 772 u64 arena_vm_start, user_vm_start; ··· 798 779 schedule_work(work); 799 780 return; 800 781 } 782 + 783 + bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg); 801 784 802 785 init_llist_head(&free_pages); 803 786 arena_vm_start = bpf_arena_get_kern_vm_start(arena); ··· 841 820 page = llist_entry(pos, struct page, pcp_llist); 842 821 __free_page(page); 843 822 } 823 + 824 + bpf_map_memcg_exit(old_memcg, new_memcg); 844 825 } 845 826 846 827 static void arena_free_irq(struct irq_work *iw)
+3 -2
kernel/bpf/range_tree.c
··· 149 149 range_it_insert(rn, rt); 150 150 151 151 /* Add a range */ 152 - new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE); 152 + new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, 153 + NUMA_NO_NODE); 153 154 if (!new_rn) 154 155 return -ENOMEM; 155 156 new_rn->rn_start = last + 1; ··· 235 234 right->rn_start = start; 236 235 range_it_insert(right, rt); 237 236 } else { 238 - left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE); 237 + left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE); 239 238 if (!left) 240 239 return -ENOMEM; 241 240 left->rn_start = start;
+24 -29
kernel/bpf/syscall.c
··· 505 505 return root_mem_cgroup; 506 506 } 507 507 508 + void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg, 509 + struct mem_cgroup **new_memcg) 510 + { 511 + *new_memcg = bpf_map_get_memcg(map); 512 + *old_memcg = set_active_memcg(*new_memcg); 513 + } 514 + 515 + void bpf_map_memcg_exit(struct mem_cgroup *old_memcg, 516 + struct mem_cgroup *new_memcg) 517 + { 518 + set_active_memcg(old_memcg); 519 + mem_cgroup_put(new_memcg); 520 + } 521 + 508 522 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 509 523 int node) 510 524 { 511 525 struct mem_cgroup *memcg, *old_memcg; 512 526 void *ptr; 513 527 514 - memcg = bpf_map_get_memcg(map); 515 - old_memcg = set_active_memcg(memcg); 528 + bpf_map_memcg_enter(map, &old_memcg, &memcg); 516 529 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 517 - set_active_memcg(old_memcg); 518 - mem_cgroup_put(memcg); 530 + bpf_map_memcg_exit(old_memcg, memcg); 519 531 520 532 return ptr; 521 533 } ··· 538 526 struct mem_cgroup *memcg, *old_memcg; 539 527 void *ptr; 540 528 541 - memcg = bpf_map_get_memcg(map); 542 - old_memcg = set_active_memcg(memcg); 529 + bpf_map_memcg_enter(map, &old_memcg, &memcg); 543 530 ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); 544 - set_active_memcg(old_memcg); 545 - mem_cgroup_put(memcg); 531 + bpf_map_memcg_exit(old_memcg, memcg); 546 532 547 533 return ptr; 548 534 } ··· 550 540 struct mem_cgroup *memcg, *old_memcg; 551 541 void *ptr; 552 542 553 - memcg = bpf_map_get_memcg(map); 554 - old_memcg = set_active_memcg(memcg); 543 + bpf_map_memcg_enter(map, &old_memcg, &memcg); 555 544 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 556 - set_active_memcg(old_memcg); 557 - mem_cgroup_put(memcg); 545 + bpf_map_memcg_exit(old_memcg, memcg); 558 546 559 547 return ptr; 560 548 } ··· 563 555 struct mem_cgroup *memcg, *old_memcg; 564 556 void *ptr; 565 557 566 - memcg = bpf_map_get_memcg(map); 567 - old_memcg = set_active_memcg(memcg); 
558 + bpf_map_memcg_enter(map, &old_memcg, &memcg); 568 559 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT); 569 - set_active_memcg(old_memcg); 570 - mem_cgroup_put(memcg); 560 + bpf_map_memcg_exit(old_memcg, memcg); 571 561 572 562 return ptr; 573 563 } ··· 576 570 struct mem_cgroup *memcg, *old_memcg; 577 571 void __percpu *ptr; 578 572 579 - memcg = bpf_map_get_memcg(map); 580 - old_memcg = set_active_memcg(memcg); 573 + bpf_map_memcg_enter(map, &old_memcg, &memcg); 581 574 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 582 - set_active_memcg(old_memcg); 583 - mem_cgroup_put(memcg); 575 + bpf_map_memcg_exit(old_memcg, memcg); 584 576 585 577 return ptr; 586 578 } ··· 616 612 unsigned long i, j; 617 613 struct page *pg; 618 614 int ret = 0; 619 - #ifdef CONFIG_MEMCG 620 - struct mem_cgroup *memcg, *old_memcg; 621 615 622 - memcg = bpf_map_get_memcg(map); 623 - old_memcg = set_active_memcg(memcg); 624 - #endif 625 616 for (i = 0; i < nr_pages; i++) { 626 617 pg = __bpf_alloc_page(nid); 627 618 ··· 630 631 break; 631 632 } 632 633 633 - #ifdef CONFIG_MEMCG 634 - set_active_memcg(old_memcg); 635 - mem_cgroup_put(memcg); 636 - #endif 637 634 return ret; 638 635 } 639 636