Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

memcg: nmi safe memcg stats for specific archs

There are archs which have NMI but do not support this_cpu_* ops safely
in NMI context, although they do support safe atomic ops in NMI context.
For such archs, let's add infra to use atomic ops for the memcg stats
which can be updated in NMI context.

At the moment, the memcg stats which get updated in the objcg charging
path are MEMCG_KMEM, NR_SLAB_RECLAIMABLE_B & NR_SLAB_UNRECLAIMABLE_B.
Rather than adding support for all memcg stats to be nmi safe, let's just
add infra to make these three stats nmi safe, which is what this patch does.

Link: https://lkml.kernel.org/r/20250519063142.111219-3-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Shakeel Butt and committed by
Andrew Morton
940b01fc 25352d2f

+66
+10
include/linux/memcontrol.h
··· 113 113 CACHELINE_PADDING(_pad2_); 114 114 unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; 115 115 struct mem_cgroup_reclaim_iter iter; 116 + 117 + #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC 118 + /* slab stats for nmi context */ 119 + atomic_t slab_reclaimable; 120 + atomic_t slab_unreclaimable; 121 + #endif 116 122 }; 117 123 118 124 struct mem_cgroup_threshold { ··· 242 236 atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; 243 237 atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; 244 238 239 + #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC 240 + /* MEMCG_KMEM for nmi context */ 241 + atomic_t kmem_stat; 242 + #endif 245 243 /* 246 244 * Hint of reclaim pressure for socket memroy management. Note 247 245 * that this indicator should NOT be used in legacy cgroup mode
+7
init/Kconfig
··· 1013 1013 depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !ARCH_HAVE_NMI_SAFE_CMPXCHG 1014 1014 default y 1015 1015 1016 + config MEMCG_NMI_SAFETY_REQUIRES_ATOMIC 1017 + bool 1018 + depends on MEMCG 1019 + depends on HAVE_NMI 1020 + depends on !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && ARCH_HAVE_NMI_SAFE_CMPXCHG 1021 + default y 1022 + 1016 1023 config MEMCG_V1 1017 1024 bool "Legacy cgroup v1 memory controller" 1018 1025 depends on MEMCG
+49
mm/memcontrol.c
··· 3966 3966 } 3967 3967 } 3968 3968 3969 + #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC 3970 + static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent, 3971 + int cpu) 3972 + { 3973 + int nid; 3974 + 3975 + if (atomic_read(&memcg->kmem_stat)) { 3976 + int kmem = atomic_xchg(&memcg->kmem_stat, 0); 3977 + int index = memcg_stats_index(MEMCG_KMEM); 3978 + 3979 + memcg->vmstats->state[index] += kmem; 3980 + if (parent) 3981 + parent->vmstats->state_pending[index] += kmem; 3982 + } 3983 + 3984 + for_each_node_state(nid, N_MEMORY) { 3985 + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid]; 3986 + struct lruvec_stats *lstats = pn->lruvec_stats; 3987 + struct lruvec_stats *plstats = NULL; 3988 + 3989 + if (parent) 3990 + plstats = parent->nodeinfo[nid]->lruvec_stats; 3991 + 3992 + if (atomic_read(&pn->slab_reclaimable)) { 3993 + int slab = atomic_xchg(&pn->slab_reclaimable, 0); 3994 + int index = memcg_stats_index(NR_SLAB_RECLAIMABLE_B); 3995 + 3996 + lstats->state[index] += slab; 3997 + if (plstats) 3998 + plstats->state_pending[index] += slab; 3999 + } 4000 + if (atomic_read(&pn->slab_unreclaimable)) { 4001 + int slab = atomic_xchg(&pn->slab_unreclaimable, 0); 4002 + int index = memcg_stats_index(NR_SLAB_UNRECLAIMABLE_B); 4003 + 4004 + lstats->state[index] += slab; 4005 + if (plstats) 4006 + plstats->state_pending[index] += slab; 4007 + } 4008 + } 4009 + } 4010 + #else 4011 + static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent, 4012 + int cpu) 4013 + {} 4014 + #endif 4015 + 3969 4016 static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) 3970 4017 { 3971 4018 struct mem_cgroup *memcg = mem_cgroup_from_css(css); ··· 4020 3973 struct memcg_vmstats_percpu *statc; 4021 3974 struct aggregate_control ac; 4022 3975 int nid; 3976 + 3977 + flush_nmi_stats(memcg, parent, cpu); 4023 3978 4024 3979 statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); 4025 3980