Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: vmscan: add cgroup IDs to vmscan tracepoints

Memory reclaim events are currently difficult to attribute to specific
cgroups, which makes memory pressure issues hard to debug. Add the
memory cgroup ID (memcg_id) to key vmscan tracepoints to enable better
correlation and analysis.

For operations not associated with a specific cgroup, the field
defaults to 0.

Link: https://lkml.kernel.org/r/20260316160908.42727-3-tballasi@linux.microsoft.com
Signed-off-by: Thomas Ballasi <tballasi@linux.microsoft.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Thomas Ballasi and committed by
Andrew Morton
874a0a56 d8d68d81

+61 -45
+48 -35
include/trace/events/vmscan.h
··· 124 124 125 125 DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template, 126 126 127 - TP_PROTO(int order, gfp_t gfp_flags), 127 + TP_PROTO(gfp_t gfp_flags, int order, struct mem_cgroup *memcg), 128 128 129 - TP_ARGS(order, gfp_flags), 129 + TP_ARGS(gfp_flags, order, memcg), 130 130 131 131 TP_STRUCT__entry( 132 - __field( int, order ) 133 132 __field( unsigned long, gfp_flags ) 133 + __field( u64, memcg_id ) 134 + __field( int, order ) 134 135 ), 135 136 136 137 TP_fast_assign( 137 - __entry->order = order; 138 138 __entry->gfp_flags = (__force unsigned long)gfp_flags; 139 + __entry->order = order; 140 + __entry->memcg_id = mem_cgroup_id(memcg); 139 141 ), 140 142 141 - TP_printk("order=%d gfp_flags=%s", 143 + TP_printk("order=%d gfp_flags=%s memcg_id=%llu", 142 144 __entry->order, 143 - show_gfp_flags(__entry->gfp_flags)) 145 + show_gfp_flags(__entry->gfp_flags), 146 + __entry->memcg_id) 144 147 ); 145 148 146 149 DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin, 147 150 148 - TP_PROTO(int order, gfp_t gfp_flags), 151 + TP_PROTO(gfp_t gfp_flags, int order, struct mem_cgroup *memcg), 149 152 150 - TP_ARGS(order, gfp_flags) 153 + TP_ARGS(gfp_flags, order, memcg) 151 154 ); 152 155 153 156 #ifdef CONFIG_MEMCG 154 157 DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin, 155 158 156 - TP_PROTO(int order, gfp_t gfp_flags), 159 + TP_PROTO(gfp_t gfp_flags, int order, struct mem_cgroup *memcg), 157 160 158 - TP_ARGS(order, gfp_flags) 161 + TP_ARGS(gfp_flags, order, memcg) 159 162 ); 160 163 161 164 DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin, 162 165 163 - TP_PROTO(int order, gfp_t gfp_flags), 166 + TP_PROTO(gfp_t gfp_flags, int order, struct mem_cgroup *memcg), 164 167 165 - TP_ARGS(order, gfp_flags) 168 + TP_ARGS(gfp_flags, order, memcg) 166 169 ); 167 170 #endif /* CONFIG_MEMCG */ 168 171 169 172 
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template, 170 173 171 - TP_PROTO(unsigned long nr_reclaimed), 174 + TP_PROTO(unsigned long nr_reclaimed, struct mem_cgroup *memcg), 172 175 173 - TP_ARGS(nr_reclaimed), 176 + TP_ARGS(nr_reclaimed, memcg), 174 177 175 178 TP_STRUCT__entry( 176 179 __field( unsigned long, nr_reclaimed ) 180 + __field( u64, memcg_id ) 177 181 ), 178 182 179 183 TP_fast_assign( 180 184 __entry->nr_reclaimed = nr_reclaimed; 185 + __entry->memcg_id = mem_cgroup_id(memcg); 181 186 ), 182 187 183 - TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed) 188 + TP_printk("nr_reclaimed=%lu memcg_id=%llu", 189 + __entry->nr_reclaimed, 190 + __entry->memcg_id) 184 191 ); 185 192 186 193 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end, 187 194 188 - TP_PROTO(unsigned long nr_reclaimed), 195 + TP_PROTO(unsigned long nr_reclaimed, struct mem_cgroup *memcg), 189 196 190 - TP_ARGS(nr_reclaimed) 197 + TP_ARGS(nr_reclaimed, memcg) 191 198 ); 192 199 193 200 #ifdef CONFIG_MEMCG 194 201 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end, 195 202 196 - TP_PROTO(unsigned long nr_reclaimed), 203 + TP_PROTO(unsigned long nr_reclaimed, struct mem_cgroup *memcg), 197 204 198 - TP_ARGS(nr_reclaimed) 205 + TP_ARGS(nr_reclaimed, memcg) 199 206 ); 200 207 201 208 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end, 202 209 203 - TP_PROTO(unsigned long nr_reclaimed), 210 + TP_PROTO(unsigned long nr_reclaimed, struct mem_cgroup *memcg), 204 211 205 - TP_ARGS(nr_reclaimed) 212 + TP_ARGS(nr_reclaimed, memcg) 206 213 ); 207 214 #endif /* CONFIG_MEMCG */ 208 215 ··· 217 210 TP_PROTO(struct shrinker *shr, struct shrink_control *sc, 218 211 long nr_objects_to_shrink, unsigned long cache_items, 219 212 unsigned long long delta, unsigned long total_scan, 220 - int priority), 213 + int priority, struct mem_cgroup *memcg), 221 214 222 215 TP_ARGS(shr, sc, nr_objects_to_shrink, 
cache_items, delta, total_scan, 223 - priority), 216 + priority, memcg), 224 217 225 218 TP_STRUCT__entry( 226 219 __field(struct shrinker *, shr) 227 220 __field(void *, shrink) 228 - __field(int, nid) 229 221 __field(long, nr_objects_to_shrink) 230 222 __field(unsigned long, gfp_flags) 231 223 __field(unsigned long, cache_items) 232 224 __field(unsigned long long, delta) 233 225 __field(unsigned long, total_scan) 234 226 __field(int, priority) 227 + __field(int, nid) 228 + __field(u64, memcg_id) 235 229 ), 236 230 237 231 TP_fast_assign( 238 232 __entry->shr = shr; 239 233 __entry->shrink = shr->scan_objects; 240 - __entry->nid = sc->nid; 241 234 __entry->nr_objects_to_shrink = nr_objects_to_shrink; 242 235 __entry->gfp_flags = (__force unsigned long)sc->gfp_mask; 243 236 __entry->cache_items = cache_items; 244 237 __entry->delta = delta; 245 238 __entry->total_scan = total_scan; 246 239 __entry->priority = priority; 240 + __entry->nid = sc->nid; 241 + __entry->memcg_id = mem_cgroup_id(memcg); 247 242 ), 248 243 249 - TP_printk("%pS %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", 244 + TP_printk("%pS %p: nid: %d memcg_id: %llu objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", 250 245 __entry->shrink, 251 246 __entry->shr, 252 247 __entry->nid, 248 + __entry->memcg_id, 253 249 __entry->nr_objects_to_shrink, 254 250 show_gfp_flags(__entry->gfp_flags), 255 251 __entry->cache_items, ··· 263 253 264 254 TRACE_EVENT(mm_shrink_slab_end, 265 255 TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval, 266 - long unused_scan_cnt, long new_scan_cnt, long total_scan), 256 + long unused_scan_cnt, long new_scan_cnt, long total_scan, struct mem_cgroup *memcg), 267 257 268 258 TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt, 269 - total_scan), 259 + total_scan, memcg), 270 260 271 261 TP_STRUCT__entry( 272 262 __field(struct shrinker *, shr) 273 - __field(int, 
nid) 274 263 __field(void *, shrink) 275 264 __field(long, unused_scan) 276 265 __field(long, new_scan) 277 - __field(int, retval) 278 266 __field(long, total_scan) 267 + __field(int, nid) 268 + __field(int, retval) 269 + __field(u64, memcg_id) 279 270 ), 280 271 281 272 TP_fast_assign( 282 273 __entry->shr = shr; 283 - __entry->nid = nid; 284 274 __entry->shrink = shr->scan_objects; 285 275 __entry->unused_scan = unused_scan_cnt; 286 276 __entry->new_scan = new_scan_cnt; 287 - __entry->retval = shrinker_retval; 288 277 __entry->total_scan = total_scan; 278 + __entry->nid = nid; 279 + __entry->retval = shrinker_retval; 280 + __entry->memcg_id = mem_cgroup_id(memcg); 289 281 ), 290 282 291 - TP_printk("%pS %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", 283 + TP_printk("%pS %p: nid: %d memcg_id: %llu unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", 292 284 __entry->shrink, 293 285 __entry->shr, 294 286 __entry->nid, 287 + __entry->memcg_id, 295 288 __entry->unused_scan, 296 289 __entry->new_scan, 297 290 __entry->total_scan, ··· 527 514 528 515 DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end, 529 516 530 - TP_PROTO(unsigned long nr_reclaimed), 517 + TP_PROTO(unsigned long nr_reclaimed, struct mem_cgroup *memcg), 531 518 532 - TP_ARGS(nr_reclaimed) 519 + TP_ARGS(nr_reclaimed, memcg) 533 520 ); 534 521 535 522 TRACE_EVENT(mm_vmscan_throttled,
+4 -2
mm/shrinker.c
··· 410 410 total_scan = min(total_scan, (2 * freeable)); 411 411 412 412 trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, 413 - freeable, delta, total_scan, priority); 413 + freeable, delta, total_scan, priority, 414 + shrinkctl->memcg); 414 415 415 416 /* 416 417 * Normally, we should not scan less than batch_size objects in one ··· 462 461 */ 463 462 new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl); 464 463 465 - trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan); 464 + trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan, 465 + shrinkctl->memcg); 466 466 return freed; 467 467 } 468 468
+9 -8
mm/vmscan.c
··· 6582 6582 return 1; 6583 6583 6584 6584 set_task_reclaim_state(current, &sc.reclaim_state); 6585 - trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask); 6585 + trace_mm_vmscan_direct_reclaim_begin(sc.gfp_mask, order, 0); 6586 6586 6587 6587 nr_reclaimed = do_try_to_free_pages(zonelist, &sc); 6588 6588 6589 - trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); 6589 + trace_mm_vmscan_direct_reclaim_end(nr_reclaimed, 0); 6590 6590 set_task_reclaim_state(current, NULL); 6591 6591 6592 6592 return nr_reclaimed; ··· 6615 6615 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 6616 6616 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 6617 6617 6618 - trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, 6619 - sc.gfp_mask); 6618 + trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.gfp_mask, 6619 + sc.order, 6620 + memcg); 6620 6621 6621 6622 /* 6622 6623 * NOTE: Although we can get the priority field, using it ··· 6628 6627 */ 6629 6628 shrink_lruvec(lruvec, &sc); 6630 6629 6631 - trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 6630 + trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed, memcg); 6632 6631 6633 6632 *nr_scanned = sc.nr_scanned; 6634 6633 ··· 6664 6663 struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); 6665 6664 6666 6665 set_task_reclaim_state(current, &sc.reclaim_state); 6667 - trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask); 6666 + trace_mm_vmscan_memcg_reclaim_begin(sc.gfp_mask, 0, memcg); 6668 6667 noreclaim_flag = memalloc_noreclaim_save(); 6669 6668 6670 6669 nr_reclaimed = do_try_to_free_pages(zonelist, &sc); 6671 6670 6672 6671 memalloc_noreclaim_restore(noreclaim_flag); 6673 - trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); 6672 + trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed, memcg); 6674 6673 set_task_reclaim_state(current, NULL); 6675 6674 6676 6675 return nr_reclaimed; ··· 7644 7643 delayacct_freepages_end(); 7645 7644 psi_memstall_leave(&pflags); 7646 7645 7647 - 
trace_mm_vmscan_node_reclaim_end(sc->nr_reclaimed); 7646 + trace_mm_vmscan_node_reclaim_end(sc->nr_reclaimed, 0); 7648 7647 7649 7648 return sc->nr_reclaimed; 7650 7649 }