Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

cgroup: cgroup.stat.local time accounting

There isn't yet a clear way to identify a set of "lost" time that
everyone (or at least a wider group of users) cares about. However,
users can perform some delay accounting by iterating over components of
interest. This patch allows cgroup v2 freezing time to be one of those
components.

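Track the cumulative time that each v2 cgroup spends freezing, i.e. the
time between a freeze request and the subsequent thaw, whether or not
the cgroup ever reaches the fully frozen state, and expose it to
userland via a new local stat file in cgroupfs. Thank you to Michal,
who provided the ASCII art in the updated documentation.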

To access this value:
$ mkdir /sys/fs/cgroup/test
$ cat /sys/fs/cgroup/test/cgroup.stat.local
frozen_usec 0

Ensure consistent freeze time reads with freeze_seq, a per-cgroup
sequence counter. Writes are serialized using the css_set_lock.

Signed-off-by: Tiffany Yang <ynaffit@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

authored by

Tiffany Yang and committed by
Tejun Heo
afa3701c 4c70fb26

+75 -4
+18
Documentation/admin-guide/cgroup-v2.rst
··· 1001 1001 Total number of dying cgroup subsystems (e.g. memory 1002 1002 cgroup) at and beneath the current cgroup. 1003 1003 1004 + cgroup.stat.local 1005 + A read-only flat-keyed file which exists in non-root cgroups. 1006 + The following entry is defined: 1007 + 1008 + frozen_usec 1009 + Cumulative time that this cgroup has spent between freezing and 1010 + thawing, regardless of whether by self or ancestor groups. 1011 + NB: (not) reaching "frozen" state is not accounted here. 1012 + 1013 + Using the following ASCII representation of a cgroup's freezer 1014 + state, :: 1015 + 1016 + 1 _____ 1017 + frozen 0 __/ \__ 1018 + ab cd 1019 + 1020 + the duration being measured is the span between a and c. 1021 + 1004 1022 cgroup.freeze 1005 1023 A read-write single value file which exists on non-root cgroups. 1006 1024 Allowed values are "0" and "1". The default is "0".
+17
include/linux/cgroup-defs.h
··· 433 433 * frozen, SIGSTOPped, and PTRACEd. 434 434 */ 435 435 int nr_frozen_tasks; 436 + 437 + /* Freeze time data consistency protection */ 438 + seqcount_t freeze_seq; 439 + 440 + /* 441 + * Most recent time the cgroup was requested to freeze. 442 + * Accesses guarded by freeze_seq counter. Writes serialized 443 + * by css_set_lock. 444 + */ 445 + u64 freeze_start_nsec; 446 + 447 + /* 448 + * Total duration the cgroup has spent freezing. 449 + * Accesses guarded by freeze_seq counter. Writes serialized 450 + * by css_set_lock. 451 + */ 452 + u64 frozen_nsec; 436 453 }; 437 454 438 455 struct cgroup {
+28
kernel/cgroup/cgroup.c
··· 3763 3763 return 0; 3764 3764 } 3765 3765 3766 + static int cgroup_core_local_stat_show(struct seq_file *seq, void *v) 3767 + { 3768 + struct cgroup *cgrp = seq_css(seq)->cgroup; 3769 + unsigned int sequence; 3770 + u64 freeze_time; 3771 + 3772 + do { 3773 + sequence = read_seqcount_begin(&cgrp->freezer.freeze_seq); 3774 + freeze_time = cgrp->freezer.frozen_nsec; 3775 + /* Add in current freezer interval if the cgroup is freezing. */ 3776 + if (test_bit(CGRP_FREEZE, &cgrp->flags)) 3777 + freeze_time += (ktime_get_ns() - 3778 + cgrp->freezer.freeze_start_nsec); 3779 + } while (read_seqcount_retry(&cgrp->freezer.freeze_seq, sequence)); 3780 + 3781 + seq_printf(seq, "frozen_usec %llu\n", 3782 + (unsigned long long) freeze_time / NSEC_PER_USEC); 3783 + 3784 + return 0; 3785 + } 3786 + 3766 3787 #ifdef CONFIG_CGROUP_SCHED 3767 3788 /** 3768 3789 * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem ··· 5376 5355 .seq_show = cgroup_stat_show, 5377 5356 }, 5378 5357 { 5358 + .name = "cgroup.stat.local", 5359 + .flags = CFTYPE_NOT_ON_ROOT, 5360 + .seq_show = cgroup_core_local_stat_show, 5361 + }, 5362 + { 5379 5363 .name = "cgroup.freeze", 5380 5364 .flags = CFTYPE_NOT_ON_ROOT, 5381 5365 .seq_show = cgroup_freeze_show, ··· 5789 5763 * if the parent has to be frozen, the child has too. 5790 5764 */ 5791 5765 cgrp->freezer.e_freeze = parent->freezer.e_freeze; 5766 + seqcount_init(&cgrp->freezer.freeze_seq); 5792 5767 if (cgrp->freezer.e_freeze) { 5793 5768 /* 5794 5769 * Set the CGRP_FREEZE flag, so when a process will be ··· 5798 5771 * consider it frozen immediately. 5799 5772 */ 5800 5773 set_bit(CGRP_FREEZE, &cgrp->flags); 5774 + cgrp->freezer.freeze_start_nsec = ktime_get_ns(); 5801 5775 set_bit(CGRP_FROZEN, &cgrp->flags); 5802 5776 } 5803 5777
+12 -4
kernel/cgroup/freezer.c
··· 171 171 /* 172 172 * Freeze or unfreeze all tasks in the given cgroup. 173 173 */ 174 - static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) 174 + static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec) 175 175 { 176 176 struct css_task_iter it; 177 177 struct task_struct *task; ··· 179 179 lockdep_assert_held(&cgroup_mutex); 180 180 181 181 spin_lock_irq(&css_set_lock); 182 - if (freeze) 182 + write_seqcount_begin(&cgrp->freezer.freeze_seq); 183 + if (freeze) { 183 184 set_bit(CGRP_FREEZE, &cgrp->flags); 184 - else 185 + cgrp->freezer.freeze_start_nsec = ts_nsec; 186 + } else { 185 187 clear_bit(CGRP_FREEZE, &cgrp->flags); 188 + cgrp->freezer.frozen_nsec += (ts_nsec - 189 + cgrp->freezer.freeze_start_nsec); 190 + } 191 + write_seqcount_end(&cgrp->freezer.freeze_seq); 186 192 spin_unlock_irq(&css_set_lock); 187 193 188 194 if (freeze) ··· 266 260 struct cgroup *parent; 267 261 struct cgroup *dsct; 268 262 bool applied = false; 263 + u64 ts_nsec; 269 264 bool old_e; 270 265 271 266 lockdep_assert_held(&cgroup_mutex); ··· 278 271 return; 279 272 280 273 cgrp->freezer.freeze = freeze; 274 + ts_nsec = ktime_get_ns(); 281 275 282 276 /* 283 277 * Propagate changes downwards the cgroup tree. ··· 306 298 /* 307 299 * Do change actual state: freeze or unfreeze. 308 300 */ 309 - cgroup_do_freeze(dsct, freeze); 301 + cgroup_do_freeze(dsct, freeze, ts_nsec); 310 302 applied = true; 311 303 } 312 304