Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

delayacct: add delay min to record delay peak

Delay accounting can already calculate the average delay of processes
and detect the overall system load, and it also records the 'delay max'
to identify potential abnormal delays. However, the 'delay min' is
another useful data point: it is the smallest delay observed for each
delay type. By comparing 'delay max' with 'delay min', we can see how
much room there is to optimize latency, which provides a reference for
latency performance tuning.

Use case
=========
bash-4.4# ./getdelays -d -t 242
print delayacct stats ON
TGID 242
CPU                 count     real total  virtual total    delay total  delay average      delay max      delay min
                       39      156000000      156576579        2111069        0.054ms     0.212296ms     0.031307ms
IO                  count    delay total  delay average      delay max      delay min
                        0              0        0.000ms     0.000000ms     0.000000ms
SWAP                count    delay total  delay average      delay max      delay min
                        0              0        0.000ms     0.000000ms     0.000000ms
RECLAIM             count    delay total  delay average      delay max      delay min
                        0              0        0.000ms     0.000000ms     0.000000ms
THRASHING           count    delay total  delay average      delay max      delay min
                        0              0        0.000ms     0.000000ms     0.000000ms
COMPACT             count    delay total  delay average      delay max      delay min
                        0              0        0.000ms     0.000000ms     0.000000ms
WPCOPY              count    delay total  delay average      delay max      delay min
                      156       11215873        0.072ms     0.207403ms     0.033913ms
IRQ                 count    delay total  delay average      delay max      delay min
                        0              0        0.000ms     0.000000ms     0.000000ms

Link: https://lkml.kernel.org/r/20241220173105906EOdsPhzjMLYNJJBqgz1ga@zte.com.cn
Co-developed-by: Wang Yong <wang.yong12@zte.com.cn>
Signed-off-by: Wang Yong <wang.yong12@zte.com.cn>
Co-developed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Co-developed-by: Kun Jiang <jiang.kun2@zte.com.cn>
Signed-off-by: Kun Jiang <jiang.kun2@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Fan Yu <fan.yu9@zte.com.cn>
Cc: Peilin He <he.peilin@zte.com.cn>
Cc: tuqiang <tu.qiang35@zte.com.cn>
Cc: ye xingchen <ye.xingchen@zte.com.cn>
Cc: Yunkai Zhang <zhang.yunkai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Wang Yaxin and committed by
Andrew Morton
f65c64f3 7e70433c

+88 -40
+16 -16
Documentation/accounting/delay-accounting.rst
··· 107 107 TGID 242 108 108 109 109 110 - CPU count real total virtual total delay total delay average delay max 111 - 239 296000000 307724885 1127792 0.005ms 0.238382ms 112 - IO count delay total delay average delay max 113 - 0 0 0.000ms 0.000000ms 114 - SWAP count delay total delay average delay max 115 - 0 0 0.000ms 0.000000ms 116 - RECLAIM count delay total delay average delay max 117 - 0 0 0.000ms 0.000000ms 118 - THRASHING count delay total delay average delay max 119 - 0 0 0.000ms 0.000000ms 120 - COMPACT count delay total delay average delay max 121 - 0 0 0.000ms 0.000000ms 122 - WPCOPY count delay total delay average delay max 123 - 230 19100476 0.083ms 0.383822ms 124 - IRQ count delay total delay average delay max 125 - 0 0 0.000ms 0.000000ms 110 + CPU count real total virtual total delay total delay average delay max delay min 111 + 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms 112 + IO count delay total delay average delay max delay min 113 + 0 0 0.000ms 0.000000ms 0.000000ms 114 + SWAP count delay total delay average delay max delay min 115 + 0 0 0.000ms 0.000000ms 0.000000ms 116 + RECLAIM count delay total delay average delay max delay min 117 + 0 0 0.000ms 0.000000ms 0.000000ms 118 + THRASHING count delay total delay average delay max delay min 119 + 0 0 0.000ms 0.000000ms 0.000000ms 120 + COMPACT count delay total delay average delay max delay min 121 + 0 0 0.000ms 0.000000ms 0.000000ms 122 + WPCOPY count delay total delay average delay max delay min 123 + 156 11215873 0.072ms 0.207403ms 0.033913ms 124 + IRQ count delay total delay average delay max delay min 125 + 0 0 0.000ms 0.000000ms 0.000000ms 126 126 127 127 Get IO accounting for pid 1, it works only with -p:: 128 128
+7
include/linux/delayacct.h
··· 30 30 */ 31 31 u64 blkio_start; 32 32 u64 blkio_delay_max; 33 + u64 blkio_delay_min; 33 34 u64 blkio_delay; /* wait for sync block io completion */ 34 35 u64 swapin_start; 35 36 u64 swapin_delay_max; 37 + u64 swapin_delay_min; 36 38 u64 swapin_delay; /* wait for swapin */ 37 39 u32 blkio_count; /* total count of the number of sync block */ 38 40 /* io operations performed */ ··· 42 40 43 41 u64 freepages_start; 44 42 u64 freepages_delay_max; 43 + u64 freepages_delay_min; 45 44 u64 freepages_delay; /* wait for memory reclaim */ 46 45 47 46 u64 thrashing_start; 48 47 u64 thrashing_delay_max; 48 + u64 thrashing_delay_min; 49 49 u64 thrashing_delay; /* wait for thrashing page */ 50 50 51 51 u64 compact_start; 52 52 u64 compact_delay_max; 53 + u64 compact_delay_min; 53 54 u64 compact_delay; /* wait for memory compact */ 54 55 55 56 u64 wpcopy_start; 56 57 u64 wpcopy_delay_max; 58 + u64 wpcopy_delay_min; 57 59 u64 wpcopy_delay; /* wait for write-protect copy */ 58 60 59 61 u64 irq_delay_max; 62 + u64 irq_delay_min; 60 63 u64 irq_delay; /* wait for IRQ/SOFTIRQ */ 61 64 62 65 u32 freepages_count; /* total count of memory reclaim */
+3
include/linux/sched.h
··· 401 401 /* Max time spent waiting on a runqueue: */ 402 402 unsigned long long max_run_delay; 403 403 404 + /* Min time spent waiting on a runqueue: */ 405 + unsigned long long min_run_delay; 406 + 404 407 /* Timestamps: */ 405 408 406 409 /* When did we last run on a CPU? */
+8
include/uapi/linux/taskstats.h
··· 73 73 __u64 cpu_count __attribute__((aligned(8))); 74 74 __u64 cpu_delay_total; 75 75 __u64 cpu_delay_max; 76 + __u64 cpu_delay_min; 76 77 77 78 /* Following four fields atomically updated using task->delays->lock */ 78 79 ··· 83 82 __u64 blkio_count; 84 83 __u64 blkio_delay_total; 85 84 __u64 blkio_delay_max; 85 + __u64 blkio_delay_min; 86 86 87 87 /* Delay waiting for page fault I/O (swap in only) */ 88 88 __u64 swapin_count; 89 89 __u64 swapin_delay_total; 90 90 __u64 swapin_delay_max; 91 + __u64 swapin_delay_min; 91 92 92 93 /* cpu "wall-clock" running time 93 94 * On some architectures, value will adjust for cpu time stolen ··· 173 170 __u64 freepages_count; 174 171 __u64 freepages_delay_total; 175 172 __u64 freepages_delay_max; 173 + __u64 freepages_delay_min; 176 174 177 175 /* Delay waiting for thrashing page */ 178 176 __u64 thrashing_count; 179 177 __u64 thrashing_delay_total; 180 178 __u64 thrashing_delay_max; 179 + __u64 thrashing_delay_min; 181 180 182 181 /* v10: 64-bit btime to avoid overflow */ 183 182 __u64 ac_btime64; /* 64-bit begin time */ ··· 188 183 __u64 compact_count; 189 184 __u64 compact_delay_total; 190 185 __u64 compact_delay_max; 186 + __u64 compact_delay_min; 191 187 192 188 /* v12 begin */ 193 189 __u32 ac_tgid; /* thread group ID */ ··· 211 205 __u64 wpcopy_count; 212 206 __u64 wpcopy_delay_total; 213 207 __u64 wpcopy_delay_max; 208 + __u64 wpcopy_delay_min; 214 209 215 210 /* v14: Delay waiting for IRQ/SOFTIRQ */ 216 211 __u64 irq_count; 217 212 __u64 irq_delay_total; 218 213 __u64 irq_delay_max; 214 + __u64 irq_delay_min; 219 215 /* v15: add Delay max */ 220 216 }; 221 217
+25 -7
kernel/delayacct.c
··· 95 95 * Finish delay accounting for a statistic using its timestamps (@start), 96 96 * accumulator (@total) and @count 97 97 */ 98 - static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max) 98 + static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min) 99 99 { 100 100 s64 ns = local_clock() - *start; 101 101 unsigned long flags; ··· 106 106 (*count)++; 107 107 if (ns > *max) 108 108 *max = ns; 109 + if (*min == 0 || ns < *min) 110 + *min = ns; 109 111 raw_spin_unlock_irqrestore(lock, flags); 110 112 } 111 113 } ··· 127 125 &p->delays->blkio_start, 128 126 &p->delays->blkio_delay, 129 127 &p->delays->blkio_count, 130 - &p->delays->blkio_delay_max); 128 + &p->delays->blkio_delay_max, 129 + &p->delays->blkio_delay_min); 131 130 } 132 131 133 132 int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) ··· 160 157 d->cpu_count += t1; 161 158 162 159 d->cpu_delay_max = tsk->sched_info.max_run_delay; 160 + d->cpu_delay_min = tsk->sched_info.min_run_delay; 163 161 tmp = (s64)d->cpu_delay_total + t2; 164 162 d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; 165 163 tmp = (s64)d->cpu_run_virtual_total + t3; ··· 174 170 /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ 175 171 raw_spin_lock_irqsave(&tsk->delays->lock, flags); 176 172 d->blkio_delay_max = tsk->delays->blkio_delay_max; 173 + d->blkio_delay_min = tsk->delays->blkio_delay_min; 177 174 tmp = d->blkio_delay_total + tsk->delays->blkio_delay; 178 175 d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; 179 176 d->swapin_delay_max = tsk->delays->swapin_delay_max; 177 + d->swapin_delay_min = tsk->delays->swapin_delay_min; 180 178 tmp = d->swapin_delay_total + tsk->delays->swapin_delay; 181 179 d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 
0 : tmp; 182 180 d->freepages_delay_max = tsk->delays->freepages_delay_max; 181 + d->freepages_delay_min = tsk->delays->freepages_delay_min; 183 182 tmp = d->freepages_delay_total + tsk->delays->freepages_delay; 184 183 d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; 185 184 d->thrashing_delay_max = tsk->delays->thrashing_delay_max; 185 + d->thrashing_delay_min = tsk->delays->thrashing_delay_min; 186 186 tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; 187 187 d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; 188 188 d->compact_delay_max = tsk->delays->compact_delay_max; 189 + d->compact_delay_min = tsk->delays->compact_delay_min; 189 190 tmp = d->compact_delay_total + tsk->delays->compact_delay; 190 191 d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; 191 192 d->wpcopy_delay_max = tsk->delays->wpcopy_delay_max; 193 + d->wpcopy_delay_min = tsk->delays->wpcopy_delay_min; 192 194 tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; 193 195 d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; 194 196 d->irq_delay_max = tsk->delays->irq_delay_max; 197 + d->irq_delay_min = tsk->delays->irq_delay_min; 195 198 tmp = d->irq_delay_total + tsk->delays->irq_delay; 196 199 d->irq_delay_total = (tmp < d->irq_delay_total) ? 
0 : tmp; 197 200 d->blkio_count += tsk->delays->blkio_count; ··· 235 224 &current->delays->freepages_start, 236 225 &current->delays->freepages_delay, 237 226 &current->delays->freepages_count, 238 - &current->delays->freepages_delay_max); 227 + &current->delays->freepages_delay_max, 228 + &current->delays->freepages_delay_min); 239 229 } 240 230 241 231 void __delayacct_thrashing_start(bool *in_thrashing) ··· 259 247 &current->delays->thrashing_start, 260 248 &current->delays->thrashing_delay, 261 249 &current->delays->thrashing_count, 262 - &current->delays->thrashing_delay_max); 250 + &current->delays->thrashing_delay_max, 251 + &current->delays->thrashing_delay_min); 263 252 } 264 253 265 254 void __delayacct_swapin_start(void) ··· 274 261 &current->delays->swapin_start, 275 262 &current->delays->swapin_delay, 276 263 &current->delays->swapin_count, 277 - &current->delays->swapin_delay_max); 264 + &current->delays->swapin_delay_max, 265 + &current->delays->swapin_delay_min); 278 266 } 279 267 280 268 void __delayacct_compact_start(void) ··· 289 275 &current->delays->compact_start, 290 276 &current->delays->compact_delay, 291 277 &current->delays->compact_count, 292 - &current->delays->compact_delay_max); 278 + &current->delays->compact_delay_max, 279 + &current->delays->compact_delay_min); 293 280 } 294 281 295 282 void __delayacct_wpcopy_start(void) ··· 304 289 &current->delays->wpcopy_start, 305 290 &current->delays->wpcopy_delay, 306 291 &current->delays->wpcopy_count, 307 - &current->delays->wpcopy_delay_max); 292 + &current->delays->wpcopy_delay_max, 293 + &current->delays->wpcopy_delay_min); 308 294 } 309 295 310 296 void __delayacct_irq(struct task_struct *task, u32 delta) ··· 317 301 task->delays->irq_count++; 318 302 if (delta > task->delays->irq_delay_max) 319 303 task->delays->irq_delay_max = delta; 304 + if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min)) 305 + task->delays->irq_delay_min = delta; 320 306 
raw_spin_unlock_irqrestore(&task->delays->lock, flags); 321 307 } 322 308
+4
kernel/sched/stats.h
··· 246 246 t->sched_info.run_delay += delta; 247 247 if (delta > t->sched_info.max_run_delay) 248 248 t->sched_info.max_run_delay = delta; 249 + if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) 250 + t->sched_info.min_run_delay = delta; 249 251 rq_sched_info_dequeue(rq, delta); 250 252 } 251 253 ··· 271 269 t->sched_info.pcount++; 272 270 if (delta > t->sched_info.max_run_delay) 273 271 t->sched_info.max_run_delay = delta; 272 + if (delta && (!t->sched_info.min_run_delay || delta < t->sched_info.min_run_delay)) 273 + t->sched_info.min_run_delay = delta; 274 274 275 275 rq_sched_info_arrive(rq, delta); 276 276 }
+25 -17
tools/accounting/getdelays.c
··· 192 192 } 193 193 194 194 #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) 195 - #define delay_max_ms(t) (t / 1000000ULL) 195 + #define delay_ms(t) (t / 1000000ULL) 196 196 197 197 static void print_delayacct(struct taskstats *t) 198 198 { ··· 213 213 "IRQ %15s%15s%15s%15s\n" 214 214 " %15llu%15llu%15.3fms%13.6fms\n", 215 215 "count", "real total", "virtual total", 216 - "delay total", "delay average", "delay max", 216 + "delay total", "delay average", "delay max", "delay min", 217 217 (unsigned long long)t->cpu_count, 218 218 (unsigned long long)t->cpu_run_real_total, 219 219 (unsigned long long)t->cpu_run_virtual_total, 220 220 (unsigned long long)t->cpu_delay_total, 221 221 average_ms((double)t->cpu_delay_total, t->cpu_count), 222 - delay_max_ms((double)t->cpu_delay_max), 223 - "count", "delay total", "delay average", "delay max", 222 + delay_ms((double)t->cpu_delay_max), 223 + delay_ms((double)t->cpu_delay_min), 224 + "count", "delay total", "delay average", "delay max", "delay min", 224 225 (unsigned long long)t->blkio_count, 225 226 (unsigned long long)t->blkio_delay_total, 226 227 average_ms((double)t->blkio_delay_total, t->blkio_count), 227 - delay_max_ms((double)t->blkio_delay_max), 228 - "count", "delay total", "delay average", "delay max", 228 + delay_ms((double)t->blkio_delay_max), 229 + delay_ms((double)t->blkio_delay_min), 230 + "count", "delay total", "delay average", "delay max", "delay min", 229 231 (unsigned long long)t->swapin_count, 230 232 (unsigned long long)t->swapin_delay_total, 231 233 average_ms((double)t->swapin_delay_total, t->swapin_count), 232 - delay_max_ms((double)t->swapin_delay_max), 233 - "count", "delay total", "delay average", "delay max", 234 + delay_ms((double)t->swapin_delay_max), 235 + delay_ms((double)t->swapin_delay_min), 236 + "count", "delay total", "delay average", "delay max", "delay min", 234 237 (unsigned long long)t->freepages_count, 235 238 (unsigned long long)t->freepages_delay_total, 236 239 
average_ms((double)t->freepages_delay_total, t->freepages_count), 237 - delay_max_ms((double)t->freepages_delay_max), 238 - "count", "delay total", "delay average", "delay max", 240 + delay_ms((double)t->freepages_delay_max), 241 + delay_ms((double)t->freepages_delay_min), 242 + "count", "delay total", "delay average", "delay max", "delay min", 239 243 (unsigned long long)t->thrashing_count, 240 244 (unsigned long long)t->thrashing_delay_total, 241 245 average_ms((double)t->thrashing_delay_total, t->thrashing_count), 242 - delay_max_ms((double)t->thrashing_delay_max), 243 - "count", "delay total", "delay average", "delay max", 246 + delay_ms((double)t->thrashing_delay_max), 247 + delay_ms((double)t->thrashing_delay_min), 248 + "count", "delay total", "delay average", "delay max", "delay min", 244 249 (unsigned long long)t->compact_count, 245 250 (unsigned long long)t->compact_delay_total, 246 251 average_ms((double)t->compact_delay_total, t->compact_count), 247 - delay_max_ms((double)t->compact_delay_max), 248 - "count", "delay total", "delay average", "delay max", 252 + delay_ms((double)t->compact_delay_max), 253 + delay_ms((double)t->compact_delay_min), 254 + "count", "delay total", "delay average", "delay max", "delay min", 249 255 (unsigned long long)t->wpcopy_count, 250 256 (unsigned long long)t->wpcopy_delay_total, 251 257 average_ms((double)t->wpcopy_delay_total, t->wpcopy_count), 252 - delay_max_ms((double)t->wpcopy_delay_max), 253 - "count", "delay total", "delay average", "delay max", 258 + delay_ms((double)t->wpcopy_delay_max), 259 + delay_ms((double)t->wpcopy_delay_min), 260 + "count", "delay total", "delay average", "delay max", "delay min", 254 261 (unsigned long long)t->irq_count, 255 262 (unsigned long long)t->irq_delay_total, 256 263 average_ms((double)t->irq_delay_total, t->irq_count), 257 - delay_max_ms((double)t->irq_delay_max)); 264 + delay_ms((double)t->irq_delay_max), 265 + delay_ms((double)t->irq_delay_min)); 258 266 } 259 267 260 268 static 
void task_context_switch_counts(struct taskstats *t)