Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

f2fs: add lock elapsed time trace facility for f2fs rwsemphore

This patch adds lock elapsed time trace facility for f2fs rwsemphore.

If total elapsed time of critical region covered by lock exceeds a
threshold, it will print tracepoint to dump information of lock related
context, including:
- thread information
- CPU/IO priority
- lock information
- elapsed time
- total time
- running time (depend on CONFIG_64BIT)
- runnable time (depend on CONFIG_SCHED_INFO and CONFIG_SCHEDSTATS)
- io sleep time (depend on CONFIG_TASK_DELAY_ACCT and
/proc/sys/kernel/task_delayacct)
- other time (by default other time will account nonio sleep time,
but, if above kconfig is not defined, other time will
include runnable time and/or io sleep time as wll)

output:
f2fs_lock_elapsed_time: dev = (254,52), comm: sh, pid: 13855, prio: 120, ioprio_class: 2, ioprio_data: 4, lock_name: cp_rwsem, lock_type: rlock, total: 1000, running: 993, runnable: 2, io_sleep: 0, other: 5

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

authored by

Chao Yu and committed by
Jaegeuk Kim
79b3cebc 7ec19911

+221 -4
+106
fs/f2fs/checkpoint.c
··· 14 14 #include <linux/pagevec.h> 15 15 #include <linux/swap.h> 16 16 #include <linux/kthread.h> 17 + #include <linux/delayacct.h> 18 + #include <linux/ioprio.h> 19 + #include <linux/math64.h> 17 20 18 21 #include "f2fs.h" 19 22 #include "node.h" 20 23 #include "segment.h" 21 24 #include "iostat.h" 22 25 #include <trace/events/f2fs.h> 26 + 27 + static inline void get_lock_elapsed_time(struct f2fs_time_stat *ts) 28 + { 29 + ts->total_time = ktime_get(); 30 + #ifdef CONFIG_64BIT 31 + ts->running_time = current->se.sum_exec_runtime; 32 + #endif 33 + #if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS) 34 + ts->runnable_time = current->sched_info.run_delay; 35 + #endif 36 + #ifdef CONFIG_TASK_DELAY_ACCT 37 + if (current->delays) 38 + ts->io_sleep_time = current->delays->blkio_delay; 39 + #endif 40 + } 41 + 42 + static inline void trace_lock_elapsed_time_start(struct f2fs_rwsem *sem, 43 + struct f2fs_lock_context *lc) 44 + { 45 + lc->lock_trace = trace_f2fs_lock_elapsed_time_enabled(); 46 + if (!lc->lock_trace) 47 + return; 48 + 49 + get_lock_elapsed_time(&lc->ts); 50 + } 51 + 52 + static inline void trace_lock_elapsed_time_end(struct f2fs_rwsem *sem, 53 + struct f2fs_lock_context *lc, bool is_write) 54 + { 55 + struct f2fs_time_stat tts; 56 + unsigned long long total_time; 57 + unsigned long long running_time = 0; 58 + unsigned long long runnable_time = 0; 59 + unsigned long long io_sleep_time = 0; 60 + unsigned long long other_time = 0; 61 + unsigned npm = NSEC_PER_MSEC; 62 + 63 + if (!lc->lock_trace) 64 + return; 65 + 66 + get_lock_elapsed_time(&tts); 67 + 68 + total_time = div_u64(tts.total_time - lc->ts.total_time, npm); 69 + if (total_time <= MAX_LOCK_ELAPSED_TIME) 70 + return; 71 + 72 + #ifdef CONFIG_64BIT 73 + running_time = div_u64(tts.running_time - lc->ts.running_time, npm); 74 + #endif 75 + #if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS) 76 + runnable_time = div_u64(tts.runnable_time - lc->ts.runnable_time, npm); 77 + #endif 78 + #ifdef CONFIG_TASK_DELAY_ACCT 79 + io_sleep_time = div_u64(tts.io_sleep_time - lc->ts.io_sleep_time, npm); 80 + #endif 81 + if (total_time > running_time + io_sleep_time + runnable_time) 82 + other_time = total_time - running_time - 83 + io_sleep_time - runnable_time; 84 + 85 + trace_f2fs_lock_elapsed_time(sem->sbi, sem->name, is_write, current, 86 + get_current_ioprio(), total_time, running_time, 87 + runnable_time, io_sleep_time, other_time); 88 + } 89 + 90 + void f2fs_down_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) 91 + { 92 + f2fs_down_read(sem); 93 + trace_lock_elapsed_time_start(sem, lc); 94 + } 95 + 96 + int f2fs_down_read_trylock_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) 97 + { 98 + if (!f2fs_down_read_trylock(sem)) 99 + return 0; 100 + trace_lock_elapsed_time_start(sem, lc); 101 + return 1; 102 + } 103 + 104 + void f2fs_up_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) 105 + { 106 + f2fs_up_read(sem); 107 + trace_lock_elapsed_time_end(sem, lc, false); 108 + } 109 + 110 + void f2fs_down_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) 111 + { 112 + f2fs_down_write(sem); 113 + trace_lock_elapsed_time_start(sem, lc); 114 + } 115 + 116 + int f2fs_down_write_trylock_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) 117 + { 118 + if (!f2fs_down_write_trylock(sem)) 119 + return 0; 120 + trace_lock_elapsed_time_start(sem, lc); 121 + return 1; 122 + } 123 + 124 + void f2fs_up_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) 125 + { 126 + f2fs_up_write(sem); 127 + trace_lock_elapsed_time_end(sem, lc, true); 128 + } 23 129 24 130 #define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 3)) 25 131
+47 -4
fs/f2fs/f2fs.h
··· 173 173 ALLOCATE_FORWARD_FROM_HINT, 174 174 }; 175 175 176 + enum f2fs_lock_name { 177 + LOCK_NAME_NONE, 178 + }; 179 + 176 180 /* 177 181 * An implementation of an rwsem that is explicitly unfair to readers. This 178 182 * prevents priority inversion when a low-priority reader acquires the read lock ··· 185 181 */ 186 182 187 183 struct f2fs_rwsem { 184 + struct f2fs_sb_info *sbi; 185 + enum f2fs_lock_name name; 188 186 struct rw_semaphore internal_rwsem; 189 187 #ifdef CONFIG_F2FS_UNFAIR_RWSEM 190 188 wait_queue_head_t read_waiters; ··· 1415 1409 unsigned int nr_free_secs; /* # of free sections to do GC */ 1416 1410 }; 1417 1411 1412 + struct f2fs_time_stat { 1413 + unsigned long long total_time; /* total wall clock time */ 1414 + #ifdef CONFIG_64BIT 1415 + unsigned long long running_time; /* running time */ 1416 + #endif 1417 + #if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS) 1418 + unsigned long long runnable_time; /* runnable(including preempted) time */ 1419 + #endif 1420 + #ifdef CONFIG_TASK_DELAY_ACCT 1421 + unsigned long long io_sleep_time; /* IO sleep time */ 1422 + #endif 1423 + }; 1424 + 1425 + struct f2fs_lock_context { 1426 + struct f2fs_time_stat ts; 1427 + bool lock_trace; 1428 + }; 1429 + 1418 1430 /* 1419 1431 * For s_flag in struct f2fs_sb_info 1420 1432 * Modification on enum should be synchronized with s_flag array ··· 1548 1524 LOOKUP_COMPAT, 1549 1525 LOOKUP_AUTO, 1550 1526 }; 1527 + 1528 + /* a threshold of maximum elapsed time in critical region to print tracepoint */ 1529 + #define MAX_LOCK_ELAPSED_TIME 500 1551 1530 1552 1531 static inline int f2fs_test_bit(unsigned int nr, char *addr); 1553 1532 static inline void f2fs_set_bit(unsigned int nr, char *addr); ··· 2290 2263 spin_unlock_irqrestore(&sbi->cp_lock, flags); 2291 2264 } 2292 2265 2293 - #define init_f2fs_rwsem(sem) \ 2266 + #define init_f2fs_rwsem(sem) __init_f2fs_rwsem(sem, NULL, LOCK_NAME_NONE) 2267 + #define init_f2fs_rwsem_trace __init_f2fs_rwsem 2268 + 2269 + #define __init_f2fs_rwsem(sem, sbi, name) \ 2294 2270 do { \ 2295 2271 static struct lock_class_key __key; \ 2296 2272 \ 2297 - __init_f2fs_rwsem((sem), #sem, &__key); \ 2273 + do_init_f2fs_rwsem((sem), #sem, &__key, sbi, name); \ 2298 2274 } while (0) 2299 2275 2300 - static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, 2301 - const char *sem_name, struct lock_class_key *key) 2276 + static inline void do_init_f2fs_rwsem(struct f2fs_rwsem *sem, 2277 + const char *sem_name, struct lock_class_key *key, 2278 + struct f2fs_sb_info *sbi, enum f2fs_lock_name name) 2302 2279 { 2280 + sem->sbi = sbi; 2281 + sem->name = name; 2303 2282 __init_rwsem(&sem->internal_rwsem, sem_name, key); 2304 2283 #ifdef CONFIG_F2FS_UNFAIR_RWSEM 2305 2284 init_waitqueue_head(&sem->read_waiters); ··· 2373 2340 wake_up_all(&sem->read_waiters); 2374 2341 #endif 2375 2342 } 2343 + 2344 + void f2fs_down_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc); 2345 + int f2fs_down_read_trylock_trace(struct f2fs_rwsem *sem, 2346 + struct f2fs_lock_context *lc); 2347 + void f2fs_up_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc); 2348 + void f2fs_down_write_trace(struct f2fs_rwsem *sem, 2349 + struct f2fs_lock_context *lc); 2350 + int f2fs_down_write_trylock_trace(struct f2fs_rwsem *sem, 2351 + struct f2fs_lock_context *lc); 2352 + void f2fs_up_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc); 2376 2353 2377 2354 static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) 2378 2355 {
+68
include/trace/events/f2fs.h
··· 184 184 { CP_PHASE_FINISH_BLOCK_OPS, "finish block_ops" }, \ 185 185 { CP_PHASE_FINISH_CHECKPOINT, "finish checkpoint" }) 186 186 187 + #define show_lock_name(lock) \ 188 + __print_symbolic(lock, \ 189 + { LOCK_NAME_NONE, "none" }) 190 + 187 191 struct f2fs_sb_info; 188 192 struct f2fs_io_info; 189 193 struct extent_info; ··· 2454 2450 TP_PROTO(struct inode *inode, loff_t offset, int bytes), 2455 2451 2456 2452 TP_ARGS(inode, offset, bytes) 2453 + ); 2454 + 2455 + TRACE_EVENT(f2fs_lock_elapsed_time, 2456 + 2457 + TP_PROTO(struct f2fs_sb_info *sbi, enum f2fs_lock_name lock_name, 2458 + bool is_write, struct task_struct *p, int ioprio, 2459 + unsigned long long total_time, 2460 + unsigned long long running_time, 2461 + unsigned long long runnable_time, 2462 + unsigned long long io_sleep_time, 2463 + unsigned long long other_time), 2464 + 2465 + TP_ARGS(sbi, lock_name, is_write, p, ioprio, total_time, running_time, 2466 + runnable_time, io_sleep_time, other_time), 2467 + 2468 + TP_STRUCT__entry( 2469 + __field(dev_t, dev) 2470 + __array(char, comm, TASK_COMM_LEN) 2471 + __field(pid_t, pid) 2472 + __field(int, prio) 2473 + __field(int, ioprio_class) 2474 + __field(int, ioprio_data) 2475 + __field(unsigned int, lock_name) 2476 + __field(bool, is_write) 2477 + __field(unsigned long long, total_time) 2478 + __field(unsigned long long, running_time) 2479 + __field(unsigned long long, runnable_time) 2480 + __field(unsigned long long, io_sleep_time) 2481 + __field(unsigned long long, other_time) 2482 + ), 2483 + 2484 + TP_fast_assign( 2485 + __entry->dev = sbi->sb->s_dev; 2486 + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 2487 + __entry->pid = p->pid; 2488 + __entry->prio = p->prio; 2489 + __entry->ioprio_class = IOPRIO_PRIO_CLASS(ioprio); 2490 + __entry->ioprio_data = IOPRIO_PRIO_DATA(ioprio); 2491 + __entry->lock_name = lock_name; 2492 + __entry->is_write = is_write; 2493 + __entry->total_time = total_time; 2494 + __entry->running_time = running_time; 2495 + __entry->runnable_time = runnable_time; 2496 + __entry->io_sleep_time = io_sleep_time; 2497 + __entry->other_time = other_time; 2498 + ), 2499 + 2500 + TP_printk("dev = (%d,%d), comm: %s, pid: %d, prio: %d, " 2501 + "ioprio_class: %d, ioprio_data: %d, lock_name: %s, " 2502 + "lock_type: %s, total: %llu, running: %llu, " 2503 + "runnable: %llu, io_sleep: %llu, other: %llu", 2504 + show_dev(__entry->dev), 2505 + __entry->comm, 2506 + __entry->pid, 2507 + __entry->prio, 2508 + __entry->ioprio_class, 2509 + __entry->ioprio_data, 2510 + show_lock_name(__entry->lock_name), 2511 + __entry->is_write ? "wlock" : "rlock", 2512 + __entry->total_time, 2513 + __entry->running_time, 2514 + __entry->runnable_time, 2515 + __entry->io_sleep_time, 2516 + __entry->other_time) 2457 2517 ); 2458 2518 2459 2519 #endif /* _TRACE_F2FS_H */