Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

lockdep: Add lock_set_cmp_fn() annotation

This implements a new interface to lockdep, lock_set_cmp_fn(), for
defining a custom ordering when taking multiple locks of the same
class.

This is an alternative to subclasses, but cannot fully replace them
since subclasses allow lock hierarchies with other classes
intertwined, while this relies on pure class nesting.

Specifically, if A is our nesting class then:

A/0 <- B <- A/1

Would be a valid lock order with subclasses (each subclass really is a
full class from the validation PoV) but not with this annotation,
which requires all nesting to be consecutive.

Example output:

| ============================================
| WARNING: possible recursive locking detected
| 6.2.0-rc8-00003-g7d81e591ca6a-dirty #15 Not tainted
| --------------------------------------------
| kworker/14:3/938 is trying to acquire lock:
| ffff8880143218c8 (&b->lock l=0 0:2803368){++++}-{3:3}, at: bch_btree_node_get.part.0+0x81/0x2b0
|
| but task is already holding lock:
| ffff8880143de8c8 (&b->lock l=1 1048575:9223372036854775807){++++}-{3:3}, at: __bch_btree_map_nodes+0xea/0x1e0
| and the lock comparison function returns 1:
|
| other info that might help us debug this:
| Possible unsafe locking scenario:
|
| CPU0
| ----
| lock(&b->lock l=1 1048575:9223372036854775807);
| lock(&b->lock l=0 0:2803368);
|
| *** DEADLOCK ***
|
| May be due to missing lock nesting notation
|
| 3 locks held by kworker/14:3/938:
| #0: ffff888005ea9d38 ((wq_completion)bcache){+.+.}-{0:0}, at: process_one_work+0x1ec/0x530
| #1: ffff8880098c3e70 ((work_completion)(&cl->work)#3){+.+.}-{0:0}, at: process_one_work+0x1ec/0x530
| #2: ffff8880143de8c8 (&b->lock l=1 1048575:9223372036854775807){++++}-{3:3}, at: __bch_btree_map_nodes+0xea/0x1e0

[peterz: extended changelog]
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230509195847.1745548-1-kent.overstreet@linux.dev

authored by

Kent Overstreet and committed by
Peter Zijlstra
eb1cfd09 f1fcbaa1

+103 -31
+8
include/linux/lockdep.h
··· 434 434 435 435 #endif /* !LOCKDEP */ 436 436 437 + #ifdef CONFIG_PROVE_LOCKING 438 + void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn); 439 + 440 + #define lock_set_cmp_fn(lock, ...) lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__) 441 + #else 442 + #define lock_set_cmp_fn(lock, ...) do { } while (0) 443 + #endif 444 + 437 445 enum xhlock_context_t { 438 446 XHLOCK_HARD, 439 447 XHLOCK_SOFT,
+8
include/linux/lockdep_types.h
··· 84 84 85 85 #define LOCKSTAT_POINTS 4 86 86 87 + struct lockdep_map; 88 + typedef int (*lock_cmp_fn)(const struct lockdep_map *a, 89 + const struct lockdep_map *b); 90 + typedef void (*lock_print_fn)(const struct lockdep_map *map); 91 + 87 92 /* 88 93 * The lock-class itself. The order of the structure members matters. 89 94 * reinit_class() zeroes the key member and all subsequent members. ··· 114 109 struct list_head locks_after, locks_before; 115 110 116 111 const struct lockdep_subclass_key *key; 112 + lock_cmp_fn cmp_fn; 113 + lock_print_fn print_fn; 114 + 117 115 unsigned int subclass; 118 116 unsigned int dep_gen_id; 119 117
+87 -31
kernel/locking/lockdep.c
··· 709 709 usage[i] = '\0'; 710 710 } 711 711 712 - static void __print_lock_name(struct lock_class *class) 712 + static void __print_lock_name(struct held_lock *hlock, struct lock_class *class) 713 713 { 714 714 char str[KSYM_NAME_LEN]; 715 715 const char *name; ··· 724 724 printk(KERN_CONT "#%d", class->name_version); 725 725 if (class->subclass) 726 726 printk(KERN_CONT "/%d", class->subclass); 727 + if (hlock && class->print_fn) 728 + class->print_fn(hlock->instance); 727 729 } 728 730 } 729 731 730 - static void print_lock_name(struct lock_class *class) 732 + static void print_lock_name(struct held_lock *hlock, struct lock_class *class) 731 733 { 732 734 char usage[LOCK_USAGE_CHARS]; 733 735 734 736 get_usage_chars(class, usage); 735 737 736 738 printk(KERN_CONT " ("); 737 - __print_lock_name(class); 739 + __print_lock_name(hlock, class); 738 740 printk(KERN_CONT "){%s}-{%d:%d}", usage, 739 741 class->wait_type_outer ?: class->wait_type_inner, 740 742 class->wait_type_inner); ··· 774 772 } 775 773 776 774 printk(KERN_CONT "%px", hlock->instance); 777 - print_lock_name(lock); 775 + print_lock_name(hlock, lock); 778 776 printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip); 779 777 } 780 778 ··· 1870 1868 if (debug_locks_silent) 1871 1869 return; 1872 1870 printk("\n-> #%u", depth); 1873 - print_lock_name(target->class); 1871 + print_lock_name(NULL, target->class); 1874 1872 printk(KERN_CONT ":\n"); 1875 1873 print_lock_trace(target->trace, 6); 1876 1874 } ··· 1901 1899 */ 1902 1900 if (parent != source) { 1903 1901 printk("Chain exists of:\n "); 1904 - __print_lock_name(source); 1902 + __print_lock_name(src, source); 1905 1903 printk(KERN_CONT " --> "); 1906 - __print_lock_name(parent); 1904 + __print_lock_name(NULL, parent); 1907 1905 printk(KERN_CONT " --> "); 1908 - __print_lock_name(target); 1906 + __print_lock_name(tgt, target); 1909 1907 printk(KERN_CONT "\n\n"); 1910 1908 } 1911 1909 ··· 1916 1914 printk(" rlock("); 1917 1915 else 1918 1916 
printk(" lock("); 1919 - __print_lock_name(target); 1917 + __print_lock_name(tgt, target); 1920 1918 printk(KERN_CONT ");\n"); 1921 1919 printk(" lock("); 1922 - __print_lock_name(parent); 1920 + __print_lock_name(NULL, parent); 1923 1921 printk(KERN_CONT ");\n"); 1924 1922 printk(" lock("); 1925 - __print_lock_name(target); 1923 + __print_lock_name(tgt, target); 1926 1924 printk(KERN_CONT ");\n"); 1927 1925 if (src_read != 0) 1928 1926 printk(" rlock("); ··· 1930 1928 printk(" sync("); 1931 1929 else 1932 1930 printk(" lock("); 1933 - __print_lock_name(source); 1931 + __print_lock_name(src, source); 1934 1932 printk(KERN_CONT ");\n"); 1935 1933 printk("\n *** DEADLOCK ***\n\n"); 1936 1934 } ··· 2156 2154 return ret; 2157 2155 } 2158 2156 2157 + static void print_deadlock_bug(struct task_struct *, struct held_lock *, struct held_lock *); 2158 + 2159 2159 /* 2160 2160 * Prove that the dependency graph starting at <src> can not 2161 2161 * lead to <target>. If it can, there is a circle when adding ··· 2189 2185 *trace = save_trace(); 2190 2186 } 2191 2187 2192 - print_circular_bug(&src_entry, target_entry, src, target); 2188 + if (src->class_idx == target->class_idx) 2189 + print_deadlock_bug(current, src, target); 2190 + else 2191 + print_circular_bug(&src_entry, target_entry, src, target); 2193 2192 } 2194 2193 2195 2194 return ret; ··· 2348 2341 int bit; 2349 2342 2350 2343 printk("%*s->", depth, ""); 2351 - print_lock_name(class); 2344 + print_lock_name(NULL, class); 2352 2345 #ifdef CONFIG_DEBUG_LOCKDEP 2353 2346 printk(KERN_CONT " ops: %lu", debug_class_ops_read(class)); 2354 2347 #endif ··· 2530 2523 */ 2531 2524 if (middle_class != unsafe_class) { 2532 2525 printk("Chain exists of:\n "); 2533 - __print_lock_name(safe_class); 2526 + __print_lock_name(NULL, safe_class); 2534 2527 printk(KERN_CONT " --> "); 2535 - __print_lock_name(middle_class); 2528 + __print_lock_name(NULL, middle_class); 2536 2529 printk(KERN_CONT " --> "); 2537 - 
__print_lock_name(unsafe_class); 2530 + __print_lock_name(NULL, unsafe_class); 2538 2531 printk(KERN_CONT "\n\n"); 2539 2532 } 2540 2533 ··· 2542 2535 printk(" CPU0 CPU1\n"); 2543 2536 printk(" ---- ----\n"); 2544 2537 printk(" lock("); 2545 - __print_lock_name(unsafe_class); 2538 + __print_lock_name(NULL, unsafe_class); 2546 2539 printk(KERN_CONT ");\n"); 2547 2540 printk(" local_irq_disable();\n"); 2548 2541 printk(" lock("); 2549 - __print_lock_name(safe_class); 2542 + __print_lock_name(NULL, safe_class); 2550 2543 printk(KERN_CONT ");\n"); 2551 2544 printk(" lock("); 2552 - __print_lock_name(middle_class); 2545 + __print_lock_name(NULL, middle_class); 2553 2546 printk(KERN_CONT ");\n"); 2554 2547 printk(" <Interrupt>\n"); 2555 2548 printk(" lock("); 2556 - __print_lock_name(safe_class); 2549 + __print_lock_name(NULL, safe_class); 2557 2550 printk(KERN_CONT ");\n"); 2558 2551 printk("\n *** DEADLOCK ***\n\n"); 2559 2552 } ··· 2590 2583 pr_warn("\nand this task is already holding:\n"); 2591 2584 print_lock(prev); 2592 2585 pr_warn("which would create a new lock dependency:\n"); 2593 - print_lock_name(hlock_class(prev)); 2586 + print_lock_name(prev, hlock_class(prev)); 2594 2587 pr_cont(" ->"); 2595 - print_lock_name(hlock_class(next)); 2588 + print_lock_name(next, hlock_class(next)); 2596 2589 pr_cont("\n"); 2597 2590 2598 2591 pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n", 2599 2592 irqclass); 2600 - print_lock_name(backwards_entry->class); 2593 + print_lock_name(NULL, backwards_entry->class); 2601 2594 pr_warn("\n... which became %s-irq-safe at:\n", irqclass); 2602 2595 2603 2596 print_lock_trace(backwards_entry->class->usage_traces[bit1], 1); 2604 2597 2605 2598 pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass); 2606 - print_lock_name(forwards_entry->class); 2599 + print_lock_name(NULL, forwards_entry->class); 2607 2600 pr_warn("\n... 
which became %s-irq-unsafe at:\n", irqclass); 2608 2601 pr_warn("..."); 2609 2602 ··· 2973 2966 printk(" CPU0\n"); 2974 2967 printk(" ----\n"); 2975 2968 printk(" lock("); 2976 - __print_lock_name(prev); 2969 + __print_lock_name(prv, prev); 2977 2970 printk(KERN_CONT ");\n"); 2978 2971 printk(" lock("); 2979 - __print_lock_name(next); 2972 + __print_lock_name(nxt, next); 2980 2973 printk(KERN_CONT ");\n"); 2981 2974 printk("\n *** DEADLOCK ***\n\n"); 2982 2975 printk(" May be due to missing lock nesting notation\n\n"); ··· 2986 2979 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, 2987 2980 struct held_lock *next) 2988 2981 { 2982 + struct lock_class *class = hlock_class(prev); 2983 + 2989 2984 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2990 2985 return; 2991 2986 ··· 3001 2992 print_lock(next); 3002 2993 pr_warn("\nbut task is already holding lock:\n"); 3003 2994 print_lock(prev); 2995 + 2996 + if (class->cmp_fn) { 2997 + pr_warn("and the lock comparison function returns %i:\n", 2998 + class->cmp_fn(prev->instance, next->instance)); 2999 + } 3004 3000 3005 3001 pr_warn("\nother info that might help us debug this:\n"); 3006 3002 print_deadlock_scenario(next, prev); ··· 3028 3014 static int 3029 3015 check_deadlock(struct task_struct *curr, struct held_lock *next) 3030 3016 { 3017 + struct lock_class *class; 3031 3018 struct held_lock *prev; 3032 3019 struct held_lock *nest = NULL; 3033 3020 int i; ··· 3047 3032 * lock class (i.e. 
read_lock(lock)+read_lock(lock)): 3048 3033 */ 3049 3034 if ((next->read == 2) && prev->read) 3035 + continue; 3036 + 3037 + class = hlock_class(prev); 3038 + 3039 + if (class->cmp_fn && 3040 + class->cmp_fn(prev->instance, next->instance) < 0) 3050 3041 continue; 3051 3042 3052 3043 /* ··· 3114 3093 hlock_class(next), 3115 3094 hlock_class(next)->name); 3116 3095 return 2; 3096 + } 3097 + 3098 + if (prev->class_idx == next->class_idx) { 3099 + struct lock_class *class = hlock_class(prev); 3100 + 3101 + if (class->cmp_fn && 3102 + class->cmp_fn(prev->instance, next->instance) < 0) 3103 + return 2; 3117 3104 } 3118 3105 3119 3106 /* ··· 3600 3571 hlock_id = chain_hlocks[chain->base + i]; 3601 3572 chain_key = print_chain_key_iteration(hlock_id, chain_key); 3602 3573 3603 - print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id)); 3574 + print_lock_name(NULL, lock_classes + chain_hlock_class_idx(hlock_id)); 3604 3575 printk("\n"); 3605 3576 } 3606 3577 } ··· 3957 3928 printk(" CPU0\n"); 3958 3929 printk(" ----\n"); 3959 3930 printk(" lock("); 3960 - __print_lock_name(class); 3931 + __print_lock_name(lock, class); 3961 3932 printk(KERN_CONT ");\n"); 3962 3933 printk(" <Interrupt>\n"); 3963 3934 printk(" lock("); 3964 - __print_lock_name(class); 3935 + __print_lock_name(lock, class); 3965 3936 printk(KERN_CONT ");\n"); 3966 3937 printk("\n *** DEADLOCK ***\n\n"); 3967 3938 } ··· 4047 4018 pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass); 4048 4019 else 4049 4020 pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); 4050 - print_lock_name(other->class); 4021 + print_lock_name(NULL, other->class); 4051 4022 pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n"); 4052 4023 4053 4024 pr_warn("\nother info that might help us debug this:\n"); ··· 4910 4881 4911 4882 struct lock_class_key __lockdep_no_validate__; 4912 4883 EXPORT_SYMBOL_GPL(__lockdep_no_validate__); 4884 + 4885 + 
#ifdef CONFIG_PROVE_LOCKING 4886 + void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn, 4887 + lock_print_fn print_fn) 4888 + { 4889 + struct lock_class *class = lock->class_cache[0]; 4890 + unsigned long flags; 4891 + 4892 + raw_local_irq_save(flags); 4893 + lockdep_recursion_inc(); 4894 + 4895 + if (!class) 4896 + class = register_lock_class(lock, 0, 0); 4897 + 4898 + if (class) { 4899 + WARN_ON(class->cmp_fn && class->cmp_fn != cmp_fn); 4900 + WARN_ON(class->print_fn && class->print_fn != print_fn); 4901 + 4902 + class->cmp_fn = cmp_fn; 4903 + class->print_fn = print_fn; 4904 + } 4905 + 4906 + lockdep_recursion_finish(); 4907 + raw_local_irq_restore(flags); 4908 + } 4909 + EXPORT_SYMBOL_GPL(lockdep_set_lock_cmp_fn); 4910 + #endif 4913 4911 4914 4912 static void 4915 4913 print_lock_nested_lock_not_held(struct task_struct *curr,