Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'core/softlockup' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core/softlockup' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
softlockup: make DETECT_HUNG_TASK default depend on DETECT_SOFTLOCKUP
softlockup: move 'one' to the softlockup section in sysctl.c
softlockup: ensure the task has been switched out once
softlockup: remove timestamp checking from hung_task
softlockup: convert read_lock in hung_task to rcu_read_lock
softlockup: check all tasks in hung_task
softlockup: remove unused definition for spawn_softlockup_task
softlockup: fix potential race in hung_task when resetting timeout
softlockup: fix to allow compiling with !DETECT_HUNG_TASK
softlockup: decouple hung tasks check from softlockup detection

+283 -114
+10 -8
include/linux/sched.h
··· 300 300 struct file *filp, void __user *buffer, 301 301 size_t *lenp, loff_t *ppos); 302 302 extern unsigned int softlockup_panic; 303 - extern unsigned long sysctl_hung_task_check_count; 304 - extern unsigned long sysctl_hung_task_timeout_secs; 305 - extern unsigned long sysctl_hung_task_warnings; 306 303 extern int softlockup_thresh; 307 304 #else 308 305 static inline void softlockup_tick(void) 309 - { 310 - } 311 - static inline void spawn_softlockup_task(void) 312 306 { 313 307 } 314 308 static inline void touch_softlockup_watchdog(void) ··· 313 319 } 314 320 #endif 315 321 322 + #ifdef CONFIG_DETECT_HUNG_TASK 323 + extern unsigned int sysctl_hung_task_panic; 324 + extern unsigned long sysctl_hung_task_check_count; 325 + extern unsigned long sysctl_hung_task_timeout_secs; 326 + extern unsigned long sysctl_hung_task_warnings; 327 + extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, 328 + struct file *filp, void __user *buffer, 329 + size_t *lenp, loff_t *ppos); 330 + #endif 316 331 317 332 /* Attach to any functions which should be ignored in wchan output. */ 318 333 #define __sched __attribute__((__section__(".sched.text"))) ··· 1258 1255 /* ipc stuff */ 1259 1256 struct sysv_sem sysvsem; 1260 1257 #endif 1261 - #ifdef CONFIG_DETECT_SOFTLOCKUP 1258 + #ifdef CONFIG_DETECT_HUNG_TASK 1262 1259 /* hung task detection */ 1263 - unsigned long last_switch_timestamp; 1264 1260 unsigned long last_switch_count; 1265 1261 #endif 1266 1262 /* CPU-specific state of this task */
+1
kernel/Makefile
··· 74 74 obj-$(CONFIG_KPROBES) += kprobes.o 75 75 obj-$(CONFIG_KGDB) += kgdb.o 76 76 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o 77 + obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o 77 78 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 78 79 obj-$(CONFIG_SECCOMP) += seccomp.o 79 80 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+3 -5
kernel/fork.c
··· 645 645 646 646 tsk->min_flt = tsk->maj_flt = 0; 647 647 tsk->nvcsw = tsk->nivcsw = 0; 648 + #ifdef CONFIG_DETECT_HUNG_TASK 649 + tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; 650 + #endif 648 651 649 652 tsk->mm = NULL; 650 653 tsk->active_mm = NULL; ··· 1034 1031 p->prev_stime = cputime_zero; 1035 1032 1036 1033 p->default_timer_slack_ns = current->timer_slack_ns; 1037 - 1038 - #ifdef CONFIG_DETECT_SOFTLOCKUP 1039 - p->last_switch_count = 0; 1040 - p->last_switch_timestamp = 0; 1041 - #endif 1042 1034 1043 1035 task_io_accounting_init(&p->ioac); 1044 1036 acct_clear_integrals(p);
+217
kernel/hung_task.c
··· 1 + /* 2 + * Detect Hung Task 3 + * 4 + * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state 5 + * 6 + */ 7 + 8 + #include <linux/mm.h> 9 + #include <linux/cpu.h> 10 + #include <linux/nmi.h> 11 + #include <linux/init.h> 12 + #include <linux/delay.h> 13 + #include <linux/freezer.h> 14 + #include <linux/kthread.h> 15 + #include <linux/lockdep.h> 16 + #include <linux/module.h> 17 + #include <linux/sysctl.h> 18 + 19 + /* 20 + * The number of tasks checked: 21 + */ 22 + unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; 23 + 24 + /* 25 + * Limit number of tasks checked in a batch. 26 + * 27 + * This value controls the preemptibility of khungtaskd since preemption 28 + * is disabled during the critical section. It also controls the size of 29 + * the RCU grace period. So it needs to be upper-bound. 30 + */ 31 + #define HUNG_TASK_BATCHING 1024 32 + 33 + /* 34 + * Zero means infinite timeout - no checking done: 35 + */ 36 + unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; 37 + 38 + unsigned long __read_mostly sysctl_hung_task_warnings = 10; 39 + 40 + static int __read_mostly did_panic; 41 + 42 + static struct task_struct *watchdog_task; 43 + 44 + /* 45 + * Should we panic (and reboot, if panic_timeout= is set) when a 46 + * hung task is detected: 47 + */ 48 + unsigned int __read_mostly sysctl_hung_task_panic = 49 + CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; 50 + 51 + static int __init hung_task_panic_setup(char *str) 52 + { 53 + sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); 54 + 55 + return 1; 56 + } 57 + __setup("hung_task_panic=", hung_task_panic_setup); 58 + 59 + static int 60 + hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) 61 + { 62 + did_panic = 1; 63 + 64 + return NOTIFY_DONE; 65 + } 66 + 67 + static struct notifier_block panic_block = { 68 + .notifier_call = hung_task_panic, 69 + }; 70 + 71 + static void check_hung_task(struct task_struct *t, unsigned long timeout) 
72 + { 73 + unsigned long switch_count = t->nvcsw + t->nivcsw; 74 + 75 + /* 76 + * Ensure the task is not frozen. 77 + * Also, when a freshly created task is scheduled once, changes 78 + * its state to TASK_UNINTERRUPTIBLE without having ever been 79 + * switched out once, it mustn't be checked. 80 + */ 81 + if (unlikely(t->flags & PF_FROZEN || !switch_count)) 82 + return; 83 + 84 + if (switch_count != t->last_switch_count) { 85 + t->last_switch_count = switch_count; 86 + return; 87 + } 88 + if (!sysctl_hung_task_warnings) 89 + return; 90 + sysctl_hung_task_warnings--; 91 + 92 + /* 93 + * Ok, the task did not get scheduled for more than 2 minutes, 94 + * complain: 95 + */ 96 + printk(KERN_ERR "INFO: task %s:%d blocked for more than " 97 + "%ld seconds.\n", t->comm, t->pid, timeout); 98 + printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" 99 + " disables this message.\n"); 100 + sched_show_task(t); 101 + __debug_show_held_locks(t); 102 + 103 + touch_nmi_watchdog(); 104 + 105 + if (sysctl_hung_task_panic) 106 + panic("hung_task: blocked tasks"); 107 + } 108 + 109 + /* 110 + * To avoid extending the RCU grace period for an unbounded amount of time, 111 + * periodically exit the critical section and enter a new one. 112 + * 113 + * For preemptible RCU it is sufficient to call rcu_read_unlock in order 114 + * to exit the grace period. For classic RCU, a reschedule is required. 115 + */ 116 + static void rcu_lock_break(struct task_struct *g, struct task_struct *t) 117 + { 118 + get_task_struct(g); 119 + get_task_struct(t); 120 + rcu_read_unlock(); 121 + cond_resched(); 122 + rcu_read_lock(); 123 + put_task_struct(t); 124 + put_task_struct(g); 125 + } 126 + 127 + /* 128 + * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for 129 + * a really long time (120 seconds). If that happens, print out 130 + * a warning. 
131 + */ 132 + static void check_hung_uninterruptible_tasks(unsigned long timeout) 133 + { 134 + int max_count = sysctl_hung_task_check_count; 135 + int batch_count = HUNG_TASK_BATCHING; 136 + struct task_struct *g, *t; 137 + 138 + /* 139 + * If the system crashed already then all bets are off, 140 + * do not report extra hung tasks: 141 + */ 142 + if (test_taint(TAINT_DIE) || did_panic) 143 + return; 144 + 145 + rcu_read_lock(); 146 + do_each_thread(g, t) { 147 + if (!--max_count) 148 + goto unlock; 149 + if (!--batch_count) { 150 + batch_count = HUNG_TASK_BATCHING; 151 + rcu_lock_break(g, t); 152 + /* Exit if t or g was unhashed during refresh. */ 153 + if (t->state == TASK_DEAD || g->state == TASK_DEAD) 154 + goto unlock; 155 + } 156 + /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ 157 + if (t->state == TASK_UNINTERRUPTIBLE) 158 + check_hung_task(t, timeout); 159 + } while_each_thread(g, t); 160 + unlock: 161 + rcu_read_unlock(); 162 + } 163 + 164 + static unsigned long timeout_jiffies(unsigned long timeout) 165 + { 166 + /* timeout of 0 will disable the watchdog */ 167 + return timeout ? 
timeout * HZ : MAX_SCHEDULE_TIMEOUT; 168 + } 169 + 170 + /* 171 + * Process updating of timeout sysctl 172 + */ 173 + int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, 174 + struct file *filp, void __user *buffer, 175 + size_t *lenp, loff_t *ppos) 176 + { 177 + int ret; 178 + 179 + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); 180 + 181 + if (ret || !write) 182 + goto out; 183 + 184 + wake_up_process(watchdog_task); 185 + 186 + out: 187 + return ret; 188 + } 189 + 190 + /* 191 + * kthread which checks for tasks stuck in D state 192 + */ 193 + static int watchdog(void *dummy) 194 + { 195 + set_user_nice(current, 0); 196 + 197 + for ( ; ; ) { 198 + unsigned long timeout = sysctl_hung_task_timeout_secs; 199 + 200 + while (schedule_timeout_interruptible(timeout_jiffies(timeout))) 201 + timeout = sysctl_hung_task_timeout_secs; 202 + 203 + check_hung_uninterruptible_tasks(timeout); 204 + } 205 + 206 + return 0; 207 + } 208 + 209 + static int __init hung_task_init(void) 210 + { 211 + atomic_notifier_chain_register(&panic_notifier_list, &panic_block); 212 + watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); 213 + 214 + return 0; 215 + } 216 + 217 + module_init(hung_task_init);
-100
kernel/softlockup.c
··· 166 166 } 167 167 168 168 /* 169 - * Have a reasonable limit on the number of tasks checked: 170 - */ 171 - unsigned long __read_mostly sysctl_hung_task_check_count = 1024; 172 - 173 - /* 174 - * Zero means infinite timeout - no checking done: 175 - */ 176 - unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; 177 - 178 - unsigned long __read_mostly sysctl_hung_task_warnings = 10; 179 - 180 - /* 181 - * Only do the hung-tasks check on one CPU: 182 - */ 183 - static int check_cpu __read_mostly = -1; 184 - 185 - static void check_hung_task(struct task_struct *t, unsigned long now) 186 - { 187 - unsigned long switch_count = t->nvcsw + t->nivcsw; 188 - 189 - if (t->flags & PF_FROZEN) 190 - return; 191 - 192 - if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { 193 - t->last_switch_count = switch_count; 194 - t->last_switch_timestamp = now; 195 - return; 196 - } 197 - if ((long)(now - t->last_switch_timestamp) < 198 - sysctl_hung_task_timeout_secs) 199 - return; 200 - if (!sysctl_hung_task_warnings) 201 - return; 202 - sysctl_hung_task_warnings--; 203 - 204 - /* 205 - * Ok, the task did not get scheduled for more than 2 minutes, 206 - * complain: 207 - */ 208 - printk(KERN_ERR "INFO: task %s:%d blocked for more than " 209 - "%ld seconds.\n", t->comm, t->pid, 210 - sysctl_hung_task_timeout_secs); 211 - printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" 212 - " disables this message.\n"); 213 - sched_show_task(t); 214 - __debug_show_held_locks(t); 215 - 216 - t->last_switch_timestamp = now; 217 - touch_nmi_watchdog(); 218 - 219 - if (softlockup_panic) 220 - panic("softlockup: blocked tasks"); 221 - } 222 - 223 - /* 224 - * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for 225 - * a really long time (120 seconds). If that happens, print out 226 - * a warning. 
227 - */ 228 - static void check_hung_uninterruptible_tasks(int this_cpu) 229 - { 230 - int max_count = sysctl_hung_task_check_count; 231 - unsigned long now = get_timestamp(this_cpu); 232 - struct task_struct *g, *t; 233 - 234 - /* 235 - * If the system crashed already then all bets are off, 236 - * do not report extra hung tasks: 237 - */ 238 - if (test_taint(TAINT_DIE) || did_panic) 239 - return; 240 - 241 - read_lock(&tasklist_lock); 242 - do_each_thread(g, t) { 243 - if (!--max_count) 244 - goto unlock; 245 - /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ 246 - if (t->state == TASK_UNINTERRUPTIBLE) 247 - check_hung_task(t, now); 248 - } while_each_thread(g, t); 249 - unlock: 250 - read_unlock(&tasklist_lock); 251 - } 252 - 253 - /* 254 169 * The watchdog thread - runs every second and touches the timestamp. 255 170 */ 256 171 static int watchdog(void *__bind_cpu) 257 172 { 258 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 259 - int this_cpu = (long)__bind_cpu; 260 174 261 175 sched_setscheduler(current, SCHED_FIFO, &param); 262 176 ··· 189 275 190 276 if (kthread_should_stop()) 191 277 break; 192 - 193 - if (this_cpu == check_cpu) { 194 - if (sysctl_hung_task_timeout_secs) 195 - check_hung_uninterruptible_tasks(this_cpu); 196 - } 197 278 198 279 set_current_state(TASK_INTERRUPTIBLE); 199 280 } ··· 221 312 break; 222 313 case CPU_ONLINE: 223 314 case CPU_ONLINE_FROZEN: 224 - check_cpu = cpumask_any(cpu_online_mask); 225 315 wake_up_process(per_cpu(watchdog_task, hotcpu)); 226 316 break; 227 317 #ifdef CONFIG_HOTPLUG_CPU 228 - case CPU_DOWN_PREPARE: 229 - case CPU_DOWN_PREPARE_FROZEN: 230 - if (hotcpu == check_cpu) { 231 - /* Pick any other online cpu. */ 232 - check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); 233 - } 234 - break; 235 - 236 318 case CPU_UP_CANCELED: 237 319 case CPU_UP_CANCELED_FROZEN: 238 320 if (!per_cpu(watchdog_task, hotcpu))
+14 -1
kernel/sysctl.c
··· 814 814 .extra1 = &neg_one, 815 815 .extra2 = &sixty, 816 816 }, 817 + #endif 818 + #ifdef CONFIG_DETECT_HUNG_TASK 819 + { 820 + .ctl_name = CTL_UNNUMBERED, 821 + .procname = "hung_task_panic", 822 + .data = &sysctl_hung_task_panic, 823 + .maxlen = sizeof(int), 824 + .mode = 0644, 825 + .proc_handler = &proc_dointvec_minmax, 826 + .strategy = &sysctl_intvec, 827 + .extra1 = &zero, 828 + .extra2 = &one, 829 + }, 817 830 { 818 831 .ctl_name = CTL_UNNUMBERED, 819 832 .procname = "hung_task_check_count", ··· 842 829 .data = &sysctl_hung_task_timeout_secs, 843 830 .maxlen = sizeof(unsigned long), 844 831 .mode = 0644, 845 - .proc_handler = &proc_doulongvec_minmax, 832 + .proc_handler = &proc_dohung_task_timeout_secs, 846 833 .strategy = &sysctl_intvec, 847 834 }, 848 835 {
+38
lib/Kconfig.debug
··· 186 186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 187 187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 188 188 189 + config DETECT_HUNG_TASK 190 + bool "Detect Hung Tasks" 191 + depends on DEBUG_KERNEL 192 + default DETECT_SOFTLOCKUP 193 + help 194 + Say Y here to enable the kernel to detect "hung tasks", 195 + which are bugs that cause the task to be stuck in 196 + uninterruptible "D" state indefinitely. 197 + 198 + When a hung task is detected, the kernel will print the 199 + current stack trace (which you should report), but the 200 + task will stay in uninterruptible state. If lockdep is 201 + enabled then all held locks will also be reported. This 202 + feature has negligible overhead. 203 + 204 + config BOOTPARAM_HUNG_TASK_PANIC 205 + bool "Panic (Reboot) On Hung Tasks" 206 + depends on DETECT_HUNG_TASK 207 + help 208 + Say Y here to enable the kernel to panic on "hung tasks", 209 + which are bugs that cause the kernel to leave a task stuck 210 + in uninterruptible "D" state. 211 + 212 + The panic can be used in combination with panic_timeout, 213 + to cause the system to reboot automatically after a 214 + hung task has been detected. This feature is useful for 215 + high-availability systems that have uptime guarantees and 216 + where hung tasks must be resolved ASAP. 217 + 218 + Say N if unsure. 219 + 220 + config BOOTPARAM_HUNG_TASK_PANIC_VALUE 221 + int 222 + depends on DETECT_HUNG_TASK 223 + range 0 1 224 + default 0 if !BOOTPARAM_HUNG_TASK_PANIC 225 + default 1 if BOOTPARAM_HUNG_TASK_PANIC 226 + 189 227 config SCHED_DEBUG 190 228 bool "Collect scheduler debugging info" 191 229 depends on DEBUG_KERNEL && PROC_FS