Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'smp-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull smp/hotplug fixes from Thomas Gleixner:
"This addresses the fallout of the new lockdep mechanism which covers
completions in the CPU hotplug code.

The lockdep splats are false positives, but there is no way to
annotate that reliably. The solution is to split the completions for
CPU up and down, which requires some reshuffling of the failure
rollback handling as well"

* 'smp-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
smp/hotplug: Hotplug state fail injection
smp/hotplug: Differentiate the AP completion between up and down
smp/hotplug: Differentiate the AP-work lockdep class between up and down
smp/hotplug: Callback vs state-machine consistency
smp/hotplug: Rewrite AP state machine core
smp/hotplug: Allow external multi-instance rollback
smp/hotplug: Add state diagram
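
The completion split described in the pull message is visible in the kernel/cpu.c hunks below: the single st->done completion and the single cpuhp_state lockdep map are each duplicated per direction (done_up/done_down, "cpuhp_state-up"/"cpuhp_state-down"), so lockdep sees two distinct classes instead of folding the up and down paths into one false dependency chain. As a minimal userspace analogue of the done_up/done_down pattern, using C11 threads (the completion wrapper and hotplug_state struct here are illustrative stand-ins, not the kernel's types):

/*
 * Userspace sketch of the per-direction completion split.
 * Build with: gcc -std=c11 -pthread completion_split.c
 */
#include <stdbool.h>
#include <stdio.h>
#include <threads.h>

/* Tiny condvar-based stand-in for the kernel's struct completion. */
struct completion {
	mtx_t lock;
	cnd_t cond;
	bool done;
};

static void init_completion(struct completion *c)
{
	mtx_init(&c->lock, mtx_plain);
	cnd_init(&c->cond);
	c->done = false;
}

static void complete(struct completion *c)
{
	mtx_lock(&c->lock);
	c->done = true;
	cnd_signal(&c->cond);
	mtx_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	mtx_lock(&c->lock);
	while (!c->done)
		cnd_wait(&c->cond, &c->lock);
	mtx_unlock(&c->lock);
}

/* One completion per direction, mirroring st->done_up / st->done_down. */
struct hotplug_state {
	struct completion done_up;
	struct completion done_down;
};

static int ap_thread(void *arg)
{
	struct hotplug_state *st = arg;

	/* ... run the bringup callbacks ... */
	complete(&st->done_up);	/* signal only the cpu-up waiter */
	return 0;
}

int main(void)
{
	struct hotplug_state st;
	thrd_t ap;

	init_completion(&st.done_up);
	init_completion(&st.done_down);

	thrd_create(&ap, ap_thread, &st);
	wait_for_completion(&st.done_up);	/* bringup_wait_for_ap() analogue */
	thrd_join(ap, NULL);
	puts("AP bringup complete");
	return 0;
}

The point of the split is that each waiter only ever pairs with the completion for its own direction, which is also what gives lockdep two separate classes to track.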

2 files changed: +387 -146

include/linux/cpuhotplug.h  (+20 -1)
···
 
 #include <linux/types.h>
 
+/*
+ * CPU-up                        CPU-down
+ *
+ * BP           AP               BP              AP
+ *
+ * OFFLINE                       OFFLINE
+ *   |                             ^
+ *   v                             |
+ * BRINGUP_CPU->AP_OFFLINE       BRINGUP_CPU  <- AP_IDLE_DEAD (idle thread/play_dead)
+ *                |                              AP_OFFLINE
+ *                v (IRQ-off)      ,---------------^
+ *              AP_ONLINE          | (stop_machine)
+ *                |           TEARDOWN_CPU <-  AP_ONLINE_IDLE
+ *                |                              ^
+ *                v                              |
+ *              AP_ACTIVE                      AP_ACTIVE
+ */
+
 enum cpuhp_state {
-        CPUHP_OFFLINE,
+        CPUHP_INVALID = -1,
+        CPUHP_OFFLINE = 0,
         CPUHP_CREATE_THREADS,
         CPUHP_PERF_PREPARE,
         CPUHP_PERF_X86_PREPARE,
kernel/cpu.c  (+367 -145)
···
  * @bringup:   Single callback bringup or teardown selector
  * @cb_state:  The state for a single callback (install/uninstall)
  * @result:    Result of the operation
- * @done:      Signal completion to the issuer of the task
+ * @done_up:   Signal completion to the issuer of the task for cpu-up
+ * @done_down: Signal completion to the issuer of the task for cpu-down
  */
 struct cpuhp_cpu_state {
         enum cpuhp_state        state;
         enum cpuhp_state        target;
+        enum cpuhp_state        fail;
 #ifdef CONFIG_SMP
         struct task_struct      *thread;
         bool                    should_run;
···
         bool                    single;
         bool                    bringup;
         struct hlist_node       *node;
+        struct hlist_node       *last;
         enum cpuhp_state        cb_state;
         int                     result;
-        struct completion       done;
+        struct completion       done_up;
+        struct completion       done_down;
 #endif
 };
 
-static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
+        .fail = CPUHP_INVALID,
+};
 
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
-static struct lockdep_map cpuhp_state_lock_map =
-        STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+static struct lockdep_map cpuhp_state_up_map =
+        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
+static struct lockdep_map cpuhp_state_down_map =
+        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
+
+
+static inline void cpuhp_lock_acquire(bool bringup)
+{
+        lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+
+static inline void cpuhp_lock_release(bool bringup)
+{
+        lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+#else
+
+static inline void cpuhp_lock_acquire(bool bringup) { }
+static inline void cpuhp_lock_release(bool bringup) { }
+
 #endif
 
 /**
···
 /**
  * cpuhp_invoke_callback - Invoke the callbacks for a given state
  * @cpu:     The cpu for which the callback should be invoked
- * @step:    The step in the state machine
+ * @state:   The state to do callbacks for
  * @bringup: True if the bringup callback should be invoked
+ * @node:    For multi-instance, do a single entry callback for install/remove
+ * @lastp:   For multi-instance rollback, remember how far we got
  *
  * Called from cpu hotplug and from the state register machinery.
  */
 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
-                                 bool bringup, struct hlist_node *node)
+                                 bool bringup, struct hlist_node *node,
+                                 struct hlist_node **lastp)
 {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
         struct cpuhp_step *step = cpuhp_get_step(state);
···
         int (*cb)(unsigned int cpu);
         int ret, cnt;
 
+        if (st->fail == state) {
+                st->fail = CPUHP_INVALID;
+
+                if (!(bringup ? step->startup.single : step->teardown.single))
+                        return 0;
+
+                return -EAGAIN;
+        }
+
         if (!step->multi_instance) {
+                WARN_ON_ONCE(lastp && *lastp);
                 cb = bringup ? step->startup.single : step->teardown.single;
                 if (!cb)
                         return 0;
···
 
         /* Single invocation for instance add/remove */
         if (node) {
+                WARN_ON_ONCE(lastp && *lastp);
                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
                 ret = cbm(cpu, node);
                 trace_cpuhp_exit(cpu, st->state, state, ret);
···
         /* State transition. Invoke on all instances */
         cnt = 0;
         hlist_for_each(node, &step->list) {
+                if (lastp && node == *lastp)
+                        break;
+
                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
                 ret = cbm(cpu, node);
                 trace_cpuhp_exit(cpu, st->state, state, ret);
-                if (ret)
-                        goto err;
+                if (ret) {
+                        if (!lastp)
+                                goto err;
+
+                        *lastp = node;
+                        return ret;
+                }
                 cnt++;
         }
+        if (lastp)
+                *lastp = NULL;
         return 0;
 err:
         /* Rollback the instances if one failed */
···
         hlist_for_each(node, &step->list) {
                 if (!cnt--)
                         break;
-                cbm(cpu, node);
+
+                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+                ret = cbm(cpu, node);
+                trace_cpuhp_exit(cpu, st->state, state, ret);
+                /*
+                 * Rollback must not fail.
+                 */
+                WARN_ON_ONCE(ret);
         }
         return ret;
 }
 
 #ifdef CONFIG_SMP
+static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+        struct completion *done = bringup ? &st->done_up : &st->done_down;
+        wait_for_completion(done);
+}
+
+static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+        struct completion *done = bringup ? &st->done_up : &st->done_down;
+        complete(done);
+}
+
+/*
+ * The former STARTING/DYING states run with IRQs disabled and must not fail.
+ */
+static bool cpuhp_is_atomic_state(enum cpuhp_state state)
+{
+        return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
+}
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
···
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif  /* CONFIG_HOTPLUG_CPU */
 
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+static inline enum cpuhp_state
+cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+        enum cpuhp_state prev_state = st->state;
+
+        st->rollback = false;
+        st->last = NULL;
+
+        st->target = target;
+        st->single = false;
+        st->bringup = st->state < target;
+
+        return prev_state;
+}
+
+static inline void
+cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+{
+        st->rollback = true;
+
+        /*
+         * If we have st->last we need to undo partial multi_instance of this
+         * state first. Otherwise start undo at the previous state.
+         */
+        if (!st->last) {
+                if (st->bringup)
+                        st->state--;
+                else
+                        st->state++;
+        }
+
+        st->target = prev_state;
+        st->bringup = !st->bringup;
+}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
+{
+        if (!st->single && st->state == st->target)
+                return;
+
+        st->result = 0;
+        /*
+         * Make sure the above stores are visible before should_run becomes
+         * true. Paired with the mb() above in cpuhp_thread_fun()
+         */
+        smp_mb();
+        st->should_run = true;
+        wake_up_process(st->thread);
+        wait_for_ap_thread(st, st->bringup);
+}
+
+static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+        enum cpuhp_state prev_state;
+        int ret;
+
+        prev_state = cpuhp_set_state(st, target);
+        __cpuhp_kick_ap(st);
+        if ((ret = st->result)) {
+                cpuhp_reset_state(st, prev_state);
+                __cpuhp_kick_ap(st);
+        }
+
+        return ret;
+}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
         /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
-        wait_for_completion(&st->done);
+        wait_for_ap_thread(st, true);
         if (WARN_ON_ONCE((!cpu_online(cpu))))
                 return -ECANCELED;
···
         stop_machine_unpark(cpu);
         kthread_unpark(st->thread);
 
-        /* Should we go further up ? */
-        if (st->target > CPUHP_AP_ONLINE_IDLE) {
-                __cpuhp_kick_ap_work(st);
-                wait_for_completion(&st->done);
-        }
-        return st->result;
+        if (st->target <= CPUHP_AP_ONLINE_IDLE)
+                return 0;
+
+        return cpuhp_kick_ap(st, st->target);
 }
 
 static int bringup_cpu(unsigned int cpu)
···
 /*
  * Hotplug state machine related functions
  */
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-        for (st->state++; st->state < st->target; st->state++) {
-                struct cpuhp_step *step = cpuhp_get_step(st->state);
-
-                if (!step->skip_onerr)
-                        cpuhp_invoke_callback(cpu, st->state, true, NULL);
-        }
-}
-
-static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                                enum cpuhp_state target)
-{
-        enum cpuhp_state prev_state = st->state;
-        int ret = 0;
-
-        for (; st->state > target; st->state--) {
-                ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
-                if (ret) {
-                        st->target = prev_state;
-                        undo_cpu_down(cpu, st);
-                        break;
-                }
-        }
-        return ret;
-}
 
 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
···
                 struct cpuhp_step *step = cpuhp_get_step(st->state);
 
                 if (!step->skip_onerr)
-                        cpuhp_invoke_callback(cpu, st->state, false, NULL);
+                        cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
         }
 }
···
 
         while (st->state < target) {
                 st->state++;
-                ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
+                ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
                 if (ret) {
                         st->target = prev_state;
                         undo_cpu_up(cpu, st);
···
 {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
-        init_completion(&st->done);
+        init_completion(&st->done_up);
+        init_completion(&st->done_down);
 }
 
 static int cpuhp_should_run(unsigned int cpu)
···
         return st->should_run;
 }
 
-/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
-static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-        enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
-
-        return cpuhp_down_callbacks(cpu, st, target);
-}
-
-/* Execute the online startup callbacks. Used to be CPU_ONLINE */
-static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-        return cpuhp_up_callbacks(cpu, st, st->target);
-}
-
 /*
  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
  * callbacks when a state gets [un]installed at runtime.
+ *
+ * Each invocation of this function by the smpboot thread does a single AP
+ * state callback.
+ *
+ * It has 3 modes of operation:
+ *  - single: runs st->cb_state
+ *  - up:     runs ++st->state, while st->state < st->target
+ *  - down:   runs st->state--, while st->state > st->target
+ *
+ * When complete or on error, should_run is cleared and the completion is fired.
  */
 static void cpuhp_thread_fun(unsigned int cpu)
 {
         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
-        int ret = 0;
+        bool bringup = st->bringup;
+        enum cpuhp_state state;
 
         /*
-         * Paired with the mb() in cpuhp_kick_ap_work and
-         * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+         * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
+         * that if we see ->should_run we also see the rest of the state.
          */
         smp_mb();
-        if (!st->should_run)
+
+        if (WARN_ON_ONCE(!st->should_run))
                 return;
 
-        st->should_run = false;
+        cpuhp_lock_acquire(bringup);
 
-        lock_map_acquire(&cpuhp_state_lock_map);
-        /* Single callback invocation for [un]install ? */
         if (st->single) {
-                if (st->cb_state < CPUHP_AP_ONLINE) {
-                        local_irq_disable();
-                        ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                    st->bringup, st->node);
-                        local_irq_enable();
-                } else {
-                        ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                    st->bringup, st->node);
-                }
-        } else if (st->rollback) {
-                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
-
-                undo_cpu_down(cpu, st);
-                st->rollback = false;
+                state = st->cb_state;
+                st->should_run = false;
         } else {
-                /* Cannot happen .... */
-                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
-
-                /* Regular hotplug work */
-                if (st->state < st->target)
-                        ret = cpuhp_ap_online(cpu, st);
-                else if (st->state > st->target)
-                        ret = cpuhp_ap_offline(cpu, st);
+                if (bringup) {
+                        st->state++;
+                        state = st->state;
+                        st->should_run = (st->state < st->target);
+                        WARN_ON_ONCE(st->state > st->target);
+                } else {
+                        state = st->state;
+                        st->state--;
+                        st->should_run = (st->state > st->target);
+                        WARN_ON_ONCE(st->state < st->target);
+                }
         }
-        lock_map_release(&cpuhp_state_lock_map);
-        st->result = ret;
-        complete(&st->done);
+
+        WARN_ON_ONCE(!cpuhp_is_ap_state(state));
+
+        if (st->rollback) {
+                struct cpuhp_step *step = cpuhp_get_step(state);
+                if (step->skip_onerr)
+                        goto next;
+        }
+
+        if (cpuhp_is_atomic_state(state)) {
+                local_irq_disable();
+                st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+                local_irq_enable();
+
+                /*
+                 * STARTING/DYING must not fail!
+                 */
+                WARN_ON_ONCE(st->result);
+        } else {
+                st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+        }
+
+        if (st->result) {
+                /*
+                 * If we fail on a rollback, we're up a creek without a
+                 * paddle, no way forward, no way back. We lose, thanks for
+                 * playing.
+                 */
+                WARN_ON_ONCE(st->rollback);
+                st->should_run = false;
+        }
+
+next:
+        cpuhp_lock_release(bringup);
+
+        if (!st->should_run)
+                complete_ap_thread(st, bringup);
 }
 
 /* Invoke a single callback on a remote cpu */
···
                          struct hlist_node *node)
 {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+        int ret;
 
         if (!cpu_online(cpu))
                 return 0;
 
-        lock_map_acquire(&cpuhp_state_lock_map);
-        lock_map_release(&cpuhp_state_lock_map);
+        cpuhp_lock_acquire(false);
+        cpuhp_lock_release(false);
+
+        cpuhp_lock_acquire(true);
+        cpuhp_lock_release(true);
 
         /*
          * If we are up and running, use the hotplug thread. For early calls
          * we invoke the thread function directly.
          */
         if (!st->thread)
-                return cpuhp_invoke_callback(cpu, state, bringup, node);
+                return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 
+        st->rollback = false;
+        st->last = NULL;
+
+        st->node = node;
+        st->bringup = bringup;
         st->cb_state = state;
         st->single = true;
-        st->bringup = bringup;
-        st->node = node;
+
+        __cpuhp_kick_ap(st);
 
         /*
-         * Make sure the above stores are visible before should_run becomes
-         * true. Paired with the mb() above in cpuhp_thread_fun()
+         * If we failed and did a partial, do a rollback.
          */
-        smp_mb();
-        st->should_run = true;
-        wake_up_process(st->thread);
-        wait_for_completion(&st->done);
-        return st->result;
-}
+        if ((ret = st->result) && st->last) {
+                st->rollback = true;
+                st->bringup = !bringup;
 
-/* Regular hotplug invocation of the AP hotplug thread */
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
-{
-        st->result = 0;
-        st->single = false;
-        /*
-         * Make sure the above stores are visible before should_run becomes
-         * true. Paired with the mb() above in cpuhp_thread_fun()
-         */
-        smp_mb();
-        st->should_run = true;
-        wake_up_process(st->thread);
+                __cpuhp_kick_ap(st);
+        }
+
+        return ret;
 }
 
 static int cpuhp_kick_ap_work(unsigned int cpu)
 {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-        enum cpuhp_state state = st->state;
+        enum cpuhp_state prev_state = st->state;
+        int ret;
 
-        trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
-        lock_map_acquire(&cpuhp_state_lock_map);
-        lock_map_release(&cpuhp_state_lock_map);
-        __cpuhp_kick_ap_work(st);
-        wait_for_completion(&st->done);
-        trace_cpuhp_exit(cpu, st->state, state, st->result);
-        return st->result;
+        cpuhp_lock_acquire(false);
+        cpuhp_lock_release(false);
+
+        cpuhp_lock_acquire(true);
+        cpuhp_lock_release(true);
+
+        trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
+        ret = cpuhp_kick_ap(st, st->target);
+        trace_cpuhp_exit(cpu, st->state, prev_state, ret);
+
+        return ret;
 }
 
 static struct smp_hotplug_thread cpuhp_threads = {
···
         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
         enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
         int err, cpu = smp_processor_id();
+        int ret;
 
         /* Ensure this CPU doesn't handle any more interrupts. */
         err = __cpu_disable();
···
         WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
         st->state--;
         /* Invoke the former CPU_DYING callbacks */
-        for (; st->state > target; st->state--)
-                cpuhp_invoke_callback(cpu, st->state, false, NULL);
+        for (; st->state > target; st->state--) {
+                ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+                /*
+                 * DYING must not fail!
+                 */
+                WARN_ON_ONCE(ret);
+        }
 
         /* Give up timekeeping duties */
         tick_handover_do_timer();
···
          *
          * Wait for the stop thread to go away.
          */
-        wait_for_completion(&st->done);
+        wait_for_ap_thread(st, false);
         BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 
         /* Interrupts are moved away from the dying cpu, reenable alloc/free */
···
 {
         struct cpuhp_cpu_state *st = arg;
 
-        complete(&st->done);
+        complete_ap_thread(st, false);
 }
 
 void cpuhp_report_idle_dead(void)
···
                              cpuhp_complete_idle_dead, st, 0);
 }
 
-#else
-#define takedown_cpu            NULL
-#endif
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+        for (st->state++; st->state < st->target; st->state++) {
+                struct cpuhp_step *step = cpuhp_get_step(st->state);
 
-#ifdef CONFIG_HOTPLUG_CPU
+                if (!step->skip_onerr)
+                        cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+        }
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+                                enum cpuhp_state target)
+{
+        enum cpuhp_state prev_state = st->state;
+        int ret = 0;
+
+        for (; st->state > target; st->state--) {
+                ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+                if (ret) {
+                        st->target = prev_state;
+                        undo_cpu_down(cpu, st);
+                        break;
+                }
+        }
+        return ret;
+}
 
 /* Requires cpu_add_remove_lock to be held */
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
···
 
         cpuhp_tasks_frozen = tasks_frozen;
 
-        prev_state = st->state;
-        st->target = target;
+        prev_state = cpuhp_set_state(st, target);
         /*
          * If the current CPU state is in the range of the AP hotplug thread,
          * then we need to kick the thread.
          */
         if (st->state > CPUHP_TEARDOWN_CPU) {
+                st->target = max((int)target, CPUHP_TEARDOWN_CPU);
                 ret = cpuhp_kick_ap_work(cpu);
                 /*
                  * The AP side has done the error rollback already. Just
···
                  */
                 if (st->state > CPUHP_TEARDOWN_CPU)
                         goto out;
+
+                st->target = target;
         }
         /*
          * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
···
          */
         ret = cpuhp_down_callbacks(cpu, st, target);
         if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-                st->target = prev_state;
-                st->rollback = true;
-                cpuhp_kick_ap_work(cpu);
+                cpuhp_reset_state(st, prev_state);
+                __cpuhp_kick_ap(st);
         }
 
 out:
···
         cpu_maps_update_done();
         return err;
 }
+
 int cpu_down(unsigned int cpu)
 {
         return do_cpu_down(cpu, CPUHP_OFFLINE);
 }
 EXPORT_SYMBOL(cpu_down);
+
+#else
+#define takedown_cpu            NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /**
···
 {
         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
         enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+        int ret;
 
         rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
         while (st->state < target) {
                 st->state++;
-                cpuhp_invoke_callback(cpu, st->state, true, NULL);
+                ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+                /*
+                 * STARTING must not fail!
+                 */
+                WARN_ON_ONCE(ret);
         }
 }
···
                 return;
 
         st->state = CPUHP_AP_ONLINE_IDLE;
-        complete(&st->done);
+        complete_ap_thread(st, true);
 }
 
 /* Requires cpu_add_remove_lock to be held */
···
 
         cpuhp_tasks_frozen = tasks_frozen;
 
-        st->target = target;
+        cpuhp_set_state(st, target);
         /*
          * If the current CPU state is in the range of the AP hotplug thread,
          * then we need to kick the thread once more.
···
         struct cpuhp_step *sp = cpuhp_get_step(state);
         int ret;
 
+        /*
+         * If there's nothing to do, we're done.
+         * Relies on the union for multi_instance.
+         */
         if ((bringup && !sp->startup.single) ||
             (!bringup && !sp->teardown.single))
                 return 0;
···
         if (cpuhp_is_ap_state(state))
                 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
         else
-                ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+                ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #else
-        ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+        ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #endif
         BUG_ON(ret && !bringup);
         return ret;
···
 }
 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
 
+
+static ssize_t write_cpuhp_fail(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t count)
+{
+        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+        struct cpuhp_step *sp;
+        int fail, ret;
+
+        ret = kstrtoint(buf, 10, &fail);
+        if (ret)
+                return ret;
+
+        /*
+         * Cannot fail STARTING/DYING callbacks.
+         */
+        if (cpuhp_is_atomic_state(fail))
+                return -EINVAL;
+
+        /*
+         * Cannot fail anything that doesn't have callbacks.
+         */
+        mutex_lock(&cpuhp_state_mutex);
+        sp = cpuhp_get_step(fail);
+        if (!sp->startup.single && !sp->teardown.single)
+                ret = -EINVAL;
+        mutex_unlock(&cpuhp_state_mutex);
+        if (ret)
+                return ret;
+
+        st->fail = fail;
+
+        return count;
+}
+
+static ssize_t show_cpuhp_fail(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+        return sprintf(buf, "%d\n", st->fail);
+}
+
+static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+
 static struct attribute *cpuhp_cpu_attrs[] = {
         &dev_attr_state.attr,
         &dev_attr_target.attr,
+        &dev_attr_fail.attr,
         NULL
 };