Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'rcu/refscale' into next

Add performance tests for common context-synchronization primitives
(preemption, IRQ, and softirq disabling) and for per-CPU increments.
These serve as relevant baselines for comparison against the SRCU-fast
read-side APIs, especially since SRCU-fast is planned to synchronize
additional tracing fast-path code.

+321 -9
+321 -9
kernel/rcu/refscale.c
··· 136 136 void (*cleanup)(void); 137 137 void (*readsection)(const int nloops); 138 138 void (*delaysection)(const int nloops, const int udl, const int ndl); 139 + bool enable_irqs; 139 140 const char *name; 140 141 }; 141 142 ··· 368 367 // Definitions for reference count 369 368 static atomic_t refcnt; 370 369 370 + // Definitions acquire-release. 371 + static DEFINE_PER_CPU(unsigned long, test_acqrel); 372 + 371 373 static void ref_refcnt_section(const int nloops) 372 374 { 373 375 int i; ··· 397 393 .readsection = ref_refcnt_section, 398 394 .delaysection = ref_refcnt_delay_section, 399 395 .name = "refcnt" 396 + }; 397 + 398 + static void ref_percpuinc_section(const int nloops) 399 + { 400 + int i; 401 + 402 + for (i = nloops; i >= 0; i--) { 403 + this_cpu_inc(test_acqrel); 404 + this_cpu_dec(test_acqrel); 405 + } 406 + } 407 + 408 + static void ref_percpuinc_delay_section(const int nloops, const int udl, const int ndl) 409 + { 410 + int i; 411 + 412 + for (i = nloops; i >= 0; i--) { 413 + this_cpu_inc(test_acqrel); 414 + un_delay(udl, ndl); 415 + this_cpu_dec(test_acqrel); 416 + } 417 + } 418 + 419 + static const struct ref_scale_ops percpuinc_ops = { 420 + .init = rcu_sync_scale_init, 421 + .readsection = ref_percpuinc_section, 422 + .delaysection = ref_percpuinc_delay_section, 423 + .name = "percpuinc" 424 + }; 425 + 426 + // Note that this can lose counts in preemptible kernels. 
427 + static void ref_incpercpu_section(const int nloops) 428 + { 429 + int i; 430 + 431 + for (i = nloops; i >= 0; i--) { 432 + unsigned long *tap = this_cpu_ptr(&test_acqrel); 433 + 434 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 435 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 436 + } 437 + } 438 + 439 + static void ref_incpercpu_delay_section(const int nloops, const int udl, const int ndl) 440 + { 441 + int i; 442 + 443 + for (i = nloops; i >= 0; i--) { 444 + unsigned long *tap = this_cpu_ptr(&test_acqrel); 445 + 446 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 447 + un_delay(udl, ndl); 448 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 449 + } 450 + } 451 + 452 + static const struct ref_scale_ops incpercpu_ops = { 453 + .init = rcu_sync_scale_init, 454 + .readsection = ref_incpercpu_section, 455 + .delaysection = ref_incpercpu_delay_section, 456 + .name = "incpercpu" 457 + }; 458 + 459 + static void ref_incpercpupreempt_section(const int nloops) 460 + { 461 + int i; 462 + 463 + for (i = nloops; i >= 0; i--) { 464 + unsigned long *tap; 465 + 466 + preempt_disable(); 467 + tap = this_cpu_ptr(&test_acqrel); 468 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 469 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 470 + preempt_enable(); 471 + } 472 + } 473 + 474 + static void ref_incpercpupreempt_delay_section(const int nloops, const int udl, const int ndl) 475 + { 476 + int i; 477 + 478 + for (i = nloops; i >= 0; i--) { 479 + unsigned long *tap; 480 + 481 + preempt_disable(); 482 + tap = this_cpu_ptr(&test_acqrel); 483 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 484 + un_delay(udl, ndl); 485 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 486 + preempt_enable(); 487 + } 488 + } 489 + 490 + static const struct ref_scale_ops incpercpupreempt_ops = { 491 + .init = rcu_sync_scale_init, 492 + .readsection = ref_incpercpupreempt_section, 493 + .delaysection = ref_incpercpupreempt_delay_section, 494 + .name = "incpercpupreempt" 495 + }; 496 + 497 + static void ref_incpercpubh_section(const int nloops) 498 + { 499 
+ int i; 500 + 501 + for (i = nloops; i >= 0; i--) { 502 + unsigned long *tap; 503 + 504 + local_bh_disable(); 505 + tap = this_cpu_ptr(&test_acqrel); 506 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 507 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 508 + local_bh_enable(); 509 + } 510 + } 511 + 512 + static void ref_incpercpubh_delay_section(const int nloops, const int udl, const int ndl) 513 + { 514 + int i; 515 + 516 + for (i = nloops; i >= 0; i--) { 517 + unsigned long *tap; 518 + 519 + local_bh_disable(); 520 + tap = this_cpu_ptr(&test_acqrel); 521 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 522 + un_delay(udl, ndl); 523 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 524 + local_bh_enable(); 525 + } 526 + } 527 + 528 + static const struct ref_scale_ops incpercpubh_ops = { 529 + .init = rcu_sync_scale_init, 530 + .readsection = ref_incpercpubh_section, 531 + .delaysection = ref_incpercpubh_delay_section, 532 + .enable_irqs = true, 533 + .name = "incpercpubh" 534 + }; 535 + 536 + static void ref_incpercpuirqsave_section(const int nloops) 537 + { 538 + int i; 539 + unsigned long flags; 540 + 541 + for (i = nloops; i >= 0; i--) { 542 + unsigned long *tap; 543 + 544 + local_irq_save(flags); 545 + tap = this_cpu_ptr(&test_acqrel); 546 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 547 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 548 + local_irq_restore(flags); 549 + } 550 + } 551 + 552 + static void ref_incpercpuirqsave_delay_section(const int nloops, const int udl, const int ndl) 553 + { 554 + int i; 555 + unsigned long flags; 556 + 557 + for (i = nloops; i >= 0; i--) { 558 + unsigned long *tap; 559 + 560 + local_irq_save(flags); 561 + tap = this_cpu_ptr(&test_acqrel); 562 + WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 563 + un_delay(udl, ndl); 564 + WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 565 + local_irq_restore(flags); 566 + } 567 + } 568 + 569 + static const struct ref_scale_ops incpercpuirqsave_ops = { 570 + .init = rcu_sync_scale_init, 571 + .readsection = ref_incpercpuirqsave_section, 572 + 
.delaysection = ref_incpercpuirqsave_delay_section, 573 + .name = "incpercpuirqsave" 400 574 }; 401 575 402 576 // Definitions for rwlock ··· 720 538 .name = "lock-irq" 721 539 }; 722 540 723 - // Definitions acquire-release. 724 - static DEFINE_PER_CPU(unsigned long, test_acqrel); 725 - 726 541 static void ref_acqrel_section(const int nloops) 727 542 { 728 543 unsigned long x; ··· 850 671 .readsection = ref_jiffies_section, 851 672 .delaysection = ref_jiffies_delay_section, 852 673 .name = "jiffies" 674 + }; 675 + 676 + static void ref_preempt_section(const int nloops) 677 + { 678 + int i; 679 + 680 + migrate_disable(); 681 + for (i = nloops; i >= 0; i--) { 682 + preempt_disable(); 683 + preempt_enable(); 684 + } 685 + migrate_enable(); 686 + } 687 + 688 + static void ref_preempt_delay_section(const int nloops, const int udl, const int ndl) 689 + { 690 + int i; 691 + 692 + migrate_disable(); 693 + for (i = nloops; i >= 0; i--) { 694 + preempt_disable(); 695 + un_delay(udl, ndl); 696 + preempt_enable(); 697 + } 698 + migrate_enable(); 699 + } 700 + 701 + static const struct ref_scale_ops preempt_ops = { 702 + .readsection = ref_preempt_section, 703 + .delaysection = ref_preempt_delay_section, 704 + .name = "preempt" 705 + }; 706 + 707 + static void ref_bh_section(const int nloops) 708 + { 709 + int i; 710 + 711 + preempt_disable(); 712 + for (i = nloops; i >= 0; i--) { 713 + local_bh_disable(); 714 + local_bh_enable(); 715 + } 716 + preempt_enable(); 717 + } 718 + 719 + static void ref_bh_delay_section(const int nloops, const int udl, const int ndl) 720 + { 721 + int i; 722 + 723 + preempt_disable(); 724 + for (i = nloops; i >= 0; i--) { 725 + local_bh_disable(); 726 + un_delay(udl, ndl); 727 + local_bh_enable(); 728 + } 729 + preempt_enable(); 730 + } 731 + 732 + static const struct ref_scale_ops bh_ops = { 733 + .readsection = ref_bh_section, 734 + .delaysection = ref_bh_delay_section, 735 + .enable_irqs = true, 736 + .name = "bh" 737 + }; 738 + 739 + static void 
ref_irq_section(const int nloops) 740 + { 741 + int i; 742 + 743 + preempt_disable(); 744 + for (i = nloops; i >= 0; i--) { 745 + local_irq_disable(); 746 + local_irq_enable(); 747 + } 748 + preempt_enable(); 749 + } 750 + 751 + static void ref_irq_delay_section(const int nloops, const int udl, const int ndl) 752 + { 753 + int i; 754 + 755 + preempt_disable(); 756 + for (i = nloops; i >= 0; i--) { 757 + local_irq_disable(); 758 + un_delay(udl, ndl); 759 + local_irq_enable(); 760 + } 761 + preempt_enable(); 762 + } 763 + 764 + static const struct ref_scale_ops irq_ops = { 765 + .readsection = ref_irq_section, 766 + .delaysection = ref_irq_delay_section, 767 + .name = "irq" 768 + }; 769 + 770 + static void ref_irqsave_section(const int nloops) 771 + { 772 + unsigned long flags; 773 + int i; 774 + 775 + preempt_disable(); 776 + for (i = nloops; i >= 0; i--) { 777 + local_irq_save(flags); 778 + local_irq_restore(flags); 779 + } 780 + preempt_enable(); 781 + } 782 + 783 + static void ref_irqsave_delay_section(const int nloops, const int udl, const int ndl) 784 + { 785 + unsigned long flags; 786 + int i; 787 + 788 + preempt_disable(); 789 + for (i = nloops; i >= 0; i--) { 790 + local_irq_save(flags); 791 + un_delay(udl, ndl); 792 + local_irq_restore(flags); 793 + } 794 + preempt_enable(); 795 + } 796 + 797 + static const struct ref_scale_ops irqsave_ops = { 798 + .readsection = ref_irqsave_section, 799 + .delaysection = ref_irqsave_delay_section, 800 + .name = "irqsave" 853 801 }; 854 802 855 803 //////////////////////////////////////////////////////////////////////// ··· 1274 968 if (!atomic_dec_return(&n_warmedup)) 1275 969 while (atomic_read_acquire(&n_warmedup)) 1276 970 rcu_scale_one_reader(); 1277 - // Also keep interrupts disabled. This also has the effect 1278 - // of preventing entries into slow path for rcu_read_unlock(). 
1279 - local_irq_save(flags); 971 + // Also keep interrupts disabled when it is safe to do so, which 972 + // it is not for local_bh_enable(). This also has the effect of 973 + // preventing entries into slow path for rcu_read_unlock(). 974 + if (!cur_ops->enable_irqs) 975 + local_irq_save(flags); 1280 976 start = ktime_get_mono_fast_ns(); 1281 977 1282 978 rcu_scale_one_reader(); 1283 979 1284 980 duration = ktime_get_mono_fast_ns() - start; 1285 - local_irq_restore(flags); 981 + if (!cur_ops->enable_irqs) 982 + local_irq_restore(flags); 1286 983 1287 984 rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration; 1288 985 // To reduce runtime-skew noise, do maintain-load invocations until ··· 1518 1209 static const struct ref_scale_ops *scale_ops[] = { 1519 1210 &rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_fast_updown_ops, 1520 1211 RCU_TRACE_OPS RCU_TASKS_OPS 1521 - &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, 1522 - &acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops, 1212 + &refcnt_ops, &percpuinc_ops, &incpercpu_ops, &incpercpupreempt_ops, 1213 + &incpercpubh_ops, &incpercpuirqsave_ops, 1214 + &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, 1215 + &sched_clock_ops, &clock_ops, &jiffies_ops, 1216 + &preempt_ops, &bh_ops, &irq_ops, &irqsave_ops, 1523 1217 &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops, 1524 1218 }; 1525 1219