Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
"The biggest changes are fixes for races that kept triggering Trinity
crashes, plus liblockdep build fixes and smaller misc fixes.

The liblockdep bits in perf/urgent are a pull mistake - they should
have been in locking/urgent - but by the time I noticed, other commits
were added and testing was done :-/ Sorry about that."

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf: Fix a race between ring_buffer_detach() and ring_buffer_attach()
perf: Prevent false warning in perf_swevent_add
perf: Limit perf_event_attr::sample_period to 63 bits
tools/liblockdep: Remove all build files when doing make clean
tools/liblockdep: Build liblockdep from tools/Makefile
perf/x86/intel: Fix Silvermont's event constraints
perf: Fix perf_event_init_context()
perf: Fix race in removing an event

+102 -86
-1
arch/x86/kernel/cpu/perf_event_intel.c
··· 169 169 { 170 170 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 171 171 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 172 - FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */ 173 172 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ 174 173 EVENT_CONSTRAINT_END 175 174 };
+2
include/linux/perf_event.h
··· 402 402 403 403 struct ring_buffer *rb; 404 404 struct list_head rb_entry; 405 + unsigned long rcu_batches; 406 + int rcu_pending; 405 407 406 408 /* poll related */ 407 409 wait_queue_head_t waitq;
+92 -82
kernel/events/core.c
··· 1443 1443 cpuctx->exclusive = 0; 1444 1444 } 1445 1445 1446 + struct remove_event { 1447 + struct perf_event *event; 1448 + bool detach_group; 1449 + }; 1450 + 1446 1451 /* 1447 1452 * Cross CPU call to remove a performance event 1448 1453 * ··· 1456 1451 */ 1457 1452 static int __perf_remove_from_context(void *info) 1458 1453 { 1459 - struct perf_event *event = info; 1454 + struct remove_event *re = info; 1455 + struct perf_event *event = re->event; 1460 1456 struct perf_event_context *ctx = event->ctx; 1461 1457 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1462 1458 1463 1459 raw_spin_lock(&ctx->lock); 1464 1460 event_sched_out(event, cpuctx, ctx); 1461 + if (re->detach_group) 1462 + perf_group_detach(event); 1465 1463 list_del_event(event, ctx); 1466 1464 if (!ctx->nr_events && cpuctx->task_ctx == ctx) { 1467 1465 ctx->is_active = 0; ··· 1489 1481 * When called from perf_event_exit_task, it's OK because the 1490 1482 * context has been detached from its task. 1491 1483 */ 1492 - static void perf_remove_from_context(struct perf_event *event) 1484 + static void perf_remove_from_context(struct perf_event *event, bool detach_group) 1493 1485 { 1494 1486 struct perf_event_context *ctx = event->ctx; 1495 1487 struct task_struct *task = ctx->task; 1488 + struct remove_event re = { 1489 + .event = event, 1490 + .detach_group = detach_group, 1491 + }; 1496 1492 1497 1493 lockdep_assert_held(&ctx->mutex); 1498 1494 ··· 1505 1493 * Per cpu events are removed via an smp call and 1506 1494 * the removal is always successful. 
1507 1495 */ 1508 - cpu_function_call(event->cpu, __perf_remove_from_context, event); 1496 + cpu_function_call(event->cpu, __perf_remove_from_context, &re); 1509 1497 return; 1510 1498 } 1511 1499 1512 1500 retry: 1513 - if (!task_function_call(task, __perf_remove_from_context, event)) 1501 + if (!task_function_call(task, __perf_remove_from_context, &re)) 1514 1502 return; 1515 1503 1516 1504 raw_spin_lock_irq(&ctx->lock); ··· 1527 1515 * Since the task isn't running, its safe to remove the event, us 1528 1516 * holding the ctx->lock ensures the task won't get scheduled in. 1529 1517 */ 1518 + if (detach_group) 1519 + perf_group_detach(event); 1530 1520 list_del_event(event, ctx); 1531 1521 raw_spin_unlock_irq(&ctx->lock); 1532 1522 } ··· 3192 3178 } 3193 3179 3194 3180 static void ring_buffer_put(struct ring_buffer *rb); 3195 - static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); 3181 + static void ring_buffer_attach(struct perf_event *event, 3182 + struct ring_buffer *rb); 3196 3183 3197 3184 static void unaccount_event_cpu(struct perf_event *event, int cpu) 3198 3185 { ··· 3253 3238 unaccount_event(event); 3254 3239 3255 3240 if (event->rb) { 3256 - struct ring_buffer *rb; 3257 - 3258 3241 /* 3259 3242 * Can happen when we close an event with re-directed output. 3260 3243 * ··· 3260 3247 * over us; possibly making our ring_buffer_put() the last. 3261 3248 */ 3262 3249 mutex_lock(&event->mmap_mutex); 3263 - rb = event->rb; 3264 - if (rb) { 3265 - rcu_assign_pointer(event->rb, NULL); 3266 - ring_buffer_detach(event, rb); 3267 - ring_buffer_put(rb); /* could be last */ 3268 - } 3250 + ring_buffer_attach(event, NULL); 3269 3251 mutex_unlock(&event->mmap_mutex); 3270 3252 } 3271 3253 ··· 3289 3281 * to trigger the AB-BA case. 
3290 3282 */ 3291 3283 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 3292 - raw_spin_lock_irq(&ctx->lock); 3293 - perf_group_detach(event); 3294 - raw_spin_unlock_irq(&ctx->lock); 3295 - perf_remove_from_context(event); 3284 + perf_remove_from_context(event, true); 3296 3285 mutex_unlock(&ctx->mutex); 3297 3286 3298 3287 free_event(event); ··· 3844 3839 static void ring_buffer_attach(struct perf_event *event, 3845 3840 struct ring_buffer *rb) 3846 3841 { 3842 + struct ring_buffer *old_rb = NULL; 3847 3843 unsigned long flags; 3848 3844 3849 - if (!list_empty(&event->rb_entry)) 3850 - return; 3845 + if (event->rb) { 3846 + /* 3847 + * Should be impossible, we set this when removing 3848 + * event->rb_entry and wait/clear when adding event->rb_entry. 3849 + */ 3850 + WARN_ON_ONCE(event->rcu_pending); 3851 3851 3852 - spin_lock_irqsave(&rb->event_lock, flags); 3853 - if (list_empty(&event->rb_entry)) 3854 - list_add(&event->rb_entry, &rb->event_list); 3855 - spin_unlock_irqrestore(&rb->event_lock, flags); 3856 - } 3852 + old_rb = event->rb; 3853 + event->rcu_batches = get_state_synchronize_rcu(); 3854 + event->rcu_pending = 1; 3857 3855 3858 - static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) 3859 - { 3860 - unsigned long flags; 3856 + spin_lock_irqsave(&old_rb->event_lock, flags); 3857 + list_del_rcu(&event->rb_entry); 3858 + spin_unlock_irqrestore(&old_rb->event_lock, flags); 3859 + } 3861 3860 3862 - if (list_empty(&event->rb_entry)) 3863 - return; 3861 + if (event->rcu_pending && rb) { 3862 + cond_synchronize_rcu(event->rcu_batches); 3863 + event->rcu_pending = 0; 3864 + } 3864 3865 3865 - spin_lock_irqsave(&rb->event_lock, flags); 3866 - list_del_init(&event->rb_entry); 3867 - wake_up_all(&event->waitq); 3868 - spin_unlock_irqrestore(&rb->event_lock, flags); 3866 + if (rb) { 3867 + spin_lock_irqsave(&rb->event_lock, flags); 3868 + list_add_rcu(&event->rb_entry, &rb->event_list); 3869 + spin_unlock_irqrestore(&rb->event_lock, 
flags); 3870 + } 3871 + 3872 + rcu_assign_pointer(event->rb, rb); 3873 + 3874 + if (old_rb) { 3875 + ring_buffer_put(old_rb); 3876 + /* 3877 + * Since we detached before setting the new rb, so that we 3878 + * could attach the new rb, we could have missed a wakeup. 3879 + * Provide it now. 3880 + */ 3881 + wake_up_all(&event->waitq); 3882 + } 3869 3883 } 3870 3884 3871 3885 static void ring_buffer_wakeup(struct perf_event *event) ··· 3953 3929 { 3954 3930 struct perf_event *event = vma->vm_file->private_data; 3955 3931 3956 - struct ring_buffer *rb = event->rb; 3932 + struct ring_buffer *rb = ring_buffer_get(event); 3957 3933 struct user_struct *mmap_user = rb->mmap_user; 3958 3934 int mmap_locked = rb->mmap_locked; 3959 3935 unsigned long size = perf_data_size(rb); ··· 3961 3937 atomic_dec(&rb->mmap_count); 3962 3938 3963 3939 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) 3964 - return; 3940 + goto out_put; 3965 3941 3966 - /* Detach current event from the buffer. */ 3967 - rcu_assign_pointer(event->rb, NULL); 3968 - ring_buffer_detach(event, rb); 3942 + ring_buffer_attach(event, NULL); 3969 3943 mutex_unlock(&event->mmap_mutex); 3970 3944 3971 3945 /* If there's still other mmap()s of this buffer, we're done. */ 3972 - if (atomic_read(&rb->mmap_count)) { 3973 - ring_buffer_put(rb); /* can't be last */ 3974 - return; 3975 - } 3946 + if (atomic_read(&rb->mmap_count)) 3947 + goto out_put; 3976 3948 3977 3949 /* 3978 3950 * No other mmap()s, detach from all other events that might redirect ··· 3998 3978 * still restart the iteration to make sure we're not now 3999 3979 * iterating the wrong list. 
4000 3980 */ 4001 - if (event->rb == rb) { 4002 - rcu_assign_pointer(event->rb, NULL); 4003 - ring_buffer_detach(event, rb); 4004 - ring_buffer_put(rb); /* can't be last, we still have one */ 4005 - } 3981 + if (event->rb == rb) 3982 + ring_buffer_attach(event, NULL); 3983 + 4006 3984 mutex_unlock(&event->mmap_mutex); 4007 3985 put_event(event); 4008 3986 ··· 4025 4007 vma->vm_mm->pinned_vm -= mmap_locked; 4026 4008 free_uid(mmap_user); 4027 4009 4010 + out_put: 4028 4011 ring_buffer_put(rb); /* could be last */ 4029 4012 } 4030 4013 ··· 4143 4124 vma->vm_mm->pinned_vm += extra; 4144 4125 4145 4126 ring_buffer_attach(event, rb); 4146 - rcu_assign_pointer(event->rb, rb); 4147 4127 4148 4128 perf_event_init_userpage(event); 4149 4129 perf_event_update_userpage(event); ··· 5426 5408 5427 5409 /* Recursion avoidance in each contexts */ 5428 5410 int recursion[PERF_NR_CONTEXTS]; 5411 + 5412 + /* Keeps track of cpu being initialized/exited */ 5413 + bool online; 5429 5414 }; 5430 5415 5431 5416 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); ··· 5675 5654 hwc->state = !(flags & PERF_EF_START); 5676 5655 5677 5656 head = find_swevent_head(swhash, event); 5678 - if (WARN_ON_ONCE(!head)) 5657 + if (!head) { 5658 + /* 5659 + * We can race with cpu hotplug code. Do not 5660 + * WARN if the cpu just got unplugged. 
5661 + */ 5662 + WARN_ON_ONCE(swhash->online); 5679 5663 return -EINVAL; 5664 + } 5680 5665 5681 5666 hlist_add_head_rcu(&event->hlist_entry, head); 5682 5667 ··· 6941 6914 static int 6942 6915 perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 6943 6916 { 6944 - struct ring_buffer *rb = NULL, *old_rb = NULL; 6917 + struct ring_buffer *rb = NULL; 6945 6918 int ret = -EINVAL; 6946 6919 6947 6920 if (!output_event) ··· 6969 6942 if (atomic_read(&event->mmap_count)) 6970 6943 goto unlock; 6971 6944 6972 - old_rb = event->rb; 6973 - 6974 6945 if (output_event) { 6975 6946 /* get the rb we want to redirect to */ 6976 6947 rb = ring_buffer_get(output_event); ··· 6976 6951 goto unlock; 6977 6952 } 6978 6953 6979 - if (old_rb) 6980 - ring_buffer_detach(event, old_rb); 6981 - 6982 - if (rb) 6983 - ring_buffer_attach(event, rb); 6984 - 6985 - rcu_assign_pointer(event->rb, rb); 6986 - 6987 - if (old_rb) { 6988 - ring_buffer_put(old_rb); 6989 - /* 6990 - * Since we detached before setting the new rb, so that we 6991 - * could attach the new rb, we could have missed a wakeup. 6992 - * Provide it now. 
6993 - */ 6994 - wake_up_all(&event->waitq); 6995 - } 6954 + ring_buffer_attach(event, rb); 6996 6955 6997 6956 ret = 0; 6998 6957 unlock: ··· 7026 7017 7027 7018 if (attr.freq) { 7028 7019 if (attr.sample_freq > sysctl_perf_event_sample_rate) 7020 + return -EINVAL; 7021 + } else { 7022 + if (attr.sample_period & (1ULL << 63)) 7029 7023 return -EINVAL; 7030 7024 } 7031 7025 ··· 7177 7165 struct perf_event_context *gctx = group_leader->ctx; 7178 7166 7179 7167 mutex_lock(&gctx->mutex); 7180 - perf_remove_from_context(group_leader); 7168 + perf_remove_from_context(group_leader, false); 7181 7169 7182 7170 /* 7183 7171 * Removing from the context ends up with disabled ··· 7187 7175 perf_event__state_init(group_leader); 7188 7176 list_for_each_entry(sibling, &group_leader->sibling_list, 7189 7177 group_entry) { 7190 - perf_remove_from_context(sibling); 7178 + perf_remove_from_context(sibling, false); 7191 7179 perf_event__state_init(sibling); 7192 7180 put_ctx(gctx); 7193 7181 } ··· 7317 7305 mutex_lock(&src_ctx->mutex); 7318 7306 list_for_each_entry_safe(event, tmp, &src_ctx->event_list, 7319 7307 event_entry) { 7320 - perf_remove_from_context(event); 7308 + perf_remove_from_context(event, false); 7321 7309 unaccount_event_cpu(event, src_cpu); 7322 7310 put_ctx(src_ctx); 7323 7311 list_add(&event->migrate_entry, &events); ··· 7379 7367 struct perf_event_context *child_ctx, 7380 7368 struct task_struct *child) 7381 7369 { 7382 - if (child_event->parent) { 7383 - raw_spin_lock_irq(&child_ctx->lock); 7384 - perf_group_detach(child_event); 7385 - raw_spin_unlock_irq(&child_ctx->lock); 7386 - } 7387 - 7388 - perf_remove_from_context(child_event); 7370 + perf_remove_from_context(child_event, !!child_event->parent); 7389 7371 7390 7372 /* 7391 7373 * It can happen that the parent exits first, and has events ··· 7730 7724 * swapped under us. 
7731 7725 */ 7732 7726 parent_ctx = perf_pin_task_context(parent, ctxn); 7727 + if (!parent_ctx) 7728 + return 0; 7733 7729 7734 7730 /* 7735 7731 * No need to check if parent_ctx != NULL here; since we saw ··· 7843 7835 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); 7844 7836 7845 7837 mutex_lock(&swhash->hlist_mutex); 7838 + swhash->online = true; 7846 7839 if (swhash->hlist_refcount > 0) { 7847 7840 struct swevent_hlist *hlist; 7848 7841 ··· 7866 7857 7867 7858 static void __perf_event_exit_context(void *__info) 7868 7859 { 7860 + struct remove_event re = { .detach_group = false }; 7869 7861 struct perf_event_context *ctx = __info; 7870 - struct perf_event *event; 7871 7862 7872 7863 perf_pmu_rotate_stop(ctx->pmu); 7873 7864 7874 7865 rcu_read_lock(); 7875 - list_for_each_entry_rcu(event, &ctx->event_list, event_entry) 7876 - __perf_remove_from_context(event); 7866 + list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) 7867 + __perf_remove_from_context(&re); 7877 7868 rcu_read_unlock(); 7878 7869 } 7879 7870 ··· 7901 7892 perf_event_exit_cpu_context(cpu); 7902 7893 7903 7894 mutex_lock(&swhash->hlist_mutex); 7895 + swhash->online = false; 7904 7896 swevent_hlist_release(swhash); 7905 7897 mutex_unlock(&swhash->hlist_mutex); 7906 7898 }
+6
tools/Makefile
··· 44 44 cgroup firewire hv guest usb virtio vm net: FORCE 45 45 $(call descend,$@) 46 46 47 + liblockdep: FORCE 48 + $(call descend,lib/lockdep) 49 + 47 50 libapikfs: FORCE 48 51 $(call descend,lib/api) 49 52 ··· 93 90 94 91 cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean: 95 92 $(call descend,$(@:_clean=),clean) 93 + 94 + liblockdep_clean: 95 + $(call descend,lib/lockdep,clean) 96 96 97 97 libapikfs_clean: 98 98 $(call descend,lib/api,clean)
+2 -3
tools/lib/lockdep/Makefile
··· 1 1 # file format version 2 2 FILE_VERSION = 1 3 3 4 - MAKEFLAGS += --no-print-directory 5 - LIBLOCKDEP_VERSION=$(shell make -sC ../../.. kernelversion) 4 + LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) 6 5 7 6 # Makefiles suck: This macro sets a default value of $(2) for the 8 7 # variable named by $(1), unless the variable has been set by ··· 230 231 install: install_lib 231 232 232 233 clean: 233 - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d 234 + $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d 234 235 $(RM) tags TAGS 235 236 236 237 endif # skip-makefile