Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'trace-v6.8-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing and eventfs fixes from Steven Rostedt:

- Fix the return code for ring_buffer_poll_wait()

It was returning a -EINVAL instead of EPOLLERR.

- Zero out the tracefs_inode so that all fields are initialized.

The ti->private could have had stale data, but instead of just
initializing it to NULL, clear out the entire structure when it is
allocated.

- Fix a crash in timerlat

The hrtimer was initialized at read and not open, but is canceled at
close. If the file was opened and never read, the close will pass a
NULL pointer to hrtimer_cancel().

- Rewrite of eventfs.

Linus wrote a patch series to remove the dentry references in the
eventfs_inode and to use ref counting and more of proper VFS
interfaces to make it work.

- Add warning to put_ei() if ei is not marked as freed. That means
something is about to free it when it shouldn't.

- Restructure the eventfs_inode to make it more compact, and remove the
unused llist field.

- Remove the fsnotify*() functions for when the inodes were being
created in the lookup code. It doesn't make sense to notify about
creation just because something is being looked up.

- The inode hard link count was not accurate.

It was being updated when a file was looked up. The inodes of
directories were updating their parent inode hard link count every
time the inode was created. That means if memory reclaim cleaned a
stale directory inode and the inode was looked up again, it would
increment the parent inode again as well. Al Viro said to just have
all eventfs directories have a hard link count of 1. That tells user
space not to trust it.

* tag 'trace-v6.8-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
eventfs: Keep all directory links at 1
eventfs: Remove fsnotify*() functions from lookup()
eventfs: Restructure eventfs_inode structure to be more condensed
eventfs: Warn if an eventfs_inode is freed without is_freed being set
tracing/timerlat: Move hrtimer_init to timerlat_fd open()
eventfs: Get rid of dentry pointers without refcounts
eventfs: Clean up dentry ops and add revalidate function
eventfs: Remove unused d_parent pointer field
tracefs: dentry lookup crapectomy
tracefs: Avoid using the ei->dentry pointer unnecessarily
eventfs: Initialize the tracefs inode properly
tracefs: Zero out the tracefs_inode when allocating it
ring-buffer: Clean ring_buffer_poll_wait() error return

+195 -526
+151 -417
fs/tracefs/event_inode.c
··· 62 62 63 63 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 64 64 65 + /* 66 + * eventfs_inode reference count management. 67 + * 68 + * NOTE! We count only references from dentries, in the 69 + * form 'dentry->d_fsdata'. There are also references from 70 + * directory inodes ('ti->private'), but the dentry reference 71 + * count is always a superset of the inode reference count. 72 + */ 73 + static void release_ei(struct kref *ref) 74 + { 75 + struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); 76 + 77 + WARN_ON_ONCE(!ei->is_freed); 78 + 79 + kfree(ei->entry_attrs); 80 + kfree_const(ei->name); 81 + kfree_rcu(ei, rcu); 82 + } 83 + 84 + static inline void put_ei(struct eventfs_inode *ei) 85 + { 86 + if (ei) 87 + kref_put(&ei->kref, release_ei); 88 + } 89 + 90 + static inline void free_ei(struct eventfs_inode *ei) 91 + { 92 + if (ei) { 93 + ei->is_freed = 1; 94 + put_ei(ei); 95 + } 96 + } 97 + 98 + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) 99 + { 100 + if (ei) 101 + kref_get(&ei->kref); 102 + return ei; 103 + } 104 + 65 105 static struct dentry *eventfs_root_lookup(struct inode *dir, 66 106 struct dentry *dentry, 67 107 unsigned int flags); ··· 196 156 return ret; 197 157 } 198 158 199 - static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) 159 + static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) 200 160 { 201 - struct inode *inode; 161 + struct inode *root; 202 162 203 163 /* Only update if the "events" was on the top level */ 204 164 if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 205 165 return; 206 166 207 167 /* Get the tracefs root inode. 
*/ 208 - inode = d_inode(dentry->d_sb->s_root); 209 - ei->attr.uid = inode->i_uid; 210 - ei->attr.gid = inode->i_gid; 168 + root = d_inode(sb->s_root); 169 + ei->attr.uid = root->i_uid; 170 + ei->attr.gid = root->i_gid; 211 171 } 212 172 213 173 static void set_top_events_ownership(struct inode *inode) 214 174 { 215 175 struct tracefs_inode *ti = get_tracefs(inode); 216 176 struct eventfs_inode *ei = ti->private; 217 - struct dentry *dentry; 218 177 219 178 /* The top events directory doesn't get automatically updated */ 220 179 if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 221 180 return; 222 181 223 - dentry = ei->dentry; 224 - 225 - update_top_events_attr(ei, dentry); 182 + update_top_events_attr(ei, inode->i_sb); 226 183 227 184 if (!(ei->attr.mode & EVENTFS_SAVE_UID)) 228 185 inode->i_uid = ei->attr.uid; ··· 270 233 { 271 234 struct eventfs_inode *ei; 272 235 273 - mutex_lock(&eventfs_mutex); 274 236 do { 275 - /* The parent always has an ei, except for events itself */ 276 - ei = dentry->d_parent->d_fsdata; 237 + // The parent is stable because we do not do renames 238 + dentry = dentry->d_parent; 239 + // ... and directories always have d_fsdata 240 + ei = dentry->d_fsdata; 277 241 278 242 /* 279 243 * If the ei is being freed, the ownership of the children ··· 284 246 ei = NULL; 285 247 break; 286 248 } 287 - 288 - dentry = ei->dentry; 249 + // Walk upwards until you find the events inode 289 250 } while (!ei->is_events); 290 - mutex_unlock(&eventfs_mutex); 291 251 292 - update_top_events_attr(ei, dentry); 252 + update_top_events_attr(ei, dentry->d_sb); 293 253 294 254 return ei; 295 255 } ··· 318 282 } 319 283 320 284 /** 321 - * create_file - create a file in the tracefs filesystem 322 - * @name: the name of the file to create. 285 + * lookup_file - look up a file in the tracefs filesystem 286 + * @dentry: the dentry to look up 323 287 * @mode: the permission that the file should have. 
324 288 * @attr: saved attributes changed by user 325 - * @parent: parent dentry for this file. 326 289 * @data: something that the caller will want to get to later on. 327 290 * @fop: struct file_operations that should be used for this file. 328 291 * ··· 329 294 * directory. The inode.i_private pointer will point to @data in the open() 330 295 * call. 331 296 */ 332 - static struct dentry *create_file(const char *name, umode_t mode, 297 + static struct dentry *lookup_file(struct eventfs_inode *parent_ei, 298 + struct dentry *dentry, 299 + umode_t mode, 333 300 struct eventfs_attr *attr, 334 - struct dentry *parent, void *data, 301 + void *data, 335 302 const struct file_operations *fop) 336 303 { 337 304 struct tracefs_inode *ti; 338 - struct dentry *dentry; 339 305 struct inode *inode; 340 306 341 307 if (!(mode & S_IFMT)) 342 308 mode |= S_IFREG; 343 309 344 310 if (WARN_ON_ONCE(!S_ISREG(mode))) 345 - return NULL; 346 - 347 - WARN_ON_ONCE(!parent); 348 - dentry = eventfs_start_creating(name, parent); 349 - 350 - if (IS_ERR(dentry)) 351 - return dentry; 311 + return ERR_PTR(-EIO); 352 312 353 313 inode = tracefs_get_inode(dentry->d_sb); 354 314 if (unlikely(!inode)) 355 - return eventfs_failed_creating(dentry); 315 + return ERR_PTR(-ENOMEM); 356 316 357 317 /* If the user updated the directory's attributes, use them */ 358 318 update_inode_attr(dentry, inode, attr, mode); ··· 361 331 362 332 ti = get_tracefs(inode); 363 333 ti->flags |= TRACEFS_EVENT_INODE; 364 - d_instantiate(dentry, inode); 365 - fsnotify_create(dentry->d_parent->d_inode, dentry); 366 - return eventfs_end_creating(dentry); 334 + 335 + // Files have their parent's ei as their fsdata 336 + dentry->d_fsdata = get_ei(parent_ei); 337 + 338 + d_add(dentry, inode); 339 + return NULL; 367 340 }; 368 341 369 342 /** 370 - * create_dir - create a dir in the tracefs filesystem 343 + * lookup_dir_entry - look up a dir in the tracefs filesystem 344 + * @dentry: the directory to look up 371 345 * @ei: the 
eventfs_inode that represents the directory to create 372 - * @parent: parent dentry for this file. 373 346 * 374 - * This function will create a dentry for a directory represented by 347 + * This function will look up a dentry for a directory represented by 375 348 * a eventfs_inode. 376 349 */ 377 - static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) 350 + static struct dentry *lookup_dir_entry(struct dentry *dentry, 351 + struct eventfs_inode *pei, struct eventfs_inode *ei) 378 352 { 379 353 struct tracefs_inode *ti; 380 - struct dentry *dentry; 381 354 struct inode *inode; 382 - 383 - dentry = eventfs_start_creating(ei->name, parent); 384 - if (IS_ERR(dentry)) 385 - return dentry; 386 355 387 356 inode = tracefs_get_inode(dentry->d_sb); 388 357 if (unlikely(!inode)) 389 - return eventfs_failed_creating(dentry); 358 + return ERR_PTR(-ENOMEM); 390 359 391 360 /* If the user updated the directory's attributes, use them */ 392 361 update_inode_attr(dentry, inode, &ei->attr, ··· 399 370 400 371 ti = get_tracefs(inode); 401 372 ti->flags |= TRACEFS_EVENT_INODE; 373 + /* Only directories have ti->private set to an ei, not files */ 374 + ti->private = ei; 402 375 403 - inc_nlink(inode); 404 - d_instantiate(dentry, inode); 405 - inc_nlink(dentry->d_parent->d_inode); 406 - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 407 - return eventfs_end_creating(dentry); 376 + dentry->d_fsdata = get_ei(ei); 377 + 378 + d_add(dentry, inode); 379 + return NULL; 408 380 } 409 381 410 - static void free_ei(struct eventfs_inode *ei) 382 + static inline struct eventfs_inode *alloc_ei(const char *name) 411 383 { 412 - kfree_const(ei->name); 413 - kfree(ei->d_children); 414 - kfree(ei->entry_attrs); 415 - kfree(ei); 384 + struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); 385 + 386 + if (!ei) 387 + return NULL; 388 + 389 + ei->name = kstrdup_const(name, GFP_KERNEL); 390 + if (!ei->name) { 391 + kfree(ei); 392 + return NULL; 393 + } 394 + 
kref_init(&ei->kref); 395 + return ei; 416 396 } 417 397 418 398 /** 419 - * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode 420 - * @ti: the tracefs_inode of the dentry 399 + * eventfs_d_release - dentry is going away 421 400 * @dentry: dentry which has the reference to remove. 422 401 * 423 402 * Remove the association between a dentry from an eventfs_inode. 424 403 */ 425 - void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 404 + void eventfs_d_release(struct dentry *dentry) 426 405 { 427 - struct eventfs_inode *ei; 428 - int i; 429 - 430 - mutex_lock(&eventfs_mutex); 431 - 432 - ei = dentry->d_fsdata; 433 - if (!ei) 434 - goto out; 435 - 436 - /* This could belong to one of the files of the ei */ 437 - if (ei->dentry != dentry) { 438 - for (i = 0; i < ei->nr_entries; i++) { 439 - if (ei->d_children[i] == dentry) 440 - break; 441 - } 442 - if (WARN_ON_ONCE(i == ei->nr_entries)) 443 - goto out; 444 - ei->d_children[i] = NULL; 445 - } else if (ei->is_freed) { 446 - free_ei(ei); 447 - } else { 448 - ei->dentry = NULL; 449 - } 450 - 451 - dentry->d_fsdata = NULL; 452 - out: 453 - mutex_unlock(&eventfs_mutex); 406 + put_ei(dentry->d_fsdata); 454 407 } 455 408 456 409 /** 457 - * create_file_dentry - create a dentry for a file of an eventfs_inode 410 + * lookup_file_dentry - create a dentry for a file of an eventfs_inode 458 411 * @ei: the eventfs_inode that the file will be created under 459 - * @idx: the index into the d_children[] of the @ei 412 + * @idx: the index into the entry_attrs[] of the @ei 460 413 * @parent: The parent dentry of the created file. 461 414 * @name: The name of the file to create 462 415 * @mode: The mode of the file. ··· 449 438 * address located at @e_dentry. 
450 439 */ 451 440 static struct dentry * 452 - create_file_dentry(struct eventfs_inode *ei, int idx, 453 - struct dentry *parent, const char *name, umode_t mode, void *data, 441 + lookup_file_dentry(struct dentry *dentry, 442 + struct eventfs_inode *ei, int idx, 443 + umode_t mode, void *data, 454 444 const struct file_operations *fops) 455 445 { 456 446 struct eventfs_attr *attr = NULL; 457 - struct dentry **e_dentry = &ei->d_children[idx]; 458 - struct dentry *dentry; 459 447 460 - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 461 - 462 - mutex_lock(&eventfs_mutex); 463 - if (ei->is_freed) { 464 - mutex_unlock(&eventfs_mutex); 465 - return NULL; 466 - } 467 - /* If the e_dentry already has a dentry, use it */ 468 - if (*e_dentry) { 469 - dget(*e_dentry); 470 - mutex_unlock(&eventfs_mutex); 471 - return *e_dentry; 472 - } 473 - 474 - /* ei->entry_attrs are protected by SRCU */ 475 448 if (ei->entry_attrs) 476 449 attr = &ei->entry_attrs[idx]; 477 450 478 - mutex_unlock(&eventfs_mutex); 479 - 480 - dentry = create_file(name, mode, attr, parent, data, fops); 481 - 482 - mutex_lock(&eventfs_mutex); 483 - 484 - if (IS_ERR_OR_NULL(dentry)) { 485 - /* 486 - * When the mutex was released, something else could have 487 - * created the dentry for this e_dentry. In which case 488 - * use that one. 489 - * 490 - * If ei->is_freed is set, the e_dentry is currently on its 491 - * way to being freed, don't return it. If e_dentry is NULL 492 - * it means it was already freed. 493 - */ 494 - if (ei->is_freed) { 495 - dentry = NULL; 496 - } else { 497 - dentry = *e_dentry; 498 - dget(dentry); 499 - } 500 - mutex_unlock(&eventfs_mutex); 501 - return dentry; 502 - } 503 - 504 - if (!*e_dentry && !ei->is_freed) { 505 - *e_dentry = dentry; 506 - dentry->d_fsdata = ei; 507 - } else { 508 - /* 509 - * Should never happen unless we get here due to being freed. 510 - * Otherwise it means two dentries exist with the same name. 
511 - */ 512 - WARN_ON_ONCE(!ei->is_freed); 513 - dentry = NULL; 514 - } 515 - mutex_unlock(&eventfs_mutex); 516 - 517 - return dentry; 518 - } 519 - 520 - /** 521 - * eventfs_post_create_dir - post create dir routine 522 - * @ei: eventfs_inode of recently created dir 523 - * 524 - * Map the meta-data of files within an eventfs dir to their parent dentry 525 - */ 526 - static void eventfs_post_create_dir(struct eventfs_inode *ei) 527 - { 528 - struct eventfs_inode *ei_child; 529 - struct tracefs_inode *ti; 530 - 531 - lockdep_assert_held(&eventfs_mutex); 532 - 533 - /* srcu lock already held */ 534 - /* fill parent-child relation */ 535 - list_for_each_entry_srcu(ei_child, &ei->children, list, 536 - srcu_read_lock_held(&eventfs_srcu)) { 537 - ei_child->d_parent = ei->dentry; 538 - } 539 - 540 - ti = get_tracefs(ei->dentry->d_inode); 541 - ti->private = ei; 542 - } 543 - 544 - /** 545 - * create_dir_dentry - Create a directory dentry for the eventfs_inode 546 - * @pei: The eventfs_inode parent of ei. 547 - * @ei: The eventfs_inode to create the directory for 548 - * @parent: The dentry of the parent of this directory 549 - * 550 - * This creates and attaches a directory dentry to the eventfs_inode @ei. 
551 - */ 552 - static struct dentry * 553 - create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 554 - struct dentry *parent) 555 - { 556 - struct dentry *dentry = NULL; 557 - 558 - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 559 - 560 - mutex_lock(&eventfs_mutex); 561 - if (pei->is_freed || ei->is_freed) { 562 - mutex_unlock(&eventfs_mutex); 563 - return NULL; 564 - } 565 - if (ei->dentry) { 566 - /* If the eventfs_inode already has a dentry, use it */ 567 - dentry = ei->dentry; 568 - dget(dentry); 569 - mutex_unlock(&eventfs_mutex); 570 - return dentry; 571 - } 572 - mutex_unlock(&eventfs_mutex); 573 - 574 - dentry = create_dir(ei, parent); 575 - 576 - mutex_lock(&eventfs_mutex); 577 - 578 - if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 579 - /* 580 - * When the mutex was released, something else could have 581 - * created the dentry for this e_dentry. In which case 582 - * use that one. 583 - * 584 - * If ei->is_freed is set, the e_dentry is currently on its 585 - * way to being freed. 586 - */ 587 - dentry = ei->dentry; 588 - if (dentry) 589 - dget(dentry); 590 - mutex_unlock(&eventfs_mutex); 591 - return dentry; 592 - } 593 - 594 - if (!ei->dentry && !ei->is_freed) { 595 - ei->dentry = dentry; 596 - eventfs_post_create_dir(ei); 597 - dentry->d_fsdata = ei; 598 - } else { 599 - /* 600 - * Should never happen unless we get here due to being freed. 601 - * Otherwise it means two dentries exist with the same name. 
602 - */ 603 - WARN_ON_ONCE(!ei->is_freed); 604 - dentry = NULL; 605 - } 606 - mutex_unlock(&eventfs_mutex); 607 - 608 - return dentry; 451 + return lookup_file(ei, dentry, mode, attr, data, fops); 609 452 } 610 453 611 454 /** ··· 476 611 struct dentry *dentry, 477 612 unsigned int flags) 478 613 { 479 - const struct file_operations *fops; 480 - const struct eventfs_entry *entry; 481 614 struct eventfs_inode *ei_child; 482 615 struct tracefs_inode *ti; 483 616 struct eventfs_inode *ei; 484 - struct dentry *ei_dentry = NULL; 485 - struct dentry *ret = NULL; 486 - struct dentry *d; 487 617 const char *name = dentry->d_name.name; 488 - umode_t mode; 489 - void *data; 490 - int idx; 491 - int i; 492 - int r; 618 + struct dentry *result = NULL; 493 619 494 620 ti = get_tracefs(dir); 495 621 if (!(ti->flags & TRACEFS_EVENT_INODE)) 496 - return NULL; 622 + return ERR_PTR(-EIO); 497 623 498 - /* Grab srcu to prevent the ei from going away */ 499 - idx = srcu_read_lock(&eventfs_srcu); 500 - 501 - /* 502 - * Grab the eventfs_mutex to consistent value from ti->private. 
503 - * This s 504 - */ 505 624 mutex_lock(&eventfs_mutex); 506 - ei = READ_ONCE(ti->private); 507 - if (ei && !ei->is_freed) 508 - ei_dentry = READ_ONCE(ei->dentry); 509 - mutex_unlock(&eventfs_mutex); 510 625 511 - if (!ei || !ei_dentry) 626 + ei = ti->private; 627 + if (!ei || ei->is_freed) 512 628 goto out; 513 629 514 - data = ei->data; 515 - 516 - list_for_each_entry_srcu(ei_child, &ei->children, list, 517 - srcu_read_lock_held(&eventfs_srcu)) { 630 + list_for_each_entry(ei_child, &ei->children, list) { 518 631 if (strcmp(ei_child->name, name) != 0) 519 632 continue; 520 - ret = simple_lookup(dir, dentry, flags); 521 - if (IS_ERR(ret)) 633 + if (ei_child->is_freed) 522 634 goto out; 523 - d = create_dir_dentry(ei, ei_child, ei_dentry); 524 - dput(d); 635 + result = lookup_dir_entry(dentry, ei, ei_child); 525 636 goto out; 526 637 } 527 638 528 - for (i = 0; i < ei->nr_entries; i++) { 529 - entry = &ei->entries[i]; 530 - if (strcmp(name, entry->name) == 0) { 531 - void *cdata = data; 532 - mutex_lock(&eventfs_mutex); 533 - /* If ei->is_freed, then the event itself may be too */ 534 - if (!ei->is_freed) 535 - r = entry->callback(name, &mode, &cdata, &fops); 536 - else 537 - r = -1; 538 - mutex_unlock(&eventfs_mutex); 539 - if (r <= 0) 540 - continue; 541 - ret = simple_lookup(dir, dentry, flags); 542 - if (IS_ERR(ret)) 543 - goto out; 544 - d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); 545 - dput(d); 546 - break; 547 - } 639 + for (int i = 0; i < ei->nr_entries; i++) { 640 + void *data; 641 + umode_t mode; 642 + const struct file_operations *fops; 643 + const struct eventfs_entry *entry = &ei->entries[i]; 644 + 645 + if (strcmp(name, entry->name) != 0) 646 + continue; 647 + 648 + data = ei->data; 649 + if (entry->callback(name, &mode, &data, &fops) <= 0) 650 + goto out; 651 + 652 + result = lookup_file_dentry(dentry, ei, i, mode, data, fops); 653 + goto out; 548 654 } 549 655 out: 550 - srcu_read_unlock(&eventfs_srcu, idx); 551 - return 
ret; 656 + mutex_unlock(&eventfs_mutex); 657 + return result; 552 658 } 553 659 554 660 /* ··· 669 833 if (!parent) 670 834 return ERR_PTR(-EINVAL); 671 835 672 - ei = kzalloc(sizeof(*ei), GFP_KERNEL); 836 + ei = alloc_ei(name); 673 837 if (!ei) 674 838 return ERR_PTR(-ENOMEM); 675 - 676 - ei->name = kstrdup_const(name, GFP_KERNEL); 677 - if (!ei->name) { 678 - kfree(ei); 679 - return ERR_PTR(-ENOMEM); 680 - } 681 - 682 - if (size) { 683 - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); 684 - if (!ei->d_children) { 685 - kfree_const(ei->name); 686 - kfree(ei); 687 - return ERR_PTR(-ENOMEM); 688 - } 689 - } 690 839 691 840 ei->entries = entries; 692 841 ei->nr_entries = size; ··· 680 859 INIT_LIST_HEAD(&ei->list); 681 860 682 861 mutex_lock(&eventfs_mutex); 683 - if (!parent->is_freed) { 862 + if (!parent->is_freed) 684 863 list_add_tail(&ei->list, &parent->children); 685 - ei->d_parent = parent->dentry; 686 - } 687 864 mutex_unlock(&eventfs_mutex); 688 865 689 866 /* Was the parent freed? 
*/ ··· 721 902 if (IS_ERR(dentry)) 722 903 return ERR_CAST(dentry); 723 904 724 - ei = kzalloc(sizeof(*ei), GFP_KERNEL); 905 + ei = alloc_ei(name); 725 906 if (!ei) 726 - goto fail_ei; 907 + goto fail; 727 908 728 909 inode = tracefs_get_inode(dentry->d_sb); 729 910 if (unlikely(!inode)) 730 911 goto fail; 731 912 732 - if (size) { 733 - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); 734 - if (!ei->d_children) 735 - goto fail; 736 - } 737 - 738 - ei->dentry = dentry; 913 + // Note: we have a ref to the dentry from tracefs_start_creating() 914 + ei->events_dir = dentry; 739 915 ei->entries = entries; 740 916 ei->nr_entries = size; 741 917 ei->is_events = 1; 742 918 ei->data = data; 743 - ei->name = kstrdup_const(name, GFP_KERNEL); 744 - if (!ei->name) 745 - goto fail; 746 919 747 920 /* Save the ownership of this directory */ 748 921 uid = d_inode(dentry->d_parent)->i_uid; ··· 765 954 inode->i_op = &eventfs_root_dir_inode_operations; 766 955 inode->i_fop = &eventfs_file_operations; 767 956 768 - dentry->d_fsdata = ei; 957 + dentry->d_fsdata = get_ei(ei); 769 958 770 - /* directory inodes start off with i_nlink == 2 (for "." entry) */ 771 - inc_nlink(inode); 959 + /* 960 + * Keep all eventfs directories with i_nlink == 1. 961 + * Due to the dynamic nature of the dentry creations and not 962 + * wanting to add a pointer to the parent eventfs_inode in the 963 + * eventfs_inode structure, keeping the i_nlink in sync with the 964 + * number of directories would cause too much complexity for 965 + * something not worth much. Keeping directory links at 1 966 + * tells userspace not to trust the link number. 
967 + */ 772 968 d_instantiate(dentry, inode); 969 + /* The dentry of the "events" parent does keep track though */ 773 970 inc_nlink(dentry->d_parent->d_inode); 774 971 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 775 972 tracefs_end_creating(dentry); ··· 785 966 return ei; 786 967 787 968 fail: 788 - kfree(ei->d_children); 789 - kfree(ei); 790 - fail_ei: 969 + free_ei(ei); 791 970 tracefs_failed_creating(dentry); 792 971 return ERR_PTR(-ENOMEM); 793 - } 794 - 795 - static LLIST_HEAD(free_list); 796 - 797 - static void eventfs_workfn(struct work_struct *work) 798 - { 799 - struct eventfs_inode *ei, *tmp; 800 - struct llist_node *llnode; 801 - 802 - llnode = llist_del_all(&free_list); 803 - llist_for_each_entry_safe(ei, tmp, llnode, llist) { 804 - /* This dput() matches the dget() from unhook_dentry() */ 805 - for (int i = 0; i < ei->nr_entries; i++) { 806 - if (ei->d_children[i]) 807 - dput(ei->d_children[i]); 808 - } 809 - /* This should only get here if it had a dentry */ 810 - if (!WARN_ON_ONCE(!ei->dentry)) 811 - dput(ei->dentry); 812 - } 813 - } 814 - 815 - static DECLARE_WORK(eventfs_work, eventfs_workfn); 816 - 817 - static void free_rcu_ei(struct rcu_head *head) 818 - { 819 - struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 820 - 821 - if (ei->dentry) { 822 - /* Do not free the ei until all references of dentry are gone */ 823 - if (llist_add(&ei->llist, &free_list)) 824 - queue_work(system_unbound_wq, &eventfs_work); 825 - return; 826 - } 827 - 828 - /* If the ei doesn't have a dentry, neither should its children */ 829 - for (int i = 0; i < ei->nr_entries; i++) { 830 - WARN_ON_ONCE(ei->d_children[i]); 831 - } 832 - 833 - free_ei(ei); 834 - } 835 - 836 - static void unhook_dentry(struct dentry *dentry) 837 - { 838 - if (!dentry) 839 - return; 840 - /* 841 - * Need to add a reference to the dentry that is expected by 842 - * simple_recursive_removal(), which will include a dput(). 
843 - */ 844 - dget(dentry); 845 - 846 - /* 847 - * Also add a reference for the dput() in eventfs_workfn(). 848 - * That is required as that dput() will free the ei after 849 - * the SRCU grace period is over. 850 - */ 851 - dget(dentry); 852 972 } 853 973 854 974 /** ··· 802 1044 { 803 1045 struct eventfs_inode *ei_child; 804 1046 805 - if (!ei) 806 - return; 807 1047 /* 808 1048 * Check recursion depth. It should never be greater than 3: 809 1049 * 0 - events/ ··· 813 1057 return; 814 1058 815 1059 /* search for nested folders or files */ 816 - list_for_each_entry_srcu(ei_child, &ei->children, list, 817 - lockdep_is_held(&eventfs_mutex)) { 818 - /* Children only have dentry if parent does */ 819 - WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1060 + list_for_each_entry(ei_child, &ei->children, list) 820 1061 eventfs_remove_rec(ei_child, level + 1); 821 - } 822 1062 823 - 824 - ei->is_freed = 1; 825 - 826 - for (int i = 0; i < ei->nr_entries; i++) { 827 - if (ei->d_children[i]) { 828 - /* Children only have dentry if parent does */ 829 - WARN_ON_ONCE(!ei->dentry); 830 - unhook_dentry(ei->d_children[i]); 831 - } 832 - } 833 - 834 - unhook_dentry(ei->dentry); 835 - 836 - list_del_rcu(&ei->list); 837 - call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1063 + list_del(&ei->list); 1064 + free_ei(ei); 838 1065 } 839 1066 840 1067 /** ··· 828 1089 */ 829 1090 void eventfs_remove_dir(struct eventfs_inode *ei) 830 1091 { 831 - struct dentry *dentry; 832 - 833 1092 if (!ei) 834 1093 return; 835 1094 836 1095 mutex_lock(&eventfs_mutex); 837 - dentry = ei->dentry; 838 1096 eventfs_remove_rec(ei, 0); 839 1097 mutex_unlock(&eventfs_mutex); 840 - 841 - /* 842 - * If any of the ei children has a dentry, then the ei itself 843 - * must have a dentry. 
844 - */ 845 - if (dentry) 846 - simple_recursive_removal(dentry, NULL); 847 1098 } 848 1099 849 1100 /** ··· 846 1117 { 847 1118 struct dentry *dentry; 848 1119 849 - dentry = ei->dentry; 1120 + dentry = ei->events_dir; 1121 + if (!dentry) 1122 + return; 1123 + 1124 + ei->events_dir = NULL; 850 1125 eventfs_remove_dir(ei); 851 1126 852 1127 /* ··· 860 1127 * sticks around while the other ei->dentry are created 861 1128 * and destroyed dynamically. 862 1129 */ 1130 + d_invalidate(dentry); 863 1131 dput(dentry); 864 1132 }
+22 -80
fs/tracefs/inode.c
··· 38 38 if (!ti) 39 39 return NULL; 40 40 41 - ti->flags = 0; 42 - 43 41 return &ti->vfs_inode; 44 42 } 45 43 ··· 377 379 .show_options = tracefs_show_options, 378 380 }; 379 381 380 - static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode) 382 + /* 383 + * It would be cleaner if eventfs had its own dentry ops. 384 + * 385 + * Note that d_revalidate is called potentially under RCU, 386 + * so it can't take the eventfs mutex etc. It's fine - if 387 + * we open a file just as it's marked dead, things will 388 + * still work just fine, and just see the old stale case. 389 + */ 390 + static void tracefs_d_release(struct dentry *dentry) 381 391 { 382 - struct tracefs_inode *ti; 392 + if (dentry->d_fsdata) 393 + eventfs_d_release(dentry); 394 + } 383 395 384 - if (!dentry || !inode) 385 - return; 396 + static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) 397 + { 398 + struct eventfs_inode *ei = dentry->d_fsdata; 386 399 387 - ti = get_tracefs(inode); 388 - if (ti && ti->flags & TRACEFS_EVENT_INODE) 389 - eventfs_set_ei_status_free(ti, dentry); 390 - iput(inode); 400 + return !(ei && ei->is_freed); 391 401 } 392 402 393 403 static const struct dentry_operations tracefs_dentry_operations = { 394 - .d_iput = tracefs_dentry_iput, 404 + .d_revalidate = tracefs_d_revalidate, 405 + .d_release = tracefs_d_release, 395 406 }; 396 407 397 408 static int trace_fill_super(struct super_block *sb, void *data, int silent) ··· 501 494 struct dentry *tracefs_end_creating(struct dentry *dentry) 502 495 { 503 496 inode_unlock(d_inode(dentry->d_parent)); 504 - return dentry; 505 - } 506 - 507 - /** 508 - * eventfs_start_creating - start the process of creating a dentry 509 - * @name: Name of the file created for the dentry 510 - * @parent: The parent dentry where this dentry will be created 511 - * 512 - * This is a simple helper function for the dynamically created eventfs 513 - * files. 
When the directory of the eventfs files are accessed, their 514 - * dentries are created on the fly. This function is used to start that 515 - * process. 516 - */ 517 - struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) 518 - { 519 - struct dentry *dentry; 520 - int error; 521 - 522 - /* Must always have a parent. */ 523 - if (WARN_ON_ONCE(!parent)) 524 - return ERR_PTR(-EINVAL); 525 - 526 - error = simple_pin_fs(&trace_fs_type, &tracefs_mount, 527 - &tracefs_mount_count); 528 - if (error) 529 - return ERR_PTR(error); 530 - 531 - if (unlikely(IS_DEADDIR(parent->d_inode))) 532 - dentry = ERR_PTR(-ENOENT); 533 - else 534 - dentry = lookup_one_len(name, parent, strlen(name)); 535 - 536 - if (!IS_ERR(dentry) && dentry->d_inode) { 537 - dput(dentry); 538 - dentry = ERR_PTR(-EEXIST); 539 - } 540 - 541 - if (IS_ERR(dentry)) 542 - simple_release_fs(&tracefs_mount, &tracefs_mount_count); 543 - 544 - return dentry; 545 - } 546 - 547 - /** 548 - * eventfs_failed_creating - clean up a failed eventfs dentry creation 549 - * @dentry: The dentry to clean up 550 - * 551 - * If after calling eventfs_start_creating(), a failure is detected, the 552 - * resources created by eventfs_start_creating() needs to be cleaned up. In 553 - * that case, this function should be called to perform that clean up. 554 - */ 555 - struct dentry *eventfs_failed_creating(struct dentry *dentry) 556 - { 557 - dput(dentry); 558 - simple_release_fs(&tracefs_mount, &tracefs_mount_count); 559 - return NULL; 560 - } 561 - 562 - /** 563 - * eventfs_end_creating - Finish the process of creating a eventfs dentry 564 - * @dentry: The dentry that has successfully been created. 565 - * 566 - * This function is currently just a place holder to match 567 - * eventfs_start_creating(). In case any synchronization needs to be added, 568 - * this function will be used to implement that without having to modify 569 - * the callers of eventfs_start_creating(). 
570 - */ 571 - struct dentry *eventfs_end_creating(struct dentry *dentry) 572 - { 573 497 return dentry; 574 498 } 575 499 ··· 717 779 { 718 780 struct tracefs_inode *ti = (struct tracefs_inode *) foo; 719 781 782 + /* inode_init_once() calls memset() on the vfs_inode portion */ 720 783 inode_init_once(&ti->vfs_inode); 784 + 785 + /* Zero out the rest */ 786 + memset_after(ti, 0, vfs_inode); 721 787 } 722 788 723 789 static int __init tracefs_init(void)
+18 -25
fs/tracefs/internal.h
··· 11 11 }; 12 12 13 13 struct tracefs_inode { 14 + struct inode vfs_inode; 15 + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ 14 16 unsigned long flags; 15 17 void *private; 16 - struct inode vfs_inode; 17 18 }; 18 19 19 20 /* ··· 32 31 /* 33 32 * struct eventfs_inode - hold the properties of the eventfs directories. 34 33 * @list: link list into the parent directory 34 + * @rcu: Union with @list for freeing 35 + * @children: link list into the child eventfs_inode 35 36 * @entries: the array of entries representing the files in the directory 36 37 * @name: the name of the directory to create 37 - * @children: link list into the child eventfs_inode 38 - * @dentry: the dentry of the directory 39 - * @d_parent: pointer to the parent's dentry 40 - * @d_children: The array of dentries to represent the files when created 38 + * @events_dir: the dentry of the events directory 41 39 * @entry_attrs: Saved mode and ownership of the @d_children 42 - * @attr: Saved mode and ownership of eventfs_inode itself 43 40 * @data: The private data to pass to the callbacks 41 + * @attr: Saved mode and ownership of eventfs_inode itself 44 42 * @is_freed: Flag set if the eventfs is on its way to be freed 45 43 * Note if is_freed is set, then dentry is corrupted. 
44 + * @is_events: Flag set for only the top level "events" directory 46 45 * @nr_entries: The number of items in @entries 46 + * @ino: The saved inode number 47 47 */ 48 48 struct eventfs_inode { 49 - struct list_head list; 49 + union { 50 + struct list_head list; 51 + struct rcu_head rcu; 52 + }; 53 + struct list_head children; 50 54 const struct eventfs_entry *entries; 51 55 const char *name; 52 - struct list_head children; 53 - struct dentry *dentry; /* Check is_freed to access */ 54 - struct dentry *d_parent; 55 - struct dentry **d_children; 56 + struct dentry *events_dir; 56 57 struct eventfs_attr *entry_attrs; 57 - struct eventfs_attr attr; 58 58 void *data; 59 + struct eventfs_attr attr; 60 + struct kref kref; 59 61 unsigned int is_freed:1; 60 62 unsigned int is_events:1; 61 63 unsigned int nr_entries:30; 62 64 unsigned int ino; 63 - /* 64 - * Union - used for deletion 65 - * @llist: for calling dput() if needed after RCU 66 - * @rcu: eventfs_inode to delete in RCU 67 - */ 68 - union { 69 - struct llist_node llist; 70 - struct rcu_head rcu; 71 - }; 72 65 }; 73 66 74 67 static inline struct tracefs_inode *get_tracefs(const struct inode *inode) ··· 74 79 struct dentry *tracefs_end_creating(struct dentry *dentry); 75 80 struct dentry *tracefs_failed_creating(struct dentry *dentry); 76 81 struct inode *tracefs_get_inode(struct super_block *sb); 77 - struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); 78 - struct dentry *eventfs_failed_creating(struct dentry *dentry); 79 - struct dentry *eventfs_end_creating(struct dentry *dentry); 80 - void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); 82 + 83 + void eventfs_d_release(struct dentry *dentry); 81 84 82 85 #endif /* _TRACEFS_INTERNAL_H */
+1 -1
kernel/trace/ring_buffer.c
··· 944 944 full = 0; 945 945 } else { 946 946 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 947 - return -EINVAL; 947 + return EPOLLERR; 948 948 949 949 cpu_buffer = buffer->buffers[cpu]; 950 950 work = &cpu_buffer->irq_work;
+3 -3
kernel/trace/trace_osnoise.c
··· 2444 2444 tlat = this_cpu_tmr_var(); 2445 2445 tlat->count = 0; 2446 2446 2447 + hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 2448 + tlat->timer.function = timerlat_irq; 2449 + 2447 2450 migrate_enable(); 2448 2451 return 0; 2449 2452 }; ··· 2528 2525 } else { 2529 2526 tlat->tracing_thread = false; 2530 2527 tlat->kthread = current; 2531 - 2532 - hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); 2533 - tlat->timer.function = timerlat_irq; 2534 2528 2535 2529 /* Annotate now to drift new period */ 2536 2530 tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);