Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs dcache livelock fix from Al Viro:
"Fixes for livelocks in shrink_dentry_list() introduced by fixes to
shrink list corruption; the root cause was that trylock of parent's
->d_lock could be disrupted by d_walk() happening on other CPUs,
resulting in shrink_dentry_list() making no progress *and* the same
d_walk() being called again and again for as long as
shrink_dentry_list() doesn't get past that mess.

The solution is to have shrink_dentry_list() treat that trylock
failure not as 'try to do the same thing again', but 'lock them in the
right order'"

* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  dentry_kill() doesn't need the second argument now
  dealing with the rest of shrink_dentry_list() livelock
  shrink_dentry_list(): take parent's ->d_lock earlier
  expand dentry_kill(dentry, 0) in shrink_dentry_list()
  split dentry_kill()
  lift the "already marked killed" case into shrink_dentry_list()

+108 -47
+108 -47
fs/dcache.c
··· 441 441 } 442 442 EXPORT_SYMBOL(d_drop); 443 443 444 - /* 445 - * Finish off a dentry we've decided to kill. 446 - * dentry->d_lock must be held, returns with it unlocked. 447 - * If ref is non-zero, then decrement the refcount too. 448 - * Returns dentry requiring refcount drop, or NULL if we're done. 449 - */ 450 - static struct dentry * 451 - dentry_kill(struct dentry *dentry, int unlock_on_failure) 452 - __releases(dentry->d_lock) 444 + static void __dentry_kill(struct dentry *dentry) 453 445 { 454 - struct inode *inode; 455 446 struct dentry *parent = NULL; 456 447 bool can_free = true; 457 - 458 - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { 459 - can_free = dentry->d_flags & DCACHE_MAY_FREE; 460 - spin_unlock(&dentry->d_lock); 461 - goto out; 462 - } 463 - 464 - inode = dentry->d_inode; 465 - if (inode && !spin_trylock(&inode->i_lock)) { 466 - relock: 467 - if (unlock_on_failure) { 468 - spin_unlock(&dentry->d_lock); 469 - cpu_relax(); 470 - } 471 - return dentry; /* try again with same dentry */ 472 - } 473 448 if (!IS_ROOT(dentry)) 474 449 parent = dentry->d_parent; 475 - if (parent && !spin_trylock(&parent->d_lock)) { 476 - if (inode) 477 - spin_unlock(&inode->i_lock); 478 - goto relock; 479 - } 480 450 481 451 /* 482 452 * The dentry is now unrecoverably dead to the world. ··· 490 520 can_free = false; 491 521 } 492 522 spin_unlock(&dentry->d_lock); 493 - out: 494 523 if (likely(can_free)) 495 524 dentry_free(dentry); 525 + } 526 + 527 + /* 528 + * Finish off a dentry we've decided to kill. 529 + * dentry->d_lock must be held, returns with it unlocked. 530 + * If ref is non-zero, then decrement the refcount too. 531 + * Returns dentry requiring refcount drop, or NULL if we're done. 
532 + */ 533 + static struct dentry *dentry_kill(struct dentry *dentry) 534 + __releases(dentry->d_lock) 535 + { 536 + struct inode *inode = dentry->d_inode; 537 + struct dentry *parent = NULL; 538 + 539 + if (inode && unlikely(!spin_trylock(&inode->i_lock))) 540 + goto failed; 541 + 542 + if (!IS_ROOT(dentry)) { 543 + parent = dentry->d_parent; 544 + if (unlikely(!spin_trylock(&parent->d_lock))) { 545 + if (inode) 546 + spin_unlock(&inode->i_lock); 547 + goto failed; 548 + } 549 + } 550 + 551 + __dentry_kill(dentry); 552 + return parent; 553 + 554 + failed: 555 + spin_unlock(&dentry->d_lock); 556 + cpu_relax(); 557 + return dentry; /* try again with same dentry */ 558 + } 559 + 560 + static inline struct dentry *lock_parent(struct dentry *dentry) 561 + { 562 + struct dentry *parent = dentry->d_parent; 563 + if (IS_ROOT(dentry)) 564 + return NULL; 565 + if (likely(spin_trylock(&parent->d_lock))) 566 + return parent; 567 + spin_unlock(&dentry->d_lock); 568 + rcu_read_lock(); 569 + again: 570 + parent = ACCESS_ONCE(dentry->d_parent); 571 + spin_lock(&parent->d_lock); 572 + /* 573 + * We can't blindly lock dentry until we are sure 574 + * that we won't violate the locking order. 575 + * Any changes of dentry->d_parent must have 576 + * been done with parent->d_lock held, so 577 + * spin_lock() above is enough of a barrier 578 + * for checking if it's still our child. 
579 + */ 580 + if (unlikely(parent != dentry->d_parent)) { 581 + spin_unlock(&parent->d_lock); 582 + goto again; 583 + } 584 + rcu_read_unlock(); 585 + if (parent != dentry) 586 + spin_lock(&dentry->d_lock); 587 + else 588 + parent = NULL; 496 589 return parent; 497 590 } 498 591 ··· 612 579 return; 613 580 614 581 kill_it: 615 - dentry = dentry_kill(dentry, 1); 582 + dentry = dentry_kill(dentry); 616 583 if (dentry) 617 584 goto repeat; 618 585 } ··· 830 797 struct dentry *dentry, *parent; 831 798 832 799 while (!list_empty(list)) { 800 + struct inode *inode; 833 801 dentry = list_entry(list->prev, struct dentry, d_lru); 834 802 spin_lock(&dentry->d_lock); 803 + parent = lock_parent(dentry); 804 + 835 805 /* 836 806 * The dispose list is isolated and dentries are not accounted 837 807 * to the LRU here, so we can simply remove it from the list ··· 848 812 */ 849 813 if ((int)dentry->d_lockref.count > 0) { 850 814 spin_unlock(&dentry->d_lock); 815 + if (parent) 816 + spin_unlock(&parent->d_lock); 851 817 continue; 852 818 } 853 819 854 - parent = dentry_kill(dentry, 0); 855 - /* 856 - * If dentry_kill returns NULL, we have nothing more to do. 857 - */ 858 - if (!parent) 859 - continue; 860 820 861 - if (unlikely(parent == dentry)) { 862 - /* 863 - * trylocks have failed and d_lock has been held the 864 - * whole time, so it could not have been added to any 865 - * other lists. Just add it back to the shrink list. 
866 - */ 821 + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { 822 + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; 823 + spin_unlock(&dentry->d_lock); 824 + if (parent) 825 + spin_unlock(&parent->d_lock); 826 + if (can_free) 827 + dentry_free(dentry); 828 + continue; 829 + } 830 + 831 + inode = dentry->d_inode; 832 + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { 867 833 d_shrink_add(dentry, list); 868 834 spin_unlock(&dentry->d_lock); 835 + if (parent) 836 + spin_unlock(&parent->d_lock); 869 837 continue; 870 838 } 839 + 840 + __dentry_kill(dentry); 841 + 871 842 /* 872 843 * We need to prune ancestors too. This is necessary to prevent 873 844 * quadratic behavior of shrink_dcache_parent(), but is also ··· 882 839 * fragmentation. 883 840 */ 884 841 dentry = parent; 885 - while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) 886 - dentry = dentry_kill(dentry, 1); 842 + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { 843 + parent = lock_parent(dentry); 844 + if (dentry->d_lockref.count != 1) { 845 + dentry->d_lockref.count--; 846 + spin_unlock(&dentry->d_lock); 847 + if (parent) 848 + spin_unlock(&parent->d_lock); 849 + break; 850 + } 851 + inode = dentry->d_inode; /* can't be NULL */ 852 + if (unlikely(!spin_trylock(&inode->i_lock))) { 853 + spin_unlock(&dentry->d_lock); 854 + if (parent) 855 + spin_unlock(&parent->d_lock); 856 + cpu_relax(); 857 + continue; 858 + } 859 + __dentry_kill(dentry); 860 + dentry = parent; 861 + } 887 862 } 888 863 } 889 864