Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro:
"dcache fixes + kvfree() (uninlined, exported by mm/util.c) + posix_acl
bugfix from hch"

The dcache fixes are for a subtle LRU list corruption bug reported by
Miklos Szeredi, where people inside IBM saw list corruptions with the
LTP/host01 test.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  nick kvfree() from apparmor
  posix_acl: handle NULL ACL in posix_acl_equiv_mode
  dcache: don't need rcu in shrink_dentry_list()
  more graceful recovery in umount_collect()
  don't remove from shrink list in select_collect()
  dentry_kill(): don't try to remove from shrink list
  expand the call of dentry_lru_del() in dentry_kill()
  new helper: dentry_free()
  fold try_prune_one_dentry()
  fold d_kill() and d_free()
  fix races between __d_instantiate() and checks of dentry flags

+125 -234
+102 -216
fs/dcache.c
··· 246 246 kmem_cache_free(dentry_cache, dentry); 247 247 } 248 248 249 - /* 250 - * no locks, please. 251 - */ 252 - static void d_free(struct dentry *dentry) 249 + static void dentry_free(struct dentry *dentry) 253 250 { 254 - BUG_ON((int)dentry->d_lockref.count > 0); 255 - this_cpu_dec(nr_dentry); 256 - if (dentry->d_op && dentry->d_op->d_release) 257 - dentry->d_op->d_release(dentry); 258 - 259 251 /* if dentry was never visible to RCU, immediate free is OK */ 260 252 if (!(dentry->d_flags & DCACHE_RCUACCESS)) 261 253 __d_free(&dentry->d_u.d_rcu); ··· 395 403 d_lru_add(dentry); 396 404 } 397 405 398 - /* 399 - * Remove a dentry with references from the LRU. 400 - * 401 - * If we are on the shrink list, then we can get to try_prune_one_dentry() and 402 - * lose our last reference through the parent walk. In this case, we need to 403 - * remove ourselves from the shrink list, not the LRU. 404 - */ 405 - static void dentry_lru_del(struct dentry *dentry) 406 - { 407 - if (dentry->d_flags & DCACHE_LRU_LIST) { 408 - if (dentry->d_flags & DCACHE_SHRINK_LIST) 409 - return d_shrink_del(dentry); 410 - d_lru_del(dentry); 411 - } 412 - } 413 - 414 - /** 415 - * d_kill - kill dentry and return parent 416 - * @dentry: dentry to kill 417 - * @parent: parent dentry 418 - * 419 - * The dentry must already be unhashed and removed from the LRU. 420 - * 421 - * If this is the root of the dentry tree, return NULL. 422 - * 423 - * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by 424 - * d_kill. 
425 - */ 426 - static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) 427 - __releases(dentry->d_lock) 428 - __releases(parent->d_lock) 429 - __releases(dentry->d_inode->i_lock) 430 - { 431 - list_del(&dentry->d_u.d_child); 432 - /* 433 - * Inform d_walk() that we are no longer attached to the 434 - * dentry tree 435 - */ 436 - dentry->d_flags |= DCACHE_DENTRY_KILLED; 437 - if (parent) 438 - spin_unlock(&parent->d_lock); 439 - dentry_iput(dentry); 440 - /* 441 - * dentry_iput drops the locks, at which point nobody (except 442 - * transient RCU lookups) can reach this dentry. 443 - */ 444 - d_free(dentry); 445 - return parent; 446 - } 447 - 448 406 /** 449 407 * d_drop - drop a dentry 450 408 * @dentry: dentry to drop ··· 452 510 __releases(dentry->d_lock) 453 511 { 454 512 struct inode *inode; 455 - struct dentry *parent; 513 + struct dentry *parent = NULL; 514 + bool can_free = true; 515 + 516 + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { 517 + can_free = dentry->d_flags & DCACHE_MAY_FREE; 518 + spin_unlock(&dentry->d_lock); 519 + goto out; 520 + } 456 521 457 522 inode = dentry->d_inode; 458 523 if (inode && !spin_trylock(&inode->i_lock)) { ··· 470 521 } 471 522 return dentry; /* try again with same dentry */ 472 523 } 473 - if (IS_ROOT(dentry)) 474 - parent = NULL; 475 - else 524 + if (!IS_ROOT(dentry)) 476 525 parent = dentry->d_parent; 477 526 if (parent && !spin_trylock(&parent->d_lock)) { 478 527 if (inode) ··· 490 543 if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) 491 544 dentry->d_op->d_prune(dentry); 492 545 493 - dentry_lru_del(dentry); 546 + if (dentry->d_flags & DCACHE_LRU_LIST) { 547 + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) 548 + d_lru_del(dentry); 549 + } 494 550 /* if it was on the hash then remove it */ 495 551 __d_drop(dentry); 496 - return d_kill(dentry, parent); 552 + list_del(&dentry->d_u.d_child); 553 + /* 554 + * Inform d_walk() that we are no longer attached to the 555 + * dentry tree 
556 + */ 557 + dentry->d_flags |= DCACHE_DENTRY_KILLED; 558 + if (parent) 559 + spin_unlock(&parent->d_lock); 560 + dentry_iput(dentry); 561 + /* 562 + * dentry_iput drops the locks, at which point nobody (except 563 + * transient RCU lookups) can reach this dentry. 564 + */ 565 + BUG_ON((int)dentry->d_lockref.count > 0); 566 + this_cpu_dec(nr_dentry); 567 + if (dentry->d_op && dentry->d_op->d_release) 568 + dentry->d_op->d_release(dentry); 569 + 570 + spin_lock(&dentry->d_lock); 571 + if (dentry->d_flags & DCACHE_SHRINK_LIST) { 572 + dentry->d_flags |= DCACHE_MAY_FREE; 573 + can_free = false; 574 + } 575 + spin_unlock(&dentry->d_lock); 576 + out: 577 + if (likely(can_free)) 578 + dentry_free(dentry); 579 + return parent; 497 580 } 498 581 499 582 /* ··· 792 815 } 793 816 EXPORT_SYMBOL(d_prune_aliases); 794 817 795 - /* 796 - * Try to throw away a dentry - free the inode, dput the parent. 797 - * Requires dentry->d_lock is held, and dentry->d_count == 0. 798 - * Releases dentry->d_lock. 799 - * 800 - * This may fail if locks cannot be acquired no problem, just try again. 801 - */ 802 - static struct dentry * try_prune_one_dentry(struct dentry *dentry) 803 - __releases(dentry->d_lock) 804 - { 805 - struct dentry *parent; 806 - 807 - parent = dentry_kill(dentry, 0); 808 - /* 809 - * If dentry_kill returns NULL, we have nothing more to do. 810 - * if it returns the same dentry, trylocks failed. In either 811 - * case, just loop again. 812 - * 813 - * Otherwise, we need to prune ancestors too. This is necessary 814 - * to prevent quadratic behavior of shrink_dcache_parent(), but 815 - * is also expected to be beneficial in reducing dentry cache 816 - * fragmentation. 817 - */ 818 - if (!parent) 819 - return NULL; 820 - if (parent == dentry) 821 - return dentry; 822 - 823 - /* Prune ancestors. 
*/ 824 - dentry = parent; 825 - while (dentry) { 826 - if (lockref_put_or_lock(&dentry->d_lockref)) 827 - return NULL; 828 - dentry = dentry_kill(dentry, 1); 829 - } 830 - return NULL; 831 - } 832 - 833 818 static void shrink_dentry_list(struct list_head *list) 834 819 { 835 - struct dentry *dentry; 820 + struct dentry *dentry, *parent; 836 821 837 - rcu_read_lock(); 838 - for (;;) { 839 - dentry = list_entry_rcu(list->prev, struct dentry, d_lru); 840 - if (&dentry->d_lru == list) 841 - break; /* empty */ 842 - 843 - /* 844 - * Get the dentry lock, and re-verify that the dentry is 845 - * this on the shrinking list. If it is, we know that 846 - * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. 847 - */ 822 + while (!list_empty(list)) { 823 + dentry = list_entry(list->prev, struct dentry, d_lru); 848 824 spin_lock(&dentry->d_lock); 849 - if (dentry != list_entry(list->prev, struct dentry, d_lru)) { 850 - spin_unlock(&dentry->d_lock); 851 - continue; 852 - } 853 - 854 825 /* 855 826 * The dispose list is isolated and dentries are not accounted 856 827 * to the LRU here, so we can simply remove it from the list ··· 810 885 * We found an inuse dentry which was not removed from 811 886 * the LRU because of laziness during lookup. Do not free it. 812 887 */ 813 - if (dentry->d_lockref.count) { 888 + if ((int)dentry->d_lockref.count > 0) { 814 889 spin_unlock(&dentry->d_lock); 815 890 continue; 816 891 } 817 - rcu_read_unlock(); 818 892 893 + parent = dentry_kill(dentry, 0); 819 894 /* 820 - * If 'try_to_prune()' returns a dentry, it will 821 - * be the same one we passed in, and d_lock will 822 - * have been held the whole time, so it will not 823 - * have been added to any other lists. We failed 824 - * to get the inode lock. 825 - * 826 - * We just add it back to the shrink list. 895 + * If dentry_kill returns NULL, we have nothing more to do. 
827 896 */ 828 - dentry = try_prune_one_dentry(dentry); 897 + if (!parent) 898 + continue; 829 899 830 - rcu_read_lock(); 831 - if (dentry) { 900 + if (unlikely(parent == dentry)) { 901 + /* 902 + * trylocks have failed and d_lock has been held the 903 + * whole time, so it could not have been added to any 904 + * other lists. Just add it back to the shrink list. 905 + */ 832 906 d_shrink_add(dentry, list); 833 907 spin_unlock(&dentry->d_lock); 908 + continue; 834 909 } 910 + /* 911 + * We need to prune ancestors too. This is necessary to prevent 912 + * quadratic behavior of shrink_dcache_parent(), but is also 913 + * expected to be beneficial in reducing dentry cache 914 + * fragmentation. 915 + */ 916 + dentry = parent; 917 + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) 918 + dentry = dentry_kill(dentry, 1); 835 919 } 836 - rcu_read_unlock(); 837 920 } 838 921 839 922 static enum lru_status ··· 1194 1261 if (data->start == dentry) 1195 1262 goto out; 1196 1263 1197 - /* 1198 - * move only zero ref count dentries to the dispose list. 1199 - * 1200 - * Those which are presently on the shrink list, being processed 1201 - * by shrink_dentry_list(), shouldn't be moved. Otherwise the 1202 - * loop in shrink_dcache_parent() might not make any progress 1203 - * and loop forever. 1204 - */ 1205 - if (dentry->d_lockref.count) { 1206 - dentry_lru_del(dentry); 1207 - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { 1208 - /* 1209 - * We can't use d_lru_shrink_move() because we 1210 - * need to get the global LRU lock and do the 1211 - * LRU accounting. 
1212 - */ 1213 - d_lru_del(dentry); 1214 - d_shrink_add(dentry, &data->dispose); 1264 + if (dentry->d_flags & DCACHE_SHRINK_LIST) { 1215 1265 data->found++; 1216 - ret = D_WALK_NORETRY; 1266 + } else { 1267 + if (dentry->d_flags & DCACHE_LRU_LIST) 1268 + d_lru_del(dentry); 1269 + if (!dentry->d_lockref.count) { 1270 + d_shrink_add(dentry, &data->dispose); 1271 + data->found++; 1272 + } 1217 1273 } 1218 1274 /* 1219 1275 * We can return to the caller if we have found some (this 1220 1276 * ensures forward progress). We'll be coming back to find 1221 1277 * the rest. 1222 1278 */ 1223 - if (data->found && need_resched()) 1224 - ret = D_WALK_QUIT; 1279 + if (!list_empty(&data->dispose)) 1280 + ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; 1225 1281 out: 1226 1282 return ret; 1227 1283 } ··· 1240 1318 } 1241 1319 EXPORT_SYMBOL(shrink_dcache_parent); 1242 1320 1243 - static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry) 1321 + static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) 1244 1322 { 1245 - struct select_data *data = _data; 1246 - enum d_walk_ret ret = D_WALK_CONTINUE; 1323 + /* it has busy descendents; complain about those instead */ 1324 + if (!list_empty(&dentry->d_subdirs)) 1325 + return D_WALK_CONTINUE; 1247 1326 1248 - if (dentry->d_lockref.count) { 1249 - dentry_lru_del(dentry); 1250 - if (likely(!list_empty(&dentry->d_subdirs))) 1251 - goto out; 1252 - if (dentry == data->start && dentry->d_lockref.count == 1) 1253 - goto out; 1254 - printk(KERN_ERR 1255 - "BUG: Dentry %p{i=%lx,n=%s}" 1256 - " still in use (%d)" 1257 - " [unmount of %s %s]\n", 1327 + /* root with refcount 1 is fine */ 1328 + if (dentry == _data && dentry->d_lockref.count == 1) 1329 + return D_WALK_CONTINUE; 1330 + 1331 + printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " 1332 + " still in use (%d) [unmount of %s %s]\n", 1258 1333 dentry, 1259 1334 dentry->d_inode ? 
1260 1335 dentry->d_inode->i_ino : 0UL, 1261 - dentry->d_name.name, 1336 + dentry, 1262 1337 dentry->d_lockref.count, 1263 1338 dentry->d_sb->s_type->name, 1264 1339 dentry->d_sb->s_id); 1265 - BUG(); 1266 - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { 1267 - /* 1268 - * We can't use d_lru_shrink_move() because we 1269 - * need to get the global LRU lock and do the 1270 - * LRU accounting. 1271 - */ 1272 - if (dentry->d_flags & DCACHE_LRU_LIST) 1273 - d_lru_del(dentry); 1274 - d_shrink_add(dentry, &data->dispose); 1275 - data->found++; 1276 - ret = D_WALK_NORETRY; 1277 - } 1278 - out: 1279 - if (data->found && need_resched()) 1280 - ret = D_WALK_QUIT; 1281 - return ret; 1340 + WARN_ON(1); 1341 + return D_WALK_CONTINUE; 1342 + } 1343 + 1344 + static void do_one_tree(struct dentry *dentry) 1345 + { 1346 + shrink_dcache_parent(dentry); 1347 + d_walk(dentry, dentry, umount_check, NULL); 1348 + d_drop(dentry); 1349 + dput(dentry); 1282 1350 } 1283 1351 1284 1352 /* ··· 1278 1366 { 1279 1367 struct dentry *dentry; 1280 1368 1281 - if (down_read_trylock(&sb->s_umount)) 1282 - BUG(); 1369 + WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); 1283 1370 1284 1371 dentry = sb->s_root; 1285 1372 sb->s_root = NULL; 1286 - for (;;) { 1287 - struct select_data data; 1288 - 1289 - INIT_LIST_HEAD(&data.dispose); 1290 - data.start = dentry; 1291 - data.found = 0; 1292 - 1293 - d_walk(dentry, &data, umount_collect, NULL); 1294 - if (!data.found) 1295 - break; 1296 - 1297 - shrink_dentry_list(&data.dispose); 1298 - cond_resched(); 1299 - } 1300 - d_drop(dentry); 1301 - dput(dentry); 1373 + do_one_tree(dentry); 1302 1374 1303 1375 while (!hlist_bl_empty(&sb->s_anon)) { 1304 - struct select_data data; 1305 - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); 1306 - 1307 - INIT_LIST_HEAD(&data.dispose); 1308 - data.start = NULL; 1309 - data.found = 0; 1310 - 1311 - d_walk(dentry, &data, umount_collect, NULL); 1312 - if (data.found) 
1313 - shrink_dentry_list(&data.dispose); 1314 - cond_resched(); 1376 + dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); 1377 + do_one_tree(dentry); 1315 1378 } 1316 1379 } 1317 1380 ··· 1534 1647 unsigned add_flags = d_flags_for_inode(inode); 1535 1648 1536 1649 spin_lock(&dentry->d_lock); 1537 - dentry->d_flags &= ~DCACHE_ENTRY_TYPE; 1538 - dentry->d_flags |= add_flags; 1650 + __d_set_type(dentry, add_flags); 1539 1651 if (inode) 1540 1652 hlist_add_head(&dentry->d_alias, &inode->i_dentry); 1541 1653 dentry->d_inode = inode;
+3 -3
fs/namei.c
··· 1542 1542 inode = path->dentry->d_inode; 1543 1543 } 1544 1544 err = -ENOENT; 1545 - if (!inode) 1545 + if (!inode || d_is_negative(path->dentry)) 1546 1546 goto out_path_put; 1547 1547 1548 1548 if (should_follow_link(path->dentry, follow)) { ··· 2249 2249 mutex_unlock(&dir->d_inode->i_mutex); 2250 2250 2251 2251 done: 2252 - if (!dentry->d_inode) { 2252 + if (!dentry->d_inode || d_is_negative(dentry)) { 2253 2253 error = -ENOENT; 2254 2254 dput(dentry); 2255 2255 goto out; ··· 2994 2994 finish_lookup: 2995 2995 /* we _can_ be in RCU mode here */ 2996 2996 error = -ENOENT; 2997 - if (d_is_negative(path->dentry)) { 2997 + if (!inode || d_is_negative(path->dentry)) { 2998 2998 path_to_nameidata(path, nd); 2999 2999 goto out; 3000 3000 }
+6
fs/posix_acl.c
··· 246 246 umode_t mode = 0; 247 247 int not_equiv = 0; 248 248 249 + /* 250 + * A null ACL can always be presented as mode bits. 251 + */ 252 + if (!acl) 253 + return 0; 254 + 249 255 FOREACH_ACL_ENTRY(pa, acl, pe) { 250 256 switch (pa->e_tag) { 251 257 case ACL_USER_OBJ:
+2
include/linux/dcache.h
··· 221 221 #define DCACHE_SYMLINK_TYPE 0x00300000 /* Symlink */ 222 222 #define DCACHE_FILE_TYPE 0x00400000 /* Other file type */ 223 223 224 + #define DCACHE_MAY_FREE 0x00800000 225 + 224 226 extern seqlock_t rename_lock; 225 227 226 228 static inline int dname_external(const struct dentry *dentry)
+2
include/linux/mm.h
··· 370 370 } 371 371 #endif 372 372 373 + extern void kvfree(const void *addr); 374 + 373 375 static inline void compound_lock(struct page *page) 374 376 { 375 377 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+10
mm/util.c
··· 10 10 #include <linux/swapops.h> 11 11 #include <linux/mman.h> 12 12 #include <linux/hugetlb.h> 13 + #include <linux/vmalloc.h> 13 14 14 15 #include <asm/uaccess.h> 15 16 ··· 387 386 return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); 388 387 } 389 388 EXPORT_SYMBOL(vm_mmap); 389 + 390 + void kvfree(const void *addr) 391 + { 392 + if (is_vmalloc_addr(addr)) 393 + vfree(addr); 394 + else 395 + kfree(addr); 396 + } 397 + EXPORT_SYMBOL(kvfree); 390 398 391 399 struct address_space *page_mapping(struct page *page) 392 400 {
-1
security/apparmor/include/apparmor.h
··· 66 66 char *aa_split_fqname(char *args, char **ns_name); 67 67 void aa_info_message(const char *str); 68 68 void *__aa_kvmalloc(size_t size, gfp_t flags); 69 - void kvfree(void *buffer); 70 69 71 70 static inline void *kvmalloc(size_t size) 72 71 {
-14
security/apparmor/lib.c
··· 104 104 } 105 105 return buffer; 106 106 } 107 - 108 - /** 109 - * kvfree - free an allocation do by kvmalloc 110 - * @buffer: buffer to free (MAYBE_NULL) 111 - * 112 - * Free a buffer allocated by kvmalloc 113 - */ 114 - void kvfree(void *buffer) 115 - { 116 - if (is_vmalloc_addr(buffer)) 117 - vfree(buffer); 118 - else 119 - kfree(buffer); 120 - }