Merge branch 'akpm' (patches from Andrew)

+87

fs/ocfs2/acl.c

··· 322 322 brelse(di_bh); 323 323 return acl; 324 324 } 325 + 326 + int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) 327 + { 328 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 329 + struct posix_acl *acl; 330 + int ret; 331 + 332 + if (S_ISLNK(inode->i_mode)) 333 + return -EOPNOTSUPP; 334 + 335 + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 336 + return 0; 337 + 338 + acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); 339 + if (IS_ERR(acl) || !acl) 340 + return PTR_ERR(acl); 341 + ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); 342 + if (ret) 343 + return ret; 344 + ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, 345 + acl, NULL, NULL); 346 + posix_acl_release(acl); 347 + return ret; 348 + } 349 + 350 + /* 351 + * Initialize the ACLs of a new inode. If parent directory has default ACL, 352 + * then clone to new inode. Called from ocfs2_mknod. 353 + */ 354 + int ocfs2_init_acl(handle_t *handle, 355 + struct inode *inode, 356 + struct inode *dir, 357 + struct buffer_head *di_bh, 358 + struct buffer_head *dir_bh, 359 + struct ocfs2_alloc_context *meta_ac, 360 + struct ocfs2_alloc_context *data_ac) 361 + { 362 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 363 + struct posix_acl *acl = NULL; 364 + int ret = 0, ret2; 365 + umode_t mode; 366 + 367 + if (!S_ISLNK(inode->i_mode)) { 368 + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 369 + acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, 370 + dir_bh); 371 + if (IS_ERR(acl)) 372 + return PTR_ERR(acl); 373 + } 374 + if (!acl) { 375 + mode = inode->i_mode & ~current_umask(); 376 + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); 377 + if (ret) { 378 + mlog_errno(ret); 379 + goto cleanup; 380 + } 381 + } 382 + } 383 + if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { 384 + if (S_ISDIR(inode->i_mode)) { 385 + ret = ocfs2_set_acl(handle, inode, di_bh, 386 + ACL_TYPE_DEFAULT, acl, 387 + meta_ac, data_ac); 388 + if (ret) 389 + goto cleanup; 390 + } 391 + mode = inode->i_mode; 392 + ret = __posix_acl_create(&acl, GFP_NOFS, &mode); 393 + if (ret < 0) 394 + return ret; 395 + 396 + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); 397 + if (ret2) { 398 + mlog_errno(ret2); 399 + ret = ret2; 400 + goto cleanup; 401 + } 402 + if (ret > 0) { 403 + ret = ocfs2_set_acl(handle, inode, 404 + di_bh, ACL_TYPE_ACCESS, 405 + acl, meta_ac, data_ac); 406 + } 407 + } 408 + cleanup: 409 + posix_acl_release(acl); 410 + return ret; 411 + }

+5

fs/ocfs2/acl.h

··· 35 35 struct posix_acl *acl, 36 36 struct ocfs2_alloc_context *meta_ac, 37 37 struct ocfs2_alloc_context *data_ac); 38 + extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); 39 + extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 40 + struct buffer_head *, struct buffer_head *, 41 + struct ocfs2_alloc_context *, 42 + struct ocfs2_alloc_context *); 38 43 39 44 #endif /* OCFS2_ACL_H */

+2 -2

fs/ocfs2/file.c

··· 1268 1268 if (size_change) 1269 1269 ocfs2_rw_unlock(inode, 1); 1270 1270 bail: 1271 - brelse(bh); 1272 1271 1273 1272 /* Release quota pointers in case we acquired them */ 1274 1273 for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++) 1275 1274 dqput(transfer_to[qtype]); 1276 1275 1277 1276 if (!status && attr->ia_valid & ATTR_MODE) { 1278 - status = posix_acl_chmod(inode, inode->i_mode); 1277 + status = ocfs2_acl_chmod(inode, bh); 1279 1278 if (status < 0) 1280 1279 mlog_errno(status); 1281 1280 } 1282 1281 if (inode_locked) 1283 1282 ocfs2_inode_unlock(inode, 1); 1284 1283 1284 + brelse(bh); 1285 1285 return status; 1286 1286 } 1287 1287

+2 -21

fs/ocfs2/namei.c

··· 259 259 struct ocfs2_dir_lookup_result lookup = { NULL, }; 260 260 sigset_t oldset; 261 261 int did_block_signals = 0; 262 - struct posix_acl *default_acl = NULL, *acl = NULL; 263 262 struct ocfs2_dentry_lock *dl = NULL; 264 263 265 264 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name, ··· 366 367 goto leave; 367 368 } 368 369 369 - status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); 370 - if (status) { 371 - mlog_errno(status); 372 - goto leave; 373 - } 374 - 375 370 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 376 371 S_ISDIR(mode), 377 372 xattr_credits)); ··· 414 421 inc_nlink(dir); 415 422 } 416 423 417 - if (default_acl) { 418 - status = ocfs2_set_acl(handle, inode, new_fe_bh, 419 - ACL_TYPE_DEFAULT, default_acl, 420 - meta_ac, data_ac); 421 - } 422 - if (!status && acl) { 423 - status = ocfs2_set_acl(handle, inode, new_fe_bh, 424 - ACL_TYPE_ACCESS, acl, 425 - meta_ac, data_ac); 426 - } 424 + status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh, 425 + meta_ac, data_ac); 427 426 428 427 if (status < 0) { 429 428 mlog_errno(status); ··· 457 472 d_instantiate(dentry, inode); 458 473 status = 0; 459 474 leave: 460 - if (default_acl) 461 - posix_acl_release(default_acl); 462 - if (acl) 463 - posix_acl_release(acl); 464 475 if (status < 0 && did_quota_inode) 465 476 dquot_free_inode(inode); 466 477 if (handle)

+2 -15

fs/ocfs2/refcounttree.c

··· 4248 4248 struct inode *inode = d_inode(old_dentry); 4249 4249 struct buffer_head *old_bh = NULL; 4250 4250 struct inode *new_orphan_inode = NULL; 4251 - struct posix_acl *default_acl, *acl; 4252 - umode_t mode; 4253 4251 4254 4252 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) 4255 4253 return -EOPNOTSUPP; 4256 4254 4257 - mode = inode->i_mode; 4258 - error = posix_acl_create(dir, &mode, &default_acl, &acl); 4259 - if (error) { 4260 - mlog_errno(error); 4261 - return error; 4262 - } 4263 4255 4264 - error = ocfs2_create_inode_in_orphan(dir, mode, 4256 + error = ocfs2_create_inode_in_orphan(dir, inode->i_mode, 4265 4257 &new_orphan_inode); 4266 4258 if (error) { 4267 4259 mlog_errno(error); ··· 4292 4300 /* If the security isn't preserved, we need to re-initialize them. */ 4293 4301 if (!preserve) { 4294 4302 error = ocfs2_init_security_and_acl(dir, new_orphan_inode, 4295 - &new_dentry->d_name, 4296 - default_acl, acl); 4303 + &new_dentry->d_name); 4297 4304 if (error) 4298 4305 mlog_errno(error); 4299 4306 } 4300 4307 out: 4301 - if (default_acl) 4302 - posix_acl_release(default_acl); 4303 - if (acl) 4304 - posix_acl_release(acl); 4305 4308 if (!error) { 4306 4309 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, 4307 4310 new_dentry);

+5 -9

fs/ocfs2/xattr.c

··· 7216 7216 */ 7217 7217 int ocfs2_init_security_and_acl(struct inode *dir, 7218 7218 struct inode *inode, 7219 - const struct qstr *qstr, 7220 - struct posix_acl *default_acl, 7221 - struct posix_acl *acl) 7219 + const struct qstr *qstr) 7222 7220 { 7223 - struct buffer_head *dir_bh = NULL; 7224 7221 int ret = 0; 7222 + struct buffer_head *dir_bh = NULL; 7225 7223 7226 7224 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7227 7225 if (ret) { ··· 7232 7234 mlog_errno(ret); 7233 7235 goto leave; 7234 7236 } 7235 - 7236 - if (!ret && default_acl) 7237 - ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 7238 - if (!ret && acl) 7239 - ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS); 7237 + ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); 7238 + if (ret) 7239 + mlog_errno(ret); 7240 7240 7241 7241 ocfs2_inode_unlock(dir, 0); 7242 7242 brelse(dir_bh);

+1 -3

fs/ocfs2/xattr.h

··· 94 94 bool preserve_security); 95 95 int ocfs2_init_security_and_acl(struct inode *dir, 96 96 struct inode *inode, 97 - const struct qstr *qstr, 98 - struct posix_acl *default_acl, 99 - struct posix_acl *acl); 97 + const struct qstr *qstr); 100 98 #endif /* OCFS2_XATTR_H */

+9

include/linux/mm.h

··· 500 500 501 501 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 502 502 int total_mapcount(struct page *page); 503 + int page_trans_huge_mapcount(struct page *page, int *total_mapcount); 503 504 #else 504 505 static inline int total_mapcount(struct page *page) 505 506 { 506 507 return page_mapcount(page); 508 + } 509 + static inline int page_trans_huge_mapcount(struct page *page, 510 + int *total_mapcount) 511 + { 512 + int mapcount = page_mapcount(page); 513 + if (total_mapcount) 514 + *total_mapcount = mapcount; 515 + return mapcount; 507 516 } 508 517 #endif 509 518

+3 -3

include/linux/swap.h

··· 418 418 extern int page_swapcount(struct page *); 419 419 extern int swp_swapcount(swp_entry_t entry); 420 420 extern struct swap_info_struct *page_swap_info(struct page *); 421 - extern int reuse_swap_page(struct page *); 421 + extern bool reuse_swap_page(struct page *, int *); 422 422 extern int try_to_free_swap(struct page *); 423 423 struct backing_dev_info; 424 424 ··· 513 513 return 0; 514 514 } 515 515 516 - #define reuse_swap_page(page) \ 517 - (!PageTransCompound(page) && page_mapcount(page) == 1) 516 + #define reuse_swap_page(page, total_mapcount) \ 517 + (page_trans_huge_mapcount(page, total_mapcount) == 1) 518 518 519 519 static inline int try_to_free_swap(struct page *page) 520 520 {

+62 -9

mm/huge_memory.c

··· 1298 1298 VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page); 1299 1299 /* 1300 1300 * We can only reuse the page if nobody else maps the huge page or it's 1301 - * part. We can do it by checking page_mapcount() on each sub-page, but 1302 - * it's expensive. 1303 - * The cheaper way is to check page_count() to be equal 1: every 1304 - * mapcount takes page reference reference, so this way we can 1305 - * guarantee, that the PMD is the only mapping. 1306 - * This can give false negative if somebody pinned the page, but that's 1307 - * fine. 1301 + * part. 1308 1302 */ 1309 - if (page_mapcount(page) == 1 && page_count(page) == 1) { 1303 + if (page_trans_huge_mapcount(page, NULL) == 1) { 1310 1304 pmd_t entry; 1311 1305 entry = pmd_mkyoung(orig_pmd); 1312 1306 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); ··· 2073 2079 if (pte_write(pteval)) { 2074 2080 writable = true; 2075 2081 } else { 2076 - if (PageSwapCache(page) && !reuse_swap_page(page)) { 2082 + if (PageSwapCache(page) && 2083 + !reuse_swap_page(page, NULL)) { 2077 2084 unlock_page(page); 2078 2085 result = SCAN_SWAP_CACHE_PAGE; 2079 2086 goto out; ··· 3214 3219 ret += atomic_read(&page[i]._mapcount) + 1; 3215 3220 if (PageDoubleMap(page)) 3216 3221 ret -= HPAGE_PMD_NR; 3222 + return ret; 3223 + } 3224 + 3225 + /* 3226 + * This calculates accurately how many mappings a transparent hugepage 3227 + * has (unlike page_mapcount() which isn't fully accurate). This full 3228 + * accuracy is primarily needed to know if copy-on-write faults can 3229 + * reuse the page and change the mapping to read-write instead of 3230 + * copying them. At the same time this returns the total_mapcount too. 3231 + * 3232 + * The function returns the highest mapcount any one of the subpages 3233 + * has. If the return value is one, even if different processes are 3234 + * mapping different subpages of the transparent hugepage, they can 3235 + * all reuse it, because each process is reusing a different subpage. 3236 + * 3237 + * The total_mapcount is instead counting all virtual mappings of the 3238 + * subpages. If the total_mapcount is equal to "one", it tells the 3239 + * caller all mappings belong to the same "mm" and in turn the 3240 + * anon_vma of the transparent hugepage can become the vma->anon_vma 3241 + * local one as no other process may be mapping any of the subpages. 3242 + * 3243 + * It would be more accurate to replace page_mapcount() with 3244 + * page_trans_huge_mapcount(), however we only use 3245 + * page_trans_huge_mapcount() in the copy-on-write faults where we 3246 + * need full accuracy to avoid breaking page pinning, because 3247 + * page_trans_huge_mapcount() is slower than page_mapcount(). 3248 + */ 3249 + int page_trans_huge_mapcount(struct page *page, int *total_mapcount) 3250 + { 3251 + int i, ret, _total_mapcount, mapcount; 3252 + 3253 + /* hugetlbfs shouldn't call it */ 3254 + VM_BUG_ON_PAGE(PageHuge(page), page); 3255 + 3256 + if (likely(!PageTransCompound(page))) { 3257 + mapcount = atomic_read(&page->_mapcount) + 1; 3258 + if (total_mapcount) 3259 + *total_mapcount = mapcount; 3260 + return mapcount; 3261 + } 3262 + 3263 + page = compound_head(page); 3264 + 3265 + _total_mapcount = ret = 0; 3266 + for (i = 0; i < HPAGE_PMD_NR; i++) { 3267 + mapcount = atomic_read(&page[i]._mapcount) + 1; 3268 + ret = max(ret, mapcount); 3269 + _total_mapcount += mapcount; 3270 + } 3271 + if (PageDoubleMap(page)) { 3272 + ret -= 1; 3273 + _total_mapcount -= HPAGE_PMD_NR; 3274 + } 3275 + mapcount = compound_mapcount(page); 3276 + ret += mapcount; 3277 + _total_mapcount += mapcount; 3278 + if (total_mapcount) 3279 + *total_mapcount = _total_mapcount; 3217 3280 return ret; 3218 3281 } 3219 3282

+10 -5

mm/ksm.c

··· 783 783 } 784 784 785 785 remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list); 786 + up_read(&mm->mmap_sem); 786 787 787 788 spin_lock(&ksm_mmlist_lock); 788 789 ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next, ··· 795 794 796 795 free_mm_slot(mm_slot); 797 796 clear_bit(MMF_VM_MERGEABLE, &mm->flags); 798 - up_read(&mm->mmap_sem); 799 797 mmdrop(mm); 800 - } else { 798 + } else 801 799 spin_unlock(&ksm_mmlist_lock); 802 - up_read(&mm->mmap_sem); 803 - } 804 800 } 805 801 806 802 /* Clean up stable nodes, but don't worry if some are still busy */ ··· 1661 1663 up_read(&mm->mmap_sem); 1662 1664 mmdrop(mm); 1663 1665 } else { 1664 - spin_unlock(&ksm_mmlist_lock); 1665 1666 up_read(&mm->mmap_sem); 1667 + /* 1668 + * up_read(&mm->mmap_sem) first because after 1669 + * spin_unlock(&ksm_mmlist_lock) run, the "mm" may 1670 + * already have been freed under us by __ksm_exit() 1671 + * because the "mm_slot" is still hashed and 1672 + * ksm_scan.mm_slot doesn't point to it anymore. 1673 + */ 1674 + spin_unlock(&ksm_mmlist_lock); 1666 1675 } 1667 1676 1668 1677 /* Repeat until we've completed scanning the whole list */

+14 -8

mm/memory.c

··· 2373 2373 * not dirty accountable. 2374 2374 */ 2375 2375 if (PageAnon(old_page) && !PageKsm(old_page)) { 2376 + int total_mapcount; 2376 2377 if (!trylock_page(old_page)) { 2377 2378 get_page(old_page); 2378 2379 pte_unmap_unlock(page_table, ptl); ··· 2388 2387 } 2389 2388 put_page(old_page); 2390 2389 } 2391 - if (reuse_swap_page(old_page)) { 2392 - /* 2393 - * The page is all ours. Move it to our anon_vma so 2394 - * the rmap code will not search our parent or siblings. 2395 - * Protected against the rmap code by the page lock. 2396 - */ 2397 - page_move_anon_rmap(old_page, vma, address); 2390 + if (reuse_swap_page(old_page, &total_mapcount)) { 2391 + if (total_mapcount == 1) { 2392 + /* 2393 + * The page is all ours. Move it to 2394 + * our anon_vma so the rmap code will 2395 + * not search our parent or siblings. 2396 + * Protected against the rmap code by 2397 + * the page lock. 2398 + */ 2399 + page_move_anon_rmap(compound_head(old_page), 2400 + vma, address); 2401 + } 2398 2402 unlock_page(old_page); 2399 2403 return wp_page_reuse(mm, vma, address, page_table, ptl, 2400 2404 orig_pte, old_page, 0, 0); ··· 2623 2617 inc_mm_counter_fast(mm, MM_ANONPAGES); 2624 2618 dec_mm_counter_fast(mm, MM_SWAPENTS); 2625 2619 pte = mk_pte(page, vma->vm_page_prot); 2626 - if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { 2620 + if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { 2627 2621 pte = maybe_mkwrite(pte_mkdirty(pte), vma); 2628 2622 flags &= ~FAULT_FLAG_WRITE; 2629 2623 ret |= VM_FAULT_WRITE;

+7 -6

mm/swapfile.c

··· 922 922 * to it. And as a side-effect, free up its swap: because the old content 923 923 * on disk will never be read, and seeking back there to write new content 924 924 * later would only waste time away from clustering. 925 + * 926 + * NOTE: total_mapcount should not be relied upon by the caller if 927 + * reuse_swap_page() returns false, but it may be always overwritten 928 + * (see the other implementation for CONFIG_SWAP=n). 925 929 */ 926 - int reuse_swap_page(struct page *page) 930 + bool reuse_swap_page(struct page *page, int *total_mapcount) 927 931 { 928 932 int count; 929 933 930 934 VM_BUG_ON_PAGE(!PageLocked(page), page); 931 935 if (unlikely(PageKsm(page))) 932 - return 0; 933 - /* The page is part of THP and cannot be reused */ 934 - if (PageTransCompound(page)) 935 - return 0; 936 - count = page_mapcount(page); 936 + return false; 937 + count = page_trans_huge_mapcount(page, total_mapcount); 937 938 if (count <= 1 && PageSwapCache(page)) { 938 939 count += page_swapcount(page); 939 940 if (count == 1 && !PageWriteback(page)) {

Configure Feed

Configure Feed