Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nfs-for-5.9-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
"Stable fixes:
- pNFS: Don't return layout segments that are being used for I/O
- pNFS: Don't move layout segments off the active list when being used for I/O

Features:
- NFS: Add support for user xattrs through the NFSv4.2 protocol
- NFS: Allow applications to speed up readdir+statx() using AT_STATX_DONT_SYNC
- NFSv4.0: Allow nconnect for v4.0

Bugfixes and cleanups:
- nfs: ensure correct writeback errors are returned on close()
- nfs: nfs_file_write() should check for writeback errors
- nfs: Fix getxattr kernel panic and memory overflow
- NFS: Fix the pNFS/flexfiles mirrored read failover code
- SUNRPC: don't update timeout value on connection reset
- freezer: Add unsafe versions of freezable_schedule_timeout_interruptible for NFS
- sunrpc: destroy rpc_inode_cachep after unregister_filesystem"

* tag 'nfs-for-5.9-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (32 commits)
NFS: Fix flexfiles read failover
fs: nfs: delete repeated words in comments
rpc_pipefs: convert comma to semicolon
nfs: Fix getxattr kernel panic and memory overflow
NFS: Don't return layout segments that are in use
NFS: Don't move layouts to plh_return_segs list while in use
NFS: Add layout segment info to pnfs read/write/commit tracepoints
NFS: Add tracepoints for layouterror and layoutstats.
NFS: Report the stateid + status in trace_nfs4_layoutreturn_on_close()
SUNRPC dont update timeout value on connection reset
nfs: nfs_file_write() should check for writeback errors
nfs: ensure correct writeback errors are returned on close()
NFSv4.2: xattr cache: get rid of cache discard work queue
NFS: remove redundant initialization of variable result
NFSv4.0 allow nconnect for v4.0
freezer: Add unsafe versions of freezable_schedule_timeout_interruptible for NFS
sunrpc: destroy rpc_inode_cachep after unregister_filesystem
NFSv4.2: add client side xattr caching.
NFSv4.2: hook in the user extended attribute handlers
NFSv4.2: add the extended attribute proc functions.
...

+2368 -123
+1 -1
fs/nfs/Makefile
··· 30 30 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o 31 31 nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o 32 32 nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o 33 - nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o 33 + nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o nfs42xattr.o 34 34 35 35 obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ 36 36 obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+1 -1
fs/nfs/blocklayout/rpc_pipefs.c
··· 79 79 goto out_free_data; 80 80 81 81 bl_msg = msg->data; 82 - bl_msg->type = BL_DEVICE_MOUNT, 82 + bl_msg->type = BL_DEVICE_MOUNT; 83 83 bl_msg->totallen = b->simple.len; 84 84 nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); 85 85
+20 -2
fs/nfs/client.c
··· 50 50 #include "nfs.h" 51 51 #include "netns.h" 52 52 #include "sysfs.h" 53 + #include "nfs42.h" 53 54 54 55 #define NFSDBG_FACILITY NFSDBG_CLIENT 55 56 ··· 750 749 static void nfs_server_set_fsinfo(struct nfs_server *server, 751 750 struct nfs_fsinfo *fsinfo) 752 751 { 753 - unsigned long max_rpc_payload; 752 + unsigned long max_rpc_payload, raw_max_rpc_payload; 754 753 755 754 /* Work out a lot of parameters */ 756 755 if (server->rsize == 0) ··· 763 762 if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax) 764 763 server->wsize = nfs_block_size(fsinfo->wtmax, NULL); 765 764 766 - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); 765 + raw_max_rpc_payload = rpc_max_payload(server->client); 766 + max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL); 767 + 767 768 if (server->rsize > max_rpc_payload) 768 769 server->rsize = max_rpc_payload; 769 770 if (server->rsize > NFS_MAX_FILE_IO_SIZE) ··· 798 795 server->clone_blksize = fsinfo->clone_blksize; 799 796 /* We're airborne Set socket buffersize */ 800 797 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); 798 + 799 + #ifdef CONFIG_NFS_V4_2 800 + /* 801 + * Defaults until limited by the session parameters. 802 + */ 803 + server->gxasize = min_t(unsigned int, raw_max_rpc_payload, 804 + XATTR_SIZE_MAX); 805 + server->sxasize = min_t(unsigned int, raw_max_rpc_payload, 806 + XATTR_SIZE_MAX); 807 + server->lxasize = min_t(unsigned int, raw_max_rpc_payload, 808 + nfs42_listxattr_xdrsize(XATTR_LIST_MAX)); 809 + 810 + if (fsinfo->xattr_support) 811 + server->caps |= NFS_CAP_XATTR; 812 + #endif 801 813 } 802 814 803 815 /*
+20 -4
fs/nfs/dir.c
··· 2460 2460 return NULL; 2461 2461 } 2462 2462 2463 - static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) 2463 + static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) 2464 2464 { 2465 2465 struct nfs_inode *nfsi = NFS_I(inode); 2466 2466 struct nfs_access_entry *cache; ··· 2532 2532 rcu_read_unlock(); 2533 2533 return err; 2534 2534 } 2535 + 2536 + int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct 2537 + nfs_access_entry *res, bool may_block) 2538 + { 2539 + int status; 2540 + 2541 + status = nfs_access_get_cached_rcu(inode, cred, res); 2542 + if (status != 0) 2543 + status = nfs_access_get_cached_locked(inode, cred, res, 2544 + may_block); 2545 + 2546 + return status; 2547 + } 2548 + EXPORT_SYMBOL_GPL(nfs_access_get_cached); 2535 2549 2536 2550 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) 2537 2551 { ··· 2661 2647 2662 2648 trace_nfs_access_enter(inode); 2663 2649 2664 - status = nfs_access_get_cached_rcu(inode, cred, &cache); 2665 - if (status != 0) 2666 - status = nfs_access_get_cached(inode, cred, &cache, may_block); 2650 + status = nfs_access_get_cached(inode, cred, &cache, may_block); 2667 2651 if (status == 0) 2668 2652 goto out_cached; 2669 2653 ··· 2673 2661 * Determine which access bits we want to ask for... 2674 2662 */ 2675 2663 cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; 2664 + if (nfs_server_capable(inode, NFS_CAP_XATTR)) { 2665 + cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE | 2666 + NFS_ACCESS_XALIST; 2667 + } 2676 2668 if (S_ISDIR(inode->i_mode)) 2677 2669 cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; 2678 2670 else
+1 -1
fs/nfs/direct.c
··· 896 896 */ 897 897 ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) 898 898 { 899 - ssize_t result = -EINVAL, requested; 899 + ssize_t result, requested; 900 900 size_t count; 901 901 struct file *file = iocb->ki_filp; 902 902 struct address_space *mapping = file->f_mapping;
+13 -4
fs/nfs/file.c
··· 140 140 nfs_file_flush(struct file *file, fl_owner_t id) 141 141 { 142 142 struct inode *inode = file_inode(file); 143 + errseq_t since; 143 144 144 145 dprintk("NFS: flush(%pD2)\n", file); 145 146 ··· 149 148 return 0; 150 149 151 150 /* Flush writes to the server and return any errors */ 152 - return nfs_wb_all(inode); 151 + since = filemap_sample_wb_err(file->f_mapping); 152 + nfs_wb_all(inode); 153 + return filemap_check_wb_err(file->f_mapping, since); 153 154 } 154 155 155 156 ssize_t ··· 590 587 .page_mkwrite = nfs_vm_page_mkwrite, 591 588 }; 592 589 593 - static int nfs_need_check_write(struct file *filp, struct inode *inode) 590 + static int nfs_need_check_write(struct file *filp, struct inode *inode, 591 + int error) 594 592 { 595 593 struct nfs_open_context *ctx; 596 594 597 595 ctx = nfs_file_open_context(filp); 598 - if (nfs_ctx_key_to_expire(ctx, inode)) 596 + if (nfs_error_is_fatal_on_server(error) || 597 + nfs_ctx_key_to_expire(ctx, inode)) 599 598 return 1; 600 599 return 0; 601 600 } ··· 608 603 struct inode *inode = file_inode(file); 609 604 unsigned long written = 0; 610 605 ssize_t result; 606 + errseq_t since; 607 + int error; 611 608 612 609 result = nfs_key_timeout_notify(file, inode); 613 610 if (result) ··· 634 627 if (iocb->ki_pos > i_size_read(inode)) 635 628 nfs_revalidate_mapping(inode, file->f_mapping); 636 629 630 + since = filemap_sample_wb_err(file->f_mapping); 637 631 nfs_start_io_write(inode); 638 632 result = generic_write_checks(iocb, from); 639 633 if (result > 0) { ··· 653 645 goto out; 654 646 655 647 /* Return error values */ 656 - if (nfs_need_check_write(file, inode)) { 648 + error = filemap_check_wb_err(file->f_mapping, since); 649 + if (nfs_need_check_write(file, inode, error)) { 657 650 int err = nfs_wb_all(inode); 658 651 if (err < 0) 659 652 result = err;
+40 -24
fs/nfs/flexfilelayout/flexfilelayout.c
··· 790 790 return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); 791 791 } 792 792 793 + static struct nfs4_pnfs_ds * 794 + ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx) 795 + { 796 + struct pnfs_layout_segment *lseg = pgio->pg_lseg; 797 + struct nfs4_pnfs_ds *ds; 798 + 799 + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, 800 + best_idx); 801 + if (ds || !pgio->pg_mirror_idx) 802 + return ds; 803 + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); 804 + } 805 + 793 806 static void 794 807 ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, 795 808 struct nfs_page *req, ··· 853 840 goto out_nolseg; 854 841 } 855 842 856 - ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); 843 + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); 857 844 if (!ds) { 858 845 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) 859 846 goto out_mds; 860 - pnfs_put_lseg(pgio->pg_lseg); 861 - pgio->pg_lseg = NULL; 847 + pnfs_generic_pg_cleanup(pgio); 862 848 /* Sleep for 1 second before retrying */ 863 849 ssleep(1); 864 850 goto retry; ··· 883 871 0, NFS4_MAX_UINT64, IOMODE_READ, 884 872 NFS_I(pgio->pg_inode)->layout, 885 873 pgio->pg_lseg); 886 - pnfs_put_lseg(pgio->pg_lseg); 887 - pgio->pg_lseg = NULL; 888 874 pgio->pg_maxretrans = 0; 889 875 nfs_pageio_reset_read_mds(pgio); 890 876 } ··· 926 916 if (!ds) { 927 917 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) 928 918 goto out_mds; 929 - pnfs_put_lseg(pgio->pg_lseg); 930 - pgio->pg_lseg = NULL; 919 + pnfs_generic_pg_cleanup(pgio); 931 920 /* Sleep for 1 second before retrying */ 932 921 ssleep(1); 933 922 goto retry; ··· 948 939 0, NFS4_MAX_UINT64, IOMODE_RW, 949 940 NFS_I(pgio->pg_inode)->layout, 950 941 pgio->pg_lseg); 951 - pnfs_put_lseg(pgio->pg_lseg); 952 - pgio->pg_lseg = NULL; 953 942 pgio->pg_maxretrans = 0; 954 943 nfs_pageio_reset_write_mds(pgio); 955 944 pgio->pg_error = -EAGAIN; ··· 960 953 if (!pgio->pg_lseg) { 961 954 
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 962 955 nfs_req_openctx(req), 963 - 0, 964 - NFS4_MAX_UINT64, 956 + req_offset(req), 957 + req->wb_bytes, 965 958 IOMODE_RW, 966 959 false, 967 960 GFP_NOFS); ··· 1035 1028 } 1036 1029 } 1037 1030 1031 + static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) 1032 + { 1033 + u32 idx = hdr->pgio_mirror_idx + 1; 1034 + int new_idx = 0; 1035 + 1036 + if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) 1037 + ff_layout_send_layouterror(hdr->lseg); 1038 + else 1039 + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); 1040 + pnfs_read_resend_pnfs(hdr, new_idx); 1041 + } 1042 + 1038 1043 static void ff_layout_reset_read(struct nfs_pgio_header *hdr) 1039 1044 { 1040 1045 struct rpc_task *task = &hdr->task; 1041 1046 1042 1047 pnfs_layoutcommit_inode(hdr->inode, false); 1048 + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); 1043 1049 1044 1050 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1045 1051 dprintk("%s Reset task %5u for i/o through MDS " ··· 1254 1234 break; 1255 1235 case NFS4ERR_NXIO: 1256 1236 ff_layout_mark_ds_unreachable(lseg, idx); 1237 + /* 1238 + * Don't return the layout if this is a read and we still 1239 + * have layouts to try 1240 + */ 1241 + if (opnum == OP_READ) 1242 + break; 1257 1243 /* Fallthrough */ 1258 1244 default: 1259 1245 pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, ··· 1273 1247 static int ff_layout_read_done_cb(struct rpc_task *task, 1274 1248 struct nfs_pgio_header *hdr) 1275 1249 { 1276 - int new_idx = hdr->pgio_mirror_idx; 1277 1250 int err; 1278 1251 1279 1252 if (task->tk_status < 0) { ··· 1292 1267 clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); 1293 1268 switch (err) { 1294 1269 case -NFS4ERR_RESET_TO_PNFS: 1295 - if (ff_layout_choose_best_ds_for_read(hdr->lseg, 1296 - hdr->pgio_mirror_idx + 1, 1297 - &new_idx)) 1298 - goto out_layouterror; 1299 1270 set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); 1300 1271 return 
task->tk_status; 1301 1272 case -NFS4ERR_RESET_TO_MDS: ··· 1302 1281 } 1303 1282 1304 1283 return 0; 1305 - out_layouterror: 1306 - ff_layout_read_record_layoutstats_done(task, hdr); 1307 - ff_layout_send_layouterror(hdr->lseg); 1308 - hdr->pgio_mirror_idx = new_idx; 1309 1284 out_eagain: 1310 1285 rpc_restart_call_prepare(task); 1311 1286 return -EAGAIN; ··· 1428 1411 struct nfs_pgio_header *hdr = data; 1429 1412 1430 1413 ff_layout_read_record_layoutstats_done(&hdr->task, hdr); 1431 - if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) { 1432 - ff_layout_send_layouterror(hdr->lseg); 1433 - pnfs_read_resend_pnfs(hdr); 1434 - } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) 1414 + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) 1415 + ff_layout_resend_pnfs_read(hdr); 1416 + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) 1435 1417 ff_layout_reset_read(hdr); 1436 1418 pnfs_generic_rw_release(data); 1437 1419 }
+1 -1
fs/nfs/fs_context.c
··· 982 982 /* 983 983 * The legacy version 6 binary mount data from userspace has a 984 984 * field used only to transport selinux information into the 985 - * the kernel. To continue to support that functionality we 985 + * kernel. To continue to support that functionality we 986 986 * have a touch of selinux knowledge here in the NFS code. The 987 987 * userspace code converted context=blah to just blah so we are 988 988 * converting back to the full string selinux understands.
+17 -3
fs/nfs/inode.c
··· 193 193 194 194 return nfs_check_cache_invalid_not_delegated(inode, flags); 195 195 } 196 + EXPORT_SYMBOL_GPL(nfs_check_cache_invalid); 196 197 197 198 static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) 198 199 { ··· 205 204 flags &= ~NFS_INO_INVALID_OTHER; 206 205 flags &= ~(NFS_INO_INVALID_CHANGE 207 206 | NFS_INO_INVALID_SIZE 208 - | NFS_INO_REVAL_PAGECACHE); 207 + | NFS_INO_REVAL_PAGECACHE 208 + | NFS_INO_INVALID_XATTR); 209 209 } 210 210 211 211 if (inode->i_mapping->nrpages == 0) ··· 235 233 | NFS_INO_INVALID_DATA 236 234 | NFS_INO_INVALID_ACCESS 237 235 | NFS_INO_INVALID_ACL 236 + | NFS_INO_INVALID_XATTR 238 237 | NFS_INO_REVAL_PAGECACHE); 239 238 } else 240 239 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR 241 240 | NFS_INO_INVALID_ACCESS 242 241 | NFS_INO_INVALID_ACL 242 + | NFS_INO_INVALID_XATTR 243 243 | NFS_INO_REVAL_PAGECACHE); 244 244 nfs_zap_label_cache_locked(nfsi); 245 245 } ··· 546 542 inode->i_gid = fattr->gid; 547 543 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 548 544 nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); 545 + if (nfs_server_capable(inode, NFS_CAP_XATTR)) 546 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); 549 547 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 550 548 inode->i_blocks = fattr->du.nfs2.blocks; 551 549 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { ··· 800 794 801 795 trace_nfs_getattr_enter(inode); 802 796 803 - if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) 797 + if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { 798 + nfs_readdirplus_parent_cache_hit(path->dentry); 804 799 goto out_no_update; 800 + } 805 801 806 802 /* Flush out writes to the server in order to update c/mtime. 
*/ 807 803 if ((request_mask & (STATX_CTIME|STATX_MTIME)) && ··· 1383 1375 inode_set_iversion_raw(inode, fattr->change_attr); 1384 1376 if (S_ISDIR(inode->i_mode)) 1385 1377 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); 1378 + else if (nfs_server_capable(inode, NFS_CAP_XATTR)) 1379 + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); 1386 1380 } 1387 1381 /* If we have atomic WCC data, we may update some attributes */ 1388 1382 ts = inode->i_ctime; ··· 1902 1892 if (!(have_writers || have_delegation)) { 1903 1893 invalid |= NFS_INO_INVALID_DATA 1904 1894 | NFS_INO_INVALID_ACCESS 1905 - | NFS_INO_INVALID_ACL; 1895 + | NFS_INO_INVALID_ACL 1896 + | NFS_INO_INVALID_XATTR; 1906 1897 /* Force revalidate of all attributes */ 1907 1898 save_cache_validity |= NFS_INO_INVALID_CTIME 1908 1899 | NFS_INO_INVALID_MTIME ··· 2106 2095 #if IS_ENABLED(CONFIG_NFS_V4) 2107 2096 nfsi->nfs4_acl = NULL; 2108 2097 #endif /* CONFIG_NFS_V4 */ 2098 + #ifdef CONFIG_NFS_V4_2 2099 + nfsi->xattr_cache = NULL; 2100 + #endif 2109 2101 return &nfsi->vfs_inode; 2110 2102 } 2111 2103 EXPORT_SYMBOL_GPL(nfs_alloc_inode);
+24
fs/nfs/nfs42.h
··· 6 6 #ifndef __LINUX_FS_NFS_NFS4_2_H 7 7 #define __LINUX_FS_NFS_NFS4_2_H 8 8 9 + #include <linux/xattr.h> 10 + 9 11 /* 10 12 * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support 11 13 * more? Need to consider not to pre-alloc too much for a compound. ··· 37 35 38 36 return nfs4_check_serverowner_major_id(c_in->cl_serverowner, 39 37 c_out->cl_serverowner); 38 + } 39 + 40 + ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, 41 + void *buf, size_t buflen); 42 + int nfs42_proc_setxattr(struct inode *inode, const char *name, 43 + const void *buf, size_t buflen, int flags); 44 + ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf, 45 + size_t buflen, u64 *cookiep, bool *eofp); 46 + int nfs42_proc_removexattr(struct inode *inode, const char *name); 47 + 48 + /* 49 + * Maximum XDR buffer size needed for a listxattr buffer of buflen size. 50 + * 51 + * The upper boundary is a buffer with all 1-byte sized attribute names. 52 + * They would be 7 bytes long in the eventual buffer ("user.x\0"), and 53 + * 8 bytes long XDR-encoded. 54 + * 55 + * Include the trailing eof word as well. 56 + */ 57 + static inline u32 nfs42_listxattr_xdrsize(u32 buflen) 58 + { 59 + return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4; 40 60 } 41 61 #endif /* CONFIG_NFS_V4_2 */ 42 62 #endif /* __LINUX_FS_NFS_NFS4_2_H */
+256 -2
fs/nfs/nfs42proc.c
··· 17 17 #include "nfs4session.h" 18 18 #include "internal.h" 19 19 #include "delegation.h" 20 + #include "nfs4trace.h" 20 21 21 22 #define NFSDBG_FACILITY NFSDBG_PROC 22 23 static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); ··· 715 714 716 715 switch (task->tk_status) { 717 716 case 0: 718 - break; 717 + return; 719 718 case -NFS4ERR_BADHANDLE: 720 719 case -ESTALE: 721 720 pnfs_destroy_layout(NFS_I(inode)); ··· 761 760 case -EOPNOTSUPP: 762 761 NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; 763 762 } 763 + 764 + trace_nfs4_layoutstats(inode, &data->args.stateid, task->tk_status); 764 765 } 765 766 766 767 static void ··· 885 882 886 883 switch (task->tk_status) { 887 884 case 0: 888 - break; 885 + return; 889 886 case -NFS4ERR_BADHANDLE: 890 887 case -ESTALE: 891 888 pnfs_destroy_layout(NFS_I(inode)); ··· 929 926 case -EOPNOTSUPP: 930 927 NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR; 931 928 } 929 + 930 + trace_nfs4_layouterror(inode, &data->args.errors[0].stateid, 931 + task->tk_status); 932 932 } 933 933 934 934 static void ··· 1092 1086 nfs_put_lock_context(dst_lock); 1093 1087 out_put_src_lock: 1094 1088 nfs_put_lock_context(src_lock); 1089 + return err; 1090 + } 1091 + 1092 + #define NFS4XATTR_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) 1093 + 1094 + static int _nfs42_proc_removexattr(struct inode *inode, const char *name) 1095 + { 1096 + struct nfs_server *server = NFS_SERVER(inode); 1097 + struct nfs42_removexattrargs args = { 1098 + .fh = NFS_FH(inode), 1099 + .xattr_name = name, 1100 + }; 1101 + struct nfs42_removexattrres res; 1102 + struct rpc_message msg = { 1103 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVEXATTR], 1104 + .rpc_argp = &args, 1105 + .rpc_resp = &res, 1106 + }; 1107 + int ret; 1108 + unsigned long timestamp = jiffies; 1109 + 1110 + ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args, 1111 + &res.seq_res, 1); 1112 + if (!ret) 1113 + nfs4_update_changeattr(inode, &res.cinfo, 
timestamp, 0); 1114 + 1115 + return ret; 1116 + } 1117 + 1118 + static int _nfs42_proc_setxattr(struct inode *inode, const char *name, 1119 + const void *buf, size_t buflen, int flags) 1120 + { 1121 + struct nfs_server *server = NFS_SERVER(inode); 1122 + struct page *pages[NFS4XATTR_MAXPAGES]; 1123 + struct nfs42_setxattrargs arg = { 1124 + .fh = NFS_FH(inode), 1125 + .xattr_pages = pages, 1126 + .xattr_len = buflen, 1127 + .xattr_name = name, 1128 + .xattr_flags = flags, 1129 + }; 1130 + struct nfs42_setxattrres res; 1131 + struct rpc_message msg = { 1132 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR], 1133 + .rpc_argp = &arg, 1134 + .rpc_resp = &res, 1135 + }; 1136 + int ret, np; 1137 + unsigned long timestamp = jiffies; 1138 + 1139 + if (buflen > server->sxasize) 1140 + return -ERANGE; 1141 + 1142 + if (buflen > 0) { 1143 + np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages); 1144 + if (np < 0) 1145 + return np; 1146 + } else 1147 + np = 0; 1148 + 1149 + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, 1150 + &res.seq_res, 1); 1151 + 1152 + for (; np > 0; np--) 1153 + put_page(pages[np - 1]); 1154 + 1155 + if (!ret) 1156 + nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); 1157 + 1158 + return ret; 1159 + } 1160 + 1161 + static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, 1162 + void *buf, size_t buflen) 1163 + { 1164 + struct nfs_server *server = NFS_SERVER(inode); 1165 + struct page *pages[NFS4XATTR_MAXPAGES] = {}; 1166 + struct nfs42_getxattrargs arg = { 1167 + .fh = NFS_FH(inode), 1168 + .xattr_pages = pages, 1169 + .xattr_len = buflen, 1170 + .xattr_name = name, 1171 + }; 1172 + struct nfs42_getxattrres res; 1173 + struct rpc_message msg = { 1174 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETXATTR], 1175 + .rpc_argp = &arg, 1176 + .rpc_resp = &res, 1177 + }; 1178 + int ret, np; 1179 + 1180 + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, 1181 + &res.seq_res, 0); 1182 + if 
(ret < 0) 1183 + return ret; 1184 + 1185 + /* 1186 + * Normally, the caching is done one layer up, but for successful 1187 + * RPCS, always cache the result here, even if the caller was 1188 + * just querying the length, or if the reply was too big for 1189 + * the caller. This avoids a second RPC in the case of the 1190 + * common query-alloc-retrieve cycle for xattrs. 1191 + * 1192 + * Note that xattr_len is always capped to XATTR_SIZE_MAX. 1193 + */ 1194 + 1195 + nfs4_xattr_cache_add(inode, name, NULL, pages, res.xattr_len); 1196 + 1197 + if (buflen) { 1198 + if (res.xattr_len > buflen) 1199 + return -ERANGE; 1200 + _copy_from_pages(buf, pages, 0, res.xattr_len); 1201 + } 1202 + 1203 + np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE); 1204 + while (--np >= 0) 1205 + __free_page(pages[np]); 1206 + 1207 + return res.xattr_len; 1208 + } 1209 + 1210 + static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, 1211 + size_t buflen, u64 *cookiep, bool *eofp) 1212 + { 1213 + struct nfs_server *server = NFS_SERVER(inode); 1214 + struct page **pages; 1215 + struct nfs42_listxattrsargs arg = { 1216 + .fh = NFS_FH(inode), 1217 + .cookie = *cookiep, 1218 + }; 1219 + struct nfs42_listxattrsres res = { 1220 + .eof = false, 1221 + .xattr_buf = buf, 1222 + .xattr_len = buflen, 1223 + }; 1224 + struct rpc_message msg = { 1225 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LISTXATTRS], 1226 + .rpc_argp = &arg, 1227 + .rpc_resp = &res, 1228 + }; 1229 + u32 xdrlen; 1230 + int ret, np; 1231 + 1232 + 1233 + res.scratch = alloc_page(GFP_KERNEL); 1234 + if (!res.scratch) 1235 + return -ENOMEM; 1236 + 1237 + xdrlen = nfs42_listxattr_xdrsize(buflen); 1238 + if (xdrlen > server->lxasize) 1239 + xdrlen = server->lxasize; 1240 + np = xdrlen / PAGE_SIZE + 1; 1241 + 1242 + pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL); 1243 + if (pages == NULL) { 1244 + __free_page(res.scratch); 1245 + return -ENOMEM; 1246 + } 1247 + 1248 + arg.xattr_pages = pages; 1249 + arg.count = xdrlen; 1250 
+ 1251 + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, 1252 + &res.seq_res, 0); 1253 + 1254 + if (ret >= 0) { 1255 + ret = res.copied; 1256 + *cookiep = res.cookie; 1257 + *eofp = res.eof; 1258 + } 1259 + 1260 + while (--np >= 0) { 1261 + if (pages[np]) 1262 + __free_page(pages[np]); 1263 + } 1264 + 1265 + __free_page(res.scratch); 1266 + kfree(pages); 1267 + 1268 + return ret; 1269 + 1270 + } 1271 + 1272 + ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, 1273 + void *buf, size_t buflen) 1274 + { 1275 + struct nfs4_exception exception = { }; 1276 + ssize_t err; 1277 + 1278 + do { 1279 + err = _nfs42_proc_getxattr(inode, name, buf, buflen); 1280 + if (err >= 0) 1281 + break; 1282 + err = nfs4_handle_exception(NFS_SERVER(inode), err, 1283 + &exception); 1284 + } while (exception.retry); 1285 + 1286 + return err; 1287 + } 1288 + 1289 + int nfs42_proc_setxattr(struct inode *inode, const char *name, 1290 + const void *buf, size_t buflen, int flags) 1291 + { 1292 + struct nfs4_exception exception = { }; 1293 + int err; 1294 + 1295 + do { 1296 + err = _nfs42_proc_setxattr(inode, name, buf, buflen, flags); 1297 + if (!err) 1298 + break; 1299 + err = nfs4_handle_exception(NFS_SERVER(inode), err, 1300 + &exception); 1301 + } while (exception.retry); 1302 + 1303 + return err; 1304 + } 1305 + 1306 + ssize_t nfs42_proc_listxattrs(struct inode *inode, void *buf, 1307 + size_t buflen, u64 *cookiep, bool *eofp) 1308 + { 1309 + struct nfs4_exception exception = { }; 1310 + ssize_t err; 1311 + 1312 + do { 1313 + err = _nfs42_proc_listxattrs(inode, buf, buflen, 1314 + cookiep, eofp); 1315 + if (err >= 0) 1316 + break; 1317 + err = nfs4_handle_exception(NFS_SERVER(inode), err, 1318 + &exception); 1319 + } while (exception.retry); 1320 + 1321 + return err; 1322 + } 1323 + 1324 + int nfs42_proc_removexattr(struct inode *inode, const char *name) 1325 + { 1326 + struct nfs4_exception exception = { }; 1327 + int err; 1328 + 1329 + do { 1330 + err = 
_nfs42_proc_removexattr(inode, name); 1331 + if (!err) 1332 + break; 1333 + err = nfs4_handle_exception(NFS_SERVER(inode), err, 1334 + &exception); 1335 + } while (exception.retry); 1336 + 1095 1337 return err; 1096 1338 }
+1056
fs/nfs/nfs42xattr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved. 5 + * 6 + * User extended attribute client side cache functions. 7 + * 8 + * Author: Frank van der Linden <fllinden@amazon.com> 9 + */ 10 + #include <linux/errno.h> 11 + #include <linux/nfs_fs.h> 12 + #include <linux/hashtable.h> 13 + #include <linux/refcount.h> 14 + #include <uapi/linux/xattr.h> 15 + 16 + #include "nfs4_fs.h" 17 + #include "internal.h" 18 + 19 + /* 20 + * User extended attributes client side caching is implemented by having 21 + * a cache structure attached to NFS inodes. This structure is allocated 22 + * when needed, and freed when the cache is zapped. 23 + * 24 + * The cache structure contains as hash table of entries, and a pointer 25 + * to a special-cased entry for the listxattr cache. 26 + * 27 + * Accessing and allocating / freeing the caches is done via reference 28 + * counting. The cache entries use a similar refcounting scheme. 29 + * 30 + * This makes freeing a cache, both from the shrinker and from the 31 + * zap cache path, easy. It also means that, in current use cases, 32 + * the large majority of inodes will not waste any memory, as they 33 + * will never have any user extended attributes assigned to them. 34 + * 35 + * Attribute entries are hashed in to a simple hash table. They are 36 + * also part of an LRU. 37 + * 38 + * There are three shrinkers. 39 + * 40 + * Two shrinkers deal with the cache entries themselves: one for 41 + * large entries (> PAGE_SIZE), and one for smaller entries. The 42 + * shrinker for the larger entries works more aggressively than 43 + * those for the smaller entries. 44 + * 45 + * The other shrinker frees the cache structures themselves. 46 + */ 47 + 48 + /* 49 + * 64 buckets is a good default. There is likely no reasonable 50 + * workload that uses more than even 64 user extended attributes. 
51 + * You can certainly add a lot more - but you get what you ask for 52 + * in those circumstances. 53 + */ 54 + #define NFS4_XATTR_HASH_SIZE 64 55 + 56 + #define NFSDBG_FACILITY NFSDBG_XATTRCACHE 57 + 58 + struct nfs4_xattr_cache; 59 + struct nfs4_xattr_entry; 60 + 61 + struct nfs4_xattr_bucket { 62 + spinlock_t lock; 63 + struct hlist_head hlist; 64 + struct nfs4_xattr_cache *cache; 65 + bool draining; 66 + }; 67 + 68 + struct nfs4_xattr_cache { 69 + struct kref ref; 70 + spinlock_t hash_lock; /* protects hashtable and lru */ 71 + struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE]; 72 + struct list_head lru; 73 + struct list_head dispose; 74 + atomic_long_t nent; 75 + spinlock_t listxattr_lock; 76 + struct inode *inode; 77 + struct nfs4_xattr_entry *listxattr; 78 + }; 79 + 80 + struct nfs4_xattr_entry { 81 + struct kref ref; 82 + struct hlist_node hnode; 83 + struct list_head lru; 84 + struct list_head dispose; 85 + char *xattr_name; 86 + void *xattr_value; 87 + size_t xattr_size; 88 + struct nfs4_xattr_bucket *bucket; 89 + uint32_t flags; 90 + }; 91 + 92 + #define NFS4_XATTR_ENTRY_EXTVAL 0x0001 93 + 94 + /* 95 + * LRU list of NFS inodes that have xattr caches. 96 + */ 97 + static struct list_lru nfs4_xattr_cache_lru; 98 + static struct list_lru nfs4_xattr_entry_lru; 99 + static struct list_lru nfs4_xattr_large_entry_lru; 100 + 101 + static struct kmem_cache *nfs4_xattr_cache_cachep; 102 + 103 + /* 104 + * Hashing helper functions. 105 + */ 106 + static void 107 + nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache) 108 + { 109 + unsigned int i; 110 + 111 + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { 112 + INIT_HLIST_HEAD(&cache->buckets[i].hlist); 113 + spin_lock_init(&cache->buckets[i].lock); 114 + cache->buckets[i].cache = cache; 115 + cache->buckets[i].draining = false; 116 + } 117 + } 118 + 119 + /* 120 + * Locking order: 121 + * 1. inode i_lock or bucket lock 122 + * 2. 
list_lru lock (taken by list_lru_* functions) 123 + */ 124 + 125 + /* 126 + * Wrapper functions to add a cache entry to the right LRU. 127 + */ 128 + static bool 129 + nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry) 130 + { 131 + struct list_lru *lru; 132 + 133 + lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? 134 + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; 135 + 136 + return list_lru_add(lru, &entry->lru); 137 + } 138 + 139 + static bool 140 + nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry) 141 + { 142 + struct list_lru *lru; 143 + 144 + lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ? 145 + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; 146 + 147 + return list_lru_del(lru, &entry->lru); 148 + } 149 + 150 + /* 151 + * This function allocates cache entries. They are the normal 152 + * extended attribute name/value pairs, but may also be a listxattr 153 + * cache. Those allocations use the same entry so that they can be 154 + * treated as one by the memory shrinker. 155 + * 156 + * xattr cache entries are allocated together with names. If the 157 + * value fits in to one page with the entry structure and the name, 158 + * it will also be part of the same allocation (kmalloc). This is 159 + * expected to be the vast majority of cases. Larger allocations 160 + * have a value pointer that is allocated separately by kvmalloc. 161 + * 162 + * Parameters: 163 + * 164 + * @name: Name of the extended attribute. NULL for listxattr cache 165 + * entry. 166 + * @value: Value of attribute, or listxattr cache. NULL if the 167 + * value is to be copied from pages instead. 168 + * @pages: Pages to copy the value from, if not NULL. Passed in to 169 + * make it easier to copy the value after an RPC, even if 170 + * the value will not be passed up to application (e.g. 171 + * for a 'query' getxattr with NULL buffer). 172 + * @len: Length of the value. Can be 0 for zero-length attribues. 173 + * @value and @pages will be NULL if @len is 0. 
174 + */ 175 + static struct nfs4_xattr_entry * 176 + nfs4_xattr_alloc_entry(const char *name, const void *value, 177 + struct page **pages, size_t len) 178 + { 179 + struct nfs4_xattr_entry *entry; 180 + void *valp; 181 + char *namep; 182 + size_t alloclen, slen; 183 + char *buf; 184 + uint32_t flags; 185 + 186 + BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) + 187 + XATTR_NAME_MAX + 1 > PAGE_SIZE); 188 + 189 + alloclen = sizeof(struct nfs4_xattr_entry); 190 + if (name != NULL) { 191 + slen = strlen(name) + 1; 192 + alloclen += slen; 193 + } else 194 + slen = 0; 195 + 196 + if (alloclen + len <= PAGE_SIZE) { 197 + alloclen += len; 198 + flags = 0; 199 + } else { 200 + flags = NFS4_XATTR_ENTRY_EXTVAL; 201 + } 202 + 203 + buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS); 204 + if (buf == NULL) 205 + return NULL; 206 + entry = (struct nfs4_xattr_entry *)buf; 207 + 208 + if (name != NULL) { 209 + namep = buf + sizeof(struct nfs4_xattr_entry); 210 + memcpy(namep, name, slen); 211 + } else { 212 + namep = NULL; 213 + } 214 + 215 + 216 + if (flags & NFS4_XATTR_ENTRY_EXTVAL) { 217 + valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS); 218 + if (valp == NULL) { 219 + kfree(buf); 220 + return NULL; 221 + } 222 + } else if (len != 0) { 223 + valp = buf + sizeof(struct nfs4_xattr_entry) + slen; 224 + } else 225 + valp = NULL; 226 + 227 + if (valp != NULL) { 228 + if (value != NULL) 229 + memcpy(valp, value, len); 230 + else 231 + _copy_from_pages(valp, pages, 0, len); 232 + } 233 + 234 + entry->flags = flags; 235 + entry->xattr_value = valp; 236 + kref_init(&entry->ref); 237 + entry->xattr_name = namep; 238 + entry->xattr_size = len; 239 + entry->bucket = NULL; 240 + INIT_LIST_HEAD(&entry->lru); 241 + INIT_LIST_HEAD(&entry->dispose); 242 + INIT_HLIST_NODE(&entry->hnode); 243 + 244 + return entry; 245 + } 246 + 247 + static void 248 + nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry) 249 + { 250 + if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) 251 + 
kvfree(entry->xattr_value); 252 + kfree(entry); 253 + } 254 + 255 + static void 256 + nfs4_xattr_free_entry_cb(struct kref *kref) 257 + { 258 + struct nfs4_xattr_entry *entry; 259 + 260 + entry = container_of(kref, struct nfs4_xattr_entry, ref); 261 + 262 + if (WARN_ON(!list_empty(&entry->lru))) 263 + return; 264 + 265 + nfs4_xattr_free_entry(entry); 266 + } 267 + 268 + static void 269 + nfs4_xattr_free_cache_cb(struct kref *kref) 270 + { 271 + struct nfs4_xattr_cache *cache; 272 + int i; 273 + 274 + cache = container_of(kref, struct nfs4_xattr_cache, ref); 275 + 276 + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { 277 + if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist))) 278 + return; 279 + cache->buckets[i].draining = false; 280 + } 281 + 282 + cache->listxattr = NULL; 283 + 284 + kmem_cache_free(nfs4_xattr_cache_cachep, cache); 285 + 286 + } 287 + 288 + static struct nfs4_xattr_cache * 289 + nfs4_xattr_alloc_cache(void) 290 + { 291 + struct nfs4_xattr_cache *cache; 292 + 293 + cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, 294 + GFP_KERNEL_ACCOUNT | GFP_NOFS); 295 + if (cache == NULL) 296 + return NULL; 297 + 298 + kref_init(&cache->ref); 299 + atomic_long_set(&cache->nent, 0); 300 + 301 + return cache; 302 + } 303 + 304 + /* 305 + * Set the listxattr cache, which is a special-cased cache entry. 306 + * The special value ERR_PTR(-ESTALE) is used to indicate that 307 + * the cache is being drained - this prevents a new listxattr 308 + * cache from being added to what is now a stale cache. 
309 + */ 310 + static int 311 + nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache, 312 + struct nfs4_xattr_entry *new) 313 + { 314 + struct nfs4_xattr_entry *old; 315 + int ret = 1; 316 + 317 + spin_lock(&cache->listxattr_lock); 318 + 319 + old = cache->listxattr; 320 + 321 + if (old == ERR_PTR(-ESTALE)) { 322 + ret = 0; 323 + goto out; 324 + } 325 + 326 + cache->listxattr = new; 327 + if (new != NULL && new != ERR_PTR(-ESTALE)) 328 + nfs4_xattr_entry_lru_add(new); 329 + 330 + if (old != NULL) { 331 + nfs4_xattr_entry_lru_del(old); 332 + kref_put(&old->ref, nfs4_xattr_free_entry_cb); 333 + } 334 + out: 335 + spin_unlock(&cache->listxattr_lock); 336 + 337 + return ret; 338 + } 339 + 340 + /* 341 + * Unlink a cache from its parent inode, clearing out an invalid 342 + * cache. Must be called with i_lock held. 343 + */ 344 + static struct nfs4_xattr_cache * 345 + nfs4_xattr_cache_unlink(struct inode *inode) 346 + { 347 + struct nfs_inode *nfsi; 348 + struct nfs4_xattr_cache *oldcache; 349 + 350 + nfsi = NFS_I(inode); 351 + 352 + oldcache = nfsi->xattr_cache; 353 + if (oldcache != NULL) { 354 + list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru); 355 + oldcache->inode = NULL; 356 + } 357 + nfsi->xattr_cache = NULL; 358 + nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR; 359 + 360 + return oldcache; 361 + 362 + } 363 + 364 + /* 365 + * Discard a cache. Called by get_cache() if there was an old, 366 + * invalid cache. Can also be called from a shrinker callback. 367 + * 368 + * The cache is dead, it has already been unlinked from its inode, 369 + * and no longer appears on the cache LRU list. 370 + * 371 + * Mark all buckets as draining, so that no new entries are added. This 372 + * could still happen in the unlikely, but possible case that another 373 + * thread had grabbed a reference before it was unlinked from the inode, 374 + * and is still holding it for an add operation. 
375 + * 376 + * Remove all entries from the LRU lists, so that there is no longer 377 + * any way to 'find' this cache. Then, remove the entries from the hash 378 + * table. 379 + * 380 + * At that point, the cache will remain empty and can be freed when the final 381 + * reference drops, which is very likely the kref_put at the end of 382 + * this function, or the one called immediately afterwards in the 383 + * shrinker callback. 384 + */ 385 + static void 386 + nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache) 387 + { 388 + unsigned int i; 389 + struct nfs4_xattr_entry *entry; 390 + struct nfs4_xattr_bucket *bucket; 391 + struct hlist_node *n; 392 + 393 + nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE)); 394 + 395 + for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) { 396 + bucket = &cache->buckets[i]; 397 + 398 + spin_lock(&bucket->lock); 399 + bucket->draining = true; 400 + hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) { 401 + nfs4_xattr_entry_lru_del(entry); 402 + hlist_del_init(&entry->hnode); 403 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 404 + } 405 + spin_unlock(&bucket->lock); 406 + } 407 + 408 + atomic_long_set(&cache->nent, 0); 409 + 410 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 411 + } 412 + 413 + /* 414 + * Get a referenced copy of the cache structure. Avoid doing allocs 415 + * while holding i_lock, which means that we do some optimistic allocation, 416 + * and might have to free the result in rare cases. 417 + * 418 + * This function only checks the NFS_INO_INVALID_XATTR cache validity bit 419 + * and acts accordingly, replacing the cache when needed. For the read case 420 + * (!add), this means that the caller must make sure that the cache 421 + * is valid before calling this function. getxattr and listxattr call 422 + * revalidate_inode to do this. The attribute cache timeout (for the 423 + * non-delegated case) is expected to be dealt with in the revalidate 424 + * call. 
425 + */ 426 + 427 + static struct nfs4_xattr_cache * 428 + nfs4_xattr_get_cache(struct inode *inode, int add) 429 + { 430 + struct nfs_inode *nfsi; 431 + struct nfs4_xattr_cache *cache, *oldcache, *newcache; 432 + 433 + nfsi = NFS_I(inode); 434 + 435 + cache = oldcache = NULL; 436 + 437 + spin_lock(&inode->i_lock); 438 + 439 + if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) 440 + oldcache = nfs4_xattr_cache_unlink(inode); 441 + else 442 + cache = nfsi->xattr_cache; 443 + 444 + if (cache != NULL) 445 + kref_get(&cache->ref); 446 + 447 + spin_unlock(&inode->i_lock); 448 + 449 + if (add && cache == NULL) { 450 + newcache = NULL; 451 + 452 + cache = nfs4_xattr_alloc_cache(); 453 + if (cache == NULL) 454 + goto out; 455 + 456 + spin_lock(&inode->i_lock); 457 + if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) { 458 + /* 459 + * The cache was invalidated again. Give up, 460 + * since what we want to enter is now likely 461 + * outdated anyway. 462 + */ 463 + spin_unlock(&inode->i_lock); 464 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 465 + cache = NULL; 466 + goto out; 467 + } 468 + 469 + /* 470 + * Check if someone beat us to it. 471 + */ 472 + if (nfsi->xattr_cache != NULL) { 473 + newcache = nfsi->xattr_cache; 474 + kref_get(&newcache->ref); 475 + } else { 476 + kref_get(&cache->ref); 477 + nfsi->xattr_cache = cache; 478 + cache->inode = inode; 479 + list_lru_add(&nfs4_xattr_cache_lru, &cache->lru); 480 + } 481 + 482 + spin_unlock(&inode->i_lock); 483 + 484 + /* 485 + * If there was a race, throw away the cache we just 486 + * allocated, and use the new one allocated by someone 487 + * else. 488 + */ 489 + if (newcache != NULL) { 490 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 491 + cache = newcache; 492 + } 493 + } 494 + 495 + out: 496 + /* 497 + * Discard the now orphaned old cache. 
498 + */ 499 + if (oldcache != NULL) 500 + nfs4_xattr_discard_cache(oldcache); 501 + 502 + return cache; 503 + } 504 + 505 + static inline struct nfs4_xattr_bucket * 506 + nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name) 507 + { 508 + return &cache->buckets[jhash(name, strlen(name), 0) & 509 + (ARRAY_SIZE(cache->buckets) - 1)]; 510 + } 511 + 512 + static struct nfs4_xattr_entry * 513 + nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name) 514 + { 515 + struct nfs4_xattr_entry *entry; 516 + 517 + entry = NULL; 518 + 519 + hlist_for_each_entry(entry, &bucket->hlist, hnode) { 520 + if (!strcmp(entry->xattr_name, name)) 521 + break; 522 + } 523 + 524 + return entry; 525 + } 526 + 527 + static int 528 + nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache, 529 + struct nfs4_xattr_entry *entry) 530 + { 531 + struct nfs4_xattr_bucket *bucket; 532 + struct nfs4_xattr_entry *oldentry = NULL; 533 + int ret = 1; 534 + 535 + bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name); 536 + entry->bucket = bucket; 537 + 538 + spin_lock(&bucket->lock); 539 + 540 + if (bucket->draining) { 541 + ret = 0; 542 + goto out; 543 + } 544 + 545 + oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name); 546 + if (oldentry != NULL) { 547 + hlist_del_init(&oldentry->hnode); 548 + nfs4_xattr_entry_lru_del(oldentry); 549 + } else { 550 + atomic_long_inc(&cache->nent); 551 + } 552 + 553 + hlist_add_head(&entry->hnode, &bucket->hlist); 554 + nfs4_xattr_entry_lru_add(entry); 555 + 556 + out: 557 + spin_unlock(&bucket->lock); 558 + 559 + if (oldentry != NULL) 560 + kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb); 561 + 562 + return ret; 563 + } 564 + 565 + static void 566 + nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name) 567 + { 568 + struct nfs4_xattr_bucket *bucket; 569 + struct nfs4_xattr_entry *entry; 570 + 571 + bucket = nfs4_xattr_hash_bucket(cache, name); 572 + 573 + spin_lock(&bucket->lock); 574 + 575 + entry = 
nfs4_xattr_get_entry(bucket, name); 576 + if (entry != NULL) { 577 + hlist_del_init(&entry->hnode); 578 + nfs4_xattr_entry_lru_del(entry); 579 + atomic_long_dec(&cache->nent); 580 + } 581 + 582 + spin_unlock(&bucket->lock); 583 + 584 + if (entry != NULL) 585 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 586 + } 587 + 588 + static struct nfs4_xattr_entry * 589 + nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name) 590 + { 591 + struct nfs4_xattr_bucket *bucket; 592 + struct nfs4_xattr_entry *entry; 593 + 594 + bucket = nfs4_xattr_hash_bucket(cache, name); 595 + 596 + spin_lock(&bucket->lock); 597 + 598 + entry = nfs4_xattr_get_entry(bucket, name); 599 + if (entry != NULL) 600 + kref_get(&entry->ref); 601 + 602 + spin_unlock(&bucket->lock); 603 + 604 + return entry; 605 + } 606 + 607 + /* 608 + * Entry point to retrieve an entry from the cache. 609 + */ 610 + ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf, 611 + ssize_t buflen) 612 + { 613 + struct nfs4_xattr_cache *cache; 614 + struct nfs4_xattr_entry *entry; 615 + ssize_t ret; 616 + 617 + cache = nfs4_xattr_get_cache(inode, 0); 618 + if (cache == NULL) 619 + return -ENOENT; 620 + 621 + ret = 0; 622 + entry = nfs4_xattr_hash_find(cache, name); 623 + 624 + if (entry != NULL) { 625 + dprintk("%s: cache hit '%s', len %lu\n", __func__, 626 + entry->xattr_name, (unsigned long)entry->xattr_size); 627 + if (buflen == 0) { 628 + /* Length probe only */ 629 + ret = entry->xattr_size; 630 + } else if (buflen < entry->xattr_size) 631 + ret = -ERANGE; 632 + else { 633 + memcpy(buf, entry->xattr_value, entry->xattr_size); 634 + ret = entry->xattr_size; 635 + } 636 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 637 + } else { 638 + dprintk("%s: cache miss '%s'\n", __func__, name); 639 + ret = -ENOENT; 640 + } 641 + 642 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 643 + 644 + return ret; 645 + } 646 + 647 + /* 648 + * Retrieve a cached list of xattrs from the cache. 
649 + */ 650 + ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen) 651 + { 652 + struct nfs4_xattr_cache *cache; 653 + struct nfs4_xattr_entry *entry; 654 + ssize_t ret; 655 + 656 + cache = nfs4_xattr_get_cache(inode, 0); 657 + if (cache == NULL) 658 + return -ENOENT; 659 + 660 + spin_lock(&cache->listxattr_lock); 661 + 662 + entry = cache->listxattr; 663 + 664 + if (entry != NULL && entry != ERR_PTR(-ESTALE)) { 665 + if (buflen == 0) { 666 + /* Length probe only */ 667 + ret = entry->xattr_size; 668 + } else if (entry->xattr_size > buflen) 669 + ret = -ERANGE; 670 + else { 671 + memcpy(buf, entry->xattr_value, entry->xattr_size); 672 + ret = entry->xattr_size; 673 + } 674 + } else { 675 + ret = -ENOENT; 676 + } 677 + 678 + spin_unlock(&cache->listxattr_lock); 679 + 680 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 681 + 682 + return ret; 683 + } 684 + 685 + /* 686 + * Add an xattr to the cache. 687 + * 688 + * This also invalidates the xattr list cache. 689 + */ 690 + void nfs4_xattr_cache_add(struct inode *inode, const char *name, 691 + const char *buf, struct page **pages, ssize_t buflen) 692 + { 693 + struct nfs4_xattr_cache *cache; 694 + struct nfs4_xattr_entry *entry; 695 + 696 + dprintk("%s: add '%s' len %lu\n", __func__, 697 + name, (unsigned long)buflen); 698 + 699 + cache = nfs4_xattr_get_cache(inode, 1); 700 + if (cache == NULL) 701 + return; 702 + 703 + entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen); 704 + if (entry == NULL) 705 + goto out; 706 + 707 + (void)nfs4_xattr_set_listcache(cache, NULL); 708 + 709 + if (!nfs4_xattr_hash_add(cache, entry)) 710 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 711 + 712 + out: 713 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 714 + } 715 + 716 + 717 + /* 718 + * Remove an xattr from the cache. 719 + * 720 + * This also invalidates the xattr list cache. 
721 + */ 722 + void nfs4_xattr_cache_remove(struct inode *inode, const char *name) 723 + { 724 + struct nfs4_xattr_cache *cache; 725 + 726 + dprintk("%s: remove '%s'\n", __func__, name); 727 + 728 + cache = nfs4_xattr_get_cache(inode, 0); 729 + if (cache == NULL) 730 + return; 731 + 732 + (void)nfs4_xattr_set_listcache(cache, NULL); 733 + nfs4_xattr_hash_remove(cache, name); 734 + 735 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 736 + } 737 + 738 + /* 739 + * Cache listxattr output, replacing any possible old one. 740 + */ 741 + void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf, 742 + ssize_t buflen) 743 + { 744 + struct nfs4_xattr_cache *cache; 745 + struct nfs4_xattr_entry *entry; 746 + 747 + cache = nfs4_xattr_get_cache(inode, 1); 748 + if (cache == NULL) 749 + return; 750 + 751 + entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen); 752 + if (entry == NULL) 753 + goto out; 754 + 755 + /* 756 + * This is just there to be able to get to bucket->cache, 757 + * which is obviously the same for all buckets, so just 758 + * use bucket 0. 759 + */ 760 + entry->bucket = &cache->buckets[0]; 761 + 762 + if (!nfs4_xattr_set_listcache(cache, entry)) 763 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 764 + 765 + out: 766 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 767 + } 768 + 769 + /* 770 + * Zap the entire cache. Called when an inode is evicted. 771 + */ 772 + void nfs4_xattr_cache_zap(struct inode *inode) 773 + { 774 + struct nfs4_xattr_cache *oldcache; 775 + 776 + spin_lock(&inode->i_lock); 777 + oldcache = nfs4_xattr_cache_unlink(inode); 778 + spin_unlock(&inode->i_lock); 779 + 780 + if (oldcache) 781 + nfs4_xattr_discard_cache(oldcache); 782 + } 783 + 784 + /* 785 + * The large-entry LRU is shrunk more aggressively than the regular 786 + * entry LRU and the cache LRU, by setting @seeks to 1 for its shrinker. 787 + * 788 + * Cache structures are freed only when they've become empty, after 789 + * pruning all but one entry. 
790 + */ 791 + 792 + static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink, 793 + struct shrink_control *sc); 794 + static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink, 795 + struct shrink_control *sc); 796 + static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink, 797 + struct shrink_control *sc); 798 + static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink, 799 + struct shrink_control *sc); 800 + 801 + static struct shrinker nfs4_xattr_cache_shrinker = { 802 + .count_objects = nfs4_xattr_cache_count, 803 + .scan_objects = nfs4_xattr_cache_scan, 804 + .seeks = DEFAULT_SEEKS, 805 + .flags = SHRINKER_MEMCG_AWARE, 806 + }; 807 + 808 + static struct shrinker nfs4_xattr_entry_shrinker = { 809 + .count_objects = nfs4_xattr_entry_count, 810 + .scan_objects = nfs4_xattr_entry_scan, 811 + .seeks = DEFAULT_SEEKS, 812 + .batch = 512, 813 + .flags = SHRINKER_MEMCG_AWARE, 814 + }; 815 + 816 + static struct shrinker nfs4_xattr_large_entry_shrinker = { 817 + .count_objects = nfs4_xattr_entry_count, 818 + .scan_objects = nfs4_xattr_entry_scan, 819 + .seeks = 1, 820 + .batch = 512, 821 + .flags = SHRINKER_MEMCG_AWARE, 822 + }; 823 + 824 + static enum lru_status 825 + cache_lru_isolate(struct list_head *item, 826 + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) 827 + { 828 + struct list_head *dispose = arg; 829 + struct inode *inode; 830 + struct nfs4_xattr_cache *cache = container_of(item, 831 + struct nfs4_xattr_cache, lru); 832 + 833 + if (atomic_long_read(&cache->nent) > 1) 834 + return LRU_SKIP; 835 + 836 + /* 837 + * If a cache structure is on the LRU list, we know that 838 + * its inode is valid. Try to lock it to break the link. 839 + * Since we're inverting the lock order here, only try. 
840 + */ 841 + inode = cache->inode; 842 + 843 + if (!spin_trylock(&inode->i_lock)) 844 + return LRU_SKIP; 845 + 846 + kref_get(&cache->ref); 847 + 848 + cache->inode = NULL; 849 + NFS_I(inode)->xattr_cache = NULL; 850 + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR; 851 + list_lru_isolate(lru, &cache->lru); 852 + 853 + spin_unlock(&inode->i_lock); 854 + 855 + list_add_tail(&cache->dispose, dispose); 856 + return LRU_REMOVED; 857 + } 858 + 859 + static unsigned long 860 + nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 861 + { 862 + LIST_HEAD(dispose); 863 + unsigned long freed; 864 + struct nfs4_xattr_cache *cache; 865 + 866 + freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc, 867 + cache_lru_isolate, &dispose); 868 + while (!list_empty(&dispose)) { 869 + cache = list_first_entry(&dispose, struct nfs4_xattr_cache, 870 + dispose); 871 + list_del_init(&cache->dispose); 872 + nfs4_xattr_discard_cache(cache); 873 + kref_put(&cache->ref, nfs4_xattr_free_cache_cb); 874 + } 875 + 876 + return freed; 877 + } 878 + 879 + 880 + static unsigned long 881 + nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc) 882 + { 883 + unsigned long count; 884 + 885 + count = list_lru_count(&nfs4_xattr_cache_lru); 886 + return vfs_pressure_ratio(count); 887 + } 888 + 889 + static enum lru_status 890 + entry_lru_isolate(struct list_head *item, 891 + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) 892 + { 893 + struct list_head *dispose = arg; 894 + struct nfs4_xattr_bucket *bucket; 895 + struct nfs4_xattr_cache *cache; 896 + struct nfs4_xattr_entry *entry = container_of(item, 897 + struct nfs4_xattr_entry, lru); 898 + 899 + bucket = entry->bucket; 900 + cache = bucket->cache; 901 + 902 + /* 903 + * Unhook the entry from its parent (either a cache bucket 904 + * or a cache structure if it's a listxattr buf), so that 905 + * it's no longer found. Then add it to the isolate list, 906 + * to be freed later. 
907 + * 908 + * In both cases, we're reverting lock order, so use 909 + * trylock and skip the entry if we can't get the lock. 910 + */ 911 + if (entry->xattr_name != NULL) { 912 + /* Regular cache entry */ 913 + if (!spin_trylock(&bucket->lock)) 914 + return LRU_SKIP; 915 + 916 + kref_get(&entry->ref); 917 + 918 + hlist_del_init(&entry->hnode); 919 + atomic_long_dec(&cache->nent); 920 + list_lru_isolate(lru, &entry->lru); 921 + 922 + spin_unlock(&bucket->lock); 923 + } else { 924 + /* Listxattr cache entry */ 925 + if (!spin_trylock(&cache->listxattr_lock)) 926 + return LRU_SKIP; 927 + 928 + kref_get(&entry->ref); 929 + 930 + cache->listxattr = NULL; 931 + list_lru_isolate(lru, &entry->lru); 932 + 933 + spin_unlock(&cache->listxattr_lock); 934 + } 935 + 936 + list_add_tail(&entry->dispose, dispose); 937 + return LRU_REMOVED; 938 + } 939 + 940 + static unsigned long 941 + nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc) 942 + { 943 + LIST_HEAD(dispose); 944 + unsigned long freed; 945 + struct nfs4_xattr_entry *entry; 946 + struct list_lru *lru; 947 + 948 + lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? 949 + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; 950 + 951 + freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose); 952 + 953 + while (!list_empty(&dispose)) { 954 + entry = list_first_entry(&dispose, struct nfs4_xattr_entry, 955 + dispose); 956 + list_del_init(&entry->dispose); 957 + 958 + /* 959 + * Drop two references: the one that we just grabbed 960 + * in entry_lru_isolate, and the one that was set 961 + * when the entry was first allocated. 
962 + */ 963 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 964 + kref_put(&entry->ref, nfs4_xattr_free_entry_cb); 965 + } 966 + 967 + return freed; 968 + } 969 + 970 + static unsigned long 971 + nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc) 972 + { 973 + unsigned long count; 974 + struct list_lru *lru; 975 + 976 + lru = (shrink == &nfs4_xattr_large_entry_shrinker) ? 977 + &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru; 978 + 979 + count = list_lru_count(lru); 980 + return vfs_pressure_ratio(count); 981 + } 982 + 983 + 984 + static void nfs4_xattr_cache_init_once(void *p) 985 + { 986 + struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p; 987 + 988 + spin_lock_init(&cache->listxattr_lock); 989 + atomic_long_set(&cache->nent, 0); 990 + nfs4_xattr_hash_init(cache); 991 + cache->listxattr = NULL; 992 + INIT_LIST_HEAD(&cache->lru); 993 + INIT_LIST_HEAD(&cache->dispose); 994 + } 995 + 996 + int __init nfs4_xattr_cache_init(void) 997 + { 998 + int ret = 0; 999 + 1000 + nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache", 1001 + sizeof(struct nfs4_xattr_cache), 0, 1002 + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1003 + nfs4_xattr_cache_init_once); 1004 + if (nfs4_xattr_cache_cachep == NULL) 1005 + return -ENOMEM; 1006 + 1007 + ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru, 1008 + &nfs4_xattr_large_entry_shrinker); 1009 + if (ret) 1010 + goto out4; 1011 + 1012 + ret = list_lru_init_memcg(&nfs4_xattr_entry_lru, 1013 + &nfs4_xattr_entry_shrinker); 1014 + if (ret) 1015 + goto out3; 1016 + 1017 + ret = list_lru_init_memcg(&nfs4_xattr_cache_lru, 1018 + &nfs4_xattr_cache_shrinker); 1019 + if (ret) 1020 + goto out2; 1021 + 1022 + ret = register_shrinker(&nfs4_xattr_cache_shrinker); 1023 + if (ret) 1024 + goto out1; 1025 + 1026 + ret = register_shrinker(&nfs4_xattr_entry_shrinker); 1027 + if (ret) 1028 + goto out; 1029 + 1030 + ret = register_shrinker(&nfs4_xattr_large_entry_shrinker); 1031 + if 
(!ret) 1032 + return 0; 1033 + 1034 + unregister_shrinker(&nfs4_xattr_entry_shrinker); 1035 + out: 1036 + unregister_shrinker(&nfs4_xattr_cache_shrinker); 1037 + out1: 1038 + list_lru_destroy(&nfs4_xattr_cache_lru); 1039 + out2: 1040 + list_lru_destroy(&nfs4_xattr_entry_lru); 1041 + out3: 1042 + list_lru_destroy(&nfs4_xattr_large_entry_lru); 1043 + out4: 1044 + kmem_cache_destroy(nfs4_xattr_cache_cachep); 1045 + 1046 + return ret; 1047 + } 1048 +
/*
 * Tear down everything nfs4_xattr_cache_init() set up.
 *
 * The init path registers three shrinkers (cache, entry and large entry)
 * and initialises three LRU lists, so all three of each must be released
 * here. The large-entry shrinker and LRU in particular must not be
 * forgotten: a shrinker that stays registered keeps referring to this
 * file's callbacks and LRU list after teardown.
 *
 * Shrinkers are unregistered first so that no scan can run against an
 * LRU list while it is being destroyed; the slab cache goes last.
 */
void nfs4_xattr_cache_exit(void)
{
	unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
	unregister_shrinker(&nfs4_xattr_entry_shrinker);
	unregister_shrinker(&nfs4_xattr_cache_shrinker);
	list_lru_destroy(&nfs4_xattr_large_entry_lru);
	list_lru_destroy(&nfs4_xattr_entry_lru);
	list_lru_destroy(&nfs4_xattr_cache_lru);
	kmem_cache_destroy(nfs4_xattr_cache_cachep);
}
+438
fs/nfs/nfs42xdr.c
··· 169 169 decode_clone_maxsz + \ 170 170 decode_getattr_maxsz) 171 171 172 + /* Not limited by NFS itself, limited by the generic xattr code */ 173 + #define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) 174 + 175 + #define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ 176 + nfs4_xattr_name_maxsz) 177 + #define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1) 178 + #define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ 179 + 1 + nfs4_xattr_name_maxsz + 1) 180 + #define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) 181 + #define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) 182 + #define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1) 183 + #define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ 184 + nfs4_xattr_name_maxsz) 185 + #define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ 186 + decode_change_info_maxsz) 187 + 188 + #define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ 189 + encode_sequence_maxsz + \ 190 + encode_putfh_maxsz + \ 191 + encode_getxattr_maxsz) 192 + #define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ 193 + decode_sequence_maxsz + \ 194 + decode_putfh_maxsz + \ 195 + decode_getxattr_maxsz) 196 + #define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ 197 + encode_sequence_maxsz + \ 198 + encode_putfh_maxsz + \ 199 + encode_setxattr_maxsz) 200 + #define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ 201 + decode_sequence_maxsz + \ 202 + decode_putfh_maxsz + \ 203 + decode_setxattr_maxsz) 204 + #define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ 205 + encode_sequence_maxsz + \ 206 + encode_putfh_maxsz + \ 207 + encode_listxattrs_maxsz) 208 + #define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ 209 + decode_sequence_maxsz + \ 210 + decode_putfh_maxsz + \ 211 + decode_listxattrs_maxsz) 212 + #define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ 213 + encode_sequence_maxsz + \ 214 + encode_putfh_maxsz + \ 215 + 
encode_removexattr_maxsz) 216 + #define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ 217 + decode_sequence_maxsz + \ 218 + decode_putfh_maxsz + \ 219 + decode_removexattr_maxsz) 220 + 221 + /* 222 + * These values specify the maximum amount of data that is not 223 + * associated with the extended attribute name or extended 224 + * attribute list in the SETXATTR, GETXATTR and LISTXATTR 225 + * respectively. 226 + */ 227 + const u32 nfs42_maxsetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 228 + compound_encode_hdr_maxsz + 229 + encode_sequence_maxsz + 230 + encode_putfh_maxsz + 1 + 231 + nfs4_xattr_name_maxsz) 232 + * XDR_UNIT); 233 + 234 + const u32 nfs42_maxgetxattr_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 235 + compound_decode_hdr_maxsz + 236 + decode_sequence_maxsz + 237 + decode_putfh_maxsz + 1) * XDR_UNIT); 238 + 239 + const u32 nfs42_maxlistxattrs_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 240 + compound_decode_hdr_maxsz + 241 + decode_sequence_maxsz + 242 + decode_putfh_maxsz + 3) * XDR_UNIT); 243 + 172 244 static void encode_fallocate(struct xdr_stream *xdr, 173 245 const struct nfs42_falloc_args *args) 174 246 { ··· 403 331 p = reserve_space(xdr, 4); 404 332 *p = cpu_to_be32(1); 405 333 encode_device_error(xdr, &args->errors[0]); 334 + } 335 + 336 + static void encode_setxattr(struct xdr_stream *xdr, 337 + const struct nfs42_setxattrargs *arg, 338 + struct compound_hdr *hdr) 339 + { 340 + __be32 *p; 341 + 342 + BUILD_BUG_ON(XATTR_CREATE != SETXATTR4_CREATE); 343 + BUILD_BUG_ON(XATTR_REPLACE != SETXATTR4_REPLACE); 344 + 345 + encode_op_hdr(xdr, OP_SETXATTR, decode_setxattr_maxsz, hdr); 346 + p = reserve_space(xdr, 4); 347 + *p = cpu_to_be32(arg->xattr_flags); 348 + encode_string(xdr, strlen(arg->xattr_name), arg->xattr_name); 349 + p = reserve_space(xdr, 4); 350 + *p = cpu_to_be32(arg->xattr_len); 351 + if (arg->xattr_len) 352 + xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len); 353 + } 354 + 355 + static int decode_setxattr(struct xdr_stream 
*xdr, 356 + struct nfs4_change_info *cinfo) 357 + { 358 + int status; 359 + 360 + status = decode_op_hdr(xdr, OP_SETXATTR); 361 + if (status) 362 + goto out; 363 + status = decode_change_info(xdr, cinfo); 364 + out: 365 + return status; 366 + } 367 + 368 + 369 + static void encode_getxattr(struct xdr_stream *xdr, const char *name, 370 + struct compound_hdr *hdr) 371 + { 372 + encode_op_hdr(xdr, OP_GETXATTR, decode_getxattr_maxsz, hdr); 373 + encode_string(xdr, strlen(name), name); 374 + } 375 + 376 + static int decode_getxattr(struct xdr_stream *xdr, 377 + struct nfs42_getxattrres *res, 378 + struct rpc_rqst *req) 379 + { 380 + int status; 381 + __be32 *p; 382 + u32 len, rdlen; 383 + 384 + status = decode_op_hdr(xdr, OP_GETXATTR); 385 + if (status) 386 + return status; 387 + 388 + p = xdr_inline_decode(xdr, 4); 389 + if (unlikely(!p)) 390 + return -EIO; 391 + 392 + len = be32_to_cpup(p); 393 + if (len > req->rq_rcv_buf.page_len) 394 + return -ERANGE; 395 + 396 + res->xattr_len = len; 397 + 398 + if (len > 0) { 399 + rdlen = xdr_read_pages(xdr, len); 400 + if (rdlen < len) 401 + return -EIO; 402 + } 403 + 404 + return 0; 405 + } 406 + 407 + static void encode_removexattr(struct xdr_stream *xdr, const char *name, 408 + struct compound_hdr *hdr) 409 + { 410 + encode_op_hdr(xdr, OP_REMOVEXATTR, decode_removexattr_maxsz, hdr); 411 + encode_string(xdr, strlen(name), name); 412 + } 413 + 414 + 415 + static int decode_removexattr(struct xdr_stream *xdr, 416 + struct nfs4_change_info *cinfo) 417 + { 418 + int status; 419 + 420 + status = decode_op_hdr(xdr, OP_REMOVEXATTR); 421 + if (status) 422 + goto out; 423 + 424 + status = decode_change_info(xdr, cinfo); 425 + out: 426 + return status; 427 + } 428 + 429 + static void encode_listxattrs(struct xdr_stream *xdr, 430 + const struct nfs42_listxattrsargs *arg, 431 + struct compound_hdr *hdr) 432 + { 433 + __be32 *p; 434 + 435 + encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr); 436 + 437 + p = 
reserve_space(xdr, 12); 438 + if (unlikely(!p)) 439 + return; 440 + 441 + p = xdr_encode_hyper(p, arg->cookie); 442 + /* 443 + * RFC 8276 says to specify the full max length of the LISTXATTRS 444 + * XDR reply. Count is set to the XDR length of the names array 445 + * plus the EOF marker. So, add the cookie and the names count. 446 + */ 447 + *p = cpu_to_be32(arg->count + 8 + 4); 448 + } 449 + 450 + static int decode_listxattrs(struct xdr_stream *xdr, 451 + struct nfs42_listxattrsres *res) 452 + { 453 + int status; 454 + __be32 *p; 455 + u32 count, len, ulen; 456 + size_t left, copied; 457 + char *buf; 458 + 459 + status = decode_op_hdr(xdr, OP_LISTXATTRS); 460 + if (status) { 461 + /* 462 + * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL 463 + * should be translated to ERANGE. 464 + */ 465 + if (status == -ETOOSMALL) 466 + status = -ERANGE; 467 + goto out; 468 + } 469 + 470 + p = xdr_inline_decode(xdr, 8); 471 + if (unlikely(!p)) 472 + return -EIO; 473 + 474 + xdr_decode_hyper(p, &res->cookie); 475 + 476 + p = xdr_inline_decode(xdr, 4); 477 + if (unlikely(!p)) 478 + return -EIO; 479 + 480 + left = res->xattr_len; 481 + buf = res->xattr_buf; 482 + 483 + count = be32_to_cpup(p); 484 + copied = 0; 485 + 486 + /* 487 + * We have asked for enough room to encode the maximum number 488 + * of possible attribute names, so everything should fit. 489 + * 490 + * But, don't rely on that assumption. Just decode entries 491 + * until they don't fit anymore, just in case the server did 492 + * something odd. 
493 + */ 494 + while (count--) { 495 + p = xdr_inline_decode(xdr, 4); 496 + if (unlikely(!p)) 497 + return -EIO; 498 + 499 + len = be32_to_cpup(p); 500 + if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { 501 + status = -ERANGE; 502 + goto out; 503 + } 504 + 505 + p = xdr_inline_decode(xdr, len); 506 + if (unlikely(!p)) 507 + return -EIO; 508 + 509 + ulen = len + XATTR_USER_PREFIX_LEN + 1; 510 + if (buf) { 511 + if (ulen > left) { 512 + status = -ERANGE; 513 + goto out; 514 + } 515 + 516 + memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); 517 + memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); 518 + 519 + buf[ulen - 1] = 0; 520 + buf += ulen; 521 + left -= ulen; 522 + } 523 + copied += ulen; 524 + } 525 + 526 + p = xdr_inline_decode(xdr, 4); 527 + if (unlikely(!p)) 528 + return -EIO; 529 + 530 + res->eof = be32_to_cpup(p); 531 + res->copied = copied; 532 + 533 + out: 534 + if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) 535 + status = -E2BIG; 536 + 537 + return status; 406 538 } 407 539 408 540 /* ··· 1264 988 return status; 1265 989 } 1266 990 991 + #ifdef CONFIG_NFS_V4_2 992 + static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, 993 + const void *data) 994 + { 995 + const struct nfs42_setxattrargs *args = data; 996 + struct compound_hdr hdr = { 997 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 998 + }; 999 + 1000 + encode_compound_hdr(xdr, req, &hdr); 1001 + encode_sequence(xdr, &args->seq_args, &hdr); 1002 + encode_putfh(xdr, args->fh, &hdr); 1003 + encode_setxattr(xdr, args, &hdr); 1004 + encode_nops(&hdr); 1005 + } 1006 + 1007 + static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, 1008 + void *data) 1009 + { 1010 + struct nfs42_setxattrres *res = data; 1011 + struct compound_hdr hdr; 1012 + int status; 1013 + 1014 + status = decode_compound_hdr(xdr, &hdr); 1015 + if (status) 1016 + goto out; 1017 + status = decode_sequence(xdr, &res->seq_res, req); 1018 + if (status) 1019 + goto 
out; 1020 + status = decode_putfh(xdr); 1021 + if (status) 1022 + goto out; 1023 + 1024 + status = decode_setxattr(xdr, &res->cinfo); 1025 + out: 1026 + return status; 1027 + } 1028 + 1029 + static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, 1030 + const void *data) 1031 + { 1032 + const struct nfs42_getxattrargs *args = data; 1033 + struct compound_hdr hdr = { 1034 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1035 + }; 1036 + size_t plen; 1037 + 1038 + encode_compound_hdr(xdr, req, &hdr); 1039 + encode_sequence(xdr, &args->seq_args, &hdr); 1040 + encode_putfh(xdr, args->fh, &hdr); 1041 + encode_getxattr(xdr, args->xattr_name, &hdr); 1042 + 1043 + plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX; 1044 + 1045 + rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, 1046 + hdr.replen); 1047 + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; 1048 + 1049 + encode_nops(&hdr); 1050 + } 1051 + 1052 + static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp, 1053 + struct xdr_stream *xdr, void *data) 1054 + { 1055 + struct nfs42_getxattrres *res = data; 1056 + struct compound_hdr hdr; 1057 + int status; 1058 + 1059 + status = decode_compound_hdr(xdr, &hdr); 1060 + if (status) 1061 + goto out; 1062 + status = decode_sequence(xdr, &res->seq_res, rqstp); 1063 + if (status) 1064 + goto out; 1065 + status = decode_putfh(xdr); 1066 + if (status) 1067 + goto out; 1068 + status = decode_getxattr(xdr, res, rqstp); 1069 + out: 1070 + return status; 1071 + } 1072 + 1073 + static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, 1074 + struct xdr_stream *xdr, const void *data) 1075 + { 1076 + const struct nfs42_listxattrsargs *args = data; 1077 + struct compound_hdr hdr = { 1078 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1079 + }; 1080 + 1081 + encode_compound_hdr(xdr, req, &hdr); 1082 + encode_sequence(xdr, &args->seq_args, &hdr); 1083 + encode_putfh(xdr, args->fh, &hdr); 1084 + encode_listxattrs(xdr, args, &hdr); 1085 
+ 1086 + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, 1087 + hdr.replen); 1088 + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; 1089 + 1090 + encode_nops(&hdr); 1091 + } 1092 + 1093 + static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, 1094 + struct xdr_stream *xdr, void *data) 1095 + { 1096 + struct nfs42_listxattrsres *res = data; 1097 + struct compound_hdr hdr; 1098 + int status; 1099 + 1100 + xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); 1101 + 1102 + status = decode_compound_hdr(xdr, &hdr); 1103 + if (status) 1104 + goto out; 1105 + status = decode_sequence(xdr, &res->seq_res, rqstp); 1106 + if (status) 1107 + goto out; 1108 + status = decode_putfh(xdr); 1109 + if (status) 1110 + goto out; 1111 + status = decode_listxattrs(xdr, res); 1112 + out: 1113 + return status; 1114 + } 1115 + 1116 + static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, 1117 + struct xdr_stream *xdr, const void *data) 1118 + { 1119 + const struct nfs42_removexattrargs *args = data; 1120 + struct compound_hdr hdr = { 1121 + .minorversion = nfs4_xdr_minorversion(&args->seq_args), 1122 + }; 1123 + 1124 + encode_compound_hdr(xdr, req, &hdr); 1125 + encode_sequence(xdr, &args->seq_args, &hdr); 1126 + encode_putfh(xdr, args->fh, &hdr); 1127 + encode_removexattr(xdr, args->xattr_name, &hdr); 1128 + encode_nops(&hdr); 1129 + } 1130 + 1131 + static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, 1132 + struct xdr_stream *xdr, void *data) 1133 + { 1134 + struct nfs42_removexattrres *res = data; 1135 + struct compound_hdr hdr; 1136 + int status; 1137 + 1138 + status = decode_compound_hdr(xdr, &hdr); 1139 + if (status) 1140 + goto out; 1141 + status = decode_sequence(xdr, &res->seq_res, req); 1142 + if (status) 1143 + goto out; 1144 + status = decode_putfh(xdr); 1145 + if (status) 1146 + goto out; 1147 + 1148 + status = decode_removexattr(xdr, &res->cinfo); 1149 + out: 1150 + return status; 1151 + } 1152 + #endif 1267 1153 #endif /* 
__LINUX_FS_NFS_NFS4_2XDR_H */
+35
fs/nfs/nfs4_fs.h
··· 324 324 325 325 extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 326 326 struct nfs_fsinfo *fsinfo); 327 + extern void nfs4_update_changeattr(struct inode *dir, 328 + struct nfs4_change_info *cinfo, 329 + unsigned long timestamp, 330 + unsigned long cache_validity); 331 + extern int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, 332 + struct page **pages); 333 + 327 334 #if defined(CONFIG_NFS_V4_1) 328 335 extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); 329 336 extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *); ··· 564 557 /* nfs4xdr.c */ 565 558 extern const struct rpc_procinfo nfs4_procedures[]; 566 559 560 + #ifdef CONFIG_NFS_V4_2 561 + extern const u32 nfs42_maxsetxattr_overhead; 562 + extern const u32 nfs42_maxgetxattr_overhead; 563 + extern const u32 nfs42_maxlistxattrs_overhead; 564 + #endif 565 + 567 566 struct nfs4_mount_data; 568 567 569 568 /* callback_xdr.c */ ··· 626 613 nfs4_stateid_match_other(&state->open_stateid, stateid); 627 614 } 628 615 616 + /* nfs42xattr.c */ 617 + #ifdef CONFIG_NFS_V4_2 618 + extern int __init nfs4_xattr_cache_init(void); 619 + extern void nfs4_xattr_cache_exit(void); 620 + extern void nfs4_xattr_cache_add(struct inode *inode, const char *name, 621 + const char *buf, struct page **pages, 622 + ssize_t buflen); 623 + extern void nfs4_xattr_cache_remove(struct inode *inode, const char *name); 624 + extern ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, 625 + char *buf, ssize_t buflen); 626 + extern void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf, 627 + ssize_t buflen); 628 + extern ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, 629 + ssize_t buflen); 630 + extern void nfs4_xattr_cache_zap(struct inode *inode); 629 631 #else 632 + static inline void nfs4_xattr_cache_zap(struct inode *inode) 633 + { 634 + } 635 + #endif /* CONFIG_NFS_V4_2 */ 636 + 637 + #else /* CONFIG_NFS_V4 */ 630 638 631 
639 #define nfs4_close_state(a, b) do { } while (0) 632 640 #define nfs4_close_sync(a, b) do { } while (0) 633 641 #define nfs4_state_protect(a, b, c, d) do { } while (0) 634 642 #define nfs4_state_protect_write(a, b, c, d) do { } while (0) 643 + 635 644 636 645 #endif /* CONFIG_NFS_V4 */ 637 646 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
+32 -1
fs/nfs/nfs4client.c
··· 880 880 881 881 if (minorversion == 0) 882 882 __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); 883 - else if (proto == XPRT_TRANSPORT_TCP) 883 + if (proto == XPRT_TRANSPORT_TCP) 884 884 cl_init.nconnect = nconnect; 885 885 886 886 if (server->flags & NFS_MOUNT_NORESVPORT) ··· 992 992 #endif /* CONFIG_NFS_V4_1 */ 993 993 } 994 994 995 + /* 996 + * Limit xattr sizes using the channel attributes. 997 + */ 998 + static void nfs4_session_limit_xasize(struct nfs_server *server) 999 + { 1000 + #ifdef CONFIG_NFS_V4_2 1001 + struct nfs4_session *sess; 1002 + u32 server_gxa_sz; 1003 + u32 server_sxa_sz; 1004 + u32 server_lxa_sz; 1005 + 1006 + if (!nfs4_has_session(server->nfs_client)) 1007 + return; 1008 + 1009 + sess = server->nfs_client->cl_session; 1010 + 1011 + server_gxa_sz = sess->fc_attrs.max_resp_sz - nfs42_maxgetxattr_overhead; 1012 + server_sxa_sz = sess->fc_attrs.max_rqst_sz - nfs42_maxsetxattr_overhead; 1013 + server_lxa_sz = sess->fc_attrs.max_resp_sz - 1014 + nfs42_maxlistxattrs_overhead; 1015 + 1016 + if (server->gxasize > server_gxa_sz) 1017 + server->gxasize = server_gxa_sz; 1018 + if (server->sxasize > server_sxa_sz) 1019 + server->sxasize = server_sxa_sz; 1020 + if (server->lxasize > server_lxa_sz) 1021 + server->lxasize = server_lxa_sz; 1022 + #endif 1023 + } 1024 + 995 1025 static int nfs4_server_common_setup(struct nfs_server *server, 996 1026 struct nfs_fh *mntfh, bool auth_probe) 997 1027 { ··· 1069 1039 goto out; 1070 1040 1071 1041 nfs4_session_limit_rwsize(server); 1042 + nfs4_session_limit_xasize(server); 1072 1043 1073 1044 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1074 1045 server->namelen = NFS4_MAXNAMLEN;
+4 -1
fs/nfs/nfs4file.c
··· 110 110 nfs4_file_flush(struct file *file, fl_owner_t id) 111 111 { 112 112 struct inode *inode = file_inode(file); 113 + errseq_t since; 113 114 114 115 dprintk("NFS: flush(%pD2)\n", file); 115 116 ··· 126 125 return filemap_fdatawrite(file->f_mapping); 127 126 128 127 /* Flush writes to the server and return any errors */ 129 - return nfs_wb_all(inode); 128 + since = filemap_sample_wb_err(file->f_mapping); 129 + nfs_wb_all(inode); 130 + return filemap_check_wb_err(file->f_mapping, since); 130 131 } 131 132 132 133 #ifdef CONFIG_NFS_V4_2
+206 -33
fs/nfs/nfs4proc.c
··· 66 66 #include "nfs4idmap.h" 67 67 #include "nfs4session.h" 68 68 #include "fscache.h" 69 + #include "nfs42.h" 69 70 70 71 #include "nfs4trace.h" 71 72 ··· 257 256 | FATTR4_WORD1_FS_LAYOUT_TYPES, 258 257 FATTR4_WORD2_LAYOUT_BLKSIZE 259 258 | FATTR4_WORD2_CLONE_BLKSIZE 259 + | FATTR4_WORD2_XATTR_SUPPORT 260 260 }; 261 261 262 262 const u32 nfs4_fs_locations_bitmap[3] = { ··· 1175 1173 } 1176 1174 1177 1175 static void 1178 - update_changeattr_locked(struct inode *dir, struct nfs4_change_info *cinfo, 1176 + nfs4_update_changeattr_locked(struct inode *inode, 1177 + struct nfs4_change_info *cinfo, 1179 1178 unsigned long timestamp, unsigned long cache_validity) 1180 1179 { 1181 - struct nfs_inode *nfsi = NFS_I(dir); 1180 + struct nfs_inode *nfsi = NFS_I(inode); 1182 1181 1183 1182 nfsi->cache_validity |= NFS_INO_INVALID_CTIME 1184 1183 | NFS_INO_INVALID_MTIME 1185 - | NFS_INO_INVALID_DATA 1186 1184 | cache_validity; 1187 - if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { 1185 + 1186 + if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) { 1188 1187 nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; 1189 1188 nfsi->attrtimeo_timestamp = jiffies; 1190 1189 } else { 1191 - nfs_force_lookup_revalidate(dir); 1192 - if (cinfo->before != inode_peek_iversion_raw(dir)) 1190 + if (S_ISDIR(inode->i_mode)) { 1191 + nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1192 + nfs_force_lookup_revalidate(inode); 1193 + } else { 1194 + if (!NFS_PROTO(inode)->have_delegation(inode, 1195 + FMODE_READ)) 1196 + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; 1197 + } 1198 + 1199 + if (cinfo->before != inode_peek_iversion_raw(inode)) 1193 1200 nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | 1194 - NFS_INO_INVALID_ACL; 1201 + NFS_INO_INVALID_ACL | 1202 + NFS_INO_INVALID_XATTR; 1195 1203 } 1196 - inode_set_iversion_raw(dir, cinfo->after); 1204 + inode_set_iversion_raw(inode, cinfo->after); 1197 1205 nfsi->read_cache_jiffies = timestamp; 1198 1206 
nfsi->attr_gencount = nfs_inc_attr_generation_counter(); 1199 1207 nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE; 1200 - nfs_fscache_invalidate(dir); 1208 + 1209 + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) 1210 + nfs_fscache_invalidate(inode); 1201 1211 } 1202 1212 1203 - static void 1204 - update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, 1213 + void 1214 + nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, 1205 1215 unsigned long timestamp, unsigned long cache_validity) 1206 1216 { 1207 1217 spin_lock(&dir->i_lock); 1208 - update_changeattr_locked(dir, cinfo, timestamp, cache_validity); 1218 + nfs4_update_changeattr_locked(dir, cinfo, timestamp, cache_validity); 1209 1219 spin_unlock(&dir->i_lock); 1210 1220 } 1211 1221 ··· 1370 1356 NFS4_ACCESS_MODIFY | 1371 1357 NFS4_ACCESS_EXTEND | 1372 1358 NFS4_ACCESS_EXECUTE; 1359 + #ifdef CONFIG_NFS_V4_2 1360 + if (server->caps & NFS_CAP_XATTR) 1361 + p->o_arg.access |= NFS4_ACCESS_XAREAD | 1362 + NFS4_ACCESS_XAWRITE | 1363 + NFS4_ACCESS_XALIST; 1364 + #endif 1373 1365 } 1374 1366 } 1375 1367 p->o_arg.clientid = server->nfs_client->cl_clientid; ··· 2673 2653 data->file_created = true; 2674 2654 if (data->file_created || 2675 2655 inode_peek_iversion_raw(dir) != o_res->cinfo.after) 2676 - update_changeattr(dir, &o_res->cinfo, 2677 - o_res->f_attr->time_start, 0); 2656 + nfs4_update_changeattr(dir, &o_res->cinfo, 2657 + o_res->f_attr->time_start, 2658 + NFS_INO_INVALID_DATA); 2678 2659 } 2679 2660 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) 2680 2661 server->caps &= ~NFS_CAP_POSIX_LOCK; ··· 3777 3756 3778 3757 #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) 3779 3758 #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) 3780 - #define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL) 3759 + #define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_XATTR_SUPPORT - 1UL) 3781 3760 3782 3761 static int 
_nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 3783 3762 { ··· 4561 4540 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); 4562 4541 if (status == 0) { 4563 4542 spin_lock(&dir->i_lock); 4564 - update_changeattr_locked(dir, &res.cinfo, timestamp, 0); 4543 + nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp, 4544 + NFS_INO_INVALID_DATA); 4565 4545 /* Removing a directory decrements nlink in the parent */ 4566 4546 if (ftype == NF4DIR && dir->i_nlink > 2) 4567 4547 nfs4_dec_nlink_locked(dir); ··· 4646 4624 &data->timeout) == -EAGAIN) 4647 4625 return 0; 4648 4626 if (task->tk_status == 0) 4649 - update_changeattr(dir, &res->cinfo, 4650 - res->dir_attr->time_start, 0); 4627 + nfs4_update_changeattr(dir, &res->cinfo, 4628 + res->dir_attr->time_start, 4629 + NFS_INO_INVALID_DATA); 4651 4630 return 1; 4652 4631 } 4653 4632 ··· 4692 4669 if (task->tk_status == 0) { 4693 4670 if (new_dir != old_dir) { 4694 4671 /* Note: If we moved a directory, nlink will change */ 4695 - update_changeattr(old_dir, &res->old_cinfo, 4672 + nfs4_update_changeattr(old_dir, &res->old_cinfo, 4696 4673 res->old_fattr->time_start, 4697 - NFS_INO_INVALID_OTHER); 4698 - update_changeattr(new_dir, &res->new_cinfo, 4674 + NFS_INO_INVALID_OTHER | 4675 + NFS_INO_INVALID_DATA); 4676 + nfs4_update_changeattr(new_dir, &res->new_cinfo, 4699 4677 res->new_fattr->time_start, 4700 - NFS_INO_INVALID_OTHER); 4678 + NFS_INO_INVALID_OTHER | 4679 + NFS_INO_INVALID_DATA); 4701 4680 } else 4702 - update_changeattr(old_dir, &res->old_cinfo, 4681 + nfs4_update_changeattr(old_dir, &res->old_cinfo, 4703 4682 res->old_fattr->time_start, 4704 - 0); 4683 + NFS_INO_INVALID_DATA); 4705 4684 } 4706 4685 return 1; 4707 4686 } ··· 4744 4719 4745 4720 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); 4746 4721 if (!status) { 4747 - update_changeattr(dir, &res.cinfo, res.fattr->time_start, 0); 4722 + 
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, 4723 + NFS_INO_INVALID_DATA); 4748 4724 status = nfs_post_op_update_inode(inode, res.fattr); 4749 4725 if (!status) 4750 4726 nfs_setsecurity(inode, res.fattr, res.label); ··· 4823 4797 &data->arg.seq_args, &data->res.seq_res, 1); 4824 4798 if (status == 0) { 4825 4799 spin_lock(&dir->i_lock); 4826 - update_changeattr_locked(dir, &data->res.dir_cinfo, 4827 - data->res.fattr->time_start, 0); 4800 + nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, 4801 + data->res.fattr->time_start, 4802 + NFS_INO_INVALID_DATA); 4828 4803 /* Creating a directory bumps nlink in the parent */ 4829 4804 if (data->arg.ftype == NF4DIR) 4830 4805 nfs4_inc_nlink_locked(dir); ··· 5558 5531 */ 5559 5532 #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) 5560 5533 5561 - static int buf_to_pages_noslab(const void *buf, size_t buflen, 5534 + int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, 5562 5535 struct page **pages) 5563 5536 { 5564 5537 struct page *newpage, **spages; ··· 5800 5773 return -EOPNOTSUPP; 5801 5774 if (npages > ARRAY_SIZE(pages)) 5802 5775 return -ERANGE; 5803 - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); 5776 + i = nfs4_buf_to_pages_noslab(buf, buflen, arg.acl_pages); 5804 5777 if (i < 0) 5805 5778 return i; 5806 5779 nfs4_inode_make_writeable(inode); ··· 5872 5845 return ret; 5873 5846 if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) 5874 5847 return -ENOENT; 5875 - if (buflen < label.len) 5876 - return -ERANGE; 5877 5848 return 0; 5878 5849 } 5879 5850 ··· 7454 7429 } 7455 7430 7456 7431 #endif 7432 + 7433 + #ifdef CONFIG_NFS_V4_2 7434 + static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, 7435 + struct dentry *unused, struct inode *inode, 7436 + const char *key, const void *buf, 7437 + size_t buflen, int flags) 7438 + { 7439 + struct nfs_access_entry cache; 7440 + int ret; 7441 + 7442 + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) 7443 + 
return -EOPNOTSUPP; 7444 + 7445 + /* 7446 + * There is no mapping from the MAY_* flags to the NFS_ACCESS_XA* 7447 + * flags right now. Handling of xattr operations use the normal 7448 + * file read/write permissions. 7449 + * 7450 + * Just in case the server has other ideas (which RFC 8276 allows), 7451 + * do a cached access check for the XA* flags to possibly avoid 7452 + * doing an RPC and getting EACCES back. 7453 + */ 7454 + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { 7455 + if (!(cache.mask & NFS_ACCESS_XAWRITE)) 7456 + return -EACCES; 7457 + } 7458 + 7459 + if (buf == NULL) { 7460 + ret = nfs42_proc_removexattr(inode, key); 7461 + if (!ret) 7462 + nfs4_xattr_cache_remove(inode, key); 7463 + } else { 7464 + ret = nfs42_proc_setxattr(inode, key, buf, buflen, flags); 7465 + if (!ret) 7466 + nfs4_xattr_cache_add(inode, key, buf, NULL, buflen); 7467 + } 7468 + 7469 + return ret; 7470 + } 7471 + 7472 + static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, 7473 + struct dentry *unused, struct inode *inode, 7474 + const char *key, void *buf, size_t buflen) 7475 + { 7476 + struct nfs_access_entry cache; 7477 + ssize_t ret; 7478 + 7479 + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) 7480 + return -EOPNOTSUPP; 7481 + 7482 + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { 7483 + if (!(cache.mask & NFS_ACCESS_XAREAD)) 7484 + return -EACCES; 7485 + } 7486 + 7487 + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); 7488 + if (ret) 7489 + return ret; 7490 + 7491 + ret = nfs4_xattr_cache_get(inode, key, buf, buflen); 7492 + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) 7493 + return ret; 7494 + 7495 + ret = nfs42_proc_getxattr(inode, key, buf, buflen); 7496 + 7497 + return ret; 7498 + } 7499 + 7500 + static ssize_t 7501 + nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) 7502 + { 7503 + u64 cookie; 7504 + bool eof; 7505 + ssize_t ret, size; 7506 + char *buf; 7507 + size_t buflen; 7508 + 
struct nfs_access_entry cache; 7509 + 7510 + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) 7511 + return 0; 7512 + 7513 + if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { 7514 + if (!(cache.mask & NFS_ACCESS_XALIST)) 7515 + return 0; 7516 + } 7517 + 7518 + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); 7519 + if (ret) 7520 + return ret; 7521 + 7522 + ret = nfs4_xattr_cache_list(inode, list, list_len); 7523 + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) 7524 + return ret; 7525 + 7526 + cookie = 0; 7527 + eof = false; 7528 + buflen = list_len ? list_len : XATTR_LIST_MAX; 7529 + buf = list_len ? list : NULL; 7530 + size = 0; 7531 + 7532 + while (!eof) { 7533 + ret = nfs42_proc_listxattrs(inode, buf, buflen, 7534 + &cookie, &eof); 7535 + if (ret < 0) 7536 + return ret; 7537 + 7538 + if (list_len) { 7539 + buf += ret; 7540 + buflen -= ret; 7541 + } 7542 + size += ret; 7543 + } 7544 + 7545 + if (list_len) 7546 + nfs4_xattr_cache_set_list(inode, list, size); 7547 + 7548 + return size; 7549 + } 7550 + 7551 + #else 7552 + 7553 + static ssize_t 7554 + nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) 7555 + { 7556 + return 0; 7557 + } 7558 + #endif /* CONFIG_NFS_V4_2 */ 7457 7559 7458 7560 /* 7459 7561 * nfs_fhget will use either the mounted_on_fileid or the fileid ··· 10187 10035 10188 10036 static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) 10189 10037 { 10190 - ssize_t error, error2; 10038 + ssize_t error, error2, error3; 10191 10039 10192 10040 error = generic_listxattr(dentry, list, size); 10193 10041 if (error < 0) ··· 10200 10048 error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size); 10201 10049 if (error2 < 0) 10202 10050 return error2; 10203 - return error + error2; 10051 + 10052 + if (list) { 10053 + list += error2; 10054 + size -= error2; 10055 + } 10056 + 10057 + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size); 10058 + if (error3 < 0) 10059 + return error3; 
10060 + 10061 + return error + error2 + error3; 10204 10062 } 10205 10063 10206 10064 static const struct inode_operations nfs4_dir_inode_operations = { ··· 10298 10136 .set = nfs4_xattr_set_nfs4_acl, 10299 10137 }; 10300 10138 10139 + #ifdef CONFIG_NFS_V4_2 10140 + static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { 10141 + .prefix = XATTR_USER_PREFIX, 10142 + .get = nfs4_xattr_get_nfs4_user, 10143 + .set = nfs4_xattr_set_nfs4_user, 10144 + }; 10145 + #endif 10146 + 10301 10147 const struct xattr_handler *nfs4_xattr_handlers[] = { 10302 10148 &nfs4_xattr_nfs4_acl_handler, 10303 10149 #ifdef CONFIG_NFS_V4_SECURITY_LABEL 10304 10150 &nfs4_xattr_nfs4_label_handler, 10151 + #endif 10152 + #ifdef CONFIG_NFS_V4_2 10153 + &nfs4_xattr_nfs4_user_handler, 10305 10154 #endif 10306 10155 NULL 10307 10156 };
+10
fs/nfs/nfs4super.c
··· 69 69 pnfs_destroy_layout(NFS_I(inode)); 70 70 /* First call standard NFS clear_inode() code */ 71 71 nfs_clear_inode(inode); 72 + nfs4_xattr_cache_zap(inode); 72 73 } 73 74 74 75 struct nfs_referral_count { ··· 269 268 if (err) 270 269 goto out1; 271 270 271 + #ifdef CONFIG_NFS_V4_2 272 + err = nfs4_xattr_cache_init(); 273 + if (err) 274 + goto out2; 275 + #endif 276 + 272 277 err = nfs4_register_sysctl(); 273 278 if (err) 274 279 goto out2; ··· 295 288 nfs4_pnfs_v3_ds_connect_unload(); 296 289 297 290 unregister_nfs_version(&nfs_v4); 291 + #ifdef CONFIG_NFS_V4_2 292 + nfs4_xattr_cache_exit(); 293 + #endif 298 294 nfs4_unregister_sysctl(); 299 295 nfs_idmap_quit(); 300 296 nfs_dns_resolver_destroy();
+39 -7
fs/nfs/nfs4trace.h
··· 1727 1727 DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name); 1728 1728 DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); 1729 1729 1730 + #ifdef CONFIG_NFS_V4_1 1731 + #define NFS4_LSEG_LAYOUT_STATEID_HASH(lseg) \ 1732 + (lseg ? nfs_stateid_hash(&lseg->pls_layout->plh_stateid) : 0) 1733 + #else 1734 + #define NFS4_LSEG_LAYOUT_STATEID_HASH(lseg) (0) 1735 + #endif 1736 + 1730 1737 DECLARE_EVENT_CLASS(nfs4_read_event, 1731 1738 TP_PROTO( 1732 1739 const struct nfs_pgio_header *hdr, ··· 1752 1745 __field(unsigned long, error) 1753 1746 __field(int, stateid_seq) 1754 1747 __field(u32, stateid_hash) 1748 + __field(int, layoutstateid_seq) 1749 + __field(u32, layoutstateid_hash) 1755 1750 ), 1756 1751 1757 1752 TP_fast_assign( ··· 1763 1754 hdr->args.fh : &nfsi->fh; 1764 1755 const struct nfs4_state *state = 1765 1756 hdr->args.context->state; 1757 + const struct pnfs_layout_segment *lseg = hdr->lseg; 1766 1758 1767 1759 __entry->dev = inode->i_sb->s_dev; 1768 1760 __entry->fileid = nfsi->fileid; ··· 1776 1766 be32_to_cpu(state->stateid.seqid); 1777 1767 __entry->stateid_hash = 1778 1768 nfs_stateid_hash(&state->stateid); 1769 + __entry->layoutstateid_seq = lseg ? 
lseg->pls_seq : 0; 1770 + __entry->layoutstateid_hash = 1771 + NFS4_LSEG_LAYOUT_STATEID_HASH(lseg); 1779 1772 ), 1780 1773 1781 1774 TP_printk( 1782 1775 "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1783 - "offset=%lld count=%u res=%u stateid=%d:0x%08x", 1776 + "offset=%lld count=%u res=%u stateid=%d:0x%08x " 1777 + "layoutstateid=%d:0x%08x", 1784 1778 -__entry->error, 1785 1779 show_nfsv4_errors(__entry->error), 1786 1780 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 1792 1778 __entry->fhandle, 1793 1779 (long long)__entry->offset, 1794 1780 __entry->arg_count, __entry->res_count, 1795 - __entry->stateid_seq, __entry->stateid_hash 1781 + __entry->stateid_seq, __entry->stateid_hash, 1782 + __entry->layoutstateid_seq, __entry->layoutstateid_hash 1796 1783 ) 1797 1784 ); 1798 1785 #define DEFINE_NFS4_READ_EVENT(name) \ ··· 1826 1811 __field(unsigned long, error) 1827 1812 __field(int, stateid_seq) 1828 1813 __field(u32, stateid_hash) 1814 + __field(int, layoutstateid_seq) 1815 + __field(u32, layoutstateid_hash) 1829 1816 ), 1830 1817 1831 1818 TP_fast_assign( ··· 1837 1820 hdr->args.fh : &nfsi->fh; 1838 1821 const struct nfs4_state *state = 1839 1822 hdr->args.context->state; 1823 + const struct pnfs_layout_segment *lseg = hdr->lseg; 1840 1824 1841 1825 __entry->dev = inode->i_sb->s_dev; 1842 1826 __entry->fileid = nfsi->fileid; ··· 1850 1832 be32_to_cpu(state->stateid.seqid); 1851 1833 __entry->stateid_hash = 1852 1834 nfs_stateid_hash(&state->stateid); 1835 + __entry->layoutstateid_seq = lseg ? 
lseg->pls_seq : 0; 1836 + __entry->layoutstateid_hash = 1837 + NFS4_LSEG_LAYOUT_STATEID_HASH(lseg); 1853 1838 ), 1854 1839 1855 1840 TP_printk( 1856 1841 "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1857 - "offset=%lld count=%u res=%u stateid=%d:0x%08x", 1842 + "offset=%lld count=%u res=%u stateid=%d:0x%08x " 1843 + "layoutstateid=%d:0x%08x", 1858 1844 -__entry->error, 1859 1845 show_nfsv4_errors(__entry->error), 1860 1846 MAJOR(__entry->dev), MINOR(__entry->dev), ··· 1866 1844 __entry->fhandle, 1867 1845 (long long)__entry->offset, 1868 1846 __entry->arg_count, __entry->res_count, 1869 - __entry->stateid_seq, __entry->stateid_hash 1847 + __entry->stateid_seq, __entry->stateid_hash, 1848 + __entry->layoutstateid_seq, __entry->layoutstateid_hash 1870 1849 ) 1871 1850 ); 1872 1851 ··· 1898 1875 __field(unsigned long, error) 1899 1876 __field(loff_t, offset) 1900 1877 __field(u32, count) 1878 + __field(int, layoutstateid_seq) 1879 + __field(u32, layoutstateid_hash) 1901 1880 ), 1902 1881 1903 1882 TP_fast_assign( ··· 1907 1882 const struct nfs_inode *nfsi = NFS_I(inode); 1908 1883 const struct nfs_fh *fh = data->args.fh ? 1909 1884 data->args.fh : &nfsi->fh; 1885 + const struct pnfs_layout_segment *lseg = data->lseg; 1910 1886 1911 1887 __entry->dev = inode->i_sb->s_dev; 1912 1888 __entry->fileid = nfsi->fileid; ··· 1915 1889 __entry->offset = data->args.offset; 1916 1890 __entry->count = data->args.count; 1917 1891 __entry->error = error < 0 ? -error : 0; 1892 + __entry->layoutstateid_seq = lseg ? 
lseg->pls_seq : 0; 1893 + __entry->layoutstateid_hash = 1894 + NFS4_LSEG_LAYOUT_STATEID_HASH(lseg); 1918 1895 ), 1919 1896 1920 1897 TP_printk( 1921 1898 "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " 1922 - "offset=%lld count=%u", 1899 + "offset=%lld count=%u layoutstateid=%d:0x%08x", 1923 1900 -__entry->error, 1924 1901 show_nfsv4_errors(__entry->error), 1925 1902 MAJOR(__entry->dev), MINOR(__entry->dev), 1926 1903 (unsigned long long)__entry->fileid, 1927 1904 __entry->fhandle, 1928 1905 (long long)__entry->offset, 1929 - __entry->count 1906 + __entry->count, 1907 + __entry->layoutstateid_seq, __entry->layoutstateid_hash 1930 1908 ) 1931 1909 ); 1932 1910 #define DEFINE_NFS4_COMMIT_EVENT(name) \ ··· 2023 1993 2024 1994 DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit); 2025 1995 DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn); 2026 - DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); 1996 + DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn_on_close); 1997 + DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layouterror); 1998 + DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutstats); 2027 1999 2028 2000 TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_UNKNOWN); 2029 2001 TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NO_PNFS);
+37 -2
fs/nfs/nfs4xdr.c
··· 4166 4166 return -EIO; 4167 4167 if (len < NFS4_MAXLABELLEN) { 4168 4168 if (label) { 4169 - memcpy(label->label, p, len); 4169 + if (label->len) { 4170 + if (label->len < len) 4171 + return -ERANGE; 4172 + memcpy(label->label, p, len); 4173 + } 4170 4174 label->len = len; 4171 4175 label->pi = pi; 4172 4176 label->lfs = lfs; ··· 4203 4199 } 4204 4200 dprintk("%s: mtime=%lld\n", __func__, time->tv_sec); 4205 4201 return status; 4202 + } 4203 + 4204 + static int decode_attr_xattrsupport(struct xdr_stream *xdr, uint32_t *bitmap, 4205 + uint32_t *res) 4206 + { 4207 + __be32 *p; 4208 + 4209 + *res = 0; 4210 + if (unlikely(bitmap[2] & (FATTR4_WORD2_XATTR_SUPPORT - 1U))) 4211 + return -EIO; 4212 + if (likely(bitmap[2] & FATTR4_WORD2_XATTR_SUPPORT)) { 4213 + p = xdr_inline_decode(xdr, 4); 4214 + if (unlikely(!p)) 4215 + return -EIO; 4216 + *res = be32_to_cpup(p); 4217 + bitmap[2] &= ~FATTR4_WORD2_XATTR_SUPPORT; 4218 + } 4219 + dprintk("%s: XATTR support=%s\n", __func__, 4220 + *res == 0 ? "false" : "true"); 4221 + return 0; 4206 4222 } 4207 4223 4208 4224 static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen) ··· 4879 4855 if (status) 4880 4856 goto xdr_error; 4881 4857 4858 + status = decode_attr_xattrsupport(xdr, bitmap, 4859 + &fsinfo->xattr_support); 4860 + if (status) 4861 + goto xdr_error; 4862 + 4882 4863 status = verify_attr_len(xdr, savep, attrlen); 4883 4864 xdr_error: 4884 4865 dprintk("%s: xdr returned %d!\n", __func__, -status); ··· 5256 5227 * The XDR encode routine has set things up so that 5257 5228 * the link text will be copied directly into the 5258 5229 * buffer. We just have to do overflow-checking, 5259 - * and and null-terminate the text (the VFS expects 5230 + * and null-terminate the text (the VFS expects 5260 5231 * null-termination). 
5261 5232 */ 5262 5233 xdr_terminate_string(rcvbuf, len); ··· 7485 7456 { NFS4ERR_SYMLINK, -ELOOP }, 7486 7457 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, 7487 7458 { NFS4ERR_DEADLOCK, -EDEADLK }, 7459 + { NFS4ERR_NOXATTR, -ENODATA }, 7460 + { NFS4ERR_XATTR2BIG, -E2BIG }, 7488 7461 { -1, -EIO } 7489 7462 }; 7490 7463 ··· 7615 7584 PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), 7616 7585 PROC(LOOKUPP, enc_lookupp, dec_lookupp), 7617 7586 PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), 7587 + PROC42(GETXATTR, enc_getxattr, dec_getxattr), 7588 + PROC42(SETXATTR, enc_setxattr, dec_setxattr), 7589 + PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs), 7590 + PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr), 7618 7591 }; 7619 7592 7620 7593 static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
+2 -1
fs/nfs/nfstrace.h
··· 59 59 { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ 60 60 { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ 61 61 { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ 62 - { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }) 62 + { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \ 63 + { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }) 63 64 64 65 TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); 65 66 TRACE_DEFINE_ENUM(NFS_INO_STALE);
+20 -32
fs/nfs/pnfs.c
··· 1226 1226 return status; 1227 1227 } 1228 1228 1229 + static bool 1230 + pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, 1231 + enum pnfs_iomode iomode, 1232 + u32 seq) 1233 + { 1234 + struct pnfs_layout_range recall_range = { 1235 + .length = NFS4_MAX_UINT64, 1236 + .iomode = iomode, 1237 + }; 1238 + return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, 1239 + &recall_range, seq) != -EBUSY; 1240 + } 1241 + 1229 1242 /* Return true if layoutreturn is needed */ 1230 1243 static bool 1231 1244 pnfs_layout_need_return(struct pnfs_layout_hdr *lo) 1232 1245 { 1233 - struct pnfs_layout_segment *s; 1234 - enum pnfs_iomode iomode; 1235 - u32 seq; 1236 - 1237 1246 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 1238 1247 return false; 1239 - 1240 - seq = lo->plh_return_seq; 1241 - iomode = lo->plh_return_iomode; 1242 - 1243 - /* Defer layoutreturn until all recalled lsegs are done */ 1244 - list_for_each_entry(s, &lo->plh_segs, pls_list) { 1245 - if (seq && pnfs_seqid_is_newer(s->pls_seq, seq)) 1246 - continue; 1247 - if (iomode != IOMODE_ANY && s->pls_range.iomode != iomode) 1248 - continue; 1249 - if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) 1250 - return false; 1251 - } 1252 - 1253 - return true; 1248 + return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, 1249 + lo->plh_return_seq); 1254 1250 } 1255 1251 1256 1252 static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) ··· 1545 1549 default: 1546 1550 arg_stateid = &args->stateid; 1547 1551 } 1552 + trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); 1548 1553 pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, 1549 1554 res_stateid); 1550 1555 if (ld_private && ld_private->ops && ld_private->ops->free) 1551 1556 ld_private->ops->free(ld_private); 1552 1557 pnfs_put_layout_hdr(lo); 1553 - trace_nfs4_layoutreturn_on_close(args->inode, 0); 1554 1558 } 1555 1559 1556 1560 bool pnfs_wait_on_layoutreturn(struct inode 
*ino, struct rpc_task *task) ··· 2388 2392 return ERR_PTR(-EAGAIN); 2389 2393 } 2390 2394 2391 - static int 2392 - mark_lseg_invalid_or_return(struct pnfs_layout_segment *lseg, 2393 - struct list_head *tmp_list) 2394 - { 2395 - if (!mark_lseg_invalid(lseg, tmp_list)) 2396 - return 0; 2397 - pnfs_cache_lseg_for_layoutreturn(lseg->pls_layout, lseg); 2398 - return 1; 2399 - } 2400 - 2401 2395 /** 2402 2396 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments 2403 2397 * @lo: pointer to layout header ··· 2424 2438 lseg, lseg->pls_range.iomode, 2425 2439 lseg->pls_range.offset, 2426 2440 lseg->pls_range.length); 2427 - if (mark_lseg_invalid_or_return(lseg, tmp_list)) 2441 + if (mark_lseg_invalid(lseg, tmp_list)) 2428 2442 continue; 2429 2443 remaining++; 2430 2444 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); ··· 2939 2953 } 2940 2954 2941 2955 /* Resend all requests through pnfs. */ 2942 - void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) 2956 + void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr, 2957 + unsigned int mirror_idx) 2943 2958 { 2944 2959 struct nfs_pageio_descriptor pgio; 2945 2960 ··· 2951 2964 2952 2965 nfs_pageio_init_read(&pgio, hdr->inode, false, 2953 2966 hdr->completion_ops); 2967 + pgio.pg_mirror_idx = mirror_idx; 2954 2968 hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); 2955 2969 } 2956 2970 }
+1 -1
fs/nfs/pnfs.h
··· 311 311 int pnfs_commit_and_return_layout(struct inode *); 312 312 void pnfs_ld_write_done(struct nfs_pgio_header *); 313 313 void pnfs_ld_read_done(struct nfs_pgio_header *); 314 - void pnfs_read_resend_pnfs(struct nfs_pgio_header *); 314 + void pnfs_read_resend_pnfs(struct nfs_pgio_header *, unsigned int mirror_idx); 315 315 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 316 316 struct nfs_open_context *ctx, 317 317 loff_t pos,
+5
include/linux/nfs4.h
··· 553 553 NFSPROC4_CLNT_LAYOUTERROR, 554 554 555 555 NFSPROC4_CLNT_COPY_NOTIFY, 556 + 557 + NFSPROC4_CLNT_GETXATTR, 558 + NFSPROC4_CLNT_SETXATTR, 559 + NFSPROC4_CLNT_LISTXATTRS, 560 + NFSPROC4_CLNT_REMOVEXATTR, 556 561 }; 557 562 558 563 /* nfs41 types */
+12
include/linux/nfs_fs.h
··· 102 102 103 103 struct posix_acl; 104 104 105 + struct nfs4_xattr_cache; 106 + 105 107 /* 106 108 * nfs fs inode data in memory 107 109 */ ··· 190 188 struct fscache_cookie *fscache; 191 189 #endif 192 190 struct inode vfs_inode; 191 + 192 + #ifdef CONFIG_NFS_V4_2 193 + struct nfs4_xattr_cache *xattr_cache; 194 + #endif 193 195 }; 194 196 195 197 struct nfs4_copy_state { ··· 218 212 #define NFS_ACCESS_EXTEND 0x0008 219 213 #define NFS_ACCESS_DELETE 0x0010 220 214 #define NFS_ACCESS_EXECUTE 0x0020 215 + #define NFS_ACCESS_XAREAD 0x0040 216 + #define NFS_ACCESS_XAWRITE 0x0080 217 + #define NFS_ACCESS_XALIST 0x0100 221 218 222 219 /* 223 220 * Cache validity bit flags ··· 240 231 #define NFS_INO_DATA_INVAL_DEFER \ 241 232 BIT(13) /* Deferred cache invalidation */ 242 233 #define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ 234 + #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ 243 235 244 236 #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ 245 237 | NFS_INO_INVALID_CTIME \ ··· 500 490 struct nfs_fattr *fattr, struct nfs4_label *label); 501 491 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); 502 492 extern void nfs_access_zap_cache(struct inode *inode); 493 + extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, 494 + bool may_block); 503 495 504 496 /* 505 497 * linux/fs/nfs/symlink.c
+6
include/linux/nfs_fs_sb.h
··· 163 163 unsigned int dtsize; /* readdir size */ 164 164 unsigned short port; /* "port=" setting */ 165 165 unsigned int bsize; /* server block size */ 166 + #ifdef CONFIG_NFS_V4_2 167 + unsigned int gxasize; /* getxattr size */ 168 + unsigned int sxasize; /* setxattr size */ 169 + unsigned int lxasize; /* listxattr size */ 170 + #endif 166 171 unsigned int acregmin; /* attr cache timeouts */ 167 172 unsigned int acregmax; 168 173 unsigned int acdirmin; ··· 286 281 #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) 287 282 #define NFS_CAP_LAYOUTERROR (1U << 26) 288 283 #define NFS_CAP_COPY_NOTIFY (1U << 27) 284 + #define NFS_CAP_XATTR (1U << 28) 289 285 290 286 #endif
+59 -1
include/linux/nfs_xdr.h
··· 150 150 __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ 151 151 __u32 blksize; /* preferred pnfs io block size */ 152 152 __u32 clone_blksize; /* granularity of a CLONE operation */ 153 + __u32 xattr_support; /* User xattrs supported */ 153 154 }; 154 155 155 156 struct nfs_fsstat { ··· 1498 1497 u32 sr_eof; 1499 1498 u64 sr_offset; 1500 1499 }; 1501 - #endif 1500 + 1501 + struct nfs42_setxattrargs { 1502 + struct nfs4_sequence_args seq_args; 1503 + struct nfs_fh *fh; 1504 + const char *xattr_name; 1505 + u32 xattr_flags; 1506 + size_t xattr_len; 1507 + struct page **xattr_pages; 1508 + }; 1509 + 1510 + struct nfs42_setxattrres { 1511 + struct nfs4_sequence_res seq_res; 1512 + struct nfs4_change_info cinfo; 1513 + }; 1514 + 1515 + struct nfs42_getxattrargs { 1516 + struct nfs4_sequence_args seq_args; 1517 + struct nfs_fh *fh; 1518 + const char *xattr_name; 1519 + size_t xattr_len; 1520 + struct page **xattr_pages; 1521 + }; 1522 + 1523 + struct nfs42_getxattrres { 1524 + struct nfs4_sequence_res seq_res; 1525 + size_t xattr_len; 1526 + }; 1527 + 1528 + struct nfs42_listxattrsargs { 1529 + struct nfs4_sequence_args seq_args; 1530 + struct nfs_fh *fh; 1531 + u32 count; 1532 + u64 cookie; 1533 + struct page **xattr_pages; 1534 + }; 1535 + 1536 + struct nfs42_listxattrsres { 1537 + struct nfs4_sequence_res seq_res; 1538 + struct page *scratch; 1539 + void *xattr_buf; 1540 + size_t xattr_len; 1541 + u64 cookie; 1542 + bool eof; 1543 + size_t copied; 1544 + }; 1545 + 1546 + struct nfs42_removexattrargs { 1547 + struct nfs4_sequence_args seq_args; 1548 + struct nfs_fh *fh; 1549 + const char *xattr_name; 1550 + }; 1551 + 1552 + struct nfs42_removexattrres { 1553 + struct nfs4_sequence_res seq_res; 1554 + struct nfs4_change_info cinfo; 1555 + }; 1556 + 1557 + #endif /* CONFIG_NFS_V4_2 */ 1502 1558 1503 1559 struct nfs_page; 1504 1560
+1
include/linux/sunrpc/xprt.h
··· 101 101 * used in the softirq. 102 102 */ 103 103 unsigned long rq_majortimeo; /* major timeout alarm */ 104 + unsigned long rq_minortimeo; /* minor timeout alarm */ 104 105 unsigned long rq_timeout; /* Current timeout value */ 105 106 ktime_t rq_rtt; /* round-trip time */ 106 107 unsigned int rq_retries; /* # of retries */
+1
include/uapi/linux/nfs_fs.h
··· 56 56 #define NFSDBG_PNFS 0x1000 57 57 #define NFSDBG_PNFS_LD 0x2000 58 58 #define NFSDBG_STATE 0x4000 59 + #define NFSDBG_XATTRCACHE 0x8000 59 60 #define NFSDBG_ALL 0xFFFF 60 61 61 62
+1 -1
net/sunrpc/rpc_pipe.c
··· 1510 1510 void unregister_rpc_pipefs(void) 1511 1511 { 1512 1512 rpc_clients_notifier_unregister(); 1513 - kmem_cache_destroy(rpc_inode_cachep); 1514 1513 unregister_filesystem(&rpc_pipe_fs_type); 1514 + kmem_cache_destroy(rpc_inode_cachep); 1515 1515 }
+9
net/sunrpc/xprt.c
··· 607 607 req->rq_majortimeo += xprt_calc_majortimeo(req); 608 608 } 609 609 610 + static void xprt_reset_minortimeo(struct rpc_rqst *req) 611 + { 612 + req->rq_minortimeo += req->rq_timeout; 613 + } 614 + 610 615 static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) 611 616 { 612 617 unsigned long time_init; ··· 623 618 time_init = xprt_abs_ktime_to_jiffies(task->tk_start); 624 619 req->rq_timeout = task->tk_client->cl_timeout->to_initval; 625 620 req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); 621 + req->rq_minortimeo = time_init + req->rq_timeout; 626 622 } 627 623 628 624 /** ··· 637 631 const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; 638 632 int status = 0; 639 633 634 + if (time_before(jiffies, req->rq_minortimeo)) 635 + return status; 640 636 if (time_before(jiffies, req->rq_majortimeo)) { 641 637 if (to->to_exponential) 642 638 req->rq_timeout <<= 1; ··· 657 649 spin_unlock(&xprt->transport_lock); 658 650 status = -ETIMEDOUT; 659 651 } 652 + xprt_reset_minortimeo(req); 660 653 661 654 if (req->rq_timeout == 0) { 662 655 printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");