Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nfs-for-4.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
"Another set of mainly bugfixes and a couple of cleanups. No new
functionality in this round.

Highlights include:

Stable patches:
- Fix a regression in /proc/self/mountstats
- Fix the pNFS flexfiles O_DIRECT support
- Fix high load average due to callback thread sleeping

Bugfixes:
- Various patches to fix the pNFS layoutcommit support
- Do not cache pNFS deviceids unless server notifications are enabled
- Fix a SUNRPC transport reconnection regression
- Make debugfs file creation failure non-fatal in SUNRPC
- Another fix for circular directory warnings on NFSv4 "junctioned"
mountpoints
- Fix locking around NFSv4.2 fallocate() support
- Truncating NFSv4 file opens should also sync O_DIRECT writes
- Prevent infinite loop in rpcrdma_ep_create()

Features:
- Various improvements to the RDMA transport code's handling of
memory registration
- Various code cleanups"

* tag 'nfs-for-4.1-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (55 commits)
fs/nfs: fix new compiler warning about boolean in switch
nfs: Remove unneeded casts in nfs
NFS: Don't attempt to decode missing directory entries
Revert "nfs: replace nfs_add_stats with nfs_inc_stats when add one"
NFS: Rename idmap.c to nfs4idmap.c
NFS: Move nfs_idmap.h into fs/nfs/
NFS: Remove CONFIG_NFS_V4 checks from nfs_idmap.h
NFS: Add a stub for GETDEVICELIST
nfs: remove WARN_ON_ONCE from nfs_direct_good_bytes
nfs: fix DIO good bytes calculation
nfs: Fetch MOUNTED_ON_FILEID when updating an inode
sunrpc: make debugfs file creation failure non-fatal
nfs: fix high load average due to callback thread sleeping
NFS: Reduce time spent holding the i_mutex during fallocate()
NFS: Don't zap caches on fallocate()
xprtrdma: Make rpcrdma_{un}map_one() into inline functions
xprtrdma: Handle non-SEND completions via a callout
xprtrdma: Add "open" memreg op
xprtrdma: Add "destroy MRs" memreg op
xprtrdma: Add "reset MRs" memreg op
...

+1151 -915
+1 -1
fs/nfs/Makefile
··· 22 22 obj-$(CONFIG_NFS_V4) += nfsv4.o 23 23 CFLAGS_nfs4trace.o += -I$(src) 24 24 nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ 25 - delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ 25 + delegation.o nfs4idmap.o callback.o callback_xdr.o callback_proc.o \ 26 26 nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \ 27 27 dns_resolve.o nfs4trace.o 28 28 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
+1
fs/nfs/blocklayout/blocklayout.c
··· 890 890 .free_deviceid_node = bl_free_deviceid_node, 891 891 .pg_read_ops = &bl_pg_read_ops, 892 892 .pg_write_ops = &bl_pg_write_ops, 893 + .sync = pnfs_generic_sync, 893 894 }; 894 895 895 896 static int __init nfs4blocklayout_init(void)
+1 -1
fs/nfs/blocklayout/dev.c
··· 33 33 container_of(d, struct pnfs_block_dev, node); 34 34 35 35 bl_free_device(dev); 36 - kfree(dev); 36 + kfree_rcu(dev, node.rcu); 37 37 } 38 38 39 39 static int
+3 -3
fs/nfs/callback.c
··· 128 128 if (try_to_freeze()) 129 129 continue; 130 130 131 - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE); 131 + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); 132 132 spin_lock_bh(&serv->sv_cb_lock); 133 133 if (!list_empty(&serv->sv_cb_list)) { 134 134 req = list_first_entry(&serv->sv_cb_list, ··· 142 142 error); 143 143 } else { 144 144 spin_unlock_bh(&serv->sv_cb_lock); 145 - /* schedule_timeout to game the hung task watchdog */ 146 - schedule_timeout(60 * HZ); 145 + schedule(); 147 146 finish_wait(&serv->sv_cb_waitq, &wq); 148 147 } 148 + flush_signals(current); 149 149 } 150 150 return 0; 151 151 }
-1
fs/nfs/client.c
··· 31 31 #include <linux/lockd/bind.h> 32 32 #include <linux/seq_file.h> 33 33 #include <linux/mount.h> 34 - #include <linux/nfs_idmap.h> 35 34 #include <linux/vfs.h> 36 35 #include <linux/inet.h> 37 36 #include <linux/in6.h>
+2 -2
fs/nfs/delegation.c
··· 378 378 if (freeme == NULL) 379 379 goto out; 380 380 } 381 - list_add_rcu(&delegation->super_list, &server->delegations); 381 + list_add_tail_rcu(&delegation->super_list, &server->delegations); 382 382 rcu_assign_pointer(nfsi->delegation, delegation); 383 383 delegation = NULL; 384 384 ··· 514 514 515 515 delegation = nfs_inode_detach_delegation(inode); 516 516 if (delegation != NULL) 517 - nfs_do_return_delegation(inode, delegation, 0); 517 + nfs_do_return_delegation(inode, delegation, 1); 518 518 } 519 519 520 520 /**
+4
fs/nfs/dir.c
··· 543 543 if (scratch == NULL) 544 544 return -ENOMEM; 545 545 546 + if (buflen == 0) 547 + goto out_nopages; 548 + 546 549 xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); 547 550 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); 548 551 ··· 567 564 break; 568 565 } while (!entry->eof); 569 566 567 + out_nopages: 570 568 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { 571 569 array = nfs_readdir_get_array(page); 572 570 if (!IS_ERR(array)) {
+18 -21
fs/nfs/direct.c
··· 129 129 int i; 130 130 ssize_t count; 131 131 132 - WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); 132 + if (dreq->mirror_count == 1) { 133 + dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; 134 + dreq->count += hdr->good_bytes; 135 + } else { 136 + /* mirrored writes */ 137 + count = dreq->mirrors[hdr->pgio_mirror_idx].count; 138 + if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { 139 + count = hdr->io_start + hdr->good_bytes - dreq->io_start; 140 + dreq->mirrors[hdr->pgio_mirror_idx].count = count; 141 + } 142 + /* update the dreq->count by finding the minimum agreed count from all 143 + * mirrors */ 144 + count = dreq->mirrors[0].count; 133 145 134 - count = dreq->mirrors[hdr->pgio_mirror_idx].count; 135 - if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { 136 - count = hdr->io_start + hdr->good_bytes - dreq->io_start; 137 - dreq->mirrors[hdr->pgio_mirror_idx].count = count; 146 + for (i = 1; i < dreq->mirror_count; i++) 147 + count = min(count, dreq->mirrors[i].count); 148 + 149 + dreq->count = count; 138 150 } 139 - 140 - /* update the dreq->count by finding the minimum agreed count from all 141 - * mirrors */ 142 - count = dreq->mirrors[0].count; 143 - 144 - for (i = 1; i < dreq->mirror_count; i++) 145 - count = min(count, dreq->mirrors[i].count); 146 - 147 - dreq->count = count; 148 151 } 149 152 150 153 /* ··· 261 258 if (!IS_SWAPFILE(inode)) 262 259 return 0; 263 260 264 - #ifndef CONFIG_NFS_SWAP 265 - dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", 266 - iocb->ki_filp, (long long) pos, iter->nr_segs); 267 - 268 - return -EINVAL; 269 - #else 270 261 VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); 271 262 272 263 if (iov_iter_rw(iter) == READ) 273 264 return nfs_file_direct_read(iocb, iter, pos); 274 265 return nfs_file_direct_write(iocb, iter); 275 - #endif /* CONFIG_NFS_SWAP */ 276 266 } 277 267 278 268 static void nfs_direct_release_pages(struct page **pages, unsigned int npages) ··· 1026 
1030 if (i_size_read(inode) < iocb->ki_pos) 1027 1031 i_size_write(inode, iocb->ki_pos); 1028 1032 spin_unlock(&inode->i_lock); 1033 + generic_write_sync(file, pos, result); 1029 1034 } 1030 1035 } 1031 1036 nfs_direct_req_release(dreq);
+2 -1
fs/nfs/file.c
··· 280 280 281 281 trace_nfs_fsync_enter(inode); 282 282 283 + nfs_inode_dio_wait(inode); 283 284 do { 284 285 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 285 286 if (ret != 0) ··· 783 782 * Flush all pending writes before doing anything 784 783 * with locks.. 785 784 */ 786 - nfs_sync_mapping(filp->f_mapping); 785 + vfs_fsync(filp, 0); 787 786 788 787 l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); 789 788 if (!IS_ERR(l_ctx)) {
+6 -4
fs/nfs/filelayout/filelayout.c
··· 258 258 hdr->res.verf->committed != NFS_DATA_SYNC) 259 259 return; 260 260 261 - pnfs_set_layoutcommit(hdr); 261 + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, 262 + hdr->mds_offset + hdr->res.count); 262 263 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 263 264 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 264 265 } ··· 374 373 } 375 374 376 375 if (data->verf.committed == NFS_UNSTABLE) 377 - pnfs_commit_set_layoutcommit(data); 376 + pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); 378 377 379 378 return 0; 380 379 } ··· 1087 1086 } 1088 1087 1089 1088 static void 1090 - filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) 1089 + filelayout_free_deviceid_node(struct nfs4_deviceid_node *d) 1091 1090 { 1092 1091 nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); 1093 1092 } ··· 1138 1137 .read_pagelist = filelayout_read_pagelist, 1139 1138 .write_pagelist = filelayout_write_pagelist, 1140 1139 .alloc_deviceid_node = filelayout_alloc_deviceid_node, 1141 - .free_deviceid_node = filelayout_free_deveiceid_node, 1140 + .free_deviceid_node = filelayout_free_deviceid_node, 1141 + .sync = pnfs_nfs_generic_sync, 1142 1142 }; 1143 1143 1144 1144 static int __init nfs4filelayout_init(void)
+1 -1
fs/nfs/filelayout/filelayoutdev.c
··· 55 55 nfs4_pnfs_ds_put(ds); 56 56 } 57 57 kfree(dsaddr->stripe_indices); 58 - kfree(dsaddr); 58 + kfree_rcu(dsaddr, id_node.rcu); 59 59 } 60 60 61 61 /* Decode opaque device data and return the result */
+7 -5
fs/nfs/flexfilelayout/flexfilelayout.c
··· 11 11 #include <linux/module.h> 12 12 13 13 #include <linux/sunrpc/metrics.h> 14 - #include <linux/nfs_idmap.h> 15 14 16 15 #include "flexfilelayout.h" 17 16 #include "../nfs4session.h" 17 + #include "../nfs4idmap.h" 18 18 #include "../internal.h" 19 19 #include "../delegation.h" 20 20 #include "../nfs4trace.h" ··· 891 891 static void 892 892 ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) 893 893 { 894 - pnfs_set_layoutcommit(hdr); 894 + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, 895 + hdr->mds_offset + hdr->res.count); 895 896 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 896 897 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 897 898 } ··· 1075 1074 } 1076 1075 1077 1076 if (data->verf.committed == NFS_UNSTABLE) 1078 - pnfs_commit_set_layoutcommit(data); 1077 + pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); 1079 1078 1080 1079 return 0; 1081 1080 } ··· 1415 1414 } 1416 1415 1417 1416 static void 1418 - ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d) 1417 + ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) 1419 1418 { 1420 1419 nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, 1421 1420 id_node)); ··· 1499 1498 .pg_read_ops = &ff_layout_pg_read_ops, 1500 1499 .pg_write_ops = &ff_layout_pg_write_ops, 1501 1500 .get_ds_info = ff_layout_get_ds_info, 1502 - .free_deviceid_node = ff_layout_free_deveiceid_node, 1501 + .free_deviceid_node = ff_layout_free_deviceid_node, 1503 1502 .mark_request_commit = pnfs_layout_mark_request_commit, 1504 1503 .clear_request_commit = pnfs_generic_clear_request_commit, 1505 1504 .scan_commit_lists = pnfs_generic_scan_commit_lists, ··· 1509 1508 .write_pagelist = ff_layout_write_pagelist, 1510 1509 .alloc_deviceid_node = ff_layout_alloc_deviceid_node, 1511 1510 .encode_layoutreturn = ff_layout_encode_layoutreturn, 1511 + .sync = pnfs_nfs_generic_sync, 1512 1512 }; 1513 1513 1514 1514 static int __init nfs4flexfilelayout_init(void)
+1 -1
fs/nfs/flexfilelayout/flexfilelayoutdev.c
··· 30 30 { 31 31 nfs4_print_deviceid(&mirror_ds->id_node.deviceid); 32 32 nfs4_pnfs_ds_put(mirror_ds->ds); 33 - kfree(mirror_ds); 33 + kfree_rcu(mirror_ds, id_node.rcu); 34 34 } 35 35 36 36 /* Decode opaque device data and construct new_ds using it */
+1 -1
fs/nfs/idmap.c fs/nfs/nfs4idmap.c
··· 36 36 #include <linux/types.h> 37 37 #include <linux/parser.h> 38 38 #include <linux/fs.h> 39 - #include <linux/nfs_idmap.h> 40 39 #include <net/net_namespace.h> 41 40 #include <linux/sunrpc/rpc_pipe_fs.h> 42 41 #include <linux/nfs_fs.h> ··· 48 49 49 50 #include "internal.h" 50 51 #include "netns.h" 52 + #include "nfs4idmap.h" 51 53 #include "nfs4trace.h" 52 54 53 55 #define NFS_UINT_MAXLEN 11
+27 -9
fs/nfs/inode.c
··· 133 133 nfs_clear_inode(inode); 134 134 } 135 135 136 + int nfs_sync_inode(struct inode *inode) 137 + { 138 + nfs_inode_dio_wait(inode); 139 + return nfs_wb_all(inode); 140 + } 141 + EXPORT_SYMBOL_GPL(nfs_sync_inode); 142 + 136 143 /** 137 144 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk 138 145 */ ··· 199 192 nfs_zap_caches_locked(inode); 200 193 spin_unlock(&inode->i_lock); 201 194 } 202 - EXPORT_SYMBOL_GPL(nfs_zap_caches); 203 195 204 196 void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) 205 197 { ··· 531 525 trace_nfs_setattr_enter(inode); 532 526 533 527 /* Write all dirty data */ 534 - if (S_ISREG(inode->i_mode)) { 535 - nfs_inode_dio_wait(inode); 536 - nfs_wb_all(inode); 537 - } 528 + if (S_ISREG(inode->i_mode)) 529 + nfs_sync_inode(inode); 538 530 539 531 fattr = nfs_alloc_fattr(); 540 532 if (fattr == NULL) ··· 648 644 trace_nfs_getattr_enter(inode); 649 645 /* Flush out writes to the server in order to update c/mtime. */ 650 646 if (S_ISREG(inode->i_mode)) { 651 - nfs_inode_dio_wait(inode); 652 - err = filemap_write_and_wait(inode->i_mapping); 647 + mutex_lock(&inode->i_mutex); 648 + err = nfs_sync_inode(inode); 649 + mutex_unlock(&inode->i_mutex); 653 650 if (err) 654 651 goto out; 655 652 } ··· 1593 1588 } 1594 1589 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); 1595 1590 1591 + 1592 + static inline bool nfs_fileid_valid(struct nfs_inode *nfsi, 1593 + struct nfs_fattr *fattr) 1594 + { 1595 + bool ret1 = true, ret2 = true; 1596 + 1597 + if (fattr->valid & NFS_ATTR_FATTR_FILEID) 1598 + ret1 = (nfsi->fileid == fattr->fileid); 1599 + if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) 1600 + ret2 = (nfsi->fileid == fattr->mounted_on_fileid); 1601 + return ret1 || ret2; 1602 + } 1603 + 1596 1604 /* 1597 1605 * Many nfs protocol calls return the new file attributes after 1598 1606 * an operation. 
Here we update the inode to reflect the state ··· 1632 1614 nfs_display_fhandle_hash(NFS_FH(inode)), 1633 1615 atomic_read(&inode->i_count), fattr->valid); 1634 1616 1635 - if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) { 1617 + if (!nfs_fileid_valid(nfsi, fattr)) { 1636 1618 printk(KERN_ERR "NFS: server %s error: fileid changed\n" 1637 1619 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", 1638 1620 NFS_SERVER(inode)->nfs_client->cl_hostname, ··· 1837 1819 struct inode *nfs_alloc_inode(struct super_block *sb) 1838 1820 { 1839 1821 struct nfs_inode *nfsi; 1840 - nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); 1822 + nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); 1841 1823 if (!nfsi) 1842 1824 return NULL; 1843 1825 nfsi->flags = 0UL;
+27 -4
fs/nfs/nfs42proc.c
··· 36 36 loff_t offset, loff_t len) 37 37 { 38 38 struct inode *inode = file_inode(filep); 39 + struct nfs_server *server = NFS_SERVER(inode); 39 40 struct nfs42_falloc_args args = { 40 41 .falloc_fh = NFS_FH(inode), 41 42 .falloc_offset = offset, 42 43 .falloc_length = len, 44 + .falloc_bitmask = server->cache_consistency_bitmask, 43 45 }; 44 - struct nfs42_falloc_res res; 45 - struct nfs_server *server = NFS_SERVER(inode); 46 + struct nfs42_falloc_res res = { 47 + .falloc_server = server, 48 + }; 46 49 int status; 47 50 48 51 msg->rpc_argp = &args; ··· 55 52 if (status) 56 53 return status; 57 54 58 - return nfs4_call_sync(server->client, server, msg, 59 - &args.seq_args, &res.seq_res, 0); 55 + res.falloc_fattr = nfs_alloc_fattr(); 56 + if (!res.falloc_fattr) 57 + return -ENOMEM; 58 + 59 + status = nfs4_call_sync(server->client, server, msg, 60 + &args.seq_args, &res.seq_res, 0); 61 + if (status == 0) 62 + status = nfs_post_op_update_inode(inode, res.falloc_fattr); 63 + 64 + kfree(res.falloc_fattr); 65 + return status; 60 66 } 61 67 62 68 static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, ··· 96 84 if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) 97 85 return -EOPNOTSUPP; 98 86 87 + mutex_lock(&inode->i_mutex); 88 + 99 89 err = nfs42_proc_fallocate(&msg, filep, offset, len); 100 90 if (err == -EOPNOTSUPP) 101 91 NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; 92 + 93 + mutex_unlock(&inode->i_mutex); 102 94 return err; 103 95 } 104 96 ··· 117 101 if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) 118 102 return -EOPNOTSUPP; 119 103 104 + nfs_wb_all(inode); 105 + mutex_lock(&inode->i_mutex); 106 + 120 107 err = nfs42_proc_fallocate(&msg, filep, offset, len); 108 + if (err == 0) 109 + truncate_pagecache_range(inode, offset, (offset + len) -1); 121 110 if (err == -EOPNOTSUPP) 122 111 NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; 112 + 113 + mutex_unlock(&inode->i_mutex); 123 114 return err; 124 115 } 125 116
+16 -4
fs/nfs/nfs42xdr.c
··· 25 25 26 26 #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ 27 27 encode_putfh_maxsz + \ 28 - encode_allocate_maxsz) 28 + encode_allocate_maxsz + \ 29 + encode_getattr_maxsz) 29 30 #define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ 30 31 decode_putfh_maxsz + \ 31 - decode_allocate_maxsz) 32 + decode_allocate_maxsz + \ 33 + decode_getattr_maxsz) 32 34 #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ 33 35 encode_putfh_maxsz + \ 34 - encode_deallocate_maxsz) 36 + encode_deallocate_maxsz + \ 37 + encode_getattr_maxsz) 35 38 #define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ 36 39 decode_putfh_maxsz + \ 37 - decode_deallocate_maxsz) 40 + decode_deallocate_maxsz + \ 41 + decode_getattr_maxsz) 38 42 #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ 39 43 encode_putfh_maxsz + \ 40 44 encode_seek_maxsz) ··· 96 92 encode_sequence(xdr, &args->seq_args, &hdr); 97 93 encode_putfh(xdr, args->falloc_fh, &hdr); 98 94 encode_allocate(xdr, args, &hdr); 95 + encode_getfattr(xdr, args->falloc_bitmask, &hdr); 99 96 encode_nops(&hdr); 100 97 } 101 98 ··· 115 110 encode_sequence(xdr, &args->seq_args, &hdr); 116 111 encode_putfh(xdr, args->falloc_fh, &hdr); 117 112 encode_deallocate(xdr, args, &hdr); 113 + encode_getfattr(xdr, args->falloc_bitmask, &hdr); 118 114 encode_nops(&hdr); 119 115 } 120 116 ··· 189 183 if (status) 190 184 goto out; 191 185 status = decode_allocate(xdr, res); 186 + if (status) 187 + goto out; 188 + decode_getfattr(xdr, res->falloc_fattr, res->falloc_server); 192 189 out: 193 190 return status; 194 191 } ··· 216 207 if (status) 217 208 goto out; 218 209 status = decode_deallocate(xdr, res); 210 + if (status) 211 + goto out; 212 + decode_getfattr(xdr, res->falloc_fattr, res->falloc_server); 219 213 out: 220 214 return status; 221 215 }
+1 -1
fs/nfs/nfs4client.c
··· 4 4 */ 5 5 #include <linux/module.h> 6 6 #include <linux/nfs_fs.h> 7 - #include <linux/nfs_idmap.h> 8 7 #include <linux/nfs_mount.h> 9 8 #include <linux/sunrpc/addr.h> 10 9 #include <linux/sunrpc/auth.h> ··· 14 15 #include "callback.h" 15 16 #include "delegation.h" 16 17 #include "nfs4session.h" 18 + #include "nfs4idmap.h" 17 19 #include "pnfs.h" 18 20 #include "netns.h" 19 21
+10 -10
fs/nfs/nfs4file.c
··· 10 10 #include "fscache.h" 11 11 #include "pnfs.h" 12 12 13 + #include "nfstrace.h" 14 + 13 15 #ifdef CONFIG_NFS_V4_2 14 16 #include "nfs42.h" 15 17 #endif ··· 59 57 if (openflags & O_TRUNC) { 60 58 attr.ia_valid |= ATTR_SIZE; 61 59 attr.ia_size = 0; 62 - nfs_wb_all(inode); 60 + nfs_sync_inode(inode); 63 61 } 64 62 65 63 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); ··· 102 100 int ret; 103 101 struct inode *inode = file_inode(file); 104 102 103 + trace_nfs_fsync_enter(inode); 104 + 105 + nfs_inode_dio_wait(inode); 105 106 do { 106 107 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 107 108 if (ret != 0) ··· 112 107 mutex_lock(&inode->i_mutex); 113 108 ret = nfs_file_fsync_commit(file, start, end, datasync); 114 109 if (!ret) 115 - ret = pnfs_layoutcommit_inode(inode, true); 110 + ret = pnfs_sync_inode(inode, !!datasync); 116 111 mutex_unlock(&inode->i_mutex); 117 112 /* 118 113 * If nfs_file_fsync_commit detected a server reboot, then ··· 123 118 end = LLONG_MAX; 124 119 } while (ret == -EAGAIN); 125 120 121 + trace_nfs_fsync_exit(inode, ret); 126 122 return ret; 127 123 } 128 124 ··· 158 152 if (ret < 0) 159 153 return ret; 160 154 161 - mutex_lock(&inode->i_mutex); 162 155 if (mode & FALLOC_FL_PUNCH_HOLE) 163 - ret = nfs42_proc_deallocate(filep, offset, len); 164 - else 165 - ret = nfs42_proc_allocate(filep, offset, len); 166 - mutex_unlock(&inode->i_mutex); 167 - 168 - nfs_zap_caches(inode); 169 - return ret; 156 + return nfs42_proc_deallocate(filep, offset, len); 157 + return nfs42_proc_allocate(filep, offset, len); 170 158 } 171 159 #endif /* CONFIG_NFS_V4_2 */ 172 160
+14 -9
fs/nfs/nfs4proc.c
··· 51 51 #include <linux/namei.h> 52 52 #include <linux/mount.h> 53 53 #include <linux/module.h> 54 - #include <linux/nfs_idmap.h> 55 54 #include <linux/xattr.h> 56 55 #include <linux/utsname.h> 57 56 #include <linux/freezer.h> ··· 62 63 #include "callback.h" 63 64 #include "pnfs.h" 64 65 #include "netns.h" 66 + #include "nfs4idmap.h" 65 67 #include "nfs4session.h" 66 68 #include "fscache.h" 67 69 ··· 185 185 | FATTR4_WORD1_SPACE_USED 186 186 | FATTR4_WORD1_TIME_ACCESS 187 187 | FATTR4_WORD1_TIME_METADATA 188 - | FATTR4_WORD1_TIME_MODIFY, 188 + | FATTR4_WORD1_TIME_MODIFY 189 + | FATTR4_WORD1_MOUNTED_ON_FILEID, 189 190 #ifdef CONFIG_NFS_V4_SECURITY_LABEL 190 191 FATTR4_WORD2_SECURITY_LABEL 191 192 #endif ··· 3096 3095 struct nfs_fsinfo *info, 3097 3096 bool auth_probe) 3098 3097 { 3099 - int status; 3098 + int status = 0; 3100 3099 3101 - switch (auth_probe) { 3102 - case false: 3100 + if (!auth_probe) 3103 3101 status = nfs4_lookup_root(server, fhandle, info); 3104 - if (status != -NFS4ERR_WRONGSEC) 3105 - break; 3106 - default: 3102 + 3103 + if (auth_probe || status == NFS4ERR_WRONGSEC) 3107 3104 status = nfs4_do_find_root_sec(server, fhandle, info); 3108 - } 3109 3105 3110 3106 if (status == 0) 3111 3107 status = nfs4_server_capabilities(server, fhandle); ··· 7942 7944 { 7943 7945 struct nfs4_getdeviceinfo_args args = { 7944 7946 .pdev = pdev, 7947 + .notify_types = NOTIFY_DEVICEID4_CHANGE | 7948 + NOTIFY_DEVICEID4_DELETE, 7945 7949 }; 7946 7950 struct nfs4_getdeviceinfo_res res = { 7947 7951 .pdev = pdev, ··· 7958 7958 7959 7959 dprintk("--> %s\n", __func__); 7960 7960 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 7961 + if (res.notification & ~args.notify_types) 7962 + dprintk("%s: unsupported notification\n", __func__); 7963 + if (res.notification != args.notify_types) 7964 + pdev->nocache = 1; 7965 + 7961 7966 dprintk("<-- %s status=%d\n", __func__, status); 7962 7967 7963 7968 return status;
+1 -1
fs/nfs/nfs4state.c
··· 42 42 #include <linux/slab.h> 43 43 #include <linux/fs.h> 44 44 #include <linux/nfs_fs.h> 45 - #include <linux/nfs_idmap.h> 46 45 #include <linux/kthread.h> 47 46 #include <linux/module.h> 48 47 #include <linux/random.h> ··· 56 57 #include "callback.h" 57 58 #include "delegation.h" 58 59 #include "internal.h" 60 + #include "nfs4idmap.h" 59 61 #include "nfs4session.h" 60 62 #include "pnfs.h" 61 63 #include "netns.h"
+4 -3
fs/nfs/nfs4super.c
··· 3 3 */ 4 4 #include <linux/init.h> 5 5 #include <linux/module.h> 6 - #include <linux/nfs_idmap.h> 7 6 #include <linux/nfs4_mount.h> 8 7 #include <linux/nfs_fs.h> 9 8 #include "delegation.h" 10 9 #include "internal.h" 11 10 #include "nfs4_fs.h" 11 + #include "nfs4idmap.h" 12 12 #include "dns_resolve.h" 13 13 #include "pnfs.h" 14 14 #include "nfs.h" ··· 91 91 { 92 92 truncate_inode_pages_final(&inode->i_data); 93 93 clear_inode(inode); 94 - pnfs_return_layout(inode); 95 - pnfs_destroy_layout(NFS_I(inode)); 96 94 /* If we are holding a delegation, return it! */ 97 95 nfs_inode_return_delegation_noreclaim(inode); 96 + /* Note that above delegreturn would trigger pnfs return-on-close */ 97 + pnfs_return_layout(inode); 98 + pnfs_destroy_layout(NFS_I(inode)); 98 99 /* First call standard NFS clear_inode() code */ 99 100 nfs_clear_inode(inode); 100 101 }
+1 -1
fs/nfs/nfs4sysctl.c
··· 6 6 * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com> 7 7 */ 8 8 #include <linux/sysctl.h> 9 - #include <linux/nfs_idmap.h> 10 9 #include <linux/nfs_fs.h> 11 10 12 11 #include "nfs4_fs.h" 12 + #include "nfs4idmap.h" 13 13 #include "callback.h" 14 14 15 15 static const int nfs_set_port_min = 0;
+12 -10
fs/nfs/nfs4xdr.c
··· 52 52 #include <linux/nfs.h> 53 53 #include <linux/nfs4.h> 54 54 #include <linux/nfs_fs.h> 55 - #include <linux/nfs_idmap.h> 56 55 57 56 #include "nfs4_fs.h" 58 57 #include "internal.h" 58 + #include "nfs4idmap.h" 59 59 #include "nfs4session.h" 60 60 #include "pnfs.h" 61 61 #include "netns.h" ··· 1920 1920 1921 1921 p = reserve_space(xdr, 4 + 4); 1922 1922 *p++ = cpu_to_be32(1); /* bitmap length */ 1923 - *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE); 1923 + *p++ = cpu_to_be32(args->notify_types); 1924 1924 } 1925 1925 1926 1926 static void ··· 5753 5753 5754 5754 #if defined(CONFIG_NFS_V4_1) 5755 5755 static int decode_getdeviceinfo(struct xdr_stream *xdr, 5756 - struct pnfs_device *pdev) 5756 + struct nfs4_getdeviceinfo_res *res) 5757 5757 { 5758 + struct pnfs_device *pdev = res->pdev; 5758 5759 __be32 *p; 5759 5760 uint32_t len, type; 5760 5761 int status; ··· 5803 5802 if (unlikely(!p)) 5804 5803 goto out_overflow; 5805 5804 5806 - if (be32_to_cpup(p++) & 5807 - ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) { 5808 - dprintk("%s: unsupported notification\n", 5809 - __func__); 5810 - } 5811 - 5805 + res->notification = be32_to_cpup(p++); 5812 5806 for (i = 1; i < len; i++) { 5813 5807 if (be32_to_cpup(p++)) { 5814 5808 dprintk("%s: unsupported notification\n", ··· 7057 7061 status = decode_sequence(xdr, &res->seq_res, rqstp); 7058 7062 if (status != 0) 7059 7063 goto out; 7060 - status = decode_getdeviceinfo(xdr, res->pdev); 7064 + status = decode_getdeviceinfo(xdr, res); 7061 7065 out: 7062 7066 return status; 7063 7067 } ··· 7361 7365 .p_name = #proc, \ 7362 7366 } 7363 7367 7368 + #define STUB(proc) \ 7369 + [NFSPROC4_CLNT_##proc] = { \ 7370 + .p_name = #proc, \ 7371 + } 7372 + 7364 7373 struct rpc_procinfo nfs4_procedures[] = { 7365 7374 PROC(READ, enc_read, dec_read), 7366 7375 PROC(WRITE, enc_write, dec_write), ··· 7418 7417 PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), 7419 7418 PROC(TEST_STATEID, 
enc_test_stateid, dec_test_stateid), 7420 7419 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), 7420 + STUB(GETDEVICELIST), 7421 7421 PROC(BIND_CONN_TO_SESSION, 7422 7422 enc_bind_conn_to_session, dec_bind_conn_to_session), 7423 7423 PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
+3
fs/nfs/nfstrace.c
··· 7 7 8 8 #define CREATE_TRACE_POINTS 9 9 #include "nfstrace.h" 10 + 11 + EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter); 12 + EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
+3 -1
fs/nfs/objlayout/objio_osd.c
··· 57 57 58 58 dprintk("%s: free od=%p\n", __func__, de->od.od); 59 59 osduld_put_device(de->od.od); 60 - kfree(de); 60 + kfree_rcu(d, rcu); 61 61 } 62 62 63 63 struct objio_segment { ··· 636 636 .write_pagelist = objlayout_write_pagelist, 637 637 .pg_read_ops = &objio_pg_read_ops, 638 638 .pg_write_ops = &objio_pg_write_ops, 639 + 640 + .sync = pnfs_generic_sync, 639 641 640 642 .free_deviceid_node = objio_free_deviceid_node, 641 643
+24 -46
fs/nfs/pnfs.c
··· 1090 1090 pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ 1091 1091 spin_unlock(&ino->i_lock); 1092 1092 pnfs_free_lseg_list(&tmp_list); 1093 + pnfs_layoutcommit_inode(ino, true); 1093 1094 return true; 1094 1095 1095 1096 out_noroc: ··· 1105 1104 } 1106 1105 } 1107 1106 spin_unlock(&ino->i_lock); 1108 - if (layoutreturn) 1107 + if (layoutreturn) { 1108 + pnfs_layoutcommit_inode(ino, true); 1109 1109 pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); 1110 + } 1110 1111 return false; 1111 1112 } 1112 1113 ··· 1844 1841 { 1845 1842 trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); 1846 1843 if (!hdr->pnfs_error) { 1847 - pnfs_set_layoutcommit(hdr); 1844 + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, 1845 + hdr->mds_offset + hdr->res.count); 1848 1846 hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 1849 1847 } else 1850 1848 pnfs_ld_handle_write_error(hdr); ··· 1906 1902 pnfs_put_lseg(hdr->lseg); 1907 1903 nfs_pgio_header_free(hdr); 1908 1904 } 1909 - EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1910 1905 1911 1906 int 1912 1907 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ··· 2035 2032 pnfs_put_lseg(hdr->lseg); 2036 2033 nfs_pgio_header_free(hdr); 2037 2034 } 2038 - EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 2039 2035 2040 2036 int 2041 2037 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) ··· 2101 2099 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 2102 2100 2103 2101 void 2104 - pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) 2102 + pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, 2103 + loff_t end_pos) 2105 2104 { 2106 - struct inode *inode = hdr->inode; 2107 2105 struct nfs_inode *nfsi = NFS_I(inode); 2108 - loff_t end_pos = hdr->mds_offset + hdr->res.count; 2109 2106 bool mark_as_dirty = false; 2110 2107 2111 2108 spin_lock(&inode->i_lock); 2112 2109 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 2110 + nfsi->layout->plh_lwb = end_pos; 2113 2111 mark_as_dirty = true; 2114 2112 dprintk("%s: Set 
layoutcommit for inode %lu ", 2115 2113 __func__, inode->i_ino); 2116 - } 2117 - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { 2118 - /* references matched in nfs4_layoutcommit_release */ 2119 - pnfs_get_lseg(hdr->lseg); 2120 - } 2121 - if (end_pos > nfsi->layout->plh_lwb) 2114 + } else if (end_pos > nfsi->layout->plh_lwb) 2122 2115 nfsi->layout->plh_lwb = end_pos; 2116 + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { 2117 + /* references matched in nfs4_layoutcommit_release */ 2118 + pnfs_get_lseg(lseg); 2119 + } 2123 2120 spin_unlock(&inode->i_lock); 2124 2121 dprintk("%s: lseg %p end_pos %llu\n", 2125 - __func__, hdr->lseg, nfsi->layout->plh_lwb); 2122 + __func__, lseg, nfsi->layout->plh_lwb); 2126 2123 2127 2124 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 2128 2125 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ ··· 2129 2128 mark_inode_dirty_sync(inode); 2130 2129 } 2131 2130 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 2132 - 2133 - void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) 2134 - { 2135 - struct inode *inode = data->inode; 2136 - struct nfs_inode *nfsi = NFS_I(inode); 2137 - bool mark_as_dirty = false; 2138 - 2139 - spin_lock(&inode->i_lock); 2140 - if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 2141 - mark_as_dirty = true; 2142 - dprintk("%s: Set layoutcommit for inode %lu ", 2143 - __func__, inode->i_ino); 2144 - } 2145 - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { 2146 - /* references matched in nfs4_layoutcommit_release */ 2147 - pnfs_get_lseg(data->lseg); 2148 - } 2149 - if (data->lwb > nfsi->layout->plh_lwb) 2150 - nfsi->layout->plh_lwb = data->lwb; 2151 - spin_unlock(&inode->i_lock); 2152 - dprintk("%s: lseg %p end_pos %llu\n", 2153 - __func__, data->lseg, nfsi->layout->plh_lwb); 2154 - 2155 - /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 2156 - * will be a noop because 
NFS_INO_LAYOUTCOMMIT will not be set */ 2157 - if (mark_as_dirty) 2158 - mark_inode_dirty_sync(inode); 2159 - } 2160 - EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); 2161 2131 2162 2132 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) 2163 2133 { ··· 2188 2216 pnfs_list_write_lseg(inode, &data->lseg_list); 2189 2217 2190 2218 end_pos = nfsi->layout->plh_lwb; 2191 - nfsi->layout->plh_lwb = 0; 2192 2219 2193 2220 nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); 2194 2221 spin_unlock(&inode->i_lock); ··· 2204 2233 status = ld->prepare_layoutcommit(&data->args); 2205 2234 if (status) { 2206 2235 spin_lock(&inode->i_lock); 2207 - if (end_pos < nfsi->layout->plh_lwb) 2236 + set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); 2237 + if (end_pos > nfsi->layout->plh_lwb) 2208 2238 nfsi->layout->plh_lwb = end_pos; 2209 2239 spin_unlock(&inode->i_lock); 2210 2240 put_rpccred(data->cred); 2211 - set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); 2212 2241 goto clear_layoutcommitting; 2213 2242 } 2214 2243 } ··· 2228 2257 goto out; 2229 2258 } 2230 2259 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); 2260 + 2261 + int 2262 + pnfs_generic_sync(struct inode *inode, bool datasync) 2263 + { 2264 + return pnfs_layoutcommit_inode(inode, true); 2265 + } 2266 + EXPORT_SYMBOL_GPL(pnfs_generic_sync); 2231 2267 2232 2268 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 2233 2269 {
+22 -2
fs/nfs/pnfs.h
··· 155 155 int how, 156 156 struct nfs_commit_info *cinfo); 157 157 158 + int (*sync)(struct inode *inode, bool datasync); 159 + 158 160 /* 159 161 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 160 162 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS ··· 205 203 struct page **pages; 206 204 unsigned int pgbase; 207 205 unsigned int pglen; /* reply buffer length */ 206 + unsigned char nocache : 1;/* May not be cached */ 208 207 }; 209 208 210 209 #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 ··· 266 263 void pnfs_roc_release(struct inode *ino); 267 264 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 268 265 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); 269 - void pnfs_set_layoutcommit(struct nfs_pgio_header *); 270 - void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); 266 + void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); 271 267 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 272 268 int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 269 + int pnfs_generic_sync(struct inode *inode, bool datasync); 270 + int pnfs_nfs_generic_sync(struct inode *inode, bool datasync); 273 271 int _pnfs_return_layout(struct inode *); 274 272 int pnfs_commit_and_return_layout(struct inode *); 275 273 void pnfs_ld_write_done(struct nfs_pgio_header *); ··· 295 291 enum { 296 292 NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ 297 293 NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */ 294 + NFS_DEVICEID_NOCACHE, /* device may not be cached */ 298 295 }; 299 296 300 297 /* pnfs_dev.c */ ··· 307 302 unsigned long flags; 308 303 unsigned long timestamp_unavailable; 309 304 struct nfs4_deviceid deviceid; 305 + struct rcu_head rcu; 310 306 atomic_t ref; 311 307 }; 312 308 ··· 492 486 return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; 493 487 } 494 488 489 + static inline int 490 + pnfs_sync_inode(struct 
inode *inode, bool datasync) 491 + { 492 + if (!pnfs_enabled_sb(NFS_SERVER(inode))) 493 + return 0; 494 + return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync); 495 + } 496 + 495 497 static inline bool 496 498 pnfs_layoutcommit_outstanding(struct inode *inode) 497 499 { ··· 580 566 pnfs_ld_read_whole_page(struct inode *inode) 581 567 { 582 568 return false; 569 + } 570 + 571 + static inline int 572 + pnfs_sync_inode(struct inode *inode, bool datasync) 573 + { 574 + return 0; 583 575 } 584 576 585 577 static inline bool
+13 -8
fs/nfs/pnfs_dev.c
··· 149 149 */ 150 150 d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, 151 151 gfp_flags); 152 + if (d && pdev->nocache) 153 + set_bit(NFS_DEVICEID_NOCACHE, &d->flags); 152 154 153 155 out_free_pages: 154 156 for (i = 0; i < max_pages; i++) ··· 177 175 rcu_read_lock(); 178 176 d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, 179 177 hash); 180 - if (d != NULL) 181 - atomic_inc(&d->ref); 178 + if (d != NULL && !atomic_inc_not_zero(&d->ref)) 179 + d = NULL; 182 180 rcu_read_unlock(); 183 181 return d; 184 182 } ··· 237 235 return; 238 236 } 239 237 hlist_del_init_rcu(&d->node); 238 + clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); 240 239 spin_unlock(&nfs4_deviceid_lock); 241 - synchronize_rcu(); 242 240 243 241 /* balance the initial ref set in pnfs_insert_deviceid */ 244 - if (atomic_dec_and_test(&d->ref)) 245 - d->ld->free_deviceid_node(d); 242 + nfs4_put_deviceid_node(d); 246 243 } 247 244 EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); 248 245 ··· 272 271 bool 273 272 nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) 274 273 { 274 + if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) { 275 + if (atomic_add_unless(&d->ref, -1, 2)) 276 + return false; 277 + nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid); 278 + } 275 279 if (!atomic_dec_and_test(&d->ref)) 276 280 return false; 277 281 d->ld->free_deviceid_node(d); ··· 320 314 if (d->nfs_client == clp && atomic_read(&d->ref)) { 321 315 hlist_del_init_rcu(&d->node); 322 316 hlist_add_head(&d->tmpnode, &tmp); 317 + clear_bit(NFS_DEVICEID_NOCACHE, &d->flags); 323 318 } 324 319 rcu_read_unlock(); 325 320 spin_unlock(&nfs4_deviceid_lock); ··· 328 321 if (hlist_empty(&tmp)) 329 322 return; 330 323 331 - synchronize_rcu(); 332 324 while (!hlist_empty(&tmp)) { 333 325 d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); 334 326 hlist_del(&d->tmpnode); 335 - if (atomic_dec_and_test(&d->ref)) 336 - d->ld->free_deviceid_node(d); 327 + nfs4_put_deviceid_node(d); 337 328 } 338 329 } 339 330
+11 -1
fs/nfs/pnfs_nfs.c
··· 561 561 return(get_v3_ds_connect != NULL); 562 562 } 563 563 564 - void __exit nfs4_pnfs_v3_ds_connect_unload(void) 564 + void nfs4_pnfs_v3_ds_connect_unload(void) 565 565 { 566 566 if (get_v3_ds_connect) { 567 567 symbol_put(nfs3_set_ds_client); ··· 868 868 nfs_request_add_commit_list(req, list, cinfo); 869 869 } 870 870 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); 871 + 872 + int 873 + pnfs_nfs_generic_sync(struct inode *inode, bool datasync) 874 + { 875 + if (datasync) 876 + return 0; 877 + return pnfs_layoutcommit_inode(inode, true); 878 + } 879 + EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync); 880 +
+1 -1
fs/nfs/read.c
··· 284 284 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 285 285 page, PAGE_CACHE_SIZE, page_file_index(page)); 286 286 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); 287 - nfs_inc_stats(inode, NFSIOS_READPAGES); 287 + nfs_add_stats(inode, NFSIOS_READPAGES, 1); 288 288 289 289 /* 290 290 * Try to flush any pending writes to the file..
+1 -3
fs/nfs/super.c
··· 43 43 #include <linux/seq_file.h> 44 44 #include <linux/mount.h> 45 45 #include <linux/namei.h> 46 - #include <linux/nfs_idmap.h> 47 46 #include <linux/vfs.h> 48 47 #include <linux/inet.h> 49 48 #include <linux/in6.h> ··· 2192 2193 data->version != nfss->nfs_client->rpc_ops->version || 2193 2194 data->minorversion != nfss->nfs_client->cl_minorversion || 2194 2195 data->retrans != nfss->client->cl_timeout->to_retries || 2195 - data->selected_flavor != nfss->client->cl_auth->au_flavor || 2196 + !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || 2196 2197 data->acregmin != nfss->acregmin / HZ || 2197 2198 data->acregmax != nfss->acregmax / HZ || 2198 2199 data->acdirmin != nfss->acdirmin / HZ || ··· 2240 2241 data->wsize = nfss->wsize; 2241 2242 data->retrans = nfss->client->cl_timeout->to_retries; 2242 2243 data->selected_flavor = nfss->client->cl_auth->au_flavor; 2243 - data->auth_info = nfss->auth_info; 2244 2244 data->acregmin = nfss->acregmin / HZ; 2245 2245 data->acregmax = nfss->acregmax / HZ; 2246 2246 data->acdirmin = nfss->acdirmin / HZ;
+7 -8
fs/nfs/write.c
··· 580 580 int ret; 581 581 582 582 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 583 - nfs_inc_stats(inode, NFSIOS_WRITEPAGES); 583 + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 584 584 585 585 nfs_pageio_cond_complete(pgio, page_file_index(page)); 586 586 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); ··· 1840 1840 */ 1841 1841 int nfs_wb_all(struct inode *inode) 1842 1842 { 1843 - struct writeback_control wbc = { 1844 - .sync_mode = WB_SYNC_ALL, 1845 - .nr_to_write = LONG_MAX, 1846 - .range_start = 0, 1847 - .range_end = LLONG_MAX, 1848 - }; 1849 1843 int ret; 1850 1844 1851 1845 trace_nfs_writeback_inode_enter(inode); 1852 1846 1853 - ret = sync_inode(inode, &wbc); 1847 + ret = filemap_write_and_wait(inode->i_mapping); 1848 + if (!ret) { 1849 + ret = nfs_commit_inode(inode, FLUSH_SYNC); 1850 + if (!ret) 1851 + pnfs_sync_inode(inode, true); 1852 + } 1854 1853 1855 1854 trace_nfs_writeback_inode_exit(inode, ret); 1856 1855 return ret;
+1
include/linux/nfs_fs.h
··· 511 511 * Try to write back everything synchronously (but check the 512 512 * return value!) 513 513 */ 514 + extern int nfs_sync_inode(struct inode *inode); 514 515 extern int nfs_wb_all(struct inode *inode); 515 516 extern int nfs_wb_page(struct inode *inode, struct page* page); 516 517 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
+1 -12
include/linux/nfs_idmap.h fs/nfs/nfs4idmap.h
··· 1 1 /* 2 - * include/linux/nfs_idmap.h 2 + * fs/nfs/nfs4idmap.h 3 3 * 4 4 * UID and GID to name mapping for clients. 5 5 * ··· 46 46 struct nfs_fattr; 47 47 struct nfs4_string; 48 48 49 - #if IS_ENABLED(CONFIG_NFS_V4) 50 49 int nfs_idmap_init(void); 51 50 void nfs_idmap_quit(void); 52 - #else 53 - static inline int nfs_idmap_init(void) 54 - { 55 - return 0; 56 - } 57 - 58 - static inline void nfs_idmap_quit(void) 59 - {} 60 - #endif 61 - 62 51 int nfs_idmap_new(struct nfs_client *); 63 52 void nfs_idmap_delete(struct nfs_client *); 64 53
+6
include/linux/nfs_xdr.h
··· 255 255 struct nfs4_getdeviceinfo_args { 256 256 struct nfs4_sequence_args seq_args; 257 257 struct pnfs_device *pdev; 258 + __u32 notify_types; 258 259 }; 259 260 260 261 struct nfs4_getdeviceinfo_res { 261 262 struct nfs4_sequence_res seq_res; 262 263 struct pnfs_device *pdev; 264 + __u32 notification; 263 265 }; 264 266 265 267 struct nfs4_layoutcommit_args { ··· 1273 1271 nfs4_stateid falloc_stateid; 1274 1272 u64 falloc_offset; 1275 1273 u64 falloc_length; 1274 + const u32 *falloc_bitmask; 1276 1275 }; 1277 1276 1278 1277 struct nfs42_falloc_res { 1279 1278 struct nfs4_sequence_res seq_res; 1280 1279 unsigned int status; 1280 + 1281 + struct nfs_fattr *falloc_fattr; 1282 + const struct nfs_server *falloc_server; 1281 1283 }; 1282 1284 1283 1285 struct nfs42_seek_args {
+7 -1
include/linux/sunrpc/msg_prot.h
··· 142 142 (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) 143 143 144 144 /* 145 - * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. 145 + * Well-known netids. See: 146 + * 147 + * http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml 146 148 */ 147 149 #define RPCBIND_NETID_UDP "udp" 148 150 #define RPCBIND_NETID_TCP "tcp" 151 + #define RPCBIND_NETID_RDMA "rdma" 152 + #define RPCBIND_NETID_SCTP "sctp" 149 153 #define RPCBIND_NETID_UDP6 "udp6" 150 154 #define RPCBIND_NETID_TCP6 "tcp6" 155 + #define RPCBIND_NETID_RDMA6 "rdma6" 156 + #define RPCBIND_NETID_SCTP6 "sctp6" 151 157 #define RPCBIND_NETID_LOCAL "local" 152 158 153 159 /*
-5
include/linux/sunrpc/xprtrdma.h
··· 41 41 #define _LINUX_SUNRPC_XPRTRDMA_H 42 42 43 43 /* 44 - * rpcbind (v3+) RDMA netid. 45 - */ 46 - #define RPCBIND_NETID_RDMA "rdma" 47 - 48 - /* 49 44 * Constants. Max RPC/NFS header is big enough to account for 50 45 * additional marshaling buffers passed down by Linux client. 51 46 *
+1 -1
include/uapi/linux/nfs_idmap.h
··· 1 1 /* 2 - * include/linux/nfs_idmap.h 2 + * include/uapi/linux/nfs_idmap.h 3 3 * 4 4 * UID and GID to name mapping for clients. 5 5 *
+2 -2
net/sunrpc/sched.c
··· 89 89 if (!task->tk_timeout) 90 90 return; 91 91 92 - dprintk("RPC: %5u setting alarm for %lu ms\n", 93 - task->tk_pid, task->tk_timeout * 1000 / HZ); 92 + dprintk("RPC: %5u setting alarm for %u ms\n", 93 + task->tk_pid, jiffies_to_msecs(task->tk_timeout)); 94 94 95 95 task->u.tk_wait.expires = jiffies + task->tk_timeout; 96 96 if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
+12 -10
net/sunrpc/xprt.c
··· 326 326 xprt_clear_locked(xprt); 327 327 } 328 328 329 + static void xprt_task_clear_bytes_sent(struct rpc_task *task) 330 + { 331 + if (task != NULL) { 332 + struct rpc_rqst *req = task->tk_rqstp; 333 + if (req != NULL) 334 + req->rq_bytes_sent = 0; 335 + } 336 + } 337 + 329 338 /** 330 339 * xprt_release_xprt - allow other requests to use a transport 331 340 * @xprt: transport with other tasks potentially waiting ··· 345 336 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 346 337 { 347 338 if (xprt->snd_task == task) { 348 - if (task != NULL) { 349 - struct rpc_rqst *req = task->tk_rqstp; 350 - if (req != NULL) 351 - req->rq_bytes_sent = 0; 352 - } 339 + xprt_task_clear_bytes_sent(task); 353 340 xprt_clear_locked(xprt); 354 341 __xprt_lock_write_next(xprt); 355 342 } ··· 363 358 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) 364 359 { 365 360 if (xprt->snd_task == task) { 366 - if (task != NULL) { 367 - struct rpc_rqst *req = task->tk_rqstp; 368 - if (req != NULL) 369 - req->rq_bytes_sent = 0; 370 - } 361 + xprt_task_clear_bytes_sent(task); 371 362 xprt_clear_locked(xprt); 372 363 __xprt_lock_write_next_cong(xprt); 373 364 } ··· 701 700 goto out; 702 701 if (xprt->snd_task != task) 703 702 goto out; 703 + xprt_task_clear_bytes_sent(task); 704 704 xprt->snd_task = cookie; 705 705 ret = true; 706 706 out:
+2 -1
net/sunrpc/xprtrdma/Makefile
··· 1 1 obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o 2 2 3 - xprtrdma-y := transport.o rpc_rdma.o verbs.o 3 + xprtrdma-y := transport.o rpc_rdma.o verbs.o \ 4 + fmr_ops.o frwr_ops.o physical_ops.o 4 5 5 6 obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o 6 7
+208
net/sunrpc/xprtrdma/fmr_ops.c
··· 1 + /* 2 + * Copyright (c) 2015 Oracle. All rights reserved. 3 + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 + */ 5 + 6 + /* Lightweight memory registration using Fast Memory Regions (FMR). 7 + * Referred to sometimes as MTHCAFMR mode. 8 + * 9 + * FMR uses synchronous memory registration and deregistration. 10 + * FMR registration is known to be fast, but FMR deregistration 11 + * can take tens of usecs to complete. 12 + */ 13 + 14 + #include "xprt_rdma.h" 15 + 16 + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 17 + # define RPCDBG_FACILITY RPCDBG_TRANS 18 + #endif 19 + 20 + /* Maximum scatter/gather per FMR */ 21 + #define RPCRDMA_MAX_FMR_SGES (64) 22 + 23 + static int 24 + fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 25 + struct rpcrdma_create_data_internal *cdata) 26 + { 27 + return 0; 28 + } 29 + 30 + /* FMR mode conveys up to 64 pages of payload per chunk segment. 31 + */ 32 + static size_t 33 + fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) 34 + { 35 + return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 36 + rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES); 37 + } 38 + 39 + static int 40 + fmr_op_init(struct rpcrdma_xprt *r_xprt) 41 + { 42 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 43 + int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; 44 + struct ib_fmr_attr fmr_attr = { 45 + .max_pages = RPCRDMA_MAX_FMR_SGES, 46 + .max_maps = 1, 47 + .page_shift = PAGE_SHIFT 48 + }; 49 + struct ib_pd *pd = r_xprt->rx_ia.ri_pd; 50 + struct rpcrdma_mw *r; 51 + int i, rc; 52 + 53 + INIT_LIST_HEAD(&buf->rb_mws); 54 + INIT_LIST_HEAD(&buf->rb_all); 55 + 56 + i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; 57 + dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); 58 + 59 + while (i--) { 60 + r = kzalloc(sizeof(*r), GFP_KERNEL); 61 + if (!r) 62 + return -ENOMEM; 63 + 64 + r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); 65 + if (IS_ERR(r->r.fmr)) 66 + goto out_fmr_err; 67 + 68 + list_add(&r->mw_list, 
&buf->rb_mws); 69 + list_add(&r->mw_all, &buf->rb_all); 70 + } 71 + return 0; 72 + 73 + out_fmr_err: 74 + rc = PTR_ERR(r->r.fmr); 75 + dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); 76 + kfree(r); 77 + return rc; 78 + } 79 + 80 + /* Use the ib_map_phys_fmr() verb to register a memory region 81 + * for remote access via RDMA READ or RDMA WRITE. 82 + */ 83 + static int 84 + fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 85 + int nsegs, bool writing) 86 + { 87 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 88 + struct ib_device *device = ia->ri_id->device; 89 + enum dma_data_direction direction = rpcrdma_data_dir(writing); 90 + struct rpcrdma_mr_seg *seg1 = seg; 91 + struct rpcrdma_mw *mw = seg1->rl_mw; 92 + u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; 93 + int len, pageoff, i, rc; 94 + 95 + pageoff = offset_in_page(seg1->mr_offset); 96 + seg1->mr_offset -= pageoff; /* start of page */ 97 + seg1->mr_len += pageoff; 98 + len = -pageoff; 99 + if (nsegs > RPCRDMA_MAX_FMR_SGES) 100 + nsegs = RPCRDMA_MAX_FMR_SGES; 101 + for (i = 0; i < nsegs;) { 102 + rpcrdma_map_one(device, seg, direction); 103 + physaddrs[i] = seg->mr_dma; 104 + len += seg->mr_len; 105 + ++seg; 106 + ++i; 107 + /* Check for holes */ 108 + if ((i < nsegs && offset_in_page(seg->mr_offset)) || 109 + offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 110 + break; 111 + } 112 + 113 + rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); 114 + if (rc) 115 + goto out_maperr; 116 + 117 + seg1->mr_rkey = mw->r.fmr->rkey; 118 + seg1->mr_base = seg1->mr_dma + pageoff; 119 + seg1->mr_nsegs = i; 120 + seg1->mr_len = len; 121 + return i; 122 + 123 + out_maperr: 124 + dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", 125 + __func__, len, (unsigned long long)seg1->mr_dma, 126 + pageoff, i, rc); 127 + while (i--) 128 + rpcrdma_unmap_one(device, --seg); 129 + return rc; 130 + } 131 + 132 + /* Use the ib_unmap_fmr() verb to prevent further remote 133 + * access via RDMA READ 
or RDMA WRITE. 134 + */ 135 + static int 136 + fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) 137 + { 138 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 139 + struct rpcrdma_mr_seg *seg1 = seg; 140 + struct ib_device *device; 141 + int rc, nsegs = seg->mr_nsegs; 142 + LIST_HEAD(l); 143 + 144 + list_add(&seg1->rl_mw->r.fmr->list, &l); 145 + rc = ib_unmap_fmr(&l); 146 + read_lock(&ia->ri_qplock); 147 + device = ia->ri_id->device; 148 + while (seg1->mr_nsegs--) 149 + rpcrdma_unmap_one(device, seg++); 150 + read_unlock(&ia->ri_qplock); 151 + if (rc) 152 + goto out_err; 153 + return nsegs; 154 + 155 + out_err: 156 + dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); 157 + return nsegs; 158 + } 159 + 160 + /* After a disconnect, unmap all FMRs. 161 + * 162 + * This is invoked only in the transport connect worker in order 163 + * to serialize with rpcrdma_register_fmr_external(). 164 + */ 165 + static void 166 + fmr_op_reset(struct rpcrdma_xprt *r_xprt) 167 + { 168 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 169 + struct rpcrdma_mw *r; 170 + LIST_HEAD(list); 171 + int rc; 172 + 173 + list_for_each_entry(r, &buf->rb_all, mw_all) 174 + list_add(&r->r.fmr->list, &list); 175 + 176 + rc = ib_unmap_fmr(&list); 177 + if (rc) 178 + dprintk("RPC: %s: ib_unmap_fmr failed %i\n", 179 + __func__, rc); 180 + } 181 + 182 + static void 183 + fmr_op_destroy(struct rpcrdma_buffer *buf) 184 + { 185 + struct rpcrdma_mw *r; 186 + int rc; 187 + 188 + while (!list_empty(&buf->rb_all)) { 189 + r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 190 + list_del(&r->mw_all); 191 + rc = ib_dealloc_fmr(r->r.fmr); 192 + if (rc) 193 + dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", 194 + __func__, rc); 195 + kfree(r); 196 + } 197 + } 198 + 199 + const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { 200 + .ro_map = fmr_op_map, 201 + .ro_unmap = fmr_op_unmap, 202 + .ro_open = fmr_op_open, 203 + .ro_maxpages = fmr_op_maxpages, 204 + .ro_init = fmr_op_init, 205 + 
.ro_reset = fmr_op_reset, 206 + .ro_destroy = fmr_op_destroy, 207 + .ro_displayname = "fmr", 208 + };
+353
net/sunrpc/xprtrdma/frwr_ops.c
··· 1 + /* 2 + * Copyright (c) 2015 Oracle. All rights reserved. 3 + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 + */ 5 + 6 + /* Lightweight memory registration using Fast Registration Work 7 + * Requests (FRWR). Also referred to sometimes as FRMR mode. 8 + * 9 + * FRWR features ordered asynchronous registration and deregistration 10 + * of arbitrarily sized memory regions. This is the fastest and safest 11 + * but most complex memory registration mode. 12 + */ 13 + 14 + #include "xprt_rdma.h" 15 + 16 + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 17 + # define RPCDBG_FACILITY RPCDBG_TRANS 18 + #endif 19 + 20 + static int 21 + __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, 22 + unsigned int depth) 23 + { 24 + struct rpcrdma_frmr *f = &r->r.frmr; 25 + int rc; 26 + 27 + f->fr_mr = ib_alloc_fast_reg_mr(pd, depth); 28 + if (IS_ERR(f->fr_mr)) 29 + goto out_mr_err; 30 + f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); 31 + if (IS_ERR(f->fr_pgl)) 32 + goto out_list_err; 33 + return 0; 34 + 35 + out_mr_err: 36 + rc = PTR_ERR(f->fr_mr); 37 + dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n", 38 + __func__, rc); 39 + return rc; 40 + 41 + out_list_err: 42 + rc = PTR_ERR(f->fr_pgl); 43 + dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", 44 + __func__, rc); 45 + ib_dereg_mr(f->fr_mr); 46 + return rc; 47 + } 48 + 49 + static void 50 + __frwr_release(struct rpcrdma_mw *r) 51 + { 52 + int rc; 53 + 54 + rc = ib_dereg_mr(r->r.frmr.fr_mr); 55 + if (rc) 56 + dprintk("RPC: %s: ib_dereg_mr status %i\n", 57 + __func__, rc); 58 + ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); 59 + } 60 + 61 + static int 62 + frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 63 + struct rpcrdma_create_data_internal *cdata) 64 + { 65 + struct ib_device_attr *devattr = &ia->ri_devattr; 66 + int depth, delta; 67 + 68 + ia->ri_max_frmr_depth = 69 + min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 70 + devattr->max_fast_reg_page_list_len); 
71 + dprintk("RPC: %s: device's max FR page list len = %u\n", 72 + __func__, ia->ri_max_frmr_depth); 73 + 74 + /* Add room for frmr register and invalidate WRs. 75 + * 1. FRMR reg WR for head 76 + * 2. FRMR invalidate WR for head 77 + * 3. N FRMR reg WRs for pagelist 78 + * 4. N FRMR invalidate WRs for pagelist 79 + * 5. FRMR reg WR for tail 80 + * 6. FRMR invalidate WR for tail 81 + * 7. The RDMA_SEND WR 82 + */ 83 + depth = 7; 84 + 85 + /* Calculate N if the device max FRMR depth is smaller than 86 + * RPCRDMA_MAX_DATA_SEGS. 87 + */ 88 + if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { 89 + delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; 90 + do { 91 + depth += 2; /* FRMR reg + invalidate */ 92 + delta -= ia->ri_max_frmr_depth; 93 + } while (delta > 0); 94 + } 95 + 96 + ep->rep_attr.cap.max_send_wr *= depth; 97 + if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { 98 + cdata->max_requests = devattr->max_qp_wr / depth; 99 + if (!cdata->max_requests) 100 + return -EINVAL; 101 + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 102 + depth; 103 + } 104 + 105 + return 0; 106 + } 107 + 108 + /* FRWR mode conveys a list of pages per chunk segment. The 109 + * maximum length of that list is the FRWR page list depth. 110 + */ 111 + static size_t 112 + frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) 113 + { 114 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 115 + 116 + return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 117 + rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); 118 + } 119 + 120 + /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. 
*/ 121 + static void 122 + frwr_sendcompletion(struct ib_wc *wc) 123 + { 124 + struct rpcrdma_mw *r; 125 + 126 + if (likely(wc->status == IB_WC_SUCCESS)) 127 + return; 128 + 129 + /* WARNING: Only wr_id and status are reliable at this point */ 130 + r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 131 + dprintk("RPC: %s: frmr %p (stale), status %d\n", 132 + __func__, r, wc->status); 133 + r->r.frmr.fr_state = FRMR_IS_STALE; 134 + } 135 + 136 + static int 137 + frwr_op_init(struct rpcrdma_xprt *r_xprt) 138 + { 139 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 140 + struct ib_device *device = r_xprt->rx_ia.ri_id->device; 141 + unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; 142 + struct ib_pd *pd = r_xprt->rx_ia.ri_pd; 143 + int i; 144 + 145 + INIT_LIST_HEAD(&buf->rb_mws); 146 + INIT_LIST_HEAD(&buf->rb_all); 147 + 148 + i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; 149 + dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); 150 + 151 + while (i--) { 152 + struct rpcrdma_mw *r; 153 + int rc; 154 + 155 + r = kzalloc(sizeof(*r), GFP_KERNEL); 156 + if (!r) 157 + return -ENOMEM; 158 + 159 + rc = __frwr_init(r, pd, device, depth); 160 + if (rc) { 161 + kfree(r); 162 + return rc; 163 + } 164 + 165 + list_add(&r->mw_list, &buf->rb_mws); 166 + list_add(&r->mw_all, &buf->rb_all); 167 + r->mw_sendcompletion = frwr_sendcompletion; 168 + } 169 + 170 + return 0; 171 + } 172 + 173 + /* Post a FAST_REG Work Request to register a memory region 174 + * for remote access via RDMA READ or RDMA WRITE. 
175 + */ 176 + static int 177 + frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 178 + int nsegs, bool writing) 179 + { 180 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 181 + struct ib_device *device = ia->ri_id->device; 182 + enum dma_data_direction direction = rpcrdma_data_dir(writing); 183 + struct rpcrdma_mr_seg *seg1 = seg; 184 + struct rpcrdma_mw *mw = seg1->rl_mw; 185 + struct rpcrdma_frmr *frmr = &mw->r.frmr; 186 + struct ib_mr *mr = frmr->fr_mr; 187 + struct ib_send_wr fastreg_wr, *bad_wr; 188 + u8 key; 189 + int len, pageoff; 190 + int i, rc; 191 + int seg_len; 192 + u64 pa; 193 + int page_no; 194 + 195 + pageoff = offset_in_page(seg1->mr_offset); 196 + seg1->mr_offset -= pageoff; /* start of page */ 197 + seg1->mr_len += pageoff; 198 + len = -pageoff; 199 + if (nsegs > ia->ri_max_frmr_depth) 200 + nsegs = ia->ri_max_frmr_depth; 201 + for (page_no = i = 0; i < nsegs;) { 202 + rpcrdma_map_one(device, seg, direction); 203 + pa = seg->mr_dma; 204 + for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { 205 + frmr->fr_pgl->page_list[page_no++] = pa; 206 + pa += PAGE_SIZE; 207 + } 208 + len += seg->mr_len; 209 + ++seg; 210 + ++i; 211 + /* Check for holes */ 212 + if ((i < nsegs && offset_in_page(seg->mr_offset)) || 213 + offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 214 + break; 215 + } 216 + dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", 217 + __func__, mw, i, len); 218 + 219 + frmr->fr_state = FRMR_IS_VALID; 220 + 221 + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 222 + fastreg_wr.wr_id = (unsigned long)(void *)mw; 223 + fastreg_wr.opcode = IB_WR_FAST_REG_MR; 224 + fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; 225 + fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; 226 + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 227 + fastreg_wr.wr.fast_reg.page_list_len = page_no; 228 + fastreg_wr.wr.fast_reg.length = len; 229 + fastreg_wr.wr.fast_reg.access_flags = writing ? 
230 + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 231 + IB_ACCESS_REMOTE_READ; 232 + key = (u8)(mr->rkey & 0x000000FF); 233 + ib_update_fast_reg_key(mr, ++key); 234 + fastreg_wr.wr.fast_reg.rkey = mr->rkey; 235 + 236 + DECR_CQCOUNT(&r_xprt->rx_ep); 237 + rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); 238 + if (rc) 239 + goto out_senderr; 240 + 241 + seg1->mr_rkey = mr->rkey; 242 + seg1->mr_base = seg1->mr_dma + pageoff; 243 + seg1->mr_nsegs = i; 244 + seg1->mr_len = len; 245 + return i; 246 + 247 + out_senderr: 248 + dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); 249 + ib_update_fast_reg_key(mr, --key); 250 + frmr->fr_state = FRMR_IS_INVALID; 251 + while (i--) 252 + rpcrdma_unmap_one(device, --seg); 253 + return rc; 254 + } 255 + 256 + /* Post a LOCAL_INV Work Request to prevent further remote access 257 + * via RDMA READ or RDMA WRITE. 258 + */ 259 + static int 260 + frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) 261 + { 262 + struct rpcrdma_mr_seg *seg1 = seg; 263 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 264 + struct ib_send_wr invalidate_wr, *bad_wr; 265 + int rc, nsegs = seg->mr_nsegs; 266 + struct ib_device *device; 267 + 268 + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; 269 + 270 + memset(&invalidate_wr, 0, sizeof(invalidate_wr)); 271 + invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; 272 + invalidate_wr.opcode = IB_WR_LOCAL_INV; 273 + invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; 274 + DECR_CQCOUNT(&r_xprt->rx_ep); 275 + 276 + read_lock(&ia->ri_qplock); 277 + device = ia->ri_id->device; 278 + while (seg1->mr_nsegs--) 279 + rpcrdma_unmap_one(device, seg++); 280 + rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 281 + read_unlock(&ia->ri_qplock); 282 + if (rc) 283 + goto out_err; 284 + return nsegs; 285 + 286 + out_err: 287 + /* Force rpcrdma_buffer_get() to retry */ 288 + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; 289 + dprintk("RPC: %s: ib_post_send status 
%i\n", __func__, rc); 290 + return nsegs; 291 + } 292 + 293 + /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in 294 + * an unusable state. Find FRMRs in this state and dereg / reg 295 + * each. FRMRs that are VALID and attached to an rpcrdma_req are 296 + * also torn down. 297 + * 298 + * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. 299 + * 300 + * This is invoked only in the transport connect worker in order 301 + * to serialize with rpcrdma_register_frmr_external(). 302 + */ 303 + static void 304 + frwr_op_reset(struct rpcrdma_xprt *r_xprt) 305 + { 306 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 307 + struct ib_device *device = r_xprt->rx_ia.ri_id->device; 308 + unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; 309 + struct ib_pd *pd = r_xprt->rx_ia.ri_pd; 310 + struct rpcrdma_mw *r; 311 + int rc; 312 + 313 + list_for_each_entry(r, &buf->rb_all, mw_all) { 314 + if (r->r.frmr.fr_state == FRMR_IS_INVALID) 315 + continue; 316 + 317 + __frwr_release(r); 318 + rc = __frwr_init(r, pd, device, depth); 319 + if (rc) { 320 + dprintk("RPC: %s: mw %p left %s\n", 321 + __func__, r, 322 + (r->r.frmr.fr_state == FRMR_IS_STALE ? 323 + "stale" : "valid")); 324 + continue; 325 + } 326 + 327 + r->r.frmr.fr_state = FRMR_IS_INVALID; 328 + } 329 + } 330 + 331 + static void 332 + frwr_op_destroy(struct rpcrdma_buffer *buf) 333 + { 334 + struct rpcrdma_mw *r; 335 + 336 + while (!list_empty(&buf->rb_all)) { 337 + r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 338 + list_del(&r->mw_all); 339 + __frwr_release(r); 340 + kfree(r); 341 + } 342 + } 343 + 344 + const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 345 + .ro_map = frwr_op_map, 346 + .ro_unmap = frwr_op_unmap, 347 + .ro_open = frwr_op_open, 348 + .ro_maxpages = frwr_op_maxpages, 349 + .ro_init = frwr_op_init, 350 + .ro_reset = frwr_op_reset, 351 + .ro_destroy = frwr_op_destroy, 352 + .ro_displayname = "frwr", 353 + };
+94
net/sunrpc/xprtrdma/physical_ops.c
··· 1 + /* 2 + * Copyright (c) 2015 Oracle. All rights reserved. 3 + * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 4 + */ 5 + 6 + /* No-op chunk preparation. All client memory is pre-registered. 7 + * Sometimes referred to as ALLPHYSICAL mode. 8 + * 9 + * Physical registration is simple because all client memory is 10 + * pre-registered and never deregistered. This mode is good for 11 + * adapter bring up, but is considered not safe: the server is 12 + * trusted not to abuse its access to client memory not involved 13 + * in RDMA I/O. 14 + */ 15 + 16 + #include "xprt_rdma.h" 17 + 18 + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 19 + # define RPCDBG_FACILITY RPCDBG_TRANS 20 + #endif 21 + 22 + static int 23 + physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 24 + struct rpcrdma_create_data_internal *cdata) 25 + { 26 + return 0; 27 + } 28 + 29 + /* PHYSICAL memory registration conveys one page per chunk segment. 30 + */ 31 + static size_t 32 + physical_op_maxpages(struct rpcrdma_xprt *r_xprt) 33 + { 34 + return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 35 + rpcrdma_max_segments(r_xprt)); 36 + } 37 + 38 + static int 39 + physical_op_init(struct rpcrdma_xprt *r_xprt) 40 + { 41 + return 0; 42 + } 43 + 44 + /* The client's physical memory is already exposed for 45 + * remote access via RDMA READ or RDMA WRITE. 46 + */ 47 + static int 48 + physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, 49 + int nsegs, bool writing) 50 + { 51 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 52 + 53 + rpcrdma_map_one(ia->ri_id->device, seg, 54 + rpcrdma_data_dir(writing)); 55 + seg->mr_rkey = ia->ri_bind_mem->rkey; 56 + seg->mr_base = seg->mr_dma; 57 + seg->mr_nsegs = 1; 58 + return 1; 59 + } 60 + 61 + /* Unmap a memory region, but leave it registered. 
62 + */ 63 + static int 64 + physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) 65 + { 66 + struct rpcrdma_ia *ia = &r_xprt->rx_ia; 67 + 68 + read_lock(&ia->ri_qplock); 69 + rpcrdma_unmap_one(ia->ri_id->device, seg); 70 + read_unlock(&ia->ri_qplock); 71 + 72 + return 1; 73 + } 74 + 75 + static void 76 + physical_op_reset(struct rpcrdma_xprt *r_xprt) 77 + { 78 + } 79 + 80 + static void 81 + physical_op_destroy(struct rpcrdma_buffer *buf) 82 + { 83 + } 84 + 85 + const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { 86 + .ro_map = physical_op_map, 87 + .ro_unmap = physical_op_unmap, 88 + .ro_open = physical_op_open, 89 + .ro_maxpages = physical_op_maxpages, 90 + .ro_init = physical_op_init, 91 + .ro_reset = physical_op_reset, 92 + .ro_destroy = physical_op_destroy, 93 + .ro_displayname = "physical", 94 + };
+42 -45
net/sunrpc/xprtrdma/rpc_rdma.c
··· 53 53 # define RPCDBG_FACILITY RPCDBG_TRANS 54 54 #endif 55 55 56 + enum rpcrdma_chunktype { 57 + rpcrdma_noch = 0, 58 + rpcrdma_readch, 59 + rpcrdma_areadch, 60 + rpcrdma_writech, 61 + rpcrdma_replych 62 + }; 63 + 56 64 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 57 65 static const char transfertypes[][12] = { 58 66 "pure inline", /* no chunks */ ··· 187 179 struct rpcrdma_write_array *warray = NULL; 188 180 struct rpcrdma_write_chunk *cur_wchunk = NULL; 189 181 __be32 *iptr = headerp->rm_body.rm_chunks; 182 + int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool); 190 183 191 184 if (type == rpcrdma_readch || type == rpcrdma_areadch) { 192 185 /* a read chunk - server will RDMA Read our memory */ ··· 210 201 if (nsegs < 0) 211 202 return nsegs; 212 203 204 + map = r_xprt->rx_ia.ri_ops->ro_map; 213 205 do { 214 - n = rpcrdma_register_external(seg, nsegs, 215 - cur_wchunk != NULL, r_xprt); 206 + n = map(r_xprt, seg, nsegs, cur_wchunk != NULL); 216 207 if (n <= 0) 217 208 goto out; 218 209 if (cur_rchunk) { /* read */ ··· 284 275 return (unsigned char *)iptr - (unsigned char *)headerp; 285 276 286 277 out: 287 - if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) { 288 - for (pos = 0; nchunks--;) 289 - pos += rpcrdma_deregister_external( 290 - &req->rl_segments[pos], r_xprt); 291 - } 278 + if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) 279 + return n; 280 + 281 + for (pos = 0; nchunks--;) 282 + pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, 283 + &req->rl_segments[pos]); 292 284 return n; 293 - } 294 - 295 - /* 296 - * Marshal chunks. This routine returns the header length 297 - * consumed by marshaling. 298 - * 299 - * Returns positive RPC/RDMA header size, or negative errno. 
300 - */ 301 - 302 - ssize_t 303 - rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) 304 - { 305 - struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 306 - struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); 307 - 308 - if (req->rl_rtype != rpcrdma_noch) 309 - result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, 310 - headerp, req->rl_rtype); 311 - else if (req->rl_wtype != rpcrdma_noch) 312 - result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, 313 - headerp, req->rl_wtype); 314 - return result; 315 285 } 316 286 317 287 /* ··· 385 397 char *base; 386 398 size_t rpclen, padlen; 387 399 ssize_t hdrlen; 400 + enum rpcrdma_chunktype rtype, wtype; 388 401 struct rpcrdma_msg *headerp; 389 402 390 403 /* ··· 422 433 * into pages; otherwise use reply chunks. 423 434 */ 424 435 if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) 425 - req->rl_wtype = rpcrdma_noch; 436 + wtype = rpcrdma_noch; 426 437 else if (rqst->rq_rcv_buf.page_len == 0) 427 - req->rl_wtype = rpcrdma_replych; 438 + wtype = rpcrdma_replych; 428 439 else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) 429 - req->rl_wtype = rpcrdma_writech; 440 + wtype = rpcrdma_writech; 430 441 else 431 - req->rl_wtype = rpcrdma_replych; 442 + wtype = rpcrdma_replych; 432 443 433 444 /* 434 445 * Chunks needed for arguments? 
··· 445 456 * TBD check NFSv4 setacl 446 457 */ 447 458 if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) 448 - req->rl_rtype = rpcrdma_noch; 459 + rtype = rpcrdma_noch; 449 460 else if (rqst->rq_snd_buf.page_len == 0) 450 - req->rl_rtype = rpcrdma_areadch; 461 + rtype = rpcrdma_areadch; 451 462 else 452 - req->rl_rtype = rpcrdma_readch; 463 + rtype = rpcrdma_readch; 453 464 454 465 /* The following simplification is not true forever */ 455 - if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) 456 - req->rl_wtype = rpcrdma_noch; 457 - if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { 466 + if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) 467 + wtype = rpcrdma_noch; 468 + if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { 458 469 dprintk("RPC: %s: cannot marshal multiple chunk lists\n", 459 470 __func__); 460 471 return -EIO; ··· 468 479 * When padding is in use and applies to the transfer, insert 469 480 * it and change the message type. 470 481 */ 471 - if (req->rl_rtype == rpcrdma_noch) { 482 + if (rtype == rpcrdma_noch) { 472 483 473 484 padlen = rpcrdma_inline_pullup(rqst, 474 485 RPCRDMA_INLINE_PAD_VALUE(rqst)); ··· 483 494 headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; 484 495 headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; 485 496 hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ 486 - if (req->rl_wtype != rpcrdma_noch) { 497 + if (wtype != rpcrdma_noch) { 487 498 dprintk("RPC: %s: invalid chunk list\n", 488 499 __func__); 489 500 return -EIO; ··· 504 515 * on receive. Therefore, we request a reply chunk 505 516 * for non-writes wherever feasible and efficient. 
506 517 */ 507 - if (req->rl_wtype == rpcrdma_noch) 508 - req->rl_wtype = rpcrdma_replych; 518 + if (wtype == rpcrdma_noch) 519 + wtype = rpcrdma_replych; 509 520 } 510 521 } 511 522 512 - hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen); 523 + if (rtype != rpcrdma_noch) { 524 + hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, 525 + headerp, rtype); 526 + wtype = rtype; /* simplify dprintk */ 527 + 528 + } else if (wtype != rpcrdma_noch) { 529 + hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, 530 + headerp, wtype); 531 + } 513 532 if (hdrlen < 0) 514 533 return hdrlen; 515 534 516 535 dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" 517 536 " headerp 0x%p base 0x%p lkey 0x%x\n", 518 - __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, 537 + __func__, transfertypes[wtype], hdrlen, rpclen, padlen, 519 538 headerp, base, rdmab_lkey(req->rl_rdmabuf)); 520 539 521 540 /*
+45 -16
net/sunrpc/xprtrdma/transport.c
··· 157 157 static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ 158 158 159 159 static void 160 + xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) 161 + { 162 + struct sockaddr_in *sin = (struct sockaddr_in *)sap; 163 + char buf[20]; 164 + 165 + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); 166 + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 167 + 168 + xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; 169 + } 170 + 171 + static void 172 + xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) 173 + { 174 + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; 175 + char buf[40]; 176 + 177 + snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); 178 + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 179 + 180 + xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; 181 + } 182 + 183 + static void 160 184 xprt_rdma_format_addresses(struct rpc_xprt *xprt) 161 185 { 162 186 struct sockaddr *sap = (struct sockaddr *) 163 187 &rpcx_to_rdmad(xprt).addr; 164 - struct sockaddr_in *sin = (struct sockaddr_in *)sap; 165 - char buf[64]; 188 + char buf[128]; 189 + 190 + switch (sap->sa_family) { 191 + case AF_INET: 192 + xprt_rdma_format_addresses4(xprt, sap); 193 + break; 194 + case AF_INET6: 195 + xprt_rdma_format_addresses6(xprt, sap); 196 + break; 197 + default: 198 + pr_err("rpcrdma: Unrecognized address family\n"); 199 + return; 200 + } 166 201 167 202 (void)rpc_ntop(sap, buf, sizeof(buf)); 168 203 xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); ··· 205 170 snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); 206 171 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); 207 172 208 - xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; 209 - 210 - snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); 211 - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 212 - 
213 173 snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); 214 174 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); 215 175 216 - /* netid */ 217 - xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; 176 + xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; 218 177 } 219 178 220 179 static void ··· 406 377 xprt_rdma_connect_worker); 407 378 408 379 xprt_rdma_format_addresses(xprt); 409 - xprt->max_payload = rpcrdma_max_payload(new_xprt); 380 + xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); 381 + if (xprt->max_payload == 0) 382 + goto out4; 383 + xprt->max_payload <<= PAGE_SHIFT; 410 384 dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", 411 385 __func__, xprt->max_payload); 412 386 ··· 584 552 585 553 for (i = 0; req->rl_nchunks;) { 586 554 --req->rl_nchunks; 587 - i += rpcrdma_deregister_external( 588 - &req->rl_segments[i], r_xprt); 555 + i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, 556 + &req->rl_segments[i]); 589 557 } 590 558 591 559 rpcrdma_buffer_put(req); ··· 611 579 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 612 580 int rc = 0; 613 581 614 - if (req->rl_niovs == 0) 615 - rc = rpcrdma_marshal_req(rqst); 616 - else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) 617 - rc = rpcrdma_marshal_chunks(rqst, 0); 582 + rc = rpcrdma_marshal_req(rqst); 618 583 if (rc < 0) 619 584 goto failed_marshal; 620 585
+57 -642
net/sunrpc/xprtrdma/verbs.c
··· 50 50 #include <linux/interrupt.h> 51 51 #include <linux/slab.h> 52 52 #include <linux/prefetch.h> 53 + #include <linux/sunrpc/addr.h> 53 54 #include <asm/bitops.h> 54 55 55 56 #include "xprt_rdma.h" ··· 62 61 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 63 62 # define RPCDBG_FACILITY RPCDBG_TRANS 64 63 #endif 65 - 66 - static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); 67 - static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); 68 64 69 65 /* 70 66 * internal functions ··· 186 188 "remote access error", 187 189 "remote operation error", 188 190 "transport retry counter exceeded", 189 - "RNR retrycounter exceeded", 191 + "RNR retry counter exceeded", 190 192 "local RDD violation error", 191 193 "remove invalid RD request", 192 194 "operation aborted", ··· 204 206 static void 205 207 rpcrdma_sendcq_process_wc(struct ib_wc *wc) 206 208 { 207 - if (likely(wc->status == IB_WC_SUCCESS)) 208 - return; 209 - 210 209 /* WARNING: Only wr_id and status are reliable at this point */ 211 - if (wc->wr_id == 0ULL) { 212 - if (wc->status != IB_WC_WR_FLUSH_ERR) 210 + if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { 211 + if (wc->status != IB_WC_SUCCESS && 212 + wc->status != IB_WC_WR_FLUSH_ERR) 213 213 pr_err("RPC: %s: SEND: %s\n", 214 214 __func__, COMPLETION_MSG(wc->status)); 215 215 } else { 216 216 struct rpcrdma_mw *r; 217 217 218 218 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 219 - r->r.frmr.fr_state = FRMR_IS_STALE; 220 - pr_err("RPC: %s: frmr %p (stale): %s\n", 221 - __func__, r, COMPLETION_MSG(wc->status)); 219 + r->mw_sendcompletion(wc); 222 220 } 223 221 } 224 222 ··· 418 424 struct rpcrdma_ia *ia = &xprt->rx_ia; 419 425 struct rpcrdma_ep *ep = &xprt->rx_ep; 420 426 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 421 - struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; 427 + struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; 422 428 #endif 423 429 struct ib_qp_attr *attr = &ia->ri_qp_attr; 424 430 struct ib_qp_init_attr *iattr = 
&ia->ri_qp_init_attr; ··· 474 480 wake_up_all(&ep->rep_connect_wait); 475 481 /*FALLTHROUGH*/ 476 482 default: 477 - dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", 478 - __func__, &addr->sin_addr.s_addr, 479 - ntohs(addr->sin_port), ep, 483 + dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", 484 + __func__, sap, rpc_get_port(sap), ep, 480 485 CONNECTION_MSG(event->event)); 481 486 break; 482 487 } ··· 484 491 if (connstate == 1) { 485 492 int ird = attr->max_dest_rd_atomic; 486 493 int tird = ep->rep_remote_cma.responder_resources; 487 - printk(KERN_INFO "rpcrdma: connection to %pI4:%u " 488 - "on %s, memreg %d slots %d ird %d%s\n", 489 - &addr->sin_addr.s_addr, 490 - ntohs(addr->sin_port), 494 + 495 + pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", 496 + sap, rpc_get_port(sap), 491 497 ia->ri_id->device->name, 492 - ia->ri_memreg_strategy, 498 + ia->ri_ops->ro_displayname, 493 499 xprt->rx_buf.rb_max_requests, 494 500 ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); 495 501 } else if (connstate < 0) { 496 - printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", 497 - &addr->sin_addr.s_addr, 498 - ntohs(addr->sin_port), 499 - connstate); 502 + pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", 503 + sap, rpc_get_port(sap), connstate); 500 504 } 501 505 #endif 502 506 ··· 611 621 612 622 if (memreg == RPCRDMA_FRMR) { 613 623 /* Requires both frmr reg and local dma lkey */ 614 - if ((devattr->device_cap_flags & 624 + if (((devattr->device_cap_flags & 615 625 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != 616 - (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { 626 + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) || 627 + (devattr->max_fast_reg_page_list_len == 0)) { 617 628 dprintk("RPC: %s: FRMR registration " 618 629 "not supported by HCA\n", __func__); 619 630 memreg = RPCRDMA_MTHCAFMR; 620 - } else { 621 - /* Mind the ia limit on FRMR page list depth */ 622 - ia->ri_max_frmr_depth = 
min_t(unsigned int, 623 - RPCRDMA_MAX_DATA_SEGS, 624 - devattr->max_fast_reg_page_list_len); 625 631 } 626 632 } 627 633 if (memreg == RPCRDMA_MTHCAFMR) { ··· 638 652 */ 639 653 switch (memreg) { 640 654 case RPCRDMA_FRMR: 655 + ia->ri_ops = &rpcrdma_frwr_memreg_ops; 641 656 break; 642 657 case RPCRDMA_ALLPHYSICAL: 658 + ia->ri_ops = &rpcrdma_physical_memreg_ops; 643 659 mem_priv = IB_ACCESS_LOCAL_WRITE | 644 660 IB_ACCESS_REMOTE_WRITE | 645 661 IB_ACCESS_REMOTE_READ; 646 662 goto register_setup; 647 663 case RPCRDMA_MTHCAFMR: 664 + ia->ri_ops = &rpcrdma_fmr_memreg_ops; 648 665 if (ia->ri_have_dma_lkey) 649 666 break; 650 667 mem_priv = IB_ACCESS_LOCAL_WRITE; ··· 667 678 rc = -ENOMEM; 668 679 goto out3; 669 680 } 670 - dprintk("RPC: %s: memory registration strategy is %d\n", 671 - __func__, memreg); 681 + dprintk("RPC: %s: memory registration strategy is '%s'\n", 682 + __func__, ia->ri_ops->ro_displayname); 672 683 673 684 /* Else will do memory reg/dereg for each chunk */ 674 685 ia->ri_memreg_strategy = memreg; ··· 732 743 733 744 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; 734 745 ep->rep_attr.qp_context = ep; 735 - /* send_cq and recv_cq initialized below */ 736 746 ep->rep_attr.srq = NULL; 737 747 ep->rep_attr.cap.max_send_wr = cdata->max_requests; 738 - switch (ia->ri_memreg_strategy) { 739 - case RPCRDMA_FRMR: { 740 - int depth = 7; 741 - 742 - /* Add room for frmr register and invalidate WRs. 743 - * 1. FRMR reg WR for head 744 - * 2. FRMR invalidate WR for head 745 - * 3. N FRMR reg WRs for pagelist 746 - * 4. N FRMR invalidate WRs for pagelist 747 - * 5. FRMR reg WR for tail 748 - * 6. FRMR invalidate WR for tail 749 - * 7. The RDMA_SEND WR 750 - */ 751 - 752 - /* Calculate N if the device max FRMR depth is smaller than 753 - * RPCRDMA_MAX_DATA_SEGS. 
754 - */ 755 - if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { 756 - int delta = RPCRDMA_MAX_DATA_SEGS - 757 - ia->ri_max_frmr_depth; 758 - 759 - do { 760 - depth += 2; /* FRMR reg + invalidate */ 761 - delta -= ia->ri_max_frmr_depth; 762 - } while (delta > 0); 763 - 764 - } 765 - ep->rep_attr.cap.max_send_wr *= depth; 766 - if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { 767 - cdata->max_requests = devattr->max_qp_wr / depth; 768 - if (!cdata->max_requests) 769 - return -EINVAL; 770 - ep->rep_attr.cap.max_send_wr = cdata->max_requests * 771 - depth; 772 - } 773 - break; 774 - } 775 - default: 776 - break; 777 - } 748 + rc = ia->ri_ops->ro_open(ia, ep, cdata); 749 + if (rc) 750 + return rc; 778 751 ep->rep_attr.cap.max_recv_wr = cdata->max_requests; 779 752 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); 780 753 ep->rep_attr.cap.max_recv_sge = 1; ··· 895 944 rpcrdma_ep_disconnect(ep, ia); 896 945 rpcrdma_flush_cqs(ep); 897 946 898 - switch (ia->ri_memreg_strategy) { 899 - case RPCRDMA_FRMR: 900 - rpcrdma_reset_frmrs(ia); 901 - break; 902 - case RPCRDMA_MTHCAFMR: 903 - rpcrdma_reset_fmrs(ia); 904 - break; 905 - case RPCRDMA_ALLPHYSICAL: 906 - break; 907 - default: 908 - rc = -EIO; 909 - goto out; 910 - } 911 - 912 947 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); 948 + ia->ri_ops->ro_reset(xprt); 949 + 913 950 id = rpcrdma_create_id(xprt, ia, 914 951 (struct sockaddr *)&xprt->rx_data.addr); 915 952 if (IS_ERR(id)) { ··· 1062 1123 return ERR_PTR(rc); 1063 1124 } 1064 1125 1065 - static int 1066 - rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) 1067 - { 1068 - int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; 1069 - struct ib_fmr_attr fmr_attr = { 1070 - .max_pages = RPCRDMA_MAX_DATA_SEGS, 1071 - .max_maps = 1, 1072 - .page_shift = PAGE_SHIFT 1073 - }; 1074 - struct rpcrdma_mw *r; 1075 - int i, rc; 1076 - 1077 - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; 1078 - dprintk("RPC: %s: 
initializing %d FMRs\n", __func__, i); 1079 - 1080 - while (i--) { 1081 - r = kzalloc(sizeof(*r), GFP_KERNEL); 1082 - if (r == NULL) 1083 - return -ENOMEM; 1084 - 1085 - r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); 1086 - if (IS_ERR(r->r.fmr)) { 1087 - rc = PTR_ERR(r->r.fmr); 1088 - dprintk("RPC: %s: ib_alloc_fmr failed %i\n", 1089 - __func__, rc); 1090 - goto out_free; 1091 - } 1092 - 1093 - list_add(&r->mw_list, &buf->rb_mws); 1094 - list_add(&r->mw_all, &buf->rb_all); 1095 - } 1096 - return 0; 1097 - 1098 - out_free: 1099 - kfree(r); 1100 - return rc; 1101 - } 1102 - 1103 - static int 1104 - rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) 1105 - { 1106 - struct rpcrdma_frmr *f; 1107 - struct rpcrdma_mw *r; 1108 - int i, rc; 1109 - 1110 - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; 1111 - dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); 1112 - 1113 - while (i--) { 1114 - r = kzalloc(sizeof(*r), GFP_KERNEL); 1115 - if (r == NULL) 1116 - return -ENOMEM; 1117 - f = &r->r.frmr; 1118 - 1119 - f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, 1120 - ia->ri_max_frmr_depth); 1121 - if (IS_ERR(f->fr_mr)) { 1122 - rc = PTR_ERR(f->fr_mr); 1123 - dprintk("RPC: %s: ib_alloc_fast_reg_mr " 1124 - "failed %i\n", __func__, rc); 1125 - goto out_free; 1126 - } 1127 - 1128 - f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, 1129 - ia->ri_max_frmr_depth); 1130 - if (IS_ERR(f->fr_pgl)) { 1131 - rc = PTR_ERR(f->fr_pgl); 1132 - dprintk("RPC: %s: ib_alloc_fast_reg_page_list " 1133 - "failed %i\n", __func__, rc); 1134 - 1135 - ib_dereg_mr(f->fr_mr); 1136 - goto out_free; 1137 - } 1138 - 1139 - list_add(&r->mw_list, &buf->rb_mws); 1140 - list_add(&r->mw_all, &buf->rb_all); 1141 - } 1142 - 1143 - return 0; 1144 - 1145 - out_free: 1146 - kfree(r); 1147 - return rc; 1148 - } 1149 - 1150 1126 int 1151 1127 rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) 1152 1128 { ··· 1098 1244 buf->rb_recv_bufs = (struct rpcrdma_rep **) p; 1099 
1245 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; 1100 1246 1101 - INIT_LIST_HEAD(&buf->rb_mws); 1102 - INIT_LIST_HEAD(&buf->rb_all); 1103 - switch (ia->ri_memreg_strategy) { 1104 - case RPCRDMA_FRMR: 1105 - rc = rpcrdma_init_frmrs(ia, buf); 1106 - if (rc) 1107 - goto out; 1108 - break; 1109 - case RPCRDMA_MTHCAFMR: 1110 - rc = rpcrdma_init_fmrs(ia, buf); 1111 - if (rc) 1112 - goto out; 1113 - break; 1114 - default: 1115 - break; 1116 - } 1247 + rc = ia->ri_ops->ro_init(r_xprt); 1248 + if (rc) 1249 + goto out; 1117 1250 1118 1251 for (i = 0; i < buf->rb_max_requests; i++) { 1119 1252 struct rpcrdma_req *req; ··· 1152 1311 kfree(req); 1153 1312 } 1154 1313 1155 - static void 1156 - rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) 1157 - { 1158 - struct rpcrdma_mw *r; 1159 - int rc; 1160 - 1161 - while (!list_empty(&buf->rb_all)) { 1162 - r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 1163 - list_del(&r->mw_all); 1164 - list_del(&r->mw_list); 1165 - 1166 - rc = ib_dealloc_fmr(r->r.fmr); 1167 - if (rc) 1168 - dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", 1169 - __func__, rc); 1170 - 1171 - kfree(r); 1172 - } 1173 - } 1174 - 1175 - static void 1176 - rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) 1177 - { 1178 - struct rpcrdma_mw *r; 1179 - int rc; 1180 - 1181 - while (!list_empty(&buf->rb_all)) { 1182 - r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 1183 - list_del(&r->mw_all); 1184 - list_del(&r->mw_list); 1185 - 1186 - rc = ib_dereg_mr(r->r.frmr.fr_mr); 1187 - if (rc) 1188 - dprintk("RPC: %s: ib_dereg_mr failed %i\n", 1189 - __func__, rc); 1190 - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); 1191 - 1192 - kfree(r); 1193 - } 1194 - } 1195 - 1196 1314 void 1197 1315 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1198 1316 { ··· 1172 1372 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); 1173 1373 } 1174 1374 1175 - switch (ia->ri_memreg_strategy) { 1176 - case RPCRDMA_FRMR: 1177 - rpcrdma_destroy_frmrs(buf); 1178 - break; 
1179 - case RPCRDMA_MTHCAFMR: 1180 - rpcrdma_destroy_fmrs(buf); 1181 - break; 1182 - default: 1183 - break; 1184 - } 1375 + ia->ri_ops->ro_destroy(buf); 1185 1376 1186 1377 kfree(buf->rb_pool); 1187 - } 1188 - 1189 - /* After a disconnect, unmap all FMRs. 1190 - * 1191 - * This is invoked only in the transport connect worker in order 1192 - * to serialize with rpcrdma_register_fmr_external(). 1193 - */ 1194 - static void 1195 - rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) 1196 - { 1197 - struct rpcrdma_xprt *r_xprt = 1198 - container_of(ia, struct rpcrdma_xprt, rx_ia); 1199 - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1200 - struct list_head *pos; 1201 - struct rpcrdma_mw *r; 1202 - LIST_HEAD(l); 1203 - int rc; 1204 - 1205 - list_for_each(pos, &buf->rb_all) { 1206 - r = list_entry(pos, struct rpcrdma_mw, mw_all); 1207 - 1208 - INIT_LIST_HEAD(&l); 1209 - list_add(&r->r.fmr->list, &l); 1210 - rc = ib_unmap_fmr(&l); 1211 - if (rc) 1212 - dprintk("RPC: %s: ib_unmap_fmr failed %i\n", 1213 - __func__, rc); 1214 - } 1215 - } 1216 - 1217 - /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in 1218 - * an unusable state. Find FRMRs in this state and dereg / reg 1219 - * each. FRMRs that are VALID and attached to an rpcrdma_req are 1220 - * also torn down. 1221 - * 1222 - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. 1223 - * 1224 - * This is invoked only in the transport connect worker in order 1225 - * to serialize with rpcrdma_register_frmr_external(). 
1226 - */ 1227 - static void 1228 - rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) 1229 - { 1230 - struct rpcrdma_xprt *r_xprt = 1231 - container_of(ia, struct rpcrdma_xprt, rx_ia); 1232 - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1233 - struct list_head *pos; 1234 - struct rpcrdma_mw *r; 1235 - int rc; 1236 - 1237 - list_for_each(pos, &buf->rb_all) { 1238 - r = list_entry(pos, struct rpcrdma_mw, mw_all); 1239 - 1240 - if (r->r.frmr.fr_state == FRMR_IS_INVALID) 1241 - continue; 1242 - 1243 - rc = ib_dereg_mr(r->r.frmr.fr_mr); 1244 - if (rc) 1245 - dprintk("RPC: %s: ib_dereg_mr failed %i\n", 1246 - __func__, rc); 1247 - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); 1248 - 1249 - r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, 1250 - ia->ri_max_frmr_depth); 1251 - if (IS_ERR(r->r.frmr.fr_mr)) { 1252 - rc = PTR_ERR(r->r.frmr.fr_mr); 1253 - dprintk("RPC: %s: ib_alloc_fast_reg_mr" 1254 - " failed %i\n", __func__, rc); 1255 - continue; 1256 - } 1257 - r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( 1258 - ia->ri_id->device, 1259 - ia->ri_max_frmr_depth); 1260 - if (IS_ERR(r->r.frmr.fr_pgl)) { 1261 - rc = PTR_ERR(r->r.frmr.fr_pgl); 1262 - dprintk("RPC: %s: " 1263 - "ib_alloc_fast_reg_page_list " 1264 - "failed %i\n", __func__, rc); 1265 - 1266 - ib_dereg_mr(r->r.frmr.fr_mr); 1267 - continue; 1268 - } 1269 - r->r.frmr.fr_state = FRMR_IS_INVALID; 1270 - } 1271 1378 } 1272 1379 1273 1380 /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving ··· 1216 1509 } 1217 1510 } 1218 1511 1219 - /* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). 1512 + /* rpcrdma_unmap_one() was already done during deregistration. 1220 1513 * Redo only the ib_post_send(). 1221 1514 */ 1222 1515 static void ··· 1436 1729 * Wrappers for internal-use kmalloc memory registration, used by buffer code. 
1437 1730 */ 1438 1731 1732 + void 1733 + rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) 1734 + { 1735 + dprintk("RPC: map_one: offset %p iova %llx len %zu\n", 1736 + seg->mr_offset, 1737 + (unsigned long long)seg->mr_dma, seg->mr_dmalen); 1738 + } 1739 + 1439 1740 static int 1440 1741 rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, 1441 1742 struct ib_mr **mrp, struct ib_sge *iov) ··· 1569 1854 } 1570 1855 1571 1856 /* 1572 - * Wrappers for chunk registration, shared by read/write chunk code. 1573 - */ 1574 - 1575 - static void 1576 - rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) 1577 - { 1578 - seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 1579 - seg->mr_dmalen = seg->mr_len; 1580 - if (seg->mr_page) 1581 - seg->mr_dma = ib_dma_map_page(ia->ri_id->device, 1582 - seg->mr_page, offset_in_page(seg->mr_offset), 1583 - seg->mr_dmalen, seg->mr_dir); 1584 - else 1585 - seg->mr_dma = ib_dma_map_single(ia->ri_id->device, 1586 - seg->mr_offset, 1587 - seg->mr_dmalen, seg->mr_dir); 1588 - if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { 1589 - dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", 1590 - __func__, 1591 - (unsigned long long)seg->mr_dma, 1592 - seg->mr_offset, seg->mr_dmalen); 1593 - } 1594 - } 1595 - 1596 - static void 1597 - rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) 1598 - { 1599 - if (seg->mr_page) 1600 - ib_dma_unmap_page(ia->ri_id->device, 1601 - seg->mr_dma, seg->mr_dmalen, seg->mr_dir); 1602 - else 1603 - ib_dma_unmap_single(ia->ri_id->device, 1604 - seg->mr_dma, seg->mr_dmalen, seg->mr_dir); 1605 - } 1606 - 1607 - static int 1608 - rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, 1609 - int *nsegs, int writing, struct rpcrdma_ia *ia, 1610 - struct rpcrdma_xprt *r_xprt) 1611 - { 1612 - struct rpcrdma_mr_seg *seg1 = seg; 1613 - struct rpcrdma_mw *mw = seg1->rl_mw; 1614 - struct rpcrdma_frmr *frmr = &mw->r.frmr; 1615 - struct ib_mr 
*mr = frmr->fr_mr; 1616 - struct ib_send_wr fastreg_wr, *bad_wr; 1617 - u8 key; 1618 - int len, pageoff; 1619 - int i, rc; 1620 - int seg_len; 1621 - u64 pa; 1622 - int page_no; 1623 - 1624 - pageoff = offset_in_page(seg1->mr_offset); 1625 - seg1->mr_offset -= pageoff; /* start of page */ 1626 - seg1->mr_len += pageoff; 1627 - len = -pageoff; 1628 - if (*nsegs > ia->ri_max_frmr_depth) 1629 - *nsegs = ia->ri_max_frmr_depth; 1630 - for (page_no = i = 0; i < *nsegs;) { 1631 - rpcrdma_map_one(ia, seg, writing); 1632 - pa = seg->mr_dma; 1633 - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { 1634 - frmr->fr_pgl->page_list[page_no++] = pa; 1635 - pa += PAGE_SIZE; 1636 - } 1637 - len += seg->mr_len; 1638 - ++seg; 1639 - ++i; 1640 - /* Check for holes */ 1641 - if ((i < *nsegs && offset_in_page(seg->mr_offset)) || 1642 - offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 1643 - break; 1644 - } 1645 - dprintk("RPC: %s: Using frmr %p to map %d segments\n", 1646 - __func__, mw, i); 1647 - 1648 - frmr->fr_state = FRMR_IS_VALID; 1649 - 1650 - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 1651 - fastreg_wr.wr_id = (unsigned long)(void *)mw; 1652 - fastreg_wr.opcode = IB_WR_FAST_REG_MR; 1653 - fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; 1654 - fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; 1655 - fastreg_wr.wr.fast_reg.page_list_len = page_no; 1656 - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1657 - fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; 1658 - if (fastreg_wr.wr.fast_reg.length < len) { 1659 - rc = -EIO; 1660 - goto out_err; 1661 - } 1662 - 1663 - /* Bump the key */ 1664 - key = (u8)(mr->rkey & 0x000000FF); 1665 - ib_update_fast_reg_key(mr, ++key); 1666 - 1667 - fastreg_wr.wr.fast_reg.access_flags = (writing ? 
1668 - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 1669 - IB_ACCESS_REMOTE_READ); 1670 - fastreg_wr.wr.fast_reg.rkey = mr->rkey; 1671 - DECR_CQCOUNT(&r_xprt->rx_ep); 1672 - 1673 - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); 1674 - if (rc) { 1675 - dprintk("RPC: %s: failed ib_post_send for register," 1676 - " status %i\n", __func__, rc); 1677 - ib_update_fast_reg_key(mr, --key); 1678 - goto out_err; 1679 - } else { 1680 - seg1->mr_rkey = mr->rkey; 1681 - seg1->mr_base = seg1->mr_dma + pageoff; 1682 - seg1->mr_nsegs = i; 1683 - seg1->mr_len = len; 1684 - } 1685 - *nsegs = i; 1686 - return 0; 1687 - out_err: 1688 - frmr->fr_state = FRMR_IS_INVALID; 1689 - while (i--) 1690 - rpcrdma_unmap_one(ia, --seg); 1691 - return rc; 1692 - } 1693 - 1694 - static int 1695 - rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, 1696 - struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) 1697 - { 1698 - struct rpcrdma_mr_seg *seg1 = seg; 1699 - struct ib_send_wr invalidate_wr, *bad_wr; 1700 - int rc; 1701 - 1702 - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; 1703 - 1704 - memset(&invalidate_wr, 0, sizeof invalidate_wr); 1705 - invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; 1706 - invalidate_wr.opcode = IB_WR_LOCAL_INV; 1707 - invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; 1708 - DECR_CQCOUNT(&r_xprt->rx_ep); 1709 - 1710 - read_lock(&ia->ri_qplock); 1711 - while (seg1->mr_nsegs--) 1712 - rpcrdma_unmap_one(ia, seg++); 1713 - rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 1714 - read_unlock(&ia->ri_qplock); 1715 - if (rc) { 1716 - /* Force rpcrdma_buffer_get() to retry */ 1717 - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; 1718 - dprintk("RPC: %s: failed ib_post_send for invalidate," 1719 - " status %i\n", __func__, rc); 1720 - } 1721 - return rc; 1722 - } 1723 - 1724 - static int 1725 - rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, 1726 - int *nsegs, int writing, struct rpcrdma_ia *ia) 1727 - { 1728 - 
struct rpcrdma_mr_seg *seg1 = seg; 1729 - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; 1730 - int len, pageoff, i, rc; 1731 - 1732 - pageoff = offset_in_page(seg1->mr_offset); 1733 - seg1->mr_offset -= pageoff; /* start of page */ 1734 - seg1->mr_len += pageoff; 1735 - len = -pageoff; 1736 - if (*nsegs > RPCRDMA_MAX_DATA_SEGS) 1737 - *nsegs = RPCRDMA_MAX_DATA_SEGS; 1738 - for (i = 0; i < *nsegs;) { 1739 - rpcrdma_map_one(ia, seg, writing); 1740 - physaddrs[i] = seg->mr_dma; 1741 - len += seg->mr_len; 1742 - ++seg; 1743 - ++i; 1744 - /* Check for holes */ 1745 - if ((i < *nsegs && offset_in_page(seg->mr_offset)) || 1746 - offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 1747 - break; 1748 - } 1749 - rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); 1750 - if (rc) { 1751 - dprintk("RPC: %s: failed ib_map_phys_fmr " 1752 - "%u@0x%llx+%i (%d)... status %i\n", __func__, 1753 - len, (unsigned long long)seg1->mr_dma, 1754 - pageoff, i, rc); 1755 - while (i--) 1756 - rpcrdma_unmap_one(ia, --seg); 1757 - } else { 1758 - seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; 1759 - seg1->mr_base = seg1->mr_dma + pageoff; 1760 - seg1->mr_nsegs = i; 1761 - seg1->mr_len = len; 1762 - } 1763 - *nsegs = i; 1764 - return rc; 1765 - } 1766 - 1767 - static int 1768 - rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, 1769 - struct rpcrdma_ia *ia) 1770 - { 1771 - struct rpcrdma_mr_seg *seg1 = seg; 1772 - LIST_HEAD(l); 1773 - int rc; 1774 - 1775 - list_add(&seg1->rl_mw->r.fmr->list, &l); 1776 - rc = ib_unmap_fmr(&l); 1777 - read_lock(&ia->ri_qplock); 1778 - while (seg1->mr_nsegs--) 1779 - rpcrdma_unmap_one(ia, seg++); 1780 - read_unlock(&ia->ri_qplock); 1781 - if (rc) 1782 - dprintk("RPC: %s: failed ib_unmap_fmr," 1783 - " status %i\n", __func__, rc); 1784 - return rc; 1785 - } 1786 - 1787 - int 1788 - rpcrdma_register_external(struct rpcrdma_mr_seg *seg, 1789 - int nsegs, int writing, struct rpcrdma_xprt *r_xprt) 1790 - { 1791 - struct rpcrdma_ia *ia = &r_xprt->rx_ia; 
1792 - int rc = 0; 1793 - 1794 - switch (ia->ri_memreg_strategy) { 1795 - 1796 - case RPCRDMA_ALLPHYSICAL: 1797 - rpcrdma_map_one(ia, seg, writing); 1798 - seg->mr_rkey = ia->ri_bind_mem->rkey; 1799 - seg->mr_base = seg->mr_dma; 1800 - seg->mr_nsegs = 1; 1801 - nsegs = 1; 1802 - break; 1803 - 1804 - /* Registration using frmr registration */ 1805 - case RPCRDMA_FRMR: 1806 - rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); 1807 - break; 1808 - 1809 - /* Registration using fmr memory registration */ 1810 - case RPCRDMA_MTHCAFMR: 1811 - rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); 1812 - break; 1813 - 1814 - default: 1815 - return -EIO; 1816 - } 1817 - if (rc) 1818 - return rc; 1819 - 1820 - return nsegs; 1821 - } 1822 - 1823 - int 1824 - rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, 1825 - struct rpcrdma_xprt *r_xprt) 1826 - { 1827 - struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1828 - int nsegs = seg->mr_nsegs, rc; 1829 - 1830 - switch (ia->ri_memreg_strategy) { 1831 - 1832 - case RPCRDMA_ALLPHYSICAL: 1833 - read_lock(&ia->ri_qplock); 1834 - rpcrdma_unmap_one(ia, seg); 1835 - read_unlock(&ia->ri_qplock); 1836 - break; 1837 - 1838 - case RPCRDMA_FRMR: 1839 - rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); 1840 - break; 1841 - 1842 - case RPCRDMA_MTHCAFMR: 1843 - rc = rpcrdma_deregister_fmr_external(seg, ia); 1844 - break; 1845 - 1846 - default: 1847 - break; 1848 - } 1849 - return nsegs; 1850 - } 1851 - 1852 - /* 1853 1857 * Prepost any receive buffer, then post send. 1854 1858 * 1855 1859 * Receive buffer is donated to hardware, reclaimed upon recv completion. 
··· 1590 2156 } 1591 2157 1592 2158 send_wr.next = NULL; 1593 - send_wr.wr_id = 0ULL; /* no send cookie */ 2159 + send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; 1594 2160 send_wr.sg_list = req->rl_send_iov; 1595 2161 send_wr.num_sge = req->rl_niovs; 1596 2162 send_wr.opcode = IB_WR_SEND; ··· 1649 2215 return rc; 1650 2216 } 1651 2217 1652 - /* Physical mapping means one Read/Write list entry per-page. 1653 - * All list entries must fit within an inline buffer 1654 - * 1655 - * NB: The server must return a Write list for NFS READ, 1656 - * which has the same constraint. Factor in the inline 1657 - * rsize as well. 2218 + /* How many chunk list items fit within our inline buffers? 1658 2219 */ 1659 - static size_t 1660 - rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) 2220 + unsigned int 2221 + rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) 1661 2222 { 1662 2223 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 1663 - unsigned int inline_size, pages; 2224 + int bytes, segments; 1664 2225 1665 - inline_size = min_t(unsigned int, 1666 - cdata->inline_wsize, cdata->inline_rsize); 1667 - inline_size -= RPCRDMA_HDRLEN_MIN; 1668 - pages = inline_size / sizeof(struct rpcrdma_segment); 1669 - return pages << PAGE_SHIFT; 1670 - } 1671 - 1672 - static size_t 1673 - rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) 1674 - { 1675 - return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; 1676 - } 1677 - 1678 - size_t 1679 - rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) 1680 - { 1681 - size_t result; 1682 - 1683 - switch (r_xprt->rx_ia.ri_memreg_strategy) { 1684 - case RPCRDMA_ALLPHYSICAL: 1685 - result = rpcrdma_physical_max_payload(r_xprt); 1686 - break; 1687 - default: 1688 - result = rpcrdma_mr_max_payload(r_xprt); 2226 + bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); 2227 + bytes -= RPCRDMA_HDRLEN_MIN; 2228 + if (bytes < sizeof(struct rpcrdma_segment) * 2) { 2229 + pr_warn("RPC: %s: inline threshold too small\n", 2230 + __func__); 2231 + 
return 0; 1689 2232 } 1690 - return result; 2233 + 2234 + segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); 2235 + dprintk("RPC: %s: max chunk list size = %d segments\n", 2236 + __func__, segments); 2237 + return segments; 1691 2238 }
+74 -16
net/sunrpc/xprtrdma/xprt_rdma.h
··· 60 60 * Interface Adapter -- one per transport instance 61 61 */ 62 62 struct rpcrdma_ia { 63 + const struct rpcrdma_memreg_ops *ri_ops; 63 64 rwlock_t ri_qplock; 64 65 struct rdma_cm_id *ri_id; 65 66 struct ib_pd *ri_pd; ··· 106 105 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 107 106 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 108 107 108 + /* Force completion handler to ignore the signal 109 + */ 110 + #define RPCRDMA_IGNORE_COMPLETION (0ULL) 111 + 109 112 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV 110 113 * 111 114 * The below structure appears at the front of a large region of kmalloc'd ··· 147 142 { 148 143 return (struct rpcrdma_msg *)rb->rg_base; 149 144 } 150 - 151 - enum rpcrdma_chunktype { 152 - rpcrdma_noch = 0, 153 - rpcrdma_readch, 154 - rpcrdma_areadch, 155 - rpcrdma_writech, 156 - rpcrdma_replych 157 - }; 158 145 159 146 /* 160 147 * struct rpcrdma_rep -- this structure encapsulates state required to recv ··· 210 213 struct ib_fmr *fmr; 211 214 struct rpcrdma_frmr frmr; 212 215 } r; 216 + void (*mw_sendcompletion)(struct ib_wc *); 213 217 struct list_head mw_list; 214 218 struct list_head mw_all; 215 219 }; ··· 256 258 unsigned int rl_niovs; /* 0, 2 or 4 */ 257 259 unsigned int rl_nchunks; /* non-zero if chunks */ 258 260 unsigned int rl_connect_cookie; /* retry detection */ 259 - enum rpcrdma_chunktype rl_rtype, rl_wtype; 260 261 struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ 261 262 struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ 262 263 struct ib_sge rl_send_iov[4]; /* for active requests */ ··· 337 340 }; 338 341 339 342 /* 343 + * Per-registration mode operations 344 + */ 345 + struct rpcrdma_xprt; 346 + struct rpcrdma_memreg_ops { 347 + int (*ro_map)(struct rpcrdma_xprt *, 348 + struct rpcrdma_mr_seg *, int, bool); 349 + int (*ro_unmap)(struct rpcrdma_xprt *, 350 + struct rpcrdma_mr_seg *); 351 + int (*ro_open)(struct rpcrdma_ia 
*, 352 + struct rpcrdma_ep *, 353 + struct rpcrdma_create_data_internal *); 354 + size_t (*ro_maxpages)(struct rpcrdma_xprt *); 355 + int (*ro_init)(struct rpcrdma_xprt *); 356 + void (*ro_reset)(struct rpcrdma_xprt *); 357 + void (*ro_destroy)(struct rpcrdma_buffer *); 358 + const char *ro_displayname; 359 + }; 360 + 361 + extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; 362 + extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; 363 + extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; 364 + 365 + /* 340 366 * RPCRDMA transport -- encapsulates the structures above for 341 367 * integration with RPC. 342 368 * ··· 418 398 void rpcrdma_recv_buffer_get(struct rpcrdma_req *); 419 399 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); 420 400 421 - int rpcrdma_register_external(struct rpcrdma_mr_seg *, 422 - int, int, struct rpcrdma_xprt *); 423 - int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, 424 - struct rpcrdma_xprt *); 425 - 426 401 struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, 427 402 size_t, gfp_t); 428 403 void rpcrdma_free_regbuf(struct rpcrdma_ia *, 429 404 struct rpcrdma_regbuf *); 405 + 406 + unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); 407 + 408 + /* 409 + * Wrappers for chunk registration, shared by read/write chunk code. 410 + */ 411 + 412 + void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); 413 + 414 + static inline enum dma_data_direction 415 + rpcrdma_data_dir(bool writing) 416 + { 417 + return writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; 418 + } 419 + 420 + static inline void 421 + rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, 422 + enum dma_data_direction direction) 423 + { 424 + seg->mr_dir = direction; 425 + seg->mr_dmalen = seg->mr_len; 426 + 427 + if (seg->mr_page) 428 + seg->mr_dma = ib_dma_map_page(device, 429 + seg->mr_page, offset_in_page(seg->mr_offset), 430 + seg->mr_dmalen, seg->mr_dir); 431 + else 432 + seg->mr_dma = ib_dma_map_single(device, 433 + seg->mr_offset, 434 + seg->mr_dmalen, seg->mr_dir); 435 + 436 + if (ib_dma_mapping_error(device, seg->mr_dma)) 437 + rpcrdma_mapping_error(seg); 438 + } 439 + 440 + static inline void 441 + rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) 442 + { 443 + if (seg->mr_page) 444 + ib_dma_unmap_page(device, 445 + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); 446 + else 447 + ib_dma_unmap_single(device, 448 + seg->mr_dma, seg->mr_dmalen, seg->mr_dir); 449 + } 430 450 431 451 /* 432 452 * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c ··· 478 418 /* 479 419 * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c 480 420 */ 481 - ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); 482 421 int rpcrdma_marshal_req(struct rpc_rqst *); 483 - size_t rpcrdma_max_payload(struct rpcrdma_xprt *); 484 422 485 423 /* Temporary NFS request map cache. Created in svc_rdma.c */ 486 424 extern struct kmem_cache *svc_rdma_map_cachep;