Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nfsd-5.7' of git://git.linux-nfs.org/projects/cel/cel-2.6

Pull nfsd updates from Chuck Lever:

- Fix EXCHANGE_ID response when NFSD runs in a container

- A battery of new static trace points

- Socket transports now use bio_vec to send Replies

- NFS/RDMA now supports filesystems with no .splice_read method

- Favor memcpy() over DMA mapping for small RPC/RDMA Replies

- Add pre-requisites for supporting multiple Write chunks

- Numerous minor fixes and clean-ups

[ Chuck is filling in for Bruce this time while he and his family settle
into a new house ]

* tag 'nfsd-5.7' of git://git.linux-nfs.org/projects/cel/cel-2.6: (39 commits)
svcrdma: Fix leak of transport addresses
SUNRPC: Fix a potential buffer overflow in 'svc_print_xprts()'
SUNRPC/cache: don't allow invalid entries to be flushed
nfsd: fsnotify on rmdir under nfsd/clients/
nfsd4: kill warnings on testing stateids with mismatched clientids
nfsd: remove read permission bit for ctl sysctl
NFSD: Fix NFS server build errors
sunrpc: Add tracing for cache events
SUNRPC/cache: Allow garbage collection of invalid cache entries
nfsd: export upcalls must not return ESTALE when mountd is down
nfsd: Add tracepoints for update of the expkey and export cache entries
nfsd: Add tracepoints for exp_find_key() and exp_get_by_name()
nfsd: Add tracing to nfsd_set_fh_dentry()
nfsd: Don't add locks to closed or closing open stateids
SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends
SUNRPC: Refactor xs_sendpages()
svcrdma: Avoid DMA mapping small RPC Replies
svcrdma: Fix double sync of transport header buffer
svcrdma: Refactor chunk list encoders
SUNRPC: Add encoders for list item discriminators
...

+1420 -922
+6 -5
fs/nfs/dns_resolve.c
··· 152 152 struct cache_head *ch) 153 153 { 154 154 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h); 155 - int ret; 156 155 157 - ret = nfs_cache_upcall(cd, key->hostname); 158 - if (ret) 159 - ret = sunrpc_cache_pipe_upcall(cd, ch); 160 - return ret; 156 + if (test_and_set_bit(CACHE_PENDING, &ch->flags)) 157 + return 0; 158 + if (!nfs_cache_upcall(cd, key->hostname)) 159 + return 0; 160 + clear_bit(CACHE_PENDING, &ch->flags); 161 + return sunrpc_cache_pipe_upcall_timeout(cd, ch); 161 162 } 162 163 163 164 static int nfs_dns_match(struct cache_head *ca,
+1 -1
fs/nfsd/Kconfig
··· 136 136 137 137 config NFSD_V4_2_INTER_SSC 138 138 bool "NFSv4.2 inter server to server COPY" 139 - depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 139 + depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 && NFS_FS=y 140 140 help 141 141 This option enables support for NFSv4.2 inter server to 142 142 server copy where the destination server calls the NFSv4.2
+34 -11
fs/nfsd/export.c
··· 23 23 #include "netns.h" 24 24 #include "pnfs.h" 25 25 #include "filecache.h" 26 + #include "trace.h" 26 27 27 28 #define NFSDDBG_FACILITY NFSDDBG_EXPORT 28 29 ··· 49 48 path_put(&key->ek_path); 50 49 auth_domain_put(key->ek_client); 51 50 kfree_rcu(key, ek_rcu); 51 + } 52 + 53 + static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) 54 + { 55 + return sunrpc_cache_pipe_upcall(cd, h); 52 56 } 53 57 54 58 static void expkey_request(struct cache_detail *cd, ··· 146 140 if (len == 0) { 147 141 set_bit(CACHE_NEGATIVE, &key.h.flags); 148 142 ek = svc_expkey_update(cd, &key, ek); 149 - if (!ek) 143 + if (ek) 144 + trace_nfsd_expkey_update(ek, NULL); 145 + else 150 146 err = -ENOMEM; 151 147 } else { 152 148 err = kern_path(buf, 0, &key.ek_path); ··· 158 150 dprintk("Found the path %s\n", buf); 159 151 160 152 ek = svc_expkey_update(cd, &key, ek); 161 - if (!ek) 153 + if (ek) 154 + trace_nfsd_expkey_update(ek, buf); 155 + else 162 156 err = -ENOMEM; 163 157 path_put(&key.ek_path); 164 158 } ··· 259 249 .hash_size = EXPKEY_HASHMAX, 260 250 .name = "nfsd.fh", 261 251 .cache_put = expkey_put, 252 + .cache_upcall = expkey_upcall, 262 253 .cache_request = expkey_request, 263 254 .cache_parse = expkey_parse, 264 255 .cache_show = expkey_show, ··· 339 328 nfsd4_fslocs_free(&exp->ex_fslocs); 340 329 kfree(exp->ex_uuid); 341 330 kfree_rcu(exp, ex_rcu); 331 + } 332 + 333 + static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) 334 + { 335 + return sunrpc_cache_pipe_upcall(cd, h); 342 336 } 343 337 344 338 static void svc_export_request(struct cache_detail *cd, ··· 659 643 } 660 644 661 645 expp = svc_export_lookup(&exp); 662 - if (expp) 663 - expp = svc_export_update(&exp, expp); 664 - else 646 + if (!expp) { 665 647 err = -ENOMEM; 666 - cache_flush(); 667 - if (expp == NULL) 668 - err = -ENOMEM; 669 - else 648 + goto out4; 649 + } 650 + expp = svc_export_update(&exp, expp); 651 + if (expp) { 652 + trace_nfsd_export_update(expp); 653 + 
cache_flush(); 670 654 exp_put(expp); 655 + } else 656 + err = -ENOMEM; 671 657 out4: 672 658 nfsd4_fslocs_free(&exp.ex_fslocs); 673 659 kfree(exp.ex_uuid); ··· 785 767 .hash_size = EXPORT_HASHMAX, 786 768 .name = "nfsd.export", 787 769 .cache_put = svc_export_put, 770 + .cache_upcall = svc_export_upcall, 788 771 .cache_request = svc_export_request, 789 772 .cache_parse = svc_export_parse, 790 773 .cache_show = svc_export_show, ··· 851 832 if (ek == NULL) 852 833 return ERR_PTR(-ENOMEM); 853 834 err = cache_check(cd, &ek->h, reqp); 854 - if (err) 835 + if (err) { 836 + trace_nfsd_exp_find_key(&key, err); 855 837 return ERR_PTR(err); 838 + } 856 839 return ek; 857 840 } 858 841 ··· 876 855 if (exp == NULL) 877 856 return ERR_PTR(-ENOMEM); 878 857 err = cache_check(cd, &exp->h, reqp); 879 - if (err) 858 + if (err) { 859 + trace_nfsd_exp_get_by_name(&key, err); 880 860 return ERR_PTR(err); 861 + } 881 862 return exp; 882 863 } 883 864
+1 -1
fs/nfsd/filecache.c
··· 890 890 unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 891 891 892 892 hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, 893 - nf_node) { 893 + nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { 894 894 if ((need & nf->nf_may) != need) 895 895 continue; 896 896 if (nf->nf_inode != inode)
+2
fs/nfsd/netns.h
··· 172 172 unsigned int longest_chain_cachesize; 173 173 174 174 struct shrinker nfsd_reply_cache_shrinker; 175 + /* utsname taken from the process that starts the server */ 176 + char nfsd_name[UNX_MAXNODENAME+1]; 175 177 }; 176 178 177 179 /* Simple check to find out if a given net was properly initialized */
+14
fs/nfsd/nfs4idmap.c
··· 122 122 return hash; 123 123 } 124 124 125 + static int 126 + idtoname_upcall(struct cache_detail *cd, struct cache_head *h) 127 + { 128 + return sunrpc_cache_pipe_upcall_timeout(cd, h); 129 + } 130 + 125 131 static void 126 132 idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, 127 133 int *blen) ··· 190 184 .hash_size = ENT_HASHMAX, 191 185 .name = "nfs4.idtoname", 192 186 .cache_put = ent_put, 187 + .cache_upcall = idtoname_upcall, 193 188 .cache_request = idtoname_request, 194 189 .cache_parse = idtoname_parse, 195 190 .cache_show = idtoname_show, ··· 302 295 return hash_str(ent->name, ENT_HASHBITS); 303 296 } 304 297 298 + static int 299 + nametoid_upcall(struct cache_detail *cd, struct cache_head *h) 300 + { 301 + return sunrpc_cache_pipe_upcall_timeout(cd, h); 302 + } 303 + 305 304 static void 306 305 nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, 307 306 int *blen) ··· 360 347 .hash_size = ENT_HASHMAX, 361 348 .name = "nfs4.nametoid", 362 349 .cache_put = ent_put, 350 + .cache_upcall = nametoid_upcall, 363 351 .cache_request = nametoid_request, 364 352 .cache_parse = nametoid_parse, 365 353 .cache_show = nametoid_show,
+47 -40
fs/nfsd/nfs4state.c
··· 494 494 { 495 495 struct nfsd_file *ret; 496 496 497 + if (!f) 498 + return NULL; 497 499 spin_lock(&f->fi_lock); 498 500 ret = __nfs4_get_fd(f, O_RDWR); 499 501 if (!ret) { ··· 1311 1309 nfs4_free_stateowner(sop); 1312 1310 } 1313 1311 1312 + static bool 1313 + nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp) 1314 + { 1315 + return list_empty(&stp->st_perfile); 1316 + } 1317 + 1314 1318 static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) 1315 1319 { 1316 1320 struct nfs4_file *fp = stp->st_stid.sc_file; ··· 1387 1379 { 1388 1380 lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); 1389 1381 1382 + if (!unhash_ol_stateid(stp)) 1383 + return false; 1390 1384 list_del_init(&stp->st_locks); 1391 1385 nfs4_unhash_stid(&stp->st_stid); 1392 - return unhash_ol_stateid(stp); 1386 + return true; 1393 1387 } 1394 1388 1395 1389 static void release_lock_stateid(struct nfs4_ol_stateid *stp) ··· 1456 1446 static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, 1457 1447 struct list_head *reaplist) 1458 1448 { 1459 - bool unhashed; 1460 - 1461 1449 lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); 1462 1450 1463 - unhashed = unhash_ol_stateid(stp); 1451 + if (!unhash_ol_stateid(stp)) 1452 + return false; 1464 1453 release_open_stateid_locks(stp, reaplist); 1465 - return unhashed; 1454 + return true; 1466 1455 } 1467 1456 1468 1457 static void release_open_stateid(struct nfs4_ol_stateid *stp) ··· 2645 2636 static const struct tree_descr client_files[] = { 2646 2637 [0] = {"info", &client_info_fops, S_IRUSR}, 2647 2638 [1] = {"states", &client_states_fops, S_IRUSR}, 2648 - [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, 2639 + [2] = {"ctl", &client_ctl_fops, S_IWUSR}, 2649 2640 [3] = {""}, 2650 2641 }; 2651 2642 ··· 4352 4343 { 4353 4344 struct nfs4_file *fp; 4354 4345 4355 - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { 4346 + hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, 4347 + lockdep_is_held(&state_lock)) { 
4356 4348 if (fh_match(&fp->fi_fhandle, fh)) { 4357 4349 if (refcount_inc_not_zero(&fp->fi_ref)) 4358 4350 return fp; ··· 5531 5521 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || 5532 5522 CLOSE_STATEID(stateid)) 5533 5523 return status; 5534 - /* Client debugging aid. */ 5535 - if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { 5536 - char addr_str[INET6_ADDRSTRLEN]; 5537 - rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str, 5538 - sizeof(addr_str)); 5539 - pr_warn_ratelimited("NFSD: client %s testing state ID " 5540 - "with incorrect client ID\n", addr_str); 5524 + if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) 5541 5525 return status; 5542 - } 5543 5526 spin_lock(&cl->cl_lock); 5544 5527 s = find_stateid_locked(cl, stateid); 5545 5528 if (!s) ··· 6396 6393 } 6397 6394 6398 6395 static struct nfs4_ol_stateid * 6399 - find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) 6396 + find_lock_stateid(const struct nfs4_lockowner *lo, 6397 + const struct nfs4_ol_stateid *ost) 6400 6398 { 6401 6399 struct nfs4_ol_stateid *lst; 6402 - struct nfs4_client *clp = lo->lo_owner.so_client; 6403 6400 6404 - lockdep_assert_held(&clp->cl_lock); 6401 + lockdep_assert_held(&ost->st_stid.sc_client->cl_lock); 6405 6402 6406 - list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { 6407 - if (lst->st_stid.sc_type != NFS4_LOCK_STID) 6408 - continue; 6409 - if (lst->st_stid.sc_file == fp) { 6410 - refcount_inc(&lst->st_stid.sc_count); 6411 - return lst; 6403 + /* If ost is not hashed, ost->st_locks will not be valid */ 6404 + if (!nfs4_ol_stateid_unhashed(ost)) 6405 + list_for_each_entry(lst, &ost->st_locks, st_locks) { 6406 + if (lst->st_stateowner == &lo->lo_owner) { 6407 + refcount_inc(&lst->st_stid.sc_count); 6408 + return lst; 6409 + } 6412 6410 } 6413 - } 6414 6411 return NULL; 6415 6412 } 6416 6413 ··· 6426 6423 mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); 6427 6424 retry: 6428 6425 spin_lock(&clp->cl_lock); 
6429 - spin_lock(&fp->fi_lock); 6430 - retstp = find_lock_stateid(lo, fp); 6426 + if (nfs4_ol_stateid_unhashed(open_stp)) 6427 + goto out_close; 6428 + retstp = find_lock_stateid(lo, open_stp); 6431 6429 if (retstp) 6432 - goto out_unlock; 6433 - 6430 + goto out_found; 6434 6431 refcount_inc(&stp->st_stid.sc_count); 6435 6432 stp->st_stid.sc_type = NFS4_LOCK_STID; 6436 6433 stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); ··· 6439 6436 stp->st_access_bmap = 0; 6440 6437 stp->st_deny_bmap = open_stp->st_deny_bmap; 6441 6438 stp->st_openstp = open_stp; 6439 + spin_lock(&fp->fi_lock); 6442 6440 list_add(&stp->st_locks, &open_stp->st_locks); 6443 6441 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); 6444 6442 list_add(&stp->st_perfile, &fp->fi_stateids); 6445 - out_unlock: 6446 6443 spin_unlock(&fp->fi_lock); 6447 6444 spin_unlock(&clp->cl_lock); 6448 - if (retstp) { 6449 - if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { 6450 - nfs4_put_stid(&retstp->st_stid); 6451 - goto retry; 6452 - } 6453 - /* To keep mutex tracking happy */ 6454 - mutex_unlock(&stp->st_mutex); 6455 - stp = retstp; 6456 - } 6457 6445 return stp; 6446 + out_found: 6447 + spin_unlock(&clp->cl_lock); 6448 + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { 6449 + nfs4_put_stid(&retstp->st_stid); 6450 + goto retry; 6451 + } 6452 + /* To keep mutex tracking happy */ 6453 + mutex_unlock(&stp->st_mutex); 6454 + return retstp; 6455 + out_close: 6456 + spin_unlock(&clp->cl_lock); 6457 + mutex_unlock(&stp->st_mutex); 6458 + return NULL; 6458 6459 } 6459 6460 6460 6461 static struct nfs4_ol_stateid * ··· 6473 6466 6474 6467 *new = false; 6475 6468 spin_lock(&clp->cl_lock); 6476 - lst = find_lock_stateid(lo, fi); 6469 + lst = find_lock_stateid(lo, ost); 6477 6470 spin_unlock(&clp->cl_lock); 6478 6471 if (lst != NULL) { 6479 6472 if (nfsd4_lock_ol_stateid(lst) == nfs_ok)
+20 -18
fs/nfsd/nfs4xdr.c
··· 3591 3591 __be32 nfserr; 3592 3592 __be32 tmp; 3593 3593 __be32 *p; 3594 - u32 zzz = 0; 3595 3594 int pad; 3595 + 3596 + /* 3597 + * svcrdma requires every READ payload to start somewhere 3598 + * in xdr->pages. 3599 + */ 3600 + if (xdr->iov == xdr->buf->head) { 3601 + xdr->iov = NULL; 3602 + xdr->end = xdr->p; 3603 + } 3596 3604 3597 3605 len = maxcount; 3598 3606 v = 0; 3599 - 3600 - thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p)); 3601 - p = xdr_reserve_space(xdr, (thislen+3)&~3); 3602 - WARN_ON_ONCE(!p); 3603 - resp->rqstp->rq_vec[v].iov_base = p; 3604 - resp->rqstp->rq_vec[v].iov_len = thislen; 3605 - v++; 3606 - len -= thislen; 3607 - 3608 3607 while (len) { 3609 3608 thislen = min_t(long, len, PAGE_SIZE); 3610 - p = xdr_reserve_space(xdr, (thislen+3)&~3); 3609 + p = xdr_reserve_space(xdr, thislen); 3611 3610 WARN_ON_ONCE(!p); 3612 3611 resp->rqstp->rq_vec[v].iov_base = p; 3613 3612 resp->rqstp->rq_vec[v].iov_len = thislen; ··· 3615 3616 } 3616 3617 read->rd_vlen = v; 3617 3618 3618 - len = maxcount; 3619 3619 nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, 3620 3620 resp->rqstp->rq_vec, read->rd_vlen, &maxcount, 3621 3621 &eof); 3622 3622 read->rd_length = maxcount; 3623 3623 if (nfserr) 3624 3624 return nfserr; 3625 - xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); 3625 + if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount)) 3626 + return nfserr_io; 3627 + xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); 3626 3628 3627 3629 tmp = htonl(eof); 3628 3630 write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); 3629 3631 tmp = htonl(maxcount); 3630 3632 write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); 3631 3633 3634 + tmp = xdr_zero; 3632 3635 pad = (maxcount&3) ? 
4 - (maxcount&3) : 0; 3633 3636 write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, 3634 - &zzz, pad); 3637 + &tmp, pad); 3635 3638 return 0; 3636 3639 3637 3640 } ··· 4006 4005 int major_id_sz; 4007 4006 int server_scope_sz; 4008 4007 uint64_t minor_id = 0; 4008 + struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id); 4009 4009 4010 - major_id = utsname()->nodename; 4011 - major_id_sz = strlen(major_id); 4012 - server_scope = utsname()->nodename; 4013 - server_scope_sz = strlen(server_scope); 4010 + major_id = nn->nfsd_name; 4011 + major_id_sz = strlen(nn->nfsd_name); 4012 + server_scope = nn->nfsd_name; 4013 + server_scope_sz = strlen(nn->nfsd_name); 4014 4014 4015 4015 p = xdr_reserve_space(xdr, 4016 4016 8 /* eir_clientid */ +
+1
fs/nfsd/nfsctl.c
··· 1333 1333 dget(dentry); 1334 1334 ret = simple_rmdir(dir, dentry); 1335 1335 WARN_ON_ONCE(ret); 1336 + fsnotify_rmdir(dir, dentry); 1336 1337 d_delete(dentry); 1337 1338 inode_unlock(dir); 1338 1339 }
+10 -3
fs/nfsd/nfsfh.c
··· 14 14 #include "nfsd.h" 15 15 #include "vfs.h" 16 16 #include "auth.h" 17 + #include "trace.h" 17 18 18 19 #define NFSDDBG_FACILITY NFSDDBG_FH 19 20 ··· 210 209 } 211 210 212 211 error = nfserr_stale; 213 - if (PTR_ERR(exp) == -ENOENT) 214 - return error; 212 + if (IS_ERR(exp)) { 213 + trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp)); 215 214 216 - if (IS_ERR(exp)) 215 + if (PTR_ERR(exp) == -ENOENT) 216 + return error; 217 + 217 218 return nfserrno(PTR_ERR(exp)); 219 + } 218 220 219 221 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { 220 222 /* Elevate privileges so that the lack of 'r' or 'x' ··· 271 267 dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, 272 268 data_left, fileid_type, 273 269 nfsd_acceptable, exp); 270 + if (IS_ERR_OR_NULL(dentry)) 271 + trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, 272 + dentry ? PTR_ERR(dentry) : -ESTALE); 274 273 } 275 274 if (dentry == NULL) 276 275 goto out;
+3
fs/nfsd/nfssvc.c
··· 749 749 if (nrservs == 0 && nn->nfsd_serv == NULL) 750 750 goto out; 751 751 752 + strlcpy(nn->nfsd_name, utsname()->nodename, 753 + sizeof(nn->nfsd_name)); 754 + 752 755 error = nfsd_create_serv(net); 753 756 if (error) 754 757 goto out;
+122
fs/nfsd/trace.h
··· 9 9 #define _NFSD_TRACE_H 10 10 11 11 #include <linux/tracepoint.h> 12 + #include "export.h" 12 13 #include "nfsfh.h" 13 14 14 15 TRACE_EVENT(nfsd_compound, ··· 50 49 __entry->resp_opcnt, __entry->args_opcnt, 51 50 __get_str(name), __entry->status) 52 51 ) 52 + 53 + DECLARE_EVENT_CLASS(nfsd_fh_err_class, 54 + TP_PROTO(struct svc_rqst *rqstp, 55 + struct svc_fh *fhp, 56 + int status), 57 + TP_ARGS(rqstp, fhp, status), 58 + TP_STRUCT__entry( 59 + __field(u32, xid) 60 + __field(u32, fh_hash) 61 + __field(int, status) 62 + ), 63 + TP_fast_assign( 64 + __entry->xid = be32_to_cpu(rqstp->rq_xid); 65 + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); 66 + __entry->status = status; 67 + ), 68 + TP_printk("xid=0x%08x fh_hash=0x%08x status=%d", 69 + __entry->xid, __entry->fh_hash, 70 + __entry->status) 71 + ) 72 + 73 + #define DEFINE_NFSD_FH_ERR_EVENT(name) \ 74 + DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \ 75 + TP_PROTO(struct svc_rqst *rqstp, \ 76 + struct svc_fh *fhp, \ 77 + int status), \ 78 + TP_ARGS(rqstp, fhp, status)) 79 + 80 + DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport); 81 + DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle); 82 + 83 + TRACE_EVENT(nfsd_exp_find_key, 84 + TP_PROTO(const struct svc_expkey *key, 85 + int status), 86 + TP_ARGS(key, status), 87 + TP_STRUCT__entry( 88 + __field(int, fsidtype) 89 + __array(u32, fsid, 6) 90 + __string(auth_domain, key->ek_client->name) 91 + __field(int, status) 92 + ), 93 + TP_fast_assign( 94 + __entry->fsidtype = key->ek_fsidtype; 95 + memcpy(__entry->fsid, key->ek_fsid, 4*6); 96 + __assign_str(auth_domain, key->ek_client->name); 97 + __entry->status = status; 98 + ), 99 + TP_printk("fsid=%x::%s domain=%s status=%d", 100 + __entry->fsidtype, 101 + __print_array(__entry->fsid, 6, 4), 102 + __get_str(auth_domain), 103 + __entry->status 104 + ) 105 + ); 106 + 107 + TRACE_EVENT(nfsd_expkey_update, 108 + TP_PROTO(const struct svc_expkey *key, const char *exp_path), 109 + TP_ARGS(key, exp_path), 110 + 
TP_STRUCT__entry( 111 + __field(int, fsidtype) 112 + __array(u32, fsid, 6) 113 + __string(auth_domain, key->ek_client->name) 114 + __string(path, exp_path) 115 + __field(bool, cache) 116 + ), 117 + TP_fast_assign( 118 + __entry->fsidtype = key->ek_fsidtype; 119 + memcpy(__entry->fsid, key->ek_fsid, 4*6); 120 + __assign_str(auth_domain, key->ek_client->name); 121 + __assign_str(path, exp_path); 122 + __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); 123 + ), 124 + TP_printk("fsid=%x::%s domain=%s path=%s cache=%s", 125 + __entry->fsidtype, 126 + __print_array(__entry->fsid, 6, 4), 127 + __get_str(auth_domain), 128 + __get_str(path), 129 + __entry->cache ? "pos" : "neg" 130 + ) 131 + ); 132 + 133 + TRACE_EVENT(nfsd_exp_get_by_name, 134 + TP_PROTO(const struct svc_export *key, 135 + int status), 136 + TP_ARGS(key, status), 137 + TP_STRUCT__entry( 138 + __string(path, key->ex_path.dentry->d_name.name) 139 + __string(auth_domain, key->ex_client->name) 140 + __field(int, status) 141 + ), 142 + TP_fast_assign( 143 + __assign_str(path, key->ex_path.dentry->d_name.name); 144 + __assign_str(auth_domain, key->ex_client->name); 145 + __entry->status = status; 146 + ), 147 + TP_printk("path=%s domain=%s status=%d", 148 + __get_str(path), 149 + __get_str(auth_domain), 150 + __entry->status 151 + ) 152 + ); 153 + 154 + TRACE_EVENT(nfsd_export_update, 155 + TP_PROTO(const struct svc_export *key), 156 + TP_ARGS(key), 157 + TP_STRUCT__entry( 158 + __string(path, key->ex_path.dentry->d_name.name) 159 + __string(auth_domain, key->ex_client->name) 160 + __field(bool, cache) 161 + ), 162 + TP_fast_assign( 163 + __assign_str(path, key->ex_path.dentry->d_name.name); 164 + __assign_str(auth_domain, key->ex_client->name); 165 + __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); 166 + ), 167 + TP_printk("path=%s domain=%s cache=%s", 168 + __get_str(path), 169 + __get_str(auth_domain), 170 + __entry->cache ? 
"pos" : "neg" 171 + ) 172 + ); 53 173 54 174 DECLARE_EVENT_CLASS(nfsd_io_class, 55 175 TP_PROTO(struct svc_rqst *rqstp,
+6 -3
include/linux/sunrpc/cache.h
··· 179 179 180 180 extern int 181 181 sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); 182 + extern int 183 + sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, 184 + struct cache_head *h); 182 185 183 186 184 187 extern void cache_clean_deferred(void *owner); ··· 209 206 210 207 static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) 211 208 { 209 + if (h->expiry_time < seconds_since_boot()) 210 + return true; 212 211 if (!test_bit(CACHE_VALID, &h->flags)) 213 212 return false; 214 - 215 - return (h->expiry_time < seconds_since_boot()) || 216 - (detail->flush_time >= h->last_refresh); 213 + return detail->flush_time >= h->last_refresh; 217 214 } 218 215 219 216 extern int cache_check(struct cache_detail *detail,
+2 -1
include/linux/sunrpc/rpc_rdma.h
··· 58 58 enum { 59 59 rpcrdma_fixed_maxsz = 4, 60 60 rpcrdma_segment_maxsz = 4, 61 - rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, 61 + rpcrdma_readseg_maxsz = 1 + rpcrdma_segment_maxsz, 62 + rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz, 62 63 }; 63 64 64 65 /*
+4 -1
include/linux/sunrpc/svc.h
··· 380 380 struct cache_deferred_req handle; 381 381 size_t xprt_hlen; 382 382 int argslen; 383 - __be32 args[0]; 383 + __be32 args[]; 384 384 }; 385 385 386 386 struct svc_process_info { ··· 517 517 void svc_reserve(struct svc_rqst *rqstp, int space); 518 518 struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); 519 519 char * svc_print_addr(struct svc_rqst *, char *, size_t); 520 + int svc_encode_read_payload(struct svc_rqst *rqstp, 521 + unsigned int offset, 522 + unsigned int length); 520 523 unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, 521 524 struct page **pages, 522 525 struct kvec *first, size_t total);
+17 -7
include/linux/sunrpc/svc_rdma.h
··· 52 52 53 53 /* Default and maximum inline threshold sizes */ 54 54 enum { 55 + RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1, 55 56 RPCRDMA_DEF_INLINE_THRESH = 4096, 56 57 RPCRDMA_MAX_INLINE_THRESH = 65536 57 58 }; ··· 133 132 struct ib_sge rc_recv_sge; 134 133 void *rc_recv_buf; 135 134 struct xdr_buf rc_arg; 135 + struct xdr_stream rc_stream; 136 136 bool rc_temp; 137 137 u32 rc_byte_len; 138 138 unsigned int rc_page_count; 139 139 unsigned int rc_hdr_count; 140 140 u32 rc_inv_rkey; 141 + __be32 *rc_write_list; 142 + __be32 *rc_reply_chunk; 143 + unsigned int rc_read_payload_offset; 144 + unsigned int rc_read_payload_length; 141 145 struct page *rc_pages[RPCSVC_MAXPAGES]; 142 146 }; 143 147 ··· 150 144 struct list_head sc_list; 151 145 struct ib_send_wr sc_send_wr; 152 146 struct ib_cqe sc_cqe; 147 + struct xdr_buf sc_hdrbuf; 148 + struct xdr_stream sc_stream; 153 149 void *sc_xprt_buf; 154 150 int sc_page_count; 155 151 int sc_cur_sge_no; ··· 178 170 struct svc_rqst *rqstp, 179 171 struct svc_rdma_recv_ctxt *head, __be32 *p); 180 172 extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, 181 - __be32 *wr_ch, struct xdr_buf *xdr); 173 + __be32 *wr_ch, struct xdr_buf *xdr, 174 + unsigned int offset, 175 + unsigned long length); 182 176 extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, 183 - __be32 *rp_ch, bool writelist, 177 + const struct svc_rdma_recv_ctxt *rctxt, 184 178 struct xdr_buf *xdr); 185 179 186 180 /* svc_rdma_sendto.c */ ··· 192 182 extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, 193 183 struct svc_rdma_send_ctxt *ctxt); 194 184 extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr); 195 - extern void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, 196 - struct svc_rdma_send_ctxt *ctxt, 197 - unsigned int len); 198 185 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 199 - struct svc_rdma_send_ctxt *ctxt, 200 - struct xdr_buf *xdr, __be32 *wr_lst); 186 + struct 
svc_rdma_send_ctxt *sctxt, 187 + const struct svc_rdma_recv_ctxt *rctxt, 188 + struct xdr_buf *xdr); 201 189 extern int svc_rdma_sendto(struct svc_rqst *); 190 + extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, 191 + unsigned int length); 202 192 203 193 /* svc_rdma_transport.c */ 204 194 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
+2
include/linux/sunrpc/svc_xprt.h
··· 21 21 int (*xpo_has_wspace)(struct svc_xprt *); 22 22 int (*xpo_recvfrom)(struct svc_rqst *); 23 23 int (*xpo_sendto)(struct svc_rqst *); 24 + int (*xpo_read_payload)(struct svc_rqst *, unsigned int, 25 + unsigned int); 24 26 void (*xpo_release_rqst)(struct svc_rqst *); 25 27 void (*xpo_detach)(struct svc_xprt *); 26 28 void (*xpo_free)(struct svc_xprt *);
+53 -14
include/linux/sunrpc/xdr.h
··· 188 188 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); 189 189 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); 190 190 191 - /* 192 - * Helper structure for copying from an sk_buff. 193 - */ 194 - struct xdr_skb_reader { 195 - struct sk_buff *skb; 196 - unsigned int offset; 197 - size_t count; 198 - __wsum csum; 199 - }; 200 - 201 - typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); 202 - 203 - extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); 204 - 205 191 extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); 206 192 extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); 207 193 ··· 284 298 const size_t mask = sizeof(__u32) - 1; 285 299 286 300 return (n + mask) & ~mask; 301 + } 302 + 303 + /** 304 + * xdr_pad_size - Calculate size of an object's pad 305 + * @n: Size of an object being XDR encoded (in bytes) 306 + * 307 + * This implementation avoids the need for conditional 308 + * branches or modulo division. 
309 + * 310 + * Return value: 311 + * Size (in bytes) of the needed XDR pad 312 + */ 313 + static inline size_t xdr_pad_size(size_t n) 314 + { 315 + return xdr_align_size(n) - n; 316 + } 317 + 318 + /** 319 + * xdr_stream_encode_item_present - Encode a "present" list item 320 + * @xdr: pointer to xdr_stream 321 + * 322 + * Return values: 323 + * On success, returns length in bytes of XDR buffer consumed 324 + * %-EMSGSIZE on XDR buffer overflow 325 + */ 326 + static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) 327 + { 328 + const size_t len = sizeof(__be32); 329 + __be32 *p = xdr_reserve_space(xdr, len); 330 + 331 + if (unlikely(!p)) 332 + return -EMSGSIZE; 333 + *p = xdr_one; 334 + return len; 335 + } 336 + 337 + /** 338 + * xdr_stream_encode_item_absent - Encode a "not present" list item 339 + * @xdr: pointer to xdr_stream 340 + * 341 + * Return values: 342 + * On success, returns length in bytes of XDR buffer consumed 343 + * %-EMSGSIZE on XDR buffer overflow 344 + */ 345 + static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) 346 + { 347 + const size_t len = sizeof(__be32); 348 + __be32 *p = xdr_reserve_space(xdr, len); 349 + 350 + if (unlikely(!p)) 351 + return -EMSGSIZE; 352 + *p = xdr_zero; 353 + return len; 287 354 } 288 355 289 356 /**
+58 -1
include/trace/events/rpcgss.h
··· 126 126 DEFINE_GSSAPI_EVENT(wrap); 127 127 DEFINE_GSSAPI_EVENT(unwrap); 128 128 129 - TRACE_EVENT(rpcgss_accept_upcall, 129 + TRACE_EVENT(rpcgss_svc_accept_upcall, 130 130 TP_PROTO( 131 131 __be32 xid, 132 132 u32 major_status, ··· 151 151 __entry->xid, __entry->major_status == 0 ? "GSS_S_COMPLETE" : 152 152 show_gss_status(__entry->major_status), 153 153 __entry->major_status, __entry->minor_status 154 + ) 155 + ); 156 + 157 + TRACE_EVENT(rpcgss_svc_accept, 158 + TP_PROTO( 159 + __be32 xid, 160 + size_t len 161 + ), 162 + 163 + TP_ARGS(xid, len), 164 + 165 + TP_STRUCT__entry( 166 + __field(u32, xid) 167 + __field(size_t, len) 168 + ), 169 + 170 + TP_fast_assign( 171 + __entry->xid = be32_to_cpu(xid); 172 + __entry->len = len; 173 + ), 174 + 175 + TP_printk("xid=0x%08x len=%zu", 176 + __entry->xid, __entry->len 154 177 ) 155 178 ); 156 179 ··· 290 267 __entry->xid, __entry->seqno, __entry->seq_xmit, 291 268 __entry->ret ? "" : "un") 292 269 ); 270 + 271 + DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class, 272 + TP_PROTO( 273 + __be32 xid, 274 + u32 seqno 275 + ), 276 + 277 + TP_ARGS(xid, seqno), 278 + 279 + TP_STRUCT__entry( 280 + __field(u32, xid) 281 + __field(u32, seqno) 282 + ), 283 + 284 + TP_fast_assign( 285 + __entry->xid = be32_to_cpu(xid); 286 + __entry->seqno = seqno; 287 + ), 288 + 289 + TP_printk("xid=0x%08x seqno=%u, request discarded", 290 + __entry->xid, __entry->seqno) 291 + ); 292 + 293 + #define DEFINE_SVC_SEQNO_EVENT(name) \ 294 + DEFINE_EVENT(rpcgss_svc_seqno_class, rpcgss_svc_##name, \ 295 + TP_PROTO( \ 296 + __be32 xid, \ 297 + u32 seqno \ 298 + ), \ 299 + TP_ARGS(xid, seqno)) 300 + 301 + DEFINE_SVC_SEQNO_EVENT(large_seqno); 302 + DEFINE_SVC_SEQNO_EVENT(old_seqno); 303 + 293 304 294 305 /** 295 306 ** gssd upcall related trace events
+31 -36
include/trace/events/rpcrdma.h
··· 1469 1469 ); 1470 1470 1471 1471 #define DEFINE_SEGMENT_EVENT(name) \ 1472 - DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\ 1472 + DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\ 1473 1473 TP_PROTO( \ 1474 1474 u32 handle, \ 1475 1475 u32 length, \ ··· 1477 1477 ), \ 1478 1478 TP_ARGS(handle, length, offset)) 1479 1479 1480 - DEFINE_SEGMENT_EVENT(rseg); 1481 - DEFINE_SEGMENT_EVENT(wseg); 1480 + DEFINE_SEGMENT_EVENT(decode_wseg); 1481 + DEFINE_SEGMENT_EVENT(encode_rseg); 1482 + DEFINE_SEGMENT_EVENT(send_rseg); 1483 + DEFINE_SEGMENT_EVENT(encode_wseg); 1484 + DEFINE_SEGMENT_EVENT(send_wseg); 1482 1485 1483 1486 DECLARE_EVENT_CLASS(svcrdma_chunk_event, 1484 1487 TP_PROTO( ··· 1504 1501 ); 1505 1502 1506 1503 #define DEFINE_CHUNK_EVENT(name) \ 1507 - DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\ 1504 + DEFINE_EVENT(svcrdma_chunk_event, svcrdma_##name, \ 1508 1505 TP_PROTO( \ 1509 1506 u32 length \ 1510 1507 ), \ 1511 1508 TP_ARGS(length)) 1512 1509 1513 - DEFINE_CHUNK_EVENT(pzr); 1514 - DEFINE_CHUNK_EVENT(write); 1515 - DEFINE_CHUNK_EVENT(reply); 1510 + DEFINE_CHUNK_EVENT(send_pzr); 1511 + DEFINE_CHUNK_EVENT(encode_write_chunk); 1512 + DEFINE_CHUNK_EVENT(send_write_chunk); 1513 + DEFINE_CHUNK_EVENT(encode_read_chunk); 1514 + DEFINE_CHUNK_EVENT(send_reply_chunk); 1516 1515 1517 - TRACE_EVENT(svcrdma_encode_read, 1516 + TRACE_EVENT(svcrdma_send_read_chunk, 1518 1517 TP_PROTO( 1519 1518 u32 length, 1520 1519 u32 position ··· 1637 1632 TP_printk("addr=%s device=%s status=%d", 1638 1633 __get_str(addr), __get_str(device), __entry->status 1639 1634 ) 1635 + ); 1636 + 1637 + TRACE_EVENT(svcrdma_send_pullup, 1638 + TP_PROTO( 1639 + unsigned int len 1640 + ), 1641 + 1642 + TP_ARGS(len), 1643 + 1644 + TP_STRUCT__entry( 1645 + __field(unsigned int, len) 1646 + ), 1647 + 1648 + TP_fast_assign( 1649 + __entry->len = len; 1650 + ), 1651 + 1652 + TP_printk("len=%u", __entry->len) 1640 1653 ); 1641 1654 1642 1655 TRACE_EVENT(svcrdma_send_failed, ··· 
1835 1812 1836 1813 DEFINE_SENDCOMP_EVENT(read); 1837 1814 DEFINE_SENDCOMP_EVENT(write); 1838 - 1839 - TRACE_EVENT(svcrdma_cm_event, 1840 - TP_PROTO( 1841 - const struct rdma_cm_event *event, 1842 - const struct sockaddr *sap 1843 - ), 1844 - 1845 - TP_ARGS(event, sap), 1846 - 1847 - TP_STRUCT__entry( 1848 - __field(unsigned int, event) 1849 - __field(int, status) 1850 - __array(__u8, addr, INET6_ADDRSTRLEN + 10) 1851 - ), 1852 - 1853 - TP_fast_assign( 1854 - __entry->event = event->event; 1855 - __entry->status = event->status; 1856 - snprintf(__entry->addr, sizeof(__entry->addr) - 1, 1857 - "%pISpc", sap); 1858 - ), 1859 - 1860 - TP_printk("addr=%s event=%s (%u/%d)", 1861 - __entry->addr, 1862 - rdma_show_cm_event(__entry->event), 1863 - __entry->event, __entry->status 1864 - ) 1865 - ); 1866 1815 1867 1816 TRACE_EVENT(svcrdma_qp_error, 1868 1817 TP_PROTO(
+76
include/trace/events/sunrpc.h
··· 14 14 #include <linux/net.h> 15 15 #include <linux/tracepoint.h> 16 16 17 + DECLARE_EVENT_CLASS(xdr_buf_class, 18 + TP_PROTO( 19 + const struct xdr_buf *xdr 20 + ), 21 + 22 + TP_ARGS(xdr), 23 + 24 + TP_STRUCT__entry( 25 + __field(const void *, head_base) 26 + __field(size_t, head_len) 27 + __field(const void *, tail_base) 28 + __field(size_t, tail_len) 29 + __field(unsigned int, page_len) 30 + __field(unsigned int, msg_len) 31 + ), 32 + 33 + TP_fast_assign( 34 + __entry->head_base = xdr->head[0].iov_base; 35 + __entry->head_len = xdr->head[0].iov_len; 36 + __entry->tail_base = xdr->tail[0].iov_base; 37 + __entry->tail_len = xdr->tail[0].iov_len; 38 + __entry->page_len = xdr->page_len; 39 + __entry->msg_len = xdr->len; 40 + ), 41 + 42 + TP_printk("head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", 43 + __entry->head_base, __entry->head_len, __entry->page_len, 44 + __entry->tail_base, __entry->tail_len, __entry->msg_len 45 + ) 46 + ); 47 + 48 + #define DEFINE_XDRBUF_EVENT(name) \ 49 + DEFINE_EVENT(xdr_buf_class, name, \ 50 + TP_PROTO( \ 51 + const struct xdr_buf *xdr \ 52 + ), \ 53 + TP_ARGS(xdr)) 54 + 55 + DEFINE_XDRBUF_EVENT(xprt_sendto); 56 + DEFINE_XDRBUF_EVENT(xprt_recvfrom); 57 + DEFINE_XDRBUF_EVENT(svc_recvfrom); 58 + DEFINE_XDRBUF_EVENT(svc_sendto); 59 + 17 60 TRACE_DEFINE_ENUM(RPC_AUTH_OK); 18 61 TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED); 19 62 TRACE_DEFINE_ENUM(RPC_AUTH_REJECTEDCRED); ··· 1334 1291 1335 1292 DEFINE_SVC_DEFERRED_EVENT(drop); 1336 1293 DEFINE_SVC_DEFERRED_EVENT(revisit); 1294 + 1295 + DECLARE_EVENT_CLASS(cache_event, 1296 + TP_PROTO( 1297 + const struct cache_detail *cd, 1298 + const struct cache_head *h 1299 + ), 1300 + 1301 + TP_ARGS(cd, h), 1302 + 1303 + TP_STRUCT__entry( 1304 + __field(const struct cache_head *, h) 1305 + __string(name, cd->name) 1306 + ), 1307 + 1308 + TP_fast_assign( 1309 + __entry->h = h; 1310 + __assign_str(name, cd->name); 1311 + ), 1312 + 1313 + TP_printk("cache=%s entry=%p", __get_str(name), __entry->h) 1314 + ); 1315 + 
#define DEFINE_CACHE_EVENT(name) \ 1316 + DEFINE_EVENT(cache_event, name, \ 1317 + TP_PROTO( \ 1318 + const struct cache_detail *cd, \ 1319 + const struct cache_head *h \ 1320 + ), \ 1321 + TP_ARGS(cd, h)) 1322 + DEFINE_CACHE_EVENT(cache_entry_expired); 1323 + DEFINE_CACHE_EVENT(cache_entry_upcall); 1324 + DEFINE_CACHE_EVENT(cache_entry_update); 1325 + DEFINE_CACHE_EVENT(cache_entry_make_negative); 1326 + DEFINE_CACHE_EVENT(cache_entry_no_listener); 1337 1327 1338 1328 #endif /* _TRACE_SUNRPC_H */ 1339 1329
+1 -1
net/sunrpc/auth_gss/auth_gss.c
··· 1877 1877 else 1878 1878 iov = snd_buf->head; 1879 1879 p = iov->iov_base + iov->iov_len; 1880 - pad = 3 - ((snd_buf->len - offset - 1) & 3); 1880 + pad = xdr_pad_size(snd_buf->len - offset); 1881 1881 memset(p, 0, pad); 1882 1882 iov->iov_len += pad; 1883 1883 snd_buf->len += pad;
+30 -30
net/sunrpc/auth_gss/svcauth_gss.c
··· 55 55 #include "gss_rpc_upcall.h" 56 56 57 57 58 - #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 59 - # define RPCDBG_FACILITY RPCDBG_AUTH 60 - #endif 61 - 62 58 /* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests 63 59 * into replies. 64 60 * ··· 180 184 return NULL; 181 185 } 182 186 187 + static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) 188 + { 189 + return sunrpc_cache_pipe_upcall_timeout(cd, h); 190 + } 191 + 183 192 static void rsi_request(struct cache_detail *cd, 184 193 struct cache_head *h, 185 194 char **bpp, int *blen) ··· 283 282 .hash_size = RSI_HASHMAX, 284 283 .name = "auth.rpcsec.init", 285 284 .cache_put = rsi_put, 285 + .cache_upcall = rsi_upcall, 286 286 .cache_request = rsi_request, 287 287 .cache_parse = rsi_parse, 288 288 .match = rsi_match, ··· 430 428 return NULL; 431 429 } 432 430 431 + static int rsc_upcall(struct cache_detail *cd, struct cache_head *h) 432 + { 433 + return -EINVAL; 434 + } 435 + 433 436 static int rsc_parse(struct cache_detail *cd, 434 437 char *mesg, int mlen) 435 438 { ··· 561 554 .hash_size = RSC_HASHMAX, 562 555 .name = "auth.rpcsec.context", 563 556 .cache_put = rsc_put, 557 + .cache_upcall = rsc_upcall, 564 558 .cache_parse = rsc_parse, 565 559 .match = rsc_match, 566 560 .init = rsc_init, ··· 721 713 } 722 714 723 715 if (gc->gc_seq > MAXSEQ) { 724 - dprintk("RPC: svcauth_gss: discarding request with " 725 - "large sequence number %d\n", gc->gc_seq); 716 + trace_rpcgss_svc_large_seqno(rqstp->rq_xid, gc->gc_seq); 726 717 *authp = rpcsec_gsserr_ctxproblem; 727 718 return SVC_DENIED; 728 719 } 729 720 if (!gss_check_seq_num(rsci, gc->gc_seq)) { 730 - dprintk("RPC: svcauth_gss: discarding request with " 731 - "old sequence number %d\n", gc->gc_seq); 721 + trace_rpcgss_svc_old_seqno(rqstp->rq_xid, gc->gc_seq); 732 722 return SVC_DROP; 733 723 } 734 724 return SVC_OK; ··· 967 961 /* XXX: This is very inefficient. 
It would be better to either do 968 962 * this while we encrypt, or maybe in the receive code, if we can peak 969 963 * ahead and work out the service and mechanism there. */ 970 - offset = buf->head[0].iov_len % 4; 964 + offset = xdr_pad_size(buf->head[0].iov_len); 971 965 if (offset) { 972 966 buf->buflen = RPCSVC_MAXPAYLOAD; 973 967 xdr_shift_buf(buf, offset); ··· 1251 1245 if (!ud->found_creds) { 1252 1246 /* userspace seem buggy, we should always get at least a 1253 1247 * mapping to nobody */ 1254 - dprintk("RPC: No creds found!\n"); 1255 1248 goto out; 1256 1249 } else { 1257 1250 struct timespec64 boot; ··· 1316 1311 if (status) 1317 1312 goto out; 1318 1313 1319 - trace_rpcgss_accept_upcall(rqstp->rq_xid, ud.major_status, 1320 - ud.minor_status); 1314 + trace_rpcgss_svc_accept_upcall(rqstp->rq_xid, ud.major_status, 1315 + ud.minor_status); 1321 1316 1322 1317 switch (ud.major_status) { 1323 1318 case GSS_S_CONTINUE_NEEDED: ··· 1325 1320 break; 1326 1321 case GSS_S_COMPLETE: 1327 1322 status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle); 1328 - if (status) { 1329 - pr_info("%s: gss_proxy_save_rsc failed (%d)\n", 1330 - __func__, status); 1323 + if (status) 1331 1324 goto out; 1332 - } 1333 1325 cli_handle.data = (u8 *)&handle; 1334 1326 cli_handle.len = sizeof(handle); 1335 1327 break; 1336 1328 default: 1337 - ret = SVC_CLOSE; 1338 1329 goto out; 1339 1330 } 1340 1331 1341 1332 /* Got an answer to the upcall; use it: */ 1342 1333 if (gss_write_init_verf(sn->rsc_cache, rqstp, 1343 - &cli_handle, &ud.major_status)) { 1344 - pr_info("%s: gss_write_init_verf failed\n", __func__); 1334 + &cli_handle, &ud.major_status)) 1345 1335 goto out; 1346 - } 1347 1336 if (gss_write_resv(resv, PAGE_SIZE, 1348 1337 &cli_handle, &ud.out_token, 1349 - ud.major_status, ud.minor_status)) { 1350 - pr_info("%s: gss_write_resv failed\n", __func__); 1338 + ud.major_status, ud.minor_status)) 1351 1339 goto out; 1352 - } 1353 1340 1354 1341 ret = SVC_COMPLETE; 1355 1342 out: ··· 
1492 1495 int ret; 1493 1496 struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); 1494 1497 1495 - dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n", 1496 - argv->iov_len); 1498 + trace_rpcgss_svc_accept(rqstp->rq_xid, argv->iov_len); 1497 1499 1498 1500 *authp = rpc_autherr_badcred; 1499 1501 if (!svcdata) ··· 1676 1680 goto out; 1677 1681 integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; 1678 1682 integ_len = resbuf->len - integ_offset; 1679 - BUG_ON(integ_len % 4); 1683 + if (integ_len & 3) 1684 + goto out; 1680 1685 *p++ = htonl(integ_len); 1681 1686 *p++ = htonl(gc->gc_seq); 1682 1687 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) { ··· 1701 1704 resv->iov_len += XDR_QUADLEN(mic.len) << 2; 1702 1705 /* not strictly required: */ 1703 1706 resbuf->len += XDR_QUADLEN(mic.len) << 2; 1704 - BUG_ON(resv->iov_len > PAGE_SIZE); 1707 + if (resv->iov_len > PAGE_SIZE) 1708 + goto out_err; 1705 1709 out: 1706 1710 stat = 0; 1707 1711 out_err: ··· 1738 1740 * both the head and tail. 1739 1741 */ 1740 1742 if (resbuf->tail[0].iov_base) { 1741 - BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base 1742 - + PAGE_SIZE); 1743 - BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base); 1743 + if (resbuf->tail[0].iov_base >= 1744 + resbuf->head[0].iov_base + PAGE_SIZE) 1745 + return -EINVAL; 1746 + if (resbuf->tail[0].iov_base < resbuf->head[0].iov_base) 1747 + return -EINVAL; 1744 1748 if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len 1745 1749 + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE) 1746 1750 return -ENOMEM;
+74 -54
net/sunrpc/cache.c
··· 32 32 #include <linux/sunrpc/cache.h> 33 33 #include <linux/sunrpc/stats.h> 34 34 #include <linux/sunrpc/rpc_pipe_fs.h> 35 + #include <trace/events/sunrpc.h> 35 36 #include "netns.h" 36 37 37 38 #define RPCDBG_FACILITY RPCDBG_CACHE 38 39 39 40 static bool cache_defer_req(struct cache_req *req, struct cache_head *item); 40 41 static void cache_revisit_request(struct cache_head *item); 41 - static bool cache_listeners_exist(struct cache_detail *detail); 42 42 43 43 static void cache_init(struct cache_head *h, struct cache_detail *detail) 44 44 { ··· 65 65 66 66 rcu_read_lock(); 67 67 hlist_for_each_entry_rcu(tmp, head, cache_list) { 68 - if (detail->match(tmp, key)) { 69 - if (cache_is_expired(detail, tmp)) 70 - continue; 71 - tmp = cache_get_rcu(tmp); 72 - rcu_read_unlock(); 73 - return tmp; 74 - } 68 + if (!detail->match(tmp, key)) 69 + continue; 70 + if (test_bit(CACHE_VALID, &tmp->flags) && 71 + cache_is_expired(detail, tmp)) 72 + continue; 73 + tmp = cache_get_rcu(tmp); 74 + rcu_read_unlock(); 75 + return tmp; 75 76 } 76 77 rcu_read_unlock(); 77 78 return NULL; ··· 114 113 spin_lock(&detail->hash_lock); 115 114 116 115 /* check if entry appeared while we slept */ 117 - hlist_for_each_entry_rcu(tmp, head, cache_list) { 118 - if (detail->match(tmp, key)) { 119 - if (cache_is_expired(detail, tmp)) { 120 - sunrpc_begin_cache_remove_entry(tmp, detail); 121 - freeme = tmp; 122 - break; 123 - } 124 - cache_get(tmp); 125 - spin_unlock(&detail->hash_lock); 126 - cache_put(new, detail); 127 - return tmp; 116 + hlist_for_each_entry_rcu(tmp, head, cache_list, 117 + lockdep_is_held(&detail->hash_lock)) { 118 + if (!detail->match(tmp, key)) 119 + continue; 120 + if (test_bit(CACHE_VALID, &tmp->flags) && 121 + cache_is_expired(detail, tmp)) { 122 + sunrpc_begin_cache_remove_entry(tmp, detail); 123 + trace_cache_entry_expired(detail, tmp); 124 + freeme = tmp; 125 + break; 128 126 } 127 + cache_get(tmp); 128 + spin_unlock(&detail->hash_lock); 129 + cache_put(new, detail); 
130 + return tmp; 129 131 } 130 132 131 133 hlist_add_head_rcu(&new->cache_list, head); ··· 178 174 } 179 175 } 180 176 177 + static void cache_make_negative(struct cache_detail *detail, 178 + struct cache_head *h) 179 + { 180 + set_bit(CACHE_NEGATIVE, &h->flags); 181 + trace_cache_entry_make_negative(detail, h); 182 + } 183 + 184 + static void cache_entry_update(struct cache_detail *detail, 185 + struct cache_head *h, 186 + struct cache_head *new) 187 + { 188 + if (!test_bit(CACHE_NEGATIVE, &new->flags)) { 189 + detail->update(h, new); 190 + trace_cache_entry_update(detail, h); 191 + } else { 192 + cache_make_negative(detail, h); 193 + } 194 + } 195 + 181 196 struct cache_head *sunrpc_cache_update(struct cache_detail *detail, 182 197 struct cache_head *new, struct cache_head *old, int hash) 183 198 { ··· 209 186 if (!test_bit(CACHE_VALID, &old->flags)) { 210 187 spin_lock(&detail->hash_lock); 211 188 if (!test_bit(CACHE_VALID, &old->flags)) { 212 - if (test_bit(CACHE_NEGATIVE, &new->flags)) 213 - set_bit(CACHE_NEGATIVE, &old->flags); 214 - else 215 - detail->update(old, new); 189 + cache_entry_update(detail, old, new); 216 190 cache_fresh_locked(old, new->expiry_time, detail); 217 191 spin_unlock(&detail->hash_lock); 218 192 cache_fresh_unlocked(old, detail); ··· 227 207 detail->init(tmp, old); 228 208 229 209 spin_lock(&detail->hash_lock); 230 - if (test_bit(CACHE_NEGATIVE, &new->flags)) 231 - set_bit(CACHE_NEGATIVE, &tmp->flags); 232 - else 233 - detail->update(tmp, new); 210 + cache_entry_update(detail, tmp, new); 234 211 hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); 235 212 detail->entries++; 236 213 cache_get(tmp); ··· 240 223 return tmp; 241 224 } 242 225 EXPORT_SYMBOL_GPL(sunrpc_cache_update); 243 - 244 - static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) 245 - { 246 - if (cd->cache_upcall) 247 - return cd->cache_upcall(cd, h); 248 - return sunrpc_cache_pipe_upcall(cd, h); 249 - } 250 226 251 227 static inline int 
cache_is_valid(struct cache_head *h) 252 228 { ··· 269 259 spin_lock(&detail->hash_lock); 270 260 rv = cache_is_valid(h); 271 261 if (rv == -EAGAIN) { 272 - set_bit(CACHE_NEGATIVE, &h->flags); 262 + cache_make_negative(detail, h); 273 263 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, 274 264 detail); 275 265 rv = -ENOENT; ··· 313 303 (h->expiry_time != 0 && age > refresh_age/2)) { 314 304 dprintk("RPC: Want update, refage=%lld, age=%lld\n", 315 305 refresh_age, age); 316 - if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { 317 - switch (cache_make_upcall(detail, h)) { 318 - case -EINVAL: 319 - rv = try_to_negate_entry(detail, h); 320 - break; 321 - case -EAGAIN: 322 - cache_fresh_unlocked(h, detail); 323 - break; 324 - } 325 - } else if (!cache_listeners_exist(detail)) 306 + switch (detail->cache_upcall(detail, h)) { 307 + case -EINVAL: 326 308 rv = try_to_negate_entry(detail, h); 309 + break; 310 + case -EAGAIN: 311 + cache_fresh_unlocked(h, detail); 312 + break; 313 + } 327 314 } 328 315 329 316 if (rv == -EAGAIN) { ··· 475 468 continue; 476 469 477 470 sunrpc_begin_cache_remove_entry(ch, current_detail); 471 + trace_cache_entry_expired(current_detail, ch); 478 472 rv = 1; 479 473 break; 480 474 } ··· 1203 1195 * 1204 1196 * Each request is at most one page long. 
1205 1197 */ 1206 - int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) 1198 + static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) 1207 1199 { 1208 - 1209 1200 char *buf; 1210 1201 struct cache_request *crq; 1211 1202 int ret = 0; 1212 1203 1213 - if (!detail->cache_request) 1214 - return -EINVAL; 1215 - 1216 - if (!cache_listeners_exist(detail)) { 1217 - warn_no_listener(detail); 1218 - return -EINVAL; 1219 - } 1220 1204 if (test_bit(CACHE_CLEANED, &h->flags)) 1221 1205 /* Too late to make an upcall */ 1222 1206 return -EAGAIN; ··· 1231 1231 if (test_bit(CACHE_PENDING, &h->flags)) { 1232 1232 crq->item = cache_get(h); 1233 1233 list_add_tail(&crq->q.list, &detail->queue); 1234 + trace_cache_entry_upcall(detail, h); 1234 1235 } else 1235 1236 /* Lost a race, no longer PENDING, so don't enqueue */ 1236 1237 ret = -EAGAIN; ··· 1243 1242 } 1244 1243 return ret; 1245 1244 } 1245 + 1246 + int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) 1247 + { 1248 + if (test_and_set_bit(CACHE_PENDING, &h->flags)) 1249 + return 0; 1250 + return cache_pipe_upcall(detail, h); 1251 + } 1246 1252 EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); 1253 + 1254 + int sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, 1255 + struct cache_head *h) 1256 + { 1257 + if (!cache_listeners_exist(detail)) { 1258 + warn_no_listener(detail); 1259 + trace_cache_entry_no_listener(detail, h); 1260 + return -EINVAL; 1261 + } 1262 + return sunrpc_cache_pipe_upcall(detail, h); 1263 + } 1264 + EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall_timeout); 1247 1265 1248 1266 /* 1249 1267 * parse a message from user-space and pass it
+1
net/sunrpc/clnt.c
··· 2509 2509 goto out; 2510 2510 2511 2511 req->rq_rcv_buf.len = req->rq_private_buf.len; 2512 + trace_xprt_recvfrom(&req->rq_rcv_buf); 2512 2513 2513 2514 /* Check that the softirq receive buffer is valid */ 2514 2515 WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
+141
net/sunrpc/socklib.c
··· 14 14 #include <linux/types.h> 15 15 #include <linux/pagemap.h> 16 16 #include <linux/udp.h> 17 + #include <linux/sunrpc/msg_prot.h> 17 18 #include <linux/sunrpc/xdr.h> 18 19 #include <linux/export.h> 19 20 21 + #include "socklib.h" 22 + 23 + /* 24 + * Helper structure for copying from an sk_buff. 25 + */ 26 + struct xdr_skb_reader { 27 + struct sk_buff *skb; 28 + unsigned int offset; 29 + size_t count; 30 + __wsum csum; 31 + }; 32 + 33 + typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, 34 + size_t len); 20 35 21 36 /** 22 37 * xdr_skb_read_bits - copy some data bits from skb to internal buffer ··· 201 186 return 0; 202 187 } 203 188 EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); 189 + 190 + static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg, 191 + size_t seek) 192 + { 193 + if (seek) 194 + iov_iter_advance(&msg->msg_iter, seek); 195 + return sock_sendmsg(sock, msg); 196 + } 197 + 198 + static int xprt_send_kvec(struct socket *sock, struct msghdr *msg, 199 + struct kvec *vec, size_t seek) 200 + { 201 + iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len); 202 + return xprt_sendmsg(sock, msg, seek); 203 + } 204 + 205 + static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, 206 + struct xdr_buf *xdr, size_t base) 207 + { 208 + int err; 209 + 210 + err = xdr_alloc_bvec(xdr, GFP_KERNEL); 211 + if (err < 0) 212 + return err; 213 + 214 + iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr), 215 + xdr->page_len + xdr->page_base); 216 + return xprt_sendmsg(sock, msg, base + xdr->page_base); 217 + } 218 + 219 + /* Common case: 220 + * - stream transport 221 + * - sending from byte 0 of the message 222 + * - the message is wholly contained in @xdr's head iovec 223 + */ 224 + static int xprt_send_rm_and_kvec(struct socket *sock, struct msghdr *msg, 225 + rpc_fraghdr marker, struct kvec *vec, 226 + size_t base) 227 + { 228 + struct kvec iov[2] = { 229 + [0] = { 230 + .iov_base = &marker, 
231 + .iov_len = sizeof(marker) 232 + }, 233 + [1] = *vec, 234 + }; 235 + size_t len = iov[0].iov_len + iov[1].iov_len; 236 + 237 + iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len); 238 + return xprt_sendmsg(sock, msg, base); 239 + } 240 + 241 + /** 242 + * xprt_sock_sendmsg - write an xdr_buf directly to a socket 243 + * @sock: open socket to send on 244 + * @msg: socket message metadata 245 + * @xdr: xdr_buf containing this request 246 + * @base: starting position in the buffer 247 + * @marker: stream record marker field 248 + * @sent_p: return the total number of bytes successfully queued for sending 249 + * 250 + * Return values: 251 + * On success, returns zero and fills in @sent_p. 252 + * %-ENOTSOCK if @sock is not a struct socket. 253 + */ 254 + int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg, 255 + struct xdr_buf *xdr, unsigned int base, 256 + rpc_fraghdr marker, unsigned int *sent_p) 257 + { 258 + unsigned int rmsize = marker ? sizeof(marker) : 0; 259 + unsigned int remainder = rmsize + xdr->len - base; 260 + unsigned int want; 261 + int err = 0; 262 + 263 + *sent_p = 0; 264 + 265 + if (unlikely(!sock)) 266 + return -ENOTSOCK; 267 + 268 + msg->msg_flags |= MSG_MORE; 269 + want = xdr->head[0].iov_len + rmsize; 270 + if (base < want) { 271 + unsigned int len = want - base; 272 + 273 + remainder -= len; 274 + if (remainder == 0) 275 + msg->msg_flags &= ~MSG_MORE; 276 + if (rmsize) 277 + err = xprt_send_rm_and_kvec(sock, msg, marker, 278 + &xdr->head[0], base); 279 + else 280 + err = xprt_send_kvec(sock, msg, &xdr->head[0], base); 281 + if (remainder == 0 || err != len) 282 + goto out; 283 + *sent_p += err; 284 + base = 0; 285 + } else { 286 + base -= want; 287 + } 288 + 289 + if (base < xdr->page_len) { 290 + unsigned int len = xdr->page_len - base; 291 + 292 + remainder -= len; 293 + if (remainder == 0) 294 + msg->msg_flags &= ~MSG_MORE; 295 + err = xprt_send_pagedata(sock, msg, xdr, base); 296 + if (remainder == 0 || err != len) 297 + goto 
out; 298 + *sent_p += err; 299 + base = 0; 300 + } else { 301 + base -= xdr->page_len; 302 + } 303 + 304 + if (base >= xdr->tail[0].iov_len) 305 + return 0; 306 + msg->msg_flags &= ~MSG_MORE; 307 + err = xprt_send_kvec(sock, msg, &xdr->tail[0], base); 308 + out: 309 + if (err > 0) { 310 + *sent_p += err; 311 + err = 0; 312 + } 313 + return err; 314 + }
+15
net/sunrpc/socklib.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> 4 + * Copyright (C) 2020, Oracle. 5 + */ 6 + 7 + #ifndef _NET_SUNRPC_SOCKLIB_H_ 8 + #define _NET_SUNRPC_SOCKLIB_H_ 9 + 10 + int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); 11 + int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg, 12 + struct xdr_buf *xdr, unsigned int base, 13 + rpc_fraghdr marker, unsigned int *sent_p); 14 + 15 + #endif /* _NET_SUNRPC_SOCKLIB_H_ */
-4
net/sunrpc/sunrpc.h
··· 50 50 return loopback; 51 51 } 52 52 53 - int svc_send_common(struct socket *sock, struct xdr_buf *xdr, 54 - struct page *headpage, unsigned long headoffset, 55 - struct page *tailpage, unsigned long tailoffset); 56 - 57 53 int rpc_clients_notifier_register(void); 58 54 void rpc_clients_notifier_unregister(void); 59 55 #endif /* _NET_SUNRPC_SUNRPC_H */
+16 -4
net/sunrpc/svc.c
··· 1529 1529 goto out_drop; 1530 1530 } 1531 1531 1532 - /* Reserve space for the record marker */ 1533 - if (rqstp->rq_prot == IPPROTO_TCP) 1534 - svc_putnl(resv, 0); 1535 - 1536 1532 /* Returns 1 for send, 0 for drop */ 1537 1533 if (likely(svc_process_common(rqstp, argv, resv))) 1538 1534 return svc_send(rqstp); ··· 1631 1635 return max; 1632 1636 } 1633 1637 EXPORT_SYMBOL_GPL(svc_max_payload); 1638 + 1639 + /** 1640 + * svc_encode_read_payload - mark a range of bytes as a READ payload 1641 + * @rqstp: svc_rqst to operate on 1642 + * @offset: payload's byte offset in rqstp->rq_res 1643 + * @length: size of payload, in bytes 1644 + * 1645 + * Returns zero on success, or a negative errno if a permanent 1646 + * error occurred. 1647 + */ 1648 + int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset, 1649 + unsigned int length) 1650 + { 1651 + return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length); 1652 + } 1653 + EXPORT_SYMBOL_GPL(svc_encode_read_payload); 1634 1654 1635 1655 /** 1636 1656 * svc_fill_write_vector - Construct data argument for VFS write call
+17 -5
net/sunrpc/svc_xprt.c
··· 104 104 } 105 105 EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); 106 106 107 - /* 108 - * Format the transport list for printing 107 + /** 108 + * svc_print_xprts - Format the transport list for printing 109 + * @buf: target buffer for formatted address 110 + * @maxlen: length of target buffer 111 + * 112 + * Fills in @buf with a string containing a list of transport names, each name 113 + * terminated with '\n'. If the buffer is too small, some entries may be 114 + * missing, but it is guaranteed that all lines in the output buffer are 115 + * complete. 116 + * 117 + * Returns positive length of the filled-in string. 109 118 */ 110 119 int svc_print_xprts(char *buf, int maxlen) 111 120 { ··· 127 118 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { 128 119 int slen; 129 120 130 - sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); 131 - slen = strlen(tmpstr); 132 - if (len + slen > maxlen) 121 + slen = snprintf(tmpstr, sizeof(tmpstr), "%s %d\n", 122 + xcl->xcl_name, xcl->xcl_max_payload); 123 + if (slen >= sizeof(tmpstr) || len + slen >= maxlen) 133 124 break; 134 125 len += slen; 135 126 strcat(buf, tmpstr); ··· 811 802 len = svc_deferred_recv(rqstp); 812 803 else 813 804 len = xprt->xpt_ops->xpo_recvfrom(rqstp); 805 + if (len > 0) 806 + trace_svc_recvfrom(&rqstp->rq_arg); 814 807 rqstp->rq_stime = ktime_get(); 815 808 rqstp->rq_reserved = serv->sv_max_mesg; 816 809 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); ··· 916 905 xb->len = xb->head[0].iov_len + 917 906 xb->page_len + 918 907 xb->tail[0].iov_len; 908 + trace_svc_sendto(xb); 919 909 920 910 /* Grab mutex to serialize outgoing data. */ 921 911 mutex_lock(&xprt->xpt_mutex);
+12
net/sunrpc/svcauth_unix.c
··· 148 148 return NULL; 149 149 } 150 150 151 + static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) 152 + { 153 + return sunrpc_cache_pipe_upcall(cd, h); 154 + } 155 + 151 156 static void ip_map_request(struct cache_detail *cd, 152 157 struct cache_head *h, 153 158 char **bpp, int *blen) ··· 472 467 return NULL; 473 468 } 474 469 470 + static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) 471 + { 472 + return sunrpc_cache_pipe_upcall_timeout(cd, h); 473 + } 474 + 475 475 static void unix_gid_request(struct cache_detail *cd, 476 476 struct cache_head *h, 477 477 char **bpp, int *blen) ··· 594 584 .hash_size = GID_HASHMAX, 595 585 .name = "auth.unix.gid", 596 586 .cache_put = unix_gid_put, 587 + .cache_upcall = unix_gid_upcall, 597 588 .cache_request = unix_gid_request, 598 589 .cache_parse = unix_gid_parse, 599 590 .cache_show = unix_gid_show, ··· 892 881 .hash_size = IP_HASHMAX, 893 882 .name = "auth.unix.ip", 894 883 .cache_put = ip_map_put, 884 + .cache_upcall = ip_map_upcall, 895 885 .cache_request = ip_map_request, 896 886 .cache_parse = ip_map_parse, 897 887 .cache_show = ip_map_show,
+68 -134
net/sunrpc/svcsock.c
··· 55 55 #include <linux/sunrpc/stats.h> 56 56 #include <linux/sunrpc/xprt.h> 57 57 58 + #include "socklib.h" 58 59 #include "sunrpc.h" 59 60 60 61 #define RPCDBG_FACILITY RPCDBG_SVCXPRT ··· 175 174 } 176 175 } 177 176 178 - /* 179 - * send routine intended to be shared by the fore- and back-channel 180 - */ 181 - int svc_send_common(struct socket *sock, struct xdr_buf *xdr, 182 - struct page *headpage, unsigned long headoffset, 183 - struct page *tailpage, unsigned long tailoffset) 177 + static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset, 178 + unsigned int length) 184 179 { 185 - int result; 186 - int size; 187 - struct page **ppage = xdr->pages; 188 - size_t base = xdr->page_base; 189 - unsigned int pglen = xdr->page_len; 190 - unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; 191 - int slen; 192 - int len = 0; 193 - 194 - slen = xdr->len; 195 - 196 - /* send head */ 197 - if (slen == xdr->head[0].iov_len) 198 - flags = 0; 199 - len = kernel_sendpage(sock, headpage, headoffset, 200 - xdr->head[0].iov_len, flags); 201 - if (len != xdr->head[0].iov_len) 202 - goto out; 203 - slen -= xdr->head[0].iov_len; 204 - if (slen == 0) 205 - goto out; 206 - 207 - /* send page data */ 208 - size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen; 209 - while (pglen > 0) { 210 - if (slen == size) 211 - flags = 0; 212 - result = kernel_sendpage(sock, *ppage, base, size, flags); 213 - if (result > 0) 214 - len += result; 215 - if (result != size) 216 - goto out; 217 - slen -= size; 218 - pglen -= size; 219 - size = PAGE_SIZE < pglen ? 
PAGE_SIZE : pglen; 220 - base = 0; 221 - ppage++; 222 - } 223 - 224 - /* send tail */ 225 - if (xdr->tail[0].iov_len) { 226 - result = kernel_sendpage(sock, tailpage, tailoffset, 227 - xdr->tail[0].iov_len, 0); 228 - if (result > 0) 229 - len += result; 230 - } 231 - 232 - out: 233 - return len; 234 - } 235 - 236 - 237 - /* 238 - * Generic sendto routine 239 - */ 240 - static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 241 - { 242 - struct svc_sock *svsk = 243 - container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 244 - struct socket *sock = svsk->sk_sock; 245 - union { 246 - struct cmsghdr hdr; 247 - long all[SVC_PKTINFO_SPACE / sizeof(long)]; 248 - } buffer; 249 - struct cmsghdr *cmh = &buffer.hdr; 250 - int len = 0; 251 - unsigned long tailoff; 252 - unsigned long headoff; 253 - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 254 - 255 - if (rqstp->rq_prot == IPPROTO_UDP) { 256 - struct msghdr msg = { 257 - .msg_name = &rqstp->rq_addr, 258 - .msg_namelen = rqstp->rq_addrlen, 259 - .msg_control = cmh, 260 - .msg_controllen = sizeof(buffer), 261 - .msg_flags = MSG_MORE, 262 - }; 263 - 264 - svc_set_cmsg_data(rqstp, cmh); 265 - 266 - if (sock_sendmsg(sock, &msg) < 0) 267 - goto out; 268 - } 269 - 270 - tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1); 271 - headoff = 0; 272 - len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff, 273 - rqstp->rq_respages[0], tailoff); 274 - 275 - out: 276 - dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n", 277 - svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, 278 - xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); 279 - 280 - return len; 180 + return 0; 281 181 } 282 182 283 183 /* ··· 502 600 return 0; 503 601 } 504 602 505 - static int 506 - svc_udp_sendto(struct svc_rqst *rqstp) 603 + /** 604 + * svc_udp_sendto - Send out a reply on a UDP socket 605 + * @rqstp: completed svc_rqst 606 + * 607 + * Returns the number of bytes sent, or a negative errno. 
608 + */ 609 + static int svc_udp_sendto(struct svc_rqst *rqstp) 507 610 { 508 - int error; 611 + struct svc_xprt *xprt = rqstp->rq_xprt; 612 + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 613 + struct xdr_buf *xdr = &rqstp->rq_res; 614 + union { 615 + struct cmsghdr hdr; 616 + long all[SVC_PKTINFO_SPACE / sizeof(long)]; 617 + } buffer; 618 + struct cmsghdr *cmh = &buffer.hdr; 619 + struct msghdr msg = { 620 + .msg_name = &rqstp->rq_addr, 621 + .msg_namelen = rqstp->rq_addrlen, 622 + .msg_control = cmh, 623 + .msg_controllen = sizeof(buffer), 624 + }; 625 + unsigned int uninitialized_var(sent); 626 + int err; 509 627 510 - error = svc_sendto(rqstp, &rqstp->rq_res); 511 - if (error == -ECONNREFUSED) 628 + svc_set_cmsg_data(rqstp, cmh); 629 + 630 + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); 631 + xdr_free_bvec(xdr); 632 + if (err == -ECONNREFUSED) { 512 633 /* ICMP error on earlier request. */ 513 - error = svc_sendto(rqstp, &rqstp->rq_res); 514 - 515 - return error; 634 + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); 635 + xdr_free_bvec(xdr); 636 + } 637 + if (err < 0) 638 + return err; 639 + return sent; 516 640 } 517 641 518 642 static int svc_udp_has_wspace(struct svc_xprt *xprt) ··· 581 653 .xpo_create = svc_udp_create, 582 654 .xpo_recvfrom = svc_udp_recvfrom, 583 655 .xpo_sendto = svc_udp_sendto, 656 + .xpo_read_payload = svc_sock_read_payload, 584 657 .xpo_release_rqst = svc_release_udp_skb, 585 658 .xpo_detach = svc_sock_detach, 586 659 .xpo_free = svc_sock_free, ··· 1057 1128 return 0; /* record not complete */ 1058 1129 } 1059 1130 1060 - /* 1061 - * Send out data on TCP socket. 1131 + /** 1132 + * svc_tcp_sendto - Send out a reply on a TCP socket 1133 + * @rqstp: completed svc_rqst 1134 + * 1135 + * Returns the number of bytes sent, or a negative errno. 
1062 1136 */ 1063 1137 static int svc_tcp_sendto(struct svc_rqst *rqstp) 1064 1138 { 1065 - struct xdr_buf *xbufp = &rqstp->rq_res; 1066 - int sent; 1067 - __be32 reclen; 1139 + struct svc_xprt *xprt = rqstp->rq_xprt; 1140 + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1141 + struct xdr_buf *xdr = &rqstp->rq_res; 1142 + rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | 1143 + (u32)xdr->len); 1144 + struct msghdr msg = { 1145 + .msg_flags = 0, 1146 + }; 1147 + unsigned int uninitialized_var(sent); 1148 + int err; 1068 1149 1069 - /* Set up the first element of the reply kvec. 1070 - * Any other kvecs that may be in use have been taken 1071 - * care of by the server implementation itself. 1072 - */ 1073 - reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1074 - memcpy(xbufp->head[0].iov_base, &reclen, 4); 1075 - 1076 - sent = svc_sendto(rqstp, &rqstp->rq_res); 1077 - if (sent != xbufp->len) { 1078 - printk(KERN_NOTICE 1079 - "rpc-srv/tcp: %s: %s %d when sending %d bytes " 1080 - "- shutting down socket\n", 1081 - rqstp->rq_xprt->xpt_server->sv_name, 1082 - (sent<0)?"got error":"sent only", 1083 - sent, xbufp->len); 1084 - set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); 1085 - svc_xprt_enqueue(rqstp->rq_xprt); 1086 - sent = -EAGAIN; 1087 - } 1150 + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); 1151 + xdr_free_bvec(xdr); 1152 + if (err < 0 || sent != (xdr->len + sizeof(marker))) 1153 + goto out_close; 1088 1154 return sent; 1155 + 1156 + out_close: 1157 + pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", 1158 + xprt->xpt_server->sv_name, 1159 + (err < 0) ? "got error" : "sent", 1160 + (err < 0) ? 
err : sent, xdr->len); 1161 + set_bit(XPT_CLOSE, &xprt->xpt_flags); 1162 + svc_xprt_enqueue(xprt); 1163 + return -EAGAIN; 1089 1164 } 1090 1165 1091 1166 static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, ··· 1104 1171 .xpo_create = svc_tcp_create, 1105 1172 .xpo_recvfrom = svc_tcp_recvfrom, 1106 1173 .xpo_sendto = svc_tcp_sendto, 1174 + .xpo_read_payload = svc_sock_read_payload, 1107 1175 .xpo_release_rqst = svc_release_skb, 1108 1176 .xpo_detach = svc_tcp_sock_detach, 1109 1177 .xpo_free = svc_sock_free,
+1 -2
net/sunrpc/xprt.c
··· 1117 1117 struct rpc_rqst *req = task->tk_rqstp; 1118 1118 struct rpc_xprt *xprt = req->rq_xprt; 1119 1119 1120 - dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", 1121 - task->tk_pid, ntohl(req->rq_xid), copied); 1122 1120 trace_xprt_complete_rqst(xprt, req->rq_xid, copied); 1123 1121 1124 1122 xprt->stat.recvs++; ··· 1460 1462 */ 1461 1463 req->rq_ntrans++; 1462 1464 1465 + trace_xprt_sendto(&req->rq_snd_buf); 1463 1466 connect_cookie = xprt->connect_cookie; 1464 1467 status = xprt->ops->send_request(req); 1465 1468 if (status != 0) {
+5 -31
net/sunrpc/xprtrdma/rpc_rdma.c
··· 275 275 return n; 276 276 } 277 277 278 - static inline int 279 - encode_item_present(struct xdr_stream *xdr) 280 - { 281 - __be32 *p; 282 - 283 - p = xdr_reserve_space(xdr, sizeof(*p)); 284 - if (unlikely(!p)) 285 - return -EMSGSIZE; 286 - 287 - *p = xdr_one; 288 - return 0; 289 - } 290 - 291 - static inline int 292 - encode_item_not_present(struct xdr_stream *xdr) 293 - { 294 - __be32 *p; 295 - 296 - p = xdr_reserve_space(xdr, sizeof(*p)); 297 - if (unlikely(!p)) 298 - return -EMSGSIZE; 299 - 300 - *p = xdr_zero; 301 - return 0; 302 - } 303 - 304 278 static void 305 279 xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) 306 280 { ··· 388 414 } while (nsegs); 389 415 390 416 done: 391 - return encode_item_not_present(xdr); 417 + return xdr_stream_encode_item_absent(xdr); 392 418 } 393 419 394 420 /* Register and XDR encode the Write list. Supports encoding a list ··· 427 453 if (nsegs < 0) 428 454 return nsegs; 429 455 430 - if (encode_item_present(xdr) < 0) 456 + if (xdr_stream_encode_item_present(xdr) < 0) 431 457 return -EMSGSIZE; 432 458 segcount = xdr_reserve_space(xdr, sizeof(*segcount)); 433 459 if (unlikely(!segcount)) ··· 454 480 *segcount = cpu_to_be32(nchunks); 455 481 456 482 done: 457 - return encode_item_not_present(xdr); 483 + return xdr_stream_encode_item_absent(xdr); 458 484 } 459 485 460 486 /* Register and XDR encode the Reply chunk. Supports encoding an array ··· 481 507 __be32 *segcount; 482 508 483 509 if (wtype != rpcrdma_replych) 484 - return encode_item_not_present(xdr); 510 + return xdr_stream_encode_item_absent(xdr); 485 511 486 512 seg = req->rl_segments; 487 513 nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); 488 514 if (nsegs < 0) 489 515 return nsegs; 490 516 491 - if (encode_item_present(xdr) < 0) 517 + if (xdr_stream_encode_item_present(xdr) < 0) 492 518 return -EMSGSIZE; 493 519 segcount = xdr_reserve_space(xdr, sizeof(*segcount)); 494 520 if (unlikely(!segcount))
+10 -7
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
··· 117 117 { 118 118 int ret; 119 119 120 - ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL); 120 + ret = svc_rdma_map_reply_msg(rdma, ctxt, NULL, &rqst->rq_snd_buf); 121 121 if (ret < 0) 122 122 return -EIO; 123 123 ··· 181 181 if (!ctxt) 182 182 goto drop_connection; 183 183 184 - p = ctxt->sc_xprt_buf; 184 + p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_MIN); 185 + if (!p) 186 + goto put_ctxt; 185 187 *p++ = rqst->rq_xid; 186 188 *p++ = rpcrdma_version; 187 189 *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); ··· 191 189 *p++ = xdr_zero; 192 190 *p++ = xdr_zero; 193 191 *p = xdr_zero; 194 - svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN); 195 192 196 193 #ifdef SVCRDMA_BACKCHANNEL_DEBUG 197 194 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); ··· 198 197 199 198 rqst->rq_xtime = ktime_get(); 200 199 rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); 201 - if (rc) { 202 - svc_rdma_send_ctxt_put(rdma, ctxt); 203 - goto drop_connection; 204 - } 200 + if (rc) 201 + goto put_ctxt; 205 202 return 0; 203 + 204 + put_ctxt: 205 + svc_rdma_send_ctxt_put(rdma, ctxt); 206 206 207 207 drop_connection: 208 208 dprintk("svcrdma: failed to send bc call\n"); ··· 252 250 { 253 251 dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); 254 252 253 + xprt_rdma_free_addresses(xprt); 255 254 xprt_free(xprt); 256 255 } 257 256
+155 -91
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
··· 193 193 194 194 out: 195 195 ctxt->rc_page_count = 0; 196 + ctxt->rc_read_payload_length = 0; 196 197 return ctxt; 197 198 198 199 out_empty: ··· 358 357 arg->len = ctxt->rc_byte_len; 359 358 } 360 359 361 - /* This accommodates the largest possible Write chunk, 362 - * in one segment. 360 + /* This accommodates the largest possible Write chunk. 363 361 */ 364 - #define MAX_BYTES_WRITE_SEG ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) 362 + #define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) 365 363 366 364 /* This accommodates the largest possible Position-Zero 367 - * Read chunk or Reply chunk, in one segment. 365 + * Read chunk or Reply chunk. 368 366 */ 369 - #define MAX_BYTES_SPECIAL_SEG ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) 367 + #define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) 370 368 371 369 /* Sanity check the Read list. 372 370 * ··· 373 373 * - This implementation supports only one Read chunk. 374 374 * 375 375 * Sanity checks: 376 - * - Read list does not overflow buffer. 376 + * - Read list does not overflow Receive buffer. 377 377 * - Segment size limited by largest NFS data payload. 378 378 * 379 379 * The segment count is limited to how many segments can ··· 381 381 * buffer. That's about 40 Read segments for a 1KB inline 382 382 * threshold. 383 383 * 384 - * Returns pointer to the following Write list. 384 + * Return values: 385 + * %true: Read list is valid. @rctxt's xdr_stream is updated 386 + * to point to the first byte past the Read list. 387 + * %false: Read list is corrupt. @rctxt's xdr_stream is left 388 + * in an unknown state. 
385 389 */ 386 - static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end) 390 + static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) 387 391 { 388 - u32 position; 392 + u32 position, len; 389 393 bool first; 394 + __be32 *p; 390 395 396 + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 397 + if (!p) 398 + return false; 399 + 400 + len = 0; 391 401 first = true; 392 - while (*p++ != xdr_zero) { 393 - if (first) { 394 - position = be32_to_cpup(p++); 395 - first = false; 396 - } else if (be32_to_cpup(p++) != position) { 397 - return NULL; 398 - } 399 - p++; /* handle */ 400 - if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG) 401 - return NULL; 402 - p += 2; /* offset */ 402 + while (*p != xdr_zero) { 403 + p = xdr_inline_decode(&rctxt->rc_stream, 404 + rpcrdma_readseg_maxsz * sizeof(*p)); 405 + if (!p) 406 + return false; 403 407 404 - if (p > end) 405 - return NULL; 408 + if (first) { 409 + position = be32_to_cpup(p); 410 + first = false; 411 + } else if (be32_to_cpup(p) != position) { 412 + return false; 413 + } 414 + p += 2; 415 + len += be32_to_cpup(p); 416 + 417 + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 418 + if (!p) 419 + return false; 406 420 } 407 - return p; 421 + return len <= MAX_BYTES_SPECIAL_CHUNK; 408 422 } 409 423 410 424 /* The segment count is limited to how many segments can ··· 426 412 * buffer. That's about 60 Write segments for a 1KB inline 427 413 * threshold. 
428 414 */ 429 - static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end, 430 - u32 maxlen) 415 + static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen) 431 416 { 432 - u32 i, segcount; 417 + u32 i, segcount, total; 418 + __be32 *p; 433 419 434 - segcount = be32_to_cpup(p++); 420 + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 421 + if (!p) 422 + return false; 423 + segcount = be32_to_cpup(p); 424 + 425 + total = 0; 435 426 for (i = 0; i < segcount; i++) { 436 - p++; /* handle */ 437 - if (be32_to_cpup(p++) > maxlen) 438 - return NULL; 439 - p += 2; /* offset */ 427 + u32 handle, length; 428 + u64 offset; 440 429 441 - if (p > end) 442 - return NULL; 430 + p = xdr_inline_decode(&rctxt->rc_stream, 431 + rpcrdma_segment_maxsz * sizeof(*p)); 432 + if (!p) 433 + return false; 434 + 435 + handle = be32_to_cpup(p++); 436 + length = be32_to_cpup(p++); 437 + xdr_decode_hyper(p, &offset); 438 + trace_svcrdma_decode_wseg(handle, length, offset); 439 + 440 + total += length; 443 441 } 444 - 445 - return p; 442 + return total <= maxlen; 446 443 } 447 444 448 445 /* Sanity check the Write list. 449 446 * 450 447 * Implementation limits: 451 - * - This implementation supports only one Write chunk. 448 + * - This implementation currently supports only one Write chunk. 452 449 * 453 450 * Sanity checks: 454 - * - Write list does not overflow buffer. 455 - * - Segment size limited by largest NFS data payload. 451 + * - Write list does not overflow Receive buffer. 452 + * - Chunk size limited by largest NFS data payload. 456 453 * 457 - * Returns pointer to the following Reply chunk. 454 + * Return values: 455 + * %true: Write list is valid. @rctxt's xdr_stream is updated 456 + * to point to the first byte past the Write list. 457 + * %false: Write list is corrupt. @rctxt's xdr_stream is left 458 + * in an unknown state. 
458 459 */ 459 - static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end) 460 + static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) 460 461 { 461 - u32 chcount; 462 + u32 chcount = 0; 463 + __be32 *p; 462 464 463 - chcount = 0; 464 - while (*p++ != xdr_zero) { 465 - p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG); 465 + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 466 + if (!p) 467 + return false; 468 + rctxt->rc_write_list = p; 469 + while (*p != xdr_zero) { 470 + if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK)) 471 + return false; 472 + ++chcount; 473 + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 466 474 if (!p) 467 - return NULL; 468 - if (chcount++ > 1) 469 - return NULL; 475 + return false; 470 476 } 471 - return p; 477 + if (!chcount) 478 + rctxt->rc_write_list = NULL; 479 + return chcount < 2; 472 480 } 473 481 474 482 /* Sanity check the Reply chunk. 475 483 * 476 484 * Sanity checks: 477 - * - Reply chunk does not overflow buffer. 478 - * - Segment size limited by largest NFS data payload. 485 + * - Reply chunk does not overflow Receive buffer. 486 + * - Chunk size limited by largest NFS data payload. 479 487 * 480 - * Returns pointer to the following RPC header. 488 + * Return values: 489 + * %true: Reply chunk is valid. @rctxt's xdr_stream is updated 490 + * to point to the first byte past the Reply chunk. 491 + * %false: Reply chunk is corrupt. @rctxt's xdr_stream is left 492 + * in an unknown state. 
481 493 */ 482 - static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end) 494 + static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) 483 495 { 484 - if (*p++ != xdr_zero) { 485 - p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG); 486 - if (!p) 487 - return NULL; 496 + __be32 *p; 497 + 498 + p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 499 + if (!p) 500 + return false; 501 + rctxt->rc_reply_chunk = p; 502 + if (*p != xdr_zero) { 503 + if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK)) 504 + return false; 505 + } else { 506 + rctxt->rc_reply_chunk = NULL; 488 507 } 489 - return p; 508 + return true; 490 509 } 491 510 492 511 /* RPC-over-RDMA Version One private extension: Remote Invalidation. ··· 584 537 ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); 585 538 } 586 539 587 - /* On entry, xdr->head[0].iov_base points to first byte in the 588 - * RPC-over-RDMA header. 540 + /** 541 + * svc_rdma_xdr_decode_req - Decode the transport header 542 + * @rq_arg: xdr_buf containing ingress RPC/RDMA message 543 + * @rctxt: state of decoding 544 + * 545 + * On entry, xdr->head[0].iov_base points to first byte of the 546 + * RPC-over-RDMA transport header. 589 547 * 590 548 * On successful exit, head[0] points to first byte past the 591 549 * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. 550 + * 592 551 * The length of the RPC-over-RDMA header is returned. 593 552 * 594 553 * Assumptions: 595 554 * - The transport header is entirely contained in the head iovec. 
596 555 */ 597 - static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) 556 + static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, 557 + struct svc_rdma_recv_ctxt *rctxt) 598 558 { 599 - __be32 *p, *end, *rdma_argp; 559 + __be32 *p, *rdma_argp; 600 560 unsigned int hdr_len; 601 561 602 - /* Verify that there's enough bytes for header + something */ 603 - if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) 604 - goto out_short; 605 - 606 562 rdma_argp = rq_arg->head[0].iov_base; 607 - if (*(rdma_argp + 1) != rpcrdma_version) 608 - goto out_version; 563 + xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL); 609 564 610 - switch (*(rdma_argp + 3)) { 565 + p = xdr_inline_decode(&rctxt->rc_stream, 566 + rpcrdma_fixed_maxsz * sizeof(*p)); 567 + if (unlikely(!p)) 568 + goto out_short; 569 + p++; 570 + if (*p != rpcrdma_version) 571 + goto out_version; 572 + p += 2; 573 + switch (*p) { 611 574 case rdma_msg: 612 575 break; 613 576 case rdma_nomsg: 614 577 break; 615 - 616 578 case rdma_done: 617 579 goto out_drop; 618 - 619 580 case rdma_error: 620 581 goto out_drop; 621 - 622 582 default: 623 583 goto out_proc; 624 584 } 625 585 626 - end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len); 627 - p = xdr_check_read_list(rdma_argp + 4, end); 628 - if (!p) 586 + if (!xdr_check_read_list(rctxt)) 629 587 goto out_inval; 630 - p = xdr_check_write_list(p, end); 631 - if (!p) 588 + if (!xdr_check_write_list(rctxt)) 632 589 goto out_inval; 633 - p = xdr_check_reply_chunk(p, end); 634 - if (!p) 635 - goto out_inval; 636 - if (p > end) 590 + if (!xdr_check_reply_chunk(rctxt)) 637 591 goto out_inval; 638 592 639 - rq_arg->head[0].iov_base = p; 640 - hdr_len = (unsigned long)p - (unsigned long)rdma_argp; 593 + rq_arg->head[0].iov_base = rctxt->rc_stream.p; 594 + hdr_len = xdr_stream_pos(&rctxt->rc_stream); 641 595 rq_arg->head[0].iov_len -= hdr_len; 642 596 rq_arg->len -= hdr_len; 643 597 trace_svcrdma_decode_rqst(rdma_argp, hdr_len); ··· 698 650 __be32 *rdma_argp, int status) 
699 651 { 700 652 struct svc_rdma_send_ctxt *ctxt; 701 - unsigned int length; 702 653 __be32 *p; 703 654 int ret; 704 655 ··· 705 658 if (!ctxt) 706 659 return; 707 660 708 - p = ctxt->sc_xprt_buf; 661 + p = xdr_reserve_space(&ctxt->sc_stream, 662 + rpcrdma_fixed_maxsz * sizeof(*p)); 663 + if (!p) 664 + goto put_ctxt; 665 + 709 666 *p++ = *rdma_argp; 710 667 *p++ = *(rdma_argp + 1); 711 668 *p++ = xprt->sc_fc_credits; 712 - *p++ = rdma_error; 669 + *p = rdma_error; 670 + 713 671 switch (status) { 714 672 case -EPROTONOSUPPORT: 673 + p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p)); 674 + if (!p) 675 + goto put_ctxt; 676 + 715 677 *p++ = err_vers; 716 678 *p++ = rpcrdma_version; 717 - *p++ = rpcrdma_version; 679 + *p = rpcrdma_version; 718 680 trace_svcrdma_err_vers(*rdma_argp); 719 681 break; 720 682 default: 721 - *p++ = err_chunk; 683 + p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p)); 684 + if (!p) 685 + goto put_ctxt; 686 + 687 + *p = err_chunk; 722 688 trace_svcrdma_err_chunk(*rdma_argp); 723 689 } 724 - length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf; 725 - svc_rdma_sync_reply_hdr(xprt, ctxt, length); 726 690 691 + ctxt->sc_send_wr.num_sge = 1; 727 692 ctxt->sc_send_wr.opcode = IB_WR_SEND; 693 + ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len; 728 694 ret = svc_rdma_send(xprt, &ctxt->sc_send_wr); 729 695 if (ret) 730 - svc_rdma_send_ctxt_put(xprt, ctxt); 696 + goto put_ctxt; 697 + return; 698 + 699 + put_ctxt: 700 + svc_rdma_send_ctxt_put(xprt, ctxt); 731 701 } 732 702 733 703 /* By convention, backchannel calls arrive via rdma_msg type ··· 849 785 rqstp->rq_next_page = rqstp->rq_respages; 850 786 851 787 p = (__be32 *)rqstp->rq_arg.head[0].iov_base; 852 - ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); 788 + ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); 853 789 if (ret < 0) 854 790 goto out_err; 855 791 if (ret == 0)
+32 -25
net/sunrpc/xprtrdma/svc_rdma_rw.c
··· 41 41 struct rdma_rw_ctx rw_ctx; 42 42 int rw_nents; 43 43 struct sg_table rw_sg_table; 44 - struct scatterlist rw_first_sgl[0]; 44 + struct scatterlist rw_first_sgl[]; 45 45 }; 46 46 47 47 static inline struct svc_rdma_rw_ctxt * ··· 439 439 if (ret < 0) 440 440 goto out_initerr; 441 441 442 - trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset); 442 + trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset); 443 + 443 444 list_add(&ctxt->rw_list, &cc->cc_rwctxts); 444 445 cc->cc_sqecount += ret; 445 446 if (write_len == seg_length - info->wi_seg_off) { ··· 483 482 vec->iov_len); 484 483 } 485 484 486 - /* Send an xdr_buf's page list by itself. A Write chunk is 487 - * just the page list. a Reply chunk is the head, page list, 488 - * and tail. This function is shared between the two types 489 - * of chunk. 485 + /* Send an xdr_buf's page list by itself. A Write chunk is just 486 + * the page list. A Reply chunk is @xdr's head, page list, and 487 + * tail. This function is shared between the two types of chunk. 490 488 */ 491 489 static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, 492 - struct xdr_buf *xdr) 490 + struct xdr_buf *xdr, 491 + unsigned int offset, 492 + unsigned long length) 493 493 { 494 494 info->wi_xdr = xdr; 495 - info->wi_next_off = 0; 495 + info->wi_next_off = offset - xdr->head[0].iov_len; 496 496 return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg, 497 - xdr->page_len); 497 + length); 498 498 } 499 499 500 500 /** ··· 503 501 * @rdma: controlling RDMA transport 504 502 * @wr_ch: Write chunk provided by client 505 503 * @xdr: xdr_buf containing the data payload 504 + * @offset: payload's byte offset in @xdr 505 + * @length: size of payload, in bytes 506 506 * 507 507 * Returns a non-negative number of bytes the chunk consumed, or 508 508 * %-E2BIG if the payload was larger than the Write chunk, ··· 514 510 * %-EIO if rdma_rw initialization failed (DMA mapping, etc). 
515 511 */ 516 512 int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, 517 - struct xdr_buf *xdr) 513 + struct xdr_buf *xdr, 514 + unsigned int offset, unsigned long length) 518 515 { 519 516 struct svc_rdma_write_info *info; 520 517 int ret; 521 518 522 - if (!xdr->page_len) 519 + if (!length) 523 520 return 0; 524 521 525 522 info = svc_rdma_write_info_alloc(rdma, wr_ch); 526 523 if (!info) 527 524 return -ENOMEM; 528 525 529 - ret = svc_rdma_send_xdr_pagelist(info, xdr); 526 + ret = svc_rdma_send_xdr_pagelist(info, xdr, offset, length); 530 527 if (ret < 0) 531 528 goto out_err; 532 529 ··· 535 530 if (ret < 0) 536 531 goto out_err; 537 532 538 - trace_svcrdma_encode_write(xdr->page_len); 539 - return xdr->page_len; 533 + trace_svcrdma_send_write_chunk(xdr->page_len); 534 + return length; 540 535 541 536 out_err: 542 537 svc_rdma_write_info_free(info); ··· 546 541 /** 547 542 * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk 548 543 * @rdma: controlling RDMA transport 549 - * @rp_ch: Reply chunk provided by client 550 - * @writelist: true if client provided a Write list 544 + * @rctxt: Write and Reply chunks from client 551 545 * @xdr: xdr_buf containing an RPC Reply 552 546 * 553 547 * Returns a non-negative number of bytes the chunk consumed, or ··· 556 552 * %-ENOTCONN if posting failed (connection is lost), 557 553 * %-EIO if rdma_rw initialization failed (DMA mapping, etc). 
558 554 */ 559 - int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, 560 - bool writelist, struct xdr_buf *xdr) 555 + int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, 556 + const struct svc_rdma_recv_ctxt *rctxt, 557 + struct xdr_buf *xdr) 561 558 { 562 559 struct svc_rdma_write_info *info; 563 560 int consumed, ret; 564 561 565 - info = svc_rdma_write_info_alloc(rdma, rp_ch); 562 + info = svc_rdma_write_info_alloc(rdma, rctxt->rc_reply_chunk); 566 563 if (!info) 567 564 return -ENOMEM; 568 565 ··· 575 570 /* Send the page list in the Reply chunk only if the 576 571 * client did not provide Write chunks. 577 572 */ 578 - if (!writelist && xdr->page_len) { 579 - ret = svc_rdma_send_xdr_pagelist(info, xdr); 573 + if (!rctxt->rc_write_list && xdr->page_len) { 574 + ret = svc_rdma_send_xdr_pagelist(info, xdr, 575 + xdr->head[0].iov_len, 576 + xdr->page_len); 580 577 if (ret < 0) 581 578 goto out_err; 582 579 consumed += xdr->page_len; ··· 595 588 if (ret < 0) 596 589 goto out_err; 597 590 598 - trace_svcrdma_encode_reply(consumed); 591 + trace_svcrdma_send_reply_chunk(consumed); 599 592 return consumed; 600 593 601 594 out_err: ··· 698 691 if (ret < 0) 699 692 break; 700 693 701 - trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset); 694 + trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset); 702 695 info->ri_chunklen += rs_length; 703 696 } 704 697 ··· 729 722 if (ret < 0) 730 723 goto out; 731 724 732 - trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position); 725 + trace_svcrdma_send_read_chunk(info->ri_chunklen, info->ri_position); 733 726 734 727 head->rc_hdr_count = 0; 735 728 ··· 785 778 if (ret < 0) 786 779 goto out; 787 780 788 - trace_svcrdma_encode_pzr(info->ri_chunklen); 781 + trace_svcrdma_send_pzr(info->ri_chunklen); 789 782 790 783 head->rc_arg.len += info->ri_chunklen; 791 784 head->rc_arg.buflen += info->ri_chunklen;
+302 -226
net/sunrpc/xprtrdma/svc_rdma_sendto.c
··· 151 151 ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; 152 152 ctxt->sc_cqe.done = svc_rdma_wc_send; 153 153 ctxt->sc_xprt_buf = buffer; 154 + xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, 155 + rdma->sc_max_req_size); 154 156 ctxt->sc_sges[0].addr = addr; 155 157 156 158 for (i = 0; i < rdma->sc_max_send_sges; i++) ··· 206 204 spin_unlock(&rdma->sc_send_lock); 207 205 208 206 out: 207 + rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0); 208 + xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, 209 + ctxt->sc_xprt_buf, NULL); 210 + 209 211 ctxt->sc_send_wr.num_sge = 0; 210 212 ctxt->sc_cur_sge_no = 0; 211 213 ctxt->sc_page_count = 0; ··· 301 295 302 296 might_sleep(); 303 297 298 + /* Sync the transport header buffer */ 299 + ib_dma_sync_single_for_device(rdma->sc_pd->device, 300 + wr->sg_list[0].addr, 301 + wr->sg_list[0].length, 302 + DMA_TO_DEVICE); 303 + 304 304 /* If the SQ is full, wait until an SQ entry is available */ 305 305 while (1) { 306 306 if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { ··· 334 322 return ret; 335 323 } 336 324 337 - static u32 xdr_padsize(u32 len) 338 - { 339 - return (len & 3) ? (4 - (len & 3)) : 0; 340 - } 341 - 342 - /* Returns length of transport header, in bytes. 343 - */ 344 - static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp) 345 - { 346 - unsigned int nsegs; 347 - __be32 *p; 348 - 349 - p = rdma_resp; 350 - 351 - /* RPC-over-RDMA V1 replies never have a Read list. */ 352 - p += rpcrdma_fixed_maxsz + 1; 353 - 354 - /* Skip Write list. */ 355 - while (*p++ != xdr_zero) { 356 - nsegs = be32_to_cpup(p++); 357 - p += nsegs * rpcrdma_segment_maxsz; 358 - } 359 - 360 - /* Skip Reply chunk. */ 361 - if (*p++ != xdr_zero) { 362 - nsegs = be32_to_cpup(p++); 363 - p += nsegs * rpcrdma_segment_maxsz; 364 - } 365 - 366 - return (unsigned long)p - (unsigned long)rdma_resp; 367 - } 368 - 369 - /* One Write chunk is copied from Call transport header to Reply 370 - * transport header. 
Each segment's length field is updated to 371 - * reflect number of bytes consumed in the segment. 325 + /** 326 + * svc_rdma_encode_read_list - Encode RPC Reply's Read chunk list 327 + * @sctxt: Send context for the RPC Reply 372 328 * 373 - * Returns number of segments in this chunk. 329 + * Return values: 330 + * On success, returns length in bytes of the Reply XDR buffer 331 + * that was consumed by the Reply Read list 332 + * %-EMSGSIZE on XDR buffer overflow 374 333 */ 375 - static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src, 334 + static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt) 335 + { 336 + /* RPC-over-RDMA version 1 replies never have a Read list. */ 337 + return xdr_stream_encode_item_absent(&sctxt->sc_stream); 338 + } 339 + 340 + /** 341 + * svc_rdma_encode_write_segment - Encode one Write segment 342 + * @src: matching Write chunk in the RPC Call header 343 + * @sctxt: Send context for the RPC Reply 344 + * @remaining: remaining bytes of the payload left in the Write chunk 345 + * 346 + * Return values: 347 + * On success, returns length in bytes of the Reply XDR buffer 348 + * that was consumed by the Write segment 349 + * %-EMSGSIZE on XDR buffer overflow 350 + */ 351 + static ssize_t svc_rdma_encode_write_segment(__be32 *src, 352 + struct svc_rdma_send_ctxt *sctxt, 353 + unsigned int *remaining) 354 + { 355 + __be32 *p; 356 + const size_t len = rpcrdma_segment_maxsz * sizeof(*p); 357 + u32 handle, length; 358 + u64 offset; 359 + 360 + p = xdr_reserve_space(&sctxt->sc_stream, len); 361 + if (!p) 362 + return -EMSGSIZE; 363 + 364 + handle = be32_to_cpup(src++); 365 + length = be32_to_cpup(src++); 366 + xdr_decode_hyper(src, &offset); 367 + 368 + *p++ = cpu_to_be32(handle); 369 + if (*remaining < length) { 370 + /* segment only partly filled */ 371 + length = *remaining; 372 + *remaining = 0; 373 + } else { 374 + /* entire segment was consumed */ 375 + *remaining -= length; 376 + } 377 + *p++ = 
cpu_to_be32(length); 378 + xdr_encode_hyper(p, offset); 379 + 380 + trace_svcrdma_encode_wseg(handle, length, offset); 381 + return len; 382 + } 383 + 384 + /** 385 + * svc_rdma_encode_write_chunk - Encode one Write chunk 386 + * @src: matching Write chunk in the RPC Call header 387 + * @sctxt: Send context for the RPC Reply 388 + * @remaining: size in bytes of the payload in the Write chunk 389 + * 390 + * Copy a Write chunk from the Call transport header to the 391 + * Reply transport header. Update each segment's length field 392 + * to reflect the number of bytes written in that segment. 393 + * 394 + * Return values: 395 + * On success, returns length in bytes of the Reply XDR buffer 396 + * that was consumed by the Write chunk 397 + * %-EMSGSIZE on XDR buffer overflow 398 + */ 399 + static ssize_t svc_rdma_encode_write_chunk(__be32 *src, 400 + struct svc_rdma_send_ctxt *sctxt, 376 401 unsigned int remaining) 377 402 { 378 403 unsigned int i, nsegs; 379 - u32 seg_len; 404 + ssize_t len, ret; 380 405 381 - /* Write list discriminator */ 382 - *dst++ = *src++; 406 + len = 0; 407 + trace_svcrdma_encode_write_chunk(remaining); 383 408 384 - /* number of segments in this chunk */ 385 - nsegs = be32_to_cpup(src); 386 - *dst++ = *src++; 409 + src++; 410 + ret = xdr_stream_encode_item_present(&sctxt->sc_stream); 411 + if (ret < 0) 412 + return -EMSGSIZE; 413 + len += ret; 414 + 415 + nsegs = be32_to_cpup(src++); 416 + ret = xdr_stream_encode_u32(&sctxt->sc_stream, nsegs); 417 + if (ret < 0) 418 + return -EMSGSIZE; 419 + len += ret; 387 420 388 421 for (i = nsegs; i; i--) { 389 - /* segment's RDMA handle */ 390 - *dst++ = *src++; 391 - 392 - /* bytes returned in this segment */ 393 - seg_len = be32_to_cpu(*src); 394 - if (remaining >= seg_len) { 395 - /* entire segment was consumed */ 396 - *dst = *src; 397 - remaining -= seg_len; 398 - } else { 399 - /* segment only partly filled */ 400 - *dst = cpu_to_be32(remaining); 401 - remaining = 0; 402 - } 403 - dst++; src++; 
404 - 405 - /* segment's RDMA offset */ 406 - *dst++ = *src++; 407 - *dst++ = *src++; 422 + ret = svc_rdma_encode_write_segment(src, sctxt, &remaining); 423 + if (ret < 0) 424 + return -EMSGSIZE; 425 + src += rpcrdma_segment_maxsz; 426 + len += ret; 408 427 } 409 428 410 - return nsegs; 429 + return len; 411 430 } 412 431 413 - /* The client provided a Write list in the Call message. Fill in 414 - * the segments in the first Write chunk in the Reply's transport 432 + /** 433 + * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list 434 + * @rctxt: Reply context with information about the RPC Call 435 + * @sctxt: Send context for the RPC Reply 436 + * @length: size in bytes of the payload in the first Write chunk 437 + * 438 + * The client provides a Write chunk list in the Call message. Fill 439 + * in the segments in the first Write chunk in the Reply's transport 415 440 * header with the number of bytes consumed in each segment. 416 441 * Remaining chunks are returned unused. 417 442 * 418 443 * Assumptions: 419 444 * - Client has provided only one Write chunk 445 + * 446 + * Return values: 447 + * On success, returns length in bytes of the Reply XDR buffer 448 + * that was consumed by the Reply's Write list 449 + * %-EMSGSIZE on XDR buffer overflow 420 450 */ 421 - static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch, 422 - unsigned int consumed) 451 + static ssize_t 452 + svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, 453 + struct svc_rdma_send_ctxt *sctxt, 454 + unsigned int length) 423 455 { 424 - unsigned int nsegs; 425 - __be32 *p, *q; 456 + ssize_t len, ret; 426 457 427 - /* RPC-over-RDMA V1 replies never have a Read list. 
*/ 428 - p = rdma_resp + rpcrdma_fixed_maxsz + 1; 458 + ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length); 459 + if (ret < 0) 460 + return ret; 461 + len = ret; 429 462 430 - q = wr_ch; 431 - while (*q != xdr_zero) { 432 - nsegs = xdr_encode_write_chunk(p, q, consumed); 433 - q += 2 + nsegs * rpcrdma_segment_maxsz; 434 - p += 2 + nsegs * rpcrdma_segment_maxsz; 435 - consumed = 0; 436 - } 463 + /* Terminate the Write list */ 464 + ret = xdr_stream_encode_item_absent(&sctxt->sc_stream); 465 + if (ret < 0) 466 + return ret; 437 467 438 - /* Terminate Write list */ 439 - *p++ = xdr_zero; 440 - 441 - /* Reply chunk discriminator; may be replaced later */ 442 - *p = xdr_zero; 468 + return len + ret; 443 469 } 444 470 445 - /* The client provided a Reply chunk in the Call message. Fill in 446 - * the segments in the Reply chunk in the Reply message with the 447 - * number of bytes consumed in each segment. 471 + /** 472 + * svc_rdma_encode_reply_chunk - Encode RPC Reply's Reply chunk 473 + * @rctxt: Reply context with information about the RPC Call 474 + * @sctxt: Send context for the RPC Reply 475 + * @length: size in bytes of the payload in the Reply chunk 448 476 * 449 477 * Assumptions: 450 - * - Reply can always fit in the provided Reply chunk 478 + * - Reply can always fit in the client-provided Reply chunk 479 + * 480 + * Return values: 481 + * On success, returns length in bytes of the Reply XDR buffer 482 + * that was consumed by the Reply's Reply chunk 483 + * %-EMSGSIZE on XDR buffer overflow 451 484 */ 452 - static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch, 453 - unsigned int consumed) 485 + static ssize_t 486 + svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt, 487 + struct svc_rdma_send_ctxt *sctxt, 488 + unsigned int length) 454 489 { 455 - __be32 *p; 456 - 457 - /* Find the Reply chunk in the Reply's xprt header. 458 - * RPC-over-RDMA V1 replies never have a Read list. 
459 - */ 460 - p = rdma_resp + rpcrdma_fixed_maxsz + 1; 461 - 462 - /* Skip past Write list */ 463 - while (*p++ != xdr_zero) 464 - p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; 465 - 466 - xdr_encode_write_chunk(p, rp_ch, consumed); 467 - } 468 - 469 - /* Parse the RPC Call's transport header. 470 - */ 471 - static void svc_rdma_get_write_arrays(__be32 *rdma_argp, 472 - __be32 **write, __be32 **reply) 473 - { 474 - __be32 *p; 475 - 476 - p = rdma_argp + rpcrdma_fixed_maxsz; 477 - 478 - /* Read list */ 479 - while (*p++ != xdr_zero) 480 - p += 5; 481 - 482 - /* Write list */ 483 - if (*p != xdr_zero) { 484 - *write = p; 485 - while (*p++ != xdr_zero) 486 - p += 1 + be32_to_cpu(*p) * 4; 487 - } else { 488 - *write = NULL; 489 - p++; 490 - } 491 - 492 - /* Reply chunk */ 493 - if (*p != xdr_zero) 494 - *reply = p; 495 - else 496 - *reply = NULL; 490 + return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt, 491 + length); 497 492 } 498 493 499 494 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, ··· 539 520 } 540 521 541 522 /** 542 - * svc_rdma_sync_reply_hdr - DMA sync the transport header buffer 523 + * svc_rdma_pull_up_needed - Determine whether to use pull-up 543 524 * @rdma: controlling transport 544 - * @ctxt: send_ctxt for the Send WR 545 - * @len: length of transport header 525 + * @sctxt: send_ctxt for the Send WR 526 + * @rctxt: Write and Reply chunks provided by client 527 + * @xdr: xdr_buf containing RPC message to transmit 546 528 * 547 - */ 548 - void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, 549 - struct svc_rdma_send_ctxt *ctxt, 550 - unsigned int len) 551 - { 552 - ctxt->sc_sges[0].length = len; 553 - ctxt->sc_send_wr.num_sge++; 554 - ib_dma_sync_single_for_device(rdma->sc_pd->device, 555 - ctxt->sc_sges[0].addr, len, 556 - DMA_TO_DEVICE); 557 - } 558 - 559 - /* If the xdr_buf has more elements than the device can 560 - * transmit in a single RDMA Send, then the reply will 561 - * have to be copied into a bounce 
buffer. 529 + * Returns: 530 + * %true if pull-up must be used 531 + * %false otherwise 562 532 */ 563 533 static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, 564 - struct xdr_buf *xdr, 565 - __be32 *wr_lst) 534 + struct svc_rdma_send_ctxt *sctxt, 535 + const struct svc_rdma_recv_ctxt *rctxt, 536 + struct xdr_buf *xdr) 566 537 { 567 538 int elements; 568 539 540 + /* For small messages, copying bytes is cheaper than DMA mapping. 541 + */ 542 + if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH) 543 + return true; 544 + 545 + /* Check whether the xdr_buf has more elements than can 546 + * fit in a single RDMA Send. 547 + */ 569 548 /* xdr->head */ 570 549 elements = 1; 571 550 572 551 /* xdr->pages */ 573 - if (!wr_lst) { 552 + if (!rctxt || !rctxt->rc_write_list) { 574 553 unsigned int remaining; 575 554 unsigned long pageoff; 576 555 ··· 590 573 return elements >= rdma->sc_max_send_sges; 591 574 } 592 575 593 - /* The device is not capable of sending the reply directly. 594 - * Assemble the elements of @xdr into the transport header 595 - * buffer. 576 + /** 577 + * svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer 578 + * @rdma: controlling transport 579 + * @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared 580 + * @rctxt: Write and Reply chunks provided by client 581 + * @xdr: prepared xdr_buf containing RPC message 582 + * 583 + * The device is not capable of sending the reply directly. 584 + * Assemble the elements of @xdr into the transport header buffer. 585 + * 586 + * Returns zero on success, or a negative errno on failure. 
596 587 */ 597 588 static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, 598 - struct svc_rdma_send_ctxt *ctxt, 599 - struct xdr_buf *xdr, __be32 *wr_lst) 589 + struct svc_rdma_send_ctxt *sctxt, 590 + const struct svc_rdma_recv_ctxt *rctxt, 591 + const struct xdr_buf *xdr) 600 592 { 601 593 unsigned char *dst, *tailbase; 602 594 unsigned int taillen; 603 595 604 - dst = ctxt->sc_xprt_buf; 605 - dst += ctxt->sc_sges[0].length; 606 - 596 + dst = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len; 607 597 memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); 608 598 dst += xdr->head[0].iov_len; 609 599 610 600 tailbase = xdr->tail[0].iov_base; 611 601 taillen = xdr->tail[0].iov_len; 612 - if (wr_lst) { 602 + if (rctxt && rctxt->rc_write_list) { 613 603 u32 xdrpad; 614 604 615 - xdrpad = xdr_padsize(xdr->page_len); 605 + xdrpad = xdr_pad_size(xdr->page_len); 616 606 if (taillen && xdrpad) { 617 607 tailbase += xdrpad; 618 608 taillen -= xdrpad; ··· 645 621 if (taillen) 646 622 memcpy(dst, tailbase, taillen); 647 623 648 - ctxt->sc_sges[0].length += xdr->len; 649 - ib_dma_sync_single_for_device(rdma->sc_pd->device, 650 - ctxt->sc_sges[0].addr, 651 - ctxt->sc_sges[0].length, 652 - DMA_TO_DEVICE); 653 - 624 + sctxt->sc_sges[0].length += xdr->len; 625 + trace_svcrdma_send_pullup(sctxt->sc_sges[0].length); 654 626 return 0; 655 627 } 656 628 657 - /* svc_rdma_map_reply_msg - Map the buffer holding RPC message 629 + /* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message 658 630 * @rdma: controlling transport 659 - * @ctxt: send_ctxt for the Send WR 631 + * @sctxt: send_ctxt for the Send WR 632 + * @rctxt: Write and Reply chunks provided by client 660 633 * @xdr: prepared xdr_buf containing RPC message 661 - * @wr_lst: pointer to Call header's Write list, or NULL 662 634 * 663 635 * Load the xdr_buf into the ctxt's sge array, and DMA map each 664 - * element as it is added. 636 + * element as it is added. The Send WR's num_sge field is set. 
665 637 * 666 638 * Returns zero on success, or a negative errno on failure. 667 639 */ 668 640 int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 669 - struct svc_rdma_send_ctxt *ctxt, 670 - struct xdr_buf *xdr, __be32 *wr_lst) 641 + struct svc_rdma_send_ctxt *sctxt, 642 + const struct svc_rdma_recv_ctxt *rctxt, 643 + struct xdr_buf *xdr) 671 644 { 672 645 unsigned int len, remaining; 673 646 unsigned long page_off; ··· 673 652 u32 xdr_pad; 674 653 int ret; 675 654 676 - if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) 677 - return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); 655 + /* Set up the (persistently-mapped) transport header SGE. */ 656 + sctxt->sc_send_wr.num_sge = 1; 657 + sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; 678 658 679 - ++ctxt->sc_cur_sge_no; 680 - ret = svc_rdma_dma_map_buf(rdma, ctxt, 659 + /* If there is a Reply chunk, nothing follows the transport 660 + * header, and we're done here. 661 + */ 662 + if (rctxt && rctxt->rc_reply_chunk) 663 + return 0; 664 + 665 + /* For pull-up, svc_rdma_send() will sync the transport header. 666 + * No additional DMA mapping is necessary. 667 + */ 668 + if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr)) 669 + return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr); 670 + 671 + ++sctxt->sc_cur_sge_no; 672 + ret = svc_rdma_dma_map_buf(rdma, sctxt, 681 673 xdr->head[0].iov_base, 682 674 xdr->head[0].iov_len); 683 675 if (ret < 0) ··· 701 667 * have added XDR padding in the tail buffer, and that 702 668 * should not be included inline. 
703 669 */ 704 - if (wr_lst) { 670 + if (rctxt && rctxt->rc_write_list) { 705 671 base = xdr->tail[0].iov_base; 706 672 len = xdr->tail[0].iov_len; 707 - xdr_pad = xdr_padsize(xdr->page_len); 673 + xdr_pad = xdr_pad_size(xdr->page_len); 708 674 709 675 if (len && xdr_pad) { 710 676 base += xdr_pad; ··· 720 686 while (remaining) { 721 687 len = min_t(u32, PAGE_SIZE - page_off, remaining); 722 688 723 - ++ctxt->sc_cur_sge_no; 724 - ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, 689 + ++sctxt->sc_cur_sge_no; 690 + ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++, 725 691 page_off, len); 726 692 if (ret < 0) 727 693 return ret; ··· 734 700 len = xdr->tail[0].iov_len; 735 701 tail: 736 702 if (len) { 737 - ++ctxt->sc_cur_sge_no; 738 - ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); 703 + ++sctxt->sc_cur_sge_no; 704 + ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len); 739 705 if (ret < 0) 740 706 return ret; 741 707 } ··· 782 748 */ 783 749 static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, 784 750 struct svc_rdma_send_ctxt *sctxt, 785 - struct svc_rdma_recv_ctxt *rctxt, 786 - struct svc_rqst *rqstp, 787 - __be32 *wr_lst, __be32 *rp_ch) 751 + const struct svc_rdma_recv_ctxt *rctxt, 752 + struct svc_rqst *rqstp) 788 753 { 789 754 int ret; 790 755 791 - if (!rp_ch) { 792 - ret = svc_rdma_map_reply_msg(rdma, sctxt, 793 - &rqstp->rq_res, wr_lst); 794 - if (ret < 0) 795 - return ret; 796 - } 756 + ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res); 757 + if (ret < 0) 758 + return ret; 797 759 798 760 svc_rdma_save_io_pages(rqstp, sctxt); 799 761 ··· 799 769 } else { 800 770 sctxt->sc_send_wr.opcode = IB_WR_SEND; 801 771 } 802 - dprintk("svcrdma: posting Send WR with %u sge(s)\n", 803 - sctxt->sc_send_wr.num_sge); 804 772 return svc_rdma_send(rdma, &sctxt->sc_send_wr); 805 773 } 806 774 ··· 813 785 struct svc_rdma_send_ctxt *ctxt, 814 786 struct svc_rqst *rqstp) 815 787 { 788 + struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; 789 + 
__be32 *rdma_argp = rctxt->rc_recv_buf; 816 790 __be32 *p; 817 - int ret; 818 791 819 - p = ctxt->sc_xprt_buf; 820 - trace_svcrdma_err_chunk(*p); 821 - p += 3; 792 + rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0); 793 + xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, 794 + NULL); 795 + 796 + p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_ERR); 797 + if (!p) 798 + return -ENOMSG; 799 + 800 + *p++ = *rdma_argp; 801 + *p++ = *(rdma_argp + 1); 802 + *p++ = rdma->sc_fc_credits; 822 803 *p++ = rdma_error; 823 804 *p = err_chunk; 824 - svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR); 805 + trace_svcrdma_err_chunk(*rdma_argp); 825 806 826 807 svc_rdma_save_io_pages(rqstp, ctxt); 827 808 809 + ctxt->sc_send_wr.num_sge = 1; 828 810 ctxt->sc_send_wr.opcode = IB_WR_SEND; 829 - ret = svc_rdma_send(rdma, &ctxt->sc_send_wr); 830 - if (ret) { 831 - svc_rdma_send_ctxt_put(rdma, ctxt); 832 - return ret; 833 - } 834 - 835 - return 0; 811 + ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len; 812 + return svc_rdma_send(rdma, &ctxt->sc_send_wr); 836 813 } 837 814 838 815 /** ··· 858 825 struct svcxprt_rdma *rdma = 859 826 container_of(xprt, struct svcxprt_rdma, sc_xprt); 860 827 struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; 861 - __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; 828 + __be32 *rdma_argp = rctxt->rc_recv_buf; 829 + __be32 *wr_lst = rctxt->rc_write_list; 830 + __be32 *rp_ch = rctxt->rc_reply_chunk; 862 831 struct xdr_buf *xdr = &rqstp->rq_res; 863 832 struct svc_rdma_send_ctxt *sctxt; 833 + __be32 *p; 864 834 int ret; 865 - 866 - rdma_argp = rctxt->rc_recv_buf; 867 - svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch); 868 835 869 836 /* Create the RDMA response header. xprt->xpt_mutex, 870 837 * acquired in svc_send(), serializes RPC replies. 
The ··· 876 843 sctxt = svc_rdma_send_ctxt_get(rdma); 877 844 if (!sctxt) 878 845 goto err0; 879 - rdma_resp = sctxt->sc_xprt_buf; 880 846 881 - p = rdma_resp; 847 + p = xdr_reserve_space(&sctxt->sc_stream, 848 + rpcrdma_fixed_maxsz * sizeof(*p)); 849 + if (!p) 850 + goto err0; 882 851 *p++ = *rdma_argp; 883 852 *p++ = *(rdma_argp + 1); 884 853 *p++ = rdma->sc_fc_credits; 885 - *p++ = rp_ch ? rdma_nomsg : rdma_msg; 854 + *p = rp_ch ? rdma_nomsg : rdma_msg; 886 855 887 - /* Start with empty chunks */ 888 - *p++ = xdr_zero; 889 - *p++ = xdr_zero; 890 - *p = xdr_zero; 891 - 856 + if (svc_rdma_encode_read_list(sctxt) < 0) 857 + goto err0; 892 858 if (wr_lst) { 893 859 /* XXX: Presume the client sent only one Write chunk */ 894 - ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr); 860 + unsigned long offset; 861 + unsigned int length; 862 + 863 + if (rctxt->rc_read_payload_length) { 864 + offset = rctxt->rc_read_payload_offset; 865 + length = rctxt->rc_read_payload_length; 866 + } else { 867 + offset = xdr->head[0].iov_len; 868 + length = xdr->page_len; 869 + } 870 + ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset, 871 + length); 895 872 if (ret < 0) 896 873 goto err2; 897 - svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret); 874 + if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0) 875 + goto err0; 876 + } else { 877 + if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) 878 + goto err0; 898 879 } 899 880 if (rp_ch) { 900 - ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr); 881 + ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res); 901 882 if (ret < 0) 902 883 goto err2; 903 - svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); 884 + if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0) 885 + goto err0; 886 + } else { 887 + if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) 888 + goto err0; 904 889 } 905 890 906 - svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp)); 907 - ret = 
svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp, 908 - wr_lst, rp_ch); 891 + ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); 909 892 if (ret < 0) 910 893 goto err1; 911 894 ret = 0; ··· 948 899 set_bit(XPT_CLOSE, &xprt->xpt_flags); 949 900 ret = -ENOTCONN; 950 901 goto out; 902 + } 903 + 904 + /** 905 + * svc_rdma_read_payload - special processing for a READ payload 906 + * @rqstp: svc_rqst to operate on 907 + * @offset: payload's byte offset in @xdr 908 + * @length: size of payload, in bytes 909 + * 910 + * Returns zero on success. 911 + * 912 + * For the moment, just record the xdr_buf location of the READ 913 + * payload. svc_rdma_sendto will use that location later when 914 + * we actually send the payload. 915 + */ 916 + int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, 917 + unsigned int length) 918 + { 919 + struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; 920 + 921 + /* XXX: Just one READ payload slot for now, since our 922 + * transport implementation currently supports only one 923 + * Write chunk. 924 + */ 925 + rctxt->rc_read_payload_offset = offset; 926 + rctxt->rc_read_payload_length = length; 927 + 928 + return 0; 951 929 }
+1 -7
net/sunrpc/xprtrdma/svc_rdma_transport.c
··· 82 82 .xpo_create = svc_rdma_create, 83 83 .xpo_recvfrom = svc_rdma_recvfrom, 84 84 .xpo_sendto = svc_rdma_sendto, 85 + .xpo_read_payload = svc_rdma_read_payload, 85 86 .xpo_release_rqst = svc_rdma_release_rqst, 86 87 .xpo_detach = svc_rdma_detach, 87 88 .xpo_free = svc_rdma_free, ··· 241 240 static int rdma_listen_handler(struct rdma_cm_id *cma_id, 242 241 struct rdma_cm_event *event) 243 242 { 244 - struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr; 245 - 246 - trace_svcrdma_cm_event(event, sap); 247 - 248 243 switch (event->event) { 249 244 case RDMA_CM_EVENT_CONNECT_REQUEST: 250 245 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " ··· 262 265 static int rdma_cma_handler(struct rdma_cm_id *cma_id, 263 266 struct rdma_cm_event *event) 264 267 { 265 - struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr; 266 268 struct svcxprt_rdma *rdma = cma_id->context; 267 269 struct svc_xprt *xprt = &rdma->sc_xprt; 268 - 269 - trace_svcrdma_cm_event(event, sap); 270 270 271 271 switch (event->event) { 272 272 case RDMA_CM_EVENT_ESTABLISHED:
+29 -159
net/sunrpc/xprtsock.c
··· 54 54 55 55 #include <trace/events/sunrpc.h> 56 56 57 + #include "socklib.h" 57 58 #include "sunrpc.h" 58 59 59 60 static void xs_close(struct rpc_xprt *xprt); ··· 750 749 751 750 #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) 752 751 753 - static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek) 754 - { 755 - if (seek) 756 - iov_iter_advance(&msg->msg_iter, seek); 757 - return sock_sendmsg(sock, msg); 758 - } 759 - 760 - static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek) 761 - { 762 - iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len); 763 - return xs_sendmsg(sock, msg, seek); 764 - } 765 - 766 - static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base) 767 - { 768 - int err; 769 - 770 - err = xdr_alloc_bvec(xdr, GFP_KERNEL); 771 - if (err < 0) 772 - return err; 773 - 774 - iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, 775 - xdr_buf_pagecount(xdr), 776 - xdr->page_len + xdr->page_base); 777 - return xs_sendmsg(sock, msg, base + xdr->page_base); 778 - } 779 - 780 - #define xs_record_marker_len() sizeof(rpc_fraghdr) 781 - 782 - /* Common case: 783 - * - stream transport 784 - * - sending from byte 0 of the message 785 - * - the message is wholly contained in @xdr's head iovec 786 - */ 787 - static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg, 788 - rpc_fraghdr marker, struct kvec *vec, size_t base) 789 - { 790 - struct kvec iov[2] = { 791 - [0] = { 792 - .iov_base = &marker, 793 - .iov_len = sizeof(marker) 794 - }, 795 - [1] = *vec, 796 - }; 797 - size_t len = iov[0].iov_len + iov[1].iov_len; 798 - 799 - iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len); 800 - return xs_sendmsg(sock, msg, base); 801 - } 802 - 803 - /** 804 - * xs_sendpages - write pages directly to a socket 805 - * @sock: socket to send on 806 - * @addr: UDP only -- address of destination 807 - * @addrlen: UDP only -- length of destination address 808 
- * @xdr: buffer containing this request 809 - * @base: starting position in the buffer 810 - * @rm: stream record marker field 811 - * @sent_p: return the total number of bytes successfully queued for sending 812 - * 813 - */ 814 - static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p) 815 - { 816 - struct msghdr msg = { 817 - .msg_name = addr, 818 - .msg_namelen = addrlen, 819 - .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE, 820 - }; 821 - unsigned int rmsize = rm ? sizeof(rm) : 0; 822 - unsigned int remainder = rmsize + xdr->len - base; 823 - unsigned int want; 824 - int err = 0; 825 - 826 - if (unlikely(!sock)) 827 - return -ENOTSOCK; 828 - 829 - want = xdr->head[0].iov_len + rmsize; 830 - if (base < want) { 831 - unsigned int len = want - base; 832 - remainder -= len; 833 - if (remainder == 0) 834 - msg.msg_flags &= ~MSG_MORE; 835 - if (rmsize) 836 - err = xs_send_rm_and_kvec(sock, &msg, rm, 837 - &xdr->head[0], base); 838 - else 839 - err = xs_send_kvec(sock, &msg, &xdr->head[0], base); 840 - if (remainder == 0 || err != len) 841 - goto out; 842 - *sent_p += err; 843 - base = 0; 844 - } else 845 - base -= want; 846 - 847 - if (base < xdr->page_len) { 848 - unsigned int len = xdr->page_len - base; 849 - remainder -= len; 850 - if (remainder == 0) 851 - msg.msg_flags &= ~MSG_MORE; 852 - err = xs_send_pagedata(sock, &msg, xdr, base); 853 - if (remainder == 0 || err != len) 854 - goto out; 855 - *sent_p += err; 856 - base = 0; 857 - } else 858 - base -= xdr->page_len; 859 - 860 - if (base >= xdr->tail[0].iov_len) 861 - return 0; 862 - msg.msg_flags &= ~MSG_MORE; 863 - err = xs_send_kvec(sock, &msg, &xdr->tail[0], base); 864 - out: 865 - if (err > 0) { 866 - *sent_p += err; 867 - err = 0; 868 - } 869 - return err; 870 - } 871 - 872 752 /** 873 753 * xs_nospace - handle transmit was incomplete 874 754 * @req: pointer to RPC request ··· 841 959 struct xdr_buf *xdr = 
&req->rq_snd_buf; 842 960 rpc_fraghdr rm = xs_stream_record_marker(xdr); 843 961 unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; 962 + struct msghdr msg = { 963 + .msg_flags = XS_SENDMSG_FLAGS, 964 + }; 965 + unsigned int uninitialized_var(sent); 844 966 int status; 845 - int sent = 0; 846 967 847 968 /* Close the stream if the previous transmission was incomplete */ 848 969 if (xs_send_request_was_aborted(transport, req)) { ··· 857 972 req->rq_svec->iov_base, req->rq_svec->iov_len); 858 973 859 974 req->rq_xtime = ktime_get(); 860 - status = xs_sendpages(transport->sock, NULL, 0, xdr, 861 - transport->xmit.offset, rm, &sent); 975 + status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 976 + transport->xmit.offset, rm, &sent); 862 977 dprintk("RPC: %s(%u) = %d\n", 863 978 __func__, xdr->len - transport->xmit.offset, status); 864 979 ··· 910 1025 struct rpc_xprt *xprt = req->rq_xprt; 911 1026 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 912 1027 struct xdr_buf *xdr = &req->rq_snd_buf; 913 - int sent = 0; 1028 + struct msghdr msg = { 1029 + .msg_name = xs_addr(xprt), 1030 + .msg_namelen = xprt->addrlen, 1031 + .msg_flags = XS_SENDMSG_FLAGS, 1032 + }; 1033 + unsigned int uninitialized_var(sent); 914 1034 int status; 915 1035 916 1036 xs_pktdump("packet data:", ··· 929 1039 return -EBADSLT; 930 1040 931 1041 req->rq_xtime = ktime_get(); 932 - status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, 933 - xdr, 0, 0, &sent); 1042 + status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent); 934 1043 935 1044 dprintk("RPC: xs_udp_send_request(%u) = %d\n", 936 1045 xdr->len, status); ··· 995 1106 struct xdr_buf *xdr = &req->rq_snd_buf; 996 1107 rpc_fraghdr rm = xs_stream_record_marker(xdr); 997 1108 unsigned int msglen = rm ? 
req->rq_slen + sizeof(rm) : req->rq_slen; 1109 + struct msghdr msg = { 1110 + .msg_flags = XS_SENDMSG_FLAGS, 1111 + }; 998 1112 bool vm_wait = false; 1113 + unsigned int uninitialized_var(sent); 999 1114 int status; 1000 - int sent; 1001 1115 1002 1116 /* Close the stream if the previous transmission was incomplete */ 1003 1117 if (xs_send_request_was_aborted(transport, req)) { ··· 1021 1129 * called sendmsg(). */ 1022 1130 req->rq_xtime = ktime_get(); 1023 1131 while (1) { 1024 - sent = 0; 1025 - status = xs_sendpages(transport->sock, NULL, 0, xdr, 1026 - transport->xmit.offset, rm, &sent); 1132 + status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 1133 + transport->xmit.offset, rm, &sent); 1027 1134 1028 1135 dprintk("RPC: xs_tcp_send_request(%u) = %d\n", 1029 1136 xdr->len - transport->xmit.offset, status); ··· 2527 2636 free_page((unsigned long)buf); 2528 2637 } 2529 2638 2530 - /* 2531 - * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex 2532 - * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request. 
2533 - */ 2534 2639 static int bc_sendto(struct rpc_rqst *req) 2535 2640 { 2536 - int len; 2537 - struct xdr_buf *xbufp = &req->rq_snd_buf; 2641 + struct xdr_buf *xdr = &req->rq_snd_buf; 2538 2642 struct sock_xprt *transport = 2539 2643 container_of(req->rq_xprt, struct sock_xprt, xprt); 2540 - unsigned long headoff; 2541 - unsigned long tailoff; 2542 - struct page *tailpage; 2543 2644 struct msghdr msg = { 2544 - .msg_flags = MSG_MORE 2645 + .msg_flags = 0, 2545 2646 }; 2546 2647 rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | 2547 - (u32)xbufp->len); 2548 - struct kvec iov = { 2549 - .iov_base = &marker, 2550 - .iov_len = sizeof(marker), 2551 - }; 2648 + (u32)xdr->len); 2649 + unsigned int sent = 0; 2650 + int err; 2552 2651 2553 2652 req->rq_xtime = ktime_get(); 2554 - 2555 - len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len); 2556 - if (len != iov.iov_len) 2653 + err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent); 2654 + xdr_free_bvec(xdr); 2655 + if (err < 0 || sent != (xdr->len + sizeof(marker))) 2557 2656 return -EAGAIN; 2558 - 2559 - tailpage = NULL; 2560 - if (xbufp->tail[0].iov_len) 2561 - tailpage = virt_to_page(xbufp->tail[0].iov_base); 2562 - tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; 2563 - headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; 2564 - len = svc_send_common(transport->sock, xbufp, 2565 - virt_to_page(xbufp->head[0].iov_base), headoff, 2566 - tailpage, tailoff); 2567 - if (len != xbufp->len) 2568 - return -EAGAIN; 2569 - return len; 2657 + return sent; 2570 2658 } 2571 2659 2572 2660 /*