Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nfs-for-5.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
"Highlights include:

Features:

- NFSv4.2 now supports cross-device offloaded copy (i.e. offloaded
copy of a file from one source server to a different target
server).

- New RDMA tracepoints for debugging congestion control and Local
Invalidate WRs.

Bugfixes and cleanups:

- Drop the NFSv4.1 session slot if nfs4_delegreturn_prepare waits for
layoutreturn

- Handle bad/dead sessions correctly in nfs41_sequence_process()

- Various bugfixes to the delegation return operation.

- Various bugfixes pertaining to delegations that have been revoked.

- Cleanups to the NFS timespec code to avoid unnecessary conversions
between timespec and timespec64.

- Fix unstable RDMA connections after a reconnect

- Close race between waking an RDMA sender and posting a receive

- Wake pending RDMA tasks if connection fails

- Fix MR list corruption, and clean up MR usage

- Fix another RPCSEC_GSS issue with MIC buffer space"

* tag 'nfs-for-5.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
SUNRPC: Capture completion of all RPC tasks
SUNRPC: Fix another issue with MIC buffer space
NFS4: Trace lock reclaims
NFS4: Trace state recovery operation
NFSv4.2 fix memory leak in nfs42_ssc_open
NFSv4.2 fix kfree in __nfs42_copy_file_range
NFS: remove duplicated include from nfs4file.c
NFSv4: Make _nfs42_proc_copy_notify() static
NFS: Fallocate should use the nfs4_fattr_bitmap
NFS: Return -ETXTBSY when attempting to write to a swapfile
fs: nfs: sysfs: Remove NULL check before kfree
NFS: remove unneeded semicolon
NFSv4: add declaration of current_stateid
NFSv4.x: Drop the slot if nfs4_delegreturn_prepare waits for layoutreturn
NFSv4.x: Handle bad/dead sessions correctly in nfs41_sequence_process()
nfsv4: Move NFSPROC4_CLNT_COPY_NOTIFY to end of list
SUNRPC: Avoid RPC delays when exiting suspend
NFS: Add a tracepoint in nfs_fh_to_dentry()
NFSv4: Don't retry the GETATTR on old stateid in nfs4_delegreturn_done()
NFSv4: Handle NFS4ERR_OLD_STATEID in delegreturn
...

+1773 -639
+2 -1
fs/lockd/host.c
··· 464 464 .version = host->h_version, 465 465 .authflavor = RPC_AUTH_UNIX, 466 466 .flags = (RPC_CLNT_CREATE_NOPING | 467 - RPC_CLNT_CREATE_AUTOBIND), 467 + RPC_CLNT_CREATE_AUTOBIND | 468 + RPC_CLNT_CREATE_REUSEPORT), 468 469 .cred = host->h_cred, 469 470 }; 470 471
+2 -2
fs/nfs/callback.h
··· 72 72 uint32_t bitmap[2]; 73 73 uint64_t size; 74 74 uint64_t change_attr; 75 - struct timespec ctime; 76 - struct timespec mtime; 75 + struct timespec64 ctime; 76 + struct timespec64 mtime; 77 77 }; 78 78 79 79 struct cb_recallargs {
+3 -5
fs/nfs/callback_proc.c
··· 26 26 struct cb_getattrargs *args = argp; 27 27 struct cb_getattrres *res = resp; 28 28 struct nfs_delegation *delegation; 29 - struct nfs_inode *nfsi; 30 29 struct inode *inode; 31 30 32 31 res->status = htonl(NFS4ERR_OP_NOT_IN_SESSION); ··· 46 47 -ntohl(res->status)); 47 48 goto out; 48 49 } 49 - nfsi = NFS_I(inode); 50 50 rcu_read_lock(); 51 - delegation = rcu_dereference(nfsi->delegation); 51 + delegation = nfs4_get_valid_delegation(inode); 52 52 if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) 53 53 goto out_iput; 54 54 res->size = i_size_read(inode); 55 55 res->change_attr = delegation->change_attr; 56 56 if (nfs_have_writebacks(inode)) 57 57 res->change_attr++; 58 - res->ctime = timespec64_to_timespec(inode->i_ctime); 59 - res->mtime = timespec64_to_timespec(inode->i_mtime); 58 + res->ctime = inode->i_ctime; 59 + res->mtime = inode->i_mtime; 60 60 res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & 61 61 args->bitmap[0]; 62 62 res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) &
+3 -3
fs/nfs/callback_xdr.c
··· 627 627 return 0; 628 628 } 629 629 630 - static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec *time) 630 + static __be32 encode_attr_time(struct xdr_stream *xdr, const struct timespec64 *time) 631 631 { 632 632 __be32 *p; 633 633 ··· 639 639 return 0; 640 640 } 641 641 642 - static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) 642 + static __be32 encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) 643 643 { 644 644 if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) 645 645 return 0; 646 646 return encode_attr_time(xdr,time); 647 647 } 648 648 649 - static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) 649 + static __be32 encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec64 *time) 650 650 { 651 651 if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) 652 652 return 0;
+11
fs/nfs/client.c
··· 312 312 /* Match nfsv4 minorversion */ 313 313 if (clp->cl_minorversion != data->minorversion) 314 314 continue; 315 + 316 + /* Match request for a dedicated DS */ 317 + if (test_bit(NFS_CS_DS, &data->init_flags) != 318 + test_bit(NFS_CS_DS, &clp->cl_flags)) 319 + continue; 320 + 315 321 /* Match the full socket address */ 316 322 if (!rpc_cmp_addr_port(sap, clap)) 317 323 /* Match all xprt_switch full socket addresses */ ··· 521 515 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; 522 516 if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) 523 517 args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; 518 + if (test_bit(NFS_CS_NOPING, &clp->cl_flags)) 519 + args.flags |= RPC_CLNT_CREATE_NOPING; 520 + if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags)) 521 + args.flags |= RPC_CLNT_CREATE_REUSEPORT; 524 522 525 523 if (!IS_ERR(clp->cl_rpcclient)) 526 524 return 0; ··· 672 662 .timeparms = &timeparms, 673 663 .cred = server->cred, 674 664 .nconnect = data->nfs_server.nconnect, 665 + .init_flags = (1UL << NFS_CS_REUSEPORT), 675 666 }; 676 667 struct nfs_client *clp; 677 668 int error;
+113 -49
fs/nfs/delegation.c
··· 199 199 delegation = rcu_dereference(NFS_I(inode)->delegation); 200 200 if (delegation != NULL) { 201 201 spin_lock(&delegation->lock); 202 - if (delegation->inode != NULL) { 202 + if (nfs4_is_valid_delegation(delegation, 0)) { 203 203 nfs4_stateid_copy(&delegation->stateid, stateid); 204 204 delegation->type = type; 205 205 delegation->pagemod_limit = pagemod_limit; ··· 229 229 delegation->cred, 230 230 &delegation->stateid, 231 231 issync); 232 - nfs_free_delegation(delegation); 233 232 return res; 234 233 } 235 234 ··· 297 298 return NULL; 298 299 299 300 spin_lock(&delegation->lock); 300 - set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); 301 + if (!delegation->inode) { 302 + spin_unlock(&delegation->lock); 303 + return NULL; 304 + } 301 305 list_del_rcu(&delegation->super_list); 302 306 delegation->inode = NULL; 303 307 rcu_assign_pointer(nfsi->delegation, NULL); ··· 327 325 struct nfs_server *server = NFS_SERVER(inode); 328 326 struct nfs_delegation *delegation; 329 327 330 - delegation = nfs_start_delegation_return(nfsi); 331 - if (delegation == NULL) 332 - return NULL; 333 - return nfs_detach_delegation(nfsi, delegation, server); 328 + rcu_read_lock(); 329 + delegation = rcu_dereference(nfsi->delegation); 330 + if (delegation != NULL) 331 + delegation = nfs_detach_delegation(nfsi, delegation, server); 332 + rcu_read_unlock(); 333 + return delegation; 334 334 } 335 335 336 336 static void ··· 343 339 delegation->stateid.seqid = update->stateid.seqid; 344 340 smp_wmb(); 345 341 delegation->type = update->type; 342 + clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); 346 343 } 347 344 } 348 345 ··· 384 379 spin_lock(&clp->cl_lock); 385 380 old_delegation = rcu_dereference_protected(nfsi->delegation, 386 381 lockdep_is_held(&clp->cl_lock)); 387 - if (old_delegation != NULL) { 388 - /* Is this an update of the existing delegation? 
*/ 389 - if (nfs4_stateid_match_other(&old_delegation->stateid, 390 - &delegation->stateid)) { 391 - nfs_update_inplace_delegation(old_delegation, 392 - delegation); 393 - goto out; 394 - } 382 + if (old_delegation == NULL) 383 + goto add_new; 384 + /* Is this an update of the existing delegation? */ 385 + if (nfs4_stateid_match_other(&old_delegation->stateid, 386 + &delegation->stateid)) { 387 + spin_lock(&old_delegation->lock); 388 + nfs_update_inplace_delegation(old_delegation, 389 + delegation); 390 + spin_unlock(&old_delegation->lock); 391 + goto out; 392 + } 393 + if (!test_bit(NFS_DELEGATION_REVOKED, &old_delegation->flags)) { 395 394 /* 396 395 * Deal with broken servers that hand out two 397 396 * delegations for the same file. ··· 414 405 if (test_and_set_bit(NFS_DELEGATION_RETURNING, 415 406 &old_delegation->flags)) 416 407 goto out; 417 - freeme = nfs_detach_delegation_locked(nfsi, 418 - old_delegation, clp); 419 - if (freeme == NULL) 420 - goto out; 421 408 } 409 + freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp); 410 + if (freeme == NULL) 411 + goto out; 412 + add_new: 422 413 list_add_tail_rcu(&delegation->super_list, &server->delegations); 423 414 rcu_assign_pointer(nfsi->delegation, delegation); 424 415 delegation = NULL; ··· 433 424 spin_unlock(&clp->cl_lock); 434 425 if (delegation != NULL) 435 426 nfs_free_delegation(delegation); 436 - if (freeme != NULL) 427 + if (freeme != NULL) { 437 428 nfs_do_return_delegation(inode, freeme, 0); 429 + nfs_free_delegation(freeme); 430 + } 438 431 return status; 439 432 } 440 433 ··· 446 435 static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) 447 436 { 448 437 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 449 - struct nfs_inode *nfsi = NFS_I(inode); 450 438 int err = 0; 451 439 452 440 if (delegation == NULL) ··· 467 457 nfs_abort_delegation_return(delegation, clp); 468 458 goto out; 469 459 } 470 - if (!nfs_detach_delegation(nfsi, 
delegation, NFS_SERVER(inode))) 471 - goto out; 472 460 473 461 err = nfs_do_return_delegation(inode, delegation, issync); 474 462 out: ··· 477 469 { 478 470 bool ret = false; 479 471 480 - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) 481 - goto out; 482 472 if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) 483 473 ret = true; 484 474 if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { ··· 488 482 ret = true; 489 483 spin_unlock(&delegation->lock); 490 484 } 491 - out: 485 + if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || 486 + test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) 487 + ret = false; 488 + 492 489 return ret; 493 490 } 494 491 ··· 594 585 } 595 586 596 587 /** 597 - * nfs_inode_return_delegation_noreclaim - return delegation, don't reclaim opens 588 + * nfs_inode_evict_delegation - return delegation, don't reclaim opens 598 589 * @inode: inode to process 599 590 * 600 591 * Does not protect against delegation reclaims, therefore really only safe 601 - * to be called from nfs4_clear_inode(). 592 + * to be called from nfs4_clear_inode(). Guaranteed to always free 593 + * the delegation structure. 
602 594 */ 603 - void nfs_inode_return_delegation_noreclaim(struct inode *inode) 595 + void nfs_inode_evict_delegation(struct inode *inode) 604 596 { 605 597 struct nfs_delegation *delegation; 606 598 607 599 delegation = nfs_inode_detach_delegation(inode); 608 - if (delegation != NULL) 600 + if (delegation != NULL) { 601 + set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); 609 602 nfs_do_return_delegation(inode, delegation, 1); 603 + nfs_free_delegation(delegation); 604 + } 610 605 } 611 606 612 607 /** ··· 646 633 */ 647 634 int nfs4_inode_make_writeable(struct inode *inode) 648 635 { 649 - if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) || 650 - !nfs4_check_delegation(inode, FMODE_WRITE)) 651 - return nfs4_inode_return_delegation(inode); 652 - return 0; 636 + struct nfs_delegation *delegation; 637 + 638 + rcu_read_lock(); 639 + delegation = nfs4_get_valid_delegation(inode); 640 + if (delegation == NULL || 641 + (nfs4_has_session(NFS_SERVER(inode)->nfs_client) && 642 + (delegation->type & FMODE_WRITE))) { 643 + rcu_read_unlock(); 644 + return 0; 645 + } 646 + rcu_read_unlock(); 647 + return nfs4_inode_return_delegation(inode); 653 648 } 654 649 655 650 static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, ··· 765 744 { 766 745 set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); 767 746 delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; 768 - nfs_mark_return_delegation(server, delegation); 769 747 } 770 748 771 - static bool nfs_revoke_delegation(struct inode *inode, 749 + static void nfs_revoke_delegation(struct inode *inode, 772 750 const nfs4_stateid *stateid) 773 751 { 774 752 struct nfs_delegation *delegation; ··· 781 761 if (stateid == NULL) { 782 762 nfs4_stateid_copy(&tmp, &delegation->stateid); 783 763 stateid = &tmp; 784 - } else if (!nfs4_stateid_match(stateid, &delegation->stateid)) 785 - goto out; 764 + } else { 765 + if (!nfs4_stateid_match_other(stateid, &delegation->stateid)) 766 + goto out; 767 + 
spin_lock(&delegation->lock); 768 + if (stateid->seqid) { 769 + if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) { 770 + spin_unlock(&delegation->lock); 771 + goto out; 772 + } 773 + delegation->stateid.seqid = stateid->seqid; 774 + } 775 + spin_unlock(&delegation->lock); 776 + } 786 777 nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); 787 778 ret = true; 788 779 out: 789 780 rcu_read_unlock(); 790 781 if (ret) 791 782 nfs_inode_find_state_and_recover(inode, stateid); 792 - return ret; 793 783 } 794 784 795 785 void nfs_remove_bad_delegation(struct inode *inode, 796 786 const nfs4_stateid *stateid) 797 787 { 798 - struct nfs_delegation *delegation; 799 - 800 - if (!nfs_revoke_delegation(inode, stateid)) 801 - return; 802 - delegation = nfs_inode_detach_delegation(inode); 803 - if (delegation) 804 - nfs_free_delegation(delegation); 788 + nfs_revoke_delegation(inode, stateid); 805 789 } 806 790 EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); 791 + 792 + void nfs_delegation_mark_returned(struct inode *inode, 793 + const nfs4_stateid *stateid) 794 + { 795 + struct nfs_delegation *delegation; 796 + 797 + if (!inode) 798 + return; 799 + 800 + rcu_read_lock(); 801 + delegation = rcu_dereference(NFS_I(inode)->delegation); 802 + if (!delegation) 803 + goto out_rcu_unlock; 804 + 805 + spin_lock(&delegation->lock); 806 + if (!nfs4_stateid_match_other(stateid, &delegation->stateid)) 807 + goto out_spin_unlock; 808 + if (stateid->seqid) { 809 + /* If delegation->stateid is newer, dont mark as returned */ 810 + if (nfs4_stateid_is_newer(&delegation->stateid, stateid)) 811 + goto out_clear_returning; 812 + if (delegation->stateid.seqid != stateid->seqid) 813 + delegation->stateid.seqid = stateid->seqid; 814 + } 815 + 816 + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); 817 + 818 + out_clear_returning: 819 + clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); 820 + out_spin_unlock: 821 + spin_unlock(&delegation->lock); 822 + out_rcu_unlock: 823 
+ rcu_read_unlock(); 824 + 825 + nfs_inode_find_state_and_recover(inode, stateid); 826 + } 807 827 808 828 /** 809 829 * nfs_expire_unused_delegation_types ··· 900 840 struct nfs_delegation *delegation; 901 841 902 842 rcu_read_lock(); 903 - delegation = rcu_dereference(NFS_I(inode)->delegation); 843 + delegation = nfs4_get_valid_delegation(inode); 904 844 if (delegation == NULL) 905 845 goto out_enoent; 906 846 if (stateid != NULL && ··· 926 866 list_for_each_entry_rcu(delegation, &server->delegations, super_list) { 927 867 spin_lock(&delegation->lock); 928 868 if (delegation->inode != NULL && 869 + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && 929 870 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 930 871 freeme = igrab(delegation->inode); 931 872 if (freeme && nfs_sb_active(freeme->i_sb)) ··· 1201 1140 rcu_read_lock(); 1202 1141 delegation = rcu_dereference(NFS_I(inode)->delegation); 1203 1142 if (delegation && 1204 - nfs4_stateid_match_other(&delegation->stateid, stateid)) { 1143 + nfs4_stateid_match_or_older(&delegation->stateid, stateid) && 1144 + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { 1205 1145 nfs_mark_test_expired_delegation(NFS_SERVER(inode), delegation); 1206 1146 found = true; 1207 1147 } ··· 1251 1189 rcu_read_lock(); 1252 1190 delegation = rcu_dereference(NFS_I(inode)->delegation); 1253 1191 if (delegation != NULL && 1254 - nfs4_stateid_match_other(dst, &delegation->stateid)) { 1192 + nfs4_stateid_match_other(dst, &delegation->stateid) && 1193 + nfs4_stateid_is_newer(&delegation->stateid, dst) && 1194 + !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { 1255 1195 dst->seqid = delegation->stateid.seqid; 1256 1196 ret = true; 1257 1197 }
+2 -1
fs/nfs/delegation.h
··· 43 43 fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); 44 44 int nfs4_inode_return_delegation(struct inode *inode); 45 45 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 46 - void nfs_inode_return_delegation_noreclaim(struct inode *inode); 46 + void nfs_inode_evict_delegation(struct inode *inode); 47 47 48 48 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 49 49 void nfs_server_return_all_delegations(struct nfs_server *); ··· 53 53 int nfs_client_return_marked_delegations(struct nfs_client *clp); 54 54 int nfs_delegations_present(struct nfs_client *clp); 55 55 void nfs_remove_bad_delegation(struct inode *inode, const nfs4_stateid *stateid); 56 + void nfs_delegation_mark_returned(struct inode *inode, const nfs4_stateid *stateid); 56 57 57 58 void nfs_delegation_mark_reclaim(struct nfs_client *clp); 58 59 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
+1
fs/nfs/export.c
··· 105 105 ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL); 106 106 if (ret) { 107 107 dprintk("%s: getattr failed %d\n", __func__, ret); 108 + trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret); 108 109 dentry = ERR_PTR(ret); 109 110 goto out_free_label; 110 111 }
+1 -1
fs/nfs/file.c
··· 649 649 650 650 out_swapfile: 651 651 printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); 652 - return -EBUSY; 652 + return -ETXTBSY; 653 653 } 654 654 EXPORT_SYMBOL_GPL(nfs_file_write); 655 655
+27 -27
fs/nfs/inode.c
··· 504 504 nfsi->read_cache_jiffies = fattr->time_start; 505 505 nfsi->attr_gencount = fattr->gencount; 506 506 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 507 - inode->i_atime = timespec_to_timespec64(fattr->atime); 507 + inode->i_atime = fattr->atime; 508 508 else if (nfs_server_capable(inode, NFS_CAP_ATIME)) 509 509 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); 510 510 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 511 - inode->i_mtime = timespec_to_timespec64(fattr->mtime); 511 + inode->i_mtime = fattr->mtime; 512 512 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 513 513 nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 514 514 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 515 - inode->i_ctime = timespec_to_timespec64(fattr->ctime); 515 + inode->i_ctime = fattr->ctime; 516 516 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 517 517 nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); 518 518 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) ··· 698 698 if ((attr->ia_valid & ATTR_GID) != 0) 699 699 inode->i_gid = attr->ia_gid; 700 700 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 701 - inode->i_ctime = timespec_to_timespec64(fattr->ctime); 701 + inode->i_ctime = fattr->ctime; 702 702 else 703 703 nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 704 704 | NFS_INO_INVALID_CTIME); ··· 709 709 NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME 710 710 | NFS_INO_INVALID_CTIME); 711 711 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 712 - inode->i_atime = timespec_to_timespec64(fattr->atime); 712 + inode->i_atime = fattr->atime; 713 713 else if (attr->ia_valid & ATTR_ATIME_SET) 714 714 inode->i_atime = attr->ia_atime; 715 715 else 716 716 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); 717 717 718 718 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 719 - inode->i_ctime = timespec_to_timespec64(fattr->ctime); 719 + inode->i_ctime = fattr->ctime; 720 720 else 721 721 nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 722 722 | NFS_INO_INVALID_CTIME); ··· 725 725 
NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME 726 726 | NFS_INO_INVALID_CTIME); 727 727 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 728 - inode->i_mtime = timespec_to_timespec64(fattr->mtime); 728 + inode->i_mtime = fattr->mtime; 729 729 else if (attr->ia_valid & ATTR_MTIME_SET) 730 730 inode->i_mtime = attr->ia_mtime; 731 731 else 732 732 nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); 733 733 734 734 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 735 - inode->i_ctime = timespec_to_timespec64(fattr->ctime); 735 + inode->i_ctime = fattr->ctime; 736 736 else 737 737 nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE 738 738 | NFS_INO_INVALID_CTIME); ··· 1351 1351 1352 1352 static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 1353 1353 { 1354 - struct timespec ts; 1354 + struct timespec64 ts; 1355 1355 1356 1356 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) 1357 1357 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) ··· 1361 1361 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); 1362 1362 } 1363 1363 /* If we have atomic WCC data, we may update some attributes */ 1364 - ts = timespec64_to_timespec(inode->i_ctime); 1364 + ts = inode->i_ctime; 1365 1365 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) 1366 1366 && (fattr->valid & NFS_ATTR_FATTR_CTIME) 1367 - && timespec_equal(&ts, &fattr->pre_ctime)) { 1368 - inode->i_ctime = timespec_to_timespec64(fattr->ctime); 1367 + && timespec64_equal(&ts, &fattr->pre_ctime)) { 1368 + inode->i_ctime = fattr->ctime; 1369 1369 } 1370 1370 1371 - ts = timespec64_to_timespec(inode->i_mtime); 1371 + ts = inode->i_mtime; 1372 1372 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) 1373 1373 && (fattr->valid & NFS_ATTR_FATTR_MTIME) 1374 - && timespec_equal(&ts, &fattr->pre_mtime)) { 1375 - inode->i_mtime = timespec_to_timespec64(fattr->mtime); 1374 + && timespec64_equal(&ts, &fattr->pre_mtime)) { 1375 + inode->i_mtime = fattr->mtime; 1376 1376 if (S_ISDIR(inode->i_mode)) 1377 1377 nfs_set_cache_invalid(inode, 
NFS_INO_INVALID_DATA); 1378 1378 } ··· 1398 1398 struct nfs_inode *nfsi = NFS_I(inode); 1399 1399 loff_t cur_size, new_isize; 1400 1400 unsigned long invalid = 0; 1401 - struct timespec ts; 1401 + struct timespec64 ts; 1402 1402 1403 1403 if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) 1404 1404 return 0; ··· 1425 1425 invalid |= NFS_INO_INVALID_CHANGE 1426 1426 | NFS_INO_REVAL_PAGECACHE; 1427 1427 1428 - ts = timespec64_to_timespec(inode->i_mtime); 1429 - if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime)) 1428 + ts = inode->i_mtime; 1429 + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) 1430 1430 invalid |= NFS_INO_INVALID_MTIME; 1431 1431 1432 - ts = timespec64_to_timespec(inode->i_ctime); 1433 - if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime)) 1432 + ts = inode->i_ctime; 1433 + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) 1434 1434 invalid |= NFS_INO_INVALID_CTIME; 1435 1435 1436 1436 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { ··· 1460 1460 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) 1461 1461 invalid |= NFS_INO_INVALID_OTHER; 1462 1462 1463 - ts = timespec64_to_timespec(inode->i_atime); 1464 - if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime)) 1463 + ts = inode->i_atime; 1464 + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) 1465 1465 invalid |= NFS_INO_INVALID_ATIME; 1466 1466 1467 1467 if (invalid != 0) ··· 1733 1733 } 1734 1734 if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && 1735 1735 (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { 1736 - fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime); 1736 + fattr->pre_ctime = inode->i_ctime; 1737 1737 fattr->valid |= NFS_ATTR_FATTR_PRECTIME; 1738 1738 } 1739 1739 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && 1740 1740 (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 
0) { 1741 - fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime); 1741 + fattr->pre_mtime = inode->i_mtime; 1742 1742 fattr->valid |= NFS_ATTR_FATTR_PREMTIME; 1743 1743 } 1744 1744 if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && ··· 1899 1899 } 1900 1900 1901 1901 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1902 - inode->i_mtime = timespec_to_timespec64(fattr->mtime); 1902 + inode->i_mtime = fattr->mtime; 1903 1903 } else if (server->caps & NFS_CAP_MTIME) { 1904 1904 nfsi->cache_validity |= save_cache_validity & 1905 1905 (NFS_INO_INVALID_MTIME ··· 1908 1908 } 1909 1909 1910 1910 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1911 - inode->i_ctime = timespec_to_timespec64(fattr->ctime); 1911 + inode->i_ctime = fattr->ctime; 1912 1912 } else if (server->caps & NFS_CAP_CTIME) { 1913 1913 nfsi->cache_validity |= save_cache_validity & 1914 1914 (NFS_INO_INVALID_CTIME ··· 1946 1946 1947 1947 1948 1948 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 1949 - inode->i_atime = timespec_to_timespec64(fattr->atime); 1949 + inode->i_atime = fattr->atime; 1950 1950 else if (server->caps & NFS_CAP_ATIME) { 1951 1951 nfsi->cache_validity |= save_cache_validity & 1952 1952 (NFS_INO_INVALID_ATIME
+1 -1
fs/nfs/internal.h
··· 713 713 * 1024*1024*1024. 714 714 */ 715 715 static inline 716 - u64 nfs_timespec_to_change_attr(const struct timespec *ts) 716 + u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) 717 717 { 718 718 return ((u64)ts->tv_sec << 30) + ts->tv_nsec; 719 719 }
+3
fs/nfs/namespace.c
··· 157 157 if (IS_ERR(mnt)) 158 158 goto out; 159 159 160 + if (nfs_mountpoint_expiry_timeout < 0) 161 + goto out; 162 + 160 163 mntget(mnt); /* prevent immediate expiration */ 161 164 mnt_set_expiry(mnt, &nfs_automount_list); 162 165 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+8 -13
fs/nfs/nfs2xdr.c
··· 209 209 * unsigned int useconds; 210 210 * }; 211 211 */ 212 - static __be32 *xdr_encode_time(__be32 *p, const struct timespec *timep) 212 + static __be32 *xdr_encode_time(__be32 *p, const struct timespec64 *timep) 213 213 { 214 - *p++ = cpu_to_be32(timep->tv_sec); 214 + *p++ = cpu_to_be32((u32)timep->tv_sec); 215 215 if (timep->tv_nsec != 0) 216 216 *p++ = cpu_to_be32(timep->tv_nsec / NSEC_PER_USEC); 217 217 else ··· 227 227 * Illustrated" by Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5. 228 228 */ 229 229 static __be32 *xdr_encode_current_server_time(__be32 *p, 230 - const struct timespec *timep) 230 + const struct timespec64 *timep) 231 231 { 232 232 *p++ = cpu_to_be32(timep->tv_sec); 233 233 *p++ = cpu_to_be32(1000000); 234 234 return p; 235 235 } 236 236 237 - static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep) 237 + static __be32 *xdr_decode_time(__be32 *p, struct timespec64 *timep) 238 238 { 239 239 timep->tv_sec = be32_to_cpup(p++); 240 240 timep->tv_nsec = be32_to_cpup(p++) * NSEC_PER_USEC; ··· 339 339 static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr, 340 340 struct user_namespace *userns) 341 341 { 342 - struct timespec ts; 343 342 __be32 *p; 344 343 345 344 p = xdr_reserve_space(xdr, NFS_sattr_sz << 2); ··· 361 362 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 362 363 363 364 if (attr->ia_valid & ATTR_ATIME_SET) { 364 - ts = timespec64_to_timespec(attr->ia_atime); 365 - p = xdr_encode_time(p, &ts); 365 + p = xdr_encode_time(p, &attr->ia_atime); 366 366 } else if (attr->ia_valid & ATTR_ATIME) { 367 - ts = timespec64_to_timespec(attr->ia_atime); 368 - p = xdr_encode_current_server_time(p, &ts); 367 + p = xdr_encode_current_server_time(p, &attr->ia_atime); 369 368 } else 370 369 p = xdr_time_not_set(p); 371 370 if (attr->ia_valid & ATTR_MTIME_SET) { 372 - ts = timespec64_to_timespec(attr->ia_atime); 373 - xdr_encode_time(p, &ts); 371 + xdr_encode_time(p, &attr->ia_mtime); 374 372 } else if (attr->ia_valid & 
ATTR_MTIME) { 375 - ts = timespec64_to_timespec(attr->ia_mtime); 376 - xdr_encode_current_server_time(p, &ts); 373 + xdr_encode_current_server_time(p, &attr->ia_mtime); 377 374 } else 378 375 xdr_time_not_set(p); 379 376 }
+4 -1
fs/nfs/nfs3client.c
··· 106 106 cl_init.nconnect = mds_clp->cl_nconnect; 107 107 108 108 if (mds_srv->flags & NFS_MOUNT_NORESVPORT) 109 - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 109 + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 110 + 111 + __set_bit(NFS_CS_NOPING, &cl_init.init_flags); 112 + __set_bit(NFS_CS_DS, &cl_init.init_flags); 110 113 111 114 /* Use the MDS nfs_client cl_ipaddr. */ 112 115 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
+5 -9
fs/nfs/nfs3xdr.c
··· 456 456 * uint32 nseconds; 457 457 * }; 458 458 */ 459 - static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec *timep) 459 + static __be32 *xdr_encode_nfstime3(__be32 *p, const struct timespec64 *timep) 460 460 { 461 - *p++ = cpu_to_be32(timep->tv_sec); 461 + *p++ = cpu_to_be32((u32)timep->tv_sec); 462 462 *p++ = cpu_to_be32(timep->tv_nsec); 463 463 return p; 464 464 } 465 465 466 - static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep) 466 + static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec64 *timep) 467 467 { 468 468 timep->tv_sec = be32_to_cpup(p++); 469 469 timep->tv_nsec = be32_to_cpup(p++); ··· 533 533 static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr, 534 534 struct user_namespace *userns) 535 535 { 536 - struct timespec ts; 537 536 u32 nbytes; 538 537 __be32 *p; 539 538 ··· 582 583 *p++ = xdr_zero; 583 584 584 585 if (attr->ia_valid & ATTR_ATIME_SET) { 585 - struct timespec ts; 586 586 *p++ = xdr_two; 587 - ts = timespec64_to_timespec(attr->ia_atime); 588 - p = xdr_encode_nfstime3(p, &ts); 587 + p = xdr_encode_nfstime3(p, &attr->ia_atime); 589 588 } else if (attr->ia_valid & ATTR_ATIME) { 590 589 *p++ = xdr_one; 591 590 } else ··· 591 594 592 595 if (attr->ia_valid & ATTR_MTIME_SET) { 593 596 *p++ = xdr_two; 594 - ts = timespec64_to_timespec(attr->ia_mtime); 595 - xdr_encode_nfstime3(p, &ts); 597 + xdr_encode_nfstime3(p, &attr->ia_mtime); 596 598 } else if (attr->ia_valid & ATTR_MTIME) { 597 599 *p = xdr_one; 598 600 } else
+14 -1
fs/nfs/nfs42.h
··· 13 13 #define PNFS_LAYOUTSTATS_MAXDEV (4) 14 14 15 15 /* nfs4.2proc.c */ 16 + #ifdef CONFIG_NFS_V4_2 16 17 int nfs42_proc_allocate(struct file *, loff_t, loff_t); 17 - ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t); 18 + ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t, 19 + struct nl4_server *, nfs4_stateid *, bool); 18 20 int nfs42_proc_deallocate(struct file *, loff_t, loff_t); 19 21 loff_t nfs42_proc_llseek(struct file *, loff_t, int); 20 22 int nfs42_proc_layoutstats_generic(struct nfs_server *, ··· 25 23 int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg, 26 24 const struct nfs42_layout_error *errors, 27 25 size_t n); 26 + int nfs42_proc_copy_notify(struct file *, struct file *, 27 + struct nfs42_copy_notify_res *); 28 + static inline bool nfs42_files_from_same_server(struct file *in, 29 + struct file *out) 30 + { 31 + struct nfs_client *c_in = (NFS_SERVER(file_inode(in)))->nfs_client; 32 + struct nfs_client *c_out = (NFS_SERVER(file_inode(out)))->nfs_client; 28 33 34 + return nfs4_check_serverowner_major_id(c_in->cl_serverowner, 35 + c_out->cl_serverowner); 36 + } 37 + #endif /* CONFIG_NFS_V4_2 */ 29 38 #endif /* __LINUX_FS_NFS_NFS4_2_H */
+168 -33
fs/nfs/nfs42proc.c
··· 3 3 * Copyright (c) 2014 Anna Schumaker <Anna.Schumaker@Netapp.com> 4 4 */ 5 5 #include <linux/fs.h> 6 + #include <linux/sunrpc/addr.h> 6 7 #include <linux/sunrpc/sched.h> 7 8 #include <linux/nfs.h> 8 9 #include <linux/nfs3.h> ··· 16 15 #include "pnfs.h" 17 16 #include "nfs4session.h" 18 17 #include "internal.h" 18 + #include "delegation.h" 19 19 20 20 #define NFSDBG_FACILITY NFSDBG_PROC 21 21 static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std); 22 + 23 + static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr) 24 + { 25 + struct nfs_client *clp = (NFS_SERVER(file_inode(filep)))->nfs_client; 26 + unsigned short port = 2049; 27 + 28 + rcu_read_lock(); 29 + naddr->netid_len = scnprintf(naddr->netid, 30 + sizeof(naddr->netid), "%s", 31 + rpc_peeraddr2str(clp->cl_rpcclient, 32 + RPC_DISPLAY_NETID)); 33 + naddr->addr_len = scnprintf(naddr->addr, 34 + sizeof(naddr->addr), 35 + "%s.%u.%u", 36 + rpc_peeraddr2str(clp->cl_rpcclient, 37 + RPC_DISPLAY_ADDR), 38 + port >> 8, port & 255); 39 + rcu_read_unlock(); 40 + } 22 41 23 42 static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, 24 43 struct nfs_lock_context *lock, loff_t offset, loff_t len) ··· 49 28 .falloc_fh = NFS_FH(inode), 50 29 .falloc_offset = offset, 51 30 .falloc_length = len, 52 - .falloc_bitmask = server->cache_consistency_bitmask, 31 + .falloc_bitmask = nfs4_fattr_bitmap, 53 32 }; 54 33 struct nfs42_falloc_res res = { 55 34 .falloc_server = server, ··· 153 132 } 154 133 155 134 static int handle_async_copy(struct nfs42_copy_res *res, 156 - struct nfs_server *server, 135 + struct nfs_server *dst_server, 136 + struct nfs_server *src_server, 157 137 struct file *src, 158 138 struct file *dst, 159 - nfs4_stateid *src_stateid) 139 + nfs4_stateid *src_stateid, 140 + bool *restart) 160 141 { 161 142 struct nfs4_copy_state *copy, *tmp_copy; 162 143 int status = NFS4_OK; 163 144 bool found_pending = false; 164 - struct nfs_open_context *ctx = 
nfs_file_open_context(dst); 145 + struct nfs_open_context *dst_ctx = nfs_file_open_context(dst); 146 + struct nfs_open_context *src_ctx = nfs_file_open_context(src); 165 147 166 148 copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS); 167 149 if (!copy) 168 150 return -ENOMEM; 169 151 170 - spin_lock(&server->nfs_client->cl_lock); 171 - list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids, 152 + spin_lock(&dst_server->nfs_client->cl_lock); 153 + list_for_each_entry(tmp_copy, 154 + &dst_server->nfs_client->pending_cb_stateids, 172 155 copies) { 173 156 if (memcmp(&res->write_res.stateid, &tmp_copy->stateid, 174 157 NFS4_STATEID_SIZE)) ··· 182 157 break; 183 158 } 184 159 if (found_pending) { 185 - spin_unlock(&server->nfs_client->cl_lock); 160 + spin_unlock(&dst_server->nfs_client->cl_lock); 186 161 kfree(copy); 187 162 copy = tmp_copy; 188 163 goto out; ··· 190 165 191 166 memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE); 192 167 init_completion(&copy->completion); 193 - copy->parent_state = ctx->state; 168 + copy->parent_dst_state = dst_ctx->state; 169 + copy->parent_src_state = src_ctx->state; 194 170 195 - list_add_tail(&copy->copies, &server->ss_copies); 196 - spin_unlock(&server->nfs_client->cl_lock); 171 + list_add_tail(&copy->copies, &dst_server->ss_copies); 172 + spin_unlock(&dst_server->nfs_client->cl_lock); 173 + 174 + if (dst_server != src_server) { 175 + spin_lock(&src_server->nfs_client->cl_lock); 176 + list_add_tail(&copy->src_copies, &src_server->ss_copies); 177 + spin_unlock(&src_server->nfs_client->cl_lock); 178 + } 197 179 198 180 status = wait_for_completion_interruptible(&copy->completion); 199 - spin_lock(&server->nfs_client->cl_lock); 181 + spin_lock(&dst_server->nfs_client->cl_lock); 200 182 list_del_init(&copy->copies); 201 - spin_unlock(&server->nfs_client->cl_lock); 183 + spin_unlock(&dst_server->nfs_client->cl_lock); 184 + if (dst_server != src_server) { 185 + 
spin_lock(&src_server->nfs_client->cl_lock); 186 + list_del_init(&copy->src_copies); 187 + spin_unlock(&src_server->nfs_client->cl_lock); 188 + } 202 189 if (status == -ERESTARTSYS) { 203 190 goto out_cancel; 204 - } else if (copy->flags) { 191 + } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) { 205 192 status = -EAGAIN; 193 + *restart = true; 206 194 goto out_cancel; 207 195 } 208 196 out: ··· 223 185 memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf)); 224 186 status = -copy->error; 225 187 188 + out_free: 226 189 kfree(copy); 227 190 return status; 228 191 out_cancel: 229 192 nfs42_do_offload_cancel_async(dst, &copy->stateid); 230 - kfree(copy); 231 - return status; 193 + if (!nfs42_files_from_same_server(src, dst)) 194 + nfs42_do_offload_cancel_async(src, src_stateid); 195 + goto out_free; 232 196 } 233 197 234 198 static int process_copy_commit(struct file *dst, loff_t pos_dst, ··· 262 222 struct file *dst, 263 223 struct nfs_lock_context *dst_lock, 264 224 struct nfs42_copy_args *args, 265 - struct nfs42_copy_res *res) 225 + struct nfs42_copy_res *res, 226 + struct nl4_server *nss, 227 + nfs4_stateid *cnr_stateid, 228 + bool *restart) 266 229 { 267 230 struct rpc_message msg = { 268 231 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], ··· 273 230 .rpc_resp = res, 274 231 }; 275 232 struct inode *dst_inode = file_inode(dst); 276 - struct nfs_server *server = NFS_SERVER(dst_inode); 233 + struct inode *src_inode = file_inode(src); 234 + struct nfs_server *dst_server = NFS_SERVER(dst_inode); 235 + struct nfs_server *src_server = NFS_SERVER(src_inode); 277 236 loff_t pos_src = args->src_pos; 278 237 loff_t pos_dst = args->dst_pos; 279 238 size_t count = args->count; 280 239 ssize_t status; 281 240 282 - status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, 283 - src_lock, FMODE_READ); 284 - if (status) 285 - return status; 286 - 241 + if (nss) { 242 + args->cp_src = nss; 243 + 
nfs4_stateid_copy(&args->src_stateid, cnr_stateid); 244 + } else { 245 + status = nfs4_set_rw_stateid(&args->src_stateid, 246 + src_lock->open_context, src_lock, FMODE_READ); 247 + if (status) 248 + return status; 249 + } 287 250 status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, 288 251 pos_src, pos_src + (loff_t)count - 1); 289 252 if (status) ··· 311 262 if (!res->commit_res.verf) 312 263 return -ENOMEM; 313 264 } 265 + set_bit(NFS_CLNT_SRC_SSC_COPY_STATE, 266 + &src_lock->open_context->state->flags); 314 267 set_bit(NFS_CLNT_DST_SSC_COPY_STATE, 315 268 &dst_lock->open_context->state->flags); 316 269 317 - status = nfs4_call_sync(server->client, server, &msg, 270 + status = nfs4_call_sync(dst_server->client, dst_server, &msg, 318 271 &args->seq_args, &res->seq_res, 0); 319 272 if (status == -ENOTSUPP) 320 - server->caps &= ~NFS_CAP_COPY; 273 + dst_server->caps &= ~NFS_CAP_COPY; 321 274 if (status) 322 275 goto out; 323 276 ··· 331 280 } 332 281 333 282 if (!res->synchronous) { 334 - status = handle_async_copy(res, server, src, dst, 335 - &args->src_stateid); 283 + status = handle_async_copy(res, dst_server, src_server, src, 284 + dst, &args->src_stateid, restart); 336 285 if (status) 337 286 return status; 338 287 } ··· 355 304 } 356 305 357 306 ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, 358 - struct file *dst, loff_t pos_dst, 359 - size_t count) 307 + struct file *dst, loff_t pos_dst, size_t count, 308 + struct nl4_server *nss, 309 + nfs4_stateid *cnr_stateid, bool sync) 360 310 { 361 311 struct nfs_server *server = NFS_SERVER(file_inode(dst)); 362 312 struct nfs_lock_context *src_lock; ··· 368 316 .dst_fh = NFS_FH(file_inode(dst)), 369 317 .dst_pos = pos_dst, 370 318 .count = count, 371 - .sync = false, 319 + .sync = sync, 372 320 }; 373 321 struct nfs42_copy_res res; 374 322 struct nfs4_exception src_exception = { ··· 380 328 .stateid = &args.dst_stateid, 381 329 }; 382 330 ssize_t err, err2; 331 + bool restart = false; 383 
332 384 333 src_lock = nfs_get_lock_context(nfs_file_open_context(src)); 385 334 if (IS_ERR(src_lock)) ··· 400 347 inode_lock(file_inode(dst)); 401 348 err = _nfs42_proc_copy(src, src_lock, 402 349 dst, dst_lock, 403 - &args, &res); 350 + &args, &res, 351 + nss, cnr_stateid, &restart); 404 352 inode_unlock(file_inode(dst)); 405 353 406 354 if (err >= 0) 407 355 break; 408 - if (err == -ENOTSUPP) { 356 + if (err == -ENOTSUPP && 357 + nfs42_files_from_same_server(src, dst)) { 409 358 err = -EOPNOTSUPP; 410 359 break; 411 360 } else if (err == -EAGAIN) { 412 - dst_exception.retry = 1; 413 - continue; 361 + if (!restart) { 362 + dst_exception.retry = 1; 363 + continue; 364 + } 365 + break; 414 366 } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) { 415 367 args.sync = true; 416 368 dst_exception.retry = 1; 417 369 continue; 370 + } else if ((err == -ESTALE || 371 + err == -NFS4ERR_OFFLOAD_DENIED || 372 + err == -ENOTSUPP) && 373 + !nfs42_files_from_same_server(src, dst)) { 374 + nfs42_do_offload_cancel_async(src, &args.src_stateid); 375 + err = -EOPNOTSUPP; 376 + break; 418 377 } 419 378 420 379 err2 = nfs4_handle_exception(server, err, &src_exception); ··· 521 456 if (status == -ENOTSUPP) 522 457 dst_server->caps &= ~NFS_CAP_OFFLOAD_CANCEL; 523 458 rpc_put_task(task); 459 + return status; 460 + } 461 + 462 + static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, 463 + struct nfs42_copy_notify_args *args, 464 + struct nfs42_copy_notify_res *res) 465 + { 466 + struct nfs_server *src_server = NFS_SERVER(file_inode(src)); 467 + struct rpc_message msg = { 468 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY_NOTIFY], 469 + .rpc_argp = args, 470 + .rpc_resp = res, 471 + }; 472 + int status; 473 + struct nfs_open_context *ctx; 474 + struct nfs_lock_context *l_ctx; 475 + 476 + ctx = get_nfs_open_context(nfs_file_open_context(src)); 477 + l_ctx = nfs_get_lock_context(ctx); 478 + if (IS_ERR(l_ctx)) 479 + return PTR_ERR(l_ctx); 480 + 481 + status = 
nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, 482 + FMODE_READ); 483 + nfs_put_lock_context(l_ctx); 484 + if (status) 485 + return status; 486 + 487 + status = nfs4_call_sync(src_server->client, src_server, &msg, 488 + &args->cna_seq_args, &res->cnr_seq_res, 0); 489 + if (status == -ENOTSUPP) 490 + src_server->caps &= ~NFS_CAP_COPY_NOTIFY; 491 + 492 + put_nfs_open_context(nfs_file_open_context(src)); 493 + return status; 494 + } 495 + 496 + int nfs42_proc_copy_notify(struct file *src, struct file *dst, 497 + struct nfs42_copy_notify_res *res) 498 + { 499 + struct nfs_server *src_server = NFS_SERVER(file_inode(src)); 500 + struct nfs42_copy_notify_args *args; 501 + struct nfs4_exception exception = { 502 + .inode = file_inode(src), 503 + }; 504 + int status; 505 + 506 + if (!(src_server->caps & NFS_CAP_COPY_NOTIFY)) 507 + return -EOPNOTSUPP; 508 + 509 + args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS); 510 + if (args == NULL) 511 + return -ENOMEM; 512 + 513 + args->cna_src_fh = NFS_FH(file_inode(src)), 514 + args->cna_dst.nl4_type = NL4_NETADDR; 515 + nfs42_set_netaddr(dst, &args->cna_dst.u.nl4_addr); 516 + exception.stateid = &args->cna_src_stateid; 517 + 518 + do { 519 + status = _nfs42_proc_copy_notify(src, dst, args, res); 520 + if (status == -ENOTSUPP) { 521 + status = -EOPNOTSUPP; 522 + goto out; 523 + } 524 + status = nfs4_handle_exception(src_server, status, &exception); 525 + } while (exception.retry); 526 + 527 + out: 528 + kfree(args); 524 529 return status; 525 530 } 526 531
+188 -2
fs/nfs/nfs42xdr.c
··· 21 21 #define encode_copy_maxsz (op_encode_hdr_maxsz + \ 22 22 XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 23 23 XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 24 - 2 + 2 + 2 + 1 + 1 + 1) 24 + 2 + 2 + 2 + 1 + 1 + 1 +\ 25 + 1 + /* One cnr_source_server */\ 26 + 1 + /* nl4_type */ \ 27 + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) 25 28 #define decode_copy_maxsz (op_decode_hdr_maxsz + \ 26 29 NFS42_WRITE_RES_SIZE + \ 27 30 1 /* cr_consecutive */ + \ ··· 32 29 #define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \ 33 30 XDR_QUADLEN(NFS4_STATEID_SIZE)) 34 31 #define decode_offload_cancel_maxsz (op_decode_hdr_maxsz) 32 + #define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \ 33 + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 34 + 1 + /* nl4_type */ \ 35 + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) 36 + #define decode_copy_notify_maxsz (op_decode_hdr_maxsz + \ 37 + 3 + /* cnr_lease_time */\ 38 + XDR_QUADLEN(NFS4_STATEID_SIZE) + \ 39 + 1 + /* Support 1 cnr_source_server */\ 40 + 1 + /* nl4_type */ \ 41 + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT)) 35 42 #define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ 36 43 encode_fallocate_maxsz) 37 44 #define decode_deallocate_maxsz (op_decode_hdr_maxsz) ··· 112 99 decode_sequence_maxsz + \ 113 100 decode_putfh_maxsz + \ 114 101 decode_offload_cancel_maxsz) 102 + #define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \ 103 + encode_putfh_maxsz + \ 104 + encode_copy_notify_maxsz) 105 + #define NFS4_dec_copy_notify_sz (compound_decode_hdr_maxsz + \ 106 + decode_putfh_maxsz + \ 107 + decode_copy_notify_maxsz) 115 108 #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ 116 109 encode_sequence_maxsz + \ 117 110 encode_putfh_maxsz + \ ··· 185 166 encode_fallocate(xdr, args); 186 167 } 187 168 169 + static void encode_nl4_server(struct xdr_stream *xdr, 170 + const struct nl4_server *ns) 171 + { 172 + encode_uint32(xdr, ns->nl4_type); 173 + switch (ns->nl4_type) { 174 + case NL4_NAME: 175 + case NL4_URL: 176 + encode_string(xdr, ns->u.nl4_str_sz, ns->u.nl4_str); 
177 + break; 178 + case NL4_NETADDR: 179 + encode_string(xdr, ns->u.nl4_addr.netid_len, 180 + ns->u.nl4_addr.netid); 181 + encode_string(xdr, ns->u.nl4_addr.addr_len, 182 + ns->u.nl4_addr.addr); 183 + break; 184 + default: 185 + WARN_ON_ONCE(1); 186 + } 187 + } 188 + 188 189 static void encode_copy(struct xdr_stream *xdr, 189 190 const struct nfs42_copy_args *args, 190 191 struct compound_hdr *hdr) ··· 219 180 220 181 encode_uint32(xdr, 1); /* consecutive = true */ 221 182 encode_uint32(xdr, args->sync); 222 - encode_uint32(xdr, 0); /* src server list */ 183 + if (args->cp_src == NULL) { /* intra-ssc */ 184 + encode_uint32(xdr, 0); /* no src server list */ 185 + return; 186 + } 187 + encode_uint32(xdr, 1); /* supporting 1 server */ 188 + encode_nl4_server(xdr, args->cp_src); 223 189 } 224 190 225 191 static void encode_offload_cancel(struct xdr_stream *xdr, ··· 233 189 { 234 190 encode_op_hdr(xdr, OP_OFFLOAD_CANCEL, decode_offload_cancel_maxsz, hdr); 235 191 encode_nfs4_stateid(xdr, &args->osa_stateid); 192 + } 193 + 194 + static void encode_copy_notify(struct xdr_stream *xdr, 195 + const struct nfs42_copy_notify_args *args, 196 + struct compound_hdr *hdr) 197 + { 198 + encode_op_hdr(xdr, OP_COPY_NOTIFY, decode_copy_notify_maxsz, hdr); 199 + encode_nfs4_stateid(xdr, &args->cna_src_stateid); 200 + encode_nl4_server(xdr, &args->cna_dst); 236 201 } 237 202 238 203 static void encode_deallocate(struct xdr_stream *xdr, ··· 408 355 } 409 356 410 357 /* 358 + * Encode COPY_NOTIFY request 359 + */ 360 + static void nfs4_xdr_enc_copy_notify(struct rpc_rqst *req, 361 + struct xdr_stream *xdr, 362 + const void *data) 363 + { 364 + const struct nfs42_copy_notify_args *args = data; 365 + struct compound_hdr hdr = { 366 + .minorversion = nfs4_xdr_minorversion(&args->cna_seq_args), 367 + }; 368 + 369 + encode_compound_hdr(xdr, req, &hdr); 370 + encode_sequence(xdr, &args->cna_seq_args, &hdr); 371 + encode_putfh(xdr, args->cna_src_fh, &hdr); 372 + encode_copy_notify(xdr, args, 
&hdr); 373 + encode_nops(&hdr); 374 + } 375 + 376 + /* 411 377 * Encode DEALLOCATE request 412 378 */ 413 379 static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, ··· 562 490 return decode_verifier(xdr, &res->verifier.verifier); 563 491 } 564 492 493 + static int decode_nl4_server(struct xdr_stream *xdr, struct nl4_server *ns) 494 + { 495 + struct nfs42_netaddr *naddr; 496 + uint32_t dummy; 497 + char *dummy_str; 498 + __be32 *p; 499 + int status; 500 + 501 + /* nl_type */ 502 + p = xdr_inline_decode(xdr, 4); 503 + if (unlikely(!p)) 504 + return -EIO; 505 + ns->nl4_type = be32_to_cpup(p); 506 + switch (ns->nl4_type) { 507 + case NL4_NAME: 508 + case NL4_URL: 509 + status = decode_opaque_inline(xdr, &dummy, &dummy_str); 510 + if (unlikely(status)) 511 + return status; 512 + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) 513 + return -EIO; 514 + memcpy(&ns->u.nl4_str, dummy_str, dummy); 515 + ns->u.nl4_str_sz = dummy; 516 + break; 517 + case NL4_NETADDR: 518 + naddr = &ns->u.nl4_addr; 519 + 520 + /* netid string */ 521 + status = decode_opaque_inline(xdr, &dummy, &dummy_str); 522 + if (unlikely(status)) 523 + return status; 524 + if (unlikely(dummy > RPCBIND_MAXNETIDLEN)) 525 + return -EIO; 526 + naddr->netid_len = dummy; 527 + memcpy(naddr->netid, dummy_str, naddr->netid_len); 528 + 529 + /* uaddr string */ 530 + status = decode_opaque_inline(xdr, &dummy, &dummy_str); 531 + if (unlikely(status)) 532 + return status; 533 + if (unlikely(dummy > RPCBIND_MAXUADDRLEN)) 534 + return -EIO; 535 + naddr->addr_len = dummy; 536 + memcpy(naddr->addr, dummy_str, naddr->addr_len); 537 + break; 538 + default: 539 + WARN_ON_ONCE(1); 540 + return -EIO; 541 + } 542 + return 0; 543 + } 544 + 565 545 static int decode_copy_requirements(struct xdr_stream *xdr, 566 546 struct nfs42_copy_res *res) { 567 547 __be32 *p; ··· 651 527 struct nfs42_offload_status_res *res) 652 528 { 653 529 return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL); 530 + } 531 + 532 + static int decode_copy_notify(struct 
xdr_stream *xdr, 533 + struct nfs42_copy_notify_res *res) 534 + { 535 + __be32 *p; 536 + int status, count; 537 + 538 + status = decode_op_hdr(xdr, OP_COPY_NOTIFY); 539 + if (status) 540 + return status; 541 + /* cnr_lease_time */ 542 + p = xdr_inline_decode(xdr, 12); 543 + if (unlikely(!p)) 544 + return -EIO; 545 + p = xdr_decode_hyper(p, &res->cnr_lease_time.seconds); 546 + res->cnr_lease_time.nseconds = be32_to_cpup(p); 547 + 548 + status = decode_opaque_fixed(xdr, &res->cnr_stateid, NFS4_STATEID_SIZE); 549 + if (unlikely(status)) 550 + return -EIO; 551 + 552 + /* number of source addresses */ 553 + p = xdr_inline_decode(xdr, 4); 554 + if (unlikely(!p)) 555 + return -EIO; 556 + 557 + count = be32_to_cpup(p); 558 + if (count > 1) 559 + pr_warn("NFS: %s: nsvr %d > Supported. Use first servers\n", 560 + __func__, count); 561 + 562 + status = decode_nl4_server(xdr, &res->cnr_src); 563 + if (unlikely(status)) 564 + return -EIO; 565 + return 0; 654 566 } 655 567 656 568 static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) ··· 811 651 if (status) 812 652 goto out; 813 653 status = decode_offload_cancel(xdr, res); 654 + 655 + out: 656 + return status; 657 + } 658 + 659 + /* 660 + * Decode COPY_NOTIFY response 661 + */ 662 + static int nfs4_xdr_dec_copy_notify(struct rpc_rqst *rqstp, 663 + struct xdr_stream *xdr, 664 + void *data) 665 + { 666 + struct nfs42_copy_notify_res *res = data; 667 + struct compound_hdr hdr; 668 + int status; 669 + 670 + status = decode_compound_hdr(xdr, &hdr); 671 + if (status) 672 + goto out; 673 + status = decode_sequence(xdr, &res->cnr_seq_res, rqstp); 674 + if (status) 675 + goto out; 676 + status = decode_putfh(xdr); 677 + if (status) 678 + goto out; 679 + status = decode_copy_notify(xdr, res); 814 680 815 681 out: 816 682 return status;
+19 -2
fs/nfs/nfs4_fs.h
··· 166 166 NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ 167 167 NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */ 168 168 NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */ 169 - #ifdef CONFIG_NFS_V4_2 170 169 NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client*/ 171 - #endif /* CONFIG_NFS_V4_2 */ 170 + NFS_CLNT_SRC_SSC_COPY_STATE, /* src server open state on client*/ 171 + NFS_SRV_SSC_COPY_STATE, /* ssc state on the dst server */ 172 172 }; 173 173 174 174 struct nfs4_state { ··· 311 311 const struct nfs_open_context *ctx, 312 312 const struct nfs_lock_context *l_ctx, 313 313 fmode_t fmode); 314 + extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 315 + struct nfs_fattr *fattr, struct nfs4_label *label, 316 + struct inode *inode); 317 + extern int update_open_stateid(struct nfs4_state *state, 318 + const nfs4_stateid *open_stateid, 319 + const nfs4_stateid *deleg_stateid, 320 + fmode_t fmode); 314 321 315 322 extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 316 323 struct nfs_fsinfo *fsinfo); ··· 452 445 453 446 454 447 /* nfs4state.c */ 448 + extern const nfs4_stateid current_stateid; 449 + 455 450 const struct cred *nfs4_get_clid_cred(struct nfs_client *clp); 456 451 const struct cred *nfs4_get_machine_cred(struct nfs_client *clp); 457 452 const struct cred *nfs4_get_renew_cred(struct nfs_client *clp); ··· 466 457 struct nfs_client **, const struct cred *); 467 458 extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); 468 459 extern void nfs41_notify_server(struct nfs_client *); 460 + bool nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, 461 + struct nfs41_server_owner *o2); 469 462 #else 470 463 static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) 471 464 { ··· 581 570 static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2) 582 571 { 583 572 return 
(s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; 573 + } 574 + 575 + static inline bool nfs4_stateid_match_or_older(const nfs4_stateid *dst, const nfs4_stateid *src) 576 + { 577 + return nfs4_stateid_match_other(dst, src) && 578 + !(src->seqid && nfs4_stateid_is_newer(dst, src)); 584 579 } 585 580 586 581 static inline void nfs4_stateid_seqid_inc(nfs4_stateid *s1)
+8 -5
fs/nfs/nfs4client.c
··· 629 629 /* 630 630 * Returns true if the server major ids match 631 631 */ 632 - static bool 632 + bool 633 633 nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, 634 634 struct nfs41_server_owner *o2) 635 635 { ··· 879 879 }; 880 880 struct nfs_client *clp; 881 881 882 - if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP) 882 + if (minorversion == 0) 883 + __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); 884 + else if (proto == XPRT_TRANSPORT_TCP) 883 885 cl_init.nconnect = nconnect; 886 + 884 887 if (server->flags & NFS_MOUNT_NORESVPORT) 885 - set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 888 + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 886 889 if (server->options & NFS_OPTION_MIGRATION) 887 - set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); 890 + __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); 888 891 if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status)) 889 - set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); 892 + __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); 890 893 server->port = rpc_get_port(addr); 891 894 892 895 /* Allocate or find a client reference we can use */
+139 -2
fs/nfs/nfs4file.c
··· 133 133 struct file *file_out, loff_t pos_out, 134 134 size_t count, unsigned int flags) 135 135 { 136 + struct nfs42_copy_notify_res *cn_resp = NULL; 137 + struct nl4_server *nss = NULL; 138 + nfs4_stateid *cnrs = NULL; 139 + ssize_t ret; 140 + bool sync = false; 141 + 136 142 /* Only offload copy if superblock is the same */ 137 - if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) 143 + if (file_in->f_op != &nfs4_file_operations) 138 144 return -EXDEV; 139 145 if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) 140 146 return -EOPNOTSUPP; 141 147 if (file_inode(file_in) == file_inode(file_out)) 142 148 return -EOPNOTSUPP; 143 - return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); 149 + /* if the copy size is smaller than 2 RPC payloads, make it 150 + * synchronous 151 + */ 152 + if (count <= 2 * NFS_SERVER(file_inode(file_in))->rsize) 153 + sync = true; 154 + retry: 155 + if (!nfs42_files_from_same_server(file_in, file_out)) { 156 + /* for inter copy, if copy size is smaller than 14 RPC 157 + * payloads, fall back to traditional copy. There are 158 + * 14 RPCs during an NFSv4.x mount between source/dest 159 + * servers.
160 + */ 161 + if (sync || 162 + count <= 14 * NFS_SERVER(file_inode(file_in))->rsize) 163 + return -EOPNOTSUPP; 164 + cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res), 165 + GFP_NOFS); 166 + if (unlikely(cn_resp == NULL)) 167 + return -ENOMEM; 168 + 169 + ret = nfs42_proc_copy_notify(file_in, file_out, cn_resp); 170 + if (ret) { 171 + ret = -EOPNOTSUPP; 172 + goto out; 173 + } 174 + nss = &cn_resp->cnr_src; 175 + cnrs = &cn_resp->cnr_stateid; 176 + } 177 + ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count, 178 + nss, cnrs, sync); 179 + out: 180 + if (!nfs42_files_from_same_server(file_in, file_out)) 181 + kfree(cn_resp); 182 + if (ret == -EAGAIN) 183 + goto retry; 184 + return ret; 144 185 } 145 186 146 187 static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ··· 304 263 out: 305 264 return ret < 0 ? ret : count; 306 265 } 266 + 267 + static int read_name_gen = 1; 268 + #define SSC_READ_NAME_BODY "ssc_read_%d" 269 + 270 + struct file * 271 + nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, 272 + nfs4_stateid *stateid) 273 + { 274 + struct nfs_fattr fattr; 275 + struct file *filep, *res; 276 + struct nfs_server *server; 277 + struct inode *r_ino = NULL; 278 + struct nfs_open_context *ctx; 279 + struct nfs4_state_owner *sp; 280 + char *read_name = NULL; 281 + int len, status = 0; 282 + 283 + server = NFS_SERVER(ss_mnt->mnt_root->d_inode); 284 + 285 + nfs_fattr_init(&fattr); 286 + 287 + status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL); 288 + if (status < 0) { 289 + res = ERR_PTR(status); 290 + goto out; 291 + } 292 + 293 + res = ERR_PTR(-ENOMEM); 294 + len = strlen(SSC_READ_NAME_BODY) + 16; 295 + read_name = kzalloc(len, GFP_NOFS); 296 + if (read_name == NULL) 297 + goto out; 298 + snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); 299 + 300 + r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr, 301 + NULL); 302 + if (IS_ERR(r_ino)) { 303 + res = ERR_CAST(r_ino); 304 + goto 
out_free_name; 305 + } 306 + 307 + filep = alloc_file_pseudo(r_ino, ss_mnt, read_name, FMODE_READ, 308 + r_ino->i_fop); 309 + if (IS_ERR(filep)) { 310 + res = ERR_CAST(filep); 311 + goto out_free_name; 312 + } 313 + filep->f_mode |= FMODE_READ; 314 + 315 + ctx = alloc_nfs_open_context(filep->f_path.dentry, filep->f_mode, 316 + filep); 317 + if (IS_ERR(ctx)) { 318 + res = ERR_CAST(ctx); 319 + goto out_filep; 320 + } 321 + 322 + res = ERR_PTR(-EINVAL); 323 + sp = nfs4_get_state_owner(server, ctx->cred, GFP_KERNEL); 324 + if (sp == NULL) 325 + goto out_ctx; 326 + 327 + ctx->state = nfs4_get_open_state(r_ino, sp); 328 + if (ctx->state == NULL) 329 + goto out_stateowner; 330 + 331 + set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags); 332 + set_bit(NFS_OPEN_STATE, &ctx->state->flags); 333 + memcpy(&ctx->state->open_stateid.other, &stateid->other, 334 + NFS4_STATEID_OTHER_SIZE); 335 + update_open_stateid(ctx->state, stateid, NULL, filep->f_mode); 336 + 337 + nfs_file_set_open_context(filep, ctx); 338 + put_nfs_open_context(ctx); 339 + 340 + file_ra_state_init(&filep->f_ra, filep->f_mapping->host->i_mapping); 341 + res = filep; 342 + out_free_name: 343 + kfree(read_name); 344 + out: 345 + return res; 346 + out_stateowner: 347 + nfs4_put_state_owner(sp); 348 + out_ctx: 349 + put_nfs_open_context(ctx); 350 + out_filep: 351 + fput(filep); 352 + goto out_free_name; 353 + } 354 + EXPORT_SYMBOL_GPL(nfs42_ssc_open); 355 + void nfs42_ssc_close(struct file *filep) 356 + { 357 + struct nfs_open_context *ctx = nfs_file_open_context(filep); 358 + 359 + ctx->state->flags = 0; 360 + } 361 + EXPORT_SYMBOL_GPL(nfs42_ssc_close); 307 362 #endif /* CONFIG_NFS_V4_2 */ 308 363 309 364 const struct file_operations nfs4_file_operations = {
+46 -23
fs/nfs/nfs4proc.c
··· 91 91 static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 92 92 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 93 93 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); 94 - static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode); 95 94 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); 96 95 static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, 97 96 struct nfs_fattr *fattr, struct iattr *sattr, ··· 475 476 case -NFS4ERR_ADMIN_REVOKED: 476 477 case -NFS4ERR_EXPIRED: 477 478 case -NFS4ERR_BAD_STATEID: 479 + case -NFS4ERR_PARTNER_NO_AUTH: 478 480 if (inode != NULL && stateid != NULL) { 479 481 nfs_inode_find_state_and_recover(inode, 480 482 stateid); ··· 521 521 case -NFS4ERR_DEADSESSION: 522 522 case -NFS4ERR_SEQ_FALSE_RETRY: 523 523 case -NFS4ERR_SEQ_MISORDERED: 524 - dprintk("%s ERROR: %d Reset session\n", __func__, 525 - errorcode); 526 - nfs4_schedule_session_recovery(clp->cl_session, errorcode); 524 + /* Handled in nfs41_sequence_process() */ 527 525 goto wait_on_recovery; 528 526 #endif /* defined(CONFIG_NFS_V4_1) */ 529 527 case -NFS4ERR_FILE_OPEN: ··· 780 782 struct nfs4_session *session; 781 783 struct nfs4_slot *slot = res->sr_slot; 782 784 struct nfs_client *clp; 785 + int status; 783 786 int ret = 1; 784 787 785 788 if (slot == NULL) ··· 792 793 session = slot->table->session; 793 794 794 795 trace_nfs4_sequence_done(session, res); 796 + 797 + status = res->sr_status; 798 + if (task->tk_status == -NFS4ERR_DEADSESSION) 799 + status = -NFS4ERR_DEADSESSION; 800 + 795 801 /* Check the SEQUENCE operation status */ 796 - switch (res->sr_status) { 802 + switch (status) { 797 803 case 0: 798 804 /* Mark this sequence number as having been acked */ 799 805 nfs4_slot_sequence_acked(slot, 
slot->seq_nr); ··· 870 866 */ 871 867 slot->seq_nr = slot->seq_nr_highest_sent; 872 868 goto out_retry; 869 + case -NFS4ERR_BADSESSION: 870 + case -NFS4ERR_DEADSESSION: 871 + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 872 + goto session_recover; 873 873 default: 874 874 /* Just update the slot sequence no. */ 875 875 slot->seq_done = 1; ··· 884 876 out_noaction: 885 877 return ret; 886 878 session_recover: 887 - nfs4_schedule_session_recovery(session, res->sr_status); 888 - goto retry_nowait; 879 + nfs4_schedule_session_recovery(session, status); 880 + dprintk("%s ERROR: %d Reset session\n", __func__, status); 881 + nfs41_sequence_free_slot(res); 882 + goto out; 889 883 retry_new_seq: 890 884 ++slot->seq_nr; 891 885 retry_nowait: ··· 1726 1716 write_sequnlock(&state->seqlock); 1727 1717 } 1728 1718 1729 - static int update_open_stateid(struct nfs4_state *state, 1719 + int update_open_stateid(struct nfs4_state *state, 1730 1720 const nfs4_stateid *open_stateid, 1731 1721 const nfs4_stateid *delegation, 1732 1722 fmode_t fmode) ··· 1747 1737 ret = 1; 1748 1738 } 1749 1739 1750 - deleg_cur = rcu_dereference(nfsi->delegation); 1740 + deleg_cur = nfs4_get_valid_delegation(state->inode); 1751 1741 if (deleg_cur == NULL) 1752 1742 goto no_delegation; 1753 1743 ··· 1759 1749 1760 1750 if (delegation == NULL) 1761 1751 delegation = &deleg_cur->stateid; 1762 - else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) 1752 + else if (!nfs4_stateid_match_other(&deleg_cur->stateid, delegation)) 1763 1753 goto no_delegation_unlock; 1764 1754 1765 1755 nfs_mark_delegation_referenced(deleg_cur); ··· 1806 1796 1807 1797 fmode &= FMODE_READ|FMODE_WRITE; 1808 1798 rcu_read_lock(); 1809 - delegation = rcu_dereference(NFS_I(inode)->delegation); 1799 + delegation = nfs4_get_valid_delegation(inode); 1810 1800 if (delegation == NULL || (delegation->type & fmode) == fmode) { 1811 1801 rcu_read_unlock(); 1812 1802 return; ··· 2198 2188 case -NFS4ERR_BAD_HIGH_SLOT: 2199 2189 case 
-NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 2200 2190 case -NFS4ERR_DEADSESSION: 2201 - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 2202 2191 return -EAGAIN; 2203 2192 case -NFS4ERR_STALE_CLIENTID: 2204 2193 case -NFS4ERR_STALE_STATEID: ··· 4071 4062 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 4072 4063 } 4073 4064 4074 - static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 4065 + int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 4075 4066 struct nfs_fattr *fattr, struct nfs4_label *label, 4076 4067 struct inode *inode) 4077 4068 { ··· 5107 5098 const struct nfs_lock_context *l_ctx, 5108 5099 fmode_t fmode) 5109 5100 { 5110 - nfs4_stateid current_stateid; 5101 + nfs4_stateid _current_stateid; 5111 5102 5112 5103 /* If the current stateid represents a lost lock, then exit */ 5113 - if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO) 5104 + if (nfs4_set_rw_stateid(&_current_stateid, ctx, l_ctx, fmode) == -EIO) 5114 5105 return true; 5115 - return nfs4_stateid_match(stateid, &current_stateid); 5106 + return nfs4_stateid_match(stateid, &_current_stateid); 5116 5107 } 5117 5108 5118 5109 static bool nfs4_error_stateid_expired(int err) ··· 6205 6196 task->tk_status = 0; 6206 6197 break; 6207 6198 case -NFS4ERR_OLD_STATEID: 6208 - if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) 6209 - goto out_restart; 6210 - task->tk_status = 0; 6211 - break; 6199 + if (!nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) 6200 + nfs4_stateid_seqid_inc(&data->stateid); 6201 + if (data->args.bitmask) { 6202 + data->args.bitmask = NULL; 6203 + data->res.fattr = NULL; 6204 + } 6205 + goto out_restart; 6212 6206 case -NFS4ERR_ACCESS: 6213 6207 if (data->args.bitmask) { 6214 6208 data->args.bitmask = NULL; ··· 6226 6214 if (exception.retry) 6227 6215 goto out_restart; 6228 6216 } 6217 + nfs_delegation_mark_returned(data->inode, 
data->args.stateid); 6229 6218 data->rpc_status = task->tk_status; 6230 6219 return; 6231 6220 out_restart: ··· 6256 6243 6257 6244 d_data = (struct nfs4_delegreturndata *)data; 6258 6245 6259 - if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) 6246 + if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) { 6247 + nfs4_sequence_done(task, &d_data->res.seq_res); 6260 6248 return; 6249 + } 6261 6250 6262 6251 lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL; 6263 6252 if (lo && !pnfs_layout_is_valid(lo)) { ··· 7835 7820 static void 7836 7821 nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) 7837 7822 { 7823 + struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp; 7824 + struct nfs_client *clp = args->client; 7825 + 7826 + switch (task->tk_status) { 7827 + case -NFS4ERR_BADSESSION: 7828 + case -NFS4ERR_DEADSESSION: 7829 + nfs4_schedule_session_recovery(clp->cl_session, 7830 + task->tk_status); 7831 + } 7838 7832 } 7839 7833 7840 7834 static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { ··· 8891 8867 case -NFS4ERR_BADSESSION: 8892 8868 case -NFS4ERR_DEADSESSION: 8893 8869 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 8894 - nfs4_schedule_session_recovery(clp->cl_session, 8895 - task->tk_status); 8896 8870 break; 8897 8871 default: 8898 8872 nfs4_schedule_lease_recovery(clp); ··· 9919 9897 | NFS_CAP_ALLOCATE 9920 9898 | NFS_CAP_COPY 9921 9899 | NFS_CAP_OFFLOAD_CANCEL 9900 + | NFS_CAP_COPY_NOTIFY 9922 9901 | NFS_CAP_DEALLOCATE 9923 9902 | NFS_CAP_SEEK 9924 9903 | NFS_CAP_LAYOUTSTATS
+43 -8
fs/nfs/nfs4state.c
··· 60 60 #include "nfs4session.h" 61 61 #include "pnfs.h" 62 62 #include "netns.h" 63 + #include "nfs4trace.h" 63 64 64 65 #define NFSDBG_FACILITY NFSDBG_STATE 65 66 ··· 1408 1407 list_for_each_entry(pos, &state->lock_states, ls_locks) { 1409 1408 if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags)) 1410 1409 continue; 1411 - if (nfs4_stateid_match_other(&pos->ls_stateid, stateid)) 1410 + if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid)) 1412 1411 return pos; 1413 1412 } 1414 1413 return NULL; ··· 1442 1441 state = ctx->state; 1443 1442 if (state == NULL) 1444 1443 continue; 1445 - if (nfs4_stateid_match_other(&state->stateid, stateid) && 1444 + if (nfs4_stateid_match_or_older(&state->stateid, stateid) && 1446 1445 nfs4_state_mark_reclaim_nograce(clp, state)) { 1447 1446 found = true; 1448 1447 continue; 1449 1448 } 1450 - if (nfs4_stateid_match_other(&state->open_stateid, stateid) && 1449 + if (test_bit(NFS_OPEN_STATE, &state->flags) && 1450 + nfs4_stateid_match_or_older(&state->open_stateid, stateid) && 1451 1451 nfs4_state_mark_reclaim_nograce(clp, state)) { 1452 1452 found = true; 1453 1453 continue; ··· 1558 1556 { 1559 1557 struct nfs4_copy_state *copy; 1560 1558 1561 - if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) 1559 + if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) && 1560 + !test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags)) 1562 1561 return; 1563 1562 1564 1563 spin_lock(&sp->so_server->nfs_client->cl_lock); 1565 1564 list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { 1566 - if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid)) 1567 - continue; 1565 + if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) && 1566 + !nfs4_stateid_match_other(&state->stateid, 1567 + &copy->parent_dst_state->stateid))) 1568 + continue; 1568 1569 copy->flags = 1; 1569 - complete(&copy->completion); 1570 - break; 1570 + if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE, 1571 + &state->flags)) { 1572 
+ clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags); 1573 + complete(&copy->completion); 1574 + } 1575 + } 1576 + list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) { 1577 + if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) && 1578 + !nfs4_stateid_match_other(&state->stateid, 1579 + &copy->parent_src_state->stateid))) 1580 + continue; 1581 + copy->flags = 1; 1582 + if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE, 1583 + &state->flags)) 1584 + complete(&copy->completion); 1571 1585 } 1572 1586 spin_unlock(&sp->so_server->nfs_client->cl_lock); 1573 1587 } ··· 1611 1593 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { 1612 1594 spin_lock(&state->state_lock); 1613 1595 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1596 + trace_nfs4_state_lock_reclaim(state, lock); 1614 1597 if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) 1615 1598 pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__); 1616 1599 } ··· 1628 1609 struct nfs4_state *state; 1629 1610 unsigned int loop = 0; 1630 1611 int status = 0; 1612 + #ifdef CONFIG_NFS_V4_2 1613 + bool found_ssc_copy_state = false; 1614 + #endif /* CONFIG_NFS_V4_2 */ 1631 1615 1632 1616 /* Note: we rely on the sp->so_states list being ordered 1633 1617 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE) ··· 1650 1628 continue; 1651 1629 if (state->state == 0) 1652 1630 continue; 1631 + #ifdef CONFIG_NFS_V4_2 1632 + if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) { 1633 + nfs4_state_mark_recovery_failed(state, -EIO); 1634 + found_ssc_copy_state = true; 1635 + continue; 1636 + } 1637 + #endif /* CONFIG_NFS_V4_2 */ 1653 1638 refcount_inc(&state->count); 1654 1639 spin_unlock(&sp->so_lock); 1655 1640 status = __nfs4_reclaim_open_state(sp, state, ops); ··· 1711 1682 } 1712 1683 raw_write_seqcount_end(&sp->so_reclaim_seqcount); 1713 1684 spin_unlock(&sp->so_lock); 1685 + #ifdef CONFIG_NFS_V4_2 1686 + if (found_ssc_copy_state) 1687 + return -EIO; 1688 + #endif /* 
CONFIG_NFS_V4_2 */ 1714 1689 return 0; 1715 1690 out_err: 1716 1691 nfs4_put_open_state(state); ··· 2541 2508 2542 2509 /* Ensure exclusive access to NFSv4 state */ 2543 2510 do { 2511 + trace_nfs4_state_mgr(clp); 2544 2512 clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); 2545 2513 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { 2546 2514 section = "purge state"; ··· 2655 2621 out_error: 2656 2622 if (strlen(section)) 2657 2623 section_sep = ": "; 2624 + trace_nfs4_state_mgr_failed(clp, section, status); 2658 2625 pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" 2659 2626 " with error %d\n", section_sep, section, 2660 2627 clp->cl_hostname, -status);
+2 -2
fs/nfs/nfs4super.c
··· 92 92 { 93 93 truncate_inode_pages_final(&inode->i_data); 94 94 clear_inode(inode); 95 - /* If we are holding a delegation, return it! */ 96 - nfs_inode_return_delegation_noreclaim(inode); 95 + /* If we are holding a delegation, return and free it */ 96 + nfs_inode_evict_delegation(inode); 97 97 /* Note that above delegreturn would trigger pnfs return-on-close */ 98 98 pnfs_return_layout(inode); 99 99 pnfs_destroy_layout(NFS_I(inode));
+175
fs/nfs/nfs4trace.h
··· 562 562 ) 563 563 ); 564 564 565 + TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_RUNNING); 566 + TRACE_DEFINE_ENUM(NFS4CLNT_CHECK_LEASE); 567 + TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_EXPIRED); 568 + TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_REBOOT); 569 + TRACE_DEFINE_ENUM(NFS4CLNT_RECLAIM_NOGRACE); 570 + TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN); 571 + TRACE_DEFINE_ENUM(NFS4CLNT_SESSION_RESET); 572 + TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_CONFIRM); 573 + TRACE_DEFINE_ENUM(NFS4CLNT_SERVER_SCOPE_MISMATCH); 574 + TRACE_DEFINE_ENUM(NFS4CLNT_PURGE_STATE); 575 + TRACE_DEFINE_ENUM(NFS4CLNT_BIND_CONN_TO_SESSION); 576 + TRACE_DEFINE_ENUM(NFS4CLNT_MOVED); 577 + TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED); 578 + TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED); 579 + TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER); 580 + TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING); 581 + 582 + #define show_nfs4_clp_state(state) \ 583 + __print_flags(state, "|", \ 584 + { NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \ 585 + { NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \ 586 + { NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \ 587 + { NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \ 588 + { NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \ 589 + { NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \ 590 + { NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \ 591 + { NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \ 592 + { NFS4CLNT_SERVER_SCOPE_MISMATCH, \ 593 + "SERVER_SCOPE_MISMATCH" }, \ 594 + { NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \ 595 + { NFS4CLNT_BIND_CONN_TO_SESSION, \ 596 + "BIND_CONN_TO_SESSION" }, \ 597 + { NFS4CLNT_MOVED, "MOVED" }, \ 598 + { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \ 599 + { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \ 600 + { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \ 601 + { NFS4CLNT_DELEGRETURN_RUNNING, "DELEGRETURN_RUNNING" }) 602 + 603 + TRACE_EVENT(nfs4_state_mgr, 604 + TP_PROTO( 605 + const struct nfs_client *clp 606 + ), 607 + 608 + TP_ARGS(clp), 609 + 610 + TP_STRUCT__entry( 611 + __field(unsigned long, state) 612 + 
__string(hostname, clp->cl_hostname) 613 + ), 614 + 615 + TP_fast_assign( 616 + __entry->state = clp->cl_state; 617 + __assign_str(hostname, clp->cl_hostname) 618 + ), 619 + 620 + TP_printk( 621 + "hostname=%s clp state=%s", __get_str(hostname), 622 + show_nfs4_clp_state(__entry->state) 623 + ) 624 + ) 625 + 626 + TRACE_EVENT(nfs4_state_mgr_failed, 627 + TP_PROTO( 628 + const struct nfs_client *clp, 629 + const char *section, 630 + int status 631 + ), 632 + 633 + TP_ARGS(clp, section, status), 634 + 635 + TP_STRUCT__entry( 636 + __field(unsigned long, error) 637 + __field(unsigned long, state) 638 + __string(hostname, clp->cl_hostname) 639 + __string(section, section) 640 + ), 641 + 642 + TP_fast_assign( 643 + __entry->error = status; 644 + __entry->state = clp->cl_state; 645 + __assign_str(hostname, clp->cl_hostname); 646 + __assign_str(section, section); 647 + ), 648 + 649 + TP_printk( 650 + "hostname=%s clp state=%s error=%ld (%s) section=%s", 651 + __get_str(hostname), 652 + show_nfs4_clp_state(__entry->state), -__entry->error, 653 + show_nfsv4_errors(__entry->error), __get_str(section) 654 + 655 + ) 656 + ) 657 + 565 658 TRACE_EVENT(nfs4_xdr_status, 566 659 TP_PROTO( 567 660 const struct xdr_stream *xdr, ··· 1021 928 __entry->lockstateid_seq, __entry->lockstateid_hash 1022 929 ) 1023 930 ); 931 + 932 + TRACE_DEFINE_ENUM(LK_STATE_IN_USE); 933 + TRACE_DEFINE_ENUM(NFS_DELEGATED_STATE); 934 + TRACE_DEFINE_ENUM(NFS_OPEN_STATE); 935 + TRACE_DEFINE_ENUM(NFS_O_RDONLY_STATE); 936 + TRACE_DEFINE_ENUM(NFS_O_WRONLY_STATE); 937 + TRACE_DEFINE_ENUM(NFS_O_RDWR_STATE); 938 + TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_REBOOT); 939 + TRACE_DEFINE_ENUM(NFS_STATE_RECLAIM_NOGRACE); 940 + TRACE_DEFINE_ENUM(NFS_STATE_POSIX_LOCKS); 941 + TRACE_DEFINE_ENUM(NFS_STATE_RECOVERY_FAILED); 942 + TRACE_DEFINE_ENUM(NFS_STATE_MAY_NOTIFY_LOCK); 943 + TRACE_DEFINE_ENUM(NFS_STATE_CHANGE_WAIT); 944 + TRACE_DEFINE_ENUM(NFS_CLNT_DST_SSC_COPY_STATE); 945 + TRACE_DEFINE_ENUM(NFS_CLNT_SRC_SSC_COPY_STATE); 946 
+ TRACE_DEFINE_ENUM(NFS_SRV_SSC_COPY_STATE); 947 + 948 + #define show_nfs4_state_flags(flags) \ 949 + __print_flags(flags, "|", \ 950 + { LK_STATE_IN_USE, "IN_USE" }, \ 951 + { NFS_DELEGATED_STATE, "DELEGATED" }, \ 952 + { NFS_OPEN_STATE, "OPEN" }, \ 953 + { NFS_O_RDONLY_STATE, "O_RDONLY" }, \ 954 + { NFS_O_WRONLY_STATE, "O_WRONLY" }, \ 955 + { NFS_O_RDWR_STATE, "O_RDWR" }, \ 956 + { NFS_STATE_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \ 957 + { NFS_STATE_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \ 958 + { NFS_STATE_POSIX_LOCKS, "POSIX_LOCKS" }, \ 959 + { NFS_STATE_RECOVERY_FAILED, "RECOVERY_FAILED" }, \ 960 + { NFS_STATE_MAY_NOTIFY_LOCK, "MAY_NOTIFY_LOCK" }, \ 961 + { NFS_STATE_CHANGE_WAIT, "CHANGE_WAIT" }, \ 962 + { NFS_CLNT_DST_SSC_COPY_STATE, "CLNT_DST_SSC_COPY" }, \ 963 + { NFS_CLNT_SRC_SSC_COPY_STATE, "CLNT_SRC_SSC_COPY" }, \ 964 + { NFS_SRV_SSC_COPY_STATE, "SRV_SSC_COPY" }) 965 + 966 + #define show_nfs4_lock_flags(flags) \ 967 + __print_flags(flags, "|", \ 968 + { BIT(NFS_LOCK_INITIALIZED), "INITIALIZED" }, \ 969 + { BIT(NFS_LOCK_LOST), "LOST" }) 970 + 971 + TRACE_EVENT(nfs4_state_lock_reclaim, 972 + TP_PROTO( 973 + const struct nfs4_state *state, 974 + const struct nfs4_lock_state *lock 975 + ), 976 + 977 + TP_ARGS(state, lock), 978 + 979 + TP_STRUCT__entry( 980 + __field(dev_t, dev) 981 + __field(u32, fhandle) 982 + __field(u64, fileid) 983 + __field(unsigned long, state_flags) 984 + __field(unsigned long, lock_flags) 985 + __field(int, stateid_seq) 986 + __field(u32, stateid_hash) 987 + ), 988 + 989 + TP_fast_assign( 990 + const struct inode *inode = state->inode; 991 + 992 + __entry->dev = inode->i_sb->s_dev; 993 + __entry->fileid = NFS_FILEID(inode); 994 + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 995 + __entry->state_flags = state->flags; 996 + __entry->lock_flags = lock->ls_flags; 997 + __entry->stateid_seq = 998 + be32_to_cpu(state->stateid.seqid); 999 + __entry->stateid_hash = 1000 + nfs_stateid_hash(&state->stateid); 1001 + ), 1002 + 1003 + 
TP_printk( 1004 + "fileid=%02x:%02x:%llu fhandle=0x%08x " 1005 + "stateid=%d:0x%08x state_flags=%s lock_flags=%s", 1006 + MAJOR(__entry->dev), MINOR(__entry->dev), 1007 + (unsigned long long)__entry->fileid, __entry->fhandle, 1008 + __entry->stateid_seq, __entry->stateid_hash, 1009 + show_nfs4_state_flags(__entry->state_flags), 1010 + show_nfs4_lock_flags(__entry->lock_flags) 1011 + ) 1012 + ) 1024 1013 1025 1014 DECLARE_EVENT_CLASS(nfs4_set_delegation_event, 1026 1015 TP_PROTO(
+11 -13
fs/nfs/nfs4xdr.c
··· 1059 1059 } 1060 1060 1061 1061 static __be32 * 1062 - xdr_encode_nfstime4(__be32 *p, const struct timespec *t) 1062 + xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t) 1063 1063 { 1064 1064 p = xdr_encode_hyper(p, (__s64)t->tv_sec); 1065 1065 *p++ = cpu_to_be32(t->tv_nsec); ··· 1072 1072 const struct nfs_server *server, 1073 1073 const uint32_t attrmask[]) 1074 1074 { 1075 - struct timespec ts; 1076 1075 char owner_name[IDMAP_NAMESZ]; 1077 1076 char owner_group[IDMAP_NAMESZ]; 1078 1077 int owner_namelen = 0; ··· 1160 1161 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 1161 1162 if (iap->ia_valid & ATTR_ATIME_SET) { 1162 1163 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 1163 - ts = timespec64_to_timespec(iap->ia_atime); 1164 - p = xdr_encode_nfstime4(p, &ts); 1164 + p = xdr_encode_nfstime4(p, &iap->ia_atime); 1165 1165 } else 1166 1166 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); 1167 1167 } 1168 1168 if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { 1169 1169 if (iap->ia_valid & ATTR_MTIME_SET) { 1170 1170 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 1171 - ts = timespec64_to_timespec(iap->ia_mtime); 1172 - p = xdr_encode_nfstime4(p, &ts); 1171 + p = xdr_encode_nfstime4(p, &iap->ia_mtime); 1173 1172 } else 1174 1173 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); 1175 1174 } ··· 4062 4065 } 4063 4066 4064 4067 static __be32 * 4065 - xdr_decode_nfstime4(__be32 *p, struct timespec *t) 4068 + xdr_decode_nfstime4(__be32 *p, struct timespec64 *t) 4066 4069 { 4067 4070 __u64 sec; 4068 4071 4069 4072 p = xdr_decode_hyper(p, &sec); 4070 - t-> tv_sec = (time_t)sec; 4073 + t-> tv_sec = sec; 4071 4074 t->tv_nsec = be32_to_cpup(p++); 4072 4075 return p; 4073 4076 } 4074 4077 4075 - static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 4078 + static int decode_attr_time(struct xdr_stream *xdr, struct timespec64 *time) 4076 4079 { 4077 4080 __be32 *p; 4078 4081 ··· 4083 4086 return 0; 4084 4087 } 4085 4088 4086 - static int 
decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 4089 + static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) 4087 4090 { 4088 4091 int status = 0; 4089 4092 ··· 4101 4104 return status; 4102 4105 } 4103 4106 4104 - static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 4107 + static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) 4105 4108 { 4106 4109 int status = 0; 4107 4110 ··· 4120 4123 } 4121 4124 4122 4125 static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, 4123 - struct timespec *time) 4126 + struct timespec64 *time) 4124 4127 { 4125 4128 int status = 0; 4126 4129 ··· 4183 4186 return status; 4184 4187 } 4185 4188 4186 - static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 4189 + static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) 4187 4190 { 4188 4191 int status = 0; 4189 4192 ··· 7578 7581 PROC42(CLONE, enc_clone, dec_clone), 7579 7582 PROC42(COPY, enc_copy, dec_copy), 7580 7583 PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel), 7584 + PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify), 7581 7585 PROC(LOOKUPP, enc_lookupp, dec_lookupp), 7582 7586 PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror), 7583 7587 };
+33
fs/nfs/nfstrace.h
··· 1065 1065 ) 1066 1066 ); 1067 1067 1068 + TRACE_EVENT(nfs_fh_to_dentry, 1069 + TP_PROTO( 1070 + const struct super_block *sb, 1071 + const struct nfs_fh *fh, 1072 + u64 fileid, 1073 + int error 1074 + ), 1075 + 1076 + TP_ARGS(sb, fh, fileid, error), 1077 + 1078 + TP_STRUCT__entry( 1079 + __field(int, error) 1080 + __field(dev_t, dev) 1081 + __field(u32, fhandle) 1082 + __field(u64, fileid) 1083 + ), 1084 + 1085 + TP_fast_assign( 1086 + __entry->error = error; 1087 + __entry->dev = sb->s_dev; 1088 + __entry->fileid = fileid; 1089 + __entry->fhandle = nfs_fhandle_hash(fh); 1090 + ), 1091 + 1092 + TP_printk( 1093 + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x ", 1094 + __entry->error, 1095 + MAJOR(__entry->dev), MINOR(__entry->dev), 1096 + (unsigned long long)__entry->fileid, 1097 + __entry->fhandle 1098 + ) 1099 + ); 1100 + 1068 1101 TRACE_DEFINE_ENUM(NFS_OK); 1069 1102 TRACE_DEFINE_ENUM(NFSERR_PERM); 1070 1103 TRACE_DEFINE_ENUM(NFSERR_NOENT);
-2
fs/nfs/pnfs.c
··· 2160 2160 return NULL; 2161 2161 } 2162 2162 2163 - extern const nfs4_stateid current_stateid; 2164 - 2165 2163 static void _lgopen_prepare_attached(struct nfs4_opendata *data, 2166 2164 struct nfs_open_context *ctx) 2167 2165 {
+3 -3
fs/nfs/super.c
··· 1592 1592 dfprintk(MOUNT, "NFS: invalid " 1593 1593 "lookupcache argument\n"); 1594 1594 return 0; 1595 - }; 1595 + } 1596 1596 break; 1597 1597 case Opt_fscache_uniq: 1598 1598 if (nfs_get_option_str(args, &mnt->fscache_uniq)) ··· 1625 1625 dfprintk(MOUNT, "NFS: invalid " 1626 1626 "local_lock argument\n"); 1627 1627 return 0; 1628 - }; 1628 + } 1629 1629 break; 1630 1630 1631 1631 /* ··· 2585 2585 if (mnt_s->fscache_key) { 2586 2586 uniq = mnt_s->fscache_key->key.uniquifier; 2587 2587 ulen = mnt_s->fscache_key->key.uniq_len; 2588 - }; 2588 + } 2589 2589 } else 2590 2590 return; 2591 2591
+1 -2
fs/nfs/sysfs.c
··· 121 121 struct nfs_netns_client, 122 122 kobject); 123 123 124 - if (c->identifier) 125 - kfree(c->identifier); 124 + kfree(c->identifier); 126 125 kfree(c); 127 126 } 128 127
+26
include/linux/nfs4.h
··· 16 16 #include <linux/list.h> 17 17 #include <linux/uidgid.h> 18 18 #include <uapi/linux/nfs4.h> 19 + #include <linux/sunrpc/msg_prot.h> 19 20 20 21 enum nfs4_acl_whotype { 21 22 NFS4_ACL_WHO_NAMED = 0, ··· 540 539 541 540 NFSPROC4_CLNT_LOOKUPP, 542 541 NFSPROC4_CLNT_LAYOUTERROR, 542 + 543 + NFSPROC4_CLNT_COPY_NOTIFY, 543 544 }; 544 545 545 546 /* nfs41 types */ ··· 677 674 } u; 678 675 }; 679 676 677 + struct nfs42_netaddr { 678 + char netid[RPCBIND_MAXNETIDLEN]; 679 + char addr[RPCBIND_MAXUADDRLEN + 1]; 680 + u32 netid_len; 681 + u32 addr_len; 682 + }; 683 + 684 + enum netloc_type4 { 685 + NL4_NAME = 1, 686 + NL4_URL = 2, 687 + NL4_NETADDR = 3, 688 + }; 689 + 690 + struct nl4_server { 691 + enum netloc_type4 nl4_type; 692 + union { 693 + struct { /* NL4_NAME, NL4_URL */ 694 + int nl4_str_sz; 695 + char nl4_str[NFS4_OPAQUE_LIMIT + 1]; 696 + }; 697 + struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ 698 + } u; 699 + }; 680 700 #endif
+3 -1
include/linux/nfs_fs.h
··· 189 189 190 190 struct nfs4_copy_state { 191 191 struct list_head copies; 192 + struct list_head src_copies; 192 193 nfs4_stateid stateid; 193 194 struct completion completion; 194 195 uint64_t count; 195 196 struct nfs_writeverf verf; 196 197 int error; 197 198 int flags; 198 - struct nfs4_state *parent_state; 199 + struct nfs4_state *parent_src_state; 200 + struct nfs4_state *parent_dst_state; 199 201 }; 200 202 201 203 /*
+5 -1
include/linux/nfs_fs_sb.h
··· 45 45 #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ 46 46 #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ 47 47 #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ 48 + #define NFS_CS_NOPING 6 /* - don't ping on connect */ 49 + #define NFS_CS_DS 7 /* - Server is a DS */ 50 + #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ 48 51 struct sockaddr_storage cl_addr; /* server identifier */ 49 52 size_t cl_addrlen; 50 53 char * cl_hostname; /* hostname of server */ ··· 174 171 175 172 struct nfs_fsid fsid; 176 173 __u64 maxfilesize; /* maximum file size */ 177 - struct timespec time_delta; /* smallest time granularity */ 174 + struct timespec64 time_delta; /* smallest time granularity */ 178 175 unsigned long mount_time; /* when this fs was mounted */ 179 176 struct super_block *super; /* VFS super block */ 180 177 dev_t s_dev; /* superblock dev numbers */ ··· 279 276 #define NFS_CAP_COPY (1U << 24) 280 277 #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) 281 278 #define NFS_CAP_LAYOUTERROR (1U << 26) 279 + #define NFS_CAP_COPY_NOTIFY (1U << 27) 282 280 283 281 #endif
+24 -7
include/linux/nfs_xdr.h
··· 62 62 struct nfs_fsid fsid; 63 63 __u64 fileid; 64 64 __u64 mounted_on_fileid; 65 - struct timespec atime; 66 - struct timespec mtime; 67 - struct timespec ctime; 65 + struct timespec64 atime; 66 + struct timespec64 mtime; 67 + struct timespec64 ctime; 68 68 __u64 change_attr; /* NFSv4 change attribute */ 69 69 __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ 70 70 __u64 pre_size; /* pre_op_attr.size */ 71 - struct timespec pre_mtime; /* pre_op_attr.mtime */ 72 - struct timespec pre_ctime; /* pre_op_attr.ctime */ 71 + struct timespec64 pre_mtime; /* pre_op_attr.mtime */ 72 + struct timespec64 pre_ctime; /* pre_op_attr.ctime */ 73 73 unsigned long time_start; 74 74 unsigned long gencount; 75 75 struct nfs4_string *owner_name; ··· 143 143 __u32 wtmult; /* writes should be multiple of this */ 144 144 __u32 dtpref; /* pref. readdir transfer size */ 145 145 __u64 maxfilesize; 146 - struct timespec time_delta; /* server time granularity */ 146 + struct timespec64 time_delta; /* server time granularity */ 147 147 __u32 lease_time; /* in seconds */ 148 148 __u32 nlayouttypes; /* number of layouttypes */ 149 149 __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ ··· 869 869 struct nfs_fh * fh; 870 870 struct iattr * sattr; 871 871 unsigned int guard; 872 - struct timespec guardtime; 872 + struct timespec64 guardtime; 873 873 }; 874 874 875 875 struct nfs3_diropargs { ··· 1435 1435 1436 1436 u64 count; 1437 1437 bool sync; 1438 + struct nl4_server *cp_src; 1438 1439 }; 1439 1440 1440 1441 struct nfs42_write_res { ··· 1462 1461 struct nfs4_sequence_res osr_seq_res; 1463 1462 uint64_t osr_count; 1464 1463 int osr_status; 1464 + }; 1465 + 1466 + struct nfs42_copy_notify_args { 1467 + struct nfs4_sequence_args cna_seq_args; 1468 + 1469 + struct nfs_fh *cna_src_fh; 1470 + nfs4_stateid cna_src_stateid; 1471 + struct nl4_server cna_dst; 1472 + }; 1473 + 1474 + struct nfs42_copy_notify_res { 1475 + struct nfs4_sequence_res cnr_seq_res; 1476 + 
1477 + struct nfstime4 cnr_lease_time; 1478 + nfs4_stateid cnr_stateid; 1479 + struct nl4_server cnr_src; 1465 1480 }; 1466 1481 1467 1482 struct nfs42_seek_args {
+1
include/linux/sunrpc/clnt.h
··· 149 149 #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) 150 150 #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) 151 151 #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) 152 + #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) 152 153 153 154 struct rpc_clnt *rpc_create(struct rpc_create_args *args); 154 155 struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
+2 -1
include/linux/sunrpc/xprt.h
··· 207 207 unsigned int min_reqs; /* min number of slots */ 208 208 unsigned int num_reqs; /* total slots */ 209 209 unsigned long state; /* transport state */ 210 - unsigned char resvport : 1; /* use a reserved port */ 210 + unsigned char resvport : 1, /* use a reserved port */ 211 + reuseport : 1; /* reuse port on reconnect */ 211 212 atomic_t swapper; /* we're swapping over this 212 213 transport */ 213 214 unsigned int bind_index; /* bind function index */
+138 -72
include/trace/events/rpcrdma.h
··· 85 85 ), \ 86 86 TP_ARGS(r_xprt)) 87 87 88 + DECLARE_EVENT_CLASS(xprtrdma_connect_class, 89 + TP_PROTO( 90 + const struct rpcrdma_xprt *r_xprt, 91 + int rc 92 + ), 93 + 94 + TP_ARGS(r_xprt, rc), 95 + 96 + TP_STRUCT__entry( 97 + __field(const void *, r_xprt) 98 + __field(int, rc) 99 + __field(int, connect_status) 100 + __string(addr, rpcrdma_addrstr(r_xprt)) 101 + __string(port, rpcrdma_portstr(r_xprt)) 102 + ), 103 + 104 + TP_fast_assign( 105 + __entry->r_xprt = r_xprt; 106 + __entry->rc = rc; 107 + __entry->connect_status = r_xprt->rx_ep.rep_connected; 108 + __assign_str(addr, rpcrdma_addrstr(r_xprt)); 109 + __assign_str(port, rpcrdma_portstr(r_xprt)); 110 + ), 111 + 112 + TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d", 113 + __get_str(addr), __get_str(port), __entry->r_xprt, 114 + __entry->rc, __entry->connect_status 115 + ) 116 + ); 117 + 118 + #define DEFINE_CONN_EVENT(name) \ 119 + DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name, \ 120 + TP_PROTO( \ 121 + const struct rpcrdma_xprt *r_xprt, \ 122 + int rc \ 123 + ), \ 124 + TP_ARGS(r_xprt, rc)) 125 + 88 126 DECLARE_EVENT_CLASS(xprtrdma_rdch_event, 89 127 TP_PROTO( 90 128 const struct rpc_task *task, ··· 371 333 ) 372 334 ); 373 335 374 - TRACE_EVENT(xprtrdma_disconnect, 336 + TRACE_EVENT(xprtrdma_inline_thresh, 375 337 TP_PROTO( 376 - const struct rpcrdma_xprt *r_xprt, 377 - int status 338 + const struct rpcrdma_xprt *r_xprt 378 339 ), 379 340 380 - TP_ARGS(r_xprt, status), 341 + TP_ARGS(r_xprt), 381 342 382 343 TP_STRUCT__entry( 383 344 __field(const void *, r_xprt) 384 - __field(int, status) 385 - __field(int, connected) 345 + __field(unsigned int, inline_send) 346 + __field(unsigned int, inline_recv) 347 + __field(unsigned int, max_send) 348 + __field(unsigned int, max_recv) 349 + __string(addr, rpcrdma_addrstr(r_xprt)) 350 + __string(port, rpcrdma_portstr(r_xprt)) 351 + ), 352 + 353 + TP_fast_assign( 354 + const struct rpcrdma_ep *ep = &r_xprt->rx_ep; 355 + 356 + __entry->r_xprt = 
r_xprt; 357 + __entry->inline_send = ep->rep_inline_send; 358 + __entry->inline_recv = ep->rep_inline_recv; 359 + __entry->max_send = ep->rep_max_inline_send; 360 + __entry->max_recv = ep->rep_max_inline_recv; 361 + __assign_str(addr, rpcrdma_addrstr(r_xprt)); 362 + __assign_str(port, rpcrdma_portstr(r_xprt)); 363 + ), 364 + 365 + TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u", 366 + __get_str(addr), __get_str(port), __entry->r_xprt, 367 + __entry->inline_send, __entry->inline_recv, 368 + __entry->max_send, __entry->max_recv 369 + ) 370 + ); 371 + 372 + DEFINE_CONN_EVENT(connect); 373 + DEFINE_CONN_EVENT(disconnect); 374 + 375 + DEFINE_RXPRT_EVENT(xprtrdma_create); 376 + DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); 377 + DEFINE_RXPRT_EVENT(xprtrdma_remove); 378 + DEFINE_RXPRT_EVENT(xprtrdma_reinsert); 379 + DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); 380 + DEFINE_RXPRT_EVENT(xprtrdma_op_close); 381 + DEFINE_RXPRT_EVENT(xprtrdma_op_setport); 382 + 383 + TRACE_EVENT(xprtrdma_op_connect, 384 + TP_PROTO( 385 + const struct rpcrdma_xprt *r_xprt, 386 + unsigned long delay 387 + ), 388 + 389 + TP_ARGS(r_xprt, delay), 390 + 391 + TP_STRUCT__entry( 392 + __field(const void *, r_xprt) 393 + __field(unsigned long, delay) 386 394 __string(addr, rpcrdma_addrstr(r_xprt)) 387 395 __string(port, rpcrdma_portstr(r_xprt)) 388 396 ), 389 397 390 398 TP_fast_assign( 391 399 __entry->r_xprt = r_xprt; 392 - __entry->status = status; 393 - __entry->connected = r_xprt->rx_ep.rep_connected; 400 + __entry->delay = delay; 394 401 __assign_str(addr, rpcrdma_addrstr(r_xprt)); 395 402 __assign_str(port, rpcrdma_portstr(r_xprt)); 396 403 ), 397 404 398 - TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", 399 - __get_str(addr), __get_str(port), 400 - __entry->r_xprt, __entry->status, 401 - __entry->connected == 1 ? 
"still " : "dis" 405 + TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", 406 + __get_str(addr), __get_str(port), __entry->r_xprt, 407 + __entry->delay 402 408 ) 403 409 ); 404 410 405 - DEFINE_RXPRT_EVENT(xprtrdma_conn_start); 406 - DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); 407 - DEFINE_RXPRT_EVENT(xprtrdma_create); 408 - DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); 409 - DEFINE_RXPRT_EVENT(xprtrdma_remove); 410 - DEFINE_RXPRT_EVENT(xprtrdma_reinsert); 411 - DEFINE_RXPRT_EVENT(xprtrdma_reconnect); 412 - DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); 413 - DEFINE_RXPRT_EVENT(xprtrdma_op_close); 414 - DEFINE_RXPRT_EVENT(xprtrdma_op_connect); 415 411 416 412 TRACE_EVENT(xprtrdma_op_set_cto, 417 413 TP_PROTO( ··· 604 532 DEFINE_WRCH_EVENT(reply); 605 533 606 534 TRACE_DEFINE_ENUM(rpcrdma_noch); 535 + TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); 536 + TRACE_DEFINE_ENUM(rpcrdma_noch_mapped); 607 537 TRACE_DEFINE_ENUM(rpcrdma_readch); 608 538 TRACE_DEFINE_ENUM(rpcrdma_areadch); 609 539 TRACE_DEFINE_ENUM(rpcrdma_writech); ··· 614 540 #define xprtrdma_show_chunktype(x) \ 615 541 __print_symbolic(x, \ 616 542 { rpcrdma_noch, "inline" }, \ 543 + { rpcrdma_noch_pullup, "pullup" }, \ 544 + { rpcrdma_noch_mapped, "mapped" }, \ 617 545 { rpcrdma_readch, "read list" }, \ 618 546 { rpcrdma_areadch, "*read list" }, \ 619 547 { rpcrdma_writech, "write list" }, \ ··· 743 667 __entry->client_id = rqst->rq_task->tk_client ? 
744 668 rqst->rq_task->tk_client->cl_clid : -1; 745 669 __entry->req = req; 746 - __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; 747 - __entry->signaled = req->rl_sendctx->sc_wr.send_flags & 748 - IB_SEND_SIGNALED; 670 + __entry->num_sge = req->rl_wr.num_sge; 671 + __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; 749 672 __entry->status = status; 750 673 ), 751 674 ··· 807 732 TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)", 808 733 __get_str(addr), __get_str(port), __entry->r_xprt, 809 734 __entry->count, __entry->posted, __entry->status 735 + ) 736 + ); 737 + 738 + TRACE_EVENT(xprtrdma_post_linv, 739 + TP_PROTO( 740 + const struct rpcrdma_req *req, 741 + int status 742 + ), 743 + 744 + TP_ARGS(req, status), 745 + 746 + TP_STRUCT__entry( 747 + __field(const void *, req) 748 + __field(int, status) 749 + __field(u32, xid) 750 + ), 751 + 752 + TP_fast_assign( 753 + __entry->req = req; 754 + __entry->status = status; 755 + __entry->xid = be32_to_cpu(req->rl_slot.rq_xid); 756 + ), 757 + 758 + TP_printk("req=%p xid=0x%08x status=%d", 759 + __entry->req, __entry->xid, __entry->status 810 760 ) 811 761 ); 812 762 ··· 1121 1021 TRACE_EVENT(xprtrdma_fixup, 1122 1022 TP_PROTO( 1123 1023 const struct rpc_rqst *rqst, 1124 - int len, 1125 - int hdrlen 1024 + unsigned long fixup 1126 1025 ), 1127 1026 1128 - TP_ARGS(rqst, len, hdrlen), 1027 + TP_ARGS(rqst, fixup), 1129 1028 1130 1029 TP_STRUCT__entry( 1131 1030 __field(unsigned int, task_id) 1132 1031 __field(unsigned int, client_id) 1133 - __field(const void *, base) 1134 - __field(int, len) 1135 - __field(int, hdrlen) 1032 + __field(unsigned long, fixup) 1033 + __field(size_t, headlen) 1034 + __field(unsigned int, pagelen) 1035 + __field(size_t, taillen) 1136 1036 ), 1137 1037 1138 1038 TP_fast_assign( 1139 1039 __entry->task_id = rqst->rq_task->tk_pid; 1140 1040 __entry->client_id = rqst->rq_task->tk_client->cl_clid; 1141 - __entry->base = rqst->rq_rcv_buf.head[0].iov_base; 1142 - 
__entry->len = len; 1143 - __entry->hdrlen = hdrlen; 1041 + __entry->fixup = fixup; 1042 + __entry->headlen = rqst->rq_rcv_buf.head[0].iov_len; 1043 + __entry->pagelen = rqst->rq_rcv_buf.page_len; 1044 + __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; 1144 1045 ), 1145 1046 1146 - TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", 1147 - __entry->task_id, __entry->client_id, 1148 - __entry->base, __entry->len, __entry->hdrlen 1149 - ) 1150 - ); 1151 - 1152 - TRACE_EVENT(xprtrdma_fixup_pg, 1153 - TP_PROTO( 1154 - const struct rpc_rqst *rqst, 1155 - int pageno, 1156 - const void *pos, 1157 - int len, 1158 - int curlen 1159 - ), 1160 - 1161 - TP_ARGS(rqst, pageno, pos, len, curlen), 1162 - 1163 - TP_STRUCT__entry( 1164 - __field(unsigned int, task_id) 1165 - __field(unsigned int, client_id) 1166 - __field(const void *, pos) 1167 - __field(int, pageno) 1168 - __field(int, len) 1169 - __field(int, curlen) 1170 - ), 1171 - 1172 - TP_fast_assign( 1173 - __entry->task_id = rqst->rq_task->tk_pid; 1174 - __entry->client_id = rqst->rq_task->tk_client->cl_clid; 1175 - __entry->pos = pos; 1176 - __entry->pageno = pageno; 1177 - __entry->len = len; 1178 - __entry->curlen = curlen; 1179 - ), 1180 - 1181 - TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", 1182 - __entry->task_id, __entry->client_id, 1183 - __entry->pageno, __entry->pos, __entry->len, __entry->curlen 1047 + TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", 1048 + __entry->task_id, __entry->client_id, __entry->fixup, 1049 + __entry->headlen, __entry->pagelen, __entry->taillen 1184 1050 ) 1185 1051 ); 1186 1052
+94
include/trace/events/sunrpc.h
··· 165 165 DEFINE_RPC_RUNNING_EVENT(begin); 166 166 DEFINE_RPC_RUNNING_EVENT(run_action); 167 167 DEFINE_RPC_RUNNING_EVENT(complete); 168 + DEFINE_RPC_RUNNING_EVENT(end); 168 169 169 170 DECLARE_EVENT_CLASS(rpc_task_queued, 170 171 ··· 777 776 TP_printk("peer=[%s]:%s status=%d", 778 777 __get_str(addr), __get_str(port), __entry->status) 779 778 ); 779 + 780 + DECLARE_EVENT_CLASS(xprt_writelock_event, 781 + TP_PROTO( 782 + const struct rpc_xprt *xprt, const struct rpc_task *task 783 + ), 784 + 785 + TP_ARGS(xprt, task), 786 + 787 + TP_STRUCT__entry( 788 + __field(unsigned int, task_id) 789 + __field(unsigned int, client_id) 790 + __field(unsigned int, snd_task_id) 791 + ), 792 + 793 + TP_fast_assign( 794 + if (task) { 795 + __entry->task_id = task->tk_pid; 796 + __entry->client_id = task->tk_client ? 797 + task->tk_client->cl_clid : -1; 798 + } else { 799 + __entry->task_id = -1; 800 + __entry->client_id = -1; 801 + } 802 + __entry->snd_task_id = xprt->snd_task ? 803 + xprt->snd_task->tk_pid : -1; 804 + ), 805 + 806 + TP_printk("task:%u@%u snd_task:%u", 807 + __entry->task_id, __entry->client_id, 808 + __entry->snd_task_id) 809 + ); 810 + 811 + #define DEFINE_WRITELOCK_EVENT(name) \ 812 + DEFINE_EVENT(xprt_writelock_event, xprt_##name, \ 813 + TP_PROTO( \ 814 + const struct rpc_xprt *xprt, \ 815 + const struct rpc_task *task \ 816 + ), \ 817 + TP_ARGS(xprt, task)) 818 + 819 + DEFINE_WRITELOCK_EVENT(reserve_xprt); 820 + DEFINE_WRITELOCK_EVENT(release_xprt); 821 + 822 + DECLARE_EVENT_CLASS(xprt_cong_event, 823 + TP_PROTO( 824 + const struct rpc_xprt *xprt, const struct rpc_task *task 825 + ), 826 + 827 + TP_ARGS(xprt, task), 828 + 829 + TP_STRUCT__entry( 830 + __field(unsigned int, task_id) 831 + __field(unsigned int, client_id) 832 + __field(unsigned int, snd_task_id) 833 + __field(unsigned long, cong) 834 + __field(unsigned long, cwnd) 835 + __field(bool, wait) 836 + ), 837 + 838 + TP_fast_assign( 839 + if (task) { 840 + __entry->task_id = task->tk_pid; 841 + 
__entry->client_id = task->tk_client ? 842 + task->tk_client->cl_clid : -1; 843 + } else { 844 + __entry->task_id = -1; 845 + __entry->client_id = -1; 846 + } 847 + __entry->snd_task_id = xprt->snd_task ? 848 + xprt->snd_task->tk_pid : -1; 849 + __entry->cong = xprt->cong; 850 + __entry->cwnd = xprt->cwnd; 851 + __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); 852 + ), 853 + 854 + TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", 855 + __entry->task_id, __entry->client_id, 856 + __entry->snd_task_id, __entry->cong, __entry->cwnd, 857 + __entry->wait ? " (wait)" : "") 858 + ); 859 + 860 + #define DEFINE_CONG_EVENT(name) \ 861 + DEFINE_EVENT(xprt_cong_event, xprt_##name, \ 862 + TP_PROTO( \ 863 + const struct rpc_xprt *xprt, \ 864 + const struct rpc_task *task \ 865 + ), \ 866 + TP_ARGS(xprt, task)) 867 + 868 + DEFINE_CONG_EVENT(reserve_cong); 869 + DEFINE_CONG_EVENT(release_cong); 870 + DEFINE_CONG_EVENT(get_cong); 871 + DEFINE_CONG_EVENT(put_cong); 780 872 781 873 TRACE_EVENT(xs_stream_read_data, 782 874 TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),
+8 -13
net/sunrpc/clnt.c
··· 591 591 xprt->resvport = 1; 592 592 if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) 593 593 xprt->resvport = 0; 594 + xprt->reuseport = 0; 595 + if (args->flags & RPC_CLNT_CREATE_REUSEPORT) 596 + xprt->reuseport = 1; 594 597 595 598 clnt = rpc_create_xprt(args, xprt); 596 599 if (IS_ERR(clnt) || args->nconnect <= 1) ··· 1679 1676 return; 1680 1677 } 1681 1678 1682 - printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", 1683 - __func__, status); 1684 1679 rpc_call_rpcerror(task, -EIO); 1685 1680 return; 1686 1681 } ··· 1687 1686 * Even though there was an error, we may have acquired 1688 1687 * a request slot somehow. Make sure not to leak it. 1689 1688 */ 1690 - if (task->tk_rqstp) { 1691 - printk(KERN_ERR "%s: status=%d, request allocated anyway\n", 1692 - __func__, status); 1689 + if (task->tk_rqstp) 1693 1690 xprt_release(task); 1694 - } 1695 1691 1696 1692 switch (status) { 1697 1693 case -ENOMEM: ··· 1697 1699 case -EAGAIN: /* woken up; retry */ 1698 1700 task->tk_action = call_retry_reserve; 1699 1701 return; 1700 - case -EIO: /* probably a shutdown */ 1701 - break; 1702 1702 default: 1703 - printk(KERN_ERR "%s: unrecognized error %d, exiting\n", 1704 - __func__, status); 1705 - break; 1703 + rpc_call_rpcerror(task, status); 1706 1704 } 1707 - rpc_call_rpcerror(task, status); 1708 1705 } 1709 1706 1710 1707 /* ··· 2899 2906 struct rpc_xprt *xprt; 2900 2907 unsigned long connect_timeout; 2901 2908 unsigned long reconnect_timeout; 2902 - unsigned char resvport; 2909 + unsigned char resvport, reuseport; 2903 2910 int ret = 0; 2904 2911 2905 2912 rcu_read_lock(); ··· 2911 2918 return -EAGAIN; 2912 2919 } 2913 2920 resvport = xprt->resvport; 2921 + reuseport = xprt->reuseport; 2914 2922 connect_timeout = xprt->connect_timeout; 2915 2923 reconnect_timeout = xprt->max_reconnect_timeout; 2916 2924 rcu_read_unlock(); ··· 2922 2928 goto out_put_switch; 2923 2929 } 2924 2930 xprt->resvport = resvport; 2931 + xprt->reuseport = reuseport; 2925 2932 if 
(xprt->ops->set_connect_timeout != NULL) 2926 2933 xprt->ops->set_connect_timeout(xprt, 2927 2934 connect_timeout,
+2 -1
net/sunrpc/sched.c
··· 260 260 rpc_reset_waitqueue_priority(queue); 261 261 queue->qlen = 0; 262 262 queue->timer_list.expires = 0; 263 - INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn); 263 + INIT_DELAYED_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn); 264 264 INIT_LIST_HEAD(&queue->timer_list.list); 265 265 rpc_assign_waitqueue_name(queue, qname); 266 266 } ··· 824 824 */ 825 825 void rpc_exit_task(struct rpc_task *task) 826 826 { 827 + trace_rpc_task_end(task, task->tk_action); 827 828 task->tk_action = NULL; 828 829 if (task->tk_ops->rpc_count_stats) 829 830 task->tk_ops->rpc_count_stats(task, task->tk_calldata);
+5 -6
net/sunrpc/xdr.c
··· 436 436 } 437 437 438 438 /** 439 - * xdr_shrink_pagelen 439 + * xdr_shrink_pagelen - shrinks buf->pages by up to @len bytes 440 440 * @buf: xdr_buf 441 441 * @len: bytes to remove from buf->pages 442 442 * 443 - * Shrinks XDR buffer's page array buf->pages by 444 - * 'len' bytes. The extra data is not lost, but is instead 445 - * moved into the tail. 443 + * The extra data is not lost, but is instead moved into buf->tail. 444 + * Returns the actual number of bytes moved. 446 445 */ 447 446 static unsigned int 448 447 xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) ··· 454 455 455 456 result = 0; 456 457 tail = buf->tail; 457 - BUG_ON (len > pglen); 458 - 458 + if (len > buf->page_len) 459 + len = buf-> page_len; 459 460 tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len; 460 461 461 462 /* Shift the tail first */
+13 -9
net/sunrpc/xprt.c
··· 205 205 206 206 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 207 207 if (task == xprt->snd_task) 208 - return 1; 208 + goto out_locked; 209 209 goto out_sleep; 210 210 } 211 211 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 212 212 goto out_unlock; 213 213 xprt->snd_task = task; 214 214 215 + out_locked: 216 + trace_xprt_reserve_xprt(xprt, task); 215 217 return 1; 216 218 217 219 out_unlock: 218 220 xprt_clear_locked(xprt); 219 221 out_sleep: 220 - dprintk("RPC: %5u failed to lock transport %p\n", 221 - task->tk_pid, xprt); 222 222 task->tk_status = -EAGAIN; 223 223 if (RPC_IS_SOFT(task)) 224 224 rpc_sleep_on_timeout(&xprt->sending, task, NULL, ··· 269 269 270 270 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 271 271 if (task == xprt->snd_task) 272 - return 1; 272 + goto out_locked; 273 273 goto out_sleep; 274 274 } 275 275 if (req == NULL) { 276 276 xprt->snd_task = task; 277 - return 1; 277 + goto out_locked; 278 278 } 279 279 if (test_bit(XPRT_WRITE_SPACE, &xprt->state)) 280 280 goto out_unlock; 281 281 if (!xprt_need_congestion_window_wait(xprt)) { 282 282 xprt->snd_task = task; 283 - return 1; 283 + goto out_locked; 284 284 } 285 285 out_unlock: 286 286 xprt_clear_locked(xprt); 287 287 out_sleep: 288 - dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 289 288 task->tk_status = -EAGAIN; 290 289 if (RPC_IS_SOFT(task)) 291 290 rpc_sleep_on_timeout(&xprt->sending, task, NULL, ··· 292 293 else 293 294 rpc_sleep_on(&xprt->sending, task, NULL); 294 295 return 0; 296 + out_locked: 297 + trace_xprt_reserve_cong(xprt, task); 298 + return 1; 295 299 } 296 300 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); 297 301 ··· 359 357 xprt_clear_locked(xprt); 360 358 __xprt_lock_write_next(xprt); 361 359 } 360 + trace_xprt_release_xprt(xprt, task); 362 361 } 363 362 EXPORT_SYMBOL_GPL(xprt_release_xprt); 364 363 ··· 377 374 xprt_clear_locked(xprt); 378 375 __xprt_lock_write_next_cong(xprt); 379 376 } 377 + trace_xprt_release_cong(xprt, task); 380 378 
} 381 379 EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); 382 380 ··· 399 395 { 400 396 if (req->rq_cong) 401 397 return 1; 402 - dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n", 403 - req->rq_task->tk_pid, xprt->cong, xprt->cwnd); 398 + trace_xprt_get_cong(xprt, req->rq_task); 404 399 if (RPCXPRT_CONGESTED(xprt)) { 405 400 xprt_set_congestion_window_wait(xprt); 406 401 return 0; ··· 421 418 req->rq_cong = 0; 422 419 xprt->cong -= RPC_CWNDSCALE; 423 420 xprt_test_and_clear_congestion_window_wait(xprt); 421 + trace_xprt_put_cong(xprt, req->rq_task); 424 422 __xprt_lock_write_next_cong(xprt); 425 423 } 426 424
+1 -1
net/sunrpc/xprtrdma/backchannel.c
··· 79 79 *p = xdr_zero; 80 80 81 81 if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, 82 - &rqst->rq_snd_buf, rpcrdma_noch)) 82 + &rqst->rq_snd_buf, rpcrdma_noch_pullup)) 83 83 return -EIO; 84 84 85 85 trace_xprtrdma_cb_reply(rqst);
+11 -42
net/sunrpc/xprtrdma/frwr_ops.c
··· 36 36 * connect worker from running concurrently. 37 37 * 38 38 * When the underlying transport disconnects, MRs that are in flight 39 - * are flushed and are likely unusable. Thus all flushed MRs are 40 - * destroyed. New MRs are created on demand. 39 + * are flushed and are likely unusable. Thus all MRs are destroyed. 40 + * New MRs are created on demand. 41 41 */ 42 42 43 43 #include <linux/sunrpc/rpc_rdma.h> ··· 88 88 kfree(mr); 89 89 } 90 90 91 - static void frwr_mr_recycle(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) 91 + static void frwr_mr_recycle(struct rpcrdma_mr *mr) 92 92 { 93 + struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 94 + 93 95 trace_xprtrdma_mr_recycle(mr); 94 96 95 97 if (mr->mr_dir != DMA_NONE) { ··· 107 105 spin_unlock(&r_xprt->rx_buf.rb_lock); 108 106 109 107 frwr_release_mr(mr); 110 - } 111 - 112 - /* MRs are dynamically allocated, so simply clean up and release the MR. 113 - * A replacement MR will subsequently be allocated on demand. 114 - */ 115 - static void 116 - frwr_mr_recycle_worker(struct work_struct *work) 117 - { 118 - struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, 119 - mr_recycle); 120 - 121 - frwr_mr_recycle(mr->mr_xprt, mr); 122 - } 123 - 124 - /* frwr_recycle - Discard MRs 125 - * @req: request to reset 126 - * 127 - * Used after a reconnect. These MRs could be in flight, we can't 128 - * tell. Safe thing to do is release them. 129 - */ 130 - void frwr_recycle(struct rpcrdma_req *req) 131 - { 132 - struct rpcrdma_mr *mr; 133 - 134 - while ((mr = rpcrdma_mr_pop(&req->rl_registered))) 135 - frwr_mr_recycle(mr->mr_xprt, mr); 136 108 } 137 109 138 110 /* frwr_reset - Place MRs back on the free list ··· 142 166 struct ib_mr *frmr; 143 167 int rc; 144 168 145 - /* NB: ib_alloc_mr and device drivers typically allocate 146 - * memory with GFP_KERNEL. 
147 - */ 148 169 frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); 149 170 if (IS_ERR(frmr)) 150 171 goto out_mr_err; ··· 153 180 mr->frwr.fr_mr = frmr; 154 181 mr->mr_dir = DMA_NONE; 155 182 INIT_LIST_HEAD(&mr->mr_list); 156 - INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); 157 183 init_completion(&mr->frwr.fr_linv_done); 158 184 159 185 sg_init_table(sg, depth); ··· 396 424 struct ib_send_wr *post_wr; 397 425 struct rpcrdma_mr *mr; 398 426 399 - post_wr = &req->rl_sendctx->sc_wr; 427 + post_wr = &req->rl_wr; 400 428 list_for_each_entry(mr, &req->rl_registered, mr_list) { 401 429 struct rpcrdma_frwr *frwr; 402 430 ··· 412 440 post_wr = &frwr->fr_regwr.wr; 413 441 } 414 442 415 - /* If ib_post_send fails, the next ->send_request for 416 - * @req will queue these MRs for recovery. 417 - */ 418 443 return ib_post_send(ia->ri_id->qp, post_wr, NULL); 419 444 } 420 445 ··· 437 468 static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr) 438 469 { 439 470 if (wc->status != IB_WC_SUCCESS) 440 - rpcrdma_mr_recycle(mr); 471 + frwr_mr_recycle(mr); 441 472 else 442 473 rpcrdma_mr_put(mr); 443 474 } ··· 539 570 */ 540 571 bad_wr = NULL; 541 572 rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); 542 - trace_xprtrdma_post_send(req, rc); 543 573 544 574 /* The final LOCAL_INV WR in the chain is supposed to 545 575 * do the wake. If it was never posted, the wake will ··· 551 583 552 584 /* Recycle MRs in the LOCAL_INV chain that did not get posted. 
553 585 */ 586 + trace_xprtrdma_post_linv(req, rc); 554 587 while (bad_wr) { 555 588 frwr = container_of(bad_wr, struct rpcrdma_frwr, 556 589 fr_invwr); ··· 559 590 bad_wr = bad_wr->next; 560 591 561 592 list_del_init(&mr->mr_list); 562 - rpcrdma_mr_recycle(mr); 593 + frwr_mr_recycle(mr); 563 594 } 564 595 } 565 596 ··· 642 673 */ 643 674 bad_wr = NULL; 644 675 rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); 645 - trace_xprtrdma_post_send(req, rc); 646 676 if (!rc) 647 677 return; 648 678 649 679 /* Recycle MRs in the LOCAL_INV chain that did not get posted. 650 680 */ 681 + trace_xprtrdma_post_linv(req, rc); 651 682 while (bad_wr) { 652 683 frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr); 653 684 mr = container_of(frwr, struct rpcrdma_mr, frwr); 654 685 bad_wr = bad_wr->next; 655 686 656 - rpcrdma_mr_recycle(mr); 687 + frwr_mr_recycle(mr); 657 688 } 658 689 659 690 /* The final LOCAL_INV WR in the chain is supposed to
+277 -154
net/sunrpc/xprtrdma/rpc_rdma.c
··· 78 78 size += rpcrdma_segment_maxsz * sizeof(__be32); 79 79 size += sizeof(__be32); /* list discriminator */ 80 80 81 - dprintk("RPC: %s: max call header size = %u\n", 82 - __func__, size); 83 81 return size; 84 82 } 85 83 ··· 98 100 size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); 99 101 size += sizeof(__be32); /* list discriminator */ 100 102 101 - dprintk("RPC: %s: max reply header size = %u\n", 102 - __func__, size); 103 103 return size; 104 104 } 105 105 ··· 359 363 out_getmr_err: 360 364 trace_xprtrdma_nomrs(req); 361 365 xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 362 - if (r_xprt->rx_ep.rep_connected != -ENODEV) 363 - schedule_work(&r_xprt->rx_buf.rb_refresh_worker); 366 + rpcrdma_mrs_refresh(r_xprt); 364 367 return ERR_PTR(-EAGAIN); 365 368 } 366 369 ··· 388 393 unsigned int pos; 389 394 int nsegs; 390 395 391 - if (rtype == rpcrdma_noch) 396 + if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped) 392 397 goto done; 393 398 394 399 pos = rqst->rq_snd_buf.head[0].iov_len; ··· 560 565 */ 561 566 void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc) 562 567 { 568 + struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf; 563 569 struct ib_sge *sge; 564 570 565 571 if (!sc->sc_unmap_count) ··· 572 576 */ 573 577 for (sge = &sc->sc_sges[2]; sc->sc_unmap_count; 574 578 ++sge, --sc->sc_unmap_count) 575 - ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length, 579 + ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length, 576 580 DMA_TO_DEVICE); 577 581 578 582 kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done); ··· 585 589 { 586 590 struct rpcrdma_sendctx *sc = req->rl_sendctx; 587 591 struct rpcrdma_regbuf *rb = req->rl_rdmabuf; 588 - struct ib_sge *sge = sc->sc_sges; 592 + struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; 589 593 590 594 if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) 591 - goto out_regbuf; 595 + return false; 592 596 sge->addr = rdmab_addr(rb); 593 597 sge->length = len; 594 598 sge->lkey = rdmab_lkey(rb); 595 599 
596 600 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, 597 601 DMA_TO_DEVICE); 598 - sc->sc_wr.num_sge++; 602 + return true; 603 + } 604 + 605 + /* The head iovec is straightforward, as it is usually already 606 + * DMA-mapped. Sync the content that has changed. 607 + */ 608 + static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt, 609 + struct rpcrdma_req *req, unsigned int len) 610 + { 611 + struct rpcrdma_sendctx *sc = req->rl_sendctx; 612 + struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; 613 + struct rpcrdma_regbuf *rb = req->rl_sendbuf; 614 + 615 + if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) 616 + return false; 617 + 618 + sge->addr = rdmab_addr(rb); 619 + sge->length = len; 620 + sge->lkey = rdmab_lkey(rb); 621 + 622 + ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, 623 + DMA_TO_DEVICE); 624 + return true; 625 + } 626 + 627 + /* If there is a page list present, DMA map and prepare an 628 + * SGE for each page to be sent. 
629 + */ 630 + static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req, 631 + struct xdr_buf *xdr) 632 + { 633 + struct rpcrdma_sendctx *sc = req->rl_sendctx; 634 + struct rpcrdma_regbuf *rb = req->rl_sendbuf; 635 + unsigned int page_base, len, remaining; 636 + struct page **ppages; 637 + struct ib_sge *sge; 638 + 639 + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 640 + page_base = offset_in_page(xdr->page_base); 641 + remaining = xdr->page_len; 642 + while (remaining) { 643 + sge = &sc->sc_sges[req->rl_wr.num_sge++]; 644 + len = min_t(unsigned int, PAGE_SIZE - page_base, remaining); 645 + sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages, 646 + page_base, len, DMA_TO_DEVICE); 647 + if (ib_dma_mapping_error(rdmab_device(rb), sge->addr)) 648 + goto out_mapping_err; 649 + 650 + sge->length = len; 651 + sge->lkey = rdmab_lkey(rb); 652 + 653 + sc->sc_unmap_count++; 654 + ppages++; 655 + remaining -= len; 656 + page_base = 0; 657 + } 658 + 599 659 return true; 600 660 601 - out_regbuf: 602 - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 661 + out_mapping_err: 662 + trace_xprtrdma_dma_maperr(sge->addr); 603 663 return false; 604 664 } 605 665 606 - /* Prepare the Send SGEs. The head and tail iovec, and each entry 607 - * in the page list, gets its own SGE. 666 + /* The tail iovec may include an XDR pad for the page list, 667 + * as well as additional content, and may not reside in the 668 + * same page as the head iovec. 
608 669 */ 609 - static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt, 610 - struct rpcrdma_req *req, 670 + static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req, 611 671 struct xdr_buf *xdr, 612 - enum rpcrdma_chunktype rtype) 672 + unsigned int page_base, unsigned int len) 613 673 { 614 674 struct rpcrdma_sendctx *sc = req->rl_sendctx; 615 - unsigned int sge_no, page_base, len, remaining; 675 + struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; 616 676 struct rpcrdma_regbuf *rb = req->rl_sendbuf; 617 - struct ib_sge *sge = sc->sc_sges; 618 - struct page *page, **ppages; 677 + struct page *page = virt_to_page(xdr->tail[0].iov_base); 619 678 620 - /* The head iovec is straightforward, as it is already 621 - * DMA-mapped. Sync the content that has changed. 622 - */ 623 - if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) 624 - goto out_regbuf; 625 - sc->sc_device = rdmab_device(rb); 626 - sge_no = 1; 627 - sge[sge_no].addr = rdmab_addr(rb); 628 - sge[sge_no].length = xdr->head[0].iov_len; 629 - sge[sge_no].lkey = rdmab_lkey(rb); 630 - ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr, 631 - sge[sge_no].length, DMA_TO_DEVICE); 679 + sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len, 680 + DMA_TO_DEVICE); 681 + if (ib_dma_mapping_error(rdmab_device(rb), sge->addr)) 682 + goto out_mapping_err; 632 683 633 - /* If there is a Read chunk, the page list is being handled 634 - * via explicit RDMA, and thus is skipped here. However, the 635 - * tail iovec may include an XDR pad for the page list, as 636 - * well as additional content, and may not reside in the 637 - * same page as the head iovec. 
638 - */ 639 - if (rtype == rpcrdma_readch) { 640 - len = xdr->tail[0].iov_len; 641 - 642 - /* Do not include the tail if it is only an XDR pad */ 643 - if (len < 4) 644 - goto out; 645 - 646 - page = virt_to_page(xdr->tail[0].iov_base); 647 - page_base = offset_in_page(xdr->tail[0].iov_base); 648 - 649 - /* If the content in the page list is an odd length, 650 - * xdr_write_pages() has added a pad at the beginning 651 - * of the tail iovec. Force the tail's non-pad content 652 - * to land at the next XDR position in the Send message. 653 - */ 654 - page_base += len & 3; 655 - len -= len & 3; 656 - goto map_tail; 657 - } 658 - 659 - /* If there is a page list present, temporarily DMA map 660 - * and prepare an SGE for each page to be sent. 661 - */ 662 - if (xdr->page_len) { 663 - ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 664 - page_base = offset_in_page(xdr->page_base); 665 - remaining = xdr->page_len; 666 - while (remaining) { 667 - sge_no++; 668 - if (sge_no > RPCRDMA_MAX_SEND_SGES - 2) 669 - goto out_mapping_overflow; 670 - 671 - len = min_t(u32, PAGE_SIZE - page_base, remaining); 672 - sge[sge_no].addr = 673 - ib_dma_map_page(rdmab_device(rb), *ppages, 674 - page_base, len, DMA_TO_DEVICE); 675 - if (ib_dma_mapping_error(rdmab_device(rb), 676 - sge[sge_no].addr)) 677 - goto out_mapping_err; 678 - sge[sge_no].length = len; 679 - sge[sge_no].lkey = rdmab_lkey(rb); 680 - 681 - sc->sc_unmap_count++; 682 - ppages++; 683 - remaining -= len; 684 - page_base = 0; 685 - } 686 - } 687 - 688 - /* The tail iovec is not always constructed in the same 689 - * page where the head iovec resides (see, for example, 690 - * gss_wrap_req_priv). To neatly accommodate that case, 691 - * DMA map it separately. 
692 - */ 693 - if (xdr->tail[0].iov_len) { 694 - page = virt_to_page(xdr->tail[0].iov_base); 695 - page_base = offset_in_page(xdr->tail[0].iov_base); 696 - len = xdr->tail[0].iov_len; 697 - 698 - map_tail: 699 - sge_no++; 700 - sge[sge_no].addr = 701 - ib_dma_map_page(rdmab_device(rb), page, page_base, len, 702 - DMA_TO_DEVICE); 703 - if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr)) 704 - goto out_mapping_err; 705 - sge[sge_no].length = len; 706 - sge[sge_no].lkey = rdmab_lkey(rb); 707 - sc->sc_unmap_count++; 708 - } 709 - 710 - out: 711 - sc->sc_wr.num_sge += sge_no; 712 - if (sc->sc_unmap_count) 713 - kref_get(&req->rl_kref); 684 + sge->length = len; 685 + sge->lkey = rdmab_lkey(rb); 686 + ++sc->sc_unmap_count; 714 687 return true; 715 688 716 - out_regbuf: 717 - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); 718 - return false; 719 - 720 - out_mapping_overflow: 721 - rpcrdma_sendctx_unmap(sc); 722 - pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); 723 - return false; 724 - 725 689 out_mapping_err: 726 - rpcrdma_sendctx_unmap(sc); 727 - trace_xprtrdma_dma_maperr(sge[sge_no].addr); 690 + trace_xprtrdma_dma_maperr(sge->addr); 728 691 return false; 692 + } 693 + 694 + /* Copy the tail to the end of the head buffer. 695 + */ 696 + static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt, 697 + struct rpcrdma_req *req, 698 + struct xdr_buf *xdr) 699 + { 700 + unsigned char *dst; 701 + 702 + dst = (unsigned char *)xdr->head[0].iov_base; 703 + dst += xdr->head[0].iov_len + xdr->page_len; 704 + memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len); 705 + r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len; 706 + } 707 + 708 + /* Copy pagelist content into the head buffer. 
709 + */ 710 + static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt, 711 + struct rpcrdma_req *req, 712 + struct xdr_buf *xdr) 713 + { 714 + unsigned int len, page_base, remaining; 715 + struct page **ppages; 716 + unsigned char *src, *dst; 717 + 718 + dst = (unsigned char *)xdr->head[0].iov_base; 719 + dst += xdr->head[0].iov_len; 720 + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); 721 + page_base = offset_in_page(xdr->page_base); 722 + remaining = xdr->page_len; 723 + while (remaining) { 724 + src = page_address(*ppages); 725 + src += page_base; 726 + len = min_t(unsigned int, PAGE_SIZE - page_base, remaining); 727 + memcpy(dst, src, len); 728 + r_xprt->rx_stats.pullup_copy_count += len; 729 + 730 + ppages++; 731 + dst += len; 732 + remaining -= len; 733 + page_base = 0; 734 + } 735 + } 736 + 737 + /* Copy the contents of @xdr into @rl_sendbuf and DMA sync it. 738 + * When the head, pagelist, and tail are small, a pull-up copy 739 + * is considerably less costly than DMA mapping the components 740 + * of @xdr. 741 + * 742 + * Assumptions: 743 + * - the caller has already verified that the total length 744 + * of the RPC Call body will fit into @rl_sendbuf. 
745 + */ 746 + static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt, 747 + struct rpcrdma_req *req, 748 + struct xdr_buf *xdr) 749 + { 750 + if (unlikely(xdr->tail[0].iov_len)) 751 + rpcrdma_pullup_tail_iov(r_xprt, req, xdr); 752 + 753 + if (unlikely(xdr->page_len)) 754 + rpcrdma_pullup_pagelist(r_xprt, req, xdr); 755 + 756 + /* The whole RPC message resides in the head iovec now */ 757 + return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len); 758 + } 759 + 760 + static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt, 761 + struct rpcrdma_req *req, 762 + struct xdr_buf *xdr) 763 + { 764 + struct kvec *tail = &xdr->tail[0]; 765 + 766 + if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) 767 + return false; 768 + if (xdr->page_len) 769 + if (!rpcrdma_prepare_pagelist(req, xdr)) 770 + return false; 771 + if (tail->iov_len) 772 + if (!rpcrdma_prepare_tail_iov(req, xdr, 773 + offset_in_page(tail->iov_base), 774 + tail->iov_len)) 775 + return false; 776 + 777 + if (req->rl_sendctx->sc_unmap_count) 778 + kref_get(&req->rl_kref); 779 + return true; 780 + } 781 + 782 + static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt, 783 + struct rpcrdma_req *req, 784 + struct xdr_buf *xdr) 785 + { 786 + if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) 787 + return false; 788 + 789 + /* If there is a Read chunk, the page list is being handled 790 + * via explicit RDMA, and thus is skipped here. 791 + */ 792 + 793 + /* Do not include the tail if it is only an XDR pad */ 794 + if (xdr->tail[0].iov_len > 3) { 795 + unsigned int page_base, len; 796 + 797 + /* If the content in the page list is an odd length, 798 + * xdr_write_pages() adds a pad at the beginning of 799 + * the tail iovec. Force the tail's non-pad content to 800 + * land at the next XDR position in the Send message. 
801 + */ 802 + page_base = offset_in_page(xdr->tail[0].iov_base); 803 + len = xdr->tail[0].iov_len; 804 + page_base += len & 3; 805 + len -= len & 3; 806 + if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len)) 807 + return false; 808 + kref_get(&req->rl_kref); 809 + } 810 + 811 + return true; 729 812 } 730 813 731 814 /** ··· 817 742 * 818 743 * Returns 0 on success; otherwise a negative errno is returned. 819 744 */ 820 - int 821 - rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, 822 - struct rpcrdma_req *req, u32 hdrlen, 823 - struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) 745 + inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, 746 + struct rpcrdma_req *req, u32 hdrlen, 747 + struct xdr_buf *xdr, 748 + enum rpcrdma_chunktype rtype) 824 749 { 825 750 int ret; 826 751 827 752 ret = -EAGAIN; 828 753 req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); 829 754 if (!req->rl_sendctx) 830 - goto err; 831 - req->rl_sendctx->sc_wr.num_sge = 0; 755 + goto out_nosc; 832 756 req->rl_sendctx->sc_unmap_count = 0; 833 757 req->rl_sendctx->sc_req = req; 834 758 kref_init(&req->rl_kref); 759 + req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe; 760 + req->rl_wr.sg_list = req->rl_sendctx->sc_sges; 761 + req->rl_wr.num_sge = 0; 762 + req->rl_wr.opcode = IB_WR_SEND; 835 763 836 764 ret = -EIO; 837 765 if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) 838 - goto err; 839 - if (rtype != rpcrdma_areadch) 840 - if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype)) 841 - goto err; 766 + goto out_unmap; 767 + 768 + switch (rtype) { 769 + case rpcrdma_noch_pullup: 770 + if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr)) 771 + goto out_unmap; 772 + break; 773 + case rpcrdma_noch_mapped: 774 + if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr)) 775 + goto out_unmap; 776 + break; 777 + case rpcrdma_readch: 778 + if (!rpcrdma_prepare_readch(r_xprt, req, xdr)) 779 + goto out_unmap; 780 + break; 781 + case rpcrdma_areadch: 782 + break; 783 + default: 784 + goto 
out_unmap; 785 + } 786 + 842 787 return 0; 843 788 844 - err: 789 + out_unmap: 790 + rpcrdma_sendctx_unmap(req->rl_sendctx); 791 + out_nosc: 845 792 trace_xprtrdma_prepsend_failed(&req->rl_slot, ret); 846 793 return ret; 847 794 } ··· 893 796 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 894 797 struct xdr_stream *xdr = &req->rl_stream; 895 798 enum rpcrdma_chunktype rtype, wtype; 799 + struct xdr_buf *buf = &rqst->rq_snd_buf; 896 800 bool ddp_allowed; 897 801 __be32 *p; 898 802 int ret; ··· 951 853 */ 952 854 if (rpcrdma_args_inline(r_xprt, rqst)) { 953 855 *p++ = rdma_msg; 954 - rtype = rpcrdma_noch; 955 - } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { 856 + rtype = buf->len < rdmab_length(req->rl_sendbuf) ? 857 + rpcrdma_noch_pullup : rpcrdma_noch_mapped; 858 + } else if (ddp_allowed && buf->flags & XDRBUF_WRITE) { 956 859 *p++ = rdma_msg; 957 860 rtype = rpcrdma_readch; 958 861 } else { ··· 961 862 *p++ = rdma_nomsg; 962 863 rtype = rpcrdma_areadch; 963 864 } 964 - 965 - /* If this is a retransmit, discard previously registered 966 - * chunks. Very likely the connection has been replaced, 967 - * so these registrations are invalid and unusable. 
968 - */ 969 - frwr_recycle(req); 970 865 971 866 /* This implementation supports the following combinations 972 867 * of chunk lists in one RPC-over-RDMA Call message: ··· 995 902 goto out_err; 996 903 997 904 ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len, 998 - &rqst->rq_snd_buf, rtype); 905 + buf, rtype); 999 906 if (ret) 1000 907 goto out_err; 1001 908 ··· 1007 914 r_xprt->rx_stats.failed_marshal_count++; 1008 915 frwr_reset(req); 1009 916 return ret; 917 + } 918 + 919 + static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt, 920 + struct rpcrdma_buffer *buf, 921 + u32 grant) 922 + { 923 + buf->rb_credits = grant; 924 + xprt->cwnd = grant << RPC_CWNDSHIFT; 925 + } 926 + 927 + static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant) 928 + { 929 + struct rpc_xprt *xprt = &r_xprt->rx_xprt; 930 + 931 + spin_lock(&xprt->transport_lock); 932 + __rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant); 933 + spin_unlock(&xprt->transport_lock); 934 + } 935 + 936 + /** 937 + * rpcrdma_reset_cwnd - Reset the xprt's congestion window 938 + * @r_xprt: controlling transport instance 939 + * 940 + * Prepare @r_xprt for the next connection by reinitializing 941 + * its credit grant to one (see RFC 8166, Section 3.3.3). 
942 + */ 943 + void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt) 944 + { 945 + struct rpc_xprt *xprt = &r_xprt->rx_xprt; 946 + 947 + spin_lock(&xprt->transport_lock); 948 + xprt->cong = 0; 949 + __rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1); 950 + spin_unlock(&xprt->transport_lock); 1010 951 } 1011 952 1012 953 /** ··· 1082 955 curlen = rqst->rq_rcv_buf.head[0].iov_len; 1083 956 if (curlen > copy_len) 1084 957 curlen = copy_len; 1085 - trace_xprtrdma_fixup(rqst, copy_len, curlen); 1086 958 srcp += curlen; 1087 959 copy_len -= curlen; 1088 960 ··· 1101 975 if (curlen > pagelist_len) 1102 976 curlen = pagelist_len; 1103 977 1104 - trace_xprtrdma_fixup_pg(rqst, i, srcp, 1105 - copy_len, curlen); 1106 978 destp = kmap_atomic(ppages[i]); 1107 979 memcpy(destp + page_base, srcp, curlen); 1108 980 flush_dcache_page(ppages[i]); ··· 1132 1008 rqst->rq_private_buf.tail[0].iov_base = srcp; 1133 1009 } 1134 1010 1011 + if (fixup_copy_count) 1012 + trace_xprtrdma_fixup(rqst, fixup_copy_count); 1135 1013 return fixup_copy_count; 1136 1014 } 1137 1015 ··· 1482 1356 credits = 1; /* don't deadlock */ 1483 1357 else if (credits > buf->rb_max_requests) 1484 1358 credits = buf->rb_max_requests; 1485 - if (buf->rb_credits != credits) { 1486 - spin_lock(&xprt->transport_lock); 1487 - buf->rb_credits = credits; 1488 - xprt->cwnd = credits << RPC_CWNDSHIFT; 1489 - spin_unlock(&xprt->transport_lock); 1490 - } 1359 + if (buf->rb_credits != credits) 1360 + rpcrdma_update_cwnd(r_xprt, credits); 1361 + rpcrdma_post_recvs(r_xprt, false); 1491 1362 1492 1363 req = rpcr_to_rdmar(rqst); 1493 1364 if (req->rl_reply) {
+10 -23
net/sunrpc/xprtrdma/transport.c
··· 243 243 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 244 244 xprt_clear_connecting(xprt); 245 245 if (r_xprt->rx_ep.rep_connected > 0) { 246 - if (!xprt_test_and_set_connected(xprt)) { 247 - xprt->stat.connect_count++; 248 - xprt->stat.connect_time += (long)jiffies - 249 - xprt->stat.connect_start; 250 - xprt_wake_pending_tasks(xprt, -EAGAIN); 251 - } 252 - } else { 253 - if (xprt_test_and_clear_connected(xprt)) 254 - xprt_wake_pending_tasks(xprt, rc); 246 + xprt->stat.connect_count++; 247 + xprt->stat.connect_time += (long)jiffies - 248 + xprt->stat.connect_start; 249 + xprt_set_connected(xprt); 250 + rc = -EAGAIN; 255 251 } 252 + xprt_wake_pending_tasks(xprt, rc); 256 253 } 257 254 258 255 /** ··· 422 425 return; 423 426 rpcrdma_ep_disconnect(ep, ia); 424 427 425 - /* Prepare @xprt for the next connection by reinitializing 426 - * its credit grant to one (see RFC 8166, Section 3.3.3). 427 - */ 428 - r_xprt->rx_buf.rb_credits = 1; 429 - xprt->cwnd = RPC_CWNDSHIFT; 430 - 431 428 out: 432 429 xprt->reestablish_timeout = 0; 433 430 ++xprt->connect_cookie; ··· 441 450 struct sockaddr *sap = (struct sockaddr *)&xprt->addr; 442 451 char buf[8]; 443 452 444 - dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", 445 - __func__, xprt, 446 - xprt->address_strings[RPC_DISPLAY_ADDR], 447 - xprt->address_strings[RPC_DISPLAY_PORT], 448 - port); 449 - 450 453 rpc_set_port(sap, port); 451 454 452 455 kfree(xprt->address_strings[RPC_DISPLAY_PORT]); ··· 450 465 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); 451 466 snprintf(buf, sizeof(buf), "%4hx", port); 452 467 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); 468 + 469 + trace_xprtrdma_op_setport(container_of(xprt, struct rpcrdma_xprt, 470 + rx_xprt)); 453 471 } 454 472 455 473 /** ··· 524 536 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 525 537 unsigned long delay; 526 538 527 - trace_xprtrdma_op_connect(r_xprt); 528 - 529 539 delay = 0; 530 540 if 
(r_xprt->rx_ep.rep_connected != 0) { 531 541 delay = xprt_reconnect_delay(xprt); 532 542 xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); 533 543 } 544 + trace_xprtrdma_op_connect(r_xprt, delay); 534 545 queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, 535 546 delay); 536 547 }
+110 -84
net/sunrpc/xprtrdma/verbs.c
··· 74 74 /* 75 75 * internal functions 76 76 */ 77 - static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); 77 + static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 78 + struct rpcrdma_sendctx *sc); 79 + static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 78 80 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf); 79 81 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 80 - static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); 81 - static void rpcrdma_mr_free(struct rpcrdma_mr *mr); 82 + static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 82 83 static struct rpcrdma_regbuf * 83 84 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 84 85 gfp_t flags); 85 86 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); 86 87 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); 87 - static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); 88 88 89 89 /* Wait for outstanding transport work to finish. 
ib_drain_qp 90 90 * handles the drains in the wrong order for us, so open code ··· 125 125 126 126 /** 127 127 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC 128 - * @cq: completion queue (ignored) 128 + * @cq: completion queue 129 129 * @wc: completed WR 130 130 * 131 131 */ ··· 138 138 139 139 /* WARNING: Only wr_cqe and status are reliable at this point */ 140 140 trace_xprtrdma_wc_send(sc, wc); 141 - rpcrdma_sendctx_put_locked(sc); 141 + rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc); 142 142 } 143 143 144 144 /** ··· 170 170 rdmab_addr(rep->rr_rdmabuf), 171 171 wc->byte_len, DMA_FROM_DEVICE); 172 172 173 - rpcrdma_post_recvs(r_xprt, false); 174 173 rpcrdma_reply_handler(rep); 175 174 return; 176 175 ··· 177 178 rpcrdma_recv_buffer_put(rep); 178 179 } 179 180 180 - static void 181 - rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, 182 - struct rdma_conn_param *param) 181 + static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, 182 + struct rdma_conn_param *param) 183 183 { 184 184 const struct rpcrdma_connect_private *pmsg = param->private_data; 185 + struct rpcrdma_ep *ep = &r_xprt->rx_ep; 185 186 unsigned int rsize, wsize; 186 187 187 188 /* Default settings for RPC-over-RDMA Version One */ ··· 197 198 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 198 199 } 199 200 200 - if (rsize < r_xprt->rx_ep.rep_inline_recv) 201 - r_xprt->rx_ep.rep_inline_recv = rsize; 202 - if (wsize < r_xprt->rx_ep.rep_inline_send) 203 - r_xprt->rx_ep.rep_inline_send = wsize; 204 - dprintk("RPC: %s: max send %u, max recv %u\n", __func__, 205 - r_xprt->rx_ep.rep_inline_send, 206 - r_xprt->rx_ep.rep_inline_recv); 201 + if (rsize < ep->rep_inline_recv) 202 + ep->rep_inline_recv = rsize; 203 + if (wsize < ep->rep_inline_send) 204 + ep->rep_inline_send = wsize; 205 + 207 206 rpcrdma_set_max_header_sizes(r_xprt); 208 207 } 209 208 ··· 255 258 case RDMA_CM_EVENT_ESTABLISHED: 256 259 ++xprt->connect_cookie; 257 260 
ep->rep_connected = 1; 258 - rpcrdma_update_connect_private(r_xprt, &event->param.conn); 261 + rpcrdma_update_cm_private(r_xprt, &event->param.conn); 262 + trace_xprtrdma_inline_thresh(r_xprt); 259 263 wake_up_all(&ep->rep_connect_wait); 260 264 break; 261 265 case RDMA_CM_EVENT_CONNECT_ERROR: ··· 296 298 struct rdma_cm_id *id; 297 299 int rc; 298 300 299 - trace_xprtrdma_conn_start(xprt); 300 - 301 301 init_completion(&ia->ri_done); 302 302 init_completion(&ia->ri_remove_done); 303 303 ··· 311 315 if (rc) 312 316 goto out; 313 317 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 314 - if (rc < 0) { 315 - trace_xprtrdma_conn_tout(xprt); 318 + if (rc < 0) 316 319 goto out; 317 - } 318 320 319 321 rc = ia->ri_async_rc; 320 322 if (rc) ··· 323 329 if (rc) 324 330 goto out; 325 331 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 326 - if (rc < 0) { 327 - trace_xprtrdma_conn_tout(xprt); 332 + if (rc < 0) 328 333 goto out; 329 - } 330 334 rc = ia->ri_async_rc; 331 335 if (rc) 332 336 goto out; ··· 401 409 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 402 410 struct rpcrdma_req *req; 403 411 404 - cancel_work_sync(&buf->rb_refresh_worker); 405 - 406 412 /* This is similar to rpcrdma_ep_destroy, but: 407 413 * - Don't cancel the connect worker. 
408 414 * - Don't call rpcrdma_ep_disconnect, which waits ··· 427 437 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); 428 438 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); 429 439 } 430 - rpcrdma_mrs_destroy(buf); 440 + rpcrdma_mrs_destroy(r_xprt); 431 441 ib_dealloc_pd(ia->ri_pd); 432 442 ia->ri_pd = NULL; 433 443 ··· 512 522 init_waitqueue_head(&ep->rep_connect_wait); 513 523 ep->rep_receive_count = 0; 514 524 515 - sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL, 525 + sendcq = ib_alloc_cq_any(ia->ri_id->device, r_xprt, 516 526 ep->rep_attr.cap.max_send_wr + 1, 517 527 IB_POLL_WORKQUEUE); 518 528 if (IS_ERR(sendcq)) { ··· 620 630 pr_err("rpcrdma: rdma_create_qp returned %d\n", err); 621 631 goto out3; 622 632 } 623 - 624 - rpcrdma_mrs_create(r_xprt); 625 633 return 0; 626 634 627 635 out3: ··· 636 648 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 637 649 struct rdma_cm_id *id, *old; 638 650 int err, rc; 639 - 640 - trace_xprtrdma_reconnect(r_xprt); 641 651 642 652 rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); 643 653 ··· 691 705 memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr)); 692 706 switch (ep->rep_connected) { 693 707 case 0: 694 - dprintk("RPC: %s: connecting...\n", __func__); 695 708 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr); 696 709 if (rc) { 697 710 rc = -ENETUNREACH; ··· 711 726 ep->rep_connected = 0; 712 727 xprt_clear_connected(xprt); 713 728 729 + rpcrdma_reset_cwnd(r_xprt); 714 730 rpcrdma_post_recvs(r_xprt, true); 715 731 716 732 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); ··· 728 742 goto out; 729 743 } 730 744 731 - dprintk("RPC: %s: connected\n", __func__); 745 + rpcrdma_mrs_create(r_xprt); 732 746 733 747 out: 734 748 if (rc) 735 749 ep->rep_connected = rc; 736 750 737 751 out_noupdate: 752 + trace_xprtrdma_connect(r_xprt, rc); 738 753 return rc; 739 754 } 740 755 ··· 744 757 * @ep: endpoint to disconnect 745 758 * @ia: associated interface adapter 746 759 * 747 - * This is separate from destroy to facilitate the ability 748 - * 
to reconnect without recreating the endpoint. 749 - * 750 - * This call is not reentrant, and must not be made in parallel 751 - * on the same endpoint. 760 + * Caller serializes. Either the transport send lock is held, 761 + * or we're being called to destroy the transport. 752 762 */ 753 763 void 754 764 rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ··· 764 780 trace_xprtrdma_disconnect(r_xprt, rc); 765 781 766 782 rpcrdma_xprt_drain(r_xprt); 783 + rpcrdma_reqs_reset(r_xprt); 784 + rpcrdma_mrs_destroy(r_xprt); 767 785 } 768 786 769 787 /* Fixed-size circular FIFO queue. This implementation is wait-free and ··· 803 817 if (!sc) 804 818 return NULL; 805 819 806 - sc->sc_wr.wr_cqe = &sc->sc_cqe; 807 - sc->sc_wr.sg_list = sc->sc_sges; 808 - sc->sc_wr.opcode = IB_WR_SEND; 809 820 sc->sc_cqe.done = rpcrdma_wc_send; 810 821 return sc; 811 822 } ··· 830 847 if (!sc) 831 848 return -ENOMEM; 832 849 833 - sc->sc_xprt = r_xprt; 834 850 buf->rb_sc_ctxs[i] = sc; 835 851 } 836 852 ··· 892 910 893 911 /** 894 912 * rpcrdma_sendctx_put_locked - Release a send context 913 + * @r_xprt: controlling transport instance 895 914 * @sc: send context to release 896 915 * 897 916 * Usage: Called from Send completion to return a sendctxt ··· 900 917 * 901 918 * The caller serializes calls to this function (per transport). 
902 919 */ 903 - static void 904 - rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) 920 + static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 921 + struct rpcrdma_sendctx *sc) 905 922 { 906 - struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; 923 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 907 924 unsigned long next_tail; 908 925 909 926 /* Unmap SGEs of previously completed but unsignaled ··· 921 938 /* Paired with READ_ONCE */ 922 939 smp_store_release(&buf->rb_sc_tail, next_tail); 923 940 924 - xprt_write_space(&sc->sc_xprt->rx_xprt); 941 + xprt_write_space(&r_xprt->rx_xprt); 925 942 } 926 943 927 944 static void ··· 948 965 mr->mr_xprt = r_xprt; 949 966 950 967 spin_lock(&buf->rb_lock); 951 - list_add(&mr->mr_list, &buf->rb_mrs); 968 + rpcrdma_mr_push(mr, &buf->rb_mrs); 952 969 list_add(&mr->mr_all, &buf->rb_all_mrs); 953 970 spin_unlock(&buf->rb_lock); 954 971 } ··· 967 984 968 985 rpcrdma_mrs_create(r_xprt); 969 986 xprt_write_space(&r_xprt->rx_xprt); 987 + } 988 + 989 + /** 990 + * rpcrdma_mrs_refresh - Wake the MR refresh worker 991 + * @r_xprt: controlling transport instance 992 + * 993 + */ 994 + void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) 995 + { 996 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 997 + struct rpcrdma_ep *ep = &r_xprt->rx_ep; 998 + 999 + /* If there is no underlying device, it's no use to 1000 + * wake the refresh worker. 1001 + */ 1002 + if (ep->rep_connected != -ENODEV) { 1003 + /* The work is scheduled on a WQ_MEM_RECLAIM 1004 + * workqueue in order to prevent MR allocation 1005 + * from recursing into NFS during direct reclaim. 
1006 + */ 1007 + queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); 1008 + } 970 1009 } 971 1010 972 1011 /** ··· 1045 1040 kfree(req); 1046 1041 out1: 1047 1042 return NULL; 1043 + } 1044 + 1045 + /** 1046 + * rpcrdma_reqs_reset - Reset all reqs owned by a transport 1047 + * @r_xprt: controlling transport instance 1048 + * 1049 + * ASSUMPTION: the rb_allreqs list is stable for the duration, 1050 + * and thus can be walked without holding rb_lock. Eg. the 1051 + * caller is holding the transport send lock to exclude 1052 + * device removal or disconnection. 1053 + */ 1054 + static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) 1055 + { 1056 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1057 + struct rpcrdma_req *req; 1058 + 1059 + list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 1060 + /* Credits are valid only for one connection */ 1061 + req->rl_slot.rq_cong = 0; 1062 + } 1048 1063 } 1049 1064 1050 1065 static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, ··· 1150 1125 INIT_LIST_HEAD(&buf->rb_all_mrs); 1151 1126 INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); 1152 1127 1153 - rpcrdma_mrs_create(r_xprt); 1154 - 1155 1128 INIT_LIST_HEAD(&buf->rb_send_bufs); 1156 1129 INIT_LIST_HEAD(&buf->rb_allreqs); 1157 1130 ··· 1157 1134 for (i = 0; i < buf->rb_max_requests; i++) { 1158 1135 struct rpcrdma_req *req; 1159 1136 1160 - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE, 1137 + req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, 1161 1138 GFP_KERNEL); 1162 1139 if (!req) 1163 1140 goto out; 1164 1141 list_add(&req->rl_list, &buf->rb_send_bufs); 1165 1142 } 1166 1143 1167 - buf->rb_credits = 1; 1168 1144 init_llist_head(&buf->rb_free_reps); 1169 1145 1170 1146 rc = rpcrdma_sendctxs_create(r_xprt); ··· 1180 1158 * rpcrdma_req_destroy - Destroy an rpcrdma_req object 1181 1159 * @req: unused object to be destroyed 1182 1160 * 1183 - * This function assumes that the caller prevents concurrent 
device 1184 - * unload and transport tear-down. 1161 + * Relies on caller holding the transport send lock to protect 1162 + * removing req->rl_all from buf->rb_all_reqs safely. 1185 1163 */ 1186 1164 void rpcrdma_req_destroy(struct rpcrdma_req *req) 1187 1165 { 1166 + struct rpcrdma_mr *mr; 1167 + 1188 1168 list_del(&req->rl_all); 1189 1169 1190 - while (!list_empty(&req->rl_free_mrs)) 1191 - rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs)); 1170 + while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { 1171 + struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 1172 + 1173 + spin_lock(&buf->rb_lock); 1174 + list_del(&mr->mr_all); 1175 + spin_unlock(&buf->rb_lock); 1176 + 1177 + frwr_release_mr(mr); 1178 + } 1192 1179 1193 1180 rpcrdma_regbuf_free(req->rl_recvbuf); 1194 1181 rpcrdma_regbuf_free(req->rl_sendbuf); ··· 1205 1174 kfree(req); 1206 1175 } 1207 1176 1208 - static void 1209 - rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) 1177 + /** 1178 + * rpcrdma_mrs_destroy - Release all of a transport's MRs 1179 + * @r_xprt: controlling transport instance 1180 + * 1181 + * Relies on caller holding the transport send lock to protect 1182 + * removing mr->mr_list from req->rl_free_mrs safely. 
1183 + */ 1184 + static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) 1210 1185 { 1211 - struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 1212 - rx_buf); 1186 + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1213 1187 struct rpcrdma_mr *mr; 1214 - unsigned int count; 1215 1188 1216 - count = 0; 1189 + cancel_work_sync(&buf->rb_refresh_worker); 1190 + 1217 1191 spin_lock(&buf->rb_lock); 1218 1192 while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, 1219 1193 struct rpcrdma_mr, 1220 1194 mr_all)) != NULL) { 1195 + list_del(&mr->mr_list); 1221 1196 list_del(&mr->mr_all); 1222 1197 spin_unlock(&buf->rb_lock); 1223 1198 1224 1199 frwr_release_mr(mr); 1225 - count++; 1200 + 1226 1201 spin_lock(&buf->rb_lock); 1227 1202 } 1228 1203 spin_unlock(&buf->rb_lock); 1229 - r_xprt->rx_stats.mrs_allocated = 0; 1230 1204 } 1231 1205 1232 1206 /** ··· 1245 1209 void 1246 1210 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1247 1211 { 1248 - cancel_work_sync(&buf->rb_refresh_worker); 1249 - 1250 1212 rpcrdma_sendctxs_destroy(buf); 1251 1213 rpcrdma_reps_destroy(buf); 1252 1214 ··· 1256 1222 list_del(&req->rl_list); 1257 1223 rpcrdma_req_destroy(req); 1258 1224 } 1259 - 1260 - rpcrdma_mrs_destroy(buf); 1261 1225 } 1262 1226 1263 1227 /** ··· 1294 1262 } 1295 1263 1296 1264 rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs); 1297 - } 1298 - 1299 - static void rpcrdma_mr_free(struct rpcrdma_mr *mr) 1300 - { 1301 - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; 1302 - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1303 - 1304 - mr->mr_req = NULL; 1305 - spin_lock(&buf->rb_lock); 1306 - rpcrdma_mr_push(mr, &buf->rb_mrs); 1307 - spin_unlock(&buf->rb_lock); 1308 1265 } 1309 1266 1310 1267 /** ··· 1458 1437 struct rpcrdma_ep *ep, 1459 1438 struct rpcrdma_req *req) 1460 1439 { 1461 - struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; 1440 + struct ib_send_wr *send_wr = &req->rl_wr; 1462 1441 int rc; 1463 1442 1464 1443 if (!ep->rep_send_count || 
kref_read(&req->rl_kref) > 1) { ··· 1476 1455 return 0; 1477 1456 } 1478 1457 1479 - static void 1480 - rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1458 + /** 1459 + * rpcrdma_post_recvs - Refill the Receive Queue 1460 + * @r_xprt: controlling transport instance 1461 + * @temp: mark Receive buffers to be deleted after use 1462 + * 1463 + */ 1464 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) 1481 1465 { 1482 1466 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1483 1467 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+6 -12
net/sunrpc/xprtrdma/xprt_rdma.h
··· 218 218 /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes 219 219 */ 220 220 struct rpcrdma_req; 221 - struct rpcrdma_xprt; 222 221 struct rpcrdma_sendctx { 223 - struct ib_send_wr sc_wr; 224 222 struct ib_cqe sc_cqe; 225 - struct ib_device *sc_device; 226 - struct rpcrdma_xprt *sc_xprt; 227 223 struct rpcrdma_req *sc_req; 228 224 unsigned int sc_unmap_count; 229 225 struct ib_sge sc_sges[]; ··· 253 257 u32 mr_handle; 254 258 u32 mr_length; 255 259 u64 mr_offset; 256 - struct work_struct mr_recycle; 257 260 struct list_head mr_all; 258 261 }; 259 262 ··· 313 318 struct rpcrdma_rep *rl_reply; 314 319 struct xdr_stream rl_stream; 315 320 struct xdr_buf rl_hdrbuf; 321 + struct ib_send_wr rl_wr; 316 322 struct rpcrdma_sendctx *rl_sendctx; 317 323 struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ 318 324 struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ ··· 470 474 471 475 int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, 472 476 struct rpcrdma_req *); 477 + void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); 473 478 474 479 /* 475 480 * Buffer calls - xprtrdma/verbs.c ··· 484 487 485 488 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); 486 489 void rpcrdma_mr_put(struct rpcrdma_mr *mr); 487 - 488 - static inline void 489 - rpcrdma_mr_recycle(struct rpcrdma_mr *mr) 490 - { 491 - schedule_work(&mr->mr_recycle); 492 - } 490 + void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt); 493 491 494 492 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); 495 493 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, ··· 534 542 /* Memory registration calls xprtrdma/frwr_ops.c 535 543 */ 536 544 bool frwr_is_supported(struct ib_device *device); 537 - void frwr_recycle(struct rpcrdma_req *req); 538 545 void frwr_reset(struct rpcrdma_req *req); 539 546 int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); 540 547 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); ··· 554 563 
555 564 enum rpcrdma_chunktype { 556 565 rpcrdma_noch = 0, 566 + rpcrdma_noch_pullup, 567 + rpcrdma_noch_mapped, 557 568 rpcrdma_readch, 558 569 rpcrdma_areadch, 559 570 rpcrdma_writech, ··· 569 576 void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc); 570 577 int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); 571 578 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); 579 + void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt); 572 580 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); 573 581 void rpcrdma_reply_handler(struct rpcrdma_rep *rep); 574 582
+1 -1
net/sunrpc/xprtsock.c
··· 1752 1752 1753 1753 static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) 1754 1754 { 1755 - if (transport->srcport == 0) 1755 + if (transport->srcport == 0 && transport->xprt.reuseport) 1756 1756 transport->srcport = xs_sock_getport(sock); 1757 1757 } 1758 1758