Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-2.6.40' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.40' of git://linux-nfs.org/~bfields/linux: (22 commits)
nfsd: make local functions static
NFSD: Remove unused variable from nfsd4_decode_bind_conn_to_session()
NFSD: Check status from nfsd4_map_bcts_dir()
NFSD: Remove setting unused variable in nfsd_vfs_read()
nfsd41: error out on repeated RECLAIM_COMPLETE
nfsd41: compare request's opcnt with session's maxops at nfsd4_sequence
nfsd v4.1 LOCKT clientid field must be ignored
nfsd41: add flag checking for create_session
nfsd41: make sure nfs server process OPEN with EXCLUSIVE4_1 correctly
nfsd4: fix wrongsec handling for PUTFH + op cases
nfsd4: make fh_verify responsibility of nfsd_lookup_dentry caller
nfsd4: introduce OPDESC helper
nfsd4: allow fh_verify caller to skip pseudoflavor checks
nfsd: distinguish functions of NFSD_MAY_* flags
svcrpc: complete svsk processing on cb receive failure
svcrpc: take advantage of tcp autotuning
SUNRPC: Don't wait for full record to receive tcp data
svcrpc: copy cb reply instead of pages
svcrpc: close connection if client sends short packet
svcrpc: note network-order types in svc_process_calldir
...

+323 -199
-6
fs/nfsd/export.c
··· 1354 1354 if (IS_ERR(exp)) 1355 1355 return nfserrno(PTR_ERR(exp)); 1356 1356 rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); 1357 - if (rv) 1358 - goto out; 1359 - rv = check_nfsd_access(exp, rqstp); 1360 - if (rv) 1361 - fh_put(fhp); 1362 - out: 1363 1357 exp_put(exp); 1364 1358 return rv; 1365 1359 }
+1 -1
fs/nfsd/nfs3proc.c
··· 245 245 } 246 246 247 247 /* Now create the file and set attributes */ 248 - nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, 248 + nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, 249 249 attr, newfhp, 250 250 argp->createmode, argp->verf, NULL, NULL); 251 251
+1 -1
fs/nfsd/nfs3xdr.c
··· 842 842 return rv; 843 843 } 844 844 845 - __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 845 + static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 846 846 { 847 847 struct svc_fh fh; 848 848 int err;
+65 -8
fs/nfsd/nfs4proc.c
··· 196 196 197 197 /* 198 198 * Note: create modes (UNCHECKED,GUARDED...) are the same 199 - * in NFSv4 as in v3. 199 + * in NFSv4 as in v3 except EXCLUSIVE4_1. 200 200 */ 201 - status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, 201 + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, 202 202 open->op_fname.len, &open->op_iattr, 203 203 &resfh, open->op_createmode, 204 204 (u32 *)open->op_verf.data, ··· 403 403 cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; 404 404 memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, 405 405 putfh->pf_fhlen); 406 - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); 406 + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); 407 407 } 408 408 409 409 static __be32 ··· 762 762 __be32 err; 763 763 764 764 fh_init(&resfh, NFS4_FHSIZE); 765 + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); 766 + if (err) 767 + return err; 765 768 err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, 766 769 secinfo->si_name, secinfo->si_namelen, 767 770 &exp, &dentry); ··· 989 986 ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ 990 987 ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ 991 988 ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ 989 + /* For rfc 5661 section 2.6.3.1.1: */ 990 + OP_HANDLES_WRONGSEC = 1 << 3, 991 + OP_IS_PUTFH_LIKE = 1 << 4, 992 992 }; 993 993 994 994 struct nfsd4_operation { ··· 1035 1029 if (args->opcnt != 1) 1036 1030 return nfserr_not_only_op; 1037 1031 return nfs_ok; 1032 + } 1033 + 1034 + static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) 1035 + { 1036 + return &nfsd4_ops[op->opnum]; 1037 + } 1038 + 1039 + static bool need_wrongsec_check(struct svc_rqst *rqstp) 1040 + { 1041 + struct nfsd4_compoundres *resp = rqstp->rq_resp; 1042 + struct nfsd4_compoundargs *argp = rqstp->rq_argp; 1043 + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; 1044 + struct nfsd4_op 
*next = &argp->ops[resp->opcnt]; 1045 + struct nfsd4_operation *thisd; 1046 + struct nfsd4_operation *nextd; 1047 + 1048 + thisd = OPDESC(this); 1049 + /* 1050 + * Most ops check wronsec on our own; only the putfh-like ops 1051 + * have special rules. 1052 + */ 1053 + if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) 1054 + return false; 1055 + /* 1056 + * rfc 5661 2.6.3.1.1.6: don't bother erroring out a 1057 + * put-filehandle operation if we're not going to use the 1058 + * result: 1059 + */ 1060 + if (argp->opcnt == resp->opcnt) 1061 + return false; 1062 + 1063 + nextd = OPDESC(next); 1064 + /* 1065 + * Rest of 2.6.3.1.1: certain operations will return WRONGSEC 1066 + * errors themselves as necessary; others should check for them 1067 + * now: 1068 + */ 1069 + return !(nextd->op_flags & OP_HANDLES_WRONGSEC); 1038 1070 } 1039 1071 1040 1072 /* ··· 1152 1108 goto encode_op; 1153 1109 } 1154 1110 1155 - opdesc = &nfsd4_ops[op->opnum]; 1111 + opdesc = OPDESC(op); 1156 1112 1157 1113 if (!cstate->current_fh.fh_dentry) { 1158 1114 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { ··· 1169 1125 op->status = opdesc->op_func(rqstp, cstate, &op->u); 1170 1126 else 1171 1127 BUG_ON(op->status == nfs_ok); 1128 + 1129 + if (!op->status && need_wrongsec_check(rqstp)) 1130 + op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); 1172 1131 1173 1132 encode_op: 1174 1133 /* Only from SEQUENCE */ ··· 1264 1217 }, 1265 1218 [OP_LOOKUP] = { 1266 1219 .op_func = (nfsd4op_func)nfsd4_lookup, 1220 + .op_flags = OP_HANDLES_WRONGSEC, 1267 1221 .op_name = "OP_LOOKUP", 1268 1222 }, 1269 1223 [OP_LOOKUPP] = { 1270 1224 .op_func = (nfsd4op_func)nfsd4_lookupp, 1225 + .op_flags = OP_HANDLES_WRONGSEC, 1271 1226 .op_name = "OP_LOOKUPP", 1272 1227 }, 1273 1228 [OP_NVERIFY] = { ··· 1278 1229 }, 1279 1230 [OP_OPEN] = { 1280 1231 .op_func = (nfsd4op_func)nfsd4_open, 1232 + .op_flags = OP_HANDLES_WRONGSEC, 1281 1233 .op_name = "OP_OPEN", 1282 1234 }, 1283 1235 [OP_OPEN_CONFIRM] = { ··· 1291 
1241 }, 1292 1242 [OP_PUTFH] = { 1293 1243 .op_func = (nfsd4op_func)nfsd4_putfh, 1294 - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1244 + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1245 + | OP_IS_PUTFH_LIKE, 1295 1246 .op_name = "OP_PUTFH", 1296 1247 }, 1297 1248 [OP_PUTPUBFH] = { 1298 1249 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1299 - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1250 + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1251 + | OP_IS_PUTFH_LIKE, 1300 1252 .op_name = "OP_PUTPUBFH", 1301 1253 }, 1302 1254 [OP_PUTROOTFH] = { 1303 1255 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1304 - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1256 + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1257 + | OP_IS_PUTFH_LIKE, 1305 1258 .op_name = "OP_PUTROOTFH", 1306 1259 }, 1307 1260 [OP_READ] = { ··· 1334 1281 }, 1335 1282 [OP_RESTOREFH] = { 1336 1283 .op_func = (nfsd4op_func)nfsd4_restorefh, 1337 - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1284 + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS 1285 + | OP_IS_PUTFH_LIKE, 1338 1286 .op_name = "OP_RESTOREFH", 1339 1287 }, 1340 1288 [OP_SAVEFH] = { 1341 1289 .op_func = (nfsd4op_func)nfsd4_savefh, 1290 + .op_flags = OP_HANDLES_WRONGSEC, 1342 1291 .op_name = "OP_SAVEFH", 1343 1292 }, 1344 1293 [OP_SECINFO] = { 1345 1294 .op_func = (nfsd4op_func)nfsd4_secinfo, 1295 + .op_flags = OP_HANDLES_WRONGSEC, 1346 1296 .op_name = "OP_SECINFO", 1347 1297 }, 1348 1298 [OP_SETATTR] = { ··· 1409 1353 }, 1410 1354 [OP_SECINFO_NO_NAME] = { 1411 1355 .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, 1356 + .op_flags = OP_HANDLES_WRONGSEC, 1412 1357 .op_name = "OP_SECINFO_NO_NAME", 1413 1358 }, 1414 1359 };
+33 -9
fs/nfsd/nfs4state.c
··· 1519 1519 bool confirm_me = false; 1520 1520 int status = 0; 1521 1521 1522 + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) 1523 + return nfserr_inval; 1524 + 1522 1525 nfs4_lock_state(); 1523 1526 unconf = find_unconfirmed_client(&cr_ses->clientid); 1524 1527 conf = find_confirmed_client(&cr_ses->clientid); ··· 1640 1637 return nfserr_badsession; 1641 1638 1642 1639 status = nfsd4_map_bcts_dir(&bcts->dir); 1643 - nfsd4_new_conn(rqstp, cstate->session, bcts->dir); 1644 - return nfs_ok; 1640 + if (!status) 1641 + nfsd4_new_conn(rqstp, cstate->session, bcts->dir); 1642 + return status; 1645 1643 } 1646 1644 1647 1645 static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) ··· 1729 1725 return; 1730 1726 } 1731 1727 1728 + static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) 1729 + { 1730 + struct nfsd4_compoundargs *args = rqstp->rq_argp; 1731 + 1732 + return args->opcnt > session->se_fchannel.maxops; 1733 + } 1734 + 1732 1735 __be32 1733 1736 nfsd4_sequence(struct svc_rqst *rqstp, 1734 1737 struct nfsd4_compound_state *cstate, ··· 1762 1751 status = nfserr_badsession; 1763 1752 session = find_in_sessionid_hashtbl(&seq->sessionid); 1764 1753 if (!session) 1754 + goto out; 1755 + 1756 + status = nfserr_too_many_ops; 1757 + if (nfsd4_session_too_many_ops(rqstp, session)) 1765 1758 goto out; 1766 1759 1767 1760 status = nfserr_badslot; ··· 1823 1808 __be32 1824 1809 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) 1825 1810 { 1811 + int status = 0; 1812 + 1826 1813 if (rc->rca_one_fs) { 1827 1814 if (!cstate->current_fh.fh_dentry) 1828 1815 return nfserr_nofilehandle; ··· 1834 1817 */ 1835 1818 return nfs_ok; 1836 1819 } 1820 + 1837 1821 nfs4_lock_state(); 1838 - if (is_client_expired(cstate->session->se_client)) { 1839 - nfs4_unlock_state(); 1822 + status = nfserr_complete_already; 1823 + if 
(cstate->session->se_client->cl_firststate) 1824 + goto out; 1825 + 1826 + status = nfserr_stale_clientid; 1827 + if (is_client_expired(cstate->session->se_client)) 1840 1828 /* 1841 1829 * The following error isn't really legal. 1842 1830 * But we only get here if the client just explicitly ··· 1849 1827 * error it gets back on an operation for the dead 1850 1828 * client. 1851 1829 */ 1852 - return nfserr_stale_clientid; 1853 - } 1830 + goto out; 1831 + 1832 + status = nfs_ok; 1854 1833 nfsd4_create_clid_dir(cstate->session->se_client); 1834 + out: 1855 1835 nfs4_unlock_state(); 1856 - return nfs_ok; 1836 + return status; 1857 1837 } 1858 1838 1859 1839 __be32 ··· 2486 2462 return NULL; 2487 2463 } 2488 2464 2489 - int share_access_to_flags(u32 share_access) 2465 + static int share_access_to_flags(u32 share_access) 2490 2466 { 2491 2467 share_access &= ~NFS4_SHARE_WANT_MASK; 2492 2468 ··· 2906 2882 return status; 2907 2883 } 2908 2884 2909 - struct lock_manager nfsd4_manager = { 2885 + static struct lock_manager nfsd4_manager = { 2910 2886 }; 2911 2887 2912 2888 static void
+3 -8
fs/nfsd/nfs4xdr.c
··· 424 424 static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) 425 425 { 426 426 DECODE_HEAD; 427 - u32 dummy; 428 427 429 428 READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); 430 429 COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); 431 430 READ32(bcts->dir); 432 - /* XXX: Perhaps Tom Tucker could help us figure out how we 433 - * should be using ctsa_use_conn_in_rdma_mode: */ 434 - READ32(dummy); 435 - 431 + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker 432 + * could help us figure out we should be using it. */ 436 433 DECODE_TAIL; 437 434 } 438 435 ··· 585 588 READ_BUF(lockt->lt_owner.len); 586 589 READMEM(lockt->lt_owner.data, lockt->lt_owner.len); 587 590 588 - if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) 589 - return nfserr_inval; 590 591 DECODE_TAIL; 591 592 } 592 593 ··· 3115 3120 return nfserr; 3116 3121 } 3117 3122 3118 - __be32 3123 + static __be32 3119 3124 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, 3120 3125 struct nfsd4_sequence *seq) 3121 3126 {
+1 -1
fs/nfsd/nfsfh.c
··· 344 344 * which clients virtually always use auth_sys for, 345 345 * even while using RPCSEC_GSS for NFS. 346 346 */ 347 - if (access & NFSD_MAY_LOCK) 347 + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) 348 348 goto skip_pseudoflavor_check; 349 349 /* 350 350 * Clients may expect to be able to use auth_sys during mount,
+20 -13
fs/nfsd/vfs.c
··· 181 181 struct svc_export *exp; 182 182 struct dentry *dparent; 183 183 struct dentry *dentry; 184 - __be32 err; 185 184 int host_err; 186 185 187 186 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 188 - 189 - /* Obtain dentry and export. */ 190 - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); 191 - if (err) 192 - return err; 193 187 194 188 dparent = fhp->fh_dentry; 195 189 exp = fhp->fh_export; ··· 248 254 struct dentry *dentry; 249 255 __be32 err; 250 256 257 + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); 258 + if (err) 259 + return err; 251 260 err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); 252 261 if (err) 253 262 return err; ··· 874 877 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 875 878 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 876 879 { 877 - struct inode *inode; 878 880 mm_segment_t oldfs; 879 881 __be32 err; 880 882 int host_err; 881 883 882 884 err = nfserr_perm; 883 - inode = file->f_path.dentry->d_inode; 884 885 885 886 if (file->f_op->splice_read && rqstp->rq_splice_ok) { 886 887 struct splice_desc sd = { ··· 1335 1340 } 1336 1341 1337 1342 #ifdef CONFIG_NFSD_V3 1343 + 1344 + static inline int nfsd_create_is_exclusive(int createmode) 1345 + { 1346 + return createmode == NFS3_CREATE_EXCLUSIVE 1347 + || createmode == NFS4_CREATE_EXCLUSIVE4_1; 1348 + } 1349 + 1338 1350 /* 1339 - * NFSv3 version of nfsd_create 1351 + * NFSv3 and NFSv4 version of nfsd_create 1340 1352 */ 1341 1353 __be32 1342 - nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, 1354 + do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1343 1355 char *fname, int flen, struct iattr *iap, 1344 1356 struct svc_fh *resfhp, int createmode, u32 *verifier, 1345 1357 int *truncp, int *created) ··· 1398 1396 if (err) 1399 1397 goto out; 1400 1398 1401 - if (createmode == NFS3_CREATE_EXCLUSIVE) { 1399 + if (nfsd_create_is_exclusive(createmode)) { 1402 1400 /* solaris7 
gets confused (bugid 4218508) if these have 1403 1401 * the high bit set, so just clear the high bits. If this is 1404 1402 * ever changed to use different attrs for storing the ··· 1439 1437 && dchild->d_inode->i_atime.tv_sec == v_atime 1440 1438 && dchild->d_inode->i_size == 0 ) 1441 1439 break; 1440 + case NFS4_CREATE_EXCLUSIVE4_1: 1441 + if ( dchild->d_inode->i_mtime.tv_sec == v_mtime 1442 + && dchild->d_inode->i_atime.tv_sec == v_atime 1443 + && dchild->d_inode->i_size == 0 ) 1444 + goto set_attr; 1442 1445 /* fallthru */ 1443 1446 case NFS3_CREATE_GUARDED: 1444 1447 err = nfserr_exist; ··· 1462 1455 1463 1456 nfsd_check_ignore_resizing(iap); 1464 1457 1465 - if (createmode == NFS3_CREATE_EXCLUSIVE) { 1458 + if (nfsd_create_is_exclusive(createmode)) { 1466 1459 /* Cram the verifier into atime/mtime */ 1467 1460 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1468 1461 | ATTR_MTIME_SET|ATTR_ATIME_SET; ··· 2041 2034 struct inode *inode = dentry->d_inode; 2042 2035 int err; 2043 2036 2044 - if (acc == NFSD_MAY_NOP) 2037 + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) 2045 2038 return 0; 2046 2039 #if 0 2047 2040 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
+5 -1
fs/nfsd/vfs.h
··· 17 17 #define NFSD_MAY_SATTR 8 18 18 #define NFSD_MAY_TRUNC 16 19 19 #define NFSD_MAY_LOCK 32 20 + #define NFSD_MAY_MASK 63 21 + 22 + /* extra hints to permission and open routines: */ 20 23 #define NFSD_MAY_OWNER_OVERRIDE 64 21 24 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 22 25 #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 23 26 #define NFSD_MAY_NOT_BREAK_LEASE 512 27 + #define NFSD_MAY_BYPASS_GSS 1024 24 28 25 29 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 26 30 #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) ··· 58 54 int type, dev_t rdev, struct svc_fh *res); 59 55 #ifdef CONFIG_NFSD_V3 60 56 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); 61 - __be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, 57 + __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, 62 58 char *name, int len, struct iattr *attrs, 63 59 struct svc_fh *res, int createmode, 64 60 u32 *verifier, int *truncp, int *created);
+5 -3
include/linux/nfs4.h
··· 570 570 }; 571 571 572 572 /* Create Session Flags */ 573 - #define SESSION4_PERSIST 0x001 574 - #define SESSION4_BACK_CHAN 0x002 575 - #define SESSION4_RDMA 0x004 573 + #define SESSION4_PERSIST 0x001 574 + #define SESSION4_BACK_CHAN 0x002 575 + #define SESSION4_RDMA 0x004 576 + 577 + #define SESSION4_FLAG_MASK_A 0x007 576 578 577 579 enum state_protect_how4 { 578 580 SP4_NONE = 0,
+1
include/linux/sunrpc/svcsock.h
··· 28 28 /* private TCP part */ 29 29 u32 sk_reclen; /* length of record */ 30 30 u32 sk_tcplen; /* current read length */ 31 + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ 31 32 }; 32 33 33 34 /*
+188 -148
net/sunrpc/svcsock.c
··· 387 387 return len; 388 388 } 389 389 390 + static int svc_partial_recvfrom(struct svc_rqst *rqstp, 391 + struct kvec *iov, int nr, 392 + int buflen, unsigned int base) 393 + { 394 + size_t save_iovlen; 395 + void __user *save_iovbase; 396 + unsigned int i; 397 + int ret; 398 + 399 + if (base == 0) 400 + return svc_recvfrom(rqstp, iov, nr, buflen); 401 + 402 + for (i = 0; i < nr; i++) { 403 + if (iov[i].iov_len > base) 404 + break; 405 + base -= iov[i].iov_len; 406 + } 407 + save_iovlen = iov[i].iov_len; 408 + save_iovbase = iov[i].iov_base; 409 + iov[i].iov_len -= base; 410 + iov[i].iov_base += base; 411 + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); 412 + iov[i].iov_len = save_iovlen; 413 + iov[i].iov_base = save_iovbase; 414 + return ret; 415 + } 416 + 390 417 /* 391 418 * Set socket snd and rcv buffer lengths 392 419 */ ··· 436 409 lock_sock(sock->sk); 437 410 sock->sk->sk_sndbuf = snd * 2; 438 411 sock->sk->sk_rcvbuf = rcv * 2; 439 - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; 440 412 sock->sk->sk_write_space(sock->sk); 441 413 release_sock(sock->sk); 442 414 #endif ··· 910 884 return NULL; 911 885 } 912 886 887 + static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) 888 + { 889 + unsigned int i, len, npages; 890 + 891 + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) 892 + return 0; 893 + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); 894 + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 895 + for (i = 0; i < npages; i++) { 896 + if (rqstp->rq_pages[i] != NULL) 897 + put_page(rqstp->rq_pages[i]); 898 + BUG_ON(svsk->sk_pages[i] == NULL); 899 + rqstp->rq_pages[i] = svsk->sk_pages[i]; 900 + svsk->sk_pages[i] = NULL; 901 + } 902 + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); 903 + return len; 904 + } 905 + 906 + static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) 907 + { 908 + unsigned int i, len, npages; 909 + 910 + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) 
911 + return; 912 + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); 913 + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 914 + for (i = 0; i < npages; i++) { 915 + svsk->sk_pages[i] = rqstp->rq_pages[i]; 916 + rqstp->rq_pages[i] = NULL; 917 + } 918 + } 919 + 920 + static void svc_tcp_clear_pages(struct svc_sock *svsk) 921 + { 922 + unsigned int i, len, npages; 923 + 924 + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) 925 + goto out; 926 + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); 927 + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 928 + for (i = 0; i < npages; i++) { 929 + BUG_ON(svsk->sk_pages[i] == NULL); 930 + put_page(svsk->sk_pages[i]); 931 + svsk->sk_pages[i] = NULL; 932 + } 933 + out: 934 + svsk->sk_tcplen = 0; 935 + } 936 + 913 937 /* 914 938 * Receive data. 915 939 * If we haven't gotten the record length yet, get the next four bytes. ··· 969 893 static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 970 894 { 971 895 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 896 + unsigned int want; 972 897 int len; 973 - 974 - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 975 - /* sndbuf needs to have room for one request 976 - * per thread, otherwise we can stall even when the 977 - * network isn't a bottleneck. 978 - * 979 - * We count all threads rather than threads in a 980 - * particular pool, which provides an upper bound 981 - * on the number of threads which will access the socket. 982 - * 983 - * rcvbuf just needs to be able to hold a few requests. 984 - * Normally they will be removed from the queue 985 - * as soon a a complete request arrives. 
986 - */ 987 - svc_sock_setbufsize(svsk->sk_sock, 988 - (serv->sv_nrthreads+3) * serv->sv_max_mesg, 989 - 3 * serv->sv_max_mesg); 990 898 991 899 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 992 900 993 901 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 994 - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 995 902 struct kvec iov; 996 903 904 + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 997 905 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 998 906 iov.iov_len = want; 999 907 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) ··· 987 927 if (len < want) { 988 928 dprintk("svc: short recvfrom while reading record " 989 929 "length (%d of %d)\n", len, want); 990 - goto err_again; /* record header not complete */ 930 + return -EAGAIN; 991 931 } 992 932 993 933 svsk->sk_reclen = ntohl(svsk->sk_reclen); ··· 1014 954 } 1015 955 } 1016 956 1017 - /* Check whether enough data is available */ 1018 - len = svc_recv_available(svsk); 1019 - if (len < 0) 1020 - goto error; 957 + if (svsk->sk_reclen < 8) 958 + goto err_delete; /* client is nuts. 
*/ 1021 959 1022 - if (len < svsk->sk_reclen) { 1023 - dprintk("svc: incomplete TCP record (%d of %d)\n", 1024 - len, svsk->sk_reclen); 1025 - goto err_again; /* record not complete */ 1026 - } 1027 960 len = svsk->sk_reclen; 1028 - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1029 961 1030 962 return len; 1031 - error: 1032 - if (len == -EAGAIN) 1033 - dprintk("RPC: TCP recv_record got EAGAIN\n"); 963 + error: 964 + dprintk("RPC: TCP recv_record got %d\n", len); 1034 965 return len; 1035 - err_delete: 966 + err_delete: 1036 967 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1037 - err_again: 1038 968 return -EAGAIN; 1039 969 } 1040 970 1041 - static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 1042 - struct rpc_rqst **reqpp, struct kvec *vec) 971 + static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) 1043 972 { 973 + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; 1044 974 struct rpc_rqst *req = NULL; 1045 - u32 *p; 1046 - u32 xid; 1047 - u32 calldir; 1048 - int len; 975 + struct kvec *src, *dst; 976 + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; 977 + __be32 xid; 978 + __be32 calldir; 1049 979 1050 - len = svc_recvfrom(rqstp, vec, 1, 8); 1051 - if (len < 0) 1052 - goto error; 1053 - 1054 - p = (u32 *)rqstp->rq_arg.head[0].iov_base; 1055 980 xid = *p++; 1056 981 calldir = *p; 1057 982 1058 - if (calldir == 0) { 1059 - /* REQUEST is the most common case */ 1060 - vec[0] = rqstp->rq_arg.head[0]; 1061 - } else { 1062 - /* REPLY */ 1063 - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; 983 + if (bc_xprt) 984 + req = xprt_lookup_rqst(bc_xprt, xid); 1064 985 1065 - if (bc_xprt) 1066 - req = xprt_lookup_rqst(bc_xprt, xid); 1067 - 1068 - if (!req) { 1069 - printk(KERN_NOTICE 1070 - "%s: Got unrecognized reply: " 1071 - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", 1072 - __func__, ntohl(calldir), 1073 - bc_xprt, xid); 1074 - vec[0] = rqstp->rq_arg.head[0]; 1075 - goto out; 1076 - } 1077 - 1078 - 
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1079 - sizeof(struct xdr_buf)); 1080 - /* copy the xid and call direction */ 1081 - memcpy(req->rq_private_buf.head[0].iov_base, 1082 - rqstp->rq_arg.head[0].iov_base, 8); 1083 - vec[0] = req->rq_private_buf.head[0]; 986 + if (!req) { 987 + printk(KERN_NOTICE 988 + "%s: Got unrecognized reply: " 989 + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", 990 + __func__, ntohl(calldir), 991 + bc_xprt, xid); 992 + return -EAGAIN; 1084 993 } 1085 - out: 1086 - vec[0].iov_base += 8; 1087 - vec[0].iov_len -= 8; 1088 - len = svsk->sk_reclen - 8; 1089 - error: 1090 - *reqpp = req; 1091 - return len; 994 + 995 + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); 996 + /* 997 + * XXX!: cheating for now! Only copying HEAD. 998 + * But we know this is good enough for now (in fact, for any 999 + * callback reply in the forseeable future). 1000 + */ 1001 + dst = &req->rq_private_buf.head[0]; 1002 + src = &rqstp->rq_arg.head[0]; 1003 + if (dst->iov_len < src->iov_len) 1004 + return -EAGAIN; /* whatever; just giving up. */ 1005 + memcpy(dst->iov_base, src->iov_base, src->iov_len); 1006 + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); 1007 + rqstp->rq_arg.len = 0; 1008 + return 0; 1092 1009 } 1010 + 1011 + static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) 1012 + { 1013 + int i = 0; 1014 + int t = 0; 1015 + 1016 + while (t < len) { 1017 + vec[i].iov_base = page_address(pages[i]); 1018 + vec[i].iov_len = PAGE_SIZE; 1019 + i++; 1020 + t += PAGE_SIZE; 1021 + } 1022 + return i; 1023 + } 1024 + 1093 1025 1094 1026 /* 1095 1027 * Receive data from a TCP socket. 
··· 1093 1041 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1094 1042 int len; 1095 1043 struct kvec *vec; 1096 - int pnum, vlen; 1097 - struct rpc_rqst *req = NULL; 1044 + unsigned int want, base; 1045 + __be32 *p; 1046 + __be32 calldir; 1047 + int pnum; 1098 1048 1099 1049 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1100 1050 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), ··· 1107 1053 if (len < 0) 1108 1054 goto error; 1109 1055 1056 + base = svc_tcp_restore_pages(svsk, rqstp); 1057 + want = svsk->sk_reclen - base; 1058 + 1110 1059 vec = rqstp->rq_vec; 1111 - vec[0] = rqstp->rq_arg.head[0]; 1112 - vlen = PAGE_SIZE; 1113 1060 1114 - /* 1115 - * We have enough data for the whole tcp record. Let's try and read the 1116 - * first 8 bytes to get the xid and the call direction. We can use this 1117 - * to figure out if this is a call or a reply to a callback. If 1118 - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. 1119 - * In that case, don't bother with the calldir and just read the data. 1120 - * It will be rejected in svc_process. 1121 - */ 1122 - if (len >= 8) { 1123 - len = svc_process_calldir(svsk, rqstp, &req, vec); 1124 - if (len < 0) 1125 - goto err_again; 1126 - vlen -= 8; 1127 - } 1061 + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], 1062 + svsk->sk_reclen); 1128 1063 1129 - pnum = 1; 1130 - while (vlen < len) { 1131 - vec[pnum].iov_base = (req) ? 
1132 - page_address(req->rq_private_buf.pages[pnum - 1]) : 1133 - page_address(rqstp->rq_pages[pnum]); 1134 - vec[pnum].iov_len = PAGE_SIZE; 1135 - pnum++; 1136 - vlen += PAGE_SIZE; 1137 - } 1138 1064 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 1139 1065 1140 1066 /* Now receive data */ 1141 - len = svc_recvfrom(rqstp, vec, pnum, len); 1142 - if (len < 0) 1143 - goto err_again; 1144 - 1145 - /* 1146 - * Account for the 8 bytes we read earlier 1147 - */ 1148 - len += 8; 1149 - 1150 - if (req) { 1151 - xprt_complete_rqst(req->rq_task, len); 1152 - len = 0; 1153 - goto out; 1067 + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); 1068 + if (len >= 0) 1069 + svsk->sk_tcplen += len; 1070 + if (len != want) { 1071 + if (len < 0 && len != -EAGAIN) 1072 + goto err_other; 1073 + svc_tcp_save_pages(svsk, rqstp); 1074 + dprintk("svc: incomplete TCP record (%d of %d)\n", 1075 + svsk->sk_tcplen, svsk->sk_reclen); 1076 + goto err_noclose; 1154 1077 } 1155 - dprintk("svc: TCP complete record (%d bytes)\n", len); 1156 - rqstp->rq_arg.len = len; 1078 + 1079 + rqstp->rq_arg.len = svsk->sk_reclen; 1157 1080 rqstp->rq_arg.page_base = 0; 1158 - if (len <= rqstp->rq_arg.head[0].iov_len) { 1159 - rqstp->rq_arg.head[0].iov_len = len; 1081 + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { 1082 + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; 1160 1083 rqstp->rq_arg.page_len = 0; 1161 - } else { 1162 - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 1163 - } 1084 + } else 1085 + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; 1164 1086 1165 1087 rqstp->rq_xprt_ctxt = NULL; 1166 1088 rqstp->rq_prot = IPPROTO_TCP; 1167 1089 1168 - out: 1090 + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; 1091 + calldir = p[1]; 1092 + if (calldir) 1093 + len = receive_cb_reply(svsk, rqstp); 1094 + 1169 1095 /* Reset TCP read info */ 1170 1096 svsk->sk_reclen = 0; 1171 1097 svsk->sk_tcplen = 0; 1098 + /* If we have more data, signal 
svc_xprt_enqueue() to try again */ 1099 + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) 1100 + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1101 + 1102 + if (len < 0) 1103 + goto error; 1172 1104 1173 1105 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 1174 1106 if (serv->sv_stats) 1175 1107 serv->sv_stats->nettcpcnt++; 1176 1108 1177 - return len; 1109 + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); 1110 + return rqstp->rq_arg.len; 1178 1111 1179 - err_again: 1180 - if (len == -EAGAIN) { 1181 - dprintk("RPC: TCP recvfrom got EAGAIN\n"); 1182 - return len; 1183 - } 1184 1112 error: 1185 - if (len != -EAGAIN) { 1186 - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1187 - svsk->sk_xprt.xpt_server->sv_name, -len); 1188 - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1189 - } 1113 + if (len != -EAGAIN) 1114 + goto err_other; 1115 + dprintk("RPC: TCP recvfrom got EAGAIN\n"); 1190 1116 return -EAGAIN; 1117 + err_other: 1118 + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1119 + svsk->sk_xprt.xpt_server->sv_name, -len); 1120 + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1121 + err_noclose: 1122 + return -EAGAIN; /* record not complete */ 1191 1123 } 1192 1124 1193 1125 /* ··· 1344 1304 1345 1305 svsk->sk_reclen = 0; 1346 1306 svsk->sk_tcplen = 0; 1307 + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); 1347 1308 1348 1309 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1349 1310 1350 - /* initialise setting must have enough space to 1351 - * receive and respond to one request. 
1352 - * svc_tcp_recvfrom will re-adjust if necessary 1353 - */ 1354 - svc_sock_setbufsize(svsk->sk_sock, 1355 - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, 1356 - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); 1357 - 1358 - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1359 1311 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1360 1312 if (sk->sk_state != TCP_ESTABLISHED) 1361 1313 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); ··· 1411 1379 /* Initialize the socket */ 1412 1380 if (sock->type == SOCK_DGRAM) 1413 1381 svc_udp_init(svsk, serv); 1414 - else 1382 + else { 1383 + /* initialise setting must have enough space to 1384 + * receive and respond to one request. 1385 + */ 1386 + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, 1387 + 4 * serv->sv_max_mesg); 1415 1388 svc_tcp_init(svsk, serv); 1389 + } 1416 1390 1417 1391 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1418 1392 svsk, svsk->sk_sk); ··· 1600 1562 1601 1563 svc_sock_detach(xprt); 1602 1564 1603 - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1565 + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { 1566 + svc_tcp_clear_pages(svsk); 1604 1567 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1568 + } 1605 1569 } 1606 1570 1607 1571 /*