Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nfs-for-6.18-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
"New Features:
- Add a Kconfig option to redirect dfprintk() to the trace buffer
- Enable use of the RWF_DONTCACHE flag on the NFS client
- Add striped layout handling to pNFS flexfiles
- Add proper localio handling for READ and WRITE O_DIRECT

Bugfixes:
- Handle NFS4ERR_GRACE errors during delegation recall
- Fix NFSv4.1 backchannel max_resp_sz verification check
- Fix mount hang after CREATE_SESSION failure
- Fix d_parent->d_inode locking in nfs4_setup_readdir()

Other Cleanups and Improvements:
- Improvements to write handling tracepoints
- Fix a few trivial spelling mistakes
- Cleanups to the rpcbind cleanup call sites
- Convert the SUNRPC xdr_buf to use a scratch folio instead of a
  scratch page
- Remove unused NFS_WBACK_BUSY() macro
- Remove __GFP_NOWARN flags
- Unexport rpc_malloc() and rpc_free()"

* tag 'nfs-for-6.18-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (46 commits)
NFS: add basic STATX_DIOALIGN and STATX_DIO_READ_ALIGN support
nfs/localio: add tracepoints for misaligned DIO READ and WRITE support
nfs/localio: add proper O_DIRECT support for READ and WRITE
nfs/localio: refactor iocb initialization
nfs/localio: refactor iocb and iov_iter_bvec initialization
nfs/localio: avoid issuing misaligned IO using O_DIRECT
nfs/localio: make trace_nfs_local_open_fh more useful
NFSD: filecache: add STATX_DIOALIGN and STATX_DIO_READ_ALIGN support
sunrpc: unexport rpc_malloc() and rpc_free()
NFSv4/flexfiles: Add support for striped layouts
NFSv4/flexfiles: Update layout stats & error paths for striped layouts
NFSv4/flexfiles: Write path updates for striped layouts
NFSv4/flexfiles: Commit path updates for striped layouts
NFSv4/flexfiles: Read path updates for striped layouts
NFSv4/flexfiles: Update low level helper functions to be DS stripe aware.
NFSv4/flexfiles: Add data structure support for striped layouts
NFSv4/flexfiles: Use ds_commit_idx when marking a write commit
NFSv4/flexfiles: Remove cred local variable dependency
nfs4_setup_readdir(): insufficient locking for ->d_parent->d_inode dereferencing
NFS: Enable use of the RWF_DONTCACHE flag on the NFS client
...

+1469 -556
+2 -4
fs/lockd/svc.c
··· 216 216 if (warned++ == 0) 217 217 printk(KERN_WARNING 218 218 "lockd_up: makesock failed, error=%d\n", err); 219 - svc_xprt_destroy_all(serv, net); 220 - svc_rpcb_cleanup(serv, net); 219 + svc_xprt_destroy_all(serv, net, true); 221 220 return err; 222 221 } 223 222 ··· 254 255 nlm_shutdown_hosts_net(net); 255 256 cancel_delayed_work_sync(&ln->grace_period_end); 256 257 locks_end_grace(&ln->lockd_manager); 257 - svc_xprt_destroy_all(serv, net); 258 - svc_rpcb_cleanup(serv, net); 258 + svc_xprt_destroy_all(serv, net, true); 259 259 } 260 260 } else { 261 261 pr_err("%s: no users! net=%x\n",
+4 -4
fs/nfs/blocklayout/blocklayout.c
··· 676 676 struct pnfs_layout_segment *lseg; 677 677 struct xdr_buf buf; 678 678 struct xdr_stream xdr; 679 - struct page *scratch; 679 + struct folio *scratch; 680 680 int status, i; 681 681 uint32_t count; 682 682 __be32 *p; ··· 689 689 return ERR_PTR(-ENOMEM); 690 690 691 691 status = -ENOMEM; 692 - scratch = alloc_page(gfp_mask); 692 + scratch = folio_alloc(gfp_mask, 0); 693 693 if (!scratch) 694 694 goto out; 695 695 696 696 xdr_init_decode_pages(&xdr, &buf, 697 697 lgr->layoutp->pages, lgr->layoutp->len); 698 - xdr_set_scratch_page(&xdr, scratch); 698 + xdr_set_scratch_folio(&xdr, scratch); 699 699 700 700 status = -EIO; 701 701 p = xdr_inline_decode(&xdr, 4); ··· 744 744 } 745 745 746 746 out_free_scratch: 747 - __free_page(scratch); 747 + folio_put(scratch); 748 748 out: 749 749 dprintk("%s returns %d\n", __func__, status); 750 750 switch (status) {
+4 -4
fs/nfs/blocklayout/dev.c
··· 541 541 struct pnfs_block_dev *top; 542 542 struct xdr_stream xdr; 543 543 struct xdr_buf buf; 544 - struct page *scratch; 544 + struct folio *scratch; 545 545 int nr_volumes, ret, i; 546 546 __be32 *p; 547 547 548 - scratch = alloc_page(gfp_mask); 548 + scratch = folio_alloc(gfp_mask, 0); 549 549 if (!scratch) 550 550 goto out; 551 551 552 552 xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); 553 - xdr_set_scratch_page(&xdr, scratch); 553 + xdr_set_scratch_folio(&xdr, scratch); 554 554 555 555 p = xdr_inline_decode(&xdr, sizeof(__be32)); 556 556 if (!p) ··· 582 582 out_free_volumes: 583 583 kfree(volumes); 584 584 out_free_scratch: 585 - __free_page(scratch); 585 + folio_put(scratch); 586 586 out: 587 587 return node; 588 588 }
+4 -6
fs/nfs/callback.c
··· 136 136 return; 137 137 138 138 dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); 139 - svc_xprt_destroy_all(serv, net); 139 + svc_xprt_destroy_all(serv, net, false); 140 140 } 141 141 142 142 static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, ··· 153 153 ret = svc_bind(serv, net); 154 154 if (ret < 0) { 155 155 printk(KERN_WARNING "NFS: bind callback service failed\n"); 156 - goto err_bind; 156 + goto err; 157 157 } 158 158 159 159 ret = 0; ··· 166 166 167 167 if (ret < 0) { 168 168 printk(KERN_ERR "NFS: callback service start failed\n"); 169 - goto err_socks; 169 + goto err; 170 170 } 171 171 return 0; 172 172 173 - err_socks: 174 - svc_rpcb_cleanup(serv, net); 175 - err_bind: 173 + err: 176 174 nn->cb_users[minorversion]--; 177 175 dprintk("NFS: Couldn't create callback socket: err = %d; " 178 176 "net = %x\n", ret, net->ns.inum);
+4 -4
fs/nfs/dir.c
··· 829 829 struct address_space *mapping = desc->file->f_mapping; 830 830 struct folio *new, *folio = *arrays; 831 831 struct xdr_stream stream; 832 - struct page *scratch; 832 + struct folio *scratch; 833 833 struct xdr_buf buf; 834 834 u64 cookie; 835 835 int status; 836 836 837 - scratch = alloc_page(GFP_KERNEL); 837 + scratch = folio_alloc(GFP_KERNEL, 0); 838 838 if (scratch == NULL) 839 839 return -ENOMEM; 840 840 841 841 xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); 842 - xdr_set_scratch_page(&stream, scratch); 842 + xdr_set_scratch_folio(&stream, scratch); 843 843 844 844 do { 845 845 status = nfs_readdir_entry_decode(desc, entry, &stream); ··· 891 891 if (folio != *arrays) 892 892 nfs_readdir_folio_unlock_and_put(folio); 893 893 894 - put_page(scratch); 894 + folio_put(scratch); 895 895 return status; 896 896 } 897 897
+22 -7
fs/nfs/file.c
··· 161 161 struct inode *inode = file_inode(iocb->ki_filp); 162 162 ssize_t result; 163 163 164 + trace_nfs_file_read(iocb, to); 165 + 164 166 if (iocb->ki_flags & IOCB_DIRECT) 165 167 return nfs_file_direct_read(iocb, to, false); 166 168 ··· 363 361 364 362 if (pnfs_ld_read_whole_page(file_inode(file))) 365 363 return true; 364 + if (folio_test_dropbehind(folio)) 365 + return false; 366 366 /* Open for reading too? */ 367 367 if (file->f_mode & FMODE_READ) 368 368 return true; ··· 384 380 loff_t pos, unsigned len, struct folio **foliop, 385 381 void **fsdata) 386 382 { 387 - fgf_t fgp = FGP_WRITEBEGIN; 388 383 struct folio *folio; 389 384 struct file *file = iocb->ki_filp; 390 385 int once_thru = 0; 391 386 int ret; 392 387 388 + trace_nfs_write_begin(file_inode(file), pos, len); 389 + 393 390 dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", 394 391 file, mapping->host->i_ino, len, (long long) pos); 395 392 nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos); 396 393 397 - fgp |= fgf_set_order(len); 398 394 start: 399 - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp, 400 - mapping_gfp_mask(mapping)); 401 - if (IS_ERR(folio)) 402 - return PTR_ERR(folio); 395 + folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len); 396 + if (IS_ERR(folio)) { 397 + ret = PTR_ERR(folio); 398 + goto out; 399 + } 403 400 *foliop = folio; 404 401 405 402 ret = nfs_flush_incompatible(file, folio); ··· 410 405 } else if (!once_thru && 411 406 nfs_want_read_modify_write(file, folio, pos, len)) { 412 407 once_thru = 1; 408 + folio_clear_dropbehind(folio); 413 409 ret = nfs_read_folio(file, folio); 414 410 folio_put(folio); 415 411 if (!ret) 416 412 goto start; 417 413 } 414 + out: 415 + trace_nfs_write_begin_done(file_inode(file), pos, len, ret); 418 416 return ret; 419 417 } 420 418 ··· 431 423 unsigned offset = offset_in_folio(folio, pos); 432 424 int status; 433 425 426 + trace_nfs_write_end(file_inode(file), pos, len); 434 427 
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n", 435 428 file, mapping->host->i_ino, len, (long long) pos); 436 429 ··· 460 451 folio_unlock(folio); 461 452 folio_put(folio); 462 453 463 - if (status < 0) 454 + if (status < 0) { 455 + trace_nfs_write_end_done(file_inode(file), pos, len, status); 464 456 return status; 457 + } 465 458 NFS_I(mapping->host)->write_io += copied; 466 459 467 460 if (nfs_ctx_key_to_expire(ctx, mapping->host)) 468 461 nfs_wb_all(mapping->host); 469 462 463 + trace_nfs_write_end_done(file_inode(file), pos, len, copied); 470 464 return copied; 471 465 } 472 466 ··· 701 689 ssize_t result, written; 702 690 errseq_t since; 703 691 int error; 692 + 693 + trace_nfs_file_write(iocb, from); 704 694 705 695 result = nfs_key_timeout_notify(file, inode); 706 696 if (result) ··· 963 949 .splice_write = iter_file_splice_write, 964 950 .check_flags = nfs_check_flags, 965 951 .setlease = simple_nosetlease, 952 + .fop_flags = FOP_DONTCACHE, 966 953 }; 967 954 EXPORT_SYMBOL_GPL(nfs_file_operations);
+5 -5
fs/nfs/filelayout/filelayout.c
··· 646 646 { 647 647 struct xdr_stream stream; 648 648 struct xdr_buf buf; 649 - struct page *scratch; 649 + struct folio *scratch; 650 650 __be32 *p; 651 651 uint32_t nfl_util; 652 652 int i; 653 653 654 654 dprintk("%s: set_layout_map Begin\n", __func__); 655 655 656 - scratch = alloc_page(gfp_flags); 656 + scratch = folio_alloc(gfp_flags, 0); 657 657 if (!scratch) 658 658 return -ENOMEM; 659 659 660 660 xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); 661 - xdr_set_scratch_page(&stream, scratch); 661 + xdr_set_scratch_folio(&stream, scratch); 662 662 663 663 /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), 664 664 * num_fh (4) */ ··· 724 724 fl->fh_array[i]->size); 725 725 } 726 726 727 - __free_page(scratch); 727 + folio_put(scratch); 728 728 return 0; 729 729 730 730 out_err: 731 - __free_page(scratch); 731 + folio_put(scratch); 732 732 return -EIO; 733 733 } 734 734
+5 -5
fs/nfs/filelayout/filelayoutdev.c
··· 73 73 struct nfs4_file_layout_dsaddr *dsaddr = NULL; 74 74 struct xdr_stream stream; 75 75 struct xdr_buf buf; 76 - struct page *scratch; 76 + struct folio *scratch; 77 77 struct list_head dsaddrs; 78 78 struct nfs4_pnfs_ds_addr *da; 79 79 struct net *net = server->nfs_client->cl_net; 80 80 81 81 /* set up xdr stream */ 82 - scratch = alloc_page(gfp_flags); 82 + scratch = folio_alloc(gfp_flags, 0); 83 83 if (!scratch) 84 84 goto out_err; 85 85 86 86 xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 87 - xdr_set_scratch_page(&stream, scratch); 87 + xdr_set_scratch_folio(&stream, scratch); 88 88 89 89 /* Get the stripe count (number of stripe index) */ 90 90 p = xdr_inline_decode(&stream, 4); ··· 186 186 } 187 187 } 188 188 189 - __free_page(scratch); 189 + folio_put(scratch); 190 190 return dsaddr; 191 191 192 192 out_err_drain_dsaddrs: ··· 204 204 out_err_free_stripe_indices: 205 205 kfree(stripe_indices); 206 206 out_err_free_scratch: 207 - __free_page(scratch); 207 + folio_put(scratch); 208 208 out_err: 209 209 dprintk("%s ERROR: returning NULL\n", __func__); 210 210 return NULL;
+543 -261
fs/nfs/flexfilelayout/flexfilelayout.c
··· 47 47 int dev_limit, enum nfs4_ff_op_type type); 48 48 static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, 49 49 const struct nfs42_layoutstat_devinfo *devinfo, 50 - struct nfs4_ff_layout_mirror *mirror); 50 + struct nfs4_ff_layout_ds_stripe *dss_info); 51 51 52 52 static struct pnfs_layout_hdr * 53 53 ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) ··· 164 164 } 165 165 166 166 static struct nfsd_file * 167 - ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, 167 + ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx, u32 dss_id, 168 168 struct nfs_client *clp, const struct cred *cred, 169 169 struct nfs_fh *fh, fmode_t mode) 170 170 { 171 171 #if IS_ENABLED(CONFIG_NFS_LOCALIO) 172 172 struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); 173 173 174 - return nfs_local_open_fh(clp, cred, fh, &mirror->nfl, mode); 174 + return nfs_local_open_fh(clp, cred, fh, &mirror->dss[dss_id].nfl, mode); 175 175 #else 176 176 return NULL; 177 177 #endif 178 178 } 179 179 180 - static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, 181 - const struct nfs4_ff_layout_mirror *m2) 180 + static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1, 181 + const struct nfs4_ff_layout_ds_stripe *dss2) 182 182 { 183 183 int i, j; 184 184 185 - if (m1->fh_versions_cnt != m2->fh_versions_cnt) 185 + if (dss1->fh_versions_cnt != dss2->fh_versions_cnt) 186 186 return false; 187 - for (i = 0; i < m1->fh_versions_cnt; i++) { 187 + 188 + for (i = 0; i < dss1->fh_versions_cnt; i++) { 188 189 bool found_fh = false; 189 - for (j = 0; j < m2->fh_versions_cnt; j++) { 190 - if (nfs_compare_fh(&m1->fh_versions[i], 191 - &m2->fh_versions[j]) == 0) { 190 + for (j = 0; j < dss2->fh_versions_cnt; j++) { 191 + if (nfs_compare_fh(&dss1->fh_versions[i], 192 + &dss2->fh_versions[j]) == 0) { 192 193 found_fh = true; 193 194 break; 194 195 } ··· 197 196 if (!found_fh) 198 197 return false; 199 198 } 199 + return 
true; 200 + } 201 + 202 + static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, 203 + const struct nfs4_ff_layout_mirror *m2) 204 + { 205 + u32 dss_id; 206 + 207 + if (m1->dss_count != m2->dss_count) 208 + return false; 209 + 210 + for (dss_id = 0; dss_id < m1->dss_count; dss_id++) 211 + if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id])) 212 + return false; 213 + 214 + return true; 215 + } 216 + 217 + static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1, 218 + const struct nfs4_ff_layout_mirror *m2) 219 + { 220 + u32 dss_id; 221 + 222 + if (m1->dss_count != m2->dss_count) 223 + return false; 224 + 225 + for (dss_id = 0; dss_id < m1->dss_count; dss_id++) 226 + if (memcmp(&m1->dss[dss_id].devid, 227 + &m2->dss[dss_id].devid, 228 + sizeof(m1->dss[dss_id].devid)) != 0) 229 + return false; 230 + 200 231 return true; 201 232 } 202 233 ··· 242 209 243 210 spin_lock(&inode->i_lock); 244 211 list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { 245 - if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0) 212 + if (!ff_mirror_match_devid(mirror, pos)) 246 213 continue; 247 214 if (!ff_mirror_match_fh(mirror, pos)) 248 215 continue; ··· 273 240 static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) 274 241 { 275 242 struct nfs4_ff_layout_mirror *mirror; 243 + u32 dss_id; 276 244 277 245 mirror = kzalloc(sizeof(*mirror), gfp_flags); 278 246 if (mirror != NULL) { 279 247 spin_lock_init(&mirror->lock); 280 248 refcount_set(&mirror->ref, 1); 281 249 INIT_LIST_HEAD(&mirror->mirrors); 282 - nfs_localio_file_init(&mirror->nfl); 250 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) 251 + nfs_localio_file_init(&mirror->dss[dss_id].nfl); 283 252 } 284 253 return mirror; 285 254 } 286 255 287 256 static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) 288 257 { 289 - const struct cred *cred; 258 + const struct cred *cred; 259 + u32 dss_id; 290 260 291 261 
ff_layout_remove_mirror(mirror); 292 - kfree(mirror->fh_versions); 293 - nfs_close_local_fh(&mirror->nfl); 294 - cred = rcu_access_pointer(mirror->ro_cred); 295 - put_cred(cred); 296 - cred = rcu_access_pointer(mirror->rw_cred); 297 - put_cred(cred); 298 - nfs4_ff_layout_put_deviceid(mirror->mirror_ds); 262 + 263 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { 264 + kfree(mirror->dss[dss_id].fh_versions); 265 + cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred); 266 + put_cred(cred); 267 + cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred); 268 + put_cred(cred); 269 + nfs_close_local_fh(&mirror->dss[dss_id].nfl); 270 + nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds); 271 + } 272 + 273 + kfree(mirror->dss); 299 274 kfree(mirror); 300 275 } 301 276 ··· 407 366 free_me); 408 367 } 409 368 369 + static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror) 370 + { 371 + u32 dss_id, sum = 0; 372 + 373 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) 374 + sum += mirror->dss[dss_id].efficiency; 375 + 376 + return sum; 377 + } 378 + 410 379 static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) 411 380 { 412 381 int i, j; 413 382 414 383 for (i = 0; i < fls->mirror_array_cnt - 1; i++) { 415 384 for (j = i + 1; j < fls->mirror_array_cnt; j++) 416 - if (fls->mirror_array[i]->efficiency < 417 - fls->mirror_array[j]->efficiency) 385 + if (ff_mirror_efficiency_sum(fls->mirror_array[i]) < 386 + ff_mirror_efficiency_sum(fls->mirror_array[j])) 418 387 swap(fls->mirror_array[i], 419 388 fls->mirror_array[j]); 420 389 } ··· 439 388 struct nfs4_ff_layout_segment *fls = NULL; 440 389 struct xdr_stream stream; 441 390 struct xdr_buf buf; 442 - struct page *scratch; 391 + struct folio *scratch; 443 392 u64 stripe_unit; 444 393 u32 mirror_array_cnt; 445 394 __be32 *p; 446 395 int i, rc; 396 + struct nfs4_ff_layout_ds_stripe *dss_info; 447 397 448 398 dprintk("--> %s\n", __func__); 449 - scratch = 
alloc_page(gfp_flags); 399 + scratch = folio_alloc(gfp_flags, 0); 450 400 if (!scratch) 451 401 return ERR_PTR(-ENOMEM); 452 402 453 403 xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, 454 404 lgr->layoutp->len); 455 - xdr_set_scratch_page(&stream, scratch); 405 + xdr_set_scratch_folio(&stream, scratch); 456 406 457 407 /* stripe unit and mirror_array_cnt */ 458 408 rc = -EIO; ··· 479 427 fls->mirror_array_cnt = mirror_array_cnt; 480 428 fls->stripe_unit = stripe_unit; 481 429 430 + u32 dss_count = 0; 482 431 for (i = 0; i < fls->mirror_array_cnt; i++) { 483 432 struct nfs4_ff_layout_mirror *mirror; 484 433 struct cred *kcred; 485 434 const struct cred __rcu *cred; 486 435 kuid_t uid; 487 436 kgid_t gid; 488 - u32 ds_count, fh_count, id; 489 - int j; 437 + u32 fh_count, id; 438 + int j, dss_id; 490 439 491 440 rc = -EIO; 492 441 p = xdr_inline_decode(&stream, 4); 493 442 if (!p) 494 443 goto out_err_free; 495 - ds_count = be32_to_cpup(p); 496 444 497 - /* FIXME: allow for striping? */ 498 - if (ds_count != 1) 445 + // Ensure all mirrors have same stripe count. 
446 + if (dss_count == 0) 447 + dss_count = be32_to_cpup(p); 448 + else if (dss_count != be32_to_cpup(p)) 449 + goto out_err_free; 450 + 451 + if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT || 452 + dss_count == 0) 453 + goto out_err_free; 454 + 455 + if (dss_count > 1 && stripe_unit == 0) 499 456 goto out_err_free; 500 457 501 458 fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); ··· 513 452 goto out_err_free; 514 453 } 515 454 516 - fls->mirror_array[i]->ds_count = ds_count; 455 + fls->mirror_array[i]->dss_count = dss_count; 456 + fls->mirror_array[i]->dss = 457 + kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), 458 + gfp_flags); 517 459 518 - /* deviceid */ 519 - rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid); 520 - if (rc) 521 - goto out_err_free; 460 + for (dss_id = 0; dss_id < dss_count; dss_id++) { 461 + dss_info = &fls->mirror_array[i]->dss[dss_id]; 462 + dss_info->mirror = fls->mirror_array[i]; 522 463 523 - /* efficiency */ 524 - rc = -EIO; 525 - p = xdr_inline_decode(&stream, 4); 526 - if (!p) 527 - goto out_err_free; 528 - fls->mirror_array[i]->efficiency = be32_to_cpup(p); 529 - 530 - /* stateid */ 531 - rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid); 532 - if (rc) 533 - goto out_err_free; 534 - 535 - /* fh */ 536 - rc = -EIO; 537 - p = xdr_inline_decode(&stream, 4); 538 - if (!p) 539 - goto out_err_free; 540 - fh_count = be32_to_cpup(p); 541 - 542 - fls->mirror_array[i]->fh_versions = 543 - kcalloc(fh_count, sizeof(struct nfs_fh), 544 - gfp_flags); 545 - if (fls->mirror_array[i]->fh_versions == NULL) { 546 - rc = -ENOMEM; 547 - goto out_err_free; 548 - } 549 - 550 - for (j = 0; j < fh_count; j++) { 551 - rc = decode_nfs_fh(&stream, 552 - &fls->mirror_array[i]->fh_versions[j]); 464 + /* deviceid */ 465 + rc = decode_deviceid(&stream, &dss_info->devid); 553 466 if (rc) 554 467 goto out_err_free; 468 + 469 + /* efficiency */ 470 + rc = -EIO; 471 + p = xdr_inline_decode(&stream, 4); 472 + if (!p) 473 
+ goto out_err_free; 474 + dss_info->efficiency = be32_to_cpup(p); 475 + 476 + /* stateid */ 477 + rc = decode_pnfs_stateid(&stream, &dss_info->stateid); 478 + if (rc) 479 + goto out_err_free; 480 + 481 + /* fh */ 482 + rc = -EIO; 483 + p = xdr_inline_decode(&stream, 4); 484 + if (!p) 485 + goto out_err_free; 486 + fh_count = be32_to_cpup(p); 487 + 488 + dss_info->fh_versions = 489 + kcalloc(fh_count, sizeof(struct nfs_fh), 490 + gfp_flags); 491 + if (dss_info->fh_versions == NULL) { 492 + rc = -ENOMEM; 493 + goto out_err_free; 494 + } 495 + 496 + for (j = 0; j < fh_count; j++) { 497 + rc = decode_nfs_fh(&stream, 498 + &dss_info->fh_versions[j]); 499 + if (rc) 500 + goto out_err_free; 501 + } 502 + 503 + dss_info->fh_versions_cnt = fh_count; 504 + 505 + /* user */ 506 + rc = decode_name(&stream, &id); 507 + if (rc) 508 + goto out_err_free; 509 + 510 + uid = make_kuid(&init_user_ns, id); 511 + 512 + /* group */ 513 + rc = decode_name(&stream, &id); 514 + if (rc) 515 + goto out_err_free; 516 + 517 + gid = make_kgid(&init_user_ns, id); 518 + 519 + if (gfp_flags & __GFP_FS) 520 + kcred = prepare_kernel_cred(&init_task); 521 + else { 522 + unsigned int nofs_flags = memalloc_nofs_save(); 523 + 524 + kcred = prepare_kernel_cred(&init_task); 525 + memalloc_nofs_restore(nofs_flags); 526 + } 527 + rc = -ENOMEM; 528 + if (!kcred) 529 + goto out_err_free; 530 + kcred->fsuid = uid; 531 + kcred->fsgid = gid; 532 + cred = RCU_INITIALIZER(kcred); 533 + 534 + if (lgr->range.iomode == IOMODE_READ) 535 + rcu_assign_pointer(dss_info->ro_cred, cred); 536 + else 537 + rcu_assign_pointer(dss_info->rw_cred, cred); 555 538 } 556 - 557 - fls->mirror_array[i]->fh_versions_cnt = fh_count; 558 - 559 - /* user */ 560 - rc = decode_name(&stream, &id); 561 - if (rc) 562 - goto out_err_free; 563 - 564 - uid = make_kuid(&init_user_ns, id); 565 - 566 - /* group */ 567 - rc = decode_name(&stream, &id); 568 - if (rc) 569 - goto out_err_free; 570 - 571 - gid = make_kgid(&init_user_ns, id); 572 - 573 - 
if (gfp_flags & __GFP_FS) 574 - kcred = prepare_kernel_cred(&init_task); 575 - else { 576 - unsigned int nofs_flags = memalloc_nofs_save(); 577 - kcred = prepare_kernel_cred(&init_task); 578 - memalloc_nofs_restore(nofs_flags); 579 - } 580 - rc = -ENOMEM; 581 - if (!kcred) 582 - goto out_err_free; 583 - kcred->fsuid = uid; 584 - kcred->fsgid = gid; 585 - cred = RCU_INITIALIZER(kcred); 586 - 587 - if (lgr->range.iomode == IOMODE_READ) 588 - rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); 589 - else 590 - rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); 591 539 592 540 mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]); 593 541 if (mirror != fls->mirror_array[i]) { 594 - /* swap cred ptrs so free_mirror will clean up old */ 595 - if (lgr->range.iomode == IOMODE_READ) { 596 - cred = xchg(&mirror->ro_cred, cred); 597 - rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); 598 - } else { 599 - cred = xchg(&mirror->rw_cred, cred); 600 - rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); 542 + for (dss_id = 0; dss_id < dss_count; dss_id++) { 543 + dss_info = &fls->mirror_array[i]->dss[dss_id]; 544 + /* swap cred ptrs so free_mirror will clean up old */ 545 + if (lgr->range.iomode == IOMODE_READ) { 546 + cred = xchg(&mirror->dss[dss_id].ro_cred, 547 + dss_info->ro_cred); 548 + rcu_assign_pointer(dss_info->ro_cred, cred); 549 + } else { 550 + cred = xchg(&mirror->dss[dss_id].rw_cred, 551 + dss_info->rw_cred); 552 + rcu_assign_pointer(dss_info->rw_cred, cred); 553 + } 601 554 } 602 555 ff_layout_free_mirror(fls->mirror_array[i]); 603 556 fls->mirror_array[i] = mirror; ··· 639 564 ret = &fls->generic_hdr; 640 565 dprintk("<-- %s (success)\n", __func__); 641 566 out_free_page: 642 - __free_page(scratch); 567 + folio_put(scratch); 643 568 return ret; 644 569 out_err_free: 645 570 _ff_layout_free_lseg(fls); ··· 668 593 _ff_layout_free_lseg(fls); 669 594 } 670 595 596 + static u32 calc_commit_idx(struct pnfs_layout_segment *lseg, 597 + u32 
mirror_idx, u32 dss_id) 598 + { 599 + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); 600 + 601 + return (mirror_idx * flseg->mirror_array[0]->dss_count) + dss_id; 602 + } 603 + 604 + static u32 calc_mirror_idx_from_commit(struct pnfs_layout_segment *lseg, 605 + u32 commit_index) 606 + { 607 + return commit_index / FF_LAYOUT_LSEG(lseg)->mirror_array[0]->dss_count; 608 + } 609 + 610 + static u32 calc_dss_id_from_commit(struct pnfs_layout_segment *lseg, 611 + u32 commit_index) 612 + { 613 + return commit_index % FF_LAYOUT_LSEG(lseg)->mirror_array[0]->dss_count; 614 + } 615 + 671 616 static void 672 617 nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer, ktime_t now) 673 618 { ··· 712 617 713 618 static bool 714 619 nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, 620 + u32 dss_id, 715 621 struct nfs4_ff_layoutstat *layoutstat, 716 622 ktime_t now) 717 623 { ··· 720 624 struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout); 721 625 722 626 nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now); 723 - if (!mirror->start_time) 724 - mirror->start_time = now; 627 + if (!mirror->dss[dss_id].start_time) 628 + mirror->dss[dss_id].start_time = now; 725 629 if (mirror->report_interval != 0) 726 630 report_interval = (s64)mirror->report_interval * 1000LL; 727 631 else if (layoutstats_timer != 0) ··· 771 675 static void 772 676 nfs4_ff_layout_stat_io_start_read(struct inode *inode, 773 677 struct nfs4_ff_layout_mirror *mirror, 678 + u32 dss_id, 774 679 __u64 requested, ktime_t now) 775 680 { 776 681 bool report; 777 682 778 683 spin_lock(&mirror->lock); 779 - report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now); 780 - nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); 684 + report = nfs4_ff_layoutstat_start_io( 685 + mirror, dss_id, &mirror->dss[dss_id].read_stat, now); 686 + nfs4_ff_layout_stat_io_update_requested( 687 + &mirror->dss[dss_id].read_stat, requested); 781 688 
set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); 782 689 spin_unlock(&mirror->lock); 783 690 ··· 791 692 static void 792 693 nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, 793 694 struct nfs4_ff_layout_mirror *mirror, 695 + u32 dss_id, 794 696 __u64 requested, 795 697 __u64 completed) 796 698 { 797 699 spin_lock(&mirror->lock); 798 - nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, 700 + nfs4_ff_layout_stat_io_update_completed(&mirror->dss[dss_id].read_stat, 799 701 requested, completed, 800 702 ktime_get(), task->tk_start); 801 703 set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); ··· 806 706 static void 807 707 nfs4_ff_layout_stat_io_start_write(struct inode *inode, 808 708 struct nfs4_ff_layout_mirror *mirror, 709 + u32 dss_id, 809 710 __u64 requested, ktime_t now) 810 711 { 811 712 bool report; 812 713 813 714 spin_lock(&mirror->lock); 814 - report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now); 815 - nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); 715 + report = nfs4_ff_layoutstat_start_io( 716 + mirror, 717 + dss_id, 718 + &mirror->dss[dss_id].write_stat, 719 + now); 720 + nfs4_ff_layout_stat_io_update_requested( 721 + &mirror->dss[dss_id].write_stat, 722 + requested); 816 723 set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); 817 724 spin_unlock(&mirror->lock); 818 725 ··· 830 723 static void 831 724 nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, 832 725 struct nfs4_ff_layout_mirror *mirror, 726 + u32 dss_id, 833 727 __u64 requested, 834 728 __u64 completed, 835 729 enum nfs3_stable_how committed) ··· 839 731 requested = completed = 0; 840 732 841 733 spin_lock(&mirror->lock); 842 - nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, 734 + nfs4_ff_layout_stat_io_update_completed(&mirror->dss[dss_id].write_stat, 843 735 requested, completed, ktime_get(), task->tk_start); 844 736 set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); 845 737 spin_unlock(&mirror->lock); 846 738 } 
847 739 848 740 static void 849 - ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx) 741 + ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id) 850 742 { 851 - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); 743 + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); 852 744 853 745 if (devid) 854 746 nfs4_mark_deviceid_unavailable(devid); 855 747 } 856 748 857 749 static void 858 - ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx) 750 + ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id) 859 751 { 860 - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); 752 + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); 861 753 862 754 if (devid) 863 755 nfs4_mark_deviceid_available(devid); ··· 866 758 static struct nfs4_pnfs_ds * 867 759 ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, 868 760 u32 start_idx, u32 *best_idx, 761 + u32 offset, u32 *dss_id, 869 762 bool check_device) 870 763 { 871 764 struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); ··· 877 768 /* mirrors are initially sorted by efficiency */ 878 769 for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { 879 770 mirror = FF_LAYOUT_COMP(lseg, idx); 880 - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); 771 + *dss_id = nfs4_ff_layout_calc_dss_id( 772 + fls->stripe_unit, 773 + fls->mirror_array[idx]->dss_count, 774 + offset); 775 + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, *dss_id, false); 881 776 if (IS_ERR(ds)) 882 777 continue; 883 778 884 779 if (check_device && 885 - nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) { 780 + nfs4_test_deviceid_unavailable(&mirror->dss[*dss_id].mirror_ds->id_node)) { 886 781 // reinitialize the error state in case if this is the last iteration 887 782 ds = ERR_PTR(-EINVAL); 888 783 continue; ··· 901 788 902 789 static struct nfs4_pnfs_ds * 903 
790 ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, 904 - u32 start_idx, u32 *best_idx) 791 + u32 start_idx, u32 *best_idx, 792 + u32 offset, u32 *dss_id) 905 793 { 906 - return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); 794 + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, 795 + offset, dss_id, false); 907 796 } 908 797 909 798 static struct nfs4_pnfs_ds * 910 799 ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, 911 - u32 start_idx, u32 *best_idx) 800 + u32 start_idx, u32 *best_idx, 801 + u32 offset, u32 *dss_id) 912 802 { 913 - return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); 803 + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, 804 + offset, dss_id, true); 914 805 } 915 806 916 807 static struct nfs4_pnfs_ds * 917 808 ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, 918 - u32 start_idx, u32 *best_idx) 809 + u32 start_idx, u32 *best_idx, 810 + u32 offset, u32 *dss_id) 919 811 { 920 812 struct nfs4_pnfs_ds *ds; 921 813 922 - ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); 814 + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx, 815 + offset, dss_id); 923 816 if (!IS_ERR(ds)) 924 817 return ds; 925 - return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); 818 + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx, 819 + offset, dss_id); 926 820 } 927 821 928 822 static struct nfs4_pnfs_ds * 929 823 ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, 930 - u32 *best_idx) 824 + u32 *best_idx, 825 + u32 offset, 826 + u32 *dss_id) 931 827 { 932 828 struct pnfs_layout_segment *lseg = pgio->pg_lseg; 933 829 struct nfs4_pnfs_ds *ds; 934 830 935 831 ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, 936 - best_idx); 832 + best_idx, offset, dss_id); 937 833 if (!IS_ERR(ds) || !pgio->pg_mirror_idx) 938 834 return ds; 939 - return 
ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); 835 + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx, 836 + offset, dss_id); 940 837 } 941 838 942 839 static void ··· 965 842 } 966 843 } 967 844 845 + static bool 846 + ff_layout_lseg_is_striped(const struct nfs4_ff_layout_segment *fls) 847 + { 848 + return fls->mirror_array[0]->dss_count > 1; 849 + } 850 + 851 + /* 852 + * ff_layout_pg_test(). Called by nfs_can_coalesce_requests() 853 + * 854 + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 855 + * of bytes (maximum @req->wb_bytes) that can be coalesced. 856 + */ 857 + static size_t 858 + ff_layout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 859 + struct nfs_page *req) 860 + { 861 + unsigned int size; 862 + u64 p_stripe, r_stripe; 863 + u32 stripe_offset; 864 + u64 segment_offset = pgio->pg_lseg->pls_range.offset; 865 + u32 stripe_unit = FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 866 + 867 + /* calls nfs_generic_pg_test */ 868 + size = pnfs_generic_pg_test(pgio, prev, req); 869 + if (!size) 870 + return 0; 871 + else if (!ff_layout_lseg_is_striped(FF_LAYOUT_LSEG(pgio->pg_lseg))) 872 + return size; 873 + 874 + /* see if req and prev are in the same stripe */ 875 + if (prev) { 876 + p_stripe = (u64)req_offset(prev) - segment_offset; 877 + r_stripe = (u64)req_offset(req) - segment_offset; 878 + do_div(p_stripe, stripe_unit); 879 + do_div(r_stripe, stripe_unit); 880 + 881 + if (p_stripe != r_stripe) 882 + return 0; 883 + } 884 + 885 + /* calculate remaining bytes in the current stripe */ 886 + div_u64_rem((u64)req_offset(req) - segment_offset, 887 + stripe_unit, 888 + &stripe_offset); 889 + WARN_ON_ONCE(stripe_offset > stripe_unit); 890 + if (stripe_offset >= stripe_unit) 891 + return 0; 892 + return min(stripe_unit - (unsigned int)stripe_offset, size); 893 + } 894 + 968 895 static void 969 896 ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, 970 897 struct nfs_page *req) ··· 1022 849 
struct nfs_pgio_mirror *pgm; 1023 850 struct nfs4_ff_layout_mirror *mirror; 1024 851 struct nfs4_pnfs_ds *ds; 1025 - u32 ds_idx; 852 + u32 ds_idx, dss_id; 1026 853 1027 854 if (NFS_SERVER(pgio->pg_inode)->flags & 1028 855 (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) ··· 1043 870 /* Reset wb_nio, since getting layout segment was successful */ 1044 871 req->wb_nio = 0; 1045 872 1046 - ds = ff_layout_get_ds_for_read(pgio, &ds_idx); 873 + ds = ff_layout_get_ds_for_read(pgio, &ds_idx, 874 + req_offset(req), &dss_id); 1047 875 if (IS_ERR(ds)) { 1048 876 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) 1049 877 goto out_mds; ··· 1056 882 1057 883 mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); 1058 884 pgm = &pgio->pg_mirrors[0]; 1059 - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; 885 + pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize; 1060 886 1061 887 pgio->pg_mirror_idx = ds_idx; 1062 888 return; ··· 1093 919 struct nfs4_ff_layout_mirror *mirror; 1094 920 struct nfs_pgio_mirror *pgm; 1095 921 struct nfs4_pnfs_ds *ds; 1096 - u32 i; 922 + u32 i, dss_id; 1097 923 1098 924 retry: 1099 925 pnfs_generic_pg_check_layout(pgio, req); ··· 1118 944 1119 945 for (i = 0; i < pgio->pg_mirror_count; i++) { 1120 946 mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); 1121 - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true); 947 + dss_id = nfs4_ff_layout_calc_dss_id( 948 + FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit, 949 + mirror->dss_count, 950 + req_offset(req)); 951 + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, 952 + dss_id, true); 1122 953 if (IS_ERR(ds)) { 1123 954 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) 1124 955 goto out_mds; ··· 1133 954 goto retry; 1134 955 } 1135 956 pgm = &pgio->pg_mirrors[i]; 1136 - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; 957 + pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize; 1137 958 } 1138 959 1139 960 if (NFS_SERVER(pgio->pg_inode)->flags & ··· 1199 1020 1200 1021 static const 
struct nfs_pageio_ops ff_layout_pg_read_ops = { 1201 1022 .pg_init = ff_layout_pg_init_read, 1202 - .pg_test = pnfs_generic_pg_test, 1023 + .pg_test = ff_layout_pg_test, 1203 1024 .pg_doio = pnfs_generic_pg_readpages, 1204 1025 .pg_cleanup = pnfs_generic_pg_cleanup, 1205 1026 }; 1206 1027 1207 1028 static const struct nfs_pageio_ops ff_layout_pg_write_ops = { 1208 1029 .pg_init = ff_layout_pg_init_write, 1209 - .pg_test = pnfs_generic_pg_test, 1030 + .pg_test = ff_layout_pg_test, 1210 1031 .pg_doio = pnfs_generic_pg_writepages, 1211 1032 .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, 1212 1033 .pg_cleanup = pnfs_generic_pg_cleanup, ··· 1254 1075 { 1255 1076 u32 idx = hdr->pgio_mirror_idx + 1; 1256 1077 u32 new_idx = 0; 1078 + u32 dss_id = 0; 1257 1079 struct nfs4_pnfs_ds *ds; 1258 1080 1259 - ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); 1081 + ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx, 1082 + hdr->args.offset, &dss_id); 1260 1083 if (IS_ERR(ds)) 1261 1084 pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); 1262 1085 else ··· 1295 1114 struct nfs4_state *state, 1296 1115 struct nfs_client *clp, 1297 1116 struct pnfs_layout_segment *lseg, 1298 - u32 idx) 1117 + u32 idx, u32 dss_id) 1299 1118 { 1300 1119 struct pnfs_layout_hdr *lo = lseg->pls_layout; 1301 1120 struct inode *inode = lo->plh_inode; 1302 - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); 1121 + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); 1303 1122 struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; 1304 1123 1305 1124 switch (op_status) { ··· 1396 1215 u32 op_status, 1397 1216 struct nfs_client *clp, 1398 1217 struct pnfs_layout_segment *lseg, 1399 - u32 idx) 1218 + u32 idx, u32 dss_id) 1400 1219 { 1401 - struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); 1220 + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx, dss_id); 1402 1221 1403 1222 switch 
(op_status) { 1404 1223 case NFS_OK: ··· 1462 1281 struct nfs4_state *state, 1463 1282 struct nfs_client *clp, 1464 1283 struct pnfs_layout_segment *lseg, 1465 - u32 idx) 1284 + u32 idx, u32 dss_id) 1466 1285 { 1467 1286 int vers = clp->cl_nfs_mod->rpc_vers->number; 1468 1287 1469 1288 if (task->tk_status >= 0) { 1470 - ff_layout_mark_ds_reachable(lseg, idx); 1289 + ff_layout_mark_ds_reachable(lseg, idx, dss_id); 1471 1290 return 0; 1472 1291 } 1473 1292 ··· 1478 1297 switch (vers) { 1479 1298 case 3: 1480 1299 return ff_layout_async_handle_error_v3(task, op_status, clp, 1481 - lseg, idx); 1300 + lseg, idx, dss_id); 1482 1301 case 4: 1483 1302 return ff_layout_async_handle_error_v4(task, op_status, state, 1484 - clp, lseg, idx); 1303 + clp, lseg, idx, dss_id); 1485 1304 default: 1486 1305 /* should never happen */ 1487 1306 WARN_ON_ONCE(1); ··· 1490 1309 } 1491 1310 1492 1311 static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, 1493 - u32 idx, u64 offset, u64 length, 1312 + u32 idx, u32 dss_id, u64 offset, u64 length, 1494 1313 u32 *op_status, int opnum, int error) 1495 1314 { 1496 1315 struct nfs4_ff_layout_mirror *mirror; ··· 1528 1347 1529 1348 mirror = FF_LAYOUT_COMP(lseg, idx); 1530 1349 err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 1531 - mirror, offset, length, status, opnum, 1350 + mirror, dss_id, offset, length, status, opnum, 1532 1351 nfs_io_gfp_mask()); 1533 1352 1534 1353 switch (status) { ··· 1537 1356 case NFS4ERR_PERM: 1538 1357 break; 1539 1358 case NFS4ERR_NXIO: 1540 - ff_layout_mark_ds_unreachable(lseg, idx); 1359 + ff_layout_mark_ds_unreachable(lseg, idx, dss_id); 1541 1360 /* 1542 1361 * Don't return the layout if this is a read and we still 1543 1362 * have layouts to try ··· 1557 1376 static int ff_layout_read_done_cb(struct rpc_task *task, 1558 1377 struct nfs_pgio_header *hdr) 1559 1378 { 1379 + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(hdr->lseg); 1380 + u32 dss_id = 
nfs4_ff_layout_calc_dss_id( 1381 + flseg->stripe_unit, 1382 + flseg->mirror_array[hdr->pgio_mirror_idx]->dss_count, 1383 + hdr->args.offset); 1560 1384 int err; 1561 1385 1562 1386 if (task->tk_status < 0) { 1563 - ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, 1387 + ff_layout_io_track_ds_error(hdr->lseg, 1388 + hdr->pgio_mirror_idx, dss_id, 1564 1389 hdr->args.offset, hdr->args.count, 1565 1390 &hdr->res.op_status, OP_READ, 1566 1391 task->tk_status); ··· 1576 1389 err = ff_layout_async_handle_error(task, hdr->res.op_status, 1577 1390 hdr->args.context->state, 1578 1391 hdr->ds_clp, hdr->lseg, 1579 - hdr->pgio_mirror_idx); 1392 + hdr->pgio_mirror_idx, 1393 + dss_id); 1580 1394 1581 1395 trace_nfs4_pnfs_read(hdr, err); 1582 1396 clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); ··· 1633 1445 static void ff_layout_read_record_layoutstats_start(struct rpc_task *task, 1634 1446 struct nfs_pgio_header *hdr) 1635 1447 { 1448 + struct nfs4_ff_layout_mirror *mirror; 1449 + u32 dss_id; 1450 + 1636 1451 if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) 1637 1452 return; 1638 - nfs4_ff_layout_stat_io_start_read(hdr->inode, 1639 - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), 1640 - hdr->args.count, 1641 - task->tk_start); 1453 + 1454 + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); 1455 + dss_id = nfs4_ff_layout_calc_dss_id( 1456 + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, 1457 + mirror->dss_count, 1458 + hdr->args.offset); 1459 + 1460 + nfs4_ff_layout_stat_io_start_read( 1461 + hdr->inode, 1462 + mirror, 1463 + dss_id, 1464 + hdr->args.count, 1465 + task->tk_start); 1642 1466 } 1643 1467 1644 1468 static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, 1645 1469 struct nfs_pgio_header *hdr) 1646 1470 { 1471 + struct nfs4_ff_layout_mirror *mirror; 1472 + u32 dss_id; 1473 + 1647 1474 if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) 1648 1475 return; 1649 - nfs4_ff_layout_stat_io_end_read(task, 1650 - FF_LAYOUT_COMP(hdr->lseg, 
hdr->pgio_mirror_idx), 1651 - hdr->args.count, 1652 - hdr->res.count); 1476 + 1477 + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); 1478 + dss_id = nfs4_ff_layout_calc_dss_id( 1479 + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, 1480 + mirror->dss_count, 1481 + hdr->args.offset); 1482 + 1483 + nfs4_ff_layout_stat_io_end_read( 1484 + task, 1485 + mirror, 1486 + dss_id, 1487 + hdr->args.count, 1488 + hdr->res.count); 1653 1489 set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags); 1654 1490 } 1655 1491 ··· 1761 1549 static int ff_layout_write_done_cb(struct rpc_task *task, 1762 1550 struct nfs_pgio_header *hdr) 1763 1551 { 1552 + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(hdr->lseg); 1553 + u32 dss_id = nfs4_ff_layout_calc_dss_id( 1554 + flseg->stripe_unit, 1555 + flseg->mirror_array[hdr->pgio_mirror_idx]->dss_count, 1556 + hdr->args.offset); 1764 1557 loff_t end_offs = 0; 1765 1558 int err; 1766 1559 1767 1560 if (task->tk_status < 0) { 1768 - ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, 1561 + ff_layout_io_track_ds_error(hdr->lseg, 1562 + hdr->pgio_mirror_idx, dss_id, 1769 1563 hdr->args.offset, hdr->args.count, 1770 1564 &hdr->res.op_status, OP_WRITE, 1771 1565 task->tk_status); ··· 1781 1563 err = ff_layout_async_handle_error(task, hdr->res.op_status, 1782 1564 hdr->args.context->state, 1783 1565 hdr->ds_clp, hdr->lseg, 1784 - hdr->pgio_mirror_idx); 1566 + hdr->pgio_mirror_idx, 1567 + dss_id); 1785 1568 1786 1569 trace_nfs4_pnfs_write(hdr, err); 1787 1570 clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); ··· 1820 1601 struct nfs_commit_data *data) 1821 1602 { 1822 1603 int err; 1604 + u32 idx = calc_mirror_idx_from_commit(data->lseg, data->ds_commit_index); 1605 + u32 dss_id = calc_dss_id_from_commit(data->lseg, data->ds_commit_index); 1823 1606 1824 1607 if (task->tk_status < 0) { 1825 - ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, 1608 + ff_layout_io_track_ds_error(data->lseg, idx, dss_id, 1826 1609 
data->args.offset, data->args.count, 1827 1610 &data->res.op_status, OP_COMMIT, 1828 1611 task->tk_status); ··· 1832 1611 } 1833 1612 1834 1613 err = ff_layout_async_handle_error(task, data->res.op_status, 1835 - NULL, data->ds_clp, data->lseg, 1836 - data->ds_commit_index); 1614 + NULL, data->ds_clp, data->lseg, idx, 1615 + dss_id); 1837 1616 1838 1617 trace_nfs4_pnfs_commit_ds(data, err); 1839 1618 switch (err) { ··· 1852 1631 } 1853 1632 1854 1633 ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb); 1855 - 1856 1634 return 0; 1857 1635 } 1858 1636 1859 1637 static void ff_layout_write_record_layoutstats_start(struct rpc_task *task, 1860 1638 struct nfs_pgio_header *hdr) 1861 1639 { 1640 + struct nfs4_ff_layout_mirror *mirror; 1641 + u32 dss_id; 1642 + 1862 1643 if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) 1863 1644 return; 1864 - nfs4_ff_layout_stat_io_start_write(hdr->inode, 1865 - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), 1866 - hdr->args.count, 1867 - task->tk_start); 1645 + 1646 + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); 1647 + dss_id = nfs4_ff_layout_calc_dss_id( 1648 + FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, 1649 + mirror->dss_count, 1650 + hdr->args.offset); 1651 + 1652 + nfs4_ff_layout_stat_io_start_write( 1653 + hdr->inode, 1654 + mirror, 1655 + dss_id, 1656 + hdr->args.count, 1657 + task->tk_start); 1868 1658 } 1869 1659 1870 1660 static void ff_layout_write_record_layoutstats_done(struct rpc_task *task, 1871 1661 struct nfs_pgio_header *hdr) 1872 1662 { 1663 + struct nfs4_ff_layout_mirror *mirror; 1664 + u32 dss_id; 1665 + 1873 1666 if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) 1874 1667 return; 1875 - nfs4_ff_layout_stat_io_end_write(task, 1876 - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), 1877 - hdr->args.count, hdr->res.count, 1878 - hdr->res.verf->committed); 1668 + 1669 + mirror = FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx); 1670 + dss_id = nfs4_ff_layout_calc_dss_id( 1671 + 
FF_LAYOUT_LSEG(hdr->lseg)->stripe_unit, 1672 + mirror->dss_count, 1673 + hdr->args.offset); 1674 + 1675 + nfs4_ff_layout_stat_io_end_write( 1676 + task, 1677 + mirror, 1678 + dss_id, 1679 + hdr->args.count, 1680 + hdr->res.count, 1681 + hdr->res.verf->committed); 1879 1682 set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags); 1880 1683 } 1881 1684 ··· 1982 1737 static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task, 1983 1738 struct nfs_commit_data *cdata) 1984 1739 { 1740 + u32 idx, dss_id; 1741 + 1985 1742 if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags)) 1986 1743 return; 1744 + 1745 + idx = calc_mirror_idx_from_commit(cdata->lseg, cdata->ds_commit_index); 1746 + dss_id = calc_dss_id_from_commit(cdata->lseg, cdata->ds_commit_index); 1987 1747 nfs4_ff_layout_stat_io_start_write(cdata->inode, 1988 - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), 1748 + FF_LAYOUT_COMP(cdata->lseg, idx), 1749 + dss_id, 1989 1750 0, task->tk_start); 1990 1751 } 1991 1752 ··· 2000 1749 { 2001 1750 struct nfs_page *req; 2002 1751 __u64 count = 0; 1752 + u32 idx, dss_id; 2003 1753 2004 1754 if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags)) 2005 1755 return; ··· 2009 1757 list_for_each_entry(req, &cdata->pages, wb_list) 2010 1758 count += req->wb_bytes; 2011 1759 } 1760 + 1761 + idx = calc_mirror_idx_from_commit(cdata->lseg, cdata->ds_commit_index); 1762 + dss_id = calc_dss_id_from_commit(cdata->lseg, cdata->ds_commit_index); 2012 1763 nfs4_ff_layout_stat_io_end_write(task, 2013 - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), 1764 + FF_LAYOUT_COMP(cdata->lseg, idx), 1765 + dss_id, 2014 1766 count, count, NFS_FILE_SYNC); 2015 1767 set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags); 2016 1768 } ··· 2128 1872 u32 idx = hdr->pgio_mirror_idx; 2129 1873 int vers; 2130 1874 struct nfs_fh *fh; 1875 + u32 dss_id; 2131 1876 bool ds_fatal_error = false; 2132 1877 2133 1878 dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", ··· 2136 1879 
hdr->args.pgbase, (size_t)hdr->args.count, offset); 2137 1880 2138 1881 mirror = FF_LAYOUT_COMP(lseg, idx); 2139 - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); 1882 + dss_id = nfs4_ff_layout_calc_dss_id( 1883 + FF_LAYOUT_LSEG(lseg)->stripe_unit, 1884 + mirror->dss_count, 1885 + offset); 1886 + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, false); 2140 1887 if (IS_ERR(ds)) { 2141 1888 ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); 2142 1889 goto out_failed; 2143 1890 } 2144 1891 2145 1892 ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, 2146 - hdr->inode); 1893 + hdr->inode, dss_id); 2147 1894 if (IS_ERR(ds_clnt)) 2148 1895 goto out_failed; 2149 1896 2150 - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred); 1897 + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id); 2151 1898 if (!ds_cred) 2152 1899 goto out_failed; 2153 1900 2154 - vers = nfs4_ff_layout_ds_version(mirror); 1901 + vers = nfs4_ff_layout_ds_version(mirror, dss_id); 2155 1902 2156 1903 dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, 2157 1904 ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); ··· 2163 1902 hdr->pgio_done_cb = ff_layout_read_done_cb; 2164 1903 refcount_inc(&ds->ds_clp->cl_count); 2165 1904 hdr->ds_clp = ds->ds_clp; 2166 - fh = nfs4_ff_layout_select_ds_fh(mirror); 1905 + fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id); 2167 1906 if (fh) 2168 1907 hdr->args.fh = fh; 2169 1908 2170 - nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid); 1909 + nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid); 2171 1910 2172 1911 /* 2173 1912 * Note that if we ever decide to split across DSes, ··· 2177 1916 hdr->mds_offset = offset; 2178 1917 2179 1918 /* Start IO accounting for local read */ 2180 - localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, FMODE_READ); 1919 + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, 1920 + FMODE_READ); 2181 1921 if 
(localio) { 2182 1922 hdr->task.tk_start = ktime_get(); 2183 1923 ff_layout_read_record_layoutstats_start(&hdr->task, hdr); ··· 2215 1953 int vers; 2216 1954 struct nfs_fh *fh; 2217 1955 u32 idx = hdr->pgio_mirror_idx; 1956 + u32 dss_id; 2218 1957 bool ds_fatal_error = false; 2219 1958 2220 1959 mirror = FF_LAYOUT_COMP(lseg, idx); 2221 - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); 1960 + dss_id = nfs4_ff_layout_calc_dss_id( 1961 + FF_LAYOUT_LSEG(lseg)->stripe_unit, 1962 + mirror->dss_count, 1963 + offset); 1964 + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true); 2222 1965 if (IS_ERR(ds)) { 2223 1966 ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); 2224 1967 goto out_failed; 2225 1968 } 2226 1969 2227 1970 ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, 2228 - hdr->inode); 1971 + hdr->inode, dss_id); 2229 1972 if (IS_ERR(ds_clnt)) 2230 1973 goto out_failed; 2231 1974 2232 - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred); 1975 + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id); 2233 1976 if (!ds_cred) 2234 1977 goto out_failed; 2235 1978 2236 - vers = nfs4_ff_layout_ds_version(mirror); 1979 + vers = nfs4_ff_layout_ds_version(mirror, dss_id); 2237 1980 2238 1981 dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", 2239 1982 __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, ··· 2248 1981 hdr->pgio_done_cb = ff_layout_write_done_cb; 2249 1982 refcount_inc(&ds->ds_clp->cl_count); 2250 1983 hdr->ds_clp = ds->ds_clp; 2251 - hdr->ds_commit_idx = idx; 2252 - fh = nfs4_ff_layout_select_ds_fh(mirror); 1984 + hdr->ds_commit_idx = calc_commit_idx(lseg, idx, dss_id); 1985 + fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id); 2253 1986 if (fh) 2254 1987 hdr->args.fh = fh; 2255 1988 2256 - nfs4_ff_layout_select_ds_stateid(mirror, &hdr->args.stateid); 1989 + nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid); 2257 1990 2258 1991 /* 2259 1992 * Note that 
if we ever decide to split across DSes, ··· 2262 1995 hdr->args.offset = offset; 2263 1996 2264 1997 /* Start IO accounting for local write */ 2265 - localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, 1998 + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, 2266 1999 FMODE_READ|FMODE_WRITE); 2267 2000 if (localio) { 2268 2001 hdr->task.tk_start = ktime_get(); ··· 2286 2019 return PNFS_NOT_ATTEMPTED; 2287 2020 } 2288 2021 2289 - static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 2290 - { 2291 - return i; 2292 - } 2293 - 2294 2022 static struct nfs_fh * 2295 - select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) 2023 + select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i, u32 dss_id) 2296 2024 { 2297 2025 struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); 2298 2026 2299 2027 /* FIXME: Assume that there is only one NFS version available 2300 2028 * for the DS. 2301 2029 */ 2302 - return &flseg->mirror_array[i]->fh_versions[0]; 2030 + return &flseg->mirror_array[i]->dss[dss_id].fh_versions[0]; 2303 2031 } 2304 2032 2305 2033 static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) ··· 2305 2043 struct nfsd_file *localio; 2306 2044 struct nfs4_ff_layout_mirror *mirror; 2307 2045 const struct cred *ds_cred; 2308 - u32 idx; 2046 + u32 idx, dss_id; 2309 2047 int vers, ret; 2310 2048 struct nfs_fh *fh; 2311 2049 ··· 2313 2051 test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))) 2314 2052 goto out_err; 2315 2053 2316 - idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); 2054 + idx = calc_mirror_idx_from_commit(lseg, data->ds_commit_index); 2317 2055 mirror = FF_LAYOUT_COMP(lseg, idx); 2318 - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); 2056 + dss_id = calc_dss_id_from_commit(lseg, data->ds_commit_index); 2057 + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, true); 2319 2058 if (IS_ERR(ds)) 2320 2059 goto out_err; 2321 2060 2322 2061 ds_clnt 
= nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, 2323 - data->inode); 2062 + data->inode, dss_id); 2324 2063 if (IS_ERR(ds_clnt)) 2325 2064 goto out_err; 2326 2065 2327 - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred); 2066 + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, data->cred, dss_id); 2328 2067 if (!ds_cred) 2329 2068 goto out_err; 2330 2069 2331 - vers = nfs4_ff_layout_ds_version(mirror); 2070 + vers = nfs4_ff_layout_ds_version(mirror, dss_id); 2332 2071 2333 2072 dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, 2334 2073 data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count), ··· 2338 2075 data->cred = ds_cred; 2339 2076 refcount_inc(&ds->ds_clp->cl_count); 2340 2077 data->ds_clp = ds->ds_clp; 2341 - fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); 2078 + fh = select_ds_fh_from_commit(lseg, idx, dss_id); 2342 2079 if (fh) 2343 2080 data->args.fh = fh; 2344 2081 2345 2082 /* Start IO accounting for local commit */ 2346 - localio = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, 2083 + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, 2347 2084 FMODE_READ|FMODE_WRITE); 2348 2085 if (localio) { 2349 2086 data->task.tk_start = ktime_get(); ··· 2407 2144 struct nfs4_pnfs_ds *ds; 2408 2145 struct nfs_client *ds_clp; 2409 2146 struct rpc_clnt *clnt; 2410 - u32 idx; 2147 + u32 idx, dss_id; 2411 2148 2412 2149 for (idx = 0; idx < flseg->mirror_array_cnt; idx++) { 2413 2150 mirror = flseg->mirror_array[idx]; 2414 - mirror_ds = mirror->mirror_ds; 2415 - if (IS_ERR_OR_NULL(mirror_ds)) 2416 - continue; 2417 - ds = mirror->mirror_ds->ds; 2418 - if (!ds) 2419 - continue; 2420 - ds_clp = ds->ds_clp; 2421 - if (!ds_clp) 2422 - continue; 2423 - clnt = ds_clp->cl_rpcclient; 2424 - if (!clnt) 2425 - continue; 2426 - if (!rpc_cancel_tasks(clnt, -EAGAIN, ff_layout_match_io, lseg)) 2427 - continue; 2428 - rpc_clnt_disconnect(clnt); 2151 + for (dss_id = 0; dss_id < mirror->dss_count; 
dss_id++) { 2152 + mirror_ds = mirror->dss[dss_id].mirror_ds; 2153 + if (IS_ERR_OR_NULL(mirror_ds)) 2154 + continue; 2155 + ds = mirror->dss[dss_id].mirror_ds->ds; 2156 + if (!ds) 2157 + continue; 2158 + ds_clp = ds->ds_clp; 2159 + if (!ds_clp) 2160 + continue; 2161 + clnt = ds_clp->cl_rpcclient; 2162 + if (!clnt) 2163 + continue; 2164 + if (!rpc_cancel_tasks(clnt, -EAGAIN, 2165 + ff_layout_match_io, lseg)) 2166 + continue; 2167 + rpc_clnt_disconnect(clnt); 2168 + } 2429 2169 } 2430 2170 } 2431 2171 ··· 2450 2184 struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); 2451 2185 struct inode *inode = lseg->pls_layout->plh_inode; 2452 2186 struct pnfs_commit_array *array, *new; 2187 + u32 size = flseg->mirror_array_cnt * flseg->mirror_array[0]->dss_count; 2453 2188 2454 - new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, 2189 + new = pnfs_alloc_commit_array(size, 2455 2190 nfs_io_gfp_mask()); 2456 2191 if (new) { 2457 2192 spin_lock(&inode->i_lock); ··· 2816 2549 static void 2817 2550 ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, 2818 2551 const struct nfs42_layoutstat_devinfo *devinfo, 2819 - struct nfs4_ff_layout_mirror *mirror) 2552 + struct nfs4_ff_layout_ds_stripe *dss_info) 2820 2553 { 2821 2554 struct nfs4_pnfs_ds_addr *da; 2822 - struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; 2823 - struct nfs_fh *fh = &mirror->fh_versions[0]; 2555 + struct nfs4_pnfs_ds *ds = dss_info->mirror_ds->ds; 2556 + struct nfs_fh *fh = &dss_info->fh_versions[0]; 2824 2557 __be32 *p; 2825 2558 2826 2559 da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); ··· 2832 2565 p = xdr_reserve_space(xdr, 4 + fh->size); 2833 2566 xdr_encode_opaque(p, fh->data, fh->size); 2834 2567 /* ff_io_latency4 read */ 2835 - spin_lock(&mirror->lock); 2836 - ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); 2568 + spin_lock(&dss_info->mirror->lock); 2569 + ff_layout_encode_io_latency(xdr, 2570 + &dss_info->read_stat.io_stat); 2837 2571 /* ff_io_latency4 
write */ 2838 - ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); 2839 - spin_unlock(&mirror->lock); 2572 + ff_layout_encode_io_latency(xdr, 2573 + &dss_info->write_stat.io_stat); 2574 + spin_unlock(&dss_info->mirror->lock); 2840 2575 /* nfstime4 */ 2841 - ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); 2576 + ff_layout_encode_nfstime(xdr, 2577 + ktime_sub(ktime_get(), 2578 + dss_info->start_time)); 2842 2579 /* bool */ 2843 2580 p = xdr_reserve_space(xdr, 4); 2844 2581 *p = cpu_to_be32(false); ··· 2866 2595 static void 2867 2596 ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque) 2868 2597 { 2869 - struct nfs4_ff_layout_mirror *mirror = opaque->data; 2598 + struct nfs4_ff_layout_ds_stripe *dss_info = opaque->data; 2599 + struct nfs4_ff_layout_mirror *mirror = dss_info->mirror; 2870 2600 2871 2601 ff_layout_put_mirror(mirror); 2872 2602 } ··· 2884 2612 { 2885 2613 struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo); 2886 2614 struct nfs4_ff_layout_mirror *mirror; 2615 + struct nfs4_ff_layout_ds_stripe *dss_info; 2887 2616 struct nfs4_deviceid_node *dev; 2888 - int i = 0; 2617 + int i = 0, dss_id; 2889 2618 2890 2619 list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { 2891 - if (i >= dev_limit) 2892 - break; 2893 - if (IS_ERR_OR_NULL(mirror->mirror_ds)) 2894 - continue; 2895 - if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, 2896 - &mirror->flags) && 2897 - type != NFS4_FF_OP_LAYOUTRETURN) 2898 - continue; 2899 - /* mirror refcount put in cleanup_layoutstats */ 2900 - if (!refcount_inc_not_zero(&mirror->ref)) 2901 - continue; 2902 - dev = &mirror->mirror_ds->id_node; 2903 - memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); 2904 - devinfo->offset = 0; 2905 - devinfo->length = NFS4_MAX_UINT64; 2906 - spin_lock(&mirror->lock); 2907 - devinfo->read_count = mirror->read_stat.io_stat.ops_completed; 2908 - devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; 2909 - 
devinfo->write_count = mirror->write_stat.io_stat.ops_completed; 2910 - devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; 2911 - spin_unlock(&mirror->lock); 2912 - devinfo->layout_type = LAYOUT_FLEX_FILES; 2913 - devinfo->ld_private.ops = &layoutstat_ops; 2914 - devinfo->ld_private.data = mirror; 2620 + for (dss_id = 0; dss_id < mirror->dss_count; ++dss_id) { 2621 + dss_info = &mirror->dss[dss_id]; 2622 + if (i >= dev_limit) 2623 + break; 2624 + if (IS_ERR_OR_NULL(dss_info->mirror_ds)) 2625 + continue; 2626 + if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, 2627 + &mirror->flags) && 2628 + type != NFS4_FF_OP_LAYOUTRETURN) 2629 + continue; 2630 + /* mirror refcount put in cleanup_layoutstats */ 2631 + if (!refcount_inc_not_zero(&mirror->ref)) 2632 + continue; 2633 + dev = &dss_info->mirror_ds->id_node; 2634 + memcpy(&devinfo->dev_id, 2635 + &dev->deviceid, 2636 + NFS4_DEVICEID4_SIZE); 2637 + devinfo->offset = 0; 2638 + devinfo->length = NFS4_MAX_UINT64; 2639 + spin_lock(&mirror->lock); 2640 + devinfo->read_count = 2641 + dss_info->read_stat.io_stat.ops_completed; 2642 + devinfo->read_bytes = 2643 + dss_info->read_stat.io_stat.bytes_completed; 2644 + devinfo->write_count = 2645 + dss_info->write_stat.io_stat.ops_completed; 2646 + devinfo->write_bytes = 2647 + dss_info->write_stat.io_stat.bytes_completed; 2648 + spin_unlock(&mirror->lock); 2649 + devinfo->layout_type = LAYOUT_FLEX_FILES; 2650 + devinfo->ld_private.ops = &layoutstat_ops; 2651 + devinfo->ld_private.data = &mirror->dss[dss_id]; 2915 2652 2916 - devinfo++; 2917 - i++; 2653 + devinfo++; 2654 + i++; 2655 + } 2918 2656 } 2919 2657 return i; 2920 2658 }
+45 -19
fs/nfs/flexfilelayout/flexfilelayout.h
··· 21 21 * due to network error etc. */ 22 22 #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 23 23 24 + #define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096 25 + 24 26 /* LAYOUTSTATS report interval in ms */ 25 27 #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) 26 28 #define FF_LAYOUTSTATS_MAXDEV 4 ··· 73 71 struct nfs4_ff_busy_timer busy_timer; 74 72 }; 75 73 76 - struct nfs4_ff_layout_mirror { 77 - struct pnfs_layout_hdr *layout; 78 - struct list_head mirrors; 79 - u32 ds_count; 80 - u32 efficiency; 74 + struct nfs4_ff_layout_mirror; 75 + 76 + struct nfs4_ff_layout_ds_stripe { 77 + struct nfs4_ff_layout_mirror *mirror; 81 78 struct nfs4_deviceid devid; 79 + u32 efficiency; 82 80 struct nfs4_ff_layout_ds *mirror_ds; 83 81 u32 fh_versions_cnt; 84 82 struct nfs_fh *fh_versions; ··· 86 84 const struct cred __rcu *ro_cred; 87 85 const struct cred __rcu *rw_cred; 88 86 struct nfs_file_localio nfl; 89 - refcount_t ref; 90 - spinlock_t lock; 91 - unsigned long flags; 92 87 struct nfs4_ff_layoutstat read_stat; 93 88 struct nfs4_ff_layoutstat write_stat; 94 89 ktime_t start_time; 90 + }; 91 + 92 + struct nfs4_ff_layout_mirror { 93 + struct pnfs_layout_hdr *layout; 94 + struct list_head mirrors; 95 + u32 dss_count; 96 + struct nfs4_ff_layout_ds_stripe *dss; 97 + refcount_t ref; 98 + spinlock_t lock; 99 + unsigned long flags; 95 100 u32 report_interval; 96 101 }; 97 102 ··· 159 150 } 160 151 161 152 static inline struct nfs4_deviceid_node * 162 - FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) 153 + FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx, u32 dss_id) 163 154 { 164 155 struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx); 165 156 166 157 if (mirror != NULL) { 167 - struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds; 158 + struct nfs4_ff_layout_ds *mirror_ds = mirror->dss[dss_id].mirror_ds; 168 159 169 160 if (!IS_ERR_OR_NULL(mirror_ds)) 170 161 return &mirror_ds->id_node; ··· 191 182 } 192 183 193 184 static inline int 
194 - nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror) 185 + nfs4_ff_layout_ds_version(const struct nfs4_ff_layout_mirror *mirror, u32 dss_id) 195 186 { 196 - return mirror->mirror_ds->ds_versions[0].version; 187 + return mirror->dss[dss_id].mirror_ds->ds_versions[0].version; 188 + } 189 + 190 + static inline u32 191 + nfs4_ff_layout_calc_dss_id(const u64 stripe_unit, const u32 dss_count, const loff_t offset) 192 + { 193 + u64 tmp = offset; 194 + 195 + if (dss_count == 1 || stripe_unit == 0) 196 + return 0; 197 + 198 + do_div(tmp, stripe_unit); 199 + 200 + return do_div(tmp, dss_count); 197 201 } 198 202 199 203 struct nfs4_ff_layout_ds * ··· 215 193 void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds); 216 194 void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds); 217 195 int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, 218 - struct nfs4_ff_layout_mirror *mirror, u64 offset, 219 - u64 length, int status, enum nfs_opnum4 opnum, 220 - gfp_t gfp_flags); 196 + struct nfs4_ff_layout_mirror *mirror, 197 + u32 dss_id, u64 offset, u64 length, int status, 198 + enum nfs_opnum4 opnum, gfp_t gfp_flags); 221 199 void ff_layout_send_layouterror(struct pnfs_layout_segment *lseg); 222 200 int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head); 223 201 void ff_layout_free_ds_ioerr(struct list_head *head); ··· 226 204 struct list_head *head, 227 205 unsigned int maxnum); 228 206 struct nfs_fh * 229 - nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror); 207 + nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id); 230 208 void 231 209 nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, 232 - nfs4_stateid *stateid); 210 + u32 dss_id, 211 + nfs4_stateid *stateid); 233 212 234 213 struct nfs4_pnfs_ds * 235 214 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, 236 215 struct nfs4_ff_layout_mirror *mirror, 216 + u32 
dss_id, 237 217 bool fail_return); 238 218 239 219 struct rpc_clnt * 240 220 nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, 241 221 struct nfs_client *ds_clp, 242 - struct inode *inode); 222 + struct inode *inode, 223 + u32 dss_id); 243 224 const struct cred *ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, 244 225 const struct pnfs_layout_range *range, 245 - const struct cred *mdscred); 226 + const struct cred *mdscred, 227 + u32 dss_id); 246 228 bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); 247 229 bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); 248 230
+64 -51
fs/nfs/flexfilelayout/flexfilelayoutdev.c
··· 44 44 { 45 45 struct xdr_stream stream; 46 46 struct xdr_buf buf; 47 - struct page *scratch; 47 + struct folio *scratch; 48 48 struct list_head dsaddrs; 49 49 struct nfs4_pnfs_ds_addr *da; 50 50 struct nfs4_ff_layout_ds *new_ds = NULL; ··· 56 56 int i, ret = -ENOMEM; 57 57 58 58 /* set up xdr stream */ 59 - scratch = alloc_page(gfp_flags); 59 + scratch = folio_alloc(gfp_flags, 0); 60 60 if (!scratch) 61 61 goto out_err; 62 62 ··· 70 70 INIT_LIST_HEAD(&dsaddrs); 71 71 72 72 xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 73 - xdr_set_scratch_page(&stream, scratch); 73 + xdr_set_scratch_folio(&stream, scratch); 74 74 75 75 /* multipath count */ 76 76 p = xdr_inline_decode(&stream, 4); ··· 163 163 kfree(da); 164 164 } 165 165 166 - __free_page(scratch); 166 + folio_put(scratch); 167 167 return new_ds; 168 168 169 169 out_err_drain_dsaddrs: ··· 177 177 178 178 kfree(ds_versions); 179 179 out_scratch: 180 - __free_page(scratch); 180 + folio_put(scratch); 181 181 out_err: 182 182 kfree(new_ds); 183 183 ··· 250 250 } 251 251 252 252 int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, 253 - struct nfs4_ff_layout_mirror *mirror, u64 offset, 254 - u64 length, int status, enum nfs_opnum4 opnum, 255 - gfp_t gfp_flags) 253 + struct nfs4_ff_layout_mirror *mirror, 254 + u32 dss_id, u64 offset, u64 length, int status, 255 + enum nfs_opnum4 opnum, gfp_t gfp_flags) 256 256 { 257 257 struct nfs4_ff_layout_ds_err *dserr; 258 258 259 259 if (status == 0) 260 260 return 0; 261 261 262 - if (IS_ERR_OR_NULL(mirror->mirror_ds)) 262 + if (IS_ERR_OR_NULL(mirror->dss[dss_id].mirror_ds)) 263 263 return -EINVAL; 264 264 265 265 dserr = kmalloc(sizeof(*dserr), gfp_flags); ··· 271 271 dserr->length = length; 272 272 dserr->status = status; 273 273 dserr->opnum = opnum; 274 - nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); 275 - memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, 274 + nfs4_stateid_copy(&dserr->stateid, &mirror->dss[dss_id].stateid); 
275 + memcpy(&dserr->deviceid, &mirror->dss[dss_id].mirror_ds->id_node.deviceid, 276 276 NFS4_DEVICEID4_SIZE); 277 277 278 278 spin_lock(&flo->generic_hdr.plh_inode->i_lock); ··· 282 282 } 283 283 284 284 static const struct cred * 285 - ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) 285 + ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode, u32 dss_id) 286 286 { 287 287 const struct cred *cred, __rcu **pcred; 288 288 289 289 if (iomode == IOMODE_READ) 290 - pcred = &mirror->ro_cred; 290 + pcred = &mirror->dss[dss_id].ro_cred; 291 291 else 292 - pcred = &mirror->rw_cred; 292 + pcred = &mirror->dss[dss_id].rw_cred; 293 293 294 294 rcu_read_lock(); 295 295 do { ··· 304 304 } 305 305 306 306 struct nfs_fh * 307 - nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror) 307 + nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id) 308 308 { 309 309 /* FIXME: For now assume there is only 1 version available for the DS */ 310 - return &mirror->fh_versions[0]; 310 + return &mirror->dss[dss_id].fh_versions[0]; 311 311 } 312 312 313 313 void 314 314 nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, 315 - nfs4_stateid *stateid) 315 + u32 dss_id, 316 + nfs4_stateid *stateid) 316 317 { 317 - if (nfs4_ff_layout_ds_version(mirror) == 4) 318 - nfs4_stateid_copy(stateid, &mirror->stateid); 318 + if (nfs4_ff_layout_ds_version(mirror, dss_id) == 4) 319 + nfs4_stateid_copy(stateid, &mirror->dss[dss_id].stateid); 319 320 } 320 321 321 322 static bool 322 323 ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo, 323 - struct nfs4_ff_layout_mirror *mirror) 324 + struct nfs4_ff_layout_mirror *mirror, 325 + u32 dss_id) 324 326 { 325 327 if (mirror == NULL) 326 328 goto outerr; 327 - if (mirror->mirror_ds == NULL) { 329 + if (mirror->dss[dss_id].mirror_ds == NULL) { 328 330 struct nfs4_deviceid_node *node; 329 331 struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); 330 332 331 
333 node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), 332 - &mirror->devid, lo->plh_lc_cred, 334 + &mirror->dss[dss_id].devid, lo->plh_lc_cred, 333 335 GFP_KERNEL); 334 336 if (node) 335 337 mirror_ds = FF_LAYOUT_MIRROR_DS(node); 336 338 337 339 /* check for race with another call to this function */ 338 - if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && 340 + if (cmpxchg(&mirror->dss[dss_id].mirror_ds, NULL, mirror_ds) && 339 341 mirror_ds != ERR_PTR(-ENODEV)) 340 342 nfs4_put_deviceid_node(node); 341 343 } 342 344 343 - if (IS_ERR(mirror->mirror_ds)) 345 + if (IS_ERR(mirror->dss[dss_id].mirror_ds)) 344 346 goto outerr; 345 347 346 348 return true; ··· 354 352 * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call 355 353 * @lseg: the layout segment we're operating on 356 354 * @mirror: layout mirror describing the DS to use 355 + * @dss_id: DS stripe id to select stripe to use 357 356 * @fail_return: return layout on connect failure? 358 357 * 359 358 * Try to prepare a DS connection to accept an RPC call. This involves ··· 371 368 struct nfs4_pnfs_ds * 372 369 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, 373 370 struct nfs4_ff_layout_mirror *mirror, 371 + u32 dss_id, 374 372 bool fail_return) 375 373 { 376 374 struct nfs4_pnfs_ds *ds; ··· 380 376 unsigned int max_payload; 381 377 int status = -EAGAIN; 382 378 383 - if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) 379 + if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror, dss_id)) 384 380 goto noconnect; 385 381 386 - ds = mirror->mirror_ds->ds; 382 + ds = mirror->dss[dss_id].mirror_ds->ds; 387 383 if (READ_ONCE(ds->ds_clp)) 388 384 goto out; 389 385 /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ ··· 392 388 /* FIXME: For now we assume the server sent only one version of NFS 393 389 * to use for the DS. 
394 390 */ 395 - status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node, 391 + status = nfs4_pnfs_ds_connect(s, ds, &mirror->dss[dss_id].mirror_ds->id_node, 396 392 dataserver_timeo, dataserver_retrans, 397 - mirror->mirror_ds->ds_versions[0].version, 398 - mirror->mirror_ds->ds_versions[0].minor_version); 393 + mirror->dss[dss_id].mirror_ds->ds_versions[0].version, 394 + mirror->dss[dss_id].mirror_ds->ds_versions[0].minor_version); 399 395 400 396 /* connect success, check rsize/wsize limit */ 401 397 if (!status) { ··· 408 404 max_payload = 409 405 nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), 410 406 NULL); 411 - if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) 412 - mirror->mirror_ds->ds_versions[0].rsize = max_payload; 413 - if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) 414 - mirror->mirror_ds->ds_versions[0].wsize = max_payload; 407 + if (mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize > max_payload) 408 + mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize = max_payload; 409 + if (mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize > max_payload) 410 + mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize = max_payload; 415 411 goto out; 416 412 } 417 413 noconnect: 418 414 ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 419 - mirror, lseg->pls_range.offset, 415 + mirror, dss_id, lseg->pls_range.offset, 420 416 lseg->pls_range.length, NFS4ERR_NXIO, 421 417 OP_ILLEGAL, GFP_NOIO); 422 418 ff_layout_send_layouterror(lseg); ··· 430 426 const struct cred * 431 427 ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, 432 428 const struct pnfs_layout_range *range, 433 - const struct cred *mdscred) 429 + const struct cred *mdscred, 430 + u32 dss_id) 434 431 { 435 432 const struct cred *cred; 436 433 437 - if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { 438 - cred = ff_layout_get_mirror_cred(mirror, range->iomode); 434 + if (mirror && 
!mirror->dss[dss_id].mirror_ds->ds_versions[0].tightly_coupled) { 435 + cred = ff_layout_get_mirror_cred(mirror, range->iomode, dss_id); 439 436 if (!cred) 440 437 cred = get_cred(mdscred); 441 438 } else { ··· 450 445 * @mirror: pointer to the mirror 451 446 * @ds_clp: nfs_client for the DS 452 447 * @inode: pointer to inode 448 + * @dss_id: DS stripe id 453 449 * 454 450 * Find or create a DS rpc client with th MDS server rpc client auth flavor 455 451 * in the nfs_client cl_ds_clients list. 456 452 */ 457 453 struct rpc_clnt * 458 454 nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, 459 - struct nfs_client *ds_clp, struct inode *inode) 455 + struct nfs_client *ds_clp, struct inode *inode, 456 + u32 dss_id) 460 457 { 461 - switch (mirror->mirror_ds->ds_versions[0].version) { 458 + switch (mirror->dss[dss_id].mirror_ds->ds_versions[0].version) { 462 459 case 3: 463 460 /* For NFSv3 DS, flavor is set when creating DS connections */ 464 461 return ds_clp->cl_rpcclient; ··· 566 559 { 567 560 struct nfs4_ff_layout_mirror *mirror; 568 561 struct nfs4_deviceid_node *devid; 569 - u32 idx; 562 + u32 idx, dss_id; 570 563 571 564 for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 572 565 mirror = FF_LAYOUT_COMP(lseg, idx); 573 - if (mirror) { 574 - if (!mirror->mirror_ds) 566 + if (!mirror) 567 + continue; 568 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { 569 + if (!mirror->dss[dss_id].mirror_ds) 575 570 return true; 576 - if (IS_ERR(mirror->mirror_ds)) 571 + if (IS_ERR(mirror->dss[dss_id].mirror_ds)) 577 572 continue; 578 - devid = &mirror->mirror_ds->id_node; 573 + devid = &mirror->dss[dss_id].mirror_ds->id_node; 579 574 if (!nfs4_test_deviceid_unavailable(devid)) 580 575 return true; 581 576 } ··· 590 581 { 591 582 struct nfs4_ff_layout_mirror *mirror; 592 583 struct nfs4_deviceid_node *devid; 593 - u32 idx; 584 + u32 idx, dss_id; 594 585 595 586 for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 596 587 mirror = 
FF_LAYOUT_COMP(lseg, idx); 597 - if (!mirror || IS_ERR(mirror->mirror_ds)) 588 + if (!mirror) 598 589 return false; 599 - if (!mirror->mirror_ds) 600 - continue; 601 - devid = &mirror->mirror_ds->id_node; 602 - if (nfs4_test_deviceid_unavailable(devid)) 603 - return false; 590 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { 591 + if (IS_ERR(mirror->dss[dss_id].mirror_ds)) 592 + return false; 593 + if (!mirror->dss[dss_id].mirror_ds) 594 + continue; 595 + devid = &mirror->dss[dss_id].mirror_ds->id_node; 596 + if (nfs4_test_deviceid_unavailable(devid)) 597 + return false; 598 + } 604 599 } 605 600 606 601 return FF_LAYOUT_MIRROR_COUNT(lseg) != 0;
+15
fs/nfs/inode.c
··· 1073 1073 if (S_ISDIR(inode->i_mode)) 1074 1074 stat->blksize = NFS_SERVER(inode)->dtsize; 1075 1075 stat->btime = NFS_I(inode)->btime; 1076 + 1077 + /* Special handling for STATX_DIOALIGN and STATX_DIO_READ_ALIGN 1078 + * - NFS doesn't have DIO alignment constraints, avoid getting 1079 + * these DIO attrs from remote and just respond with most 1080 + * accommodating limits (so client will issue supported DIO). 1081 + * - this is unintuitive, but the most coarse-grained 1082 + * dio_offset_align is the most accommodating. 1083 + */ 1084 + if ((request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) && 1085 + S_ISREG(inode->i_mode)) { 1086 + stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN; 1087 + stat->dio_mem_align = 4; /* 4-byte alignment */ 1088 + stat->dio_offset_align = PAGE_SIZE; 1089 + stat->dio_read_offset_align = stat->dio_offset_align; 1090 + } 1076 1091 out: 1077 1092 trace_nfs_getattr_exit(inode, err); 1078 1093 return err;
+10
fs/nfs/internal.h
··· 456 456 457 457 #if IS_ENABLED(CONFIG_NFS_LOCALIO) 458 458 /* localio.c */ 459 + struct nfs_local_dio { 460 + u32 mem_align; 461 + u32 offset_align; 462 + loff_t middle_offset; 463 + loff_t end_offset; 464 + ssize_t start_len; /* Length for misaligned first extent */ 465 + ssize_t middle_len; /* Length for DIO-aligned middle extent */ 466 + ssize_t end_len; /* Length for misaligned last extent */ 467 + }; 468 + 459 469 extern void nfs_local_probe_async(struct nfs_client *); 460 470 extern void nfs_local_probe_async_work(struct work_struct *); 461 471 extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *,
+307 -96
fs/nfs/localio.c
··· 30 30 31 31 #define NFSDBG_FACILITY NFSDBG_VFS 32 32 33 + #define NFSLOCAL_MAX_IOS 3 34 + 33 35 struct nfs_local_kiocb { 34 36 struct kiocb kiocb; 35 37 struct bio_vec *bvec; ··· 39 37 struct work_struct work; 40 38 void (*aio_complete_work)(struct work_struct *); 41 39 struct nfsd_file *localio; 40 + /* Begin mostly DIO-specific members */ 41 + size_t end_len; 42 + short int end_iter_index; 43 + short int n_iters; 44 + bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; 45 + loff_t offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned; 46 + struct iov_iter iters[NFSLOCAL_MAX_IOS]; 47 + /* End mostly DIO-specific members */ 42 48 }; 43 49 44 50 struct nfs_local_fsync_ctx { ··· 58 48 59 49 static bool localio_enabled __read_mostly = true; 60 50 module_param(localio_enabled, bool, 0644); 61 - 62 - static bool localio_O_DIRECT_semantics __read_mostly = false; 63 - module_param(localio_O_DIRECT_semantics, bool, 0644); 64 - MODULE_PARM_DESC(localio_O_DIRECT_semantics, 65 - "LOCALIO will use O_DIRECT semantics to filesystem."); 66 51 67 52 static inline bool nfs_client_is_local(const struct nfs_client *clp) 68 53 { ··· 236 231 struct nfsd_file __rcu **pnf, 237 232 const fmode_t mode) 238 233 { 234 + int status = 0; 239 235 struct nfsd_file *localio; 240 236 241 237 localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, 242 238 cred, fh, nfl, pnf, mode); 243 239 if (IS_ERR(localio)) { 244 - int status = PTR_ERR(localio); 245 - trace_nfs_local_open_fh(fh, mode, status); 240 + status = PTR_ERR(localio); 246 241 switch (status) { 247 242 case -ENOMEM: 248 243 case -ENXIO: ··· 252 247 nfs_local_probe(clp); 253 248 } 254 249 } 250 + trace_nfs_local_open_fh(fh, mode, status); 255 251 return localio; 256 252 } 257 253 ··· 287 281 } 288 282 EXPORT_SYMBOL_GPL(nfs_local_open_fh); 289 283 290 - static struct bio_vec * 291 - nfs_bvec_alloc_and_import_pagevec(struct page **pagevec, 292 - unsigned int npages, gfp_t flags) 293 - { 294 - struct bio_vec *bvec, *p; 295 - 296 - bvec = 
kmalloc_array(npages, sizeof(*bvec), flags); 297 - if (bvec != NULL) { 298 - for (p = bvec; npages > 0; p++, pagevec++, npages--) { 299 - p->bv_page = *pagevec; 300 - p->bv_len = PAGE_SIZE; 301 - p->bv_offset = 0; 302 - } 303 - } 304 - return bvec; 305 - } 306 - 307 284 static void 308 285 nfs_local_iocb_free(struct nfs_local_kiocb *iocb) 309 286 { ··· 300 311 { 301 312 struct nfs_local_kiocb *iocb; 302 313 303 - iocb = kmalloc(sizeof(*iocb), flags); 314 + iocb = kzalloc(sizeof(*iocb), flags); 304 315 if (iocb == NULL) 305 316 return NULL; 306 - iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec, 307 - hdr->page_array.npages, flags); 317 + 318 + iocb->bvec = kmalloc_array(hdr->page_array.npages, 319 + sizeof(struct bio_vec), flags); 308 320 if (iocb->bvec == NULL) { 309 321 kfree(iocb); 310 322 return NULL; 311 323 } 312 324 313 - if (localio_O_DIRECT_semantics && 314 - test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { 315 - iocb->kiocb.ki_filp = file; 316 - iocb->kiocb.ki_flags = IOCB_DIRECT; 317 - } else 318 - init_sync_kiocb(&iocb->kiocb, file); 325 + init_sync_kiocb(&iocb->kiocb, file); 319 326 320 - iocb->kiocb.ki_pos = hdr->args.offset; 321 327 iocb->hdr = hdr; 322 328 iocb->kiocb.ki_flags &= ~IOCB_APPEND; 323 329 iocb->aio_complete_work = NULL; 324 330 331 + iocb->end_iter_index = -1; 332 + 325 333 return iocb; 326 334 } 327 335 328 - static void 329 - nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir) 336 + static bool 337 + nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw, 338 + size_t len, struct nfs_local_dio *local_dio) 330 339 { 331 340 struct nfs_pgio_header *hdr = iocb->hdr; 341 + loff_t offset = hdr->args.offset; 342 + u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align; 343 + loff_t start_end, orig_end, middle_end; 332 344 333 - iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages, 334 - hdr->args.count + hdr->args.pgbase); 335 - if (hdr->args.pgbase != 0) 336 - 
iov_iter_advance(i, hdr->args.pgbase); 345 + nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align, 346 + &nf_dio_offset_align, &nf_dio_read_offset_align); 347 + if (rw == ITER_DEST) 348 + nf_dio_offset_align = nf_dio_read_offset_align; 349 + 350 + if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align)) 351 + return false; 352 + if (unlikely(nf_dio_offset_align > PAGE_SIZE)) 353 + return false; 354 + if (unlikely(len < nf_dio_offset_align)) 355 + return false; 356 + 357 + local_dio->mem_align = nf_dio_mem_align; 358 + local_dio->offset_align = nf_dio_offset_align; 359 + 360 + start_end = round_up(offset, nf_dio_offset_align); 361 + orig_end = offset + len; 362 + middle_end = round_down(orig_end, nf_dio_offset_align); 363 + 364 + local_dio->middle_offset = start_end; 365 + local_dio->end_offset = middle_end; 366 + 367 + local_dio->start_len = start_end - offset; 368 + local_dio->middle_len = middle_end - start_end; 369 + local_dio->end_len = orig_end - middle_end; 370 + 371 + if (rw == ITER_DEST) 372 + trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio); 373 + else 374 + trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio); 375 + return true; 376 + } 377 + 378 + static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i, 379 + unsigned int addr_mask, unsigned int len_mask) 380 + { 381 + const struct bio_vec *bvec = i->bvec; 382 + size_t skip = i->iov_offset; 383 + size_t size = i->count; 384 + 385 + if (size & len_mask) 386 + return false; 387 + do { 388 + size_t len = bvec->bv_len; 389 + 390 + if (len > size) 391 + len = size; 392 + if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) 393 + return false; 394 + bvec++; 395 + size -= len; 396 + skip = 0; 397 + } while (size); 398 + 399 + return true; 400 + } 401 + 402 + /* 403 + * Setup as many as 3 iov_iter based on extents described by @local_dio. 404 + * Returns the number of iov_iter that were setup. 
405 + */ 406 + static int 407 + nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, 408 + unsigned int nvecs, size_t len, 409 + struct nfs_local_dio *local_dio) 410 + { 411 + int n_iters = 0; 412 + struct iov_iter *iters = iocb->iters; 413 + 414 + /* Setup misaligned start? */ 415 + if (local_dio->start_len) { 416 + iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); 417 + iters[n_iters].count = local_dio->start_len; 418 + iocb->offset[n_iters] = iocb->hdr->args.offset; 419 + iocb->iter_is_dio_aligned[n_iters] = false; 420 + ++n_iters; 421 + } 422 + 423 + /* Setup misaligned end? 424 + * If so, the end is purposely setup to be issued using buffered IO 425 + * before the middle (which will use DIO, if DIO-aligned, with AIO). 426 + * This creates problems if/when the end results in a partial write. 427 + * So must save index and length of end to handle this corner case. 428 + */ 429 + if (local_dio->end_len) { 430 + iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); 431 + iocb->offset[n_iters] = local_dio->end_offset; 432 + iov_iter_advance(&iters[n_iters], 433 + local_dio->start_len + local_dio->middle_len); 434 + iocb->iter_is_dio_aligned[n_iters] = false; 435 + /* Save index and length of end */ 436 + iocb->end_iter_index = n_iters; 437 + iocb->end_len = local_dio->end_len; 438 + ++n_iters; 439 + } 440 + 441 + /* Setup DIO-aligned middle to be issued last, to allow for 442 + * DIO with AIO completion (see nfs_local_call_{read,write}). 
443 + */ 444 + iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); 445 + if (local_dio->start_len) 446 + iov_iter_advance(&iters[n_iters], local_dio->start_len); 447 + iters[n_iters].count -= local_dio->end_len; 448 + iocb->offset[n_iters] = local_dio->middle_offset; 449 + 450 + iocb->iter_is_dio_aligned[n_iters] = 451 + nfs_iov_iter_aligned_bvec(&iters[n_iters], 452 + local_dio->mem_align-1, local_dio->offset_align-1); 453 + 454 + if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) { 455 + trace_nfs_local_dio_misaligned(iocb->hdr->inode, 456 + iocb->hdr->args.offset, len, local_dio); 457 + return 0; /* no DIO-aligned IO possible */ 458 + } 459 + ++n_iters; 460 + 461 + iocb->n_iters = n_iters; 462 + return n_iters; 463 + } 464 + 465 + static noinline_for_stack void 466 + nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) 467 + { 468 + struct nfs_pgio_header *hdr = iocb->hdr; 469 + struct page **pagevec = hdr->page_array.pagevec; 470 + unsigned long v, total; 471 + unsigned int base; 472 + size_t len; 473 + 474 + v = 0; 475 + total = hdr->args.count; 476 + base = hdr->args.pgbase; 477 + while (total && v < hdr->page_array.npages) { 478 + len = min_t(size_t, total, PAGE_SIZE - base); 479 + bvec_set_page(&iocb->bvec[v], *pagevec, len, base); 480 + total -= len; 481 + ++pagevec; 482 + ++v; 483 + base = 0; 484 + } 485 + len = hdr->args.count - total; 486 + 487 + if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { 488 + struct nfs_local_dio local_dio; 489 + 490 + if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) && 491 + nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) 492 + return; /* is DIO-aligned */ 493 + } 494 + 495 + /* Use buffered IO */ 496 + iocb->offset[0] = hdr->args.offset; 497 + iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); 498 + iocb->n_iters = 1; 337 499 } 338 500 339 501 static void ··· 507 367 static void 508 368 nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) 509 369 { 370 + /* Must handle partial 
completions */ 510 371 if (status >= 0) { 511 - hdr->res.count = status; 512 - hdr->res.op_status = NFS4_OK; 513 - hdr->task.tk_status = 0; 372 + hdr->res.count += status; 373 + /* @hdr was initialized to 0 (zeroed during allocation) */ 374 + if (hdr->task.tk_status == 0) 375 + hdr->res.op_status = NFS4_OK; 514 376 } else { 515 377 hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); 516 378 hdr->task.tk_status = status; ··· 520 378 } 521 379 522 380 static void 381 + nfs_local_iocb_release(struct nfs_local_kiocb *iocb) 382 + { 383 + nfs_local_file_put(iocb->localio); 384 + nfs_local_iocb_free(iocb); 385 + } 386 + 387 + static void 523 388 nfs_local_pgio_release(struct nfs_local_kiocb *iocb) 524 389 { 525 390 struct nfs_pgio_header *hdr = iocb->hdr; 526 391 527 - nfs_local_file_put(iocb->localio); 528 - nfs_local_iocb_free(iocb); 392 + nfs_local_iocb_release(iocb); 529 393 nfs_local_hdr_release(hdr, hdr->task.tk_ops); 530 394 } 531 395 ··· 553 405 struct nfs_pgio_header *hdr = iocb->hdr; 554 406 struct file *filp = iocb->kiocb.ki_filp; 555 407 556 - nfs_local_pgio_done(hdr, status); 408 + if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { 409 + /* Underlying FS will return -EINVAL if misaligned DIO is attempted. 
*/ 410 + pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n"); 411 + } 557 412 558 413 /* 559 414 * Must clear replen otherwise NFSv3 data corruption will occur ··· 585 434 struct nfs_local_kiocb *iocb = 586 435 container_of(kiocb, struct nfs_local_kiocb, kiocb); 587 436 437 + nfs_local_pgio_done(iocb->hdr, ret); 588 438 nfs_local_read_done(iocb, ret); 589 439 nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ 590 440 } ··· 596 444 container_of(work, struct nfs_local_kiocb, work); 597 445 struct file *filp = iocb->kiocb.ki_filp; 598 446 const struct cred *save_cred; 599 - struct iov_iter iter; 600 447 ssize_t status; 601 448 602 449 save_cred = override_creds(filp->f_cred); 603 450 604 - nfs_local_iter_init(&iter, iocb, READ); 451 + for (int i = 0; i < iocb->n_iters ; i++) { 452 + if (iocb->iter_is_dio_aligned[i]) { 453 + iocb->kiocb.ki_flags |= IOCB_DIRECT; 454 + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; 455 + iocb->aio_complete_work = nfs_local_read_aio_complete_work; 456 + } 605 457 606 - status = filp->f_op->read_iter(&iocb->kiocb, &iter); 458 + iocb->kiocb.ki_pos = iocb->offset[i]; 459 + status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); 460 + if (status != -EIOCBQUEUED) { 461 + nfs_local_pgio_done(iocb->hdr, status); 462 + if (iocb->hdr->task.tk_status) 463 + break; 464 + } 465 + } 607 466 608 467 revert_creds(save_cred); 609 468 ··· 625 462 } 626 463 627 464 static int 628 - nfs_do_local_read(struct nfs_pgio_header *hdr, 629 - struct nfsd_file *localio, 465 + nfs_local_do_read(struct nfs_local_kiocb *iocb, 630 466 const struct rpc_call_ops *call_ops) 631 467 { 632 - struct nfs_local_kiocb *iocb; 633 - struct file *file = nfs_to->nfsd_file_file(localio); 634 - 635 - /* Don't support filesystems without read_iter */ 636 - if (!file->f_op->read_iter) 637 - return -EAGAIN; 468 + struct nfs_pgio_header *hdr = iocb->hdr; 638 469 639 470 dprintk("%s: vfs_read count=%u pos=%llu\n", 640 471 
__func__, hdr->args.count, hdr->args.offset); 641 472 642 - iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL); 643 - if (iocb == NULL) 644 - return -ENOMEM; 645 - iocb->localio = localio; 646 - 647 473 nfs_local_pgio_init(hdr, call_ops); 648 474 hdr->res.eof = false; 649 - 650 - if (iocb->kiocb.ki_flags & IOCB_DIRECT) { 651 - iocb->kiocb.ki_complete = nfs_local_read_aio_complete; 652 - iocb->aio_complete_work = nfs_local_read_aio_complete_work; 653 - } 654 475 655 476 INIT_WORK(&iocb->work, nfs_local_call_read); 656 477 queue_work(nfslocaliod_workqueue, &iocb->work); ··· 744 597 745 598 dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0); 746 599 600 + if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { 601 + /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */ 602 + pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n"); 603 + } 604 + 747 605 /* Handle short writes as if they are ENOSPC */ 606 + status = hdr->res.count; 748 607 if (status > 0 && status < hdr->args.count) { 749 608 hdr->mds_offset += status; 750 609 hdr->args.offset += status; ··· 758 605 hdr->args.count -= status; 759 606 nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); 760 607 status = -ENOSPC; 608 + /* record -ENOSPC in terms of nfs_local_pgio_done */ 609 + nfs_local_pgio_done(hdr, status); 761 610 } 762 - if (status < 0) 611 + if (hdr->task.tk_status < 0) 763 612 nfs_reset_boot_verifier(inode); 764 - 765 - nfs_local_pgio_done(hdr, status); 766 613 } 767 614 768 615 static void nfs_local_write_aio_complete_work(struct work_struct *work) ··· 779 626 struct nfs_local_kiocb *iocb = 780 627 container_of(kiocb, struct nfs_local_kiocb, kiocb); 781 628 629 + nfs_local_pgio_done(iocb->hdr, ret); 782 630 nfs_local_write_done(iocb, ret); 783 631 nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ 784 632 } ··· 791 637 struct file *filp = iocb->kiocb.ki_filp; 792 638 unsigned long old_flags = 
current->flags; 793 639 const struct cred *save_cred; 794 - struct iov_iter iter; 795 640 ssize_t status; 796 641 797 642 current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; 798 643 save_cred = override_creds(filp->f_cred); 799 644 800 - nfs_local_iter_init(&iter, iocb, WRITE); 801 - 802 645 file_start_write(filp); 803 - status = filp->f_op->write_iter(&iocb->kiocb, &iter); 646 + for (int i = 0; i < iocb->n_iters ; i++) { 647 + if (iocb->iter_is_dio_aligned[i]) { 648 + iocb->kiocb.ki_flags |= IOCB_DIRECT; 649 + iocb->kiocb.ki_complete = nfs_local_write_aio_complete; 650 + iocb->aio_complete_work = nfs_local_write_aio_complete_work; 651 + } 652 + retry: 653 + iocb->kiocb.ki_pos = iocb->offset[i]; 654 + status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); 655 + if (status != -EIOCBQUEUED) { 656 + if (unlikely(status >= 0 && status < iocb->iters[i].count)) { 657 + /* partial write */ 658 + if (i == iocb->end_iter_index) { 659 + /* Must not account partial end, otherwise, due 660 + * to end being issued before middle: the partial 661 + * write accounting in nfs_local_write_done() 662 + * would incorrectly advance hdr->args.offset 663 + */ 664 + status = 0; 665 + } else { 666 + /* Partial write at start or buffered middle, 667 + * exit early. 668 + */ 669 + nfs_local_pgio_done(iocb->hdr, status); 670 + break; 671 + } 672 + } else if (unlikely(status == -ENOTBLK && 673 + (iocb->kiocb.ki_flags & IOCB_DIRECT))) { 674 + /* VFS will return -ENOTBLK if DIO WRITE fails to 675 + * invalidate the page cache. Retry using buffered IO. 
676 + */ 677 + iocb->kiocb.ki_flags &= ~IOCB_DIRECT; 678 + iocb->kiocb.ki_complete = NULL; 679 + iocb->aio_complete_work = NULL; 680 + goto retry; 681 + } 682 + nfs_local_pgio_done(iocb->hdr, status); 683 + if (iocb->hdr->task.tk_status) 684 + break; 685 + } 686 + } 804 687 file_end_write(filp); 805 688 806 689 revert_creds(save_cred); ··· 851 660 } 852 661 853 662 static int 854 - nfs_do_local_write(struct nfs_pgio_header *hdr, 855 - struct nfsd_file *localio, 663 + nfs_local_do_write(struct nfs_local_kiocb *iocb, 856 664 const struct rpc_call_ops *call_ops) 857 665 { 858 - struct nfs_local_kiocb *iocb; 859 - struct file *file = nfs_to->nfsd_file_file(localio); 860 - 861 - /* Don't support filesystems without write_iter */ 862 - if (!file->f_op->write_iter) 863 - return -EAGAIN; 666 + struct nfs_pgio_header *hdr = iocb->hdr; 864 667 865 668 dprintk("%s: vfs_write count=%u pos=%llu %s\n", 866 669 __func__, hdr->args.count, hdr->args.offset, 867 670 (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable"); 868 - 869 - iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO); 870 - if (iocb == NULL) 871 - return -ENOMEM; 872 - iocb->localio = localio; 873 671 874 672 switch (hdr->args.stable) { 875 673 default: ··· 874 694 875 695 nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); 876 696 877 - if (iocb->kiocb.ki_flags & IOCB_DIRECT) { 878 - iocb->kiocb.ki_complete = nfs_local_write_aio_complete; 879 - iocb->aio_complete_work = nfs_local_write_aio_complete_work; 880 - } 881 - 882 697 INIT_WORK(&iocb->work, nfs_local_call_write); 883 698 queue_work(nfslocaliod_workqueue, &iocb->work); 884 699 885 700 return 0; 886 701 } 887 702 703 + static struct nfs_local_kiocb * 704 + nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio) 705 + { 706 + struct file *file = nfs_to->nfsd_file_file(localio); 707 + struct nfs_local_kiocb *iocb; 708 + gfp_t gfp_mask; 709 + int rw; 710 + 711 + if (hdr->rw_mode & FMODE_READ) { 712 + if 
(!file->f_op->read_iter) 713 + return ERR_PTR(-EOPNOTSUPP); 714 + gfp_mask = GFP_KERNEL; 715 + rw = ITER_DEST; 716 + } else { 717 + if (!file->f_op->write_iter) 718 + return ERR_PTR(-EOPNOTSUPP); 719 + gfp_mask = GFP_NOIO; 720 + rw = ITER_SOURCE; 721 + } 722 + 723 + iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask); 724 + if (iocb == NULL) 725 + return ERR_PTR(-ENOMEM); 726 + iocb->hdr = hdr; 727 + iocb->localio = localio; 728 + 729 + nfs_local_iters_init(iocb, rw); 730 + 731 + return iocb; 732 + } 733 + 888 734 int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, 889 735 struct nfs_pgio_header *hdr, 890 736 const struct rpc_call_ops *call_ops) 891 737 { 738 + struct nfs_local_kiocb *iocb; 892 739 int status = 0; 893 740 894 741 if (!hdr->args.count) 895 742 return 0; 896 743 744 + iocb = nfs_local_iocb_init(hdr, localio); 745 + if (IS_ERR(iocb)) 746 + return PTR_ERR(iocb); 747 + 897 748 switch (hdr->rw_mode) { 898 749 case FMODE_READ: 899 - status = nfs_do_local_read(hdr, localio, call_ops); 750 + status = nfs_local_do_read(iocb, call_ops); 900 751 break; 901 752 case FMODE_WRITE: 902 - status = nfs_do_local_write(hdr, localio, call_ops); 753 + status = nfs_local_do_write(iocb, call_ops); 903 754 break; 904 755 default: 905 756 dprintk("%s: invalid mode: %d\n", __func__, 906 757 hdr->rw_mode); 907 - status = -EINVAL; 758 + status = -EOPNOTSUPP; 908 759 } 909 760 910 761 if (status != 0) { 911 762 if (status == -EAGAIN) 912 763 nfs_localio_disable_client(clp); 913 - nfs_local_file_put(localio); 764 + nfs_local_iocb_release(iocb); 914 765 hdr->task.tk_status = status; 915 766 nfs_local_hdr_release(hdr, call_ops); 916 767 }
+1 -1
fs/nfs/nfs2xdr.c
··· 23 23 #include <linux/nfs2.h> 24 24 #include <linux/nfs_fs.h> 25 25 #include <linux/nfs_common.h> 26 - #include "nfstrace.h" 27 26 #include "internal.h" 27 + #include "nfstrace.h" 28 28 29 29 #define NFSDBG_FACILITY NFSDBG_XDR 30 30
+1 -1
fs/nfs/nfs3xdr.c
··· 23 23 #include <linux/nfsacl.h> 24 24 #include <linux/nfs_common.h> 25 25 26 - #include "nfstrace.h" 27 26 #include "internal.h" 27 + #include "nfstrace.h" 28 28 29 29 #define NFSDBG_FACILITY NFSDBG_XDR 30 30
+2 -2
fs/nfs/nfs42proc.c
··· 1514 1514 1515 1515 1516 1516 ret = -ENOMEM; 1517 - res.scratch = alloc_page(GFP_KERNEL); 1517 + res.scratch = folio_alloc(GFP_KERNEL, 0); 1518 1518 if (!res.scratch) 1519 1519 goto out; 1520 1520 ··· 1552 1552 } 1553 1553 kfree(pages); 1554 1554 out_free_scratch: 1555 - __free_page(res.scratch); 1555 + folio_put(res.scratch); 1556 1556 out: 1557 1557 return ret; 1558 1558
+1 -1
fs/nfs/nfs42xdr.c
··· 1781 1781 struct compound_hdr hdr; 1782 1782 int status; 1783 1783 1784 - xdr_set_scratch_page(xdr, res->scratch); 1784 + xdr_set_scratch_folio(xdr, res->scratch); 1785 1785 1786 1786 status = decode_compound_hdr(xdr, &hdr); 1787 1787 if (status)
+1
fs/nfs/nfs4file.c
··· 456 456 #else 457 457 .llseek = nfs_file_llseek, 458 458 #endif 459 + .fop_flags = FOP_DONTCACHE, 459 460 };
+7 -5
fs/nfs/nfs4proc.c
··· 391 391 *p++ = htonl(attrs); /* bitmap */ 392 392 *p++ = htonl(12); /* attribute buffer length */ 393 393 *p++ = htonl(NF4DIR); 394 + spin_lock(&dentry->d_lock); 394 395 p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent))); 396 + spin_unlock(&dentry->d_lock); 395 397 396 398 readdir->pgbase = (char *)p - (char *)start; 397 399 readdir->count -= readdir->pgbase; ··· 6162 6160 } 6163 6161 6164 6162 /* for decoding across pages */ 6165 - res.acl_scratch = alloc_page(GFP_KERNEL); 6163 + res.acl_scratch = folio_alloc(GFP_KERNEL, 0); 6166 6164 if (!res.acl_scratch) 6167 6165 goto out_free; 6168 6166 ··· 6198 6196 while (--i >= 0) 6199 6197 __free_page(pages[i]); 6200 6198 if (res.acl_scratch) 6201 - __free_page(res.acl_scratch); 6199 + folio_put(res.acl_scratch); 6202 6200 kfree(pages); 6203 6201 return ret; 6204 6202 } ··· 7874 7872 return err; 7875 7873 do { 7876 7874 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); 7877 - if (err != -NFS4ERR_DELAY) 7875 + if (err != -NFS4ERR_DELAY && err != -NFS4ERR_GRACE) 7878 7876 break; 7879 7877 ssleep(1); 7880 - } while (err == -NFS4ERR_DELAY); 7878 + } while (err == -NFS4ERR_DELAY || err == -NFSERR_GRACE); 7881 7879 return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err); 7882 7880 } 7883 7881 ··· 9444 9442 goto out; 9445 9443 if (rcvd->max_rqst_sz > sent->max_rqst_sz) 9446 9444 return -EINVAL; 9447 - if (rcvd->max_resp_sz < sent->max_resp_sz) 9445 + if (rcvd->max_resp_sz > sent->max_resp_sz) 9448 9446 return -EINVAL; 9449 9447 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) 9450 9448 return -EINVAL;
+3
fs/nfs/nfs4state.c
··· 2744 2744 case -ENETUNREACH: 2745 2745 nfs_mark_client_ready(clp, -EIO); 2746 2746 break; 2747 + case -EINVAL: 2748 + nfs_mark_client_ready(clp, status); 2749 + break; 2747 2750 default: 2748 2751 ssleep(1); 2749 2752 break;
+2 -2
fs/nfs/nfs4xdr.c
··· 4930 4930 } 4931 4931 4932 4932 /* 4933 - * The prefered block size for layout directed io 4933 + * The preferred block size for layout directed io 4934 4934 */ 4935 4935 static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, 4936 4936 uint32_t *res) ··· 6585 6585 int status; 6586 6586 6587 6587 if (res->acl_scratch != NULL) 6588 - xdr_set_scratch_page(xdr, res->acl_scratch); 6588 + xdr_set_scratch_folio(xdr, res->acl_scratch); 6589 6589 status = decode_compound_hdr(xdr, &hdr); 6590 6590 if (status) 6591 6591 goto out;
+209 -6
fs/nfs/nfstrace.h
··· 45 45 { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ 46 46 { BIT(NFS_INO_ODIRECT), "ODIRECT" }) 47 47 48 + #define nfs_show_wb_flags(v) \ 49 + __print_flags(v, "|", \ 50 + { BIT(PG_BUSY), "BUSY" }, \ 51 + { BIT(PG_MAPPED), "MAPPED" }, \ 52 + { BIT(PG_FOLIO), "FOLIO" }, \ 53 + { BIT(PG_CLEAN), "CLEAN" }, \ 54 + { BIT(PG_COMMIT_TO_DS), "COMMIT_TO_DS" }, \ 55 + { BIT(PG_INODE_REF), "INODE_REF" }, \ 56 + { BIT(PG_HEADLOCK), "HEADLOCK" }, \ 57 + { BIT(PG_TEARDOWN), "TEARDOWN" }, \ 58 + { BIT(PG_UNLOCKPAGE), "UNLOCKPAGE" }, \ 59 + { BIT(PG_UPTODATE), "UPTODATE" }, \ 60 + { BIT(PG_WB_END), "WB_END" }, \ 61 + { BIT(PG_REMOVE), "REMOVE" }, \ 62 + { BIT(PG_CONTENDED1), "CONTENDED1" }, \ 63 + { BIT(PG_CONTENDED2), "CONTENDED2" }) 64 + 48 65 DECLARE_EVENT_CLASS(nfs_inode_event, 49 66 TP_PROTO( 50 67 const struct inode *inode ··· 984 967 __entry->fileid = nfsi->fileid; 985 968 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 986 969 __entry->version = inode_peek_iversion_raw(inode); 987 - __entry->offset = offset, 970 + __entry->offset = offset; 988 971 __entry->count = count; 989 972 ), 990 973 ··· 1034 1017 __entry->fileid = nfsi->fileid; 1035 1018 __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 1036 1019 __entry->version = inode_peek_iversion_raw(inode); 1037 - __entry->offset = offset, 1038 - __entry->count = count, 1020 + __entry->offset = offset; 1021 + __entry->count = count; 1039 1022 __entry->ret = ret; 1040 1023 ), 1041 1024 ··· 1067 1050 1068 1051 DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio); 1069 1052 DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done); 1053 + 1054 + DEFINE_NFS_FOLIO_EVENT(nfs_try_to_update_request); 1055 + DEFINE_NFS_FOLIO_EVENT_DONE(nfs_try_to_update_request_done); 1056 + 1057 + DEFINE_NFS_FOLIO_EVENT(nfs_update_folio); 1058 + DEFINE_NFS_FOLIO_EVENT_DONE(nfs_update_folio_done); 1059 + 1060 + DEFINE_NFS_FOLIO_EVENT(nfs_write_begin); 1061 + DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_begin_done); 1062 + 1063 + DEFINE_NFS_FOLIO_EVENT(nfs_write_end); 
1064 + DEFINE_NFS_FOLIO_EVENT_DONE(nfs_write_end_done); 1065 + 1066 + DEFINE_NFS_FOLIO_EVENT(nfs_writepages); 1067 + DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writepages_done); 1068 + 1069 + DECLARE_EVENT_CLASS(nfs_kiocb_event, 1070 + TP_PROTO( 1071 + const struct kiocb *iocb, 1072 + const struct iov_iter *iter 1073 + ), 1074 + 1075 + TP_ARGS(iocb, iter), 1076 + 1077 + TP_STRUCT__entry( 1078 + __field(dev_t, dev) 1079 + __field(u32, fhandle) 1080 + __field(u64, fileid) 1081 + __field(u64, version) 1082 + __field(loff_t, offset) 1083 + __field(size_t, count) 1084 + __field(int, flags) 1085 + ), 1086 + 1087 + TP_fast_assign( 1088 + const struct inode *inode = file_inode(iocb->ki_filp); 1089 + const struct nfs_inode *nfsi = NFS_I(inode); 1090 + 1091 + __entry->dev = inode->i_sb->s_dev; 1092 + __entry->fileid = nfsi->fileid; 1093 + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 1094 + __entry->version = inode_peek_iversion_raw(inode); 1095 + __entry->offset = iocb->ki_pos; 1096 + __entry->count = iov_iter_count(iter); 1097 + __entry->flags = iocb->ki_flags; 1098 + ), 1099 + 1100 + TP_printk( 1101 + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld count=%zu ki_flags=%s", 1102 + MAJOR(__entry->dev), MINOR(__entry->dev), 1103 + (unsigned long long)__entry->fileid, 1104 + __entry->fhandle, __entry->version, 1105 + __entry->offset, __entry->count, 1106 + __print_flags(__entry->flags, "|", TRACE_IOCB_STRINGS) 1107 + ) 1108 + ); 1109 + 1110 + #define DEFINE_NFS_KIOCB_EVENT(name) \ 1111 + DEFINE_EVENT(nfs_kiocb_event, name, \ 1112 + TP_PROTO( \ 1113 + const struct kiocb *iocb, \ 1114 + const struct iov_iter *iter \ 1115 + ), \ 1116 + TP_ARGS(iocb, iter)) 1117 + 1118 + DEFINE_NFS_KIOCB_EVENT(nfs_file_read); 1119 + DEFINE_NFS_KIOCB_EVENT(nfs_file_write); 1070 1120 1071 1121 TRACE_EVENT(nfs_aop_readahead, 1072 1122 TP_PROTO( ··· 1482 1398 ) 1483 1399 ); 1484 1400 1401 + DECLARE_EVENT_CLASS(nfs_page_class, 1402 + TP_PROTO( 1403 + const struct nfs_page *req 1404 + ), 1405 + 
1406 + TP_ARGS(req), 1407 + 1408 + TP_STRUCT__entry( 1409 + __field(dev_t, dev) 1410 + __field(u32, fhandle) 1411 + __field(u64, fileid) 1412 + __field(const struct nfs_page *__private, req) 1413 + __field(loff_t, offset) 1414 + __field(unsigned int, count) 1415 + __field(unsigned long, flags) 1416 + ), 1417 + 1418 + TP_fast_assign( 1419 + const struct inode *inode = folio_inode(req->wb_folio); 1420 + const struct nfs_inode *nfsi = NFS_I(inode); 1421 + 1422 + __entry->dev = inode->i_sb->s_dev; 1423 + __entry->fileid = nfsi->fileid; 1424 + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); 1425 + __entry->req = req; 1426 + __entry->offset = req_offset(req); 1427 + __entry->count = req->wb_bytes; 1428 + __entry->flags = req->wb_flags; 1429 + ), 1430 + 1431 + TP_printk( 1432 + "fileid=%02x:%02x:%llu fhandle=0x%08x req=%p offset=%lld count=%u flags=%s", 1433 + MAJOR(__entry->dev), MINOR(__entry->dev), 1434 + (unsigned long long)__entry->fileid, __entry->fhandle, 1435 + __entry->req, __entry->offset, __entry->count, 1436 + nfs_show_wb_flags(__entry->flags) 1437 + ) 1438 + ); 1439 + 1440 + #define DEFINE_NFS_PAGE_EVENT(name) \ 1441 + DEFINE_EVENT(nfs_page_class, name, \ 1442 + TP_PROTO( \ 1443 + const struct nfs_page *req \ 1444 + ), \ 1445 + TP_ARGS(req)) 1446 + 1447 + DEFINE_NFS_PAGE_EVENT(nfs_writepage_setup); 1448 + DEFINE_NFS_PAGE_EVENT(nfs_do_writepage); 1449 + 1485 1450 DECLARE_EVENT_CLASS(nfs_page_error_class, 1486 1451 TP_PROTO( 1487 1452 const struct inode *inode, ··· 1732 1599 DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec); 1733 1600 DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io); 1734 1601 1602 + #if IS_ENABLED(CONFIG_NFS_LOCALIO) 1603 + 1604 + DECLARE_EVENT_CLASS(nfs_local_dio_class, 1605 + TP_PROTO( 1606 + const struct inode *inode, 1607 + loff_t offset, 1608 + ssize_t count, 1609 + const struct nfs_local_dio *local_dio 1610 + ), 1611 + TP_ARGS(inode, offset, count, local_dio), 1612 + TP_STRUCT__entry( 1613 + __field(dev_t, dev) 1614 
+ __field(u64, fileid) 1615 + __field(u32, fhandle) 1616 + __field(loff_t, offset) 1617 + __field(ssize_t, count) 1618 + __field(u32, mem_align) 1619 + __field(u32, offset_align) 1620 + __field(loff_t, start) 1621 + __field(ssize_t, start_len) 1622 + __field(loff_t, middle) 1623 + __field(ssize_t, middle_len) 1624 + __field(loff_t, end) 1625 + __field(ssize_t, end_len) 1626 + ), 1627 + TP_fast_assign( 1628 + const struct nfs_inode *nfsi = NFS_I(inode); 1629 + const struct nfs_fh *fh = &nfsi->fh; 1630 + 1631 + __entry->dev = inode->i_sb->s_dev; 1632 + __entry->fileid = nfsi->fileid; 1633 + __entry->fhandle = nfs_fhandle_hash(fh); 1634 + __entry->offset = offset; 1635 + __entry->count = count; 1636 + __entry->mem_align = local_dio->mem_align; 1637 + __entry->offset_align = local_dio->offset_align; 1638 + __entry->start = offset; 1639 + __entry->start_len = local_dio->start_len; 1640 + __entry->middle = local_dio->middle_offset; 1641 + __entry->middle_len = local_dio->middle_len; 1642 + __entry->end = local_dio->end_offset; 1643 + __entry->end_len = local_dio->end_len; 1644 + ), 1645 + TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x " 1646 + "offset=%lld count=%zd " 1647 + "mem_align=%u offset_align=%u " 1648 + "start=%llu+%zd middle=%llu+%zd end=%llu+%zd", 1649 + MAJOR(__entry->dev), MINOR(__entry->dev), 1650 + (unsigned long long)__entry->fileid, 1651 + __entry->fhandle, __entry->offset, __entry->count, 1652 + __entry->mem_align, __entry->offset_align, 1653 + __entry->start, __entry->start_len, 1654 + __entry->middle, __entry->middle_len, 1655 + __entry->end, __entry->end_len) 1656 + ) 1657 + 1658 + #define DEFINE_NFS_LOCAL_DIO_EVENT(name) \ 1659 + DEFINE_EVENT(nfs_local_dio_class, nfs_local_dio_##name, \ 1660 + TP_PROTO(const struct inode *inode, \ 1661 + loff_t offset, \ 1662 + ssize_t count, \ 1663 + const struct nfs_local_dio *local_dio),\ 1664 + TP_ARGS(inode, offset, count, local_dio)) 1665 + 1666 + DEFINE_NFS_LOCAL_DIO_EVENT(read); 1667 + 
DEFINE_NFS_LOCAL_DIO_EVENT(write); 1668 + DEFINE_NFS_LOCAL_DIO_EVENT(misaligned); 1669 + 1670 + #endif /* CONFIG_NFS_LOCALIO */ 1671 + 1735 1672 TRACE_EVENT(nfs_fh_to_dentry, 1736 1673 TP_PROTO( 1737 1674 const struct super_block *sb, ··· 1916 1713 ), 1917 1714 1918 1715 TP_printk( 1919 - "error=%d fhandle=0x%08x mode=%s", 1920 - __entry->error, 1716 + "fhandle=0x%08x mode=%s result=%d", 1921 1717 __entry->fhandle, 1922 - show_fs_fmode_flags(__entry->fmode) 1718 + show_fs_fmode_flags(__entry->fmode), 1719 + __entry->error 1923 1720 ) 1924 1721 ); 1925 1722
+24 -10
fs/nfs/write.c
··· 296 296 { 297 297 struct nfs_server *nfss = NFS_SERVER(folio->mapping->host); 298 298 299 - folio_end_writeback(folio); 299 + folio_end_writeback_no_dropbehind(folio); 300 300 if (atomic_long_dec_return(&nfss->writeback) < 301 301 NFS_CONGESTION_OFF_THRESH) { 302 302 nfss->write_congested = 0; ··· 593 593 if (IS_ERR(req)) 594 594 return PTR_ERR(req); 595 595 596 + trace_nfs_do_writepage(req); 596 597 nfs_folio_set_writeback(folio); 597 598 WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); 598 599 ··· 657 656 int priority = 0; 658 657 int err; 659 658 659 + trace_nfs_writepages(inode, wbc->range_start, wbc->range_end - wbc->range_start); 660 + 660 661 /* Wait with writeback until write congestion eases */ 661 662 if (wbc->sync_mode == WB_SYNC_NONE && nfss->write_congested) { 662 663 err = wait_event_killable(nfss->write_congestion_wait, 663 664 nfss->write_congested == 0); 664 665 if (err) 665 - return err; 666 + goto out_err; 666 667 } 667 668 668 669 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); ··· 695 692 } while (err < 0 && !nfs_error_is_fatal(err)); 696 693 nfs_io_completion_put(ioc); 697 694 698 - if (err < 0) 699 - goto out_err; 700 - return 0; 695 + if (err > 0) 696 + err = 0; 701 697 out_err: 698 + trace_nfs_writepages_done(inode, wbc->range_start, wbc->range_end - wbc->range_start, err); 702 699 return err; 703 700 } 704 701 ··· 748 745 clear_bit(PG_MAPPED, &req->wb_head->wb_flags); 749 746 } 750 747 spin_unlock(&mapping->i_private_lock); 748 + 749 + folio_end_dropbehind(folio); 751 750 } 752 751 nfs_page_group_unlock(req); 753 752 ··· 931 926 req->wb_nio = 0; 932 927 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); 933 928 nfs_mark_request_commit(req, hdr->lseg, &cinfo, 934 - hdr->pgio_mirror_idx); 929 + hdr->ds_commit_idx); 935 930 goto next; 936 931 } 937 932 remove_req: ··· 1022 1017 unsigned int end; 1023 1018 int error; 1024 1019 1020 + trace_nfs_try_to_update_request(folio_inode(folio), offset, bytes); 1025 1021 end = offset + 
bytes; 1026 1022 1027 1023 req = nfs_lock_and_join_requests(folio); 1028 1024 if (IS_ERR_OR_NULL(req)) 1029 - return req; 1025 + goto out; 1030 1026 1031 1027 rqend = req->wb_offset + req->wb_bytes; 1032 1028 /* ··· 1049 1043 else 1050 1044 req->wb_bytes = rqend - req->wb_offset; 1051 1045 req->wb_nio = 0; 1046 + out: 1047 + trace_nfs_try_to_update_request_done(folio_inode(folio), offset, bytes, 1048 + PTR_ERR_OR_ZERO(req)); 1052 1049 return req; 1053 1050 out_flushme: 1054 1051 /* ··· 1062 1053 nfs_mark_request_dirty(req); 1063 1054 nfs_unlock_and_release_request(req); 1064 1055 error = nfs_wb_folio(folio->mapping->host, folio); 1056 + trace_nfs_try_to_update_request_done(folio_inode(folio), offset, bytes, error); 1065 1057 return (error < 0) ? ERR_PTR(error) : NULL; 1066 1058 } 1067 1059 ··· 1100 1090 req = nfs_setup_write_request(ctx, folio, offset, count); 1101 1091 if (IS_ERR(req)) 1102 1092 return PTR_ERR(req); 1093 + trace_nfs_writepage_setup(req); 1103 1094 /* Update file length */ 1104 1095 nfs_grow_file(folio, offset, count); 1105 1096 nfs_mark_uptodate(req); ··· 1301 1290 1302 1291 nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); 1303 1292 1293 + trace_nfs_update_folio(inode, offset, count); 1294 + 1304 1295 dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, 1305 1296 (long long)(folio_pos(folio) + offset)); 1306 1297 ··· 1322 1309 if (status < 0) 1323 1310 nfs_set_pageerror(mapping); 1324 1311 out: 1312 + trace_nfs_update_folio_done(inode, offset, count, status); 1325 1313 dprintk("NFS: nfs_update_folio returns %d (isize %lld)\n", 1326 1314 status, (long long)i_size_read(inode)); 1327 1315 return status; ··· 1820 1806 nfs_mapping_set_error(folio, status); 1821 1807 nfs_inode_remove_request(req); 1822 1808 } 1823 - dprintk_cont(", error = %d\n", status); 1809 + dprintk(", error = %d\n", status); 1824 1810 goto next; 1825 1811 } 1826 1812 ··· 1830 1816 /* We have a match */ 1831 1817 if (folio) 1832 1818 nfs_inode_remove_request(req); 1833 - 
dprintk_cont(" OK\n"); 1819 + dprintk(" OK\n"); 1834 1820 goto next; 1835 1821 } 1836 1822 /* We have a mismatch. Write the page again */ 1837 - dprintk_cont(" mismatch\n"); 1823 + dprintk(" mismatch\n"); 1838 1824 nfs_mark_request_dirty(req); 1839 1825 atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); 1840 1826 next:
+34
fs/nfsd/filecache.c
··· 231 231 refcount_set(&nf->nf_ref, 1); 232 232 nf->nf_may = need; 233 233 nf->nf_mark = NULL; 234 + nf->nf_dio_mem_align = 0; 235 + nf->nf_dio_offset_align = 0; 236 + nf->nf_dio_read_offset_align = 0; 234 237 return nf; 235 238 } 236 239 ··· 1073 1070 } 1074 1071 1075 1072 static __be32 1073 + nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf) 1074 + { 1075 + struct inode *inode = file_inode(nf->nf_file); 1076 + struct kstat stat; 1077 + __be32 status; 1078 + 1079 + /* Currently only need to get DIO alignment info for regular files */ 1080 + if (!S_ISREG(inode->i_mode)) 1081 + return nfs_ok; 1082 + 1083 + status = fh_getattr(fhp, &stat); 1084 + if (status != nfs_ok) 1085 + return status; 1086 + 1087 + trace_nfsd_file_get_dio_attrs(inode, &stat); 1088 + 1089 + if (stat.result_mask & STATX_DIOALIGN) { 1090 + nf->nf_dio_mem_align = stat.dio_mem_align; 1091 + nf->nf_dio_offset_align = stat.dio_offset_align; 1092 + } 1093 + if (stat.result_mask & STATX_DIO_READ_ALIGN) 1094 + nf->nf_dio_read_offset_align = stat.dio_read_offset_align; 1095 + else 1096 + nf->nf_dio_read_offset_align = nf->nf_dio_offset_align; 1097 + 1098 + return nfs_ok; 1099 + } 1100 + 1101 + static __be32 1076 1102 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, 1077 1103 struct svc_cred *cred, 1078 1104 struct auth_domain *client, ··· 1219 1187 } 1220 1188 status = nfserrno(ret); 1221 1189 trace_nfsd_file_open(nf, status); 1190 + if (status == nfs_ok) 1191 + status = nfsd_file_get_dio_attrs(fhp, nf); 1222 1192 } 1223 1193 } else 1224 1194 status = nfserr_jukebox;
+4
fs/nfsd/filecache.h
··· 54 54 struct list_head nf_gc; 55 55 struct rcu_head nf_rcu; 56 56 ktime_t nf_birthtime; 57 + 58 + u32 nf_dio_mem_align; 59 + u32 nf_dio_offset_align; 60 + u32 nf_dio_read_offset_align; 57 61 }; 58 62 59 63 int nfsd_file_cache_init(void);
+11
fs/nfsd/localio.c
··· 117 117 return localio; 118 118 } 119 119 120 + static void nfsd_file_dio_alignment(struct nfsd_file *nf, 121 + u32 *nf_dio_mem_align, 122 + u32 *nf_dio_offset_align, 123 + u32 *nf_dio_read_offset_align) 124 + { 125 + *nf_dio_mem_align = nf->nf_dio_mem_align; 126 + *nf_dio_offset_align = nf->nf_dio_offset_align; 127 + *nf_dio_read_offset_align = nf->nf_dio_read_offset_align; 128 + } 129 + 120 130 static const struct nfsd_localio_operations nfsd_localio_ops = { 121 131 .nfsd_net_try_get = nfsd_net_try_get, 122 132 .nfsd_net_put = nfsd_net_put, ··· 134 124 .nfsd_file_put_local = nfsd_file_put_local, 135 125 .nfsd_file_get_local = nfsd_file_get_local, 136 126 .nfsd_file_file = nfsd_file_file, 127 + .nfsd_file_dio_alignment = nfsd_file_dio_alignment, 137 128 }; 138 129 139 130 void nfsd_localio_ops_init(void)
+1 -1
fs/nfsd/nfsctl.c
··· 1954 1954 * remaining listeners and recreate the list. 1955 1955 */ 1956 1956 if (delete) 1957 - svc_xprt_destroy_all(serv, net); 1957 + svc_xprt_destroy_all(serv, net, false); 1958 1958 1959 1959 /* walk list of addrs again, open any that still don't exist */ 1960 1960 nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_SOCK_ADDR, info->nlhdr,
+2 -5
fs/nfsd/nfssvc.c
··· 535 535 #endif 536 536 } 537 537 538 - svc_xprt_destroy_all(serv, net); 539 - 540 538 /* 541 539 * write_ports can create the server without actually starting 542 - * any threads--if we get shut down before any threads are 540 + * any threads. If we get shut down before any threads are 543 541 * started, then nfsd_destroy_serv will be run before any of this 544 542 * other initialization has been done except the rpcb information. 545 543 */ 546 - svc_rpcb_cleanup(serv, net); 547 - 544 + svc_xprt_destroy_all(serv, net, true); 548 545 nfsd_shutdown_net(net); 549 546 svc_destroy(&serv); 550 547 }
+27
fs/nfsd/trace.h
··· 1133 1133 ) 1134 1134 ); 1135 1135 1136 + TRACE_EVENT(nfsd_file_get_dio_attrs, 1137 + TP_PROTO( 1138 + const struct inode *inode, 1139 + const struct kstat *stat 1140 + ), 1141 + TP_ARGS(inode, stat), 1142 + TP_STRUCT__entry( 1143 + __field(const void *, inode) 1144 + __field(unsigned long, mask) 1145 + __field(u32, mem_align) 1146 + __field(u32, offset_align) 1147 + __field(u32, read_offset_align) 1148 + ), 1149 + TP_fast_assign( 1150 + __entry->inode = inode; 1151 + __entry->mask = stat->result_mask; 1152 + __entry->mem_align = stat->dio_mem_align; 1153 + __entry->offset_align = stat->dio_offset_align; 1154 + __entry->read_offset_align = stat->dio_read_offset_align; 1155 + ), 1156 + TP_printk("inode=%p flags=%s mem_align=%u offset_align=%u read_offset_align=%u", 1157 + __entry->inode, show_statx_mask(__entry->mask), 1158 + __entry->mem_align, __entry->offset_align, 1159 + __entry->read_offset_align 1160 + ) 1161 + ); 1162 + 1136 1163 TRACE_EVENT(nfsd_file_acquire, 1137 1164 TP_PROTO( 1138 1165 const struct svc_rqst *rqstp,
+4
fs/nfsd/vfs.h
··· 185 185 u32 request_mask = STATX_BASIC_STATS; 186 186 struct path p = {.mnt = fh->fh_export->ex_path.mnt, 187 187 .dentry = fh->fh_dentry}; 188 + struct inode *inode = d_inode(p.dentry); 189 + 190 + if (S_ISREG(inode->i_mode)) 191 + request_mask |= (STATX_DIOALIGN | STATX_DIO_READ_ALIGN); 188 192 189 193 if (fh->fh_maxsize == NFS4_FHSIZE) 190 194 request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
-2
include/linux/nfs_page.h
··· 122 122 /* arbitrarily selected limit to number of mirrors */ 123 123 #define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 124 124 125 - #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) 126 - 127 125 extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, 128 126 struct page *page, 129 127 unsigned int pgbase,
+2 -2
include/linux/nfs_xdr.h
··· 862 862 size_t acl_len; 863 863 size_t acl_data_offset; 864 864 int acl_flags; 865 - struct page * acl_scratch; 865 + struct folio * acl_scratch; 866 866 }; 867 867 868 868 struct nfs_setattrres { ··· 1596 1596 1597 1597 struct nfs42_listxattrsres { 1598 1598 struct nfs4_sequence_res seq_res; 1599 - struct page *scratch; 1599 + struct folio *scratch; 1600 1600 void *xattr_buf; 1601 1601 size_t xattr_len; 1602 1602 u64 cookie;
+2
include/linux/nfslocalio.h
··· 65 65 struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); 66 66 struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); 67 67 struct file *(*nfsd_file_file)(struct nfsd_file *); 68 + void (*nfsd_file_dio_alignment)(struct nfsd_file *, 69 + u32 *, u32 *, u32 *); 68 70 } ____cacheline_aligned; 69 71 70 72 extern void nfsd_localio_ops_init(void);
+2
include/linux/pagemap.h
··· 1229 1229 int folio_wait_writeback_killable(struct folio *folio); 1230 1230 void end_page_writeback(struct page *page); 1231 1231 void folio_end_writeback(struct folio *folio); 1232 + void folio_end_writeback_no_dropbehind(struct folio *folio); 1233 + void folio_end_dropbehind(struct folio *folio); 1232 1234 void folio_wait_stable(struct folio *folio); 1233 1235 void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); 1234 1236 void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
+8 -22
include/linux/sunrpc/debug.h
··· 23 23 24 24 #define dprintk(fmt, ...) \ 25 25 dfprintk(FACILITY, fmt, ##__VA_ARGS__) 26 - #define dprintk_cont(fmt, ...) \ 27 - dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__) 28 26 #define dprintk_rcu(fmt, ...) \ 29 27 dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) 30 - #define dprintk_rcu_cont(fmt, ...) \ 31 - dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__) 32 28 33 29 #undef ifdebug 34 30 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 35 31 # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) 36 32 33 + # if IS_ENABLED(CONFIG_SUNRPC_DEBUG_TRACE) 34 + # define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) 35 + # else 36 + # define __sunrpc_printk(fmt, ...) printk(KERN_DEFAULT fmt, ##__VA_ARGS__) 37 + # endif 38 + 37 39 # define dfprintk(fac, fmt, ...) \ 38 40 do { \ 39 41 ifdebug(fac) \ 40 - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ 41 - } while (0) 42 - 43 - # define dfprintk_cont(fac, fmt, ...) \ 44 - do { \ 45 - ifdebug(fac) \ 46 - printk(KERN_CONT fmt, ##__VA_ARGS__); \ 42 + __sunrpc_printk(fmt, ##__VA_ARGS__); \ 47 43 } while (0) 48 44 49 45 # define dfprintk_rcu(fac, fmt, ...) \ 50 46 do { \ 51 47 ifdebug(fac) { \ 52 48 rcu_read_lock(); \ 53 - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ 54 - rcu_read_unlock(); \ 55 - } \ 56 - } while (0) 57 - 58 - # define dfprintk_rcu_cont(fac, fmt, ...) \ 59 - do { \ 60 - ifdebug(fac) { \ 61 - rcu_read_lock(); \ 62 - printk(KERN_CONT fmt, ##__VA_ARGS__); \ 49 + __sunrpc_printk(fmt, ##__VA_ARGS__); \ 63 50 rcu_read_unlock(); \ 64 51 } \ 65 52 } while (0) ··· 55 68 #else 56 69 # define ifdebug(fac) if (0) 57 70 # define dfprintk(fac, fmt, ...) do {} while (0) 58 - # define dfprintk_cont(fac, fmt, ...) do {} while (0) 59 71 # define dfprintk_rcu(fac, fmt, ...) do {} while (0) 60 72 # define RPC_IFDEBUG(x) 61 73 #endif
+2 -2
include/linux/sunrpc/svc.h
··· 196 196 struct xdr_buf rq_arg; 197 197 struct xdr_stream rq_arg_stream; 198 198 struct xdr_stream rq_res_stream; 199 - struct page *rq_scratch_page; 199 + struct folio *rq_scratch_folio; 200 200 struct xdr_buf rq_res; 201 201 unsigned long rq_maxpages; /* num of entries in rq_pages */ 202 202 struct page * *rq_pages; ··· 503 503 buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; 504 504 505 505 xdr_init_decode(xdr, buf, argv->iov_base, NULL); 506 - xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); 506 + xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio); 507 507 } 508 508 509 509 /**
+2 -1
include/linux/sunrpc/svc_xprt.h
··· 165 165 struct net *net, const int family, 166 166 const unsigned short port, int flags, 167 167 const struct cred *cred); 168 - void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); 168 + void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, 169 + bool unregister); 169 170 void svc_xprt_received(struct svc_xprt *xprt); 170 171 void svc_xprt_enqueue(struct svc_xprt *xprt); 171 172 void svc_xprt_put(struct svc_xprt *xprt);
+4 -4
include/linux/sunrpc/xdr.h
··· 288 288 } 289 289 290 290 /** 291 - * xdr_set_scratch_page - Attach a scratch buffer for decoding data 291 + * xdr_set_scratch_folio - Attach a scratch buffer for decoding data 292 292 * @xdr: pointer to xdr_stream struct 293 - * @page: an anonymous page 293 + * @folio: an anonymous folio 294 294 * 295 295 * See xdr_set_scratch_buffer(). 296 296 */ 297 297 static inline void 298 - xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) 298 + xdr_set_scratch_folio(struct xdr_stream *xdr, struct folio *folio) 299 299 { 300 - xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); 300 + xdr_set_scratch_buffer(xdr, folio_address(folio), folio_size(folio)); 301 301 } 302 302 303 303 /**
+22
include/trace/misc/fs.h
··· 141 141 { ATTR_TIMES_SET, "TIMES_SET" }, \ 142 142 { ATTR_TOUCH, "TOUCH"}, \ 143 143 { ATTR_DELEG, "DELEG"}) 144 + 145 + #define show_statx_mask(flags) \ 146 + __print_flags(flags, "|", \ 147 + { STATX_TYPE, "TYPE" }, \ 148 + { STATX_MODE, "MODE" }, \ 149 + { STATX_NLINK, "NLINK" }, \ 150 + { STATX_UID, "UID" }, \ 151 + { STATX_GID, "GID" }, \ 152 + { STATX_ATIME, "ATIME" }, \ 153 + { STATX_MTIME, "MTIME" }, \ 154 + { STATX_CTIME, "CTIME" }, \ 155 + { STATX_INO, "INO" }, \ 156 + { STATX_SIZE, "SIZE" }, \ 157 + { STATX_BLOCKS, "BLOCKS" }, \ 158 + { STATX_BASIC_STATS, "BASIC_STATS" }, \ 159 + { STATX_BTIME, "BTIME" }, \ 160 + { STATX_MNT_ID, "MNT_ID" }, \ 161 + { STATX_DIOALIGN, "DIOALIGN" }, \ 162 + { STATX_MNT_ID_UNIQUE, "MNT_ID_UNIQUE" }, \ 163 + { STATX_SUBVOL, "SUBVOL" }, \ 164 + { STATX_WRITE_ATOMIC, "WRITE_ATOMIC" }, \ 165 + { STATX_DIO_READ_ALIGN, "DIO_READ_ALIGN" })
+26 -8
mm/filemap.c
··· 1621 1621 * completes. Do that now. If we fail, it's likely because of a big folio - 1622 1622 * just reset dropbehind for that case and latter completions should invalidate. 1623 1623 */ 1624 - static void filemap_end_dropbehind_write(struct folio *folio) 1624 + void folio_end_dropbehind(struct folio *folio) 1625 1625 { 1626 1626 if (!folio_test_dropbehind(folio)) 1627 1627 return; ··· 1638 1638 folio_unlock(folio); 1639 1639 } 1640 1640 } 1641 + EXPORT_SYMBOL_GPL(folio_end_dropbehind); 1641 1642 1642 1643 /** 1643 - * folio_end_writeback - End writeback against a folio. 1644 + * folio_end_writeback_no_dropbehind - End writeback against a folio. 1644 1645 * @folio: The folio. 1645 1646 * 1646 1647 * The folio must actually be under writeback. 1648 + * This call is intended for filesystems that need to defer dropbehind. 1647 1649 * 1648 1650 * Context: May be called from process or interrupt context. 1649 1651 */ 1650 - void folio_end_writeback(struct folio *folio) 1652 + void folio_end_writeback_no_dropbehind(struct folio *folio) 1651 1653 { 1652 1654 VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio); 1653 1655 ··· 1665 1663 folio_rotate_reclaimable(folio); 1666 1664 } 1667 1665 1666 + if (__folio_end_writeback(folio)) 1667 + folio_wake_bit(folio, PG_writeback); 1668 + 1669 + acct_reclaim_writeback(folio); 1670 + } 1671 + EXPORT_SYMBOL_GPL(folio_end_writeback_no_dropbehind); 1672 + 1673 + /** 1674 + * folio_end_writeback - End writeback against a folio. 1675 + * @folio: The folio. 1676 + * 1677 + * The folio must actually be under writeback. 1678 + * 1679 + * Context: May be called from process or interrupt context. 1680 + */ 1681 + void folio_end_writeback(struct folio *folio) 1682 + { 1683 + VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio); 1684 + 1668 1685 /* 1669 1686 * Writeback does not hold a folio reference of its own, relying 1670 1687 * on truncation to wait for the clearing of PG_writeback. ··· 1691 1670 * reused before the folio_wake_bit(). 
1692 1671 */ 1693 1672 folio_get(folio); 1694 - if (__folio_end_writeback(folio)) 1695 - folio_wake_bit(folio, PG_writeback); 1696 - 1697 - filemap_end_dropbehind_write(folio); 1698 - acct_reclaim_writeback(folio); 1673 + folio_end_writeback_no_dropbehind(folio); 1674 + folio_end_dropbehind(folio); 1699 1675 folio_put(folio); 1700 1676 } 1701 1677 EXPORT_SYMBOL(folio_end_writeback);
+14
net/sunrpc/Kconfig
··· 101 101 102 102 If unsure, say Y. 103 103 104 + config SUNRPC_DEBUG_TRACE 105 + bool "RPC: Send dfprintk() output to the trace buffer" 106 + depends on SUNRPC_DEBUG && TRACING 107 + default n 108 + help 109 + dprintk() output can be voluminous, which can overwhelm the 110 + kernel's logging facility as it must be sent to the console. 111 + This option causes dprintk() output to go to the trace buffer 112 + instead of the kernel log. 113 + 114 + This will cause warnings about trace_printk() being used to be 115 + logged at boot time, so say N unless you are debugging a problem 116 + with sunrpc-based clients or services. 117 + 104 118 config SUNRPC_XPRT_RDMA 105 119 tristate "RPC-over-RDMA transport" 106 120 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
+4 -4
net/sunrpc/auth_gss/gss_rpc_xdr.c
··· 794 794 struct gssx_res_accept_sec_context *res = data; 795 795 u32 value_follows; 796 796 int err; 797 - struct page *scratch; 797 + struct folio *scratch; 798 798 799 - scratch = alloc_page(GFP_KERNEL); 799 + scratch = folio_alloc(GFP_KERNEL, 0); 800 800 if (!scratch) 801 801 return -ENOMEM; 802 - xdr_set_scratch_page(xdr, scratch); 802 + xdr_set_scratch_folio(xdr, scratch); 803 803 804 804 /* res->status */ 805 805 err = gssx_dec_status(xdr, &res->status); ··· 844 844 err = gssx_dec_option_array(xdr, &res->options); 845 845 846 846 out_free: 847 - __free_page(scratch); 847 + folio_put(scratch); 848 848 return err; 849 849 }
-2
net/sunrpc/sched.c
··· 1074 1074 rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; 1075 1075 return 0; 1076 1076 } 1077 - EXPORT_SYMBOL_GPL(rpc_malloc); 1078 1077 1079 1078 /** 1080 1079 * rpc_free - free RPC buffer resources allocated via rpc_malloc ··· 1094 1095 else 1095 1096 kfree(buf); 1096 1097 } 1097 - EXPORT_SYMBOL_GPL(rpc_free); 1098 1098 1099 1099 /* 1100 1100 * Creation and deletion of RPC task structures
+1 -1
net/sunrpc/socklib.c
··· 86 86 /* ACL likes to be lazy in allocating pages - ACLs 87 87 * are small by default but can get huge. */ 88 88 if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) { 89 - *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN); 89 + *ppage = alloc_page(GFP_NOWAIT); 90 90 if (unlikely(*ppage == NULL)) { 91 91 if (copied == 0) 92 92 return -ENOMEM;
+5 -6
net/sunrpc/svc.c
··· 352 352 if (m->mode == SVC_POOL_PERNODE) 353 353 return m->pool_to[pidx]; 354 354 } 355 - return NUMA_NO_NODE; 355 + return numa_mem_id(); 356 356 } 357 357 /* 358 358 * Set the given thread's cpus_allowed mask so that it ··· 436 436 svc_unregister(serv, net); 437 437 rpcb_put_local(net); 438 438 } 439 - EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); 440 439 441 440 static int svc_uses_rpcbind(struct svc_serv *serv) 442 441 { ··· 669 670 folio_batch_release(&rqstp->rq_fbatch); 670 671 kfree(rqstp->rq_bvec); 671 672 svc_release_buffer(rqstp); 672 - if (rqstp->rq_scratch_page) 673 - put_page(rqstp->rq_scratch_page); 673 + if (rqstp->rq_scratch_folio) 674 + folio_put(rqstp->rq_scratch_folio); 674 675 kfree(rqstp->rq_resp); 675 676 kfree(rqstp->rq_argp); 676 677 kfree(rqstp->rq_auth_data); ··· 691 692 rqstp->rq_server = serv; 692 693 rqstp->rq_pool = pool; 693 694 694 - rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0); 695 - if (!rqstp->rq_scratch_page) 695 + rqstp->rq_scratch_folio = __folio_alloc_node(GFP_KERNEL, 0, node); 696 + if (!rqstp->rq_scratch_folio) 696 697 goto out_enomem; 697 698 698 699 rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
+6 -1
net/sunrpc/svc_xprt.c
··· 1102 1102 * svc_xprt_destroy_all - Destroy transports associated with @serv 1103 1103 * @serv: RPC service to be shut down 1104 1104 * @net: target network namespace 1105 + * @unregister: true if it is OK to unregister the destroyed xprts 1105 1106 * 1106 1107 * Server threads may still be running (especially in the case where the 1107 1108 * service is still running in other network namespaces). ··· 1115 1114 * threads, we may need to wait a little while and then check again to 1116 1115 * see if they're done. 1117 1116 */ 1118 - void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) 1117 + void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, 1118 + bool unregister) 1119 1119 { 1120 1120 int delay = 0; 1121 1121 ··· 1126 1124 svc_clean_up_xprts(serv, net); 1127 1125 msleep(delay++); 1128 1126 } 1127 + 1128 + if (unregister) 1129 + svc_rpcb_cleanup(serv, net); 1129 1130 } 1130 1131 EXPORT_SYMBOL_GPL(svc_xprt_destroy_all); 1131 1132
+1 -1
net/sunrpc/xprtrdma/rpc_rdma.c
··· 190 190 ppages = buf->pages + (buf->page_base >> PAGE_SHIFT); 191 191 while (len > 0) { 192 192 if (!*ppages) 193 - *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN); 193 + *ppages = alloc_page(GFP_NOWAIT); 194 194 if (!*ppages) 195 195 return -ENOBUFS; 196 196 ppages++;