Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'pnfs-submit' of git://git.open-osd.org/linux-open-osd

* 'pnfs-submit' of git://git.open-osd.org/linux-open-osd: (32 commits)
pnfs-obj: pg_test check for max_io_size
NFSv4.1: define nfs_generic_pg_test
NFSv4.1: use pnfs_generic_pg_test directly by layout driver
NFSv4.1: change pg_test return type to bool
NFSv4.1: unify pnfs_pageio_init functions
pnfs-obj: objlayout_encode_layoutcommit implementation
pnfs: encode_layoutcommit
pnfs-obj: report errors and .encode_layoutreturn Implementation.
pnfs: encode_layoutreturn
pnfs: layoutret_on_setattr
pnfs: layoutreturn
pnfs-obj: osd raid engine read/write implementation
pnfs: support for non-rpc layout drivers
pnfs-obj: define per-inode private structure
pnfs: alloc and free layout_hdr layoutdriver methods
pnfs-obj: objio_osd device information retrieval and caching
pnfs-obj: decode layout, alloc/free lseg
pnfs-obj: pnfs_osd XDR client implementation
pnfs-obj: pnfs_osd XDR definitions
pnfs-obj: objlayoutdriver module skeleton
...

+3910 -282
+10
fs/nfs/Kconfig
···
  config PNFS_FILE_LAYOUT
  	tristate

+ config PNFS_OBJLAYOUT
+ 	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
+ 	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
+ 	help
+ 	  Say M here if you want your pNFS client to support the Objects Layout Driver.
+ 	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
+ 	  upper level driver (SCSI_OSD_ULD).
+ 
+ 	  If unsure, say N.
+ 
  config ROOT_NFS
  	bool "Root file system on NFS"
  	depends on NFS_FS=y && IP_PNP
+3 -1
fs/nfs/Makefile
···
  			delegation.o idmap.o \
  			callback.o callback_xdr.o callback_proc.o \
  			nfs4namespace.o
- nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
  nfs-$(CONFIG_SYSCTL)	+= sysctl.o
  nfs-$(CONFIG_NFS_FSCACHE)	+= fscache.o fscache-index.o

  obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
  nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
+ 
+ obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
+17
fs/nfs/callback.h
···

  extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
  extern void nfs4_cb_take_slot(struct nfs_client *clp);
+ 
+ struct cb_devicenotifyitem {
+ 	uint32_t		cbd_notify_type;
+ 	uint32_t		cbd_layout_type;
+ 	struct nfs4_deviceid	cbd_dev_id;
+ 	uint32_t		cbd_immediate;
+ };
+ 
+ struct cb_devicenotifyargs {
+ 	int				ndevs;
+ 	struct cb_devicenotifyitem	*devs;
+ };
+ 
+ extern __be32 nfs4_callback_devicenotify(
+ 	struct cb_devicenotifyargs *args,
+ 	void *dummy, struct cb_process_state *cps);
+ 
  #endif /* CONFIG_NFS_V4_1 */
  extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+49 -2
fs/nfs/callback_proc.c
···
  	spin_lock(&ino->i_lock);
  	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
  	    mark_matching_lsegs_invalid(lo, &free_me_list,
- 					args->cbl_range.iomode))
+ 					&args->cbl_range))
  		rv = NFS4ERR_DELAY;
  	else
  		rv = NFS4ERR_NOMATCHING_LAYOUT;
···
  		ino = lo->plh_inode;
  		spin_lock(&ino->i_lock);
  		set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- 		if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
+ 		if (mark_matching_lsegs_invalid(lo, &free_me_list, &range))
  			rv = NFS4ERR_DELAY;
  		list_del_init(&lo->plh_bulk_recall);
  		spin_unlock(&ino->i_lock);
···
  	args.cbl_recall_type = RETURN_ALL;
  	/* FIXME we ignore errors, what should we do? */
  	do_callback_layoutrecall(clp, &args);
+ }
+ 
+ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
+ 				  void *dummy, struct cb_process_state *cps)
+ {
+ 	int i;
+ 	__be32 res = 0;
+ 	struct nfs_client *clp = cps->clp;
+ 	struct nfs_server *server = NULL;
+ 
+ 	dprintk("%s: -->\n", __func__);
+ 
+ 	if (!clp) {
+ 		res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 		goto out;
+ 	}
+ 
+ 	for (i = 0; i < args->ndevs; i++) {
+ 		struct cb_devicenotifyitem *dev = &args->devs[i];
+ 
+ 		if (!server ||
+ 		    server->pnfs_curr_ld->id != dev->cbd_layout_type) {
+ 			rcu_read_lock();
+ 			list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ 				if (server->pnfs_curr_ld &&
+ 				    server->pnfs_curr_ld->id == dev->cbd_layout_type) {
+ 					rcu_read_unlock();
+ 					goto found;
+ 				}
+ 			rcu_read_unlock();
+ 			dprintk("%s: layout type %u not found\n",
+ 				__func__, dev->cbd_layout_type);
+ 			continue;
+ 		}
+ 
+ 	found:
+ 		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
+ 			dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, "
+ 				"deleting instead\n", __func__);
+ 		nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
+ 	}
+ 
+ out:
+ 	kfree(args->devs);
+ 	dprintk("%s: exit with status = %u\n",
+ 		__func__, be32_to_cpu(res));
+ 	return res;
  }

  int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+95 -1
fs/nfs/callback_xdr.c
···

  #if defined(CONFIG_NFS_V4_1)
  #define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
  					4 + 1 + 3)
  #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
···
  out:
  	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
  	return status;
+ }
+ 
+ static
+ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
+ 				struct xdr_stream *xdr,
+ 				struct cb_devicenotifyargs *args)
+ {
+ 	__be32 *p;
+ 	__be32 status = 0;
+ 	u32 tmp;
+ 	int n, i;
+ 	args->ndevs = 0;
+ 
+ 	/* Num of device notifications */
+ 	p = read_buf(xdr, sizeof(uint32_t));
+ 	if (unlikely(p == NULL)) {
+ 		status = htonl(NFS4ERR_BADXDR);
+ 		goto out;
+ 	}
+ 	n = ntohl(*p++);
+ 	if (n <= 0)
+ 		goto out;
+ 
+ 	args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
+ 	if (!args->devs) {
+ 		status = htonl(NFS4ERR_DELAY);
+ 		goto out;
+ 	}
+ 
+ 	/* Decode each dev notification */
+ 	for (i = 0; i < n; i++) {
+ 		struct cb_devicenotifyitem *dev = &args->devs[i];
+ 
+ 		p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
+ 		if (unlikely(p == NULL)) {
+ 			status = htonl(NFS4ERR_BADXDR);
+ 			goto err;
+ 		}
+ 
+ 		tmp = ntohl(*p++);	/* bitmap size */
+ 		if (tmp != 1) {
+ 			status = htonl(NFS4ERR_INVAL);
+ 			goto err;
+ 		}
+ 		dev->cbd_notify_type = ntohl(*p++);
+ 		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
+ 		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
+ 			status = htonl(NFS4ERR_INVAL);
+ 			goto err;
+ 		}
+ 
+ 		tmp = ntohl(*p++);	/* opaque size */
+ 		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
+ 		     (tmp != NFS4_DEVICEID4_SIZE + 8)) ||
+ 		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
+ 		     (tmp != NFS4_DEVICEID4_SIZE + 4))) {
+ 			status = htonl(NFS4ERR_INVAL);
+ 			goto err;
+ 		}
+ 		dev->cbd_layout_type = ntohl(*p++);
+ 		memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE);
+ 		p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
+ 
+ 		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
+ 			p = read_buf(xdr, sizeof(uint32_t));
+ 			if (unlikely(p == NULL)) {
+ 				status = htonl(NFS4ERR_BADXDR);
+ 				goto err;
+ 			}
+ 			dev->cbd_immediate = ntohl(*p++);
+ 		} else {
+ 			dev->cbd_immediate = 0;
+ 		}
+ 
+ 		args->ndevs++;
+ 
+ 		dprintk("%s: type %d layout 0x%x immediate %d\n",
+ 			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
+ 			dev->cbd_immediate);
+ 	}
+ out:
+ 	dprintk("%s: status %d ndevs %d\n",
+ 		__func__, ntohl(status), args->ndevs);
+ 	return status;
+ err:
+ 	kfree(args->devs);
+ 	goto out;
  }

  static __be32 decode_sessionid(struct xdr_stream *xdr,
···
  	case OP_CB_RECALL_ANY:
  	case OP_CB_RECALL_SLOT:
  	case OP_CB_LAYOUTRECALL:
+ 	case OP_CB_NOTIFY_DEVICEID:
  		*op = &callback_ops[op_nr];
  		break;

- 	case OP_CB_NOTIFY_DEVICEID:
  	case OP_CB_NOTIFY:
  	case OP_CB_PUSH_DELEG:
  	case OP_CB_RECALLABLE_OBJ_AVAIL:
···
  		.decode_args =
  			(callback_decode_arg_t)decode_layoutrecall_args,
  		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
+ 	},
+ 	[OP_CB_NOTIFY_DEVICEID] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
+ 		.decode_args =
+ 			(callback_decode_arg_t)decode_devicenotify_args,
+ 		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
  	},
  	[OP_CB_SEQUENCE] = {
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+2
fs/nfs/client.c
···
  	if (clp->cl_machine_cred != NULL)
  		put_rpccred(clp->cl_machine_cred);

+ 	nfs4_deviceid_purge_client(clp);
+ 
  	kfree(clp->cl_hostname);
  	kfree(clp);

+2 -7
fs/nfs/dir.c
···
  			struct page **xdr_pages, struct page *page, unsigned int buflen)
  {
  	struct xdr_stream stream;
- 	struct xdr_buf buf = {
- 		.pages = xdr_pages,
- 		.page_len = buflen,
- 		.buflen = buflen,
- 		.len = buflen,
- 	};
+ 	struct xdr_buf buf;
  	struct page *scratch;
  	struct nfs_cache_array *array;
  	unsigned int count = 0;
···
  	if (scratch == NULL)
  		return -ENOMEM;

- 	xdr_init_decode(&stream, &buf, NULL);
+ 	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
  	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

  	do {
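
Several call sites in this merge (here, and in the two nfs4filelayout files below) replace an open-coded xdr_buf initializer with a single xdr_init_decode_pages() call. The helper itself lands on the SUNRPC side and is not part of this diff; a minimal sketch of what it presumably does, reconstructed from the initializer removed above (treat the body as an assumption, not the authoritative implementation):

/* Sketch only: reconstructed from the open-coded xdr_buf setup this
 * hunk removes; the real helper belongs to the SUNRPC XDR layer. */
void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
			   struct page **pages, unsigned int len)
{
	memset(buf, 0, sizeof(*buf));	/* no head/tail kvecs, page data only */
	buf->pages = pages;
	buf->page_len = len;
	buf->buflen = len;
	buf->len = len;
	xdr_init_decode(xdr, buf, NULL);
}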
+2 -1
fs/nfs/inode.c
···
   */
  void nfs4_evict_inode(struct inode *inode)
  {
- 	pnfs_destroy_layout(NFS_I(inode));
  	truncate_inode_pages(&inode->i_data, 0);
  	end_writeback(inode);
+ 	pnfs_return_layout(inode);
+ 	pnfs_destroy_layout(NFS_I(inode));
  	/* If we are holding a delegation, return it! */
  	nfs_inode_return_delegation_noreclaim(inode);
  	/* First call standard NFS clear_inode() code */
+1
fs/nfs/internal.h
···
  #endif

  /* nfs4proc.c */
+ extern void __nfs4_read_done_cb(struct nfs_read_data *);
  extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
  extern int nfs4_init_client(struct nfs_client *clp,
  			    const struct rpc_timeout *timeparms,
+23 -15
fs/nfs/nfs4filelayout.c
···
  			struct nfs4_deviceid *id,
  			gfp_t gfp_flags)
  {
+ 	struct nfs4_deviceid_node *d;
  	struct nfs4_file_layout_dsaddr *dsaddr;
  	int status = -EINVAL;
  	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
···
  	dprintk("--> %s\n", __func__);

  	if (fl->pattern_offset > lgr->range.offset) {
- 		dprintk("%s pattern_offset %lld to large\n",
+ 		dprintk("%s pattern_offset %lld too large\n",
  			__func__, fl->pattern_offset);
  		goto out;
  	}
···
  	}

  	/* find and reference the deviceid */
- 	dsaddr = nfs4_fl_find_get_deviceid(id);
- 	if (dsaddr == NULL) {
+ 	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
+ 				   NFS_SERVER(lo->plh_inode)->nfs_client, id);
+ 	if (d == NULL) {
  		dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
  		if (dsaddr == NULL)
  			goto out;
- 	}
+ 	} else
+ 		dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
  	fl->dsaddr = dsaddr;

  	if (fl->first_stripe_index < 0 ||
···
  		gfp_t gfp_flags)
  {
  	struct xdr_stream stream;
- 	struct xdr_buf buf = {
- 		.pages = lgr->layoutp->pages,
- 		.page_len = lgr->layoutp->len,
- 		.buflen = lgr->layoutp->len,
- 		.len = lgr->layoutp->len,
- 	};
+ 	struct xdr_buf buf;
  	struct page *scratch;
  	__be32 *p;
  	uint32_t nfl_util;
···
  	if (!scratch)
  		return -ENOMEM;

- 	xdr_init_decode(&stream, &buf, NULL);
+ 	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
  	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

  	/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
···

  	memcpy(id, p, sizeof(*id));
  	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
- 	print_deviceid(id);
+ 	nfs4_print_deviceid(id);

  	nfl_util = be32_to_cpup(p++);
  	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
···
  /*
   * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
   *
-  * return 1 : coalesce page
-  * return 0 : don't coalesce page
+  * return true  : coalesce page
+  * return false : don't coalesce page
   */
- int
+ bool
  filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
  		   struct nfs_page *req)
  {
  	u64 p_stripe, r_stripe;
  	u32 stripe_unit;
+ 
+ 	if (!pnfs_generic_pg_test(pgio, prev, req))
+ 		return 0;

  	if (!pgio->pg_lseg)
  		return 1;
···
  		return -ENOMEM;
  }

+ static void
+ filelayout_free_deviceid_node(struct nfs4_deviceid_node *d)
+ {
+ 	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
+ }
+ 
  static struct pnfs_layoutdriver_type filelayout_type = {
  	.id			= LAYOUT_NFSV4_1_FILES,
  	.name			= "LAYOUT_NFSV4_1_FILES",
···
  	.commit_pagelist	= filelayout_commit_pagelist,
  	.read_pagelist		= filelayout_read_pagelist,
  	.write_pagelist		= filelayout_write_pagelist,
+ 	.free_deviceid_node	= filelayout_free_deviceid_node,
  };

  static int __init nfs4filelayout_init(void)
+2 -6
fs/nfs/nfs4filelayout.h
···
  #define NFS4_DEVICE_ID_NEG_ENTRY	0x00000001

  struct nfs4_file_layout_dsaddr {
- 	struct hlist_node		node;
- 	struct nfs4_deviceid		deviceid;
- 	atomic_t			ref;
+ 	struct nfs4_deviceid_node	id_node;
  	unsigned long			flags;
  	u32				stripe_count;
  	u8				*stripe_indices;
···
  nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);

  extern void print_ds(struct nfs4_pnfs_ds *ds);
- extern void print_deviceid(struct nfs4_deviceid *dev_id);
  u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
  u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
  struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
  					u32 ds_idx);
- extern struct nfs4_file_layout_dsaddr *
- nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
  extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
+ extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
  struct nfs4_file_layout_dsaddr *
  get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);

+18 -101
fs/nfs/nfs4filelayoutdev.c
···
  #define NFSDBG_FACILITY		NFSDBG_PNFS_LD

  /*
- * Device ID RCU cache. A device ID is unique per client ID and layout type.
- */
- #define NFS4_FL_DEVICE_ID_HASH_BITS	5
- #define NFS4_FL_DEVICE_ID_HASH_SIZE	(1 << NFS4_FL_DEVICE_ID_HASH_BITS)
- #define NFS4_FL_DEVICE_ID_HASH_MASK	(NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
- 
- static inline u32
- nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
- {
- 	unsigned char *cptr = (unsigned char *)id->data;
- 	unsigned int nbytes = NFS4_DEVICEID4_SIZE;
- 	u32 x = 0;
- 
- 	while (nbytes--) {
- 		x *= 37;
- 		x += *cptr++;
- 	}
- 	return x & NFS4_FL_DEVICE_ID_HASH_MASK;
- }
- 
- static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
- static DEFINE_SPINLOCK(filelayout_deviceid_lock);
- 
- /*
  * Data server cache
  *
  * Data servers can be mapped to different device ids.
···
  		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
  		atomic_read(&ds->ds_count), ds->ds_clp,
  		ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
- }
- 
- void
- print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
- {
- 	int i;
- 
- 	ifdebug(FACILITY) {
- 		printk("%s dsaddr->ds_num %d\n", __func__,
- 		       dsaddr->ds_num);
- 		for (i = 0; i < dsaddr->ds_num; i++)
- 			print_ds(dsaddr->ds_list[i]);
- 	}
- }
- 
- void print_deviceid(struct nfs4_deviceid *id)
- {
- 	u32 *p = (u32 *)id;
- 
- 	dprintk("%s: device id= [%x%x%x%x]\n", __func__,
- 		p[0], p[1], p[2], p[3]);
  }

  /* nfs4_ds_cache_lock is held */
···
  	kfree(ds);
  }

- static void
+ void
  nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
  {
  	struct nfs4_pnfs_ds *ds;
  	int i;

- 	print_deviceid(&dsaddr->deviceid);
+ 	nfs4_print_deviceid(&dsaddr->id_node.deviceid);

  	for (i = 0; i < dsaddr->ds_num; i++) {
  		ds = dsaddr->ds_list[i];
···
  	u8 max_stripe_index;
  	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
  	struct xdr_stream stream;
- 	struct xdr_buf buf = {
- 		.pages = pdev->pages,
- 		.page_len = pdev->pglen,
- 		.buflen = pdev->pglen,
- 		.len = pdev->pglen,
- 	};
+ 	struct xdr_buf buf;
  	struct page *scratch;

  	/* set up xdr stream */
···
  	if (!scratch)
  		goto out_err;

- 	xdr_init_decode(&stream, &buf, NULL);
+ 	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
  	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

  	/* Get the stripe count (number of stripe index) */
···
  	dsaddr->stripe_indices = stripe_indices;
  	stripe_indices = NULL;
  	dsaddr->ds_num = num;
- 
- 	memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
+ 	nfs4_init_deviceid_node(&dsaddr->id_node,
+ 				NFS_SERVER(ino)->pnfs_curr_ld,
+ 				NFS_SERVER(ino)->nfs_client,
+ 				&pdev->dev_id);

  	for (i = 0; i < dsaddr->ds_num; i++) {
  		int j;
···
  static struct nfs4_file_layout_dsaddr *
  decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
  {
- 	struct nfs4_file_layout_dsaddr *d, *new;
- 	long hash;
+ 	struct nfs4_deviceid_node *d;
+ 	struct nfs4_file_layout_dsaddr *n, *new;

  	new = decode_device(inode, dev, gfp_flags);
  	if (!new) {
···
  		return NULL;
  	}

- 	spin_lock(&filelayout_deviceid_lock);
- 	d = nfs4_fl_find_get_deviceid(&new->deviceid);
- 	if (d) {
- 		spin_unlock(&filelayout_deviceid_lock);
+ 	d = nfs4_insert_deviceid_node(&new->id_node);
+ 	n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ 	if (n != new) {
  		nfs4_fl_free_deviceid(new);
- 		return d;
+ 		return n;
  	}
- 
- 	INIT_HLIST_NODE(&new->node);
- 	atomic_set(&new->ref, 1);
- 	hash = nfs4_fl_deviceid_hash(&new->deviceid);
- 	hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
- 	spin_unlock(&filelayout_deviceid_lock);

  	return new;
  }
···
  void
  nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
  {
- 	if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
- 		hlist_del_rcu(&dsaddr->node);
- 		spin_unlock(&filelayout_deviceid_lock);
- 
- 		synchronize_rcu();
- 		nfs4_fl_free_deviceid(dsaddr);
- 	}
- }
- 
- struct nfs4_file_layout_dsaddr *
- nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
- {
- 	struct nfs4_file_layout_dsaddr *d;
- 	struct hlist_node *n;
- 	long hash = nfs4_fl_deviceid_hash(id);
- 
- 	rcu_read_lock();
- 	hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
- 		if (!memcmp(&d->deviceid, id, sizeof(*id))) {
- 			if (!atomic_inc_not_zero(&d->ref))
- 				goto fail;
- 			rcu_read_unlock();
- 			return d;
- 		}
- 	}
- fail:
- 	rcu_read_unlock();
- 	return NULL;
+ 	nfs4_put_deviceid_node(&dsaddr->id_node);
  }

  /*
···
  filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
  			       int err, u32 ds_addr)
  {
- 	u32 *p = (u32 *)&dsaddr->deviceid;
+ 	u32 *p = (u32 *)&dsaddr->id_node.deviceid;

  	printk(KERN_ERR "NFS: data server %x connection error %d."
  		" Deviceid [%x%x%x%x] marked out of use.\n",
  		ds_addr, err, p[0], p[1], p[2], p[3]);

- 	spin_lock(&filelayout_deviceid_lock);
+ 	spin_lock(&nfs4_ds_cache_lock);
  	dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
- 	spin_unlock(&filelayout_deviceid_lock);
+ 	spin_unlock(&nfs4_ds_cache_lock);
  }

  struct nfs4_pnfs_ds *
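
Both layout drivers now go through the generic device-ID cache built as pnfs_dev.o (added in the Makefile hunk above); its source is not part of this diff. A minimal sketch of the insert semantics the callers rely on (decode_and_add_device() here, _dev_list_add() in objio_osd.c below), with dev_cache_lock, lookup_locked() and hash_bucket() as hypothetical stand-ins for the real internals:

/* Sketch, not the real pnfs_dev.c: nfs4_insert_deviceid_node() as its
 * callers use it -- return an existing node with the same device ID if
 * one is already cached, otherwise insert and return the new node.
 * dev_cache_lock, lookup_locked() and hash_bucket() are hypothetical. */
struct nfs4_deviceid_node *
nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new)
{
	struct nfs4_deviceid_node *d;

	spin_lock(&dev_cache_lock);
	d = lookup_locked(new->ld, new->nfs_client, &new->deviceid);
	if (d) {
		spin_unlock(&dev_cache_lock);
		return d;		/* caller frees its duplicate */
	}
	hlist_add_head_rcu(&new->node, hash_bucket(&new->deviceid));
	spin_unlock(&dev_cache_lock);
	return new;
}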
+95 -3
fs/nfs/nfs4proc.c
···
  	struct nfs4_state *state = NULL;
  	int status;

+ 	if (pnfs_ld_layoutret_on_setattr(inode))
+ 		pnfs_return_layout(inode);
+ 
  	nfs_fattr_init(fattr);

  	/* Search for an existing open(O_WRITE) file */
···
  	return err;
  }

+ void __nfs4_read_done_cb(struct nfs_read_data *data)
+ {
+ 	nfs_invalidate_atime(data->inode);
+ }
+ 
  static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
  {
  	struct nfs_server *server = NFS_SERVER(data->inode);
···
  		return -EAGAIN;
  	}

- 	nfs_invalidate_atime(data->inode);
+ 	__nfs4_read_done_cb(data);
  	if (task->tk_status > 0)
  		renew_lease(server, data->timestamp);
  	return 0;
···
  	if (!nfs4_sequence_done(task, &data->res.seq_res))
  		return -EAGAIN;

- 	return data->read_done_cb(task, data);
+ 	return data->read_done_cb ? data->read_done_cb(task, data) :
+ 		nfs4_read_done_cb(task, data);
  }

  static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
···
  {
  	if (!nfs4_sequence_done(task, &data->res.seq_res))
  		return -EAGAIN;
- 	return data->write_done_cb(task, data);
+ 	return data->write_done_cb ? data->write_done_cb(task, data) :
+ 		nfs4_write_done_cb(task, data);
  }

  /* Reset the nfs_write_data to send the write to the MDS. */
···
  	status = pnfs_layout_process(lgp);
  	rpc_put_task(task);
  	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
+ 
+ static void
+ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_layoutreturn *lrp = calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 	if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
+ 				 &lrp->res.seq_res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+ 
+ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_layoutreturn *lrp = calldata;
+ 	struct nfs_server *server;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+ 	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
+ 		return;
+ 
+ 	server = NFS_SERVER(lrp->args.inode);
+ 	if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ 		nfs_restart_rpc(task, lrp->clp);
+ 		return;
+ 	}
+ 	if (task->tk_status == 0) {
+ 		struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+ 
+ 		if (lrp->res.lrs_present) {
+ 			spin_lock(&lo->plh_inode->i_lock);
+ 			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+ 			spin_unlock(&lo->plh_inode->i_lock);
+ 		} else
+ 			BUG_ON(!list_empty(&lo->plh_segs));
+ 	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs4_layoutreturn_release(void *calldata)
+ {
+ 	struct nfs4_layoutreturn *lrp = calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 	put_layout_hdr(NFS_I(lrp->args.inode)->layout);
+ 	kfree(calldata);
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
+ 	.rpc_call_prepare = nfs4_layoutreturn_prepare,
+ 	.rpc_call_done = nfs4_layoutreturn_done,
+ 	.rpc_release = nfs4_layoutreturn_release,
+ };
+ 
+ int
+ nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
+ {
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
+ 		.rpc_argp = &lrp->args,
+ 		.rpc_resp = &lrp->res,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.rpc_client = lrp->clp->cl_rpcclient,
+ 		.rpc_message = &msg,
+ 		.callback_ops = &nfs4_layoutreturn_call_ops,
+ 		.callback_data = lrp,
+ 	};
+ 	int status;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 	task = rpc_run_task(&task_setup_data);
+ 	if (IS_ERR(task))
+ 		return PTR_ERR(task);
+ 	status = task->tk_status;
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	rpc_put_task(task);
  	return status;
  }

+124 -10
fs/nfs/nfs4xdr.c
···
  				1 /* layoutupdate4 layout type */ + \
  				1 /* NULL filelayout layoutupdate4 payload */)
  #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
- 
+ #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
+ 				encode_stateid_maxsz + \
+ 				1 /* FIXME: opaque lrf_body always empty at the moment */)
+ #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
+ 				1 + decode_stateid_maxsz)
  #else /* CONFIG_NFS_V4_1 */
  #define encode_sequence_maxsz	0
  #define decode_sequence_maxsz	0
···
  				decode_putfh_maxsz + \
  				decode_layoutcommit_maxsz + \
  				decode_getattr_maxsz)
- 
+ #define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+ 				encode_layoutreturn_maxsz)
+ #define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_layoutreturn_maxsz)

  const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
  				compound_encode_hdr_maxsz +
···

  static int
  encode_layoutcommit(struct xdr_stream *xdr,
+ 		    struct inode *inode,
  		    const struct nfs4_layoutcommit_args *args,
  		    struct compound_hdr *hdr)
  {
···
  	dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
  		NFS_SERVER(args->inode)->pnfs_curr_ld->id);

- 	p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
  	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
  	/* Only whole file layouts */
  	p = xdr_encode_hyper(p, 0); /* offset */
···
  	p = xdr_encode_hyper(p, args->lastbytewritten);
  	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
  	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
- 	*p++ = cpu_to_be32(0); /* no file layout payload */
+ 
+ 	if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
+ 		NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
+ 			NFS_I(inode)->layout, xdr, args);
+ 	else {
+ 		p = reserve_space(xdr, 4);
+ 		*p = cpu_to_be32(0); /* no layout-type payload */
+ 	}

  	hdr->nops++;
  	hdr->replen += decode_layoutcommit_maxsz;
  	return 0;
+ }
+ 
+ static void
+ encode_layoutreturn(struct xdr_stream *xdr,
+ 		    const struct nfs4_layoutreturn_args *args,
+ 		    struct compound_hdr *hdr)
+ {
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, 20);
+ 	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
+ 	*p++ = cpu_to_be32(0);		/* reclaim. always 0 for now */
+ 	*p++ = cpu_to_be32(args->layout_type);
+ 	*p++ = cpu_to_be32(IOMODE_ANY);
+ 	*p = cpu_to_be32(RETURN_FILE);
+ 	p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, 0);
+ 	p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
+ 	spin_lock(&args->inode->i_lock);
+ 	xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
+ 	spin_unlock(&args->inode->i_lock);
+ 	if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
+ 		NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
+ 			NFS_I(args->inode)->layout, xdr, args);
+ 	} else {
+ 		p = reserve_space(xdr, 4);
+ 		*p = cpu_to_be32(0);
+ 	}
+ 	hdr->nops++;
+ 	hdr->replen += decode_layoutreturn_maxsz;
  }
  #endif /* CONFIG_NFS_V4_1 */
···
  /*
   * Encode LAYOUTCOMMIT request
   */
- static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
- 				     struct xdr_stream *xdr,
- 				     struct nfs4_layoutcommit_args *args)
+ static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
+ 				      struct xdr_stream *xdr,
+ 				      struct nfs4_layoutcommit_args *args)
+ {
+ 	struct nfs4_layoutcommit_data *data =
+ 		container_of(args, struct nfs4_layoutcommit_data, args);
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ 	};
+ 
+ 	encode_compound_hdr(xdr, req, &hdr);
+ 	encode_sequence(xdr, &args->seq_args, &hdr);
+ 	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+ 	encode_layoutcommit(xdr, data->args.inode, args, &hdr);
+ 	encode_getfattr(xdr, args->bitmask, &hdr);
+ 	encode_nops(&hdr);
+ }
+ 
+ /*
+  * Encode LAYOUTRETURN request
+  */
+ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
+ 				      struct xdr_stream *xdr,
+ 				      struct nfs4_layoutreturn_args *args)
  {
  	struct compound_hdr hdr = {
  		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
···
  	encode_compound_hdr(xdr, req, &hdr);
  	encode_sequence(xdr, &args->seq_args, &hdr);
  	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
- 	encode_layoutcommit(xdr, args, &hdr);
- 	encode_getfattr(xdr, args->bitmask, &hdr);
+ 	encode_layoutreturn(xdr, args, &hdr);
  	encode_nops(&hdr);
- 	return 0;
  }
  #endif /* CONFIG_NFS_V4_1 */
···
  	return -EIO;
  }

+ static int decode_layoutreturn(struct xdr_stream *xdr,
+ 			       struct nfs4_layoutreturn_res *res)
+ {
+ 	__be32 *p;
+ 	int status;
+ 
+ 	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
+ 	if (status)
+ 		return status;
+ 	p = xdr_inline_decode(xdr, 4);
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	res->lrs_present = be32_to_cpup(p);
+ 	if (res->lrs_present)
+ 		status = decode_stateid(xdr, &res->stateid);
+ 	return status;
+ out_overflow:
+ 	print_overflow_msg(__func__, xdr);
+ 	return -EIO;
+ }
+ 
  static int decode_layoutcommit(struct xdr_stream *xdr,
  			       struct rpc_rqst *req,
  			       struct nfs4_layoutcommit_res *res)
···
  }

  /*
+  * Decode LAYOUTRETURN response
+  */
+ static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
+ 				     struct xdr_stream *xdr,
+ 				     struct nfs4_layoutreturn_res *res)
+ {
+ 	struct compound_hdr hdr;
+ 	int status;
+ 
+ 	status = decode_compound_hdr(xdr, &hdr);
+ 	if (status)
+ 		goto out;
+ 	status = decode_sequence(xdr, &res->seq_res, rqstp);
+ 	if (status)
+ 		goto out;
+ 	status = decode_putfh(xdr);
+ 	if (status)
+ 		goto out;
+ 	status = decode_layoutreturn(xdr, res);
+ out:
+ 	return status;
+ }
+ 
+ /*
   * Decode LAYOUTCOMMIT response
   */
  static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
···
  	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
  	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
  	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
+ 	PROC(LAYOUTRETURN,	enc_layoutreturn,	dec_layoutreturn),
  #endif /* CONFIG_NFS_V4_1 */
  };

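
A quick consistency check on the sizes above: reserve_space(xdr, 20) in encode_layoutreturn() covers five 4-byte XDR words (the opcode plus reclaim, layout type, iomode and return type), the second reserve_space(xdr, 16 + NFS4_STATEID_SIZE) covers the two 8-byte offset/length hypers plus the stateid, and the else branch adds one more 4-byte word for the empty lrf_body length. That matches encode_layoutreturn_maxsz = 8 + op_encode_hdr_maxsz + encode_stateid_maxsz + 1: eight words for reclaim, layout type, iomode, return type and the two hypers, the opcode accounted for by op_encode_hdr_maxsz, and the trailing 1 being the empty lrf_body word.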
+5
fs/nfs/objlayout/Kbuild
··· (new file)
#
# Makefile for the pNFS Objects Layout Driver kernel module
#
objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
+1057
fs/nfs/objlayout/objio_osd.c
··· 1 + /* 2 + * pNFS Objects layout implementation over open-osd initiator library 3 + * 4 + * Copyright (C) 2009 Panasas Inc. [year of first publication] 5 + * All rights reserved. 6 + * 7 + * Benny Halevy <bhalevy@panasas.com> 8 + * Boaz Harrosh <bharrosh@panasas.com> 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of the GNU General Public License version 2 12 + * See the file COPYING included with this distribution for more details. 13 + * 14 + * Redistribution and use in source and binary forms, with or without 15 + * modification, are permitted provided that the following conditions 16 + * are met: 17 + * 18 + * 1. Redistributions of source code must retain the above copyright 19 + * notice, this list of conditions and the following disclaimer. 20 + * 2. Redistributions in binary form must reproduce the above copyright 21 + * notice, this list of conditions and the following disclaimer in the 22 + * documentation and/or other materials provided with the distribution. 23 + * 3. Neither the name of the Panasas company nor the names of its 24 + * contributors may be used to endorse or promote products derived 25 + * from this software without specific prior written permission. 26 + * 27 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 28 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 29 + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 34 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 + */ 39 + 40 + #include <linux/module.h> 41 + #include <scsi/osd_initiator.h> 42 + 43 + #include "objlayout.h" 44 + 45 + #define NFSDBG_FACILITY NFSDBG_PNFS_LD 46 + 47 + #define _LLU(x) ((unsigned long long)x) 48 + 49 + enum { BIO_MAX_PAGES_KMALLOC = 50 + (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), 51 + }; 52 + 53 + struct objio_dev_ent { 54 + struct nfs4_deviceid_node id_node; 55 + struct osd_dev *od; 56 + }; 57 + 58 + static void 59 + objio_free_deviceid_node(struct nfs4_deviceid_node *d) 60 + { 61 + struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); 62 + 63 + dprintk("%s: free od=%p\n", __func__, de->od); 64 + osduld_put_device(de->od); 65 + kfree(de); 66 + } 67 + 68 + static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, 69 + const struct nfs4_deviceid *d_id) 70 + { 71 + struct nfs4_deviceid_node *d; 72 + struct objio_dev_ent *de; 73 + 74 + d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); 75 + if (!d) 76 + return NULL; 77 + 78 + de = container_of(d, struct objio_dev_ent, id_node); 79 + return de; 80 + } 81 + 82 + static struct objio_dev_ent * 83 + _dev_list_add(const struct nfs_server *nfss, 84 + const struct nfs4_deviceid *d_id, struct osd_dev *od, 85 + gfp_t gfp_flags) 86 + { 87 + struct nfs4_deviceid_node *d; 88 + struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); 89 + struct objio_dev_ent *n; 90 + 91 + if (!de) { 92 + dprintk("%s: -ENOMEM od=%p\n", __func__, od); 93 + return NULL; 94 + } 95 + 96 + dprintk("%s: Adding od=%p\n", __func__, od); 97 + nfs4_init_deviceid_node(&de->id_node, 98 + nfss->pnfs_curr_ld, 99 + nfss->nfs_client, 100 + d_id); 101 + de->od = od; 102 + 103 + d = nfs4_insert_deviceid_node(&de->id_node); 104 + n = container_of(d, struct objio_dev_ent, id_node); 105 + if (n != de) { 106 + dprintk("%s: Race with other n->od=%p\n", __func__, n->od); 107 + objio_free_deviceid_node(&de->id_node); 108 + de = n; 109 + } 110 + 111 + atomic_inc(&de->id_node.ref); 112 + return de; 113 + } 114 + 115 + struct caps_buffers { 116 + u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; 117 + u8 creds[OSD_CAP_LEN]; 118 + }; 119 + 120 + struct objio_segment { 121 + struct pnfs_layout_segment lseg; 122 + 123 + struct pnfs_osd_object_cred *comps; 124 + 125 + unsigned mirrors_p1; 126 + unsigned stripe_unit; 127 + unsigned group_width; /* Data stripe_units without integrity comps */ 128 + u64 group_depth; 129 + unsigned group_count; 130 + 131 + unsigned max_io_size; 132 + 133 + unsigned comps_index; 134 + unsigned num_comps; 135 + /* variable length */ 136 + struct objio_dev_ent *ods[]; 137 + }; 138 + 139 + static inline struct objio_segment * 140 + OBJIO_LSEG(struct pnfs_layout_segment *lseg) 141 + { 142 + return container_of(lseg, struct objio_segment, lseg); 143 + } 144 + 145 + struct objio_state; 146 + typedef ssize_t (*objio_done_fn)(struct objio_state *ios); 147 + 148 + struct objio_state { 149 + /* Generic layer */ 150 + struct objlayout_io_state ol_state; 151 + 152 + struct objio_segment *layout; 153 + 154 + struct kref kref; 155 + objio_done_fn done; 156 + void *private; 157 + 158 + unsigned long length; 159 + unsigned numdevs; /* Actually used devs in this IO */ 160 + /* A per-device variable array of size numdevs */ 161 + struct _objio_per_comp { 162 + struct bio *bio; 163 + struct osd_request *or; 164 + unsigned long length; 165 + u64 offset; 166 + unsigned dev; 167 + } per_dev[]; 168 + }; 169 + 170 + /* Send and wait for a get_device_info of devices in the layout, 171 + then look them up with the 
osd_initiator library */ 172 + static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, 173 + struct objio_segment *objio_seg, unsigned comp, 174 + gfp_t gfp_flags) 175 + { 176 + struct pnfs_osd_deviceaddr *deviceaddr; 177 + struct nfs4_deviceid *d_id; 178 + struct objio_dev_ent *ode; 179 + struct osd_dev *od; 180 + struct osd_dev_info odi; 181 + int err; 182 + 183 + d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; 184 + 185 + ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); 186 + if (ode) 187 + return ode; 188 + 189 + err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); 190 + if (unlikely(err)) { 191 + dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", 192 + __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); 193 + return ERR_PTR(err); 194 + } 195 + 196 + odi.systemid_len = deviceaddr->oda_systemid.len; 197 + if (odi.systemid_len > sizeof(odi.systemid)) { 198 + err = -EINVAL; 199 + goto out; 200 + } else if (odi.systemid_len) 201 + memcpy(odi.systemid, deviceaddr->oda_systemid.data, 202 + odi.systemid_len); 203 + odi.osdname_len = deviceaddr->oda_osdname.len; 204 + odi.osdname = (u8 *)deviceaddr->oda_osdname.data; 205 + 206 + if (!odi.osdname_len && !odi.systemid_len) { 207 + dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", 208 + __func__); 209 + err = -ENODEV; 210 + goto out; 211 + } 212 + 213 + od = osduld_info_lookup(&odi); 214 + if (unlikely(IS_ERR(od))) { 215 + err = PTR_ERR(od); 216 + dprintk("%s: osduld_info_lookup => %d\n", __func__, err); 217 + goto out; 218 + } 219 + 220 + ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, 221 + gfp_flags); 222 + 223 + out: 224 + dprintk("%s: return=%d\n", __func__, err); 225 + objlayout_put_deviceinfo(deviceaddr); 226 + return err ? ERR_PTR(err) : ode; 227 + } 228 + 229 + static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, 230 + struct objio_segment *objio_seg, 231 + gfp_t gfp_flags) 232 + { 233 + unsigned i; 234 + int err; 235 + 236 + /* lookup all devices */ 237 + for (i = 0; i < objio_seg->num_comps; i++) { 238 + struct objio_dev_ent *ode; 239 + 240 + ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); 241 + if (unlikely(IS_ERR(ode))) { 242 + err = PTR_ERR(ode); 243 + goto out; 244 + } 245 + objio_seg->ods[i] = ode; 246 + } 247 + err = 0; 248 + 249 + out: 250 + dprintk("%s: return=%d\n", __func__, err); 251 + return err; 252 + } 253 + 254 + static int _verify_data_map(struct pnfs_osd_layout *layout) 255 + { 256 + struct pnfs_osd_data_map *data_map = &layout->olo_map; 257 + u64 stripe_length; 258 + u32 group_width; 259 + 260 + /* FIXME: Only raid0 for now. 
if not go through MDS */ 261 + if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { 262 + printk(KERN_ERR "Only RAID_0 for now\n"); 263 + return -ENOTSUPP; 264 + } 265 + if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { 266 + printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", 267 + data_map->odm_num_comps, data_map->odm_mirror_cnt); 268 + return -EINVAL; 269 + } 270 + 271 + if (data_map->odm_group_width) 272 + group_width = data_map->odm_group_width; 273 + else 274 + group_width = data_map->odm_num_comps / 275 + (data_map->odm_mirror_cnt + 1); 276 + 277 + stripe_length = (u64)data_map->odm_stripe_unit * group_width; 278 + if (stripe_length >= (1ULL << 32)) { 279 + printk(KERN_ERR "Total Stripe length(0x%llx)" 280 + " >= 32bit is not supported\n", _LLU(stripe_length)); 281 + return -ENOTSUPP; 282 + } 283 + 284 + if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { 285 + printk(KERN_ERR "Stripe Unit(0x%llx)" 286 + " must be Multples of PAGE_SIZE(0x%lx)\n", 287 + _LLU(data_map->odm_stripe_unit), PAGE_SIZE); 288 + return -ENOTSUPP; 289 + } 290 + 291 + return 0; 292 + } 293 + 294 + static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, 295 + struct pnfs_osd_object_cred *src_comp, 296 + struct caps_buffers *caps_p) 297 + { 298 + WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); 299 + WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); 300 + 301 + *cur_comp = *src_comp; 302 + 303 + memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, 304 + sizeof(caps_p->caps_key)); 305 + cur_comp->oc_cap_key.cred = caps_p->caps_key; 306 + 307 + memcpy(caps_p->creds, src_comp->oc_cap.cred, 308 + sizeof(caps_p->creds)); 309 + cur_comp->oc_cap.cred = caps_p->creds; 310 + } 311 + 312 + int objio_alloc_lseg(struct pnfs_layout_segment **outp, 313 + struct pnfs_layout_hdr *pnfslay, 314 + struct pnfs_layout_range *range, 315 + struct xdr_stream *xdr, 316 + gfp_t gfp_flags) 317 + { 318 + struct objio_segment *objio_seg; 319 + struct pnfs_osd_xdr_decode_layout_iter iter; 320 + struct pnfs_osd_layout layout; 321 + struct pnfs_osd_object_cred *cur_comp, src_comp; 322 + struct caps_buffers *caps_p; 323 + int err; 324 + 325 + err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); 326 + if (unlikely(err)) 327 + return err; 328 + 329 + err = _verify_data_map(&layout); 330 + if (unlikely(err)) 331 + return err; 332 + 333 + objio_seg = kzalloc(sizeof(*objio_seg) + 334 + sizeof(objio_seg->ods[0]) * layout.olo_num_comps + 335 + sizeof(*objio_seg->comps) * layout.olo_num_comps + 336 + sizeof(struct caps_buffers) * layout.olo_num_comps, 337 + gfp_flags); 338 + if (!objio_seg) 339 + return -ENOMEM; 340 + 341 + objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); 342 + cur_comp = objio_seg->comps; 343 + caps_p = (void *)(cur_comp + layout.olo_num_comps); 344 + while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) 345 + copy_single_comp(cur_comp++, &src_comp, caps_p++); 346 + if (unlikely(err)) 347 + goto err; 348 + 349 + objio_seg->num_comps = layout.olo_num_comps; 350 + objio_seg->comps_index = layout.olo_comps_index; 351 + err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); 352 + if (err) 353 + goto err; 354 + 355 + objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; 356 + objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; 357 + if (layout.olo_map.odm_group_width) { 358 + objio_seg->group_width = layout.olo_map.odm_group_width; 359 + objio_seg->group_depth = layout.olo_map.odm_group_depth; 360 + 
objio_seg->group_count = layout.olo_map.odm_num_comps / 361 + objio_seg->mirrors_p1 / 362 + objio_seg->group_width; 363 + } else { 364 + objio_seg->group_width = layout.olo_map.odm_num_comps / 365 + objio_seg->mirrors_p1; 366 + objio_seg->group_depth = -1; 367 + objio_seg->group_count = 1; 368 + } 369 + 370 + /* Cache this calculation it will hit for every page */ 371 + objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - 372 + objio_seg->stripe_unit) * 373 + objio_seg->group_width; 374 + 375 + *outp = &objio_seg->lseg; 376 + return 0; 377 + 378 + err: 379 + kfree(objio_seg); 380 + dprintk("%s: Error: return %d\n", __func__, err); 381 + *outp = NULL; 382 + return err; 383 + } 384 + 385 + void objio_free_lseg(struct pnfs_layout_segment *lseg) 386 + { 387 + int i; 388 + struct objio_segment *objio_seg = OBJIO_LSEG(lseg); 389 + 390 + for (i = 0; i < objio_seg->num_comps; i++) { 391 + if (!objio_seg->ods[i]) 392 + break; 393 + nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); 394 + } 395 + kfree(objio_seg); 396 + } 397 + 398 + int objio_alloc_io_state(struct pnfs_layout_segment *lseg, 399 + struct objlayout_io_state **outp, 400 + gfp_t gfp_flags) 401 + { 402 + struct objio_segment *objio_seg = OBJIO_LSEG(lseg); 403 + struct objio_state *ios; 404 + const unsigned first_size = sizeof(*ios) + 405 + objio_seg->num_comps * sizeof(ios->per_dev[0]); 406 + const unsigned sec_size = objio_seg->num_comps * 407 + sizeof(ios->ol_state.ioerrs[0]); 408 + 409 + ios = kzalloc(first_size + sec_size, gfp_flags); 410 + if (unlikely(!ios)) 411 + return -ENOMEM; 412 + 413 + ios->layout = objio_seg; 414 + ios->ol_state.ioerrs = ((void *)ios) + first_size; 415 + ios->ol_state.num_comps = objio_seg->num_comps; 416 + 417 + *outp = &ios->ol_state; 418 + return 0; 419 + } 420 + 421 + void objio_free_io_state(struct objlayout_io_state *ol_state) 422 + { 423 + struct objio_state *ios = container_of(ol_state, struct objio_state, 424 + ol_state); 425 + 426 + kfree(ios); 427 + } 428 + 429 + enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) 430 + { 431 + switch (oep) { 432 + case OSD_ERR_PRI_NO_ERROR: 433 + return (enum pnfs_osd_errno)0; 434 + 435 + case OSD_ERR_PRI_CLEAR_PAGES: 436 + BUG_ON(1); 437 + return 0; 438 + 439 + case OSD_ERR_PRI_RESOURCE: 440 + return PNFS_OSD_ERR_RESOURCE; 441 + case OSD_ERR_PRI_BAD_CRED: 442 + return PNFS_OSD_ERR_BAD_CRED; 443 + case OSD_ERR_PRI_NO_ACCESS: 444 + return PNFS_OSD_ERR_NO_ACCESS; 445 + case OSD_ERR_PRI_UNREACHABLE: 446 + return PNFS_OSD_ERR_UNREACHABLE; 447 + case OSD_ERR_PRI_NOT_FOUND: 448 + return PNFS_OSD_ERR_NOT_FOUND; 449 + case OSD_ERR_PRI_NO_SPACE: 450 + return PNFS_OSD_ERR_NO_SPACE; 451 + default: 452 + WARN_ON(1); 453 + /* fallthrough */ 454 + case OSD_ERR_PRI_EIO: 455 + return PNFS_OSD_ERR_EIO; 456 + } 457 + } 458 + 459 + static void _clear_bio(struct bio *bio) 460 + { 461 + struct bio_vec *bv; 462 + unsigned i; 463 + 464 + __bio_for_each_segment(bv, bio, i, 0) { 465 + unsigned this_count = bv->bv_len; 466 + 467 + if (likely(PAGE_SIZE == this_count)) 468 + clear_highpage(bv->bv_page); 469 + else 470 + zero_user(bv->bv_page, bv->bv_offset, this_count); 471 + } 472 + } 473 + 474 + static int _io_check(struct objio_state *ios, bool is_write) 475 + { 476 + enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; 477 + int lin_ret = 0; 478 + int i; 479 + 480 + for (i = 0; i < ios->numdevs; i++) { 481 + struct osd_sense_info osi; 482 + struct osd_request *or = ios->per_dev[i].or; 483 + unsigned dev; 484 + int ret; 485 + 486 + if (!or) 487 + continue; 488 + 
489 + ret = osd_req_decode_sense(or, &osi); 490 + if (likely(!ret)) 491 + continue; 492 + 493 + if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { 494 + /* start read offset passed endof file */ 495 + BUG_ON(is_write); 496 + _clear_bio(ios->per_dev[i].bio); 497 + dprintk("%s: start read offset passed end of file " 498 + "offset=0x%llx, length=0x%lx\n", __func__, 499 + _LLU(ios->per_dev[i].offset), 500 + ios->per_dev[i].length); 501 + 502 + continue; /* we recovered */ 503 + } 504 + dev = ios->per_dev[i].dev; 505 + objlayout_io_set_result(&ios->ol_state, dev, 506 + &ios->layout->comps[dev].oc_object_id, 507 + osd_pri_2_pnfs_err(osi.osd_err_pri), 508 + ios->per_dev[i].offset, 509 + ios->per_dev[i].length, 510 + is_write); 511 + 512 + if (osi.osd_err_pri >= oep) { 513 + oep = osi.osd_err_pri; 514 + lin_ret = ret; 515 + } 516 + } 517 + 518 + return lin_ret; 519 + } 520 + 521 + /* 522 + * Common IO state helpers. 523 + */ 524 + static void _io_free(struct objio_state *ios) 525 + { 526 + unsigned i; 527 + 528 + for (i = 0; i < ios->numdevs; i++) { 529 + struct _objio_per_comp *per_dev = &ios->per_dev[i]; 530 + 531 + if (per_dev->or) { 532 + osd_end_request(per_dev->or); 533 + per_dev->or = NULL; 534 + } 535 + 536 + if (per_dev->bio) { 537 + bio_put(per_dev->bio); 538 + per_dev->bio = NULL; 539 + } 540 + } 541 + } 542 + 543 + struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) 544 + { 545 + unsigned min_dev = ios->layout->comps_index; 546 + unsigned max_dev = min_dev + ios->layout->num_comps; 547 + 548 + BUG_ON(dev < min_dev || max_dev <= dev); 549 + return ios->layout->ods[dev - min_dev]->od; 550 + } 551 + 552 + struct _striping_info { 553 + u64 obj_offset; 554 + u64 group_length; 555 + unsigned dev; 556 + unsigned unit_off; 557 + }; 558 + 559 + static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, 560 + struct _striping_info *si) 561 + { 562 + u32 stripe_unit = ios->layout->stripe_unit; 563 + u32 group_width = ios->layout->group_width; 564 + u64 group_depth = ios->layout->group_depth; 565 + u32 U = stripe_unit * group_width; 566 + 567 + u64 T = U * group_depth; 568 + u64 S = T * ios->layout->group_count; 569 + u64 M = div64_u64(file_offset, S); 570 + 571 + /* 572 + G = (L - (M * S)) / T 573 + H = (L - (M * S)) % T 574 + */ 575 + u64 LmodU = file_offset - M * S; 576 + u32 G = div64_u64(LmodU, T); 577 + u64 H = LmodU - G * T; 578 + 579 + u32 N = div_u64(H, U); 580 + 581 + div_u64_rem(file_offset, stripe_unit, &si->unit_off); 582 + si->obj_offset = si->unit_off + (N * stripe_unit) + 583 + (M * group_depth * stripe_unit); 584 + 585 + /* "H - (N * U)" is just "H % U" so it's bound to u32 */ 586 + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; 587 + si->dev *= ios->layout->mirrors_p1; 588 + 589 + si->group_length = T - H; 590 + } 591 + 592 + static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, 593 + unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, 594 + gfp_t gfp_flags) 595 + { 596 + unsigned pg = *cur_pg; 597 + struct request_queue *q = 598 + osd_request_queue(_io_od(ios, per_dev->dev)); 599 + 600 + per_dev->length += cur_len; 601 + 602 + if (per_dev->bio == NULL) { 603 + unsigned stripes = ios->layout->num_comps / 604 + ios->layout->mirrors_p1; 605 + unsigned pages_in_stripe = stripes * 606 + (ios->layout->stripe_unit / PAGE_SIZE); 607 + unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / 608 + stripes; 609 + 610 + if (BIO_MAX_PAGES_KMALLOC < bio_size) 611 + bio_size = BIO_MAX_PAGES_KMALLOC; 612 + 613 + 
per_dev->bio = bio_kmalloc(gfp_flags, bio_size); 614 + if (unlikely(!per_dev->bio)) { 615 + dprintk("Failed to allocate BIO size=%u\n", bio_size); 616 + return -ENOMEM; 617 + } 618 + } 619 + 620 + while (cur_len > 0) { 621 + unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); 622 + unsigned added_len; 623 + 624 + BUG_ON(ios->ol_state.nr_pages <= pg); 625 + cur_len -= pglen; 626 + 627 + added_len = bio_add_pc_page(q, per_dev->bio, 628 + ios->ol_state.pages[pg], pglen, pgbase); 629 + if (unlikely(pglen != added_len)) 630 + return -ENOMEM; 631 + pgbase = 0; 632 + ++pg; 633 + } 634 + BUG_ON(cur_len); 635 + 636 + *cur_pg = pg; 637 + return 0; 638 + } 639 + 640 + static int _prepare_one_group(struct objio_state *ios, u64 length, 641 + struct _striping_info *si, unsigned *last_pg, 642 + gfp_t gfp_flags) 643 + { 644 + unsigned stripe_unit = ios->layout->stripe_unit; 645 + unsigned mirrors_p1 = ios->layout->mirrors_p1; 646 + unsigned devs_in_group = ios->layout->group_width * mirrors_p1; 647 + unsigned dev = si->dev; 648 + unsigned first_dev = dev - (dev % devs_in_group); 649 + unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; 650 + unsigned cur_pg = *last_pg; 651 + int ret = 0; 652 + 653 + while (length) { 654 + struct _objio_per_comp *per_dev = &ios->per_dev[dev]; 655 + unsigned cur_len, page_off = 0; 656 + 657 + if (!per_dev->length) { 658 + per_dev->dev = dev; 659 + if (dev < si->dev) { 660 + per_dev->offset = si->obj_offset + stripe_unit - 661 + si->unit_off; 662 + cur_len = stripe_unit; 663 + } else if (dev == si->dev) { 664 + per_dev->offset = si->obj_offset; 665 + cur_len = stripe_unit - si->unit_off; 666 + page_off = si->unit_off & ~PAGE_MASK; 667 + BUG_ON(page_off && 668 + (page_off != ios->ol_state.pgbase)); 669 + } else { /* dev > si->dev */ 670 + per_dev->offset = si->obj_offset - si->unit_off; 671 + cur_len = stripe_unit; 672 + } 673 + 674 + if (max_comp < dev) 675 + max_comp = dev; 676 + } else { 677 + cur_len = stripe_unit; 678 + } 679 + if (cur_len >= length) 680 + cur_len = length; 681 + 682 + ret = _add_stripe_unit(ios, &cur_pg, page_off, per_dev, 683 + cur_len, gfp_flags); 684 + if (unlikely(ret)) 685 + goto out; 686 + 687 + dev += mirrors_p1; 688 + dev = (dev % devs_in_group) + first_dev; 689 + 690 + length -= cur_len; 691 + ios->length += cur_len; 692 + } 693 + out: 694 + ios->numdevs = max_comp + mirrors_p1; 695 + *last_pg = cur_pg; 696 + return ret; 697 + } 698 + 699 + static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) 700 + { 701 + u64 length = ios->ol_state.count; 702 + u64 offset = ios->ol_state.offset; 703 + struct _striping_info si; 704 + unsigned last_pg = 0; 705 + int ret = 0; 706 + 707 + while (length) { 708 + _calc_stripe_info(ios, offset, &si); 709 + 710 + if (length < si.group_length) 711 + si.group_length = length; 712 + 713 + ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); 714 + if (unlikely(ret)) 715 + goto out; 716 + 717 + offset += si.group_length; 718 + length -= si.group_length; 719 + } 720 + 721 + out: 722 + if (!ios->length) 723 + return ret; 724 + 725 + return 0; 726 + } 727 + 728 + static ssize_t _sync_done(struct objio_state *ios) 729 + { 730 + struct completion *waiting = ios->private; 731 + 732 + complete(waiting); 733 + return 0; 734 + } 735 + 736 + static void _last_io(struct kref *kref) 737 + { 738 + struct objio_state *ios = container_of(kref, struct objio_state, kref); 739 + 740 + ios->done(ios); 741 + } 742 + 743 + static void _done_io(struct osd_request *or, void *p) 744 + 
{ 745 + struct objio_state *ios = p; 746 + 747 + kref_put(&ios->kref, _last_io); 748 + } 749 + 750 + static ssize_t _io_exec(struct objio_state *ios) 751 + { 752 + DECLARE_COMPLETION_ONSTACK(wait); 753 + ssize_t status = 0; /* sync status */ 754 + unsigned i; 755 + objio_done_fn saved_done_fn = ios->done; 756 + bool sync = ios->ol_state.sync; 757 + 758 + if (sync) { 759 + ios->done = _sync_done; 760 + ios->private = &wait; 761 + } 762 + 763 + kref_init(&ios->kref); 764 + 765 + for (i = 0; i < ios->numdevs; i++) { 766 + struct osd_request *or = ios->per_dev[i].or; 767 + 768 + if (!or) 769 + continue; 770 + 771 + kref_get(&ios->kref); 772 + osd_execute_request_async(or, _done_io, ios); 773 + } 774 + 775 + kref_put(&ios->kref, _last_io); 776 + 777 + if (sync) { 778 + wait_for_completion(&wait); 779 + status = saved_done_fn(ios); 780 + } 781 + 782 + return status; 783 + } 784 + 785 + /* 786 + * read 787 + */ 788 + static ssize_t _read_done(struct objio_state *ios) 789 + { 790 + ssize_t status; 791 + int ret = _io_check(ios, false); 792 + 793 + _io_free(ios); 794 + 795 + if (likely(!ret)) 796 + status = ios->length; 797 + else 798 + status = ret; 799 + 800 + objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); 801 + return status; 802 + } 803 + 804 + static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) 805 + { 806 + struct osd_request *or = NULL; 807 + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; 808 + unsigned dev = per_dev->dev; 809 + struct pnfs_osd_object_cred *cred = 810 + &ios->layout->comps[dev]; 811 + struct osd_obj_id obj = { 812 + .partition = cred->oc_object_id.oid_partition_id, 813 + .id = cred->oc_object_id.oid_object_id, 814 + }; 815 + int ret; 816 + 817 + or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); 818 + if (unlikely(!or)) { 819 + ret = -ENOMEM; 820 + goto err; 821 + } 822 + per_dev->or = or; 823 + 824 + osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); 825 + 826 + ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); 827 + if (ret) { 828 + dprintk("%s: Failed to osd_finalize_request() => %d\n", 829 + __func__, ret); 830 + goto err; 831 + } 832 + 833 + dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", 834 + __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), 835 + per_dev->length); 836 + 837 + err: 838 + return ret; 839 + } 840 + 841 + static ssize_t _read_exec(struct objio_state *ios) 842 + { 843 + unsigned i; 844 + int ret; 845 + 846 + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 847 + if (!ios->per_dev[i].length) 848 + continue; 849 + ret = _read_mirrors(ios, i); 850 + if (unlikely(ret)) 851 + goto err; 852 + } 853 + 854 + ios->done = _read_done; 855 + return _io_exec(ios); /* In sync mode exec returns the io status */ 856 + 857 + err: 858 + _io_free(ios); 859 + return ret; 860 + } 861 + 862 + ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) 863 + { 864 + struct objio_state *ios = container_of(ol_state, struct objio_state, 865 + ol_state); 866 + int ret; 867 + 868 + ret = _io_rw_pagelist(ios, GFP_KERNEL); 869 + if (unlikely(ret)) 870 + return ret; 871 + 872 + return _read_exec(ios); 873 + } 874 + 875 + /* 876 + * write 877 + */ 878 + static ssize_t _write_done(struct objio_state *ios) 879 + { 880 + ssize_t status; 881 + int ret = _io_check(ios, true); 882 + 883 + _io_free(ios); 884 + 885 + if (likely(!ret)) { 886 + /* FIXME: should be based on the OSD's persistence model 887 + * See OSD2r05 Section 4.13 Data persistence model */ 888 + 
ios->ol_state.committed = NFS_FILE_SYNC; 889 + status = ios->length; 890 + } else { 891 + status = ret; 892 + } 893 + 894 + objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); 895 + return status; 896 + } 897 + 898 + static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) 899 + { 900 + struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; 901 + unsigned dev = ios->per_dev[cur_comp].dev; 902 + unsigned last_comp = cur_comp + ios->layout->mirrors_p1; 903 + int ret; 904 + 905 + for (; cur_comp < last_comp; ++cur_comp, ++dev) { 906 + struct osd_request *or = NULL; 907 + struct pnfs_osd_object_cred *cred = 908 + &ios->layout->comps[dev]; 909 + struct osd_obj_id obj = { 910 + .partition = cred->oc_object_id.oid_partition_id, 911 + .id = cred->oc_object_id.oid_object_id, 912 + }; 913 + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; 914 + struct bio *bio; 915 + 916 + or = osd_start_request(_io_od(ios, dev), GFP_NOFS); 917 + if (unlikely(!or)) { 918 + ret = -ENOMEM; 919 + goto err; 920 + } 921 + per_dev->or = or; 922 + 923 + if (per_dev != master_dev) { 924 + bio = bio_kmalloc(GFP_NOFS, 925 + master_dev->bio->bi_max_vecs); 926 + if (unlikely(!bio)) { 927 + dprintk("Failed to allocate BIO size=%u\n", 928 + master_dev->bio->bi_max_vecs); 929 + ret = -ENOMEM; 930 + goto err; 931 + } 932 + 933 + __bio_clone(bio, master_dev->bio); 934 + bio->bi_bdev = NULL; 935 + bio->bi_next = NULL; 936 + per_dev->bio = bio; 937 + per_dev->dev = dev; 938 + per_dev->length = master_dev->length; 939 + per_dev->offset = master_dev->offset; 940 + } else { 941 + bio = master_dev->bio; 942 + bio->bi_rw |= REQ_WRITE; 943 + } 944 + 945 + osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); 946 + 947 + ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); 948 + if (ret) { 949 + dprintk("%s: Failed to osd_finalize_request() => %d\n", 950 + __func__, ret); 951 + goto err; 952 + } 953 + 954 + dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", 955 + __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), 956 + per_dev->length); 957 + } 958 + 959 + err: 960 + return ret; 961 + } 962 + 963 + static ssize_t _write_exec(struct objio_state *ios) 964 + { 965 + unsigned i; 966 + int ret; 967 + 968 + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 969 + if (!ios->per_dev[i].length) 970 + continue; 971 + ret = _write_mirrors(ios, i); 972 + if (unlikely(ret)) 973 + goto err; 974 + } 975 + 976 + ios->done = _write_done; 977 + return _io_exec(ios); /* In sync mode exec returns the io->status */ 978 + 979 + err: 980 + _io_free(ios); 981 + return ret; 982 + } 983 + 984 + ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) 985 + { 986 + struct objio_state *ios = container_of(ol_state, struct objio_state, 987 + ol_state); 988 + int ret; 989 + 990 + /* TODO: ios->stable = stable; */ 991 + ret = _io_rw_pagelist(ios, GFP_NOFS); 992 + if (unlikely(ret)) 993 + return ret; 994 + 995 + return _write_exec(ios); 996 + } 997 + 998 + static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, 999 + struct nfs_page *prev, struct nfs_page *req) 1000 + { 1001 + if (!pnfs_generic_pg_test(pgio, prev, req)) 1002 + return false; 1003 + 1004 + return pgio->pg_count + req->wb_bytes <= 1005 + OBJIO_LSEG(pgio->pg_lseg)->max_io_size; 1006 + } 1007 + 1008 + static struct pnfs_layoutdriver_type objlayout_type = { 1009 + .id = LAYOUT_OSD2_OBJECTS, 1010 + .name = "LAYOUT_OSD2_OBJECTS", 1011 + .flags = PNFS_LAYOUTRET_ON_SETATTR, 1012 + 1013 + 
.alloc_layout_hdr = objlayout_alloc_layout_hdr, 1014 + .free_layout_hdr = objlayout_free_layout_hdr, 1015 + 1016 + .alloc_lseg = objlayout_alloc_lseg, 1017 + .free_lseg = objlayout_free_lseg, 1018 + 1019 + .read_pagelist = objlayout_read_pagelist, 1020 + .write_pagelist = objlayout_write_pagelist, 1021 + .pg_test = objio_pg_test, 1022 + 1023 + .free_deviceid_node = objio_free_deviceid_node, 1024 + 1025 + .encode_layoutcommit = objlayout_encode_layoutcommit, 1026 + .encode_layoutreturn = objlayout_encode_layoutreturn, 1027 + }; 1028 + 1029 + MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); 1030 + MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); 1031 + MODULE_LICENSE("GPL"); 1032 + 1033 + static int __init 1034 + objlayout_init(void) 1035 + { 1036 + int ret = pnfs_register_layoutdriver(&objlayout_type); 1037 + 1038 + if (ret) 1039 + printk(KERN_INFO 1040 + "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", 1041 + __func__, ret); 1042 + else 1043 + printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", 1044 + __func__); 1045 + return ret; 1046 + } 1047 + 1048 + static void __exit 1049 + objlayout_exit(void) 1050 + { 1051 + pnfs_unregister_layoutdriver(&objlayout_type); 1052 + printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", 1053 + __func__); 1054 + } 1055 + 1056 + module_init(objlayout_init); 1057 + module_exit(objlayout_exit);
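A note on the striping arithmetic used by _io_rw_pagelist() and _prepare_one_group() above: the engine first maps a file offset to a component device, an object offset, and an offset within the current stripe unit (_calc_stripe_info(), defined earlier in objio_osd.c), then walks the group's devices in mirrors_p1 steps. Below is a minimal user-space sketch of that mapping, assuming only the stripe_unit/group_width/mirrors_p1 parameters visible above and omitting the raid-group (group_depth) handling the real helper also performs:

#include <stdint.h>
#include <stdio.h>

struct stripe_info {
	unsigned dev;        /* first component touched at this offset */
	uint64_t obj_offset; /* byte offset within that component object */
	uint64_t unit_off;   /* offset within the current stripe unit */
};

/* Mirrors occupy mirrors_p1 consecutive component slots, which is why
 * _prepare_one_group() advances the device index in mirrors_p1 steps. */
static void calc_stripe_info(uint64_t offset, unsigned stripe_unit,
			     unsigned group_width, unsigned mirrors_p1,
			     struct stripe_info *si)
{
	uint64_t stripe_len = (uint64_t)stripe_unit * group_width;
	uint64_t stripe_no  = offset / stripe_len;
	uint64_t stripe_off = offset % stripe_len;
	unsigned unit_no    = stripe_off / stripe_unit;

	si->unit_off   = stripe_off % stripe_unit;
	si->obj_offset = stripe_no * stripe_unit + si->unit_off;
	si->dev        = unit_no * mirrors_p1;
}

int main(void)
{
	struct stripe_info si;

	/* 64k stripe unit, 4 devices wide, no mirroring (mirrors_p1 == 1) */
	calc_stripe_info(3 * 65536 + 100, 65536, 4, 1, &si);
	printf("dev=%u obj_offset=%llu unit_off=%llu\n", si.dev,
	       (unsigned long long)si.obj_offset,
	       (unsigned long long)si.unit_off);
	return 0;
}

Compiled standalone this prints dev=3 obj_offset=100 unit_off=100: an offset 100 bytes into the fourth stripe unit lands 100 bytes into the object on the fourth component.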
+712
fs/nfs/objlayout/objlayout.c
··· 1 + /* 2 + * pNFS Objects layout driver high level definitions 3 + * 4 + * Copyright (C) 2007 Panasas Inc. [year of first publication] 5 + * All rights reserved. 6 + * 7 + * Benny Halevy <bhalevy@panasas.com> 8 + * Boaz Harrosh <bharrosh@panasas.com> 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of the GNU General Public License version 2 12 + * See the file COPYING included with this distribution for more details. 13 + * 14 + * Redistribution and use in source and binary forms, with or without 15 + * modification, are permitted provided that the following conditions 16 + * are met: 17 + * 18 + * 1. Redistributions of source code must retain the above copyright 19 + * notice, this list of conditions and the following disclaimer. 20 + * 2. Redistributions in binary form must reproduce the above copyright 21 + * notice, this list of conditions and the following disclaimer in the 22 + * documentation and/or other materials provided with the distribution. 23 + * 3. Neither the name of the Panasas company nor the names of its 24 + * contributors may be used to endorse or promote products derived 25 + * from this software without specific prior written permission. 26 + * 27 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 28 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 29 + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 34 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 + */ 39 + 40 + #include <scsi/osd_initiator.h> 41 + #include "objlayout.h" 42 + 43 + #define NFSDBG_FACILITY NFSDBG_PNFS_LD 44 + /* 45 + * Create an objlayout layout structure for the given inode and return it. 46 + */ 47 + struct pnfs_layout_hdr * 48 + objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) 49 + { 50 + struct objlayout *objlay; 51 + 52 + objlay = kzalloc(sizeof(struct objlayout), gfp_flags); 53 + if (objlay) { 54 + spin_lock_init(&objlay->lock); 55 + INIT_LIST_HEAD(&objlay->err_list); 56 + } 57 + dprintk("%s: Return %p\n", __func__, objlay); 58 + return &objlay->pnfs_layout; 59 + } 60 + 61 + /* 62 + * Free an objlayout layout structure 63 + */ 64 + void 65 + objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) 66 + { 67 + struct objlayout *objlay = OBJLAYOUT(lo); 68 + 69 + dprintk("%s: objlay %p\n", __func__, objlay); 70 + 71 + WARN_ON(!list_empty(&objlay->err_list)); 72 + kfree(objlay); 73 + } 74 + 75 + /* 76 + * Unmarshall layout and store it in pnfslay. 
77 + */ 78 + struct pnfs_layout_segment * 79 + objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, 80 + struct nfs4_layoutget_res *lgr, 81 + gfp_t gfp_flags) 82 + { 83 + int status = -ENOMEM; 84 + struct xdr_stream stream; 85 + struct xdr_buf buf = { 86 + .pages = lgr->layoutp->pages, 87 + .page_len = lgr->layoutp->len, 88 + .buflen = lgr->layoutp->len, 89 + .len = lgr->layoutp->len, 90 + }; 91 + struct page *scratch; 92 + struct pnfs_layout_segment *lseg; 93 + 94 + dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); 95 + 96 + scratch = alloc_page(gfp_flags); 97 + if (!scratch) 98 + goto err_nofree; 99 + 100 + xdr_init_decode(&stream, &buf, NULL); 101 + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); 102 + 103 + status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); 104 + if (unlikely(status)) { 105 + dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, 106 + status); 107 + goto err; 108 + } 109 + 110 + __free_page(scratch); 111 + 112 + dprintk("%s: Return %p\n", __func__, lseg); 113 + return lseg; 114 + 115 + err: 116 + __free_page(scratch); 117 + err_nofree: 118 + dprintk("%s: Err Return=>%d\n", __func__, status); 119 + return ERR_PTR(status); 120 + } 121 + 122 + /* 123 + * Free a layout segment 124 + */ 125 + void 126 + objlayout_free_lseg(struct pnfs_layout_segment *lseg) 127 + { 128 + dprintk("%s: freeing layout segment %p\n", __func__, lseg); 129 + 130 + if (unlikely(!lseg)) 131 + return; 132 + 133 + objio_free_lseg(lseg); 134 + } 135 + 136 + /* 137 + * I/O Operations 138 + */ 139 + static inline u64 140 + end_offset(u64 start, u64 len) 141 + { 142 + u64 end; 143 + 144 + end = start + len; 145 + return end >= start ? end : NFS4_MAX_UINT64; 146 + } 147 + 148 + /* last octet in a range */ 149 + static inline u64 150 + last_byte_offset(u64 start, u64 len) 151 + { 152 + u64 end; 153 + 154 + BUG_ON(!len); 155 + end = start + len; 156 + return end > start ? 
end - 1 : NFS4_MAX_UINT64; 157 + } 158 + 159 + static struct objlayout_io_state * 160 + objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, 161 + struct page **pages, 162 + unsigned pgbase, 163 + loff_t offset, 164 + size_t count, 165 + struct pnfs_layout_segment *lseg, 166 + void *rpcdata, 167 + gfp_t gfp_flags) 168 + { 169 + struct objlayout_io_state *state; 170 + u64 lseg_end_offset; 171 + 172 + dprintk("%s: allocating io_state\n", __func__); 173 + if (objio_alloc_io_state(lseg, &state, gfp_flags)) 174 + return NULL; 175 + 176 + BUG_ON(offset < lseg->pls_range.offset); 177 + lseg_end_offset = end_offset(lseg->pls_range.offset, 178 + lseg->pls_range.length); 179 + BUG_ON(offset >= lseg_end_offset); 180 + if (offset + count > lseg_end_offset) { 181 + count = lseg->pls_range.length - 182 + (offset - lseg->pls_range.offset); 183 + dprintk("%s: truncated count %Zd\n", __func__, count); 184 + } 185 + 186 + if (pgbase > PAGE_SIZE) { 187 + pages += pgbase >> PAGE_SHIFT; 188 + pgbase &= ~PAGE_MASK; 189 + } 190 + 191 + INIT_LIST_HEAD(&state->err_list); 192 + state->lseg = lseg; 193 + state->rpcdata = rpcdata; 194 + state->pages = pages; 195 + state->pgbase = pgbase; 196 + state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; 197 + state->offset = offset; 198 + state->count = count; 199 + state->sync = 0; 200 + 201 + return state; 202 + } 203 + 204 + static void 205 + objlayout_free_io_state(struct objlayout_io_state *state) 206 + { 207 + dprintk("%s: freeing io_state\n", __func__); 208 + if (unlikely(!state)) 209 + return; 210 + 211 + objio_free_io_state(state); 212 + } 213 + 214 + /* 215 + * I/O done common code 216 + */ 217 + static void 218 + objlayout_iodone(struct objlayout_io_state *state) 219 + { 220 + dprintk("%s: state %p status\n", __func__, state); 221 + 222 + if (likely(state->status >= 0)) { 223 + objlayout_free_io_state(state); 224 + } else { 225 + struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); 226 + 227 + spin_lock(&objlay->lock); 228 + objlay->delta_space_valid = OBJ_DSU_INVALID; 229 + list_add(&state->err_list, &objlay->err_list); 230 + spin_unlock(&objlay->lock); 231 + } 232 + } 233 + 234 + /* 235 + * objlayout_io_set_result - Set an osd_error code on a specific osd comp. 236 + * 237 + * The @index component IO failed (error returned from target). Register 238 + * the error for later reporting at layout-return. 
239 + */ 240 + void 241 + objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, 242 + struct pnfs_osd_objid *pooid, int osd_error, 243 + u64 offset, u64 length, bool is_write) 244 + { 245 + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; 246 + 247 + BUG_ON(index >= state->num_comps); 248 + if (osd_error) { 249 + ioerr->oer_component = *pooid; 250 + ioerr->oer_comp_offset = offset; 251 + ioerr->oer_comp_length = length; 252 + ioerr->oer_iswrite = is_write; 253 + ioerr->oer_errno = osd_error; 254 + 255 + dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " 256 + "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", 257 + __func__, index, ioerr->oer_errno, 258 + ioerr->oer_iswrite, 259 + _DEVID_LO(&ioerr->oer_component.oid_device_id), 260 + _DEVID_HI(&ioerr->oer_component.oid_device_id), 261 + ioerr->oer_component.oid_partition_id, 262 + ioerr->oer_component.oid_object_id, 263 + ioerr->oer_comp_offset, 264 + ioerr->oer_comp_length); 265 + } else { 266 + /* User need not call if no error is reported */ 267 + ioerr->oer_errno = 0; 268 + } 269 + } 270 + 271 + /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). 272 + * This is because the osd completion is called with ints-off from 273 + * the block layer 274 + */ 275 + static void _rpc_read_complete(struct work_struct *work) 276 + { 277 + struct rpc_task *task; 278 + struct nfs_read_data *rdata; 279 + 280 + dprintk("%s enter\n", __func__); 281 + task = container_of(work, struct rpc_task, u.tk_work); 282 + rdata = container_of(task, struct nfs_read_data, task); 283 + 284 + pnfs_ld_read_done(rdata); 285 + } 286 + 287 + void 288 + objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) 289 + { 290 + int eof = state->eof; 291 + struct nfs_read_data *rdata; 292 + 293 + state->status = status; 294 + dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); 295 + rdata = state->rpcdata; 296 + rdata->task.tk_status = status; 297 + if (status >= 0) { 298 + rdata->res.count = status; 299 + rdata->res.eof = eof; 300 + } 301 + objlayout_iodone(state); 302 + /* must not use state after this point */ 303 + 304 + if (sync) 305 + pnfs_ld_read_done(rdata); 306 + else { 307 + INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); 308 + schedule_work(&rdata->task.u.tk_work); 309 + } 310 + } 311 + 312 + /* 313 + * Perform sync or async reads. 
314 + */ 315 + enum pnfs_try_status 316 + objlayout_read_pagelist(struct nfs_read_data *rdata) 317 + { 318 + loff_t offset = rdata->args.offset; 319 + size_t count = rdata->args.count; 320 + struct objlayout_io_state *state; 321 + ssize_t status = 0; 322 + loff_t eof; 323 + 324 + dprintk("%s: Begin inode %p offset %llu count %d\n", 325 + __func__, rdata->inode, offset, (int)count); 326 + 327 + eof = i_size_read(rdata->inode); 328 + if (unlikely(offset + count > eof)) { 329 + if (offset >= eof) { 330 + status = 0; 331 + rdata->res.count = 0; 332 + rdata->res.eof = 1; 333 + goto out; 334 + } 335 + count = eof - offset; 336 + } 337 + 338 + state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, 339 + rdata->args.pages, rdata->args.pgbase, 340 + offset, count, 341 + rdata->lseg, rdata, 342 + GFP_KERNEL); 343 + if (unlikely(!state)) { 344 + status = -ENOMEM; 345 + goto out; 346 + } 347 + 348 + state->eof = state->offset + state->count >= eof; 349 + 350 + status = objio_read_pagelist(state); 351 + out: 352 + dprintk("%s: Return status %Zd\n", __func__, status); 353 + rdata->pnfs_error = status; 354 + return PNFS_ATTEMPTED; 355 + } 356 + 357 + /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). 358 + * This is because the osd completion is called with ints-off from 359 + * the block layer 360 + */ 361 + static void _rpc_write_complete(struct work_struct *work) 362 + { 363 + struct rpc_task *task; 364 + struct nfs_write_data *wdata; 365 + 366 + dprintk("%s enter\n", __func__); 367 + task = container_of(work, struct rpc_task, u.tk_work); 368 + wdata = container_of(task, struct nfs_write_data, task); 369 + 370 + pnfs_ld_write_done(wdata); 371 + } 372 + 373 + void 374 + objlayout_write_done(struct objlayout_io_state *state, ssize_t status, 375 + bool sync) 376 + { 377 + struct nfs_write_data *wdata; 378 + 379 + dprintk("%s: Begin\n", __func__); 380 + wdata = state->rpcdata; 381 + state->status = status; 382 + wdata->task.tk_status = status; 383 + if (status >= 0) { 384 + wdata->res.count = status; 385 + wdata->verf.committed = state->committed; 386 + dprintk("%s: Return status %d committed %d\n", 387 + __func__, wdata->task.tk_status, 388 + wdata->verf.committed); 389 + } else 390 + dprintk("%s: Return status %d\n", 391 + __func__, wdata->task.tk_status); 392 + objlayout_iodone(state); 393 + /* must not use state after this point */ 394 + 395 + if (sync) 396 + pnfs_ld_write_done(wdata); 397 + else { 398 + INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); 399 + schedule_work(&wdata->task.u.tk_work); 400 + } 401 + } 402 + 403 + /* 404 + * Perform sync or async writes. 
405 + */ 406 + enum pnfs_try_status 407 + objlayout_write_pagelist(struct nfs_write_data *wdata, 408 + int how) 409 + { 410 + struct objlayout_io_state *state; 411 + ssize_t status; 412 + 413 + dprintk("%s: Begin inode %p offset %llu count %u\n", 414 + __func__, wdata->inode, wdata->args.offset, wdata->args.count); 415 + 416 + state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, 417 + wdata->args.pages, 418 + wdata->args.pgbase, 419 + wdata->args.offset, 420 + wdata->args.count, 421 + wdata->lseg, wdata, 422 + GFP_NOFS); 423 + if (unlikely(!state)) { 424 + status = -ENOMEM; 425 + goto out; 426 + } 427 + 428 + state->sync = how & FLUSH_SYNC; 429 + 430 + status = objio_write_pagelist(state, how & FLUSH_STABLE); 431 + out: 432 + dprintk("%s: Return status %Zd\n", __func__, status); 433 + wdata->pnfs_error = status; 434 + return PNFS_ATTEMPTED; 435 + } 436 + 437 + void 438 + objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, 439 + struct xdr_stream *xdr, 440 + const struct nfs4_layoutcommit_args *args) 441 + { 442 + struct objlayout *objlay = OBJLAYOUT(pnfslay); 443 + struct pnfs_osd_layoutupdate lou; 444 + __be32 *start; 445 + 446 + dprintk("%s: Begin\n", __func__); 447 + 448 + spin_lock(&objlay->lock); 449 + lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); 450 + lou.dsu_delta = objlay->delta_space_used; 451 + objlay->delta_space_used = 0; 452 + objlay->delta_space_valid = OBJ_DSU_INIT; 453 + lou.olu_ioerr_flag = !list_empty(&objlay->err_list); 454 + spin_unlock(&objlay->lock); 455 + 456 + start = xdr_reserve_space(xdr, 4); 457 + 458 + BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); 459 + 460 + *start = cpu_to_be32((xdr->p - start - 1) * 4); 461 + 462 + dprintk("%s: Return delta_space_used %lld err %d\n", __func__, 463 + lou.dsu_delta, lou.olu_ioerr_flag); 464 + } 465 + 466 + static int 467 + err_prio(u32 oer_errno) 468 + { 469 + switch (oer_errno) { 470 + case 0: 471 + return 0; 472 + 473 + case PNFS_OSD_ERR_RESOURCE: 474 + return OSD_ERR_PRI_RESOURCE; 475 + case PNFS_OSD_ERR_BAD_CRED: 476 + return OSD_ERR_PRI_BAD_CRED; 477 + case PNFS_OSD_ERR_NO_ACCESS: 478 + return OSD_ERR_PRI_NO_ACCESS; 479 + case PNFS_OSD_ERR_UNREACHABLE: 480 + return OSD_ERR_PRI_UNREACHABLE; 481 + case PNFS_OSD_ERR_NOT_FOUND: 482 + return OSD_ERR_PRI_NOT_FOUND; 483 + case PNFS_OSD_ERR_NO_SPACE: 484 + return OSD_ERR_PRI_NO_SPACE; 485 + default: 486 + WARN_ON(1); 487 + /* fallthrough */ 488 + case PNFS_OSD_ERR_EIO: 489 + return OSD_ERR_PRI_EIO; 490 + } 491 + } 492 + 493 + static void 494 + merge_ioerr(struct pnfs_osd_ioerr *dest_err, 495 + const struct pnfs_osd_ioerr *src_err) 496 + { 497 + u64 dest_end, src_end; 498 + 499 + if (!dest_err->oer_errno) { 500 + *dest_err = *src_err; 501 + /* accumulated device must be blank */ 502 + memset(&dest_err->oer_component.oid_device_id, 0, 503 + sizeof(dest_err->oer_component.oid_device_id)); 504 + 505 + return; 506 + } 507 + 508 + if (dest_err->oer_component.oid_partition_id != 509 + src_err->oer_component.oid_partition_id) 510 + dest_err->oer_component.oid_partition_id = 0; 511 + 512 + if (dest_err->oer_component.oid_object_id != 513 + src_err->oer_component.oid_object_id) 514 + dest_err->oer_component.oid_object_id = 0; 515 + 516 + if (dest_err->oer_comp_offset > src_err->oer_comp_offset) 517 + dest_err->oer_comp_offset = src_err->oer_comp_offset; 518 + 519 + dest_end = end_offset(dest_err->oer_comp_offset, 520 + dest_err->oer_comp_length); 521 + src_end = end_offset(src_err->oer_comp_offset, 522 + src_err->oer_comp_length); 523 + if 
(dest_end < src_end) 524 + dest_end = src_end; 525 + 526 + dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; 527 + 528 + if ((src_err->oer_iswrite == dest_err->oer_iswrite) && 529 + (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { 530 + dest_err->oer_errno = src_err->oer_errno; 531 + } else if (src_err->oer_iswrite) { 532 + dest_err->oer_iswrite = true; 533 + dest_err->oer_errno = src_err->oer_errno; 534 + } 535 + } 536 + 537 + static void 538 + encode_accumulated_error(struct objlayout *objlay, __be32 *p) 539 + { 540 + struct objlayout_io_state *state, *tmp; 541 + struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; 542 + 543 + list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { 544 + unsigned i; 545 + 546 + for (i = 0; i < state->num_comps; i++) { 547 + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; 548 + 549 + if (!ioerr->oer_errno) 550 + continue; 551 + 552 + printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " 553 + "dev(%llx:%llx) par=0x%llx obj=0x%llx " 554 + "offset=0x%llx length=0x%llx\n", 555 + __func__, i, ioerr->oer_errno, 556 + ioerr->oer_iswrite, 557 + _DEVID_LO(&ioerr->oer_component.oid_device_id), 558 + _DEVID_HI(&ioerr->oer_component.oid_device_id), 559 + ioerr->oer_component.oid_partition_id, 560 + ioerr->oer_component.oid_object_id, 561 + ioerr->oer_comp_offset, 562 + ioerr->oer_comp_length); 563 + 564 + merge_ioerr(&accumulated_err, ioerr); 565 + } 566 + list_del(&state->err_list); 567 + objlayout_free_io_state(state); 568 + } 569 + 570 + pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); 571 + } 572 + 573 + void 574 + objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, 575 + struct xdr_stream *xdr, 576 + const struct nfs4_layoutreturn_args *args) 577 + { 578 + struct objlayout *objlay = OBJLAYOUT(pnfslay); 579 + struct objlayout_io_state *state, *tmp; 580 + __be32 *start; 581 + 582 + dprintk("%s: Begin\n", __func__); 583 + start = xdr_reserve_space(xdr, 4); 584 + BUG_ON(!start); 585 + 586 + spin_lock(&objlay->lock); 587 + 588 + list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { 589 + __be32 *last_xdr = NULL, *p; 590 + unsigned i; 591 + int res = 0; 592 + 593 + for (i = 0; i < state->num_comps; i++) { 594 + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; 595 + 596 + if (!ioerr->oer_errno) 597 + continue; 598 + 599 + dprintk("%s: err[%d]: errno=%d is_write=%d " 600 + "dev(%llx:%llx) par=0x%llx obj=0x%llx " 601 + "offset=0x%llx length=0x%llx\n", 602 + __func__, i, ioerr->oer_errno, 603 + ioerr->oer_iswrite, 604 + _DEVID_LO(&ioerr->oer_component.oid_device_id), 605 + _DEVID_HI(&ioerr->oer_component.oid_device_id), 606 + ioerr->oer_component.oid_partition_id, 607 + ioerr->oer_component.oid_object_id, 608 + ioerr->oer_comp_offset, 609 + ioerr->oer_comp_length); 610 + 611 + p = pnfs_osd_xdr_ioerr_reserve_space(xdr); 612 + if (unlikely(!p)) { 613 + res = -E2BIG; 614 + break; /* accumulated_error */ 615 + } 616 + 617 + last_xdr = p; 618 + pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); 619 + } 620 + 621 + /* TODO: use xdr_write_pages */ 622 + if (unlikely(res)) { 623 + /* no space for even one error descriptor */ 624 + BUG_ON(!last_xdr); 625 + 626 + /* we've encountered a situation with lots and lots of 627 + * errors and no space to encode them all. Use the last 628 + * available slot to report the union of all the 629 + * remaining errors. 
630 + */ 631 + encode_accumulated_error(objlay, last_xdr); 632 + goto loop_done; 633 + } 634 + list_del(&state->err_list); 635 + objlayout_free_io_state(state); 636 + } 637 + loop_done: 638 + spin_unlock(&objlay->lock); 639 + 640 + *start = cpu_to_be32((xdr->p - start - 1) * 4); 641 + dprintk("%s: Return\n", __func__); 642 + } 643 + 644 + 645 + /* 646 + * Get Device Info API for io engines 647 + */ 648 + struct objlayout_deviceinfo { 649 + struct page *page; 650 + struct pnfs_osd_deviceaddr da; /* This must be last */ 651 + }; 652 + 653 + /* Initialize and call nfs_getdeviceinfo, then decode and return a 654 + * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() 655 + * should be called. 656 + */ 657 + int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, 658 + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, 659 + gfp_t gfp_flags) 660 + { 661 + struct objlayout_deviceinfo *odi; 662 + struct pnfs_device pd; 663 + struct super_block *sb; 664 + struct page *page, **pages; 665 + u32 *p; 666 + int err; 667 + 668 + page = alloc_page(gfp_flags); 669 + if (!page) 670 + return -ENOMEM; 671 + 672 + pages = &page; 673 + pd.pages = pages; 674 + 675 + memcpy(&pd.dev_id, d_id, sizeof(*d_id)); 676 + pd.layout_type = LAYOUT_OSD2_OBJECTS; 677 + pd.pages = &page; 678 + pd.pgbase = 0; 679 + pd.pglen = PAGE_SIZE; 680 + pd.mincount = 0; 681 + 682 + sb = pnfslay->plh_inode->i_sb; 683 + err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); 684 + dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); 685 + if (err) 686 + goto err_out; 687 + 688 + p = page_address(page); 689 + odi = kzalloc(sizeof(*odi), gfp_flags); 690 + if (!odi) { 691 + err = -ENOMEM; 692 + goto err_out; 693 + } 694 + pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); 695 + odi->page = page; 696 + *deviceaddr = &odi->da; 697 + return 0; 698 + 699 + err_out: 700 + __free_page(page); 701 + return err; 702 + } 703 + 704 + void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) 705 + { 706 + struct objlayout_deviceinfo *odi = container_of(deviceaddr, 707 + struct objlayout_deviceinfo, 708 + da); 709 + 710 + __free_page(odi->page); 711 + kfree(odi); 712 + }
+187
fs/nfs/objlayout/objlayout.h
··· 1 + /* 2 + * Data types and function declarations for interfacing with the 3 + * pNFS standard object layout driver. 4 + * 5 + * Copyright (C) 2007 Panasas Inc. [year of first publication] 6 + * All rights reserved. 7 + * 8 + * Benny Halevy <bhalevy@panasas.com> 9 + * Boaz Harrosh <bharrosh@panasas.com> 10 + * 11 + * This program is free software; you can redistribute it and/or modify 12 + * it under the terms of the GNU General Public License version 2 13 + * See the file COPYING included with this distribution for more details. 14 + * 15 + * Redistribution and use in source and binary forms, with or without 16 + * modification, are permitted provided that the following conditions 17 + * are met: 18 + * 19 + * 1. Redistributions of source code must retain the above copyright 20 + * notice, this list of conditions and the following disclaimer. 21 + * 2. Redistributions in binary form must reproduce the above copyright 22 + * notice, this list of conditions and the following disclaimer in the 23 + * documentation and/or other materials provided with the distribution. 24 + * 3. Neither the name of the Panasas company nor the names of its 25 + * contributors may be used to endorse or promote products derived 26 + * from this software without specific prior written permission. 27 + * 28 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 29 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 30 + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 31 + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 33 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 34 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 35 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 36 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 37 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 38 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 + */ 40 + 41 + #ifndef _OBJLAYOUT_H 42 + #define _OBJLAYOUT_H 43 + 44 + #include <linux/nfs_fs.h> 45 + #include <linux/pnfs_osd_xdr.h> 46 + #include "../pnfs.h" 47 + 48 + /* 49 + * per-inode layout 50 + */ 51 + struct objlayout { 52 + struct pnfs_layout_hdr pnfs_layout; 53 + 54 + /* for layout_commit */ 55 + enum osd_delta_space_valid_enum { 56 + OBJ_DSU_INIT = 0, 57 + OBJ_DSU_VALID, 58 + OBJ_DSU_INVALID, 59 + } delta_space_valid; 60 + s64 delta_space_used; /* consumed by write ops */ 61 + 62 + /* for layout_return */ 63 + spinlock_t lock; 64 + struct list_head err_list; 65 + }; 66 + 67 + static inline struct objlayout * 68 + OBJLAYOUT(struct pnfs_layout_hdr *lo) 69 + { 70 + return container_of(lo, struct objlayout, pnfs_layout); 71 + } 72 + 73 + /* 74 + * per-I/O operation state 75 + * embedded in objects provider io_state data structure 76 + */ 77 + struct objlayout_io_state { 78 + struct pnfs_layout_segment *lseg; 79 + 80 + struct page **pages; 81 + unsigned pgbase; 82 + unsigned nr_pages; 83 + unsigned long count; 84 + loff_t offset; 85 + bool sync; 86 + 87 + void *rpcdata; 88 + int status; /* res */ 89 + int eof; /* res */ 90 + int committed; /* res */ 91 + 92 + /* Error reporting (layout_return) */ 93 + struct list_head err_list; 94 + unsigned num_comps; 95 + /* Pointer to array of error descriptors of size num_comps. 96 + * It should contain as many entries as devices in the osd_layout 97 + * that participate in the I/O. 
It is up to the io_engine to allocate 98 + * needed space and set num_comps. 99 + */ 100 + struct pnfs_osd_ioerr *ioerrs; 101 + }; 102 + 103 + /* 104 + * Raid engine I/O API 105 + */ 106 + extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, 107 + struct pnfs_layout_hdr *pnfslay, 108 + struct pnfs_layout_range *range, 109 + struct xdr_stream *xdr, 110 + gfp_t gfp_flags); 111 + extern void objio_free_lseg(struct pnfs_layout_segment *lseg); 112 + 113 + extern int objio_alloc_io_state( 114 + struct pnfs_layout_segment *lseg, 115 + struct objlayout_io_state **outp, 116 + gfp_t gfp_flags); 117 + extern void objio_free_io_state(struct objlayout_io_state *state); 118 + 119 + extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); 120 + extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, 121 + bool stable); 122 + 123 + /* 124 + * callback API 125 + */ 126 + extern void objlayout_io_set_result(struct objlayout_io_state *state, 127 + unsigned index, struct pnfs_osd_objid *pooid, 128 + int osd_error, u64 offset, u64 length, bool is_write); 129 + 130 + static inline void 131 + objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) 132 + { 133 + struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); 134 + 135 + /* If one of the I/Os errored out and the delta_space_used was 136 + * invalid we render the complete report as invalid. Protocol mandate 137 + * the DSU be accurate or not reported. 138 + */ 139 + spin_lock(&objlay->lock); 140 + if (objlay->delta_space_valid != OBJ_DSU_INVALID) { 141 + objlay->delta_space_valid = OBJ_DSU_VALID; 142 + objlay->delta_space_used += space_used; 143 + } 144 + spin_unlock(&objlay->lock); 145 + } 146 + 147 + extern void objlayout_read_done(struct objlayout_io_state *state, 148 + ssize_t status, bool sync); 149 + extern void objlayout_write_done(struct objlayout_io_state *state, 150 + ssize_t status, bool sync); 151 + 152 + extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, 153 + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, 154 + gfp_t gfp_flags); 155 + extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); 156 + 157 + /* 158 + * exported generic objects function vectors 159 + */ 160 + 161 + extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); 162 + extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); 163 + 164 + extern struct pnfs_layout_segment *objlayout_alloc_lseg( 165 + struct pnfs_layout_hdr *, 166 + struct nfs4_layoutget_res *, 167 + gfp_t gfp_flags); 168 + extern void objlayout_free_lseg(struct pnfs_layout_segment *); 169 + 170 + extern enum pnfs_try_status objlayout_read_pagelist( 171 + struct nfs_read_data *); 172 + 173 + extern enum pnfs_try_status objlayout_write_pagelist( 174 + struct nfs_write_data *, 175 + int how); 176 + 177 + extern void objlayout_encode_layoutcommit( 178 + struct pnfs_layout_hdr *, 179 + struct xdr_stream *, 180 + const struct nfs4_layoutcommit_args *); 181 + 182 + extern void objlayout_encode_layoutreturn( 183 + struct pnfs_layout_hdr *, 184 + struct xdr_stream *, 185 + const struct nfs4_layoutreturn_args *); 186 + 187 + #endif /* _OBJLAYOUT_H */
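The header splits responsibilities between the generic objlayout code and the io engine: the engine allocates the embedded objlayout_io_state (setting num_comps and the ioerrs array), records each component's result with objlayout_io_set_result(), and reports overall completion through objlayout_read_done() or objlayout_write_done(). A hedged sketch of that calling sequence; the helper name and the "last" bookkeeping are assumptions for illustration:

static void example_comp_done(struct objlayout_io_state *state,
			      unsigned comp, struct pnfs_osd_objid *oid,
			      int osd_err, u64 offset, u64 length,
			      bool is_write, bool last, ssize_t status)
{
	/* Record the per-component result; errors land in state->ioerrs[]
	 * and are encoded at the next layoutreturn. */
	objlayout_io_set_result(state, comp, oid, osd_err,
				offset, length, is_write);
	if (!last)
		return;
	if (is_write)
		objlayout_write_done(state, status, state->sync);
	else
		objlayout_read_done(state, status, state->sync);
}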
+412
fs/nfs/objlayout/pnfs_osd_xdr_cli.c
··· 1 + /* 2 + * Object-Based pNFS Layout XDR layer 3 + * 4 + * Copyright (C) 2007 Panasas Inc. [year of first publication] 5 + * All rights reserved. 6 + * 7 + * Benny Halevy <bhalevy@panasas.com> 8 + * Boaz Harrosh <bharrosh@panasas.com> 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of the GNU General Public License version 2 12 + * See the file COPYING included with this distribution for more details. 13 + * 14 + * Redistribution and use in source and binary forms, with or without 15 + * modification, are permitted provided that the following conditions 16 + * are met: 17 + * 18 + * 1. Redistributions of source code must retain the above copyright 19 + * notice, this list of conditions and the following disclaimer. 20 + * 2. Redistributions in binary form must reproduce the above copyright 21 + * notice, this list of conditions and the following disclaimer in the 22 + * documentation and/or other materials provided with the distribution. 23 + * 3. Neither the name of the Panasas company nor the names of its 24 + * contributors may be used to endorse or promote products derived 25 + * from this software without specific prior written permission. 26 + * 27 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 28 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 29 + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 34 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 + */ 39 + 40 + #include <linux/pnfs_osd_xdr.h> 41 + 42 + #define NFSDBG_FACILITY NFSDBG_PNFS_LD 43 + 44 + /* 45 + * The following implementation is based on RFC5664 46 + */ 47 + 48 + /* 49 + * struct pnfs_osd_objid { 50 + * struct nfs4_deviceid oid_device_id; 51 + * u64 oid_partition_id; 52 + * u64 oid_object_id; 53 + * }; // xdr size 32 bytes 54 + */ 55 + static __be32 * 56 + _osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid) 57 + { 58 + p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, 59 + sizeof(objid->oid_device_id.data)); 60 + 61 + p = xdr_decode_hyper(p, &objid->oid_partition_id); 62 + p = xdr_decode_hyper(p, &objid->oid_object_id); 63 + return p; 64 + } 65 + /* 66 + * struct pnfs_osd_opaque_cred { 67 + * u32 cred_len; 68 + * void *cred; 69 + * }; // xdr size [variable] 70 + * The return pointers are from the xdr buffer 71 + */ 72 + static int 73 + _osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred, 74 + struct xdr_stream *xdr) 75 + { 76 + __be32 *p = xdr_inline_decode(xdr, 1); 77 + 78 + if (!p) 79 + return -EINVAL; 80 + 81 + opaque_cred->cred_len = be32_to_cpu(*p++); 82 + 83 + p = xdr_inline_decode(xdr, opaque_cred->cred_len); 84 + if (!p) 85 + return -EINVAL; 86 + 87 + opaque_cred->cred = p; 88 + return 0; 89 + } 90 + 91 + /* 92 + * struct pnfs_osd_object_cred { 93 + * struct pnfs_osd_objid oc_object_id; 94 + * u32 oc_osd_version; 95 + * u32 oc_cap_key_sec; 96 + * struct pnfs_osd_opaque_cred oc_cap_key 97 + * struct pnfs_osd_opaque_cred oc_cap; 98 + * }; // xdr size 32 + 4 + 4 + [variable] + [variable] 99 + */ 100 + static int 101 + _osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp, 102 + struct xdr_stream *xdr) 103 + { 104 + __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4); 105 + int ret; 106 + 107 + if (!p) 108 + return -EIO; 109 + 110 + p = _osd_xdr_decode_objid(p, &comp->oc_object_id); 111 + comp->oc_osd_version = be32_to_cpup(p++); 112 + comp->oc_cap_key_sec = be32_to_cpup(p); 113 + 114 + ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr); 115 + if (unlikely(ret)) 116 + return ret; 117 + 118 + ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr); 119 + return ret; 120 + } 121 + 122 + /* 123 + * struct pnfs_osd_data_map { 124 + * u32 odm_num_comps; 125 + * u64 odm_stripe_unit; 126 + * u32 odm_group_width; 127 + * u32 odm_group_depth; 128 + * u32 odm_mirror_cnt; 129 + * u32 odm_raid_algorithm; 130 + * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4 131 + */ 132 + static inline int 133 + _osd_data_map_xdr_sz(void) 134 + { 135 + return 4 + 8 + 4 + 4 + 4 + 4; 136 + } 137 + 138 + static __be32 * 139 + _osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map) 140 + { 141 + data_map->odm_num_comps = be32_to_cpup(p++); 142 + p = xdr_decode_hyper(p, &data_map->odm_stripe_unit); 143 + data_map->odm_group_width = be32_to_cpup(p++); 144 + data_map->odm_group_depth = be32_to_cpup(p++); 145 + data_map->odm_mirror_cnt = be32_to_cpup(p++); 146 + data_map->odm_raid_algorithm = be32_to_cpup(p++); 147 + dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u " 148 + "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n", 149 + __func__, 150 + data_map->odm_num_comps, 151 + (unsigned long long)data_map->odm_stripe_unit, 152 + data_map->odm_group_width, 153 + data_map->odm_group_depth, 154 + data_map->odm_mirror_cnt, 155 + data_map->odm_raid_algorithm); 156 + return p; 157 + } 158 + 159 + int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, 160 + struct pnfs_osd_xdr_decode_layout_iter *iter, struct 
xdr_stream *xdr) 161 + { 162 + __be32 *p; 163 + 164 + memset(iter, 0, sizeof(*iter)); 165 + 166 + p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4); 167 + if (unlikely(!p)) 168 + return -EINVAL; 169 + 170 + p = _osd_xdr_decode_data_map(p, &layout->olo_map); 171 + layout->olo_comps_index = be32_to_cpup(p++); 172 + layout->olo_num_comps = be32_to_cpup(p++); 173 + iter->total_comps = layout->olo_num_comps; 174 + return 0; 175 + } 176 + 177 + bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, 178 + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, 179 + int *err) 180 + { 181 + BUG_ON(iter->decoded_comps > iter->total_comps); 182 + if (iter->decoded_comps == iter->total_comps) 183 + return false; 184 + 185 + *err = _osd_xdr_decode_object_cred(comp, xdr); 186 + if (unlikely(*err)) { 187 + dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d " 188 + "total_comps=%d\n", __func__, *err, 189 + iter->decoded_comps, iter->total_comps); 190 + return false; /* stop the loop */ 191 + } 192 + dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx " 193 + "key_len=%u cap_len=%u\n", 194 + __func__, 195 + _DEVID_LO(&comp->oc_object_id.oid_device_id), 196 + _DEVID_HI(&comp->oc_object_id.oid_device_id), 197 + comp->oc_object_id.oid_partition_id, 198 + comp->oc_object_id.oid_object_id, 199 + comp->oc_cap_key.cred_len, comp->oc_cap.cred_len); 200 + 201 + iter->decoded_comps++; 202 + return true; 203 + } 204 + 205 + /* 206 + * Get Device Information Decoding 207 + * 208 + * Note: since Device Information is currently done synchronously, all 209 + * variable strings fields are left inside the rpc buffer and are only 210 + * pointed to by the pnfs_osd_deviceaddr members. So the read buffer 211 + * should not be freed while the returned information is in use. 
212 + */ 213 + /* 214 + *struct nfs4_string { 215 + * unsigned int len; 216 + * char *data; 217 + *}; // size [variable] 218 + * NOTE: Returned string points to inside the XDR buffer 219 + */ 220 + static __be32 * 221 + __read_u8_opaque(__be32 *p, struct nfs4_string *str) 222 + { 223 + str->len = be32_to_cpup(p++); 224 + str->data = (char *)p; 225 + 226 + p += XDR_QUADLEN(str->len); 227 + return p; 228 + } 229 + 230 + /* 231 + * struct pnfs_osd_targetid { 232 + * u32 oti_type; 233 + * struct nfs4_string oti_scsi_device_id; 234 + * };// size 4 + [variable] 235 + */ 236 + static __be32 * 237 + __read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid) 238 + { 239 + u32 oti_type; 240 + 241 + oti_type = be32_to_cpup(p++); 242 + targetid->oti_type = oti_type; 243 + 244 + switch (oti_type) { 245 + case OBJ_TARGET_SCSI_NAME: 246 + case OBJ_TARGET_SCSI_DEVICE_ID: 247 + p = __read_u8_opaque(p, &targetid->oti_scsi_device_id); 248 + } 249 + 250 + return p; 251 + } 252 + 253 + /* 254 + * struct pnfs_osd_net_addr { 255 + * struct nfs4_string r_netid; 256 + * struct nfs4_string r_addr; 257 + * }; 258 + */ 259 + static __be32 * 260 + __read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr) 261 + { 262 + p = __read_u8_opaque(p, &netaddr->r_netid); 263 + p = __read_u8_opaque(p, &netaddr->r_addr); 264 + 265 + return p; 266 + } 267 + 268 + /* 269 + * struct pnfs_osd_targetaddr { 270 + * u32 ota_available; 271 + * struct pnfs_osd_net_addr ota_netaddr; 272 + * }; 273 + */ 274 + static __be32 * 275 + __read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr) 276 + { 277 + u32 ota_available; 278 + 279 + ota_available = be32_to_cpup(p++); 280 + targetaddr->ota_available = ota_available; 281 + 282 + if (ota_available) 283 + p = __read_net_addr(p, &targetaddr->ota_netaddr); 284 + 285 + 286 + return p; 287 + } 288 + 289 + /* 290 + * struct pnfs_osd_deviceaddr { 291 + * struct pnfs_osd_targetid oda_targetid; 292 + * struct pnfs_osd_targetaddr oda_targetaddr; 293 + * u8 oda_lun[8]; 294 + * struct nfs4_string oda_systemid; 295 + * struct pnfs_osd_object_cred oda_root_obj_cred; 296 + * struct nfs4_string oda_osdname; 297 + * }; 298 + */ 299 + 300 + /* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does 301 + * not have an xdr_stream 302 + */ 303 + static __be32 * 304 + __read_opaque_cred(__be32 *p, 305 + struct pnfs_osd_opaque_cred *opaque_cred) 306 + { 307 + opaque_cred->cred_len = be32_to_cpu(*p++); 308 + opaque_cred->cred = p; 309 + return p + XDR_QUADLEN(opaque_cred->cred_len); 310 + } 311 + 312 + static __be32 * 313 + __read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp) 314 + { 315 + p = _osd_xdr_decode_objid(p, &comp->oc_object_id); 316 + comp->oc_osd_version = be32_to_cpup(p++); 317 + comp->oc_cap_key_sec = be32_to_cpup(p++); 318 + 319 + p = __read_opaque_cred(p, &comp->oc_cap_key); 320 + p = __read_opaque_cred(p, &comp->oc_cap); 321 + return p; 322 + } 323 + 324 + void pnfs_osd_xdr_decode_deviceaddr( 325 + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p) 326 + { 327 + p = __read_targetid(p, &deviceaddr->oda_targetid); 328 + 329 + p = __read_targetaddr(p, &deviceaddr->oda_targetaddr); 330 + 331 + p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun, 332 + sizeof(deviceaddr->oda_lun)); 333 + 334 + p = __read_u8_opaque(p, &deviceaddr->oda_systemid); 335 + 336 + p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred); 337 + 338 + p = __read_u8_opaque(p, &deviceaddr->oda_osdname); 339 + 340 + /* libosd likes this terminated in dbg. 
It's last, so no problems */ 341 + deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0; 342 + } 343 + 344 + /* 345 + * struct pnfs_osd_layoutupdate { 346 + * u32 dsu_valid; 347 + * s64 dsu_delta; 348 + * u32 olu_ioerr_flag; 349 + * }; xdr size 4 + 8 + 4 350 + */ 351 + int 352 + pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, 353 + struct pnfs_osd_layoutupdate *lou) 354 + { 355 + __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4); 356 + 357 + if (!p) 358 + return -E2BIG; 359 + 360 + *p++ = cpu_to_be32(lou->dsu_valid); 361 + if (lou->dsu_valid) 362 + p = xdr_encode_hyper(p, lou->dsu_delta); 363 + *p++ = cpu_to_be32(lou->olu_ioerr_flag); 364 + return 0; 365 + } 366 + 367 + /* 368 + * struct pnfs_osd_objid { 369 + * struct nfs4_deviceid oid_device_id; 370 + * u64 oid_partition_id; 371 + * u64 oid_object_id; 372 + * }; // xdr size 32 bytes 373 + */ 374 + static inline __be32 * 375 + pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id) 376 + { 377 + p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data, 378 + sizeof(object_id->oid_device_id.data)); 379 + p = xdr_encode_hyper(p, object_id->oid_partition_id); 380 + p = xdr_encode_hyper(p, object_id->oid_object_id); 381 + 382 + return p; 383 + } 384 + 385 + /* 386 + * struct pnfs_osd_ioerr { 387 + * struct pnfs_osd_objid oer_component; 388 + * u64 oer_comp_offset; 389 + * u64 oer_comp_length; 390 + * u32 oer_iswrite; 391 + * u32 oer_errno; 392 + * }; // xdr size 32 + 24 bytes 393 + */ 394 + void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr) 395 + { 396 + p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component); 397 + p = xdr_encode_hyper(p, ioerr->oer_comp_offset); 398 + p = xdr_encode_hyper(p, ioerr->oer_comp_length); 399 + *p++ = cpu_to_be32(ioerr->oer_iswrite); 400 + *p = cpu_to_be32(ioerr->oer_errno); 401 + } 402 + 403 + __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr) 404 + { 405 + __be32 *p; 406 + 407 + p = xdr_reserve_space(xdr, 32 + 24); 408 + if (unlikely(!p)) 409 + dprintk("%s: out of xdr space\n", __func__); 410 + 411 + return p; 412 + }
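pnfs_osd_xdr_decode_layout_map() and pnfs_osd_xdr_decode_layout_comp() above form a two-phase iterator, so a caller can decode any number of component credentials without one large contiguous buffer. A hedged sketch of the intended decode loop (the real caller is objio_alloc_lseg() in objio_osd.c; the copy-out comment marks an assumption about what a caller must do):

static int example_decode_layout(struct pnfs_osd_layout *layout,
				 struct xdr_stream *xdr)
{
	struct pnfs_osd_xdr_decode_layout_iter iter;
	struct pnfs_osd_object_cred comp;
	int err;

	err = pnfs_osd_xdr_decode_layout_map(layout, &iter, xdr);
	if (err)
		return err;

	while (pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &err)) {
		/* comp.oc_cap and comp.oc_cap_key point into the xdr
		 * buffer; copy out whatever must outlive the decode. */
	}
	return err;
}

Note that err is only written on a decode failure; when the iterator simply runs out of components, the loop ends with the 0 from pnfs_osd_xdr_decode_layout_map() still in err.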
+30 -32
fs/nfs/pagelist.c
··· 204 204 TASK_UNINTERRUPTIBLE); 205 205 } 206 206 207 + static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) 208 + { 209 + /* 210 + * FIXME: ideally we should be able to coalesce all requests 211 + * that are not block boundary aligned, but currently this 212 + * is problematic for the case of bsize < PAGE_CACHE_SIZE, 213 + * since nfs_flush_multi and nfs_pagein_multi assume you 214 + * can have only one struct nfs_page. 215 + */ 216 + if (desc->pg_bsize < PAGE_SIZE) 217 + return 0; 218 + 219 + return desc->pg_count + req->wb_bytes <= desc->pg_bsize; 220 + } 221 + 207 222 /** 208 223 * nfs_pageio_init - initialise a page io descriptor 209 224 * @desc: pointer to descriptor ··· 244 229 desc->pg_ioflags = io_flags; 245 230 desc->pg_error = 0; 246 231 desc->pg_lseg = NULL; 232 + desc->pg_test = nfs_generic_pg_test; 233 + pnfs_pageio_init(desc, inode); 247 234 } 248 235 249 236 /** ··· 259 242 * 260 243 * Return 'true' if this is the case, else return 'false'. 261 244 */ 262 - static int nfs_can_coalesce_requests(struct nfs_page *prev, 263 - struct nfs_page *req, 264 - struct nfs_pageio_descriptor *pgio) 245 + static bool nfs_can_coalesce_requests(struct nfs_page *prev, 246 + struct nfs_page *req, 247 + struct nfs_pageio_descriptor *pgio) 265 248 { 266 249 if (req->wb_context->cred != prev->wb_context->cred) 267 - return 0; 250 + return false; 268 251 if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) 269 - return 0; 252 + return false; 270 253 if (req->wb_context->state != prev->wb_context->state) 271 - return 0; 254 + return false; 272 255 if (req->wb_index != (prev->wb_index + 1)) 273 - return 0; 256 + return false; 274 257 if (req->wb_pgbase != 0) 275 - return 0; 258 + return false; 276 259 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 277 - return 0; 278 - /* 279 - * Non-whole file layouts need to check that req is inside of 280 - * pgio->pg_lseg. 281 - */ 282 - if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) 283 - return 0; 284 - return 1; 260 + return false; 261 + return pgio->pg_test(pgio, prev, req); 285 262 } 286 263 287 264 /** ··· 289 278 static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, 290 279 struct nfs_page *req) 291 280 { 292 - size_t newlen = req->wb_bytes; 293 - 294 281 if (desc->pg_count != 0) { 295 282 struct nfs_page *prev; 296 283 297 - /* 298 - * FIXME: ideally we should be able to coalesce all requests 299 - * that are not block boundary aligned, but currently this 300 - * is problematic for the case of bsize < PAGE_CACHE_SIZE, 301 - * since nfs_flush_multi and nfs_pagein_multi assume you 302 - * can have only one struct nfs_page. 303 - */ 304 - if (desc->pg_bsize < PAGE_SIZE) 305 - return 0; 306 - newlen += desc->pg_count; 307 - if (newlen > desc->pg_bsize) 308 - return 0; 309 284 prev = nfs_list_entry(desc->pg_list.prev); 310 285 if (!nfs_can_coalesce_requests(prev, req, desc)) 311 286 return 0; 312 - } else 287 + } else { 313 288 desc->pg_base = req->wb_pgbase; 289 + } 314 290 nfs_list_remove_request(req); 315 291 nfs_list_add_request(req, &desc->pg_list); 316 - desc->pg_count = newlen; 292 + desc->pg_count += req->wb_bytes; 317 293 return 1; 318 294 } 319 295
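The refactored nfs_generic_pg_test() reduces generic coalescing to a single size bound: keep merging while the accumulated pg_count plus the incoming request still fits in pg_bsize (a layout driver may then narrow the bound further, as objio_pg_test() does above with max_io_size). A minimal user-space model of that predicate, with illustrative sizes:

#include <stdbool.h>
#include <stdio.h>

static bool can_coalesce(size_t pg_count, size_t wb_bytes,
			 size_t pg_bsize, size_t page_size)
{
	if (pg_bsize < page_size)	/* sub-page bsize: never coalesce */
		return false;
	return pg_count + wb_bytes <= pg_bsize;
}

int main(void)
{
	size_t bsize = 32768, page = 4096, count = 0, i;

	for (i = 0; i < 10; i++) {
		if (!can_coalesce(count, page, bsize, page)) {
			printf("flush after %zu bytes\n", count);
			count = 0;	/* descriptor sent; start a new one */
		}
		count += page;
	}
	printf("tail: %zu bytes pending\n", count);
	return 0;
}

With a 32k pg_bsize the model merges eight 4k requests, flushes, and leaves two pending, mirroring how nfs_pageio_do_add_request() accumulates desc->pg_count.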
+264 -80
fs/nfs/pnfs.c
··· 177 177 atomic_inc(&lo->plh_refcount); 178 178 } 179 179 180 + static struct pnfs_layout_hdr * 181 + pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) 182 + { 183 + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 184 + return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : 185 + kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); 186 + } 187 + 188 + static void 189 + pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 190 + { 191 + struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; 192 + return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); 193 + } 194 + 180 195 static void 181 196 destroy_layout_hdr(struct pnfs_layout_hdr *lo) 182 197 { 183 198 dprintk("%s: freeing layout cache %p\n", __func__, lo); 184 199 BUG_ON(!list_empty(&lo->plh_layouts)); 185 200 NFS_I(lo->plh_inode)->layout = NULL; 186 - kfree(lo); 201 + pnfs_free_layout_hdr(lo); 187 202 } 188 203 189 204 static void ··· 243 228 { 244 229 struct inode *inode = lseg->pls_layout->plh_inode; 245 230 246 - BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 231 + WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 247 232 list_del_init(&lseg->pls_list); 248 233 if (list_empty(&lseg->pls_layout->plh_segs)) { 249 234 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); ··· 276 261 } 277 262 EXPORT_SYMBOL_GPL(put_lseg); 278 263 279 - static bool 280 - should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 264 + static inline u64 265 + end_offset(u64 start, u64 len) 281 266 { 282 - return (recall_iomode == IOMODE_ANY || 283 - lseg_iomode == recall_iomode); 267 + u64 end; 268 + 269 + end = start + len; 270 + return end >= start ? end : NFS4_MAX_UINT64; 271 + } 272 + 273 + /* last octet in a range */ 274 + static inline u64 275 + last_byte_offset(u64 start, u64 len) 276 + { 277 + u64 end; 278 + 279 + BUG_ON(!len); 280 + end = start + len; 281 + return end > start ? end - 1 : NFS4_MAX_UINT64; 282 + } 283 + 284 + /* 285 + * is l2 fully contained in l1? 286 + * start1 end1 287 + * [----------------------------------) 288 + * start2 end2 289 + * [----------------) 290 + */ 291 + static inline int 292 + lo_seg_contained(struct pnfs_layout_range *l1, 293 + struct pnfs_layout_range *l2) 294 + { 295 + u64 start1 = l1->offset; 296 + u64 end1 = end_offset(start1, l1->length); 297 + u64 start2 = l2->offset; 298 + u64 end2 = end_offset(start2, l2->length); 299 + 300 + return (start1 <= start2) && (end1 >= end2); 301 + } 302 + 303 + /* 304 + * do l1 and l2 intersect? 
305 + * start1 end1 306 + * [----------------------------------) 307 + * start2 end2 308 + * [----------------) 309 + */ 310 + static inline int 311 + lo_seg_intersecting(struct pnfs_layout_range *l1, 312 + struct pnfs_layout_range *l2) 313 + { 314 + u64 start1 = l1->offset; 315 + u64 end1 = end_offset(start1, l1->length); 316 + u64 start2 = l2->offset; 317 + u64 end2 = end_offset(start2, l2->length); 318 + 319 + return (end1 == NFS4_MAX_UINT64 || end1 > start2) && 320 + (end2 == NFS4_MAX_UINT64 || end2 > start1); 321 + } 322 + 323 + static bool 324 + should_free_lseg(struct pnfs_layout_range *lseg_range, 325 + struct pnfs_layout_range *recall_range) 326 + { 327 + return (recall_range->iomode == IOMODE_ANY || 328 + lseg_range->iomode == recall_range->iomode) && 329 + lo_seg_intersecting(lseg_range, recall_range); 284 330 } 285 331 286 332 /* Returns 1 if lseg is removed from list, 0 otherwise */ ··· 372 296 int 373 297 mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 374 298 struct list_head *tmp_list, 375 - u32 iomode) 299 + struct pnfs_layout_range *recall_range) 376 300 { 377 301 struct pnfs_layout_segment *lseg, *next; 378 302 int invalid = 0, removed = 0; ··· 385 309 return 0; 386 310 } 387 311 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 388 - if (should_free_lseg(lseg->pls_range.iomode, iomode)) { 312 + if (!recall_range || 313 + should_free_lseg(&lseg->pls_range, recall_range)) { 389 314 dprintk("%s: freeing lseg %p iomode %d " 390 315 "offset %llu length %llu\n", __func__, 391 316 lseg, lseg->pls_range.iomode, lseg->pls_range.offset, ··· 435 358 lo = nfsi->layout; 436 359 if (lo) { 437 360 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ 438 - mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); 361 + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); 439 362 } 440 363 spin_unlock(&nfsi->vfs_inode.i_lock); 441 364 pnfs_free_lseg_list(&tmp_list); ··· 544 467 static struct pnfs_layout_segment * 545 468 send_layoutget(struct pnfs_layout_hdr *lo, 546 469 struct nfs_open_context *ctx, 547 - u32 iomode, 470 + struct pnfs_layout_range *range, 548 471 gfp_t gfp_flags) 549 472 { 550 473 struct inode *ino = lo->plh_inode; ··· 576 499 goto out_err_free; 577 500 } 578 501 579 - lgp->args.minlength = NFS4_MAX_UINT64; 502 + lgp->args.minlength = PAGE_CACHE_SIZE; 503 + if (lgp->args.minlength > range->length) 504 + lgp->args.minlength = range->length; 580 505 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 581 - lgp->args.range.iomode = iomode; 582 - lgp->args.range.offset = 0; 583 - lgp->args.range.length = NFS4_MAX_UINT64; 506 + lgp->args.range = *range; 584 507 lgp->args.type = server->pnfs_curr_ld->id; 585 508 lgp->args.inode = ino; 586 509 lgp->args.ctx = get_nfs_open_context(ctx); ··· 595 518 nfs4_proc_layoutget(lgp); 596 519 if (!lseg) { 597 520 /* remember that LAYOUTGET failed and suspend trying */ 598 - set_bit(lo_fail_bit(iomode), &lo->plh_flags); 521 + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); 599 522 } 600 523 601 524 /* free xdr pages */ ··· 617 540 } 618 541 kfree(lgp); 619 542 return NULL; 543 + } 544 + 545 + /* Initiates a LAYOUTRETURN(FILE) */ 546 + int 547 + _pnfs_return_layout(struct inode *ino) 548 + { 549 + struct pnfs_layout_hdr *lo = NULL; 550 + struct nfs_inode *nfsi = NFS_I(ino); 551 + LIST_HEAD(tmp_list); 552 + struct nfs4_layoutreturn *lrp; 553 + nfs4_stateid stateid; 554 + int status = 0; 555 + 556 + dprintk("--> %s\n", __func__); 557 + 558 + spin_lock(&ino->i_lock); 559 + lo = nfsi->layout; 560 + if (!lo || 
!mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { 561 + spin_unlock(&ino->i_lock); 562 + dprintk("%s: no layout segments to return\n", __func__); 563 + goto out; 564 + } 565 + stateid = nfsi->layout->plh_stateid; 566 + /* Reference matched in nfs4_layoutreturn_release */ 567 + get_layout_hdr(lo); 568 + spin_unlock(&ino->i_lock); 569 + pnfs_free_lseg_list(&tmp_list); 570 + 571 + WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); 572 + 573 + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 574 + if (unlikely(lrp == NULL)) { 575 + status = -ENOMEM; 576 + goto out; 577 + } 578 + 579 + lrp->args.stateid = stateid; 580 + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 581 + lrp->args.inode = ino; 582 + lrp->clp = NFS_SERVER(ino)->nfs_client; 583 + 584 + status = nfs4_proc_layoutreturn(lrp); 585 + out: 586 + dprintk("<-- %s status: %d\n", __func__, status); 587 + return status; 620 588 } 621 589 622 590 bool pnfs_roc(struct inode *ino) ··· 747 625 * are seen first. 748 626 */ 749 627 static s64 750 - cmp_layout(u32 iomode1, u32 iomode2) 628 + cmp_layout(struct pnfs_layout_range *l1, 629 + struct pnfs_layout_range *l2) 751 630 { 631 + s64 d; 632 + 633 + /* high offset > low offset */ 634 + d = l1->offset - l2->offset; 635 + if (d) 636 + return d; 637 + 638 + /* short length > long length */ 639 + d = l2->length - l1->length; 640 + if (d) 641 + return d; 642 + 752 643 /* read > read/write */ 753 - return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); 644 + return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); 754 645 } 755 646 756 647 static void ··· 771 636 struct pnfs_layout_segment *lseg) 772 637 { 773 638 struct pnfs_layout_segment *lp; 774 - int found = 0; 775 639 776 640 dprintk("%s:Begin\n", __func__); 777 641 778 642 assert_spin_locked(&lo->plh_inode->i_lock); 779 643 list_for_each_entry(lp, &lo->plh_segs, pls_list) { 780 - if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) 644 + if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) 781 645 continue; 782 646 list_add_tail(&lseg->pls_list, &lp->pls_list); 783 647 dprintk("%s: inserted lseg %p " ··· 786 652 lseg->pls_range.offset, lseg->pls_range.length, 787 653 lp, lp->pls_range.iomode, lp->pls_range.offset, 788 654 lp->pls_range.length); 789 - found = 1; 790 - break; 655 + goto out; 791 656 } 792 - if (!found) { 793 - list_add_tail(&lseg->pls_list, &lo->plh_segs); 794 - dprintk("%s: inserted lseg %p " 795 - "iomode %d offset %llu length %llu at tail\n", 796 - __func__, lseg, lseg->pls_range.iomode, 797 - lseg->pls_range.offset, lseg->pls_range.length); 798 - } 657 + list_add_tail(&lseg->pls_list, &lo->plh_segs); 658 + dprintk("%s: inserted lseg %p " 659 + "iomode %d offset %llu length %llu at tail\n", 660 + __func__, lseg, lseg->pls_range.iomode, 661 + lseg->pls_range.offset, lseg->pls_range.length); 662 + out: 799 663 get_layout_hdr(lo); 800 664 801 665 dprintk("%s:Return\n", __func__); ··· 804 672 { 805 673 struct pnfs_layout_hdr *lo; 806 674 807 - lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); 675 + lo = pnfs_alloc_layout_hdr(ino, gfp_flags); 808 676 if (!lo) 809 677 return NULL; 810 678 atomic_set(&lo->plh_refcount, 1); ··· 837 705 if (likely(nfsi->layout == NULL)) /* Won the race? 
*/ 838 706 nfsi->layout = new; 839 707 else 840 - kfree(new); 708 + pnfs_free_layout_hdr(new); 841 709 return nfsi->layout; 842 710 } 843 711 ··· 853 721 * READ RW true 854 722 */ 855 723 static int 856 - is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) 724 + is_matching_lseg(struct pnfs_layout_range *ls_range, 725 + struct pnfs_layout_range *range) 857 726 { 858 - return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); 727 + struct pnfs_layout_range range1; 728 + 729 + if ((range->iomode == IOMODE_RW && 730 + ls_range->iomode != IOMODE_RW) || 731 + !lo_seg_intersecting(ls_range, range)) 732 + return 0; 733 + 734 + /* range1 covers only the first byte in the range */ 735 + range1 = *range; 736 + range1.length = 1; 737 + return lo_seg_contained(ls_range, &range1); 859 738 } 860 739 861 740 /* 862 741 * lookup range in layout 863 742 */ 864 743 static struct pnfs_layout_segment * 865 - pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) 744 + pnfs_find_lseg(struct pnfs_layout_hdr *lo, 745 + struct pnfs_layout_range *range) 866 746 { 867 747 struct pnfs_layout_segment *lseg, *ret = NULL; 868 748 ··· 883 739 assert_spin_locked(&lo->plh_inode->i_lock); 884 740 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 885 741 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 886 - is_matching_lseg(lseg, iomode)) { 742 + is_matching_lseg(&lseg->pls_range, range)) { 887 743 ret = get_lseg(lseg); 888 744 break; 889 745 } 890 - if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) 746 + if (cmp_layout(range, &lseg->pls_range) > 0) 891 747 break; 892 748 } 893 749 ··· 903 759 struct pnfs_layout_segment * 904 760 pnfs_update_layout(struct inode *ino, 905 761 struct nfs_open_context *ctx, 762 + loff_t pos, 763 + u64 count, 906 764 enum pnfs_iomode iomode, 907 765 gfp_t gfp_flags) 908 766 { 767 + struct pnfs_layout_range arg = { 768 + .iomode = iomode, 769 + .offset = pos, 770 + .length = count, 771 + }; 772 + unsigned pg_offset; 909 773 struct nfs_inode *nfsi = NFS_I(ino); 910 774 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 911 775 struct pnfs_layout_hdr *lo; ··· 941 789 goto out_unlock; 942 790 943 791 /* Check to see if the layout for the given range already exists */ 944 - lseg = pnfs_find_lseg(lo, iomode); 792 + lseg = pnfs_find_lseg(lo, &arg); 945 793 if (lseg) 946 794 goto out_unlock; 947 795 ··· 963 811 spin_unlock(&clp->cl_lock); 964 812 } 965 813 966 - lseg = send_layoutget(lo, ctx, iomode, gfp_flags); 814 + pg_offset = arg.offset & ~PAGE_CACHE_MASK; 815 + if (pg_offset) { 816 + arg.offset -= pg_offset; 817 + arg.length += pg_offset; 818 + } 819 + arg.length = PAGE_CACHE_ALIGN(arg.length); 820 + 821 + lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 967 822 if (!lseg && first) { 968 823 spin_lock(&clp->cl_lock); 969 824 list_del_init(&lo->plh_layouts); ··· 997 838 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 998 839 int status = 0; 999 840 1000 - /* Verify we got what we asked for. 1001 - * Note that because the xdr parsing only accepts a single 1002 - * element array, this can fail even if the server is behaving 1003 - * correctly. 
1004 - */ 1005 - if (lgp->args.range.iomode > res->range.iomode || 1006 - res->range.offset != 0 || 1007 - res->range.length != NFS4_MAX_UINT64) { 1008 - status = -EINVAL; 1009 - goto out; 1010 - } 1011 841 /* Inject layout blob into I/O device driver */ 1012 842 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 1013 843 if (!lseg || IS_ERR(lseg)) { ··· 1043 895 goto out; 1044 896 } 1045 897 1046 - static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, 1047 - struct nfs_page *prev, 1048 - struct nfs_page *req) 898 + bool 899 + pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 900 + struct nfs_page *req) 1049 901 { 902 + enum pnfs_iomode access_type; 903 + gfp_t gfp_flags; 904 + 905 + /* We assume that pg_ioflags == 0 iff we're reading a page */ 906 + if (pgio->pg_ioflags == 0) { 907 + access_type = IOMODE_READ; 908 + gfp_flags = GFP_KERNEL; 909 + } else { 910 + access_type = IOMODE_RW; 911 + gfp_flags = GFP_NOFS; 912 + } 913 + 1050 914 if (pgio->pg_count == prev->wb_bytes) { 1051 915 /* This is first coelesce call for a series of nfs_pages */ 1052 916 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1053 917 prev->wb_context, 1054 - IOMODE_READ, 1055 - GFP_KERNEL); 918 + req_offset(req), 919 + pgio->pg_count, 920 + access_type, 921 + gfp_flags); 922 + return true; 1056 923 } 1057 - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 924 + 925 + if (pgio->pg_lseg && 926 + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, 927 + pgio->pg_lseg->pls_range.length)) 928 + return false; 929 + 930 + return true; 1058 931 } 932 + EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1059 933 1060 - void 1061 - pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) 934 + /* 935 + * Called by non rpc-based layout drivers 936 + */ 937 + int 938 + pnfs_ld_write_done(struct nfs_write_data *data) 1062 939 { 1063 - struct pnfs_layoutdriver_type *ld; 940 + int status; 1064 941 1065 - ld = NFS_SERVER(inode)->pnfs_curr_ld; 1066 - pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; 1067 - } 1068 - 1069 - static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, 1070 - struct nfs_page *prev, 1071 - struct nfs_page *req) 1072 - { 1073 - if (pgio->pg_count == prev->wb_bytes) { 1074 - /* This is first coelesce call for a series of nfs_pages */ 1075 - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1076 - prev->wb_context, 1077 - IOMODE_RW, 1078 - GFP_NOFS); 942 + if (!data->pnfs_error) { 943 + pnfs_set_layoutcommit(data); 944 + data->mds_ops->rpc_call_done(&data->task, data); 945 + data->mds_ops->rpc_release(data); 946 + return 0; 1079 947 } 1080 - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 1081 - } 1082 948 1083 - void 1084 - pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) 1085 - { 1086 - struct pnfs_layoutdriver_type *ld; 1087 - 1088 - ld = NFS_SERVER(inode)->pnfs_curr_ld; 1089 - pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; 949 + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, 950 + data->pnfs_error); 951 + status = nfs_initiate_write(data, NFS_CLIENT(data->inode), 952 + data->mds_ops, NFS_FILE_SYNC); 953 + return status ? 
: -EAGAIN; 1090 954 } 955 + EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1091 956 1092 957 enum pnfs_try_status 1093 958 pnfs_try_to_write_data(struct nfs_write_data *wdata, ··· 1125 964 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1126 965 return trypnfs; 1127 966 } 967 + 968 + /* 969 + * Called by non rpc-based layout drivers 970 + */ 971 + int 972 + pnfs_ld_read_done(struct nfs_read_data *data) 973 + { 974 + int status; 975 + 976 + if (!data->pnfs_error) { 977 + __nfs4_read_done_cb(data); 978 + data->mds_ops->rpc_call_done(&data->task, data); 979 + data->mds_ops->rpc_release(data); 980 + return 0; 981 + } 982 + 983 + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, 984 + data->pnfs_error); 985 + status = nfs_initiate_read(data, NFS_CLIENT(data->inode), 986 + data->mds_ops); 987 + return status ? : -EAGAIN; 988 + } 989 + EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1128 990 1129 991 /* 1130 992 * Call the appropriate parallel I/O subsystem read function.
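A note on the range arithmetic above: lo_seg_intersecting() treats an end offset of NFS4_MAX_UINT64 as "unbounded", relying on end_offset() (defined earlier in this file) to saturate to NFS4_MAX_UINT64 on overflow, so a whole-file segment intersects every range. The stand-alone sketch below, plain userspace C with the helpers re-declared locally under that saturation assumption and PAGE_CACHE_SIZE assumed to be 4096, reproduces both the intersection test and the page alignment that pnfs_update_layout now applies to the range before sending LAYOUTGET.

#include <stdio.h>
#include <stdint.h>

#define NFS4_MAX_UINT64	(~(uint64_t)0)
#define PAGE_CACHE_SIZE	4096ULL			/* assumption: 4K pages */
#define PAGE_CACHE_MASK	(~(PAGE_CACHE_SIZE - 1))

/* Assumed to match the kernel helper: saturate instead of wrapping. */
static uint64_t end_offset(uint64_t start, uint64_t len)
{
	uint64_t end = start + len;

	return end >= start ? end : NFS4_MAX_UINT64;
}

/* The same test as lo_seg_intersecting() in the hunk above. */
static int intersecting(uint64_t o1, uint64_t l1, uint64_t o2, uint64_t l2)
{
	uint64_t end1 = end_offset(o1, l1);
	uint64_t end2 = end_offset(o2, l2);

	return (end1 == NFS4_MAX_UINT64 || end1 > o2) &&
	       (end2 == NFS4_MAX_UINT64 || end2 > o1);
}

int main(void)
{
	uint64_t offset = 5000, length = 100;
	unsigned int pg_offset;

	/* A whole-file segment intersects any range: prints 1. */
	printf("%d\n", intersecting(0, NFS4_MAX_UINT64, 1 << 20, 4096));
	/* Adjacent but disjoint page ranges do not: prints 0. */
	printf("%d\n", intersecting(0, 4096, 4096, 4096));

	/* Page alignment applied by pnfs_update_layout before LAYOUTGET:
	 * round the start down and the length up to page boundaries. */
	pg_offset = offset & ~PAGE_CACHE_MASK;
	offset -= pg_offset;
	length += pg_offset;
	length = (length + PAGE_CACHE_SIZE - 1) & PAGE_CACHE_MASK;
	printf("offset=%llu length=%llu\n",	/* offset=4096 length=4096 */
	       (unsigned long long)offset, (unsigned long long)length);
	return 0;
}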
+102 -15
fs/nfs/pnfs.h
··· 30 30 #ifndef FS_NFS_PNFS_H 31 31 #define FS_NFS_PNFS_H 32 32 33 + #include <linux/nfs_fs.h> 33 34 #include <linux/nfs_page.h> 34 35 35 36 enum { ··· 65 64 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ 66 65 }; 67 66 67 + enum layoutdriver_policy_flags { 68 + /* Should the pNFS client commit and return the layout upon a setattr */ 69 + PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, 70 + }; 71 + 72 + struct nfs4_deviceid_node; 73 + 68 74 /* Per-layout driver specific registration structure */ 69 75 struct pnfs_layoutdriver_type { 70 76 struct list_head pnfs_tblid; 71 77 const u32 id; 72 78 const char *name; 73 79 struct module *owner; 80 + unsigned flags; 81 + 82 + struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); 83 + void (*free_layout_hdr) (struct pnfs_layout_hdr *); 84 + 74 85 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); 75 86 void (*free_lseg) (struct pnfs_layout_segment *lseg); 76 87 77 88 /* test for nfs page cache coalescing */ 78 - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 89 + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 79 90 80 91 /* Returns true if layoutdriver wants to divert this request to 81 92 * driver's commit routine. ··· 102 89 */ 103 90 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); 104 91 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); 92 + 93 + void (*free_deviceid_node) (struct nfs4_deviceid_node *); 94 + 95 + void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, 96 + struct xdr_stream *xdr, 97 + const struct nfs4_layoutreturn_args *args); 98 + 99 + void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, 100 + struct xdr_stream *xdr, 101 + const struct nfs4_layoutcommit_args *args); 105 102 }; 106 103 107 104 struct pnfs_layout_hdr { ··· 143 120 extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, 144 121 struct pnfs_device *dev); 145 122 extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); 123 + extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); 146 124 147 125 /* pnfs.c */ 148 126 void get_layout_hdr(struct pnfs_layout_hdr *lo); 149 127 void put_lseg(struct pnfs_layout_segment *lseg); 150 128 struct pnfs_layout_segment * 151 129 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 152 - enum pnfs_iomode access_type, gfp_t gfp_flags); 130 + loff_t pos, u64 count, enum pnfs_iomode access_type, 131 + gfp_t gfp_flags); 153 132 void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 154 133 void unset_pnfs_layoutdriver(struct nfs_server *); 155 134 enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 156 135 const struct rpc_call_ops *, int); 157 136 enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, 158 137 const struct rpc_call_ops *); 159 - void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); 160 - void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *); 138 + bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 161 139 int pnfs_layout_process(struct nfs4_layoutget *lgp); 162 140 void pnfs_free_lseg_list(struct list_head *tmp_list); 163 141 void pnfs_destroy_layout(struct nfs_inode *); ··· 172 148 struct nfs4_state *open_state); 173 149 int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 174 150 struct 
list_head *tmp_list, 175 - u32 iomode); 151 + struct pnfs_layout_range *recall_range); 176 152 bool pnfs_roc(struct inode *ino); 177 153 void pnfs_roc_release(struct inode *ino); 178 154 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 179 155 bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 180 156 void pnfs_set_layoutcommit(struct nfs_write_data *wdata); 181 157 int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 158 + int _pnfs_return_layout(struct inode *); 159 + int pnfs_ld_write_done(struct nfs_write_data *); 160 + int pnfs_ld_read_done(struct nfs_read_data *); 161 + 162 + /* pnfs_dev.c */ 163 + struct nfs4_deviceid_node { 164 + struct hlist_node node; 165 + const struct pnfs_layoutdriver_type *ld; 166 + const struct nfs_client *nfs_client; 167 + struct nfs4_deviceid deviceid; 168 + atomic_t ref; 169 + }; 170 + 171 + void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 172 + struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 173 + struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 174 + void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 175 + void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, 176 + const struct pnfs_layoutdriver_type *, 177 + const struct nfs_client *, 178 + const struct nfs4_deviceid *); 179 + struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); 180 + bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); 181 + void nfs4_deviceid_purge_client(const struct nfs_client *); 182 182 183 183 static inline int lo_fail_bit(u32 iomode) 184 184 { ··· 271 223 put_lseg(req->wb_commit_lseg); 272 224 } 273 225 226 + /* Should the pNFS client commit and return the layout upon a setattr */ 227 + static inline bool 228 + pnfs_ld_layoutret_on_setattr(struct inode *inode) 229 + { 230 + if (!pnfs_enabled_sb(NFS_SERVER(inode))) 231 + return false; 232 + return NFS_SERVER(inode)->pnfs_curr_ld->flags & 233 + PNFS_LAYOUTRET_ON_SETATTR; 234 + } 235 + 236 + static inline int pnfs_return_layout(struct inode *ino) 237 + { 238 + struct nfs_inode *nfsi = NFS_I(ino); 239 + struct nfs_server *nfss = NFS_SERVER(ino); 240 + 241 + if (pnfs_enabled_sb(nfss) && nfsi->layout) 242 + return _pnfs_return_layout(ino); 243 + 244 + return 0; 245 + } 246 + 247 + static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, 248 + struct inode *inode) 249 + { 250 + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 251 + 252 + if (ld) 253 + pgio->pg_test = ld->pg_test; 254 + } 255 + 274 256 #else /* CONFIG_NFS_V4_1 */ 275 257 276 258 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) ··· 323 245 324 246 static inline struct pnfs_layout_segment * 325 247 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 326 - enum pnfs_iomode access_type, gfp_t gfp_flags) 248 + loff_t pos, u64 count, enum pnfs_iomode access_type, 249 + gfp_t gfp_flags) 327 250 { 328 251 return NULL; 329 252 } ··· 341 262 const struct rpc_call_ops *call_ops, int how) 342 263 { 343 264 return PNFS_NOT_ATTEMPTED; 265 + } 266 + 267 + static inline int pnfs_return_layout(struct inode *ino) 268 + { 269 + return 0; 270 + } 271 + 272 + static inline bool 273 + pnfs_ld_layoutret_on_setattr(struct inode *inode) 274 + { 275 + return false; 344 276 } 345 277 346 
278 static inline bool ··· 384 294 { 385 295 } 386 296 387 - static inline void 388 - pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) 297 + static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, 298 + struct inode *inode) 389 299 { 390 - pgio->pg_test = NULL; 391 - } 392 - 393 - static inline void 394 - pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) 395 - { 396 - pgio->pg_test = NULL; 397 300 } 398 301 399 302 static inline void ··· 413 330 static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 414 331 { 415 332 return 0; 333 + } 334 + 335 + static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) 336 + { 416 337 } 417 338 #endif /* CONFIG_NFS_V4_1 */ 418 339
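Taken together, the new alloc_layout_hdr/free_layout_hdr hooks, the flags word, and the exported pnfs_generic_pg_test let a layout driver be wired up declaratively. Below is a minimal registration sketch, not the actual objlayout code: the objlay_* names are placeholders, a real driver must also provide alloc_lseg/free_lseg and the read/write pagelist routines, and pnfs_register_layoutdriver() is the pre-existing registration entry point declared elsewhere in this header.

#include <linux/module.h>
#include <linux/slab.h>
#include "pnfs.h"

static struct pnfs_layout_hdr *
objlay_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	/* A real driver embeds struct pnfs_layout_hdr inside a larger,
	 * driver-private structure and returns the embedded member. */
	return kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
}

static void objlay_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	kfree(lo);
}

static struct pnfs_layoutdriver_type objlay_type = {
	.id			= LAYOUT_OSD2_OBJECTS,
	.name			= "LAYOUT_OSD2_OBJECTS",
	.owner			= THIS_MODULE,
	/* Commit and return the layout before a setattr goes out. */
	.flags			= PNFS_LAYOUTRET_ON_SETATTR,
	.alloc_layout_hdr	= objlay_alloc_layout_hdr,
	.free_layout_hdr	= objlay_free_layout_hdr,
	/* Reuse the generic range-based coalescing test exported above. */
	.pg_test		= pnfs_generic_pg_test,
};

static int __init objlay_sketch_init(void)
{
	return pnfs_register_layoutdriver(&objlay_type);
}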
+270
fs/nfs/pnfs_dev.c
··· 1 + /* 2 + * Device operations for the pnfs client. 3 + * 4 + * Copyright (c) 2002 5 + * The Regents of the University of Michigan 6 + * All Rights Reserved 7 + * 8 + * Dean Hildebrand <dhildebz@umich.edu> 9 + * Garth Goodson <Garth.Goodson@netapp.com> 10 + * 11 + * Permission is granted to use, copy, create derivative works, and 12 + * redistribute this software and such derivative works for any purpose, 13 + * so long as the name of the University of Michigan is not used in 14 + * any advertising or publicity pertaining to the use or distribution 15 + * of this software without specific, written prior authorization. If 16 + * the above copyright notice or any other identification of the 17 + * University of Michigan is included in any copy of any portion of 18 + * this software, then the disclaimer below must also be included. 19 + * 20 + * This software is provided as is, without representation or warranty 21 + * of any kind either express or implied, including without limitation 22 + * the implied warranties of merchantability, fitness for a particular 23 + * purpose, or noninfringement. The Regents of the University of 24 + * Michigan shall not be liable for any damages, including special, 25 + * indirect, incidental, or consequential damages, with respect to any 26 + * claim arising out of or in connection with the use of the software, 27 + * even if it has been or is hereafter advised of the possibility of 28 + * such damages. 29 + */ 30 + 31 + #include "pnfs.h" 32 + 33 + #define NFSDBG_FACILITY NFSDBG_PNFS 34 + 35 + /* 36 + * Device ID RCU cache. A device ID is unique per server and layout type. 37 + */ 38 + #define NFS4_DEVICE_ID_HASH_BITS 5 39 + #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) 40 + #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) 41 + 42 + static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; 43 + static DEFINE_SPINLOCK(nfs4_deviceid_lock); 44 + 45 + void 46 + nfs4_print_deviceid(const struct nfs4_deviceid *id) 47 + { 48 + u32 *p = (u32 *)id; 49 + 50 + dprintk("%s: device id= [%x%x%x%x]\n", __func__, 51 + p[0], p[1], p[2], p[3]); 52 + } 53 + EXPORT_SYMBOL_GPL(nfs4_print_deviceid); 54 + 55 + static inline u32 56 + nfs4_deviceid_hash(const struct nfs4_deviceid *id) 57 + { 58 + unsigned char *cptr = (unsigned char *)id->data; 59 + unsigned int nbytes = NFS4_DEVICEID4_SIZE; 60 + u32 x = 0; 61 + 62 + while (nbytes--) { 63 + x *= 37; 64 + x += *cptr++; 65 + } 66 + return x & NFS4_DEVICE_ID_HASH_MASK; 67 + } 68 + 69 + static struct nfs4_deviceid_node * 70 + _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, 71 + const struct nfs_client *clp, const struct nfs4_deviceid *id, 72 + long hash) 73 + { 74 + struct nfs4_deviceid_node *d; 75 + struct hlist_node *n; 76 + 77 + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 78 + if (d->ld == ld && d->nfs_client == clp && 79 + !memcmp(&d->deviceid, id, sizeof(*id))) { 80 + if (atomic_read(&d->ref)) 81 + return d; 82 + else 83 + continue; 84 + } 85 + return NULL; 86 + } 87 + 88 + /* 89 + * Lookup a deviceid in cache and get a reference count on it if found 90 + * 91 + * @clp nfs_client associated with deviceid 92 + * @id deviceid to look up 93 + */ 94 + struct nfs4_deviceid_node * 95 + _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, 96 + const struct nfs_client *clp, const struct nfs4_deviceid *id, 97 + long hash) 98 + { 99 + struct nfs4_deviceid_node *d; 100 + 101 + rcu_read_lock(); 102 + d = _lookup_deviceid(ld, clp, id, hash); 103 + if (d && 
!atomic_inc_not_zero(&d->ref)) 104 + d = NULL; 105 + rcu_read_unlock(); 106 + return d; 107 + } 108 + 109 + struct nfs4_deviceid_node * 110 + nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, 111 + const struct nfs_client *clp, const struct nfs4_deviceid *id) 112 + { 113 + return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); 114 + } 115 + EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); 116 + 117 + /* 118 + * Unhash and put deviceid 119 + * 120 + * @clp nfs_client associated with deviceid 121 + * @id the deviceid to unhash 122 + * 123 + * @ret the unhashed node if it was found and its reference count dropped to zero, NULL otherwise. 124 + */ 125 + struct nfs4_deviceid_node * 126 + nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, 127 + const struct nfs_client *clp, const struct nfs4_deviceid *id) 128 + { 129 + struct nfs4_deviceid_node *d; 130 + 131 + spin_lock(&nfs4_deviceid_lock); 132 + rcu_read_lock(); 133 + d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); 134 + rcu_read_unlock(); 135 + if (!d) { 136 + spin_unlock(&nfs4_deviceid_lock); 137 + return NULL; 138 + } 139 + hlist_del_init_rcu(&d->node); 140 + spin_unlock(&nfs4_deviceid_lock); 141 + synchronize_rcu(); 142 + 143 + /* balance the initial ref set in nfs4_init_deviceid_node */ 144 + if (atomic_dec_and_test(&d->ref)) 145 + return d; 146 + 147 + return NULL; 148 + } 149 + EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); 150 + 151 + /* 152 + * Delete a deviceid from cache 153 + * 154 + * @clp struct nfs_client qualifying the deviceid 155 + * @id deviceid to delete 156 + */ 157 + void 158 + nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, 159 + const struct nfs_client *clp, const struct nfs4_deviceid *id) 160 + { 161 + struct nfs4_deviceid_node *d; 162 + 163 + d = nfs4_unhash_put_deviceid(ld, clp, id); 164 + if (!d) 165 + return; 166 + d->ld->free_deviceid_node(d); 167 + } 168 + EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); 169 + 170 + void 171 + nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, 172 + const struct pnfs_layoutdriver_type *ld, 173 + const struct nfs_client *nfs_client, 174 + const struct nfs4_deviceid *id) 175 + { 176 + INIT_HLIST_NODE(&d->node); 177 + d->ld = ld; 178 + d->nfs_client = nfs_client; 179 + d->deviceid = *id; 180 + atomic_set(&d->ref, 1); 181 + } 182 + EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); 183 + 184 + /* 185 + * Insert a deviceid node into the cache if no matching entry exists 186 + * 187 + * @new new deviceid node 188 + * Note that the caller must set up the following members: 189 + * new->ld 190 + * new->nfs_client 191 + * new->deviceid 192 + * 193 + * @ret @new if no matching entry was found, otherwise the existing entry with a reference taken on it. 194 + */ 195 + struct nfs4_deviceid_node * 196 + nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) 197 + { 198 + struct nfs4_deviceid_node *d; 199 + long hash; 200 + 201 + spin_lock(&nfs4_deviceid_lock); 202 + hash = nfs4_deviceid_hash(&new->deviceid); 203 + d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); 204 + if (d) { 205 + spin_unlock(&nfs4_deviceid_lock); 206 + return d; 207 + } 208 + 209 + hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); 210 + spin_unlock(&nfs4_deviceid_lock); 211 + 212 + return new; 213 + } 214 + EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); 215 + 216 + /* 217 + * Dereference a deviceid node and delete it when its reference count drops 218 + * to zero.
219 + * 220 + * @d deviceid node to put 221 + * 222 + * @ret true iff the node was deleted 223 + */ 224 + bool 225 + nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) 226 + { 227 + if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) 228 + return false; 229 + hlist_del_init_rcu(&d->node); 230 + spin_unlock(&nfs4_deviceid_lock); 231 + synchronize_rcu(); 232 + d->ld->free_deviceid_node(d); 233 + return true; 234 + } 235 + EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); 236 + 237 + static void 238 + _deviceid_purge_client(const struct nfs_client *clp, long hash) 239 + { 240 + struct nfs4_deviceid_node *d; 241 + struct hlist_node *n, *next; 242 + HLIST_HEAD(tmp); 243 + 244 + rcu_read_lock(); 245 + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 246 + if (d->nfs_client == clp && atomic_read(&d->ref)) { 247 + hlist_del_init_rcu(&d->node); 248 + hlist_add_head(&d->node, &tmp); 249 + } 250 + rcu_read_unlock(); 251 + 252 + if (hlist_empty(&tmp)) 253 + return; 254 + 255 + synchronize_rcu(); 256 + hlist_for_each_entry_safe(d, n, next, &tmp, node) 257 + if (atomic_dec_and_test(&d->ref)) 258 + d->ld->free_deviceid_node(d); 259 + } 260 + 261 + void 262 + nfs4_deviceid_purge_client(const struct nfs_client *clp) 263 + { 264 + long h; 265 + 266 + spin_lock(&nfs4_deviceid_lock); 267 + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) 268 + _deviceid_purge_client(clp, h); 269 + spin_unlock(&nfs4_deviceid_lock); 270 + }
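The cache is designed to be embedded: a layout driver wraps struct nfs4_deviceid_node in its own per-device structure, tries nfs4_find_get_deviceid() first, and only builds a new node (typically from a GETDEVICEINFO reply) on a miss; a lost insertion race is resolved by dropping the freshly built node in favour of the one already cached. A sketch under those assumptions follows; my_device and my_fill_from_getdeviceinfo() are hypothetical.

#include <linux/slab.h>
#include "pnfs.h"

struct my_device {
	struct nfs4_deviceid_node node;
	/* ... driver-private device state ... */
};

/* Hypothetical: issue GETDEVICEINFO and parse the reply into @mdev. */
static int my_fill_from_getdeviceinfo(struct nfs_server *server,
				      const struct nfs4_deviceid *id,
				      struct my_device *mdev);

static struct my_device *
my_lookup_device(struct nfs_server *server, const struct nfs4_deviceid *id,
		 gfp_t gfp_flags)
{
	struct nfs4_deviceid_node *d;
	struct my_device *mdev;

	/* Fast path: a cached entry comes back with a reference held. */
	d = nfs4_find_get_deviceid(server->pnfs_curr_ld,
				   server->nfs_client, id);
	if (d)
		return container_of(d, struct my_device, node);

	mdev = kzalloc(sizeof(*mdev), gfp_flags);
	if (!mdev)
		return NULL;
	if (my_fill_from_getdeviceinfo(server, id, mdev) < 0) {
		kfree(mdev);
		return NULL;
	}

	nfs4_init_deviceid_node(&mdev->node, server->pnfs_curr_ld,
				server->nfs_client, id);
	d = nfs4_insert_deviceid_node(&mdev->node);
	if (d != &mdev->node) {
		/* Lost the insertion race; use the cached node instead. */
		kfree(mdev);
		return container_of(d, struct my_device, node);
	}
	return mdev;
}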
+6 -3
fs/nfs/read.c
··· 288 288 atomic_set(&req->wb_complete, requests); 289 289 290 290 BUG_ON(desc->pg_lseg != NULL); 291 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); 291 + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 292 + req_offset(req), desc->pg_count, 293 + IOMODE_READ, GFP_KERNEL); 292 294 ClearPageError(page); 293 295 offset = 0; 294 296 nbytes = desc->pg_count; ··· 353 351 } 354 352 req = nfs_list_entry(data->pages.next); 355 353 if ((!lseg) && list_is_singular(&data->pages)) 356 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); 354 + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 355 + req_offset(req), desc->pg_count, 356 + IOMODE_READ, GFP_KERNEL); 357 357 358 358 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 359 359 0, lseg); ··· 664 660 if (ret == 0) 665 661 goto read_complete; /* all pages were read */ 666 662 667 - pnfs_pageio_init_read(&pgio, inode); 668 663 if (rsize < PAGE_CACHE_SIZE) 669 664 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 670 665 else
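Both read paths now hand pnfs_update_layout() the exact byte range of the first request in the batch: req_offset() converts the request's page-cache index back into a file position, and desc->pg_count covers the whole coalesced series. For reference, req_offset() in include/linux/nfs_page.h is (approximately) the following:

static inline loff_t req_offset(struct nfs_page *req)
{
	/* page index scaled to bytes, plus the offset within the page */
	return (((loff_t)req->wb_index) << PAGE_CACHE_SHIFT) + req->wb_offset;
}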
+25
fs/nfs/super.c
··· 63 63 #include "iostat.h" 64 64 #include "internal.h" 65 65 #include "fscache.h" 66 + #include "pnfs.h" 66 67 67 68 #define NFSDBG_FACILITY NFSDBG_VFS 68 69 ··· 733 732 734 733 return 0; 735 734 } 735 + #ifdef CONFIG_NFS_V4_1 736 + void show_sessions(struct seq_file *m, struct nfs_server *server) 737 + { 738 + if (nfs4_has_session(server->nfs_client)) 739 + seq_printf(m, ",sessions"); 740 + } 741 + #else 742 + void show_sessions(struct seq_file *m, struct nfs_server *server) {} 743 + #endif 744 + 745 + #ifdef CONFIG_NFS_V4_1 746 + void show_pnfs(struct seq_file *m, struct nfs_server *server) 747 + { 748 + seq_printf(m, ",pnfs="); 749 + if (server->pnfs_curr_ld) 750 + seq_printf(m, "%s", server->pnfs_curr_ld->name); 751 + else 752 + seq_printf(m, "not configured"); 753 + } 754 + #else /* CONFIG_NFS_V4_1 */ 755 + void show_pnfs(struct seq_file *m, struct nfs_server *server) {} 756 + #endif /* CONFIG_NFS_V4_1 */ 736 757 737 758 static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) 738 759 { ··· 815 792 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); 816 793 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); 817 794 seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); 795 + show_sessions(m, nfss); 796 + show_pnfs(m, nfss); 818 797 } 819 798 #endif 820 799
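With these helpers wired into the v4.1 options output, an NFSv4.1 mount's line in /proc/mounts grows two new fields; e.g., assuming the files layout driver (which registers under the name "LAYOUT_NFSV4_1_FILES") was negotiated, the tail of the line would read roughly:

	...,sessions,pnfs=LAYOUT_NFSV4_1_FILES

and ",pnfs=not configured" otherwise. Note that the fallback string contains a space, which is unusual inside a comma-separated options list and may trip naive parsers.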
+6 -4
fs/nfs/write.c
··· 939 939 atomic_set(&req->wb_complete, requests); 940 940 941 941 BUG_ON(desc->pg_lseg); 942 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); 942 + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 943 + req_offset(req), desc->pg_count, 944 + IOMODE_RW, GFP_NOFS); 943 945 ClearPageError(page); 944 946 offset = 0; 945 947 nbytes = desc->pg_count; ··· 1015 1013 } 1016 1014 req = nfs_list_entry(data->pages.next); 1017 1015 if ((!lseg) && list_is_singular(&data->pages)) 1018 - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); 1016 + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, 1017 + req_offset(req), desc->pg_count, 1018 + IOMODE_RW, GFP_NOFS); 1019 1019 1020 1020 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1021 1021 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) ··· 1035 1031 struct inode *inode, int ioflags) 1036 1032 { 1037 1033 size_t wsize = NFS_SERVER(inode)->wsize; 1038 - 1039 - pnfs_pageio_init_write(pgio, inode); 1040 1034 1041 1035 if (wsize < PAGE_CACHE_SIZE) 1042 1036 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+1
include/linux/nfs4.h
··· 562 562 NFSPROC4_CLNT_LAYOUTGET, 563 563 NFSPROC4_CLNT_GETDEVICEINFO, 564 564 NFSPROC4_CLNT_LAYOUTCOMMIT, 565 + NFSPROC4_CLNT_LAYOUTRETURN, 565 566 }; 566 567 567 568 /* nfs41 types */
+1 -1
include/linux/nfs_page.h
··· 68 68 int pg_ioflags; 69 69 int pg_error; 70 70 struct pnfs_layout_segment *pg_lseg; 71 - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 71 + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 72 72 }; 73 73 74 74 #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
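Returning bool lets a driver's pg_test veto coalescing cleanly. A driver that must also bound the total I/O size can layer its own check on top of the generic one; a sketch follows, where MY_MAX_IO_SIZE is a hypothetical driver limit:

#include <linux/nfs_page.h>
#include "pnfs.h"

#define MY_MAX_IO_SIZE	(1U << 20)	/* hypothetical 1 MiB cap */

static bool my_pg_test(struct nfs_pageio_descriptor *pgio,
		       struct nfs_page *prev, struct nfs_page *req)
{
	/* First apply the generic layout-range check. */
	if (!pnfs_generic_pg_test(pgio, prev, req))
		return false;

	/* Then refuse to grow the batch past the driver's maximum. */
	return pgio->pg_count + req->wb_bytes <= MY_MAX_IO_SIZE;
}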
+23
include/linux/nfs_xdr.h
··· 269 269 struct nfs4_layoutcommit_res res; 270 270 }; 271 271 272 + struct nfs4_layoutreturn_args { 273 + __u32 layout_type; 274 + struct inode *inode; 275 + nfs4_stateid stateid; 276 + struct nfs4_sequence_args seq_args; 277 + }; 278 + 279 + struct nfs4_layoutreturn_res { 280 + struct nfs4_sequence_res seq_res; 281 + u32 lrs_present; 282 + nfs4_stateid stateid; 283 + }; 284 + 285 + struct nfs4_layoutreturn { 286 + struct nfs4_layoutreturn_args args; 287 + struct nfs4_layoutreturn_res res; 288 + struct rpc_cred *cred; 289 + struct nfs_client *clp; 290 + int rpc_status; 291 + }; 292 + 272 293 /* 273 294 * Arguments to the open call. 274 295 */ ··· 1108 1087 const struct rpc_call_ops *mds_ops; 1109 1088 int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); 1110 1089 __u64 mds_offset; 1090 + int pnfs_error; 1111 1091 struct page *page_array[NFS_PAGEVEC_SIZE]; 1112 1092 }; 1113 1093 ··· 1134 1112 unsigned long timestamp; /* For lease renewal */ 1135 1113 #endif 1136 1114 __u64 mds_offset; /* Filelayout dense stripe */ 1115 + int pnfs_error; 1137 1116 struct page *page_array[NFS_PAGEVEC_SIZE]; 1138 1117 }; 1139 1118
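The new pnfs_error fields are the return channel for the non-RPC layout drivers introduced above: the driver records an errno from its own completion context, then calls pnfs_ld_read_done()/pnfs_ld_write_done() (fs/nfs/pnfs.c), which either completes the request normally or re-drives it through the MDS. A hypothetical driver completion callback might look like this:

/* Sketch: completion handler of a hypothetical non-RPC layout driver. */
static void my_read_io_done(struct nfs_read_data *data, int status)
{
	if (status >= 0) {
		data->res.count = status;	/* bytes transferred */
		data->pnfs_error = 0;
	} else {
		/* pnfs_ld_read_done() will retry this read via the MDS. */
		data->pnfs_error = status;
	}
	pnfs_ld_read_done(data);
}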
+345
include/linux/pnfs_osd_xdr.h
··· 1 + /* 2 + * pNFS-osd on-the-wire data structures 3 + * 4 + * Copyright (C) 2007 Panasas Inc. [year of first publication] 5 + * All rights reserved. 6 + * 7 + * Benny Halevy <bhalevy@panasas.com> 8 + * Boaz Harrosh <bharrosh@panasas.com> 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of the GNU General Public License version 2 12 + * See the file COPYING included with this distribution for more details. 13 + * 14 + * Redistribution and use in source and binary forms, with or without 15 + * modification, are permitted provided that the following conditions 16 + * are met: 17 + * 18 + * 1. Redistributions of source code must retain the above copyright 19 + * notice, this list of conditions and the following disclaimer. 20 + * 2. Redistributions in binary form must reproduce the above copyright 21 + * notice, this list of conditions and the following disclaimer in the 22 + * documentation and/or other materials provided with the distribution. 23 + * 3. Neither the name of the Panasas company nor the names of its 24 + * contributors may be used to endorse or promote products derived 25 + * from this software without specific prior written permission. 26 + * 27 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 28 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 29 + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 34 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 + */ 39 + #ifndef __PNFS_OSD_XDR_H__ 40 + #define __PNFS_OSD_XDR_H__ 41 + 42 + #include <linux/nfs_fs.h> 43 + #include <linux/nfs_page.h> 44 + #include <scsi/osd_protocol.h> 45 + 46 + #define PNFS_OSD_OSDNAME_MAXSIZE 256 47 + 48 + /* 49 + * draft-ietf-nfsv4-minorversion-22 50 + * draft-ietf-nfsv4-pnfs-obj-12 51 + */ 52 + 53 + /* Layout Structure */ 54 + 55 + enum pnfs_osd_raid_algorithm4 { 56 + PNFS_OSD_RAID_0 = 1, 57 + PNFS_OSD_RAID_4 = 2, 58 + PNFS_OSD_RAID_5 = 3, 59 + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ 60 + }; 61 + 62 + /* struct pnfs_osd_data_map4 { 63 + * uint32_t odm_num_comps; 64 + * length4 odm_stripe_unit; 65 + * uint32_t odm_group_width; 66 + * uint32_t odm_group_depth; 67 + * uint32_t odm_mirror_cnt; 68 + * pnfs_osd_raid_algorithm4 odm_raid_algorithm; 69 + * }; 70 + */ 71 + struct pnfs_osd_data_map { 72 + u32 odm_num_comps; 73 + u64 odm_stripe_unit; 74 + u32 odm_group_width; 75 + u32 odm_group_depth; 76 + u32 odm_mirror_cnt; 77 + u32 odm_raid_algorithm; 78 + }; 79 + 80 + /* struct pnfs_osd_objid4 { 81 + * deviceid4 oid_device_id; 82 + * uint64_t oid_partition_id; 83 + * uint64_t oid_object_id; 84 + * }; 85 + */ 86 + struct pnfs_osd_objid { 87 + struct nfs4_deviceid oid_device_id; 88 + u64 oid_partition_id; 89 + u64 oid_object_id; 90 + }; 91 + 92 + /* For printout. 
I use: 93 + * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); 94 + * BE style 95 + */ 96 + #define _DEVID_LO(oid_device_id) \ 97 + (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) 98 + 99 + #define _DEVID_HI(oid_device_id) \ 100 + (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) 101 + 102 + static inline int 103 + pnfs_osd_objid_xdr_sz(void) 104 + { 105 + return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; 106 + } 107 + 108 + enum pnfs_osd_version { 109 + PNFS_OSD_MISSING = 0, 110 + PNFS_OSD_VERSION_1 = 1, 111 + PNFS_OSD_VERSION_2 = 2 112 + }; 113 + 114 + struct pnfs_osd_opaque_cred { 115 + u32 cred_len; 116 + void *cred; 117 + }; 118 + 119 + enum pnfs_osd_cap_key_sec { 120 + PNFS_OSD_CAP_KEY_SEC_NONE = 0, 121 + PNFS_OSD_CAP_KEY_SEC_SSV = 1, 122 + }; 123 + 124 + /* struct pnfs_osd_object_cred4 { 125 + * pnfs_osd_objid4 oc_object_id; 126 + * pnfs_osd_version4 oc_osd_version; 127 + * pnfs_osd_cap_key_sec4 oc_cap_key_sec; 128 + * opaque oc_capability_key<>; 129 + * opaque oc_capability<>; 130 + * }; 131 + */ 132 + struct pnfs_osd_object_cred { 133 + struct pnfs_osd_objid oc_object_id; 134 + u32 oc_osd_version; 135 + u32 oc_cap_key_sec; 136 + struct pnfs_osd_opaque_cred oc_cap_key; 137 + struct pnfs_osd_opaque_cred oc_cap; 138 + }; 139 + 140 + /* struct pnfs_osd_layout4 { 141 + * pnfs_osd_data_map4 olo_map; 142 + * uint32_t olo_comps_index; 143 + * pnfs_osd_object_cred4 olo_components<>; 144 + * }; 145 + */ 146 + struct pnfs_osd_layout { 147 + struct pnfs_osd_data_map olo_map; 148 + u32 olo_comps_index; 149 + u32 olo_num_comps; 150 + struct pnfs_osd_object_cred *olo_comps; 151 + }; 152 + 153 + /* Device Address */ 154 + enum pnfs_osd_targetid_type { 155 + OBJ_TARGET_ANON = 1, 156 + OBJ_TARGET_SCSI_NAME = 2, 157 + OBJ_TARGET_SCSI_DEVICE_ID = 3, 158 + }; 159 + 160 + /* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { 161 + * case OBJ_TARGET_SCSI_NAME: 162 + * string oti_scsi_name<>; 163 + * 164 + * case OBJ_TARGET_SCSI_DEVICE_ID: 165 + * opaque oti_scsi_device_id<>; 166 + * 167 + * default: 168 + * void; 169 + * }; 170 + * 171 + * union pnfs_osd_targetaddr4 switch (bool ota_available) { 172 + * case TRUE: 173 + * netaddr4 ota_netaddr; 174 + * case FALSE: 175 + * void; 176 + * }; 177 + * 178 + * struct pnfs_osd_deviceaddr4 { 179 + * pnfs_osd_targetid4 oda_targetid; 180 + * pnfs_osd_targetaddr4 oda_targetaddr; 181 + * uint64_t oda_lun; 182 + * opaque oda_systemid<>; 183 + * pnfs_osd_object_cred4 oda_root_obj_cred; 184 + * opaque oda_osdname<>; 185 + * }; 186 + */ 187 + struct pnfs_osd_targetid { 188 + u32 oti_type; 189 + struct nfs4_string oti_scsi_device_id; 190 + }; 191 + 192 + enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; 193 + 194 + /* struct netaddr4 { 195 + * // see struct rpcb in RFC1833 196 + * string r_netid<>; // network id 197 + * string r_addr<>; // universal address 198 + * }; 199 + */ 200 + struct pnfs_osd_net_addr { 201 + struct nfs4_string r_netid; 202 + struct nfs4_string r_addr; 203 + }; 204 + 205 + struct pnfs_osd_targetaddr { 206 + u32 ota_available; 207 + struct pnfs_osd_net_addr ota_netaddr; 208 + }; 209 + 210 + enum { 211 + NETWORK_ID_MAX = 16 / 4, 212 + UNIVERSAL_ADDRESS_MAX = 64 / 4, 213 + PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, 214 + }; 215 + 216 + struct pnfs_osd_deviceaddr { 217 + struct pnfs_osd_targetid oda_targetid; 218 + struct pnfs_osd_targetaddr oda_targetaddr; 219 + u8 oda_lun[8]; 220 + struct nfs4_string oda_systemid; 221 + struct 
pnfs_osd_object_cred oda_root_obj_cred; 222 + struct nfs4_string oda_osdname; 223 + }; 224 + 225 + enum { 226 + ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, 227 + PNFS_OSD_DEVICEADDR_MAX = 228 + PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + 229 + 2 /*oda_lun*/ + 230 + 1 + OSD_SYSTEMID_LEN + 231 + 1 + ODA_OSDNAME_MAX, 232 + }; 233 + 234 + /* LAYOUTCOMMIT: layoutupdate */ 235 + 236 + /* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { 237 + * case TRUE: 238 + * int64_t dsu_delta; 239 + * case FALSE: 240 + * void; 241 + * }; 242 + * 243 + * struct pnfs_osd_layoutupdate4 { 244 + * pnfs_osd_deltaspaceused4 olu_delta_space_used; 245 + * bool olu_ioerr_flag; 246 + * }; 247 + */ 248 + struct pnfs_osd_layoutupdate { 249 + u32 dsu_valid; 250 + s64 dsu_delta; 251 + u32 olu_ioerr_flag; 252 + }; 253 + 254 + /* LAYOUTRETURN: I/O Error Report */ 255 + 256 + enum pnfs_osd_errno { 257 + PNFS_OSD_ERR_EIO = 1, 258 + PNFS_OSD_ERR_NOT_FOUND = 2, 259 + PNFS_OSD_ERR_NO_SPACE = 3, 260 + PNFS_OSD_ERR_BAD_CRED = 4, 261 + PNFS_OSD_ERR_NO_ACCESS = 5, 262 + PNFS_OSD_ERR_UNREACHABLE = 6, 263 + PNFS_OSD_ERR_RESOURCE = 7 264 + }; 265 + 266 + /* struct pnfs_osd_ioerr4 { 267 + * pnfs_osd_objid4 oer_component; 268 + * length4 oer_comp_offset; 269 + * length4 oer_comp_length; 270 + * bool oer_iswrite; 271 + * pnfs_osd_errno4 oer_errno; 272 + * }; 273 + */ 274 + struct pnfs_osd_ioerr { 275 + struct pnfs_osd_objid oer_component; 276 + u64 oer_comp_offset; 277 + u64 oer_comp_length; 278 + u32 oer_iswrite; 279 + u32 oer_errno; 280 + }; 281 + 282 + /* OSD XDR API */ 283 + /* Layout helpers */ 284 + /* Layout decoding is done in two parts: 285 + * 1. First call pnfs_osd_xdr_decode_layout_map() to read in only the header 286 + * part of the layout. @iter members need not be initialized. 287 + * Returned: 288 + * @layout members are set. (@layout->olo_comps set to NULL). 289 + * 290 + * Zero on success, or negative error if the passed xdr is broken. 291 + * 292 + * 2. Then call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns 293 + * false, to decode the next component. 294 + * Returned: 295 + * true if there is more to decode, or false when done or on error. 296 + * 297 + * Example: 298 + * struct pnfs_osd_xdr_decode_layout_iter iter; 299 + * struct pnfs_osd_layout layout; 300 + * struct pnfs_osd_object_cred comp; 301 + * int status; 302 + * 303 + * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); 304 + * if (unlikely(status)) 305 + * goto err; 306 + * while (pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { 307 + * // All of @comp's strings point inside the xdr buffer 308 + * // or scratch buffer. Copy them out to user memory, e.g. 309 + * copy_single_comp(dest_comp++, &comp); 310 + * } 311 + * if (unlikely(status)) 312 + * goto err; 313 + */ 314 + 315 + struct pnfs_osd_xdr_decode_layout_iter { 316 + unsigned total_comps; 317 + unsigned decoded_comps; 318 + }; 319 + 320 + extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, 321 + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); 322 + 323 + extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, 324 + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, 325 + int *err); 326 + 327 + /* Device Info helpers */ 328 + 329 + /* Note: All strings inside @deviceaddr point to space inside @p. 330 + * @p should stay valid while @deviceaddr is in use.
331 + */ 332 + extern void pnfs_osd_xdr_decode_deviceaddr( 333 + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); 334 + 335 + /* layoutupdate (layout_commit) xdr helpers */ 336 + extern int 337 + pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, 338 + struct pnfs_osd_layoutupdate *lou); 339 + 340 + /* osd_ioerror encoding/decoding (layout_return) */ 341 + /* Client */ 342 + extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); 343 + extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); 344 + 345 + #endif /* __PNFS_OSD_XDR_H__ */
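The two-pass decode described in the comment block above, written out as a complete function; copy_single_comp() is left hypothetical, exactly as in the header's own example:

#include <linux/pnfs_osd_xdr.h>

/* Hypothetical per-component copy-out, as in the header's example. */
static void copy_single_comp(struct pnfs_osd_object_cred *dst,
			     struct pnfs_osd_object_cred *src);

static int my_decode_layout(struct xdr_stream *xdr,
			    struct pnfs_osd_object_cred *dest_comps)
{
	struct pnfs_osd_xdr_decode_layout_iter iter;
	struct pnfs_osd_layout layout;
	struct pnfs_osd_object_cred comp;
	int status;

	/* Pass 1: fixed-size header only; layout.olo_comps stays NULL. */
	status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
	if (unlikely(status))
		return status;

	/* Pass 2: one component per iteration until false is returned;
	 * @comp's strings point into the xdr/scratch buffer, so they are
	 * copied out before the next iteration can reuse that space. */
	while (pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status))
		copy_single_comp(dest_comps++, &comp);
	return status;
}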
+2
include/linux/sunrpc/xdr.h
··· 216 216 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, 217 217 unsigned int base, unsigned int len); 218 218 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); 219 + extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, 220 + struct page **pages, unsigned int len); 219 221 extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); 220 222 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); 221 223 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
+19
net/sunrpc/xdr.c
··· 638 638 } 639 639 EXPORT_SYMBOL_GPL(xdr_init_decode); 640 640 641 + /** 642 + * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages. 643 + * @xdr: pointer to xdr_stream struct 644 + * @buf: pointer to XDR buffer from which to decode data 645 + * @pages: list of pages to decode into 646 + * @len: length in bytes of buffer in pages 647 + */ 648 + void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, 649 + struct page **pages, unsigned int len) 650 + { 651 + memset(buf, 0, sizeof(*buf)); 652 + buf->pages = pages; 653 + buf->page_len = len; 654 + buf->buflen = len; 655 + buf->len = len; 656 + xdr_init_decode(xdr, buf, NULL); 657 + } 658 + EXPORT_SYMBOL_GPL(xdr_init_decode_pages); 659 + 641 660 static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) 642 661 { 643 662 __be32 *p = xdr->p;
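A typical consumer of the new helper is a layout driver decoding the opaque layout body that LAYOUTGET returns in page form; pairing xdr_init_decode_pages() with xdr_set_scratch_buffer() (declared in the header hunk above) lets xdr_inline_decode() hand back items that straddle a page boundary. A sketch, with the page array and length assumed to come from the LAYOUTGET result:

#include <linux/mm.h>
#include <linux/sunrpc/xdr.h>

/* Sketch: pull one 4-byte count out of an opaque layout body in pages. */
static int my_decode_layout_body(struct page **pages, unsigned int len,
				 gfp_t gfp_flags)
{
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	__be32 *p;
	u32 count;

	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, pages, len);
	/* Scratch space is what lets inline decodes cross page boundaries. */
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p)) {
		__free_page(scratch);
		return -EIO;
	}
	count = be32_to_cpup(p);
	/* ... decode 'count' further items here ... */

	__free_page(scratch);
	return count ? 0 : -EINVAL;	/* treat an empty body as invalid */
}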