Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

NFSv4/flexfiles: Add support for striped layouts

Update the lseg creation path to parse striped layouts and add their
stripes to each mirror, enabling support for striped layouts.

Limitations:

1. All mirrors must have the same number of stripes; a layout whose
   mirrors report differing stripe counts is rejected with -EIO.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>

Authored by Jonathan Curley and committed by Anna Schumaker
20b1d75f 8a8e0f55

+167 -102
+165 -102
fs/nfs/flexfilelayout/flexfilelayout.c
··· 177 177 #endif 178 178 } 179 179 180 - static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, 181 - const struct nfs4_ff_layout_mirror *m2) 180 + static bool ff_dss_match_fh(const struct nfs4_ff_layout_ds_stripe *dss1, 181 + const struct nfs4_ff_layout_ds_stripe *dss2) 182 182 { 183 183 int i, j; 184 184 185 - if (m1->dss[0].fh_versions_cnt != m2->dss[0].fh_versions_cnt) 185 + if (dss1->fh_versions_cnt != dss2->fh_versions_cnt) 186 186 return false; 187 - for (i = 0; i < m1->dss[0].fh_versions_cnt; i++) { 187 + 188 + for (i = 0; i < dss1->fh_versions_cnt; i++) { 188 189 bool found_fh = false; 189 - for (j = 0; j < m2->dss[0].fh_versions_cnt; j++) { 190 - if (nfs_compare_fh(&m1->dss[0].fh_versions[i], 191 - &m2->dss[0].fh_versions[j]) == 0) { 190 + for (j = 0; j < dss2->fh_versions_cnt; j++) { 191 + if (nfs_compare_fh(&dss1->fh_versions[i], 192 + &dss2->fh_versions[j]) == 0) { 192 193 found_fh = true; 193 194 break; 194 195 } ··· 197 196 if (!found_fh) 198 197 return false; 199 198 } 199 + return true; 200 + } 201 + 202 + static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, 203 + const struct nfs4_ff_layout_mirror *m2) 204 + { 205 + u32 dss_id; 206 + 207 + if (m1->dss_count != m2->dss_count) 208 + return false; 209 + 210 + for (dss_id = 0; dss_id < m1->dss_count; dss_id++) 211 + if (!ff_dss_match_fh(&m1->dss[dss_id], &m2->dss[dss_id])) 212 + return false; 213 + 214 + return true; 215 + } 216 + 217 + static bool ff_mirror_match_devid(const struct nfs4_ff_layout_mirror *m1, 218 + const struct nfs4_ff_layout_mirror *m2) 219 + { 220 + u32 dss_id; 221 + 222 + if (m1->dss_count != m2->dss_count) 223 + return false; 224 + 225 + for (dss_id = 0; dss_id < m1->dss_count; dss_id++) 226 + if (memcmp(&m1->dss[dss_id].devid, 227 + &m2->dss[dss_id].devid, 228 + sizeof(m1->dss[dss_id].devid)) != 0) 229 + return false; 230 + 200 231 return true; 201 232 } 202 233 ··· 242 209 243 210 spin_lock(&inode->i_lock); 244 211 list_for_each_entry(pos, 
&ff_layout->mirrors, mirrors) { 245 - if (memcmp(&mirror->dss[0].devid, &pos->dss[0].devid, 246 - sizeof(pos->dss[0].devid)) != 0) 212 + if (!ff_mirror_match_devid(mirror, pos)) 247 213 continue; 248 214 if (!ff_mirror_match_fh(mirror, pos)) 249 215 continue; ··· 273 241 static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) 274 242 { 275 243 struct nfs4_ff_layout_mirror *mirror; 244 + u32 dss_id; 276 245 277 246 mirror = kzalloc(sizeof(*mirror), gfp_flags); 278 247 if (mirror != NULL) { 279 248 spin_lock_init(&mirror->lock); 280 249 refcount_set(&mirror->ref, 1); 281 250 INIT_LIST_HEAD(&mirror->mirrors); 282 - nfs_localio_file_init(&mirror->dss[0].nfl); 251 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) 252 + nfs_localio_file_init(&mirror->dss[dss_id].nfl); 283 253 } 284 254 return mirror; 285 255 } ··· 289 255 static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) 290 256 { 291 257 const struct cred *cred; 292 - int dss_id = 0; 258 + u32 dss_id; 293 259 294 260 ff_layout_remove_mirror(mirror); 295 261 296 - kfree(mirror->dss[dss_id].fh_versions); 297 - nfs_close_local_fh(&mirror->dss[dss_id].nfl); 298 - cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred); 299 - put_cred(cred); 300 - cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred); 301 - put_cred(cred); 302 - nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds); 262 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) { 263 + kfree(mirror->dss[dss_id].fh_versions); 264 + cred = rcu_access_pointer(mirror->dss[dss_id].ro_cred); 265 + put_cred(cred); 266 + cred = rcu_access_pointer(mirror->dss[dss_id].rw_cred); 267 + put_cred(cred); 268 + nfs_close_local_fh(&mirror->dss[dss_id].nfl); 269 + nfs4_ff_layout_put_deviceid(mirror->dss[dss_id].mirror_ds); 270 + } 303 271 304 272 kfree(mirror->dss); 305 273 kfree(mirror); ··· 407 371 free_me); 408 372 } 409 373 374 + static u32 ff_mirror_efficiency_sum(const struct nfs4_ff_layout_mirror *mirror) 375 + { 376 
+ u32 dss_id, sum = 0; 377 + 378 + for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) 379 + sum += mirror->dss[dss_id].efficiency; 380 + 381 + return sum; 382 + } 383 + 410 384 static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) 411 385 { 412 386 int i, j; 413 387 414 388 for (i = 0; i < fls->mirror_array_cnt - 1; i++) { 415 389 for (j = i + 1; j < fls->mirror_array_cnt; j++) 416 - if (fls->mirror_array[i]->dss[0].efficiency < 417 - fls->mirror_array[j]->dss[0].efficiency) 390 + if (ff_mirror_efficiency_sum(fls->mirror_array[i]) < 391 + ff_mirror_efficiency_sum(fls->mirror_array[j])) 418 392 swap(fls->mirror_array[i], 419 393 fls->mirror_array[j]); 420 394 } ··· 444 398 u32 mirror_array_cnt; 445 399 __be32 *p; 446 400 int i, rc; 401 + struct nfs4_ff_layout_ds_stripe *dss_info; 447 402 448 403 dprintk("--> %s\n", __func__); 449 404 scratch = folio_alloc(gfp_flags, 0); ··· 487 440 kuid_t uid; 488 441 kgid_t gid; 489 442 u32 fh_count, id; 490 - int j, dss_id = 0; 443 + int j, dss_id; 491 444 492 445 rc = -EIO; 493 446 p = xdr_inline_decode(&stream, 4); 494 447 if (!p) 495 448 goto out_err_free; 496 449 497 - dss_count = be32_to_cpup(p); 450 + // Ensure all mirrors have same stripe count. 451 + if (dss_count == 0) 452 + dss_count = be32_to_cpup(p); 453 + else if (dss_count != be32_to_cpup(p)) 454 + goto out_err_free; 498 455 499 - /* FIXME: allow for striping? 
*/ 500 - if (dss_count != 1) 456 + if (dss_count > NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT || 457 + dss_count == 0) 458 + goto out_err_free; 459 + 460 + if (dss_count > 1 && stripe_unit == 0) 501 461 goto out_err_free; 502 462 503 463 fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); ··· 518 464 kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), 519 465 gfp_flags); 520 466 521 - /* deviceid */ 522 - rc = decode_deviceid(&stream, &fls->mirror_array[i]->dss[dss_id].devid); 523 - if (rc) 524 - goto out_err_free; 467 + for (dss_id = 0; dss_id < dss_count; dss_id++) { 468 + dss_info = &fls->mirror_array[i]->dss[dss_id]; 469 + dss_info->mirror = fls->mirror_array[i]; 525 470 526 - /* efficiency */ 527 - rc = -EIO; 528 - p = xdr_inline_decode(&stream, 4); 529 - if (!p) 530 - goto out_err_free; 531 - fls->mirror_array[i]->dss[dss_id].efficiency = be32_to_cpup(p); 532 - 533 - /* stateid */ 534 - rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->dss[dss_id].stateid); 535 - if (rc) 536 - goto out_err_free; 537 - 538 - /* fh */ 539 - rc = -EIO; 540 - p = xdr_inline_decode(&stream, 4); 541 - if (!p) 542 - goto out_err_free; 543 - fh_count = be32_to_cpup(p); 544 - 545 - fls->mirror_array[i]->dss[dss_id].fh_versions = 546 - kcalloc(fh_count, sizeof(struct nfs_fh), 547 - gfp_flags); 548 - if (fls->mirror_array[i]->dss[dss_id].fh_versions == NULL) { 549 - rc = -ENOMEM; 550 - goto out_err_free; 551 - } 552 - 553 - for (j = 0; j < fh_count; j++) { 554 - rc = decode_nfs_fh(&stream, 555 - &fls->mirror_array[i]->dss[dss_id].fh_versions[j]); 471 + /* deviceid */ 472 + rc = decode_deviceid(&stream, &dss_info->devid); 556 473 if (rc) 557 474 goto out_err_free; 475 + 476 + /* efficiency */ 477 + rc = -EIO; 478 + p = xdr_inline_decode(&stream, 4); 479 + if (!p) 480 + goto out_err_free; 481 + dss_info->efficiency = be32_to_cpup(p); 482 + 483 + /* stateid */ 484 + rc = decode_pnfs_stateid(&stream, &dss_info->stateid); 485 + if (rc) 486 + goto out_err_free; 487 + 488 + /* fh 
*/ 489 + rc = -EIO; 490 + p = xdr_inline_decode(&stream, 4); 491 + if (!p) 492 + goto out_err_free; 493 + fh_count = be32_to_cpup(p); 494 + 495 + dss_info->fh_versions = 496 + kcalloc(fh_count, sizeof(struct nfs_fh), 497 + gfp_flags); 498 + if (dss_info->fh_versions == NULL) { 499 + rc = -ENOMEM; 500 + goto out_err_free; 501 + } 502 + 503 + for (j = 0; j < fh_count; j++) { 504 + rc = decode_nfs_fh(&stream, 505 + &dss_info->fh_versions[j]); 506 + if (rc) 507 + goto out_err_free; 508 + } 509 + 510 + dss_info->fh_versions_cnt = fh_count; 511 + 512 + /* user */ 513 + rc = decode_name(&stream, &id); 514 + if (rc) 515 + goto out_err_free; 516 + 517 + uid = make_kuid(&init_user_ns, id); 518 + 519 + /* group */ 520 + rc = decode_name(&stream, &id); 521 + if (rc) 522 + goto out_err_free; 523 + 524 + gid = make_kgid(&init_user_ns, id); 525 + 526 + if (gfp_flags & __GFP_FS) 527 + kcred = prepare_kernel_cred(&init_task); 528 + else { 529 + unsigned int nofs_flags = memalloc_nofs_save(); 530 + 531 + kcred = prepare_kernel_cred(&init_task); 532 + memalloc_nofs_restore(nofs_flags); 533 + } 534 + rc = -ENOMEM; 535 + if (!kcred) 536 + goto out_err_free; 537 + kcred->fsuid = uid; 538 + kcred->fsgid = gid; 539 + cred = RCU_INITIALIZER(kcred); 540 + 541 + if (lgr->range.iomode == IOMODE_READ) 542 + rcu_assign_pointer(dss_info->ro_cred, cred); 543 + else 544 + rcu_assign_pointer(dss_info->rw_cred, cred); 558 545 } 559 - 560 - fls->mirror_array[i]->dss[dss_id].fh_versions_cnt = fh_count; 561 - 562 - /* user */ 563 - rc = decode_name(&stream, &id); 564 - if (rc) 565 - goto out_err_free; 566 - 567 - uid = make_kuid(&init_user_ns, id); 568 - 569 - /* group */ 570 - rc = decode_name(&stream, &id); 571 - if (rc) 572 - goto out_err_free; 573 - 574 - gid = make_kgid(&init_user_ns, id); 575 - 576 - if (gfp_flags & __GFP_FS) 577 - kcred = prepare_kernel_cred(&init_task); 578 - else { 579 - unsigned int nofs_flags = memalloc_nofs_save(); 580 - kcred = prepare_kernel_cred(&init_task); 581 - 
memalloc_nofs_restore(nofs_flags); 582 - } 583 - rc = -ENOMEM; 584 - if (!kcred) 585 - goto out_err_free; 586 - kcred->fsuid = uid; 587 - kcred->fsgid = gid; 588 - cred = RCU_INITIALIZER(kcred); 589 - 590 - if (lgr->range.iomode == IOMODE_READ) 591 - rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred); 592 - else 593 - rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred); 594 546 595 547 mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]); 596 548 if (mirror != fls->mirror_array[i]) { 597 - /* swap cred ptrs so free_mirror will clean up old */ 598 - if (lgr->range.iomode == IOMODE_READ) { 599 - cred = xchg(&mirror->dss[dss_id].ro_cred, 600 - fls->mirror_array[i]->dss[dss_id].ro_cred); 601 - rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].ro_cred, cred); 602 - } else { 603 - cred = xchg(&mirror->dss[dss_id].rw_cred, 604 - fls->mirror_array[i]->dss[dss_id].rw_cred); 605 - rcu_assign_pointer(fls->mirror_array[i]->dss[dss_id].rw_cred, cred); 549 + for (dss_id = 0; dss_id < dss_count; dss_id++) { 550 + dss_info = &fls->mirror_array[i]->dss[dss_id]; 551 + /* swap cred ptrs so free_mirror will clean up old */ 552 + if (lgr->range.iomode == IOMODE_READ) { 553 + cred = xchg(&mirror->dss[dss_id].ro_cred, 554 + dss_info->ro_cred); 555 + rcu_assign_pointer(dss_info->ro_cred, cred); 556 + } else { 557 + cred = xchg(&mirror->dss[dss_id].rw_cred, 558 + dss_info->rw_cred); 559 + rcu_assign_pointer(dss_info->rw_cred, cred); 560 + } 606 561 } 607 562 ff_layout_free_mirror(fls->mirror_array[i]); 608 563 fls->mirror_array[i] = mirror;
+2
fs/nfs/flexfilelayout/flexfilelayout.h
··· 21 21 * due to network error etc. */ 22 22 #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 23 23 24 + #define NFS4_FLEXFILE_LAYOUT_MAX_STRIPE_CNT 4096 25 + 24 26 /* LAYOUTSTATS report interval in ms */ 25 27 #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) 26 28 #define FF_LAYOUTSTATS_MAXDEV 4