Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
"A fix for a race condition around r_parent tracking that took a long
time to track down from Alex and some fixes for potential crashes on
accessing invalid memory from Max and myself.

All marked for stable"

* tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client:
libceph: fix invalid accesses to ceph_connection_v1_info
ceph: fix crash after fscrypt_encrypt_pagecache_blocks() error
ceph: always call ceph_shift_unused_folios_left()
ceph: fix race condition where r_parent becomes stale before sending message
ceph: fix race condition validating r_parent before applying state

+223 -126
+5 -4
fs/ceph/addr.c
··· 1264 1264 0, 1265 1265 gfp_flags); 1266 1266 if (IS_ERR(pages[index])) { 1267 - if (PTR_ERR(pages[index]) == -EINVAL) { 1267 + int err = PTR_ERR(pages[index]); 1268 + 1269 + if (err == -EINVAL) { 1268 1270 pr_err_client(cl, "inode->i_blkbits=%hhu\n", 1269 1271 inode->i_blkbits); 1270 1272 } ··· 1275 1273 BUG_ON(ceph_wbc->locked_pages == 0); 1276 1274 1277 1275 pages[index] = NULL; 1278 - return PTR_ERR(pages[index]); 1276 + return err; 1279 1277 } 1280 1278 } else { 1281 1279 pages[index] = &folio->page; ··· 1689 1687 1690 1688 process_folio_batch: 1691 1689 rc = ceph_process_folio_batch(mapping, wbc, &ceph_wbc); 1690 + ceph_shift_unused_folios_left(&ceph_wbc.fbatch); 1692 1691 if (rc) 1693 1692 goto release_folios; 1694 1693 ··· 1698 1695 goto release_folios; 1699 1696 1700 1697 if (ceph_wbc.processed_in_fbatch) { 1701 - ceph_shift_unused_folios_left(&ceph_wbc.fbatch); 1702 - 1703 1698 if (folio_batch_count(&ceph_wbc.fbatch) == 0 && 1704 1699 ceph_wbc.locked_pages < ceph_wbc.max_pages) { 1705 1700 doutc(cl, "reached end fbatch, trying for more\n");
+6 -8
fs/ceph/debugfs.c
··· 55 55 struct ceph_mds_client *mdsc = fsc->mdsc; 56 56 struct ceph_mds_request *req; 57 57 struct rb_node *rp; 58 - int pathlen = 0; 59 - u64 pathbase; 60 58 char *path; 61 59 62 60 mutex_lock(&mdsc->mutex); ··· 79 81 if (req->r_inode) { 80 82 seq_printf(s, " #%llx", ceph_ino(req->r_inode)); 81 83 } else if (req->r_dentry) { 82 - path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, 83 - &pathbase, 0); 84 + struct ceph_path_info path_info; 85 + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0); 84 86 if (IS_ERR(path)) 85 87 path = NULL; 86 88 spin_lock(&req->r_dentry->d_lock); ··· 89 91 req->r_dentry, 90 92 path ? path : ""); 91 93 spin_unlock(&req->r_dentry->d_lock); 92 - ceph_mdsc_free_path(path, pathlen); 94 + ceph_mdsc_free_path_info(&path_info); 93 95 } else if (req->r_path1) { 94 96 seq_printf(s, " #%llx/%s", req->r_ino1.ino, 95 97 req->r_path1); ··· 98 100 } 99 101 100 102 if (req->r_old_dentry) { 101 - path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, 102 - &pathbase, 0); 103 + struct ceph_path_info path_info; 104 + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0); 103 105 if (IS_ERR(path)) 104 106 path = NULL; 105 107 spin_lock(&req->r_old_dentry->d_lock); ··· 109 111 req->r_old_dentry, 110 112 path ? path : ""); 111 113 spin_unlock(&req->r_old_dentry->d_lock); 112 - ceph_mdsc_free_path(path, pathlen); 114 + ceph_mdsc_free_path_info(&path_info); 113 115 } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { 114 116 if (req->r_ino2.ino) 115 117 seq_printf(s, " #%llx/%s", req->r_ino2.ino,
+7 -10
fs/ceph/dir.c
··· 1271 1271 1272 1272 /* If op failed, mark everyone involved for errors */ 1273 1273 if (result) { 1274 - int pathlen = 0; 1275 - u64 base = 0; 1276 - char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, 1277 - &base, 0); 1274 + struct ceph_path_info path_info = {0}; 1275 + char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); 1278 1276 1279 1277 /* mark error on parent + clear complete */ 1280 1278 mapping_set_error(req->r_parent->i_mapping, result); ··· 1286 1288 mapping_set_error(req->r_old_inode->i_mapping, result); 1287 1289 1288 1290 pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n", 1289 - base, IS_ERR(path) ? "<<bad>>" : path, result); 1290 - ceph_mdsc_free_path(path, pathlen); 1291 + path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result); 1292 + ceph_mdsc_free_path_info(&path_info); 1291 1293 } 1292 1294 out: 1293 1295 iput(req->r_old_inode); ··· 1345 1347 int err = -EROFS; 1346 1348 int op; 1347 1349 char *path; 1348 - int pathlen; 1349 - u64 pathbase; 1350 1350 1351 1351 if (ceph_snap(dir) == CEPH_SNAPDIR) { 1352 1352 /* rmdir .snap/foo is RMSNAP */ ··· 1363 1367 if (!dn) { 1364 1368 try_async = false; 1365 1369 } else { 1366 - path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); 1370 + struct ceph_path_info path_info; 1371 + path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); 1367 1372 if (IS_ERR(path)) { 1368 1373 try_async = false; 1369 1374 err = 0; 1370 1375 } else { 1371 1376 err = ceph_mds_check_access(mdsc, path, MAY_WRITE); 1372 1377 } 1373 - ceph_mdsc_free_path(path, pathlen); 1378 + ceph_mdsc_free_path_info(&path_info); 1374 1379 dput(dn); 1375 1380 1376 1381 /* For none EACCES cases will let the MDS do the mds auth check */
+10 -14
fs/ceph/file.c
··· 368 368 int flags, fmode, wanted; 369 369 struct dentry *dentry; 370 370 char *path; 371 - int pathlen; 372 - u64 pathbase; 373 371 bool do_sync = false; 374 372 int mask = MAY_READ; 375 373 ··· 397 399 if (!dentry) { 398 400 do_sync = true; 399 401 } else { 400 - path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); 402 + struct ceph_path_info path_info; 403 + path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); 401 404 if (IS_ERR(path)) { 402 405 do_sync = true; 403 406 err = 0; 404 407 } else { 405 408 err = ceph_mds_check_access(mdsc, path, mask); 406 409 } 407 - ceph_mdsc_free_path(path, pathlen); 410 + ceph_mdsc_free_path_info(&path_info); 408 411 dput(dentry); 409 412 410 413 /* For none EACCES cases will let the MDS do the mds auth check */ ··· 613 614 mapping_set_error(req->r_parent->i_mapping, result); 614 615 615 616 if (result) { 616 - int pathlen = 0; 617 - u64 base = 0; 618 - char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, 619 - &base, 0); 617 + struct ceph_path_info path_info = {0}; 618 + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0); 620 619 621 620 pr_warn_client(cl, 622 621 "async create failure path=(%llx)%s result=%d!\n", 623 - base, IS_ERR(path) ? "<<bad>>" : path, result); 624 - ceph_mdsc_free_path(path, pathlen); 622 + path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result); 623 + ceph_mdsc_free_path_info(&path_info); 625 624 626 625 ceph_dir_clear_complete(req->r_parent); 627 626 if (!d_unhashed(dentry)) ··· 788 791 int mask; 789 792 int err; 790 793 char *path; 791 - int pathlen; 792 - u64 pathbase; 793 794 794 795 doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", 795 796 dir, ceph_vinop(dir), dentry, dentry, ··· 809 814 if (!dn) { 810 815 try_async = false; 811 816 } else { 812 - path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); 817 + struct ceph_path_info path_info; 818 + path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0); 813 819 if (IS_ERR(path)) { 814 820 try_async = false; 815 821 err = 0; ··· 822 826 mask |= MAY_WRITE; 823 827 err = ceph_mds_check_access(mdsc, path, mask); 824 828 } 825 - ceph_mdsc_free_path(path, pathlen); 829 + ceph_mdsc_free_path_info(&path_info); 826 830 dput(dn); 827 831 828 832 /* For none EACCES cases will let the MDS do the mds auth check */
+72 -16
fs/ceph/inode.c
··· 55 55 return 0; 56 56 } 57 57 58 + /* 59 + * Check if the parent inode matches the vino from directory reply info 60 + */ 61 + static inline bool ceph_vino_matches_parent(struct inode *parent, 62 + struct ceph_vino vino) 63 + { 64 + return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap; 65 + } 66 + 67 + /* 68 + * Validate that the directory inode referenced by @req->r_parent matches the 69 + * inode number and snapshot id contained in the reply's directory record. If 70 + * they do not match – which can theoretically happen if the parent dentry was 71 + * moved between the time the request was issued and the reply arrived – fall 72 + * back to looking up the correct inode in the inode cache. 73 + * 74 + * A reference is *always* returned. Callers that receive a different inode 75 + * than the original @parent are responsible for dropping the extra reference 76 + * once the reply has been processed. 77 + */ 78 + static struct inode *ceph_get_reply_dir(struct super_block *sb, 79 + struct inode *parent, 80 + struct ceph_mds_reply_info_parsed *rinfo) 81 + { 82 + struct ceph_vino vino; 83 + 84 + if (unlikely(!rinfo->diri.in)) 85 + return parent; /* nothing to compare against */ 86 + 87 + /* If we didn't have a cached parent inode to begin with, just bail out. */ 88 + if (!parent) 89 + return NULL; 90 + 91 + vino.ino = le64_to_cpu(rinfo->diri.in->ino); 92 + vino.snap = le64_to_cpu(rinfo->diri.in->snapid); 93 + 94 + if (likely(ceph_vino_matches_parent(parent, vino))) 95 + return parent; /* matches – use the original reference */ 96 + 97 + /* Mismatch – this should be rare. Emit a WARN and obtain the correct inode. */ 98 + WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n", 99 + ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap); 100 + 101 + return ceph_get_inode(sb, vino, NULL); 102 + } 103 + 58 104 /** 59 105 * ceph_new_inode - allocate a new inode in advance of an expected create 60 106 * @dir: parent directory for new inode ··· 1569 1523 struct ceph_vino tvino, dvino; 1570 1524 struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); 1571 1525 struct ceph_client *cl = fsc->client; 1526 + struct inode *parent_dir = NULL; 1572 1527 int err = 0; 1573 1528 1574 1529 doutc(cl, "%p is_dentry %d is_target %d\n", req, ··· 1583 1536 } 1584 1537 1585 1538 if (rinfo->head->is_dentry) { 1586 - struct inode *dir = req->r_parent; 1587 - 1588 - if (dir) { 1589 - err = ceph_fill_inode(dir, NULL, &rinfo->diri, 1539 + /* 1540 + * r_parent may be stale, in cases when R_PARENT_LOCKED is not set, 1541 + * so we need to get the correct inode 1542 + */ 1543 + parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo); 1544 + if (unlikely(IS_ERR(parent_dir))) { 1545 + err = PTR_ERR(parent_dir); 1546 + goto done; 1547 + } 1548 + if (parent_dir) { 1549 + err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri, 1590 1550 rinfo->dirfrag, session, -1, 1591 1551 &req->r_caps_reservation); 1592 1552 if (err < 0) ··· 1602 1548 WARN_ON_ONCE(1); 1603 1549 } 1604 1550 1605 - if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && 1551 + if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && 1606 1552 test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && 1607 1553 !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { 1608 1554 bool is_nokey = false; 1609 1555 struct qstr dname; 1610 1556 struct dentry *dn, *parent; 1611 1557 struct fscrypt_str oname = FSTR_INIT(NULL, 0); 1612 - struct ceph_fname fname = { .dir = dir, 1558 + struct ceph_fname fname = { .dir = parent_dir, 1613 1559 .name = rinfo->dname, 1614 1560 .ctext = rinfo->altname, 1615 1561 .name_len = rinfo->dname_len, ··· 1618 1564 BUG_ON(!rinfo->head->is_target); 1619 1565 BUG_ON(req->r_dentry); 1620 1566 1621 - parent = d_find_any_alias(dir); 1567 + parent = d_find_any_alias(parent_dir); 1622 1568 BUG_ON(!parent); 1623 1569 1624 - err = ceph_fname_alloc_buffer(dir, &oname); 1570 + err = ceph_fname_alloc_buffer(parent_dir, &oname); 1625 1571 if (err < 0) { 1626 1572 dput(parent); 1627 1573 goto done; ··· 1630 1576 err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey); 1631 1577 if (err < 0) { 1632 1578 dput(parent); 1633 - ceph_fname_free_buffer(dir, &oname); 1579 + ceph_fname_free_buffer(parent_dir, &oname); 1634 1580 goto done; 1635 1581 } 1636 1582 dname.name = oname.name; ··· 1649 1595 dname.len, dname.name, dn); 1650 1596 if (!dn) { 1651 1597 dput(parent); 1652 - ceph_fname_free_buffer(dir, &oname); 1598 + ceph_fname_free_buffer(parent_dir, &oname); 1653 1599 err = -ENOMEM; 1654 1600 goto done; 1655 1601 } ··· 1664 1610 ceph_snap(d_inode(dn)) != tvino.snap)) { 1665 1611 doutc(cl, " dn %p points to wrong inode %p\n", 1666 1612 dn, d_inode(dn)); 1667 - ceph_dir_clear_ordered(dir); 1613 + ceph_dir_clear_ordered(parent_dir); 1668 1614 d_delete(dn); 1669 1615 dput(dn); 1670 1616 goto retry_lookup; 1671 1617 } 1672 - ceph_fname_free_buffer(dir, &oname); 1618 + ceph_fname_free_buffer(parent_dir, &oname); 1673 1619 1674 1620 req->r_dentry = dn; 1675 1621 dput(parent); ··· 1848 1794 &dvino, ptvino); 1849 1795 } 1850 1796 done: 1797 + /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */ 1798 + if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent)) 1799 + iput(parent_dir); 1851 1800 doutc(cl, "done err=%d\n", err); 1852 1801 return err; 1853 1802 } ··· 2544 2487 int truncate_retry = 20; /* The RMW will take around 50ms */ 2545 2488 struct dentry *dentry; 2546 2489 char *path; 2547 - int pathlen; 2548 - u64 pathbase; 2549 2490 bool do_sync = false; 2550 2491 2551 2492 dentry = d_find_alias(inode); 2552 2493 if (!dentry) { 2553 2494 do_sync = true; 2554 2495 } else { 2555 - path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); 2496 + struct ceph_path_info path_info; 2497 + path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); 2556 2498 if (IS_ERR(path)) { 2557 2499 do_sync = true; 2558 2500 err = 0; 2559 2501 } else { 2560 2502 err = ceph_mds_check_access(mdsc, path, MAY_WRITE); 2561 2503 } 2562 - ceph_mdsc_free_path(path, pathlen); 2504 + ceph_mdsc_free_path_info(&path_info); 2563 2505 dput(dentry); 2564 2506 2565 2507 /* For none EACCES cases will let the MDS do the mds auth check */
+105 -67
fs/ceph/mds_client.c
··· 2681 2681 * ceph_mdsc_build_path - build a path string to a given dentry 2682 2682 * @mdsc: mds client 2683 2683 * @dentry: dentry to which path should be built 2684 - * @plen: returned length of string 2685 - * @pbase: returned base inode number 2684 + * @path_info: output path, length, base ino+snap, and freepath ownership flag 2686 2685 * @for_wire: is this path going to be sent to the MDS? 2687 2686 * 2688 2687 * Build a string that represents the path to the dentry. This is mostly called ··· 2699 2700 * foo/.snap/bar -> foo//bar 2700 2701 */ 2701 2702 char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, 2702 - int *plen, u64 *pbase, int for_wire) 2703 + struct ceph_path_info *path_info, int for_wire) 2703 2704 { 2704 2705 struct ceph_client *cl = mdsc->fsc->client; 2705 2706 struct dentry *cur; ··· 2809 2810 return ERR_PTR(-ENAMETOOLONG); 2810 2811 } 2811 2812 2812 - *pbase = base; 2813 - *plen = PATH_MAX - 1 - pos; 2813 + /* Initialize the output structure */ 2814 + memset(path_info, 0, sizeof(*path_info)); 2815 + 2816 + path_info->vino.ino = base; 2817 + path_info->pathlen = PATH_MAX - 1 - pos; 2818 + path_info->path = path + pos; 2819 + path_info->freepath = true; 2820 + 2821 + /* Set snap from dentry if available */ 2822 + if (d_inode(dentry)) 2823 + path_info->vino.snap = ceph_snap(d_inode(dentry)); 2824 + else 2825 + path_info->vino.snap = CEPH_NOSNAP; 2826 + 2814 2827 doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry), 2815 - base, *plen, path + pos); 2828 + base, PATH_MAX - 1 - pos, path + pos); 2816 2829 return path + pos; 2817 2830 } 2818 2831 2819 2832 static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, 2820 - struct inode *dir, const char **ppath, int *ppathlen, 2821 - u64 *pino, bool *pfreepath, bool parent_locked) 2833 + struct inode *dir, struct ceph_path_info *path_info, 2834 + bool parent_locked) 2822 2835 { 2823 2836 char *path; 2824 2837 ··· 2839 2828 dir = d_inode_rcu(dentry->d_parent); 2840 2829 if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP && 2841 2830 !IS_ENCRYPTED(dir)) { 2842 - *pino = ceph_ino(dir); 2831 + path_info->vino.ino = ceph_ino(dir); 2832 + path_info->vino.snap = ceph_snap(dir); 2843 2833 rcu_read_unlock(); 2844 - *ppath = dentry->d_name.name; 2845 - *ppathlen = dentry->d_name.len; 2834 + path_info->path = dentry->d_name.name; 2835 + path_info->pathlen = dentry->d_name.len; 2836 + path_info->freepath = false; 2846 2837 return 0; 2847 2838 } 2848 2839 rcu_read_unlock(); 2849 - path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); 2840 + path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1); 2850 2841 if (IS_ERR(path)) 2851 2842 return PTR_ERR(path); 2852 - *ppath = path; 2853 - *pfreepath = true; 2843 + /* 2844 + * ceph_mdsc_build_path already fills path_info, including snap handling. 2845 + */ 2854 2846 return 0; 2855 2847 } 2856 2848 2857 - static int build_inode_path(struct inode *inode, 2858 - const char **ppath, int *ppathlen, u64 *pino, 2859 - bool *pfreepath) 2849 + static int build_inode_path(struct inode *inode, struct ceph_path_info *path_info) 2860 2850 { 2861 2851 struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); 2862 2852 struct dentry *dentry; 2863 2853 char *path; 2864 2854 2865 2855 if (ceph_snap(inode) == CEPH_NOSNAP) { 2866 - *pino = ceph_ino(inode); 2867 - *ppathlen = 0; 2856 + path_info->vino.ino = ceph_ino(inode); 2857 + path_info->vino.snap = ceph_snap(inode); 2858 + path_info->pathlen = 0; 2859 + path_info->freepath = false; 2868 2860 return 0; 2869 2861 } 2870 2862 dentry = d_find_alias(inode); 2871 - path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); 2863 + path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1); 2872 2864 dput(dentry); 2873 2865 if (IS_ERR(path)) 2874 2866 return PTR_ERR(path); 2875 - *ppath = path; 2876 - *pfreepath = true; 2867 + /* 2868 + * ceph_mdsc_build_path already fills path_info, including snap from dentry. 2869 + * Override with inode's snap since that's what this function is for. 2870 + */ 2871 + path_info->vino.snap = ceph_snap(inode); 2877 2872 return 0; 2878 2873 } 2879 2874 ··· 2889 2872 */ 2890 2873 static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, 2891 2874 struct dentry *rdentry, struct inode *rdiri, 2892 - const char *rpath, u64 rino, const char **ppath, 2893 - int *pathlen, u64 *ino, bool *freepath, 2875 + const char *rpath, u64 rino, 2876 + struct ceph_path_info *path_info, 2894 2877 bool parent_locked) 2895 2878 { 2896 2879 struct ceph_client *cl = mdsc->fsc->client; 2897 2880 int r = 0; 2898 2881 2882 + /* Initialize the output structure */ 2883 + memset(path_info, 0, sizeof(*path_info)); 2884 + 2899 2885 if (rinode) { 2900 - r = build_inode_path(rinode, ppath, pathlen, ino, freepath); 2886 + r = build_inode_path(rinode, path_info); 2901 2887 doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), 2902 2888 ceph_snap(rinode)); 2903 2889 } else if (rdentry) { 2904 - r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, 2905 - freepath, parent_locked); 2906 - doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); 2890 + r = build_dentry_path(mdsc, rdentry, rdiri, path_info, parent_locked); 2891 + doutc(cl, " dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino, 2892 + path_info->pathlen, path_info->path); 2907 2893 } else if (rpath || rino) { 2908 - *ino = rino; 2909 - *ppath = rpath; 2910 - *pathlen = rpath ? strlen(rpath) : 0; 2911 - doutc(cl, " path %.*s\n", *pathlen, rpath); 2894 + path_info->vino.ino = rino; 2895 + path_info->vino.snap = CEPH_NOSNAP; 2896 + path_info->path = rpath; 2897 + path_info->pathlen = rpath ? strlen(rpath) : 0; 2898 + path_info->freepath = false; 2899 + 2900 + doutc(cl, " path %.*s\n", path_info->pathlen, rpath); 2912 2901 } 2913 2902 2914 2903 return r; ··· 2991 2968 struct ceph_client *cl = mdsc->fsc->client; 2992 2969 struct ceph_msg *msg; 2993 2970 struct ceph_mds_request_head_legacy *lhead; 2994 - const char *path1 = NULL; 2995 - const char *path2 = NULL; 2996 - u64 ino1 = 0, ino2 = 0; 2997 - int pathlen1 = 0, pathlen2 = 0; 2998 - bool freepath1 = false, freepath2 = false; 2971 + struct ceph_path_info path_info1 = {0}; 2972 + struct ceph_path_info path_info2 = {0}; 2999 2973 struct dentry *old_dentry = NULL; 3000 2974 int len; 3001 2975 u16 releases; ··· 3002 2982 u16 request_head_version = mds_supported_head_version(session); 3003 2983 kuid_t caller_fsuid = req->r_cred->fsuid; 3004 2984 kgid_t caller_fsgid = req->r_cred->fsgid; 2985 + bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); 3005 2986 3006 2987 ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, 3007 - req->r_parent, req->r_path1, req->r_ino1.ino, 3008 - &path1, &pathlen1, &ino1, &freepath1, 3009 - test_bit(CEPH_MDS_R_PARENT_LOCKED, 3010 - &req->r_req_flags)); 2988 + req->r_parent, req->r_path1, req->r_ino1.ino, 2989 + &path_info1, parent_locked); 3011 2990 if (ret < 0) { 3012 2991 msg = ERR_PTR(ret); 3013 2992 goto out; 2993 + } 2994 + 2995 + /* 2996 + * When the parent directory's i_rwsem is *not* locked, req->r_parent may 2997 + * have become stale (e.g. after a concurrent rename) between the time the 2998 + * dentry was looked up and now. If we detect that the stored r_parent 2999 + * does not match the inode number we just encoded for the request, switch 3000 + * to the correct inode so that the MDS receives a valid parent reference. 3001 + */ 3002 + if (!parent_locked && req->r_parent && path_info1.vino.ino && 3003 + ceph_ino(req->r_parent) != path_info1.vino.ino) { 3004 + struct inode *old_parent = req->r_parent; 3005 + struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, path_info1.vino, NULL); 3006 + if (!IS_ERR(correct_dir)) { 3007 + WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n", 3008 + ceph_ino(old_parent), path_info1.vino.ino); 3009 + /* 3010 + * Transfer CEPH_CAP_PIN from the old parent to the new one. 3011 + * The pin was taken earlier in ceph_mdsc_submit_request(). 3012 + */ 3013 + ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN); 3014 + iput(old_parent); 3015 + req->r_parent = correct_dir; 3016 + ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); 3017 + } 3014 3018 } 3015 3019 3016 3020 /* If r_old_dentry is set, then assume that its parent is locked */ ··· 3042 2998 !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) 3043 2999 old_dentry = req->r_old_dentry; 3044 3000 ret = set_request_path_attr(mdsc, NULL, old_dentry, 3045 - req->r_old_dentry_dir, 3046 - req->r_path2, req->r_ino2.ino, 3047 - &path2, &pathlen2, &ino2, &freepath2, true); 3001 + req->r_old_dentry_dir, 3002 + req->r_path2, req->r_ino2.ino, 3003 + &path_info2, true); 3048 3004 if (ret < 0) { 3049 3005 msg = ERR_PTR(ret); 3050 3006 goto out_free1; ··· 3075 3031 3076 3032 /* filepaths */ 3077 3033 len += 2 * (1 + sizeof(u32) + sizeof(u64)); 3078 - len += pathlen1 + pathlen2; 3034 + len += path_info1.pathlen + path_info2.pathlen; 3079 3035 3080 3036 /* cap releases */ 3081 3037 len += sizeof(struct ceph_mds_request_release) * ··· 3083 3039 !!req->r_old_inode_drop + !!req->r_old_dentry_drop); 3084 3040 3085 3041 if (req->r_dentry_drop) 3086 - len += pathlen1; 3042 + len += path_info1.pathlen; 3087 3043 if (req->r_old_dentry_drop) 3088 - len += pathlen2; 3044 + len += path_info2.pathlen; 3089 3045 3090 3046 /* MClientRequest tail */ 3091 3047 ··· 3198 3154 lhead->ino = cpu_to_le64(req->r_deleg_ino); 3199 3155 lhead->args = req->r_args; 3200 3156 3201 - ceph_encode_filepath(&p, end, ino1, path1); 3202 - ceph_encode_filepath(&p, end, ino2, path2); 3157 + ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path); 3158 + ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path); 3203 3159 3204 3160 /* make note of release offset, in case we need to replay */ 3205 3161 req->r_request_release_offset = p - msg->front.iov_base; ··· 3262 3218 msg->hdr.data_off = cpu_to_le16(0); 3263 3219 3264 3220 out_free2: 3265 - if (freepath2) 3266 - ceph_mdsc_free_path((char *)path2, pathlen2); 3221 + ceph_mdsc_free_path_info(&path_info2); 3267 3222 out_free1: 3268 - if (freepath1) 3269 - ceph_mdsc_free_path((char *)path1, pathlen1); 3223 + ceph_mdsc_free_path_info(&path_info1); 3270 3224 out: 3271 3225 return msg; 3272 3226 out_err: ··· 4621 4579 struct ceph_pagelist *pagelist = recon_state->pagelist; 4622 4580 struct dentry *dentry; 4623 4581 struct ceph_cap *cap; 4624 - char *path; 4625 - int pathlen = 0, err; 4626 - u64 pathbase; 4582 + struct ceph_path_info path_info = {0}; 4583 + int err; 4627 4584 u64 snap_follows; 4628 4585 4629 4586 dentry = d_find_primary(inode); 4630 4587 if (dentry) { 4631 4588 /* set pathbase to parent dir when msg_version >= 2 */ 4632 - path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 4589 + char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 4633 4590 recon_state->msg_version >= 2); 4634 4591 dput(dentry); 4635 4592 if (IS_ERR(path)) { 4636 4593 err = PTR_ERR(path); 4637 4594 goto out_err; 4638 4595 } 4639 - } else { 4640 - path = NULL; 4641 - pathbase = 0; 4642 4596 } 4643 4597 4644 4598 spin_lock(&ci->i_ceph_lock); ··· 4667 4629 rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); 4668 4630 rec.v2.issued = cpu_to_le32(cap->issued); 4669 4631 rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); 4670 - rec.v2.pathbase = cpu_to_le64(pathbase); 4632 + rec.v2.pathbase = cpu_to_le64(path_info.vino.ino); 4671 4633 rec.v2.flock_len = (__force __le32) 4672 4634 ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); 4673 4635 } else { ··· 4682 4644 ts = inode_get_atime(inode); 4683 4645 ceph_encode_timespec64(&rec.v1.atime, &ts); 4684 4646 rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); 4685 - rec.v1.pathbase = cpu_to_le64(pathbase); 4647 + rec.v1.pathbase = cpu_to_le64(path_info.vino.ino); 4686 4648 } 4687 4649 4688 4650 if (list_empty(&ci->i_cap_snaps)) { ··· 4744 4706 sizeof(struct ceph_filelock); 4745 4707 rec.v2.flock_len = cpu_to_le32(struct_len); 4746 4708 4747 - struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); 4709 + struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2); 4748 4710 4749 4711 if (struct_v >= 2) 4750 4712 struct_len += sizeof(u64); /* snap_follows */ ··· 4768 4730 ceph_pagelist_encode_8(pagelist, 1); 4769 4731 ceph_pagelist_encode_32(pagelist, struct_len); 4770 4732 } 4771 - ceph_pagelist_encode_string(pagelist, path, pathlen); 4733 + ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen); 4772 4734 ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); 4773 4735 ceph_locks_to_pagelist(flocks, pagelist, 4774 4736 num_fcntl_locks, num_flock_locks); ··· 4779 4741 } else { 4780 4742 err = ceph_pagelist_reserve(pagelist, 4781 4743 sizeof(u64) + sizeof(u32) + 4782 - pathlen + sizeof(rec.v1)); 4744 + path_info.pathlen + sizeof(rec.v1)); 4783 4745 if (err) 4784 4746 goto out_err; 4785 4747 4786 4748 ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); 4787 - ceph_pagelist_encode_string(pagelist, path, pathlen); 4749 + ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen); 4788 4750 ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); 4789 4751 } 4790 4752 4791 4753 out_err: 4792 - ceph_mdsc_free_path(path, pathlen); 4754 + ceph_mdsc_free_path_info(&path_info); 4793 4755 if (!err) 4794 4756 recon_state->nr_caps++; 4795 4757 return err;
+14 -4
fs/ceph/mds_client.h
··· 617 617 618 618 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); 619 619 620 - static inline void ceph_mdsc_free_path(char *path, int len) 620 + /* 621 + * Structure to group path-related output parameters for build_*_path functions 622 + */ 623 + struct ceph_path_info { 624 + const char *path; 625 + int pathlen; 626 + struct ceph_vino vino; 627 + bool freepath; 628 + }; 629 + 630 + static inline void ceph_mdsc_free_path_info(const struct ceph_path_info *path_info) 621 631 { 622 - if (!IS_ERR_OR_NULL(path)) 623 - __putname(path - (PATH_MAX - 1 - len)); 632 + if (path_info && path_info->freepath && !IS_ERR_OR_NULL(path_info->path)) 633 + __putname((char *)path_info->path - (PATH_MAX - 1 - path_info->pathlen)); 624 634 } 625 635 626 636 extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, 627 - struct dentry *dentry, int *plen, u64 *base, 637 + struct dentry *dentry, struct ceph_path_info *path_info, 628 638 int for_wire); 629 639 630 640 extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
+4 -3
net/ceph/messenger.c
··· 1524 1524 * in case we faulted due to authentication, invalidate our 1525 1525 * current tickets so that we can get new ones. 1526 1526 */ 1527 - if (con->v1.auth_retry) { 1527 + if (!ceph_msgr2(from_msgr(con->msgr)) && con->v1.auth_retry) { 1528 1528 dout("auth_retry %d, invalidating\n", con->v1.auth_retry); 1529 1529 if (con->ops->invalidate_authorizer) 1530 1530 con->ops->invalidate_authorizer(con); ··· 1714 1714 { 1715 1715 /* come back from STANDBY? */ 1716 1716 if (con->state == CEPH_CON_S_STANDBY) { 1717 - dout("clear_standby %p and ++connect_seq\n", con); 1717 + dout("clear_standby %p\n", con); 1718 1718 con->state = CEPH_CON_S_PREOPEN; 1719 - con->v1.connect_seq++; 1719 + if (!ceph_msgr2(from_msgr(con->msgr))) 1720 + con->v1.connect_seq++; 1720 1721 WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING)); 1721 1722 WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_KEEPALIVE_PENDING)); 1722 1723 }