Merge tag 'ceph-for-6.8-rc1' of https://github.com/ceph/ceph-client

+1

fs/ceph/Kconfig

··· 7 7 select CRYPTO_AES 8 8 select CRYPTO 9 9 select NETFS_SUPPORT 10 + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION 10 11 default n 11 12 help 12 13 Choose Y or M here to include support for mounting the

+5 -3

fs/ceph/addr.c

··· 337 337 u64 len = subreq->len; 338 338 bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); 339 339 u64 off = subreq->start; 340 + int extent_cnt; 340 341 341 342 if (ceph_inode_is_shutdown(inode)) { 342 343 err = -EIO; ··· 351 350 352 351 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, 353 352 off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, 354 - CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, 355 - NULL, ci->i_truncate_seq, ci->i_truncate_size, false); 353 + CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, 354 + ci->i_truncate_size, false); 356 355 if (IS_ERR(req)) { 357 356 err = PTR_ERR(req); 358 357 req = NULL; ··· 360 359 } 361 360 362 361 if (sparse) { 363 - err = ceph_alloc_sparse_ext_map(&req->r_ops[0]); 362 + extent_cnt = __ceph_sparse_read_ext_count(inode, len); 363 + err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt); 364 364 if (err) 365 365 goto out; 366 366 }

+3 -6

fs/ceph/caps.c

··· 4887 4887 struct inode *dir, 4888 4888 int mds, int drop, int unless) 4889 4889 { 4890 - struct dentry *parent = NULL; 4891 4890 struct ceph_mds_request_release *rel = *p; 4892 4891 struct ceph_dentry_info *di = ceph_dentry(dentry); 4893 4892 struct ceph_client *cl; 4894 4893 int force = 0; 4895 4894 int ret; 4895 + 4896 + /* This shouldn't happen */ 4897 + BUG_ON(!dir); 4896 4898 4897 4899 /* 4898 4900 * force an record for the directory caps if we have a dentry lease. ··· 4905 4903 spin_lock(&dentry->d_lock); 4906 4904 if (di->lease_session && di->lease_session->s_mds == mds) 4907 4905 force = 1; 4908 - if (!dir) { 4909 - parent = dget(dentry->d_parent); 4910 - dir = d_inode(parent); 4911 - } 4912 4906 spin_unlock(&dentry->d_lock); 4913 4907 4914 4908 ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); 4915 - dput(parent); 4916 4909 4917 4910 cl = ceph_inode_to_client(dir); 4918 4911 spin_lock(&dentry->d_lock);

+13 -8

fs/ceph/dir.c

··· 1593 1593 unsigned long dir_lease_ttl; 1594 1594 }; 1595 1595 1596 + static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *); 1597 + static int __dentry_lease_check(const struct dentry *); 1598 + 1596 1599 static unsigned long 1597 1600 __dentry_leases_walk(struct ceph_mds_client *mdsc, 1598 - struct ceph_lease_walk_control *lwc, 1599 - int (*check)(struct dentry*, void*)) 1601 + struct ceph_lease_walk_control *lwc) 1600 1602 { 1601 1603 struct ceph_dentry_info *di, *tmp; 1602 1604 struct dentry *dentry, *last = NULL; ··· 1626 1624 goto next; 1627 1625 } 1628 1626 1629 - ret = check(dentry, lwc); 1627 + if (lwc->dir_lease) 1628 + ret = __dir_lease_check(dentry, lwc); 1629 + else 1630 + ret = __dentry_lease_check(dentry); 1630 1631 if (ret & TOUCH) { 1631 1632 /* move it into tail of dir lease list */ 1632 1633 __dentry_dir_lease_touch(mdsc, di); ··· 1686 1681 return freed; 1687 1682 } 1688 1683 1689 - static int __dentry_lease_check(struct dentry *dentry, void *arg) 1684 + static int __dentry_lease_check(const struct dentry *dentry) 1690 1685 { 1691 1686 struct ceph_dentry_info *di = ceph_dentry(dentry); 1692 1687 int ret; ··· 1701 1696 return DELETE; 1702 1697 } 1703 1698 1704 - static int __dir_lease_check(struct dentry *dentry, void *arg) 1699 + static int __dir_lease_check(const struct dentry *dentry, 1700 + struct ceph_lease_walk_control *lwc) 1705 1701 { 1706 - struct ceph_lease_walk_control *lwc = arg; 1707 1702 struct ceph_dentry_info *di = ceph_dentry(dentry); 1708 1703 1709 1704 int ret = __dir_lease_try_check(dentry); ··· 1742 1737 1743 1738 lwc.dir_lease = false; 1744 1739 lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2; 1745 - freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check); 1740 + freed = __dentry_leases_walk(mdsc, &lwc); 1746 1741 if (!lwc.nr_to_scan) /* more invalid leases */ 1747 1742 return -EAGAIN; 1748 1743 ··· 1752 1747 lwc.dir_lease = true; 1753 1748 lwc.expire_dir_lease = freed < count; 1754 1749 lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ; 1755 - freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check); 1750 + freed +=__dentry_leases_walk(mdsc, &lwc); 1756 1751 if (!lwc.nr_to_scan) /* more to check */ 1757 1752 return -EAGAIN; 1758 1753

-2

fs/ceph/export.c

··· 286 286 doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino, 287 287 vino.snap, sfh->parent_ino, sfh->hash, err); 288 288 } 289 - if (IS_ERR(inode)) 290 - return ERR_CAST(inode); 291 289 /* see comments in ceph_get_parent() */ 292 290 return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode); 293 291 }

+6 -2

fs/ceph/file.c

··· 1029 1029 struct ceph_osd_req_op *op; 1030 1030 u64 read_off = off; 1031 1031 u64 read_len = len; 1032 + int extent_cnt; 1032 1033 1033 1034 /* determine new offset/length if encrypted */ 1034 1035 ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); ··· 1069 1068 1070 1069 op = &req->r_ops[0]; 1071 1070 if (sparse) { 1072 - ret = ceph_alloc_sparse_ext_map(op); 1071 + extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); 1072 + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); 1073 1073 if (ret) { 1074 1074 ceph_osdc_put_request(req); 1075 1075 break; ··· 1467 1465 ssize_t len; 1468 1466 struct ceph_osd_req_op *op; 1469 1467 int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ; 1468 + int extent_cnt; 1470 1469 1471 1470 if (write) 1472 1471 size = min_t(u64, size, fsc->mount_options->wsize); ··· 1531 1528 osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); 1532 1529 op = &req->r_ops[0]; 1533 1530 if (sparse) { 1534 - ret = ceph_alloc_sparse_ext_map(op); 1531 + extent_cnt = __ceph_sparse_read_ext_count(inode, size); 1532 + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); 1535 1533 if (ret) { 1536 1534 ceph_osdc_put_request(req); 1537 1535 break;

+25 -10

fs/ceph/mds_client.c

··· 1534 1534 * session message, specialization for CEPH_SESSION_REQUEST_OPEN 1535 1535 * to include additional client metadata fields. 1536 1536 */ 1537 - static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq) 1537 + static struct ceph_msg * 1538 + create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq) 1538 1539 { 1539 1540 struct ceph_msg *msg; 1540 1541 struct ceph_mds_session_head *h; ··· 1579 1578 size = METRIC_BYTES(count); 1580 1579 extra_bytes += 2 + 4 + 4 + size; 1581 1580 1581 + /* flags, mds auth caps and oldest_client_tid */ 1582 + extra_bytes += 4 + 4 + 8; 1583 + 1582 1584 /* Allocate the message */ 1583 1585 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, 1584 1586 GFP_NOFS, false); ··· 1593 1589 end = p + msg->front.iov_len; 1594 1590 1595 1591 h = p; 1596 - h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); 1592 + h->op = cpu_to_le32(op); 1597 1593 h->seq = cpu_to_le64(seq); 1598 1594 1599 1595 /* 1600 1596 * Serialize client metadata into waiting buffer space, using 1601 1597 * the format that userspace expects for map<string, string> 1602 1598 * 1603 - * ClientSession messages with metadata are v4 1599 + * ClientSession messages with metadata are v7 1604 1600 */ 1605 - msg->hdr.version = cpu_to_le16(4); 1601 + msg->hdr.version = cpu_to_le16(7); 1606 1602 msg->hdr.compat_version = cpu_to_le16(1); 1607 1603 1608 1604 /* The write pointer, following the session_head structure */ ··· 1638 1634 return ERR_PTR(ret); 1639 1635 } 1640 1636 1637 + /* version == 5, flags */ 1638 + ceph_encode_32(&p, 0); 1639 + 1640 + /* version == 6, mds auth caps */ 1641 + ceph_encode_32(&p, 0); 1642 + 1643 + /* version == 7, oldest_client_tid */ 1644 + ceph_encode_64(&p, mdsc->oldest_tid); 1645 + 1641 1646 msg->front.iov_len = p - msg->front.iov_base; 1642 1647 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1643 1648 ··· 1676 1663 session->s_renew_requested = jiffies; 1677 1664 1678 1665 /* send connect message */ 1679 - msg = create_session_open_msg(mdsc, session->s_seq); 1666 + msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN, 1667 + session->s_seq); 1680 1668 if (IS_ERR(msg)) 1681 1669 return PTR_ERR(msg); 1682 1670 ceph_con_send(&session->s_con, msg); ··· 2042 2028 2043 2029 doutc(cl, "to mds%d (%s)\n", session->s_mds, 2044 2030 ceph_mds_state_name(state)); 2045 - msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, 2031 + msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS, 2046 2032 ++session->s_renew_seq); 2047 - if (!msg) 2048 - return -ENOMEM; 2033 + if (IS_ERR(msg)) 2034 + return PTR_ERR(msg); 2049 2035 ceph_con_send(&session->s_con, msg); 2050 2036 return 0; 2051 2037 } ··· 4142 4128 pr_info_client(cl, "mds%d reconnect success\n", 4143 4129 session->s_mds); 4144 4130 4131 + session->s_features = features; 4145 4132 if (session->s_state == CEPH_MDS_SESSION_OPEN) { 4146 4133 pr_notice_client(cl, "mds%d is already opened\n", 4147 4134 session->s_mds); 4148 4135 } else { 4149 4136 session->s_state = CEPH_MDS_SESSION_OPEN; 4150 - session->s_features = features; 4151 4137 renewed_caps(mdsc, session, 0); 4152 4138 if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, 4153 4139 &session->s_features)) ··· 5884 5870 5885 5871 pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", 5886 5872 s->s_mds); 5887 - if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) 5873 + if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO && 5874 + ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT) 5888 5875 send_mds_reconnect(mdsc, s); 5889 5876 } 5890 5877

+22 -17

fs/ceph/quota.c

··· 197 197 } 198 198 199 199 /* 200 - * This function walks through the snaprealm for an inode and returns the 201 - * ceph_snap_realm for the first snaprealm that has quotas set (max_files, 200 + * This function walks through the snaprealm for an inode and set the 201 + * realmp with the first snaprealm that has quotas set (max_files, 202 202 * max_bytes, or any, depending on the 'which_quota' argument). If the root is 203 - * reached, return the root ceph_snap_realm instead. 203 + * reached, set the realmp with the root ceph_snap_realm instead. 204 204 * 205 205 * Note that the caller is responsible for calling ceph_put_snap_realm() on the 206 206 * returned realm. ··· 211 211 * this function will return -EAGAIN; otherwise, the snaprealms walk-through 212 212 * will be restarted. 213 213 */ 214 - static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, 215 - struct inode *inode, 216 - enum quota_get_realm which_quota, 217 - bool retry) 214 + static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode, 215 + enum quota_get_realm which_quota, 216 + struct ceph_snap_realm **realmp, bool retry) 218 217 { 219 218 struct ceph_client *cl = mdsc->fsc->client; 220 219 struct ceph_inode_info *ci = NULL; ··· 221 222 struct inode *in; 222 223 bool has_quota; 223 224 225 + if (realmp) 226 + *realmp = NULL; 224 227 if (ceph_snap(inode) != CEPH_NOSNAP) 225 - return NULL; 228 + return 0; 226 229 227 230 restart: 228 231 realm = ceph_inode(inode)->i_snap_realm; ··· 251 250 break; 252 251 ceph_put_snap_realm(mdsc, realm); 253 252 if (!retry) 254 - return ERR_PTR(-EAGAIN); 253 + return -EAGAIN; 255 254 goto restart; 256 255 } 257 256 ··· 260 259 iput(in); 261 260 262 261 next = realm->parent; 263 - if (has_quota || !next) 264 - return realm; 262 + if (has_quota || !next) { 263 + if (realmp) 264 + *realmp = realm; 265 + return 0; 266 + } 265 267 266 268 ceph_get_snap_realm(mdsc, next); 267 269 ceph_put_snap_realm(mdsc, realm); ··· 273 269 if (realm) 274 270 ceph_put_snap_realm(mdsc, realm); 275 271 276 - return NULL; 272 + return 0; 277 273 } 278 274 279 275 bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) ··· 281 277 struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); 282 278 struct ceph_snap_realm *old_realm, *new_realm; 283 279 bool is_same; 280 + int ret; 284 281 285 282 restart: 286 283 /* ··· 291 286 * dropped and we can then restart the whole operation. 292 287 */ 293 288 down_read(&mdsc->snap_rwsem); 294 - old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true); 295 - new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false); 296 - if (PTR_ERR(new_realm) == -EAGAIN) { 289 + get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true); 290 + ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false); 291 + if (ret == -EAGAIN) { 297 292 up_read(&mdsc->snap_rwsem); 298 293 if (old_realm) 299 294 ceph_put_snap_realm(mdsc, old_realm); ··· 497 492 bool is_updated = false; 498 493 499 494 down_read(&mdsc->snap_rwsem); 500 - realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), 501 - QUOTA_GET_MAX_BYTES, true); 495 + get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES, 496 + &realm, true); 502 497 up_read(&mdsc->snap_rwsem); 503 498 if (!realm) 504 499 return false;

+14

fs/ceph/super.h

··· 3 3 #define _FS_CEPH_SUPER_H 4 4 5 5 #include <linux/ceph/ceph_debug.h> 6 + #include <linux/ceph/osd_client.h> 6 7 7 8 #include <asm/unaligned.h> 8 9 #include <linux/backing-dev.h> ··· 1406 1405 1407 1406 if (had_quota != has_quota) 1408 1407 ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota); 1408 + } 1409 + 1410 + static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len) 1411 + { 1412 + int cnt = 0; 1413 + 1414 + if (IS_ENCRYPTED(inode)) { 1415 + cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT; 1416 + if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL) 1417 + cnt = 0; 1418 + } 1419 + 1420 + return cnt; 1409 1421 } 1410 1422 1411 1423 extern void ceph_handle_quota(struct ceph_mds_client *mdsc,

+5 -2

include/linux/ceph/osd_client.h

··· 572 572 */ 573 573 #define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 574 574 575 - static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) 575 + static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) 576 576 { 577 - return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); 577 + if (!cnt) 578 + cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL; 579 + 580 + return __ceph_alloc_sparse_ext_map(op, cnt); 578 581 } 579 582 580 583 extern void ceph_osdc_get_request(struct ceph_osd_request *req);

+4 -13

net/ceph/osd_client.c

··· 5850 5850 } 5851 5851 #endif 5852 5852 5853 - #define MAX_EXTENTS 4096 5854 - 5855 5853 static int osd_sparse_read(struct ceph_connection *con, 5856 5854 struct ceph_msg_data_cursor *cursor, 5857 5855 char **pbuf) ··· 5880 5882 5881 5883 if (count > 0) { 5882 5884 if (!sr->sr_extent || count > sr->sr_ext_len) { 5883 - /* 5884 - * Apply a hard cap to the number of extents. 5885 - * If we have more, assume something is wrong. 5886 - */ 5887 - if (count > MAX_EXTENTS) { 5888 - dout("%s: OSD returned 0x%x extents in a single reply!\n", 5889 - __func__, count); 5890 - return -EREMOTEIO; 5891 - } 5892 - 5893 5885 /* no extent array provided, or too short */ 5894 5886 kfree(sr->sr_extent); 5895 5887 sr->sr_extent = kmalloc_array(count, 5896 5888 sizeof(*sr->sr_extent), 5897 5889 GFP_NOIO); 5898 - if (!sr->sr_extent) 5890 + if (!sr->sr_extent) { 5891 + pr_err("%s: failed to allocate %u extents\n", 5892 + __func__, count); 5899 5893 return -ENOMEM; 5894 + } 5900 5895 sr->sr_ext_len = count; 5901 5896 } 5902 5897 ret = count * sizeof(*sr->sr_extent);

Configure Feed

Configure Feed