Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ceph: parse subvolume_id from InodeStat v9 and store in inode

Add support for parsing the subvolume_id field from InodeStat v9 and
storing it in the inode for later use by subvolume metrics tracking.

The subvolume_id identifies which CephFS subvolume an inode belongs to,
enabling per-subvolume I/O metrics collection and reporting.

This patch:
- Adds subvolume_id field to struct ceph_mds_reply_info_in
- Adds i_subvolume_id field to struct ceph_inode_info
- Parses subvolume_id from v9 InodeStat in parse_reply_info_in()
- Adds ceph_inode_set_subvolume() helper to propagate the ID to inodes
- Initializes i_subvolume_id at inode allocation and clears it on destroy

Signed-off-by: Alex Markuze <amarkuze@redhat.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

Authored by Alex Markuze and committed by Ilya Dryomov
4a1c5434 e58103ca

+76 -14
+41
fs/ceph/inode.c
··· 638 638 639 639 ci->i_max_bytes = 0; 640 640 ci->i_max_files = 0; 641 + ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE; 641 642 642 643 memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); 643 644 memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); ··· 742 741 doutc(cl, "%p ino %llx.%llx\n", inode, ceph_vinop(inode)); 743 742 744 743 percpu_counter_dec(&mdsc->metric.total_inodes); 744 + 745 + ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE; 745 746 746 747 netfs_wait_for_outstanding_io(inode); 747 748 truncate_inode_pages_final(&inode->i_data); ··· 874 871 } 875 872 } 876 873 return queue_trunc; 874 + } 875 + 876 + /* 877 + * Set the subvolume ID for an inode. 878 + * 879 + * The subvolume_id identifies which CephFS subvolume this inode belongs to. 880 + * CEPH_SUBVOLUME_ID_NONE (0) means unknown/unset - the MDS only sends 881 + * non-zero IDs for inodes within subvolumes. 882 + * 883 + * An inode's subvolume membership is immutable - once an inode is created 884 + * in a subvolume, it stays there. Therefore, if we already have a valid 885 + * (non-zero) subvolume_id and receive a different one, that indicates a bug. 
886 + */ 887 + void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id) 888 + { 889 + struct ceph_inode_info *ci; 890 + u64 old; 891 + 892 + if (!inode || subvolume_id == CEPH_SUBVOLUME_ID_NONE) 893 + return; 894 + 895 + ci = ceph_inode(inode); 896 + old = READ_ONCE(ci->i_subvolume_id); 897 + 898 + if (old == subvolume_id) 899 + return; 900 + 901 + if (old != CEPH_SUBVOLUME_ID_NONE) { 902 + /* subvolume_id should not change once set */ 903 + WARN_ON_ONCE(1); 904 + return; 905 + } 906 + 907 + WRITE_ONCE(ci->i_subvolume_id, subvolume_id); 877 908 } 878 909 879 910 void ceph_fill_file_time(struct inode *inode, int issued, ··· 1113 1076 new_issued = ~issued & info_caps; 1114 1077 1115 1078 __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); 1079 + ceph_inode_set_subvolume(inode, iinfo->subvolume_id); 1116 1080 1117 1081 #ifdef CONFIG_FS_ENCRYPTION 1118 1082 if (iinfo->fscrypt_auth_len && ··· 1621 1583 goto done; 1622 1584 } 1623 1585 if (parent_dir) { 1586 + ceph_inode_set_subvolume(parent_dir, 1587 + rinfo->diri.subvolume_id); 1624 1588 err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri, 1625 1589 rinfo->dirfrag, session, -1, 1626 1590 &req->r_caps_reservation); ··· 1711 1671 BUG_ON(!req->r_target_inode); 1712 1672 1713 1673 in = req->r_target_inode; 1674 + ceph_inode_set_subvolume(in, rinfo->targeti.subvolume_id); 1714 1675 err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti, 1715 1676 NULL, session, 1716 1677 (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+24 -14
fs/ceph/mds_client.c
··· 96 96 return -EIO; 97 97 } 98 98 99 - /* 100 - * parse individual inode info 101 - */ 102 99 static int parse_reply_info_in(void **p, void *end, 103 100 struct ceph_mds_reply_info_in *info, 104 - u64 features) 101 + u64 features, 102 + struct ceph_mds_client *mdsc) 105 103 { 106 104 int err = 0; 107 105 u8 struct_v = 0; 106 + u8 struct_compat = 0; 107 + u32 struct_len = 0; 108 + 109 + info->subvolume_id = CEPH_SUBVOLUME_ID_NONE; 108 110 109 111 if (features == (u64)-1) { 110 - u32 struct_len; 111 - u8 struct_compat; 112 112 ceph_decode_8_safe(p, end, struct_v, bad); 113 113 ceph_decode_8_safe(p, end, struct_compat, bad); 114 114 /* struct_v is expected to be >= 1. we only understand ··· 252 252 ceph_decode_skip_n(p, end, v8_struct_len, bad); 253 253 } 254 254 255 + /* struct_v 9 added subvolume_id */ 256 + if (struct_v >= 9) 257 + ceph_decode_64_safe(p, end, info->subvolume_id, bad); 258 + 255 259 *p = end; 256 260 } else { 257 261 /* legacy (unversioned) struct */ ··· 388 384 */ 389 385 static int parse_reply_info_trace(void **p, void *end, 390 386 struct ceph_mds_reply_info_parsed *info, 391 - u64 features) 387 + u64 features, 388 + struct ceph_mds_client *mdsc) 392 389 { 393 390 int err; 394 391 395 392 if (info->head->is_dentry) { 396 - err = parse_reply_info_in(p, end, &info->diri, features); 393 + err = parse_reply_info_in(p, end, &info->diri, features, mdsc); 397 394 if (err < 0) 398 395 goto out_bad; 399 396 ··· 414 409 } 415 410 416 411 if (info->head->is_target) { 417 - err = parse_reply_info_in(p, end, &info->targeti, features); 412 + err = parse_reply_info_in(p, end, &info->targeti, features, 413 + mdsc); 418 414 if (err < 0) 419 415 goto out_bad; 420 416 } ··· 436 430 */ 437 431 static int parse_reply_info_readdir(void **p, void *end, 438 432 struct ceph_mds_request *req, 439 - u64 features) 433 + u64 features, 434 + struct ceph_mds_client *mdsc) 440 435 { 441 436 struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; 442 437 struct 
ceph_client *cl = req->r_mdsc->fsc->client; ··· 552 545 rde->name_len = oname.len; 553 546 554 547 /* inode */ 555 - err = parse_reply_info_in(p, end, &rde->inode, features); 548 + err = parse_reply_info_in(p, end, &rde->inode, features, mdsc); 556 549 if (err < 0) 557 550 goto out_bad; 558 551 /* ceph_readdir_prepopulate() will update it */ ··· 760 753 if (op == CEPH_MDS_OP_GETFILELOCK) 761 754 return parse_reply_info_filelock(p, end, info, features); 762 755 else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP) 763 - return parse_reply_info_readdir(p, end, req, features); 756 + return parse_reply_info_readdir(p, end, req, features, 757 + req->r_mdsc); 764 758 else if (op == CEPH_MDS_OP_CREATE) 765 759 return parse_reply_info_create(p, end, info, features, s); 766 760 else if (op == CEPH_MDS_OP_GETVXATTR) ··· 790 782 ceph_decode_32_safe(&p, end, len, bad); 791 783 if (len > 0) { 792 784 ceph_decode_need(&p, end, len, bad); 793 - err = parse_reply_info_trace(&p, p+len, info, features); 785 + err = parse_reply_info_trace(&p, p + len, info, features, 786 + s->s_mdsc); 794 787 if (err < 0) 795 788 goto out_bad; 796 789 } ··· 800 791 ceph_decode_32_safe(&p, end, len, bad); 801 792 if (len > 0) { 802 793 ceph_decode_need(&p, end, len, bad); 803 - err = parse_reply_info_extra(&p, p+len, req, features, s); 794 + err = parse_reply_info_extra(&p, p + len, req, features, s); 804 795 if (err < 0) 805 796 goto out_bad; 806 797 } ··· 3998 3989 goto out_err; 3999 3990 } 4000 3991 req->r_target_inode = in; 3992 + ceph_inode_set_subvolume(in, rinfo->targeti.subvolume_id); 4001 3993 } 4002 3994 4003 3995 mutex_lock(&session->s_mutex);
+1
fs/ceph/mds_client.h
··· 118 118 u32 fscrypt_file_len; 119 119 u64 rsnaps; 120 120 u64 change_attr; 121 + u64 subvolume_id; 121 122 }; 122 123 123 124 struct ceph_mds_reply_dir_entry {
+10
fs/ceph/super.h
··· 398 398 /* quotas */ 399 399 u64 i_max_bytes, i_max_files; 400 400 401 + /* 402 + * Subvolume ID this inode belongs to. CEPH_SUBVOLUME_ID_NONE (0) 403 + * means unknown/unset, matching the FUSE client convention. 404 + * Once set to a valid (non-zero) value, it should not change 405 + * during the inode's lifetime. 406 + */ 407 + #define CEPH_SUBVOLUME_ID_NONE 0 408 + u64 i_subvolume_id; 409 + 401 410 s32 i_dir_pin; 402 411 403 412 struct rb_root i_fragtree; ··· 1078 1069 extern struct inode *ceph_get_snapdir(struct inode *parent); 1079 1070 extern int ceph_fill_file_size(struct inode *inode, int issued, 1080 1071 u32 truncate_seq, u64 truncate_size, u64 size); 1072 + extern void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id); 1081 1073 extern void ceph_fill_file_time(struct inode *inode, int issued, 1082 1074 u64 time_warp_seq, struct timespec64 *ctime, 1083 1075 struct timespec64 *mtime,