Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nvme-6.8-2023-12-21' of git://git.infradead.org/nvme into for-6.8/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.8

- nvme fabrics spec updates (Guixin, Max)
- nvme target updates (Guixin, Evan)
- nvme attribute refactoring (Daniel)
- nvme-fc numa fix (Keith)"

* tag 'nvme-6.8-2023-12-21' of git://git.infradead.org/nvme:
nvme-fc: set numa_node after nvme_init_ctrl
nvme-fabrics: don't check discovery ioccsz/iorcsz
nvmet: configfs: use ctrl->instance to track passthru subsystems
nvme: repack struct nvme_ns_head
nvme: add csi, ms and nuse to sysfs
nvme: rename ns attribute group
nvme: refactor ns info setup function
nvme: refactor ns info helpers
nvme: move ns id info to struct nvme_ns_head
nvmet: remove cntlid_min and cntlid_max check in nvmet_alloc_ctrl
nvmet: allow identical cntlid_min and cntlid_max settings
nvme-fabrics: check ioccsz and iorcsz
nvme: introduce nvme_check_ctrl_fabric_info helper

+277 -156
+128 -96
drivers/nvme/host/core.c
··· 20 20 #include <linux/ptrace.h> 21 21 #include <linux/nvme_ioctl.h> 22 22 #include <linux/pm_qos.h> 23 + #include <linux/ratelimit.h> 23 24 #include <asm/unaligned.h> 24 25 25 26 #include "nvme.h" ··· 313 312 struct nvme_request *nr = nvme_req(req); 314 313 315 314 if (ns) { 316 - pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", 315 + pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %u blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", 317 316 ns->disk ? ns->disk->disk_name : "?", 318 317 nvme_get_opcode_str(nr->cmd->common.opcode), 319 318 nr->cmd->common.opcode, 320 - (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)), 321 - (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift, 319 + nvme_sect_to_lba(ns->head, blk_rq_pos(req)), 320 + blk_rq_bytes(req) >> ns->head->lba_shift, 322 321 nvme_get_error_status_str(nr->status), 323 322 nr->status >> 8 & 7, /* Status Code Type */ 324 323 nr->status & 0xff, /* Status Code */ ··· 373 372 static inline void nvme_end_req_zoned(struct request *req) 374 373 { 375 374 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && 376 - req_op(req) == REQ_OP_ZONE_APPEND) 377 - req->__sector = nvme_lba_to_sect(req->q->queuedata, 375 + req_op(req) == REQ_OP_ZONE_APPEND) { 376 + struct nvme_ns *ns = req->q->queuedata; 377 + 378 + req->__sector = nvme_lba_to_sect(ns->head, 378 379 le64_to_cpu(nvme_req(req)->result.u64)); 380 + } 379 381 } 380 382 381 383 static inline void nvme_end_req(struct request *req) ··· 795 791 } 796 792 797 793 if (queue_max_discard_segments(req->q) == 1) { 798 - u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req)); 799 - u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9); 794 + u64 slba = nvme_sect_to_lba(ns->head, blk_rq_pos(req)); 795 + u32 nlb = blk_rq_sectors(req) >> (ns->head->lba_shift - 9); 800 796 801 797 range[0].cattr = cpu_to_le32(0); 802 798 range[0].nlb = cpu_to_le32(nlb); ··· 804 800 n = 1; 805 801 } else { 806 802 __rq_for_each_bio(bio, req) { 807 - u64 slba = 
nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); 808 - u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; 803 + u64 slba = nvme_sect_to_lba(ns->head, 804 + bio->bi_iter.bi_sector); 805 + u32 nlb = bio->bi_iter.bi_size >> ns->head->lba_shift; 809 806 810 807 if (n < segments) { 811 808 range[n].cattr = cpu_to_le32(0); ··· 844 839 u64 ref48; 845 840 846 841 /* both rw and write zeroes share the same reftag format */ 847 - switch (ns->guard_type) { 842 + switch (ns->head->guard_type) { 848 843 case NVME_NVM_NS_16B_GUARD: 849 844 cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); 850 845 break; ··· 872 867 cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; 873 868 cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); 874 869 cmnd->write_zeroes.slba = 875 - cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); 870 + cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); 876 871 cmnd->write_zeroes.length = 877 - cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); 872 + cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); 878 873 879 - if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC)) 874 + if (!(req->cmd_flags & REQ_NOUNMAP) && 875 + (ns->head->features & NVME_NS_DEAC)) 880 876 cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC); 881 877 882 - if (nvme_ns_has_pi(ns)) { 878 + if (nvme_ns_has_pi(ns->head)) { 883 879 cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT); 884 880 885 - switch (ns->pi_type) { 881 + switch (ns->head->pi_type) { 886 882 case NVME_NS_DPS_PI_TYPE1: 887 883 case NVME_NS_DPS_PI_TYPE2: 888 884 nvme_set_ref_tag(ns, cmnd, req); ··· 915 909 cmnd->rw.cdw2 = 0; 916 910 cmnd->rw.cdw3 = 0; 917 911 cmnd->rw.metadata = 0; 918 - cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); 919 - cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); 912 + cmnd->rw.slba = 913 + cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); 914 + cmnd->rw.length = 915 + 
cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); 920 916 cmnd->rw.reftag = 0; 921 917 cmnd->rw.apptag = 0; 922 918 cmnd->rw.appmask = 0; 923 919 924 - if (ns->ms) { 920 + if (ns->head->ms) { 925 921 /* 926 922 * If formated with metadata, the block layer always provides a 927 923 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else ··· 931 923 * namespace capacity to zero to prevent any I/O. 932 924 */ 933 925 if (!blk_integrity_rq(req)) { 934 - if (WARN_ON_ONCE(!nvme_ns_has_pi(ns))) 926 + if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head))) 935 927 return BLK_STS_NOTSUPP; 936 928 control |= NVME_RW_PRINFO_PRACT; 937 929 } 938 930 939 - switch (ns->pi_type) { 931 + switch (ns->head->pi_type) { 940 932 case NVME_NS_DPS_PI_TYPE3: 941 933 control |= NVME_RW_PRINFO_PRCHK_GUARD; 942 934 break; ··· 1450 1442 return status; 1451 1443 } 1452 1444 1453 - static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, 1445 + int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, 1454 1446 struct nvme_id_ns **id) 1455 1447 { 1456 1448 struct nvme_command c = { }; ··· 1666 1658 } 1667 1659 1668 1660 #ifdef CONFIG_BLK_DEV_INTEGRITY 1669 - static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, 1670 - u32 max_integrity_segments) 1661 + static void nvme_init_integrity(struct gendisk *disk, 1662 + struct nvme_ns_head *head, u32 max_integrity_segments) 1671 1663 { 1672 1664 struct blk_integrity integrity = { }; 1673 1665 1674 - switch (ns->pi_type) { 1666 + switch (head->pi_type) { 1675 1667 case NVME_NS_DPS_PI_TYPE3: 1676 - switch (ns->guard_type) { 1668 + switch (head->guard_type) { 1677 1669 case NVME_NVM_NS_16B_GUARD: 1678 1670 integrity.profile = &t10_pi_type3_crc; 1679 1671 integrity.tag_size = sizeof(u16) + sizeof(u32); ··· 1691 1683 break; 1692 1684 case NVME_NS_DPS_PI_TYPE1: 1693 1685 case NVME_NS_DPS_PI_TYPE2: 1694 - switch (ns->guard_type) { 1686 + switch (head->guard_type) { 1695 1687 case NVME_NVM_NS_16B_GUARD: 1696 1688 
integrity.profile = &t10_pi_type1_crc; 1697 1689 integrity.tag_size = sizeof(u16); ··· 1712 1704 break; 1713 1705 } 1714 1706 1715 - integrity.tuple_size = ns->ms; 1707 + integrity.tuple_size = head->ms; 1716 1708 blk_integrity_register(disk, &integrity); 1717 1709 blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); 1718 1710 } 1719 1711 #else 1720 - static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, 1721 - u32 max_integrity_segments) 1712 + static void nvme_init_integrity(struct gendisk *disk, 1713 + struct nvme_ns_head *head, u32 max_integrity_segments) 1722 1714 { 1723 1715 } 1724 1716 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 1725 1717 1726 - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) 1718 + static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, 1719 + struct nvme_ns_head *head) 1727 1720 { 1728 - struct nvme_ctrl *ctrl = ns->ctrl; 1729 1721 struct request_queue *queue = disk->queue; 1730 1722 u32 size = queue_logical_block_size(queue); 1731 1723 1732 - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) 1733 - ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); 1724 + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) 1725 + ctrl->max_discard_sectors = 1726 + nvme_lba_to_sect(head, ctrl->dmrsl); 1734 1727 1735 1728 if (ctrl->max_discard_sectors == 0) { 1736 1729 blk_queue_max_discard_sectors(queue, 0); ··· 1762 1753 a->csi == b->csi; 1763 1754 } 1764 1755 1765 - static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) 1756 + static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, 1757 + struct nvme_id_ns *id) 1766 1758 { 1767 1759 bool first = id->dps & NVME_NS_DPS_PI_FIRST; 1768 1760 unsigned lbaf = nvme_lbaf_index(id->flbas); 1769 - struct nvme_ctrl *ctrl = ns->ctrl; 1770 1761 struct nvme_command c = { }; 1771 1762 struct nvme_id_ns_nvm *nvm; 1772 1763 int ret = 0; 1773 1764 u32 elbaf; 1774 1765 
1775 - ns->pi_size = 0; 1776 - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); 1766 + head->pi_size = 0; 1767 + head->ms = le16_to_cpu(id->lbaf[lbaf].ms); 1777 1768 if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { 1778 - ns->pi_size = sizeof(struct t10_pi_tuple); 1779 - ns->guard_type = NVME_NVM_NS_16B_GUARD; 1769 + head->pi_size = sizeof(struct t10_pi_tuple); 1770 + head->guard_type = NVME_NVM_NS_16B_GUARD; 1780 1771 goto set_pi; 1781 1772 } 1782 1773 ··· 1785 1776 return -ENOMEM; 1786 1777 1787 1778 c.identify.opcode = nvme_admin_identify; 1788 - c.identify.nsid = cpu_to_le32(ns->head->ns_id); 1779 + c.identify.nsid = cpu_to_le32(head->ns_id); 1789 1780 c.identify.cns = NVME_ID_CNS_CS_NS; 1790 1781 c.identify.csi = NVME_CSI_NVM; 1791 1782 1792 - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, nvm, sizeof(*nvm)); 1783 + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); 1793 1784 if (ret) 1794 1785 goto free_data; 1795 1786 ··· 1799 1790 if (nvme_elbaf_sts(elbaf)) 1800 1791 goto free_data; 1801 1792 1802 - ns->guard_type = nvme_elbaf_guard_type(elbaf); 1803 - switch (ns->guard_type) { 1793 + head->guard_type = nvme_elbaf_guard_type(elbaf); 1794 + switch (head->guard_type) { 1804 1795 case NVME_NVM_NS_64B_GUARD: 1805 - ns->pi_size = sizeof(struct crc64_pi_tuple); 1796 + head->pi_size = sizeof(struct crc64_pi_tuple); 1806 1797 break; 1807 1798 case NVME_NVM_NS_16B_GUARD: 1808 - ns->pi_size = sizeof(struct t10_pi_tuple); 1799 + head->pi_size = sizeof(struct t10_pi_tuple); 1809 1800 break; 1810 1801 default: 1811 1802 break; ··· 1814 1805 free_data: 1815 1806 kfree(nvm); 1816 1807 set_pi: 1817 - if (ns->pi_size && (first || ns->ms == ns->pi_size)) 1818 - ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; 1808 + if (head->pi_size && (first || head->ms == head->pi_size)) 1809 + head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; 1819 1810 else 1820 - ns->pi_type = 0; 1811 + head->pi_type = 0; 1821 1812 1822 1813 return ret; 1823 1814 } 1824 1815 1825 - static int 
nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) 1816 + static int nvme_configure_metadata(struct nvme_ctrl *ctrl, 1817 + struct nvme_ns_head *head, struct nvme_id_ns *id) 1826 1818 { 1827 - struct nvme_ctrl *ctrl = ns->ctrl; 1828 1819 int ret; 1829 1820 1830 - ret = nvme_init_ms(ns, id); 1821 + ret = nvme_init_ms(ctrl, head, id); 1831 1822 if (ret) 1832 1823 return ret; 1833 1824 1834 - ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); 1835 - if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) 1825 + head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); 1826 + if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) 1836 1827 return 0; 1837 1828 1838 1829 if (ctrl->ops->flags & NVME_F_FABRICS) { ··· 1844 1835 if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) 1845 1836 return 0; 1846 1837 1847 - ns->features |= NVME_NS_EXT_LBAS; 1838 + head->features |= NVME_NS_EXT_LBAS; 1848 1839 1849 1840 /* 1850 1841 * The current fabrics transport drivers support namespace ··· 1855 1846 * Note, this check will need to be modified if any drivers 1856 1847 * gain the ability to use other metadata formats. 1857 1848 */ 1858 - if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns)) 1859 - ns->features |= NVME_NS_METADATA_SUPPORTED; 1849 + if (ctrl->max_integrity_segments && nvme_ns_has_pi(head)) 1850 + head->features |= NVME_NS_METADATA_SUPPORTED; 1860 1851 } else { 1861 1852 /* 1862 1853 * For PCIe controllers, we can't easily remap the separate ··· 1865 1856 * We allow extended LBAs for the passthrough interface, though. 
1866 1857 */ 1867 1858 if (id->flbas & NVME_NS_FLBAS_META_EXT) 1868 - ns->features |= NVME_NS_EXT_LBAS; 1859 + head->features |= NVME_NS_EXT_LBAS; 1869 1860 else 1870 - ns->features |= NVME_NS_METADATA_SUPPORTED; 1861 + head->features |= NVME_NS_METADATA_SUPPORTED; 1871 1862 } 1872 1863 return 0; 1873 1864 } ··· 1890 1881 blk_queue_write_cache(q, vwc, vwc); 1891 1882 } 1892 1883 1893 - static void nvme_update_disk_info(struct gendisk *disk, 1894 - struct nvme_ns *ns, struct nvme_id_ns *id) 1884 + static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, 1885 + struct nvme_ns_head *head, struct nvme_id_ns *id) 1895 1886 { 1896 - sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); 1897 - u32 bs = 1U << ns->lba_shift; 1887 + sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze)); 1888 + u32 bs = 1U << head->lba_shift; 1898 1889 u32 atomic_bs, phys_bs, io_opt = 0; 1899 1890 1900 1891 /* 1901 1892 * The block layer can't support LBA sizes larger than the page size 1902 1893 * yet, so catch this early and don't allow block I/O. 1903 1894 */ 1904 - if (ns->lba_shift > PAGE_SHIFT) { 1895 + if (head->lba_shift > PAGE_SHIFT) { 1905 1896 capacity = 0; 1906 1897 bs = (1 << 9); 1907 1898 } ··· 1918 1909 if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) 1919 1910 atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; 1920 1911 else 1921 - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; 1912 + atomic_bs = (1 + ctrl->subsys->awupf) * bs; 1922 1913 } 1923 1914 1924 1915 if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { ··· 1944 1935 * I/O to namespaces with metadata except when the namespace supports 1945 1936 * PI, as it can strip/insert in that case. 
1946 1937 */ 1947 - if (ns->ms) { 1938 + if (head->ms) { 1948 1939 if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && 1949 - (ns->features & NVME_NS_METADATA_SUPPORTED)) 1950 - nvme_init_integrity(disk, ns, 1951 - ns->ctrl->max_integrity_segments); 1952 - else if (!nvme_ns_has_pi(ns)) 1940 + (head->features & NVME_NS_METADATA_SUPPORTED)) 1941 + nvme_init_integrity(disk, head, 1942 + ctrl->max_integrity_segments); 1943 + else if (!nvme_ns_has_pi(head)) 1953 1944 capacity = 0; 1954 1945 } 1955 1946 1956 1947 set_capacity_and_notify(disk, capacity); 1957 1948 1958 - nvme_config_discard(disk, ns); 1949 + nvme_config_discard(ctrl, disk, head); 1959 1950 blk_queue_max_write_zeroes_sectors(disk->queue, 1960 - ns->ctrl->max_zeroes_sectors); 1951 + ctrl->max_zeroes_sectors); 1961 1952 } 1962 1953 1963 1954 static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) ··· 1980 1971 is_power_of_2(ctrl->max_hw_sectors)) 1981 1972 iob = ctrl->max_hw_sectors; 1982 1973 else 1983 - iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); 1974 + iob = nvme_lba_to_sect(ns->head, le16_to_cpu(id->noiob)); 1984 1975 1985 1976 if (!iob) 1986 1977 return; ··· 2040 2031 2041 2032 blk_mq_freeze_queue(ns->disk->queue); 2042 2033 lbaf = nvme_lbaf_index(id->flbas); 2043 - ns->lba_shift = id->lbaf[lbaf].ds; 2034 + ns->head->lba_shift = id->lbaf[lbaf].ds; 2035 + ns->head->nuse = le64_to_cpu(id->nuse); 2044 2036 nvme_set_queue_limits(ns->ctrl, ns->queue); 2045 2037 2046 - ret = nvme_configure_metadata(ns, id); 2038 + ret = nvme_configure_metadata(ns->ctrl, ns->head, id); 2047 2039 if (ret < 0) { 2048 2040 blk_mq_unfreeze_queue(ns->disk->queue); 2049 2041 goto out; 2050 2042 } 2051 2043 nvme_set_chunk_sectors(ns, id); 2052 - nvme_update_disk_info(ns->disk, ns, id); 2044 + nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id); 2053 2045 2054 2046 if (ns->head->ids.csi == NVME_CSI_ZNS) { 2055 2047 ret = nvme_update_zone_info(ns, lbaf); ··· 2067 2057 * do not return zeroes. 
2068 2058 */ 2069 2059 if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) 2070 - ns->features |= NVME_NS_DEAC; 2060 + ns->head->features |= NVME_NS_DEAC; 2071 2061 set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); 2072 2062 set_bit(NVME_NS_READY, &ns->flags); 2073 2063 blk_mq_unfreeze_queue(ns->disk->queue); ··· 2080 2070 2081 2071 if (nvme_ns_head_multipath(ns->head)) { 2082 2072 blk_mq_freeze_queue(ns->head->disk->queue); 2083 - nvme_update_disk_info(ns->head->disk, ns, id); 2073 + nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id); 2084 2074 set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); 2085 2075 nvme_mpath_revalidate_paths(ns); 2086 2076 blk_stack_limits(&ns->head->disk->queue->limits, ··· 3013 3003 return 0; 3014 3004 } 3015 3005 3006 + static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) 3007 + { 3008 + /* 3009 + * In fabrics we need to verify the cntlid matches the 3010 + * admin connect 3011 + */ 3012 + if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { 3013 + dev_err(ctrl->device, 3014 + "Mismatching cntlid: Connect %u vs Identify %u, rejecting\n", 3015 + ctrl->cntlid, le16_to_cpu(id->cntlid)); 3016 + return -EINVAL; 3017 + } 3018 + 3019 + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { 3020 + dev_err(ctrl->device, 3021 + "keep-alive support is mandatory for fabrics\n"); 3022 + return -EINVAL; 3023 + } 3024 + 3025 + if (!nvme_discovery_ctrl(ctrl) && ctrl->ioccsz < 4) { 3026 + dev_err(ctrl->device, 3027 + "I/O queue command capsule supported size %d < 4\n", 3028 + ctrl->ioccsz); 3029 + return -EINVAL; 3030 + } 3031 + 3032 + if (!nvme_discovery_ctrl(ctrl) && ctrl->iorcsz < 1) { 3033 + dev_err(ctrl->device, 3034 + "I/O queue response capsule supported size %d < 1\n", 3035 + ctrl->iorcsz); 3036 + return -EINVAL; 3037 + } 3038 + 3039 + return 0; 3040 + } 3041 + 3016 3042 static int nvme_init_identify(struct nvme_ctrl *ctrl) 3017 3043 { 3018 3044 struct nvme_id_ctrl *id; ··· 3161 3115 
ctrl->iorcsz = le32_to_cpu(id->iorcsz); 3162 3116 ctrl->maxcmd = le16_to_cpu(id->maxcmd); 3163 3117 3164 - /* 3165 - * In fabrics we need to verify the cntlid matches the 3166 - * admin connect 3167 - */ 3168 - if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { 3169 - dev_err(ctrl->device, 3170 - "Mismatching cntlid: Connect %u vs Identify " 3171 - "%u, rejecting\n", 3172 - ctrl->cntlid, le16_to_cpu(id->cntlid)); 3173 - ret = -EINVAL; 3118 + ret = nvme_check_ctrl_fabric_info(ctrl, id); 3119 + if (ret) 3174 3120 goto out_free; 3175 - } 3176 - 3177 - if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { 3178 - dev_err(ctrl->device, 3179 - "keep-alive support is mandatory for fabrics\n"); 3180 - ret = -EINVAL; 3181 - goto out_free; 3182 - } 3183 3121 } else { 3184 3122 ctrl->hmpre = le32_to_cpu(id->hmpre); 3185 3123 ctrl->hmmin = le32_to_cpu(id->hmmin); ··· 3422 3392 head->ns_id = info->nsid; 3423 3393 head->ids = info->ids; 3424 3394 head->shared = info->is_shared; 3395 + ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1); 3396 + ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE); 3425 3397 kref_init(&head->ref); 3426 3398 3427 3399 if (head->ids.csi) { ··· 3675 3643 up_write(&ctrl->namespaces_rwsem); 3676 3644 nvme_get_ctrl(ctrl); 3677 3645 3678 - if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) 3646 + if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups)) 3679 3647 goto out_cleanup_ns_from_list; 3680 3648 3681 3649 if (!nvme_ns_head_multipath(ns->head))
+2 -4
drivers/nvme/host/fc.c
··· 3509 3509 3510 3510 ctrl->ctrl.opts = opts; 3511 3511 ctrl->ctrl.nr_reconnects = 0; 3512 - if (lport->dev) 3513 - ctrl->ctrl.numa_node = dev_to_node(lport->dev); 3514 - else 3515 - ctrl->ctrl.numa_node = NUMA_NO_NODE; 3516 3512 INIT_LIST_HEAD(&ctrl->ctrl_list); 3517 3513 ctrl->lport = lport; 3518 3514 ctrl->rport = rport; ··· 3553 3557 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 3554 3558 if (ret) 3555 3559 goto out_free_queues; 3560 + if (lport->dev) 3561 + ctrl->ctrl.numa_node = dev_to_node(lport->dev); 3556 3562 3557 3563 /* at this point, teardown path changes to ref counting on nvme ctrl */ 3558 3564
+4 -4
drivers/nvme/host/ioctl.c
··· 224 224 return -EINVAL; 225 225 } 226 226 227 - length = (io.nblocks + 1) << ns->lba_shift; 227 + length = (io.nblocks + 1) << ns->head->lba_shift; 228 228 229 229 if ((io.control & NVME_RW_PRINFO_PRACT) && 230 - ns->ms == sizeof(struct t10_pi_tuple)) { 230 + ns->head->ms == sizeof(struct t10_pi_tuple)) { 231 231 /* 232 232 * Protection information is stripped/inserted by the 233 233 * controller. ··· 237 237 meta_len = 0; 238 238 metadata = NULL; 239 239 } else { 240 - meta_len = (io.nblocks + 1) * ns->ms; 240 + meta_len = (io.nblocks + 1) * ns->head->ms; 241 241 metadata = nvme_to_user_ptr(io.metadata); 242 242 } 243 243 244 - if (ns->features & NVME_NS_EXT_LBAS) { 244 + if (ns->head->features & NVME_NS_EXT_LBAS) { 245 245 length += meta_len; 246 246 meta_len = 0; 247 247 } else if (meta_len) {
+1 -1
drivers/nvme/host/multipath.c
··· 579 579 */ 580 580 if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { 581 581 rc = device_add_disk(&head->subsys->dev, head->disk, 582 - nvme_ns_id_attr_groups); 582 + nvme_ns_attr_groups); 583 583 if (rc) { 584 584 clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags); 585 585 return;
+25 -19
drivers/nvme/host/nvme.h
··· 16 16 #include <linux/rcupdate.h> 17 17 #include <linux/wait.h> 18 18 #include <linux/t10-pi.h> 19 + #include <linux/ratelimit_types.h> 19 20 20 21 #include <trace/events/block.h> 21 22 ··· 440 439 struct list_head list; 441 440 struct srcu_struct srcu; 442 441 struct nvme_subsystem *subsys; 443 - unsigned ns_id; 444 442 struct nvme_ns_ids ids; 445 443 struct list_head entry; 446 444 struct kref ref; 447 445 bool shared; 448 446 int instance; 449 447 struct nvme_effects_log *effects; 448 + u64 nuse; 449 + unsigned ns_id; 450 + int lba_shift; 451 + u16 ms; 452 + u16 pi_size; 453 + u8 pi_type; 454 + u8 guard_type; 455 + u16 sgs; 456 + u32 sws; 457 + #ifdef CONFIG_BLK_DEV_ZONED 458 + u64 zsze; 459 + #endif 460 + unsigned long features; 461 + 462 + struct ratelimit_state rs_nuse; 450 463 451 464 struct cdev cdev; 452 465 struct device cdev_device; ··· 502 487 struct kref kref; 503 488 struct nvme_ns_head *head; 504 489 505 - int lba_shift; 506 - u16 ms; 507 - u16 pi_size; 508 - u16 sgs; 509 - u32 sws; 510 - u8 pi_type; 511 - u8 guard_type; 512 - #ifdef CONFIG_BLK_DEV_ZONED 513 - u64 zsze; 514 - #endif 515 - unsigned long features; 516 490 unsigned long flags; 517 491 #define NVME_NS_REMOVING 0 518 492 #define NVME_NS_ANA_PENDING 2 ··· 516 512 }; 517 513 518 514 /* NVMe ns supports metadata actions by the controller (generate/strip) */ 519 - static inline bool nvme_ns_has_pi(struct nvme_ns *ns) 515 + static inline bool nvme_ns_has_pi(struct nvme_ns_head *head) 520 516 { 521 - return ns->pi_type && ns->ms == ns->pi_size; 517 + return head->pi_type && head->ms == head->pi_size; 522 518 } 523 519 524 520 struct nvme_ctrl_ops { ··· 650 646 /* 651 647 * Convert a 512B sector number to a device logical block number. 
652 648 */ 653 - static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) 649 + static inline u64 nvme_sect_to_lba(struct nvme_ns_head *head, sector_t sector) 654 650 { 655 - return sector >> (ns->lba_shift - SECTOR_SHIFT); 651 + return sector >> (head->lba_shift - SECTOR_SHIFT); 656 652 } 657 653 658 654 /* 659 655 * Convert a device logical block number to a 512B sector number. 660 656 */ 661 - static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) 657 + static inline sector_t nvme_lba_to_sect(struct nvme_ns_head *head, u64 lba) 662 658 { 663 - return lba << (ns->lba_shift - SECTOR_SHIFT); 659 + return lba << (head->lba_shift - SECTOR_SHIFT); 664 660 } 665 661 666 662 /* ··· 866 862 unsigned int issue_flags); 867 863 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, 868 864 unsigned int issue_flags); 865 + int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, 866 + struct nvme_id_ns **id); 869 867 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); 870 868 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); 871 869 872 - extern const struct attribute_group *nvme_ns_id_attr_groups[]; 870 + extern const struct attribute_group *nvme_ns_attr_groups[]; 873 871 extern const struct pr_ops nvme_pr_ops; 874 872 extern const struct block_device_operations nvme_ns_head_ops; 875 873 extern const struct attribute_group nvme_dev_attrs_group;
+2 -2
drivers/nvme/host/rdma.c
··· 1418 1418 goto mr_put; 1419 1419 1420 1420 nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c, 1421 - req->mr->sig_attrs, ns->pi_type); 1421 + req->mr->sig_attrs, ns->head->pi_type); 1422 1422 nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); 1423 1423 1424 1424 ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); ··· 2012 2012 queue->pi_support && 2013 2013 (c->common.opcode == nvme_cmd_write || 2014 2014 c->common.opcode == nvme_cmd_read) && 2015 - nvme_ns_has_pi(ns)) 2015 + nvme_ns_has_pi(ns->head)) 2016 2016 req->use_sig_mr = true; 2017 2017 else 2018 2018 req->use_sig_mr = false;
+92 -7
drivers/nvme/host/sysfs.c
··· 114 114 } 115 115 static DEVICE_ATTR_RO(nsid); 116 116 117 - static struct attribute *nvme_ns_id_attrs[] = { 117 + static ssize_t csi_show(struct device *dev, struct device_attribute *attr, 118 + char *buf) 119 + { 120 + return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ids.csi); 121 + } 122 + static DEVICE_ATTR_RO(csi); 123 + 124 + static ssize_t metadata_bytes_show(struct device *dev, 125 + struct device_attribute *attr, char *buf) 126 + { 127 + return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ms); 128 + } 129 + static DEVICE_ATTR_RO(metadata_bytes); 130 + 131 + static int ns_head_update_nuse(struct nvme_ns_head *head) 132 + { 133 + struct nvme_id_ns *id; 134 + struct nvme_ns *ns; 135 + int srcu_idx, ret = -EWOULDBLOCK; 136 + 137 + /* Avoid issuing commands too often by rate limiting the update */ 138 + if (!__ratelimit(&head->rs_nuse)) 139 + return 0; 140 + 141 + srcu_idx = srcu_read_lock(&head->srcu); 142 + ns = nvme_find_path(head); 143 + if (!ns) 144 + goto out_unlock; 145 + 146 + ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id); 147 + if (ret) 148 + goto out_unlock; 149 + 150 + head->nuse = le64_to_cpu(id->nuse); 151 + kfree(id); 152 + 153 + out_unlock: 154 + srcu_read_unlock(&head->srcu, srcu_idx); 155 + return ret; 156 + } 157 + 158 + static int ns_update_nuse(struct nvme_ns *ns) 159 + { 160 + struct nvme_id_ns *id; 161 + int ret; 162 + 163 + /* Avoid issuing commands too often by rate limiting the update. 
*/ 164 + if (!__ratelimit(&ns->head->rs_nuse)) 165 + return 0; 166 + 167 + ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id); 168 + if (ret) 169 + goto out_free_id; 170 + 171 + ns->head->nuse = le64_to_cpu(id->nuse); 172 + 173 + out_free_id: 174 + kfree(id); 175 + 176 + return ret; 177 + } 178 + 179 + static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, 180 + char *buf) 181 + { 182 + struct nvme_ns_head *head = dev_to_ns_head(dev); 183 + struct gendisk *disk = dev_to_disk(dev); 184 + struct block_device *bdev = disk->part0; 185 + int ret; 186 + 187 + if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && 188 + bdev->bd_disk->fops == &nvme_ns_head_ops) 189 + ret = ns_head_update_nuse(head); 190 + else 191 + ret = ns_update_nuse(bdev->bd_disk->private_data); 192 + if (ret) 193 + return ret; 194 + 195 + return sysfs_emit(buf, "%llu\n", head->nuse); 196 + } 197 + static DEVICE_ATTR_RO(nuse); 198 + 199 + static struct attribute *nvme_ns_attrs[] = { 118 200 &dev_attr_wwid.attr, 119 201 &dev_attr_uuid.attr, 120 202 &dev_attr_nguid.attr, 121 203 &dev_attr_eui.attr, 204 + &dev_attr_csi.attr, 122 205 &dev_attr_nsid.attr, 206 + &dev_attr_metadata_bytes.attr, 207 + &dev_attr_nuse.attr, 123 208 #ifdef CONFIG_NVME_MULTIPATH 124 209 &dev_attr_ana_grpid.attr, 125 210 &dev_attr_ana_state.attr, ··· 212 127 NULL, 213 128 }; 214 129 215 - static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, 130 + static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, 216 131 struct attribute *a, int n) 217 132 { 218 133 struct device *dev = container_of(kobj, struct device, kobj); ··· 242 157 return a->mode; 243 158 } 244 159 245 - static const struct attribute_group nvme_ns_id_attr_group = { 246 - .attrs = nvme_ns_id_attrs, 247 - .is_visible = nvme_ns_id_attrs_are_visible, 160 + static const struct attribute_group nvme_ns_attr_group = { 161 + .attrs = nvme_ns_attrs, 162 + .is_visible = nvme_ns_attrs_are_visible, 248 163 }; 249 164 250 - const struct attribute_group 
*nvme_ns_id_attr_groups[] = { 251 - &nvme_ns_id_attr_group, 165 + const struct attribute_group *nvme_ns_attr_groups[] = { 166 + &nvme_ns_attr_group, 252 167 NULL, 253 168 }; 254 169
+19 -16
drivers/nvme/host/zns.c
··· 11 11 { 12 12 struct request_queue *q = ns->queue; 13 13 14 - blk_queue_chunk_sectors(q, ns->zsze); 14 + blk_queue_chunk_sectors(q, ns->head->zsze); 15 15 blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); 16 16 17 17 return blk_revalidate_disk_zones(ns->disk, NULL); ··· 99 99 goto free_data; 100 100 } 101 101 102 - ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze)); 103 - if (!is_power_of_2(ns->zsze)) { 102 + ns->head->zsze = 103 + nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze)); 104 + if (!is_power_of_2(ns->head->zsze)) { 104 105 dev_warn(ns->ctrl->device, 105 106 "invalid zone size:%llu for namespace:%u\n", 106 - ns->zsze, ns->head->ns_id); 107 + ns->head->zsze, ns->head->ns_id); 107 108 status = -ENODEV; 108 109 goto free_data; 109 110 } ··· 129 128 sizeof(struct nvme_zone_descriptor); 130 129 131 130 nr_zones = min_t(unsigned int, nr_zones, 132 - get_capacity(ns->disk) >> ilog2(ns->zsze)); 131 + get_capacity(ns->disk) >> ilog2(ns->head->zsze)); 133 132 134 133 bufsize = sizeof(struct nvme_zone_report) + 135 134 nr_zones * sizeof(struct nvme_zone_descriptor); ··· 148 147 return NULL; 149 148 } 150 149 151 - static int nvme_zone_parse_entry(struct nvme_ns *ns, 150 + static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl, 151 + struct nvme_ns_head *head, 152 152 struct nvme_zone_descriptor *entry, 153 153 unsigned int idx, report_zones_cb cb, 154 154 void *data) ··· 157 155 struct blk_zone zone = { }; 158 156 159 157 if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) { 160 - dev_err(ns->ctrl->device, "invalid zone type %#x\n", 158 + dev_err(ctrl->device, "invalid zone type %#x\n", 161 159 entry->zt); 162 160 return -EINVAL; 163 161 } 164 162 165 163 zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; 166 164 zone.cond = entry->zs >> 4; 167 - zone.len = ns->zsze; 168 - zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap)); 169 - zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba)); 165 + zone.len = 
head->zsze; 166 + zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap)); 167 + zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba)); 170 168 if (zone.cond == BLK_ZONE_COND_FULL) 171 169 zone.wp = zone.start + zone.len; 172 170 else 173 - zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp)); 171 + zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp)); 174 172 175 173 return cb(&zone, idx, data); 176 174 } ··· 198 196 c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL; 199 197 c.zmr.pr = NVME_REPORT_ZONE_PARTIAL; 200 198 201 - sector &= ~(ns->zsze - 1); 199 + sector &= ~(ns->head->zsze - 1); 202 200 while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) { 203 201 memset(report, 0, buflen); 204 202 205 - c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector)); 203 + c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector)); 206 204 ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen); 207 205 if (ret) { 208 206 if (ret > 0) ··· 215 213 break; 216 214 217 215 for (i = 0; i < nz && zone_idx < nr_zones; i++) { 218 - ret = nvme_zone_parse_entry(ns, &report->entries[i], 216 + ret = nvme_zone_parse_entry(ns->ctrl, ns->head, 217 + &report->entries[i], 219 218 zone_idx, cb, data); 220 219 if (ret) 221 220 goto out_free; 222 221 zone_idx++; 223 222 } 224 223 225 - sector += ns->zsze * nz; 224 + sector += ns->head->zsze * nz; 226 225 } 227 226 228 227 if (zone_idx > 0) ··· 242 239 243 240 c->zms.opcode = nvme_cmd_zone_mgmt_send; 244 241 c->zms.nsid = cpu_to_le32(ns->head->ns_id); 245 - c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); 242 + c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); 246 243 c->zms.zsa = action; 247 244 248 245 if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
+2 -2
drivers/nvme/target/configfs.c
··· 1274 1274 return -EINVAL; 1275 1275 1276 1276 down_write(&nvmet_config_sem); 1277 - if (cntlid_min >= to_subsys(item)->cntlid_max) 1277 + if (cntlid_min > to_subsys(item)->cntlid_max) 1278 1278 goto out_unlock; 1279 1279 to_subsys(item)->cntlid_min = cntlid_min; 1280 1280 up_write(&nvmet_config_sem); ··· 1304 1304 return -EINVAL; 1305 1305 1306 1306 down_write(&nvmet_config_sem); 1307 - if (cntlid_max <= to_subsys(item)->cntlid_min) 1307 + if (cntlid_max < to_subsys(item)->cntlid_min) 1308 1308 goto out_unlock; 1309 1309 to_subsys(item)->cntlid_max = cntlid_max; 1310 1310 up_write(&nvmet_config_sem);
-3
drivers/nvme/target/core.c
··· 1425 1425 if (!ctrl->sqs) 1426 1426 goto out_free_changed_ns_list; 1427 1427 1428 - if (subsys->cntlid_min > subsys->cntlid_max) 1429 - goto out_free_sqs; 1430 - 1431 1428 ret = ida_alloc_range(&cntlid_ida, 1432 1429 subsys->cntlid_min, subsys->cntlid_max, 1433 1430 GFP_KERNEL);
+2 -2
drivers/nvme/target/passthru.c
··· 602 602 goto out_put_file; 603 603 } 604 604 605 - old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL, 605 + old = xa_cmpxchg(&passthru_subsystems, ctrl->instance, NULL, 606 606 subsys, GFP_KERNEL); 607 607 if (xa_is_err(old)) { 608 608 ret = xa_err(old); ··· 635 635 static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys) 636 636 { 637 637 if (subsys->passthru_ctrl) { 638 - xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid); 638 + xa_erase(&passthru_subsystems, subsys->passthru_ctrl->instance); 639 639 module_put(subsys->passthru_ctrl->ops->module); 640 640 nvme_put_ctrl(subsys->passthru_ctrl); 641 641 }