Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'nvme-7.0-2026-03-04' of git://git.infradead.org/nvme into block-7.0

Pull NVMe fixes from Keith:

"- Improve quirk visibility and configurability (Maurizio)
- Fix runtime user modification to queue setup (Keith)
- Fix multipath leak on try_module_get failure (Keith)
- Ignore ambiguous spec definitions for better atomics support (John)
- Fix admin queue leak on controller reset (Ming)
- Fix large allocation in persistent reservation read keys (Sungwoo Kim)
- Fix fcloop callback handling (Justin)
- Securely free DHCHAP secrets (Daniel)
- Various cleanups and typo fixes (John, Wilfred)"

* tag 'nvme-7.0-2026-03-04' of git://git.infradead.org/nvme:
nvme: fix memory allocation in nvme_pr_read_keys()
nvme-multipath: fix leak on try_module_get failure
nvmet-fcloop: Check remoteport port_state before calling done callback
nvme-pci: do not try to add queue maps at runtime
nvme-pci: cap queue creation to used queues
nvme-pci: ensure we're polling a polled queue
nvme: fix memory leak in quirks_param_set()
nvme: correct comment about nvme_ns_remove()
nvme: stop setting namespace gendisk device driver data
nvme: add support for dynamic quirk configuration via module parameter
nvme: fix admin queue leak on controller reset
nvme-fabrics: use kfree_sensitive() for DHCHAP secrets
nvme: stop using AWUPF
nvme: expose active quirks in sysfs
nvme/host: fixup some typos

+312 -37
+13
Documentation/admin-guide/kernel-parameters.txt
··· 74 74 TPM TPM drivers are enabled. 75 75 UMS USB Mass Storage support is enabled. 76 76 USB USB support is enabled. 77 + NVME NVMe support is enabled. 77 78 USBHID USB Human Interface Device support is enabled. 78 79 V4L Video For Linux support is enabled. 79 80 VGA The VGA console has been enabled. ··· 4787 4786 'node', 'default' can be specified 4788 4787 This can be set from sysctl after boot. 4789 4788 See Documentation/admin-guide/sysctl/vm.rst for details. 4789 + 4790 + nvme.quirks= [NVME] A list of quirk entries to augment the built-in 4791 + nvme quirk list. List entries are separated by a 4792 + '-' character. 4793 + Each entry has the form VendorID:ProductID:quirk_names. 4794 + The IDs are 4-digit hex numbers and quirk_names is a 4795 + list of quirk names separated by commas. A quirk name 4796 + can be prefixed by '^', meaning that the specified 4797 + quirk must be disabled. 4798 + 4799 + Example: 4800 + nvme.quirks=7710:2267:bogus_nid,^identify_cns-9900:7711:broken_msi 4790 4801 4791 4802 ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver. 4792 4803 See Documentation/core-api/debugging-via-ohci1394.rst for more
+12 -16
drivers/nvme/host/core.c
··· 2046 2046 if (id->nabspf) 2047 2047 boundary = (le16_to_cpu(id->nabspf) + 1) * bs; 2048 2048 } else { 2049 - /* 2050 - * Use the controller wide atomic write unit. This sucks 2051 - * because the limit is defined in terms of logical blocks while 2052 - * namespaces can have different formats, and because there is 2053 - * no clear language in the specification prohibiting different 2054 - * values for different controllers in the subsystem. 2055 - */ 2056 - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; 2049 + if (ns->ctrl->awupf) 2050 + dev_info_once(ns->ctrl->device, 2051 + "AWUPF ignored, only NAWUPF accepted\n"); 2052 + atomic_bs = bs; 2057 2053 } 2058 2054 2059 2055 lim->atomic_write_hw_max = atomic_bs; ··· 3218 3222 memcpy(subsys->model, id->mn, sizeof(subsys->model)); 3219 3223 subsys->vendor_id = le16_to_cpu(id->vid); 3220 3224 subsys->cmic = id->cmic; 3221 - subsys->awupf = le16_to_cpu(id->awupf); 3222 3225 3223 3226 /* Versions prior to 1.4 don't necessarily report a valid type */ 3224 3227 if (id->cntrltype == NVME_CTRL_DISC || ··· 3650 3655 dev_pm_qos_expose_latency_tolerance(ctrl->device); 3651 3656 else if (!ctrl->apst_enabled && prev_apst_enabled) 3652 3657 dev_pm_qos_hide_latency_tolerance(ctrl->device); 3658 + ctrl->awupf = le16_to_cpu(id->awupf); 3653 3659 out_free: 3654 3660 kfree(id); 3655 3661 return ret; ··· 4181 4185 4182 4186 nvme_mpath_add_disk(ns, info->anagrpid); 4183 4187 nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); 4184 - 4185 - /* 4186 - * Set ns->disk->device->driver_data to ns so we can access 4187 - * ns->head->passthru_err_log_enabled in 4188 - * nvme_io_passthru_err_log_enabled_[store | show](). 
4189 - */ 4190 - dev_set_drvdata(disk_to_dev(ns->disk), ns); 4191 4188 4192 4189 return; 4193 4190 ··· 4853 4864 ret = blk_mq_alloc_tag_set(set); 4854 4865 if (ret) 4855 4866 return ret; 4867 + 4868 + /* 4869 + * If a previous admin queue exists (e.g., from before a reset), 4870 + * put it now before allocating a new one to avoid orphaning it. 4871 + */ 4872 + if (ctrl->admin_q) 4873 + blk_put_queue(ctrl->admin_q); 4856 4874 4857 4875 ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); 4858 4876 if (IS_ERR(ctrl->admin_q)) {
+2 -2
drivers/nvme/host/fabrics.c
··· 1290 1290 kfree(opts->subsysnqn); 1291 1291 kfree(opts->host_traddr); 1292 1292 kfree(opts->host_iface); 1293 - kfree(opts->dhchap_secret); 1294 - kfree(opts->dhchap_ctrl_secret); 1293 + kfree_sensitive(opts->dhchap_secret); 1294 + kfree_sensitive(opts->dhchap_ctrl_secret); 1295 1295 kfree(opts); 1296 1296 } 1297 1297 EXPORT_SYMBOL_GPL(nvmf_free_options);
+6 -8
drivers/nvme/host/multipath.c
··· 1300 1300 mutex_lock(&head->subsys->lock); 1301 1301 /* 1302 1302 * We are called when all paths have been removed, and at that point 1303 - * head->list is expected to be empty. However, nvme_remove_ns() and 1303 + * head->list is expected to be empty. However, nvme_ns_remove() and 1304 1304 * nvme_init_ns_head() can run concurrently and so if head->delayed_ 1305 1305 * removal_secs is configured, it is possible that by the time we reach 1306 1306 * this point, head->list may no longer be empty. Therefore, we recheck ··· 1310 1310 if (!list_empty(&head->list)) 1311 1311 goto out; 1312 1312 1313 - if (head->delayed_removal_secs) { 1314 - /* 1315 - * Ensure that no one could remove this module while the head 1316 - * remove work is pending. 1317 - */ 1318 - if (!try_module_get(THIS_MODULE)) 1319 - goto out; 1313 + /* 1314 + * Ensure that no one could remove this module while the head 1315 + * remove work is pending. 1316 + */ 1317 + if (head->delayed_removal_secs && try_module_get(THIS_MODULE)) { 1320 1318 mod_delayed_work(nvme_wq, &head->remove_work, 1321 1319 head->delayed_removal_secs * HZ); 1322 1320 } else {
+56 -1
drivers/nvme/host/nvme.h
··· 180 180 NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22), 181 181 }; 182 182 183 + static inline char *nvme_quirk_name(enum nvme_quirks q) 184 + { 185 + switch (q) { 186 + case NVME_QUIRK_STRIPE_SIZE: 187 + return "stripe_size"; 188 + case NVME_QUIRK_IDENTIFY_CNS: 189 + return "identify_cns"; 190 + case NVME_QUIRK_DEALLOCATE_ZEROES: 191 + return "deallocate_zeroes"; 192 + case NVME_QUIRK_DELAY_BEFORE_CHK_RDY: 193 + return "delay_before_chk_rdy"; 194 + case NVME_QUIRK_NO_APST: 195 + return "no_apst"; 196 + case NVME_QUIRK_NO_DEEPEST_PS: 197 + return "no_deepest_ps"; 198 + case NVME_QUIRK_QDEPTH_ONE: 199 + return "qdepth_one"; 200 + case NVME_QUIRK_MEDIUM_PRIO_SQ: 201 + return "medium_prio_sq"; 202 + case NVME_QUIRK_IGNORE_DEV_SUBNQN: 203 + return "ignore_dev_subnqn"; 204 + case NVME_QUIRK_DISABLE_WRITE_ZEROES: 205 + return "disable_write_zeroes"; 206 + case NVME_QUIRK_SIMPLE_SUSPEND: 207 + return "simple_suspend"; 208 + case NVME_QUIRK_SINGLE_VECTOR: 209 + return "single_vector"; 210 + case NVME_QUIRK_128_BYTES_SQES: 211 + return "128_bytes_sqes"; 212 + case NVME_QUIRK_SHARED_TAGS: 213 + return "shared_tags"; 214 + case NVME_QUIRK_NO_TEMP_THRESH_CHANGE: 215 + return "no_temp_thresh_change"; 216 + case NVME_QUIRK_NO_NS_DESC_LIST: 217 + return "no_ns_desc_list"; 218 + case NVME_QUIRK_DMA_ADDRESS_BITS_48: 219 + return "dma_address_bits_48"; 220 + case NVME_QUIRK_SKIP_CID_GEN: 221 + return "skip_cid_gen"; 222 + case NVME_QUIRK_BOGUS_NID: 223 + return "bogus_nid"; 224 + case NVME_QUIRK_NO_SECONDARY_TEMP_THRESH: 225 + return "no_secondary_temp_thresh"; 226 + case NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND: 227 + return "force_no_simple_suspend"; 228 + case NVME_QUIRK_BROKEN_MSI: 229 + return "broken_msi"; 230 + case NVME_QUIRK_DMAPOOL_ALIGN_512: 231 + return "dmapool_align_512"; 232 + } 233 + 234 + return "unknown"; 235 + } 236 + 183 237 /* 184 238 * Common request structure for NVMe passthrough. 
All drivers must have 185 239 * this structure as the first member of their request-private data. ··· 464 410 465 411 enum nvme_ctrl_type cntrltype; 466 412 enum nvme_dctype dctype; 413 + 414 + u16 awupf; /* 0's based value. */ 467 415 }; 468 416 469 417 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) ··· 498 442 u8 cmic; 499 443 enum nvme_subsys_type subtype; 500 444 u16 vendor_id; 501 - u16 awupf; /* 0's based value. */ 502 445 struct ida ns_ida; 503 446 #ifdef CONFIG_NVME_MULTIPATH 504 447 enum nvme_iopolicy iopolicy;
+184 -2
drivers/nvme/host/pci.c
··· 72 72 static_assert(MAX_PRP_RANGE / NVME_CTRL_PAGE_SIZE <= 73 73 (1 /* prp1 */ + NVME_MAX_NR_DESCRIPTORS * PRPS_PER_PAGE)); 74 74 75 + struct quirk_entry { 76 + u16 vendor_id; 77 + u16 dev_id; 78 + u32 enabled_quirks; 79 + u32 disabled_quirks; 80 + }; 81 + 75 82 static int use_threaded_interrupts; 76 83 module_param(use_threaded_interrupts, int, 0444); 77 84 ··· 108 101 static unsigned int io_queue_depth = 1024; 109 102 module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); 110 103 MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096"); 104 + 105 + static struct quirk_entry *nvme_pci_quirk_list; 106 + static unsigned int nvme_pci_quirk_count; 107 + 108 + /* Helper to parse individual quirk names */ 109 + static int nvme_parse_quirk_names(char *quirk_str, struct quirk_entry *entry) 110 + { 111 + int i; 112 + size_t field_len; 113 + bool disabled, found; 114 + char *p = quirk_str, *field; 115 + 116 + while ((field = strsep(&p, ",")) && *field) { 117 + disabled = false; 118 + found = false; 119 + 120 + if (*field == '^') { 121 + /* Skip the '^' character */ 122 + disabled = true; 123 + field++; 124 + } 125 + 126 + field_len = strlen(field); 127 + for (i = 0; i < 32; i++) { 128 + unsigned int bit = 1U << i; 129 + char *q_name = nvme_quirk_name(bit); 130 + size_t q_len = strlen(q_name); 131 + 132 + if (!strcmp(q_name, "unknown")) 133 + break; 134 + 135 + if (!strcmp(q_name, field) && 136 + q_len == field_len) { 137 + if (disabled) 138 + entry->disabled_quirks |= bit; 139 + else 140 + entry->enabled_quirks |= bit; 141 + found = true; 142 + break; 143 + } 144 + } 145 + 146 + if (!found) { 147 + pr_err("nvme: unrecognized quirk %s\n", field); 148 + return -EINVAL; 149 + } 150 + } 151 + return 0; 152 + } 153 + 154 + /* Helper to parse a single VID:DID:quirk_names entry */ 155 + static int nvme_parse_quirk_entry(char *s, struct quirk_entry *entry) 156 + { 157 + char *field; 158 + 159 + field = strsep(&s, ":"); 160 + if 
(!field || kstrtou16(field, 16, &entry->vendor_id)) 161 + return -EINVAL; 162 + 163 + field = strsep(&s, ":"); 164 + if (!field || kstrtou16(field, 16, &entry->dev_id)) 165 + return -EINVAL; 166 + 167 + field = strsep(&s, ":"); 168 + if (!field) 169 + return -EINVAL; 170 + 171 + return nvme_parse_quirk_names(field, entry); 172 + } 173 + 174 + static int quirks_param_set(const char *value, const struct kernel_param *kp) 175 + { 176 + int count, err, i; 177 + struct quirk_entry *qlist; 178 + char *field, *val, *sep_ptr; 179 + 180 + err = param_set_copystring(value, kp); 181 + if (err) 182 + return err; 183 + 184 + val = kstrdup(value, GFP_KERNEL); 185 + if (!val) 186 + return -ENOMEM; 187 + 188 + if (!*val) 189 + goto out_free_val; 190 + 191 + count = 1; 192 + for (i = 0; val[i]; i++) { 193 + if (val[i] == '-') 194 + count++; 195 + } 196 + 197 + qlist = kcalloc(count, sizeof(*qlist), GFP_KERNEL); 198 + if (!qlist) { 199 + err = -ENOMEM; 200 + goto out_free_val; 201 + } 202 + 203 + i = 0; 204 + sep_ptr = val; 205 + while ((field = strsep(&sep_ptr, "-"))) { 206 + if (nvme_parse_quirk_entry(field, &qlist[i])) { 207 + pr_err("nvme: failed to parse quirk string %s\n", 208 + value); 209 + goto out_free_qlist; 210 + } 211 + 212 + i++; 213 + } 214 + 215 + kfree(nvme_pci_quirk_list); 216 + nvme_pci_quirk_count = count; 217 + nvme_pci_quirk_list = qlist; 218 + goto out_free_val; 219 + 220 + out_free_qlist: 221 + kfree(qlist); 222 + out_free_val: 223 + kfree(val); 224 + return err; 225 + } 226 + 227 + static char quirks_param[128]; 228 + static const struct kernel_param_ops quirks_param_ops = { 229 + .set = quirks_param_set, 230 + .get = param_get_string, 231 + }; 232 + 233 + static struct kparam_string quirks_param_string = { 234 + .maxlen = sizeof(quirks_param), 235 + .string = quirks_param, 236 + }; 237 + 238 + module_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0444); 239 + MODULE_PARM_DESC(quirks, "Enable/disable NVMe quirks by specifying " 240 + 
"quirks=VID:DID:quirk_names"); 111 241 112 242 static int io_queue_count_set(const char *val, const struct kernel_param *kp) 113 243 { ··· 1640 1496 struct nvme_queue *nvmeq = hctx->driver_data; 1641 1497 bool found; 1642 1498 1643 - if (!nvme_cqe_pending(nvmeq)) 1499 + if (!test_bit(NVMEQ_POLLED, &nvmeq->flags) || 1500 + !nvme_cqe_pending(nvmeq)) 1644 1501 return 0; 1645 1502 1646 1503 spin_lock(&nvmeq->cq_poll_lock); ··· 2919 2774 dev->nr_write_queues = write_queues; 2920 2775 dev->nr_poll_queues = poll_queues; 2921 2776 2922 - nr_io_queues = dev->nr_allocated_queues - 1; 2777 + if (dev->ctrl.tagset) { 2778 + /* 2779 + * The set's maps are allocated only once at initialization 2780 + * time. We can't add special queues later if their mq_map 2781 + * wasn't preallocated. 2782 + */ 2783 + if (dev->ctrl.tagset->nr_maps < 3) 2784 + dev->nr_poll_queues = 0; 2785 + if (dev->ctrl.tagset->nr_maps < 2) 2786 + dev->nr_write_queues = 0; 2787 + } 2788 + 2789 + /* 2790 + * The initial number of allocated queue slots may be too large if the 2791 + * user reduced the special queue parameters. Cap the value to the 2792 + * number we need for this round. 
2793 + */ 2794 + nr_io_queues = min(nvme_max_io_queues(dev), 2795 + dev->nr_allocated_queues - 1); 2923 2796 result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); 2924 2797 if (result < 0) 2925 2798 return result; ··· 3621 3458 return 0; 3622 3459 } 3623 3460 3461 + static struct quirk_entry *detect_dynamic_quirks(struct pci_dev *pdev) 3462 + { 3463 + int i; 3464 + 3465 + for (i = 0; i < nvme_pci_quirk_count; i++) 3466 + if (pdev->vendor == nvme_pci_quirk_list[i].vendor_id && 3467 + pdev->device == nvme_pci_quirk_list[i].dev_id) 3468 + return &nvme_pci_quirk_list[i]; 3469 + 3470 + return NULL; 3471 + } 3472 + 3624 3473 static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, 3625 3474 const struct pci_device_id *id) 3626 3475 { 3627 3476 unsigned long quirks = id->driver_data; 3628 3477 int node = dev_to_node(&pdev->dev); 3629 3478 struct nvme_dev *dev; 3479 + struct quirk_entry *qentry; 3630 3480 int ret = -ENOMEM; 3631 3481 3632 3482 dev = kzalloc_node(struct_size(dev, descriptor_pools, nr_node_ids), ··· 3670 3494 dev_info(&pdev->dev, 3671 3495 "platform quirk: setting simple suspend\n"); 3672 3496 quirks |= NVME_QUIRK_SIMPLE_SUSPEND; 3497 + } 3498 + qentry = detect_dynamic_quirks(pdev); 3499 + if (qentry) { 3500 + quirks |= qentry->enabled_quirks; 3501 + quirks &= ~qentry->disabled_quirks; 3673 3502 } 3674 3503 ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, 3675 3504 quirks); ··· 4276 4095 4277 4096 static void __exit nvme_exit(void) 4278 4097 { 4098 + kfree(nvme_pci_quirk_list); 4279 4099 pci_unregister_driver(&nvme_driver); 4280 4100 flush_workqueue(nvme_wq); 4281 4101 }
+2 -2
drivers/nvme/host/pr.c
··· 242 242 if (rse_len > U32_MAX) 243 243 return -EINVAL; 244 244 245 - rse = kzalloc(rse_len, GFP_KERNEL); 245 + rse = kvzalloc(rse_len, GFP_KERNEL); 246 246 if (!rse) 247 247 return -ENOMEM; 248 248 ··· 267 267 } 268 268 269 269 free_rse: 270 - kfree(rse); 270 + kvfree(rse); 271 271 return ret; 272 272 } 273 273
+23
drivers/nvme/host/sysfs.c
··· 601 601 } 602 602 static DEVICE_ATTR_RO(dctype); 603 603 604 + static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, 605 + char *buf) 606 + { 607 + int count = 0, i; 608 + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 609 + unsigned long quirks = ctrl->quirks; 610 + 611 + if (!quirks) 612 + return sysfs_emit(buf, "none\n"); 613 + 614 + for (i = 0; quirks; ++i) { 615 + if (quirks & 1) { 616 + count += sysfs_emit_at(buf, count, "%s\n", 617 + nvme_quirk_name(BIT(i))); 618 + } 619 + quirks >>= 1; 620 + } 621 + 622 + return count; 623 + } 624 + static DEVICE_ATTR_RO(quirks); 625 + 604 626 #ifdef CONFIG_NVME_HOST_AUTH 605 627 static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, 606 628 struct device_attribute *attr, char *buf) ··· 764 742 &dev_attr_kato.attr, 765 743 &dev_attr_cntrltype.attr, 766 744 &dev_attr_dctype.attr, 745 + &dev_attr_quirks.attr, 767 746 #ifdef CONFIG_NVME_HOST_AUTH 768 747 &dev_attr_dhchap_secret.attr, 769 748 &dev_attr_dhchap_ctrl_secret.attr,
+3 -2
drivers/nvme/host/tcp.c
··· 25 25 26 26 struct nvme_tcp_queue; 27 27 28 - /* Define the socket priority to use for connections were it is desirable 28 + /* 29 + * Define the socket priority to use for connections where it is desirable 29 30 * that the NIC consider performing optimized packet processing or filtering. 30 31 * A non-zero value being sufficient to indicate general consideration of any 31 32 * possible optimization. Making it a module param allows for alternative ··· 927 926 req->curr_bio = req->curr_bio->bi_next; 928 927 929 928 /* 930 - * If we don`t have any bios it means that controller 929 + * If we don't have any bios it means the controller 931 930 * sent more data than we requested, hence error 932 931 */ 933 932 if (!req->curr_bio) {
+11 -4
drivers/nvme/target/fcloop.c
··· 491 491 struct fcloop_rport *rport = remoteport->private; 492 492 struct nvmet_fc_target_port *targetport = rport->targetport; 493 493 struct fcloop_tport *tport; 494 + int ret = 0; 494 495 495 496 if (!targetport) { 496 497 /* ··· 501 500 * We end up here from delete association exchange: 502 501 * nvmet_fc_xmt_disconnect_assoc sends an async request. 503 502 * 504 - * Return success because this is what LLDDs do; silently 505 - * drop the response. 503 + * Return success when remoteport is still online because this 504 + * is what LLDDs do and silently drop the response. Otherwise, 505 + * return with error to signal upper layer to perform the lsrsp 506 + * resource cleanup. 506 507 */ 507 - lsrsp->done(lsrsp); 508 + if (remoteport->port_state == FC_OBJSTATE_ONLINE) 509 + lsrsp->done(lsrsp); 510 + else 511 + ret = -ENODEV; 512 + 508 513 kmem_cache_free(lsreq_cache, tls_req); 509 - return 0; 514 + return ret; 510 515 } 511 516 512 517 memcpy(lsreq->rspaddr, lsrsp->rspbuf,