Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge git://git.infradead.org/users/willy/linux-nvme

Pull NVMe driver fixes from Matthew Wilcox:
"Now that actual hardware has been released (don't have any yet
myself), people are starting to want some of these fixes merged."

Willy doesn't have hardware? Guys...

* git://git.infradead.org/users/willy/linux-nvme:
NVMe: Cancel outstanding IOs on queue deletion
NVMe: Free admin queue memory on initialisation failure
NVMe: Use ida for nvme device instance
NVMe: Fix whitespace damage in nvme_init
NVMe: handle allocation failure in nvme_map_user_pages()
NVMe: Fix uninitialized iod compiler warning
NVMe: Do not set IO queue depth beyond device max
NVMe: Set block queue max sectors
NVMe: use namespace id for nvme_get_features
NVMe: replace nvme_ns with nvme_dev for user admin
NVMe: Fix nvme module init when nvme_major is set
NVMe: Set request queue logical block size
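
The "Cancel outstanding IOs on queue deletion" change in the diff below folds the old nvme_timeout_ios() into a single nvme_cancel_ios(nvmeq, timeout) walker, where the bool selects between reaping only expired commands (the polling-kthread path) and cancelling everything still in flight (queue teardown). Here is a minimal user-space sketch of that one-walker-two-callers pattern; the cmd_info struct and cancel_ios() are hypothetical stand-ins for the driver's bookkeeping, not driver code:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for the driver's per-command bookkeeping. */
struct cmd_info {
	bool   in_flight;
	time_t deadline;	/* absolute expiry, like info[cmdid].timeout */
};

/*
 * One walker serves both callers: the polling thread passes
 * timeout_only = true to reap only expired commands, while queue
 * teardown passes false to cancel everything still outstanding.
 */
static void cancel_ios(struct cmd_info *cmds, int depth, bool timeout_only)
{
	time_t now = time(NULL);
	int cmdid;

	for (cmdid = 0; cmdid < depth; cmdid++) {
		if (!cmds[cmdid].in_flight)
			continue;
		if (timeout_only && now <= cmds[cmdid].deadline)
			continue;
		printf("Cancelling I/O %d\n", cmdid);
		cmds[cmdid].in_flight = false;	/* complete with abort status */
	}
}

int main(void)
{
	struct cmd_info cmds[2] = {
		{ .in_flight = true, .deadline = time(NULL) - 5 },  /* expired */
		{ .in_flight = true, .deadline = time(NULL) + 60 }, /* healthy */
	};

	cancel_ios(cmds, 2, true);	/* reaps only the expired command */
	cancel_ios(cmds, 2, false);	/* teardown: reaps the rest too */
	return 0;
}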

+105 -50 total

drivers/block/nvme.c  (+103 -50)
···
	char serial[20];
	char model[40];
	char firmware_rev[8];
+	u32 max_hw_sectors;
};

/*
···
}

static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
-				unsigned dword11, dma_addr_t dma_addr)
+				unsigned nsid, dma_addr_t dma_addr)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
-	c.features.dword11 = cpu_to_le32(dword11);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}
···
	return nvme_submit_admin_cmd(dev, &c, result);
}

+/**
+ * nvme_cancel_ios - Cancel outstanding I/Os
+ * @queue: The queue to cancel I/Os on
+ * @timeout: True to only cancel I/Os which have timed out
+ */
+static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
+{
+	int depth = nvmeq->q_depth - 1;
+	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
+	unsigned long now = jiffies;
+	int cmdid;
+
+	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
+		void *ctx;
+		nvme_completion_fn fn;
+		static struct nvme_completion cqe = {
+			.status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1,
+		};
+
+		if (timeout && !time_after(now, info[cmdid].timeout))
+			continue;
+		dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
+		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
+		fn(nvmeq->dev, ctx, &cqe);
+	}
+}
+
+static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+{
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+}
+
static void nvme_free_queue(struct nvme_dev *dev, int qid)
{
	struct nvme_queue *nvmeq = dev->queues[qid];
	int vector = dev->entry[nvmeq->cq_vector].vector;
+
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_cancel_ios(nvmeq, false);
+	spin_unlock_irq(&nvmeq->q_lock);

	irq_set_affinity_hint(vector, NULL);
	free_irq(vector, nvmeq);
···
		adapter_delete_cq(dev, qid);
	}

-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
-				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
-	kfree(nvmeq);
+	nvme_free_queue_mem(nvmeq);
}

static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
							int depth, int vector)
{
	struct device *dmadev = &dev->pci_dev->dev;
-	unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info));
+	unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
+					sizeof(struct nvme_cmd_info));
	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
	if (!nvmeq)
		return NULL;
···
static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
{
-	int result;
+	int result = 0;
	u32 aqa;
	u64 cap;
	unsigned long timeout;
···
	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
	dev->db_stride = NVME_CAP_STRIDE(cap);

-	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+	while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
		msleep(100);
		if (fatal_signal_pending(current))
-			return -EINTR;
+			result = -EINTR;
		if (time_after(jiffies, timeout)) {
			dev_err(&dev->pci_dev->dev,
				"Device not ready; aborting initialisation\n");
-			return -ENODEV;
+			result = -ENODEV;
		}
+	}
+
+	if (result) {
+		nvme_free_queue_mem(nvmeq);
+		return result;
	}

	result = queue_request_irq(dev, nvmeq, "nvme admin");
···
	offset = offset_in_page(addr);
	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);

	err = get_user_pages_fast(addr, count, 1, pages);
	if (err < count) {
···
	return status;
}

-static int nvme_user_admin_cmd(struct nvme_ns *ns,
+static int nvme_user_admin_cmd(struct nvme_dev *dev,
					struct nvme_admin_cmd __user *ucmd)
{
-	struct nvme_dev *dev = ns->dev;
	struct nvme_admin_cmd cmd;
	struct nvme_command c;
	int status, length;
-	struct nvme_iod *iod;
+	struct nvme_iod *uninitialized_var(iod);

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
···
	case NVME_IOCTL_ID:
		return ns->ns_id;
	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_admin_cmd(ns, (void __user *)arg);
+		return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, (void __user *)arg);
	default:
···
	.ioctl		= nvme_ioctl,
	.compat_ioctl	= nvme_ioctl,
};
-
-static void nvme_timeout_ios(struct nvme_queue *nvmeq)
-{
-	int depth = nvmeq->q_depth - 1;
-	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
-	unsigned long now = jiffies;
-	int cmdid;
-
-	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
-		void *ctx;
-		nvme_completion_fn fn;
-		static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, };
-
-		if (!time_after(now, info[cmdid].timeout))
-			continue;
-		dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid);
-		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
-	}
-}

static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
{
···
		spin_lock_irq(&nvmeq->q_lock);
		if (nvme_process_cq(nvmeq))
			printk("process_cq did something\n");
-		nvme_timeout_ios(nvmeq);
+		nvme_cancel_ios(nvmeq, true);
		nvme_resubmit_bios(nvmeq);
		spin_unlock_irq(&nvmeq->q_lock);
	}
···
	ns->disk = disk;
	lbaf = id->flbas & 0xf;
	ns->lba_shift = id->lbaf[lbaf].ds;
+	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+	if (dev->max_hw_sectors)
+		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);

	disk->major = nvme_major;
	disk->minors = NVME_MINORS;
···
static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
{
-	int result, cpu, i, nr_io_queues, db_bar_size;
+	int result, cpu, i, nr_io_queues, db_bar_size, q_depth;

	nr_io_queues = num_online_cpus();
	result = set_queue_count(dev, nr_io_queues);
···
		cpu = cpumask_next(cpu, cpu_online_mask);
	}

+	q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
+								NVME_Q_DEPTH);
	for (i = 0; i < nr_io_queues; i++) {
-		dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
-							NVME_Q_DEPTH, i);
+		dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
		if (IS_ERR(dev->queues[i + 1]))
			return PTR_ERR(dev->queues[i + 1]);
		dev->queue_count++;
···
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+	if (ctrl->mdts) {
+		int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
+		dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
+	}

	id_ns = mem;
	for (i = 1; i <= nn; i++) {
···
	list_del(&dev->node);
	spin_unlock(&dev_list_lock);

-	/* TODO: wait all I/O finished or cancel them */
-
	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
		list_del(&ns->list);
		del_gendisk(ns->disk);
···
	dma_pool_destroy(dev->prp_small_pool);
}

-/* XXX: Use an ida or something to let remove / add work correctly */
-static void nvme_set_instance(struct nvme_dev *dev)
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_dev *dev)
{
-	static int instance;
-	dev->instance = instance++;
+	int instance, error;
+
+	do {
+		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+			return -ENODEV;
+
+		spin_lock(&dev_list_lock);
+		error = ida_get_new(&nvme_instance_ida, &instance);
+		spin_unlock(&dev_list_lock);
+	} while (error == -EAGAIN);
+
+	if (error)
+		return -ENODEV;
+
+	dev->instance = instance;
+	return 0;
}

static void nvme_release_instance(struct nvme_dev *dev)
{
+	spin_lock(&dev_list_lock);
+	ida_remove(&nvme_instance_ida, dev->instance);
+	spin_unlock(&dev_list_lock);
}

static int __devinit nvme_probe(struct pci_dev *pdev,
···
	pci_set_drvdata(pdev, dev);
	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
-	nvme_set_instance(dev);
+	result = nvme_set_instance(dev);
+	if (result)
+		goto disable;
+
	dev->entry[0].vector = pdev->irq;

	result = nvme_setup_prp_pools(dev);
···
static int __init nvme_init(void)
{
-	int result = -EBUSY;
+	int result;

	nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
	if (IS_ERR(nvme_thread))
		return PTR_ERR(nvme_thread);

-	nvme_major = register_blkdev(nvme_major, "nvme");
-	if (nvme_major <= 0)
+	result = register_blkdev(nvme_major, "nvme");
+	if (result < 0)
		goto kill_kthread;
+	else if (result > 0)
+		nvme_major = result;

	result = pci_register_driver(&nvme_driver);
	if (result)
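
The ctrl->mdts hunk above derives the transfer-size cap from two power-of-two fields: MDTS counts in units of the controller's minimum page size (2^(CAP.MPSMIN + 12) bytes), and the block layer wants 512-byte sectors, hence the "- 9". A small stand-alone sketch of the same arithmetic; the helper name and sample values are mine, not the driver's:

#include <stdio.h>
#include <stdint.h>

/*
 * MDTS is a power-of-two multiple of the controller's minimum page
 * size (2^(CAP.MPSMIN + 12) bytes); the block layer counts 512-byte
 * sectors, hence the "- 9" when converting bytes to sectors.
 */
static uint32_t max_hw_sectors(uint8_t mdts, uint8_t mpsmin)
{
	int page_shift = mpsmin + 12;	/* minimum page size, in bits */

	if (!mdts)
		return 0;	/* MDTS == 0 means "no reported limit" */
	return (uint32_t)1 << (mdts + page_shift - 9);
}

int main(void)
{
	/* e.g. MDTS=5, MPSMIN=0: 2^5 pages * 4 KiB = 128 KiB = 256 sectors */
	printf("%u sectors\n", max_hw_sectors(5, 0));
	return 0;
}
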
include/linux/nvme.h  (+2 -0)
···
	__u64	acq;	/* Admin CQ Base Address */
};

+#define NVME_CAP_MQES(cap)	((cap) & 0xffff)
#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
+#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)

enum {
	NVME_CC_ENABLE	= 1 << 0,
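
These macros just mask fields out of the controller's 64-bit CAP register; the queue-depth fix above uses MQES (zero-based, so it adds 1) and the transfer-size fix uses MPSMIN. A user-space sketch of the same extraction against a made-up sample CAP value:

#include <stdio.h>
#include <stdint.h>

/* Field extraction mirroring the new macros in include/linux/nvme.h. */
#define NVME_CAP_MQES(cap)	((cap) & 0xffff)
#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)

int main(void)
{
	/* Hypothetical controller: MQES=1023, TO=30 (units of 500 ms) */
	uint64_t cap = 1023u | ((uint64_t)30 << 24);

	/* MQES is zero-based: 1023 means 1024 entries per queue */
	printf("max queue depth: %llu\n",
	       (unsigned long long)(NVME_CAP_MQES(cap) + 1));
	printf("ready timeout:   %llu x 500 ms\n",
	       (unsigned long long)NVME_CAP_TIMEOUT(cap));
	printf("min page size:   %llu bytes\n",
	       4096ull << NVME_CAP_MPSMIN(cap));
	return 0;
}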