Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.10-2020-11-07' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- NVMe pull request from Christoph:
- revert an nvme_queue size optimization (Keith Busch)
- fabrics timeout race fixes (Chao Leng and Sagi Grimberg)

- null_blk zone locking fix (Damien)

* tag 'block-5.10-2020-11-07' of git://git.kernel.dk/linux-block:
null_blk: Fix scheduling in atomic with zoned mode
nvme-tcp: avoid repeated request completion
nvme-rdma: avoid repeated request completion
nvme-tcp: avoid race between time out and tear down
nvme-rdma: avoid race between time out and tear down
nvme: introduce nvme_sync_io_queues
Revert "nvme-pci: remove last_sq_tail"

+65 -46
+1 -1
drivers/block/null_blk.h
··· 47 47 unsigned int nr_zones_closed; 48 48 struct blk_zone *zones; 49 49 sector_t zone_size_sects; 50 - spinlock_t zone_dev_lock; 50 + spinlock_t zone_lock; 51 51 unsigned long *zone_locks; 52 52 53 53 unsigned long size; /* device size in MB */
+31 -16
drivers/block/null_blk_zoned.c
··· 46 46 if (!dev->zones) 47 47 return -ENOMEM; 48 48 49 - spin_lock_init(&dev->zone_dev_lock); 50 - dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); 51 - if (!dev->zone_locks) { 52 - kvfree(dev->zones); 53 - return -ENOMEM; 49 + /* 50 + * With memory backing, the zone_lock spinlock needs to be temporarily 51 + * released to avoid scheduling in atomic context. To guarantee zone 52 + * information protection, use a bitmap to lock zones with 53 + * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing 54 + * implies that the queue is marked with BLK_MQ_F_BLOCKING. 55 + */ 56 + spin_lock_init(&dev->zone_lock); 57 + if (dev->memory_backed) { 58 + dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); 59 + if (!dev->zone_locks) { 60 + kvfree(dev->zones); 61 + return -ENOMEM; 62 + } 54 63 } 55 64 56 65 if (dev->zone_nr_conv >= dev->nr_zones) { ··· 146 137 147 138 static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) 148 139 { 149 - wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); 140 + if (dev->memory_backed) 141 + wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); 142 + spin_lock_irq(&dev->zone_lock); 150 143 } 151 144 152 145 static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) 153 146 { 154 - clear_and_wake_up_bit(zno, dev->zone_locks); 147 + spin_unlock_irq(&dev->zone_lock); 148 + 149 + if (dev->memory_backed) 150 + clear_and_wake_up_bit(zno, dev->zone_locks); 155 151 } 156 152 157 153 int null_report_zones(struct gendisk *disk, sector_t sector, ··· 336 322 return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); 337 323 338 324 null_lock_zone(dev, zno); 339 - spin_lock(&dev->zone_dev_lock); 340 325 341 326 switch (zone->cond) { 342 327 case BLK_ZONE_COND_FULL: ··· 388 375 if (zone->cond != BLK_ZONE_COND_EXP_OPEN) 389 376 zone->cond = BLK_ZONE_COND_IMP_OPEN; 390 377 391 - spin_unlock(&dev->zone_dev_lock); 378 + /* 379 + * Memory backing allocation 
may sleep: release the zone_lock spinlock 380 + * to avoid scheduling in atomic context. Zone operation atomicity is 381 + * still guaranteed through the zone_locks bitmap. 382 + */ 383 + if (dev->memory_backed) 384 + spin_unlock_irq(&dev->zone_lock); 392 385 ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); 393 - spin_lock(&dev->zone_dev_lock); 386 + if (dev->memory_backed) 387 + spin_lock_irq(&dev->zone_lock); 388 + 394 389 if (ret != BLK_STS_OK) 395 390 goto unlock; 396 391 ··· 413 392 ret = BLK_STS_OK; 414 393 415 394 unlock: 416 - spin_unlock(&dev->zone_dev_lock); 417 395 null_unlock_zone(dev, zno); 418 396 419 397 return ret; ··· 536 516 null_lock_zone(dev, i); 537 517 zone = &dev->zones[i]; 538 518 if (zone->cond != BLK_ZONE_COND_EMPTY) { 539 - spin_lock(&dev->zone_dev_lock); 540 519 null_reset_zone(dev, zone); 541 - spin_unlock(&dev->zone_dev_lock); 542 520 trace_nullb_zone_op(cmd, i, zone->cond); 543 521 } 544 522 null_unlock_zone(dev, i); ··· 548 530 zone = &dev->zones[zone_no]; 549 531 550 532 null_lock_zone(dev, zone_no); 551 - spin_lock(&dev->zone_dev_lock); 552 533 553 534 switch (op) { 554 535 case REQ_OP_ZONE_RESET: ··· 566 549 ret = BLK_STS_NOTSUPP; 567 550 break; 568 551 } 569 - 570 - spin_unlock(&dev->zone_dev_lock); 571 552 572 553 if (ret == BLK_STS_OK) 573 554 trace_nullb_zone_op(cmd, zone_no, zone->cond);
+6 -2
drivers/nvme/host/core.c
··· 4582 4582 } 4583 4583 EXPORT_SYMBOL_GPL(nvme_start_queues); 4584 4584 4585 - 4586 - void nvme_sync_queues(struct nvme_ctrl *ctrl) 4585 + void nvme_sync_io_queues(struct nvme_ctrl *ctrl) 4587 4586 { 4588 4587 struct nvme_ns *ns; 4589 4588 ··· 4590 4591 list_for_each_entry(ns, &ctrl->namespaces, list) 4591 4592 blk_sync_queue(ns->queue); 4592 4593 up_read(&ctrl->namespaces_rwsem); 4594 + } 4595 + EXPORT_SYMBOL_GPL(nvme_sync_io_queues); 4593 4596 4597 + void nvme_sync_queues(struct nvme_ctrl *ctrl) 4598 + { 4599 + nvme_sync_io_queues(ctrl); 4594 4600 if (ctrl->admin_q) 4595 4601 blk_sync_queue(ctrl->admin_q); 4596 4602 }
+1
drivers/nvme/host/nvme.h
··· 602 602 void nvme_start_queues(struct nvme_ctrl *ctrl); 603 603 void nvme_kill_queues(struct nvme_ctrl *ctrl); 604 604 void nvme_sync_queues(struct nvme_ctrl *ctrl); 605 + void nvme_sync_io_queues(struct nvme_ctrl *ctrl); 605 606 void nvme_unfreeze(struct nvme_ctrl *ctrl); 606 607 void nvme_wait_freeze(struct nvme_ctrl *ctrl); 607 608 int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
+19 -4
drivers/nvme/host/pci.c
··· 198 198 u32 q_depth; 199 199 u16 cq_vector; 200 200 u16 sq_tail; 201 + u16 last_sq_tail; 201 202 u16 cq_head; 202 203 u16 qid; 203 204 u8 cq_phase; ··· 456 455 return 0; 457 456 } 458 457 459 - static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) 458 + /* 459 + * Write sq tail if we are asked to, or if the next command would wrap. 460 + */ 461 + static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) 460 462 { 463 + if (!write_sq) { 464 + u16 next_tail = nvmeq->sq_tail + 1; 465 + 466 + if (next_tail == nvmeq->q_depth) 467 + next_tail = 0; 468 + if (next_tail != nvmeq->last_sq_tail) 469 + return; 470 + } 471 + 461 472 if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, 462 473 nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) 463 474 writel(nvmeq->sq_tail, nvmeq->q_db); 475 + nvmeq->last_sq_tail = nvmeq->sq_tail; 464 476 } 465 477 466 478 /** ··· 490 476 cmd, sizeof(*cmd)); 491 477 if (++nvmeq->sq_tail == nvmeq->q_depth) 492 478 nvmeq->sq_tail = 0; 493 - if (write_sq) 494 - nvme_write_sq_db(nvmeq); 479 + nvme_write_sq_db(nvmeq, write_sq); 495 480 spin_unlock(&nvmeq->sq_lock); 496 481 } 497 482 ··· 499 486 struct nvme_queue *nvmeq = hctx->driver_data; 500 487 501 488 spin_lock(&nvmeq->sq_lock); 502 - nvme_write_sq_db(nvmeq); 489 + if (nvmeq->sq_tail != nvmeq->last_sq_tail) 490 + nvme_write_sq_db(nvmeq, true); 503 491 spin_unlock(&nvmeq->sq_lock); 504 492 } 505 493 ··· 1510 1496 struct nvme_dev *dev = nvmeq->dev; 1511 1497 1512 1498 nvmeq->sq_tail = 0; 1499 + nvmeq->last_sq_tail = 0; 1513 1500 nvmeq->cq_head = 0; 1514 1501 nvmeq->cq_phase = 1; 1515 1502 nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
+3 -11
drivers/nvme/host/rdma.c
··· 122 122 struct sockaddr_storage src_addr; 123 123 124 124 struct nvme_ctrl ctrl; 125 - struct mutex teardown_lock; 126 125 bool use_inline_data; 127 126 u32 io_queues[HCTX_MAX_TYPES]; 128 127 }; ··· 1009 1010 static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, 1010 1011 bool remove) 1011 1012 { 1012 - mutex_lock(&ctrl->teardown_lock); 1013 1013 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 1014 + blk_sync_queue(ctrl->ctrl.admin_q); 1014 1015 nvme_rdma_stop_queue(&ctrl->queues[0]); 1015 1016 if (ctrl->ctrl.admin_tagset) { 1016 1017 blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, ··· 1020 1021 if (remove) 1021 1022 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 1022 1023 nvme_rdma_destroy_admin_queue(ctrl, remove); 1023 - mutex_unlock(&ctrl->teardown_lock); 1024 1024 } 1025 1025 1026 1026 static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, 1027 1027 bool remove) 1028 1028 { 1029 - mutex_lock(&ctrl->teardown_lock); 1030 1029 if (ctrl->ctrl.queue_count > 1) { 1031 1030 nvme_start_freeze(&ctrl->ctrl); 1032 1031 nvme_stop_queues(&ctrl->ctrl); 1032 + nvme_sync_io_queues(&ctrl->ctrl); 1033 1033 nvme_rdma_stop_io_queues(ctrl); 1034 1034 if (ctrl->ctrl.tagset) { 1035 1035 blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, ··· 1039 1041 nvme_start_queues(&ctrl->ctrl); 1040 1042 nvme_rdma_destroy_io_queues(ctrl, remove); 1041 1043 } 1042 - mutex_unlock(&ctrl->teardown_lock); 1043 1044 } 1044 1045 1045 1046 static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) ··· 1973 1976 { 1974 1977 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 1975 1978 struct nvme_rdma_queue *queue = req->queue; 1976 - struct nvme_rdma_ctrl *ctrl = queue->ctrl; 1977 1979 1978 - /* fence other contexts that may complete the command */ 1979 - mutex_lock(&ctrl->teardown_lock); 1980 1980 nvme_rdma_stop_queue(queue); 1981 - if (!blk_mq_request_completed(rq)) { 1981 + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { 1982 1982 nvme_req(rq)->status = 
NVME_SC_HOST_ABORTED_CMD; 1983 1983 blk_mq_complete_request(rq); 1984 1984 } 1985 - mutex_unlock(&ctrl->teardown_lock); 1986 1985 } 1987 1986 1988 1987 static enum blk_eh_timer_return ··· 2313 2320 return ERR_PTR(-ENOMEM); 2314 2321 ctrl->ctrl.opts = opts; 2315 2322 INIT_LIST_HEAD(&ctrl->list); 2316 - mutex_init(&ctrl->teardown_lock); 2317 2323 2318 2324 if (!(opts->mask & NVMF_OPT_TRSVCID)) { 2319 2325 opts->trsvcid =
+4 -12
drivers/nvme/host/tcp.c
··· 124 124 struct sockaddr_storage src_addr; 125 125 struct nvme_ctrl ctrl; 126 126 127 - struct mutex teardown_lock; 128 127 struct work_struct err_work; 129 128 struct delayed_work connect_work; 130 129 struct nvme_tcp_request async_req; ··· 1885 1886 static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, 1886 1887 bool remove) 1887 1888 { 1888 - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); 1889 1889 blk_mq_quiesce_queue(ctrl->admin_q); 1890 + blk_sync_queue(ctrl->admin_q); 1890 1891 nvme_tcp_stop_queue(ctrl, 0); 1891 1892 if (ctrl->admin_tagset) { 1892 1893 blk_mq_tagset_busy_iter(ctrl->admin_tagset, ··· 1896 1897 if (remove) 1897 1898 blk_mq_unquiesce_queue(ctrl->admin_q); 1898 1899 nvme_tcp_destroy_admin_queue(ctrl, remove); 1899 - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); 1900 1900 } 1901 1901 1902 1902 static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, 1903 1903 bool remove) 1904 1904 { 1905 - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); 1906 1905 if (ctrl->queue_count <= 1) 1907 - goto out; 1906 + return; 1908 1907 blk_mq_quiesce_queue(ctrl->admin_q); 1909 1908 nvme_start_freeze(ctrl); 1910 1909 nvme_stop_queues(ctrl); 1910 + nvme_sync_io_queues(ctrl); 1911 1911 nvme_tcp_stop_io_queues(ctrl); 1912 1912 if (ctrl->tagset) { 1913 1913 blk_mq_tagset_busy_iter(ctrl->tagset, ··· 1916 1918 if (remove) 1917 1919 nvme_start_queues(ctrl); 1918 1920 nvme_tcp_destroy_io_queues(ctrl, remove); 1919 - out: 1920 - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); 1921 1921 } 1922 1922 1923 1923 static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) ··· 2167 2171 struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 2168 2172 struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; 2169 2173 2170 - /* fence other contexts that may complete the command */ 2171 - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); 2172 2174 nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); 2173 - if (!blk_mq_request_completed(rq)) { 2175 + if 
(blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { 2174 2176 nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; 2175 2177 blk_mq_complete_request(rq); 2176 2178 } 2177 - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); 2178 2179 } 2179 2180 2180 2181 static enum blk_eh_timer_return ··· 2448 2455 nvme_tcp_reconnect_ctrl_work); 2449 2456 INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); 2450 2457 INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); 2451 - mutex_init(&ctrl->teardown_lock); 2452 2458 2453 2459 if (!(opts->mask & NVMF_OPT_TRSVCID)) { 2454 2460 opts->trsvcid =