Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- NVMe pull request from Christoph
- rdma error handling fixes (Chao Leng)
- fc error handling and reconnect fixes (James Smart)
- fix the qid displayed when tracing ioctl commands (Keith Busch)
- don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni)
- fix MDTS for passthru (Logan Gunthorpe)
- blacklist Write Same on more devices (Kai-Heng Feng)
- fix an uninitialized work struct (zhenwei pi)

- lightnvm out-of-bounds fix (Colin)

- SG allocation leak fix (Doug)

- rnbd fixes (Gioh, Guoqing, Jack)

- zone error translation fixes (Keith)

- kerneldoc markup fix (Mauro)

- zram lockdep fix (Peter)

- Kill unused io_context members (Yufen)

- NUMA memory allocation cleanup (Xianting)

- NBD config wakeup fix (Xiubo)

* tag 'block-5.10-2020-10-24' of git://git.kernel.dk/linux-block: (27 commits)
block: blk-mq: fix a kernel-doc markup
nvme-fc: shorten reconnect delay if possible for FC
nvme-fc: wait for queues to freeze before calling update_hr_hw_queues
nvme-fc: fix error loop in create_hw_io_queues
nvme-fc: fix io timeout to abort I/O
null_blk: use zone status for max active/open
nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru
nvmet: cleanup nvmet_passthru_map_sg()
nvmet: limit passthru MTDS by BIO_MAX_PAGES
nvmet: fix uninitialized work for zero kato
nvme-pci: disable Write Zeroes on Sandisk Skyhawk
nvme: use queuedata for nvme_req_qid
nvme-rdma: fix crash due to incorrect cqe
nvme-rdma: fix crash when connect rejected
block: remove unused members for io_context
blk-mq: remove the calling of local_memory_node()
zram: Fix __zram_bvec_{read,write}() locking order
skd_main: remove unused including <linux/version.h>
sgl_alloc_order: fix memory leak
lightnvm: fix out-of-bounds write to array devices->info[]
...

+244 -130
+8
Documentation/block/queue-sysfs.rst
··· 124 124 EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value. 125 125 If this value is 0, there is no limit. 126 126 127 + If the host attempts to exceed this limit, the driver should report this error 128 + with BLK_STS_ZONE_ACTIVE_RESOURCE, which user space may see as the EOVERFLOW 129 + errno. 130 + 127 131 max_open_zones (RO) 128 132 ------------------- 129 133 For zoned block devices (zoned attribute indicating "host-managed" or 130 134 "host-aware"), the sum of zones belonging to any of the zone states: 131 135 EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value. 132 136 If this value is 0, there is no limit. 137 + 138 + If the host attempts to exceed this limit, the driver should report this error 139 + with BLK_STS_ZONE_OPEN_RESOURCE, which user space may see as the ETOOMANYREFS 140 + errno. 133 141 134 142 max_sectors_kb (RW) 135 143 -------------------
+4
block/blk-core.c
··· 186 186 /* device mapper special case, should not leak out: */ 187 187 [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" }, 188 188 189 + /* zone device specific errors */ 190 + [BLK_STS_ZONE_OPEN_RESOURCE] = { -ETOOMANYREFS, "open zones exceeded" }, 191 + [BLK_STS_ZONE_ACTIVE_RESOURCE] = { -EOVERFLOW, "active zones exceeded" }, 192 + 189 193 /* everything else not covered above: */ 190 194 [BLK_STS_IOERR] = { -EIO, "I/O" }, 191 195 };
+1 -1
block/blk-mq-cpumap.c
··· 89 89 90 90 for_each_possible_cpu(i) { 91 91 if (index == qmap->mq_map[i]) 92 - return local_memory_node(cpu_to_node(i)); 92 + return cpu_to_node(i); 93 93 } 94 94 95 95 return NUMA_NO_NODE;
+2 -2
block/blk-mq.c
··· 1664 1664 EXPORT_SYMBOL(blk_mq_run_hw_queue); 1665 1665 1666 1666 /** 1667 - * blk_mq_run_hw_queue - Run all hardware queues in a request queue. 1667 + * blk_mq_run_hw_queues - Run all hardware queues in a request queue. 1668 1668 * @q: Pointer to the request queue to run. 1669 1669 * @async: If we want to run the queue asynchronously. 1670 1670 */ ··· 2743 2743 for (j = 0; j < set->nr_maps; j++) { 2744 2744 hctx = blk_mq_map_queue_type(q, j, i); 2745 2745 if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) 2746 - hctx->numa_node = local_memory_node(cpu_to_node(i)); 2746 + hctx->numa_node = cpu_to_node(i); 2747 2747 } 2748 2748 } 2749 2749 }
+1 -1
drivers/block/nbd.c
··· 802 802 if (likely(!blk_should_fake_timeout(rq->q))) 803 803 blk_mq_complete_request(rq); 804 804 } 805 + nbd_config_put(nbd); 805 806 atomic_dec(&config->recv_threads); 806 807 wake_up(&config->recv_wq); 807 - nbd_config_put(nbd); 808 808 kfree(args); 809 809 } 810 810
+43 -26
drivers/block/null_blk_zoned.c
··· 220 220 } 221 221 } 222 222 223 - static bool null_can_set_active(struct nullb_device *dev) 223 + static blk_status_t null_check_active(struct nullb_device *dev) 224 224 { 225 225 if (!dev->zone_max_active) 226 - return true; 226 + return BLK_STS_OK; 227 227 228 - return dev->nr_zones_exp_open + dev->nr_zones_imp_open + 229 - dev->nr_zones_closed < dev->zone_max_active; 228 + if (dev->nr_zones_exp_open + dev->nr_zones_imp_open + 229 + dev->nr_zones_closed < dev->zone_max_active) 230 + return BLK_STS_OK; 231 + 232 + return BLK_STS_ZONE_ACTIVE_RESOURCE; 230 233 } 231 234 232 - static bool null_can_open(struct nullb_device *dev) 235 + static blk_status_t null_check_open(struct nullb_device *dev) 233 236 { 234 237 if (!dev->zone_max_open) 235 - return true; 238 + return BLK_STS_OK; 236 239 237 240 if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open) 238 - return true; 241 + return BLK_STS_OK; 239 242 240 - if (dev->nr_zones_imp_open && null_can_set_active(dev)) { 241 - null_close_first_imp_zone(dev); 242 - return true; 243 + if (dev->nr_zones_imp_open) { 244 + if (null_check_active(dev) == BLK_STS_OK) { 245 + null_close_first_imp_zone(dev); 246 + return BLK_STS_OK; 247 + } 243 248 } 244 249 245 - return false; 250 + return BLK_STS_ZONE_OPEN_RESOURCE; 246 251 } 247 252 248 253 /* ··· 263 258 * it is not certain that closing an implicit open zone will allow a new zone 264 259 * to be opened, since we might already be at the active limit capacity. 
265 260 */ 266 - static bool null_has_zone_resources(struct nullb_device *dev, struct blk_zone *zone) 261 + static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) 267 262 { 263 + blk_status_t ret; 264 + 268 265 switch (zone->cond) { 269 266 case BLK_ZONE_COND_EMPTY: 270 - if (!null_can_set_active(dev)) 271 - return false; 267 + ret = null_check_active(dev); 268 + if (ret != BLK_STS_OK) 269 + return ret; 272 270 fallthrough; 273 271 case BLK_ZONE_COND_CLOSED: 274 - return null_can_open(dev); 272 + return null_check_open(dev); 275 273 default: 276 274 /* Should never be called for other states */ 277 275 WARN_ON(1); 278 - return false; 276 + return BLK_STS_IOERR; 279 277 } 280 278 } 281 279 ··· 301 293 return BLK_STS_IOERR; 302 294 case BLK_ZONE_COND_EMPTY: 303 295 case BLK_ZONE_COND_CLOSED: 304 - if (!null_has_zone_resources(dev, zone)) 305 - return BLK_STS_IOERR; 296 + ret = null_check_zone_resources(dev, zone); 297 + if (ret != BLK_STS_OK) 298 + return ret; 306 299 break; 307 300 case BLK_ZONE_COND_IMP_OPEN: 308 301 case BLK_ZONE_COND_EXP_OPEN: ··· 358 349 359 350 static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) 360 351 { 352 + blk_status_t ret; 353 + 361 354 if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) 362 355 return BLK_STS_IOERR; 363 356 ··· 368 357 /* open operation on exp open is not an error */ 369 358 return BLK_STS_OK; 370 359 case BLK_ZONE_COND_EMPTY: 371 - if (!null_has_zone_resources(dev, zone)) 372 - return BLK_STS_IOERR; 360 + ret = null_check_zone_resources(dev, zone); 361 + if (ret != BLK_STS_OK) 362 + return ret; 373 363 break; 374 364 case BLK_ZONE_COND_IMP_OPEN: 375 365 dev->nr_zones_imp_open--; 376 366 break; 377 367 case BLK_ZONE_COND_CLOSED: 378 - if (!null_has_zone_resources(dev, zone)) 379 - return BLK_STS_IOERR; 368 + ret = null_check_zone_resources(dev, zone); 369 + if (ret != BLK_STS_OK) 370 + return ret; 380 371 dev->nr_zones_closed--; 381 372 break; 382 373 case 
BLK_ZONE_COND_FULL: ··· 394 381 395 382 static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) 396 383 { 384 + blk_status_t ret; 385 + 397 386 if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) 398 387 return BLK_STS_IOERR; 399 388 ··· 404 389 /* finish operation on full is not an error */ 405 390 return BLK_STS_OK; 406 391 case BLK_ZONE_COND_EMPTY: 407 - if (!null_has_zone_resources(dev, zone)) 408 - return BLK_STS_IOERR; 392 + ret = null_check_zone_resources(dev, zone); 393 + if (ret != BLK_STS_OK) 394 + return ret; 409 395 break; 410 396 case BLK_ZONE_COND_IMP_OPEN: 411 397 dev->nr_zones_imp_open--; ··· 415 399 dev->nr_zones_exp_open--; 416 400 break; 417 401 case BLK_ZONE_COND_CLOSED: 418 - if (!null_has_zone_resources(dev, zone)) 419 - return BLK_STS_IOERR; 402 + ret = null_check_zone_resources(dev, zone); 403 + if (ret != BLK_STS_OK) 404 + return ret; 420 405 dev->nr_zones_closed--; 421 406 break; 422 407 default:
+8 -11
drivers/block/rnbd/rnbd-clt.c
··· 91 91 dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE; 92 92 dev->max_segments = BMAX_SEGMENTS; 93 93 94 - dev->max_hw_sectors = min_t(u32, dev->max_hw_sectors, 95 - le32_to_cpu(rsp->max_hw_sectors)); 96 - dev->max_segments = min_t(u16, dev->max_segments, 97 - le16_to_cpu(rsp->max_segments)); 98 - 99 94 return 0; 100 95 } 101 96 ··· 422 427 }; 423 428 424 429 static int send_usr_msg(struct rtrs_clt *rtrs, int dir, 425 - struct rnbd_iu *iu, struct kvec *vec, size_t nr, 430 + struct rnbd_iu *iu, struct kvec *vec, 426 431 size_t len, struct scatterlist *sg, unsigned int sg_len, 427 432 void (*conf)(struct work_struct *work), 428 433 int *errno, enum wait_type wait) ··· 436 441 .conf_fn = msg_conf, 437 442 }; 438 443 err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit, 439 - vec, nr, len, sg, sg_len); 444 + vec, 1, len, sg, sg_len); 440 445 if (!err && wait) { 441 446 wait_event(iu->comp.wait, iu->comp.errno != INT_MAX); 442 447 *errno = iu->comp.errno; ··· 481 486 msg.device_id = cpu_to_le32(device_id); 482 487 483 488 WARN_ON(!rnbd_clt_get_dev(dev)); 484 - err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 1, 0, NULL, 0, 489 + err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0, 485 490 msg_close_conf, &errno, wait); 486 491 if (err) { 487 492 rnbd_clt_put_dev(dev); ··· 570 575 571 576 WARN_ON(!rnbd_clt_get_dev(dev)); 572 577 err = send_usr_msg(sess->rtrs, READ, iu, 573 - &vec, 1, sizeof(*rsp), iu->sglist, 1, 578 + &vec, sizeof(*rsp), iu->sglist, 1, 574 579 msg_open_conf, &errno, wait); 575 580 if (err) { 576 581 rnbd_clt_put_dev(dev); ··· 624 629 goto put_iu; 625 630 } 626 631 err = send_usr_msg(sess->rtrs, READ, iu, 627 - &vec, 1, sizeof(*rsp), iu->sglist, 1, 632 + &vec, sizeof(*rsp), iu->sglist, 1, 628 633 msg_sess_info_conf, &errno, wait); 629 634 if (err) { 630 635 rnbd_clt_put_sess(sess); ··· 1509 1514 "map_device: Failed to configure device, err: %d\n", 1510 1515 ret); 1511 1516 mutex_unlock(&dev->lock); 1512 - goto del_dev; 1517 + goto 
send_close; 1513 1518 } 1514 1519 1515 1520 rnbd_clt_info(dev, ··· 1528 1533 1529 1534 return dev; 1530 1535 1536 + send_close: 1537 + send_msg_close(dev, dev->device_id, WAIT); 1531 1538 del_dev: 1532 1539 delete_dev(dev); 1533 1540 put_dev:
-1
drivers/block/skd_main.c
··· 25 25 #include <linux/dma-mapping.h> 26 26 #include <linux/completion.h> 27 27 #include <linux/scatterlist.h> 28 - #include <linux/version.h> 29 28 #include <linux/err.h> 30 29 #include <linux/aer.h> 31 30 #include <linux/wait.h>
+5 -3
drivers/block/zram/zram_drv.c
··· 1218 1218 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, 1219 1219 struct bio *bio, bool partial_io) 1220 1220 { 1221 - int ret; 1221 + struct zcomp_strm *zstrm; 1222 1222 unsigned long handle; 1223 1223 unsigned int size; 1224 1224 void *src, *dst; 1225 + int ret; 1225 1226 1226 1227 zram_slot_lock(zram, index); 1227 1228 if (zram_test_flag(zram, index, ZRAM_WB)) { ··· 1253 1252 1254 1253 size = zram_get_obj_size(zram, index); 1255 1254 1255 + if (size != PAGE_SIZE) 1256 + zstrm = zcomp_stream_get(zram->comp); 1257 + 1256 1258 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 1257 1259 if (size == PAGE_SIZE) { 1258 1260 dst = kmap_atomic(page); ··· 1263 1259 kunmap_atomic(dst); 1264 1260 ret = 0; 1265 1261 } else { 1266 - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); 1267 - 1268 1262 dst = kmap_atomic(page); 1269 1263 ret = zcomp_decompress(zstrm, src, size, dst); 1270 1264 kunmap_atomic(dst);
+3 -2
drivers/lightnvm/core.c
··· 1311 1311 strlcpy(info->bmname, "gennvm", sizeof(info->bmname)); 1312 1312 i++; 1313 1313 1314 - if (i > 31) { 1315 - pr_err("max 31 devices can be reported.\n"); 1314 + if (i >= ARRAY_SIZE(devices->info)) { 1315 + pr_err("max %zd devices can be reported.\n", 1316 + ARRAY_SIZE(devices->info)); 1316 1317 break; 1317 1318 } 1318 1319 }
+4
drivers/nvme/host/core.c
··· 248 248 return BLK_STS_NEXUS; 249 249 case NVME_SC_HOST_PATH_ERROR: 250 250 return BLK_STS_TRANSPORT; 251 + case NVME_SC_ZONE_TOO_MANY_ACTIVE: 252 + return BLK_STS_ZONE_ACTIVE_RESOURCE; 253 + case NVME_SC_ZONE_TOO_MANY_OPEN: 254 + return BLK_STS_ZONE_OPEN_RESOURCE; 251 255 default: 252 256 return BLK_STS_IOERR; 253 257 }
+116 -66
drivers/nvme/host/fc.c
··· 26 26 }; 27 27 28 28 #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ 29 + #define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects 30 + * when connected and a 31 + * connection failure. 32 + */ 29 33 30 34 struct nvme_fc_queue { 31 35 struct nvme_fc_ctrl *ctrl; ··· 1841 1837 opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1842 1838 if (opstate != FCPOP_STATE_ACTIVE) 1843 1839 atomic_set(&op->state, opstate); 1844 - else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) 1840 + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { 1841 + op->flags |= FCOP_FLAGS_TERMIO; 1845 1842 ctrl->iocnt++; 1843 + } 1846 1844 spin_unlock_irqrestore(&ctrl->lock, flags); 1847 1845 1848 1846 if (opstate != FCPOP_STATE_ACTIVE) ··· 1880 1874 1881 1875 if (opstate == FCPOP_STATE_ABORTED) { 1882 1876 spin_lock_irqsave(&ctrl->lock, flags); 1883 - if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { 1877 + if (test_bit(FCCTRL_TERMIO, &ctrl->flags) && 1878 + op->flags & FCOP_FLAGS_TERMIO) { 1884 1879 if (!--ctrl->iocnt) 1885 1880 wake_up(&ctrl->ioabort_wait); 1886 1881 } ··· 2321 2314 return 0; 2322 2315 2323 2316 delete_queues: 2324 - for (; i >= 0; i--) 2317 + for (; i > 0; i--) 2325 2318 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 2326 2319 return ret; 2327 2320 } ··· 2440 2433 return; 2441 2434 2442 2435 dev_warn(ctrl->ctrl.device, 2443 - "NVME-FC{%d}: transport association error detected: %s\n", 2436 + "NVME-FC{%d}: transport association event: %s\n", 2444 2437 ctrl->cnum, errmsg); 2445 2438 dev_warn(ctrl->ctrl.device, 2446 2439 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); ··· 2453 2446 { 2454 2447 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2455 2448 struct nvme_fc_ctrl *ctrl = op->ctrl; 2449 + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2450 + struct nvme_command *sqe = &cmdiu->sqe; 2456 2451 2457 2452 /* 2458 - * we can't individually ABTS an io without affecting the queue, 2459 - * thus killing the queue, and thus the association. 
2460 - * So resolve by performing a controller reset, which will stop 2461 - * the host/io stack, terminate the association on the link, 2462 - * and recreate an association on the link. 2453 + * Attempt to abort the offending command. Command completion 2454 + * will detect the aborted io and will fail the connection. 2463 2455 */ 2464 - nvme_fc_error_recovery(ctrl, "io timeout error"); 2456 + dev_info(ctrl->ctrl.device, 2457 + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " 2458 + "x%08x/x%08x\n", 2459 + ctrl->cnum, op->queue->qnum, sqe->common.opcode, 2460 + sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); 2461 + if (__nvme_fc_abort_op(ctrl, op)) 2462 + nvme_fc_error_recovery(ctrl, "io timeout abort failed"); 2465 2463 2466 2464 /* 2467 2465 * the io abort has been initiated. Have the reset timer ··· 2738 2726 struct nvme_fc_ctrl *ctrl = op->ctrl; 2739 2727 2740 2728 atomic_set(&op->state, FCPOP_STATE_IDLE); 2729 + op->flags &= ~FCOP_FLAGS_TERMIO; 2741 2730 2742 2731 nvme_fc_unmap_data(ctrl, rq, op); 2743 2732 nvme_complete_rq(rq); ··· 2889 2876 if (ret) 2890 2877 goto out_delete_hw_queues; 2891 2878 2892 - if (prior_ioq_cnt != nr_io_queues) 2879 + if (prior_ioq_cnt != nr_io_queues) { 2893 2880 dev_info(ctrl->ctrl.device, 2894 2881 "reconnect: revising io queue count from %d to %d\n", 2895 2882 prior_ioq_cnt, nr_io_queues); 2896 - blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2883 + nvme_wait_freeze(&ctrl->ctrl); 2884 + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2885 + nvme_unfreeze(&ctrl->ctrl); 2886 + } 2897 2887 2898 2888 return 0; 2899 2889 ··· 3106 3090 return ret; 3107 3091 } 3108 3092 3093 + 3094 + /* 3095 + * This routine runs through all outstanding commands on the association 3096 + * and aborts them. This routine is typically be called by the 3097 + * delete_association routine. It is also called due to an error during 3098 + * reconnect. 
In that scenario, it is most likely a command that initializes 3099 + * the controller, including fabric Connect commands on io queues, that 3100 + * may have timed out or failed thus the io must be killed for the connect 3101 + * thread to see the error. 3102 + */ 3103 + static void 3104 + __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) 3105 + { 3106 + /* 3107 + * If io queues are present, stop them and terminate all outstanding 3108 + * ios on them. As FC allocates FC exchange for each io, the 3109 + * transport must contact the LLDD to terminate the exchange, 3110 + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() 3111 + * to tell us what io's are busy and invoke a transport routine 3112 + * to kill them with the LLDD. After terminating the exchange 3113 + * the LLDD will call the transport's normal io done path, but it 3114 + * will have an aborted status. The done path will return the 3115 + * io requests back to the block layer as part of normal completions 3116 + * (but with error status). 3117 + */ 3118 + if (ctrl->ctrl.queue_count > 1) { 3119 + nvme_stop_queues(&ctrl->ctrl); 3120 + blk_mq_tagset_busy_iter(&ctrl->tag_set, 3121 + nvme_fc_terminate_exchange, &ctrl->ctrl); 3122 + blk_mq_tagset_wait_completed_request(&ctrl->tag_set); 3123 + if (start_queues) 3124 + nvme_start_queues(&ctrl->ctrl); 3125 + } 3126 + 3127 + /* 3128 + * Other transports, which don't have link-level contexts bound 3129 + * to sqe's, would try to gracefully shutdown the controller by 3130 + * writing the registers for shutdown and polling (call 3131 + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially 3132 + * just aborted and we will wait on those contexts, and given 3133 + * there was no indication of how live the controlelr is on the 3134 + * link, don't send more io to create more contexts for the 3135 + * shutdown. Let the controller fail via keepalive failure if 3136 + * its still present. 
3137 + */ 3138 + 3139 + /* 3140 + * clean up the admin queue. Same thing as above. 3141 + */ 3142 + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 3143 + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 3144 + nvme_fc_terminate_exchange, &ctrl->ctrl); 3145 + blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); 3146 + } 3147 + 3109 3148 /* 3110 3149 * This routine stops operation of the controller on the host side. 3111 3150 * On the host os stack side: Admin and IO queues are stopped, ··· 3181 3110 ctrl->iocnt = 0; 3182 3111 spin_unlock_irqrestore(&ctrl->lock, flags); 3183 3112 3184 - /* 3185 - * If io queues are present, stop them and terminate all outstanding 3186 - * ios on them. As FC allocates FC exchange for each io, the 3187 - * transport must contact the LLDD to terminate the exchange, 3188 - * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() 3189 - * to tell us what io's are busy and invoke a transport routine 3190 - * to kill them with the LLDD. After terminating the exchange 3191 - * the LLDD will call the transport's normal io done path, but it 3192 - * will have an aborted status. The done path will return the 3193 - * io requests back to the block layer as part of normal completions 3194 - * (but with error status). 3195 - */ 3196 - if (ctrl->ctrl.queue_count > 1) { 3197 - nvme_stop_queues(&ctrl->ctrl); 3198 - blk_mq_tagset_busy_iter(&ctrl->tag_set, 3199 - nvme_fc_terminate_exchange, &ctrl->ctrl); 3200 - blk_mq_tagset_wait_completed_request(&ctrl->tag_set); 3201 - } 3202 - 3203 - /* 3204 - * Other transports, which don't have link-level contexts bound 3205 - * to sqe's, would try to gracefully shutdown the controller by 3206 - * writing the registers for shutdown and polling (call 3207 - * nvme_shutdown_ctrl()). 
Given a bunch of i/o was potentially 3208 - * just aborted and we will wait on those contexts, and given 3209 - * there was no indication of how live the controlelr is on the 3210 - * link, don't send more io to create more contexts for the 3211 - * shutdown. Let the controller fail via keepalive failure if 3212 - * its still present. 3213 - */ 3214 - 3215 - /* 3216 - * clean up the admin queue. Same thing as above. 3217 - * use blk_mq_tagset_busy_itr() and the transport routine to 3218 - * terminate the exchanges. 3219 - */ 3220 - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 3221 - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 3222 - nvme_fc_terminate_exchange, &ctrl->ctrl); 3223 - blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); 3113 + __nvme_fc_abort_outstanding_ios(ctrl, false); 3224 3114 3225 3115 /* kill the aens as they are a separate path */ 3226 3116 nvme_fc_abort_aen_ops(ctrl); ··· 3295 3263 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) 3296 3264 { 3297 3265 /* 3298 - * if state is connecting - the error occurred as part of a 3299 - * reconnect attempt. The create_association error paths will 3300 - * clean up any outstanding io. 3301 - * 3302 - * if it's a different state - ensure all pending io is 3303 - * terminated. Given this can delay while waiting for the 3304 - * aborted io to return, we recheck adapter state below 3305 - * before changing state. 3266 + * if state is CONNECTING - the error occurred as part of a 3267 + * reconnect attempt. Abort any ios on the association and 3268 + * let the create_association error paths resolve things. 
3306 3269 */ 3307 - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { 3308 - nvme_stop_keep_alive(&ctrl->ctrl); 3309 - 3310 - /* will block will waiting for io to terminate */ 3311 - nvme_fc_delete_association(ctrl); 3270 + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { 3271 + __nvme_fc_abort_outstanding_ios(ctrl, true); 3272 + return; 3312 3273 } 3274 + 3275 + /* 3276 + * For any other state, kill the association. As this routine 3277 + * is a common io abort routine for resetting and such, after 3278 + * the association is terminated, ensure that the state is set 3279 + * to CONNECTING. 3280 + */ 3281 + 3282 + nvme_stop_keep_alive(&ctrl->ctrl); 3283 + 3284 + /* will block will waiting for io to terminate */ 3285 + nvme_fc_delete_association(ctrl); 3313 3286 3314 3287 if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && 3315 3288 !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) ··· 3440 3403 { 3441 3404 struct nvme_fc_ctrl *ctrl; 3442 3405 unsigned long flags; 3443 - int ret, idx; 3406 + int ret, idx, ctrl_loss_tmo; 3444 3407 3445 3408 if (!(rport->remoteport.port_role & 3446 3409 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { ··· 3464 3427 if (idx < 0) { 3465 3428 ret = -ENOSPC; 3466 3429 goto out_free_ctrl; 3430 + } 3431 + 3432 + /* 3433 + * if ctrl_loss_tmo is being enforced and the default reconnect delay 3434 + * is being used, change to a shorter reconnect delay for FC. 3435 + */ 3436 + if (opts->max_reconnects != -1 && 3437 + opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY && 3438 + opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) { 3439 + ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay; 3440 + opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO; 3441 + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, 3442 + opts->reconnect_delay); 3467 3443 } 3468 3444 3469 3445 ctrl->ctrl.opts = opts;
+1 -1
drivers/nvme/host/nvme.h
··· 176 176 177 177 static inline u16 nvme_req_qid(struct request *req) 178 178 { 179 - if (!req->rq_disk) 179 + if (!req->q->queuedata) 180 180 return 0; 181 181 return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1; 182 182 }
+2
drivers/nvme/host/pci.c
··· 3185 3185 NVME_QUIRK_IGNORE_DEV_SUBNQN, }, 3186 3186 { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ 3187 3187 .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 3188 + { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ 3189 + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 3188 3190 { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), 3189 3191 .driver_data = NVME_QUIRK_SINGLE_VECTOR }, 3190 3192 { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
+3 -3
drivers/nvme/host/rdma.c
··· 1730 1730 req->result = cqe->result; 1731 1731 1732 1732 if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { 1733 - if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { 1733 + if (unlikely(!req->mr || 1734 + wc->ex.invalidate_rkey != req->mr->rkey)) { 1734 1735 dev_err(queue->ctrl->ctrl.device, 1735 1736 "Bogus remote invalidation for rkey %#x\n", 1736 - req->mr->rkey); 1737 + req->mr ? req->mr->rkey : 0); 1737 1738 nvme_rdma_error_recovery(queue->ctrl); 1738 1739 } 1739 1740 } else if (req->mr) { ··· 1927 1926 complete(&queue->cm_done); 1928 1927 return 0; 1929 1928 case RDMA_CM_EVENT_REJECTED: 1930 - nvme_rdma_destroy_queue_ib(queue); 1931 1929 cm_error = nvme_rdma_conn_rejected(queue, ev); 1932 1930 break; 1933 1931 case RDMA_CM_EVENT_ROUTE_ERROR:
+2 -1
drivers/nvme/target/core.c
··· 1126 1126 * in case a host died before it enabled the controller. Hence, simply 1127 1127 * reset the keep alive timer when the controller is enabled. 1128 1128 */ 1129 - mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); 1129 + if (ctrl->kato) 1130 + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); 1130 1131 } 1131 1132 1132 1133 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
+13 -5
drivers/nvme/target/passthru.c
··· 26 26 struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl; 27 27 u16 status = NVME_SC_SUCCESS; 28 28 struct nvme_id_ctrl *id; 29 - u32 max_hw_sectors; 29 + int max_hw_sectors; 30 30 int page_shift; 31 31 32 32 id = kzalloc(sizeof(*id), GFP_KERNEL); ··· 47 47 */ 48 48 max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9), 49 49 pctrl->max_hw_sectors); 50 + 51 + /* 52 + * nvmet_passthru_map_sg is limitted to using a single bio so limit 53 + * the mdts based on BIO_MAX_PAGES as well 54 + */ 55 + max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9), 56 + max_hw_sectors); 50 57 51 58 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; 52 59 ··· 187 180 188 181 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) 189 182 { 190 - int sg_cnt = req->sg_cnt; 191 183 struct scatterlist *sg; 192 184 int op_flags = 0; 193 185 struct bio *bio; 194 186 int i, ret; 187 + 188 + if (req->sg_cnt > BIO_MAX_PAGES) 189 + return -EINVAL; 195 190 196 191 if (req->cmd->common.opcode == nvme_cmd_flush) 197 192 op_flags = REQ_FUA; 198 193 else if (nvme_is_write(req->cmd)) 199 194 op_flags = REQ_SYNC | REQ_IDLE; 200 195 201 - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); 196 + bio = bio_alloc(GFP_KERNEL, req->sg_cnt); 202 197 bio->bi_end_io = bio_put; 203 198 bio->bi_opf = req_op(rq) | op_flags; 204 199 ··· 210 201 bio_put(bio); 211 202 return -EINVAL; 212 203 } 213 - sg_cnt--; 214 204 } 215 205 216 206 ret = blk_rq_append_bio(rq, &bio); ··· 244 236 q = ns->queue; 245 237 } 246 238 247 - rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); 239 + rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY); 248 240 if (IS_ERR(rq)) { 249 241 status = NVME_SC_INTERNAL; 250 242 goto out_put_ns;
+9
drivers/scsi/scsi_lib.c
··· 777 777 /* See SSC3rXX or current. */ 778 778 action = ACTION_FAIL; 779 779 break; 780 + case DATA_PROTECT: 781 + action = ACTION_FAIL; 782 + if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) || 783 + (sshdr.asc == 0x55 && 784 + (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) { 785 + /* Insufficient zone resources */ 786 + blk_stat = BLK_STS_ZONE_OPEN_RESOURCE; 787 + } 788 + break; 780 789 default: 781 790 action = ACTION_FAIL; 782 791 break;
+18
include/linux/blk_types.h
··· 104 104 */ 105 105 #define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) 106 106 107 + /* 108 + * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion 109 + * path if the device returns a status indicating that too many zone resources 110 + * are currently open. The same command should be successful if resubmitted 111 + * after the number of open zones decreases below the device's limits, which is 112 + * reported in the request_queue's max_open_zones. 113 + */ 114 + #define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) 115 + 116 + /* 117 + * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion 118 + * path if the device returns a status indicating that too many zone resources 119 + * are currently active. The same command should be successful if resubmitted 120 + * after the number of active zones decreases below the device's limits, which 121 + * is reported in the request_queue's max_active_zones. 122 + */ 123 + #define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) 124 + 107 125 /** 108 126 * blk_path_error - returns true if error may be path related 109 127 * @error: status the request was completed with
-6
include/linux/iocontext.h
··· 106 106 107 107 unsigned short ioprio; 108 108 109 - /* 110 - * For request batching 111 - */ 112 - int nr_batch_requests; /* Number of requests left in the batch */ 113 - unsigned long last_waited; /* Time last woken after wait for request */ 114 - 115 109 struct radix_tree_root icq_tree; 116 110 struct io_cq __rcu *icq_hint; 117 111 struct hlist_head icq_list;
+1 -1
lib/scatterlist.c
··· 595 595 elem_len = min_t(u64, length, PAGE_SIZE << order); 596 596 page = alloc_pages(gfp, order); 597 597 if (!page) { 598 - sgl_free(sgl); 598 + sgl_free_order(sgl, order); 599 599 return NULL; 600 600 } 601 601