Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.1/block-post-20190315' of git://git.kernel.dk/linux-block

Pull more block layer changes from Jens Axboe:
"This is a collection of both stragglers, and fixes that came in after
I finalized the initial pull. This contains:

- An MD pull request from Song, with a few minor fixes

- Set of NVMe patches via Christoph

- Pull request from Konrad, with a few fixes for xen/blkback

- pblk IO calculation fix (Javier)

- Segment calculation fix for pass-through (Ming)

- Fallthrough annotation for blkcg (Mathieu)"

* tag 'for-5.1/block-post-20190315' of git://git.kernel.dk/linux-block: (25 commits)
blkcg: annotate implicit fall through
nvme-tcp: support C2HData with SUCCESS flag
nvmet: ignore EOPNOTSUPP for discard
nvme: add proper write zeroes setup for the multipath device
nvme: add proper discard setup for the multipath device
nvme: remove nvme_ns_config_oncs
nvme: disable Write Zeroes for qemu controllers
nvmet-fc: bring Disconnect into compliance with FC-NVME spec
nvmet-fc: fix issues with targetport assoc_list list walking
nvme-fc: reject reconnect if io queue count is reduced to zero
nvme-fc: fix numa_node when dev is null
nvme-fc: use nr_phys_segments to determine existence of sgl
nvme-loop: init nvmet_ctrl fatal_err_work when allocate
nvme: update comment to make the code easier to read
nvme: put ns_head ref if namespace fails allocation
nvme-trace: fix cdw10 buffer overrun
nvme: don't warn on block content change effects
nvme: add get-feature to admin cmds tracer
md: Fix failed allocation of md_register_thread
It's wrong to add len to sector_nr in raid10 reshape twice
...

+261 -130
+3
Documentation/admin-guide/md.rst
··· 756 756 The cache mode for raid5. raid5 could include an extra disk for 757 757 caching. The mode can be "write-through" and "write-back". The 758 758 default is "write-through". 759 + 760 + ppl_write_hint 761 + NVMe stream ID to be set for each PPL write request.
+8 -7
block/blk-merge.c
··· 180 180 */ 181 181 static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv, 182 182 unsigned *nsegs, unsigned *last_seg_size, 183 - unsigned *front_seg_size, unsigned *sectors) 183 + unsigned *front_seg_size, unsigned *sectors, unsigned max_segs) 184 184 { 185 185 unsigned len = bv->bv_len; 186 186 unsigned total_len = 0; ··· 190 190 * Multi-page bvec may be too big to hold in one segment, so the 191 191 * current bvec has to be splitted as multiple segments. 192 192 */ 193 - while (len && new_nsegs + *nsegs < queue_max_segments(q)) { 193 + while (len && new_nsegs + *nsegs < max_segs) { 194 194 seg_size = get_max_segment_size(q, bv->bv_offset + total_len); 195 195 seg_size = min(seg_size, len); 196 196 ··· 240 240 bool do_split = true; 241 241 struct bio *new = NULL; 242 242 const unsigned max_sectors = get_max_io_size(q, bio); 243 + const unsigned max_segs = queue_max_segments(q); 243 244 244 245 bio_for_each_bvec(bv, bio, iter) { 245 246 /* ··· 255 254 * Consider this a new segment if we're splitting in 256 255 * the middle of this vector. 
257 256 */ 258 - if (nsegs < queue_max_segments(q) && 257 + if (nsegs < max_segs && 259 258 sectors < max_sectors) { 260 259 /* split in the middle of bvec */ 261 260 bv.bv_len = (max_sectors - sectors) << 9; 262 261 bvec_split_segs(q, &bv, &nsegs, 263 262 &seg_size, 264 263 &front_seg_size, 265 - &sectors); 264 + &sectors, max_segs); 266 265 } 267 266 goto split; 268 267 } ··· 284 283 continue; 285 284 } 286 285 new_segment: 287 - if (nsegs == queue_max_segments(q)) 286 + if (nsegs == max_segs) 288 287 goto split; 289 288 290 289 bvprv = bv; ··· 297 296 if (nsegs == 1 && seg_size > front_seg_size) 298 297 front_seg_size = seg_size; 299 298 } else if (bvec_split_segs(q, &bv, &nsegs, &seg_size, 300 - &front_seg_size, &sectors)) { 299 + &front_seg_size, &sectors, max_segs)) { 301 300 goto split; 302 301 } 303 302 } ··· 416 415 bvprv = bv; 417 416 prev = 1; 418 417 bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size, 419 - &front_seg_size, NULL); 418 + &front_seg_size, NULL, UINT_MAX); 420 419 } 421 420 bbio = bio; 422 421 }
+59 -44
drivers/block/xen-blkback/xenbus.c
··· 926 926 int err, i, j; 927 927 struct xen_blkif *blkif = ring->blkif; 928 928 struct xenbus_device *dev = blkif->be->dev; 929 - unsigned int ring_page_order, nr_grefs, evtchn; 929 + unsigned int nr_grefs, evtchn; 930 930 931 931 err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u", 932 932 &evtchn); ··· 936 936 return err; 937 937 } 938 938 939 - err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", 940 - &ring_page_order); 939 + nr_grefs = blkif->nr_ring_pages; 940 + 941 + if (unlikely(!nr_grefs)) { 942 + WARN_ON(true); 943 + return -EINVAL; 944 + } 945 + 946 + for (i = 0; i < nr_grefs; i++) { 947 + char ring_ref_name[RINGREF_NAME_LEN]; 948 + 949 + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); 950 + err = xenbus_scanf(XBT_NIL, dir, ring_ref_name, 951 + "%u", &ring_ref[i]); 952 + 953 + if (err != 1) { 954 + if (nr_grefs == 1) 955 + break; 956 + 957 + err = -EINVAL; 958 + xenbus_dev_fatal(dev, err, "reading %s/%s", 959 + dir, ring_ref_name); 960 + return err; 961 + } 962 + } 963 + 941 964 if (err != 1) { 942 - err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]); 965 + WARN_ON(nr_grefs != 1); 966 + 967 + err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", 968 + &ring_ref[0]); 943 969 if (err != 1) { 944 970 err = -EINVAL; 945 971 xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir); 946 972 return err; 947 973 } 948 - nr_grefs = 1; 949 - } else { 950 - unsigned int i; 951 - 952 - if (ring_page_order > xen_blkif_max_ring_order) { 953 - err = -EINVAL; 954 - xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", 955 - dir, ring_page_order, 956 - xen_blkif_max_ring_order); 957 - return err; 958 - } 959 - 960 - nr_grefs = 1 << ring_page_order; 961 - for (i = 0; i < nr_grefs; i++) { 962 - char ring_ref_name[RINGREF_NAME_LEN]; 963 - 964 - snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); 965 - err = xenbus_scanf(XBT_NIL, dir, ring_ref_name, 966 - "%u", &ring_ref[i]); 967 - if (err != 1) { 968 - err 
= -EINVAL; 969 - xenbus_dev_fatal(dev, err, "reading %s/%s", 970 - dir, ring_ref_name); 971 - return err; 972 - } 973 - } 974 974 } 975 - blkif->nr_ring_pages = nr_grefs; 976 975 977 976 for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { 978 977 req = kzalloc(sizeof(*req), GFP_KERNEL); ··· 1022 1023 static int connect_ring(struct backend_info *be) 1023 1024 { 1024 1025 struct xenbus_device *dev = be->dev; 1026 + struct xen_blkif *blkif = be->blkif; 1025 1027 unsigned int pers_grants; 1026 1028 char protocol[64] = ""; 1027 1029 int err, i; ··· 1030 1030 size_t xspathsize; 1031 1031 const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */ 1032 1032 unsigned int requested_num_queues = 0; 1033 + unsigned int ring_page_order; 1033 1034 1034 1035 pr_debug("%s %s\n", __func__, dev->otherend); 1035 1036 1036 - be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; 1037 + blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; 1037 1038 err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol", 1038 1039 "%63s", protocol); 1039 1040 if (err <= 0) 1040 1041 strcpy(protocol, "unspecified, assuming default"); 1041 1042 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) 1042 - be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; 1043 + blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; 1043 1044 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) 1044 - be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; 1045 + blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; 1045 1046 else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) 1046 - be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; 1047 + blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; 1047 1048 else { 1048 1049 xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); 1049 1050 return -ENOSYS; 1050 1051 } 1051 1052 pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent", 1052 1053 0); 1053 - be->blkif->vbd.feature_gnt_persistent = pers_grants; 1054 - be->blkif->vbd.overflow_max_grants = 0; 1054 + 
blkif->vbd.feature_gnt_persistent = pers_grants; 1055 + blkif->vbd.overflow_max_grants = 0; 1055 1056 1056 1057 /* 1057 1058 * Read the number of hardware queues from frontend. ··· 1068 1067 requested_num_queues, xenblk_max_queues); 1069 1068 return -ENOSYS; 1070 1069 } 1071 - be->blkif->nr_rings = requested_num_queues; 1072 - if (xen_blkif_alloc_rings(be->blkif)) 1070 + blkif->nr_rings = requested_num_queues; 1071 + if (xen_blkif_alloc_rings(blkif)) 1073 1072 return -ENOMEM; 1074 1073 1075 1074 pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename, 1076 - be->blkif->nr_rings, be->blkif->blk_protocol, protocol, 1075 + blkif->nr_rings, blkif->blk_protocol, protocol, 1077 1076 pers_grants ? "persistent grants" : ""); 1078 1077 1079 - if (be->blkif->nr_rings == 1) 1080 - return read_per_ring_refs(&be->blkif->rings[0], dev->otherend); 1078 + ring_page_order = xenbus_read_unsigned(dev->otherend, 1079 + "ring-page-order", 0); 1080 + 1081 + if (ring_page_order > xen_blkif_max_ring_order) { 1082 + err = -EINVAL; 1083 + xenbus_dev_fatal(dev, err, 1084 + "requested ring page order %d exceed max:%d", 1085 + ring_page_order, 1086 + xen_blkif_max_ring_order); 1087 + return err; 1088 + } 1089 + 1090 + blkif->nr_ring_pages = 1 << ring_page_order; 1091 + 1092 + if (blkif->nr_rings == 1) 1093 + return read_per_ring_refs(&blkif->rings[0], dev->otherend); 1081 1094 else { 1082 1095 xspathsize = strlen(dev->otherend) + xenstore_path_ext_size; 1083 1096 xspath = kmalloc(xspathsize, GFP_KERNEL); ··· 1100 1085 return -ENOMEM; 1101 1086 } 1102 1087 1103 - for (i = 0; i < be->blkif->nr_rings; i++) { 1088 + for (i = 0; i < blkif->nr_rings; i++) { 1104 1089 memset(xspath, 0, xspathsize); 1105 1090 snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i); 1106 - err = read_per_ring_refs(&be->blkif->rings[i], xspath); 1091 + err = read_per_ring_refs(&blkif->rings[i], xspath); 1107 1092 if (err) { 1108 1093 kfree(xspath); 1109 1094 return err;
+6 -1
drivers/lightnvm/pblk-rl.c
··· 233 233 /* To start with, all buffer is available to user I/O writers */ 234 234 rl->rb_budget = budget; 235 235 rl->rb_user_max = budget; 236 - rl->rb_max_io = threshold ? (budget - threshold) : (budget - 1); 237 236 rl->rb_gc_max = 0; 238 237 rl->rb_state = PBLK_RL_HIGH; 238 + 239 + /* Maximize I/O size and ensure that back threshold is respected */ 240 + if (threshold) 241 + rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold; 242 + else 243 + rl->rb_max_io = budget - pblk->min_write_pgs_data - 1; 239 244 240 245 atomic_set(&rl->rb_user_cnt, 0); 241 246 atomic_set(&rl->rb_gc_cnt, 0);
+2 -1
drivers/md/raid10.c
··· 3939 3939 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 3940 3940 mddev->sync_thread = md_register_thread(md_do_sync, mddev, 3941 3941 "reshape"); 3942 + if (!mddev->sync_thread) 3943 + goto out_free_conf; 3942 3944 } 3943 3945 3944 3946 return 0; ··· 4672 4670 atomic_inc(&r10_bio->remaining); 4673 4671 read_bio->bi_next = NULL; 4674 4672 generic_make_request(read_bio); 4675 - sector_nr += nr_sectors; 4676 4673 sectors_done += nr_sectors; 4677 4674 if (sector_nr <= last) 4678 4675 goto read_more;
+1
drivers/md/raid5-log.h
··· 45 45 extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add); 46 46 extern void ppl_quiesce(struct r5conf *conf, int quiesce); 47 47 extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio); 48 + extern struct md_sysfs_entry ppl_write_hint; 48 49 49 50 static inline bool raid5_has_log(struct r5conf *conf) 50 51 {
+63
drivers/md/raid5-ppl.c
··· 20 20 #include <linux/raid/md_p.h> 21 21 #include "md.h" 22 22 #include "raid5.h" 23 + #include "raid5-log.h" 23 24 24 25 /* 25 26 * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for ··· 116 115 /* stripes to retry if failed to allocate io_unit */ 117 116 struct list_head no_mem_stripes; 118 117 spinlock_t no_mem_stripes_lock; 118 + 119 + unsigned short write_hint; 119 120 }; 120 121 121 122 struct ppl_log { ··· 477 474 bio_set_dev(bio, log->rdev->bdev); 478 475 bio->bi_iter.bi_sector = log->next_io_sector; 479 476 bio_add_page(bio, io->header_page, PAGE_SIZE, 0); 477 + bio->bi_write_hint = ppl_conf->write_hint; 480 478 481 479 pr_debug("%s: log->current_io_sector: %llu\n", __func__, 482 480 (unsigned long long)log->next_io_sector); ··· 507 503 bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, 508 504 &ppl_conf->bs); 509 505 bio->bi_opf = prev->bi_opf; 506 + bio->bi_write_hint = prev->bi_write_hint; 510 507 bio_copy_dev(bio, prev); 511 508 bio->bi_iter.bi_sector = bio_end_sector(prev); 512 509 bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); ··· 1412 1407 atomic64_set(&ppl_conf->seq, 0); 1413 1408 INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); 1414 1409 spin_lock_init(&ppl_conf->no_mem_stripes_lock); 1410 + ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET; 1415 1411 1416 1412 if (!mddev->external) { 1417 1413 ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); ··· 1507 1501 1508 1502 return ret; 1509 1503 } 1504 + 1505 + static ssize_t 1506 + ppl_write_hint_show(struct mddev *mddev, char *buf) 1507 + { 1508 + size_t ret = 0; 1509 + struct r5conf *conf; 1510 + struct ppl_conf *ppl_conf = NULL; 1511 + 1512 + spin_lock(&mddev->lock); 1513 + conf = mddev->private; 1514 + if (conf && raid5_has_ppl(conf)) 1515 + ppl_conf = conf->log_private; 1516 + ret = sprintf(buf, "%d\n", ppl_conf ? 
ppl_conf->write_hint : 0); 1517 + spin_unlock(&mddev->lock); 1518 + 1519 + return ret; 1520 + } 1521 + 1522 + static ssize_t 1523 + ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len) 1524 + { 1525 + struct r5conf *conf; 1526 + struct ppl_conf *ppl_conf; 1527 + int err = 0; 1528 + unsigned short new; 1529 + 1530 + if (len >= PAGE_SIZE) 1531 + return -EINVAL; 1532 + if (kstrtou16(page, 10, &new)) 1533 + return -EINVAL; 1534 + 1535 + err = mddev_lock(mddev); 1536 + if (err) 1537 + return err; 1538 + 1539 + conf = mddev->private; 1540 + if (!conf) { 1541 + err = -ENODEV; 1542 + } else if (raid5_has_ppl(conf)) { 1543 + ppl_conf = conf->log_private; 1544 + if (!ppl_conf) 1545 + err = -EINVAL; 1546 + else 1547 + ppl_conf->write_hint = new; 1548 + } else { 1549 + err = -EINVAL; 1550 + } 1551 + 1552 + mddev_unlock(mddev); 1553 + 1554 + return err ?: len; 1555 + } 1556 + 1557 + struct md_sysfs_entry 1558 + ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR, 1559 + ppl_write_hint_show, 1560 + ppl_write_hint_store);
+3
drivers/md/raid5.c
··· 6650 6650 &raid5_skip_copy.attr, 6651 6651 &raid5_rmw_level.attr, 6652 6652 &r5c_journal_mode.attr, 6653 + &ppl_write_hint.attr, 6653 6654 NULL, 6654 6655 }; 6655 6656 static struct attribute_group raid5_attrs_group = { ··· 7394 7393 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 7395 7394 mddev->sync_thread = md_register_thread(md_do_sync, mddev, 7396 7395 "reshape"); 7396 + if (!mddev->sync_thread) 7397 + goto abort; 7397 7398 } 7398 7399 7399 7400 /* Ok, everything is just fine now */
+13 -15
drivers/nvme/host/core.c
··· 179 179 int ret = 0; 180 180 181 181 /* 182 - * Keep a reference until the work is flushed since ->delete_ctrl 183 - * can free the controller. 182 + * Keep a reference until nvme_do_delete_ctrl() complete, 183 + * since ->delete_ctrl can free the controller. 184 184 */ 185 185 nvme_get_ctrl(ctrl); 186 186 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) ··· 1250 1250 if (ns) { 1251 1251 if (ctrl->effects) 1252 1252 effects = le32_to_cpu(ctrl->effects->iocs[opcode]); 1253 - if (effects & ~NVME_CMD_EFFECTS_CSUPP) 1253 + if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) 1254 1254 dev_warn(ctrl->device, 1255 1255 "IO command:%02x has unhandled effects:%08x\n", 1256 1256 opcode, effects); ··· 1495 1495 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); 1496 1496 } 1497 1497 1498 - static void nvme_config_discard(struct nvme_ns *ns) 1498 + static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) 1499 1499 { 1500 1500 struct nvme_ctrl *ctrl = ns->ctrl; 1501 - struct request_queue *queue = ns->queue; 1501 + struct request_queue *queue = disk->queue; 1502 1502 u32 size = queue_logical_block_size(queue); 1503 1503 1504 1504 if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { ··· 1526 1526 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); 1527 1527 } 1528 1528 1529 - static inline void nvme_config_write_zeroes(struct nvme_ns *ns) 1529 + static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) 1530 1530 { 1531 1531 u32 max_sectors; 1532 1532 unsigned short bs = 1 << ns->lba_shift; 1533 1533 1534 - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES)) 1534 + if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || 1535 + (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) 1535 1536 return; 1536 1537 /* 1537 1538 * Even though NVMe spec explicitly states that MDTS is not ··· 1549 1548 else 1550 1549 max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; 1551 1550 1552 - 
blk_queue_max_write_zeroes_sectors(ns->queue, max_sectors); 1553 - } 1554 - 1555 - static inline void nvme_ns_config_oncs(struct nvme_ns *ns) 1556 - { 1557 - nvme_config_discard(ns); 1558 - nvme_config_write_zeroes(ns); 1551 + blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); 1559 1552 } 1560 1553 1561 1554 static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, ··· 1605 1610 capacity = 0; 1606 1611 1607 1612 set_capacity(disk, capacity); 1608 - nvme_ns_config_oncs(ns); 1613 + 1614 + nvme_config_discard(disk, ns); 1615 + nvme_config_write_zeroes(disk, ns); 1609 1616 1610 1617 if (id->nsattr & (1 << 0)) 1611 1618 set_disk_ro(disk, true); ··· 3301 3304 mutex_lock(&ctrl->subsys->lock); 3302 3305 list_del_rcu(&ns->siblings); 3303 3306 mutex_unlock(&ctrl->subsys->lock); 3307 + nvme_put_ns_head(ns->head); 3304 3308 out_free_id: 3305 3309 kfree(id); 3306 3310 out_free_queue:
+31 -5
drivers/nvme/host/fc.c
··· 2107 2107 2108 2108 freq->sg_cnt = 0; 2109 2109 2110 - if (!blk_rq_payload_bytes(rq)) 2110 + if (!blk_rq_nr_phys_segments(rq)) 2111 2111 return 0; 2112 2112 2113 2113 freq->sg_table.sgl = freq->first_sgl; ··· 2304 2304 if (ret) 2305 2305 return ret; 2306 2306 2307 - data_len = blk_rq_payload_bytes(rq); 2308 - if (data_len) 2307 + /* 2308 + * nvme core doesn't quite treat the rq opaquely. Commands such 2309 + * as WRITE ZEROES will return a non-zero rq payload_bytes yet 2310 + * there is no actual payload to be transferred. 2311 + * To get it right, key data transmission on there being 1 or 2312 + * more physical segments in the sg list. If there is no 2313 + * physical segments, there is no payload. 2314 + */ 2315 + if (blk_rq_nr_phys_segments(rq)) { 2316 + data_len = blk_rq_payload_bytes(rq); 2309 2317 io_dir = ((rq_data_dir(rq) == WRITE) ? 2310 2318 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2311 - else 2319 + } else { 2320 + data_len = 0; 2312 2321 io_dir = NVMEFC_FCP_NODATA; 2322 + } 2323 + 2313 2324 2314 2325 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2315 2326 } ··· 2475 2464 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) 2476 2465 { 2477 2466 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2467 + u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1; 2478 2468 unsigned int nr_io_queues; 2479 2469 int ret; 2480 2470 ··· 2486 2474 dev_info(ctrl->ctrl.device, 2487 2475 "set_queue_count failed: %d\n", ret); 2488 2476 return ret; 2477 + } 2478 + 2479 + if (!nr_io_queues && prior_ioq_cnt) { 2480 + dev_info(ctrl->ctrl.device, 2481 + "Fail Reconnect: At least 1 io queue " 2482 + "required (was %d)\n", prior_ioq_cnt); 2483 + return -ENOSPC; 2489 2484 } 2490 2485 2491 2486 ctrl->ctrl.queue_count = nr_io_queues + 1; ··· 2508 2489 if (ret) 2509 2490 goto out_delete_hw_queues; 2510 2491 2492 + if (prior_ioq_cnt != nr_io_queues) 2493 + dev_info(ctrl->ctrl.device, 2494 + "reconnect: revising io queue count from %d to %d\n", 2495 + prior_ioq_cnt, 
nr_io_queues); 2511 2496 blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2512 2497 2513 2498 return 0; ··· 3029 3006 3030 3007 ctrl->ctrl.opts = opts; 3031 3008 ctrl->ctrl.nr_reconnects = 0; 3032 - ctrl->ctrl.numa_node = dev_to_node(lport->dev); 3009 + if (lport->dev) 3010 + ctrl->ctrl.numa_node = dev_to_node(lport->dev); 3011 + else 3012 + ctrl->ctrl.numa_node = NUMA_NO_NODE; 3033 3013 INIT_LIST_HEAD(&ctrl->ctrl_list); 3034 3014 ctrl->lport = lport; 3035 3015 ctrl->rport = rport;
+5
drivers/nvme/host/nvme.h
··· 87 87 * Ignore device provided subnqn. 88 88 */ 89 89 NVME_QUIRK_IGNORE_DEV_SUBNQN = (1 << 8), 90 + 91 + /* 92 + * Broken Write Zeroes. 93 + */ 94 + NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9), 90 95 }; 91 96 92 97 /*
+2 -1
drivers/nvme/host/pci.c
··· 2937 2937 { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ 2938 2938 .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, 2939 2939 { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ 2940 - .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, 2940 + .driver_data = NVME_QUIRK_IDENTIFY_CNS | 2941 + NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 2941 2942 { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ 2942 2943 .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, 2943 2944 { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
+28 -4
drivers/nvme/host/tcp.c
··· 463 463 464 464 queue->data_remaining = le32_to_cpu(pdu->data_length); 465 465 466 + if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS && 467 + unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) { 468 + dev_err(queue->ctrl->ctrl.device, 469 + "queue %d tag %#x SUCCESS set but not last PDU\n", 470 + nvme_tcp_queue_id(queue), rq->tag); 471 + nvme_tcp_error_recovery(&queue->ctrl->ctrl); 472 + return -EPROTO; 473 + } 474 + 466 475 return 0; 467 476 468 477 } ··· 627 618 return ret; 628 619 } 629 620 621 + static inline void nvme_tcp_end_request(struct request *rq, __le16 status) 622 + { 623 + union nvme_result res = {}; 624 + 625 + nvme_end_request(rq, cpu_to_le16(status << 1), res); 626 + } 627 + 628 + 630 629 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, 631 630 unsigned int *offset, size_t *len) 632 631 { ··· 702 685 nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); 703 686 queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; 704 687 } else { 688 + if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) 689 + nvme_tcp_end_request(rq, NVME_SC_SUCCESS); 705 690 nvme_tcp_init_recv_ctx(queue); 706 691 } 707 692 } ··· 714 695 static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, 715 696 struct sk_buff *skb, unsigned int *offset, size_t *len) 716 697 { 698 + struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; 717 699 char *ddgst = (char *)&queue->recv_ddgst; 718 700 size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); 719 701 off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; ··· 736 716 le32_to_cpu(queue->recv_ddgst), 737 717 le32_to_cpu(queue->exp_ddgst)); 738 718 return -EIO; 719 + } 720 + 721 + if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { 722 + struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), 723 + pdu->command_id); 724 + 725 + nvme_tcp_end_request(rq, NVME_SC_SUCCESS); 739 726 } 740 727 741 728 nvme_tcp_init_recv_ctx(queue); ··· 842 815 843 816 static void nvme_tcp_fail_request(struct 
nvme_tcp_request *req) 844 817 { 845 - union nvme_result res = {}; 846 - 847 - nvme_end_request(blk_mq_rq_from_pdu(req), 848 - cpu_to_le16(NVME_SC_DATA_XFER_ERROR), res); 818 + nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_DATA_XFER_ERROR); 849 819 } 850 820 851 821 static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
+14
drivers/nvme/host/trace.c
··· 50 50 return ret; 51 51 } 52 52 53 + static const char *nvme_trace_admin_get_features(struct trace_seq *p, 54 + u8 *cdw10) 55 + { 56 + const char *ret = trace_seq_buffer_ptr(p); 57 + u8 fid = cdw10[0]; 58 + u8 sel = cdw10[1] & 0x7; 59 + u32 cdw11 = get_unaligned_le32(cdw10 + 4); 53 60 61 + trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11); 62 + trace_seq_putc(p, 0); 63 + 64 + return ret; 65 + } 54 66 55 67 static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10) 56 68 { ··· 113 101 return nvme_trace_create_cq(p, cdw10); 114 102 case nvme_admin_identify: 115 103 return nvme_trace_admin_identify(p, cdw10); 104 + case nvme_admin_get_features: 105 + return nvme_trace_admin_get_features(p, cdw10); 116 106 default: 117 107 return nvme_trace_common(p, cdw10); 118 108 }
+1 -1
drivers/nvme/host/trace.h
··· 108 108 __entry->metadata = le64_to_cpu(cmd->common.metadata); 109 109 __assign_disk_name(__entry->disk, req->rq_disk); 110 110 memcpy(__entry->cdw10, &cmd->common.cdw10, 111 - 6 * sizeof(__entry->cdw10)); 111 + sizeof(__entry->cdw10)); 112 112 ), 113 113 TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", 114 114 __entry->ctrl_id, __print_disk_name(__entry->disk),
+10 -10
drivers/nvme/target/core.c
··· 1163 1163 put_device(ctrl->p2p_client); 1164 1164 } 1165 1165 1166 + static void nvmet_fatal_error_handler(struct work_struct *work) 1167 + { 1168 + struct nvmet_ctrl *ctrl = 1169 + container_of(work, struct nvmet_ctrl, fatal_err_work); 1170 + 1171 + pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); 1172 + ctrl->ops->delete_ctrl(ctrl); 1173 + } 1174 + 1166 1175 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, 1167 1176 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp) 1168 1177 { ··· 1214 1205 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); 1215 1206 INIT_LIST_HEAD(&ctrl->async_events); 1216 1207 INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); 1208 + INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1217 1209 1218 1210 memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE); 1219 1211 memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE); ··· 1318 1308 kref_put(&ctrl->ref, nvmet_ctrl_free); 1319 1309 } 1320 1310 1321 - static void nvmet_fatal_error_handler(struct work_struct *work) 1322 - { 1323 - struct nvmet_ctrl *ctrl = 1324 - container_of(work, struct nvmet_ctrl, fatal_err_work); 1325 - 1326 - pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid); 1327 - ctrl->ops->delete_ctrl(ctrl); 1328 - } 1329 - 1330 1311 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) 1331 1312 { 1332 1313 mutex_lock(&ctrl->lock); 1333 1314 if (!(ctrl->csts & NVME_CSTS_CFS)) { 1334 1315 ctrl->csts |= NVME_CSTS_CFS; 1335 - INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); 1336 1316 schedule_work(&ctrl->fatal_err_work); 1337 1317 } 1338 1318 mutex_unlock(&ctrl->lock);
+6 -36
drivers/nvme/target/fc.c
··· 1143 1143 &tgtport->assoc_list, a_list) { 1144 1144 if (!nvmet_fc_tgt_a_get(assoc)) 1145 1145 continue; 1146 - spin_unlock_irqrestore(&tgtport->lock, flags); 1147 - nvmet_fc_delete_target_assoc(assoc); 1148 - nvmet_fc_tgt_a_put(assoc); 1149 - spin_lock_irqsave(&tgtport->lock, flags); 1146 + if (!schedule_work(&assoc->del_work)) 1147 + nvmet_fc_tgt_a_put(assoc); 1150 1148 } 1151 1149 spin_unlock_irqrestore(&tgtport->lock, flags); 1152 1150 } ··· 1183 1185 nvmet_fc_tgtport_put(tgtport); 1184 1186 1185 1187 if (found_ctrl) { 1186 - schedule_work(&assoc->del_work); 1188 + if (!schedule_work(&assoc->del_work)) 1189 + nvmet_fc_tgt_a_put(assoc); 1187 1190 return; 1188 1191 } 1189 1192 ··· 1502 1503 (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf; 1503 1504 struct fcnvme_ls_disconnect_acc *acc = 1504 1505 (struct fcnvme_ls_disconnect_acc *)iod->rspbuf; 1505 - struct nvmet_fc_tgt_queue *queue = NULL; 1506 1506 struct nvmet_fc_tgt_assoc *assoc; 1507 1507 int ret = 0; 1508 - bool del_assoc = false; 1509 1508 1510 1509 memset(acc, 0, sizeof(*acc)); 1511 1510 ··· 1534 1537 assoc = nvmet_fc_find_target_assoc(tgtport, 1535 1538 be64_to_cpu(rqst->associd.association_id)); 1536 1539 iod->assoc = assoc; 1537 - if (assoc) { 1538 - if (rqst->discon_cmd.scope == 1539 - FCNVME_DISCONN_CONNECTION) { 1540 - queue = nvmet_fc_find_target_queue(tgtport, 1541 - be64_to_cpu( 1542 - rqst->discon_cmd.id)); 1543 - if (!queue) { 1544 - nvmet_fc_tgt_a_put(assoc); 1545 - ret = VERR_NO_CONN; 1546 - } 1547 - } 1548 - } else 1540 + if (!assoc) 1549 1541 ret = VERR_NO_ASSOC; 1550 1542 } 1551 1543 ··· 1562 1576 sizeof(struct fcnvme_ls_disconnect_acc)), 1563 1577 FCNVME_LS_DISCONNECT); 1564 1578 1565 - 1566 - /* are we to delete a Connection ID (queue) */ 1567 - if (queue) { 1568 - int qid = queue->qid; 1569 - 1570 - nvmet_fc_delete_target_queue(queue); 1571 - 1572 - /* release the get taken by find_target_queue */ 1573 - nvmet_fc_tgt_q_put(queue); 1574 - 1575 - /* tear association down if io queue 
terminated */ 1576 - if (!qid) 1577 - del_assoc = true; 1578 - } 1579 - 1580 1579 /* release get taken in nvmet_fc_find_target_assoc */ 1581 1580 nvmet_fc_tgt_a_put(iod->assoc); 1582 1581 1583 - if (del_assoc) 1584 - nvmet_fc_delete_target_assoc(iod->assoc); 1582 + nvmet_fc_delete_target_assoc(iod->assoc); 1585 1583 } 1586 1584 1587 1585
+4 -4
drivers/nvme/target/io-cmd-bdev.c
··· 194 194 le64_to_cpu(range->slba) << (ns->blksize_shift - 9), 195 195 le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), 196 196 GFP_KERNEL, 0, bio); 197 - 198 - if (ret) 197 + if (ret && ret != -EOPNOTSUPP) { 199 198 req->error_slba = le64_to_cpu(range->slba); 200 - 201 - return blk_to_nvme_status(req, errno_to_blk_status(ret)); 199 + return blk_to_nvme_status(req, errno_to_blk_status(ret)); 200 + } 201 + return NVME_SC_SUCCESS; 202 202 } 203 203 204 204 static void nvmet_bdev_execute_discard(struct nvmet_req *req)
+1 -1
drivers/nvme/target/io-cmd-file.c
··· 297 297 } 298 298 299 299 ret = vfs_fallocate(req->ns->file, mode, offset, len); 300 - if (ret) { 300 + if (ret && ret != -EOPNOTSUPP) { 301 301 req->error_slba = le64_to_cpu(range.slba); 302 302 status = errno_to_nvme_status(req, ret); 303 303 break;
+1
kernel/trace/blktrace.c
··· 723 723 #endif 724 724 case BLKTRACESTART: 725 725 start = 1; 726 + /* fall through */ 726 727 case BLKTRACESTOP: 727 728 ret = __blk_trace_startstop(q, start); 728 729 break;