Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
"This is mainly some late lightnvm changes that came in just before the
merge window, as well as fixes that have been queued up since the
initial pull request was frozen.

This contains:

- lightnvm changes, fixing race conditions, improving memory
utilization, and improving pblk compatibility (Chansol, Igor,
Marcin)

- NVMe pull request with minor fixes all over the map (via Christoph)

- remove redundant error print in sata_rcar (Geert)

- struct_size() cleanup (Jackie)

- dasd CONFIG_LBADF warning fix (Ming)

- brd cond_resched() improvement (Mikulas)"

* tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block: (41 commits)
block/bio-integrity: use struct_size() in kmalloc()
nvme: validate cntlid during controller initialisation
nvme: change locking for the per-subsystem controller list
nvme: trace all async notice events
nvme: fix typos in nvme status code values
nvme-fabrics: remove unused argument
nvme-multipath: avoid crash on invalid subsystem cntlid enumeration
nvme-fc: use separate work queue to avoid warning
nvme-rdma: remove redundant reference between ib_device and tagset
nvme-pci: mark expected switch fall-through
nvme-pci: add known admin effects to augment admin effects log page
nvme-pci: init shadow doorbell after each reset
brd: add cond_resched to brd_free_pages
sata_rcar: Remove ata_host_alloc() error printing
s390/dasd: fix build warning in dasd_eckd_build_cp_raw
lightnvm: pblk: use nvm_rq_to_ppa_list()
lightnvm: pblk: simplify partial read path
lightnvm: do not remove instance under global lock
lightnvm: track inflight target creations
lightnvm: pblk: recover only written metadata
...

+397 -541
+1 -2
block/bio-integrity.c
··· 43 43 unsigned inline_vecs; 44 44 45 45 if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) { 46 - bip = kmalloc(sizeof(struct bio_integrity_payload) + 47 - sizeof(struct bio_vec) * nr_vecs, gfp_mask); 46 + bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask); 48 47 inline_vecs = nr_vecs; 49 48 } else { 50 49 bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
-1
drivers/ata/sata_rcar.c
··· 909 909 910 910 host = ata_host_alloc(dev, 1); 911 911 if (!host) { 912 - dev_err(dev, "ata_host_alloc failed\n"); 913 912 ret = -ENOMEM; 914 913 goto err_pm_put; 915 914 }
+6
drivers/block/brd.c
··· 153 153 pos++; 154 154 155 155 /* 156 + * It takes 3.4 seconds to remove 80GiB ramdisk. 157 + * So, we need cond_resched to avoid stalling the CPU. 158 + */ 159 + cond_resched(); 160 + 161 + /* 156 162 * This assumes radix_tree_gang_lookup always returns as 157 163 * many pages as possible. If the radix-tree code changes, 158 164 * so will this have to.
+54 -28
drivers/lightnvm/core.c
··· 45 45 int num_ch; 46 46 }; 47 47 48 + static void nvm_free(struct kref *ref); 49 + 48 50 static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) 49 51 { 50 52 struct nvm_target *tgt; ··· 327 325 struct nvm_target *t; 328 326 struct nvm_tgt_dev *tgt_dev; 329 327 void *targetdata; 328 + unsigned int mdts; 330 329 int ret; 331 330 332 331 switch (create->conf.type) { ··· 415 412 tdisk->private_data = targetdata; 416 413 tqueue->queuedata = targetdata; 417 414 418 - blk_queue_max_hw_sectors(tqueue, 419 - (dev->geo.csecs >> 9) * NVM_MAX_VLBA); 415 + mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA; 416 + if (dev->geo.mdts) { 417 + mdts = min_t(u32, dev->geo.mdts, 418 + (dev->geo.csecs >> 9) * NVM_MAX_VLBA); 419 + } 420 + blk_queue_max_hw_sectors(tqueue, mdts); 420 421 421 422 set_capacity(tdisk, tt->capacity(targetdata)); 422 423 add_disk(tdisk); ··· 483 476 484 477 /** 485 478 * nvm_remove_tgt - Removes a target from the media manager 486 - * @dev: device 487 479 * @remove: ioctl structure with target name to remove. 
488 480 * 489 481 * Returns: ··· 490 484 * 1: on not found 491 485 * <0: on error 492 486 */ 493 - static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) 487 + static int nvm_remove_tgt(struct nvm_ioctl_remove *remove) 494 488 { 495 489 struct nvm_target *t; 490 + struct nvm_dev *dev; 496 491 497 - mutex_lock(&dev->mlock); 498 - t = nvm_find_target(dev, remove->tgtname); 499 - if (!t) { 492 + down_read(&nvm_lock); 493 + list_for_each_entry(dev, &nvm_devices, devices) { 494 + mutex_lock(&dev->mlock); 495 + t = nvm_find_target(dev, remove->tgtname); 496 + if (t) { 497 + mutex_unlock(&dev->mlock); 498 + break; 499 + } 500 500 mutex_unlock(&dev->mlock); 501 - return 1; 502 501 } 502 + up_read(&nvm_lock); 503 + 504 + if (!t) 505 + return 1; 506 + 503 507 __nvm_remove_target(t, true); 504 - mutex_unlock(&dev->mlock); 508 + kref_put(&dev->ref, nvm_free); 505 509 506 510 return 0; 507 511 } ··· 1105 1089 return ret; 1106 1090 } 1107 1091 1108 - static void nvm_free(struct nvm_dev *dev) 1092 + static void nvm_free(struct kref *ref) 1109 1093 { 1110 - if (!dev) 1111 - return; 1094 + struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref); 1112 1095 1113 1096 if (dev->dma_pool) 1114 1097 dev->ops->destroy_dma_pool(dev->dma_pool); 1115 1098 1116 - nvm_unregister_map(dev); 1099 + if (dev->rmap) 1100 + nvm_unregister_map(dev); 1101 + 1117 1102 kfree(dev->lun_map); 1118 1103 kfree(dev); 1119 1104 } ··· 1151 1134 1152 1135 struct nvm_dev *nvm_alloc_dev(int node) 1153 1136 { 1154 - return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); 1137 + struct nvm_dev *dev; 1138 + 1139 + dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); 1140 + if (dev) 1141 + kref_init(&dev->ref); 1142 + 1143 + return dev; 1155 1144 } 1156 1145 EXPORT_SYMBOL(nvm_alloc_dev); 1157 1146 ··· 1165 1142 { 1166 1143 int ret, exp_pool_size; 1167 1144 1168 - if (!dev->q || !dev->ops) 1145 + if (!dev->q || !dev->ops) { 1146 + kref_put(&dev->ref, nvm_free); 1169 1147 
return -EINVAL; 1148 + } 1170 1149 1171 1150 ret = nvm_init(dev); 1172 - if (ret) 1151 + if (ret) { 1152 + kref_put(&dev->ref, nvm_free); 1173 1153 return ret; 1154 + } 1174 1155 1175 1156 exp_pool_size = max_t(int, PAGE_SIZE, 1176 1157 (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); ··· 1184 1157 exp_pool_size); 1185 1158 if (!dev->dma_pool) { 1186 1159 pr_err("nvm: could not create dma pool\n"); 1187 - nvm_free(dev); 1160 + kref_put(&dev->ref, nvm_free); 1188 1161 return -ENOMEM; 1189 1162 } 1190 1163 ··· 1206 1179 if (t->dev->parent != dev) 1207 1180 continue; 1208 1181 __nvm_remove_target(t, false); 1182 + kref_put(&dev->ref, nvm_free); 1209 1183 } 1210 1184 mutex_unlock(&dev->mlock); 1211 1185 ··· 1214 1186 list_del(&dev->devices); 1215 1187 up_write(&nvm_lock); 1216 1188 1217 - nvm_free(dev); 1189 + kref_put(&dev->ref, nvm_free); 1218 1190 } 1219 1191 EXPORT_SYMBOL(nvm_unregister); 1220 1192 1221 1193 static int __nvm_configure_create(struct nvm_ioctl_create *create) 1222 1194 { 1223 1195 struct nvm_dev *dev; 1196 + int ret; 1224 1197 1225 1198 down_write(&nvm_lock); 1226 1199 dev = nvm_find_nvm_dev(create->dev); ··· 1232 1203 return -EINVAL; 1233 1204 } 1234 1205 1235 - return nvm_create_tgt(dev, create); 1206 + kref_get(&dev->ref); 1207 + ret = nvm_create_tgt(dev, create); 1208 + if (ret) 1209 + kref_put(&dev->ref, nvm_free); 1210 + 1211 + return ret; 1236 1212 } 1237 1213 1238 1214 static long nvm_ioctl_info(struct file *file, void __user *arg) ··· 1356 1322 static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) 1357 1323 { 1358 1324 struct nvm_ioctl_remove remove; 1359 - struct nvm_dev *dev; 1360 - int ret = 0; 1361 1325 1362 1326 if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) 1363 1327 return -EFAULT; ··· 1367 1335 return -EINVAL; 1368 1336 } 1369 1337 1370 - list_for_each_entry(dev, &nvm_devices, devices) { 1371 - ret = nvm_remove_tgt(dev, &remove); 1372 - if (!ret) 1373 - break; 1374 - } 1375 - 1376 - return ret; 1338 
+ return nvm_remove_tgt(&remove); 1377 1339 } 1378 1340 1379 1341 /* kept for compatibility reasons */
+6 -2
drivers/lightnvm/pblk-cache.c
··· 18 18 19 19 #include "pblk.h" 20 20 21 - int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) 21 + void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, 22 + unsigned long flags) 22 23 { 23 24 struct request_queue *q = pblk->dev->q; 24 25 struct pblk_w_ctx w_ctx; ··· 44 43 goto retry; 45 44 case NVM_IO_ERR: 46 45 pblk_pipeline_stop(pblk); 46 + bio_io_error(bio); 47 47 goto out; 48 48 } 49 49 ··· 81 79 out: 82 80 generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time); 83 81 pblk_write_should_kick(pblk); 84 - return ret; 82 + 83 + if (ret == NVM_IO_DONE) 84 + bio_endio(bio); 85 85 } 86 86 87 87 /*
+44 -21
drivers/lightnvm/pblk-core.c
··· 562 562 563 563 int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd) 564 564 { 565 - struct ppa_addr *ppa_list; 565 + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); 566 566 int ret; 567 - 568 - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; 569 567 570 568 pblk_down_chunk(pblk, ppa_list[0]); 571 569 ret = pblk_submit_io_sync(pblk, rqd); ··· 723 725 struct nvm_tgt_dev *dev = pblk->dev; 724 726 struct pblk_line_meta *lm = &pblk->lm; 725 727 struct bio *bio; 728 + struct ppa_addr *ppa_list; 726 729 struct nvm_rq rqd; 727 730 u64 paddr = pblk_line_smeta_start(pblk, line); 728 731 int i, ret; ··· 747 748 rqd.opcode = NVM_OP_PREAD; 748 749 rqd.nr_ppas = lm->smeta_sec; 749 750 rqd.is_seq = 1; 751 + ppa_list = nvm_rq_to_ppa_list(&rqd); 750 752 751 753 for (i = 0; i < lm->smeta_sec; i++, paddr++) 752 - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 754 + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 753 755 754 756 ret = pblk_submit_io_sync(pblk, &rqd); 755 757 if (ret) { ··· 761 761 762 762 atomic_dec(&pblk->inflight_io); 763 763 764 - if (rqd.error) 764 + if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { 765 765 pblk_log_read_err(pblk, &rqd); 766 + ret = -EIO; 767 + } 766 768 767 769 clear_rqd: 768 770 pblk_free_rqd_meta(pblk, &rqd); ··· 777 775 struct nvm_tgt_dev *dev = pblk->dev; 778 776 struct pblk_line_meta *lm = &pblk->lm; 779 777 struct bio *bio; 778 + struct ppa_addr *ppa_list; 780 779 struct nvm_rq rqd; 781 780 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); 782 781 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); ··· 802 799 rqd.opcode = NVM_OP_PWRITE; 803 800 rqd.nr_ppas = lm->smeta_sec; 804 801 rqd.is_seq = 1; 802 + ppa_list = nvm_rq_to_ppa_list(&rqd); 805 803 806 804 for (i = 0; i < lm->smeta_sec; i++, paddr++) { 807 805 struct pblk_sec_meta *meta = pblk_get_meta(pblk, 808 806 rqd.meta_list, i); 809 807 810 - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 808 + ppa_list[i] = 
addr_to_gen_ppa(pblk, paddr, line->id); 811 809 meta->lba = lba_list[paddr] = addr_empty; 812 810 } 813 811 ··· 838 834 struct nvm_geo *geo = &dev->geo; 839 835 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 840 836 struct pblk_line_meta *lm = &pblk->lm; 841 - void *ppa_list, *meta_list; 837 + void *ppa_list_buf, *meta_list; 842 838 struct bio *bio; 839 + struct ppa_addr *ppa_list; 843 840 struct nvm_rq rqd; 844 841 u64 paddr = line->emeta_ssec; 845 842 dma_addr_t dma_ppa_list, dma_meta_list; ··· 856 851 if (!meta_list) 857 852 return -ENOMEM; 858 853 859 - ppa_list = meta_list + pblk_dma_meta_size(pblk); 854 + ppa_list_buf = meta_list + pblk_dma_meta_size(pblk); 860 855 dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); 861 856 862 857 next_rq: ··· 877 872 878 873 rqd.bio = bio; 879 874 rqd.meta_list = meta_list; 880 - rqd.ppa_list = ppa_list; 875 + rqd.ppa_list = ppa_list_buf; 881 876 rqd.dma_meta_list = dma_meta_list; 882 877 rqd.dma_ppa_list = dma_ppa_list; 883 878 rqd.opcode = NVM_OP_PREAD; 884 879 rqd.nr_ppas = rq_ppas; 880 + ppa_list = nvm_rq_to_ppa_list(&rqd); 885 881 886 882 for (i = 0; i < rqd.nr_ppas; ) { 887 883 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); ··· 910 904 } 911 905 912 906 for (j = 0; j < min; j++, i++, paddr++) 913 - rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); 907 + ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); 914 908 } 915 909 916 910 ret = pblk_submit_io_sync(pblk, &rqd); ··· 922 916 923 917 atomic_dec(&pblk->inflight_io); 924 918 925 - if (rqd.error) 919 + if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { 926 920 pblk_log_read_err(pblk, &rqd); 921 + ret = -EIO; 922 + goto free_rqd_dma; 923 + } 927 924 928 925 emeta_buf += rq_len; 929 926 left_ppas -= rq_ppas; ··· 1171 1162 off = bit * geo->ws_opt; 1172 1163 bitmap_set(line->map_bitmap, off, lm->smeta_sec); 1173 1164 line->sec_in_line -= lm->smeta_sec; 1174 - line->smeta_ssec = off; 1175 1165 line->cur_sec = off + lm->smeta_sec; 1176 1166 1177 
1167 if (init && pblk_line_smeta_write(pblk, line, off)) { ··· 1529 1521 1530 1522 void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) 1531 1523 { 1532 - struct ppa_addr *ppa_list; 1524 + struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); 1533 1525 int i; 1534 - 1535 - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; 1536 1526 1537 1527 for (i = 0; i < rqd->nr_ppas; i++) 1538 1528 pblk_ppa_to_line_put(pblk, ppa_list[i]); ··· 1705 1699 1706 1700 spin_lock(&line->lock); 1707 1701 WARN_ON(line->state != PBLK_LINESTATE_GC); 1702 + if (line->w_err_gc->has_gc_err) { 1703 + spin_unlock(&line->lock); 1704 + pblk_err(pblk, "line %d had errors during GC\n", line->id); 1705 + pblk_put_line_back(pblk, line); 1706 + line->w_err_gc->has_gc_err = 0; 1707 + return; 1708 + } 1709 + 1708 1710 line->state = PBLK_LINESTATE_FREE; 1709 1711 trace_pblk_line_state(pblk_disk_name(pblk), line->id, 1710 1712 line->state); ··· 2037 2023 struct ppa_addr ppa_l2p; 2038 2024 2039 2025 /* logic error: lba out-of-bounds. Ignore update */ 2040 - if (!(lba < pblk->rl.nr_secs)) { 2026 + if (!(lba < pblk->capacity)) { 2041 2027 WARN(1, "pblk: corrupted L2P map request\n"); 2042 2028 return; 2043 2029 } ··· 2077 2063 #endif 2078 2064 2079 2065 /* logic error: lba out-of-bounds. Ignore update */ 2080 - if (!(lba < pblk->rl.nr_secs)) { 2066 + if (!(lba < pblk->capacity)) { 2081 2067 WARN(1, "pblk: corrupted L2P map request\n"); 2082 2068 return 0; 2083 2069 } ··· 2123 2109 } 2124 2110 2125 2111 /* logic error: lba out-of-bounds. 
Ignore update */ 2126 - if (!(lba < pblk->rl.nr_secs)) { 2112 + if (!(lba < pblk->capacity)) { 2127 2113 WARN(1, "pblk: corrupted L2P map request\n"); 2128 2114 return; 2129 2115 } ··· 2149 2135 spin_unlock(&pblk->trans_lock); 2150 2136 } 2151 2137 2152 - void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, 2153 - sector_t blba, int nr_secs) 2138 + int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, 2139 + sector_t blba, int nr_secs, bool *from_cache) 2154 2140 { 2155 2141 int i; 2156 2142 ··· 2164 2150 if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { 2165 2151 struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); 2166 2152 2153 + if (i > 0 && *from_cache) 2154 + break; 2155 + *from_cache = false; 2156 + 2167 2157 kref_get(&line->ref); 2158 + } else { 2159 + if (i > 0 && !*from_cache) 2160 + break; 2161 + *from_cache = true; 2168 2162 } 2169 2163 } 2170 2164 spin_unlock(&pblk->trans_lock); 2165 + return i; 2171 2166 } 2172 2167 2173 2168 void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, ··· 2190 2167 lba = lba_list[i]; 2191 2168 if (lba != ADDR_EMPTY) { 2192 2169 /* logic error: lba out-of-bounds. Ignore update */ 2193 - if (!(lba < pblk->rl.nr_secs)) { 2170 + if (!(lba < pblk->capacity)) { 2194 2171 WARN(1, "pblk: corrupted L2P map request\n"); 2195 2172 continue; 2196 2173 }
+31 -21
drivers/lightnvm/pblk-gc.c
··· 59 59 wake_up_process(gc->gc_writer_ts); 60 60 } 61 61 62 - static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) 62 + void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) 63 63 { 64 64 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 65 65 struct list_head *move_list; 66 66 67 + spin_lock(&l_mg->gc_lock); 67 68 spin_lock(&line->lock); 68 69 WARN_ON(line->state != PBLK_LINESTATE_GC); 69 70 line->state = PBLK_LINESTATE_CLOSED; 70 71 trace_pblk_line_state(pblk_disk_name(pblk), line->id, 71 72 line->state); 73 + 74 + /* We need to reset gc_group in order to ensure that 75 + * pblk_line_gc_list will return proper move_list 76 + * since right now current line is not on any of the 77 + * gc lists. 78 + */ 79 + line->gc_group = PBLK_LINEGC_NONE; 72 80 move_list = pblk_line_gc_list(pblk, line); 73 81 spin_unlock(&line->lock); 74 - 75 - if (move_list) { 76 - spin_lock(&l_mg->gc_lock); 77 - list_add_tail(&line->list, move_list); 78 - spin_unlock(&l_mg->gc_lock); 79 - } 82 + list_add_tail(&line->list, move_list); 83 + spin_unlock(&l_mg->gc_lock); 80 84 } 81 85 82 86 static void pblk_gc_line_ws(struct work_struct *work) ··· 88 84 struct pblk_line_ws *gc_rq_ws = container_of(work, 89 85 struct pblk_line_ws, ws); 90 86 struct pblk *pblk = gc_rq_ws->pblk; 91 - struct nvm_tgt_dev *dev = pblk->dev; 92 - struct nvm_geo *geo = &dev->geo; 93 87 struct pblk_gc *gc = &pblk->gc; 94 88 struct pblk_line *line = gc_rq_ws->line; 95 89 struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; ··· 95 93 96 94 up(&gc->gc_sem); 97 95 98 - gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs)); 99 - if (!gc_rq->data) { 100 - pblk_err(pblk, "could not GC line:%d (%d/%d)\n", 101 - line->id, *line->vsc, gc_rq->nr_secs); 102 - goto out; 103 - } 104 - 105 96 /* Read from GC victim block */ 106 97 ret = pblk_submit_read_gc(pblk, gc_rq); 107 98 if (ret) { 108 - pblk_err(pblk, "failed GC read in line:%d (err:%d)\n", 109 - line->id, ret); 99 + line->w_err_gc->has_gc_err = 1; 110 
100 goto out; 111 101 } 112 102 ··· 183 189 struct pblk_line *line = line_ws->line; 184 190 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 185 191 struct pblk_line_meta *lm = &pblk->lm; 192 + struct nvm_tgt_dev *dev = pblk->dev; 193 + struct nvm_geo *geo = &dev->geo; 186 194 struct pblk_gc *gc = &pblk->gc; 187 195 struct pblk_line_ws *gc_rq_ws; 188 196 struct pblk_gc_rq *gc_rq; ··· 243 247 gc_rq->nr_secs = nr_secs; 244 248 gc_rq->line = line; 245 249 250 + gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs)); 251 + if (!gc_rq->data) 252 + goto fail_free_gc_rq; 253 + 246 254 gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); 247 255 if (!gc_rq_ws) 248 - goto fail_free_gc_rq; 256 + goto fail_free_gc_data; 249 257 250 258 gc_rq_ws->pblk = pblk; 251 259 gc_rq_ws->line = line; ··· 281 281 282 282 return; 283 283 284 + fail_free_gc_data: 285 + vfree(gc_rq->data); 284 286 fail_free_gc_rq: 285 287 kfree(gc_rq); 286 288 fail_free_lba_list: ··· 292 290 fail_free_ws: 293 291 kfree(line_ws); 294 292 293 + /* Line goes back to closed state, so we cannot release additional 294 + * reference for line, since we do that only when we want to do 295 + * gc to free line state transition. 296 + */ 295 297 pblk_put_line_back(pblk, line); 296 - kref_put(&line->ref, pblk_line_put); 297 298 atomic_dec(&gc->read_inflight_gc); 298 299 299 300 pblk_err(pblk, "failed to GC line %d\n", line->id); ··· 360 355 361 356 pblk_gc_kick(pblk); 362 357 363 - if (pblk_gc_line(pblk, line)) 358 + if (pblk_gc_line(pblk, line)) { 364 359 pblk_err(pblk, "failed to GC line %d\n", line->id); 360 + /* rollback */ 361 + spin_lock(&gc->r_lock); 362 + list_add_tail(&line->list, &gc->r_list); 363 + spin_unlock(&gc->r_lock); 364 + } 365 365 366 366 return 0; 367 367 }
+25 -40
drivers/lightnvm/pblk-init.c
··· 47 47 48 48 struct bio_set pblk_bio_set; 49 49 50 - static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, 51 - struct bio *bio) 52 - { 53 - int ret; 54 - 55 - /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap 56 - * constraint. Writes can be of arbitrary size. 57 - */ 58 - if (bio_data_dir(bio) == READ) { 59 - blk_queue_split(q, &bio); 60 - ret = pblk_submit_read(pblk, bio); 61 - if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) 62 - bio_put(bio); 63 - 64 - return ret; 65 - } 66 - 67 - /* Prevent deadlock in the case of a modest LUN configuration and large 68 - * user I/Os. Unless stalled, the rate limiter leaves at least 256KB 69 - * available for user I/O. 70 - */ 71 - if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) 72 - blk_queue_split(q, &bio); 73 - 74 - return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); 75 - } 76 - 77 50 static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) 78 51 { 79 52 struct pblk *pblk = q->queuedata; ··· 59 86 } 60 87 } 61 88 62 - switch (pblk_rw_io(q, pblk, bio)) { 63 - case NVM_IO_ERR: 64 - bio_io_error(bio); 65 - break; 66 - case NVM_IO_DONE: 67 - bio_endio(bio); 68 - break; 89 + /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap 90 + * constraint. Writes can be of arbitrary size. 91 + */ 92 + if (bio_data_dir(bio) == READ) { 93 + blk_queue_split(q, &bio); 94 + pblk_submit_read(pblk, bio); 95 + } else { 96 + /* Prevent deadlock in the case of a modest LUN configuration 97 + * and large user I/Os. Unless stalled, the rate limiter 98 + * leaves at least 256KB available for user I/O. 
99 + */ 100 + if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) 101 + blk_queue_split(q, &bio); 102 + 103 + pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); 69 104 } 70 105 71 106 return BLK_QC_T_NONE; ··· 86 105 if (pblk->addrf_len < 32) 87 106 entry_size = 4; 88 107 89 - return entry_size * pblk->rl.nr_secs; 108 + return entry_size * pblk->capacity; 90 109 } 91 110 92 111 #ifdef CONFIG_NVM_PBLK_DEBUG ··· 145 164 int ret = 0; 146 165 147 166 map_size = pblk_trans_map_size(pblk); 148 - pblk->trans_map = vmalloc(map_size); 149 - if (!pblk->trans_map) 167 + pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN 168 + | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM, 169 + PAGE_KERNEL); 170 + if (!pblk->trans_map) { 171 + pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n", 172 + map_size); 150 173 return -ENOMEM; 174 + } 151 175 152 176 pblk_ppa_set_empty(&ppa); 153 177 154 - for (i = 0; i < pblk->rl.nr_secs; i++) 178 + for (i = 0; i < pblk->capacity; i++) 155 179 pblk_trans_map_set(pblk, i, ppa); 156 180 157 181 ret = pblk_l2p_recover(pblk, factory_init); ··· 687 701 * on user capacity consider only provisioned blocks 688 702 */ 689 703 pblk->rl.total_blocks = nr_free_chks; 690 - pblk->rl.nr_secs = nr_free_chks * geo->clba; 691 704 692 705 /* Consider sectors used for metadata */ 693 706 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; ··· 1269 1284 1270 1285 pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n", 1271 1286 geo->all_luns, pblk->l_mg.nr_lines, 1272 - (unsigned long long)pblk->rl.nr_secs, 1287 + (unsigned long long)pblk->capacity, 1273 1288 pblk->rwb.nr_entries); 1274 1289 1275 1290 wake_up_process(pblk->writer_ts);
+1
drivers/lightnvm/pblk-map.c
··· 162 162 163 163 *erase_ppa = ppa_list[i]; 164 164 erase_ppa->a.blk = e_line->id; 165 + erase_ppa->a.reserved = 0; 165 166 166 167 spin_unlock(&e_line->lock); 167 168
+2 -11
drivers/lightnvm/pblk-rb.c
··· 642 642 * be directed to disk. 643 643 */ 644 644 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 645 - struct ppa_addr ppa, int bio_iter, bool advanced_bio) 645 + struct ppa_addr ppa) 646 646 { 647 647 struct pblk *pblk = container_of(rb, struct pblk, rwb); 648 648 struct pblk_rb_entry *entry; ··· 673 673 ret = 0; 674 674 goto out; 675 675 } 676 - 677 - /* Only advance the bio if it hasn't been advanced already. If advanced, 678 - * this bio is at least a partial bio (i.e., it has partially been 679 - * filled with data from the cache). If part of the data resides on the 680 - * media, we will read later on 681 - */ 682 - if (unlikely(!advanced_bio)) 683 - bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE); 684 - 685 676 data = bio_data(bio); 686 677 memcpy(data, entry->data, rb->seg_size); 687 678 ··· 790 799 } 791 800 792 801 out: 793 - spin_unlock(&rb->w_lock); 794 802 spin_unlock_irq(&rb->s_lock); 803 + spin_unlock(&rb->w_lock); 795 804 796 805 return ret; 797 806 }
+107 -285
drivers/lightnvm/pblk-read.c
··· 26 26 * issued. 27 27 */ 28 28 static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, 29 - sector_t lba, struct ppa_addr ppa, 30 - int bio_iter, bool advanced_bio) 29 + sector_t lba, struct ppa_addr ppa) 31 30 { 32 31 #ifdef CONFIG_NVM_PBLK_DEBUG 33 32 /* Callers must ensure that the ppa points to a cache address */ ··· 34 35 BUG_ON(!pblk_addr_in_cache(ppa)); 35 36 #endif 36 37 37 - return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, 38 - bio_iter, advanced_bio); 38 + return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa); 39 39 } 40 40 41 - static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, 41 + static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, 42 42 struct bio *bio, sector_t blba, 43 - unsigned long *read_bitmap) 43 + bool *from_cache) 44 44 { 45 45 void *meta_list = rqd->meta_list; 46 - struct ppa_addr ppas[NVM_MAX_VLBA]; 47 - int nr_secs = rqd->nr_ppas; 48 - bool advanced_bio = false; 49 - int i, j = 0; 46 + int nr_secs, i; 50 47 51 - pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); 48 + retry: 49 + nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas, 50 + from_cache); 51 + 52 + if (!*from_cache) 53 + goto end; 52 54 53 55 for (i = 0; i < nr_secs; i++) { 54 - struct ppa_addr p = ppas[i]; 55 56 struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); 56 57 sector_t lba = blba + i; 57 58 58 - retry: 59 - if (pblk_ppa_empty(p)) { 59 + if (pblk_ppa_empty(rqd->ppa_list[i])) { 60 60 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); 61 61 62 - WARN_ON(test_and_set_bit(i, read_bitmap)); 63 62 meta->lba = addr_empty; 64 - 65 - if (unlikely(!advanced_bio)) { 66 - bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); 67 - advanced_bio = true; 63 + } else if (pblk_addr_in_cache(rqd->ppa_list[i])) { 64 + /* 65 + * Try to read from write buffer. The address is later 66 + * checked on the write buffer to prevent retrieving 67 + * overwritten data. 
68 + */ 69 + if (!pblk_read_from_cache(pblk, bio, lba, 70 + rqd->ppa_list[i])) { 71 + if (i == 0) { 72 + /* 73 + * We didn't call with bio_advance() 74 + * yet, so we can just retry. 75 + */ 76 + goto retry; 77 + } else { 78 + /* 79 + * We already call bio_advance() 80 + * so we cannot retry and we need 81 + * to quit that function in order 82 + * to allow caller to handle the bio 83 + * splitting in the current sector 84 + * position. 85 + */ 86 + nr_secs = i; 87 + goto end; 88 + } 68 89 } 69 - 70 - goto next; 71 - } 72 - 73 - /* Try to read from write buffer. The address is later checked 74 - * on the write buffer to prevent retrieving overwritten data. 75 - */ 76 - if (pblk_addr_in_cache(p)) { 77 - if (!pblk_read_from_cache(pblk, bio, lba, p, i, 78 - advanced_bio)) { 79 - pblk_lookup_l2p_seq(pblk, &p, lba, 1); 80 - goto retry; 81 - } 82 - WARN_ON(test_and_set_bit(i, read_bitmap)); 83 90 meta->lba = cpu_to_le64(lba); 84 - advanced_bio = true; 85 91 #ifdef CONFIG_NVM_PBLK_DEBUG 86 92 atomic_long_inc(&pblk->cache_reads); 87 93 #endif 88 - } else { 89 - /* Read from media non-cached sectors */ 90 - rqd->ppa_list[j++] = p; 91 94 } 92 - 93 - next: 94 - if (advanced_bio) 95 - bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); 95 + bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); 96 96 } 97 97 98 + end: 98 99 if (pblk_io_aligned(pblk, nr_secs)) 99 100 rqd->is_seq = 1; 100 101 101 102 #ifdef CONFIG_NVM_PBLK_DEBUG 102 103 atomic_long_add(nr_secs, &pblk->inflight_reads); 103 104 #endif 105 + 106 + return nr_secs; 104 107 } 105 108 106 109 ··· 176 175 WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); 177 176 } 178 177 179 - static void pblk_end_user_read(struct bio *bio) 178 + static void pblk_end_user_read(struct bio *bio, int error) 180 179 { 181 - #ifdef CONFIG_NVM_PBLK_DEBUG 182 - WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); 183 - #endif 184 - bio_endio(bio); 180 + if (error && error != NVM_RSP_WARN_HIGHECC) 181 + bio_io_error(bio); 182 + else 183 + 
bio_endio(bio); 185 184 } 186 185 187 186 static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, ··· 198 197 pblk_log_read_err(pblk, rqd); 199 198 200 199 pblk_read_check_seq(pblk, rqd, r_ctx->lba); 201 - 202 - if (int_bio) 203 - bio_put(int_bio); 200 + bio_put(int_bio); 204 201 205 202 if (put_line) 206 203 pblk_rq_to_line_put(pblk, rqd); ··· 218 219 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); 219 220 struct bio *bio = (struct bio *)r_ctx->private; 220 221 221 - pblk_end_user_read(bio); 222 + pblk_end_user_read(bio, rqd->error); 222 223 __pblk_end_io_read(pblk, rqd, true); 223 224 } 224 225 225 - static void pblk_end_partial_read(struct nvm_rq *rqd) 226 - { 227 - struct pblk *pblk = rqd->private; 228 - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); 229 - struct pblk_pr_ctx *pr_ctx = r_ctx->private; 230 - struct pblk_sec_meta *meta; 231 - struct bio *new_bio = rqd->bio; 232 - struct bio *bio = pr_ctx->orig_bio; 233 - void *meta_list = rqd->meta_list; 234 - unsigned long *read_bitmap = pr_ctx->bitmap; 235 - struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT; 236 - struct bvec_iter new_iter = BVEC_ITER_ALL_INIT; 237 - int nr_secs = pr_ctx->orig_nr_secs; 238 - int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); 239 - void *src_p, *dst_p; 240 - int bit, i; 241 - 242 - if (unlikely(nr_holes == 1)) { 243 - struct ppa_addr ppa; 244 - 245 - ppa = rqd->ppa_addr; 246 - rqd->ppa_list = pr_ctx->ppa_ptr; 247 - rqd->dma_ppa_list = pr_ctx->dma_ppa_list; 248 - rqd->ppa_list[0] = ppa; 249 - } 250 - 251 - for (i = 0; i < nr_secs; i++) { 252 - meta = pblk_get_meta(pblk, meta_list, i); 253 - pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba); 254 - meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]); 255 - } 256 - 257 - /* Fill the holes in the original bio */ 258 - i = 0; 259 - for (bit = 0; bit < nr_secs; bit++) { 260 - if (!test_bit(bit, read_bitmap)) { 261 - struct bio_vec dst_bv, src_bv; 262 - struct pblk_line *line; 263 - 264 - line = pblk_ppa_to_line(pblk, 
rqd->ppa_list[i]); 265 - kref_put(&line->ref, pblk_line_put); 266 - 267 - meta = pblk_get_meta(pblk, meta_list, bit); 268 - meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); 269 - 270 - dst_bv = bio_iter_iovec(bio, orig_iter); 271 - src_bv = bio_iter_iovec(new_bio, new_iter); 272 - 273 - src_p = kmap_atomic(src_bv.bv_page); 274 - dst_p = kmap_atomic(dst_bv.bv_page); 275 - 276 - memcpy(dst_p + dst_bv.bv_offset, 277 - src_p + src_bv.bv_offset, 278 - PBLK_EXPOSED_PAGE_SIZE); 279 - 280 - kunmap_atomic(src_p); 281 - kunmap_atomic(dst_p); 282 - 283 - flush_dcache_page(dst_bv.bv_page); 284 - mempool_free(src_bv.bv_page, &pblk->page_bio_pool); 285 - 286 - bio_advance_iter(new_bio, &new_iter, 287 - PBLK_EXPOSED_PAGE_SIZE); 288 - i++; 289 - } 290 - bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE); 291 - } 292 - 293 - bio_put(new_bio); 294 - kfree(pr_ctx); 295 - 296 - /* restore original request */ 297 - rqd->bio = NULL; 298 - rqd->nr_ppas = nr_secs; 299 - 300 - bio_endio(bio); 301 - __pblk_end_io_read(pblk, rqd, false); 302 - } 303 - 304 - static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, 305 - unsigned int bio_init_idx, 306 - unsigned long *read_bitmap, 307 - int nr_holes) 308 - { 309 - void *meta_list = rqd->meta_list; 310 - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); 311 - struct pblk_pr_ctx *pr_ctx; 312 - struct bio *new_bio, *bio = r_ctx->private; 313 - int nr_secs = rqd->nr_ppas; 314 - int i; 315 - 316 - new_bio = bio_alloc(GFP_KERNEL, nr_holes); 317 - 318 - if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) 319 - goto fail_bio_put; 320 - 321 - if (nr_holes != new_bio->bi_vcnt) { 322 - WARN_ONCE(1, "pblk: malformed bio\n"); 323 - goto fail_free_pages; 324 - } 325 - 326 - pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL); 327 - if (!pr_ctx) 328 - goto fail_free_pages; 329 - 330 - for (i = 0; i < nr_secs; i++) { 331 - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); 332 - 333 - pr_ctx->lba_list_mem[i] 
= le64_to_cpu(meta->lba); 334 - } 335 - 336 - new_bio->bi_iter.bi_sector = 0; /* internal bio */ 337 - bio_set_op_attrs(new_bio, REQ_OP_READ, 0); 338 - 339 - rqd->bio = new_bio; 340 - rqd->nr_ppas = nr_holes; 341 - 342 - pr_ctx->orig_bio = bio; 343 - bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA); 344 - pr_ctx->bio_init_idx = bio_init_idx; 345 - pr_ctx->orig_nr_secs = nr_secs; 346 - r_ctx->private = pr_ctx; 347 - 348 - if (unlikely(nr_holes == 1)) { 349 - pr_ctx->ppa_ptr = rqd->ppa_list; 350 - pr_ctx->dma_ppa_list = rqd->dma_ppa_list; 351 - rqd->ppa_addr = rqd->ppa_list[0]; 352 - } 353 - return 0; 354 - 355 - fail_free_pages: 356 - pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt); 357 - fail_bio_put: 358 - bio_put(new_bio); 359 - 360 - return -ENOMEM; 361 - } 362 - 363 - static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, 364 - unsigned int bio_init_idx, 365 - unsigned long *read_bitmap, int nr_secs) 366 - { 367 - int nr_holes; 368 - int ret; 369 - 370 - nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); 371 - 372 - if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap, 373 - nr_holes)) 374 - return NVM_IO_ERR; 375 - 376 - rqd->end_io = pblk_end_partial_read; 377 - 378 - ret = pblk_submit_io(pblk, rqd); 379 - if (ret) { 380 - bio_put(rqd->bio); 381 - pblk_err(pblk, "partial read IO submission failed\n"); 382 - goto err; 383 - } 384 - 385 - return NVM_IO_OK; 386 - 387 - err: 388 - pblk_err(pblk, "failed to perform partial read\n"); 389 - 390 - /* Free allocated pages in new bio */ 391 - pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt); 392 - __pblk_end_io_read(pblk, rqd, false); 393 - return NVM_IO_ERR; 394 - } 395 - 396 226 static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, 397 - sector_t lba, unsigned long *read_bitmap) 227 + sector_t lba, bool *from_cache) 398 228 { 399 229 struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); 400 230 struct ppa_addr ppa; 
401 231 402 - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); 232 + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); 403 233 404 234 #ifdef CONFIG_NVM_PBLK_DEBUG 405 235 atomic_long_inc(&pblk->inflight_reads); ··· 238 410 if (pblk_ppa_empty(ppa)) { 239 411 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); 240 412 241 - WARN_ON(test_and_set_bit(0, read_bitmap)); 242 413 meta->lba = addr_empty; 243 414 return; 244 415 } ··· 246 419 * write buffer to prevent retrieving overwritten data. 247 420 */ 248 421 if (pblk_addr_in_cache(ppa)) { 249 - if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) { 250 - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); 422 + if (!pblk_read_from_cache(pblk, bio, lba, ppa)) { 423 + pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); 251 424 goto retry; 252 425 } 253 426 254 - WARN_ON(test_and_set_bit(0, read_bitmap)); 255 427 meta->lba = cpu_to_le64(lba); 256 428 257 429 #ifdef CONFIG_NVM_PBLK_DEBUG ··· 261 435 } 262 436 } 263 437 264 - int pblk_submit_read(struct pblk *pblk, struct bio *bio) 438 + void pblk_submit_read(struct pblk *pblk, struct bio *bio) 265 439 { 266 440 struct nvm_tgt_dev *dev = pblk->dev; 267 441 struct request_queue *q = dev->q; 268 442 sector_t blba = pblk_get_lba(bio); 269 443 unsigned int nr_secs = pblk_get_secs(bio); 444 + bool from_cache; 270 445 struct pblk_g_ctx *r_ctx; 271 446 struct nvm_rq *rqd; 272 - unsigned int bio_init_idx; 273 - DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA); 274 - int ret = NVM_IO_ERR; 447 + struct bio *int_bio, *split_bio; 275 448 276 449 generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), 277 450 &pblk->disk->part0); 278 - 279 - bitmap_zero(read_bitmap, nr_secs); 280 451 281 452 rqd = pblk_alloc_rqd(pblk, PBLK_READ); 282 453 283 454 rqd->opcode = NVM_OP_PREAD; 284 455 rqd->nr_ppas = nr_secs; 285 - rqd->bio = NULL; /* cloned bio if needed */ 286 456 rqd->private = pblk; 287 457 rqd->end_io = pblk_end_io_read; 288 458 289 459 r_ctx = nvm_rq_to_pdu(rqd); 290 460 r_ctx->start_time = jiffies; 291 461 
r_ctx->lba = blba; 292 - r_ctx->private = bio; /* original bio */ 293 462 294 - /* Save the index for this bio's start. This is needed in case 295 - * we need to fill a partial read. 463 + if (pblk_alloc_rqd_meta(pblk, rqd)) { 464 + bio_io_error(bio); 465 + pblk_free_rqd(pblk, rqd, PBLK_READ); 466 + return; 467 + } 468 + 469 + /* Clone read bio to deal internally with: 470 + * -read errors when reading from drive 471 + * -bio_advance() calls during cache reads 296 472 */ 297 - bio_init_idx = pblk_get_bi_idx(bio); 298 - 299 - if (pblk_alloc_rqd_meta(pblk, rqd)) 300 - goto fail_rqd_free; 473 + int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); 301 474 302 475 if (nr_secs > 1) 303 - pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap); 476 + nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba, 477 + &from_cache); 304 478 else 305 - pblk_read_rq(pblk, rqd, bio, blba, read_bitmap); 479 + pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache); 306 480 307 - if (bitmap_full(read_bitmap, nr_secs)) { 481 + split_retry: 482 + r_ctx->private = bio; /* original bio */ 483 + rqd->bio = int_bio; /* internal bio */ 484 + 485 + if (from_cache && nr_secs == rqd->nr_ppas) { 486 + /* All data was read from cache, we can complete the IO. */ 487 + pblk_end_user_read(bio, 0); 308 488 atomic_inc(&pblk->inflight_io); 309 489 __pblk_end_io_read(pblk, rqd, false); 310 - return NVM_IO_DONE; 311 - } 490 + } else if (nr_secs != rqd->nr_ppas) { 491 + /* The read bio request could be partially filled by the write 492 + * buffer, but there are some holes that need to be read from 493 + * the drive. In order to handle this, we will use block layer 494 + * mechanism to split this request in to smaller ones and make 495 + * a chain of it. 
496 + */ 497 + split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL, 498 + &pblk_bio_set); 499 + bio_chain(split_bio, bio); 500 + generic_make_request(bio); 312 501 313 - /* All sectors are to be read from the device */ 314 - if (bitmap_empty(read_bitmap, rqd->nr_ppas)) { 315 - struct bio *int_bio = NULL; 502 + /* New bio contains first N sectors of the previous one, so 503 + * we can continue to use existing rqd, but we need to shrink 504 + * the number of PPAs in it. New bio is also guaranteed that 505 + * it contains only either data from cache or from drive, newer 506 + * mix of them. 507 + */ 508 + bio = split_bio; 509 + rqd->nr_ppas = nr_secs; 510 + if (rqd->nr_ppas == 1) 511 + rqd->ppa_addr = rqd->ppa_list[0]; 316 512 317 - /* Clone read bio to deal with read errors internally */ 513 + /* Recreate int_bio - existing might have some needed internal 514 + * fields modified already. 515 + */ 516 + bio_put(int_bio); 318 517 int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); 319 - if (!int_bio) { 320 - pblk_err(pblk, "could not clone read bio\n"); 321 - goto fail_end_io; 322 - } 323 - 324 - rqd->bio = int_bio; 325 - 326 - if (pblk_submit_io(pblk, rqd)) { 327 - pblk_err(pblk, "read IO submission failed\n"); 328 - ret = NVM_IO_ERR; 329 - goto fail_end_io; 330 - } 331 - 332 - return NVM_IO_OK; 518 + goto split_retry; 519 + } else if (pblk_submit_io(pblk, rqd)) { 520 + /* Submitting IO to drive failed, let's report an error */ 521 + rqd->error = -ENODEV; 522 + pblk_end_io_read(rqd); 333 523 } 334 - 335 - /* The read bio request could be partially filled by the write buffer, 336 - * but there are some holes that need to be read from the drive. 
337 - */ 338 - ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, read_bitmap, 339 - nr_secs); 340 - if (ret) 341 - goto fail_meta_free; 342 - 343 - return NVM_IO_OK; 344 - 345 - fail_meta_free: 346 - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); 347 - fail_rqd_free: 348 - pblk_free_rqd(pblk, rqd, PBLK_READ); 349 - return ret; 350 - fail_end_io: 351 - __pblk_end_io_read(pblk, rqd, false); 352 - return ret; 353 524 } 354 525 355 526 static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, ··· 391 568 goto out; 392 569 393 570 /* logic error: lba out-of-bounds */ 394 - if (lba >= pblk->rl.nr_secs) { 571 + if (lba >= pblk->capacity) { 395 572 WARN(1, "pblk: read lba out of bounds\n"); 396 573 goto out; 397 574 } ··· 465 642 466 643 if (pblk_submit_io_sync(pblk, &rqd)) { 467 644 ret = -EIO; 468 - pblk_err(pblk, "GC read request failed\n"); 469 645 goto err_free_bio; 470 646 } 471 647
+44 -30
drivers/lightnvm/pblk-recovery.c
··· 93 93 static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, 94 94 u64 written_secs) 95 95 { 96 + struct pblk_line_mgmt *l_mg = &pblk->l_mg; 96 97 int i; 97 98 98 99 for (i = 0; i < written_secs; i += pblk->min_write_pgs) 99 - pblk_alloc_page(pblk, line, pblk->min_write_pgs); 100 + __pblk_alloc_page(pblk, line, pblk->min_write_pgs); 101 + 102 + spin_lock(&l_mg->free_lock); 103 + if (written_secs > line->left_msecs) { 104 + /* 105 + * We have all data sectors written 106 + * and some emeta sectors written too. 107 + */ 108 + line->left_msecs = 0; 109 + } else { 110 + /* We have only some data sectors written. */ 111 + line->left_msecs -= written_secs; 112 + } 113 + spin_unlock(&l_mg->free_lock); 100 114 } 101 115 102 116 static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) ··· 179 165 struct pblk_pad_rq *pad_rq; 180 166 struct nvm_rq *rqd; 181 167 struct bio *bio; 168 + struct ppa_addr *ppa_list; 182 169 void *data; 183 170 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); 184 171 u64 w_ptr = line->cur_sec; ··· 209 194 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); 210 195 if (rq_ppas < pblk->min_write_pgs) { 211 196 pblk_err(pblk, "corrupted pad line %d\n", line->id); 212 - goto fail_free_pad; 197 + goto fail_complete; 213 198 } 214 199 215 200 rq_len = rq_ppas * geo->csecs; ··· 218 203 PBLK_VMALLOC_META, GFP_KERNEL); 219 204 if (IS_ERR(bio)) { 220 205 ret = PTR_ERR(bio); 221 - goto fail_free_pad; 206 + goto fail_complete; 222 207 } 223 208 224 209 bio->bi_iter.bi_sector = 0; /* internal bio */ ··· 227 212 rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); 228 213 229 214 ret = pblk_alloc_rqd_meta(pblk, rqd); 230 - if (ret) 231 - goto fail_free_rqd; 215 + if (ret) { 216 + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); 217 + bio_put(bio); 218 + goto fail_complete; 219 + } 232 220 233 221 rqd->bio = bio; 234 222 rqd->opcode = NVM_OP_PWRITE; ··· 240 222 rqd->end_io = pblk_end_io_recov; 241 223 rqd->private = pad_rq; 242 
224 225 + ppa_list = nvm_rq_to_ppa_list(rqd); 243 226 meta_list = rqd->meta_list; 244 227 245 228 for (i = 0; i < rqd->nr_ppas; ) { ··· 268 249 lba_list[w_ptr] = addr_empty; 269 250 meta = pblk_get_meta(pblk, meta_list, i); 270 251 meta->lba = addr_empty; 271 - rqd->ppa_list[i] = dev_ppa; 252 + ppa_list[i] = dev_ppa; 272 253 } 273 254 } 274 255 275 256 kref_get(&pad_rq->ref); 276 - pblk_down_chunk(pblk, rqd->ppa_list[0]); 257 + pblk_down_chunk(pblk, ppa_list[0]); 277 258 278 259 ret = pblk_submit_io(pblk, rqd); 279 260 if (ret) { 280 261 pblk_err(pblk, "I/O submission failed: %d\n", ret); 281 - pblk_up_chunk(pblk, rqd->ppa_list[0]); 282 - goto fail_free_rqd; 262 + pblk_up_chunk(pblk, ppa_list[0]); 263 + kref_put(&pad_rq->ref, pblk_recov_complete); 264 + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); 265 + bio_put(bio); 266 + goto fail_complete; 283 267 } 284 268 285 269 left_line_ppas -= rq_ppas; ··· 290 268 if (left_ppas && left_line_ppas) 291 269 goto next_pad_rq; 292 270 271 + fail_complete: 293 272 kref_put(&pad_rq->ref, pblk_recov_complete); 294 - 295 - if (!wait_for_completion_io_timeout(&pad_rq->wait, 296 - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 297 - pblk_err(pblk, "pad write timed out\n"); 298 - ret = -ETIME; 299 - } 273 + wait_for_completion(&pad_rq->wait); 300 274 301 275 if (!pblk_line_is_full(line)) 302 276 pblk_err(pblk, "corrupted padded line: %d\n", line->id); ··· 300 282 vfree(data); 301 283 free_rq: 302 284 kfree(pad_rq); 303 - return ret; 304 - 305 - fail_free_rqd: 306 - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); 307 - bio_put(bio); 308 - fail_free_pad: 309 - kfree(pad_rq); 310 - vfree(data); 311 285 return ret; 312 286 } 313 287 ··· 422 412 rqd->ppa_list = ppa_list; 423 413 rqd->dma_ppa_list = dma_ppa_list; 424 414 rqd->dma_meta_list = dma_meta_list; 415 + ppa_list = nvm_rq_to_ppa_list(rqd); 425 416 426 417 if (pblk_io_aligned(pblk, rq_ppas)) 427 418 rqd->is_seq = 1; ··· 441 430 } 442 431 443 432 for (j = 0; j < pblk->min_write_pgs; j++, i++) 
444 - rqd->ppa_list[i] = 433 + ppa_list[i] = 445 434 addr_to_gen_ppa(pblk, paddr + j, line->id); 446 435 } 447 436 ··· 455 444 atomic_dec(&pblk->inflight_io); 456 445 457 446 /* If a read fails, do a best effort by padding the line and retrying */ 458 - if (rqd->error) { 447 + if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { 459 448 int pad_distance, ret; 460 449 461 450 if (padded) { ··· 485 474 486 475 lba_list[paddr++] = cpu_to_le64(lba); 487 476 488 - if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) 477 + if (lba == ADDR_EMPTY || lba >= pblk->capacity) 489 478 continue; 490 479 491 480 line->nr_valid_lbas++; 492 - pblk_update_map(pblk, lba, rqd->ppa_list[i]); 481 + pblk_update_map(pblk, lba, ppa_list[i]); 493 482 } 494 483 495 484 left_ppas -= rq_ppas; ··· 658 647 bppa = pblk->luns[smeta_blk].bppa; 659 648 chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; 660 649 661 - if (chunk->state & NVM_CHK_ST_FREE) 662 - return 0; 650 + if (chunk->state & NVM_CHK_ST_CLOSED || 651 + (chunk->state & NVM_CHK_ST_OPEN 652 + && chunk->wp >= lm->smeta_sec)) 653 + return 1; 663 654 664 - return 1; 655 + return 0; 665 656 } 666 657 667 658 static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) ··· 857 844 spin_unlock(&l_mg->free_lock); 858 845 } else { 859 846 spin_lock(&l_mg->free_lock); 847 + l_mg->data_line = data_line; 860 848 /* Allocate next line for preparation */ 861 849 l_mg->data_next = pblk_line_get(pblk); 862 850 if (l_mg->data_next) {
+1
drivers/lightnvm/pblk-write.c
··· 228 228 mempool_free(recovery, &pblk->rec_pool); 229 229 230 230 atomic_dec(&pblk->inflight_io); 231 + pblk_write_kick(pblk); 231 232 } 232 233 233 234
+7 -21
drivers/lightnvm/pblk.h
··· 43 43 44 44 #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) 45 45 46 - #define PBLK_COMMAND_TIMEOUT_MS 30000 47 - 48 46 /* Max 512 LUNs per device */ 49 47 #define PBLK_MAX_LUNS_BITMAP (4) 50 48 ··· 119 121 void *private; 120 122 unsigned long start_time; 121 123 u64 lba; 122 - }; 123 - 124 - /* partial read context */ 125 - struct pblk_pr_ctx { 126 - struct bio *orig_bio; 127 - DECLARE_BITMAP(bitmap, NVM_MAX_VLBA); 128 - unsigned int orig_nr_secs; 129 - unsigned int bio_init_idx; 130 - void *ppa_ptr; 131 - dma_addr_t dma_ppa_list; 132 - u64 lba_list_mem[NVM_MAX_VLBA]; 133 - u64 lba_list_media[NVM_MAX_VLBA]; 134 124 }; 135 125 136 126 /* Pad context */ ··· 291 305 292 306 struct timer_list u_timer; 293 307 294 - unsigned long long nr_secs; 295 308 unsigned long total_blocks; 296 309 297 310 atomic_t free_blocks; /* Total number of free blocks (+ OP) */ ··· 425 440 426 441 struct pblk_w_err_gc { 427 442 int has_write_err; 443 + int has_gc_err; 428 444 __le64 *lba_list; 429 445 }; 430 446 ··· 451 465 int meta_line; /* Metadata line id */ 452 466 int meta_distance; /* Distance between data and metadata */ 453 467 454 - u64 smeta_ssec; /* Sector where smeta starts */ 455 468 u64 emeta_ssec; /* Sector where emeta starts */ 456 469 457 470 unsigned int sec_in_line; /* Number of usable secs in line */ ··· 747 762 unsigned int pos, unsigned int nr_entries, 748 763 unsigned int count); 749 764 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 750 - struct ppa_addr ppa, int bio_iter, bool advanced_bio); 765 + struct ppa_addr ppa); 751 766 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); 752 767 753 768 unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); ··· 847 862 struct pblk_line *gc_line, u64 paddr); 848 863 void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, 849 864 u64 *lba_list, int nr_secs); 850 - void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, 851 - 
sector_t blba, int nr_secs); 865 + int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, 866 + sector_t blba, int nr_secs, bool *from_cache); 852 867 void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); 853 868 void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); 854 869 855 870 /* 856 871 * pblk user I/O write path 857 872 */ 858 - int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, 873 + void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, 859 874 unsigned long flags); 860 875 int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); 861 876 ··· 881 896 * pblk read path 882 897 */ 883 898 extern struct bio_set pblk_bio_set; 884 - int pblk_submit_read(struct pblk *pblk, struct bio *bio); 899 + void pblk_submit_read(struct pblk *pblk, struct bio *bio); 885 900 int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); 886 901 /* 887 902 * pblk recovery ··· 906 921 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, 907 922 int *gc_active); 908 923 int pblk_gc_sysfs_force(struct pblk *pblk, int force); 924 + void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line); 909 925 910 926 /* 911 927 * pblk rate limiter
+40 -39
drivers/nvme/host/core.c
··· 1257 1257 return 0; 1258 1258 } 1259 1259 1260 + effects |= nvme_known_admin_effects(opcode); 1260 1261 if (ctrl->effects) 1261 1262 effects = le32_to_cpu(ctrl->effects->acs[opcode]); 1262 - else 1263 - effects = nvme_known_admin_effects(opcode); 1264 1263 1265 1264 /* 1266 1265 * For simplicity, IO to all namespaces is quiesced even if the command ··· 2341 2342 NULL, 2342 2343 }; 2343 2344 2344 - static int nvme_active_ctrls(struct nvme_subsystem *subsys) 2345 + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, 2346 + struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) 2345 2347 { 2346 - int count = 0; 2347 - struct nvme_ctrl *ctrl; 2348 + struct nvme_ctrl *tmp; 2348 2349 2349 - mutex_lock(&subsys->lock); 2350 - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 2351 - if (ctrl->state != NVME_CTRL_DELETING && 2352 - ctrl->state != NVME_CTRL_DEAD) 2353 - count++; 2350 + lockdep_assert_held(&nvme_subsystems_lock); 2351 + 2352 + list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) { 2353 + if (ctrl->state == NVME_CTRL_DELETING || 2354 + ctrl->state == NVME_CTRL_DEAD) 2355 + continue; 2356 + 2357 + if (tmp->cntlid == ctrl->cntlid) { 2358 + dev_err(ctrl->device, 2359 + "Duplicate cntlid %u with %s, rejecting\n", 2360 + ctrl->cntlid, dev_name(tmp->device)); 2361 + return false; 2362 + } 2363 + 2364 + if ((id->cmic & (1 << 1)) || 2365 + (ctrl->opts && ctrl->opts->discovery_nqn)) 2366 + continue; 2367 + 2368 + dev_err(ctrl->device, 2369 + "Subsystem does not support multiple controllers\n"); 2370 + return false; 2354 2371 } 2355 - mutex_unlock(&subsys->lock); 2356 2372 2357 - return count; 2373 + return true; 2358 2374 } 2359 2375 2360 2376 static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ··· 2409 2395 mutex_lock(&nvme_subsystems_lock); 2410 2396 found = __nvme_find_get_subsystem(subsys->subnqn); 2411 2397 if (found) { 2412 - /* 2413 - * Verify that the subsystem actually supports multiple 2414 - * controllers, else 
bail out. 2415 - */ 2416 - if (!(ctrl->opts && ctrl->opts->discovery_nqn) && 2417 - nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { 2418 - dev_err(ctrl->device, 2419 - "ignoring ctrl due to duplicate subnqn (%s).\n", 2420 - found->subnqn); 2421 - nvme_put_subsystem(found); 2422 - ret = -EINVAL; 2423 - goto out_unlock; 2424 - } 2425 - 2426 2398 __nvme_release_subsystem(subsys); 2427 2399 subsys = found; 2400 + 2401 + if (!nvme_validate_cntlid(subsys, ctrl, id)) { 2402 + ret = -EINVAL; 2403 + goto out_put_subsystem; 2404 + } 2428 2405 } else { 2429 2406 ret = device_add(&subsys->dev); 2430 2407 if (ret) { ··· 2427 2422 list_add_tail(&subsys->entry, &nvme_subsystems); 2428 2423 } 2429 2424 2430 - ctrl->subsys = subsys; 2431 - mutex_unlock(&nvme_subsystems_lock); 2432 - 2433 2425 if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, 2434 2426 dev_name(ctrl->device))) { 2435 2427 dev_err(ctrl->device, 2436 2428 "failed to create sysfs link from subsystem.\n"); 2437 - /* the transport driver will eventually put the subsystem */ 2438 - return -EINVAL; 2429 + goto out_put_subsystem; 2439 2430 } 2440 2431 2441 - mutex_lock(&subsys->lock); 2432 + ctrl->subsys = subsys; 2442 2433 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 2443 - mutex_unlock(&subsys->lock); 2444 - 2434 + mutex_unlock(&nvme_subsystems_lock); 2445 2435 return 0; 2446 2436 2437 + out_put_subsystem: 2438 + nvme_put_subsystem(subsys); 2447 2439 out_unlock: 2448 2440 mutex_unlock(&nvme_subsystems_lock); 2449 2441 put_device(&subsys->dev); ··· 3607 3605 { 3608 3606 u32 aer_notice_type = (result & 0xff00) >> 8; 3609 3607 3608 + trace_nvme_async_event(ctrl, aer_notice_type); 3609 + 3610 3610 switch (aer_notice_type) { 3611 3611 case NVME_AER_NOTICE_NS_CHANGED: 3612 - trace_nvme_async_event(ctrl, aer_notice_type); 3613 3612 set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); 3614 3613 nvme_queue_scan(ctrl); 3615 3614 break; 3616 3615 case NVME_AER_NOTICE_FW_ACT_STARTING: 3617 - 
trace_nvme_async_event(ctrl, aer_notice_type); 3618 3616 queue_work(nvme_wq, &ctrl->fw_act_work); 3619 3617 break; 3620 3618 #ifdef CONFIG_NVME_MULTIPATH 3621 3619 case NVME_AER_NOTICE_ANA: 3622 - trace_nvme_async_event(ctrl, aer_notice_type); 3623 3620 if (!ctrl->ana_log_buf) 3624 3621 break; 3625 3622 queue_work(nvme_wq, &ctrl->ana_work); ··· 3697 3696 __free_page(ctrl->discard_page); 3698 3697 3699 3698 if (subsys) { 3700 - mutex_lock(&subsys->lock); 3699 + mutex_lock(&nvme_subsystems_lock); 3701 3700 list_del(&ctrl->subsys_entry); 3702 - mutex_unlock(&subsys->lock); 3703 3701 sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device)); 3702 + mutex_unlock(&nvme_subsystems_lock); 3704 3703 } 3705 3704 3706 3705 ctrl->ops->free_ctrl(ctrl);
+2 -2
drivers/nvme/host/fabrics.c
··· 978 978 NVMF_OPT_DISABLE_SQFLOW) 979 979 980 980 static struct nvme_ctrl * 981 - nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) 981 + nvmf_create_ctrl(struct device *dev, const char *buf) 982 982 { 983 983 struct nvmf_ctrl_options *opts; 984 984 struct nvmf_transport_ops *ops; ··· 1073 1073 goto out_unlock; 1074 1074 } 1075 1075 1076 - ctrl = nvmf_create_ctrl(nvmf_device, buf, count); 1076 + ctrl = nvmf_create_ctrl(nvmf_device, buf); 1077 1077 if (IS_ERR(ctrl)) { 1078 1078 ret = PTR_ERR(ctrl); 1079 1079 goto out_unlock;
+11 -3
drivers/nvme/host/fc.c
··· 202 202 static DEFINE_IDA(nvme_fc_local_port_cnt); 203 203 static DEFINE_IDA(nvme_fc_ctrl_cnt); 204 204 205 - 205 + static struct workqueue_struct *nvme_fc_wq; 206 206 207 207 /* 208 208 * These items are short-term. They will eventually be moved into ··· 2054 2054 */ 2055 2055 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { 2056 2056 active = atomic_xchg(&ctrl->err_work_active, 1); 2057 - if (!active && !schedule_work(&ctrl->err_work)) { 2057 + if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) { 2058 2058 atomic_set(&ctrl->err_work_active, 0); 2059 2059 WARN_ON(1); 2060 2060 } ··· 3399 3399 { 3400 3400 int ret; 3401 3401 3402 + nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); 3403 + if (!nvme_fc_wq) 3404 + return -ENOMEM; 3405 + 3402 3406 /* 3403 3407 * NOTE: 3404 3408 * It is expected that in the future the kernel will combine ··· 3420 3416 ret = class_register(&fc_class); 3421 3417 if (ret) { 3422 3418 pr_err("couldn't register class fc\n"); 3423 - return ret; 3419 + goto out_destroy_wq; 3424 3420 } 3425 3421 3426 3422 /* ··· 3444 3440 device_destroy(&fc_class, MKDEV(0, 0)); 3445 3441 out_destroy_class: 3446 3442 class_unregister(&fc_class); 3443 + out_destroy_wq: 3444 + destroy_workqueue(nvme_fc_wq); 3445 + 3447 3446 return ret; 3448 3447 } 3449 3448 ··· 3463 3456 3464 3457 device_destroy(&fc_class, MKDEV(0, 0)); 3465 3458 class_unregister(&fc_class); 3459 + destroy_workqueue(nvme_fc_wq); 3466 3460 } 3467 3461 3468 3462 module_init(nvme_fc_init_module);
+1
drivers/nvme/host/lightnvm.c
··· 977 977 geo->csecs = 1 << ns->lba_shift; 978 978 geo->sos = ns->ms; 979 979 geo->ext = ns->ext; 980 + geo->mdts = ns->ctrl->max_hw_sectors; 980 981 981 982 dev->q = q; 982 983 memcpy(dev->name, disk_name, DISK_NAME_LEN);
+1 -1
drivers/nvme/host/multipath.c
··· 31 31 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); 32 32 } else if (ns->head->disk) { 33 33 sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, 34 - ctrl->cntlid, ns->head->instance); 34 + ctrl->instance, ns->head->instance); 35 35 *flags = GENHD_FL_HIDDEN; 36 36 } else { 37 37 sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
+2 -2
drivers/nvme/host/pci.c
··· 1296 1296 switch (dev->ctrl.state) { 1297 1297 case NVME_CTRL_DELETING: 1298 1298 shutdown = true; 1299 + /* fall through */ 1299 1300 case NVME_CTRL_CONNECTING: 1300 1301 case NVME_CTRL_RESETTING: 1301 1302 dev_warn_ratelimited(dev->ctrl.device, ··· 2281 2280 return ret; 2282 2281 } 2283 2282 dev->ctrl.tagset = &dev->tagset; 2284 - 2285 - nvme_dbbuf_set(dev); 2286 2283 } else { 2287 2284 blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); 2288 2285 ··· 2288 2289 nvme_free_queues(dev, dev->online_queues); 2289 2290 } 2290 2291 2292 + nvme_dbbuf_set(dev); 2291 2293 return 0; 2292 2294 } 2293 2295
+5 -29
drivers/nvme/host/rdma.c
··· 697 697 return ret; 698 698 } 699 699 700 - static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, 701 - struct blk_mq_tag_set *set) 702 - { 703 - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); 704 - 705 - blk_mq_free_tag_set(set); 706 - nvme_rdma_dev_put(ctrl->device); 707 - } 708 - 709 700 static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, 710 701 bool admin) 711 702 { ··· 735 744 736 745 ret = blk_mq_alloc_tag_set(set); 737 746 if (ret) 738 - goto out; 739 - 740 - /* 741 - * We need a reference on the device as long as the tag_set is alive, 742 - * as the MRs in the request structures need a valid ib_device. 743 - */ 744 - ret = nvme_rdma_dev_get(ctrl->device); 745 - if (!ret) { 746 - ret = -EINVAL; 747 - goto out_free_tagset; 748 - } 747 + return ERR_PTR(ret); 749 748 750 749 return set; 751 - 752 - out_free_tagset: 753 - blk_mq_free_tag_set(set); 754 - out: 755 - return ERR_PTR(ret); 756 750 } 757 751 758 752 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, ··· 745 769 { 746 770 if (remove) { 747 771 blk_cleanup_queue(ctrl->ctrl.admin_q); 748 - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 772 + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); 749 773 } 750 774 if (ctrl->async_event_sqe.data) { 751 775 nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, ··· 823 847 blk_cleanup_queue(ctrl->ctrl.admin_q); 824 848 out_free_tagset: 825 849 if (new) 826 - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 850 + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); 827 851 out_free_async_qe: 828 852 nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 829 853 sizeof(struct nvme_command), DMA_TO_DEVICE); ··· 838 862 { 839 863 if (remove) { 840 864 blk_cleanup_queue(ctrl->ctrl.connect_q); 841 - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 865 + blk_mq_free_tag_set(ctrl->ctrl.tagset); 842 866 } 843 867 nvme_rdma_free_io_queues(ctrl); 844 868 } ··· 879 903 
blk_cleanup_queue(ctrl->ctrl.connect_q); 880 904 out_free_tag_set: 881 905 if (new) 882 - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 906 + blk_mq_free_tag_set(ctrl->ctrl.tagset); 883 907 out_free_io_queues: 884 908 nvme_rdma_free_io_queues(ctrl); 885 909 return ret;
+1
drivers/nvme/host/trace.h
··· 167 167 aer_name(NVME_AER_NOTICE_NS_CHANGED), 168 168 aer_name(NVME_AER_NOTICE_ANA), 169 169 aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), 170 + aer_name(NVME_AER_NOTICE_DISC_CHANGED), 170 171 aer_name(NVME_AER_ERROR), 171 172 aer_name(NVME_AER_SMART), 172 173 aer_name(NVME_AER_CSS),
+1 -1
drivers/s390/block/dasd_eckd.c
··· 3827 3827 if ((start_padding_sectors || end_padding_sectors) && 3828 3828 (rq_data_dir(req) == WRITE)) { 3829 3829 DBF_DEV_EVENT(DBF_ERR, basedev, 3830 - "raw write not track aligned (%lu,%lu) req %p", 3830 + "raw write not track aligned (%llu,%llu) req %p", 3831 3831 start_padding_sectors, end_padding_sectors, req); 3832 3832 return ERR_PTR(-EINVAL); 3833 3833 }
+2
include/linux/lightnvm.h
··· 358 358 u16 csecs; /* sector size */ 359 359 u16 sos; /* out-of-band area size */ 360 360 bool ext; /* metadata in extended data buffer */ 361 + u32 mdts; /* Max data transfer size*/ 361 362 362 363 /* device write constrains */ 363 364 u32 ws_min; /* minimum write size */ ··· 428 427 char name[DISK_NAME_LEN]; 429 428 void *private_data; 430 429 430 + struct kref ref; 431 431 void *rmap; 432 432 433 433 struct mutex mlock;
+2 -2
include/linux/nvme.h
··· 1246 1246 NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, 1247 1247 NVME_SC_FW_NEEDS_RESET = 0x111, 1248 1248 NVME_SC_FW_NEEDS_MAX_TIME = 0x112, 1249 - NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, 1249 + NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113, 1250 1250 NVME_SC_OVERLAPPING_RANGE = 0x114, 1251 - NVME_SC_NS_INSUFFICENT_CAP = 0x115, 1251 + NVME_SC_NS_INSUFFICIENT_CAP = 0x115, 1252 1252 NVME_SC_NS_ID_UNAVAILABLE = 0x116, 1253 1253 NVME_SC_NS_ALREADY_ATTACHED = 0x118, 1254 1254 NVME_SC_NS_IS_PRIVATE = 0x119,