Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge remote-tracking branch 'linux-block/block-6.15' into xfs tree

We need two patches from the linux-block tree as dependencies of the patch
that will follow this merge.

Specifically, we need:

block: fix race between set_blocksize and read paths
block: hoist block size validation code to a separate function

Signed-off-by: Carlos Maiolino <cem@kernel.org>

+1444 -649
+51 -16
block/bdev.c
··· 152 152 get_order(bsize)); 153 153 } 154 154 155 + /** 156 + * bdev_validate_blocksize - check that this block size is acceptable 157 + * @bdev: blockdevice to check 158 + * @block_size: block size to check 159 + * 160 + * For block device users that do not use buffer heads or the block device 161 + * page cache, make sure that this block size can be used with the device. 162 + * 163 + * Return: On success zero is returned, negative error code on failure. 164 + */ 165 + int bdev_validate_blocksize(struct block_device *bdev, int block_size) 166 + { 167 + if (blk_validate_block_size(block_size)) 168 + return -EINVAL; 169 + 170 + /* Size cannot be smaller than the size supported by the device */ 171 + if (block_size < bdev_logical_block_size(bdev)) 172 + return -EINVAL; 173 + 174 + return 0; 175 + } 176 + EXPORT_SYMBOL_GPL(bdev_validate_blocksize); 177 + 155 178 int set_blocksize(struct file *file, int size) 156 179 { 157 180 struct inode *inode = file->f_mapping->host; 158 181 struct block_device *bdev = I_BDEV(inode); 182 + int ret; 159 183 160 - if (blk_validate_block_size(size)) 161 - return -EINVAL; 162 - 163 - /* Size cannot be smaller than the size supported by the device */ 164 - if (size < bdev_logical_block_size(bdev)) 165 - return -EINVAL; 184 + ret = bdev_validate_blocksize(bdev, size); 185 + if (ret) 186 + return ret; 166 187 167 188 if (!file->private_data) 168 189 return -EINVAL; 169 190 170 191 /* Don't change the size if it is same as current */ 171 192 if (inode->i_blkbits != blksize_bits(size)) { 193 + /* 194 + * Flush and truncate the pagecache before we reconfigure the 195 + * mapping geometry because folio sizes are variable now. If a 196 + * reader has already allocated a folio whose size is smaller 197 + * than the new min_order but invokes readahead after the new 198 + * min_order becomes visible, readahead will think there are 199 + * "zero" blocks per folio and crash. 
Take the inode and 200 + * invalidation locks to avoid racing with 201 + * read/write/fallocate. 202 + */ 203 + inode_lock(inode); 204 + filemap_invalidate_lock(inode->i_mapping); 205 + 172 206 sync_blockdev(bdev); 207 + kill_bdev(bdev); 208 + 173 209 inode->i_blkbits = blksize_bits(size); 174 210 mapping_set_folio_min_order(inode->i_mapping, get_order(size)); 175 211 kill_bdev(bdev); 212 + filemap_invalidate_unlock(inode->i_mapping); 213 + inode_unlock(inode); 176 214 } 177 215 return 0; 178 216 } ··· 815 777 blkdev_put_whole(whole); 816 778 } 817 779 818 - struct block_device *blkdev_get_no_open(dev_t dev) 780 + struct block_device *blkdev_get_no_open(dev_t dev, bool autoload) 819 781 { 820 782 struct block_device *bdev; 821 783 struct inode *inode; 822 784 823 785 inode = ilookup(blockdev_superblock, dev); 824 - if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { 786 + if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { 825 787 blk_request_module(dev); 826 788 inode = ilookup(blockdev_superblock, dev); 827 789 if (inode) ··· 1043 1005 if (ret) 1044 1006 return ERR_PTR(ret); 1045 1007 1046 - bdev = blkdev_get_no_open(dev); 1008 + bdev = blkdev_get_no_open(dev, true); 1047 1009 if (!bdev) 1048 1010 return ERR_PTR(-ENXIO); 1049 1011 ··· 1313 1275 void bdev_statx(struct path *path, struct kstat *stat, 1314 1276 u32 request_mask) 1315 1277 { 1316 - struct inode *backing_inode; 1317 1278 struct block_device *bdev; 1318 1279 1319 - backing_inode = d_backing_inode(path->dentry); 1320 - 1321 1280 /* 1322 - * Note that backing_inode is the inode of a block device node file, 1323 - * not the block device's internal inode. Therefore it is *not* valid 1324 - * to use I_BDEV() here; the block device has to be looked up by i_rdev 1281 + * Note that d_backing_inode() returns the block device node inode, not 1282 + * the block device's internal inode. 
Therefore it is *not* valid to 1283 + * use I_BDEV() here; the block device has to be looked up by i_rdev 1325 1284 * instead. 1326 1285 */ 1327 - bdev = blkdev_get_no_open(backing_inode->i_rdev); 1286 + bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false); 1328 1287 if (!bdev) 1329 1288 return; 1330 1289
+6 -11
block/bio-integrity.c
··· 66 66 } 67 67 EXPORT_SYMBOL(bio_integrity_alloc); 68 68 69 - static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs, 70 - bool dirty) 69 + static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs) 71 70 { 72 71 int i; 73 72 74 - for (i = 0; i < nr_vecs; i++) { 75 - if (dirty && !PageCompound(bv[i].bv_page)) 76 - set_page_dirty_lock(bv[i].bv_page); 73 + for (i = 0; i < nr_vecs; i++) 77 74 unpin_user_page(bv[i].bv_page); 78 - } 79 75 } 80 76 81 77 static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip) ··· 87 91 ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter); 88 92 WARN_ON_ONCE(ret != bytes); 89 93 90 - bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true); 94 + bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs); 91 95 } 92 96 93 97 /** ··· 107 111 return; 108 112 } 109 113 110 - bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, 111 - bio_data_dir(bio) == READ); 114 + bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt); 112 115 } 113 116 114 117 /** ··· 193 198 } 194 199 195 200 if (write) 196 - bio_integrity_unpin_bvec(bvec, nr_vecs, false); 201 + bio_integrity_unpin_bvec(bvec, nr_vecs); 197 202 else 198 203 memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec)); 199 204 ··· 314 319 return 0; 315 320 316 321 release_pages: 317 - bio_integrity_unpin_bvec(bvec, nr_bvecs, false); 322 + bio_integrity_unpin_bvec(bvec, nr_bvecs); 318 323 free_bvec: 319 324 if (bvec != stack_vec) 320 325 kfree(bvec);
+1 -1
block/blk-cgroup.c
··· 797 797 return -EINVAL; 798 798 input = skip_spaces(input); 799 799 800 - bdev = blkdev_get_no_open(MKDEV(major, minor)); 800 + bdev = blkdev_get_no_open(MKDEV(major, minor), false); 801 801 if (!bdev) 802 802 return -ENODEV; 803 803 if (bdev_is_partition(bdev)) {
+7 -1
block/blk-settings.c
··· 61 61 /* 62 62 * For read-ahead of large files to be effective, we need to read ahead 63 63 * at least twice the optimal I/O size. 64 + * 65 + * There is no hardware limitation for the read-ahead size and the user 66 + * might have increased the read-ahead size through sysfs, so don't ever 67 + * decrease it. 64 68 */ 65 - bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); 69 + bdi->ra_pages = max3(bdi->ra_pages, 70 + lim->io_opt * 2 / PAGE_SIZE, 71 + VM_READAHEAD_PAGES); 66 72 bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; 67 73 } 68 74
+2
block/blk-sysfs.c
··· 909 909 out_debugfs_remove: 910 910 blk_debugfs_remove(disk); 911 911 mutex_unlock(&q->sysfs_lock); 912 + if (queue_is_mq(q)) 913 + blk_mq_sysfs_unregister(disk); 912 914 out_put_queue_kobj: 913 915 kobject_put(&disk->queue_kobj); 914 916 return ret;
+1
block/blk-throttle.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 1 2 #ifndef BLK_THROTTLE_H 2 3 #define BLK_THROTTLE_H 3 4
+4 -1
block/blk-zoned.c
··· 343 343 op = REQ_OP_ZONE_RESET; 344 344 345 345 /* Invalidate the page cache, including dirty pages. */ 346 + inode_lock(bdev->bd_mapping->host); 346 347 filemap_invalidate_lock(bdev->bd_mapping); 347 348 ret = blkdev_truncate_zone_range(bdev, mode, &zrange); 348 349 if (ret) ··· 365 364 ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); 366 365 367 366 fail: 368 - if (cmd == BLKRESETZONE) 367 + if (cmd == BLKRESETZONE) { 369 368 filemap_invalidate_unlock(bdev->bd_mapping); 369 + inode_unlock(bdev->bd_mapping->host); 370 + } 370 371 371 372 return ret; 372 373 }
+3
block/blk.h
··· 94 94 wait_for_completion_io(done); 95 95 } 96 96 97 + struct block_device *blkdev_get_no_open(dev_t dev, bool autoload); 98 + void blkdev_put_no_open(struct block_device *bdev); 99 + 97 100 #define BIO_INLINE_VECS 4 98 101 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, 99 102 gfp_t gfp_mask);
+17 -1
block/fops.c
··· 642 642 if (ret) 643 643 return ret; 644 644 645 - bdev = blkdev_get_no_open(inode->i_rdev); 645 + bdev = blkdev_get_no_open(inode->i_rdev, true); 646 646 if (!bdev) 647 647 return -ENXIO; 648 648 ··· 746 746 ret = direct_write_fallback(iocb, from, ret, 747 747 blkdev_buffered_write(iocb, from)); 748 748 } else { 749 + /* 750 + * Take i_rwsem and invalidate_lock to avoid racing with 751 + * set_blocksize changing i_blkbits/folio order and punching 752 + * out the pagecache. 753 + */ 754 + inode_lock_shared(bd_inode); 749 755 ret = blkdev_buffered_write(iocb, from); 756 + inode_unlock_shared(bd_inode); 750 757 } 751 758 752 759 if (ret > 0) ··· 764 757 765 758 static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) 766 759 { 760 + struct inode *bd_inode = bdev_file_inode(iocb->ki_filp); 767 761 struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); 768 762 loff_t size = bdev_nr_bytes(bdev); 769 763 loff_t pos = iocb->ki_pos; ··· 801 793 goto reexpand; 802 794 } 803 795 796 + /* 797 + * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize 798 + * changing i_blkbits/folio order and punching out the pagecache. 799 + */ 800 + inode_lock_shared(bd_inode); 804 801 ret = filemap_read(iocb, to, ret); 802 + inode_unlock_shared(bd_inode); 805 803 806 804 reexpand: 807 805 if (unlikely(shorted)) ··· 850 836 if ((start | len) & (bdev_logical_block_size(bdev) - 1)) 851 837 return -EINVAL; 852 838 839 + inode_lock(inode); 853 840 filemap_invalidate_lock(inode->i_mapping); 854 841 855 842 /* ··· 883 868 884 869 fail: 885 870 filemap_invalidate_unlock(inode->i_mapping); 871 + inode_unlock(inode); 886 872 return error; 887 873 } 888 874
+6
block/ioctl.c
··· 142 142 if (err) 143 143 return err; 144 144 145 + inode_lock(bdev->bd_mapping->host); 145 146 filemap_invalidate_lock(bdev->bd_mapping); 146 147 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 147 148 if (err) ··· 175 174 blk_finish_plug(&plug); 176 175 fail: 177 176 filemap_invalidate_unlock(bdev->bd_mapping); 177 + inode_unlock(bdev->bd_mapping->host); 178 178 return err; 179 179 } 180 180 ··· 201 199 end > bdev_nr_bytes(bdev)) 202 200 return -EINVAL; 203 201 202 + inode_lock(bdev->bd_mapping->host); 204 203 filemap_invalidate_lock(bdev->bd_mapping); 205 204 err = truncate_bdev_range(bdev, mode, start, end - 1); 206 205 if (!err) 207 206 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 208 207 GFP_KERNEL); 209 208 filemap_invalidate_unlock(bdev->bd_mapping); 209 + inode_unlock(bdev->bd_mapping->host); 210 210 return err; 211 211 } 212 212 ··· 240 236 return -EINVAL; 241 237 242 238 /* Invalidate the page cache, including dirty pages */ 239 + inode_lock(bdev->bd_mapping->host); 243 240 filemap_invalidate_lock(bdev->bd_mapping); 244 241 err = truncate_bdev_range(bdev, mode, start, end); 245 242 if (err) ··· 251 246 252 247 fail: 253 248 filemap_invalidate_unlock(bdev->bd_mapping); 249 + inode_unlock(bdev->bd_mapping->host); 254 250 return err; 255 251 } 256 252
-6
drivers/block/Kconfig
··· 388 388 definition isn't finalized yet, and might change according to future 389 389 requirement, so mark it as experimental now. 390 390 391 - Say Y if you want to get better performance because task_work_add() 392 - can be used in IO path for replacing io_uring cmd, which will become 393 - shared between IO tasks and ubq daemon, meantime task_work_add() can 394 - can handle batch more effectively, but task_work_add() isn't exported 395 - for module, so ublk has to be built to kernel. 396 - 397 391 config BLKDEV_UBLK_LEGACY_OPCODES 398 392 bool "Support legacy command opcode" 399 393 depends on BLK_DEV_UBLK
+22 -99
drivers/block/loop.c
··· 211 211 kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); 212 212 } 213 213 214 - static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) 215 - { 216 - struct iov_iter i; 217 - ssize_t bw; 218 - 219 - iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len); 220 - 221 - bw = vfs_iter_write(file, &i, ppos, 0); 222 - 223 - if (likely(bw == bvec->bv_len)) 224 - return 0; 225 - 226 - printk_ratelimited(KERN_ERR 227 - "loop: Write error at byte offset %llu, length %i.\n", 228 - (unsigned long long)*ppos, bvec->bv_len); 229 - if (bw >= 0) 230 - bw = -EIO; 231 - return bw; 232 - } 233 - 234 - static int lo_write_simple(struct loop_device *lo, struct request *rq, 235 - loff_t pos) 236 - { 237 - struct bio_vec bvec; 238 - struct req_iterator iter; 239 - int ret = 0; 240 - 241 - rq_for_each_segment(bvec, rq, iter) { 242 - ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); 243 - if (ret < 0) 244 - break; 245 - cond_resched(); 246 - } 247 - 248 - return ret; 249 - } 250 - 251 - static int lo_read_simple(struct loop_device *lo, struct request *rq, 252 - loff_t pos) 253 - { 254 - struct bio_vec bvec; 255 - struct req_iterator iter; 256 - struct iov_iter i; 257 - ssize_t len; 258 - 259 - rq_for_each_segment(bvec, rq, iter) { 260 - iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len); 261 - len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); 262 - if (len < 0) 263 - return len; 264 - 265 - flush_dcache_page(bvec.bv_page); 266 - 267 - if (len != bvec.bv_len) { 268 - struct bio *bio; 269 - 270 - __rq_for_each_bio(bio, rq) 271 - zero_fill_bio(bio); 272 - break; 273 - } 274 - cond_resched(); 275 - } 276 - 277 - return 0; 278 - } 279 - 280 214 static void loop_clear_limits(struct loop_device *lo, int mode) 281 215 { 282 216 struct queue_limits lim = queue_limits_start_update(lo->lo_queue); ··· 276 342 struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); 277 343 blk_status_t ret = BLK_STS_OK; 278 344 279 - if (!cmd->use_aio || cmd->ret < 0 || cmd->ret 
== blk_rq_bytes(rq) || 345 + if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || 280 346 req_op(rq) != REQ_OP_READ) { 281 347 if (cmd->ret < 0) 282 348 ret = errno_to_blk_status(cmd->ret); ··· 292 358 cmd->ret = 0; 293 359 blk_mq_requeue_request(rq, true); 294 360 } else { 295 - if (cmd->use_aio) { 296 - struct bio *bio = rq->bio; 361 + struct bio *bio = rq->bio; 297 362 298 - while (bio) { 299 - zero_fill_bio(bio); 300 - bio = bio->bi_next; 301 - } 363 + while (bio) { 364 + zero_fill_bio(bio); 365 + bio = bio->bi_next; 302 366 } 367 + 303 368 ret = BLK_STS_IOERR; 304 369 end_io: 305 370 blk_mq_end_request(rq, ret); ··· 378 445 379 446 cmd->iocb.ki_pos = pos; 380 447 cmd->iocb.ki_filp = file; 381 - cmd->iocb.ki_complete = lo_rw_aio_complete; 382 - cmd->iocb.ki_flags = IOCB_DIRECT; 383 - cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); 448 + cmd->iocb.ki_ioprio = req_get_ioprio(rq); 449 + if (cmd->use_aio) { 450 + cmd->iocb.ki_complete = lo_rw_aio_complete; 451 + cmd->iocb.ki_flags = IOCB_DIRECT; 452 + } else { 453 + cmd->iocb.ki_complete = NULL; 454 + cmd->iocb.ki_flags = 0; 455 + } 384 456 385 457 if (rw == ITER_SOURCE) 386 458 ret = file->f_op->write_iter(&cmd->iocb, &iter); ··· 396 458 397 459 if (ret != -EIOCBQUEUED) 398 460 lo_rw_aio_complete(&cmd->iocb, ret); 399 - return 0; 461 + return -EIOCBQUEUED; 400 462 } 401 463 402 464 static int do_req_filebacked(struct loop_device *lo, struct request *rq) ··· 404 466 struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); 405 467 loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; 406 468 407 - /* 408 - * lo_write_simple and lo_read_simple should have been covered 409 - * by io submit style function like lo_rw_aio(), one blocker 410 - * is that lo_read_simple() need to call flush_dcache_page after 411 - * the page is written from kernel, and it isn't easy to handle 412 - * this in io submit style function which submits all segments 413 - * of the req at one time. 
And direct read IO doesn't need to 414 - * run flush_dcache_page(). 415 - */ 416 469 switch (req_op(rq)) { 417 470 case REQ_OP_FLUSH: 418 471 return lo_req_flush(lo, rq); ··· 419 490 case REQ_OP_DISCARD: 420 491 return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); 421 492 case REQ_OP_WRITE: 422 - if (cmd->use_aio) 423 - return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); 424 - else 425 - return lo_write_simple(lo, rq, pos); 493 + return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); 426 494 case REQ_OP_READ: 427 - if (cmd->use_aio) 428 - return lo_rw_aio(lo, cmd, pos, ITER_DEST); 429 - else 430 - return lo_read_simple(lo, rq, pos); 495 + return lo_rw_aio(lo, cmd, pos, ITER_DEST); 431 496 default: 432 497 WARN_ON_ONCE(1); 433 498 return -EIO; ··· 585 662 * dependency. 586 663 */ 587 664 fput(old_file); 665 + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); 588 666 if (partscan) 589 667 loop_reread_partitions(lo); 590 668 591 669 error = 0; 592 670 done: 593 - /* enable and uncork uevent now that we are done */ 594 - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); 671 + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); 595 672 return error; 596 673 597 674 out_err: 598 675 loop_global_unlock(lo, is_loop); 599 676 out_putf: 600 677 fput(file); 678 + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); 601 679 goto done; 602 680 } 603 681 ··· 1053 1129 if (partscan) 1054 1130 clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); 1055 1131 1056 - /* enable and uncork uevent now that we are done */ 1057 1132 dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); 1133 + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); 1058 1134 1059 1135 loop_global_unlock(lo, is_loop); 1060 1136 if (partscan) ··· 1845 1921 struct loop_device *lo = rq->q->queuedata; 1846 1922 int ret = 0; 1847 1923 struct mem_cgroup *old_memcg = NULL; 1848 - const bool use_aio = cmd->use_aio; 1849 1924 1850 1925 if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { 1851 1926 ret = 
-EIO; ··· 1874 1951 } 1875 1952 failed: 1876 1953 /* complete non-aio request */ 1877 - if (!use_aio || ret) { 1954 + if (ret != -EIOCBQUEUED) { 1878 1955 if (ret == -EOPNOTSUPP) 1879 1956 cmd->ret = ret; 1880 1957 else
+295 -280
drivers/block/ublk_drv.c
··· 123 123 #define UBLK_IO_FLAG_OWNED_BY_SRV 0x02 124 124 125 125 /* 126 - * IO command is aborted, so this flag is set in case of 127 - * !UBLK_IO_FLAG_ACTIVE. 128 - * 129 - * After this flag is observed, any pending or new incoming request 130 - * associated with this io command will be failed immediately 131 - */ 132 - #define UBLK_IO_FLAG_ABORTED 0x04 133 - 134 - /* 135 126 * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires 136 127 * get data buffer address from ublksrv. 137 128 * ··· 190 199 struct completion completion; 191 200 unsigned int nr_queues_ready; 192 201 unsigned int nr_privileged_daemon; 193 - 194 - struct work_struct nosrv_work; 195 202 }; 196 203 197 204 /* header of ublk_params */ ··· 198 209 __u32 types; 199 210 }; 200 211 201 - static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq); 202 - 212 + static void ublk_stop_dev_unlocked(struct ublk_device *ub); 213 + static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); 203 214 static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, 204 215 struct ublk_queue *ubq, int tag, size_t offset); 205 216 static inline unsigned int ublk_req_build_flags(struct request *req); ··· 1063 1074 1064 1075 static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) 1065 1076 { 1066 - return ubq->ubq_daemon->flags & PF_EXITING; 1077 + return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING; 1067 1078 } 1068 1079 1069 1080 /* todo: handle partial completion */ ··· 1073 1084 struct ublk_io *io = &ubq->ios[req->tag]; 1074 1085 unsigned int unmapped_bytes; 1075 1086 blk_status_t res = BLK_STS_OK; 1076 - 1077 - /* called from ublk_abort_queue() code path */ 1078 - if (io->flags & UBLK_IO_FLAG_ABORTED) { 1079 - res = BLK_STS_IOERR; 1080 - goto exit; 1081 - } 1082 1087 1083 1088 /* failed read IO if nothing is read */ 1084 1089 if (!io->res && req_op(req) == REQ_OP_READ) ··· 1121 1138 struct request *req = blk_mq_rq_from_pdu(data); 
1122 1139 1123 1140 __ublk_complete_rq(req); 1124 - } 1125 - 1126 - static void ublk_do_fail_rq(struct request *req) 1127 - { 1128 - struct ublk_queue *ubq = req->mq_hctx->driver_data; 1129 - 1130 - if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) 1131 - blk_mq_requeue_request(req, false); 1132 - else 1133 - __ublk_complete_rq(req); 1134 - } 1135 - 1136 - static void ublk_fail_rq_fn(struct kref *ref) 1137 - { 1138 - struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data, 1139 - ref); 1140 - struct request *req = blk_mq_rq_from_pdu(data); 1141 - 1142 - ublk_do_fail_rq(req); 1143 - } 1144 - 1145 - /* 1146 - * Since ublk_rq_task_work_cb always fails requests immediately during 1147 - * exiting, __ublk_fail_req() is only called from abort context during 1148 - * exiting. So lock is unnecessary. 1149 - * 1150 - * Also aborting may not be started yet, keep in mind that one failed 1151 - * request may be issued by block layer again. 1152 - */ 1153 - static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, 1154 - struct request *req) 1155 - { 1156 - WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); 1157 - 1158 - if (ublk_need_req_ref(ubq)) { 1159 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 1160 - 1161 - kref_put(&data->ref, ublk_fail_rq_fn); 1162 - } else { 1163 - ublk_do_fail_rq(req); 1164 - } 1165 1141 } 1166 1142 1167 1143 static void ubq_complete_io_cmd(struct ublk_io *io, int res, ··· 1278 1336 static enum blk_eh_timer_return ublk_timeout(struct request *rq) 1279 1337 { 1280 1338 struct ublk_queue *ubq = rq->mq_hctx->driver_data; 1281 - unsigned int nr_inflight = 0; 1282 - int i; 1283 1339 1284 1340 if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) { 1285 1341 if (!ubq->timeout) { ··· 1285 1345 ubq->timeout = true; 1286 1346 } 1287 1347 1288 - return BLK_EH_DONE; 1289 - } 1290 - 1291 - if (!ubq_daemon_is_dying(ubq)) 1292 - return BLK_EH_RESET_TIMER; 1293 - 1294 - for (i = 0; i < ubq->q_depth; i++) { 1295 - struct ublk_io *io = &ubq->ios[i]; 
1296 - 1297 - if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) 1298 - nr_inflight++; 1299 - } 1300 - 1301 - /* cancelable uring_cmd can't help us if all commands are in-flight */ 1302 - if (nr_inflight == ubq->q_depth) { 1303 - struct ublk_device *ub = ubq->dev; 1304 - 1305 - if (ublk_abort_requests(ub, ubq)) { 1306 - schedule_work(&ub->nosrv_work); 1307 - } 1308 1348 return BLK_EH_DONE; 1309 1349 } 1310 1350 ··· 1390 1470 .timeout = ublk_timeout, 1391 1471 }; 1392 1472 1473 + static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) 1474 + { 1475 + int i; 1476 + 1477 + /* All old ioucmds have to be completed */ 1478 + ubq->nr_io_ready = 0; 1479 + 1480 + /* 1481 + * old daemon is PF_EXITING, put it now 1482 + * 1483 + * It could be NULL in case of closing one quiesced device. 1484 + */ 1485 + if (ubq->ubq_daemon) 1486 + put_task_struct(ubq->ubq_daemon); 1487 + /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ 1488 + ubq->ubq_daemon = NULL; 1489 + ubq->timeout = false; 1490 + 1491 + for (i = 0; i < ubq->q_depth; i++) { 1492 + struct ublk_io *io = &ubq->ios[i]; 1493 + 1494 + /* 1495 + * UBLK_IO_FLAG_CANCELED is kept for avoiding to touch 1496 + * io->cmd 1497 + */ 1498 + io->flags &= UBLK_IO_FLAG_CANCELED; 1499 + io->cmd = NULL; 1500 + io->addr = 0; 1501 + } 1502 + } 1503 + 1393 1504 static int ublk_ch_open(struct inode *inode, struct file *filp) 1394 1505 { 1395 1506 struct ublk_device *ub = container_of(inode->i_cdev, ··· 1432 1481 return 0; 1433 1482 } 1434 1483 1484 + static void ublk_reset_ch_dev(struct ublk_device *ub) 1485 + { 1486 + int i; 1487 + 1488 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) 1489 + ublk_queue_reinit(ub, ublk_get_queue(ub, i)); 1490 + 1491 + /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ 1492 + ub->mm = NULL; 1493 + ub->nr_queues_ready = 0; 1494 + ub->nr_privileged_daemon = 0; 1495 + } 1496 + 1497 + static struct gendisk *ublk_get_disk(struct ublk_device *ub) 1498 + { 1499 + 
struct gendisk *disk; 1500 + 1501 + spin_lock(&ub->lock); 1502 + disk = ub->ub_disk; 1503 + if (disk) 1504 + get_device(disk_to_dev(disk)); 1505 + spin_unlock(&ub->lock); 1506 + 1507 + return disk; 1508 + } 1509 + 1510 + static void ublk_put_disk(struct gendisk *disk) 1511 + { 1512 + if (disk) 1513 + put_device(disk_to_dev(disk)); 1514 + } 1515 + 1435 1516 static int ublk_ch_release(struct inode *inode, struct file *filp) 1436 1517 { 1437 1518 struct ublk_device *ub = filp->private_data; 1519 + struct gendisk *disk; 1520 + int i; 1438 1521 1522 + /* 1523 + * disk isn't attached yet, either device isn't live, or it has 1524 + * been removed already, so we needn't to do anything 1525 + */ 1526 + disk = ublk_get_disk(ub); 1527 + if (!disk) 1528 + goto out; 1529 + 1530 + /* 1531 + * All uring_cmd are done now, so abort any request outstanding to 1532 + * the ublk server 1533 + * 1534 + * This can be done in lockless way because ublk server has been 1535 + * gone 1536 + * 1537 + * More importantly, we have to provide forward progress guarantee 1538 + * without holding ub->mutex, otherwise control task grabbing 1539 + * ub->mutex triggers deadlock 1540 + * 1541 + * All requests may be inflight, so ->canceling may not be set, set 1542 + * it now. 1543 + */ 1544 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { 1545 + struct ublk_queue *ubq = ublk_get_queue(ub, i); 1546 + 1547 + ubq->canceling = true; 1548 + ublk_abort_queue(ub, ubq); 1549 + } 1550 + blk_mq_kick_requeue_list(disk->queue); 1551 + 1552 + /* 1553 + * All infligh requests have been completed or requeued and any new 1554 + * request will be failed or requeued via `->canceling` now, so it is 1555 + * fine to grab ub->mutex now. 1556 + */ 1557 + mutex_lock(&ub->mutex); 1558 + 1559 + /* double check after grabbing lock */ 1560 + if (!ub->ub_disk) 1561 + goto unlock; 1562 + 1563 + /* 1564 + * Transition the device to the nosrv state. 
What exactly this 1565 + * means depends on the recovery flags 1566 + */ 1567 + blk_mq_quiesce_queue(disk->queue); 1568 + if (ublk_nosrv_should_stop_dev(ub)) { 1569 + /* 1570 + * Allow any pending/future I/O to pass through quickly 1571 + * with an error. This is needed because del_gendisk 1572 + * waits for all pending I/O to complete 1573 + */ 1574 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) 1575 + ublk_get_queue(ub, i)->force_abort = true; 1576 + blk_mq_unquiesce_queue(disk->queue); 1577 + 1578 + ublk_stop_dev_unlocked(ub); 1579 + } else { 1580 + if (ublk_nosrv_dev_should_queue_io(ub)) { 1581 + /* ->canceling is set and all requests are aborted */ 1582 + ub->dev_info.state = UBLK_S_DEV_QUIESCED; 1583 + } else { 1584 + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; 1585 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) 1586 + ublk_get_queue(ub, i)->fail_io = true; 1587 + } 1588 + blk_mq_unquiesce_queue(disk->queue); 1589 + } 1590 + unlock: 1591 + mutex_unlock(&ub->mutex); 1592 + ublk_put_disk(disk); 1593 + 1594 + /* all uring_cmd has been done now, reset device & ubq */ 1595 + ublk_reset_ch_dev(ub); 1596 + out: 1439 1597 clear_bit(UB_STATE_OPEN, &ub->state); 1440 1598 return 0; 1441 1599 } ··· 1611 1551 ublk_put_req_ref(ubq, req); 1612 1552 } 1613 1553 1554 + static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, 1555 + struct request *req) 1556 + { 1557 + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); 1558 + 1559 + if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) 1560 + blk_mq_requeue_request(req, false); 1561 + else { 1562 + io->res = -EIO; 1563 + __ublk_complete_rq(req); 1564 + } 1565 + } 1566 + 1614 1567 /* 1615 - * Called from ubq_daemon context via cancel fn, meantime quiesce ublk 1616 - * blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon 1617 - * context, so everything is serialized. 
1568 + * Called from ublk char device release handler, when any uring_cmd is 1569 + * done, meantime request queue is "quiesced" since all inflight requests 1570 + * can't be completed because ublk server is dead. 1571 + * 1572 + * So no one can hold our request IO reference any more, simply ignore the 1573 + * reference, and complete the request immediately 1618 1574 */ 1619 1575 static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) 1620 1576 { ··· 1647 1571 * will do it 1648 1572 */ 1649 1573 rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); 1650 - if (rq && blk_mq_request_started(rq)) { 1651 - io->flags |= UBLK_IO_FLAG_ABORTED; 1574 + if (rq && blk_mq_request_started(rq)) 1652 1575 __ublk_fail_req(ubq, io, rq); 1653 - } 1654 1576 } 1655 1577 } 1656 1578 } 1657 1579 1658 1580 /* Must be called when queue is frozen */ 1659 - static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) 1581 + static void ublk_mark_queue_canceling(struct ublk_queue *ubq) 1660 1582 { 1661 - bool canceled; 1662 - 1663 1583 spin_lock(&ubq->cancel_lock); 1664 - canceled = ubq->canceling; 1665 - if (!canceled) 1584 + if (!ubq->canceling) 1666 1585 ubq->canceling = true; 1667 1586 spin_unlock(&ubq->cancel_lock); 1668 - 1669 - return canceled; 1670 1587 } 1671 1588 1672 - static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) 1589 + static void ublk_start_cancel(struct ublk_queue *ubq) 1673 1590 { 1674 - bool was_canceled = ubq->canceling; 1675 - struct gendisk *disk; 1676 - 1677 - if (was_canceled) 1678 - return false; 1679 - 1680 - spin_lock(&ub->lock); 1681 - disk = ub->ub_disk; 1682 - if (disk) 1683 - get_device(disk_to_dev(disk)); 1684 - spin_unlock(&ub->lock); 1591 + struct ublk_device *ub = ubq->dev; 1592 + struct gendisk *disk = ublk_get_disk(ub); 1685 1593 1686 1594 /* Our disk has been dead */ 1687 1595 if (!disk) 1688 - return false; 1689 - 1596 + return; 1690 1597 /* 1691 1598 * Now we are serialized with ublk_queue_rq() 
1692 1599 * ··· 1678 1619 * touch completed uring_cmd 1679 1620 */ 1680 1621 blk_mq_quiesce_queue(disk->queue); 1681 - was_canceled = ublk_mark_queue_canceling(ubq); 1682 - if (!was_canceled) { 1683 - /* abort queue is for making forward progress */ 1684 - ublk_abort_queue(ub, ubq); 1685 - } 1622 + ublk_mark_queue_canceling(ubq); 1686 1623 blk_mq_unquiesce_queue(disk->queue); 1687 - put_device(disk_to_dev(disk)); 1688 - 1689 - return !was_canceled; 1624 + ublk_put_disk(disk); 1690 1625 } 1691 1626 1692 - static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, 1627 + static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, 1693 1628 unsigned int issue_flags) 1694 1629 { 1630 + struct ublk_io *io = &ubq->ios[tag]; 1631 + struct ublk_device *ub = ubq->dev; 1632 + struct request *req; 1695 1633 bool done; 1696 1634 1697 1635 if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) 1636 + return; 1637 + 1638 + /* 1639 + * Don't try to cancel this command if the request is started for 1640 + * avoiding race between io_uring_cmd_done() and 1641 + * io_uring_cmd_complete_in_task(). 
1642 + * 1643 + * Either the started request will be aborted via __ublk_abort_rq(), 1644 + * then this uring_cmd is canceled next time, or it will be done in 1645 + * task work function ublk_dispatch_req() because io_uring guarantees 1646 + * that ublk_dispatch_req() is always called 1647 + */ 1648 + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); 1649 + if (req && blk_mq_request_started(req)) 1698 1650 return; 1699 1651 1700 1652 spin_lock(&ubq->cancel_lock); ··· 1721 1651 /* 1722 1652 * The ublk char device won't be closed when calling cancel fn, so both 1723 1653 * ublk device and queue are guaranteed to be live 1654 + * 1655 + * Two-stage cancel: 1656 + * 1657 + * - make every active uring_cmd done in ->cancel_fn() 1658 + * 1659 + * - aborting inflight ublk IO requests in ublk char device release handler, 1660 + * which depends on 1st stage because device can only be closed iff all 1661 + * uring_cmd are done 1662 + * 1663 + * Do _not_ try to acquire ub->mutex before all inflight requests are 1664 + * aborted, otherwise deadlock may be caused. 
1724 1665 */ 1725 1666 static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, 1726 1667 unsigned int issue_flags) ··· 1739 1658 struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); 1740 1659 struct ublk_queue *ubq = pdu->ubq; 1741 1660 struct task_struct *task; 1742 - struct ublk_device *ub; 1743 - bool need_schedule; 1744 - struct ublk_io *io; 1745 1661 1746 1662 if (WARN_ON_ONCE(!ubq)) 1747 1663 return; ··· 1750 1672 if (WARN_ON_ONCE(task && task != ubq->ubq_daemon)) 1751 1673 return; 1752 1674 1753 - ub = ubq->dev; 1754 - need_schedule = ublk_abort_requests(ub, ubq); 1675 + if (!ubq->canceling) 1676 + ublk_start_cancel(ubq); 1755 1677 1756 - io = &ubq->ios[pdu->tag]; 1757 - WARN_ON_ONCE(io->cmd != cmd); 1758 - ublk_cancel_cmd(ubq, io, issue_flags); 1759 - 1760 - if (need_schedule) { 1761 - schedule_work(&ub->nosrv_work); 1762 - } 1678 + WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); 1679 + ublk_cancel_cmd(ubq, pdu->tag, issue_flags); 1763 1680 } 1764 1681 1765 1682 static inline bool ublk_queue_ready(struct ublk_queue *ubq) ··· 1767 1694 int i; 1768 1695 1769 1696 for (i = 0; i < ubq->q_depth; i++) 1770 - ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED); 1697 + ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED); 1771 1698 } 1772 1699 1773 1700 /* Cancel all pending commands, must be called after del_gendisk() returns */ ··· 1805 1732 } 1806 1733 } 1807 1734 1808 - static void __ublk_quiesce_dev(struct ublk_device *ub) 1735 + static void ublk_force_abort_dev(struct ublk_device *ub) 1809 1736 { 1810 - pr_devel("%s: quiesce ub: dev_id %d state %s\n", 1737 + int i; 1738 + 1739 + pr_devel("%s: force abort ub: dev_id %d state %s\n", 1811 1740 __func__, ub->dev_info.dev_id, 1812 1741 ub->dev_info.state == UBLK_S_DEV_LIVE ? 
1813 1742 "LIVE" : "QUIESCED"); 1814 1743 blk_mq_quiesce_queue(ub->ub_disk->queue); 1815 - ublk_wait_tagset_rqs_idle(ub); 1816 - ub->dev_info.state = UBLK_S_DEV_QUIESCED; 1817 - } 1744 + if (ub->dev_info.state == UBLK_S_DEV_LIVE) 1745 + ublk_wait_tagset_rqs_idle(ub); 1818 1746 1819 - static void ublk_unquiesce_dev(struct ublk_device *ub) 1820 - { 1821 - int i; 1822 - 1823 - pr_devel("%s: unquiesce ub: dev_id %d state %s\n", 1824 - __func__, ub->dev_info.dev_id, 1825 - ub->dev_info.state == UBLK_S_DEV_LIVE ? 1826 - "LIVE" : "QUIESCED"); 1827 - /* quiesce_work has run. We let requeued rqs be aborted 1828 - * before running fallback_wq. "force_abort" must be seen 1829 - * after request queue is unqiuesced. Then del_gendisk() 1830 - * can move on. 1831 - */ 1832 1747 for (i = 0; i < ub->dev_info.nr_hw_queues; i++) 1833 1748 ublk_get_queue(ub, i)->force_abort = true; 1834 - 1835 1749 blk_mq_unquiesce_queue(ub->ub_disk->queue); 1836 1750 /* We may have requeued some rqs in ublk_quiesce_queue() */ 1837 1751 blk_mq_kick_requeue_list(ub->ub_disk->queue); ··· 1839 1779 return disk; 1840 1780 } 1841 1781 1842 - static void ublk_stop_dev(struct ublk_device *ub) 1782 + static void ublk_stop_dev_unlocked(struct ublk_device *ub) 1783 + __must_hold(&ub->mutex) 1843 1784 { 1844 1785 struct gendisk *disk; 1845 1786 1846 - mutex_lock(&ub->mutex); 1847 1787 if (ub->dev_info.state == UBLK_S_DEV_DEAD) 1848 - goto unlock; 1849 - if (ublk_nosrv_dev_should_queue_io(ub)) { 1850 - if (ub->dev_info.state == UBLK_S_DEV_LIVE) 1851 - __ublk_quiesce_dev(ub); 1852 - ublk_unquiesce_dev(ub); 1853 - } 1788 + return; 1789 + 1790 + if (ublk_nosrv_dev_should_queue_io(ub)) 1791 + ublk_force_abort_dev(ub); 1854 1792 del_gendisk(ub->ub_disk); 1855 1793 disk = ublk_detach_disk(ub); 1856 1794 put_disk(disk); 1857 - unlock: 1795 + } 1796 + 1797 + static void ublk_stop_dev(struct ublk_device *ub) 1798 + { 1799 + mutex_lock(&ub->mutex); 1800 + ublk_stop_dev_unlocked(ub); 1858 1801 mutex_unlock(&ub->mutex); 1859 
1802 ublk_cancel_dev(ub); 1860 1803 } 1861 1804 1862 - static void ublk_nosrv_work(struct work_struct *work) 1805 + /* reset ublk io_uring queue & io flags */ 1806 + static void ublk_reset_io_flags(struct ublk_device *ub) 1863 1807 { 1864 - struct ublk_device *ub = 1865 - container_of(work, struct ublk_device, nosrv_work); 1866 - int i; 1808 + int i, j; 1867 1809 1868 - if (ublk_nosrv_should_stop_dev(ub)) { 1869 - ublk_stop_dev(ub); 1870 - return; 1810 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { 1811 + struct ublk_queue *ubq = ublk_get_queue(ub, i); 1812 + 1813 + /* UBLK_IO_FLAG_CANCELED can be cleared now */ 1814 + spin_lock(&ubq->cancel_lock); 1815 + for (j = 0; j < ubq->q_depth; j++) 1816 + ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED; 1817 + spin_unlock(&ubq->cancel_lock); 1818 + ubq->canceling = false; 1819 + ubq->fail_io = false; 1871 1820 } 1872 - 1873 - mutex_lock(&ub->mutex); 1874 - if (ub->dev_info.state != UBLK_S_DEV_LIVE) 1875 - goto unlock; 1876 - 1877 - if (ublk_nosrv_dev_should_queue_io(ub)) { 1878 - __ublk_quiesce_dev(ub); 1879 - } else { 1880 - blk_mq_quiesce_queue(ub->ub_disk->queue); 1881 - ub->dev_info.state = UBLK_S_DEV_FAIL_IO; 1882 - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { 1883 - ublk_get_queue(ub, i)->fail_io = true; 1884 - } 1885 - blk_mq_unquiesce_queue(ub->ub_disk->queue); 1886 - } 1887 - 1888 - unlock: 1889 - mutex_unlock(&ub->mutex); 1890 - ublk_cancel_dev(ub); 1891 1821 } 1892 1822 1893 1823 /* device can only be started after all IOs are ready */ 1894 1824 static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) 1825 + __must_hold(&ub->mutex) 1895 1826 { 1896 - mutex_lock(&ub->mutex); 1897 1827 ubq->nr_io_ready++; 1898 1828 if (ublk_queue_ready(ubq)) { 1899 1829 ubq->ubq_daemon = current; ··· 1893 1843 if (capable(CAP_SYS_ADMIN)) 1894 1844 ub->nr_privileged_daemon++; 1895 1845 } 1896 - if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) 1846 + 1847 + if (ub->nr_queues_ready == 
ub->dev_info.nr_hw_queues) { 1848 + /* now we are ready for handling ublk io request */ 1849 + ublk_reset_io_flags(ub); 1897 1850 complete_all(&ub->completion); 1898 - mutex_unlock(&ub->mutex); 1899 - } 1900 - 1901 - static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, 1902 - int tag) 1903 - { 1904 - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); 1905 - struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); 1906 - 1907 - ublk_queue_cmd(ubq, req); 1851 + } 1908 1852 } 1909 1853 1910 1854 static inline int ublk_check_cmd_op(u32 cmd_op) ··· 1973 1929 return io_buffer_unregister_bvec(cmd, index, issue_flags); 1974 1930 } 1975 1931 1932 + static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, 1933 + struct ublk_io *io, __u64 buf_addr) 1934 + { 1935 + struct ublk_device *ub = ubq->dev; 1936 + int ret = 0; 1937 + 1938 + /* 1939 + * When handling FETCH command for setting up ublk uring queue, 1940 + * ub->mutex is the innermost lock, and we won't block for handling 1941 + * FETCH, so it is fine even for IO_URING_F_NONBLOCK. 
1942 + */ 1943 + mutex_lock(&ub->mutex); 1944 + /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ 1945 + if (ublk_queue_ready(ubq)) { 1946 + ret = -EBUSY; 1947 + goto out; 1948 + } 1949 + 1950 + /* allow each command to be FETCHed at most once */ 1951 + if (io->flags & UBLK_IO_FLAG_ACTIVE) { 1952 + ret = -EINVAL; 1953 + goto out; 1954 + } 1955 + 1956 + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); 1957 + 1958 + if (ublk_need_map_io(ubq)) { 1959 + /* 1960 + * FETCH_RQ has to provide IO buffer if NEED GET 1961 + * DATA is not enabled 1962 + */ 1963 + if (!buf_addr && !ublk_need_get_data(ubq)) 1964 + goto out; 1965 + } else if (buf_addr) { 1966 + /* User copy requires addr to be unset */ 1967 + ret = -EINVAL; 1968 + goto out; 1969 + } 1970 + 1971 + ublk_fill_io_cmd(io, cmd, buf_addr); 1972 + ublk_mark_io_ready(ub, ubq); 1973 + out: 1974 + mutex_unlock(&ub->mutex); 1975 + return ret; 1976 + } 1977 + 1976 1978 static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, 1977 1979 unsigned int issue_flags, 1978 1980 const struct ublksrv_io_cmd *ub_cmd) ··· 2075 1985 case UBLK_IO_UNREGISTER_IO_BUF: 2076 1986 return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); 2077 1987 case UBLK_IO_FETCH_REQ: 2078 - /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ 2079 - if (ublk_queue_ready(ubq)) { 2080 - ret = -EBUSY; 1988 + ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); 1989 + if (ret) 2081 1990 goto out; 2082 - } 2083 - /* 2084 - * The io is being handled by server, so COMMIT_RQ is expected 2085 - * instead of FETCH_REQ 2086 - */ 2087 - if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) 2088 - goto out; 2089 - 2090 - if (ublk_need_map_io(ubq)) { 2091 - /* 2092 - * FETCH_RQ has to provide IO buffer if NEED GET 2093 - * DATA is not enabled 2094 - */ 2095 - if (!ub_cmd->addr && !ublk_need_get_data(ubq)) 2096 - goto out; 2097 - } else if (ub_cmd->addr) { 2098 - /* User copy requires addr to be unset */ 2099 - ret = -EINVAL; 2100 - goto out; 2101 - } 2102 - 
2103 - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); 2104 - ublk_mark_io_ready(ub, ubq); 2105 1991 break; 2106 1992 case UBLK_IO_COMMIT_AND_FETCH_REQ: 2107 1993 req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); ··· 2109 2043 if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) 2110 2044 goto out; 2111 2045 ublk_fill_io_cmd(io, cmd, ub_cmd->addr); 2112 - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); 2113 - break; 2046 + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); 2047 + ublk_dispatch_req(ubq, req, issue_flags); 2048 + return -EIOCBQUEUED; 2114 2049 default: 2115 2050 goto out; 2116 2051 } ··· 2478 2411 bool unprivileged; 2479 2412 2480 2413 ublk_stop_dev(ub); 2481 - cancel_work_sync(&ub->nosrv_work); 2482 2414 cdev_device_del(&ub->cdev, &ub->cdev_dev); 2483 2415 unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; 2484 2416 ublk_put_device(ub); ··· 2762 2696 goto out_unlock; 2763 2697 mutex_init(&ub->mutex); 2764 2698 spin_lock_init(&ub->lock); 2765 - INIT_WORK(&ub->nosrv_work, ublk_nosrv_work); 2766 2699 2767 2700 ret = ublk_alloc_dev_number(ub, header->dev_id); 2768 2701 if (ret < 0) ··· 2893 2828 static int ublk_ctrl_stop_dev(struct ublk_device *ub) 2894 2829 { 2895 2830 ublk_stop_dev(ub); 2896 - cancel_work_sync(&ub->nosrv_work); 2897 2831 return 0; 2898 2832 } 2899 2833 ··· 2996 2932 return ret; 2997 2933 } 2998 2934 2999 - static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) 3000 - { 3001 - int i; 3002 - 3003 - WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq))); 3004 - 3005 - /* All old ioucmds have to be completed */ 3006 - ubq->nr_io_ready = 0; 3007 - /* old daemon is PF_EXITING, put it now */ 3008 - put_task_struct(ubq->ubq_daemon); 3009 - /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ 3010 - ubq->ubq_daemon = NULL; 3011 - ubq->timeout = false; 3012 - ubq->canceling = false; 3013 - 3014 - for (i = 0; i < ubq->q_depth; i++) { 3015 - struct ublk_io *io = 
&ubq->ios[i]; 3016 - 3017 - /* forget everything now and be ready for new FETCH_REQ */ 3018 - io->flags = 0; 3019 - io->cmd = NULL; 3020 - io->addr = 0; 3021 - } 3022 - } 3023 - 3024 2935 static int ublk_ctrl_start_recovery(struct ublk_device *ub, 3025 2936 const struct ublksrv_ctrl_cmd *header) 3026 2937 { 3027 2938 int ret = -EINVAL; 3028 - int i; 3029 2939 3030 2940 mutex_lock(&ub->mutex); 3031 2941 if (ublk_nosrv_should_stop_dev(ub)) 3032 - goto out_unlock; 3033 - if (!ub->nr_queues_ready) 3034 2942 goto out_unlock; 3035 2943 /* 3036 2944 * START_RECOVERY is only allowd after: ··· 3027 2991 goto out_unlock; 3028 2992 } 3029 2993 pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id); 3030 - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) 3031 - ublk_queue_reinit(ub, ublk_get_queue(ub, i)); 3032 - /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ 3033 - ub->mm = NULL; 3034 - ub->nr_queues_ready = 0; 3035 - ub->nr_privileged_daemon = 0; 3036 2994 init_completion(&ub->completion); 3037 2995 ret = 0; 3038 2996 out_unlock: ··· 3039 3009 { 3040 3010 int ublksrv_pid = (int)header->data[0]; 3041 3011 int ret = -EINVAL; 3042 - int i; 3043 3012 3044 3013 pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", 3045 3014 __func__, ub->dev_info.nr_hw_queues, header->dev_id); ··· 3058 3029 goto out_unlock; 3059 3030 } 3060 3031 ub->dev_info.ublksrv_pid = ublksrv_pid; 3032 + ub->dev_info.state = UBLK_S_DEV_LIVE; 3061 3033 pr_devel("%s: new ublksrv_pid %d, dev id %d\n", 3062 3034 __func__, ublksrv_pid, header->dev_id); 3063 - 3064 - if (ublk_nosrv_dev_should_queue_io(ub)) { 3065 - ub->dev_info.state = UBLK_S_DEV_LIVE; 3066 - blk_mq_unquiesce_queue(ub->ub_disk->queue); 3067 - pr_devel("%s: queue unquiesced, dev id %d.\n", 3068 - __func__, header->dev_id); 3069 - blk_mq_kick_requeue_list(ub->ub_disk->queue); 3070 - } else { 3071 - blk_mq_quiesce_queue(ub->ub_disk->queue); 3072 - ub->dev_info.state = UBLK_S_DEV_LIVE; 
3073 - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { 3074 - ublk_get_queue(ub, i)->fail_io = false; 3075 - } 3076 - blk_mq_unquiesce_queue(ub->ub_disk->queue); 3077 - } 3078 - 3035 + blk_mq_kick_requeue_list(ub->ub_disk->queue); 3079 3036 ret = 0; 3080 3037 out_unlock: 3081 3038 mutex_unlock(&ub->mutex);
+2 -3
drivers/md/md-bitmap.c
··· 2357 2357 2358 2358 if (!bitmap) 2359 2359 return -ENOENT; 2360 - if (bitmap->mddev->bitmap_info.external) 2361 - return -ENOENT; 2362 - if (!bitmap->storage.sb_page) /* no superblock */ 2360 + if (!bitmap->mddev->bitmap_info.external && 2361 + !bitmap->storage.sb_page) 2363 2362 return -EINVAL; 2364 2363 sb = kmap_local_page(bitmap->storage.sb_page); 2365 2364 stats->sync_size = le64_to_cpu(sb->sync_size);
+16 -10
drivers/md/raid1.c
··· 2200 2200 if (!rdev_set_badblocks(rdev, sect, s, 0)) 2201 2201 abort = 1; 2202 2202 } 2203 - if (abort) { 2204 - conf->recovery_disabled = 2205 - mddev->recovery_disabled; 2206 - set_bit(MD_RECOVERY_INTR, &mddev->recovery); 2207 - md_done_sync(mddev, r1_bio->sectors, 0); 2208 - put_buf(r1_bio); 2203 + if (abort) 2209 2204 return 0; 2210 - } 2205 + 2211 2206 /* Try next page */ 2212 2207 sectors -= s; 2213 2208 sect += s; ··· 2341 2346 int disks = conf->raid_disks * 2; 2342 2347 struct bio *wbio; 2343 2348 2344 - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) 2345 - /* ouch - failed to read all of that. */ 2346 - if (!fix_sync_read_error(r1_bio)) 2349 + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { 2350 + /* 2351 + * ouch - failed to read all of that. 2352 + * No need to fix read error for check/repair 2353 + * because all member disks are read. 2354 + */ 2355 + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) || 2356 + !fix_sync_read_error(r1_bio)) { 2357 + conf->recovery_disabled = mddev->recovery_disabled; 2358 + set_bit(MD_RECOVERY_INTR, &mddev->recovery); 2359 + md_done_sync(mddev, r1_bio->sectors, 0); 2360 + put_buf(r1_bio); 2347 2361 return; 2362 + } 2363 + } 2348 2364 2349 2365 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) 2350 2366 process_checks(r1_bio);
+1
drivers/md/raid10.c
··· 1735 1735 * The discard bio returns only first r10bio finishes 1736 1736 */ 1737 1737 if (first_copy) { 1738 + md_account_bio(mddev, &bio); 1738 1739 r10_bio->master_bio = bio; 1739 1740 set_bit(R10BIO_Discard, &r10_bio->state); 1740 1741 first_copy = false;
+1 -1
drivers/nvme/host/core.c
··· 4300 4300 if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) 4301 4301 nvme_queue_scan(ctrl); 4302 4302 #ifdef CONFIG_NVME_MULTIPATH 4303 - else 4303 + else if (ctrl->ana_log_buf) 4304 4304 /* Re-read the ANA log page to not miss updates */ 4305 4305 queue_work(nvme_wq, &ctrl->ana_work); 4306 4306 #endif
+7 -7
drivers/nvme/host/multipath.c
··· 1051 1051 1052 1052 list_for_each_entry_rcu(ns, &head->list, siblings) { 1053 1053 /* 1054 + * Ensure that ns path disk node is already added otherwise we 1055 + * may get invalid kobj name for target 1056 + */ 1057 + if (!test_bit(GD_ADDED, &ns->disk->state)) 1058 + continue; 1059 + 1060 + /* 1054 1061 * Avoid creating link if it already exists for the given path. 1055 1062 * When path ana state transitions from optimized to non- 1056 1063 * optimized or vice-versa, the nvme_mpath_set_live() is ··· 1070 1063 * against multiple nvme paths being simultaneously added. 1071 1064 */ 1072 1065 if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) 1073 - continue; 1074 - 1075 - /* 1076 - * Ensure that ns path disk node is already added otherwise we 1077 - * may get invalid kobj name for target 1078 - */ 1079 - if (!test_bit(GD_ADDED, &ns->disk->state)) 1080 1066 continue; 1081 1067 1082 1068 target = disk_to_dev(ns->disk);
+1 -1
drivers/nvme/target/auth.c
··· 240 240 { 241 241 cancel_delayed_work(&sq->auth_expired_work); 242 242 #ifdef CONFIG_NVME_TARGET_TCP_TLS 243 - sq->tls_key = 0; 243 + sq->tls_key = NULL; 244 244 #endif 245 245 kfree(sq->dhchap_c1); 246 246 sq->dhchap_c1 = NULL;
+3
drivers/nvme/target/core.c
··· 324 324 325 325 lockdep_assert_held(&nvmet_config_sem); 326 326 327 + if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) 328 + return -EINVAL; 329 + 327 330 ops = nvmet_transports[port->disc_addr.trtype]; 328 331 if (!ops) { 329 332 up_write(&nvmet_config_sem);
+58 -30
drivers/nvme/target/pci-epf.c
··· 1648 1648 { 1649 1649 struct nvmet_pci_epf_iod *iod; 1650 1650 int ret, n = 0; 1651 + u16 head = sq->head; 1651 1652 1652 1653 sq->tail = nvmet_pci_epf_bar_read32(ctrl, sq->db); 1653 - while (sq->head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) { 1654 + while (head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) { 1654 1655 iod = nvmet_pci_epf_alloc_iod(sq); 1655 1656 if (!iod) 1656 1657 break; 1657 1658 1658 1659 /* Get the NVMe command submitted by the host. */ 1659 1660 ret = nvmet_pci_epf_transfer(ctrl, &iod->cmd, 1660 - sq->pci_addr + sq->head * sq->qes, 1661 + sq->pci_addr + head * sq->qes, 1661 1662 sq->qes, DMA_FROM_DEVICE); 1662 1663 if (ret) { 1663 1664 /* Not much we can do... */ ··· 1667 1666 } 1668 1667 1669 1668 dev_dbg(ctrl->dev, "SQ[%u]: head %u, tail %u, command %s\n", 1670 - sq->qid, sq->head, sq->tail, 1669 + sq->qid, head, sq->tail, 1671 1670 nvmet_pci_epf_iod_name(iod)); 1672 1671 1673 - sq->head++; 1674 - if (sq->head == sq->depth) 1675 - sq->head = 0; 1672 + head++; 1673 + if (head == sq->depth) 1674 + head = 0; 1675 + WRITE_ONCE(sq->head, head); 1676 1676 n++; 1677 1677 1678 1678 queue_work_on(WORK_CPU_UNBOUND, sq->iod_wq, &iod->work); ··· 1763 1761 if (!iod) 1764 1762 break; 1765 1763 1766 - /* Post the IOD completion entry. */ 1764 + /* 1765 + * Post the IOD completion entry. If the IOD request was 1766 + * executed (req->execute() called), the CQE is already 1767 + * initialized. However, the IOD may have been failed before 1768 + * that, leaving the CQE not properly initialized. So always 1769 + * initialize it here. 
1770 + */ 1767 1771 cqe = &iod->cqe; 1772 + cqe->sq_head = cpu_to_le16(READ_ONCE(iod->sq->head)); 1773 + cqe->sq_id = cpu_to_le16(iod->sq->qid); 1774 + cqe->command_id = iod->cmd.common.command_id; 1768 1775 cqe->status = cpu_to_le16((iod->status << 1) | cq->phase); 1769 1776 1770 1777 dev_dbg(ctrl->dev, ··· 1809 1798 if (ret < 0) 1810 1799 queue_delayed_work(system_highpri_wq, &cq->work, 1811 1800 NVMET_PCI_EPF_CQ_RETRY_INTERVAL); 1801 + } 1802 + 1803 + static void nvmet_pci_epf_clear_ctrl_config(struct nvmet_pci_epf_ctrl *ctrl) 1804 + { 1805 + struct nvmet_ctrl *tctrl = ctrl->tctrl; 1806 + 1807 + /* Initialize controller status. */ 1808 + tctrl->csts = 0; 1809 + ctrl->csts = 0; 1810 + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); 1811 + 1812 + /* Initialize controller configuration and start polling. */ 1813 + tctrl->cc = 0; 1814 + ctrl->cc = 0; 1815 + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); 1812 1816 } 1813 1817 1814 1818 static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) ··· 1891 1865 return 0; 1892 1866 1893 1867 err: 1894 - ctrl->csts = 0; 1868 + nvmet_pci_epf_clear_ctrl_config(ctrl); 1895 1869 return -EINVAL; 1896 1870 } 1897 1871 1898 - static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) 1872 + static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl, 1873 + bool shutdown) 1899 1874 { 1900 1875 int qid; 1901 1876 1902 1877 if (!ctrl->enabled) 1903 1878 return; 1904 1879 1905 - dev_info(ctrl->dev, "Disabling controller\n"); 1880 + dev_info(ctrl->dev, "%s controller\n", 1881 + shutdown ? 
"Shutting down" : "Disabling"); 1906 1882 1907 1883 ctrl->enabled = false; 1908 1884 cancel_delayed_work_sync(&ctrl->poll_sqs); ··· 1921 1893 nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); 1922 1894 1923 1895 ctrl->csts &= ~NVME_CSTS_RDY; 1896 + if (shutdown) { 1897 + ctrl->csts |= NVME_CSTS_SHST_CMPLT; 1898 + ctrl->cc &= ~NVME_CC_ENABLE; 1899 + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); 1900 + } 1924 1901 } 1925 1902 1926 1903 static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) ··· 1952 1919 } 1953 1920 1954 1921 if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) 1955 - nvmet_pci_epf_disable_ctrl(ctrl); 1922 + nvmet_pci_epf_disable_ctrl(ctrl, false); 1956 1923 1957 - if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) { 1958 - nvmet_pci_epf_disable_ctrl(ctrl); 1959 - ctrl->csts |= NVME_CSTS_SHST_CMPLT; 1960 - } 1924 + if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) 1925 + nvmet_pci_epf_disable_ctrl(ctrl, true); 1961 1926 1962 1927 if (!nvmet_cc_shn(new_cc) && nvmet_cc_shn(old_cc)) 1963 1928 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; ··· 1994 1963 /* Clear Controller Memory Buffer Supported (CMBS). */ 1995 1964 ctrl->cap &= ~(0x1ULL << 57); 1996 1965 1997 - /* Controller configuration. */ 1998 - ctrl->cc = tctrl->cc & (~NVME_CC_ENABLE); 1999 - 2000 - /* Controller status. 
*/ 2001 - ctrl->csts = ctrl->tctrl->csts; 2002 - 2003 1966 nvmet_pci_epf_bar_write64(ctrl, NVME_REG_CAP, ctrl->cap); 2004 1967 nvmet_pci_epf_bar_write32(ctrl, NVME_REG_VS, tctrl->subsys->ver); 2005 - nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); 2006 - nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); 1968 + 1969 + nvmet_pci_epf_clear_ctrl_config(ctrl); 2007 1970 } 2008 1971 2009 1972 static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf, ··· 2095 2070 2096 2071 static void nvmet_pci_epf_start_ctrl(struct nvmet_pci_epf_ctrl *ctrl) 2097 2072 { 2073 + 2074 + dev_info(ctrl->dev, "PCI link up\n"); 2075 + ctrl->link_up = true; 2076 + 2098 2077 schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL); 2099 2078 } 2100 2079 2101 2080 static void nvmet_pci_epf_stop_ctrl(struct nvmet_pci_epf_ctrl *ctrl) 2102 2081 { 2082 + dev_info(ctrl->dev, "PCI link down\n"); 2083 + ctrl->link_up = false; 2084 + 2103 2085 cancel_delayed_work_sync(&ctrl->poll_cc); 2104 2086 2105 - nvmet_pci_epf_disable_ctrl(ctrl); 2087 + nvmet_pci_epf_disable_ctrl(ctrl, false); 2088 + nvmet_pci_epf_clear_ctrl_config(ctrl); 2106 2089 } 2107 2090 2108 2091 static void nvmet_pci_epf_destroy_ctrl(struct nvmet_pci_epf_ctrl *ctrl) ··· 2333 2300 if (ret) 2334 2301 goto out_clear_bar; 2335 2302 2336 - if (!epc_features->linkup_notifier) { 2337 - ctrl->link_up = true; 2303 + if (!epc_features->linkup_notifier) 2338 2304 nvmet_pci_epf_start_ctrl(&nvme_epf->ctrl); 2339 - } 2340 2305 2341 2306 return 0; 2342 2307 ··· 2350 2319 struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); 2351 2320 struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; 2352 2321 2353 - ctrl->link_up = false; 2354 2322 nvmet_pci_epf_destroy_ctrl(ctrl); 2355 2323 2356 2324 nvmet_pci_epf_deinit_dma(nvme_epf); ··· 2361 2331 struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); 2362 2332 struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; 2363 2333 2364 - ctrl->link_up = true; 2365 2334 
nvmet_pci_epf_start_ctrl(ctrl); 2366 2335 2367 2336 return 0; ··· 2371 2342 struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); 2372 2343 struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; 2373 2344 2374 - ctrl->link_up = false; 2375 2345 nvmet_pci_epf_stop_ctrl(ctrl); 2376 2346 2377 2347 return 0;
+1 -4
include/linux/blkdev.h
··· 1614 1614 return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); 1615 1615 } 1616 1616 1617 + int bdev_validate_blocksize(struct block_device *bdev, int block_size); 1617 1618 int set_blocksize(struct file *file, int size); 1618 1619 1619 1620 int lookup_bdev(const char *pathname, dev_t *dev); ··· 1670 1669 int bd_prepare_to_claim(struct block_device *bdev, void *holder, 1671 1670 const struct blk_holder_ops *hops); 1672 1671 void bd_abort_claiming(struct block_device *bdev, void *holder); 1673 - 1674 - /* just for blk-cgroup, don't use elsewhere */ 1675 - struct block_device *blkdev_get_no_open(dev_t dev); 1676 - void blkdev_put_no_open(struct block_device *bdev); 1677 1672 1678 1673 struct block_device *I_BDEV(struct inode *inode); 1679 1674 struct block_device *file_bdev(struct file *bdev_file);
+8 -1
tools/testing/selftests/ublk/Makefile
··· 6 6 TEST_PROGS := test_generic_01.sh 7 7 TEST_PROGS += test_generic_02.sh 8 8 TEST_PROGS += test_generic_03.sh 9 + TEST_PROGS += test_generic_04.sh 10 + TEST_PROGS += test_generic_05.sh 11 + TEST_PROGS += test_generic_06.sh 9 12 10 13 TEST_PROGS += test_null_01.sh 11 14 TEST_PROGS += test_null_02.sh ··· 24 21 25 22 TEST_PROGS += test_stress_01.sh 26 23 TEST_PROGS += test_stress_02.sh 24 + TEST_PROGS += test_stress_03.sh 25 + TEST_PROGS += test_stress_04.sh 26 + TEST_PROGS += test_stress_05.sh 27 27 28 28 TEST_GEN_PROGS_EXTENDED = kublk 29 29 30 30 include ../lib.mk 31 31 32 - $(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c 32 + $(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \ 33 + fault_inject.c 33 34 34 35 check: 35 36 shellcheck -x -f gcc *.sh
+98
tools/testing/selftests/ublk/fault_inject.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Fault injection ublk target. Hack this up however you like for 5 + * testing specific behaviors of ublk_drv. Currently is a null target 6 + * with a configurable delay before completing each I/O. This delay can 7 + * be used to test ublk_drv's handling of I/O outstanding to the ublk 8 + * server when it dies. 9 + */ 10 + 11 + #include "kublk.h" 12 + 13 + static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, 14 + struct ublk_dev *dev) 15 + { 16 + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; 17 + unsigned long dev_size = 250UL << 30; 18 + 19 + dev->tgt.dev_size = dev_size; 20 + dev->tgt.params = (struct ublk_params) { 21 + .types = UBLK_PARAM_TYPE_BASIC, 22 + .basic = { 23 + .logical_bs_shift = 9, 24 + .physical_bs_shift = 12, 25 + .io_opt_shift = 12, 26 + .io_min_shift = 9, 27 + .max_sectors = info->max_io_buf_bytes >> 9, 28 + .dev_sectors = dev_size >> 9, 29 + }, 30 + }; 31 + 32 + dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000); 33 + return 0; 34 + } 35 + 36 + static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) 37 + { 38 + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 39 + struct io_uring_sqe *sqe; 40 + struct __kernel_timespec ts = { 41 + .tv_nsec = (long long)q->dev->private_data, 42 + }; 43 + 44 + ublk_queue_alloc_sqes(q, &sqe, 1); 45 + io_uring_prep_timeout(sqe, &ts, 1, 0); 46 + sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1); 47 + 48 + ublk_queued_tgt_io(q, tag, 1); 49 + 50 + return 0; 51 + } 52 + 53 + static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, 54 + const struct io_uring_cqe *cqe) 55 + { 56 + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 57 + 58 + if (cqe->res != -ETIME) 59 + ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); 60 + 61 + if (ublk_completed_tgt_io(q, tag)) 62 + ublk_complete_io(q, tag, iod->nr_sectors << 9); 63 + else 64 + 
ublk_err("%s: io not complete after 1 cqe\n", __func__); 65 + } 66 + 67 + static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) 68 + { 69 + static const struct option longopts[] = { 70 + { "delay_us", 1, NULL, 0 }, 71 + { 0, 0, 0, 0 } 72 + }; 73 + int option_idx, opt; 74 + 75 + ctx->fault_inject.delay_us = 0; 76 + while ((opt = getopt_long(argc, argv, "", 77 + longopts, &option_idx)) != -1) { 78 + switch (opt) { 79 + case 0: 80 + if (!strcmp(longopts[option_idx].name, "delay_us")) 81 + ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); 82 + } 83 + } 84 + } 85 + 86 + static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) 87 + { 88 + printf("\tfault_inject: [--delay_us us (default 0)]\n"); 89 + } 90 + 91 + const struct ublk_tgt_ops fault_inject_tgt_ops = { 92 + .name = "fault_inject", 93 + .init_tgt = ublk_fault_inject_tgt_init, 94 + .queue_io = ublk_fault_inject_queue_io, 95 + .tgt_io_done = ublk_fault_inject_tgt_io_done, 96 + .parse_cmd_line = ublk_fault_inject_cmd_line, 97 + .usage = ublk_fault_inject_usage, 98 + };
+315 -29
tools/testing/selftests/ublk/kublk.c
··· 5 5 6 6 #include "kublk.h" 7 7 8 + #define MAX_NR_TGT_ARG 64 9 + 8 10 unsigned int ublk_dbg_mask = UBLK_LOG; 9 11 static const struct ublk_tgt_ops *tgt_ops_list[] = { 10 12 &null_tgt_ops, 11 13 &loop_tgt_ops, 12 14 &stripe_tgt_ops, 15 + &fault_inject_tgt_ops, 13 16 }; 14 17 15 18 static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) 16 19 { 17 - const struct ublk_tgt_ops *ops; 18 20 int i; 19 21 20 22 if (name == NULL) 21 23 return NULL; 22 24 23 - for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++) 25 + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) 24 26 if (strcmp(tgt_ops_list[i]->name, name) == 0) 25 27 return tgt_ops_list[i]; 26 28 return NULL; ··· 120 118 return __ublk_ctrl_cmd(dev, &data); 121 119 } 122 120 121 + static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) 122 + { 123 + struct ublk_ctrl_cmd_data data = { 124 + .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, 125 + }; 126 + 127 + return __ublk_ctrl_cmd(dev, &data); 128 + } 129 + 130 + static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) 131 + { 132 + struct ublk_ctrl_cmd_data data = { 133 + .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, 134 + .flags = CTRL_CMD_HAS_DATA, 135 + }; 136 + 137 + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; 138 + 139 + return __ublk_ctrl_cmd(dev, &data); 140 + } 141 + 123 142 static int ublk_ctrl_add_dev(struct ublk_dev *dev) 124 143 { 125 144 struct ublk_ctrl_cmd_data data = { ··· 230 207 }; 231 208 } 232 209 210 + static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) 211 + { 212 + unsigned done = 0; 213 + int i; 214 + 215 + for (i = 0; i < CPU_SETSIZE; i++) { 216 + if (CPU_ISSET(i, set)) 217 + done += snprintf(&buf[done], len - done, "%d ", i); 218 + } 219 + } 220 + 221 + static void ublk_adjust_affinity(cpu_set_t *set) 222 + { 223 + int j, updated = 0; 224 + 225 + /* 226 + * Just keep the 1st CPU now. 227 + * 228 + * In future, auto affinity selection can be tried. 
229 + */ 230 + for (j = 0; j < CPU_SETSIZE; j++) { 231 + if (CPU_ISSET(j, set)) { 232 + if (!updated) { 233 + updated = 1; 234 + continue; 235 + } 236 + CPU_CLR(j, set); 237 + } 238 + } 239 + } 240 + 241 + /* Caller must free the allocated buffer */ 242 + static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) 243 + { 244 + struct ublk_ctrl_cmd_data data = { 245 + .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, 246 + .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, 247 + }; 248 + cpu_set_t *buf; 249 + int i, ret; 250 + 251 + buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); 252 + if (!buf) 253 + return -ENOMEM; 254 + 255 + for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { 256 + data.data[0] = i; 257 + data.len = sizeof(cpu_set_t); 258 + data.addr = (__u64)&buf[i]; 259 + 260 + ret = __ublk_ctrl_cmd(ctrl_dev, &data); 261 + if (ret < 0) { 262 + free(buf); 263 + return ret; 264 + } 265 + ublk_adjust_affinity(&buf[i]); 266 + } 267 + 268 + *ptr_buf = buf; 269 + return 0; 270 + } 271 + 233 272 static void ublk_ctrl_dump(struct ublk_dev *dev) 234 273 { 235 274 struct ublksrv_ctrl_dev_info *info = &dev->dev_info; 236 275 struct ublk_params p; 276 + cpu_set_t *affinity; 237 277 int ret; 238 278 239 279 ret = ublk_ctrl_get_params(dev, &p); 240 280 if (ret < 0) { 241 281 ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); 282 + return; 283 + } 284 + 285 + ret = ublk_ctrl_get_affinity(dev, &affinity); 286 + if (ret < 0) { 287 + ublk_err("failed to get affinity %m\n"); 242 288 return; 243 289 } 244 290 ··· 317 225 ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", 318 226 info->max_io_buf_bytes, info->ublksrv_pid, info->flags, 319 227 ublk_dev_state_desc(dev)); 228 + 229 + if (affinity) { 230 + char buf[512]; 231 + int i; 232 + 233 + for (i = 0; i < info->nr_hw_queues; i++) { 234 + ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); 235 + printf("\tqueue %u: tid %d affinity(%s)\n", 236 + i, dev->q[i].tid, buf); 
237 + } 238 + free(affinity); 239 + } 240 + 320 241 fflush(stdout); 321 242 } 322 243 ··· 452 347 } 453 348 454 349 ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, 455 - IORING_SETUP_COOP_TASKRUN); 350 + IORING_SETUP_COOP_TASKRUN | 351 + IORING_SETUP_SINGLE_ISSUER | 352 + IORING_SETUP_DEFER_TASKRUN); 456 353 if (ret < 0) { 457 354 ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", 458 355 q->dev->dev_info.dev_id, q->q_id, ret); ··· 709 602 return reapped; 710 603 } 711 604 605 + static void ublk_queue_set_sched_affinity(const struct ublk_queue *q, 606 + cpu_set_t *cpuset) 607 + { 608 + if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0) 609 + ublk_err("ublk dev %u queue %u set affinity failed", 610 + q->dev->dev_info.dev_id, q->q_id); 611 + } 612 + 613 + struct ublk_queue_info { 614 + struct ublk_queue *q; 615 + sem_t *queue_sem; 616 + cpu_set_t *affinity; 617 + }; 618 + 712 619 static void *ublk_io_handler_fn(void *data) 713 620 { 714 - struct ublk_queue *q = data; 621 + struct ublk_queue_info *info = data; 622 + struct ublk_queue *q = info->q; 715 623 int dev_id = q->dev->dev_info.dev_id; 716 624 int ret; 717 625 ··· 736 614 dev_id, q->q_id); 737 615 return NULL; 738 616 } 617 + /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ 618 + ublk_queue_set_sched_affinity(q, info->affinity); 619 + sem_post(info->queue_sem); 620 + 739 621 ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", 740 622 q->tid, dev_id, q->q_id); 741 623 ··· 765 639 dev->dev_info.dev_id, ret); 766 640 } 767 641 768 - static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) 642 + static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) 769 643 { 770 644 uint64_t id; 771 645 int evtfd = ctx->_evtfd; ··· 778 652 else 779 653 id = ERROR_EVTFD_DEVID; 780 654 655 + if (dev && ctx->shadow_dev) 656 + memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); 657 + 781 658 if (write(evtfd, &id, sizeof(id)) != 
sizeof(id)) 782 659 return -EINVAL; 660 + 661 + close(evtfd); 662 + shmdt(ctx->shadow_dev); 783 663 784 664 return 0; 785 665 } ··· 793 661 794 662 static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) 795 663 { 796 - int ret, i; 797 - void *thread_ret; 798 664 const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; 665 + struct ublk_queue_info *qinfo; 666 + cpu_set_t *affinity_buf; 667 + void *thread_ret; 668 + sem_t queue_sem; 669 + int ret, i; 799 670 800 671 ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); 801 672 673 + qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info), 674 + dinfo->nr_hw_queues); 675 + if (!qinfo) 676 + return -ENOMEM; 677 + 678 + sem_init(&queue_sem, 0, 0); 802 679 ret = ublk_dev_prep(ctx, dev); 680 + if (ret) 681 + return ret; 682 + 683 + ret = ublk_ctrl_get_affinity(dev, &affinity_buf); 803 684 if (ret) 804 685 return ret; 805 686 806 687 for (i = 0; i < dinfo->nr_hw_queues; i++) { 807 688 dev->q[i].dev = dev; 808 689 dev->q[i].q_id = i; 690 + 691 + qinfo[i].q = &dev->q[i]; 692 + qinfo[i].queue_sem = &queue_sem; 693 + qinfo[i].affinity = &affinity_buf[i]; 809 694 pthread_create(&dev->q[i].thread, NULL, 810 695 ublk_io_handler_fn, 811 - &dev->q[i]); 696 + &qinfo[i]); 812 697 } 813 698 699 + for (i = 0; i < dinfo->nr_hw_queues; i++) 700 + sem_wait(&queue_sem); 701 + free(qinfo); 702 + free(affinity_buf); 703 + 814 704 /* everything is fine now, start us */ 815 - ublk_set_parameters(dev); 816 - ret = ublk_ctrl_start_dev(dev, getpid()); 705 + if (ctx->recovery) 706 + ret = ublk_ctrl_end_user_recovery(dev, getpid()); 707 + else { 708 + ublk_set_parameters(dev); 709 + ret = ublk_ctrl_start_dev(dev, getpid()); 710 + } 817 711 if (ret < 0) { 818 712 ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); 819 713 goto fail; ··· 849 691 if (ctx->fg) 850 692 ublk_ctrl_dump(dev); 851 693 else 852 - ublk_send_dev_event(ctx, dev->dev_info.dev_id); 694 + ublk_send_dev_event(ctx, dev, 
dev->dev_info.dev_id); 853 695 854 696 /* wait until we are terminated */ 855 697 for (i = 0; i < dinfo->nr_hw_queues; i++) ··· 1014 856 } 1015 857 } 1016 858 1017 - ret = ublk_ctrl_add_dev(dev); 859 + if (ctx->recovery) 860 + ret = ublk_ctrl_start_user_recovery(dev); 861 + else 862 + ret = ublk_ctrl_add_dev(dev); 1018 863 if (ret < 0) { 1019 864 ublk_err("%s: can't add dev id %d, type %s ret %d\n", 1020 865 __func__, dev_id, tgt_type, ret); ··· 1031 870 1032 871 fail: 1033 872 if (ret < 0) 1034 - ublk_send_dev_event(ctx, -1); 873 + ublk_send_dev_event(ctx, dev, -1); 1035 874 ublk_ctrl_deinit(dev); 1036 875 return ret; 1037 876 } ··· 1045 884 if (ctx->fg) 1046 885 goto run; 1047 886 887 + ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666); 888 + if (ctx->_shmid < 0) { 889 + ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); 890 + exit(-1); 891 + } 892 + ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); 893 + if (ctx->shadow_dev == (struct ublk_dev *)-1) { 894 + ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); 895 + exit(-1); 896 + } 1048 897 ctx->_evtfd = eventfd(0, 0); 1049 898 if (ctx->_evtfd < 0) { 1050 899 ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); 1051 900 exit(-1); 1052 901 } 1053 902 1054 - setsid(); 1055 903 res = fork(); 1056 904 if (res == 0) { 905 + int res2; 906 + 907 + setsid(); 908 + res2 = fork(); 909 + if (res2 == 0) { 910 + /* prepare for detaching */ 911 + close(STDIN_FILENO); 912 + close(STDOUT_FILENO); 913 + close(STDERR_FILENO); 1057 914 run: 1058 - res = __cmd_dev_add(ctx); 1059 - return res; 915 + res = __cmd_dev_add(ctx); 916 + return res; 917 + } else { 918 + /* detached from the foreground task */ 919 + exit(EXIT_SUCCESS); 920 + } 1060 921 } else if (res > 0) { 1061 922 uint64_t id; 923 + int exit_code = EXIT_FAILURE; 1062 924 1063 925 res = read(ctx->_evtfd, &id, sizeof(id)); 1064 926 close(ctx->_evtfd); 1065 927 if (res == sizeof(id) 
&& id != ERROR_EVTFD_DEVID) { 1066 928 ctx->dev_id = id - 1; 1067 - return __cmd_dev_list(ctx); 929 + if (__cmd_dev_list(ctx) >= 0) 930 + exit_code = EXIT_SUCCESS; 1068 931 } 1069 - exit(EXIT_FAILURE); 932 + shmdt(ctx->shadow_dev); 933 + shmctl(ctx->_shmid, IPC_RMID, NULL); 934 + /* wait for child and detach from it */ 935 + wait(NULL); 936 + exit(exit_code); 1070 937 } else { 1071 - return res; 938 + exit(EXIT_FAILURE); 1072 939 } 1073 940 } 1074 941 ··· 1158 969 ublk_err("%s: can't get dev info from %d: %d\n", 1159 970 __func__, ctx->dev_id, ret); 1160 971 } else { 972 + if (ctx->shadow_dev) 973 + memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); 974 + 1161 975 ublk_ctrl_dump(dev); 1162 976 } 1163 977 ··· 1231 1039 return ret; 1232 1040 } 1233 1041 1042 + static void __cmd_create_help(char *exe, bool recovery) 1043 + { 1044 + int i; 1045 + 1046 + printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", 1047 + exe, recovery ? "recover" : "add"); 1048 + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); 1049 + printf("\t[-e 0|1 ] [-i 0|1]\n"); 1050 + printf("\t[target options] [backfile1] [backfile2] ...\n"); 1051 + printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); 1052 + 1053 + for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { 1054 + const struct ublk_tgt_ops *ops = tgt_ops_list[i]; 1055 + 1056 + if (ops->usage) 1057 + ops->usage(ops); 1058 + } 1059 + } 1060 + 1061 + static void cmd_add_help(char *exe) 1062 + { 1063 + __cmd_create_help(exe, false); 1064 + printf("\n"); 1065 + } 1066 + 1067 + static void cmd_recover_help(char *exe) 1068 + { 1069 + __cmd_create_help(exe, true); 1070 + printf("\tPlease provide exact command line for creating this device with real dev_id\n"); 1071 + printf("\n"); 1072 + } 1073 + 1234 1074 static int cmd_dev_help(char *exe) 1235 1075 { 1236 - printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n 
dev_id] [backfile1] [backfile2] ...\n", exe); 1237 - printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); 1076 + cmd_add_help(exe); 1077 + cmd_recover_help(exe); 1078 + 1238 1079 printf("%s del [-n dev_id] -a \n", exe); 1239 - printf("\t -a delete all devices -n delete specified device\n"); 1080 + printf("\t -a delete all devices -n delete specified device\n\n"); 1240 1081 printf("%s list [-n dev_id] -a \n", exe); 1241 - printf("\t -a list all devices, -n list specified device, default -a \n"); 1082 + printf("\t -a list all devices, -n list specified device, default -a \n\n"); 1242 1083 printf("%s features\n", exe); 1243 1084 return 0; 1244 1085 } ··· 1288 1063 { "quiet", 0, NULL, 0 }, 1289 1064 { "zero_copy", 0, NULL, 'z' }, 1290 1065 { "foreground", 0, NULL, 0 }, 1291 - { "chunk_size", 1, NULL, 0 }, 1066 + { "recovery", 1, NULL, 'r' }, 1067 + { "recovery_fail_io", 1, NULL, 'e'}, 1068 + { "recovery_reissue", 1, NULL, 'i'}, 1069 + { "get_data", 1, NULL, 'g'}, 1292 1070 { 0, 0, 0, 0 } 1293 1071 }; 1072 + const struct ublk_tgt_ops *ops = NULL; 1294 1073 int option_idx, opt; 1295 1074 const char *cmd = argv[1]; 1296 1075 struct dev_ctx ctx = { ··· 1302 1073 .nr_hw_queues = 2, 1303 1074 .dev_id = -1, 1304 1075 .tgt_type = "unknown", 1305 - .chunk_size = 65536, /* def chunk size is 64K */ 1306 1076 }; 1307 1077 int ret = -EINVAL, i; 1078 + int tgt_argc = 1; 1079 + char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; 1080 + int value; 1308 1081 1309 1082 if (argc == 1) 1310 1083 return ret; 1311 1084 1085 + opterr = 0; 1312 1086 optind = 2; 1313 - while ((opt = getopt_long(argc, argv, "t:n:d:q:az", 1087 + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", 1314 1088 longopts, &option_idx)) != -1) { 1315 1089 switch (opt) { 1316 1090 case 'a': ··· 1335 1103 case 'z': 1336 1104 ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; 1337 1105 break; 1106 + case 'r': 1107 + value = strtol(optarg, NULL, 10); 1108 + if (value) 1109 + 
ctx.flags |= UBLK_F_USER_RECOVERY; 1110 + break; 1111 + case 'e': 1112 + value = strtol(optarg, NULL, 10); 1113 + if (value) 1114 + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; 1115 + break; 1116 + case 'i': 1117 + value = strtol(optarg, NULL, 10); 1118 + if (value) 1119 + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; 1120 + break; 1121 + case 'g': 1122 + value = strtol(optarg, NULL, 10); 1123 + if (value) 1124 + ctx.flags |= UBLK_F_NEED_GET_DATA; 1125 + break; 1338 1126 case 0: 1339 1127 if (!strcmp(longopts[option_idx].name, "debug_mask")) 1340 1128 ublk_dbg_mask = strtol(optarg, NULL, 16); ··· 1362 1110 ublk_dbg_mask = 0; 1363 1111 if (!strcmp(longopts[option_idx].name, "foreground")) 1364 1112 ctx.fg = 1; 1365 - if (!strcmp(longopts[option_idx].name, "chunk_size")) 1366 - ctx.chunk_size = strtol(optarg, NULL, 10); 1113 + break; 1114 + case '?': 1115 + /* 1116 + * target requires every option must have argument 1117 + */ 1118 + if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { 1119 + fprintf(stderr, "every target option requires argument: %s %s\n", 1120 + argv[optind - 1], argv[optind]); 1121 + exit(EXIT_FAILURE); 1122 + } 1123 + 1124 + if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { 1125 + tgt_argv[tgt_argc++] = argv[optind - 1]; 1126 + tgt_argv[tgt_argc++] = argv[optind]; 1127 + } else { 1128 + fprintf(stderr, "too many target options\n"); 1129 + exit(EXIT_FAILURE); 1130 + } 1131 + optind += 1; 1132 + break; 1367 1133 } 1368 1134 } 1369 1135 ··· 1390 1120 ctx.files[ctx.nr_files++] = argv[i++]; 1391 1121 } 1392 1122 1123 + ops = ublk_find_tgt(ctx.tgt_type); 1124 + if (ops && ops->parse_cmd_line) { 1125 + optind = 0; 1126 + 1127 + tgt_argv[0] = ctx.tgt_type; 1128 + ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); 1129 + } 1130 + 1393 1131 if (!strcmp(cmd, "add")) 1394 1132 ret = cmd_dev_add(&ctx); 1395 - else if (!strcmp(cmd, "del")) 1133 + else if (!strcmp(cmd, "recover")) { 1134 + if (ctx.dev_id < 0) { 1135 + 
fprintf(stderr, "device id isn't provided for recovering\n"); 1136 + ret = -EINVAL; 1137 + } else { 1138 + ctx.recovery = 1; 1139 + ret = cmd_dev_add(&ctx); 1140 + } 1141 + } else if (!strcmp(cmd, "del")) 1396 1142 ret = cmd_dev_del(&ctx); 1397 1143 else if (!strcmp(cmd, "list")) { 1398 1144 ctx.all = 1;
+40 -6
tools/testing/selftests/ublk/kublk.h
··· 20 20 #include <sys/wait.h> 21 21 #include <sys/eventfd.h> 22 22 #include <sys/uio.h> 23 + #include <sys/ipc.h> 24 + #include <sys/shm.h> 25 + #include <linux/io_uring.h> 23 26 #include <liburing.h> 24 - #include <linux/ublk_cmd.h> 27 + #include <semaphore.h> 28 + 29 + /* allow ublk_dep.h to override ublk_cmd.h */ 25 30 #include "ublk_dep.h" 31 + #include <linux/ublk_cmd.h> 26 32 27 33 #define __maybe_unused __attribute__((unused)) 28 34 #define MAX_BACK_FILES 4 29 35 #ifndef min 30 36 #define min(a, b) ((a) < (b) ? (a) : (b)) 31 37 #endif 38 + 39 + #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) 32 40 33 41 /****************** part 1: libublk ********************/ 34 42 ··· 50 42 #define UBLKSRV_IO_IDLE_SECS 20 51 43 52 44 #define UBLK_IO_MAX_BYTES (1 << 20) 53 - #define UBLK_MAX_QUEUES 4 54 - #define UBLK_QUEUE_DEPTH 128 45 + #define UBLK_MAX_QUEUES 32 46 + #define UBLK_QUEUE_DEPTH 1024 55 47 56 48 #define UBLK_DBG_DEV (1U << 0) 57 49 #define UBLK_DBG_QUEUE (1U << 1) ··· 62 54 63 55 struct ublk_dev; 64 56 struct ublk_queue; 57 + 58 + struct stripe_ctx { 59 + /* stripe */ 60 + unsigned int chunk_size; 61 + }; 62 + 63 + struct fault_inject_ctx { 64 + /* fault_inject */ 65 + unsigned long delay_us; 66 + }; 65 67 66 68 struct dev_ctx { 67 69 char tgt_type[16]; ··· 84 66 unsigned int logging:1; 85 67 unsigned int all:1; 86 68 unsigned int fg:1; 87 - 88 - /* stripe */ 89 - unsigned int chunk_size; 69 + unsigned int recovery:1; 90 70 91 71 int _evtfd; 72 + int _shmid; 73 + 74 + /* built from shmem, only for ublk_dump_dev() */ 75 + struct ublk_dev *shadow_dev; 76 + 77 + union { 78 + struct stripe_ctx stripe; 79 + struct fault_inject_ctx fault_inject; 80 + }; 92 81 }; 93 82 94 83 struct ublk_ctrl_cmd_data { ··· 132 107 int (*queue_io)(struct ublk_queue *, int tag); 133 108 void (*tgt_io_done)(struct ublk_queue *, 134 109 int tag, const struct io_uring_cqe *); 110 + 111 + /* 112 + * Target specific command line handling 113 + * 114 + * each option requires argument 
for target command line 115 + */ 116 + void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); 117 + void (*usage)(const struct ublk_tgt_ops *ops); 135 118 }; 136 119 137 120 struct ublk_tgt { ··· 390 357 extern const struct ublk_tgt_ops null_tgt_ops; 391 358 extern const struct ublk_tgt_ops loop_tgt_ops; 392 359 extern const struct ublk_tgt_ops stripe_tgt_ops; 360 + extern const struct ublk_tgt_ops fault_inject_tgt_ops; 393 361 394 362 void backing_file_tgt_deinit(struct ublk_dev *dev); 395 363 int backing_file_tgt_init(struct ublk_dev *dev);
+27 -1
tools/testing/selftests/ublk/stripe.c
··· 281 281 .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, 282 282 }, 283 283 }; 284 - unsigned chunk_size = ctx->chunk_size; 284 + unsigned chunk_size = ctx->stripe.chunk_size; 285 285 struct stripe_conf *conf; 286 286 unsigned chunk_shift; 287 287 loff_t bytes = 0; ··· 344 344 backing_file_tgt_deinit(dev); 345 345 } 346 346 347 + static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) 348 + { 349 + static const struct option longopts[] = { 350 + { "chunk_size", 1, NULL, 0 }, 351 + { 0, 0, 0, 0 } 352 + }; 353 + int option_idx, opt; 354 + 355 + ctx->stripe.chunk_size = 65536; 356 + while ((opt = getopt_long(argc, argv, "", 357 + longopts, &option_idx)) != -1) { 358 + switch (opt) { 359 + case 0: 360 + if (!strcmp(longopts[option_idx].name, "chunk_size")) 361 + ctx->stripe.chunk_size = strtol(optarg, NULL, 10); 362 + } 363 + } 364 + } 365 + 366 + static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) 367 + { 368 + printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); 369 + } 370 + 347 371 const struct ublk_tgt_ops stripe_tgt_ops = { 348 372 .name = "stripe", 349 373 .init_tgt = ublk_stripe_tgt_init, 350 374 .deinit_tgt = ublk_stripe_tgt_deinit, 351 375 .queue_io = ublk_stripe_queue_io, 352 376 .tgt_io_done = ublk_stripe_io_done, 377 + .parse_cmd_line = ublk_stripe_cmd_line, 378 + .usage = ublk_stripe_usage, 353 379 };
+121 -25
tools/testing/selftests/ublk/test_common.sh
··· 17 17 local minor 18 18 19 19 dev=/dev/ublkb"${dev_id}" 20 - major=$(stat -c '%Hr' "$dev") 21 - minor=$(stat -c '%Lr' "$dev") 20 + major="0x"$(stat -c '%t' "$dev") 21 + minor="0x"$(stat -c '%T' "$dev") 22 22 23 23 echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) 24 24 } ··· 30 30 } 31 31 32 32 _create_backfile() { 33 - local my_size=$1 34 - local my_file 33 + local index=$1 34 + local new_size=$2 35 + local old_file 36 + local new_file 35 37 36 - my_file=$(mktemp ublk_file_"${my_size}"_XXXXX) 37 - truncate -s "${my_size}" "${my_file}" 38 - echo "$my_file" 38 + old_file="${UBLK_BACKFILES[$index]}" 39 + [ -f "$old_file" ] && rm -f "$old_file" 40 + 41 + new_file=$(mktemp ublk_file_"${new_size}"_XXXXX) 42 + truncate -s "${new_size}" "${new_file}" 43 + UBLK_BACKFILES["$index"]="$new_file" 39 44 } 40 45 41 - _remove_backfile() { 42 - local file=$1 46 + _remove_files() { 47 + local file 43 48 44 - [ -f "$file" ] && rm -f "$file" 49 + for file in "${UBLK_BACKFILES[@]}"; do 50 + [ -f "$file" ] && rm -f "$file" 51 + done 52 + [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP" 45 53 } 46 54 47 55 _create_tmp_dir() { ··· 114 106 local type=$1 115 107 shift 1 116 108 modprobe ublk_drv > /dev/null 2>&1 109 + UBLK_TMP=$(mktemp ublk_test_XXXXX) 117 110 [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" 118 111 } 119 112 ··· 138 129 echo "$1 : [FAIL]" 139 130 fi 140 131 fi 141 - [ "$2" -ne 0 ] && exit "$2" 132 + if [ "$2" -ne 0 ]; then 133 + _remove_files 134 + exit "$2" 135 + fi 142 136 return 0 143 137 } 144 138 ··· 150 138 { 151 139 local tid=$1 152 140 local code=$2 153 - shift 2 141 + 154 142 if [ "${code}" -ne 0 ]; then 155 - _remove_test_files "$@" 156 143 _show_result "${tid}" "${code}" 157 144 fi 158 145 } 159 146 160 147 _cleanup_test() { 161 148 "${UBLK_PROG}" del -a 162 - rm -f "$UBLK_TMP" 149 + 150 + _remove_files 163 151 } 164 152 165 153 _have_feature() ··· 170 158 return 1 171 159 } 172 160 173 - _add_ublk_dev() { 174 - local kublk_temp; 161 + _create_ublk_dev() { 
175 162 local dev_id; 163 + local cmd=$1 164 + 165 + shift 1 176 166 177 167 if [ ! -c /dev/ublk-control ]; then 178 168 return ${UBLK_SKIP_CODE} ··· 185 171 fi 186 172 fi 187 173 188 - kublk_temp=$(mktemp /tmp/kublk-XXXXXX) 189 - if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then 174 + if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then 190 175 echo "fail to add ublk dev $*" 191 - rm -f "${kublk_temp}" 192 176 return 255 193 177 fi 194 - 195 - dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}') 196 178 udevadm settle 197 - rm -f "${kublk_temp}" 198 - echo "${dev_id}" 179 + 180 + if [[ "$dev_id" =~ ^[0-9]+$ ]]; then 181 + echo "${dev_id}" 182 + else 183 + return 255 184 + fi 185 + } 186 + 187 + _add_ublk_dev() { 188 + _create_ublk_dev "add" "$@" 189 + } 190 + 191 + _recover_ublk_dev() { 192 + local dev_id 193 + local state 194 + 195 + dev_id=$(_create_ublk_dev "recover" "$@") 196 + for ((j=0;j<20;j++)); do 197 + state=$(_get_ublk_dev_state "${dev_id}") 198 + [ "$state" == "LIVE" ] && break 199 + sleep 1 200 + done 201 + echo "$state" 199 202 } 200 203 201 204 # kill the ublk daemon and return ublk device state ··· 251 220 local kill_server=$3 252 221 253 222 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ 254 - --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \ 223 + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ 255 224 --runtime=20 --time_based > /dev/null 2>&1 & 256 225 sleep 2 257 226 if [ "${kill_server}" = "yes" ]; then ··· 269 238 wait 270 239 } 271 240 241 + run_io_and_remove() 242 + { 243 + local size=$1 244 + local dev_id 245 + shift 1 246 + 247 + dev_id=$(_add_ublk_dev "$@") 248 + _check_add_dev "$TID" $? 249 + 250 + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" 251 + if ! 
__run_io_and_remove "$dev_id" "${size}" "no"; then 252 + echo "/dev/ublkc$dev_id isn't removed" 253 + exit 255 254 + fi 255 + } 256 + 257 + run_io_and_kill_daemon() 258 + { 259 + local size=$1 260 + local dev_id 261 + shift 1 262 + 263 + dev_id=$(_add_ublk_dev "$@") 264 + _check_add_dev "$TID" $? 265 + 266 + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" 267 + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then 268 + echo "/dev/ublkc$dev_id isn't removed res ${res}" 269 + exit 255 270 + fi 271 + } 272 + 273 + run_io_and_recover() 274 + { 275 + local state 276 + local dev_id 277 + 278 + dev_id=$(_add_ublk_dev "$@") 279 + _check_add_dev "$TID" $? 280 + 281 + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ 282 + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ 283 + --runtime=20 --time_based > /dev/null 2>&1 & 284 + sleep 4 285 + 286 + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") 287 + if [ "$state" != "QUIESCED" ]; then 288 + echo "device isn't quiesced($state) after killing daemon" 289 + return 255 290 + fi 291 + 292 + state=$(_recover_ublk_dev -n "$dev_id" "$@") 293 + if [ "$state" != "LIVE" ]; then 294 + echo "faile to recover to LIVE($state)" 295 + return 255 296 + fi 297 + 298 + if ! __remove_ublk_dev_return "${dev_id}"; then 299 + echo "delete dev ${dev_id} failed" 300 + return 255 301 + fi 302 + wait 303 + } 304 + 305 + 272 306 _ublk_test_top_dir() 273 307 { 274 308 cd "$(dirname "$0")" && pwd 275 309 } 276 310 277 - UBLK_TMP=$(mktemp ublk_test_XXXXX) 278 311 UBLK_PROG=$(_ublk_test_top_dir)/kublk 279 312 UBLK_TEST_QUIET=1 280 313 UBLK_TEST_SHOW_RESULT=1 314 + UBLK_BACKFILES=() 281 315 export UBLK_PROG 282 316 export UBLK_TEST_QUIET 283 317 export UBLK_TEST_SHOW_RESULT
+40
tools/testing/selftests/ublk/test_generic_04.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh 5 + 6 + TID="generic_04" 7 + ERR_CODE=0 8 + 9 + ublk_run_recover_test() 10 + { 11 + run_io_and_recover "$@" 12 + ERR_CODE=$? 13 + if [ ${ERR_CODE} -ne 0 ]; then 14 + echo "$TID failure: $*" 15 + _show_result $TID $ERR_CODE 16 + fi 17 + } 18 + 19 + if ! _have_program fio; then 20 + exit "$UBLK_SKIP_CODE" 21 + fi 22 + 23 + _prep_test "recover" "basic recover function verification" 24 + 25 + _create_backfile 0 256M 26 + _create_backfile 1 128M 27 + _create_backfile 2 128M 28 + 29 + ublk_run_recover_test -t null -q 2 -r 1 & 30 + ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & 31 + ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & 32 + wait 33 + 34 + ublk_run_recover_test -t null -q 2 -r 1 -i 1 & 35 + ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & 36 + ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & 37 + wait 38 + 39 + _cleanup_test "recover" 40 + _show_result $TID $ERR_CODE
+44
tools/testing/selftests/ublk/test_generic_05.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh 5 + 6 + TID="generic_05" 7 + ERR_CODE=0 8 + 9 + ublk_run_recover_test() 10 + { 11 + run_io_and_recover "$@" 12 + ERR_CODE=$? 13 + if [ ${ERR_CODE} -ne 0 ]; then 14 + echo "$TID failure: $*" 15 + _show_result $TID $ERR_CODE 16 + fi 17 + } 18 + 19 + if ! _have_program fio; then 20 + exit "$UBLK_SKIP_CODE" 21 + fi 22 + 23 + if ! _have_feature "ZERO_COPY"; then 24 + exit "$UBLK_SKIP_CODE" 25 + fi 26 + 27 + _prep_test "recover" "basic recover function verification (zero copy)" 28 + 29 + _create_backfile 0 256M 30 + _create_backfile 1 128M 31 + _create_backfile 2 128M 32 + 33 + ublk_run_recover_test -t null -q 2 -r 1 -z & 34 + ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" & 35 + ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & 36 + wait 37 + 38 + ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 & 39 + ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" & 40 + ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & 41 + wait 42 + 43 + _cleanup_test "recover" 44 + _show_result $TID $ERR_CODE
+41
tools/testing/selftests/ublk/test_generic_06.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# generic_06: a writer blocked on a ublk device must fail promptly once the
# ublk server is killed while it still holds the I/O.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_06"
ERR_CODE=0

# Print the reason and mark the test as failed.
_fail()
{
	echo "$1"
	ERR_CODE=255
}

_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"

# The server is told to hold every I/O for 2s (--delay_us 2000000) before
# completing it, so the write issued below is still in the server when the
# daemon is killed.
dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000)
_check_add_dev $TID $?

STARTTIME=${SECONDS}

dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
dd_pid=$!

__ublk_kill_daemon ${dev_id} "DEAD"

wait "$dd_pid"
dd_exitcode=$?

ELAPSED=$((SECONDS - STARTTIME))

# dd has to report an error, and it has to do so shortly after the server
# died. Before the fast-cleanup path existed this depended on an I/O
# timeout and took about 30s.
if (( dd_exitcode == 0 )); then
	_fail "dd unexpectedly exited successfully!"
fi
if (( ELAPSED >= 5 )); then
	_fail "dd took $ELAPSED seconds to exit (>= 5s tolerance)!"
fi

_cleanup_test "fault_inject"
_show_result $TID $ERR_CODE
+3 -5
tools/testing/selftests/ublk/test_loop_01.sh
··· 12 12 13 13 _prep_test "loop" "write and verify test" 14 14 15 - backfile_0=$(_create_backfile 256M) 15 + _create_backfile 0 256M 16 16 17 - dev_id=$(_add_ublk_dev -t loop "$backfile_0") 18 - _check_add_dev $TID $? "${backfile_0}" 17 + dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") 18 + _check_add_dev $TID $? 19 19 20 20 # run fio over the ublk disk 21 21 _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M 22 22 ERR_CODE=$? 23 23 24 24 _cleanup_test "loop" 25 - 26 - _remove_backfile "$backfile_0" 27 25 28 26 _show_result $TID $ERR_CODE
+3 -5
tools/testing/selftests/ublk/test_loop_02.sh
··· 8 8 9 9 _prep_test "loop" "mkfs & mount & umount" 10 10 11 - backfile_0=$(_create_backfile 256M) 12 - dev_id=$(_add_ublk_dev -t loop "$backfile_0") 13 - _check_add_dev $TID $? "$backfile_0" 11 + _create_backfile 0 256M 12 + dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") 13 + _check_add_dev $TID $? 14 14 15 15 _mkfs_mount_test /dev/ublkb"${dev_id}" 16 16 ERR_CODE=$? 17 17 18 18 _cleanup_test "loop" 19 - 20 - _remove_backfile "$backfile_0" 21 19 22 20 _show_result $TID $ERR_CODE
+3 -5
tools/testing/selftests/ublk/test_loop_03.sh
··· 12 12 13 13 _prep_test "loop" "write and verify over zero copy" 14 14 15 - backfile_0=$(_create_backfile 256M) 16 - dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") 17 - _check_add_dev $TID $? "$backfile_0" 15 + _create_backfile 0 256M 16 + dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") 17 + _check_add_dev $TID $? 18 18 19 19 # run fio over the ublk disk 20 20 _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M 21 21 ERR_CODE=$? 22 22 23 23 _cleanup_test "loop" 24 - 25 - _remove_backfile "$backfile_0" 26 24 27 25 _show_result $TID $ERR_CODE
+4 -5
tools/testing/selftests/ublk/test_loop_04.sh
··· 8 8 9 9 _prep_test "loop" "mkfs & mount & umount with zero copy" 10 10 11 - backfile_0=$(_create_backfile 256M) 12 - dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") 13 - _check_add_dev $TID $? "$backfile_0" 11 + _create_backfile 0 256M 12 + 13 + dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") 14 + _check_add_dev $TID $? 14 15 15 16 _mkfs_mount_test /dev/ublkb"${dev_id}" 16 17 ERR_CODE=$? 17 18 18 19 _cleanup_test "loop" 19 - 20 - _remove_backfile "$backfile_0" 21 20 22 21 _show_result $TID $ERR_CODE
+3 -5
tools/testing/selftests/ublk/test_loop_05.sh
··· 12 12 13 13 _prep_test "loop" "write and verify test" 14 14 15 - backfile_0=$(_create_backfile 256M) 15 + _create_backfile 0 256M 16 16 17 - dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") 18 - _check_add_dev $TID $? "${backfile_0}" 17 + dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}") 18 + _check_add_dev $TID $? 19 19 20 20 # run fio over the ublk disk 21 21 _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M 22 22 ERR_CODE=$? 23 23 24 24 _cleanup_test "loop" 25 - 26 - _remove_backfile "$backfile_0" 27 25 28 26 _show_result $TID $ERR_CODE
+16 -29
tools/testing/selftests/ublk/test_stress_01.sh
··· 4 4 . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh 5 5 TID="stress_01" 6 6 ERR_CODE=0 7 - DEV_ID=-1 8 7 9 8 ublk_io_and_remove() 10 9 { 11 - local size=$1 12 - shift 1 13 - local backfile="" 14 - if echo "$@" | grep -q "loop"; then 15 - backfile=${*: -1} 16 - fi 17 - DEV_ID=$(_add_ublk_dev "$@") 18 - _check_add_dev $TID $? "${backfile}" 19 - 20 - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" 21 - if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then 22 - echo "/dev/ublkc${DEV_ID} isn't removed" 23 - _remove_backfile "${backfile}" 24 - exit 255 10 + run_io_and_remove "$@" 11 + ERR_CODE=$? 12 + if [ ${ERR_CODE} -ne 0 ]; then 13 + echo "$TID failure: $*" 14 + _show_result $TID $ERR_CODE 25 15 fi 26 16 } 27 17 18 + if ! _have_program fio; then 19 + exit "$UBLK_SKIP_CODE" 20 + fi 21 + 28 22 _prep_test "stress" "run IO and remove device" 29 23 30 - ublk_io_and_remove 8G -t null -q 4 31 - ERR_CODE=$? 32 - if [ ${ERR_CODE} -ne 0 ]; then 33 - _show_result $TID $ERR_CODE 34 - fi 24 + _create_backfile 0 256M 25 + _create_backfile 1 128M 26 + _create_backfile 2 128M 35 27 36 - BACK_FILE=$(_create_backfile 256M) 37 - ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" 38 - ERR_CODE=$? 39 - if [ ${ERR_CODE} -ne 0 ]; then 40 - _show_result $TID $ERR_CODE 41 - fi 28 + ublk_io_and_remove 8G -t null -q 4 & 29 + ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & 30 + ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & 31 + wait 42 32 43 - ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" 44 - ERR_CODE=$? 45 33 _cleanup_test "stress" 46 - _remove_backfile "${BACK_FILE}" 47 34 _show_result $TID $ERR_CODE
+16 -29
tools/testing/selftests/ublk/test_stress_02.sh
··· 4 4 . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh 5 5 TID="stress_02" 6 6 ERR_CODE=0 7 - DEV_ID=-1 7 + 8 + if ! _have_program fio; then 9 + exit "$UBLK_SKIP_CODE" 10 + fi 8 11 9 12 ublk_io_and_kill_daemon() 10 13 { 11 - local size=$1 12 - shift 1 13 - local backfile="" 14 - if echo "$@" | grep -q "loop"; then 15 - backfile=${*: -1} 16 - fi 17 - DEV_ID=$(_add_ublk_dev "$@") 18 - _check_add_dev $TID $? "${backfile}" 19 - 20 - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" 21 - if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then 22 - echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" 23 - _remove_backfile "${backfile}" 24 - exit 255 14 + run_io_and_kill_daemon "$@" 15 + ERR_CODE=$? 16 + if [ ${ERR_CODE} -ne 0 ]; then 17 + echo "$TID failure: $*" 18 + _show_result $TID $ERR_CODE 25 19 fi 26 20 } 27 21 28 22 _prep_test "stress" "run IO and kill ublk server" 29 23 30 - ublk_io_and_kill_daemon 8G -t null -q 4 31 - ERR_CODE=$? 32 - if [ ${ERR_CODE} -ne 0 ]; then 33 - _show_result $TID $ERR_CODE 34 - fi 24 + _create_backfile 0 256M 25 + _create_backfile 1 128M 26 + _create_backfile 2 128M 35 27 36 - BACK_FILE=$(_create_backfile 256M) 37 - ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" 38 - ERR_CODE=$? 39 - if [ ${ERR_CODE} -ne 0 ]; then 40 - _show_result $TID $ERR_CODE 41 - fi 28 + ublk_io_and_kill_daemon 8G -t null -q 4 & 29 + ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & 30 + ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & 31 + wait 42 32 43 - ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" 44 - ERR_CODE=$? 45 33 _cleanup_test "stress" 46 - _remove_backfile "${BACK_FILE}" 47 34 _show_result $TID $ERR_CODE
+38
tools/testing/selftests/ublk/test_stress_03.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# stress_03: run IO and remove the device, with the "-z" option, for the
# null, loop and stripe targets in parallel. Skipped unless fio is installed
# and the ZERO_COPY feature is reported.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_03"
ERR_CODE=0
# PIDs of the background jobs started by this test.
STRESS_JOB_PIDS=()

# Run one IO-vs-remove cycle; all arguments are forwarded unchanged to
# run_io_and_remove.
#
# Every caller starts this function with "&", so it executes in a subshell
# and the ERR_CODE assigned here never reaches the main script. The result
# is therefore also handed back through the exit status, which the main
# script collects in ublk_wait_stress_jobs().
ublk_io_and_remove()
{
	run_io_and_remove "$@"
	ERR_CODE=$?
	if [ ${ERR_CODE} -ne 0 ]; then
		echo "$TID failure: $*"
		_show_result $TID $ERR_CODE
	fi
	return "${ERR_CODE}"
}

# Wait for each job recorded in STRESS_JOB_PIDS and keep the first non-zero
# exit status in ERR_CODE. A bare "wait" returns 0 regardless of how the
# jobs ended, which would hide every failure.
ublk_wait_stress_jobs()
{
	local pid
	local rc

	for pid in "${STRESS_JOB_PIDS[@]}"; do
		rc=0
		wait "$pid" || rc=$?
		if [ "$rc" -ne 0 ] && [ "$ERR_CODE" -eq 0 ]; then
			ERR_CODE=$rc
		fi
	done
	STRESS_JOB_PIDS=()
}

if ! _have_program fio; then
	exit "$UBLK_SKIP_CODE"
fi

if ! _have_feature "ZERO_COPY"; then
	exit "$UBLK_SKIP_CODE"
fi

_prep_test "stress" "run IO and remove device(zero copy)"

_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

ublk_io_and_remove 8G -t null -q 4 -z &
STRESS_JOB_PIDS+=("$!")
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
STRESS_JOB_PIDS+=("$!")
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
STRESS_JOB_PIDS+=("$!")
ublk_wait_stress_jobs

_cleanup_test "stress"
_show_result $TID $ERR_CODE
+37
tools/testing/selftests/ublk/test_stress_04.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# stress_04: run IO and kill the ublk server, with the "-z" option, for the
# null, loop and stripe targets in parallel. Skipped unless fio is installed
# and the ZERO_COPY feature is reported.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_04"
ERR_CODE=0
# PIDs of the background jobs started by this test.
STRESS_JOB_PIDS=()

# Run one IO-vs-kill-daemon cycle; all arguments are forwarded unchanged to
# run_io_and_kill_daemon.
#
# Every caller starts this function with "&", so it executes in a subshell
# and the ERR_CODE assigned here never reaches the main script. The result
# is therefore also handed back through the exit status, which the main
# script collects in ublk_wait_stress_jobs().
ublk_io_and_kill_daemon()
{
	run_io_and_kill_daemon "$@"
	ERR_CODE=$?
	if [ ${ERR_CODE} -ne 0 ]; then
		echo "$TID failure: $*"
		_show_result $TID $ERR_CODE
	fi
	return "${ERR_CODE}"
}

# Wait for each job recorded in STRESS_JOB_PIDS and keep the first non-zero
# exit status in ERR_CODE. A bare "wait" returns 0 regardless of how the
# jobs ended, which would hide every failure.
ublk_wait_stress_jobs()
{
	local pid
	local rc

	for pid in "${STRESS_JOB_PIDS[@]}"; do
		rc=0
		wait "$pid" || rc=$?
		if [ "$rc" -ne 0 ] && [ "$ERR_CODE" -eq 0 ]; then
			ERR_CODE=$rc
		fi
	done
	STRESS_JOB_PIDS=()
}

if ! _have_program fio; then
	exit "$UBLK_SKIP_CODE"
fi
if ! _have_feature "ZERO_COPY"; then
	exit "$UBLK_SKIP_CODE"
fi

_prep_test "stress" "run IO and kill ublk server(zero copy)"

_create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M

ublk_io_and_kill_daemon 8G -t null -q 4 -z &
STRESS_JOB_PIDS+=("$!")
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
STRESS_JOB_PIDS+=("$!")
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
STRESS_JOB_PIDS+=("$!")
ublk_wait_stress_jobs

_cleanup_test "stress"
_show_result $TID $ERR_CODE
+64
tools/testing/selftests/ublk/test_stress_05.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# stress_05: run IO and remove the device with recovery enabled. The null
# and loop targets are exercised in parallel for "-i 0" and "-i 1", and once
# more with "-z" when the ZERO_COPY feature is reported.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_05"
ERR_CODE=0
# PIDs of the background jobs belonging to the batch currently running.
STRESS_JOB_PIDS=()

# Add a ublk device, start fio against it in the background, kill the ublk
# daemon with SIGKILL after 4 seconds and then remove the device.
#
# $1   - size handed to fio via --size
# $2.. - arguments forwarded to _add_ublk_dev
#
# Returns 255 when the device cannot be removed.
run_io_and_remove()
{
	local size=$1
	local dev_id
	local dev_pid
	shift 1

	dev_id=$(_add_ublk_dev "$@")
	_check_add_dev $TID $?

	[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"

	fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
		--rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \
		--runtime=40 --time_based > /dev/null 2>&1 &
	sleep 4

	dev_pid=$(_get_ublk_daemon_pid "$dev_id")
	kill -9 "$dev_pid"

	if ! __remove_ublk_dev_return "${dev_id}"; then
		echo "delete dev ${dev_id} failed"
		return 255
	fi
}

# Run one cycle of run_io_and_remove with the given arguments.
#
# Every caller starts this function with "&", so it executes in a subshell
# and the ERR_CODE assigned here never reaches the main script. The result
# is therefore also handed back through the exit status, which the main
# script collects in ublk_wait_stress_jobs().
ublk_io_and_remove()
{
	run_io_and_remove "$@"
	ERR_CODE=$?
	if [ ${ERR_CODE} -ne 0 ]; then
		echo "$TID failure: $*"
		_show_result $TID $ERR_CODE
	fi
	return "${ERR_CODE}"
}

# Wait for each job recorded in STRESS_JOB_PIDS and keep the first non-zero
# exit status in ERR_CODE. A bare "wait" returns 0 regardless of how the
# jobs ended, which would hide every failure.
ublk_wait_stress_jobs()
{
	local pid
	local rc

	for pid in "${STRESS_JOB_PIDS[@]}"; do
		rc=0
		wait "$pid" || rc=$?
		if [ "$rc" -ne 0 ] && [ "$ERR_CODE" -eq 0 ]; then
			ERR_CODE=$rc
		fi
	done
	STRESS_JOB_PIDS=()
}

_prep_test "stress" "run IO and remove device with recovery enabled"

_create_backfile 0 256M
_create_backfile 1 256M

for reissue in $(seq 0 1); do
	ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" &
	STRESS_JOB_PIDS+=("$!")
	ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
	STRESS_JOB_PIDS+=("$!")
	ublk_wait_stress_jobs
done

if _have_feature "ZERO_COPY"; then
	for reissue in $(seq 0 1); do
		ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" &
		STRESS_JOB_PIDS+=("$!")
		ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
		STRESS_JOB_PIDS+=("$!")
		ublk_wait_stress_jobs
	done
fi

_cleanup_test "stress"
_show_result $TID $ERR_CODE
+4 -8
tools/testing/selftests/ublk/test_stripe_01.sh
··· 12 12 13 13 _prep_test "stripe" "write and verify test" 14 14 15 - backfile_0=$(_create_backfile 256M) 16 - backfile_1=$(_create_backfile 256M) 15 + _create_backfile 0 256M 16 + _create_backfile 1 256M 17 17 18 - dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") 19 - _check_add_dev $TID $? "${backfile_0}" 18 + dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") 19 + _check_add_dev $TID $? 20 20 21 21 # run fio over the ublk disk 22 22 _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M 23 23 ERR_CODE=$? 24 24 25 25 _cleanup_test "stripe" 26 - 27 - _remove_backfile "$backfile_0" 28 - _remove_backfile "$backfile_1" 29 - 30 26 _show_result $TID $ERR_CODE
+5 -8
tools/testing/selftests/ublk/test_stripe_02.sh
··· 8 8 9 9 _prep_test "stripe" "mkfs & mount & umount" 10 10 11 - backfile_0=$(_create_backfile 256M) 12 - backfile_1=$(_create_backfile 256M) 13 - dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") 14 - _check_add_dev $TID $? "$backfile_0" "$backfile_1" 11 + _create_backfile 0 256M 12 + _create_backfile 1 256M 13 + 14 + dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") 15 + _check_add_dev $TID $? 15 16 16 17 _mkfs_mount_test /dev/ublkb"${dev_id}" 17 18 ERR_CODE=$? 18 19 19 20 _cleanup_test "stripe" 20 - 21 - _remove_backfile "$backfile_0" 22 - _remove_backfile "$backfile_1" 23 - 24 21 _show_result $TID $ERR_CODE
+4 -8
tools/testing/selftests/ublk/test_stripe_03.sh
··· 12 12 13 13 _prep_test "stripe" "write and verify test" 14 14 15 - backfile_0=$(_create_backfile 256M) 16 - backfile_1=$(_create_backfile 256M) 15 + _create_backfile 0 256M 16 + _create_backfile 1 256M 17 17 18 - dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") 19 - _check_add_dev $TID $? "${backfile_0}" 18 + dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") 19 + _check_add_dev $TID $? 20 20 21 21 # run fio over the ublk disk 22 22 _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M 23 23 ERR_CODE=$? 24 24 25 25 _cleanup_test "stripe" 26 - 27 - _remove_backfile "$backfile_0" 28 - _remove_backfile "$backfile_1" 29 - 30 26 _show_result $TID $ERR_CODE
+5 -8
tools/testing/selftests/ublk/test_stripe_04.sh
··· 8 8 9 9 _prep_test "stripe" "mkfs & mount & umount on zero copy" 10 10 11 - backfile_0=$(_create_backfile 256M) 12 - backfile_1=$(_create_backfile 256M) 13 - dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1") 14 - _check_add_dev $TID $? "$backfile_0" "$backfile_1" 11 + _create_backfile 0 256M 12 + _create_backfile 1 256M 13 + 14 + dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") 15 + _check_add_dev $TID $? 15 16 16 17 _mkfs_mount_test /dev/ublkb"${dev_id}" 17 18 ERR_CODE=$? 18 19 19 20 _cleanup_test "stripe" 20 - 21 - _remove_backfile "$backfile_0" 22 - _remove_backfile "$backfile_1" 23 - 24 21 _show_result $TID $ERR_CODE