Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ublk: support device recovery without I/O queueing

ublk currently supports the following behaviors on ublk server exit:

A: outstanding I/Os get errors, subsequently issued I/Os get errors
B: outstanding I/Os get errors, subsequently issued I/Os queue
C: outstanding I/Os get reissued, subsequently issued I/Os queue

and the following behaviors for recovery of preexisting block devices by
a future incarnation of the ublk server:

1: ublk devices stopped on ublk server exit (no recovery possible)
2: ublk devices are recoverable using start/end_recovery commands

The userspace interface allows selection of combinations of these
behaviors using flags specified at device creation time, namely:

default behavior: A + 1
UBLK_F_USER_RECOVERY: B + 2
UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2

The behavior A + 2 is currently unsupported. Add support for this
behavior under the new flag combination
UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_FAIL_IO.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241007182419.3263186-5-ushankar@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Uday Shankar and committed by Jens Axboe
59eaa01c 27b5d417

+81 -15
+63 -15
drivers/block/ublk_drv.c
··· 60 60 | UBLK_F_UNPRIVILEGED_DEV \ 61 61 | UBLK_F_CMD_IOCTL_ENCODE \ 62 62 | UBLK_F_USER_COPY \ 63 - | UBLK_F_ZONED) 63 + | UBLK_F_ZONED \ 64 + | UBLK_F_USER_RECOVERY_FAIL_IO) 64 65 65 66 #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \ 66 - | UBLK_F_USER_RECOVERY_REISSUE) 67 + | UBLK_F_USER_RECOVERY_REISSUE \ 68 + | UBLK_F_USER_RECOVERY_FAIL_IO) 67 69 68 70 /* All UBLK_PARAM_TYPE_* should be included here */ 69 71 #define UBLK_PARAM_TYPE_ALL \ ··· 148 146 bool force_abort; 149 147 bool timeout; 150 148 bool canceling; 149 + bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ 151 150 unsigned short nr_io_ready; /* how many ios setup */ 152 151 spinlock_t cancel_lock; 153 152 struct ublk_device *dev; ··· 693 690 */ 694 691 static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub) 695 692 { 696 - return ub->dev_info.flags & UBLK_F_USER_RECOVERY; 693 + return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) && 694 + !(ub->dev_info.flags & UBLK_F_USER_RECOVERY_FAIL_IO); 697 695 } 698 696 699 697 /* ··· 704 700 */ 705 701 static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq) 706 702 { 707 - return ubq->flags & UBLK_F_USER_RECOVERY; 703 + return (ubq->flags & UBLK_F_USER_RECOVERY) && 704 + !(ubq->flags & UBLK_F_USER_RECOVERY_FAIL_IO); 708 705 } 709 706 710 707 /* ··· 717 712 static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub) 718 713 { 719 714 return !(ub->dev_info.flags & UBLK_F_USER_RECOVERY); 715 + } 716 + 717 + static inline bool ublk_dev_in_recoverable_state(struct ublk_device *ub) 718 + { 719 + return ub->dev_info.state == UBLK_S_DEV_QUIESCED || 720 + ub->dev_info.state == UBLK_S_DEV_FAIL_IO; 720 721 } 721 722 722 723 static void ublk_free_disk(struct gendisk *disk) ··· 1286 1275 struct request *rq = bd->rq; 1287 1276 blk_status_t res; 1288 1277 1278 + if (unlikely(ubq->fail_io)) { 1279 + return BLK_STS_TARGET; 1280 + } 1281 + 1289 1282 /* fill iod to slot in io cmd buffer */ 1290 1283 res = 
ublk_setup_iod(ubq, rq); 1291 1284 if (unlikely(res != BLK_STS_OK)) ··· 1640 1625 { 1641 1626 struct ublk_device *ub = 1642 1627 container_of(work, struct ublk_device, nosrv_work); 1628 + int i; 1643 1629 1644 1630 if (ublk_nosrv_should_stop_dev(ub)) { 1645 1631 ublk_stop_dev(ub); ··· 1650 1634 mutex_lock(&ub->mutex); 1651 1635 if (ub->dev_info.state != UBLK_S_DEV_LIVE) 1652 1636 goto unlock; 1653 - __ublk_quiesce_dev(ub); 1637 + 1638 + if (ublk_nosrv_dev_should_queue_io(ub)) { 1639 + __ublk_quiesce_dev(ub); 1640 + } else { 1641 + blk_mq_quiesce_queue(ub->ub_disk->queue); 1642 + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; 1643 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { 1644 + ublk_get_queue(ub, i)->fail_io = true; 1645 + } 1646 + blk_mq_unquiesce_queue(ub->ub_disk->queue); 1647 + } 1648 + 1654 1649 unlock: 1655 1650 mutex_unlock(&ub->mutex); 1656 1651 ublk_cancel_dev(ub); ··· 2414 2387 return -EPERM; 2415 2388 2416 2389 /* forbid nonsense combinations of recovery flags */ 2417 - if ((info.flags & UBLK_F_USER_RECOVERY_REISSUE) && 2418 - !(info.flags & UBLK_F_USER_RECOVERY)) { 2390 + switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) { 2391 + case 0: 2392 + case UBLK_F_USER_RECOVERY: 2393 + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE): 2394 + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO): 2395 + break; 2396 + default: 2419 2397 pr_warn("%s: invalid recovery flags %llx\n", __func__, 2420 2398 info.flags & UBLK_F_ALL_RECOVERY_FLAGS); 2421 2399 return -EINVAL; ··· 2761 2729 * and related io_uring ctx is freed so file struct of /dev/ublkcX is 2762 2730 * released. 
2763 2731 * 2732 + * and one of the following holds 2733 + * 2764 2734 * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work: 2765 2735 * (a)has quiesced request queue 2766 2736 * (b)has requeued every inflight rqs whose io_flags is ACTIVE 2767 2737 * (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE 2768 2738 * (d)has completed/camceled all ioucmds owned by ther dying process 2739 + * 2740 + * (3) UBLK_S_DEV_FAIL_IO is set, which means the queue is not 2741 + * quiesced, but all I/O is being immediately errored 2769 2742 */ 2770 - if (test_bit(UB_STATE_OPEN, &ub->state) || 2771 - ub->dev_info.state != UBLK_S_DEV_QUIESCED) { 2743 + if (test_bit(UB_STATE_OPEN, &ub->state) || !ublk_dev_in_recoverable_state(ub)) { 2772 2744 ret = -EBUSY; 2773 2745 goto out_unlock; 2774 2746 } ··· 2796 2760 const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); 2797 2761 int ublksrv_pid = (int)header->data[0]; 2798 2762 int ret = -EINVAL; 2763 + int i; 2799 2764 2800 2765 pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", 2801 2766 __func__, ub->dev_info.nr_hw_queues, header->dev_id); ··· 2811 2774 if (ublk_nosrv_should_stop_dev(ub)) 2812 2775 goto out_unlock; 2813 2776 2814 - if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) { 2777 + if (!ublk_dev_in_recoverable_state(ub)) { 2815 2778 ret = -EBUSY; 2816 2779 goto out_unlock; 2817 2780 } 2818 2781 ub->dev_info.ublksrv_pid = ublksrv_pid; 2819 2782 pr_devel("%s: new ublksrv_pid %d, dev id %d\n", 2820 2783 __func__, ublksrv_pid, header->dev_id); 2821 - blk_mq_unquiesce_queue(ub->ub_disk->queue); 2822 - pr_devel("%s: queue unquiesced, dev id %d.\n", 2823 - __func__, header->dev_id); 2824 - blk_mq_kick_requeue_list(ub->ub_disk->queue); 2825 - ub->dev_info.state = UBLK_S_DEV_LIVE; 2784 + 2785 + if (ublk_nosrv_dev_should_queue_io(ub)) { 2786 + ub->dev_info.state = UBLK_S_DEV_LIVE; 2787 + blk_mq_unquiesce_queue(ub->ub_disk->queue); 2788 + pr_devel("%s: queue unquiesced, dev id 
%d.\n", 2789 + __func__, header->dev_id); 2790 + blk_mq_kick_requeue_list(ub->ub_disk->queue); 2791 + } else { 2792 + blk_mq_quiesce_queue(ub->ub_disk->queue); 2793 + ub->dev_info.state = UBLK_S_DEV_LIVE; 2794 + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { 2795 + ublk_get_queue(ub, i)->fail_io = false; 2796 + } 2797 + blk_mq_unquiesce_queue(ub->ub_disk->queue); 2798 + } 2799 + 2826 2800 ret = 0; 2827 2801 out_unlock: 2828 2802 mutex_unlock(&ub->mutex);
+18
include/uapi/linux/ublk_cmd.h
··· 147 147 */ 148 148 #define UBLK_F_NEED_GET_DATA (1UL << 2) 149 149 150 + /* 151 + * - Block devices are recoverable if ublk server exits and restarts 152 + * - Outstanding I/O when ublk server exits is met with errors 153 + * - I/O issued while there is no ublk server queues 154 + */ 150 155 #define UBLK_F_USER_RECOVERY (1UL << 3) 151 156 157 + /* 158 + * - Block devices are recoverable if ublk server exits and restarts 159 + * - Outstanding I/O when ublk server exits is reissued 160 + * - I/O issued while there is no ublk server queues 161 + */ 152 162 #define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) 153 163 154 164 /* ··· 200 190 */ 201 191 #define UBLK_F_ZONED (1ULL << 8) 202 192 193 + /* 194 + * - Block devices are recoverable if ublk server exits and restarts 195 + * - Outstanding I/O when ublk server exits is met with errors 196 + * - I/O issued while there is no ublk server is met with errors 197 + */ 198 + #define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) 199 + 203 200 /* device state */ 204 201 #define UBLK_S_DEV_DEAD 0 205 202 #define UBLK_S_DEV_LIVE 1 206 203 #define UBLK_S_DEV_QUIESCED 2 204 + #define UBLK_S_DEV_FAIL_IO 3 207 205 208 206 /* shipped via sqe->cmd of io_uring command */ 209 207 struct ublksrv_ctrl_cmd {