Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests: ublk: test that teardown after incomplete recovery completes

Before the fix, teardown of a ublk server that was attempting to recover
a device, but died when it had submitted a nonempty proper subset of the
fetch commands to any queue, would loop forever. Add a test to verify
that, after the fix, teardown completes. This is done by:

- Adding a new argument to the fault_inject target that causes it to die
after fetching a nonempty proper subset of the IOs to a queue
- Using that argument in a new test while trying to recover an
already-created device
- Attempting to delete the ublk device at the end of the test; this
hangs forever if teardown from the fault-injected ublk server never
completes.

It was manually verified that the test passes with the fix and hangs
without it.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://patch.msgid.link/20260405-cancel-v2-2-02d711e643c2@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Uday Shankar and committed by
Jens Axboe
320f9b1c 0842186d

+95 -3
+1
tools/testing/selftests/ublk/Makefile
··· 18 18 TEST_PROGS += test_generic_12.sh 19 19 TEST_PROGS += test_generic_13.sh 20 20 TEST_PROGS += test_generic_16.sh 21 + TEST_PROGS += test_generic_17.sh 21 22 22 23 TEST_PROGS += test_batch_01.sh 23 24 TEST_PROGS += test_batch_02.sh
+49 -3
tools/testing/selftests/ublk/fault_inject.c
··· 10 10 11 11 #include "kublk.h" 12 12 13 + struct fi_opts { 14 + long long delay_ns; 15 + bool die_during_fetch; 16 + }; 17 + 13 18 static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, 14 19 struct ublk_dev *dev) 15 20 { 16 21 const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; 17 22 unsigned long dev_size = 250UL << 30; 23 + struct fi_opts *opts = NULL; 18 24 19 25 if (ctx->auto_zc_fallback) { 20 26 ublk_err("%s: not support auto_zc_fallback\n", __func__); ··· 41 35 }; 42 36 ublk_set_integrity_params(ctx, &dev->tgt.params); 43 37 44 - dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000); 38 + opts = calloc(1, sizeof(*opts)); 39 + if (!opts) { 40 + ublk_err("%s: couldn't allocate memory for opts\n", __func__); 41 + return -ENOMEM; 42 + } 43 + 44 + opts->delay_ns = ctx->fault_inject.delay_us * 1000; 45 + opts->die_during_fetch = ctx->fault_inject.die_during_fetch; 46 + dev->private_data = opts; 47 + 45 48 return 0; 49 + } 50 + 51 + static void ublk_fault_inject_pre_fetch_io(struct ublk_thread *t, 52 + struct ublk_queue *q, int tag, 53 + bool batch) 54 + { 55 + struct fi_opts *opts = q->dev->private_data; 56 + 57 + if (!opts->die_during_fetch) 58 + return; 59 + 60 + /* 61 + * Each queue fetches its IOs in increasing order of tags, so 62 + * dying just before we're about to fetch tag 1 (regardless of 63 + * what queue we're on) guarantees that we've fetched a nonempty 64 + * proper subset of the tags on that queue. 65 + */ 66 + if (tag == 1) { 67 + /* 68 + * Ensure our commands are actually live in the kernel 69 + * before we die. 
70 + */ 71 + io_uring_submit(&t->ring); 72 + raise(SIGKILL); 73 + } 46 74 } 47 75 48 76 static int ublk_fault_inject_queue_io(struct ublk_thread *t, ··· 84 44 { 85 45 const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); 86 46 struct io_uring_sqe *sqe; 47 + struct fi_opts *opts = q->dev->private_data; 87 48 struct __kernel_timespec ts = { 88 - .tv_nsec = (long long)q->dev->private_data, 49 + .tv_nsec = opts->delay_ns, 89 50 }; 90 51 91 52 ublk_io_alloc_sqes(t, &sqe, 1); ··· 118 77 { 119 78 static const struct option longopts[] = { 120 79 { "delay_us", 1, NULL, 0 }, 80 + { "die_during_fetch", 1, NULL, 0 }, 121 81 { 0, 0, 0, 0 } 122 82 }; 123 83 int option_idx, opt; 124 84 125 85 ctx->fault_inject.delay_us = 0; 86 + ctx->fault_inject.die_during_fetch = false; 126 87 while ((opt = getopt_long(argc, argv, "", 127 88 longopts, &option_idx)) != -1) { 128 89 switch (opt) { 129 90 case 0: 130 91 if (!strcmp(longopts[option_idx].name, "delay_us")) 131 92 ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); 93 + if (!strcmp(longopts[option_idx].name, "die_during_fetch")) 94 + ctx->fault_inject.die_during_fetch = strtoll(optarg, NULL, 10); 132 95 } 133 96 } 134 97 } 135 98 136 99 static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) 137 100 { 138 - printf("\tfault_inject: [--delay_us us (default 0)]\n"); 101 + printf("\tfault_inject: [--delay_us us (default 0)] [--die_during_fetch 1]\n"); 139 102 } 140 103 141 104 const struct ublk_tgt_ops fault_inject_tgt_ops = { 142 105 .name = "fault_inject", 143 106 .init_tgt = ublk_fault_inject_tgt_init, 107 + .pre_fetch_io = ublk_fault_inject_pre_fetch_io, 144 108 .queue_io = ublk_fault_inject_queue_io, 145 109 .tgt_io_done = ublk_fault_inject_tgt_io_done, 146 110 .parse_cmd_line = ublk_fault_inject_cmd_line,
+7
tools/testing/selftests/ublk/kublk.c
··· 796 796 q = &t->dev->q[q_id]; 797 797 io = &q->ios[tag]; 798 798 io->buf_index = j++; 799 + if (q->tgt_ops->pre_fetch_io) 800 + q->tgt_ops->pre_fetch_io(t, q, tag, false); 799 801 ublk_queue_io_cmd(t, io); 800 802 } 801 803 } else { ··· 809 807 for (i = 0; i < q->q_depth; i++) { 810 808 io = &q->ios[i]; 811 809 io->buf_index = i; 810 + if (q->tgt_ops->pre_fetch_io) 811 + q->tgt_ops->pre_fetch_io(t, q, i, false); 812 812 ublk_queue_io_cmd(t, io); 813 813 } 814 814 } ··· 986 982 */ 987 983 if (t->q_map[i] == 0) 988 984 continue; 985 + 986 + if (q->tgt_ops->pre_fetch_io) 987 + q->tgt_ops->pre_fetch_io(t, q, 0, true); 989 988 990 989 ret = ublk_batch_queue_prep_io_cmds(t, q); 991 990 ublk_assert(ret >= 0);
+3
tools/testing/selftests/ublk/kublk.h
··· 60 60 struct fault_inject_ctx { 61 61 /* fault_inject */ 62 62 unsigned long delay_us; 63 + bool die_during_fetch; 63 64 }; 64 65 65 66 struct dev_ctx { ··· 139 138 int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); 140 139 void (*deinit_tgt)(struct ublk_dev *); 141 140 141 + void (*pre_fetch_io)(struct ublk_thread *t, struct ublk_queue *q, 142 + int tag, bool batch); 142 143 int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); 143 144 void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, 144 145 const struct io_uring_cqe *);
+35
tools/testing/selftests/ublk/test_generic_17.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh 5 + 6 + ERR_CODE=0 7 + 8 + _prep_test "fault_inject" "teardown after incomplete recovery" 9 + 10 + # First start and stop a ublk server with device configured for recovery 11 + dev_id=$(_add_ublk_dev -t fault_inject -r 1) 12 + _check_add_dev $TID $? 13 + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") 14 + if [ "$state" != "QUIESCED" ]; then 15 + echo "device isn't quiesced($state) after $action" 16 + ERR_CODE=255 17 + fi 18 + 19 + # Then recover the device, but use --die_during_fetch to have the ublk 20 + # server die while a queue has some (but not all) I/Os fetched 21 + ${UBLK_PROG} recover -n "${dev_id}" --foreground -t fault_inject --die_during_fetch 1 22 + RECOVER_RES=$? 23 + # 137 is the result when dying of SIGKILL 24 + if (( RECOVER_RES != 137 )); then 25 + echo "recover command exited with unexpected code ${RECOVER_RES}!" 26 + ERR_CODE=255 27 + fi 28 + 29 + # Clean up the device. This can only succeed once teardown of the above 30 + # exited ublk server completes. So if teardown never completes, we will 31 + # time out here 32 + _ublk_del_dev "${dev_id}" 33 + 34 + _cleanup_test "fault_inject" 35 + _show_result $TID $ERR_CODE