Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests: ublk: handle UBLK_U_IO_FETCH_IO_CMDS

Add support for UBLK_U_IO_FETCH_IO_CMDS to enable efficient batch
fetching of I/O commands using multishot io_uring operations.

Key improvements:
- Implement multishot UBLK_U_IO_FETCH_IO_CMDS for continuous command fetching
- Add fetch buffer management with page-aligned, mlocked buffers
- Process fetched I/O command tags from kernel-provided buffers
- Integrate fetch operations with existing batch I/O infrastructure
- Significantly reduce uring_cmd issuing overhead through batching

The implementation uses two fetch buffers per thread with automatic
requeuing to maintain continuous I/O command flow. Each fetch completion
delivers multiple command tags (two bytes per tag) in one buffer, dramatically
improving performance compared to individual command fetching.

Technical details:
- Fetch buffers are page-aligned and mlocked for optimal performance
- Uses IORING_URING_CMD_MULTISHOT for continuous operation
- Automatic buffer management and requeuing on completion
- Enhanced CQE handling for fetch command completions

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Authored by Ming Lei and committed by Jens Axboe
cb5a6b30 dee7024f

+159 -4
+135 -1
tools/testing/selftests/ublk/batch.c
··· 140 140 t->nr_bufs); 141 141 } 142 142 143 + static void free_batch_fetch_buf(struct ublk_thread *t) 144 + { 145 + int i; 146 + 147 + for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) { 148 + io_uring_free_buf_ring(&t->ring, t->fetch[i].br, 1, i); 149 + munlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size); 150 + free(t->fetch[i].fetch_buf); 151 + } 152 + } 153 + 154 + static int alloc_batch_fetch_buf(struct ublk_thread *t) 155 + { 156 + /* page aligned fetch buffer, and it is mlocked for speedup delivery */ 157 + unsigned pg_sz = getpagesize(); 158 + unsigned buf_size = round_up(t->dev->dev_info.queue_depth * 2, pg_sz); 159 + int ret; 160 + int i = 0; 161 + 162 + for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) { 163 + t->fetch[i].fetch_buf_size = buf_size; 164 + 165 + if (posix_memalign((void **)&t->fetch[i].fetch_buf, pg_sz, 166 + t->fetch[i].fetch_buf_size)) 167 + return -ENOMEM; 168 + 169 + /* lock fetch buffer page for fast fetching */ 170 + if (mlock(t->fetch[i].fetch_buf, t->fetch[i].fetch_buf_size)) 171 + ublk_err("%s: can't lock fetch buffer %s\n", __func__, 172 + strerror(errno)); 173 + t->fetch[i].br = io_uring_setup_buf_ring(&t->ring, 1, 174 + i, IOU_PBUF_RING_INC, &ret); 175 + if (!t->fetch[i].br) { 176 + ublk_err("Buffer ring register failed %d\n", ret); 177 + return ret; 178 + } 179 + } 180 + 181 + return 0; 182 + } 183 + 143 184 int ublk_batch_alloc_buf(struct ublk_thread *t) 144 185 { 186 + int ret; 187 + 145 188 ublk_assert(t->nr_commit_buf < 16); 146 - return alloc_batch_commit_buf(t); 189 + 190 + ret = alloc_batch_commit_buf(t); 191 + if (ret) 192 + return ret; 193 + return alloc_batch_fetch_buf(t); 147 194 } 148 195 149 196 void ublk_batch_free_buf(struct ublk_thread *t) 150 197 { 151 198 free_batch_commit_buf(t); 199 + free_batch_fetch_buf(t); 152 200 } 153 201 154 202 static void ublk_init_batch_cmd(struct ublk_thread *t, __u16 q_id, ··· 245 197 246 198 /* Use plain user buffer instead of fixed buffer */ 247 199 cmd->flags |= t->cmd_flags; 200 + 
} 201 + 202 + static void ublk_batch_queue_fetch(struct ublk_thread *t, 203 + struct ublk_queue *q, 204 + unsigned short buf_idx) 205 + { 206 + unsigned short nr_elem = t->fetch[buf_idx].fetch_buf_size / 2; 207 + struct io_uring_sqe *sqe; 208 + 209 + io_uring_buf_ring_add(t->fetch[buf_idx].br, t->fetch[buf_idx].fetch_buf, 210 + t->fetch[buf_idx].fetch_buf_size, 211 + 0, 0, 0); 212 + io_uring_buf_ring_advance(t->fetch[buf_idx].br, 1); 213 + 214 + ublk_io_alloc_sqes(t, &sqe, 1); 215 + 216 + ublk_init_batch_cmd(t, q->q_id, sqe, UBLK_U_IO_FETCH_IO_CMDS, 2, nr_elem, 217 + buf_idx); 218 + 219 + sqe->rw_flags= IORING_URING_CMD_MULTISHOT; 220 + sqe->buf_group = buf_idx; 221 + sqe->flags |= IOSQE_BUFFER_SELECT; 222 + 223 + t->fetch[buf_idx].fetch_buf_off = 0; 224 + } 225 + 226 + void ublk_batch_start_fetch(struct ublk_thread *t, 227 + struct ublk_queue *q) 228 + { 229 + int i; 230 + 231 + for (i = 0; i < UBLKS_T_NR_FETCH_BUF; i++) 232 + ublk_batch_queue_fetch(t, q, i); 233 + } 234 + 235 + static unsigned short ublk_compl_batch_fetch(struct ublk_thread *t, 236 + struct ublk_queue *q, 237 + const struct io_uring_cqe *cqe) 238 + { 239 + unsigned short buf_idx = user_data_to_tag(cqe->user_data); 240 + unsigned start = t->fetch[buf_idx].fetch_buf_off; 241 + unsigned end = start + cqe->res; 242 + void *buf = t->fetch[buf_idx].fetch_buf; 243 + int i; 244 + 245 + if (cqe->res < 0) 246 + return buf_idx; 247 + 248 + if ((end - start) / 2 > q->q_depth) { 249 + ublk_err("%s: fetch duplicated ios offset %u count %u\n", __func__, start, cqe->res); 250 + 251 + for (i = start; i < end; i += 2) { 252 + unsigned short tag = *(unsigned short *)(buf + i); 253 + 254 + ublk_err("%u ", tag); 255 + } 256 + ublk_err("\n"); 257 + } 258 + 259 + for (i = start; i < end; i += 2) { 260 + unsigned short tag = *(unsigned short *)(buf + i); 261 + 262 + if (tag >= q->q_depth) 263 + ublk_err("%s: bad tag %u\n", __func__, tag); 264 + 265 + if (q->tgt_ops->queue_io) 266 + q->tgt_ops->queue_io(t, q, tag); 267 + 
} 268 + t->fetch[buf_idx].fetch_buf_off = end; 269 + return buf_idx; 248 270 } 249 271 250 272 int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q) ··· 376 258 const struct io_uring_cqe *cqe) 377 259 { 378 260 unsigned op = user_data_to_op(cqe->user_data); 261 + struct ublk_queue *q; 262 + unsigned buf_idx; 263 + unsigned q_id; 379 264 380 265 if (op == _IOC_NR(UBLK_U_IO_PREP_IO_CMDS) || 381 266 op == _IOC_NR(UBLK_U_IO_COMMIT_IO_CMDS)) { 382 267 t->cmd_inflight--; 383 268 ublk_batch_compl_commit_cmd(t, cqe, op); 384 269 return; 270 + } 271 + 272 + /* FETCH command is per queue */ 273 + q_id = user_data_to_q_id(cqe->user_data); 274 + q = &t->dev->q[q_id]; 275 + buf_idx = ublk_compl_batch_fetch(t, q, cqe); 276 + 277 + if (cqe->res < 0 && cqe->res != -ENOBUFS) { 278 + t->cmd_inflight--; 279 + t->state |= UBLKS_T_STOPPING; 280 + } else if (!(cqe->flags & IORING_CQE_F_MORE) || cqe->res == -ENOBUFS) { 281 + t->cmd_inflight--; 282 + ublk_batch_queue_fetch(t, q, buf_idx); 385 283 } 386 284 } 387 285
+11 -3
tools/testing/selftests/ublk/kublk.c
··· 519 519 int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; 520 520 int ret; 521 521 522 + /* FETCH_IO_CMDS is multishot, so increase cq depth for BATCH_IO */ 523 + if (ublk_dev_batch_io(dev)) 524 + cq_depth += dev->dev_info.queue_depth; 525 + 522 526 ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth, 523 527 IORING_SETUP_COOP_TASKRUN | 524 528 IORING_SETUP_SINGLE_ISSUER | ··· 882 878 unsigned q_id = user_data_to_q_id(cqe->user_data); 883 879 unsigned cmd_op = user_data_to_op(cqe->user_data); 884 880 885 - if (cqe->res < 0 && cqe->res != -ENODEV) 881 + if (cqe->res < 0 && cqe->res != -ENODEV && cqe->res != -ENOBUFS) 886 882 ublk_err("%s: res %d userdata %llx thread state %x\n", __func__, 887 883 cqe->res, cqe->user_data, t->state); 888 884 ··· 1005 1001 if (!ublk_thread_batch_io(&t)) { 1006 1002 /* submit all io commands to ublk driver */ 1007 1003 ublk_submit_fetch_commands(&t); 1008 - } else if (!t.idx) { 1004 + } else { 1005 + struct ublk_queue *q = &t.dev->q[t.idx]; 1006 + 1009 1007 /* prepare all io commands in the 1st thread context */ 1010 - ublk_batch_setup_queues(&t); 1008 + if (!t.idx) 1009 + ublk_batch_setup_queues(&t); 1010 + ublk_batch_start_fetch(&t, q); 1011 1011 } 1012 1012 1013 1013 do {
+13
tools/testing/selftests/ublk/kublk.h
··· 198 198 unsigned short count; 199 199 }; 200 200 201 + struct batch_fetch_buf { 202 + struct io_uring_buf_ring *br; 203 + void *fetch_buf; 204 + unsigned int fetch_buf_size; 205 + unsigned int fetch_buf_off; 206 + }; 207 + 201 208 struct ublk_thread { 202 209 struct ublk_dev *dev; 203 210 unsigned idx; ··· 231 224 #define UBLKS_T_COMMIT_BUF_INV_IDX ((unsigned short)-1) 232 225 struct allocator commit_buf_alloc; 233 226 struct batch_commit_buf commit; 227 + /* FETCH_IO_CMDS buffer */ 228 + #define UBLKS_T_NR_FETCH_BUF 2 229 + struct batch_fetch_buf fetch[UBLKS_T_NR_FETCH_BUF]; 234 230 235 231 struct io_uring ring; 236 232 }; ··· 525 515 526 516 /* Queue UBLK_U_IO_PREP_IO_CMDS for a specific queue with batch elements */ 527 517 int ublk_batch_queue_prep_io_cmds(struct ublk_thread *t, struct ublk_queue *q); 518 + /* Start fetching I/O commands using multishot UBLK_U_IO_FETCH_IO_CMDS */ 519 + void ublk_batch_start_fetch(struct ublk_thread *t, 520 + struct ublk_queue *q); 528 521 /* Handle completion of batch I/O commands (prep/commit) */ 529 522 void ublk_batch_compl_cmd(struct ublk_thread *t, 530 523 const struct io_uring_cqe *cqe);