Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at ee9dce44362b2d8132c32964656ab6dff7dfbc6a 826 lines 27 kB view raw
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef USER_BLK_DRV_CMD_INC_H
#define USER_BLK_DRV_CMD_INC_H

#include <linux/types.h>

/* ublk server command definition */

/*
 * Admin commands, issued by ublk server, and handled by ublk driver.
 *
 * Legacy command definition, don't use in new application, and don't
 * add new such definition any more
 */
#define	UBLK_CMD_GET_QUEUE_AFFINITY	0x01
#define	UBLK_CMD_GET_DEV_INFO	0x02
#define	UBLK_CMD_ADD_DEV		0x04
#define	UBLK_CMD_DEL_DEV		0x05
#define	UBLK_CMD_START_DEV	0x06
#define	UBLK_CMD_STOP_DEV	0x07
#define	UBLK_CMD_SET_PARAMS	0x08
#define	UBLK_CMD_GET_PARAMS	0x09
#define	UBLK_CMD_START_USER_RECOVERY	0x10
#define	UBLK_CMD_END_USER_RECOVERY	0x11
#define	UBLK_CMD_GET_DEV_INFO2		0x12

/* Any new ctrl command should encode by __IO*() */
#define UBLK_U_CMD_GET_QUEUE_AFFINITY \
	_IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO \
	_IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_ADD_DEV \
	_IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV \
	_IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_START_DEV \
	_IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_STOP_DEV \
	_IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_SET_PARAMS \
	_IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_PARAMS \
	_IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_START_USER_RECOVERY \
	_IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_END_USER_RECOVERY \
	_IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_DEV_INFO2 \
	_IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_GET_FEATURES \
	_IOR('u', 0x13, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_DEL_DEV_ASYNC \
	_IOR('u', 0x14, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_UPDATE_SIZE \
	_IOWR('u', 0x15, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_QUIESCE_DEV \
	_IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
#define UBLK_U_CMD_TRY_STOP_DEV \
	_IOWR('u', 0x17, struct ublksrv_ctrl_cmd)

/*
 * Register a shared memory buffer for zero-copy I/O.
 * Input:  ctrl_cmd.addr points to struct ublk_shmem_buf_reg (buffer VA + size)
 *         ctrl_cmd.len = sizeof(struct ublk_shmem_buf_reg)
 * Result: >= 0 is the assigned buffer index, < 0 is error
 *
 * The kernel pins pages from the calling process's address space
 * and inserts PFN ranges into a per-device maple tree. When a block
 * request's pages match registered pages, the driver sets
 * UBLK_IO_F_SHMEM_ZC and encodes the buffer index + offset in addr,
 * allowing the server to access the data via its own mapping of the
 * same shared memory - true zero copy.
 *
 * The memory can be backed by memfd, hugetlbfs, or any GUP-compatible
 * shared mapping. Queue freeze is handled internally.
 *
 * The buffer VA and size are passed via a user buffer (not inline in
 * ctrl_cmd) so that unprivileged devices can prepend the device path
 * to ctrl_cmd.addr without corrupting the VA.
 */
#define UBLK_U_CMD_REG_BUF \
	_IOWR('u', 0x18, struct ublksrv_ctrl_cmd)
/*
 * Unregister a shared memory buffer.
 * Input: ctrl_cmd.data[0] = buffer index
 */
#define UBLK_U_CMD_UNREG_BUF \
	_IOWR('u', 0x19, struct ublksrv_ctrl_cmd)

/* Parameter buffer for UBLK_U_CMD_REG_BUF, pointed to by ctrl_cmd.addr */
struct ublk_shmem_buf_reg {
	__u64	addr;	/* userspace virtual address of shared memory */
	__u64	len;	/* buffer size in bytes, page-aligned, default max 4GB */
	__u32	flags;
	__u32	reserved;
};

/* Pin pages without FOLL_WRITE; usable with write-sealed memfd */
#define UBLK_SHMEM_BUF_READ_ONLY	(1U << 0)

/*
 * 64bits are enough now, and it should be easy to extend in case of
 * running out of feature flags
 */
#define UBLK_FEATURES_LEN  8

/*
 * IO commands, issued by ublk server, and handled by ublk driver.
 *
 * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request
 *      from ublk driver, should be issued only when starting device. After
 *      the associated cqe is returned, request's tag can be retrieved via
 *      cqe->userdata.
 *
 * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled
 *      this IO request, request's handling result is committed to ublk
 *      driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
 *      handled before completing io request.
 *
 * NEED_GET_DATA: only used for write requests to set io addr and copy data
 *      When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA
 *      command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA.
 *
 *      It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
 *      while starting a ublk device.
 */

/*
 * Legacy IO command definition, don't use in new application, and don't
 * add new such definition any more
 */
#define	UBLK_IO_FETCH_REQ		0x20
#define	UBLK_IO_COMMIT_AND_FETCH_REQ	0x21
#define	UBLK_IO_NEED_GET_DATA	0x22

/* Any new IO command should encode by __IOWR() */
#define	UBLK_U_IO_FETCH_REQ	\
	_IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd)
#define	UBLK_U_IO_COMMIT_AND_FETCH_REQ	\
	_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
#define	UBLK_U_IO_NEED_GET_DATA	\
	_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
#define UBLK_U_IO_REGISTER_IO_BUF	\
	_IOWR('u', 0x23, struct ublksrv_io_cmd)
#define UBLK_U_IO_UNREGISTER_IO_BUF	\
	_IOWR('u', 0x24, struct ublksrv_io_cmd)

/*
 * return 0 if the command is run successfully, otherwise failure code
 * is returned
 */
#define UBLK_U_IO_PREP_IO_CMDS	\
	_IOWR('u', 0x25, struct ublk_batch_io)
/*
 * If failure code is returned, nothing in the command buffer is handled.
 * Otherwise, the returned value means how many bytes in command buffer
 * are handled actually, then number of handled IOs can be calculated with
 * `elem_bytes` for each IO. IOs in the remained bytes are not committed,
 * userspace has to check return value for dealing with partial committing
 * correctly.
 */
#define UBLK_U_IO_COMMIT_IO_CMDS	\
	_IOWR('u', 0x26, struct ublk_batch_io)

/*
 * Fetch io commands to provided buffer in multishot style,
 * `IORING_URING_CMD_MULTISHOT` is required for this command.
 */
#define UBLK_U_IO_FETCH_IO_CMDS	\
	_IOWR('u', 0x27, struct ublk_batch_io)

/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK			0
#define UBLK_IO_RES_NEED_GET_DATA	1
#define UBLK_IO_RES_ABORT		(-ENODEV)

#define UBLKSRV_CMD_BUF_OFFSET	0
#define UBLKSRV_IO_BUF_OFFSET	0x80000000

/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */
#define UBLK_MAX_QUEUE_DEPTH	4096

/* single IO buffer max size is 32MB */
#define UBLK_IO_BUF_OFF		0
#define UBLK_IO_BUF_BITS	25
#define UBLK_IO_BUF_BITS_MASK	((1ULL << UBLK_IO_BUF_BITS) - 1)

/* so at most 64K IOs for each queue */
#define UBLK_TAG_OFF		UBLK_IO_BUF_BITS
#define UBLK_TAG_BITS		16
#define UBLK_TAG_BITS_MASK	((1ULL << UBLK_TAG_BITS) - 1)

/* max 4096 queues */
#define UBLK_QID_OFF		(UBLK_TAG_OFF + UBLK_TAG_BITS)
#define UBLK_QID_BITS		12
#define UBLK_QID_BITS_MASK	((1ULL << UBLK_QID_BITS) - 1)

#define UBLK_MAX_NR_QUEUES	(1U << UBLK_QID_BITS)

#define UBLKSRV_IO_BUF_TOTAL_BITS	(UBLK_QID_OFF + UBLK_QID_BITS)
#define UBLKSRV_IO_BUF_TOTAL_SIZE	(1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)

/* Copy to/from request integrity buffer instead of data buffer */
#define UBLK_INTEGRITY_FLAG_OFF		62
#define UBLKSRV_IO_INTEGRITY_FLAG	(1ULL << UBLK_INTEGRITY_FLAG_OFF)

/*
 * ublk server can register data buffers for incoming I/O requests with a sparse
 * io_uring buffer table. The request buffer can then be used as the data buffer
 * for io_uring operations via the fixed buffer index.
 * Note that the ublk server can never directly access the request data memory.
 *
 * To use this feature, the ublk server must first register a sparse buffer
 * table on an io_uring instance.
 * When an incoming ublk request is received, the ublk server submits a
 * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The
 * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register
 * and addr is the index in the io_uring's buffer table to install the buffer.
 * SQEs can now be submitted to the io_uring to read/write the request's buffer
 * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or
 * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index.
 * Once the last io_uring operation using the request's buffer has completed,
 * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag,
 * and addr again specifying the request buffer to unregister.
 * The ublk request is completed when its buffer is unregistered from all
 * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ.
 *
 * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak
 * uninitialized kernel memory by not reading into the full request buffer.
 */
#define UBLK_F_SUPPORT_ZERO_COPY	(1ULL << 0)

/*
 * Force to complete io cmd via io_uring_cmd_complete_in_task so that
 * performance comparison is done easily with using task_work_add
 */
#define UBLK_F_URING_CMD_COMP_IN_TASK	(1ULL << 1)

/*
 * User should issue io cmd again for write requests to
 * set io buffer address and copy data from bio vectors
 * to the userspace io buffer.
 *
 * In this mode, task_work is not used.
 */
#define UBLK_F_NEED_GET_DATA (1ULL << 2)

/*
 * - Block devices are recoverable if ublk server exits and restarts
 * - Outstanding I/O when ublk server exits is met with errors
 * - I/O issued while there is no ublk server queues
 */
#define UBLK_F_USER_RECOVERY	(1ULL << 3)

/*
 * - Block devices are recoverable if ublk server exits and restarts
 * - Outstanding I/O when ublk server exits is reissued
 * - I/O issued while there is no ublk server queues
 */
#define UBLK_F_USER_RECOVERY_REISSUE	(1ULL << 4)

/*
 * Unprivileged user can create /dev/ublkcN and /dev/ublkbN.
 *
 * /dev/ublk-control needs to be available for unprivileged user, and it
 * can be done via udev rule to make all control commands available to
 * unprivileged user. Except for the command of UBLK_CMD_ADD_DEV, all
 * other commands are only allowed for the owner of the specified device.
 *
 * When userspace sends UBLK_CMD_ADD_DEV, the device pair's owner_uid and
 * owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only
 * the current user's uid/gid is stored, that said owner of the created
 * device is always the current user.
 *
 * We still need udev rule to apply OWNER/GROUP with the stored owner_uid
 * and owner_gid.
 *
 * Then ublk server can be run as unprivileged user, and /dev/ublkbN can
 * be accessed and managed by its owner represented by owner_uid/owner_gid.
 */
#define UBLK_F_UNPRIVILEGED_DEV	(1ULL << 5)

/* use ioctl encoding for uring command */
#define UBLK_F_CMD_IOCTL_ENCODE	(1ULL << 6)

/*
 * Copy between request and user buffer by pread()/pwrite()
 *
 * Not available for UBLK_F_UNPRIVILEGED_DEV, otherwise userspace may
 * deceive us by not filling request buffer, then kernel uninitialized
 * data may be leaked.
 */
#define UBLK_F_USER_COPY	(1ULL << 7)

/*
 * User space sets this flag when setting up the device to request zoned storage support. Kernel may
 * deny the request by returning an error.
 */
#define UBLK_F_ZONED (1ULL << 8)

/*
 * - Block devices are recoverable if ublk server exits and restarts
 * - Outstanding I/O when ublk server exits is met with errors
 * - I/O issued while there is no ublk server is met with errors
 */
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)

/*
 * Resizing a block device is possible with UBLK_U_CMD_UPDATE_SIZE
 * New size is passed in cmd->data[0] and is in units of sectors
 */
#define UBLK_F_UPDATE_SIZE		 (1ULL << 10)

/*
 * request buffer is registered automatically to uring_cmd's io_uring
 * context before delivering this io command to ublk server, meantime
 * it is un-registered automatically when completing this io command.
 *
 * For using this feature:
 *
 * - ublk server has to create sparse buffer table on the same `io_ring_ctx`
 *   for issuing `UBLK_IO_FETCH_REQ` and `UBLK_IO_COMMIT_AND_FETCH_REQ`.
 *   If uring_cmd isn't issued on same `io_ring_ctx`, it is ublk server's
 *   responsibility to unregister the buffer by issuing `IO_UNREGISTER_IO_BUF`
 *   manually, otherwise this ublk request won't complete.
 *
 * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
 *   `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
 *   the definition of ublk_sqe_addr_to_auto_buf_reg()
 *
 * - pass buffer index from `ublk_auto_buf_reg.index`
 *
 * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
 *
 * - pass flags from `ublk_auto_buf_reg.flags` if needed
 *
 * This way avoids extra cost from two uring_cmd, but also simplifies backend
 * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
 * IO_UNREGISTER_IO_BUF becomes not necessary.
 *
 * If wrong data or flags are provided, both IO_FETCH_REQ and
 * IO_COMMIT_AND_FETCH_REQ are failed, for the latter, the ublk IO request
 * won't be completed until new IO_COMMIT_AND_FETCH_REQ command is issued
 * successfully
 */
#define UBLK_F_AUTO_BUF_REG 	(1ULL << 11)

/*
 * Control command `UBLK_U_CMD_QUIESCE_DEV` is added for quiescing device,
 * which state can be transitioned to `UBLK_S_DEV_QUIESCED` or
 * `UBLK_S_DEV_FAIL_IO` finally, and it needs ublk server cooperation for
 * handling `UBLK_IO_RES_ABORT` correctly.
 *
 * Typical use case is for supporting to upgrade ublk server application,
 * meantime keep ublk block device persistent during the period.
 *
 * This feature is only available when UBLK_F_USER_RECOVERY is enabled.
 *
 * Note, this command returns -EBUSY in case that all IO commands are being
 * handled by ublk server and not completed in specified time period which
 * is passed from the control command parameter.
 */
#define UBLK_F_QUIESCE		(1ULL << 12)

/*
 * If this feature is set, ublk_drv supports each (qid,tag) pair having
 * its own independent daemon task that is responsible for handling it.
 * If it is not set, daemons are per-queue instead, so for two pairs
 * (qid1,tag1) and (qid2,tag2), if qid1 == qid2, then the same task must
 * be responsible for handling (qid1,tag1) and (qid2,tag2).
 */
#define UBLK_F_PER_IO_DAEMON (1ULL << 13)

/*
 * If this feature is set, UBLK_U_IO_REGISTER_IO_BUF/UBLK_U_IO_UNREGISTER_IO_BUF
 * can be issued for an I/O on any task. q_id and tag are also ignored in
 * UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd.
 * If it is unset, zero-copy buffers can only be registered and unregistered by
 * the I/O's daemon task. The q_id and tag of the registered buffer are required
 * in UBLK_U_IO_UNREGISTER_IO_BUF's ublksrv_io_cmd.
 */
#define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14)

/*
 * Support the following commands for delivering & committing io command
 * in batch.
 *
 * - UBLK_U_IO_PREP_IO_CMDS
 * - UBLK_U_IO_COMMIT_IO_CMDS
 * - UBLK_U_IO_FETCH_IO_CMDS
 * - UBLK_U_IO_REGISTER_IO_BUF
 * - UBLK_U_IO_UNREGISTER_IO_BUF
 *
 * The existing UBLK_U_IO_FETCH_REQ, UBLK_U_IO_COMMIT_AND_FETCH_REQ and
 * UBLK_U_IO_NEED_GET_DATA uring_cmd are not supported for this feature.
 */
#define UBLK_F_BATCH_IO (1ULL << 15)

/*
 * ublk device supports requests with integrity/metadata buffer.
 * Requires UBLK_F_USER_COPY.
 */
#define UBLK_F_INTEGRITY (1ULL << 16)

/*
 * The device supports the UBLK_U_CMD_TRY_STOP_DEV command, which
 * allows stopping the device only if there are no openers.
 */
#define UBLK_F_SAFE_STOP_DEV (1ULL << 17)

/* Disable automatic partition scanning when device is started */
#define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18)

/*
 * Enable shared memory zero copy. When enabled, the server can register
 * shared memory buffers via UBLK_U_CMD_REG_BUF. If a block request's
 * pages match a registered buffer, UBLK_IO_F_SHMEM_ZC is set and addr
 * encodes the buffer index + offset instead of a userspace buffer address.
 */
#define UBLK_F_SHMEM_ZC (1ULL << 19)

/* device state */
#define UBLK_S_DEV_DEAD	0
#define UBLK_S_DEV_LIVE	1
#define UBLK_S_DEV_QUIESCED	2
#define UBLK_S_DEV_FAIL_IO 	3

/* shipped via sqe->cmd of io_uring command */
struct ublksrv_ctrl_cmd {
	/* sent to which device, must be valid */
	__u32	dev_id;

	/* sent to which queue, must be -1 if the cmd isn't for queue */
	__u16	queue_id;
	/*
	 * cmd specific buffer, can be IN or OUT.
	 */
	__u16	len;
	__u64	addr;

	/* inline data */
	__u64	data[1];

	/*
	 * Used for UBLK_F_UNPRIVILEGED_DEV and UBLK_CMD_GET_DEV_INFO2
	 * only, include null char
	 */
	__u16	dev_path_len;
	__u16	pad;
	__u32	reserved;
};

struct ublksrv_ctrl_dev_info {
	__u16	nr_hw_queues;
	__u16	queue_depth;
	__u16	state;
	__u16	pad0;

	__u32	max_io_buf_bytes;
	__u32	dev_id;

	__s32	ublksrv_pid;
	__u32	pad1;

	__u64	flags;

	/* For ublksrv internal use, invisible to ublk driver */
	__u64	ublksrv_flags;

	__u32	owner_uid;	/* store by kernel */
	__u32	owner_gid;	/* store by kernel */
	__u64	reserved1;
	__u64	reserved2;
};

#define		UBLK_IO_OP_READ		0
#define		UBLK_IO_OP_WRITE		1
#define		UBLK_IO_OP_FLUSH		2
#define		UBLK_IO_OP_DISCARD		3
#define		UBLK_IO_OP_WRITE_SAME		4
#define		UBLK_IO_OP_WRITE_ZEROES		5
#define		UBLK_IO_OP_ZONE_OPEN		10
#define		UBLK_IO_OP_ZONE_CLOSE		11
#define		UBLK_IO_OP_ZONE_FINISH		12
#define		UBLK_IO_OP_ZONE_APPEND		13
#define		UBLK_IO_OP_ZONE_RESET_ALL	14
#define		UBLK_IO_OP_ZONE_RESET		15
/*
 * Construct a zone report. The report request is carried in `struct
 * ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone
 * and shall indicate the first zone of the report. The `nr_zones` shall
 * indicate how many zones should be reported at most. The report shall be
 * delivered as a `struct blk_zone` array. To report fewer zones than requested,
 * zero the last entry of the returned array.
 *
 * Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in
 * include/uapi/linux/blkzoned.h are part of ublk UAPI.
 */
#define		UBLK_IO_OP_REPORT_ZONES		18

#define		UBLK_IO_F_FAILFAST_DEV		(1U << 8)
#define		UBLK_IO_F_FAILFAST_TRANSPORT	(1U << 9)
#define		UBLK_IO_F_FAILFAST_DRIVER	(1U << 10)
#define		UBLK_IO_F_META			(1U << 11)
#define		UBLK_IO_F_FUA			(1U << 13)
#define		UBLK_IO_F_NOUNMAP		(1U << 15)
#define		UBLK_IO_F_SWAP			(1U << 16)
/*
 * For UBLK_F_AUTO_BUF_REG & UBLK_AUTO_BUF_REG_FALLBACK only.
 *
 * This flag is set if auto buffer register is failed & ublk server passes
 * UBLK_AUTO_BUF_REG_FALLBACK, and ublk server need to register buffer
 * manually for handling the delivered IO command if this flag is observed
 *
 * ublk server has to check this flag if UBLK_AUTO_BUF_REG_FALLBACK is
 * passed in.
 */
#define		UBLK_IO_F_NEED_REG_BUF		(1U << 17)
/* Request has an integrity data buffer */
#define		UBLK_IO_F_INTEGRITY		(1U << 18)
/*
 * I/O buffer is in a registered shared memory buffer. When set, the addr
 * field in ublksrv_io_desc encodes buffer index and byte offset instead
 * of a userspace virtual address.
 */
#define		UBLK_IO_F_SHMEM_ZC		(1U << 19)

/*
 * io cmd is described by this structure, and stored in share memory, indexed
 * by request tag.
 *
 * The data is stored by ublk driver, and read by ublksrv after one fetch command
 * returns.
 */
struct ublksrv_io_desc {
	/* op: bit 0-7, flags: bit 8-31 */
	__u32		op_flags;

	union {
		__u32		nr_sectors;
		__u32		nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */
	};

	/* start sector for this io */
	__u64		start_sector;

	/* buffer address in ublksrv daemon vm space, from ublk driver */
	__u64		addr;
};

static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags & 0xff;
}

static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
{
	return iod->op_flags >> 8;
}

/*
 * If this flag is set, fallback by completing the uring_cmd and setting
 * `UBLK_IO_F_NEED_REG_BUF` in case of auto-buf-register failure;
 * otherwise the client ublk request is failed silently
 *
 * If ublk server passes this flag, it has to check if UBLK_IO_F_NEED_REG_BUF
 * is set in `ublksrv_io_desc.op_flags`. If UBLK_IO_F_NEED_REG_BUF is set,
 * ublk server needs to register io buffer manually for handling IO command.
 */
#define UBLK_AUTO_BUF_REG_FALLBACK	(1 << 0)
#define UBLK_AUTO_BUF_REG_F_MASK	UBLK_AUTO_BUF_REG_FALLBACK

struct ublk_auto_buf_reg {
	/* index for registering the delivered request buffer */
	__u16  index;
	__u8   flags;
	__u8   reserved0;

	/*
	 * io_ring FD can be passed via the reserve field in future for
	 * supporting to register io buffer to external io_uring
	 */
	__u32  reserved1;
};

/*
 * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
 * uring_cmd's sqe->addr:
 *
 * 	- bit0 ~ bit15: buffer index
 * 	- bit16 ~ bit23: flags
 * 	- bit24 ~ bit31: reserved0
 * 	- bit32 ~ bit63: reserved1
 */
static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
		__u64 sqe_addr)
{
	struct ublk_auto_buf_reg reg = {
		.index = (__u16)sqe_addr,
		.flags = (__u8)(sqe_addr >> 16),
		.reserved0 = (__u8)(sqe_addr >> 24),
		.reserved1 = (__u32)(sqe_addr >> 32),
	};

	return reg;
}

static inline __u64
ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
{
	__u64 addr = buf->index | (__u64)buf->flags << 16 | (__u64)buf->reserved0 << 24 |
		(__u64)buf->reserved1 << 32;

	return addr;
}

/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
	__u16	q_id;

	/* for fetch/commit which result */
	__u16	tag;

	/* io result, it is valid for COMMIT* command only */
	__s32	result;

	union {
		/*
		 * userspace buffer address in ublksrv daemon process, valid for
		 * FETCH* command only
		 *
		 * `addr` should not be used when UBLK_F_USER_COPY is enabled,
		 * because userspace handles data copy by pread()/pwrite() over
		 * /dev/ublkcN. But in case of UBLK_F_ZONED, this union is
		 * re-used to pass back the allocated LBA for
		 * UBLK_IO_OP_ZONE_APPEND which actually depends on
		 * UBLK_F_USER_COPY
		 */
		__u64	addr;
		__u64	zone_append_lba;
	};
};

struct ublk_elem_header {
	__u16 tag;	/* IO tag */

	/*
	 * Buffer index for incoming io command, only valid iff
	 * UBLK_F_AUTO_BUF_REG is set
	 */
	__u16 buf_index;
	__s32 result;	/* I/O completion result (commit only) */
};

/*
 * uring_cmd buffer structure for batch commands
 *
 * buffer includes multiple elements, which number is specified by
 * `nr_elem`. Each element buffer is organized in the following order:
 *
 * struct ublk_elem_buffer {
 * 	// Mandatory fields (8 bytes)
 * 	struct ublk_elem_header header;
 *
 * 	// Optional fields (8 bytes each, included based on flags)
 *
 * 	// Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data
 * 	// between ublk request and ublk server buffer
 * 	__u64 buf_addr;
 *
 * 	// returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA)
 * 	__u64 zone_lba;
 * }
 *
 * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS`
 */
struct ublk_batch_io {
	__u16	q_id;
#define UBLK_BATCH_F_HAS_ZONE_LBA	(1 << 0)
#define UBLK_BATCH_F_HAS_BUF_ADDR	(1 << 1)
#define UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK	(1 << 2)
	__u16	flags;
	__u16	nr_elem;
	__u8	elem_bytes;
	__u8	reserved;
	__u64	reserved2;
};

struct ublk_param_basic {
#define UBLK_ATTR_READ_ONLY            (1 << 0)
#define UBLK_ATTR_ROTATIONAL           (1 << 1)
#define UBLK_ATTR_VOLATILE_CACHE       (1 << 2)
#define UBLK_ATTR_FUA                  (1 << 3)
	__u32	attrs;
	__u8	logical_bs_shift;
	__u8	physical_bs_shift;
	__u8	io_opt_shift;
	__u8	io_min_shift;

	__u32	max_sectors;
	__u32	chunk_sectors;

	__u64	dev_sectors;
	__u64	virt_boundary_mask;
};

struct ublk_param_discard {
	__u32	discard_alignment;

	__u32	discard_granularity;
	__u32	max_discard_sectors;

	__u32	max_write_zeroes_sectors;
	__u16	max_discard_segments;
	__u16	reserved0;
};

/*
 * read-only, can't set via UBLK_CMD_SET_PARAMS, disk_devt is available
 * after device is started
 */
struct ublk_param_devt {
	__u32   char_major;
	__u32   char_minor;
	__u32   disk_major;
	__u32   disk_minor;
};

struct ublk_param_zoned {
	__u32	max_open_zones;
	__u32	max_active_zones;
	__u32	max_zone_append_sectors;
	__u8	reserved[20];
};

struct ublk_param_dma_align {
	__u32	alignment;
	__u8	pad[4];
};

#define UBLK_MIN_SEGMENT_SIZE   4096
/*
 * If any one of the three segment parameter is set as 0, the behavior is
 * undefined.
 */
struct ublk_param_segment {
	/*
	 * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has
	 * to be >= UBLK_MIN_SEGMENT_SIZE(4096)
	 */
	__u64 	seg_boundary_mask;

	/*
	 * max_segment_size could be override by virt_boundary_mask, so be
	 * careful when setting both.
	 *
	 * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096)
	 */
	__u32 	max_segment_size;
	__u16 	max_segments;
	__u8	pad[2];
};

struct ublk_param_integrity {
	__u32	flags;			/* LBMD_PI_CAP_* from linux/fs.h */
	__u16	max_integrity_segments;	/* 0 means no limit */
	__u8	interval_exp;
	__u8	metadata_size;		/* UBLK_PARAM_TYPE_INTEGRITY requires nonzero */
	__u8	pi_offset;
	__u8	csum_type;		/* LBMD_PI_CSUM_* from linux/fs.h */
	__u8	tag_size;
	__u8	pad[5];
};

struct ublk_params {
	/*
	 * Total length of parameters, userspace has to set 'len' for both
	 * SET_PARAMS and GET_PARAMS command, and driver may update len
	 * if two sides use different version of 'ublk_params', same with
	 * 'types' fields.
	 */
	__u32	len;
#define UBLK_PARAM_TYPE_BASIC           (1 << 0)
#define UBLK_PARAM_TYPE_DISCARD         (1 << 1)
#define UBLK_PARAM_TYPE_DEVT            (1 << 2)
#define UBLK_PARAM_TYPE_ZONED           (1 << 3)
#define UBLK_PARAM_TYPE_DMA_ALIGN       (1 << 4)
#define UBLK_PARAM_TYPE_SEGMENT         (1 << 5)
#define UBLK_PARAM_TYPE_INTEGRITY       (1 << 6) /* requires UBLK_F_INTEGRITY */
	__u32	types;			/* types of parameter included */

	struct ublk_param_basic		basic;
	struct ublk_param_discard	discard;
	struct ublk_param_devt		devt;
	struct ublk_param_zoned	zoned;
	struct ublk_param_dma_align	dma;
	struct ublk_param_segment	seg;
	struct ublk_param_integrity	integrity;
};

/*
 * Shared memory zero-copy addr encoding for UBLK_IO_F_SHMEM_ZC.
 *
 * When UBLK_IO_F_SHMEM_ZC is set, ublksrv_io_desc.addr is encoded as:
 *	bits [0:31]  = byte offset within the buffer (up to 4GB)
 *	bits [32:47] = buffer index (up to 65536 buffers)
 *	bits [48:63] = reserved (must be zero)
 */
#define UBLK_SHMEM_ZC_OFF_MASK	0xffffffffULL
#define UBLK_SHMEM_ZC_IDX_OFF	32
#define UBLK_SHMEM_ZC_IDX_MASK	0xffffULL

/* Build the encoded addr from a buffer index and a byte offset */
static inline __u64 ublk_shmem_zc_addr(__u16 index, __u32 offset)
{
	return ((__u64)index << UBLK_SHMEM_ZC_IDX_OFF) | offset;
}

/* Extract the buffer index from an encoded addr */
static inline __u16 ublk_shmem_zc_index(__u64 addr)
{
	return (addr >> UBLK_SHMEM_ZC_IDX_OFF) & UBLK_SHMEM_ZC_IDX_MASK;
}

/* Extract the byte offset from an encoded addr */
static inline __u32 ublk_shmem_zc_offset(__u64 addr)
{
	return (__u32)(addr & UBLK_SHMEM_ZC_OFF_MASK);
}

#endif