Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ublk: remove struct ublk_rq_data

__ublk_check_and_get_req() attempts to atomically look up the struct
request for a ublk I/O and take a reference on it. However, the request
can be freed between the lookup on the tagset in blk_mq_tag_to_rq() and
the increment of its reference count in ublk_get_req_ref(), for example
if an elevator switch happens concurrently.

Fix the potential use after free by moving the reference count from
ublk_rq_data to ublk_io. Move the fields buf_index and buf_ctx_handle
too to reduce the number of cache lines touched when dispatching and
completing a ublk I/O, allowing ublk_rq_data to be removed entirely.

Suggested-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Fixes: 62fe99cef94a ("ublk: add read()/write() support for ublk char device")
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250620151008.3976463-3-csander@purestorage.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Caleb Sander Mateos and committed by
Jens Axboe
7ba962f4 c2f48453

+71 -60
+71 -60
drivers/block/ublk_drv.c
··· 82 82 UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \ 83 83 UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT) 84 84 85 - struct ublk_rq_data { 86 - refcount_t ref; 87 - 88 - /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */ 89 - u16 buf_index; 90 - void *buf_ctx_handle; 91 - }; 92 - 93 85 struct ublk_uring_cmd_pdu { 94 86 /* 95 87 * Store requests in same batch temporarily for queuing them to ··· 161 169 }; 162 170 163 171 struct task_struct *task; 172 + 173 + /* 174 + * The number of uses of this I/O by the ublk server 175 + * if user copy or zero copy are enabled: 176 + * - 1 from dispatch to the server until UBLK_IO_COMMIT_AND_FETCH_REQ 177 + * - 1 for each inflight ublk_ch_{read,write}_iter() call 178 + * - 1 for each io_uring registered buffer 179 + * The I/O can only be completed once all references are dropped. 180 + * User copy and buffer registration operations are only permitted 181 + * if the reference count is nonzero. 182 + */ 183 + refcount_t ref; 184 + 185 + /* auto-registered buffer, valid if UBLK_IO_FLAG_AUTO_BUF_REG is set */ 186 + u16 buf_index; 187 + void *buf_ctx_handle; 164 188 }; 165 189 166 190 struct ublk_queue { ··· 236 228 static void ublk_stop_dev_unlocked(struct ublk_device *ub); 237 229 static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); 238 230 static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, 239 - const struct ublk_queue *ubq, int tag, size_t offset); 231 + const struct ublk_queue *ubq, struct ublk_io *io, 232 + size_t offset); 240 233 static inline unsigned int ublk_req_build_flags(struct request *req); 241 234 242 235 static inline struct ublksrv_io_desc * ··· 682 673 } 683 674 684 675 static inline void ublk_init_req_ref(const struct ublk_queue *ubq, 685 - struct request *req) 676 + struct ublk_io *io) 686 677 { 687 - if (ublk_need_req_ref(ubq)) { 688 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 689 - 690 - refcount_set(&data->ref, 1); 691 - } 678 + if (ublk_need_req_ref(ubq)) 679 + refcount_set(&io->ref, 1); 692 680 } 693 681 694 682 static inline bool ublk_get_req_ref(const struct ublk_queue *ubq, 695 - struct request *req) 683 + struct ublk_io *io) 696 684 { 697 - if (ublk_need_req_ref(ubq)) { 698 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 699 - 700 - return refcount_inc_not_zero(&data->ref); 701 - } 685 + if (ublk_need_req_ref(ubq)) 686 + return refcount_inc_not_zero(&io->ref); 702 687 703 688 return true; 704 689 } 705 690 706 691 static inline void ublk_put_req_ref(const struct ublk_queue *ubq, 707 - struct request *req) 692 + struct ublk_io *io, struct request *req) 708 693 { 709 694 if (ublk_need_req_ref(ubq)) { 710 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 711 - 712 - if (refcount_dec_and_test(&data->ref)) 695 + if (refcount_dec_and_test(&io->ref)) 713 696 __ublk_complete_rq(req); 714 697 } else { 715 698 __ublk_complete_rq(req); ··· 1189 1188 blk_mq_end_request(rq, BLK_STS_IOERR); 1190 1189 } 1191 1190 1192 - static void ublk_auto_buf_reg_fallback(struct request *req) 1191 + static void 1192 + ublk_auto_buf_reg_fallback(const struct ublk_queue *ubq, struct ublk_io *io) 1193 1193 { 1194 - const struct ublk_queue *ubq = req->mq_hctx->driver_data; 1195 - struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); 1196 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 1194 + unsigned tag = io - ubq->ios; 1195 + struct ublksrv_io_desc *iod = ublk_get_iod(ubq, tag); 1197 1196 1198 1197 iod->op_flags |= UBLK_IO_F_NEED_REG_BUF; 1199 - refcount_set(&data->ref, 1); 1198 + refcount_set(&io->ref, 1); 1200 1199 } 1201 1200 1202 - static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io, 1203 - unsigned int issue_flags) 1201 + static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req, 1202 + struct ublk_io *io, unsigned int issue_flags) 1204 1203 { 1205 1204 struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); 1206 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 1207 1205 int ret; 1208 1206 1209 1207 ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 1210 1208 pdu->buf.index, issue_flags); 1211 1209 if (ret) { 1212 1210 if (pdu->buf.flags & UBLK_AUTO_BUF_REG_FALLBACK) { 1213 - ublk_auto_buf_reg_fallback(req); 1211 + ublk_auto_buf_reg_fallback(ubq, io); 1214 1212 return true; 1215 1213 } 1216 1214 blk_mq_end_request(req, BLK_STS_IOERR); 1217 1215 return false; 1218 1216 } 1219 1217 /* one extra reference is dropped by ublk_io_release */ 1220 - refcount_set(&data->ref, 2); 1218 + refcount_set(&io->ref, 2); 1221 1219 1222 - data->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd); 1220 + io->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd); 1223 1221 /* store buffer index in request payload */ 1224 - data->buf_index = pdu->buf.index; 1222 + io->buf_index = pdu->buf.index; 1225 1223 io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG; 1226 1224 return true; 1227 1225 } ··· 1230 1230 unsigned int issue_flags) 1231 1231 { 1232 1232 if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req)) 1233 - return ublk_auto_buf_reg(req, io, issue_flags); 1233 + return ublk_auto_buf_reg(ubq, req, io, issue_flags); 1234 1234 1235 - ublk_init_req_ref(ubq, req); 1235 + ublk_init_req_ref(ubq, io); 1236 1236 return true; 1237 1237 } 1238 1238 ··· 1503 1503 put_task_struct(io->task); 1504 1504 io->task = NULL; 1505 1505 } 1506 + 1507 + WARN_ON_ONCE(refcount_read(&io->ref)); 1506 1508 } 1507 1509 } 1508 1510 ··· 2009 2007 { 2010 2008 struct request *rq = priv; 2011 2009 struct ublk_queue *ubq = rq->mq_hctx->driver_data; 2010 + struct ublk_io *io = &ubq->ios[rq->tag]; 2012 2011 2013 - ublk_put_req_ref(ubq, rq); 2012 + ublk_put_req_ref(ubq, io, rq); 2014 2013 } 2015 2014 2016 2015 static int ublk_register_io_buf(struct io_uring_cmd *cmd, 2017 - const struct ublk_queue *ubq, unsigned int tag, 2016 + const struct ublk_queue *ubq, 2017 + struct ublk_io *io, 2018 2018 unsigned int index, unsigned int issue_flags) 2019 2019 { 2020 2020 struct ublk_device *ub = cmd->file->private_data; ··· 2026 2022 if (!ublk_support_zero_copy(ubq)) 2027 2023 return -EINVAL; 2028 2024 2029 - req = __ublk_check_and_get_req(ub, ubq, tag, 0); 2025 + req = __ublk_check_and_get_req(ub, ubq, io, 0); 2030 2026 if (!req) 2031 2027 return -EINVAL; 2032 2028 2033 2029 ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index, 2034 2030 issue_flags); 2035 2031 if (ret) { 2036 - ublk_put_req_ref(ubq, req); 2032 + ublk_put_req_ref(ubq, io, req); 2037 2033 return ret; 2038 2034 } 2039 2035 ··· 2140 2136 * this ublk request gets stuck. 2141 2137 */ 2142 2138 if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) { 2143 - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); 2144 - 2145 - if (data->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd)) 2146 - io_buffer_unregister_bvec(cmd, data->buf_index, 2139 + if (io->buf_ctx_handle == io_uring_cmd_ctx_handle(cmd)) 2140 + io_buffer_unregister_bvec(cmd, io->buf_index, 2147 2141 issue_flags); 2148 2142 io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG; 2149 2143 } ··· 2161 2159 req->__sector = ub_cmd->zone_append_lba; 2162 2160 2163 2161 if (likely(!blk_should_fake_timeout(req->q))) 2164 - ublk_put_req_ref(ubq, req); 2162 + ublk_put_req_ref(ubq, io, req); 2165 2163 2166 2164 return 0; 2167 2165 } ··· 2240 2238 ret = -EINVAL; 2241 2239 switch (_IOC_NR(cmd_op)) { 2242 2240 case UBLK_IO_REGISTER_IO_BUF: 2243 - return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); 2241 + return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr, issue_flags); 2244 2242 case UBLK_IO_UNREGISTER_IO_BUF: 2245 2243 return ublk_unregister_io_buf(cmd, ubq, ub_cmd->addr, issue_flags); 2246 2244 case UBLK_IO_FETCH_REQ: ··· 2280 2278 } 2281 2279 2282 2280 static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, 2283 - const struct ublk_queue *ubq, int tag, size_t offset) 2281 + const struct ublk_queue *ubq, struct ublk_io *io, size_t offset) 2284 2282 { 2283 + unsigned tag = io - ubq->ios; 2285 2284 struct request *req; 2286 2285 2286 + /* 2287 + * can't use io->req in case of concurrent UBLK_IO_COMMIT_AND_FETCH_REQ, 2288 + * which would overwrite it with io->cmd 2289 + */ 2287 2290 req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); 2288 2291 if (!req) 2289 2292 return NULL; 2290 2293 2291 - if (!ublk_get_req_ref(ubq, req)) 2294 + if (!ublk_get_req_ref(ubq, io)) 2292 2295 return NULL; 2293 2296 2294 2297 if (unlikely(!blk_mq_request_started(req) || req->tag != tag)) ··· 2307 2300 2308 2301 return req; 2309 2302 fail_put: 2310 - ublk_put_req_ref(ubq, req); 2303 + ublk_put_req_ref(ubq, io, req); 2311 2304 return NULL; 2312 2305 } 2313 2306 ··· 2374 2367 } 2375 2368 2376 2369 static struct request *ublk_check_and_get_req(struct kiocb *iocb, 2377 - struct iov_iter *iter, size_t *off, int dir) 2370 + struct iov_iter *iter, size_t *off, int dir, 2371 + struct ublk_io **io) 2378 2372 { 2379 2373 struct ublk_device *ub = iocb->ki_filp->private_data; 2380 2374 struct ublk_queue *ubq; ··· 2409 2401 if (tag >= ubq->q_depth) 2410 2402 return ERR_PTR(-EINVAL); 2411 2403 2412 - req = __ublk_check_and_get_req(ub, ubq, tag, buf_off); 2404 + *io = &ubq->ios[tag]; 2405 + req = __ublk_check_and_get_req(ub, ubq, *io, buf_off); 2413 2406 if (!req) 2414 2407 return ERR_PTR(-EINVAL); 2415 2408 ··· 2423 2414 *off = buf_off; 2424 2415 return req; 2425 2416 fail: 2426 - ublk_put_req_ref(ubq, req); 2417 + ublk_put_req_ref(ubq, *io, req); 2427 2418 return ERR_PTR(-EACCES); 2428 2419 } 2429 2420 ··· 2431 2422 { 2432 2423 struct ublk_queue *ubq; 2433 2424 struct request *req; 2425 + struct ublk_io *io; 2434 2426 size_t buf_off; 2435 2427 size_t ret; 2436 2428 2437 - req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST); 2429 + req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST, &io); 2438 2430 if (IS_ERR(req)) 2439 2431 return PTR_ERR(req); 2440 2432 2441 2433 ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST); 2442 2434 ubq = req->mq_hctx->driver_data; 2443 - ublk_put_req_ref(ubq, req); 2435 + ublk_put_req_ref(ubq, io, req); 2444 2436 2445 2437 return ret; 2446 2438 } ··· 2450 2440 { 2451 2441 struct ublk_queue *ubq; 2452 2442 struct request *req; 2443 + struct ublk_io *io; 2453 2444 size_t buf_off; 2454 2445 size_t ret; 2455 2446 2456 - req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE); 2447 + req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE, &io); 2457 2448 if (IS_ERR(req)) 2458 2449 return PTR_ERR(req); 2459 2450 2460 2451 ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE); 2461 2452 ubq = req->mq_hctx->driver_data; 2462 - ublk_put_req_ref(ubq, req); 2453 + ublk_put_req_ref(ubq, io, req); 2463 2454 2464 2455 return ret; 2465 2456 } ··· 2485 2474 struct ublk_io *io = &ubq->ios[i]; 2486 2475 if (io->task) 2487 2476 put_task_struct(io->task); 2477 + WARN_ON_ONCE(refcount_read(&io->ref)); 2488 2478 } 2489 2479 2490 2480 if (ubq->io_cmd_buf) ··· 2638 2626 ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues; 2639 2627 ub->tag_set.queue_depth = ub->dev_info.queue_depth; 2640 2628 ub->tag_set.numa_node = NUMA_NO_NODE; 2641 - ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); 2642 2629 ub->tag_set.driver_data = ub; 2643 2630 return blk_mq_alloc_tag_set(&ub->tag_set); 2644 2631 }