Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring: remove looping around handling traditional task_work

A previous commit added looping around handling traditional task_work
as an optimization. While that may seem like a good idea, it can also
starve the application: if task_work generation is bursty, the task_work
queues can get very deep, and we can end up looping in here for a very
long time.

One immediately observable problem is handling network traffic using
provided buffers: a flood of incoming traffic combined with looping
task_work handling very quickly leads to buffer starvation, because we
keep running task_work rather than returning to the application so that
it can handle the associated CQEs and provide buffers back.

Fixes: 3a0c037b0e16 ("io_uring: batch task_work")
Signed-off-by: Jens Axboe <axboe@kernel.dk>

+7 -38
+7 -38
io_uring/io_uring.c
··· 1175 1175 1176 1176 static unsigned int handle_tw_list(struct llist_node *node, 1177 1177 struct io_ring_ctx **ctx, 1178 - struct io_tw_state *ts, 1179 - struct llist_node *last) 1178 + struct io_tw_state *ts) 1180 1179 { 1181 1180 unsigned int count = 0; 1182 1181 1183 - while (node && node != last) { 1182 + do { 1184 1183 struct llist_node *next = node->next; 1185 1184 struct io_kiocb *req = container_of(node, struct io_kiocb, 1186 1185 io_task_work.node); ··· 1203 1204 *ctx = NULL; 1204 1205 cond_resched(); 1205 1206 } 1206 - } 1207 + } while (node); 1207 1208 1208 1209 return count; 1209 1210 } ··· 1220 1221 struct llist_node *new) 1221 1222 { 1222 1223 return xchg(&head->first, new); 1223 - } 1224 - 1225 - /** 1226 - * io_llist_cmpxchg - possibly swap all entries in a lock-less list 1227 - * @head: the head of lock-less list to delete all entries 1228 - * @old: expected old value of the first entry of the list 1229 - * @new: new entry as the head of the list 1230 - * 1231 - * perform a cmpxchg on the first entry of the list. 
1232 - */ 1233 - 1234 - static inline struct llist_node *io_llist_cmpxchg(struct llist_head *head, 1235 - struct llist_node *old, 1236 - struct llist_node *new) 1237 - { 1238 - return cmpxchg(&head->first, old, new); 1239 1224 } 1240 1225 1241 1226 static __cold void io_fallback_tw(struct io_uring_task *tctx, bool sync) ··· 1256 1273 struct io_ring_ctx *ctx = NULL; 1257 1274 struct io_uring_task *tctx = container_of(cb, struct io_uring_task, 1258 1275 task_work); 1259 - struct llist_node fake = {}; 1260 1276 struct llist_node *node; 1261 - unsigned int loops = 0; 1262 1277 unsigned int count = 0; 1263 1278 1264 1279 if (unlikely(current->flags & PF_EXITING)) { ··· 1264 1283 return; 1265 1284 } 1266 1285 1267 - do { 1268 - loops++; 1269 - node = io_llist_xchg(&tctx->task_list, &fake); 1270 - count += handle_tw_list(node, &ctx, &ts, &fake); 1271 - 1272 - /* skip expensive cmpxchg if there are items in the list */ 1273 - if (READ_ONCE(tctx->task_list.first) != &fake) 1274 - continue; 1275 - if (ts.locked && !wq_list_empty(&ctx->submit_state.compl_reqs)) { 1276 - io_submit_flush_completions(ctx); 1277 - if (READ_ONCE(tctx->task_list.first) != &fake) 1278 - continue; 1279 - } 1280 - node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL); 1281 - } while (node != &fake); 1286 + node = llist_del_all(&tctx->task_list); 1287 + if (node) 1288 + count = handle_tw_list(node, &ctx, &ts); 1282 1289 1283 1290 ctx_flush_and_put(ctx, &ts); 1284 1291 ··· 1274 1305 if (unlikely(atomic_read(&tctx->in_cancel))) 1275 1306 io_uring_drop_tctx_refs(current); 1276 1307 1277 - trace_io_uring_task_work_run(tctx, count, loops); 1308 + trace_io_uring_task_work_run(tctx, count, 1); 1278 1309 } 1279 1310 1280 1311 static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)