Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/napi: add static napi tracking strategy

Add the static napi tracking strategy. That allows the user to manually
manage the napi ids list for busy polling, and eliminate the overhead of
dynamically updating the list from the fast path.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Olivier Langlois and committed by
Jens Axboe
6bf90bd8 71afd926

+160 -27
+1 -1
include/linux/io_uring_types.h
··· 408 408 /* napi busy poll default timeout */ 409 409 ktime_t napi_busy_poll_dt; 410 410 bool napi_prefer_busy_poll; 411 - bool napi_enabled; 411 + u8 napi_track_mode; 412 412 413 413 DECLARE_HASHTABLE(napi_ht, 4); 414 414 #endif
+30 -2
include/uapi/linux/io_uring.h
··· 790 790 __u32 resv[8]; 791 791 }; 792 792 793 + enum io_uring_napi_op { 794 + /* register/unregister backward compatible opcode */ 795 + IO_URING_NAPI_REGISTER_OP = 0, 796 + 797 + /* opcodes to update napi_list when static tracking is used */ 798 + IO_URING_NAPI_STATIC_ADD_ID = 1, 799 + IO_URING_NAPI_STATIC_DEL_ID = 2 800 + }; 801 + 802 + enum io_uring_napi_tracking_strategy { 803 + /* value must be 0 for backward compatibility */ 804 + IO_URING_NAPI_TRACKING_DYNAMIC = 0, 805 + IO_URING_NAPI_TRACKING_STATIC = 1, 806 + IO_URING_NAPI_TRACKING_INACTIVE = 255 807 + }; 808 + 793 809 /* argument for IORING_(UN)REGISTER_NAPI */ 794 810 struct io_uring_napi { 795 811 __u32 busy_poll_to; 796 812 __u8 prefer_busy_poll; 797 - __u8 pad[3]; 798 - __u64 resv; 813 + 814 + /* a io_uring_napi_op value */ 815 + __u8 opcode; 816 + __u8 pad[2]; 817 + 818 + /* 819 + * for IO_URING_NAPI_REGISTER_OP, it is a 820 + * io_uring_napi_tracking_strategy value. 821 + * 822 + * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID 823 + * it is the napi id to add/del from napi_list. 824 + */ 825 + __u32 op_param; 826 + __u32 resv; 799 827 }; 800 828 801 829 /*
+41 -13
io_uring/fdinfo.c
··· 46 46 return 0; 47 47 } 48 48 49 + #ifdef CONFIG_NET_RX_BUSY_POLL 50 + static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx, 51 + struct seq_file *m, 52 + const char *tracking_strategy) 53 + { 54 + seq_puts(m, "NAPI:\tenabled\n"); 55 + seq_printf(m, "napi tracking:\t%s\n", tracking_strategy); 56 + seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); 57 + if (ctx->napi_prefer_busy_poll) 58 + seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); 59 + else 60 + seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); 61 + } 62 + 63 + static __cold void napi_show_fdinfo(struct io_ring_ctx *ctx, 64 + struct seq_file *m) 65 + { 66 + unsigned int mode = READ_ONCE(ctx->napi_track_mode); 67 + 68 + switch (mode) { 69 + case IO_URING_NAPI_TRACKING_INACTIVE: 70 + seq_puts(m, "NAPI:\tdisabled\n"); 71 + break; 72 + case IO_URING_NAPI_TRACKING_DYNAMIC: 73 + common_tracking_show_fdinfo(ctx, m, "dynamic"); 74 + break; 75 + case IO_URING_NAPI_TRACKING_STATIC: 76 + common_tracking_show_fdinfo(ctx, m, "static"); 77 + break; 78 + default: 79 + seq_printf(m, "NAPI:\tunknown mode (%u)\n", mode); 80 + } 81 + } 82 + #else 83 + static inline void napi_show_fdinfo(struct io_ring_ctx *ctx, 84 + struct seq_file *m) 85 + { 86 + } 87 + #endif 88 + 49 89 /* 50 90 * Caller holds a reference to the file already, we don't need to do 51 91 * anything else to get an extra reference. ··· 259 219 260 220 } 261 221 spin_unlock(&ctx->completion_lock); 262 - 263 - #ifdef CONFIG_NET_RX_BUSY_POLL 264 - if (ctx->napi_enabled) { 265 - seq_puts(m, "NAPI:\tenabled\n"); 266 - seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); 267 - if (ctx->napi_prefer_busy_poll) 268 - seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); 269 - else 270 - seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); 271 - } else { 272 - seq_puts(m, "NAPI:\tdisabled\n"); 273 - } 274 - #endif 222 + napi_show_fdinfo(ctx, m); 275 223 } 276 224 #endif
+87 -10
io_uring/napi.c
··· 81 81 return 0; 82 82 } 83 83 84 + static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id) 85 + { 86 + struct hlist_head *hash_list; 87 + struct io_napi_entry *e; 88 + 89 + /* Non-NAPI IDs can be rejected. */ 90 + if (napi_id < MIN_NAPI_ID) 91 + return -EINVAL; 92 + 93 + hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; 94 + guard(spinlock)(&ctx->napi_lock); 95 + e = io_napi_hash_find(hash_list, napi_id); 96 + if (!e) 97 + return -ENOENT; 98 + 99 + list_del_rcu(&e->list); 100 + hash_del_rcu(&e->node); 101 + kfree_rcu(e, rcu); 102 + return 0; 103 + } 104 + 84 105 static void __io_napi_remove_stale(struct io_ring_ctx *ctx) 85 106 { 86 107 struct io_napi_entry *e; ··· 157 136 return false; 158 137 } 159 138 160 - static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, 161 - bool (*loop_end)(void *, unsigned long), 162 - void *loop_end_arg) 139 + /* 140 + * never report stale entries 141 + */ 142 + static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx, 143 + bool (*loop_end)(void *, unsigned long), 144 + void *loop_end_arg) 145 + { 146 + struct io_napi_entry *e; 147 + 148 + list_for_each_entry_rcu(e, &ctx->napi_list, list) 149 + napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, 150 + ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); 151 + return false; 152 + } 153 + 154 + static bool 155 + dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx, 156 + bool (*loop_end)(void *, unsigned long), 157 + void *loop_end_arg) 163 158 { 164 159 struct io_napi_entry *e; 165 160 bool is_stale = false; ··· 189 152 } 190 153 191 154 return is_stale; 155 + } 156 + 157 + static inline bool 158 + __io_napi_do_busy_loop(struct io_ring_ctx *ctx, 159 + bool (*loop_end)(void *, unsigned long), 160 + void *loop_end_arg) 161 + { 162 + if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC) 163 + return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); 164 + return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); 192 165 } 193 166 194 167 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, ··· 242 195 spin_lock_init(&ctx->napi_lock); 243 196 ctx->napi_prefer_busy_poll = false; 244 197 ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); 198 + ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE; 245 199 } 246 200 247 201 /* ··· 263 215 INIT_LIST_HEAD_RCU(&ctx->napi_list); 264 216 } 265 217 218 + static int io_napi_register_napi(struct io_ring_ctx *ctx, 219 + struct io_uring_napi *napi) 220 + { 221 + switch (napi->op_param) { 222 + case IO_URING_NAPI_TRACKING_DYNAMIC: 223 + case IO_URING_NAPI_TRACKING_STATIC: 224 + break; 225 + default: 226 + return -EINVAL; 227 + } 228 + /* clean the napi list for new settings */ 229 + io_napi_free(ctx); 230 + WRITE_ONCE(ctx->napi_track_mode, napi->op_param); 231 + WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); 232 + WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); 233 + return 0; 234 + } 235 + 266 236 /* 267 237 * io_napi_register() - Register napi with io-uring 268 238 * @ctx: pointer to io-uring context structure ··· 292 226 { 293 227 const struct io_uring_napi curr = { 294 228 .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), 295 - .prefer_busy_poll = ctx->napi_prefer_busy_poll 229 + .prefer_busy_poll = ctx->napi_prefer_busy_poll, 230 + .op_param = ctx->napi_track_mode 296 231 }; 297 232 struct io_uring_napi napi; 298 233 ··· 301 234 return -EINVAL; 302 235 if (copy_from_user(&napi, arg, sizeof(napi))) 303 236 return -EFAULT; 304 237 if (napi.pad[0] || napi.pad[1] || napi.resv) 305 238 return -EINVAL; 306 239 307 240 if (copy_to_user(arg, &curr, sizeof(curr))) 308 241 return -EFAULT; 309 242 310 - WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); 311 - WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); 312 - WRITE_ONCE(ctx->napi_enabled, true); 313 - return 0; 243 + switch (napi.opcode) { 244 + case IO_URING_NAPI_REGISTER_OP: 245 + return io_napi_register_napi(ctx, &napi); 246 + case IO_URING_NAPI_STATIC_ADD_ID: 247 + if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) 248 + return -EINVAL; 249 + return __io_napi_add_id(ctx, napi.op_param); 250 + case IO_URING_NAPI_STATIC_DEL_ID: 251 + if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) 252 + return -EINVAL; 253 + return __io_napi_del_id(ctx, napi.op_param); 254 + default: 255 + return -EINVAL; 256 + } 314 257 } 315 258 316 259 /* ··· 343 266 344 267 WRITE_ONCE(ctx->napi_busy_poll_dt, 0); 345 268 WRITE_ONCE(ctx->napi_prefer_busy_poll, false); 346 - WRITE_ONCE(ctx->napi_enabled, false); 269 + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); 347 270 } 348 271 349 272
+1 -1
io_uring/napi.h
··· 44 44 struct io_ring_ctx *ctx = req->ctx; 45 45 struct socket *sock; 46 46 47 - if (!READ_ONCE(ctx->napi_enabled)) 47 + if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) 48 48 return; 49 49 50 50 sock = sock_from_file(req->file);