Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring: move fdinfo helpers to their own file

This also means moving a bit more of the fixed file handling to the
filetable side, which makes sense separately too.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

+230 -208
+1 -1
io_uring/Makefile
··· 6 6 sync.o advise.o filetable.o \ 7 7 openclose.o uring_cmd.o epoll.o \ 8 8 statx.o net.o msg_ring.o timeout.o \ 9 - sqpoll.o 9 + sqpoll.o fdinfo.o 10 10 obj-$(CONFIG_IO_WQ) += io-wq.o
+191
io_uring/fdinfo.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/kernel.h> 3 + #include <linux/errno.h> 4 + #include <linux/fs.h> 5 + #include <linux/file.h> 6 + #include <linux/proc_fs.h> 7 + #include <linux/seq_file.h> 8 + #include <linux/io_uring.h> 9 + 10 + #include <uapi/linux/io_uring.h> 11 + 12 + #include "io_uring_types.h" 13 + #include "io_uring.h" 14 + #include "sqpoll.h" 15 + #include "fdinfo.h" 16 + 17 + #ifdef CONFIG_PROC_FS 18 + static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, 19 + const struct cred *cred) 20 + { 21 + struct user_namespace *uns = seq_user_ns(m); 22 + struct group_info *gi; 23 + kernel_cap_t cap; 24 + unsigned __capi; 25 + int g; 26 + 27 + seq_printf(m, "%5d\n", id); 28 + seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); 29 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); 30 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); 31 + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); 32 + seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); 33 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); 34 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); 35 + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); 36 + seq_puts(m, "\n\tGroups:\t"); 37 + gi = cred->group_info; 38 + for (g = 0; g < gi->ngroups; g++) { 39 + seq_put_decimal_ull(m, g ? 
" " : "", 40 + from_kgid_munged(uns, gi->gid[g])); 41 + } 42 + seq_puts(m, "\n\tCapEff:\t"); 43 + cap = cred->cap_effective; 44 + CAP_FOR_EACH_U32(__capi) 45 + seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); 46 + seq_putc(m, '\n'); 47 + return 0; 48 + } 49 + 50 + static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, 51 + struct seq_file *m) 52 + { 53 + struct io_sq_data *sq = NULL; 54 + struct io_overflow_cqe *ocqe; 55 + struct io_rings *r = ctx->rings; 56 + unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; 57 + unsigned int sq_head = READ_ONCE(r->sq.head); 58 + unsigned int sq_tail = READ_ONCE(r->sq.tail); 59 + unsigned int cq_head = READ_ONCE(r->cq.head); 60 + unsigned int cq_tail = READ_ONCE(r->cq.tail); 61 + unsigned int cq_shift = 0; 62 + unsigned int sq_entries, cq_entries; 63 + bool has_lock; 64 + bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); 65 + unsigned int i; 66 + 67 + if (is_cqe32) 68 + cq_shift = 1; 69 + 70 + /* 71 + * we may get imprecise sqe and cqe info if uring is actively running 72 + * since we get cached_sq_head and cached_cq_tail without uring_lock 73 + * and sq_tail and cq_head are changed by userspace. But it's ok since 74 + * we usually use these info when it is stuck. 
75 + */ 76 + seq_printf(m, "SqMask:\t0x%x\n", sq_mask); 77 + seq_printf(m, "SqHead:\t%u\n", sq_head); 78 + seq_printf(m, "SqTail:\t%u\n", sq_tail); 79 + seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); 80 + seq_printf(m, "CqMask:\t0x%x\n", cq_mask); 81 + seq_printf(m, "CqHead:\t%u\n", cq_head); 82 + seq_printf(m, "CqTail:\t%u\n", cq_tail); 83 + seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); 84 + seq_printf(m, "SQEs:\t%u\n", sq_tail - ctx->cached_sq_head); 85 + sq_entries = min(sq_tail - sq_head, ctx->sq_entries); 86 + for (i = 0; i < sq_entries; i++) { 87 + unsigned int entry = i + sq_head; 88 + unsigned int sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); 89 + struct io_uring_sqe *sqe; 90 + 91 + if (sq_idx > sq_mask) 92 + continue; 93 + sqe = &ctx->sq_sqes[sq_idx]; 94 + seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n", 95 + sq_idx, sqe->opcode, sqe->fd, sqe->flags, 96 + sqe->user_data); 97 + } 98 + seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); 99 + cq_entries = min(cq_tail - cq_head, ctx->cq_entries); 100 + for (i = 0; i < cq_entries; i++) { 101 + unsigned int entry = i + cq_head; 102 + struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift]; 103 + 104 + if (!is_cqe32) { 105 + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n", 106 + entry & cq_mask, cqe->user_data, cqe->res, 107 + cqe->flags); 108 + } else { 109 + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, " 110 + "extra1:%llu, extra2:%llu\n", 111 + entry & cq_mask, cqe->user_data, cqe->res, 112 + cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]); 113 + } 114 + } 115 + 116 + /* 117 + * Avoid ABBA deadlock between the seq lock and the io_uring mutex, 118 + * since fdinfo case grabs it in the opposite direction of normal use 119 + * cases. If we fail to get the lock, we just don't iterate any 120 + * structures that could be going away outside the io_uring mutex. 
121 + */ 122 + has_lock = mutex_trylock(&ctx->uring_lock); 123 + 124 + if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { 125 + sq = ctx->sq_data; 126 + if (!sq->thread) 127 + sq = NULL; 128 + } 129 + 130 + seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); 131 + seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1); 132 + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); 133 + for (i = 0; has_lock && i < ctx->nr_user_files; i++) { 134 + struct file *f = io_file_from_index(&ctx->file_table, i); 135 + 136 + if (f) 137 + seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); 138 + else 139 + seq_printf(m, "%5u: <none>\n", i); 140 + } 141 + seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); 142 + for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { 143 + struct io_mapped_ubuf *buf = ctx->user_bufs[i]; 144 + unsigned int len = buf->ubuf_end - buf->ubuf; 145 + 146 + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); 147 + } 148 + if (has_lock && !xa_empty(&ctx->personalities)) { 149 + unsigned long index; 150 + const struct cred *cred; 151 + 152 + seq_printf(m, "Personalities:\n"); 153 + xa_for_each(&ctx->personalities, index, cred) 154 + io_uring_show_cred(m, index, cred); 155 + } 156 + if (has_lock) 157 + mutex_unlock(&ctx->uring_lock); 158 + 159 + seq_puts(m, "PollList:\n"); 160 + spin_lock(&ctx->completion_lock); 161 + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { 162 + struct hlist_head *list = &ctx->cancel_hash[i]; 163 + struct io_kiocb *req; 164 + 165 + hlist_for_each_entry(req, list, hash_node) 166 + seq_printf(m, " op=%d, task_works=%d\n", req->opcode, 167 + task_work_pending(req->task)); 168 + } 169 + 170 + seq_puts(m, "CqOverflowList:\n"); 171 + list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { 172 + struct io_uring_cqe *cqe = &ocqe->cqe; 173 + 174 + seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", 175 + cqe->user_data, cqe->res, cqe->flags); 176 + 177 + } 178 + 179 + 
spin_unlock(&ctx->completion_lock); 180 + } 181 + 182 + __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f) 183 + { 184 + struct io_ring_ctx *ctx = f->private_data; 185 + 186 + if (percpu_ref_tryget(&ctx->refs)) { 187 + __io_uring_show_fdinfo(ctx, m); 188 + percpu_ref_put(&ctx->refs); 189 + } 190 + } 191 + #endif
+3
io_uring/fdinfo.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + void io_uring_show_fdinfo(struct seq_file *m, struct file *f);
+19
io_uring/filetable.h
··· 36 36 void io_free_file_tables(struct io_file_table *table); 37 37 int io_file_bitmap_get(struct io_ring_ctx *ctx); 38 38 39 + unsigned int io_file_get_flags(struct file *file); 40 + 39 41 static inline void io_file_bitmap_clear(struct io_file_table *table, int bit) 40 42 { 41 43 __clear_bit(bit, table->bitmap); ··· 55 53 io_fixed_file_slot(struct io_file_table *table, unsigned i) 56 54 { 57 55 return &table->files[i]; 56 + } 57 + 58 + static inline struct file *io_file_from_index(struct io_file_table *table, 59 + int index) 60 + { 61 + struct io_fixed_file *slot = io_fixed_file_slot(table, index); 62 + 63 + return (struct file *) (slot->file_ptr & FFS_MASK); 64 + } 65 + 66 + static inline void io_fixed_file_set(struct io_fixed_file *file_slot, 67 + struct file *file) 68 + { 69 + unsigned long file_ptr = (unsigned long) file; 70 + 71 + file_ptr |= io_file_get_flags(file); 72 + file_slot->file_ptr = file_ptr; 58 73 } 59 74 60 75 #endif
+3 -207
io_uring/io_uring.c
··· 93 93 #include "io_uring.h" 94 94 #include "refs.h" 95 95 #include "sqpoll.h" 96 + #include "fdinfo.h" 96 97 97 98 #include "xattr.h" 98 99 #include "nop.h" ··· 138 137 139 138 #define IO_TCTX_REFS_CACHE_NR (1U << 10) 140 139 141 - struct io_mapped_ubuf { 142 - u64 ubuf; 143 - u64 ubuf_end; 144 - unsigned int nr_bvecs; 145 - unsigned long acct_pages; 146 - struct bio_vec bvec[]; 147 - }; 148 - 149 140 struct io_ring_ctx; 150 - 151 - struct io_overflow_cqe { 152 - struct list_head list; 153 - struct io_uring_cqe cqe; 154 - }; 155 141 156 142 struct io_rsrc_put { 157 143 struct list_head list; ··· 2313 2325 * any file. For now, just ensure that anything potentially problematic is done 2314 2326 * inline. 2315 2327 */ 2316 - static unsigned int io_file_get_flags(struct file *file) 2328 + unsigned int io_file_get_flags(struct file *file) 2317 2329 { 2318 2330 umode_t mode = file_inode(file)->i_mode; 2319 2331 unsigned int res = 0; ··· 4798 4810 io_req_task_queue_fail(req, ret); 4799 4811 } 4800 4812 4801 - static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, 4802 - int index) 4803 - { 4804 - struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index); 4805 - 4806 - return (struct file *) (slot->file_ptr & FFS_MASK); 4807 - } 4808 - 4809 - static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) 4810 - { 4811 - unsigned long file_ptr = (unsigned long) file; 4812 - 4813 - file_ptr |= io_file_get_flags(file); 4814 - file_slot->file_ptr = file_ptr; 4815 - } 4816 - 4817 4813 inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, 4818 4814 unsigned int issue_flags) 4819 4815 { ··· 5639 5667 int i; 5640 5668 5641 5669 for (i = 0; i < ctx->nr_user_files; i++) { 5642 - struct file *file = io_file_from_index(ctx, i); 5670 + struct file *file = io_file_from_index(&ctx->file_table, i); 5643 5671 5644 5672 if (!file) 5645 5673 continue; ··· 7748 7776 fdput(f); 7749 7777 return ret; 7750 7778 } 7751 - 7752 - 
#ifdef CONFIG_PROC_FS 7753 - static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, 7754 - const struct cred *cred) 7755 - { 7756 - struct user_namespace *uns = seq_user_ns(m); 7757 - struct group_info *gi; 7758 - kernel_cap_t cap; 7759 - unsigned __capi; 7760 - int g; 7761 - 7762 - seq_printf(m, "%5d\n", id); 7763 - seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); 7764 - seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); 7765 - seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); 7766 - seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); 7767 - seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); 7768 - seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); 7769 - seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); 7770 - seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); 7771 - seq_puts(m, "\n\tGroups:\t"); 7772 - gi = cred->group_info; 7773 - for (g = 0; g < gi->ngroups; g++) { 7774 - seq_put_decimal_ull(m, g ? 
" " : "", 7775 - from_kgid_munged(uns, gi->gid[g])); 7776 - } 7777 - seq_puts(m, "\n\tCapEff:\t"); 7778 - cap = cred->cap_effective; 7779 - CAP_FOR_EACH_U32(__capi) 7780 - seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); 7781 - seq_putc(m, '\n'); 7782 - return 0; 7783 - } 7784 - 7785 - static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, 7786 - struct seq_file *m) 7787 - { 7788 - struct io_sq_data *sq = NULL; 7789 - struct io_overflow_cqe *ocqe; 7790 - struct io_rings *r = ctx->rings; 7791 - unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; 7792 - unsigned int sq_head = READ_ONCE(r->sq.head); 7793 - unsigned int sq_tail = READ_ONCE(r->sq.tail); 7794 - unsigned int cq_head = READ_ONCE(r->cq.head); 7795 - unsigned int cq_tail = READ_ONCE(r->cq.tail); 7796 - unsigned int cq_shift = 0; 7797 - unsigned int sq_entries, cq_entries; 7798 - bool has_lock; 7799 - bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); 7800 - unsigned int i; 7801 - 7802 - if (is_cqe32) 7803 - cq_shift = 1; 7804 - 7805 - /* 7806 - * we may get imprecise sqe and cqe info if uring is actively running 7807 - * since we get cached_sq_head and cached_cq_tail without uring_lock 7808 - * and sq_tail and cq_head are changed by userspace. But it's ok since 7809 - * we usually use these info when it is stuck. 
7810 - */ 7811 - seq_printf(m, "SqMask:\t0x%x\n", sq_mask); 7812 - seq_printf(m, "SqHead:\t%u\n", sq_head); 7813 - seq_printf(m, "SqTail:\t%u\n", sq_tail); 7814 - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); 7815 - seq_printf(m, "CqMask:\t0x%x\n", cq_mask); 7816 - seq_printf(m, "CqHead:\t%u\n", cq_head); 7817 - seq_printf(m, "CqTail:\t%u\n", cq_tail); 7818 - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); 7819 - seq_printf(m, "SQEs:\t%u\n", sq_tail - ctx->cached_sq_head); 7820 - sq_entries = min(sq_tail - sq_head, ctx->sq_entries); 7821 - for (i = 0; i < sq_entries; i++) { 7822 - unsigned int entry = i + sq_head; 7823 - unsigned int sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); 7824 - struct io_uring_sqe *sqe; 7825 - 7826 - if (sq_idx > sq_mask) 7827 - continue; 7828 - sqe = &ctx->sq_sqes[sq_idx]; 7829 - seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n", 7830 - sq_idx, sqe->opcode, sqe->fd, sqe->flags, 7831 - sqe->user_data); 7832 - } 7833 - seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); 7834 - cq_entries = min(cq_tail - cq_head, ctx->cq_entries); 7835 - for (i = 0; i < cq_entries; i++) { 7836 - unsigned int entry = i + cq_head; 7837 - struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift]; 7838 - 7839 - if (!is_cqe32) { 7840 - seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n", 7841 - entry & cq_mask, cqe->user_data, cqe->res, 7842 - cqe->flags); 7843 - } else { 7844 - seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, " 7845 - "extra1:%llu, extra2:%llu\n", 7846 - entry & cq_mask, cqe->user_data, cqe->res, 7847 - cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]); 7848 - } 7849 - } 7850 - 7851 - /* 7852 - * Avoid ABBA deadlock between the seq lock and the io_uring mutex, 7853 - * since fdinfo case grabs it in the opposite direction of normal use 7854 - * cases. If we fail to get the lock, we just don't iterate any 7855 - * structures that could be going away outside the io_uring mutex. 
7856 - */ 7857 - has_lock = mutex_trylock(&ctx->uring_lock); 7858 - 7859 - if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { 7860 - sq = ctx->sq_data; 7861 - if (!sq->thread) 7862 - sq = NULL; 7863 - } 7864 - 7865 - seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); 7866 - seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1); 7867 - seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); 7868 - for (i = 0; has_lock && i < ctx->nr_user_files; i++) { 7869 - struct file *f = io_file_from_index(ctx, i); 7870 - 7871 - if (f) 7872 - seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); 7873 - else 7874 - seq_printf(m, "%5u: <none>\n", i); 7875 - } 7876 - seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); 7877 - for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { 7878 - struct io_mapped_ubuf *buf = ctx->user_bufs[i]; 7879 - unsigned int len = buf->ubuf_end - buf->ubuf; 7880 - 7881 - seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); 7882 - } 7883 - if (has_lock && !xa_empty(&ctx->personalities)) { 7884 - unsigned long index; 7885 - const struct cred *cred; 7886 - 7887 - seq_printf(m, "Personalities:\n"); 7888 - xa_for_each(&ctx->personalities, index, cred) 7889 - io_uring_show_cred(m, index, cred); 7890 - } 7891 - if (has_lock) 7892 - mutex_unlock(&ctx->uring_lock); 7893 - 7894 - seq_puts(m, "PollList:\n"); 7895 - spin_lock(&ctx->completion_lock); 7896 - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { 7897 - struct hlist_head *list = &ctx->cancel_hash[i]; 7898 - struct io_kiocb *req; 7899 - 7900 - hlist_for_each_entry(req, list, hash_node) 7901 - seq_printf(m, " op=%d, task_works=%d\n", req->opcode, 7902 - task_work_pending(req->task)); 7903 - } 7904 - 7905 - seq_puts(m, "CqOverflowList:\n"); 7906 - list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { 7907 - struct io_uring_cqe *cqe = &ocqe->cqe; 7908 - 7909 - seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", 7910 - cqe->user_data, cqe->res, cqe->flags); 7911 - 
7912 - } 7913 - 7914 - spin_unlock(&ctx->completion_lock); 7915 - } 7916 - 7917 - static __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f) 7918 - { 7919 - struct io_ring_ctx *ctx = f->private_data; 7920 - 7921 - if (percpu_ref_tryget(&ctx->refs)) { 7922 - __io_uring_show_fdinfo(ctx, m); 7923 - percpu_ref_put(&ctx->refs); 7924 - } 7925 - } 7926 - #endif 7927 7779 7928 7780 static const struct file_operations io_uring_fops = { 7929 7781 .release = io_uring_release,
+13
io_uring/io_uring_types.h
··· 498 498 int seq; 499 499 }; 500 500 501 + struct io_overflow_cqe { 502 + struct list_head list; 503 + struct io_uring_cqe cqe; 504 + }; 505 + 506 + struct io_mapped_ubuf { 507 + u64 ubuf; 508 + u64 ubuf_end; 509 + unsigned int nr_bvecs; 510 + unsigned long acct_pages; 511 + struct bio_vec bvec[]; 512 + }; 513 + 501 514 #endif