Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

io_uring/netcmd: add tx timestamping cmd support

Add a new socket command which returns tx time stamps to the user. It
provides an alternative to the existing error queue recvmsg interface.
The command works in a polled multishot mode, which means io_uring will
poll the socket and keep posting timestamps until the request is
cancelled or fails in any other way (e.g. with no space in the CQ). It
reuses the net infra and grabs timestamps from the socket's error queue.

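The command requires IORING_SETUP_CQE32. All non-final CQEs (marked with
IORING_CQE_F_MORE) have cqe->res set to the tskey, and the upper 16 bits
of cqe->flags carry the timestamp info: IORING_CQE_F_TSTAMP_HW (bit
IORING_TIMESTAMP_HW_SHIFT) marks a hardware timestamp, and tstype is
stored starting at bit IORING_TIMESTAMP_TYPE_SHIFT. The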
time value is stored in the upper part of the extended CQE. The final
completion won't have IORING_CQE_F_MORE and will have cqe->res storing
0/error.

Suggested-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/92ee66e6b33b8de062a977843d825f58f21ecd37.1750065793.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Pavel Begunkov and committed by
Jens Axboe
9e4ed359 ac479eac

+98
+16
include/uapi/linux/io_uring.h
··· 969 969 SOCKET_URING_OP_SIOCOUTQ, 970 970 SOCKET_URING_OP_GETSOCKOPT, 971 971 SOCKET_URING_OP_SETSOCKOPT, 972 + SOCKET_URING_OP_TX_TIMESTAMP, 973 + }; 974 + 975 + /* 976 + * SOCKET_URING_OP_TX_TIMESTAMP definitions 977 + */ 978 + 979 + #define IORING_TIMESTAMP_HW_SHIFT 16 980 + /* The cqe->flags bit from which the timestamp type is stored */ 981 + #define IORING_TIMESTAMP_TYPE_SHIFT (IORING_TIMESTAMP_HW_SHIFT + 1) 982 + /* The cqe->flags flag signifying whether it's a hardware timestamp */ 983 + #define IORING_CQE_F_TSTAMP_HW ((__u32)1 << IORING_TIMESTAMP_HW_SHIFT); 984 + 985 + struct io_timespec { 986 + __u64 tv_sec; 987 + __u64 tv_nsec; 972 988 }; 973 989 974 990 /* Zero copy receive refill queue entry */
+82
io_uring/cmd_net.c
··· 1 1 #include <asm/ioctls.h> 2 2 #include <linux/io_uring/net.h> 3 + #include <linux/errqueue.h> 3 4 #include <net/sock.h> 4 5 5 6 #include "uring_cmd.h" ··· 52 51 optlen); 53 52 } 54 53 54 + static bool io_process_timestamp_skb(struct io_uring_cmd *cmd, struct sock *sk, 55 + struct sk_buff *skb, unsigned issue_flags) 56 + { 57 + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); 58 + struct io_uring_cqe cqe[2]; 59 + struct io_timespec *iots; 60 + struct timespec64 ts; 61 + u32 tstype, tskey; 62 + int ret; 63 + 64 + BUILD_BUG_ON(sizeof(struct io_uring_cqe) != sizeof(struct io_timespec)); 65 + 66 + ret = skb_get_tx_timestamp(skb, sk, &ts); 67 + if (ret < 0) 68 + return false; 69 + 70 + tskey = serr->ee.ee_data; 71 + tstype = serr->ee.ee_info; 72 + 73 + cqe->user_data = 0; 74 + cqe->res = tskey; 75 + cqe->flags = IORING_CQE_F_MORE; 76 + cqe->flags |= tstype << IORING_TIMESTAMP_TYPE_SHIFT; 77 + if (ret == SOF_TIMESTAMPING_TX_HARDWARE) 78 + cqe->flags |= IORING_CQE_F_TSTAMP_HW; 79 + 80 + iots = (struct io_timespec *)&cqe[1]; 81 + iots->tv_sec = ts.tv_sec; 82 + iots->tv_nsec = ts.tv_nsec; 83 + return io_uring_cmd_post_mshot_cqe32(cmd, issue_flags, cqe); 84 + } 85 + 86 + static int io_uring_cmd_timestamp(struct socket *sock, 87 + struct io_uring_cmd *cmd, 88 + unsigned int issue_flags) 89 + { 90 + struct sock *sk = sock->sk; 91 + struct sk_buff_head *q = &sk->sk_error_queue; 92 + struct sk_buff *skb, *tmp; 93 + struct sk_buff_head list; 94 + int ret; 95 + 96 + if (!(issue_flags & IO_URING_F_CQE32)) 97 + return -EINVAL; 98 + ret = io_cmd_poll_multishot(cmd, issue_flags, EPOLLERR); 99 + if (unlikely(ret)) 100 + return ret; 101 + 102 + if (skb_queue_empty_lockless(q)) 103 + return -EAGAIN; 104 + __skb_queue_head_init(&list); 105 + 106 + scoped_guard(spinlock_irq, &q->lock) { 107 + skb_queue_walk_safe(q, skb, tmp) { 108 + /* don't support skbs with payload */ 109 + if (!skb_has_tx_timestamp(skb, sk) || skb->len) 110 + continue; 111 + __skb_unlink(skb, q); 112 + 
__skb_queue_tail(&list, skb); 113 + } 114 + } 115 + 116 + while (1) { 117 + skb = skb_peek(&list); 118 + if (!skb) 119 + break; 120 + if (!io_process_timestamp_skb(cmd, sk, skb, issue_flags)) 121 + break; 122 + __skb_dequeue(&list); 123 + consume_skb(skb); 124 + } 125 + 126 + if (!unlikely(skb_queue_empty(&list))) { 127 + scoped_guard(spinlock_irqsave, &q->lock) 128 + skb_queue_splice(q, &list); 129 + } 130 + return -EAGAIN; 131 + } 132 + 55 133 int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) 56 134 { 57 135 struct socket *sock = cmd->file->private_data; ··· 156 76 return io_uring_cmd_getsockopt(sock, cmd, issue_flags); 157 77 case SOCKET_URING_OP_SETSOCKOPT: 158 78 return io_uring_cmd_setsockopt(sock, cmd, issue_flags); 79 + case SOCKET_URING_OP_TX_TIMESTAMP: 80 + return io_uring_cmd_timestamp(sock, cmd, issue_flags); 159 81 default: 160 82 return -EOPNOTSUPP; 161 83 }