Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 * This is the master header file for the Linux NET layer,
5 * or, in plain English: the networking handling part of the
6 * kernel.
7 *
8 * Version: @(#)net.h 1.0.3 05/25/93
9 *
10 * Authors: Orest Zborowski, <obz@Kodak.COM>
11 * Ross Biro
12 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
13 */
14#ifndef _LINUX_NET_H
15#define _LINUX_NET_H
16
17#include <linux/stringify.h>
18#include <linux/random.h>
19#include <linux/wait.h>
20#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
21#include <linux/rcupdate.h>
22#include <linux/once.h>
23#include <linux/fs.h>
24#include <linux/mm.h>
25#include <linux/sockptr.h>
26#include <linux/uio.h>
27
28#include <uapi/linux/net.h>
29
30/**
31 * struct sockopt - socket option value container
32 * @iter_in: iov_iter for reading optval with the content from the caller.
33 * Use copy_from_iter() given this iov direction is ITER_SOURCE
34 * @iter_out: iov_iter for protocols to update optval data to userspace
35 * Use _copy_to_iter() given iov direction is ITER_DEST
36 * @optlen: serves as both input (buffer size) and output (returned data size).
37 *
38 * Type-safe wrapper for socket option data that works with both
39 * user and kernel buffers.
40 *
41 * The optlen field allows callbacks to return a specific length value
42 * independent of the bytes written via copy_to_iter().
43 */
44typedef struct sockopt {
45 struct iov_iter iter_in;
46 struct iov_iter iter_out;
47 int optlen;
48} sockopt_t;
49
/* Forward declarations; the full definitions are not needed in this header. */
struct poll_table_struct;
struct pipe_inode_info;
struct inode;
struct file;
struct net;
55
/*
 * SOCKWQ_ASYNC_NOSPACE and SOCKWQ_ASYNC_WAITDATA used to live in
 * sock->flags; they were moved into sk->sk_wq->flags so that they are
 * RCU protected. Eventually all flags will be in sk->sk_wq->flags.
 */
enum socket_flags {
	SOCKWQ_ASYNC_NOSPACE,
	SOCKWQ_ASYNC_WAITDATA,
	SOCK_NOSPACE,
	SOCK_SUPPORT_ZC,
	SOCK_CUSTOM_SOCKOPT,
};
67
#ifndef ARCH_HAS_SOCKET_TYPES
/**
 * enum sock_type - Socket types
 * @SOCK_STREAM: stream (connection) socket
 * @SOCK_DGRAM: datagram (connectionless) socket
 * @SOCK_RAW: raw socket
 * @SOCK_RDM: reliably-delivered message
 * @SOCK_SEQPACKET: sequential packet socket
 * @SOCK_DCCP: Datagram Congestion Control Protocol socket
 * @SOCK_PACKET: Linux-specific way of getting packets at the dev level,
 *		 for writing rarp and other similar things at user level.
 *
 * Before adding a new socket type, grep for ARCH_HAS_SOCKET_TYPES in
 * include/asm-* /socket.h: at least MIPS overrides this enum for binary
 * compatibility reasons.
 */
enum sock_type {
	SOCK_STREAM	= 1,
	SOCK_DGRAM	= 2,
	SOCK_RAW	= 3,
	SOCK_RDM	= 4,
	SOCK_SEQPACKET	= 5,
	SOCK_DCCP	= 6,
	SOCK_PACKET	= 10,
};
#endif /* ARCH_HAS_SOCKET_TYPES */
94
#define SOCK_MAX	(SOCK_PACKET + 1)
/*
 * Mask which covers at least up to SOCK_MAX-1. The remaining bits are
 * used as flags.
 */
#define SOCK_TYPE_MASK	0xf

/* Flags for socket, socketpair, accept4 */
#define SOCK_CLOEXEC	O_CLOEXEC
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK	O_NONBLOCK
#endif
#define SOCK_COREDUMP	O_NOCTTY
106
/**
 * enum sock_shutdown_cmd - Shutdown types
 * @SHUT_RD: shut down receptions
 * @SHUT_WR: shut down transmissions
 * @SHUT_RDWR: shut down both receptions and transmissions
 */
enum sock_shutdown_cmd {
	SHUT_RD,
	SHUT_WR,
	SHUT_RDWR,
};
118
119struct socket_wq {
120 /* Note: wait MUST be first field of socket_wq */
121 wait_queue_head_t wait;
122 struct fasync_struct *fasync_list;
123 unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
124 struct rcu_head rcu;
125} ____cacheline_aligned_in_smp;
126
127/**
128 * struct socket - general BSD socket
129 * @state: socket state (%SS_CONNECTED, etc)
130 * @type: socket type (%SOCK_STREAM, etc)
131 * @flags: socket flags (%SOCK_NOSPACE, etc)
132 * @ops: protocol specific socket operations
133 * @file: File back pointer for gc
134 * @sk: internal networking protocol agnostic socket representation
135 * @wq: wait queue for several uses
136 */
137struct socket {
138 socket_state state;
139
140 short type;
141
142 unsigned long flags;
143
144 struct file *file;
145 struct sock *sk;
146 const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */
147
148 struct socket_wq wq;
149};
150
151/*
152 * "descriptor" for what we're up to with a read.
153 * This allows us to use the same read code yet
154 * have multiple different users of the data that
155 * we read from a file.
156 *
157 * The simplest case just copies the data to user
158 * mode.
159 */
160typedef struct {
161 size_t written;
162 size_t count;
163 union {
164 char __user *buf;
165 void *data;
166 } arg;
167 int error;
168} read_descriptor_t;
169
170struct vm_area_struct;
171struct page;
172struct msghdr;
173struct module;
174struct sk_buff;
175struct proto_accept_arg;
176typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
177 unsigned int, size_t);
178typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *);
179
180
181struct proto_ops {
182 int family;
183 struct module *owner;
184 int (*release) (struct socket *sock);
185 int (*bind) (struct socket *sock,
186 struct sockaddr_unsized *myaddr,
187 int sockaddr_len);
188 int (*connect) (struct socket *sock,
189 struct sockaddr_unsized *vaddr,
190 int sockaddr_len, int flags);
191 int (*socketpair)(struct socket *sock1,
192 struct socket *sock2);
193 int (*accept) (struct socket *sock,
194 struct socket *newsock,
195 struct proto_accept_arg *arg);
196 int (*getname) (struct socket *sock,
197 struct sockaddr *addr,
198 int peer);
199 __poll_t (*poll) (struct file *file, struct socket *sock,
200 struct poll_table_struct *wait);
201 int (*ioctl) (struct socket *sock, unsigned int cmd,
202 unsigned long arg);
203#ifdef CONFIG_COMPAT
204 int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
205 unsigned long arg);
206#endif
207 int (*gettstamp) (struct socket *sock, void __user *userstamp,
208 bool timeval, bool time32);
209 int (*listen) (struct socket *sock, int len);
210 int (*shutdown) (struct socket *sock, int flags);
211 int (*setsockopt)(struct socket *sock, int level,
212 int optname, sockptr_t optval,
213 unsigned int optlen);
214 int (*getsockopt)(struct socket *sock, int level,
215 int optname, char __user *optval, int __user *optlen);
216 int (*getsockopt_iter)(struct socket *sock, int level,
217 int optname, sockopt_t *opt);
218 void (*show_fdinfo)(struct seq_file *m, struct socket *sock);
219 int (*sendmsg) (struct socket *sock, struct msghdr *m,
220 size_t total_len);
221 /* Notes for implementing recvmsg:
222 * ===============================
223 * msg->msg_namelen should get updated by the recvmsg handlers
224 * iff msg_name != NULL. It is by default 0 to prevent
225 * returning uninitialized memory to user space. The recvfrom
226 * handlers can assume that msg.msg_name is either NULL or has
227 * a minimum size of sizeof(struct sockaddr_storage).
228 */
229 int (*recvmsg) (struct socket *sock, struct msghdr *m,
230 size_t total_len, int flags);
231 int (*mmap) (struct file *file, struct socket *sock,
232 struct vm_area_struct * vma);
233 ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
234 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
235 void (*splice_eof)(struct socket *sock);
236 int (*set_peek_off)(struct sock *sk, int val);
237 int (*peek_len)(struct socket *sock);
238
239 /* The following functions are called internally by kernel with
240 * sock lock already held.
241 */
242 int (*read_sock)(struct sock *sk, read_descriptor_t *desc,
243 sk_read_actor_t recv_actor);
244 /* This is different from read_sock(), it reads an entire skb at a time. */
245 int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
246 int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
247 size_t size);
248 int (*set_rcvlowat)(struct sock *sk, int val);
249 void (*set_rcvbuf)(struct sock *sk, int val);
250};
251
/*
 * Declare @dst as a variable of pointer type @type, initialized from @src
 * cast to @type. sizeof(*dst) is handed to __sockaddr_check_size() first.
 *
 * @src is parenthesized so that the cast applies to the whole argument
 * expression rather than only to its first operand.
 */
#define DECLARE_SOCKADDR(type, dst, src)	\
	type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type)(src); })
254
/* Descriptor handed to sock_register(): a family number plus its create hook. */
struct net_proto_family {
	int		family;
	int		(*create)(struct net *net, struct socket *sock,
				  int protocol, int kern);
	struct module	*owner;
};
261
/* Forward declarations; only pointers to these are needed here. */
struct iovec;
struct kvec;
264
/* SOCK_WAKE_* selector values. */
enum {
	SOCK_WAKE_IO,
	SOCK_WAKE_WAITD,
	SOCK_WAKE_SPACE,
	SOCK_WAKE_URG,
};
271
/* Core socket API declarations. */
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
bool sock_is_registered(int family);
int __sock_create(struct net *net, int family, int type, int proto,
		  struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
int sock_create_kern(struct net *net, int family, int type, int proto,
		     struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
struct socket *sock_alloc(void);
void sock_release(struct socket *sock);
int sock_sendmsg(struct socket *sock, struct msghdr *msg);
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags);
struct file *sock_alloc_file(struct socket *sock, int flags,
			     const char *dname);
struct socket *sockfd_lookup(int fd, int *err);
struct socket *sock_from_file(struct file *file);
/*
 * Calls fput() on the file of @sock. The argument is parenthesized so
 * that any pointer expression, not just a plain identifier, expands
 * correctly.
 */
#define sockfd_put(sock) fput((sock)->file)
int net_ratelimit(void);
290
/* Invoke @function with the given arguments only if net_ratelimit() allows. */
#define net_ratelimited_function(function, ...)			\
do {								\
	if (net_ratelimit())					\
		function(__VA_ARGS__);				\
} while (0)

/* Rate-limited counterparts of the pr_<level>() helpers. */
#define net_emerg_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_emerg, fmt, ##__VA_ARGS__)
#define net_alert_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_alert, fmt, ##__VA_ARGS__)
#define net_crit_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_crit, fmt, ##__VA_ARGS__)
#define net_err_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__)
#define net_notice_ratelimited(fmt, ...)			\
	net_ratelimited_function(pr_notice, fmt, ##__VA_ARGS__)
#define net_warn_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
#define net_info_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)

/*
 * net_dbg_ratelimited() has three variants, selected by dynamic debug
 * support, then DEBUG, then a no_printk() fallback.
 */
#if defined(CONFIG_DYNAMIC_DEBUG) || \
    (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define net_dbg_ratelimited(fmt, ...)					\
do {									\
	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&				\
	    net_ratelimit())						\
		__dynamic_pr_debug(&descriptor, pr_fmt(fmt),		\
				   ##__VA_ARGS__);			\
} while (0)
#elif defined(DEBUG)
#define net_dbg_ratelimited(fmt, ...)				\
	net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
#else
#define net_dbg_ratelimited(fmt, ...)				\
	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
328
/* Networking aliases for get_random_once() and get_random_sleepable_once(). */
#define net_get_random_once(buf, nbytes)			\
	get_random_once((buf), (nbytes))
#define net_get_random_sleepable_once(buf, nbytes)		\
	get_random_sleepable_once((buf), (nbytes))
333
/*
 * Some pages have a page_count of 0 and/or have PageSlab() set: e.g. XFS
 * meta- & log-data in slab pages, bcache meta data pages, or other high
 * order pages allocated by __get_free_pages() without __GFP_COMP.
 *
 * send_page cannot be used for those, because it does get_page();
 * put_page(); and that would either trigger a VM_BUG directly or
 * __page_cache_release a page that someone still references, leading to
 * an obscure delayed Oops somewhere else.
 *
 * Returns true only for a non-slab page with a page_count of at least 1.
 */
static inline bool sendpage_ok(struct page *page)
{
	if (PageSlab(page))
		return false;

	return page_count(page) >= 1;
}
348
349/*
350 * Check sendpage_ok on contiguous pages.
351 */
352static inline bool sendpages_ok(struct page *page, size_t len, size_t offset)
353{
354 struct page *p = page + (offset >> PAGE_SHIFT);
355 size_t count = 0;
356
357 while (count < len) {
358 if (!sendpage_ok(p))
359 return false;
360
361 p++;
362 count += PAGE_SIZE;
363 }
364
365 return true;
366}
367
368int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
369 size_t num, size_t len);
370int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
371 size_t num, size_t len, int flags);
372
373int kernel_bind(struct socket *sock, struct sockaddr_unsized *addr, int addrlen);
374int kernel_listen(struct socket *sock, int backlog);
375int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
376int kernel_connect(struct socket *sock, struct sockaddr_unsized *addr, int addrlen,
377 int flags);
378int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
379int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
380int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
381
382/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
383u32 kernel_sock_ip_overhead(struct sock *sk);
384
/*
 * Module alias builders. Each one emits MODULE_ALIAS() with a string of
 * the form "net-pf-<pf>[-proto-<proto>[-type-<type> | <name>]]".
 */
#define MODULE_ALIAS_NETPROTO(proto)					\
	MODULE_ALIAS("net-pf-" __stringify(proto))

#define MODULE_ALIAS_NET_PF_PROTO(pf, proto)				\
	MODULE_ALIAS("net-pf-" __stringify(pf)				\
		     "-proto-" __stringify(proto))

#define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type)			\
	MODULE_ALIAS("net-pf-" __stringify(pf)				\
		     "-proto-" __stringify(proto)			\
		     "-type-" __stringify(type))

/* @name is a string literal appended directly after the protocol number. */
#define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name)			\
	MODULE_ALIAS("net-pf-" __stringify(pf)				\
		     "-proto-" __stringify(proto)			\
		     name)
398#endif /* _LINUX_NET_H */