Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * kernfs.h - pseudo filesystem decoupled from vfs locking
4 */
5
6#ifndef __LINUX_KERNFS_H
7#define __LINUX_KERNFS_H
8
9#include <linux/err.h>
10#include <linux/list.h>
11#include <linux/mutex.h>
12#include <linux/idr.h>
13#include <linux/lockdep.h>
14#include <linux/rbtree.h>
15#include <linux/atomic.h>
16#include <linux/bug.h>
17#include <linux/types.h>
18#include <linux/uidgid.h>
19#include <linux/wait.h>
20#include <linux/rwsem.h>
21#include <linux/cache.h>
22
23struct file;
24struct dentry;
25struct iattr;
26struct ns_common;
27struct seq_file;
28struct vm_area_struct;
29struct vm_operations_struct;
30struct super_block;
31struct file_system_type;
32struct poll_table_struct;
33struct fs_context;
34
35struct kernfs_fs_context;
36struct kernfs_open_node;
37struct kernfs_iattrs;
38
/*
 * Sizing of the hash table of locks.
 *
 * NR_KERNFS_LOCK_BITS is the log2 of NR_KERNFS_LOCKS, the number of entries
 * in the hash table of locks.  A small table hurts scalability because more
 * and more kernfs_node objects end up sharing the same lock; a very large
 * table wastes memory.
 *
 * At the moment the table size follows the number of CPUs:
 *
 *	NR_CPU		NR_KERNFS_LOCK_BITS	NR_KERNFS_LOCKS
 *	1		1			2
 *	2-3		2			4
 *	4-7		4			16
 *	8-15		6			64
 *	16-31		8			256
 *	32 and more	10			1024
 *
 * This relation is based on some internal experimentation: qemu was booted
 * with different values of smp, some sysfs operations were performed on all
 * CPUs, and the impact of the number of locks on the completion time of
 * these operations on each CPU was observed.
 */
#ifndef CONFIG_SMP
#define NR_KERNFS_LOCK_BITS	1
#else
#define NR_KERNFS_LOCK_BITS	(2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32)))
#endif

#define NR_KERNFS_LOCKS		(1 << NR_KERNFS_LOCK_BITS)
70
71/*
72 * There's one kernfs_open_file for each open file and one kernfs_open_node
73 * for each kernfs_node with one or more open files.
74 *
75 * filp->private_data points to seq_file whose ->private points to
76 * kernfs_open_file.
77 *
78 * kernfs_open_files are chained at kernfs_open_node->files, which is
79 * protected by kernfs_global_locks.open_file_mutex[i].
80 *
81 * To reduce possible contention in sysfs access, arising due to single
82 * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node
83 * object address as hash keys to get the index of these locks.
84 *
85 * Hashed mutexes are safe to use here because operations using these don't
86 * rely on global exclusion.
87 *
88 * In future we intend to replace other global locks with hashed ones as well.
89 * kernfs_global_locks acts as a holder for all such hash tables.
90 */
91struct kernfs_global_locks {
92 struct mutex open_file_mutex[NR_KERNFS_LOCKS];
93};
94
/*
 * Node type bits.  They live in the part of kernfs_node->flags selected by
 * KERNFS_TYPE_MASK.
 */
enum kernfs_node_type {
	KERNFS_DIR	= 1 << 0,
	KERNFS_FILE	= 1 << 1,
	KERNFS_LINK	= 1 << 2,
};
100
/*
 * kernfs_node->flags layout: the bits in KERNFS_TYPE_MASK carry the node
 * type, every other bit is available for flags.  The complement is
 * parenthesized so the macro expands as a single operand wherever it is used.
 */
#define KERNFS_TYPE_MASK	0x000f
#define KERNFS_FLAG_MASK	(~KERNFS_TYPE_MASK)
103
/*
 * Per-node flag bits.  All values lie outside KERNFS_TYPE_MASK (0x000f), so
 * they can share a word with the node type.
 */
enum kernfs_node_flag {
	KERNFS_ACTIVATED	= 1 << 4,
	KERNFS_NS		= 1 << 5,
	KERNFS_HAS_SEQ_SHOW	= 1 << 6,
	KERNFS_HAS_MMAP		= 1 << 7,
	KERNFS_LOCKDEP		= 1 << 8,
	KERNFS_HIDDEN		= 1 << 9,
	KERNFS_SUICIDAL		= 1 << 10,
	KERNFS_SUICIDED		= 1 << 11,
	KERNFS_EMPTY_DIR	= 1 << 12,
	KERNFS_HAS_RELEASE	= 1 << 13,
	KERNFS_REMOVING		= 1 << 14,
};
117
/* Bits accepted in the @flags argument of kernfs_create_root(). */
enum kernfs_root_flag {
	/*
	 * Create kernfs_nodes deactivated and invisible; they only become
	 * visible after an explicit kernfs_activate().  This allows a set
	 * of related nodes to become visible atomically once all of them
	 * have been created successfully.
	 */
	KERNFS_ROOT_CREATE_DEACTIVATED		= 1 << 0,

	/*
	 * For regular files, open(2) succeeds regardless of the RW
	 * permissions if the opener has CAP_DAC_OVERRIDE.  sysfs had an
	 * extra layer of enforcement: open(2) fails with -EACCES,
	 * regardless of CAP_DAC_OVERRIDE, if the permission doesn't have
	 * the respective read or write access at all (none of S_IRUGO or
	 * S_IWUGO) or the respective operation isn't implemented.  This
	 * flag enables that behavior.
	 */
	KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK	= 1 << 1,

	/*
	 * The filesystem supports the exportfs operation, so userspace can
	 * use fhandle to access nodes of the fs.
	 */
	KERNFS_ROOT_SUPPORT_EXPORTOP		= 1 << 2,

	/*
	 * Allow user xattrs to be written to nodes rooted at this root.
	 */
	KERNFS_ROOT_SUPPORT_USER_XATTR		= 1 << 3,

	/*
	 * Renames must not change the parent node.
	 */
	KERNFS_ROOT_INVARIANT_PARENT		= 1 << 4,
};
155
156/* type-specific structures for kernfs_node union members */
157struct kernfs_elem_dir {
158 unsigned long subdirs;
159 /* children rbtree starts here and goes through kn->rb */
160 struct rb_root children;
161
162 /*
163 * The kernfs hierarchy this directory belongs to. This fits
164 * better directly in kernfs_node but is here to save space.
165 */
166 struct kernfs_root *root;
167 /*
168 * Monotonic revision counter, used to identify if a directory
169 * node has changed during negative dentry revalidation.
170 */
171 unsigned long rev;
172};
173
/* Data carried by a kernfs_node through its ->symlink union member. */
struct kernfs_elem_symlink {
	struct kernfs_node	*target_kn;
};
177
178struct kernfs_elem_attr {
179 const struct kernfs_ops *ops;
180 struct kernfs_open_node __rcu *open;
181 loff_t size;
182 struct kernfs_node *notify_next; /* for kernfs_notify() */
183};
184
185/*
186 * kernfs_node - the building block of kernfs hierarchy. Each and every
187 * kernfs node is represented by single kernfs_node. Most fields are
188 * private to kernfs and shouldn't be accessed directly by kernfs users.
189 *
190 * As long as count reference is held, the kernfs_node itself is
191 * accessible. Dereferencing elem or any other outer entity requires
192 * active reference.
193 */
194struct kernfs_node {
195 atomic_t count;
196 atomic_t active;
197#ifdef CONFIG_DEBUG_LOCK_ALLOC
198 struct lockdep_map dep_map;
199#endif
200 /*
201 * Use kernfs_get_parent() and kernfs_name/path() instead of
202 * accessing the following two fields directly. If the node is
203 * never moved to a different parent, it is safe to access the
204 * parent directly.
205 */
206 struct kernfs_node __rcu *__parent;
207 const char __rcu *name;
208
209 struct rb_node rb;
210
211 const struct ns_common *ns; /* namespace tag */
212 unsigned int hash; /* ns + name hash */
213 unsigned short flags;
214 umode_t mode;
215
216 union {
217 struct kernfs_elem_dir dir;
218 struct kernfs_elem_symlink symlink;
219 struct kernfs_elem_attr attr;
220 };
221
222 /*
223 * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit,
224 * the low 32bits are ino and upper generation.
225 */
226 u64 id;
227
228 void *priv;
229 struct kernfs_iattrs *iattr;
230
231 struct rcu_head rcu;
232};
233
234/*
235 * kernfs_syscall_ops may be specified on kernfs_create_root() to support
236 * syscalls. These optional callbacks are invoked on the matching syscalls
237 * and can perform any kernfs operations which don't necessarily have to be
238 * the exact operation requested. An active reference is held for each
239 * kernfs_node parameter.
240 */
241struct kernfs_syscall_ops {
242 int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
243
244 int (*mkdir)(struct kernfs_node *parent, const char *name,
245 umode_t mode);
246 int (*rmdir)(struct kernfs_node *kn);
247 int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
248 const char *new_name);
249 int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
250 struct kernfs_root *root);
251};
252
/*
 * Declared unconditionally: unlike most prototypes in this header, this one
 * is not inside the CONFIG_KERNFS conditional and has no inline stub here.
 */
struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root);
254
255struct kernfs_open_file {
256 /* published fields */
257 struct kernfs_node *kn;
258 struct file *file;
259 struct seq_file *seq_file;
260 void *priv;
261
262 /* private fields, do not use outside kernfs proper */
263 struct mutex mutex;
264 struct mutex prealloc_mutex;
265 int event;
266 struct list_head list;
267 char *prealloc_buf;
268
269 size_t atomic_write_len;
270 bool mmapped:1;
271 bool released:1;
272 const struct vm_operations_struct *vm_ops;
273};
274
275struct kernfs_ops {
276 /*
277 * Optional open/release methods. Both are called with
278 * @of->seq_file populated.
279 */
280 int (*open)(struct kernfs_open_file *of);
281 void (*release)(struct kernfs_open_file *of);
282
283 /*
284 * Read is handled by either seq_file or raw_read().
285 *
286 * If seq_show() is present, seq_file path is active. Other seq
287 * operations are optional and if not implemented, the behavior is
288 * equivalent to single_open(). @sf->private points to the
289 * associated kernfs_open_file.
290 *
291 * read() is bounced through kernel buffer and a read larger than
292 * PAGE_SIZE results in partial operation of PAGE_SIZE.
293 */
294 int (*seq_show)(struct seq_file *sf, void *v);
295
296 void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
297 void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
298 void (*seq_stop)(struct seq_file *sf, void *v);
299
300 ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes,
301 loff_t off);
302
303 /*
304 * write() is bounced through kernel buffer. If atomic_write_len
305 * is not set, a write larger than PAGE_SIZE results in partial
306 * operations of PAGE_SIZE chunks. If atomic_write_len is set,
307 * writes upto the specified size are executed atomically but
308 * larger ones are rejected with -E2BIG.
309 */
310 size_t atomic_write_len;
311 /*
312 * "prealloc" causes a buffer to be allocated at open for
313 * all read/write requests. As ->seq_show uses seq_read()
314 * which does its own allocation, it is incompatible with
315 * ->prealloc. Provide ->read and ->write with ->prealloc.
316 */
317 bool prealloc;
318 ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
319 loff_t off);
320
321 __poll_t (*poll)(struct kernfs_open_file *of,
322 struct poll_table_struct *pt);
323
324 int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
325 loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence);
326};
327
328/*
329 * The kernfs superblock creation/mount parameter context.
330 */
331struct kernfs_fs_context {
332 struct kernfs_root *root; /* Root of the hierarchy being mounted */
333 struct ns_common *ns_tag; /* Namespace tag of the mount (or NULL) */
334 unsigned long magic; /* File system specific magic number */
335
336 /* The following are set/used by kernfs_mount() */
337 bool new_sb_created; /* Set to T if we allocated a new sb */
338};
339
340#ifdef CONFIG_KERNFS
341
342static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
343{
344 return kn->flags & KERNFS_TYPE_MASK;
345}
346
347static inline ino_t kernfs_id_ino(u64 id)
348{
349 /* id is ino if ino_t is 64bit; otherwise, low 32bits */
350 if (sizeof(ino_t) >= sizeof(u64))
351 return id;
352 else
353 return (u32)id;
354}
355
356static inline u32 kernfs_id_gen(u64 id)
357{
358 /* gen is fixed at 1 if ino_t is 64bit; otherwise, high 32bits */
359 if (sizeof(ino_t) >= sizeof(u64))
360 return 1;
361 else
362 return id >> 32;
363}
364
365static inline ino_t kernfs_ino(struct kernfs_node *kn)
366{
367 return kernfs_id_ino(kn->id);
368}
369
370static inline ino_t kernfs_gen(struct kernfs_node *kn)
371{
372 return kernfs_id_gen(kn->id);
373}
374
375/**
376 * kernfs_enable_ns - enable namespace under a directory
377 * @kn: directory of interest, should be empty
378 *
379 * This is to be called right after @kn is created to enable namespace
380 * under it. All children of @kn must have non-NULL namespace tags and
381 * only the ones which match the super_block's tag will be visible.
382 */
383static inline void kernfs_enable_ns(struct kernfs_node *kn)
384{
385 WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
386 WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children));
387 kn->flags |= KERNFS_NS;
388}
389
390/**
391 * kernfs_ns_enabled - test whether namespace is enabled
392 * @kn: the node to test
393 *
394 * Test whether namespace filtering is enabled for the children of @ns.
395 */
396static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
397{
398 return kn->flags & KERNFS_NS;
399}
400
/* Name/path formatting and printing; defined outside this header. */
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
int kernfs_path_from_node(struct kernfs_node *kn_to,
			  struct kernfs_node *kn_from,
			  char *buf, size_t buflen);
void pr_cont_kernfs_name(struct kernfs_node *kn);
void pr_cont_kernfs_path(struct kernfs_node *kn);

/* Parent access and namespace-aware lookup. */
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
struct kernfs_node *
kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
		       const struct ns_common *ns);
struct kernfs_node *
kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path,
		       const struct ns_common *ns);

/* Get/put pair for a kernfs_node. */
void kernfs_get(struct kernfs_node *kn);
void kernfs_put(struct kernfs_node *kn);
415
/* Conversions between VFS objects and kernfs objects. */
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry);
struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
struct inode *kernfs_get_inode(struct super_block *sb,
			       struct kernfs_node *kn);
struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
				  struct super_block *sb);

/* Root creation/destruction; @flags takes enum kernfs_root_flag bits. */
struct kernfs_root *
kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags,
		   void *priv);
void kernfs_destroy_root(struct kernfs_root *root);
unsigned int kernfs_root_flags(struct kernfs_node *kn);
426
427struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
428 const char *name, umode_t mode,
429 kuid_t uid, kgid_t gid,
430 void *priv,
431 const struct ns_common *ns);
432struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
433 const char *name);
434struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
435 const char *name, umode_t mode,
436 kuid_t uid, kgid_t gid,
437 loff_t size,
438 const struct kernfs_ops *ops,
439 void *priv,
440 const struct ns_common *ns,
441 struct lock_class_key *key);
442struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
443 const char *name,
444 struct kernfs_node *target);
445void kernfs_activate(struct kernfs_node *kn);
446void kernfs_show(struct kernfs_node *kn, bool show);
447void kernfs_remove(struct kernfs_node *kn);
448void kernfs_break_active_protection(struct kernfs_node *kn);
449void kernfs_unbreak_active_protection(struct kernfs_node *kn);
450bool kernfs_remove_self(struct kernfs_node *kn);
451int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
452 const struct ns_common *ns);
453int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
454 const char *new_name, const struct ns_common *new_ns);
455int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
456__poll_t kernfs_generic_poll(struct kernfs_open_file *of,
457 struct poll_table_struct *pt);
458void kernfs_notify(struct kernfs_node *kn);
459
460int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
461 void *value, size_t size);
462int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
463 const void *value, size_t size, int flags);
464
465const struct ns_common *kernfs_super_ns(struct super_block *sb);
466int kernfs_get_tree(struct fs_context *fc);
467void kernfs_free_fs_context(struct fs_context *fc);
468void kernfs_kill_sb(struct super_block *sb);
469
470void kernfs_init(void);
471
472struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
473 u64 id);
474#else /* CONFIG_KERNFS */
475
476static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
477{ return 0; } /* whatever */
478
479static inline void kernfs_enable_ns(struct kernfs_node *kn) { }
480
481static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
482{ return false; }
483
484static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
485{ return -ENOSYS; }
486
/*
 * !CONFIG_KERNFS stub: always fails with -ENOSYS; @buf is not touched.
 *
 * The parameters are named @kn_to/@kn_from to match the CONFIG_KERNFS
 * prototype of this function, so both variants read the same at call
 * sites such as kernfs_path(), which passes its node as the first argument.
 */
static inline int kernfs_path_from_node(struct kernfs_node *kn_to,
					struct kernfs_node *kn_from,
					char *buf, size_t buflen)
{
	return -ENOSYS;
}
491
/* !CONFIG_KERNFS stubs: the printing helpers do nothing. */
static inline void pr_cont_kernfs_name(struct kernfs_node *kn)
{
}

static inline void pr_cont_kernfs_path(struct kernfs_node *kn)
{
}

/* !CONFIG_KERNFS stubs: parent access and lookups always yield NULL. */
static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
{
	return NULL;
}

static inline struct kernfs_node *
kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
		       const struct ns_common *ns)
{
	return NULL;
}

static inline struct kernfs_node *
kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path,
		       const struct ns_common *ns)
{
	return NULL;
}

/* !CONFIG_KERNFS stubs: get/put do nothing. */
static inline void kernfs_get(struct kernfs_node *kn)
{
}

static inline void kernfs_put(struct kernfs_node *kn)
{
}
509
/* !CONFIG_KERNFS stubs: conversions from VFS objects always yield NULL. */
static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{
	return NULL;
}

static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
{
	return NULL;
}

static inline struct inode *
kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
{
	return NULL;
}

/* !CONFIG_KERNFS stub: root creation fails with ERR_PTR(-ENOSYS). */
static inline struct kernfs_root *
kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags,
		   void *priv)
{
	return ERR_PTR(-ENOSYS);
}

/* !CONFIG_KERNFS stub: does nothing. */
static inline void kernfs_destroy_root(struct kernfs_root *root)
{
}

/* !CONFIG_KERNFS stub: reports no root flags. */
static inline unsigned int kernfs_root_flags(struct kernfs_node *kn)
{
	return 0;
}
528
529static inline struct kernfs_node *
530kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
531 umode_t mode, kuid_t uid, kgid_t gid,
532 void *priv, const struct ns_common *ns)
533{ return ERR_PTR(-ENOSYS); }
534
535static inline struct kernfs_node *
536__kernfs_create_file(struct kernfs_node *parent, const char *name,
537 umode_t mode, kuid_t uid, kgid_t gid,
538 loff_t size, const struct kernfs_ops *ops,
539 void *priv, const struct ns_common *ns,
540 struct lock_class_key *key)
541{ return ERR_PTR(-ENOSYS); }
542
543static inline struct kernfs_node *
544kernfs_create_link(struct kernfs_node *parent, const char *name,
545 struct kernfs_node *target)
546{ return ERR_PTR(-ENOSYS); }
547
/* !CONFIG_KERNFS stubs: activation and removal do nothing. */
static inline void kernfs_activate(struct kernfs_node *kn)
{
}

static inline void kernfs_remove(struct kernfs_node *kn)
{
}

/* !CONFIG_KERNFS stub: always returns false. */
static inline bool kernfs_remove_self(struct kernfs_node *kn)
{
	return false;
}
554
/*
 * !CONFIG_KERNFS stub: always fails with -ENOSYS.
 *
 * The first parameter is named @parent to match the CONFIG_KERNFS prototype
 * and the kernfs_remove_by_name() wrapper, which both take the parent node
 * of the entry called @name.
 */
static inline int kernfs_remove_by_name_ns(struct kernfs_node *parent,
					   const char *name,
					   const struct ns_common *ns)
{
	return -ENOSYS;
}
559
560static inline int kernfs_rename_ns(struct kernfs_node *kn,
561 struct kernfs_node *new_parent,
562 const char *new_name,
563 const struct ns_common *new_ns)
564{ return -ENOSYS; }
565
566static inline int kernfs_setattr(struct kernfs_node *kn,
567 const struct iattr *iattr)
568{ return -ENOSYS; }
569
570static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of,
571 struct poll_table_struct *pt)
572{ return -ENOSYS; }
573
574static inline void kernfs_notify(struct kernfs_node *kn) { }
575
576static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
577 void *value, size_t size)
578{ return -ENOSYS; }
579
580static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
581 const void *value, size_t size, int flags)
582{ return -ENOSYS; }
583
/* !CONFIG_KERNFS stub: there is no namespace tag to report. */
static inline const struct ns_common *kernfs_super_ns(struct super_block *sb)
{
	return NULL;
}

/* !CONFIG_KERNFS stub: always fails with -ENOSYS. */
static inline int kernfs_get_tree(struct fs_context *fc)
{
	return -ENOSYS;
}

/* !CONFIG_KERNFS stubs: the remaining teardown/init hooks do nothing. */
static inline void kernfs_free_fs_context(struct fs_context *fc)
{
}

static inline void kernfs_kill_sb(struct super_block *sb)
{
}

static inline void kernfs_init(void)
{
}
595
596#endif /* CONFIG_KERNFS */
597
/**
 * kernfs_path - build full path of a given node
 * @kn: kernfs_node of interest
 * @buf: buffer to copy @kn's path into
 * @buflen: size of @buf
 *
 * Thin wrapper around kernfs_path_from_node() with no starting node (NULL).
 *
 * If @kn is NULL the result will be "(null)".
 *
 * Returns the length of the full path.  If the full length is equal to or
 * greater than @buflen, @buf contains the truncated path with the trailing
 * '\0'.  On error, -errno is returned.
 */
static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
{
	struct kernfs_node *kn_from = NULL;

	return kernfs_path_from_node(kn, kn_from, buf, buflen);
}
614
/* kernfs_find_and_get_ns() with a NULL namespace tag. */
static inline struct kernfs_node *
kernfs_find_and_get(struct kernfs_node *kn, const char *name)
{
	const struct ns_common *no_ns = NULL;

	return kernfs_find_and_get_ns(kn, name, no_ns);
}
620
/* kernfs_walk_and_get_ns() with a NULL namespace tag. */
static inline struct kernfs_node *
kernfs_walk_and_get(struct kernfs_node *kn, const char *path)
{
	const struct ns_common *no_ns = NULL;

	return kernfs_walk_and_get_ns(kn, path, no_ns);
}
626
627static inline struct kernfs_node *
628kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
629 void *priv)
630{
631 return kernfs_create_dir_ns(parent, name, mode,
632 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
633 priv, NULL);
634}
635
/* kernfs_remove_by_name_ns() with a NULL namespace tag. */
static inline int kernfs_remove_by_name(struct kernfs_node *parent,
					const char *name)
{
	const struct ns_common *no_ns = NULL;

	return kernfs_remove_by_name_ns(parent, name, no_ns);
}
641
/* kernfs_rename_ns() with a NULL new namespace tag. */
static inline int kernfs_rename(struct kernfs_node *kn,
				struct kernfs_node *new_parent,
				const char *new_name)
{
	const struct ns_common *no_new_ns = NULL;

	return kernfs_rename_ns(kn, new_parent, new_name, no_new_ns);
}
648
649#endif /* __LINUX_KERNFS_H */