// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net-sysfs.c - network device class and attributes
 *
 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/isolation.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_lock.h>
#include <net/netdev_rx_queue.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";

/* Caller holds RTNL, netdev->lock or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
	return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}

/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
 * when unregistering a net device and accessing associated sysfs files. The
 * potential deadlock is as follows:
 *
 *         CPU 0                                          CPU 1
 *
 *  rtnl_lock                                   vfs_read
 *  unregister_netdevice_many                   kernfs_seq_start
 *    device_del / kobject_put                    kernfs_get_active (kn->active++)
 *      kernfs_drain                            sysfs_kf_seq_show
 *        wait_event(                             rtnl_lock
 *           kn->active == KN_DEACTIVATED_BIAS)     -> waits on CPU 0 to release
 *        -> waits on CPU 1 to decrease kn->active    the rtnl lock.
 *
 * The historical fix was to use rtnl_trylock with restart_syscall to bail out
 * of sysfs operations when the lock couldn't be taken. This fixed the above
 * issue as it allowed CPU 1 to bail out of the ABBA situation.
 *
 * But it came with performance issues, as syscalls were restarted in loops
 * when there was contention on the rtnl lock, causing huge slowdowns in
 * specific scenarios (e.g. lots of virtual interfaces created and userspace
 * daemons querying their attributes).
 *
 * The idea below is to bail out of the active kernfs_node protection
 * (kn->active) while trying to take the rtnl lock.
 *
 * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
 * net device is guaranteed to be alive if this returns successfully.
 */
static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
			   struct net_device *ndev)
{
	struct kernfs_node *kn;
	int ret = 0;

	/* First, we hold a reference to the net device as the unregistration
	 * path might run in parallel. This will ensure the net device and the
	 * associated sysfs objects won't be freed while we try to take the rtnl
	 * lock.
	 */
	dev_hold(ndev);
	/* sysfs_break_active_protection was introduced to allow self-removal of
	 * devices and their associated sysfs files by bailing out of the
	 * sysfs/kernfs protection. We do this here to allow the unregistration
	 * path to complete in parallel. The following takes a reference on the
	 * kobject and the kernfs_node being accessed.
	 *
	 * This works because we hold a reference to the net device and the
	 * unregistration path will wait for us eventually in netdev_run_todo
	 * (outside an rtnl lock section).
	 */
	kn = sysfs_break_active_protection(kobj, attr);
	/* We can now try to take the rtnl lock. This can't deadlock us as the
	 * unregistration path is able to drain sysfs files (kernfs_node) thanks
	 * to the above dance.
	 */
	if (rtnl_lock_interruptible()) {
		ret = -ERESTARTSYS;
		goto unbreak;
	}
	/* Check that dismantle on the device hasn't started, otherwise deny the
	 * operation.
	 */
	if (!dev_isalive(ndev)) {
		rtnl_unlock();
		ret = -ENODEV;
		goto unbreak;
	}
	/* We are now sure the device dismantle hasn't started and that it can't
	 * start before we exit the locking section, as we hold the rtnl lock.
	 * There's no need to keep the sysfs protection broken nor to hold a net
	 * device reference from this point on; those were only needed to take
	 * the rtnl lock.
	 */
unbreak:
	sysfs_unbreak_active_protection(kn);
	dev_put(ndev);

	return ret;
}
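
/* For reference, a minimal show handler built on the helper above would pair
 * it with rtnl_unlock() as follows. This is an illustrative sketch, not a
 * handler from this file (the attribute name is hypothetical):
 *
 *	static ssize_t example_show(struct device *dev,
 *				    struct device_attribute *attr, char *buf)
 *	{
 *		struct net_device *ndev = to_net_dev(dev);
 *		ssize_t ret;
 *
 *		ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, ndev);
 *		if (ret)
 *			return ret;
 *		ret = sysfs_emit(buf, "%u\n", READ_ONCE(ndev->mtu));
 *		rtnl_unlock();
 *		return ret;
 *	}
 */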

/* use same locking rules as GIF* ioctls */
static ssize_t netdev_show(const struct device *dev,
			   struct device_attribute *attr, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = (*format)(ndev, buf);
	rcu_read_unlock();

	return ret;
}

/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string)				\
static ssize_t format_##field(const struct net_device *dev, char *buf)	\
{									\
	return sysfs_emit(buf, format_string, READ_ONCE(dev->field));	\
}									\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	return netdev_show(dev, attr, buf, format_##field);		\
}									\

#define NETDEVICE_SHOW_RO(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RO(field)

#define NETDEVICE_SHOW_RW(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RW(field)
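
/* As an illustration (a rough sketch, not verbatim preprocessor output),
 * NETDEVICE_SHOW_RO(ifindex, fmt_dec) expands to approximately:
 *
 *	static ssize_t format_ifindex(const struct net_device *dev, char *buf)
 *	{
 *		return sysfs_emit(buf, "%d\n", READ_ONCE(dev->ifindex));
 *	}
 *	static ssize_t ifindex_show(struct device *dev,
 *				    struct device_attribute *attr, char *buf)
 *	{
 *		return netdev_show(dev, attr, buf, format_ifindex);
 *	}
 *	static struct device_attribute dev_attr_ifindex = __ATTR_RO(ifindex);
 */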

/* use same locking and permission rules as SIF* ioctls */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len,
			    int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		goto err;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		goto err;

	ret = (*set)(netdev, new);
	if (ret == 0)
		ret = len;

	rtnl_unlock();
 err:
	return ret;
}

/* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */
static ssize_t
netdev_lock_store(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t len,
		  int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		return ret;

	netdev_lock(netdev);

	if (dev_isalive(netdev)) {
		ret = (*set)(netdev, new);
		if (ret == 0)
			ret = len;
	}
	netdev_unlock(netdev);

	return ret;
}

NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
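
/* These attributes surface under /sys/class/net/<iface>/. Illustrative shell
 * session (device name and values are assumptions, not taken from this file):
 *
 *	$ cat /sys/class/net/eth0/ifindex
 *	2
 *	$ cat /sys/class/net/eth0/addr_len
 *	6
 */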

static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev));
}
static DEVICE_ATTR_RO(iflink);

static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
	return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}

static ssize_t name_assign_type_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
		ret = netdev_show(dev, attr, buf, format_name_assign_type);

	return ret;
}
static DEVICE_ATTR_RO(name_assign_type);

/* use same locking rules as the GIFHWADDR ioctl (netif_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	down_read(&dev_addr_sem);

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
	rcu_read_unlock();

	up_read(&dev_addr_sem);
	return ret;
}
static DEVICE_ATTR_RO(address);

static ssize_t broadcast_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	int ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
	rcu_read_unlock();
	return ret;
}
static DEVICE_ATTR_RO(broadcast);

static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
	if (!netif_running(dev))
		return -EINVAL;
	return dev_change_carrier(dev, (bool)new_carrier);
}

static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);

	/* The check is also done in change_carrier; this lets us return early
	 * without hitting the locking section in netdev_store.
	 */
	if (!netdev->netdev_ops->ndo_change_carrier)
		return -EOPNOTSUPP;

	return netdev_store(dev, attr, buf, len, change_carrier);
}

static ssize_t carrier_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (netif_running(netdev)) {
		/* Synchronize carrier state with link watch,
		 * see also rtnl_getlink().
		 */
		linkwatch_sync_dev(netdev);

		ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
	}

	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RW(carrier);
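
/* Illustrative usage from userspace (assumes a running device whose driver
 * implements ndo_change_carrier; device name is an assumption):
 *
 *	$ echo 0 > /sys/class/net/eth0/carrier	# force carrier off
 *	$ cat /sys/class/net/eth0/carrier
 *	0
 */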

static ssize_t speed_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this lets us
	 * return early without hitting the locking section below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd))
			ret = sysfs_emit(buf, fmt_dec, cmd.base.speed);
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(speed);

static ssize_t duplex_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this lets us
	 * return early without hitting the locking section below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
			const char *duplex;

			switch (cmd.base.duplex) {
			case DUPLEX_HALF:
				duplex = "half";
				break;
			case DUPLEX_FULL:
				duplex = "full";
				break;
			default:
				duplex = "unknown";
				break;
			}
			ret = sysfs_emit(buf, "%s\n", duplex);
		}
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(duplex);

static ssize_t testing_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(testing);

static ssize_t dormant_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(dormant);

static const char *const operstates[] = {
	"unknown",
	"notpresent", /* currently unused */
	"down",
	"lowerlayerdown",
	"testing",
	"dormant",
	"up"
};

static ssize_t operstate_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	unsigned char operstate;

	operstate = READ_ONCE(netdev->operstate);
	if (!netif_running(netdev))
		operstate = IF_OPER_DOWN;

	if (operstate >= ARRAY_SIZE(operstates))
		return -EINVAL; /* should not happen */

	return sysfs_emit(buf, "%s\n", operstates[operstate]);
}
static DEVICE_ATTR_RO(operstate);

static ssize_t carrier_changes_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec,
			  atomic_read(&netdev->carrier_up_count) +
			  atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);

static ssize_t carrier_up_count_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
}
static DEVICE_ATTR_RO(carrier_up_count);

static ssize_t carrier_down_count_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_down_count);

/* read-write attributes */

static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
	return dev_set_mtu(dev, (int)new_mtu);
}

static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_mtu);
}
NETDEVICE_SHOW_RW(mtu, fmt_dec);

static int change_flags(struct net_device *dev, unsigned long new_flags)
{
	return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}

static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_flags);
}
NETDEVICE_SHOW_RW(flags, fmt_hex);

static ssize_t tx_queue_len_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
}
NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);

static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	netdev_set_gro_flush_timeout(dev, val);
	return 0;
}

static ssize_t gro_flush_timeout_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);

static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
	if (val > S32_MAX)
		return -ERANGE;

	netdev_set_defer_hard_irqs(dev, (u32)val);
	return 0;
}

static ssize_t napi_defer_hard_irqs_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len,
				 change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);

static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	size_t count = len;
	ssize_t ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* ignore trailing newline */
	if (len > 0 && buf[len - 1] == '\n')
		--count;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_set_alias(netdev, buf, count);
	if (ret < 0)
		goto err;
	ret = len;
	netdev_state_change(netdev);
err:
	rtnl_unlock();

	return ret;
}

static ssize_t ifalias_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	char tmp[IFALIASZ];
	ssize_t ret;

	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
	if (ret > 0)
		ret = sysfs_emit(buf, "%s\n", tmp);
	return ret;
}
static DEVICE_ATTR_RW(ifalias);

static int change_group(struct net_device *dev, unsigned long new_group)
{
	dev_set_group(dev, (int)new_group);
	return 0;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);

static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
	return dev_change_proto_down(dev, (bool)proto_down);
}

static ssize_t proto_down_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);

static ssize_t phys_port_id_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netdev_phys_item_id ppid;
	ssize_t ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_phys_port_id(netdev, &ppid);
	if (!ret)
		ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_id);

static ssize_t phys_port_name_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	char name[IFNAMSIZ];
	ssize_t ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_phys_port_name(netdev, name, sizeof(name));
	if (!ret)
		ret = sysfs_emit(buf, "%s\n", name);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_name);

static ssize_t phys_switch_id_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netdev_phys_item_id ppid = { };
	ssize_t ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = netif_get_port_parent_id(netdev, &ppid, false);
	if (!ret)
		ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_switch_id);

static struct attribute *netdev_phys_attrs[] __ro_after_init = {
	&dev_attr_phys_port_id.attr,
	&dev_attr_phys_port_name.attr,
	&dev_attr_phys_switch_id.attr,
	NULL,
};

static umode_t netdev_phys_is_visible(struct kobject *kobj,
				      struct attribute *attr, int index)
{
	struct device *dev = kobj_to_dev(kobj);
	struct net_device *netdev = to_net_dev(dev);

	if (attr == &dev_attr_phys_port_id.attr) {
		if (!netdev->netdev_ops->ndo_get_phys_port_id)
			return 0;
	} else if (attr == &dev_attr_phys_port_name.attr) {
		if (!netdev->netdev_ops->ndo_get_phys_port_name &&
		    !netdev->devlink_port)
			return 0;
	} else if (attr == &dev_attr_phys_switch_id.attr) {
		if (!netdev->netdev_ops->ndo_get_port_parent_id &&
		    !netdev->devlink_port)
			return 0;
	}

	return attr->mode;
}

static const struct attribute_group netdev_phys_group = {
	.attrs = netdev_phys_attrs,
	.is_visible = netdev_phys_is_visible,
};

static ssize_t threaded_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();

	if (dev_isalive(netdev))
		ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));

	rcu_read_unlock();

	return ret;
}

static int modify_napi_threaded(struct net_device *dev, unsigned long val)
{
	int ret;

	if (list_empty(&dev->napi_list))
		return -EOPNOTSUPP;

	if (val != 0 && val != 1)
		return -EOPNOTSUPP;

	ret = netif_set_threaded(dev, val);

	return ret;
}

static ssize_t threaded_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buf, size_t len)
{
	return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);

static struct attribute *net_class_attrs[] __ro_after_init = {
	&dev_attr_netdev_group.attr,
	&dev_attr_type.attr,
	&dev_attr_dev_id.attr,
	&dev_attr_dev_port.attr,
	&dev_attr_iflink.attr,
	&dev_attr_ifindex.attr,
	&dev_attr_name_assign_type.attr,
	&dev_attr_addr_assign_type.attr,
	&dev_attr_addr_len.attr,
	&dev_attr_link_mode.attr,
	&dev_attr_address.attr,
	&dev_attr_broadcast.attr,
	&dev_attr_speed.attr,
	&dev_attr_duplex.attr,
	&dev_attr_dormant.attr,
	&dev_attr_testing.attr,
	&dev_attr_operstate.attr,
	&dev_attr_carrier_changes.attr,
	&dev_attr_ifalias.attr,
	&dev_attr_carrier.attr,
	&dev_attr_mtu.attr,
	&dev_attr_flags.attr,
	&dev_attr_tx_queue_len.attr,
	&dev_attr_gro_flush_timeout.attr,
	&dev_attr_napi_defer_hard_irqs.attr,
	&dev_attr_proto_down.attr,
	&dev_attr_carrier_up_count.attr,
	&dev_attr_carrier_down_count.attr,
	&dev_attr_threaded.attr,
	NULL,
};
ATTRIBUTE_GROUPS(net_class);

/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
			    struct device_attribute *attr, char *buf,
			    unsigned long offset)
{
	struct net_device *dev = to_net_dev(d);
	ssize_t ret = -EINVAL;

	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
		offset % sizeof(u64) != 0);

	rcu_read_lock();
	if (dev_isalive(dev)) {
		struct rtnl_link_stats64 temp;
		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

		ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
	}
	rcu_read_unlock();
	return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct device_attribute *attr, char *buf)	\
{									\
	return netstat_show(d, attr, buf,				\
			    offsetof(struct rtnl_link_stats64, name));	\
}									\
static DEVICE_ATTR_RO(name)
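
/* For example (an illustrative expansion, not preprocessor output),
 * NETSTAT_ENTRY(rx_packets) defines rx_packets_show() reading the u64 at
 * offsetof(struct rtnl_link_stats64, rx_packets), exposed to userspace as
 * (device name and count assumed):
 *
 *	$ cat /sys/class/net/eth0/statistics/rx_packets
 *	123456
 */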

NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);

static struct attribute *netstat_attrs[] __ro_after_init = {
	&dev_attr_rx_packets.attr,
	&dev_attr_tx_packets.attr,
	&dev_attr_rx_bytes.attr,
	&dev_attr_tx_bytes.attr,
	&dev_attr_rx_errors.attr,
	&dev_attr_tx_errors.attr,
	&dev_attr_rx_dropped.attr,
	&dev_attr_tx_dropped.attr,
	&dev_attr_multicast.attr,
	&dev_attr_collisions.attr,
	&dev_attr_rx_length_errors.attr,
	&dev_attr_rx_over_errors.attr,
	&dev_attr_rx_crc_errors.attr,
	&dev_attr_rx_frame_errors.attr,
	&dev_attr_rx_fifo_errors.attr,
	&dev_attr_rx_missed_errors.attr,
	&dev_attr_tx_aborted_errors.attr,
	&dev_attr_tx_carrier_errors.attr,
	&dev_attr_tx_fifo_errors.attr,
	&dev_attr_tx_heartbeat_errors.attr,
	&dev_attr_tx_window_errors.attr,
	&dev_attr_rx_compressed.attr,
	&dev_attr_tx_compressed.attr,
	&dev_attr_rx_nohandler.attr,
	NULL
};

static const struct attribute_group netstat_group = {
	.name = "statistics",
	.attrs = netstat_attrs,
};

static struct attribute *wireless_attrs[] = {
	NULL
};

static const struct attribute_group wireless_group = {
	.name = "wireless",
	.attrs = wireless_attrs,
};

static bool wireless_group_needed(struct net_device *ndev)
{
#if IS_ENABLED(CONFIG_CFG80211)
	if (ndev->ieee80211_ptr)
		return true;
#endif
#if IS_ENABLED(CONFIG_WIRELESS_EXT)
	if (ndev->wireless_handlers)
		return true;
#endif
	return false;
}

#else /* CONFIG_SYSFS */
#define net_class_groups	NULL
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_SYSFS
#define to_rx_queue_attr(_attr) \
	container_of(_attr, struct rx_queue_attribute, attr)

#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)

static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
				  char *buf)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
				   const char *buf, size_t count)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops rx_queue_sysfs_ops = {
	.show = rx_queue_attr_show,
	.store = rx_queue_attr_store,
};

#ifdef CONFIG_RPS
static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
	struct rps_map *map;
	cpumask_var_t mask;
	int i, len;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	map = rcu_dereference(queue->rps_map);
	if (map)
		for (i = 0; i < map->len; i++)
			cpumask_set_cpu(map->cpus[i], mask);

	len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
	rcu_read_unlock();
	free_cpumask_var(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
					cpumask_var_t mask)
{
	static DEFINE_MUTEX(rps_map_mutex);
	struct rps_map *old_map, *map;
	int cpu, i;

	map = kzalloc(max_t(unsigned int,
			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
		      GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	i = 0;
	for_each_cpu_and(cpu, mask, cpu_online_mask)
		map->cpus[i++] = cpu;

	if (i) {
		map->len = i;
	} else {
		kfree(map);
		map = NULL;
	}

	mutex_lock(&rps_map_mutex);
	old_map = rcu_dereference_protected(queue->rps_map,
					    mutex_is_locked(&rps_map_mutex));
	rcu_assign_pointer(queue->rps_map, map);

	if (map)
		static_branch_inc(&rps_needed);
	if (old_map)
		static_branch_dec(&rps_needed);

	mutex_unlock(&rps_map_mutex);

	if (old_map)
		kfree_rcu(old_map, rcu);
	return 0;
}

int rps_cpumask_housekeeping(struct cpumask *mask)
{
	if (!cpumask_empty(mask)) {
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT));
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
		if (cpumask_empty(mask))
			return -EINVAL;
	}
	return 0;
}

static ssize_t store_rps_map(struct netdev_rx_queue *queue,
			     const char *buf, size_t len)
{
	cpumask_var_t mask;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err)
		goto out;

	err = rps_cpumask_housekeeping(mask);
	if (err)
		goto out;

	err = netdev_rx_queue_set_rps_mask(queue, mask);

out:
	free_cpumask_var(mask);
	return err ? : len;
}
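
/* Illustrative usage (device name and CPU set are assumptions): steer RX
 * packet processing for queue 0 to CPUs 0-3 by writing a hex CPU bitmap:
 *
 *	$ echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
 */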

static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					   char *buf)
{
	unsigned long val = 0;
	rps_tag_ptr tag_ptr;

	tag_ptr = READ_ONCE(queue->rps_flow_table);
	if (tag_ptr)
		val = 1UL << rps_tag_to_log(tag_ptr);

	return sysfs_emit(buf, "%lu\n", val);
}

static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					    const char *buf, size_t len)
{
	rps_tag_ptr otag, tag_ptr = 0UL;
	struct rps_dev_flow *table;
	unsigned long mask, count;
	size_t sz;
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	rc = kstrtoul(buf, 0, &count);
	if (rc < 0)
		return rc;

	if (count) {
		mask = count - 1;
		/* mask = roundup_pow_of_two(count) - 1;
		 * without overflows...
		 */
		while ((mask | (mask >> 1)) != mask)
			mask |= (mask >> 1);

		/* Do not accept too large tables. */
		if (mask > (INT_MAX / sizeof(*table) - 1))
			return -EINVAL;

		sz = max_t(size_t, sizeof(*table) * (mask + 1),
			   PAGE_SIZE);
		if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
		    is_power_of_2(sizeof(*table)))
			table = kvmalloc(sz, GFP_KERNEL);
		else
			table = vmalloc(sz);
		if (!table)
			return -ENOMEM;
		tag_ptr = (rps_tag_ptr)table;
		if (rps_tag_to_log(tag_ptr)) {
			pr_err_once("store_rps_dev_flow_table_cnt() got a non page aligned allocation.\n");
			kvfree(table);
			return -ENOMEM;
		}
		tag_ptr |= (ilog2(mask) + 1);
		for (count = 0; count <= mask; count++) {
			table[count].cpu = RPS_NO_CPU;
			table[count].filter = RPS_NO_FILTER;
		}
	}

	otag = xchg(&queue->rps_flow_table, tag_ptr);
	if (otag)
		kvfree_rcu_mightsleep(rps_tag_to_table(otag));

	return len;
}
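
/* Illustrative usage: the written count is rounded up to a power of two by
 * the mask computation above, so the value read back may differ (device name
 * assumed):
 *
 *	$ echo 100 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 *	$ cat /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 *	128
 */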

static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
	= __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);

static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
	= __ATTR(rps_flow_cnt, 0644,
		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */

static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
	&rps_cpus_attribute.attr,
	&rps_dev_flow_table_cnt_attribute.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(rx_queue_default);

static void rx_queue_release(struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
#ifdef CONFIG_RPS
	rps_tag_ptr tag_ptr;
	struct rps_map *map;

	map = rcu_dereference_protected(queue->rps_map, 1);
	if (map) {
		RCU_INIT_POINTER(queue->rps_map, NULL);
		kfree_rcu(map, rcu);
	}

	tag_ptr = xchg(&queue->rps_flow_table, 0UL);
	if (tag_ptr)
		kvfree_rcu_mightsleep(rps_tag_to_table(tag_ptr));
#endif

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const struct ns_common *rx_queue_namespace(const struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
	struct device *dev = &queue->dev->dev;

	if (dev->class && dev->class->namespace)
		return dev->class->namespace(dev);

	return NULL;
}

static void rx_queue_get_ownership(const struct kobject *kobj,
				   kuid_t *uid, kgid_t *gid)
{
	const struct ns_common *ns = rx_queue_namespace(kobj);

	net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL,
			     uid, gid);
}

static const struct kobj_type rx_queue_ktype = {
	.sysfs_ops = &rx_queue_sysfs_ops,
	.release = rx_queue_release,
	.namespace = rx_queue_namespace,
	.get_ownership = rx_queue_get_ownership,
};

static int rx_queue_default_mask(struct net_device *dev,
				 struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
	struct cpumask *rps_default_mask;
	int res = 0;

	mutex_lock(&rps_default_mask_mutex);

	rps_default_mask = dev_net(dev)->core.rps_default_mask;
	if (rps_default_mask && !cpumask_empty(rps_default_mask))
		res = netdev_rx_queue_set_rps_mask(queue, rps_default_mask);

	mutex_unlock(&rps_default_mask_mutex);

	return res;
#else
	return 0;
#endif
}

static int rx_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Rx queues are cleared in rx_queue_release to allow later
	 * re-registration. This is triggered when their kobj refcount is
	 * dropped.
	 *
	 * If a queue is removed while both a read (or write) operation and
	 * the re-addition of the same queue are pending (both waiting on
	 * rtnl_lock), the re-addition may execute before the read, so the
	 * initial removal never happens (the queue's kobj refcount won't drop
	 * enough because of the pending read). In such a rare case, return to
	 * allow the removal operation to complete.
	 */
	if (unlikely(kobj->state_initialized)) {
		netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
		return -EAGAIN;
	}

	/* A later kobject_put will trigger the rx_queue_release call, which
	 * decreases the dev refcount: take that reference here.
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
				     "rx-%u", index);
	if (error)
		goto err;

	queue->groups = rx_queue_default_groups;
	error = sysfs_create_groups(kobj, queue->groups);
	if (error)
		goto err;

	if (dev->sysfs_rx_queue_group) {
		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
		if (error)
			goto err_default_groups;
	}

	error = rx_queue_default_mask(dev, queue);
	if (error)
		goto err_default_groups;

	kobject_uevent(kobj, KOBJ_ADD);

	return error;

err_default_groups:
	sysfs_remove_groups(kobj, queue->groups);
err:
	kobject_put(kobj);
	return error;
}

static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid,
				 kgid_t kgid)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (dev->sysfs_rx_queue_group)
		error = sysfs_group_change_owner(
			kobj, dev->sysfs_rx_queue_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */

int
net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = old_num; i < new_num; i++) {
		error = rx_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_rx_queue *queue = &dev->_rx[i];
		struct kobject *kobj = &queue->kobj;

		if (!check_net(dev_net(dev)))
			kobj->uevent_suppress = 1;
		if (dev->sysfs_rx_queue_group)
			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
		sysfs_remove_groups(kobj, queue->groups);
		kobject_put(kobj);
	}

	return error;
#else
	return 0;
#endif
}

static int net_rx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = 0; i < num; i++) {
		error = rx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif
}

#ifdef CONFIG_SYSFS
/*
 * netdev_queue sysfs structures and functions.
 */
struct netdev_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
			struct netdev_queue *queue, char *buf);
	ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
			 struct netdev_queue *queue, const char *buf,
			 size_t len);
};
#define to_netdev_queue_attr(_attr) \
	container_of(_attr, struct netdev_queue_attribute, attr)

#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)

static ssize_t netdev_queue_attr_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(kobj, attr, queue, buf);
}

static ssize_t netdev_queue_attr_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buf, size_t count)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(kobj, attr, queue, buf, count);
}

static const struct sysfs_ops netdev_queue_sysfs_ops = {
	.show = netdev_queue_attr_show,
	.store = netdev_queue_attr_store,
};

static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
			       struct netdev_queue *queue, char *buf)
{
	unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);

	return sysfs_emit(buf, fmt_ulong, trans_timeout);
}

static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
	struct net_device *dev = queue->dev;
	unsigned int i;

	i = queue - dev->_tx;
	BUG_ON(i >= dev->num_tx_queues);

	return i;
}

static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	int num_tc, tc, index, ret;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
	if (ret)
		return ret;

	index = get_netdev_queue_index(queue);

	/* If queue belongs to subordinate dev use its TC mapping */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	num_tc = dev->num_tc;
	tc = netdev_txq_to_tc(dev, index);

	rtnl_unlock();

	if (tc < 0)
		return -EINVAL;

	/* We can report the traffic class one of two ways:
	 * Subordinate device traffic classes are reported with the traffic
	 * class first, and then the subordinate class, so for example TC0 on
	 * subordinate device 2 will be reported as "0-2". If the queue
	 * belongs to the root device it will be reported with just the
	 * traffic class, so just "0" for TC 0 for example.
	 */
	return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) :
			    sysfs_emit(buf, "%d\n", tc);
}

#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
			       struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}

static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
				struct netdev_queue *queue, const char *buf,
				size_t len)
{
	int err, index = get_netdev_queue_index(queue);
	struct net_device *dev = queue->dev;
	u32 rate = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* The check is also done later; this lets us return early without
	 * hitting the locking section below.
	 */
	if (!dev->netdev_ops->ndo_set_tx_maxrate)
		return -EOPNOTSUPP;

	err = kstrtou32(buf, 10, &rate);
	if (err < 0)
		return err;

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err)
		return err;

	err = -EOPNOTSUPP;
	netdev_lock_ops(dev);
	if (dev->netdev_ops->ndo_set_tx_maxrate)
		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
	netdev_unlock_ops(dev);

	if (!err) {
		queue->tx_maxrate = rate;
		rtnl_unlock();
		return len;
	}

	rtnl_unlock();
	return err;
}

static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
	= __ATTR_RW(tx_maxrate);
#endif

static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
	= __ATTR_RO(tx_timeout);

static struct netdev_queue_attribute queue_traffic_class __ro_after_init
	= __ATTR_RO(traffic_class);

#ifdef CONFIG_BQL
/*
 * Byte queue limits sysfs structures and functions.
 */
static ssize_t bql_show(char *buf, unsigned int value)
{
	return sysfs_emit(buf, "%u\n", value);
}

static ssize_t bql_set(const char *buf, const size_t count,
		       unsigned int *pvalue)
{
	unsigned int value;
	int err;

	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
		value = DQL_MAX_LIMIT;
	} else {
		err = kstrtouint(buf, 10, &value);
		if (err < 0)
			return err;
		if (value > DQL_MAX_LIMIT)
			return -EINVAL;
	}

	*pvalue = value;

	return count;
}
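
/* Illustrative usage: BQL limits accept either a decimal byte count or the
 * literal "max" (mapped to DQL_MAX_LIMIT), e.g. (device name assumed):
 *
 *	$ echo max > /sys/class/net/eth0/queues/tx-0/byte_queue_limits/limit_max
 */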

static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}

static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, const char *buf,
				 size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	dql->slack_hold_time = msecs_to_jiffies(value);

	return len;
}

static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
	= __ATTR(hold_time, 0644,
		 bql_show_hold_time, bql_set_hold_time);

static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
				   struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}

static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, const char *buf,
				  size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	value = msecs_to_jiffies(value);
	if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
		return -ERANGE;

	if (!dql->stall_thrs && value)
		dql->last_reap = jiffies;
	/* Force last_reap to be live */
	smp_wmb();
	dql->stall_thrs = value;

	return len;
}

static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
	__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);

static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}

static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, const char *buf,
				 size_t len)
{
	WRITE_ONCE(queue->dql.stall_max, 0);
	return len;
}

static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
	__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);

static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%lu\n", dql->stall_cnt);
}

static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
	__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);

static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed);
}

static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
	__ATTR(inflight, 0444, bql_show_inflight, NULL);

#define BQL_ATTR(NAME, FIELD)						\
static ssize_t bql_show_ ## NAME(struct kobject *kobj,			\
				 struct attribute *attr,		\
				 struct netdev_queue *queue, char *buf)	\
{									\
	return bql_show(buf, queue->dql.FIELD);				\
}									\
									\
static ssize_t bql_set_ ## NAME(struct kobject *kobj,			\
				struct attribute *attr,			\
				struct netdev_queue *queue,		\
				const char *buf, size_t len)		\
{									\
	return bql_set(buf, len, &queue->dql.FIELD);			\
}									\
									\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
	= __ATTR(NAME, 0644,						\
		 bql_show_ ## NAME, bql_set_ ## NAME)
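
/* As with NETSTAT_ENTRY above, an illustrative expansion: BQL_ATTR(limit,
 * limit) generates bql_show_limit()/bql_set_limit() wrapping queue->dql.limit
 * plus the bql_limit_attribute definition, which sysfs exposes as
 * .../queues/tx-<n>/byte_queue_limits/limit.
 */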

BQL_ATTR(limit, limit);
BQL_ATTR(limit_max, max_limit);
BQL_ATTR(limit_min, min_limit);

static struct attribute *dql_attrs[] __ro_after_init = {
	&bql_limit_attribute.attr,
	&bql_limit_max_attribute.attr,
	&bql_limit_min_attribute.attr,
	&bql_hold_time_attribute.attr,
	&bql_inflight_attribute.attr,
	&bql_stall_thrs_attribute.attr,
	&bql_stall_cnt_attribute.attr,
	&bql_stall_max_attribute.attr,
	NULL
};

static const struct attribute_group dql_group = {
	.name = "byte_queue_limits",
	.attrs = dql_attrs,
};
#else
/* Fake declaration, all the code using it should be dead */
static const struct attribute_group dql_group = {};
#endif /* CONFIG_BQL */

#ifdef CONFIG_XPS
static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
			      int tc, char *buf, enum xps_map_type type)
{
	struct xps_dev_maps *dev_maps;
	unsigned long *mask;
	unsigned int nr_ids;
	int j, len;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps[type]);

	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
	 * when dev_maps hasn't been allocated yet, to be backward compatible.
	 */
	nr_ids = dev_maps ? dev_maps->nr_ids :
		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);

	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
	if (!mask) {
		rcu_read_unlock();
		return -ENOMEM;
	}

	if (!dev_maps || tc >= dev_maps->num_tc)
		goto out_no_maps;

	for (j = 0; j < nr_ids; j++) {
		int i, tci = j * dev_maps->num_tc + tc;
		struct xps_map *map;

		map = rcu_dereference(dev_maps->attr_map[tci]);
		if (!map)
			continue;

		for (i = map->len; i--;) {
			if (map->queues[i] == index) {
				__set_bit(j, mask);
				break;
			}
		}
	}
out_no_maps:
	rcu_read_unlock();

	len = sysfs_emit(buf, "%*pb\n", nr_ids, mask);
	bitmap_free(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
			     struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int len, tc, ret;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	index = get_netdev_queue_index(queue);

	ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
	if (ret)
		return ret;

	/* If queue belongs to subordinate dev use its map */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	tc = netdev_txq_to_tc(dev, index);
	if (tc < 0) {
		rtnl_unlock();
		return -EINVAL;
	}

	/* Increase the net device refcnt to make sure it won't be freed while
	 * xps_queue_show is running.
	 */
	dev_hold(dev);
	rtnl_unlock();

	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);

	dev_put(dev);
	return len;
}

static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
			      struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	cpumask_var_t mask;
	int err;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	err = netif_set_xps_queue(dev, mask, index);
	rtnl_unlock();

	free_cpumask_var(mask);

	return err ? : len;
}

static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
	= __ATTR_RW(xps_cpus);
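
/* Illustrative usage (device name and CPU topology are assumptions): allow
 * only CPUs 0 and 1 to transmit on TX queue 0 by writing a hex CPU bitmap:
 *
 *	$ echo 3 > /sys/class/net/eth0/queues/tx-0/xps_cpus
 */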

static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
			     struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int tc, ret;

	index = get_netdev_queue_index(queue);

	ret = sysfs_rtnl_lock(kobj, attr, dev);
	if (ret)
		return ret;

	tc = netdev_txq_to_tc(dev, index);

	/* Increase the net device refcnt to make sure it won't be freed while
	 * xps_queue_show is running.
	 */
	dev_hold(dev);
	rtnl_unlock();

	ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
	dev_put(dev);
	return ret;
}

static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
			      struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	struct net *net = dev_net(dev);
	unsigned long *mask;
	unsigned int index;
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
	if (!mask)
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	cpus_read_lock();
	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
	cpus_read_unlock();

	rtnl_unlock();

	bitmap_free(mask);
	return err ? : len;
}

static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
	= __ATTR_RW(xps_rxqs);
#endif /* CONFIG_XPS */

static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
	&queue_trans_timeout.attr,
	&queue_traffic_class.attr,
#ifdef CONFIG_XPS
	&xps_cpus_attribute.attr,
	&xps_rxqs_attribute.attr,
	&queue_tx_maxrate.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(netdev_queue_default);

static void netdev_queue_release(struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const struct ns_common *netdev_queue_namespace(const struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);
	struct device *dev = &queue->dev->dev;

	if (dev->class && dev->class->namespace)
		return dev->class->namespace(dev);

	return NULL;
}

static void netdev_queue_get_ownership(const struct kobject *kobj,
				       kuid_t *uid, kgid_t *gid)
{
	const struct ns_common *ns = netdev_queue_namespace(kobj);

	net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL,
			     uid, gid);
}

static const struct kobj_type netdev_queue_ktype = {
	.sysfs_ops = &netdev_queue_sysfs_ops,
	.release = netdev_queue_release,
	.namespace = netdev_queue_namespace,
	.get_ownership = netdev_queue_get_ownership,
};

static bool netdev_uses_bql(const struct net_device *dev)
{
	if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
		return false;

	return IS_ENABLED(CONFIG_BQL);
}

static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_queue *queue = dev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Tx queues are cleared in netdev_queue_release to allow later
	 * re-registration. This is triggered when their kobj refcount is
	 * dropped.
	 *
	 * If a queue is removed while both a read (or write) operation and
	 * the re-addition of the same queue are pending (both waiting on
	 * rtnl_lock), the re-addition may execute before the read, so the
	 * initial removal never happens (the queue's kobj refcount won't drop
	 * enough because of the pending read). In such a rare case, return to
	 * allow the removal operation to complete.
	 */
	if (unlikely(kobj->state_initialized)) {
		netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
		return -EAGAIN;
	}

	/* A later kobject_put will trigger the netdev_queue_release call,
	 * which decreases the dev refcount: take that reference here.
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
				     "tx-%u", index);
	if (error)
		goto err;

	queue->groups = netdev_queue_default_groups;
	error = sysfs_create_groups(kobj, queue->groups);
	if (error)
		goto err;

	if (netdev_uses_bql(dev)) {
		error = sysfs_create_group(kobj, &dql_group);
		if (error)
			goto err_default_groups;
	}

	kobject_uevent(kobj, KOBJ_ADD);
	return 0;

err_default_groups:
	sysfs_remove_groups(kobj, queue->groups);
err:
	kobject_put(kobj);
	return error;
}
2007
2008static int tx_queue_change_owner(struct net_device *ndev, int index,
2009 kuid_t kuid, kgid_t kgid)
2010{
2011 struct netdev_queue *queue = ndev->_tx + index;
2012 struct kobject *kobj = &queue->kobj;
2013 int error;
2014
2015 error = sysfs_change_owner(kobj, kuid, kgid);
2016 if (error)
2017 return error;
2018
2019 if (netdev_uses_bql(ndev))
2020 error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
2021
2022 return error;
2023}
2024#endif /* CONFIG_SYSFS */

int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!check_net(dev_net(dev)))
			queue->kobj.uevent_suppress = 1;

		if (netdev_uses_bql(dev))
			sysfs_remove_group(&queue->kobj, &dql_group);

		sysfs_remove_groups(&queue->kobj, queue->groups);
		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}
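
/* Illustrative sketch (not part of this file): drivers reach the helper
 * above indirectly, e.g. through netif_set_real_num_tx_queues(), which
 * adds or drops tx-<n> kobjects to match the new queue count. The
 * function and its queue-count parameter below are hypothetical.
 */
#if 0
static int example_resize_tx(struct net_device *dev, unsigned int txq)
{
	ASSERT_RTNL();	/* resizing the queue set requires the RTNL lock */

	/* Grows or shrinks the tx-<n> sysfs entries as a side effect */
	return netif_set_real_num_tx_queues(dev, txq);
}
#endif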

static int net_tx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

	for (i = 0; i < num; i++) {
		error = tx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int register_queue_kobjects(struct net_device *dev)
{
	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	dev->queues_kset = kset_create_and_add("queues",
					       NULL, &dev->dev.kobj);
	if (!dev->queues_kset)
		return -ENOMEM;
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	error = net_rx_queue_update_kobjects(dev, 0, real_rx);
	if (error)
		goto error;
	rxq = real_rx;

	error = netdev_queue_update_kobjects(dev, 0, real_tx);
	if (error)
		goto error;
	txq = real_tx;

	return 0;

error:
	netdev_queue_update_kobjects(dev, txq, 0);
	net_rx_queue_update_kobjects(dev, rxq, 0);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
	return error;
}
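
/* For illustration: after this succeeds, a hypothetical "eth0" with one
 * RX and one TX queue exposes
 *
 *   /sys/class/net/eth0/queues/rx-0/
 *   /sys/class/net/eth0/queues/tx-0/
 *
 * with the per-queue attributes (e.g. rps_cpus, tx_timeout) underneath.
 */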

static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
{
	int error = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	if (ndev->queues_kset) {
		error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
		if (error)
			return error;
	}
	real_rx = ndev->real_num_rx_queues;
#endif
	real_tx = ndev->real_num_tx_queues;

	error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
	if (error)
		return error;

	error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
	if (error)
		return error;

	return 0;
}

static void remove_queue_kobjects(struct net_device *dev)
{
	int real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	net_rx_queue_update_kobjects(dev, real_rx, 0);
	netdev_queue_update_kobjects(dev, real_tx, 0);

	netdev_lock_ops(dev);
	dev->real_num_rx_queues = 0;
	dev->real_num_tx_queues = 0;
	netdev_unlock_ops(dev);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
}

static bool net_current_may_mount(void)
{
	struct net *net = current->nsproxy->net_ns;

	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
}

static struct ns_common *net_grab_current_ns(void)
{
	struct net *net = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
	if (net)
		refcount_inc(&net->passive);
#endif
	return net ? to_ns_common(net) : NULL;
}

static const struct ns_common *net_initial_ns(void)
{
	return to_ns_common(&init_net);
}

static const struct ns_common *net_netlink_ns(struct sock *sk)
{
	return to_ns_common(sock_net(sk));
}

const struct kobj_ns_type_operations net_ns_type_operations = {
	.type = KOBJ_NS_TYPE_NET,
	.current_may_mount = net_current_may_mount,
	.grab_current_ns = net_grab_current_ns,
	.netlink_ns = net_netlink_ns,
	.initial_ns = net_initial_ns,
	.drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);

static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
{
	const struct net_device *dev = to_net_dev(d);
	int retval;

	/* pass interface to uevent. */
	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
	if (retval)
		goto exit;

	/* pass ifindex to uevent.
	 * ifindex is useful as it won't change (interface name may change)
	 * and is what RtNetlink uses natively.
	 */
	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);

exit:
	return retval;
}
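
/* For illustration: a KOBJ_ADD uevent for a hypothetical "eth0" with
 * ifindex 2 thus carries, alongside the standard variables:
 *
 *   INTERFACE=eth0
 *   IFINDEX=2
 */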

/*
 * netdev_release -- destroy and free a dead device.
 * Called when last reference to device kobject is gone.
 */
static void netdev_release(struct device *d)
{
	struct net_device *dev = to_net_dev(d);

	BUG_ON(dev->reg_state != NETREG_RELEASED);

	/* no need to wait for rcu grace period:
	 * device is dead and about to be freed.
	 */
	kfree(rcu_access_pointer(dev->ifalias));
	kvfree(dev);
}

static const struct ns_common *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return to_ns_common(dev_net(dev));
}

static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
{
	const struct net_device *dev = to_net_dev(d);
	const struct net *net = dev_net(dev);

	net_ns_get_ownership(net, uid, gid);
}

static const struct class net_class = {
	.name = "net",
	.dev_release = netdev_release,
	.dev_groups = net_class_groups,
	.dev_uevent = netdev_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
	.get_ownership = net_get_ownership,
};
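
/* For illustration: every net_device registered against this class shows
 * up as /sys/class/net/<ifname>, with ownership and namespace visibility
 * resolved through the callbacks above.
 */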

#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
	for (; dev; dev = dev->parent) {
		if (dev->of_node == data)
			return 1;
	}

	return 0;
}

/*
 * of_find_net_device_by_node - lookup the net device for the device node
 * @np: OF device node
 *
 * Looks up the net_device structure corresponding to the device node.
 * If successful, returns a pointer to the net_device with the embedded
 * struct device refcount incremented by one, or NULL on failure. The
 * refcount must be dropped when done with the net_device.
 */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
	struct device *dev;

	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
	if (!dev)
		return NULL;

	return to_net_dev(dev);
}
EXPORT_SYMBOL(of_find_net_device_by_node);
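
/* Illustrative usage sketch (hypothetical caller, not part of this file):
 * the reference taken by the lookup must be dropped with put_device().
 */
#if 0
static void example_lookup(struct device_node *np)
{
	struct net_device *ndev = of_find_net_device_by_node(np);

	if (!ndev)
		return;

	pr_info("OF node %pOF maps to %s\n", np, ndev->name);
	put_device(&ndev->dev);	/* drop the lookup's device reference */
}
#endif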
#endif

/* Delete sysfs entries but hold kobject reference until after all
 * netdev references are gone.
 */
void netdev_unregister_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;

	if (!check_net(dev_net(ndev)))
		dev_set_uevent_suppress(dev, 1);

	kobject_get(&dev->kobj);

	remove_queue_kobjects(ndev);

	pm_runtime_set_memalloc_noio(dev, false);

	device_del(dev);
}

/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;
	const struct attribute_group **groups = ndev->sysfs_groups;
	int error = 0;

	device_initialize(dev);
	dev->class = &net_class;
	dev->platform_data = ndev;
	dev->groups = groups;

	dev_set_name(dev, "%s", ndev->name);

#ifdef CONFIG_SYSFS
	/* Allow for a device specific group */
	if (*groups)
		groups++;

	*groups++ = &netstat_group;
	*groups++ = &netdev_phys_group;

	if (wireless_group_needed(ndev))
		*groups++ = &wireless_group;
#endif /* CONFIG_SYSFS */

	error = device_add(dev);
	if (error)
		return error;

	error = register_queue_kobjects(ndev);
	if (error) {
		device_del(dev);
		return error;
	}

	pm_runtime_set_memalloc_noio(dev, true);

	return error;
}

/* Change owner for sysfs entries when moving network devices across network
 * namespaces owned by different user namespaces.
 */
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
			const struct net *net_new)
{
	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
	struct device *dev = &ndev->dev;
	int error;

	net_ns_get_ownership(net_old, &old_uid, &old_gid);
	net_ns_get_ownership(net_new, &new_uid, &new_gid);

	/* The network namespace was changed but the owning user namespace is
	 * identical so there's no need to change the owner of sysfs entries.
	 */
	if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
		return 0;

	error = device_change_owner(dev, new_uid, new_gid);
	if (error)
		return error;

	error = queue_change_owner(ndev, new_uid, new_gid);
	if (error)
		return error;

	return 0;
}

int netdev_class_create_file_ns(const struct class_attribute *class_attr,
				const struct ns_common *ns)
{
	return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);

void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
				 const struct ns_common *ns)
{
	class_remove_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_remove_file_ns);

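/* Called once at boot (from net_dev_init()) to register the net namespace
 * kobject type and the "net" class backing /sys/class/net.
 */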
int __init netdev_kobject_init(void)
{
	kobj_ns_type_register(&net_ns_type_operations);
	return class_register(&net_class);
}