// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net-sysfs.c - network device class and attributes
 *
 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/isolation.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_lock.h>
#include <net/netdev_rx_queue.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";

/* Caller holds RTNL, netdev->lock or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
	return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}

/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active,
 * when unregistering a net device and accessing associated sysfs files. The
 * potential deadlock is as follows:
 *
 *         CPU 0                                          CPU 1
 *
 *  rtnl_lock                                   vfs_read
 *  unregister_netdevice_many                   kernfs_seq_start
 *    device_del / kobject_put                    kernfs_get_active (kn->active++)
 *      kernfs_drain                            sysfs_kf_seq_show
 *        wait_event(                             rtnl_lock
 *           kn->active == KN_DEACTIVATED_BIAS)     -> waits on CPU 0 to release
 *        -> waits on CPU 1 to decrease kn->active    the rtnl lock.
 *
 * The historical fix was to use rtnl_trylock with restart_syscall to bail out
 * of sysfs operations when the lock couldn't be taken. This fixed the above
 * issue as it allowed CPU 1 to bail out of the ABBA situation.
 *
 * But it came with performance issues, as syscalls were restarted in loops
 * when there was contention on the rtnl lock, causing huge slowdowns in
 * specific scenarios (e.g. lots of virtual interfaces created and userspace
 * daemons querying their attributes).
 *
 * The idea below is to bail out of the active kernfs_node protection
 * (kn->active) while trying to take the rtnl lock.
 *
 * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
 * net device is guaranteed to be alive if this returns successfully.
 */
static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
			   struct net_device *ndev)
{
	struct kernfs_node *kn;
	int ret = 0;

	/* First, we hold a reference to the net device as the unregistration
	 * path might run in parallel. This will ensure the net device and the
	 * associated sysfs objects won't be freed while we try to take the rtnl
	 * lock.
	 */
	dev_hold(ndev);
	/* sysfs_break_active_protection was introduced to allow self-removal of
	 * devices and their associated sysfs files by bailing out of the
	 * sysfs/kernfs protection. We do this here to allow the unregistration
	 * path to complete in parallel. The following takes a reference on the
	 * kobject and the kernfs_node being accessed.
	 *
	 * This works because we hold a reference to the net device and the
	 * unregistration path will wait for us eventually in netdev_run_todo
	 * (outside an rtnl lock section).
	 */
	kn = sysfs_break_active_protection(kobj, attr);
	/* We can now try to take the rtnl lock. This can't deadlock us as the
	 * unregistration path is able to drain sysfs files (kernfs_node) thanks
	 * to the above dance.
	 */
	if (rtnl_lock_interruptible()) {
		ret = -ERESTARTSYS;
		goto unbreak;
	}
	/* Check that dismantle on the device hasn't started, otherwise deny the
	 * operation.
	 */
	if (!dev_isalive(ndev)) {
		rtnl_unlock();
		ret = -ENODEV;
		goto unbreak;
	}
	/* We are now sure the device dismantle hasn't started and that it can't
	 * start before we exit the locking section, as we hold the rtnl lock.
	 * There's no need to keep the sysfs protection broken nor to hold a net
	 * device reference from this point on; those were only needed to take
	 * the rtnl lock.
	 */
unbreak:
	sysfs_unbreak_active_protection(kn);
	dev_put(ndev);

	return ret;
}
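
/* For reference, a minimal show handler built on the helper above would pair
 * it with rtnl_unlock() as follows. This is an illustrative sketch, not a
 * handler from this file (the attribute name is hypothetical):
 *
 *	static ssize_t example_show(struct device *dev,
 *				    struct device_attribute *attr, char *buf)
 *	{
 *		struct net_device *ndev = to_net_dev(dev);
 *		ssize_t ret;
 *
 *		ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, ndev);
 *		if (ret)
 *			return ret;
 *		ret = sysfs_emit(buf, "%u\n", READ_ONCE(ndev->mtu));
 *		rtnl_unlock();
 *		return ret;
 *	}
 */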

/* use same locking rules as GIF* ioctls */
static ssize_t netdev_show(const struct device *dev,
			   struct device_attribute *attr, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = (*format)(ndev, buf);
	rcu_read_unlock();

	return ret;
}

/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string)				\
static ssize_t format_##field(const struct net_device *dev, char *buf)	\
{									\
	return sysfs_emit(buf, format_string, READ_ONCE(dev->field));	\
}									\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	return netdev_show(dev, attr, buf, format_##field);		\
}									\

#define NETDEVICE_SHOW_RO(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RO(field)

#define NETDEVICE_SHOW_RW(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RW(field)
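
/* As an illustration (a rough sketch, not verbatim preprocessor output),
 * NETDEVICE_SHOW_RO(ifindex, fmt_dec) expands to approximately:
 *
 *	static ssize_t format_ifindex(const struct net_device *dev, char *buf)
 *	{
 *		return sysfs_emit(buf, "%d\n", READ_ONCE(dev->ifindex));
 *	}
 *	static ssize_t ifindex_show(struct device *dev,
 *				    struct device_attribute *attr, char *buf)
 *	{
 *		return netdev_show(dev, attr, buf, format_ifindex);
 *	}
 *	static struct device_attribute dev_attr_ifindex = __ATTR_RO(ifindex);
 */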

/* use same locking and permission rules as SIF* ioctls */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len,
			    int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		goto err;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		goto err;

	ret = (*set)(netdev, new);
	if (ret == 0)
		ret = len;

	rtnl_unlock();
 err:
	return ret;
}

/* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */
static ssize_t
netdev_lock_store(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t len,
		  int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		return ret;

	netdev_lock(netdev);

	if (dev_isalive(netdev)) {
		ret = (*set)(netdev, new);
		if (ret == 0)
			ret = len;
	}
	netdev_unlock(netdev);

	return ret;
}

NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
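
/* These attributes surface under /sys/class/net/<iface>/. Illustrative shell
 * session (device name and values are assumptions, not taken from this file):
 *
 *	$ cat /sys/class/net/eth0/ifindex
 *	2
 *	$ cat /sys/class/net/eth0/addr_len
 *	6
 */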

static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev));
}
static DEVICE_ATTR_RO(iflink);

static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
	return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}

static ssize_t name_assign_type_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
		ret = netdev_show(dev, attr, buf, format_name_assign_type);

	return ret;
}
static DEVICE_ATTR_RO(name_assign_type);

/* use same locking rules as the GIFHWADDR ioctl (netif_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	down_read(&dev_addr_sem);

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
	rcu_read_unlock();

	up_read(&dev_addr_sem);
	return ret;
}
static DEVICE_ATTR_RO(address);

static ssize_t broadcast_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	int ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
	rcu_read_unlock();
	return ret;
}
static DEVICE_ATTR_RO(broadcast);

static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
	if (!netif_running(dev))
		return -EINVAL;
	return dev_change_carrier(dev, (bool)new_carrier);
}

static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);

	/* The check is also done in change_carrier; this lets us return early
	 * without hitting the locking section in netdev_store.
	 */
	if (!netdev->netdev_ops->ndo_change_carrier)
		return -EOPNOTSUPP;

	return netdev_store(dev, attr, buf, len, change_carrier);
}

static ssize_t carrier_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (netif_running(netdev)) {
		/* Synchronize carrier state with link watch,
		 * see also rtnl_getlink().
		 */
		linkwatch_sync_dev(netdev);

		ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
	}

	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RW(carrier);
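
/* Illustrative usage from userspace (assumes a running device whose driver
 * implements ndo_change_carrier; device name is an assumption):
 *
 *	$ echo 0 > /sys/class/net/eth0/carrier	# force carrier off
 *	$ cat /sys/class/net/eth0/carrier
 *	0
 */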

static ssize_t speed_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this lets us
	 * return early without hitting the locking section below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd))
			ret = sysfs_emit(buf, fmt_dec, cmd.base.speed);
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(speed);

static ssize_t duplex_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this lets us
	 * return early without hitting the locking section below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
			const char *duplex;

			switch (cmd.base.duplex) {
			case DUPLEX_HALF:
				duplex = "half";
				break;
			case DUPLEX_FULL:
				duplex = "full";
				break;
			default:
				duplex = "unknown";
				break;
			}
			ret = sysfs_emit(buf, "%s\n", duplex);
		}
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(duplex);

static ssize_t testing_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(testing);

static ssize_t dormant_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(dormant);

static const char *const operstates[] = {
	"unknown",
	"notpresent", /* currently unused */
	"down",
	"lowerlayerdown",
	"testing",
	"dormant",
	"up"
};

static ssize_t operstate_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	unsigned char operstate;

	operstate = READ_ONCE(netdev->operstate);
	if (!netif_running(netdev))
		operstate = IF_OPER_DOWN;

	if (operstate >= ARRAY_SIZE(operstates))
		return -EINVAL; /* should not happen */

	return sysfs_emit(buf, "%s\n", operstates[operstate]);
}
static DEVICE_ATTR_RO(operstate);

static ssize_t carrier_changes_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec,
			  atomic_read(&netdev->carrier_up_count) +
			  atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);

static ssize_t carrier_up_count_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
}
static DEVICE_ATTR_RO(carrier_up_count);

static ssize_t carrier_down_count_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_down_count);

/* read-write attributes */

static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
	return dev_set_mtu(dev, (int)new_mtu);
}

static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_mtu);
}
NETDEVICE_SHOW_RW(mtu, fmt_dec);

static int change_flags(struct net_device *dev, unsigned long new_flags)
{
	return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}

static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_flags);
}
NETDEVICE_SHOW_RW(flags, fmt_hex);

static ssize_t tx_queue_len_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
}
NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);

static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	netdev_set_gro_flush_timeout(dev, val);
	return 0;
}

static ssize_t gro_flush_timeout_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);

static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
	if (val > S32_MAX)
		return -ERANGE;

	netdev_set_defer_hard_irqs(dev, (u32)val);
	return 0;
}

static ssize_t napi_defer_hard_irqs_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len,
				 change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);

static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	size_t count = len;
	ssize_t ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* ignore trailing newline */
	if (len > 0 && buf[len - 1] == '\n')
		--count;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_set_alias(netdev, buf, count);
	if (ret < 0)
		goto err;
	ret = len;
	netdev_state_change(netdev);
err:
	rtnl_unlock();

	return ret;
}

static ssize_t ifalias_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	char tmp[IFALIASZ];
	ssize_t ret;

	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
	if (ret > 0)
		ret = sysfs_emit(buf, "%s\n", tmp);
	return ret;
}
static DEVICE_ATTR_RW(ifalias);

static int change_group(struct net_device *dev, unsigned long new_group)
{
	dev_set_group(dev, (int)new_group);
	return 0;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);

static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
	return dev_change_proto_down(dev, (bool)proto_down);
}

static ssize_t proto_down_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);

static ssize_t phys_port_id_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netdev_phys_item_id ppid;
	ssize_t ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_phys_port_id(netdev, &ppid);
	if (!ret)
		ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_id);

static ssize_t phys_port_name_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	char name[IFNAMSIZ];
	ssize_t ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = dev_get_phys_port_name(netdev, name, sizeof(name));
	if (!ret)
		ret = sysfs_emit(buf, "%s\n", name);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_name);

static ssize_t phys_switch_id_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	struct netdev_phys_item_id ppid = { };
	ssize_t ret;

	ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
	if (ret)
		return ret;

	ret = netif_get_port_parent_id(netdev, &ppid, false);
	if (!ret)
		ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);

	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_switch_id);

static struct attribute *netdev_phys_attrs[] __ro_after_init = {
	&dev_attr_phys_port_id.attr,
	&dev_attr_phys_port_name.attr,
	&dev_attr_phys_switch_id.attr,
	NULL,
};

static umode_t netdev_phys_is_visible(struct kobject *kobj,
				      struct attribute *attr, int index)
{
	struct device *dev = kobj_to_dev(kobj);
	struct net_device *netdev = to_net_dev(dev);

	if (attr == &dev_attr_phys_port_id.attr) {
		if (!netdev->netdev_ops->ndo_get_phys_port_id)
			return 0;
	} else if (attr == &dev_attr_phys_port_name.attr) {
		if (!netdev->netdev_ops->ndo_get_phys_port_name &&
		    !netdev->devlink_port)
			return 0;
	} else if (attr == &dev_attr_phys_switch_id.attr) {
		if (!netdev->netdev_ops->ndo_get_port_parent_id &&
		    !netdev->devlink_port)
			return 0;
	}

	return attr->mode;
}

static const struct attribute_group netdev_phys_group = {
	.attrs = netdev_phys_attrs,
	.is_visible = netdev_phys_is_visible,
};

static ssize_t threaded_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();

	if (dev_isalive(netdev))
		ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));

	rcu_read_unlock();

	return ret;
}

static int modify_napi_threaded(struct net_device *dev, unsigned long val)
{
	int ret;

	if (list_empty(&dev->napi_list))
		return -EOPNOTSUPP;

	if (val != 0 && val != 1)
		return -EOPNOTSUPP;

	ret = netif_set_threaded(dev, val);

	return ret;
}

static ssize_t threaded_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buf, size_t len)
{
	return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);

static struct attribute *net_class_attrs[] __ro_after_init = {
	&dev_attr_netdev_group.attr,
	&dev_attr_type.attr,
	&dev_attr_dev_id.attr,
	&dev_attr_dev_port.attr,
	&dev_attr_iflink.attr,
	&dev_attr_ifindex.attr,
	&dev_attr_name_assign_type.attr,
	&dev_attr_addr_assign_type.attr,
	&dev_attr_addr_len.attr,
	&dev_attr_link_mode.attr,
	&dev_attr_address.attr,
	&dev_attr_broadcast.attr,
	&dev_attr_speed.attr,
	&dev_attr_duplex.attr,
	&dev_attr_dormant.attr,
	&dev_attr_testing.attr,
	&dev_attr_operstate.attr,
	&dev_attr_carrier_changes.attr,
	&dev_attr_ifalias.attr,
	&dev_attr_carrier.attr,
	&dev_attr_mtu.attr,
	&dev_attr_flags.attr,
	&dev_attr_tx_queue_len.attr,
	&dev_attr_gro_flush_timeout.attr,
	&dev_attr_napi_defer_hard_irqs.attr,
	&dev_attr_proto_down.attr,
	&dev_attr_carrier_up_count.attr,
	&dev_attr_carrier_down_count.attr,
	&dev_attr_threaded.attr,
	NULL,
};
ATTRIBUTE_GROUPS(net_class);

/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
			    struct device_attribute *attr, char *buf,
			    unsigned long offset)
{
	struct net_device *dev = to_net_dev(d);
	ssize_t ret = -EINVAL;

	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
		offset % sizeof(u64) != 0);

	rcu_read_lock();
	if (dev_isalive(dev)) {
		struct rtnl_link_stats64 temp;
		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

		ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
	}
	rcu_read_unlock();
	return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct device_attribute *attr, char *buf)	\
{									\
	return netstat_show(d, attr, buf,				\
			    offsetof(struct rtnl_link_stats64, name));	\
}									\
static DEVICE_ATTR_RO(name)
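
/* For example (an illustrative expansion, not preprocessor output),
 * NETSTAT_ENTRY(rx_packets) defines rx_packets_show() reading the u64 at
 * offsetof(struct rtnl_link_stats64, rx_packets), exposed to userspace as
 * (device name and count assumed):
 *
 *	$ cat /sys/class/net/eth0/statistics/rx_packets
 *	123456
 */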

NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);

static struct attribute *netstat_attrs[] __ro_after_init = {
	&dev_attr_rx_packets.attr,
	&dev_attr_tx_packets.attr,
	&dev_attr_rx_bytes.attr,
	&dev_attr_tx_bytes.attr,
	&dev_attr_rx_errors.attr,
	&dev_attr_tx_errors.attr,
	&dev_attr_rx_dropped.attr,
	&dev_attr_tx_dropped.attr,
	&dev_attr_multicast.attr,
	&dev_attr_collisions.attr,
	&dev_attr_rx_length_errors.attr,
	&dev_attr_rx_over_errors.attr,
	&dev_attr_rx_crc_errors.attr,
	&dev_attr_rx_frame_errors.attr,
	&dev_attr_rx_fifo_errors.attr,
	&dev_attr_rx_missed_errors.attr,
	&dev_attr_tx_aborted_errors.attr,
	&dev_attr_tx_carrier_errors.attr,
	&dev_attr_tx_fifo_errors.attr,
	&dev_attr_tx_heartbeat_errors.attr,
	&dev_attr_tx_window_errors.attr,
	&dev_attr_rx_compressed.attr,
	&dev_attr_tx_compressed.attr,
	&dev_attr_rx_nohandler.attr,
	NULL
};

static const struct attribute_group netstat_group = {
	.name = "statistics",
	.attrs = netstat_attrs,
};

static struct attribute *wireless_attrs[] = {
	NULL
};

static const struct attribute_group wireless_group = {
	.name = "wireless",
	.attrs = wireless_attrs,
};

static bool wireless_group_needed(struct net_device *ndev)
{
#if IS_ENABLED(CONFIG_CFG80211)
	if (ndev->ieee80211_ptr)
		return true;
#endif
#if IS_ENABLED(CONFIG_WIRELESS_EXT)
	if (ndev->wireless_handlers)
		return true;
#endif
	return false;
}

#else /* CONFIG_SYSFS */
#define net_class_groups	NULL
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_SYSFS
#define to_rx_queue_attr(_attr) \
	container_of(_attr, struct rx_queue_attribute, attr)

#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)

static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
				  char *buf)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
				   const char *buf, size_t count)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops rx_queue_sysfs_ops = {
	.show = rx_queue_attr_show,
	.store = rx_queue_attr_store,
};

#ifdef CONFIG_RPS
static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
	struct rps_map *map;
	cpumask_var_t mask;
	int i, len;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	map = rcu_dereference(queue->rps_map);
	if (map)
		for (i = 0; i < map->len; i++)
			cpumask_set_cpu(map->cpus[i], mask);

	len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
	rcu_read_unlock();
	free_cpumask_var(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
					cpumask_var_t mask)
{
	static DEFINE_MUTEX(rps_map_mutex);
	struct rps_map *old_map, *map;
	int cpu, i;

	map = kzalloc(max_t(unsigned int,
			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
		      GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	i = 0;
	for_each_cpu_and(cpu, mask, cpu_online_mask)
		map->cpus[i++] = cpu;

	if (i) {
		map->len = i;
	} else {
		kfree(map);
		map = NULL;
	}

	mutex_lock(&rps_map_mutex);
	old_map = rcu_dereference_protected(queue->rps_map,
					    mutex_is_locked(&rps_map_mutex));
	rcu_assign_pointer(queue->rps_map, map);

	if (map)
		static_branch_inc(&rps_needed);
	if (old_map)
		static_branch_dec(&rps_needed);

	mutex_unlock(&rps_map_mutex);

	if (old_map)
		kfree_rcu(old_map, rcu);
	return 0;
}

int rps_cpumask_housekeeping(struct cpumask *mask)
{
	if (!cpumask_empty(mask)) {
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT));
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
		if (cpumask_empty(mask))
			return -EINVAL;
	}
	return 0;
}

static ssize_t store_rps_map(struct netdev_rx_queue *queue,
			     const char *buf, size_t len)
{
	cpumask_var_t mask;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err)
		goto out;

	err = rps_cpumask_housekeeping(mask);
	if (err)
		goto out;

	err = netdev_rx_queue_set_rps_mask(queue, mask);

out:
	free_cpumask_var(mask);
	return err ? : len;
}
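
/* Illustrative usage (device name and CPU set are assumptions): steer RX
 * packet processing for queue 0 to CPUs 0-3 by writing a hex CPU bitmap:
 *
 *	$ echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
 */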

static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					   char *buf)
{
	unsigned long val = 0;
	rps_tag_ptr tag_ptr;

	tag_ptr = READ_ONCE(queue->rps_flow_table);
	if (tag_ptr)
		val = 1UL << rps_tag_to_log(tag_ptr);

	return sysfs_emit(buf, "%lu\n", val);
}

static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					    const char *buf, size_t len)
{
	rps_tag_ptr otag, tag_ptr = 0UL;
	struct rps_dev_flow *table;
	unsigned long mask, count;
	size_t sz;
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	rc = kstrtoul(buf, 0, &count);
	if (rc < 0)
		return rc;

	if (count) {
		mask = count - 1;
		/* mask = roundup_pow_of_two(count) - 1;
		 * without overflows...
		 */
		while ((mask | (mask >> 1)) != mask)
			mask |= (mask >> 1);

		/* Do not accept too large tables. */
		if (mask > (INT_MAX / sizeof(*table) - 1))
			return -EINVAL;

		sz = max_t(size_t, sizeof(*table) * (mask + 1),
			   PAGE_SIZE);
		if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
		    is_power_of_2(sizeof(*table)))
			table = kvmalloc(sz, GFP_KERNEL);
		else
			table = vmalloc(sz);
		if (!table)
			return -ENOMEM;
		tag_ptr = (rps_tag_ptr)table;
		if (rps_tag_to_log(tag_ptr)) {
			pr_err_once("store_rps_dev_flow_table_cnt() got a non page aligned allocation.\n");
			kvfree(table);
			return -ENOMEM;
		}
		tag_ptr |= (ilog2(mask) + 1);
		for (count = 0; count <= mask; count++) {
			table[count].cpu = RPS_NO_CPU;
			table[count].filter = RPS_NO_FILTER;
		}
	}

	otag = xchg(&queue->rps_flow_table, tag_ptr);
	if (otag)
		kvfree_rcu_mightsleep(rps_tag_to_table(otag));

	return len;
}
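
/* Illustrative usage: the written count is rounded up to a power of two by
 * the mask computation above, so the value read back may differ (device name
 * assumed):
 *
 *	$ echo 100 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 *	$ cat /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 *	128
 */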

static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
	= __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);

static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
	= __ATTR(rps_flow_cnt, 0644,
		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */

static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
	&rps_cpus_attribute.attr,
	&rps_dev_flow_table_cnt_attribute.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(rx_queue_default);

static void rx_queue_release(struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
#ifdef CONFIG_RPS
	rps_tag_ptr tag_ptr;
	struct rps_map *map;

	map = rcu_dereference_protected(queue->rps_map, 1);
	if (map) {
		RCU_INIT_POINTER(queue->rps_map, NULL);
		kfree_rcu(map, rcu);
	}

	tag_ptr = xchg(&queue->rps_flow_table, 0UL);
	if (tag_ptr)
		kvfree_rcu_mightsleep(rps_tag_to_table(tag_ptr));
#endif

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const struct ns_common *rx_queue_namespace(const struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
	struct device *dev = &queue->dev->dev;

	if (dev->class && dev->class->namespace)
		return dev->class->namespace(dev);

	return NULL;
}

static void rx_queue_get_ownership(const struct kobject *kobj,
				   kuid_t *uid, kgid_t *gid)
{
	const struct ns_common *ns = rx_queue_namespace(kobj);

	net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL,
			     uid, gid);
}

static const struct kobj_type rx_queue_ktype = {
	.sysfs_ops = &rx_queue_sysfs_ops,
	.release = rx_queue_release,
	.namespace = rx_queue_namespace,
	.get_ownership = rx_queue_get_ownership,
};

static int rx_queue_default_mask(struct net_device *dev,
				 struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
	struct cpumask *rps_default_mask;
	int res = 0;

	mutex_lock(&rps_default_mask_mutex);

	rps_default_mask = dev_net(dev)->core.rps_default_mask;
	if (rps_default_mask && !cpumask_empty(rps_default_mask))
		res = netdev_rx_queue_set_rps_mask(queue, rps_default_mask);

	mutex_unlock(&rps_default_mask_mutex);

	return res;
#else
	return 0;
#endif
}

static int rx_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Rx queues are cleared in rx_queue_release to allow later
	 * re-registration. This is triggered when their kobj refcount is
	 * dropped.
	 *
	 * If a queue is removed while both a read (or write) operation and
	 * the re-addition of the same queue are pending (both waiting on
	 * rtnl_lock), the re-addition may execute before the read, so the
	 * initial removal never happens (the queue's kobj refcount won't drop
	 * enough because of the pending read). In such a rare case, return to
	 * allow the removal operation to complete.
	 */
	if (unlikely(kobj->state_initialized)) {
		netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed");
		return -EAGAIN;
	}

	/* A later kobject_put will trigger the rx_queue_release call, which
	 * decreases the dev refcount: take that reference here.
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
				     "rx-%u", index);
	if (error)
		goto err;

	queue->groups = rx_queue_default_groups;
	error = sysfs_create_groups(kobj, queue->groups);
	if (error)
		goto err;

	if (dev->sysfs_rx_queue_group) {
		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
		if (error)
			goto err_default_groups;
	}

	error = rx_queue_default_mask(dev, queue);
	if (error)
		goto err_default_groups;

	kobject_uevent(kobj, KOBJ_ADD);

	return error;

err_default_groups:
	sysfs_remove_groups(kobj, queue->groups);
err:
	kobject_put(kobj);
	return error;
}

static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid,
				 kgid_t kgid)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (dev->sysfs_rx_queue_group)
		error = sysfs_group_change_owner(
			kobj, dev->sysfs_rx_queue_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */

int
net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = old_num; i < new_num; i++) {
		error = rx_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_rx_queue *queue = &dev->_rx[i];
		struct kobject *kobj = &queue->kobj;

		if (!check_net(dev_net(dev)))
			kobj->uevent_suppress = 1;
		if (dev->sysfs_rx_queue_group)
			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
		sysfs_remove_groups(kobj, queue->groups);
		kobject_put(kobj);
	}

	return error;
#else
	return 0;
#endif
}

static int net_rx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = 0; i < num; i++) {
		error = rx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif
}

#ifdef CONFIG_SYSFS
/*
 * netdev_queue sysfs structures and functions.
 */
struct netdev_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
			struct netdev_queue *queue, char *buf);
	ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
			 struct netdev_queue *queue, const char *buf,
			 size_t len);
};
#define to_netdev_queue_attr(_attr) \
	container_of(_attr, struct netdev_queue_attribute, attr)

#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)

static ssize_t netdev_queue_attr_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(kobj, attr, queue, buf);
}

static ssize_t netdev_queue_attr_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buf, size_t count)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(kobj, attr, queue, buf, count);
}

static const struct sysfs_ops netdev_queue_sysfs_ops = {
	.show = netdev_queue_attr_show,
	.store = netdev_queue_attr_store,
};

static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr,
			       struct netdev_queue *queue, char *buf)
{
	unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);

	return sysfs_emit(buf, fmt_ulong, trans_timeout);
}

static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
	struct net_device *dev = queue->dev;
	unsigned int i;

	i = queue - dev->_tx;
	BUG_ON(i >= dev->num_tx_queues);

	return i;
}

static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	int num_tc, tc, index, ret;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
	if (ret)
		return ret;

	index = get_netdev_queue_index(queue);

	/* If queue belongs to subordinate dev use its TC mapping */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	num_tc = dev->num_tc;
	tc = netdev_txq_to_tc(dev, index);

	rtnl_unlock();

	if (tc < 0)
		return -EINVAL;

	/* We can report the traffic class one of two ways:
	 * Subordinate device traffic classes are reported with the traffic
	 * class first, and then the subordinate class, so for example TC0 on
	 * subordinate device 2 will be reported as "0-2". If the queue
	 * belongs to the root device it will be reported with just the
	 * traffic class, so just "0" for TC 0 for example.
	 */
	return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) :
			    sysfs_emit(buf, "%d\n", tc);
}

#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr,
			       struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}

static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr,
				struct netdev_queue *queue, const char *buf,
				size_t len)
{
	int err, index = get_netdev_queue_index(queue);
	struct net_device *dev = queue->dev;
	u32 rate = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* The check is also done later; this lets us return early without
	 * hitting the locking section below.
	 */
	if (!dev->netdev_ops->ndo_set_tx_maxrate)
		return -EOPNOTSUPP;

	err = kstrtou32(buf, 10, &rate);
	if (err < 0)
		return err;

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err)
		return err;

	err = -EOPNOTSUPP;
	netdev_lock_ops(dev);
	if (dev->netdev_ops->ndo_set_tx_maxrate)
		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);
	netdev_unlock_ops(dev);

	if (!err) {
		queue->tx_maxrate = rate;
		rtnl_unlock();
		return len;
	}

	rtnl_unlock();
	return err;
}

static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
	= __ATTR_RW(tx_maxrate);
#endif

static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
	= __ATTR_RO(tx_timeout);

static struct netdev_queue_attribute queue_traffic_class __ro_after_init
	= __ATTR_RO(traffic_class);

#ifdef CONFIG_BQL
/*
 * Byte queue limits sysfs structures and functions.
 */
static ssize_t bql_show(char *buf, unsigned int value)
{
	return sysfs_emit(buf, "%u\n", value);
}

static ssize_t bql_set(const char *buf, const size_t count,
		       unsigned int *pvalue)
{
	unsigned int value;
	int err;

	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
		value = DQL_MAX_LIMIT;
	} else {
		err = kstrtouint(buf, 10, &value);
		if (err < 0)
			return err;
		if (value > DQL_MAX_LIMIT)
			return -EINVAL;
	}

	*pvalue = value;

	return count;
}
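
/* Illustrative usage: BQL limits accept either a decimal byte count or the
 * literal "max" (mapped to DQL_MAX_LIMIT), e.g. (device name assumed):
 *
 *	$ echo max > /sys/class/net/eth0/queues/tx-0/byte_queue_limits/limit_max
 */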

static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}

static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, const char *buf,
				 size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	dql->slack_hold_time = msecs_to_jiffies(value);

	return len;
}

static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
	= __ATTR(hold_time, 0644,
		 bql_show_hold_time, bql_set_hold_time);

static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr,
				   struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}

static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, const char *buf,
				  size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	value = msecs_to_jiffies(value);
	if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
		return -ERANGE;

	if (!dql->stall_thrs && value)
		dql->last_reap = jiffies;
	/* Force last_reap to be live */
	smp_wmb();
	dql->stall_thrs = value;

	return len;
}

static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
	__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);

static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}

static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, const char *buf,
				 size_t len)
{
	WRITE_ONCE(queue->dql.stall_max, 0);
	return len;
}

static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
	__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);

static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr,
				  struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%lu\n", dql->stall_cnt);
}

static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
	__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);

static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr,
				 struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed);
}

static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
	__ATTR(inflight, 0444, bql_show_inflight, NULL);

#define BQL_ATTR(NAME, FIELD)						\
static ssize_t bql_show_ ## NAME(struct kobject *kobj,			\
				 struct attribute *attr,		\
				 struct netdev_queue *queue, char *buf)	\
{									\
	return bql_show(buf, queue->dql.FIELD);				\
}									\
									\
static ssize_t bql_set_ ## NAME(struct kobject *kobj,			\
				struct attribute *attr,			\
				struct netdev_queue *queue,		\
				const char *buf, size_t len)		\
{									\
	return bql_set(buf, len, &queue->dql.FIELD);			\
}									\
									\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
	= __ATTR(NAME, 0644,						\
		 bql_show_ ## NAME, bql_set_ ## NAME)
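
/* As with NETSTAT_ENTRY above, an illustrative expansion: BQL_ATTR(limit,
 * limit) generates bql_show_limit()/bql_set_limit() wrapping queue->dql.limit
 * plus the bql_limit_attribute definition, which sysfs exposes as
 * .../queues/tx-<n>/byte_queue_limits/limit.
 */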

BQL_ATTR(limit, limit);
BQL_ATTR(limit_max, max_limit);
BQL_ATTR(limit_min, min_limit);

static struct attribute *dql_attrs[] __ro_after_init = {
	&bql_limit_attribute.attr,
	&bql_limit_max_attribute.attr,
	&bql_limit_min_attribute.attr,
	&bql_hold_time_attribute.attr,
	&bql_inflight_attribute.attr,
	&bql_stall_thrs_attribute.attr,
	&bql_stall_cnt_attribute.attr,
	&bql_stall_max_attribute.attr,
	NULL
};

static const struct attribute_group dql_group = {
	.name = "byte_queue_limits",
	.attrs = dql_attrs,
};
#else
/* Fake declaration, all the code using it should be dead */
static const struct attribute_group dql_group = {};
#endif /* CONFIG_BQL */

#ifdef CONFIG_XPS
static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
			      int tc, char *buf, enum xps_map_type type)
{
	struct xps_dev_maps *dev_maps;
	unsigned long *mask;
	unsigned int nr_ids;
	int j, len;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps[type]);

	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
	 * when dev_maps hasn't been allocated yet, to be backward compatible.
	 */
	nr_ids = dev_maps ? dev_maps->nr_ids :
		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);

	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
	if (!mask) {
		rcu_read_unlock();
		return -ENOMEM;
	}

	if (!dev_maps || tc >= dev_maps->num_tc)
		goto out_no_maps;

	for (j = 0; j < nr_ids; j++) {
		int i, tci = j * dev_maps->num_tc + tc;
		struct xps_map *map;

		map = rcu_dereference(dev_maps->attr_map[tci]);
		if (!map)
			continue;

		for (i = map->len; i--;) {
			if (map->queues[i] == index) {
				__set_bit(j, mask);
				break;
			}
		}
	}
out_no_maps:
	rcu_read_unlock();

	len = sysfs_emit(buf, "%*pb\n", nr_ids, mask);
	bitmap_free(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr,
			     struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int len, tc, ret;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	index = get_netdev_queue_index(queue);

	ret = sysfs_rtnl_lock(kobj, attr, queue->dev);
	if (ret)
		return ret;

	/* If queue belongs to subordinate dev use its map */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	tc = netdev_txq_to_tc(dev, index);
	if (tc < 0) {
		rtnl_unlock();
		return -EINVAL;
	}

	/* Increase the net device refcnt to make sure it won't be freed while
	 * xps_queue_show is running.
	 */
	dev_hold(dev);
	rtnl_unlock();

	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);

	dev_put(dev);
	return len;
}

static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr,
			      struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	cpumask_var_t mask;
	int err;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	err = netif_set_xps_queue(dev, mask, index);
	rtnl_unlock();

	free_cpumask_var(mask);

	return err ? : len;
}

static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
	= __ATTR_RW(xps_cpus);
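
/* Illustrative usage (device name and CPU topology are assumptions): allow
 * only CPUs 0 and 1 to transmit on TX queue 0 by writing a hex CPU bitmap:
 *
 *	$ echo 3 > /sys/class/net/eth0/queues/tx-0/xps_cpus
 */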

static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr,
			     struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int tc, ret;

	index = get_netdev_queue_index(queue);

	ret = sysfs_rtnl_lock(kobj, attr, dev);
	if (ret)
		return ret;

	tc = netdev_txq_to_tc(dev, index);

	/* Increase the net device refcnt to make sure it won't be freed while
	 * xps_queue_show is running.
	 */
	dev_hold(dev);
	rtnl_unlock();

	ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL;
	dev_put(dev);
	return ret;
}

static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr,
			      struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	struct net *net = dev_net(dev);
	unsigned long *mask;
	unsigned int index;
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
	if (!mask)
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	err = sysfs_rtnl_lock(kobj, attr, dev);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	cpus_read_lock();
	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
	cpus_read_unlock();

	rtnl_unlock();

	bitmap_free(mask);
	return err ? : len;
}

static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
	= __ATTR_RW(xps_rxqs);
#endif /* CONFIG_XPS */

static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
	&queue_trans_timeout.attr,
	&queue_traffic_class.attr,
#ifdef CONFIG_XPS
	&xps_cpus_attribute.attr,
	&xps_rxqs_attribute.attr,
	&queue_tx_maxrate.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(netdev_queue_default);

static void netdev_queue_release(struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const struct ns_common *netdev_queue_namespace(const struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);
	struct device *dev = &queue->dev->dev;

	if (dev->class && dev->class->namespace)
		return dev->class->namespace(dev);

	return NULL;
}

static void netdev_queue_get_ownership(const struct kobject *kobj,
				       kuid_t *uid, kgid_t *gid)
{
	const struct ns_common *ns = netdev_queue_namespace(kobj);

	net_ns_get_ownership(ns ? container_of(ns, struct net, ns) : NULL,
			     uid, gid);
}

static const struct kobj_type netdev_queue_ktype = {
	.sysfs_ops = &netdev_queue_sysfs_ops,
	.release = netdev_queue_release,
	.namespace = netdev_queue_namespace,
	.get_ownership = netdev_queue_get_ownership,
};

static bool netdev_uses_bql(const struct net_device *dev)
{
	if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
		return false;

	return IS_ENABLED(CONFIG_BQL);
}

static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_queue *queue = dev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Tx queues are cleared in netdev_queue_release to allow later
	 * re-registration. This is triggered when their kobj refcount is
	 * dropped.
	 *
	 * If a queue is removed while both a read (or write) operation and
	 * the re-addition of the same queue are pending (both waiting on
	 * rtnl_lock), the re-addition may execute before the read, so the
	 * initial removal never happens (the queue's kobj refcount won't drop
	 * enough because of the pending read). In such a rare case, return to
	 * allow the removal operation to complete.
	 */
	if (unlikely(kobj->state_initialized)) {
		netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed");
		return -EAGAIN;
	}

	/* A later kobject_put will trigger the netdev_queue_release call,
	 * which decreases the dev refcount: take that reference here.
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
				     "tx-%u", index);
	if (error)
		goto err;

	queue->groups = netdev_queue_default_groups;
	error = sysfs_create_groups(kobj, queue->groups);
	if (error)
		goto err;

	if (netdev_uses_bql(dev)) {
		error = sysfs_create_group(kobj, &dql_group);
		if (error)
			goto err_default_groups;
	}

	kobject_uevent(kobj, KOBJ_ADD);
	return 0;

err_default_groups:
	sysfs_remove_groups(kobj, queue->groups);
err:
	kobject_put(kobj);
	return error;
}
2007
2008static int tx_queue_change_owner(struct net_device *ndev, int index,
2009 kuid_t kuid, kgid_t kgid)
2010{
2011 struct netdev_queue *queue = ndev->_tx + index;
2012 struct kobject *kobj = &queue->kobj;
2013 int error;
2014
2015 error = sysfs_change_owner(kobj, kuid, kgid);
2016 if (error)
2017 return error;
2018
2019 if (netdev_uses_bql(ndev))
2020 error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
2021
2022 return error;
2023}
2024#endif /* CONFIG_SYSFS */

int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!check_net(dev_net(dev)))
			queue->kobj.uevent_suppress = 1;

		if (netdev_uses_bql(dev))
			sysfs_remove_group(&queue->kobj, &dql_group);

		sysfs_remove_groups(&queue->kobj, queue->groups);
		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}
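
/* Illustrative sketch (not part of this file): drivers reach the helper
 * above indirectly, e.g. through netif_set_real_num_tx_queues(), which
 * adds or drops tx-<n> kobjects to match the new queue count. The
 * function and its queue-count parameter below are hypothetical.
 */
#if 0
static int example_resize_tx(struct net_device *dev, unsigned int txq)
{
	ASSERT_RTNL();	/* resizing the queue set requires the RTNL lock */

	/* Grows or shrinks the tx-<n> sysfs entries as a side effect */
	return netif_set_real_num_tx_queues(dev, txq);
}
#endif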

static int net_tx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

	for (i = 0; i < num; i++) {
		error = tx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int register_queue_kobjects(struct net_device *dev)
{
	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	dev->queues_kset = kset_create_and_add("queues",
					       NULL, &dev->dev.kobj);
	if (!dev->queues_kset)
		return -ENOMEM;
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	error = net_rx_queue_update_kobjects(dev, 0, real_rx);
	if (error)
		goto error;
	rxq = real_rx;

	error = netdev_queue_update_kobjects(dev, 0, real_tx);
	if (error)
		goto error;
	txq = real_tx;

	return 0;

error:
	netdev_queue_update_kobjects(dev, txq, 0);
	net_rx_queue_update_kobjects(dev, rxq, 0);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
	return error;
}
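
/* For illustration: after this succeeds, a hypothetical "eth0" with one
 * RX and one TX queue exposes
 *
 *   /sys/class/net/eth0/queues/rx-0/
 *   /sys/class/net/eth0/queues/tx-0/
 *
 * with the per-queue attributes (e.g. rps_cpus, tx_timeout) underneath.
 */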

static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
{
	int error = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	if (ndev->queues_kset) {
		error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
		if (error)
			return error;
	}
	real_rx = ndev->real_num_rx_queues;
#endif
	real_tx = ndev->real_num_tx_queues;

	error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
	if (error)
		return error;

	error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
	if (error)
		return error;

	return 0;
}

static void remove_queue_kobjects(struct net_device *dev)
{
	int real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	net_rx_queue_update_kobjects(dev, real_rx, 0);
	netdev_queue_update_kobjects(dev, real_tx, 0);

	netdev_lock_ops(dev);
	dev->real_num_rx_queues = 0;
	dev->real_num_tx_queues = 0;
	netdev_unlock_ops(dev);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
}

static bool net_current_may_mount(void)
{
	struct net *net = current->nsproxy->net_ns;

	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
}

static struct ns_common *net_grab_current_ns(void)
{
	struct net *net = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
	if (net)
		refcount_inc(&net->passive);
#endif
	return net ? to_ns_common(net) : NULL;
}

static const struct ns_common *net_initial_ns(void)
{
	return to_ns_common(&init_net);
}

static const struct ns_common *net_netlink_ns(struct sock *sk)
{
	return to_ns_common(sock_net(sk));
}

const struct kobj_ns_type_operations net_ns_type_operations = {
	.type = KOBJ_NS_TYPE_NET,
	.current_may_mount = net_current_may_mount,
	.grab_current_ns = net_grab_current_ns,
	.netlink_ns = net_netlink_ns,
	.initial_ns = net_initial_ns,
	.drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);

static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
{
	const struct net_device *dev = to_net_dev(d);
	int retval;

	/* pass interface to uevent. */
	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
	if (retval)
		goto exit;

	/* pass ifindex to uevent.
	 * ifindex is useful as it won't change (interface name may change)
	 * and is what RtNetlink uses natively.
	 */
	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);

exit:
	return retval;
}
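
/* For illustration: a KOBJ_ADD uevent for a hypothetical "eth0" with
 * ifindex 2 thus carries, alongside the standard variables:
 *
 *   INTERFACE=eth0
 *   IFINDEX=2
 */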

/*
 * netdev_release -- destroy and free a dead device.
 * Called when last reference to device kobject is gone.
 */
static void netdev_release(struct device *d)
{
	struct net_device *dev = to_net_dev(d);

	BUG_ON(dev->reg_state != NETREG_RELEASED);

	/* no need to wait for rcu grace period:
	 * device is dead and about to be freed.
	 */
	kfree(rcu_access_pointer(dev->ifalias));
	kvfree(dev);
}

static const struct ns_common *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return to_ns_common(dev_net(dev));
}

static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
{
	const struct net_device *dev = to_net_dev(d);
	const struct net *net = dev_net(dev);

	net_ns_get_ownership(net, uid, gid);
}

static const struct class net_class = {
	.name = "net",
	.dev_release = netdev_release,
	.dev_groups = net_class_groups,
	.dev_uevent = netdev_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
	.get_ownership = net_get_ownership,
};
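
/* For illustration: every net_device registered against this class shows
 * up as /sys/class/net/<ifname>, with ownership and namespace visibility
 * resolved through the callbacks above.
 */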

#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
	for (; dev; dev = dev->parent) {
		if (dev->of_node == data)
			return 1;
	}

	return 0;
}

/*
 * of_find_net_device_by_node - lookup the net device for the device node
 * @np: OF device node
 *
 * Looks up the net_device structure corresponding to the device node.
 * If successful, returns a pointer to the net_device with the embedded
 * struct device refcount incremented by one, or NULL on failure. The
 * refcount must be dropped when done with the net_device.
 */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
	struct device *dev;

	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
	if (!dev)
		return NULL;

	return to_net_dev(dev);
}
EXPORT_SYMBOL(of_find_net_device_by_node);
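
/* Illustrative usage sketch (hypothetical caller, not part of this file):
 * the reference taken by the lookup must be dropped with put_device().
 */
#if 0
static void example_lookup(struct device_node *np)
{
	struct net_device *ndev = of_find_net_device_by_node(np);

	if (!ndev)
		return;

	pr_info("OF node %pOF maps to %s\n", np, ndev->name);
	put_device(&ndev->dev);	/* drop the lookup's device reference */
}
#endif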
#endif

/* Delete sysfs entries but hold kobject reference until after all
 * netdev references are gone.
 */
void netdev_unregister_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;

	if (!check_net(dev_net(ndev)))
		dev_set_uevent_suppress(dev, 1);

	kobject_get(&dev->kobj);

	remove_queue_kobjects(ndev);

	pm_runtime_set_memalloc_noio(dev, false);

	device_del(dev);
}

/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;
	const struct attribute_group **groups = ndev->sysfs_groups;
	int error = 0;

	device_initialize(dev);
	dev->class = &net_class;
	dev->platform_data = ndev;
	dev->groups = groups;

	dev_set_name(dev, "%s", ndev->name);

#ifdef CONFIG_SYSFS
	/* Allow for a device specific group */
	if (*groups)
		groups++;

	*groups++ = &netstat_group;
	*groups++ = &netdev_phys_group;

	if (wireless_group_needed(ndev))
		*groups++ = &wireless_group;
#endif /* CONFIG_SYSFS */

	error = device_add(dev);
	if (error)
		return error;

	error = register_queue_kobjects(ndev);
	if (error) {
		device_del(dev);
		return error;
	}

	pm_runtime_set_memalloc_noio(dev, true);

	return error;
}

/* Change owner for sysfs entries when moving network devices across network
 * namespaces owned by different user namespaces.
 */
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
			const struct net *net_new)
{
	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
	struct device *dev = &ndev->dev;
	int error;

	net_ns_get_ownership(net_old, &old_uid, &old_gid);
	net_ns_get_ownership(net_new, &new_uid, &new_gid);

	/* The network namespace was changed but the owning user namespace is
	 * identical so there's no need to change the owner of sysfs entries.
	 */
	if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
		return 0;

	error = device_change_owner(dev, new_uid, new_gid);
	if (error)
		return error;

	error = queue_change_owner(ndev, new_uid, new_gid);
	if (error)
		return error;

	return 0;
}

int netdev_class_create_file_ns(const struct class_attribute *class_attr,
				const struct ns_common *ns)
{
	return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);

void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
				 const struct ns_common *ns)
{
	class_remove_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_remove_file_ns);

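/* Called once at boot (from net_dev_init()) to register the net namespace
 * kobject type and the "net" class backing /sys/class/net.
 */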
int __init netdev_kobject_init(void)
{
	kobj_ns_type_register(&net_ns_type_operations);
	return class_register(&net_class);
}