Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

netdevice: define and allocate &net_device _properly_

In fact, this structure contains a flexible array at the end, but
historically its size, alignment, etc. have been calculated manually.
There are several instances of the structure embedded into other
structures, but there is also an ongoing effort to remove them, so in
the meantime we can declare &net_device properly.
Declare the array explicitly, use struct_size() and store the array
size inside the structure, so that __counted_by() can be applied.
Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the
allocated buffer is aligned to what the user expects.
Also, change its alignment from %NETDEV_ALIGN to the cacheline size
as per several suggestions on the netdev ML.

bloat-o-meter for vmlinux (function, old size, new size, delta):

free_netdev 445 440 -5
netdev_freemem 24 - -24
alloc_netdev_mqs 1481 1450 -31

On x86_64 with several NICs of different vendors, I was never able to
get a &net_device pointer not aligned to the cacheline size after the
change.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20240710113036.2125584-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Alexander Lobakin and committed by
Jakub Kicinski
13cabc47 8341eee8

+16 -31
+9 -6
include/linux/netdevice.h
··· 1819 1819 * @priv_flags: Like 'flags' but invisible to userspace, 1820 1820 * see if.h for the definitions 1821 1821 * @gflags: Global flags ( kept as legacy ) 1822 - * @padded: How much padding added by alloc_netdev() 1822 + * @priv_len: Size of the ->priv flexible array 1823 + * @priv: Flexible array containing private data 1823 1824 * @operstate: RFC2863 operstate 1824 1825 * @link_mode: Mapping policy to operstate 1825 1826 * @if_port: Selectable AUI, TP, ... ··· 2200 2199 unsigned short neigh_priv_len; 2201 2200 unsigned short dev_id; 2202 2201 unsigned short dev_port; 2203 - unsigned short padded; 2202 + int irq; 2203 + u32 priv_len; 2204 2204 2205 2205 spinlock_t addr_list_lock; 2206 - int irq; 2207 2206 2208 2207 struct netdev_hw_addr_list uc; 2209 2208 struct netdev_hw_addr_list mc; ··· 2407 2406 2408 2407 /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ 2409 2408 struct dim_irq_moder *irq_moder; 2410 - }; 2409 + 2410 + u8 priv[] ____cacheline_aligned 2411 + __counted_by(priv_len); 2412 + } ____cacheline_aligned; 2411 2413 #define to_net_dev(d) container_of(d, struct net_device, dev) 2412 2414 2413 2415 /* ··· 2600 2596 */ 2601 2597 static inline void *netdev_priv(const struct net_device *dev) 2602 2598 { 2603 - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); 2599 + return (void *)dev->priv; 2604 2600 } 2605 2601 2606 2602 /* Set the sysfs physical device reference for the network logical device ··· 3131 3127 3132 3128 int netdev_refcnt_read(const struct net_device *dev); 3133 3129 void free_netdev(struct net_device *dev); 3134 - void netdev_freemem(struct net_device *dev); 3135 3130 void init_dummy_netdev(struct net_device *dev); 3136 3131 3137 3132 struct net_device *netdev_get_xmit_slave(struct net_device *dev,
+6 -24
net/core/dev.c
··· 11006 11006 } 11007 11007 EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); 11008 11008 11009 - void netdev_freemem(struct net_device *dev) 11010 - { 11011 - char *addr = (char *)dev - dev->padded; 11012 - 11013 - kvfree(addr); 11014 - } 11015 - 11016 11009 /** 11017 11010 * alloc_netdev_mqs - allocate network device 11018 11011 * @sizeof_priv: size of private data to allocate space for ··· 11025 11032 unsigned int txqs, unsigned int rxqs) 11026 11033 { 11027 11034 struct net_device *dev; 11028 - unsigned int alloc_size; 11029 - struct net_device *p; 11030 11035 11031 11036 BUG_ON(strlen(name) >= sizeof(dev->name)); 11032 11037 ··· 11038 11047 return NULL; 11039 11048 } 11040 11049 11041 - alloc_size = sizeof(struct net_device); 11042 - if (sizeof_priv) { 11043 - /* ensure 32-byte alignment of private area */ 11044 - alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); 11045 - alloc_size += sizeof_priv; 11046 - } 11047 - /* ensure 32-byte alignment of whole construct */ 11048 - alloc_size += NETDEV_ALIGN - 1; 11049 - 11050 - p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); 11051 - if (!p) 11050 + dev = kvzalloc(struct_size(dev, priv, sizeof_priv), 11051 + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); 11052 + if (!dev) 11052 11053 return NULL; 11053 11054 11054 - dev = PTR_ALIGN(p, NETDEV_ALIGN); 11055 - dev->padded = (char *)dev - (char *)p; 11055 + dev->priv_len = sizeof_priv; 11056 11056 11057 11057 ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); 11058 11058 #ifdef CONFIG_PCPU_DEV_REFCNT ··· 11130 11148 free_percpu(dev->pcpu_refcnt); 11131 11149 free_dev: 11132 11150 #endif 11133 - netdev_freemem(dev); 11151 + kvfree(dev); 11134 11152 return NULL; 11135 11153 } 11136 11154 EXPORT_SYMBOL(alloc_netdev_mqs); ··· 11185 11203 /* Compatibility with error handling in drivers */ 11186 11204 if (dev->reg_state == NETREG_UNINITIALIZED || 11187 11205 dev->reg_state == NETREG_DUMMY) { 11188 - netdev_freemem(dev); 11206 + kvfree(dev); 11189 11207 
return; 11190 11208 } 11191 11209
+1 -1
net/core/net-sysfs.c
··· 2028 2028 * device is dead and about to be freed. 2029 2029 */ 2030 2030 kfree(rcu_access_pointer(dev->ifalias)); 2031 - netdev_freemem(dev); 2031 + kvfree(dev); 2032 2032 } 2033 2033 2034 2034 static const void *net_namespace(const struct device *d)