Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'add-sock_kmemdup-helper'

Geliang Tang says:

====================
add sock_kmemdup helper

While developing the MPTCP BPF path manager [1], I found it useful to
add a new sock_kmemdup() helper.

My use case is this:

The mptcp_userspace_pm_append_new_local_addr() function (see patch 3
in this patchset) uses sock_kmalloc() to allocate an address
entry "e", then immediately duplicates the input "entry" into it:

'''
e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
if (!e) {
ret = -ENOMEM;
goto append_err;
}

*e = *entry;
'''

When I implemented the MPTCP BPF path manager, I needed to implement
code similar to this in BPF.

The kfunc sock_kmalloc() can be easily invoked in BPF to allocate
an entry "e", but the code "*e = *entry;" that assigns "entry" to
"e" is not easy to implement.

I had to implement such a "copy entry" helper in BPF:

'''
static void mptcp_pm_copy_addr(struct mptcp_addr_info *dst,
struct mptcp_addr_info *src)
{
dst->id = src->id;
dst->family = src->family;
dst->port = src->port;

if (src->family == AF_INET) {
dst->addr.s_addr = src->addr.s_addr;
} else if (src->family == AF_INET6) {
dst->addr6.s6_addr32[0] = src->addr6.s6_addr32[0];
dst->addr6.s6_addr32[1] = src->addr6.s6_addr32[1];
dst->addr6.s6_addr32[2] = src->addr6.s6_addr32[2];
dst->addr6.s6_addr32[3] = src->addr6.s6_addr32[3];
}
}

static void mptcp_pm_copy_entry(struct mptcp_pm_addr_entry *dst,
struct mptcp_pm_addr_entry *src)
{
mptcp_pm_copy_addr(&dst->addr, &src->addr);

dst->flags = src->flags;
dst->ifindex = src->ifindex;
}
'''

And add "write permission" for BPF to each field of mptcp_pm_addr_entry:

'''
@@ static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log,
case offsetof(struct mptcp_pm_addr_entry, addr.port):
end = offsetofend(struct mptcp_pm_addr_entry, addr.port);
break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[0]):
end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[0]);
break;
case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[1]):
end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[1]);
break;
case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[2]):
end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[2]);
break;
case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[3]):
end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[3]);
break;
#else
case offsetof(struct mptcp_pm_addr_entry, addr.addr.s_addr):
end = offsetofend(struct mptcp_pm_addr_entry, addr.addr.s_addr);
break;
#endif
'''

But with a sock_kmemdup() helper this becomes much simpler: BPF only
needs to call the kfunc sock_kmemdup() instead.

So this patchset adds this new helper and uses it in several places.

[1]
https://lore.kernel.org/mptcp/cover.1738924875.git.tanggeliang@kylinos.cn/
====================

Link: https://patch.msgid.link/cover.1740735165.git.tanggeliang@kylinos.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+24 -14
+2
include/net/sock.h
··· 1797 1797 } 1798 1798 1799 1799 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority); 1800 + void *sock_kmemdup(struct sock *sk, const void *src, 1801 + int size, gfp_t priority); 1800 1802 void sock_kfree_s(struct sock *sk, void *mem, int size); 1801 1803 void sock_kzfree_s(struct sock *sk, void *mem, int size); 1802 1804 void sk_send_sigurg(struct sock *sk);
+16
net/core/sock.c
··· 2836 2836 } 2837 2837 EXPORT_SYMBOL(sock_kmalloc); 2838 2838 2839 + /* 2840 + * Duplicate the input "src" memory block using the socket's 2841 + * option memory buffer. 2842 + */ 2843 + void *sock_kmemdup(struct sock *sk, const void *src, 2844 + int size, gfp_t priority) 2845 + { 2846 + void *mem; 2847 + 2848 + mem = sock_kmalloc(sk, size, priority); 2849 + if (mem) 2850 + memcpy(mem, src, size); 2851 + return mem; 2852 + } 2853 + EXPORT_SYMBOL(sock_kmemdup); 2854 + 2839 2855 /* Free an option memory block. Note, we actually want the inline 2840 2856 * here as this allows gcc to detect the nullify and fold away the 2841 2857 * condition entirely.
+1 -2
net/ipv6/exthdrs.c
··· 1204 1204 { 1205 1205 struct ipv6_txoptions *opt2; 1206 1206 1207 - opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC); 1207 + opt2 = sock_kmemdup(sk, opt, opt->tot_len, GFP_ATOMIC); 1208 1208 if (opt2) { 1209 1209 long dif = (char *)opt2 - (char *)opt; 1210 - memcpy(opt2, opt, opt->tot_len); 1211 1210 if (opt2->hopopt) 1212 1211 *((char **)&opt2->hopopt) += dif; 1213 1212 if (opt2->dst0opt)
+1 -2
net/mptcp/pm_userspace.c
··· 71 71 /* Memory for the entry is allocated from the 72 72 * sock option buffer. 73 73 */ 74 - e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC); 74 + e = sock_kmemdup(sk, entry, sizeof(*entry), GFP_ATOMIC); 75 75 if (!e) { 76 76 ret = -ENOMEM; 77 77 goto append_err; 78 78 } 79 79 80 - *e = *entry; 81 80 if (!e->addr.id && needs_id) 82 81 e->addr.id = find_next_zero_bit(id_bitmap, 83 82 MPTCP_PM_MAX_ADDR_ID + 1,
+2 -5
net/mptcp/protocol.c
··· 3178 3178 rcu_read_lock(); 3179 3179 inet_opt = rcu_dereference(inet->inet_opt); 3180 3180 if (inet_opt) { 3181 - newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + 3181 + newopt = sock_kmemdup(newsk, inet_opt, sizeof(*inet_opt) + 3182 3182 inet_opt->opt.optlen, GFP_ATOMIC); 3183 - if (newopt) 3184 - memcpy(newopt, inet_opt, sizeof(*inet_opt) + 3185 - inet_opt->opt.optlen); 3186 - else 3183 + if (!newopt) 3187 3184 net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); 3188 3185 } 3189 3186 RCU_INIT_POINTER(newinet->inet_opt, newopt);
+2 -5
net/sctp/protocol.c
··· 185 185 rcu_read_lock(); 186 186 inet_opt = rcu_dereference(inet->inet_opt); 187 187 if (inet_opt) { 188 - newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + 188 + newopt = sock_kmemdup(newsk, inet_opt, sizeof(*inet_opt) + 189 189 inet_opt->opt.optlen, GFP_ATOMIC); 190 - if (newopt) 191 - memcpy(newopt, inet_opt, sizeof(*inet_opt) + 192 - inet_opt->opt.optlen); 193 - else 190 + if (!newopt) 194 191 pr_err("%s: Failed to copy ip options\n", __func__); 195 192 } 196 193 RCU_INIT_POINTER(newinet->inet_opt, newopt);