Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'nexthop-convert-rtm_-new-del-nexthop-to-per-netns-rtnl'

Kuniyuki Iwashima says:

====================
nexthop: Convert RTM_{NEW,DEL}NEXTHOP to per-netns RTNL.

Patches 1 - 5 move some validation for RTM_NEWNEXTHOP so that it can be
called without RTNL.

Patches 6 & 7 convert RTM_NEWNEXTHOP and RTM_DELNEXTHOP to per-netns RTNL.

Note that RTM_GETNEXTHOP and RTM_GETNEXTHOPBUCKET are not touched in
this series.

rtm_get_nexthop() can be easily converted to RCU, but rtm_dump_nexthop()
needs more work due to the left-to-right rbtree walk, which looks vulnerable
to concurrent node deletion and tree rotation without a retry mechanism.

v1: https://lore.kernel.org/netdev/20250318233240.53946-1-kuniyu@amazon.com/
====================

Link: https://patch.msgid.link/20250319230743.65267-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+112 -71
+112 -71
net/ipv4/nexthop.c
··· 1272 1272 u16 nh_grp_type, struct netlink_ext_ack *extack) 1273 1273 { 1274 1274 unsigned int len = nla_len(tb[NHA_GROUP]); 1275 - u8 nh_family = AF_UNSPEC; 1276 1275 struct nexthop_grp *nhg; 1277 1276 unsigned int i, j; 1278 - u8 nhg_fdb = 0; 1279 1277 1280 1278 if (!len || len & (sizeof(struct nexthop_grp) - 1)) { 1281 1279 NL_SET_ERR_MSG(extack, ··· 1305 1307 } 1306 1308 } 1307 1309 1308 - if (tb[NHA_FDB]) 1309 - nhg_fdb = 1; 1310 1310 nhg = nla_data(tb[NHA_GROUP]); 1311 - for (i = 0; i < len; ++i) { 1311 + for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { 1312 + if (!tb[i]) 1313 + continue; 1314 + switch (i) { 1315 + case NHA_HW_STATS_ENABLE: 1316 + case NHA_FDB: 1317 + continue; 1318 + case NHA_RES_GROUP: 1319 + if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) 1320 + continue; 1321 + break; 1322 + } 1323 + NL_SET_ERR_MSG(extack, 1324 + "No other attributes can be set in nexthop groups"); 1325 + return -EINVAL; 1326 + } 1327 + 1328 + return 0; 1329 + } 1330 + 1331 + static int nh_check_attr_group_rtnl(struct net *net, struct nlattr *tb[], 1332 + struct netlink_ext_ack *extack) 1333 + { 1334 + u8 nh_family = AF_UNSPEC; 1335 + struct nexthop_grp *nhg; 1336 + unsigned int len; 1337 + unsigned int i; 1338 + u8 nhg_fdb; 1339 + 1340 + len = nla_len(tb[NHA_GROUP]) / sizeof(*nhg); 1341 + nhg = nla_data(tb[NHA_GROUP]); 1342 + nhg_fdb = !!tb[NHA_FDB]; 1343 + 1344 + for (i = 0; i < len; i++) { 1312 1345 struct nexthop *nh; 1313 1346 bool is_fdb_nh; 1314 1347 ··· 1358 1329 NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); 1359 1330 return -EINVAL; 1360 1331 } 1361 - } 1362 - for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { 1363 - if (!tb[i]) 1364 - continue; 1365 - switch (i) { 1366 - case NHA_HW_STATS_ENABLE: 1367 - case NHA_FDB: 1368 - continue; 1369 - case NHA_RES_GROUP: 1370 - if (nh_grp_type == NEXTHOP_GRP_TYPE_RES) 1371 - continue; 1372 - break; 1373 - } 1374 - NL_SET_ERR_MSG(extack, 1375 - "No other attributes can be set in nexthop groups"); 1376 
- return -EINVAL; 1377 1332 } 1378 1333 1379 1334 return 0; ··· 2692 2679 int err; 2693 2680 int i; 2694 2681 2695 - if (WARN_ON(!num_nh)) 2696 - return ERR_PTR(-EINVAL); 2697 - 2698 2682 nh = nexthop_alloc(); 2699 2683 if (!nh) 2700 2684 return ERR_PTR(-ENOMEM); ··· 2925 2915 struct nexthop *nh; 2926 2916 int err; 2927 2917 2928 - if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) { 2929 - NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); 2930 - return ERR_PTR(-EINVAL); 2931 - } 2932 - 2933 2918 if (!cfg->nh_id) { 2934 2919 cfg->nh_id = nh_find_unused_id(net); 2935 2920 if (!cfg->nh_id) { ··· 3021 3016 } 3022 3017 3023 3018 static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, 3024 - struct nlmsghdr *nlh, struct nh_config *cfg, 3019 + struct nlmsghdr *nlh, struct nlattr **tb, 3020 + struct nh_config *cfg, 3025 3021 struct netlink_ext_ack *extack) 3026 3022 { 3027 3023 struct nhmsg *nhm = nlmsg_data(nlh); 3028 - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; 3029 3024 int err; 3030 - 3031 - err = nlmsg_parse(nlh, sizeof(*nhm), tb, 3032 - ARRAY_SIZE(rtm_nh_policy_new) - 1, 3033 - rtm_nh_policy_new, extack); 3034 - if (err < 0) 3035 - return err; 3036 3025 3037 3026 err = -EINVAL; 3038 3027 if (nhm->resvd || nhm->nh_scope) { ··· 3092 3093 NL_SET_ERR_MSG(extack, "Invalid group type"); 3093 3094 goto out; 3094 3095 } 3095 - err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), 3096 + 3097 + err = nh_check_attr_group(net, tb, ARRAY_SIZE(rtm_nh_policy_new), 3096 3098 cfg->nh_grp_type, extack); 3097 3099 if (err) 3098 3100 goto out; ··· 3124 3124 if (!cfg->nh_fdb && !tb[NHA_OIF]) { 3125 3125 NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); 3126 3126 goto out; 3127 - } 3128 - 3129 - if (!cfg->nh_fdb && tb[NHA_OIF]) { 3130 - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); 3131 - if (cfg->nh_ifindex) 3132 - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); 3133 - 3134 - if (!cfg->dev) { 3135 - NL_SET_ERR_MSG(extack, 
"Invalid device index"); 3136 - goto out; 3137 - } else if (!(cfg->dev->flags & IFF_UP)) { 3138 - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 3139 - err = -ENETDOWN; 3140 - goto out; 3141 - } else if (!netif_carrier_ok(cfg->dev)) { 3142 - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); 3143 - err = -ENETDOWN; 3144 - goto out; 3145 - } 3146 3127 } 3147 3128 3148 3129 err = -EINVAL; ··· 3169 3188 3170 3189 cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); 3171 3190 err = lwtunnel_valid_encap_type(cfg->nh_encap_type, 3172 - extack, true); 3191 + extack, false); 3173 3192 if (err < 0) 3174 3193 goto out; 3175 3194 ··· 3188 3207 return err; 3189 3208 } 3190 3209 3210 + static int rtm_to_nh_config_rtnl(struct net *net, struct nlattr **tb, 3211 + struct nh_config *cfg, 3212 + struct netlink_ext_ack *extack) 3213 + { 3214 + if (tb[NHA_GROUP]) 3215 + return nh_check_attr_group_rtnl(net, tb, extack); 3216 + 3217 + if (tb[NHA_OIF]) { 3218 + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); 3219 + if (cfg->nh_ifindex) 3220 + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); 3221 + 3222 + if (!cfg->dev) { 3223 + NL_SET_ERR_MSG(extack, "Invalid device index"); 3224 + return -EINVAL; 3225 + } 3226 + 3227 + if (!(cfg->dev->flags & IFF_UP)) { 3228 + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 3229 + return -ENETDOWN; 3230 + } 3231 + 3232 + if (!netif_carrier_ok(cfg->dev)) { 3233 + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); 3234 + return -ENETDOWN; 3235 + } 3236 + } 3237 + 3238 + return 0; 3239 + } 3240 + 3191 3241 /* rtnl */ 3192 3242 static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, 3193 3243 struct netlink_ext_ack *extack) 3194 3244 { 3245 + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; 3195 3246 struct net *net = sock_net(skb->sk); 3196 3247 struct nh_config cfg; 3197 3248 struct nexthop *nh; 3198 3249 int err; 3199 3250 3200 - err = rtm_to_nh_config(net, skb, nlh, &cfg, extack); 3201 - if (!err) { 3202 - 
nh = nexthop_add(net, &cfg, extack); 3203 - if (IS_ERR(nh)) 3204 - err = PTR_ERR(nh); 3251 + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, 3252 + ARRAY_SIZE(rtm_nh_policy_new) - 1, 3253 + rtm_nh_policy_new, extack); 3254 + if (err < 0) 3255 + goto out; 3256 + 3257 + err = rtm_to_nh_config(net, skb, nlh, tb, &cfg, extack); 3258 + if (err) 3259 + goto out; 3260 + 3261 + if (cfg.nlflags & NLM_F_REPLACE && !cfg.nh_id) { 3262 + NL_SET_ERR_MSG(extack, "Replace requires nexthop id"); 3263 + err = -EINVAL; 3264 + goto out; 3205 3265 } 3206 3266 3267 + rtnl_net_lock(net); 3268 + 3269 + err = rtm_to_nh_config_rtnl(net, tb, &cfg, extack); 3270 + if (err) 3271 + goto unlock; 3272 + 3273 + nh = nexthop_add(net, &cfg, extack); 3274 + if (IS_ERR(nh)) 3275 + err = PTR_ERR(nh); 3276 + 3277 + unlock: 3278 + rtnl_net_unlock(net); 3279 + out: 3207 3280 return err; 3208 3281 } 3209 3282 ··· 3314 3279 if (err) 3315 3280 return err; 3316 3281 3282 + rtnl_net_lock(net); 3283 + 3317 3284 nh = nexthop_find_by_id(net, id); 3318 - if (!nh) 3319 - return -ENOENT; 3285 + if (nh) 3286 + remove_nexthop(net, nh, &nlinfo); 3287 + else 3288 + err = -ENOENT; 3320 3289 3321 - remove_nexthop(net, nh, &nlinfo); 3290 + rtnl_net_unlock(net); 3322 3291 3323 - return 0; 3292 + return err; 3324 3293 } 3325 3294 3326 3295 /* rtnl */ ··· 4076 4037 }; 4077 4038 4078 4039 static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = { 4079 - {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop}, 4080 - {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop}, 4040 + {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop, 4041 + .flags = RTNL_FLAG_DOIT_PERNET}, 4042 + {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop, 4043 + .flags = RTNL_FLAG_DOIT_PERNET}, 4081 4044 {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop, 4082 4045 .dumpit = rtm_dump_nexthop}, 4083 4046 {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket, 4084 4047 .dumpit = rtm_dump_nexthop_bucket}, 4085 4048 {.protocol = 
PF_INET, .msgtype = RTM_NEWNEXTHOP, 4086 - .doit = rtm_new_nexthop}, 4049 + .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET}, 4087 4050 {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP, 4088 4051 .dumpit = rtm_dump_nexthop}, 4089 4052 {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP, 4090 - .doit = rtm_new_nexthop}, 4053 + .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET}, 4091 4054 {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP, 4092 4055 .dumpit = rtm_dump_nexthop}, 4093 4056 };