Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-fib_rules-add-dscp-mask-support'

Ido Schimmel says:

====================
net: fib_rules: Add DSCP mask support

In some deployments, users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port, and the DSCP
field, and use this information to route packets accordingly.

Redirecting traffic to a routing table based on specific bits in the
DSCP field is not currently possible: FIB rules only support an exact
match on the full DSCP value.

This patchset extends FIB rules to match on the DSCP field with an
optional mask.

Patches #1-#5 gradually extend FIB rules to match on the DSCP field with
an optional mask.

Patch #6 adds test cases for the new functionality.

iproute2 support can be found here [1].

[1] https://github.com/idosch/iproute2/tree/submit/fib_rule_mask_v1
====================

Link: https://patch.msgid.link/20250220080525.831924-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+132 -5
+5
Documentation/netlink/specs/rt_rule.yaml
··· 190 190 name: dport-mask 191 191 type: u16 192 192 display-hint: hex 193 + - 194 + name: dscp-mask 195 + type: u8 196 + display-hint: hex 193 197 194 198 operations: 195 199 enum-model: directional ··· 229 225 - flowlabel-mask 230 226 - sport-mask 231 227 - dport-mask 228 + - dscp-mask 232 229 - 233 230 name: newrule-ntf 234 231 doc: Notify a rule creation
+1
include/uapi/linux/fib_rules.h
··· 72 72 FRA_FLOWLABEL_MASK, /* flowlabel mask */ 73 73 FRA_SPORT_MASK, /* sport mask */ 74 74 FRA_DPORT_MASK, /* dport mask */ 75 + FRA_DSCP_MASK, /* dscp mask */ 75 76 __FRA_MAX 76 77 }; 77 78
+1
net/core/fib_rules.c
··· 845 845 [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, 846 846 [FRA_SPORT_MASK] = { .type = NLA_U16 }, 847 847 [FRA_DPORT_MASK] = { .type = NLA_U16 }, 848 + [FRA_DSCP_MASK] = NLA_POLICY_MASK(NLA_U8, INET_DSCP_MASK >> 2), 848 849 }; 849 850 850 851 int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
+44 -3
net/ipv4/fib_rules.c
··· 37 37 u8 dst_len; 38 38 u8 src_len; 39 39 dscp_t dscp; 40 + dscp_t dscp_mask; 40 41 u8 dscp_full:1; /* DSCP or TOS selector */ 41 42 __be32 src; 42 43 __be32 srcmask; ··· 193 192 * to mask the upper three DSCP bits prior to matching to maintain 194 193 * legacy behavior. 195 194 */ 196 - if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos)) 195 + if (r->dscp_full && 196 + (r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask) 197 197 return 0; 198 198 else if (!r->dscp_full && r->dscp && 199 199 !fib_dscp_masked_match(r->dscp, fl4)) ··· 237 235 } 238 236 239 237 rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 238 + rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK); 240 239 rule4->dscp_full = true; 240 + 241 + return 0; 242 + } 243 + 244 + static int fib4_nl2rule_dscp_mask(const struct nlattr *nla, 245 + struct fib4_rule *rule4, 246 + struct netlink_ext_ack *extack) 247 + { 248 + dscp_t dscp_mask; 249 + 250 + if (!rule4->dscp_full) { 251 + NL_SET_ERR_MSG_ATTR(extack, nla, 252 + "Cannot specify DSCP mask without DSCP value"); 253 + return -EINVAL; 254 + } 255 + 256 + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 257 + if (rule4->dscp & ~dscp_mask) { 258 + NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask"); 259 + return -EINVAL; 260 + } 261 + 262 + rule4->dscp_mask = dscp_mask; 241 263 242 264 return 0; 243 265 } ··· 295 269 296 270 if (tb[FRA_DSCP] && 297 271 fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0) 272 + goto errout; 273 + 274 + if (tb[FRA_DSCP_MASK] && 275 + fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0) 298 276 goto errout; 299 277 300 278 /* split local/main if they are not already split */ ··· 396 366 return 0; 397 367 } 398 368 369 + if (tb[FRA_DSCP_MASK]) { 370 + dscp_t dscp_mask; 371 + 372 + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2); 373 + if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask) 374 + return 0; 375 + } 376 + 399 377 #ifdef 
CONFIG_IP_ROUTE_CLASSID 400 378 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 401 379 return 0; ··· 429 391 if (rule4->dscp_full) { 430 392 frh->tos = 0; 431 393 if (nla_put_u8(skb, FRA_DSCP, 432 - inet_dscp_to_dsfield(rule4->dscp) >> 2)) 394 + inet_dscp_to_dsfield(rule4->dscp) >> 2) || 395 + nla_put_u8(skb, FRA_DSCP_MASK, 396 + inet_dscp_to_dsfield(rule4->dscp_mask) >> 2)) 433 397 goto nla_put_failure; 434 398 } else { 435 399 frh->tos = inet_dscp_to_dsfield(rule4->dscp); ··· 458 418 return nla_total_size(4) /* dst */ 459 419 + nla_total_size(4) /* src */ 460 420 + nla_total_size(4) /* flow */ 461 - + nla_total_size(1); /* dscp */ 421 + + nla_total_size(1) /* dscp */ 422 + + nla_total_size(1); /* dscp mask */ 462 423 } 463 424 464 425 static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
+43 -2
net/ipv6/fib6_rules.c
··· 29 29 __be32 flowlabel; 30 30 __be32 flowlabel_mask; 31 31 dscp_t dscp; 32 + dscp_t dscp_mask; 32 33 u8 dscp_full:1; /* DSCP or TOS selector */ 33 34 }; 34 35 ··· 332 331 return 0; 333 332 } 334 333 335 - if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) 334 + if ((r->dscp ^ ip6_dscp(fl6->flowlabel)) & r->dscp_mask) 336 335 return 0; 337 336 338 337 if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask) ··· 361 360 } 362 361 363 362 rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 363 + rule6->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK); 364 364 rule6->dscp_full = true; 365 + 366 + return 0; 367 + } 368 + 369 + static int fib6_nl2rule_dscp_mask(const struct nlattr *nla, 370 + struct fib6_rule *rule6, 371 + struct netlink_ext_ack *extack) 372 + { 373 + dscp_t dscp_mask; 374 + 375 + if (!rule6->dscp_full) { 376 + NL_SET_ERR_MSG_ATTR(extack, nla, 377 + "Cannot specify DSCP mask without DSCP value"); 378 + return -EINVAL; 379 + } 380 + 381 + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 382 + if (rule6->dscp & ~dscp_mask) { 383 + NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask"); 384 + return -EINVAL; 385 + } 386 + 387 + rule6->dscp_mask = dscp_mask; 365 388 366 389 return 0; 367 390 } ··· 434 409 goto errout; 435 410 } 436 411 rule6->dscp = inet_dsfield_to_dscp(frh->tos); 412 + rule6->dscp_mask = frh->tos ? 
inet_dsfield_to_dscp(INET_DSCP_MASK) : 0; 437 413 438 414 if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) 415 + goto errout; 416 + 417 + if (tb[FRA_DSCP_MASK] && 418 + fib6_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule6, extack) < 0) 439 419 goto errout; 440 420 441 421 if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) && ··· 512 482 return 0; 513 483 } 514 484 485 + if (tb[FRA_DSCP_MASK]) { 486 + dscp_t dscp_mask; 487 + 488 + dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2); 489 + if (!rule6->dscp_full || rule6->dscp_mask != dscp_mask) 490 + return 0; 491 + } 492 + 515 493 if (tb[FRA_FLOWLABEL] && 516 494 nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel) 517 495 return 0; ··· 550 512 if (rule6->dscp_full) { 551 513 frh->tos = 0; 552 514 if (nla_put_u8(skb, FRA_DSCP, 553 - inet_dscp_to_dsfield(rule6->dscp) >> 2)) 515 + inet_dscp_to_dsfield(rule6->dscp) >> 2) || 516 + nla_put_u8(skb, FRA_DSCP_MASK, 517 + inet_dscp_to_dsfield(rule6->dscp_mask) >> 2)) 554 518 goto nla_put_failure; 555 519 } else { 556 520 frh->tos = inet_dscp_to_dsfield(rule6->dscp); ··· 579 539 return nla_total_size(16) /* dst */ 580 540 + nla_total_size(16) /* src */ 581 541 + nla_total_size(1) /* dscp */ 542 + + nla_total_size(1) /* dscp mask */ 582 543 + nla_total_size(4) /* flowlabel */ 583 544 + nla_total_size(4); /* flowlabel mask */ 584 545 }
+38
tools/testing/selftests/net/fib_rule_tests.sh
··· 310 310 "iif dscp no redirect to table" 311 311 fi 312 312 313 + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" 314 + if [ $? -eq 0 ]; then 315 + match="dscp 0x0f/0x0f" 316 + tosmatch=$(printf 0x"%x" $((0x1f << 2))) 317 + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) 318 + getmatch="tos $tosmatch" 319 + getnomatch="tos $tosnomatch" 320 + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ 321 + "$getnomatch" "dscp masked redirect to table" \ 322 + "dscp masked no redirect to table" 323 + 324 + match="dscp 0x0f/0x0f" 325 + getmatch="from $SRC_IP6 iif $DEV tos $tosmatch" 326 + getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch" 327 + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ 328 + "$getnomatch" "iif dscp masked redirect to table" \ 329 + "iif dscp masked no redirect to table" 330 + fi 331 + 313 332 fib_check_iproute_support "flowlabel" "flowlabel" 314 333 if [ $? -eq 0 ]; then 315 334 match="flowlabel 0xfffff" ··· 615 596 fib_rule4_test_match_n_redirect "$match" "$getmatch" \ 616 597 "$getnomatch" "iif dscp redirect to table" \ 617 598 "iif dscp no redirect to table" 599 + fi 600 + 601 + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" 602 + if [ $? -eq 0 ]; then 603 + match="dscp 0x0f/0x0f" 604 + tosmatch=$(printf 0x"%x" $((0x1f << 2))) 605 + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) 606 + getmatch="tos $tosmatch" 607 + getnomatch="tos $tosnomatch" 608 + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ 609 + "$getnomatch" "dscp masked redirect to table" \ 610 + "dscp masked no redirect to table" 611 + 612 + match="dscp 0x0f/0x0f" 613 + getmatch="from $SRC_IP iif $DEV tos $tosmatch" 614 + getnomatch="from $SRC_IP iif $DEV tos $tosnomatch" 615 + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ 616 + "$getnomatch" "iif dscp masked redirect to table" \ 617 + "iif dscp masked no redirect to table" 618 618 fi 619 619 } 620 620