Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

netfilter: nft_set_rbtree: validate element belonging to interval

The existing partial overlap detection does not check if the elements
belong to the interval, eg.

add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5 }
add element inet x y { 1.1.1.1-5.5.5.5 } => this should fail: ENOENT

Similar situation occurs with deletions:

add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5 }
delete element inet x y { 1.1.1.1-5.5.5.5 } => this should fail: ENOENT

This currently works via mitigation by nft in userspace, which performs
the overlap detection before sending the elements to the kernel. This
requires a previous netlink dump of the set content, which slows down
incremental updates on interval sets.

This patch extends the existing overlap detection to track the most
recent start element that already exists. The pointer to the existing
start element is stored as a cookie (no pointer dereference is ever
possible). If the end element is added and it already exists, then
check that the existing end element is adjacent to the already existing
start element. Similar logic applies to element deactivation.

This patch also annotates the timestamp to identify whether the start
cookie comes from an older batch, in which case it is reset. Otherwise,
a failing create element command leaves the start cookie in place,
resulting in bogus error reporting.

There are still a few more corner cases of overlap detection related to
the open interval that are addressed in follow-up patches.

This addresses an early design mistake where an interval is expressed
as two elements, using the NFT_SET_ELEM_INTERVAL_END flag, instead of
the more recent NFTA_SET_ELEM_KEY_END attribute that pipapo already
uses.

Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>

authored by

Pablo Neira Ayuso and committed by
Florian Westphal
782f2688 4780ec14

+143 -4
+143 -4
net/netfilter/nft_set_rbtree.c
··· 33 33 rwlock_t lock; 34 34 struct nft_array __rcu *array; 35 35 struct nft_array *array_next; 36 + unsigned long start_rbe_cookie; 36 37 unsigned long last_gc; 37 38 struct list_head expired; 39 + u64 last_tstamp; 38 40 }; 39 41 40 42 struct nft_rbtree_elem { ··· 265 263 return rb_entry(node, struct nft_rbtree_elem, node); 266 264 } 267 265 266 + static struct nft_rbtree_elem * 267 + __nft_rbtree_next_active(struct rb_node *node, u8 genmask) 268 + { 269 + struct nft_rbtree_elem *next_rbe; 270 + 271 + while (node) { 272 + next_rbe = rb_entry(node, struct nft_rbtree_elem, node); 273 + if (!nft_set_elem_active(&next_rbe->ext, genmask)) { 274 + node = rb_next(node); 275 + continue; 276 + } 277 + 278 + return next_rbe; 279 + } 280 + 281 + return NULL; 282 + } 283 + 284 + static struct nft_rbtree_elem * 285 + nft_rbtree_next_active(struct nft_rbtree_elem *rbe, u8 genmask) 286 + { 287 + return __nft_rbtree_next_active(rb_next(&rbe->node), genmask); 288 + } 289 + 290 + static void nft_rbtree_maybe_reset_start_cookie(struct nft_rbtree *priv, 291 + u64 tstamp) 292 + { 293 + if (priv->last_tstamp != tstamp) { 294 + priv->start_rbe_cookie = 0; 295 + priv->last_tstamp = tstamp; 296 + } 297 + } 298 + 299 + static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv, 300 + const struct nft_rbtree_elem *rbe) 301 + { 302 + priv->start_rbe_cookie = (unsigned long)rbe; 303 + } 304 + 305 + static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv, 306 + const struct nft_rbtree_elem *rbe) 307 + { 308 + return priv->start_rbe_cookie == (unsigned long)rbe; 309 + } 310 + 311 + static bool nft_rbtree_insert_same_interval(const struct net *net, 312 + struct nft_rbtree *priv, 313 + struct nft_rbtree_elem *rbe) 314 + { 315 + u8 genmask = nft_genmask_next(net); 316 + struct nft_rbtree_elem *next_rbe; 317 + 318 + if (!priv->start_rbe_cookie) 319 + return true; 320 + 321 + next_rbe = nft_rbtree_next_active(rbe, genmask); 322 + if (next_rbe) { 323 + /* Closest start element differs 
from last element added. */ 324 + if (nft_rbtree_interval_start(next_rbe) && 325 + nft_rbtree_cmp_start_cookie(priv, next_rbe)) { 326 + priv->start_rbe_cookie = 0; 327 + return true; 328 + } 329 + } 330 + 331 + priv->start_rbe_cookie = 0; 332 + 333 + return false; 334 + } 335 + 268 336 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, 269 337 struct nft_rbtree_elem *new, 270 - struct nft_elem_priv **elem_priv) 338 + struct nft_elem_priv **elem_priv, u64 tstamp) 271 339 { 272 340 struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL, *rbe_prev; 273 341 struct rb_node *node, *next, *parent, **p, *first = NULL; 274 342 struct nft_rbtree *priv = nft_set_priv(set); 275 343 u8 cur_genmask = nft_genmask_cur(net); 276 344 u8 genmask = nft_genmask_next(net); 277 - u64 tstamp = nft_net_tstamp(net); 278 345 int d; 279 346 280 347 /* Descend the tree to search for an existing element greater than the ··· 449 378 } 450 379 } 451 380 381 + if (nft_rbtree_interval_null(set, new)) 382 + priv->start_rbe_cookie = 0; 383 + else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie) 384 + priv->start_rbe_cookie = 0; 385 + 452 386 /* - new start element matching existing start element: full overlap 453 387 * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. 454 388 */ 455 389 if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && 456 390 nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { 457 391 *elem_priv = &rbe_ge->priv; 392 + nft_rbtree_set_start_cookie(priv, rbe_ge); 458 393 return -EEXIST; 459 394 } 460 395 ··· 476 399 return -ECANCELED; 477 400 478 401 *elem_priv = &rbe_le->priv; 402 + 403 + /* - start and end element belong to the same interval. 
*/ 404 + if (!nft_rbtree_insert_same_interval(net, priv, rbe_le)) 405 + return -ENOTEMPTY; 406 + 479 407 return -EEXIST; 480 408 } 481 409 ··· 625 543 { 626 544 struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); 627 545 struct nft_rbtree *priv = nft_set_priv(set); 546 + u64 tstamp = nft_net_tstamp(net); 628 547 int err; 548 + 549 + nft_rbtree_maybe_reset_start_cookie(priv, tstamp); 629 550 630 551 if (nft_array_may_resize(set) < 0) 631 552 return -ENOMEM; ··· 640 555 cond_resched(); 641 556 642 557 write_lock_bh(&priv->lock); 643 - err = __nft_rbtree_insert(net, set, rbe, elem_priv); 558 + err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp); 644 559 write_unlock_bh(&priv->lock); 645 560 } while (err == -EAGAIN); 646 561 ··· 673 588 nft_clear(net, &rbe->ext); 674 589 } 675 590 591 + static struct nft_rbtree_elem * 592 + nft_rbtree_next_inactive(struct nft_rbtree_elem *rbe, u8 genmask) 593 + { 594 + struct nft_rbtree_elem *next_rbe; 595 + struct rb_node *node; 596 + 597 + node = rb_next(&rbe->node); 598 + if (node) { 599 + next_rbe = rb_entry(node, struct nft_rbtree_elem, node); 600 + if (nft_rbtree_interval_start(next_rbe) && 601 + !nft_set_elem_active(&next_rbe->ext, genmask)) 602 + return next_rbe; 603 + } 604 + 605 + return NULL; 606 + } 607 + 608 + static bool nft_rbtree_deactivate_same_interval(const struct net *net, 609 + struct nft_rbtree *priv, 610 + struct nft_rbtree_elem *rbe) 611 + { 612 + u8 genmask = nft_genmask_next(net); 613 + struct nft_rbtree_elem *next_rbe; 614 + 615 + if (!priv->start_rbe_cookie) 616 + return true; 617 + 618 + next_rbe = nft_rbtree_next_inactive(rbe, genmask); 619 + if (next_rbe) { 620 + /* Closest start element differs from last element added. 
*/ 621 + if (nft_rbtree_interval_start(next_rbe) && 622 + nft_rbtree_cmp_start_cookie(priv, next_rbe)) { 623 + priv->start_rbe_cookie = 0; 624 + return true; 625 + } 626 + } 627 + 628 + priv->start_rbe_cookie = 0; 629 + 630 + return false; 631 + } 632 + 676 633 static void nft_rbtree_flush(const struct net *net, 677 634 const struct nft_set *set, 678 635 struct nft_elem_priv *elem_priv) ··· 729 602 const struct nft_set_elem *elem) 730 603 { 731 604 struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); 732 - const struct nft_rbtree *priv = nft_set_priv(set); 605 + struct nft_rbtree *priv = nft_set_priv(set); 733 606 const struct rb_node *parent = priv->root.rb_node; 734 607 u8 genmask = nft_genmask_next(net); 735 608 u64 tstamp = nft_net_tstamp(net); 736 609 int d; 610 + 611 + nft_rbtree_maybe_reset_start_cookie(priv, tstamp); 612 + 613 + if (nft_rbtree_interval_start(this) || 614 + nft_rbtree_interval_null(set, this)) 615 + priv->start_rbe_cookie = 0; 737 616 738 617 if (nft_array_may_resize(set) < 0) 739 618 return NULL; ··· 768 635 parent = parent->rb_left; 769 636 continue; 770 637 } 638 + 639 + if (nft_rbtree_interval_start(rbe)) 640 + nft_rbtree_set_start_cookie(priv, rbe); 641 + else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe)) 642 + return NULL; 643 + 771 644 nft_rbtree_flush(net, set, &rbe->priv); 772 645 return &rbe->priv; 773 646 }