Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'tcp-bhash2-fixes'

Kuniyuki Iwashima says:

===================
tcp: Fix bhash2 and TIME_WAIT regression.

We forgot to add TIME_WAIT sockets (twsk) to bhash2. As a result, a
TIME_WAIT socket cannot prevent bind() to the same local address and port.

Changes:
  v1:
    * Patch 1:
      * Add tw_bind2_node in inet_timewait_sock instead of
        moving sk_bind2_node from struct sock to struct
        sock_common.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+159 -10
+4
include/net/inet_hashtables.h
··· 108 108 struct hlist_node node; 109 109 /* List of sockets hashed to this bucket */ 110 110 struct hlist_head owners; 111 + /* bhash has twsk in owners, but bhash2 has twsk in 112 + * deathrow not to add a member in struct sock_common. 113 + */ 114 + struct hlist_head deathrow; 111 115 }; 112 116 113 117 static inline struct net *ib_net(const struct inet_bind_bucket *ib)
+5
include/net/inet_timewait_sock.h
··· 73 73 u32 tw_priority; 74 74 struct timer_list tw_timer; 75 75 struct inet_bind_bucket *tw_tb; 76 + struct inet_bind2_bucket *tw_tb2; 77 + struct hlist_node tw_bind2_node; 76 78 }; 77 79 #define tw_tclass tw_tos 80 + 81 + #define twsk_for_each_bound_bhash2(__tw, list) \ 82 + hlist_for_each_entry(__tw, list, tw_bind2_node) 78 83 79 84 static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) 80 85 {
+23 -5
net/ipv4/inet_connection_sock.c
··· 173 173 return false; 174 174 } 175 175 176 + static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, 177 + kuid_t sk_uid, bool relax, 178 + bool reuseport_cb_ok, bool reuseport_ok) 179 + { 180 + if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) 181 + return false; 182 + 183 + return inet_bind_conflict(sk, sk2, sk_uid, relax, 184 + reuseport_cb_ok, reuseport_ok); 185 + } 186 + 176 187 static bool inet_bhash2_conflict(const struct sock *sk, 177 188 const struct inet_bind2_bucket *tb2, 178 189 kuid_t sk_uid, 179 190 bool relax, bool reuseport_cb_ok, 180 191 bool reuseport_ok) 181 192 { 193 + struct inet_timewait_sock *tw2; 182 194 struct sock *sk2; 183 195 184 196 sk_for_each_bound_bhash2(sk2, &tb2->owners) { 185 - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) 186 - continue; 187 - 188 - if (inet_bind_conflict(sk, sk2, sk_uid, relax, 189 - reuseport_cb_ok, reuseport_ok)) 197 + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, 198 + reuseport_cb_ok, reuseport_ok)) 190 199 return true; 191 200 } 201 + 202 + twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { 203 + sk2 = (struct sock *)tw2; 204 + 205 + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, 206 + reuseport_cb_ok, reuseport_ok)) 207 + return true; 208 + } 209 + 192 210 return false; 193 211 } 194 212
+5 -3
net/ipv4/inet_hashtables.c
··· 116 116 #endif 117 117 tb->rcv_saddr = sk->sk_rcv_saddr; 118 118 INIT_HLIST_HEAD(&tb->owners); 119 + INIT_HLIST_HEAD(&tb->deathrow); 119 120 hlist_add_head(&tb->node, &head->chain); 120 121 } 121 122 ··· 138 137 /* Caller must hold hashbucket lock for this tb with local BH disabled */ 139 138 void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) 140 139 { 141 - if (hlist_empty(&tb->owners)) { 140 + if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { 142 141 __hlist_del(&tb->node); 143 142 kmem_cache_free(cachep, tb); 144 143 } ··· 1104 1103 /* Head lock still held and bh's disabled */ 1105 1104 inet_bind_hash(sk, tb, tb2, port); 1106 1105 1107 - spin_unlock(&head2->lock); 1108 - 1109 1106 if (sk_unhashed(sk)) { 1110 1107 inet_sk(sk)->inet_sport = htons(port); 1111 1108 inet_ehash_nolisten(sk, (struct sock *)tw, NULL); 1112 1109 } 1113 1110 if (tw) 1114 1111 inet_twsk_bind_unhash(tw, hinfo); 1112 + 1113 + spin_unlock(&head2->lock); 1115 1114 spin_unlock(&head->lock); 1115 + 1116 1116 if (tw) 1117 1117 inet_twsk_deschedule_put(tw); 1118 1118 local_bh_enable();
+29 -2
net/ipv4/inet_timewait_sock.c
··· 29 29 void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, 30 30 struct inet_hashinfo *hashinfo) 31 31 { 32 + struct inet_bind2_bucket *tb2 = tw->tw_tb2; 32 33 struct inet_bind_bucket *tb = tw->tw_tb; 33 34 34 35 if (!tb) ··· 38 37 __hlist_del(&tw->tw_bind_node); 39 38 tw->tw_tb = NULL; 40 39 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 40 + 41 + __hlist_del(&tw->tw_bind2_node); 42 + tw->tw_tb2 = NULL; 43 + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); 44 + 41 45 __sock_put((struct sock *)tw); 42 46 } 43 47 ··· 51 45 { 52 46 struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; 53 47 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 54 - struct inet_bind_hashbucket *bhead; 48 + struct inet_bind_hashbucket *bhead, *bhead2; 55 49 56 50 spin_lock(lock); 57 51 sk_nulls_del_node_init_rcu((struct sock *)tw); ··· 60 54 /* Disassociate with bind bucket. */ 61 55 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, 62 56 hashinfo->bhash_size)]; 57 + bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, 58 + twsk_net(tw), tw->tw_num); 63 59 64 60 spin_lock(&bhead->lock); 61 + spin_lock(&bhead2->lock); 65 62 inet_twsk_bind_unhash(tw, hashinfo); 63 + spin_unlock(&bhead2->lock); 66 64 spin_unlock(&bhead->lock); 67 65 68 66 refcount_dec(&tw->tw_dr->tw_refcount); ··· 103 93 hlist_add_head(&tw->tw_bind_node, list); 104 94 } 105 95 96 + static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, 97 + struct hlist_head *list) 98 + { 99 + hlist_add_head(&tw->tw_bind2_node, list); 100 + } 101 + 106 102 /* 107 103 * Enter the time wait state. This is called with locally disabled BH. 
108 104 * Essentially we whip up a timewait bucket, copy the relevant info into it ··· 121 105 const struct inet_connection_sock *icsk = inet_csk(sk); 122 106 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); 123 107 spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 124 - struct inet_bind_hashbucket *bhead; 108 + struct inet_bind_hashbucket *bhead, *bhead2; 109 + 125 110 /* Step 1: Put TW into bind hash. Original socket stays there too. 126 111 Note, that any socket with inet->num != 0 MUST be bound in 127 112 binding cache, even if it is closed. 128 113 */ 129 114 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, 130 115 hashinfo->bhash_size)]; 116 + bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); 117 + 131 118 spin_lock(&bhead->lock); 119 + spin_lock(&bhead2->lock); 120 + 132 121 tw->tw_tb = icsk->icsk_bind_hash; 133 122 WARN_ON(!icsk->icsk_bind_hash); 134 123 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 124 + 125 + tw->tw_tb2 = icsk->icsk_bind2_hash; 126 + WARN_ON(!icsk->icsk_bind2_hash); 127 + inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); 128 + 129 + spin_unlock(&bhead2->lock); 135 130 spin_unlock(&bhead->lock); 136 131 137 132 spin_lock(lock);
+1
tools/testing/selftests/net/.gitignore
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 bind_bhash 3 + bind_timewait 3 4 csum 4 5 cmsg_sender 5 6 diag_uid
+92
tools/testing/selftests/net/bind_timewait.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright Amazon.com Inc. or its affiliates. */ 3 + 4 + #include <sys/socket.h> 5 + #include <netinet/in.h> 6 + 7 + #include "../kselftest_harness.h" 8 + 9 + FIXTURE(bind_timewait) 10 + { 11 + struct sockaddr_in addr; 12 + socklen_t addrlen; 13 + }; 14 + 15 + FIXTURE_VARIANT(bind_timewait) 16 + { 17 + __u32 addr_const; 18 + }; 19 + 20 + FIXTURE_VARIANT_ADD(bind_timewait, localhost) 21 + { 22 + .addr_const = INADDR_LOOPBACK 23 + }; 24 + 25 + FIXTURE_VARIANT_ADD(bind_timewait, addrany) 26 + { 27 + .addr_const = INADDR_ANY 28 + }; 29 + 30 + FIXTURE_SETUP(bind_timewait) 31 + { 32 + self->addr.sin_family = AF_INET; 33 + self->addr.sin_port = 0; 34 + self->addr.sin_addr.s_addr = htonl(variant->addr_const); 35 + self->addrlen = sizeof(self->addr); 36 + } 37 + 38 + FIXTURE_TEARDOWN(bind_timewait) 39 + { 40 + } 41 + 42 + void create_timewait_socket(struct __test_metadata *_metadata, 43 + FIXTURE_DATA(bind_timewait) *self) 44 + { 45 + int server_fd, client_fd, child_fd, ret; 46 + struct sockaddr_in addr; 47 + socklen_t addrlen; 48 + 49 + server_fd = socket(AF_INET, SOCK_STREAM, 0); 50 + ASSERT_GT(server_fd, 0); 51 + 52 + ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen); 53 + ASSERT_EQ(ret, 0); 54 + 55 + ret = listen(server_fd, 1); 56 + ASSERT_EQ(ret, 0); 57 + 58 + ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen); 59 + ASSERT_EQ(ret, 0); 60 + 61 + client_fd = socket(AF_INET, SOCK_STREAM, 0); 62 + ASSERT_GT(client_fd, 0); 63 + 64 + ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen); 65 + ASSERT_EQ(ret, 0); 66 + 67 + addrlen = sizeof(addr); 68 + child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); 69 + ASSERT_GT(child_fd, 0); 70 + 71 + close(child_fd); 72 + close(client_fd); 73 + close(server_fd); 74 + } 75 + 76 + TEST_F(bind_timewait, 1) 77 + { 78 + int fd, ret; 79 + 80 + create_timewait_socket(_metadata, self); 81 + 82 + fd = socket(AF_INET, 
SOCK_STREAM, 0); 83 + ASSERT_GT(fd, 0); 84 + 85 + ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen); 86 + ASSERT_EQ(ret, -1); 87 + ASSERT_EQ(errno, EADDRINUSE); 88 + 89 + close(fd); 90 + } 91 + 92 + TEST_HARNESS_MAIN