Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'dlm-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:
"This set includes a number of minor fixes and cleanups related to the
networking changes in the last release.

A patch to delay ack messages reduces network traffic significantly"

* tag 'dlm-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
fs: dlm: avoid comms shutdown delay in release_lockspace
fs: dlm: fix return -EINTR on recovery stopped
fs: dlm: implement delayed ack handling
fs: dlm: move receive loop into receive handler
fs: dlm: fix multiple empty writequeue alloc
fs: dlm: generic connect func
fs: dlm: auto load sctp module
fs: dlm: introduce generic listen
fs: dlm: move to static proto ops
fs: dlm: introduce con_next_wq helper
fs: dlm: cleanup and remove _send_rcom
fs: dlm: clear CF_APP_LIMITED on close
fs: dlm: fix typo in tlv prefix
fs: dlm: use READ_ONCE for config var
fs: dlm: use sk->sk_socket instead of con->sock

+456 -413
+3 -1
fs/dlm/dir.c
··· 85 85 for (;;) { 86 86 int left; 87 87 error = dlm_recovery_stopped(ls); 88 - if (error) 88 + if (error) { 89 + error = -EINTR; 89 90 goto out_free; 91 + } 90 92 91 93 error = dlm_rcom_names(ls, memb->nodeid, 92 94 last_name, last_len);
+1 -1
fs/dlm/dlm_internal.h
··· 468 468 struct dlm_opt_header { 469 469 uint16_t t_type; 470 470 uint16_t t_length; 471 - uint32_t o_pad; 471 + uint32_t t_pad; 472 472 /* need to be 8 byte aligned */ 473 473 char t_value[]; 474 474 };
+2 -1
fs/dlm/lockspace.c
··· 498 498 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | 499 499 DLM_LSFL_NEWEXCL)); 500 500 501 - size = dlm_config.ci_rsbtbl_size; 501 + size = READ_ONCE(dlm_config.ci_rsbtbl_size); 502 502 ls->ls_rsbtbl_size = size; 503 503 504 504 ls->ls_rsbtbl = vmalloc(array_size(size, sizeof(struct dlm_rsbtable))); ··· 793 793 794 794 if (ls_count == 1) { 795 795 dlm_scand_stop(); 796 + dlm_clear_members(ls); 796 797 dlm_midcomms_shutdown(); 797 798 } 798 799
+384 -382
fs/dlm/lowcomms.c
··· 84 84 struct list_head writequeue; /* List of outgoing writequeue_entries */ 85 85 spinlock_t writequeue_lock; 86 86 atomic_t writequeue_cnt; 87 - void (*connect_action) (struct connection *); /* What to do to connect */ 88 - void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ 89 - bool (*eof_condition)(struct connection *con); /* What to do to eof check */ 87 + struct mutex wq_alloc; 90 88 int retries; 91 89 #define MAX_CONNECT_RETRIES 3 92 90 struct hlist_node list; ··· 143 145 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; 144 146 }; 145 147 148 + struct dlm_proto_ops { 149 + bool try_new_addr; 150 + const char *name; 151 + int proto; 152 + 153 + int (*connect)(struct connection *con, struct socket *sock, 154 + struct sockaddr *addr, int addr_len); 155 + void (*sockopts)(struct socket *sock); 156 + int (*bind)(struct socket *sock); 157 + int (*listen_validate)(void); 158 + void (*listen_sockopts)(struct socket *sock); 159 + int (*listen_bind)(struct socket *sock); 160 + /* What to do to shutdown */ 161 + void (*shutdown_action)(struct connection *con); 162 + /* What to do to eof check */ 163 + bool (*eof_condition)(struct connection *con); 164 + }; 165 + 146 166 static struct listen_sock_callbacks { 147 167 void (*sk_error_report)(struct sock *); 148 168 void (*sk_data_ready)(struct sock *); ··· 184 168 static DEFINE_SPINLOCK(connections_lock); 185 169 DEFINE_STATIC_SRCU(connections_srcu); 186 170 171 + static const struct dlm_proto_ops *dlm_proto_ops; 172 + 187 173 static void process_recv_sockets(struct work_struct *work); 188 174 static void process_send_sockets(struct work_struct *work); 189 175 190 - static void sctp_connect_to_sock(struct connection *con); 191 - static void tcp_connect_to_sock(struct connection *con); 192 - static void dlm_tcp_shutdown(struct connection *con); 176 + /* need to held writequeue_lock */ 177 + static struct writequeue_entry *con_next_wq(struct connection *con) 178 + { 179 + struct writequeue_entry *e; 180 + 181 + if (list_empty(&con->writequeue)) 182 + return NULL; 183 + 184 + e = list_first_entry(&con->writequeue, struct writequeue_entry, 185 + list); 186 + if (e->len == 0) 187 + return NULL; 188 + 189 + return e; 190 + } 193 191 194 192 static struct connection *__find_con(int nodeid, int r) 195 193 { ··· 238 208 INIT_WORK(&con->rwork, process_recv_sockets); 239 209 init_waitqueue_head(&con->shutdown_wait); 240 210 241 - switch (dlm_config.ci_protocol) { 242 - case DLM_PROTO_TCP: 243 - con->connect_action = tcp_connect_to_sock; 244 - con->shutdown_action = dlm_tcp_shutdown; 245 - con->eof_condition = tcp_eof_condition; 246 - break; 247 - case DLM_PROTO_SCTP: 248 - con->connect_action = sctp_connect_to_sock; 249 - break; 250 - default: 251 - kfree(con->rx_buf); 252 - return -EINVAL; 253 - } 254 - 255 211 return 0; 256 212 } 257 213 ··· 264 248 kfree(con); 265 249 return NULL; 266 250 } 251 + 252 + mutex_init(&con->wq_alloc); 267 253 268 254 spin_lock(&connections_lock); 269 255 /* Because multiple workqueues/threads calls this function it can ··· 601 583 goto out; 602 584 603 585 orig_report = listen_sock.sk_error_report; 604 - if (con->sock == NULL || 605 - kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) { 586 + if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) { 606 587 printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 607 588 "sending to node %d, port %d, " 608 589 "sk_err=%d/%d\n", dlm_our_nodeid(), ··· 818 801 819 802 con->rx_leftover = 0; 820 803 con->retries = 0; 804 + clear_bit(CF_APP_LIMITED, &con->flags); 821 805 clear_bit(CF_CONNECTED, &con->flags); 822 806 clear_bit(CF_DELAY_CONNECT, &con->flags); 823 807 clear_bit(CF_RECONNECT, &con->flags); ··· 895 877 /* Data received from remote end */ 896 878 static int receive_from_sock(struct connection *con) 897 879 { 898 - int call_again_soon = 0; 899 880 struct msghdr msg; 900 881 struct kvec iov; 901 882 int ret, buflen; ··· 914 897 goto out_resched; 915 898 } 916 899 917 - /* calculate new buffer parameter regarding last receive and 918 - * possible leftover bytes 919 - */ 920 - iov.iov_base = con->rx_buf + con->rx_leftover; 921 - iov.iov_len = con->rx_buflen - con->rx_leftover; 900 + for (;;) { 901 + /* calculate new buffer parameter regarding last receive and 902 + * possible leftover bytes 903 + */ 904 + iov.iov_base = con->rx_buf + con->rx_leftover; 905 + iov.iov_len = con->rx_buflen - con->rx_leftover; 922 906 923 - memset(&msg, 0, sizeof(msg)); 924 - msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 925 - ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, 926 - msg.msg_flags); 927 - if (ret <= 0) 928 - goto out_close; 929 - else if (ret == iov.iov_len) 930 - call_again_soon = 1; 907 + memset(&msg, 0, sizeof(msg)); 908 + msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 909 + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, 910 + msg.msg_flags); 911 + if (ret == -EAGAIN) 912 + break; 913 + else if (ret <= 0) 914 + goto out_close; 931 915 932 - /* new buflen according readed bytes and leftover from last receive */ 933 - buflen = ret + con->rx_leftover; 934 - ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen); 935 - if (ret < 0) 936 - goto out_close; 916 + /* new buflen according readed bytes and leftover from last receive */ 917 + buflen = ret + con->rx_leftover; 918 + ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen); 919 + if (ret < 0) 920 + goto out_close; 937 921 938 - /* calculate leftover bytes from process and put it into begin of 939 - * the receive buffer, so next receive we have the full message 940 - * at the start address of the receive buffer. 941 - */ 942 - con->rx_leftover = buflen - ret; 943 - if (con->rx_leftover) { 944 - memmove(con->rx_buf, con->rx_buf + ret, 945 - con->rx_leftover); 946 - call_again_soon = true; 922 + /* calculate leftover bytes from process and put it into begin of 923 + * the receive buffer, so next receive we have the full message 924 + * at the start address of the receive buffer. 925 + */ 926 + con->rx_leftover = buflen - ret; 927 + if (con->rx_leftover) { 928 + memmove(con->rx_buf, con->rx_buf + ret, 929 + con->rx_leftover); 930 + } 947 931 } 948 932 949 - if (call_again_soon) 950 - goto out_resched; 951 - 933 + dlm_midcomms_receive_done(con->nodeid); 952 934 mutex_unlock(&con->sock_mutex); 953 935 return 0; 954 936 ··· 962 946 log_print("connection %p got EOF from %d", 963 947 con, con->nodeid); 964 948 965 - if (con->eof_condition && con->eof_condition(con)) { 949 + if (dlm_proto_ops->eof_condition && 950 + dlm_proto_ops->eof_condition(con)) { 966 951 set_bit(CF_EOF, &con->flags); 967 952 mutex_unlock(&con->sock_mutex); 968 953 } else { ··· 1151 1134 return result; 1152 1135 } 1153 1136 1154 - /* Initiate an SCTP association. 1155 - This is a special case of send_to_sock() in that we don't yet have a 1156 - peeled-off socket for this association, so we use the listening socket 1157 - and add the primary IP address of the remote node. 1158 - */ 1159 - static void sctp_connect_to_sock(struct connection *con) 1160 - { 1161 - struct sockaddr_storage daddr; 1162 - int result; 1163 - int addr_len; 1164 - struct socket *sock; 1165 - unsigned int mark; 1166 - 1167 - mutex_lock(&con->sock_mutex); 1168 - 1169 - /* Some odd races can cause double-connects, ignore them */ 1170 - if (con->retries++ > MAX_CONNECT_RETRIES) 1171 - goto out; 1172 - 1173 - if (con->sock) { 1174 - log_print("node %d already connected.", con->nodeid); 1175 - goto out; 1176 - } 1177 - 1178 - memset(&daddr, 0, sizeof(daddr)); 1179 - result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark); 1180 - if (result < 0) { 1181 - log_print("no address for nodeid %d", con->nodeid); 1182 - goto out; 1183 - } 1184 - 1185 - /* Create a socket to communicate with */ 1186 - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1187 - SOCK_STREAM, IPPROTO_SCTP, &sock); 1188 - if (result < 0) 1189 - goto socket_err; 1190 - 1191 - sock_set_mark(sock->sk, mark); 1192 - 1193 - add_sock(sock, con); 1194 - 1195 - /* Bind to all addresses. */ 1196 - if (sctp_bind_addrs(con->sock, 0)) 1197 - goto bind_err; 1198 - 1199 - make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len); 1200 - 1201 - log_print_ratelimited("connecting to %d", con->nodeid); 1202 - 1203 - /* Turn off Nagle's algorithm */ 1204 - sctp_sock_set_nodelay(sock->sk); 1205 - 1206 - /* 1207 - * Make sock->ops->connect() function return in specified time, 1208 - * since O_NONBLOCK argument in connect() function does not work here, 1209 - * then, we should restore the default value of this attribute. 1210 - */ 1211 - sock_set_sndtimeo(sock->sk, 5); 1212 - result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len, 1213 - 0); 1214 - sock_set_sndtimeo(sock->sk, 0); 1215 - 1216 - if (result == -EINPROGRESS) 1217 - result = 0; 1218 - if (result == 0) { 1219 - if (!test_and_set_bit(CF_CONNECTED, &con->flags)) 1220 - log_print("successful connected to node %d", con->nodeid); 1221 - goto out; 1222 - } 1223 - 1224 - bind_err: 1225 - con->sock = NULL; 1226 - sock_release(sock); 1227 - 1228 - socket_err: 1229 - /* 1230 - * Some errors are fatal and this list might need adjusting. For other 1231 - * errors we try again until the max number of retries is reached. 1232 - */ 1233 - if (result != -EHOSTUNREACH && 1234 - result != -ENETUNREACH && 1235 - result != -ENETDOWN && 1236 - result != -EINVAL && 1237 - result != -EPROTONOSUPPORT) { 1238 - log_print("connect %d try %d error %d", con->nodeid, 1239 - con->retries, result); 1240 - mutex_unlock(&con->sock_mutex); 1241 - msleep(1000); 1242 - lowcomms_connect_sock(con); 1243 - return; 1244 - } 1245 - 1246 - out: 1247 - mutex_unlock(&con->sock_mutex); 1248 - } 1249 - 1250 - /* Connect a new socket to its peer */ 1251 - static void tcp_connect_to_sock(struct connection *con) 1252 - { 1253 - struct sockaddr_storage saddr, src_addr; 1254 - unsigned int mark; 1255 - int addr_len; 1256 - struct socket *sock = NULL; 1257 - int result; 1258 - 1259 - mutex_lock(&con->sock_mutex); 1260 - if (con->retries++ > MAX_CONNECT_RETRIES) 1261 - goto out; 1262 - 1263 - /* Some odd races can cause double-connects, ignore them */ 1264 - if (con->sock) 1265 - goto out; 1266 - 1267 - /* Create a socket to communicate with */ 1268 - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1269 - SOCK_STREAM, IPPROTO_TCP, &sock); 1270 - if (result < 0) 1271 - goto out_err; 1272 - 1273 - memset(&saddr, 0, sizeof(saddr)); 1274 - result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark); 1275 - if (result < 0) { 1276 - log_print("no address for nodeid %d", con->nodeid); 1277 - goto out_err; 1278 - } 1279 - 1280 - sock_set_mark(sock->sk, mark); 1281 - 1282 - add_sock(sock, con); 1283 - 1284 - /* Bind to our cluster-known address connecting to avoid 1285 - routing problems */ 1286 - memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); 1287 - make_sockaddr(&src_addr, 0, &addr_len); 1288 - result = sock->ops->bind(sock, (struct sockaddr *) &src_addr, 1289 - addr_len); 1290 - if (result < 0) { 1291 - log_print("could not bind for connect: %d", result); 1292 - /* This *may* not indicate a critical error */ 1293 - } 1294 - 1295 - make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); 1296 - 1297 - log_print_ratelimited("connecting to %d", con->nodeid); 1298 - 1299 - /* Turn off Nagle's algorithm */ 1300 - tcp_sock_set_nodelay(sock->sk); 1301 - 1302 - result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 1303 - O_NONBLOCK); 1304 - if (result == -EINPROGRESS) 1305 - result = 0; 1306 - if (result == 0) 1307 - goto out; 1308 - 1309 - out_err: 1310 - if (con->sock) { 1311 - sock_release(con->sock); 1312 - con->sock = NULL; 1313 - } else if (sock) { 1314 - sock_release(sock); 1315 - } 1316 - /* 1317 - * Some errors are fatal and this list might need adjusting. For other 1318 - * errors we try again until the max number of retries is reached. 1319 - */ 1320 - if (result != -EHOSTUNREACH && 1321 - result != -ENETUNREACH && 1322 - result != -ENETDOWN && 1323 - result != -EINVAL && 1324 - result != -EPROTONOSUPPORT) { 1325 - log_print("connect %d try %d error %d", con->nodeid, 1326 - con->retries, result); 1327 - mutex_unlock(&con->sock_mutex); 1328 - msleep(1000); 1329 - lowcomms_connect_sock(con); 1330 - return; 1331 - } 1332 - out: 1333 - mutex_unlock(&con->sock_mutex); 1334 - return; 1335 - } 1336 - 1337 - /* On error caller must run dlm_close_sock() for the 1338 - * listen connection socket. 1339 - */ 1340 - static int tcp_create_listen_sock(struct listen_connection *con, 1341 - struct sockaddr_storage *saddr) 1342 - { 1343 - struct socket *sock = NULL; 1344 - int result = 0; 1345 - int addr_len; 1346 - 1347 - if (dlm_local_addr[0]->ss_family == AF_INET) 1348 - addr_len = sizeof(struct sockaddr_in); 1349 - else 1350 - addr_len = sizeof(struct sockaddr_in6); 1351 - 1352 - /* Create a socket to communicate with */ 1353 - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1354 - SOCK_STREAM, IPPROTO_TCP, &sock); 1355 - if (result < 0) { 1356 - log_print("Can't create listening comms socket"); 1357 - goto create_out; 1358 - } 1359 - 1360 - sock_set_mark(sock->sk, dlm_config.ci_mark); 1361 - 1362 - /* Turn off Nagle's algorithm */ 1363 - tcp_sock_set_nodelay(sock->sk); 1364 - 1365 - sock_set_reuseaddr(sock->sk); 1366 - 1367 - add_listen_sock(sock, con); 1368 - 1369 - /* Bind to our port */ 1370 - make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); 1371 - result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); 1372 - if (result < 0) { 1373 - log_print("Can't bind to port %d", dlm_config.ci_tcp_port); 1374 - goto create_out; 1375 - } 1376 - sock_set_keepalive(sock->sk); 1377 - 1378 - result = sock->ops->listen(sock, 5); 1379 - if (result < 0) { 1380 - log_print("Can't listen on port %d", dlm_config.ci_tcp_port); 1381 - goto create_out; 1382 - } 1383 - 1384 - return 0; 1385 - 1386 - create_out: 1387 - return result; 1388 - } 1389 - 1390 1137 /* Get local addresses */ 1391 1138 static void init_local(void) 1392 1139 { ··· 1176 1395 for (i = 0; i < dlm_local_count; i++) 1177 1396 kfree(dlm_local_addr[i]); 1178 1397 } 1179 - 1180 - /* Initialise SCTP socket and bind to all interfaces 1181 - * On error caller must run dlm_close_sock() for the 1182 - * listen connection socket. 1183 - */ 1184 - static int sctp_listen_for_all(struct listen_connection *con) 1185 - { 1186 - struct socket *sock = NULL; 1187 - int result = -EINVAL; 1188 - 1189 - log_print("Using SCTP for communications"); 1190 - 1191 - result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1192 - SOCK_STREAM, IPPROTO_SCTP, &sock); 1193 - if (result < 0) { 1194 - log_print("Can't create comms socket, check SCTP is loaded"); 1195 - goto out; 1196 - } 1197 - 1198 - sock_set_rcvbuf(sock->sk, NEEDED_RMEM); 1199 - sock_set_mark(sock->sk, dlm_config.ci_mark); 1200 - sctp_sock_set_nodelay(sock->sk); 1201 - 1202 - add_listen_sock(sock, con); 1203 - 1204 - /* Bind to all addresses. */ 1205 - result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port); 1206 - if (result < 0) 1207 - goto out; 1208 - 1209 - result = sock->ops->listen(sock, 5); 1210 - if (result < 0) { 1211 - log_print("Can't set socket listening"); 1212 - goto out; 1213 - } 1214 - 1215 - return 0; 1216 - 1217 - out: 1218 - return result; 1219 - } 1220 - 1221 - static int tcp_listen_for_all(void) 1222 - { 1223 - /* We don't support multi-homed hosts */ 1224 - if (dlm_local_count > 1) { 1225 - log_print("TCP protocol can't handle multi-homed hosts, " 1226 - "try SCTP"); 1227 - return -EINVAL; 1228 - } 1229 - 1230 - log_print("Using TCP for communications"); 1231 - 1232 - return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]); 1233 - } 1234 - 1235 - 1236 1398 1237 1399 static struct writequeue_entry *new_writequeue_entry(struct connection *con, 1238 1400 gfp_t allocation) ··· 1252 1528 { 1253 1529 struct writequeue_entry *e; 1254 1530 struct dlm_msg *msg; 1531 + bool sleepable; 1255 1532 1256 1533 msg = kzalloc(sizeof(*msg), allocation); 1257 1534 if (!msg) 1258 1535 return NULL; 1259 1536 1537 + /* this mutex is being used as a wait to avoid multiple "fast" 1538 + * new writequeue page list entry allocs in new_wq_entry in 1539 + * normal operation which is sleepable context. Without it 1540 + * we could end in multiple writequeue entries with one 1541 + * dlm message because multiple callers were waiting at 1542 + * the writequeue_lock in new_wq_entry(). 1543 + */ 1544 + sleepable = gfpflags_normal_context(allocation); 1545 + if (sleepable) 1546 + mutex_lock(&con->wq_alloc); 1547 + 1260 1548 kref_init(&msg->ref); 1261 1549 1262 1550 e = new_wq_entry(con, len, allocation, ppc, cb, mh); 1263 1551 if (!e) { 1552 + if (sleepable) 1553 + mutex_unlock(&con->wq_alloc); 1554 + 1264 1555 kfree(msg); 1265 1556 return NULL; 1266 1557 } 1558 + 1559 + if (sleepable) 1560 + mutex_unlock(&con->wq_alloc); 1267 1561 1268 1562 msg->ppc = *ppc; 1269 1563 msg->len = len; ··· 1388 1646 /* Send a message */ 1389 1647 static void send_to_sock(struct connection *con) 1390 1648 { 1391 - int ret = 0; 1392 1649 const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 1393 1650 struct writequeue_entry *e; 1394 - int len, offset; 1651 + int len, offset, ret; 1395 1652 int count = 0; 1396 1653 1397 1654 mutex_lock(&con->sock_mutex); ··· 1399 1658 1400 1659 spin_lock(&con->writequeue_lock); 1401 1660 for (;;) { 1402 - if (list_empty(&con->writequeue)) 1661 + e = con_next_wq(con); 1662 + if (!e) 1403 1663 break; 1404 1664 1405 1665 e = list_first_entry(&con->writequeue, struct writequeue_entry, list); ··· 1409 1667 BUG_ON(len == 0 && e->users == 0); 1410 1668 spin_unlock(&con->writequeue_lock); 1411 1669 1412 - ret = 0; 1413 - if (len) { 1414 - ret = kernel_sendpage(con->sock, e->page, offset, len, 1415 - msg_flags); 1416 - if (ret == -EAGAIN || ret == 0) { 1417 - if (ret == -EAGAIN && 1418 - test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && 1419 - !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { 1420 - /* Notify TCP that we're limited by the 1421 - * application window size. 1422 - */ 1423 - set_bit(SOCK_NOSPACE, &con->sock->flags); 1424 - con->sock->sk->sk_write_pending++; 1425 - } 1426 - cond_resched(); 1427 - goto out; 1428 - } else if (ret < 0) 1429 - goto out; 1430 - } 1670 + ret = kernel_sendpage(con->sock, e->page, offset, len, 1671 + msg_flags); 1672 + if (ret == -EAGAIN || ret == 0) { 1673 + if (ret == -EAGAIN && 1674 + test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && 1675 + !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { 1676 + /* Notify TCP that we're limited by the 1677 + * application window size. 1678 + */ 1679 + set_bit(SOCK_NOSPACE, &con->sock->flags); 1680 + con->sock->sk->sk_write_pending++; 1681 + } 1682 + cond_resched(); 1683 + goto out; 1684 + } else if (ret < 0) 1685 + goto out; 1431 1686 1432 1687 /* Don't starve people filling buffers */ 1433 1688 if (++count >= MAX_SEND_MSG_COUNT) { ··· 1509 1770 static void process_recv_sockets(struct work_struct *work) 1510 1771 { 1511 1772 struct connection *con = container_of(work, struct connection, rwork); 1512 - int err; 1513 1773 1514 1774 clear_bit(CF_READ_PENDING, &con->flags); 1515 - do { 1516 - err = receive_from_sock(con); 1517 - } while (!err); 1775 + receive_from_sock(con); 1518 1776 } 1519 1777 1520 1778 static void process_listen_recv_socket(struct work_struct *work) 1521 1779 { 1522 1780 accept_from_sock(&listen_con); 1781 + } 1782 + 1783 + static void dlm_connect(struct connection *con) 1784 + { 1785 + struct sockaddr_storage addr; 1786 + int result, addr_len; 1787 + struct socket *sock; 1788 + unsigned int mark; 1789 + 1790 + /* Some odd races can cause double-connects, ignore them */ 1791 + if (con->retries++ > MAX_CONNECT_RETRIES) 1792 + return; 1793 + 1794 + if (con->sock) { 1795 + log_print("node %d already connected.", con->nodeid); 1796 + return; 1797 + } 1798 + 1799 + memset(&addr, 0, sizeof(addr)); 1800 + result = nodeid_to_addr(con->nodeid, &addr, NULL, 1801 + dlm_proto_ops->try_new_addr, &mark); 1802 + if (result < 0) { 1803 + log_print("no address for nodeid %d", con->nodeid); 1804 + return; 1805 + } 1806 + 1807 + /* Create a socket to communicate with */ 1808 + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1809 + SOCK_STREAM, dlm_proto_ops->proto, &sock); 1810 + if (result < 0) 1811 + goto socket_err; 1812 + 1813 + sock_set_mark(sock->sk, mark); 1814 + dlm_proto_ops->sockopts(sock); 1815 + 1816 + add_sock(sock, con); 1817 + 1818 + result = dlm_proto_ops->bind(sock); 1819 + if (result < 0) 1820 + goto add_sock_err; 1821 + 1822 + log_print_ratelimited("connecting to %d", con->nodeid); 1823 + make_sockaddr(&addr, dlm_config.ci_tcp_port, &addr_len); 1824 + result = dlm_proto_ops->connect(con, sock, (struct sockaddr *)&addr, 1825 + addr_len); 1826 + if (result < 0) 1827 + goto add_sock_err; 1828 + 1829 + return; 1830 + 1831 + add_sock_err: 1832 + dlm_close_sock(&con->sock); 1833 + 1834 + socket_err: 1835 + /* 1836 + * Some errors are fatal and this list might need adjusting. For other 1837 + * errors we try again until the max number of retries is reached. 1838 + */ 1839 + if (result != -EHOSTUNREACH && 1840 + result != -ENETUNREACH && 1841 + result != -ENETDOWN && 1842 + result != -EINVAL && 1843 + result != -EPROTONOSUPPORT) { 1844 + log_print("connect %d try %d error %d", con->nodeid, 1845 + con->retries, result); 1846 + msleep(1000); 1847 + lowcomms_connect_sock(con); 1848 + } 1523 1849 } 1524 1850 1525 1851 /* Send workqueue function */ ··· 1601 1797 dlm_midcomms_unack_msg_resend(con->nodeid); 1602 1798 } 1603 1799 1604 - if (con->sock == NULL) { /* not mutex protected so check it inside too */ 1800 + if (con->sock == NULL) { 1605 1801 if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags)) 1606 1802 msleep(1000); 1607 - con->connect_action(con); 1803 + 1804 + mutex_lock(&con->sock_mutex); 1805 + dlm_connect(con); 1806 + mutex_unlock(&con->sock_mutex); 1608 1807 } 1808 + 1609 1809 if (!list_empty(&con->writequeue)) 1610 1810 send_to_sock(con); 1611 1811 } ··· 1648 1840 1649 1841 static void shutdown_conn(struct connection *con) 1650 1842 { 1651 - if (con->shutdown_action) 1652 - con->shutdown_action(con); 1843 + if (dlm_proto_ops->shutdown_action) 1844 + dlm_proto_ops->shutdown_action(con); 1653 1845 } 1654 1846 1655 1847 void dlm_lowcomms_shutdown(void) ··· 1756 1948 srcu_read_unlock(&connections_srcu, idx); 1757 1949 work_stop(); 1758 1950 deinit_local(); 1951 + 1952 + dlm_proto_ops = NULL; 1759 1953 } 1954 + 1955 + static int dlm_listen_for_all(void) 1956 + { 1957 + struct socket *sock; 1958 + int result; 1959 + 1960 + log_print("Using %s for communications", 1961 + dlm_proto_ops->name); 1962 + 1963 + result = dlm_proto_ops->listen_validate(); 1964 + if (result < 0) 1965 + return result; 1966 + 1967 + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1968 + SOCK_STREAM, dlm_proto_ops->proto, &sock); 1969 + if (result < 0) { 1970 + log_print("Can't create comms socket, check SCTP is loaded"); 1971 + goto out; 1972 + } 1973 + 1974 + sock_set_mark(sock->sk, dlm_config.ci_mark); 1975 + dlm_proto_ops->listen_sockopts(sock); 1976 + 1977 + result = dlm_proto_ops->listen_bind(sock); 1978 + if (result < 0) 1979 + goto out; 1980 + 1981 + save_listen_callbacks(sock); 1982 + add_listen_sock(sock, &listen_con); 1983 + 1984 + INIT_WORK(&listen_con.rwork, process_listen_recv_socket); 1985 + result = sock->ops->listen(sock, 5); 1986 + if (result < 0) { 1987 + dlm_close_sock(&listen_con.sock); 1988 + goto out; 1989 + } 1990 + 1991 + return 0; 1992 + 1993 + out: 1994 + sock_release(sock); 1995 + return result; 1996 + } 1997 + 1998 + static int dlm_tcp_bind(struct socket *sock) 1999 + { 2000 + struct sockaddr_storage src_addr; 2001 + int result, addr_len; 2002 + 2003 + /* Bind to our cluster-known address connecting to avoid 2004 + * routing problems. 2005 + */ 2006 + memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); 2007 + make_sockaddr(&src_addr, 0, &addr_len); 2008 + 2009 + result = sock->ops->bind(sock, (struct sockaddr *)&src_addr, 2010 + addr_len); 2011 + if (result < 0) { 2012 + /* This *may* not indicate a critical error */ 2013 + log_print("could not bind for connect: %d", result); 2014 + } 2015 + 2016 + return 0; 2017 + } 2018 + 2019 + static int dlm_tcp_connect(struct connection *con, struct socket *sock, 2020 + struct sockaddr *addr, int addr_len) 2021 + { 2022 + int ret; 2023 + 2024 + ret = sock->ops->connect(sock, addr, addr_len, O_NONBLOCK); 2025 + switch (ret) { 2026 + case -EINPROGRESS: 2027 + fallthrough; 2028 + case 0: 2029 + return 0; 2030 + } 2031 + 2032 + return ret; 2033 + } 2034 + 2035 + static int dlm_tcp_listen_validate(void) 2036 + { 2037 + /* We don't support multi-homed hosts */ 2038 + if (dlm_local_count > 1) { 2039 + log_print("TCP protocol can't handle multi-homed hosts, try SCTP"); 2040 + return -EINVAL; 2041 + } 2042 + 2043 + return 0; 2044 + } 2045 + 2046 + static void dlm_tcp_sockopts(struct socket *sock) 2047 + { 2048 + /* Turn off Nagle's algorithm */ 2049 + tcp_sock_set_nodelay(sock->sk); 2050 + } 2051 + 2052 + static void dlm_tcp_listen_sockopts(struct socket *sock) 2053 + { 2054 + dlm_tcp_sockopts(sock); 2055 + sock_set_reuseaddr(sock->sk); 2056 + } 2057 + 2058 + static int dlm_tcp_listen_bind(struct socket *sock) 2059 + { 2060 + int addr_len; 2061 + 2062 + /* Bind to our port */ 2063 + make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len); 2064 + return sock->ops->bind(sock, (struct sockaddr *)dlm_local_addr[0], 2065 + addr_len); 2066 + } 2067 + 2068 + static const struct dlm_proto_ops dlm_tcp_ops = { 2069 + .name = "TCP", 2070 + .proto = IPPROTO_TCP, 2071 + .connect = dlm_tcp_connect, 2072 + .sockopts = dlm_tcp_sockopts, 2073 + .bind = dlm_tcp_bind, 2074 + .listen_validate = dlm_tcp_listen_validate, 2075 + .listen_sockopts = dlm_tcp_listen_sockopts, 2076 + .listen_bind = dlm_tcp_listen_bind, 2077 + .shutdown_action = dlm_tcp_shutdown, 2078 + .eof_condition = tcp_eof_condition, 2079 + }; 2080 + 2081 + static int dlm_sctp_bind(struct socket *sock) 2082 + { 2083 + return sctp_bind_addrs(sock, 0); 2084 + } 2085 + 2086 + static int dlm_sctp_connect(struct connection *con, struct socket *sock, 2087 + struct sockaddr *addr, int addr_len) 2088 + { 2089 + int ret; 2090 + 2091 + /* 2092 + * Make sock->ops->connect() function return in specified time, 2093 + * since O_NONBLOCK argument in connect() function does not work here, 2094 + * then, we should restore the default value of this attribute. 2095 + */ 2096 + sock_set_sndtimeo(sock->sk, 5); 2097 + ret = sock->ops->connect(sock, addr, addr_len, 0); 2098 + sock_set_sndtimeo(sock->sk, 0); 2099 + if (ret < 0) 2100 + return ret; 2101 + 2102 + if (!test_and_set_bit(CF_CONNECTED, &con->flags)) 2103 + log_print("successful connected to node %d", con->nodeid); 2104 + 2105 + return 0; 2106 + } 2107 + 2108 + static int dlm_sctp_listen_validate(void) 2109 + { 2110 + if (!IS_ENABLED(CONFIG_IP_SCTP)) { 2111 + log_print("SCTP is not enabled by this kernel"); 2112 + return -EOPNOTSUPP; 2113 + } 2114 + 2115 + request_module("sctp"); 2116 + return 0; 2117 + } 2118 + 2119 + static int dlm_sctp_bind_listen(struct socket *sock) 2120 + { 2121 + return sctp_bind_addrs(sock, dlm_config.ci_tcp_port); 2122 + } 2123 + 2124 + static void dlm_sctp_sockopts(struct socket *sock) 2125 + { 2126 + /* Turn off Nagle's algorithm */ 2127 + sctp_sock_set_nodelay(sock->sk); 2128 + sock_set_rcvbuf(sock->sk, NEEDED_RMEM); 2129 + } 2130 + 2131 + static const struct dlm_proto_ops dlm_sctp_ops = { 2132 + .name = "SCTP", 2133 + .proto = IPPROTO_SCTP, 2134 + .try_new_addr = true, 2135 + .connect = dlm_sctp_connect, 2136 + .sockopts = dlm_sctp_sockopts, 2137 + .bind = dlm_sctp_bind, 2138 + .listen_validate = dlm_sctp_listen_validate, 2139 + .listen_sockopts = dlm_sctp_sockopts, 2140 + .listen_bind = dlm_sctp_bind_listen, 2141 + }; 1760 2142 1761 2143 int dlm_lowcomms_start(void) 1762 2144 { ··· 1974 1976 /* Start listening */ 1975 1977 switch (dlm_config.ci_protocol) { 1976 1978 case DLM_PROTO_TCP: 1977 - error = tcp_listen_for_all(); 1979 + dlm_proto_ops = &dlm_tcp_ops; 1978 1980 break; 1979 1981 case DLM_PROTO_SCTP: 1980 - error = sctp_listen_for_all(&listen_con); 1982 + dlm_proto_ops = &dlm_sctp_ops; 1981 1983 break; 1982 1984 default: 1983 1985 log_print("Invalid protocol identifier %d set", 1984 1986 dlm_config.ci_protocol); 1985 1987 error = -EINVAL; 1986 - break; 1988 + goto fail_proto_ops; 1987 1989 } 1990 + 1991 + error = dlm_listen_for_all(); 1988 1992 if (error) 1989 - goto fail_unlisten; 1993 + goto fail_listen; 1990 1994 1991 1995 return 0; 1992 1996 1993 - fail_unlisten: 1997 + fail_listen: 1998 + dlm_proto_ops = NULL; 1999 + fail_proto_ops: 1994 2000 dlm_allow_conn = 0; 1995 2001 dlm_close_sock(&listen_con.sock); 1996 2002 work_stop();
+1
fs/dlm/lowcomms.h
··· 46 46 int dlm_lowcomms_connect_node(int nodeid); 47 47 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark); 48 48 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); 49 + void dlm_midcomms_receive_done(int nodeid); 49 50 50 51 #endif /* __LOWCOMMS_DOT_H__ */ 51 52
+3 -1
fs/dlm/member.c
··· 443 443 444 444 list_for_each_entry(memb, &ls->ls_nodes, list) { 445 445 error = dlm_recovery_stopped(ls); 446 - if (error) 446 + if (error) { 447 + error = -EINTR; 447 448 break; 449 + } 448 450 error = dlm_rcom_status(ls, memb->nodeid, 0); 449 451 if (error) 450 452 break;
+48 -8
fs/dlm/midcomms.c
··· 109 109 * compatibility. There exists better ways to make a better handling. 110 110 * However this should be changed in the next major version bump of dlm. 111 111 * 112 - * Ack handling: 113 - * 114 - * Currently we send an ack message for every dlm message. However we 115 - * can ack multiple dlm messages with one ack by just delaying the ack 116 - * message. Will reduce some traffic but makes the drop detection slower. 117 - * 118 112 * Tail Size checking: 119 113 * 120 114 * There exists a message tail payload in e.g. DLM_MSG however we don't ··· 163 169 #define DLM_NODE_FLAG_CLOSE 1 164 170 #define DLM_NODE_FLAG_STOP_TX 2 165 171 #define DLM_NODE_FLAG_STOP_RX 3 172 + #define DLM_NODE_ULP_DELIVERED 4 166 173 unsigned long flags; 167 174 wait_queue_head_t shutdown_wait; 168 175 ··· 475 480 { 476 481 if (seq == node->seq_next) { 477 482 node->seq_next++; 478 - /* send ack before fin */ 479 - dlm_send_ack(node->nodeid, node->seq_next); 480 483 481 484 switch (p->header.h_cmd) { 482 485 case DLM_FIN: 486 + /* send ack before fin */ 487 + dlm_send_ack(node->nodeid, node->seq_next); 488 + 483 489 spin_lock(&node->state_lock); 484 490 pr_debug("receive fin msg from node %d with state %s\n", 485 491 node->nodeid, dlm_state_str(node->state)); ··· 530 534 default: 531 535 WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags)); 532 536 dlm_receive_buffer(p, node->nodeid); 537 + set_bit(DLM_NODE_ULP_DELIVERED, &node->flags); 533 538 break; 534 539 } 535 540 } else { ··· 928 931 } 929 932 930 933 return ret; 934 + } 935 + 936 + void dlm_midcomms_receive_done(int nodeid) 937 + { 938 + struct midcomms_node *node; 939 + int idx; 940 + 941 + idx = srcu_read_lock(&nodes_srcu); 942 + node = nodeid2node(nodeid, 0); 943 + if (!node) { 944 + srcu_read_unlock(&nodes_srcu, idx); 945 + return; 946 + } 947 + 948 + /* old protocol, we do nothing */ 949 + switch (node->version) { 950 + case DLM_VERSION_3_2: 951 + break; 952 + default: 953 + srcu_read_unlock(&nodes_srcu, idx); 954 + return; 955 + } 956 + 957 + /* do nothing if we didn't delivered stateful to ulp */ 958 + if (!test_and_clear_bit(DLM_NODE_ULP_DELIVERED, 959 + &node->flags)) { 960 + srcu_read_unlock(&nodes_srcu, idx); 961 + return; 962 + } 963 + 964 + spin_lock(&node->state_lock); 965 + /* we only ack if state is ESTABLISHED */ 966 + switch (node->state) { 967 + case DLM_ESTABLISHED: 968 + spin_unlock(&node->state_lock); 969 + dlm_send_ack(node->nodeid, node->seq_next); 970 + break; 971 + default: 972 + spin_unlock(&node->state_lock); 973 + /* do nothing FIN has it's own ack send */ 974 + break; 975 + }; 976 + srcu_read_unlock(&nodes_srcu, idx); 931 977 } 932 978 933 979 void dlm_midcomms_unack_msg_resend(int nodeid)
+11 -18
fs/dlm/rcom.c
··· 89 89 return 0; 90 90 } 91 91 92 - static void _send_rcom(struct dlm_ls *ls, struct dlm_rcom *rc) 92 + static void send_rcom(struct dlm_mhandle *mh, struct dlm_rcom *rc) 93 93 { 94 94 dlm_rcom_out(rc); 95 - } 96 - 97 - static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh, 98 - struct dlm_rcom *rc) 99 - { 100 - _send_rcom(ls, rc); 101 95 dlm_midcomms_commit_mhandle(mh); 102 96 } 103 97 104 - static void send_rcom_stateless(struct dlm_ls *ls, struct dlm_msg *msg, 105 - struct dlm_rcom *rc) 98 + static void send_rcom_stateless(struct dlm_msg *msg, struct dlm_rcom *rc) 106 99 { 107 - _send_rcom(ls, rc); 100 + dlm_rcom_out(rc); 108 101 dlm_lowcomms_commit_msg(msg); 109 102 dlm_lowcomms_put_msg(msg); 110 103 } ··· 197 204 allow_sync_reply(ls, &rc->rc_id); 198 205 memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE); 199 206 200 - send_rcom_stateless(ls, msg, rc); 207 + send_rcom_stateless(msg, rc); 201 208 202 209 error = dlm_wait_function(ls, &rcom_response); 203 210 disallow_sync_reply(ls); ··· 280 287 spin_unlock(&ls->ls_recover_lock); 281 288 282 289 do_send: 283 - send_rcom_stateless(ls, msg, rc); 290 + send_rcom_stateless(msg, rc); 284 291 } 285 292 286 293 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) ··· 320 327 allow_sync_reply(ls, &rc->rc_id); 321 328 memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE); 322 329 323 - send_rcom_stateless(ls, msg, rc); 330 + send_rcom_stateless(msg, rc); 324 331 325 332 error = dlm_wait_function(ls, &rcom_response); 326 333 disallow_sync_reply(ls); ··· 349 356 350 357 dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen, 351 358 nodeid); 352 - send_rcom_stateless(ls, msg, rc); 359 + send_rcom_stateless(msg, rc); 353 360 } 354 361 355 362 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid) ··· 366 373 memcpy(rc->rc_buf, r->res_name, r->res_length); 367 374 rc->rc_id = (unsigned long) r->res_id; 368 375 369 - send_rcom(ls, mh, rc); 376 + send_rcom(mh, rc); 370 377 out: 371 378 return error; 372 379 } ··· 397 404 rc->rc_id = rc_in->rc_id; 398 405 rc->rc_seq_reply = rc_in->rc_seq; 399 406 400 - send_rcom(ls, mh, rc); 407 + send_rcom(mh, rc); 401 408 } 402 409 403 410 static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) ··· 454 461 pack_rcom_lock(r, lkb, rl); 455 462 rc->rc_id = (unsigned long) r; 456 463 457 - send_rcom(ls, mh, rc); 464 + send_rcom(mh, rc); 458 465 out: 459 466 return error; 460 467 } ··· 480 487 rc->rc_id = rc_in->rc_id; 481 488 rc->rc_seq_reply = rc_in->rc_seq; 482 489 483 - send_rcom(ls, mh, rc); 490 + send_rcom(mh, rc); 484 491 } 485 492 486 493 /* If the lockspace doesn't exist then still send a status message
+3 -1
fs/dlm/recoverd.c
··· 125 125 dlm_recover_waiters_pre(ls); 126 126 127 127 error = dlm_recovery_stopped(ls); 128 - if (error) 128 + if (error) { 129 + error = -EINTR; 129 130 goto fail; 131 + } 130 132 131 133 if (neg || dlm_no_directory(ls)) { 132 134 /*