Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'v7.1-rc-part1-smbdirect-fixes' of git://git.samba.org/ksmbd

Pull smbdirect updates from Steve French:
"Move smbdirect server and client code to common directory:

- temporary use of smbdirect_all_c_files.c to allow micro steps

- factor out common functions into a smbdirect.ko.

- convert cifs.ko to use smbdirect.ko

- convert ksmbd.ko to use smbdirect.ko

- let smbdirect.ko use global workqueues

- move ib_client logic from ksmbd.ko into smbdirect.ko

- remove smbdirect_all_c_files.c hack again

- some locking and teardown related fixes on top"
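The "smbdirect_all_c_files.c" bullets describe a transitional build trick (see also the SMBDIRECT_USE_INLINE_C_FILES removal in the shortlog below): while code moved file by file into fs/smb/common/smbdirect/, the existing modules could keep compiling the common sources as a single translation unit. A rough sketch of the idea follows; the file names and the exact mechanism are assumptions for illustration, not taken from the tree:

/*
 * Hypothetical sketch of the transitional hack: during the micro
 * steps, cifs.ko/ksmbd.ko build the common smbdirect code inline;
 * once the conversion is complete, the same symbols come from
 * smbdirect.ko and this file is removed again.
 */
#ifdef SMBDIRECT_USE_INLINE_C_FILES
/* file names below are illustrative, not the actual ones */
#include "../common/smbdirect/smbdirect_socket.c"
#include "../common/smbdirect/smbdirect_connection.c"
#endif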

* tag 'v7.1-rc-part1-smbdirect-fixes' of git://git.samba.org/ksmbd: (145 commits)
smb: smbdirect: let smbdirect_connection_deregister_mr_io unlock while waiting
smb: smbdirect: fix the logic in smbdirect_socket_destroy_sync() without an error
smb: smbdirect: fix copyright header of smbdirect.h
smb: smbdirect: change smbdirect_socket_parameters.{initiator_depth,responder_resources} to __u16
smb: smbdirect: remove unused SMBDIRECT_USE_INLINE_C_FILES logic
smb: server: no longer use smbdirect_socket_set_custom_workqueue()
smb: client: no longer use smbdirect_socket_set_custom_workqueue()
smb: smbdirect: introduce global workqueues
smb: smbdirect: prepare use of dedicated workqueues for different steps
smb: smbdirect: remove unused smbdirect_connection_mr_io_recovery_work()
smb: smbdirect: wrap rdma_disconnect() in rdma_[un]lock_handler()
smb: server: make use of smbdirect_netdev_rdma_capable_mode_type()
smb: smbdirect: introduce smbdirect_netdev_rdma_capable_mode_type()
smb: server: make use of smbdirect.ko
smb: server: remove unused ksmbd_transport_ops.prepare()
smb: server: make use of smbdirect_socket_{listen,accept}()
smb: server: only use public smbdirect functions
smb: server: make use of smbdirect_socket_create_accepting()/smbdirect_socket_release()
smb: server: make use of smbdirect_{socket_init_accepting,connection_wait_for_connected}()
smb: server: make use of smbdirect_connection_send_iter() and related functions
...
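To make the end state concrete, here is a rough sketch of how a server-side consumer such as ksmbd.ko might drive the shared module after this series. Only the function names are taken from the shortlog above; the signatures, return conventions, and error handling are assumptions for illustration:

/*
 * Hypothetical glue code; the real declarations would live in
 * fs/smb/common/smbdirect/smbdirect_public.h (assumed).
 */
static int example_accept_and_send(struct smbdirect_socket *listener,
				   struct iov_iter *iter)
{
	struct smbdirect_socket *sc;
	int rc;

	/* accept one RDMA connection from the listener (assumed signature) */
	sc = smbdirect_socket_accept(listener);
	if (IS_ERR(sc))
		return PTR_ERR(sc);

	/* wait until the SMBDirect negotiation has completed (assumed) */
	rc = smbdirect_connection_wait_for_connected(sc);
	if (rc)
		goto out_release;

	/* send a packet described by an iov_iter (assumed signature) */
	rc = smbdirect_connection_send_iter(sc, iter);

out_release:
	if (rc)
		smbdirect_socket_release(sc);
	return rc;
}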

+7134 -5838
+1 -0
fs/smb/Kconfig
···
 
 source "fs/smb/client/Kconfig"
 source "fs/smb/server/Kconfig"
+source "fs/smb/common/smbdirect/Kconfig"
 
 config SMBFS
 	tristate
+3 -1
fs/smb/client/Kconfig
···
 
 config CIFS_SMB_DIRECT
 	bool "SMB Direct support"
-	depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
+	depends on CIFS && INFINIBAND && INFINIBAND_ADDR_TRANS
+	depends on CIFS=m || INFINIBAND=y
+	select SMB_COMMON_SMBDIRECT
 	help
 	  Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1.
 	  SMB Direct allows transferring SMB packets over RDMA. If unsure,
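A note on this Kconfig change: the old single-line expression allowed exactly "CIFS=m with INFINIBAND/INFINIBAND_ADDR_TRANS enabled" or "CIFS=y with both builtin". Splitting it into "depends on CIFS && INFINIBAND && INFINIBAND_ADDR_TRANS" plus "depends on CIFS=m || INFINIBAND=y" expresses the same constraint more readably; the key point is that a builtin CIFS cannot link against a modular InfiniBand core. The added select then pulls in the new shared SMB_COMMON_SMBDIRECT code.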
+1 -66
fs/smb/client/cifs_debug.c
···
 #endif
 #ifdef CONFIG_CIFS_SMB_DIRECT
 #include "smbdirect.h"
-#include "../common/smbdirect/smbdirect_pdu.h"
 #endif
 #include "cifs_swn.h"
 #include "cached_dir.h"
···
 	c = 0;
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
-#ifdef CONFIG_CIFS_SMB_DIRECT
-		struct smbdirect_socket *sc;
-		struct smbdirect_socket_parameters *sp;
-#endif
-
 		/* channel info will be printed as a part of sessions below */
 		if (SERVER_IS_CHAN(server))
 			continue;
···
 		seq_printf(m, "\nClientGUID: %pUL", server->client_guid);
 		spin_unlock(&server->srv_lock);
 #ifdef CONFIG_CIFS_SMB_DIRECT
-		if (!server->rdma)
-			goto skip_rdma;
-
-		if (!server->smbd_conn) {
-			seq_printf(m, "\nSMBDirect transport not available");
-			goto skip_rdma;
-		}
-		sc = &server->smbd_conn->socket;
-		sp = &sc->parameters;
-
-		seq_printf(m, "\nSMBDirect protocol version: 0x%x "
-			"transport status: %s (%u)",
-			SMBDIRECT_V1,
-			smbdirect_socket_status_string(sc->status),
-			sc->status);
-		seq_printf(m, "\nConn receive_credit_max: %u "
-			"send_credit_target: %u max_send_size: %u",
-			sp->recv_credit_max,
-			sp->send_credit_target,
-			sp->max_send_size);
-		seq_printf(m, "\nConn max_fragmented_recv_size: %u "
-			"max_fragmented_send_size: %u max_receive_size:%u",
-			sp->max_fragmented_recv_size,
-			sp->max_fragmented_send_size,
-			sp->max_recv_size);
-		seq_printf(m, "\nConn keep_alive_interval: %u "
-			"max_readwrite_size: %u rdma_readwrite_threshold: %u",
-			sp->keepalive_interval_msec * 1000,
-			sp->max_read_write_size,
-			server->rdma_readwrite_threshold);
-		seq_printf(m, "\nDebug count_get_receive_buffer: %llu "
-			"count_put_receive_buffer: %llu count_send_empty: %llu",
-			sc->statistics.get_receive_buffer,
-			sc->statistics.put_receive_buffer,
-			sc->statistics.send_empty);
-		seq_printf(m, "\nRead Queue "
-			"count_enqueue_reassembly_queue: %llu "
-			"count_dequeue_reassembly_queue: %llu "
-			"reassembly_data_length: %u "
-			"reassembly_queue_length: %u",
-			sc->statistics.enqueue_reassembly_queue,
-			sc->statistics.dequeue_reassembly_queue,
-			sc->recv_io.reassembly.data_length,
-			sc->recv_io.reassembly.queue_length);
-		seq_printf(m, "\nCurrent Credits send_credits: %u "
-			"receive_credits: %u receive_credit_target: %u",
-			atomic_read(&sc->send_io.credits.count),
-			atomic_read(&sc->recv_io.credits.count),
-			sc->recv_io.credits.target);
-		seq_printf(m, "\nPending send_pending: %u ",
-			atomic_read(&sc->send_io.pending.count));
-		seq_printf(m, "\nMR responder_resources: %u "
-			"max_frmr_depth: %u mr_type: 0x%x",
-			sp->responder_resources,
-			sp->max_frmr_depth,
-			sc->mr_io.type);
-		seq_printf(m, "\nMR mr_ready_count: %u mr_used_count: %u",
-			atomic_read(&sc->mr_io.ready.count),
-			atomic_read(&sc->mr_io.used.count));
-skip_rdma:
+		smbd_debug_proc_show(server, m);
 #endif
 	seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x",
 		server->credits,
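The sixty-odd removed lines above are not dropped; they move behind a single helper. A compressed sketch of what smbd_debug_proc_show() presumably looks like, reassembled from the removed lines (its actual location and full body are not part of this excerpt):

/* sketch: the old inline /proc dump, now behind one helper */
void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m)
{
	struct smbdirect_socket *sc;

	if (!server->rdma)
		return;
	if (!server->smbd_conn) {
		seq_printf(m, "\nSMBDirect transport not available");
		return;
	}
	sc = &server->smbd_conn->socket;
	seq_printf(m, "\nSMBDirect protocol version: 0x%x transport status: %s (%u)",
		   SMBDIRECT_V1,
		   smbdirect_socket_status_string(sc->status),
		   sc->status);
	/* ...followed by the same seq_printf() dump of parameters,
	 * statistics, credits and MR state as the removed lines above.
	 */
}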
+2 -7
fs/smb/client/smb2pdu.c
···
 #include "../common/smb2status.h"
 #include "smb2glob.h"
 #include "cifs_spnego.h"
-#include "../common/smbdirect/smbdirect.h"
 #include "smbdirect.h"
 #include "trace.h"
 #ifdef CONFIG_CIFS_DFS_UPCALL
···
 		req->ReadChannelInfoLength =
 			cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1));
 		v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0];
-		v1->offset = cpu_to_le64(rdata->mr->mr->iova);
-		v1->token = cpu_to_le32(rdata->mr->mr->rkey);
-		v1->length = cpu_to_le32(rdata->mr->mr->length);
+		smbd_mr_fill_buffer_descriptor(rdata->mr, v1);
 
 		*total_len += sizeof(*v1) - 1;
 	}
···
 		req->WriteChannelInfoLength =
 			cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1));
 		v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0];
-		v1->offset = cpu_to_le64(wdata->mr->mr->iova);
-		v1->token = cpu_to_le32(wdata->mr->mr->rkey);
-		v1->length = cpu_to_le32(wdata->mr->mr->length);
+		smbd_mr_fill_buffer_descriptor(wdata->mr, v1);
 
 		rqst.rq_iov[0].iov_len += sizeof(*v1);
 
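Both hunks replace the same three-line pattern, so the new helper presumably just packages it. A sketch reassembled from the removed lines (the actual definition is not part of this excerpt):

/* sketch: fill a [MS-SMBD] buffer descriptor v1 from a registered MR */
static inline void
smbd_mr_fill_buffer_descriptor(struct smbd_mr *smbdirect_mr,
			       struct smbdirect_buffer_descriptor_v1 *v1)
{
	v1->offset = cpu_to_le64(smbdirect_mr->mr->iova);
	v1->token = cpu_to_le32(smbdirect_mr->mr->rkey);
	v1->length = cpu_to_le32(smbdirect_mr->mr->length);
}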
+166 -2958
fs/smb/client/smbdirect.c
··· 4 4 * 5 5 * Author(s): Long Li <longli@microsoft.com> 6 6 */ 7 - #include <linux/module.h> 8 - #include <linux/highmem.h> 9 - #include <linux/folio_queue.h> 10 - #define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smbd_disconnect_rdma_connection(__sc) 11 - #include "../common/smbdirect/smbdirect_pdu.h" 7 + 12 8 #include "smbdirect.h" 13 9 #include "cifs_debug.h" 14 10 #include "cifsproto.h" 15 11 #include "smb2proto.h" 16 - 17 - const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) 18 - { 19 - struct smbdirect_socket *sc = &conn->socket; 20 - 21 - return &sc->parameters; 22 - } 23 - 24 - static struct smbdirect_recv_io *get_receive_buffer( 25 - struct smbdirect_socket *sc); 26 - static void put_receive_buffer( 27 - struct smbdirect_socket *sc, 28 - struct smbdirect_recv_io *response); 29 - static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf); 30 - static void destroy_receive_buffers(struct smbdirect_socket *sc); 31 - 32 - static void enqueue_reassembly( 33 - struct smbdirect_socket *sc, 34 - struct smbdirect_recv_io *response, int data_length); 35 - static struct smbdirect_recv_io *_get_first_reassembly( 36 - struct smbdirect_socket *sc); 37 - 38 - static int smbd_post_send(struct smbdirect_socket *sc, 39 - struct smbdirect_send_batch *batch, 40 - struct smbdirect_send_io *request); 41 - 42 - static int smbd_post_recv( 43 - struct smbdirect_socket *sc, 44 - struct smbdirect_recv_io *response); 45 - 46 - static int smbd_post_send_empty(struct smbdirect_socket *sc); 47 - 48 - static void destroy_mr_list(struct smbdirect_socket *sc); 49 - static int allocate_mr_list(struct smbdirect_socket *sc); 50 - 51 - struct smb_extract_to_rdma { 52 - struct ib_sge *sge; 53 - unsigned int nr_sge; 54 - unsigned int max_sge; 55 - struct ib_device *device; 56 - u32 local_dma_lkey; 57 - enum dma_data_direction direction; 58 - }; 59 - static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 60 - struct smb_extract_to_rdma *rdma); 12 + #include "../common/smbdirect/smbdirect_public.h" 61 13 62 14 /* Port numbers for SMBD transport */ 63 15 #define SMB_PORT 445 ··· 24 72 /* The timeout to wait for a keepalive message from peer in seconds */ 25 73 #define KEEPALIVE_RECV_TIMEOUT 5 26 74 27 - /* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ 28 - #define SMBD_MIN_RECEIVE_SIZE 128 29 - #define SMBD_MIN_FRAGMENTED_SIZE 131072 30 - 31 75 /* 32 76 * Default maximum number of RDMA read/write outstanding on this connection 33 77 * This value is possibly decreased during QP creation on hardware limit 34 78 */ 35 79 #define SMBD_CM_RESPONDER_RESOURCES 32 36 - 37 - /* Maximum number of retries on data transfer operations */ 38 - #define SMBD_CM_RETRY 6 39 - /* No need to retry on Receiver Not Ready since SMBD manages credits */ 40 - #define SMBD_CM_RNR_RETRY 0 41 80 42 81 /* 43 82 * User configurable initial values per SMBD transport connection ··· 105 162 MODULE_PARM_DESC(smbd_logging_level, 106 163 "Logging level for SMBD transport, 0 (default): error, 1: info"); 107 164 165 + static bool smbd_logging_needed(struct smbdirect_socket *sc, 166 + void *private_ptr, 167 + unsigned int lvl, 168 + unsigned int cls) 169 + { 170 + #define BUILD_BUG_SAME(x) BUILD_BUG_ON(x != SMBDIRECT_LOG_ ##x) 171 + BUILD_BUG_SAME(ERR); 172 + BUILD_BUG_SAME(INFO); 173 + #undef BUILD_BUG_SAME 174 + #define BUILD_BUG_SAME(x) BUILD_BUG_ON(x != SMBDIRECT_ ##x) 175 + BUILD_BUG_SAME(LOG_OUTGOING); 176 + BUILD_BUG_SAME(LOG_INCOMING); 177 + 
BUILD_BUG_SAME(LOG_READ); 178 + BUILD_BUG_SAME(LOG_WRITE); 179 + BUILD_BUG_SAME(LOG_RDMA_SEND); 180 + BUILD_BUG_SAME(LOG_RDMA_RECV); 181 + BUILD_BUG_SAME(LOG_KEEP_ALIVE); 182 + BUILD_BUG_SAME(LOG_RDMA_EVENT); 183 + BUILD_BUG_SAME(LOG_RDMA_MR); 184 + #undef BUILD_BUG_SAME 185 + 186 + if (lvl <= smbd_logging_level || cls & smbd_logging_class) 187 + return true; 188 + return false; 189 + } 190 + 191 + static void smbd_logging_vaprintf(struct smbdirect_socket *sc, 192 + const char *func, 193 + unsigned int line, 194 + void *private_ptr, 195 + unsigned int lvl, 196 + unsigned int cls, 197 + struct va_format *vaf) 198 + { 199 + cifs_dbg(VFS, "%s:%u %pV", func, line, vaf); 200 + } 201 + 108 202 #define log_rdma(level, class, fmt, args...) \ 109 203 do { \ 110 204 if (level <= smbd_logging_level || class & smbd_logging_class) \ ··· 165 185 #define log_rdma_mr(level, fmt, args...) \ 166 186 log_rdma(level, LOG_RDMA_MR, fmt, ##args) 167 187 168 - static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) 169 - { 170 - /* 171 - * Wake up all waiters in all wait queues 172 - * in order to notice the broken connection. 173 - */ 174 - wake_up_all(&sc->status_wait); 175 - wake_up_all(&sc->send_io.lcredits.wait_queue); 176 - wake_up_all(&sc->send_io.credits.wait_queue); 177 - wake_up_all(&sc->send_io.pending.dec_wait_queue); 178 - wake_up_all(&sc->send_io.pending.zero_wait_queue); 179 - wake_up_all(&sc->recv_io.reassembly.wait_queue); 180 - wake_up_all(&sc->mr_io.ready.wait_queue); 181 - wake_up_all(&sc->mr_io.cleanup.wait_queue); 182 - } 183 - 184 - static void smbd_disconnect_rdma_work(struct work_struct *work) 185 - { 186 - struct smbdirect_socket *sc = 187 - container_of(work, struct smbdirect_socket, disconnect_work); 188 - 189 - if (sc->first_error == 0) 190 - sc->first_error = -ECONNABORTED; 191 - 192 - /* 193 - * make sure this and other work is not queued again 194 - * but here we don't block and avoid 195 - * disable[_delayed]_work_sync() 196 - */ 197 - disable_work(&sc->disconnect_work); 198 - disable_work(&sc->recv_io.posted.refill_work); 199 - disable_work(&sc->mr_io.recovery_work); 200 - disable_work(&sc->idle.immediate_work); 201 - disable_delayed_work(&sc->idle.timer_work); 202 - 203 - switch (sc->status) { 204 - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 205 - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 206 - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 207 - case SMBDIRECT_SOCKET_CONNECTED: 208 - case SMBDIRECT_SOCKET_ERROR: 209 - sc->status = SMBDIRECT_SOCKET_DISCONNECTING; 210 - rdma_disconnect(sc->rdma.cm_id); 211 - break; 212 - 213 - case SMBDIRECT_SOCKET_CREATED: 214 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 215 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 216 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 217 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 218 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 219 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 220 - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 221 - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 222 - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 223 - /* 224 - * rdma_connect() never reached 225 - * RDMA_CM_EVENT_ESTABLISHED 226 - */ 227 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 228 - break; 229 - 230 - case SMBDIRECT_SOCKET_DISCONNECTING: 231 - case SMBDIRECT_SOCKET_DISCONNECTED: 232 - case SMBDIRECT_SOCKET_DESTROYED: 233 - break; 234 - } 235 - 236 - /* 237 - * Wake up all waiters in all wait queues 238 - * in order to notice the broken connection. 
239 - */ 240 - smbd_disconnect_wake_up_all(sc); 241 - } 242 - 243 - static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) 244 - { 245 - if (sc->first_error == 0) 246 - sc->first_error = -ECONNABORTED; 247 - 248 - /* 249 - * make sure other work (than disconnect_work) is 250 - * not queued again but here we don't block and avoid 251 - * disable[_delayed]_work_sync() 252 - */ 253 - disable_work(&sc->recv_io.posted.refill_work); 254 - disable_work(&sc->mr_io.recovery_work); 255 - disable_work(&sc->idle.immediate_work); 256 - disable_delayed_work(&sc->idle.timer_work); 257 - 258 - switch (sc->status) { 259 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 260 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 261 - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 262 - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 263 - case SMBDIRECT_SOCKET_ERROR: 264 - case SMBDIRECT_SOCKET_DISCONNECTING: 265 - case SMBDIRECT_SOCKET_DISCONNECTED: 266 - case SMBDIRECT_SOCKET_DESTROYED: 267 - /* 268 - * Keep the current error status 269 - */ 270 - break; 271 - 272 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 273 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 274 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 275 - break; 276 - 277 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 278 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 279 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 280 - break; 281 - 282 - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 283 - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 284 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 285 - break; 286 - 287 - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 288 - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 289 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 290 - break; 291 - 292 - case SMBDIRECT_SOCKET_CREATED: 293 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 294 - break; 295 - 296 - case SMBDIRECT_SOCKET_CONNECTED: 297 - sc->status = SMBDIRECT_SOCKET_ERROR; 298 - break; 299 - } 300 - 301 - /* 302 - * Wake up all waiters in all wait queues 303 - * in order to notice the broken connection. 
304 - */ 305 - smbd_disconnect_wake_up_all(sc); 306 - 307 - queue_work(sc->workqueue, &sc->disconnect_work); 308 - } 309 - 310 - /* Upcall from RDMA CM */ 311 - static int smbd_conn_upcall( 312 - struct rdma_cm_id *id, struct rdma_cm_event *event) 313 - { 314 - struct smbdirect_socket *sc = id->context; 315 - struct smbdirect_socket_parameters *sp = &sc->parameters; 316 - const char *event_name = rdma_event_msg(event->event); 317 - u8 peer_initiator_depth; 318 - u8 peer_responder_resources; 319 - 320 - log_rdma_event(INFO, "event=%s status=%d\n", 321 - event_name, event->status); 322 - 323 - switch (event->event) { 324 - case RDMA_CM_EVENT_ADDR_RESOLVED: 325 - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING)) 326 - break; 327 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; 328 - wake_up(&sc->status_wait); 329 - break; 330 - 331 - case RDMA_CM_EVENT_ROUTE_RESOLVED: 332 - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING)) 333 - break; 334 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; 335 - wake_up(&sc->status_wait); 336 - break; 337 - 338 - case RDMA_CM_EVENT_ADDR_ERROR: 339 - log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 340 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 341 - smbd_disconnect_rdma_work(&sc->disconnect_work); 342 - break; 343 - 344 - case RDMA_CM_EVENT_ROUTE_ERROR: 345 - log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 346 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 347 - smbd_disconnect_rdma_work(&sc->disconnect_work); 348 - break; 349 - 350 - case RDMA_CM_EVENT_ESTABLISHED: 351 - log_rdma_event(INFO, "connected event=%s\n", event_name); 352 - 353 - /* 354 - * Here we work around an inconsistency between 355 - * iWarp and other devices (at least rxe and irdma using RoCEv2) 356 - */ 357 - if (rdma_protocol_iwarp(id->device, id->port_num)) { 358 - /* 359 - * iWarp devices report the peer's values 360 - * with the perspective of the peer here. 361 - * Tested with siw and irdma (in iwarp mode) 362 - * We need to change to our perspective here, 363 - * so we need to switch the values. 364 - */ 365 - peer_initiator_depth = event->param.conn.responder_resources; 366 - peer_responder_resources = event->param.conn.initiator_depth; 367 - } else { 368 - /* 369 - * Non iWarp devices report the peer's values 370 - * already changed to our perspective here. 371 - * Tested with rxe and irdma (in roce mode). 372 - */ 373 - peer_initiator_depth = event->param.conn.initiator_depth; 374 - peer_responder_resources = event->param.conn.responder_resources; 375 - } 376 - if (rdma_protocol_iwarp(id->device, id->port_num) && 377 - event->param.conn.private_data_len == 8) { 378 - /* 379 - * Legacy clients with only iWarp MPA v1 support 380 - * need a private blob in order to negotiate 381 - * the IRD/ORD values. 382 - */ 383 - const __be32 *ird_ord_hdr = event->param.conn.private_data; 384 - u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); 385 - u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); 386 - 387 - /* 388 - * cifs.ko sends the legacy IRD/ORD negotiation 389 - * event if iWarp MPA v2 was used. 390 - * 391 - * Here we check that the values match and only 392 - * mark the client as legacy if they don't match. 
393 - */ 394 - if ((u32)event->param.conn.initiator_depth != ird32 || 395 - (u32)event->param.conn.responder_resources != ord32) { 396 - /* 397 - * There are broken clients (old cifs.ko) 398 - * using little endian and also 399 - * struct rdma_conn_param only uses u8 400 - * for initiator_depth and responder_resources, 401 - * so we truncate the value to U8_MAX. 402 - * 403 - * smb_direct_accept_client() will then 404 - * do the real negotiation in order to 405 - * select the minimum between client and 406 - * server. 407 - */ 408 - ird32 = min_t(u32, ird32, U8_MAX); 409 - ord32 = min_t(u32, ord32, U8_MAX); 410 - 411 - sc->rdma.legacy_iwarp = true; 412 - peer_initiator_depth = (u8)ird32; 413 - peer_responder_resources = (u8)ord32; 414 - } 415 - } 416 - 417 - /* 418 - * negotiate the value by using the minimum 419 - * between client and server if the client provided 420 - * non 0 values. 421 - */ 422 - if (peer_initiator_depth != 0) 423 - sp->initiator_depth = 424 - min_t(u8, sp->initiator_depth, 425 - peer_initiator_depth); 426 - if (peer_responder_resources != 0) 427 - sp->responder_resources = 428 - min_t(u8, sp->responder_resources, 429 - peer_responder_resources); 430 - 431 - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) 432 - break; 433 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; 434 - wake_up(&sc->status_wait); 435 - break; 436 - 437 - case RDMA_CM_EVENT_CONNECT_ERROR: 438 - case RDMA_CM_EVENT_UNREACHABLE: 439 - case RDMA_CM_EVENT_REJECTED: 440 - log_rdma_event(ERR, "connecting failed event=%s\n", event_name); 441 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 442 - smbd_disconnect_rdma_work(&sc->disconnect_work); 443 - break; 444 - 445 - case RDMA_CM_EVENT_DEVICE_REMOVAL: 446 - case RDMA_CM_EVENT_DISCONNECTED: 447 - /* This happens when we fail the negotiation */ 448 - if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { 449 - log_rdma_event(ERR, "event=%s during negotiation\n", event_name); 450 - } 451 - 452 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 453 - smbd_disconnect_rdma_work(&sc->disconnect_work); 454 - break; 455 - 456 - default: 457 - log_rdma_event(ERR, "unexpected event=%s status=%d\n", 458 - event_name, event->status); 459 - break; 460 - } 461 - 462 - return 0; 463 - } 464 - 465 - /* Upcall from RDMA QP */ 466 - static void 467 - smbd_qp_async_error_upcall(struct ib_event *event, void *context) 468 - { 469 - struct smbdirect_socket *sc = context; 470 - 471 - log_rdma_event(ERR, "%s on device %s socket %p\n", 472 - ib_event_msg(event->event), event->device->name, sc); 473 - 474 - switch (event->event) { 475 - case IB_EVENT_CQ_ERR: 476 - case IB_EVENT_QP_FATAL: 477 - smbd_disconnect_rdma_connection(sc); 478 - break; 479 - 480 - default: 481 - break; 482 - } 483 - } 484 - 485 - static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) 486 - { 487 - return (void *)request->packet; 488 - } 489 - 490 - static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response) 491 - { 492 - return (void *)response->packet; 493 - } 494 - 495 - static struct smbdirect_send_io *smbd_alloc_send_io(struct smbdirect_socket *sc) 496 - { 497 - struct smbdirect_send_io *msg; 498 - 499 - msg = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); 500 - if (!msg) 501 - return ERR_PTR(-ENOMEM); 502 - msg->socket = sc; 503 - INIT_LIST_HEAD(&msg->sibling_list); 504 - msg->num_sge = 0; 505 - 506 - return msg; 507 - } 508 - 509 - static void smbd_free_send_io(struct smbdirect_send_io *msg) 510 - { 511 - struct 
smbdirect_socket *sc = msg->socket; 512 - size_t i; 513 - 514 - /* 515 - * The list needs to be empty! 516 - * The caller should take care of it. 517 - */ 518 - WARN_ON_ONCE(!list_empty(&msg->sibling_list)); 519 - 520 - /* 521 - * Note we call ib_dma_unmap_page(), even if some sges are mapped using 522 - * ib_dma_map_single(). 523 - * 524 - * The difference between _single() and _page() only matters for the 525 - * ib_dma_map_*() case. 526 - * 527 - * For the ib_dma_unmap_*() case it does not matter as both take the 528 - * dma_addr_t and dma_unmap_single_attrs() is just an alias to 529 - * dma_unmap_page_attrs(). 530 - */ 531 - for (i = 0; i < msg->num_sge; i++) 532 - ib_dma_unmap_page(sc->ib.dev, 533 - msg->sge[i].addr, 534 - msg->sge[i].length, 535 - DMA_TO_DEVICE); 536 - 537 - mempool_free(msg, sc->send_io.mem.pool); 538 - } 539 - 540 - /* Called when a RDMA send is done */ 541 - static void send_done(struct ib_cq *cq, struct ib_wc *wc) 542 - { 543 - struct smbdirect_send_io *request = 544 - container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 545 - struct smbdirect_socket *sc = request->socket; 546 - struct smbdirect_send_io *sibling, *next; 547 - int lcredits = 0; 548 - 549 - log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", 550 - request, ib_wc_status_msg(wc->status)); 551 - 552 - if (unlikely(!(request->wr.send_flags & IB_SEND_SIGNALED))) { 553 - /* 554 - * This happens when smbdirect_send_io is a sibling 555 - * before the final message, it is signaled on 556 - * error anyway, so we need to skip 557 - * smbdirect_connection_free_send_io here, 558 - * otherwise is will destroy the memory 559 - * of the siblings too, which will cause 560 - * use after free problems for the others 561 - * triggered from ib_drain_qp(). 562 - */ 563 - if (wc->status != IB_WC_SUCCESS) 564 - goto skip_free; 565 - 566 - /* 567 - * This should not happen! 568 - * But we better just close the 569 - * connection... 
570 - */ 571 - log_rdma_send(ERR, 572 - "unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", 573 - ib_wc_status_msg(wc->status), wc->status, wc->opcode); 574 - smbd_disconnect_rdma_connection(sc); 575 - return; 576 - } 577 - 578 - /* 579 - * Free possible siblings and then the main send_io 580 - */ 581 - list_for_each_entry_safe(sibling, next, &request->sibling_list, sibling_list) { 582 - list_del_init(&sibling->sibling_list); 583 - smbd_free_send_io(sibling); 584 - lcredits += 1; 585 - } 586 - /* Note this frees wc->wr_cqe, but not wc */ 587 - smbd_free_send_io(request); 588 - lcredits += 1; 589 - 590 - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 591 - skip_free: 592 - if (wc->status != IB_WC_WR_FLUSH_ERR) 593 - log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", 594 - ib_wc_status_msg(wc->status), wc->opcode); 595 - smbd_disconnect_rdma_connection(sc); 596 - return; 597 - } 598 - 599 - atomic_add(lcredits, &sc->send_io.lcredits.count); 600 - wake_up(&sc->send_io.lcredits.wait_queue); 601 - 602 - if (atomic_dec_and_test(&sc->send_io.pending.count)) 603 - wake_up(&sc->send_io.pending.zero_wait_queue); 604 - 605 - wake_up(&sc->send_io.pending.dec_wait_queue); 606 - } 607 - 608 - static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) 609 - { 610 - log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", 611 - resp->min_version, resp->max_version, 612 - resp->negotiated_version, resp->credits_requested, 613 - resp->credits_granted, resp->status, 614 - resp->max_readwrite_size, resp->preferred_send_size, 615 - resp->max_receive_size, resp->max_fragmented_size); 616 - } 617 - 618 - /* 619 - * Process a negotiation response message, according to [MS-SMBD]3.1.5.7 620 - * response, packet_length: the negotiation response message 621 - * return value: true if negotiation is a success, false if failed 622 - */ 623 - static bool process_negotiation_response( 624 - struct smbdirect_recv_io *response, int packet_length) 625 - { 626 - struct smbdirect_socket *sc = response->socket; 627 - struct smbdirect_socket_parameters *sp = &sc->parameters; 628 - struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); 629 - 630 - if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { 631 - log_rdma_event(ERR, 632 - "error: packet_length=%d\n", packet_length); 633 - return false; 634 - } 635 - 636 - if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { 637 - log_rdma_event(ERR, "error: negotiated_version=%x\n", 638 - le16_to_cpu(packet->negotiated_version)); 639 - return false; 640 - } 641 - 642 - if (packet->credits_requested == 0) { 643 - log_rdma_event(ERR, "error: credits_requested==0\n"); 644 - return false; 645 - } 646 - sc->recv_io.credits.target = le16_to_cpu(packet->credits_requested); 647 - sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 648 - 649 - if (packet->credits_granted == 0) { 650 - log_rdma_event(ERR, "error: credits_granted==0\n"); 651 - return false; 652 - } 653 - atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); 654 - atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); 655 - 656 - if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { 657 - log_rdma_event(ERR, "error: preferred_send_size=%d\n", 658 - 
le32_to_cpu(packet->preferred_send_size)); 659 - return false; 660 - } 661 - sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); 662 - 663 - if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { 664 - log_rdma_event(ERR, "error: max_receive_size=%d\n", 665 - le32_to_cpu(packet->max_receive_size)); 666 - return false; 667 - } 668 - sp->max_send_size = min_t(u32, sp->max_send_size, 669 - le32_to_cpu(packet->max_receive_size)); 670 - 671 - if (le32_to_cpu(packet->max_fragmented_size) < 672 - SMBD_MIN_FRAGMENTED_SIZE) { 673 - log_rdma_event(ERR, "error: max_fragmented_size=%d\n", 674 - le32_to_cpu(packet->max_fragmented_size)); 675 - return false; 676 - } 677 - sp->max_fragmented_send_size = 678 - le32_to_cpu(packet->max_fragmented_size); 679 - 680 - 681 - sp->max_read_write_size = min_t(u32, 682 - le32_to_cpu(packet->max_readwrite_size), 683 - sp->max_frmr_depth * PAGE_SIZE); 684 - sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; 685 - 686 - atomic_set(&sc->send_io.bcredits.count, 1); 687 - sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; 688 - return true; 689 - } 690 - 691 - static void smbd_post_send_credits(struct work_struct *work) 692 - { 693 - int rc; 694 - struct smbdirect_recv_io *response; 695 - struct smbdirect_socket *sc = 696 - container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); 697 - int posted = 0; 698 - 699 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 700 - return; 701 - } 702 - 703 - if (sc->recv_io.credits.target > 704 - atomic_read(&sc->recv_io.credits.count)) { 705 - while (true) { 706 - response = get_receive_buffer(sc); 707 - if (!response) 708 - break; 709 - 710 - response->first_segment = false; 711 - rc = smbd_post_recv(sc, response); 712 - if (rc) { 713 - log_rdma_recv(ERR, 714 - "post_recv failed rc=%d\n", rc); 715 - put_receive_buffer(sc, response); 716 - break; 717 - } 718 - 719 - atomic_inc(&sc->recv_io.posted.count); 720 - posted += 1; 721 - } 722 - } 723 - 724 - atomic_add(posted, &sc->recv_io.credits.available); 725 - 726 - /* 727 - * If the last send credit is waiting for credits 728 - * it can grant we need to wake it up 729 - */ 730 - if (posted && 731 - atomic_read(&sc->send_io.bcredits.count) == 0 && 732 - atomic_read(&sc->send_io.credits.count) == 0) 733 - wake_up(&sc->send_io.credits.wait_queue); 734 - 735 - /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ 736 - if (atomic_read(&sc->recv_io.credits.count) < 737 - sc->recv_io.credits.target - 1) { 738 - log_keep_alive(INFO, "schedule send of an empty message\n"); 739 - queue_work(sc->workqueue, &sc->idle.immediate_work); 740 - } 741 - } 742 - 743 - /* Called from softirq, when recv is done */ 744 - static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 745 - { 746 - struct smbdirect_data_transfer *data_transfer; 747 - struct smbdirect_recv_io *response = 748 - container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 749 - struct smbdirect_socket *sc = response->socket; 750 - struct smbdirect_socket_parameters *sp = &sc->parameters; 751 - int current_recv_credits; 752 - u16 old_recv_credit_target; 753 - u32 data_offset = 0; 754 - u32 data_length = 0; 755 - u32 remaining_data_length = 0; 756 - bool negotiate_done = false; 757 - 758 - log_rdma_recv(INFO, 759 - "response=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", 760 - response, sc->recv_io.expected, 761 - ib_wc_status_msg(wc->status), wc->opcode, 762 - wc->byte_len, wc->pkey_index); 763 - 764 - if (wc->status != IB_WC_SUCCESS || wc->opcode 
!= IB_WC_RECV) { 765 - if (wc->status != IB_WC_WR_FLUSH_ERR) 766 - log_rdma_recv(ERR, "wc->status=%s opcode=%d\n", 767 - ib_wc_status_msg(wc->status), wc->opcode); 768 - goto error; 769 - } 770 - 771 - ib_dma_sync_single_for_cpu( 772 - wc->qp->device, 773 - response->sge.addr, 774 - response->sge.length, 775 - DMA_FROM_DEVICE); 776 - 777 - /* 778 - * Reset timer to the keepalive interval in 779 - * order to trigger our next keepalive message. 780 - */ 781 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 782 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 783 - msecs_to_jiffies(sp->keepalive_interval_msec)); 784 - 785 - switch (sc->recv_io.expected) { 786 - /* SMBD negotiation response */ 787 - case SMBDIRECT_EXPECT_NEGOTIATE_REP: 788 - dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); 789 - sc->recv_io.reassembly.full_packet_received = true; 790 - negotiate_done = 791 - process_negotiation_response(response, wc->byte_len); 792 - put_receive_buffer(sc, response); 793 - if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)) 794 - negotiate_done = false; 795 - if (!negotiate_done) { 796 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 797 - smbd_disconnect_rdma_connection(sc); 798 - } else { 799 - sc->status = SMBDIRECT_SOCKET_CONNECTED; 800 - wake_up(&sc->status_wait); 801 - } 802 - 803 - return; 804 - 805 - /* SMBD data transfer packet */ 806 - case SMBDIRECT_EXPECT_DATA_TRANSFER: 807 - data_transfer = smbdirect_recv_io_payload(response); 808 - 809 - if (wc->byte_len < 810 - offsetof(struct smbdirect_data_transfer, padding)) 811 - goto error; 812 - 813 - remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 814 - data_offset = le32_to_cpu(data_transfer->data_offset); 815 - data_length = le32_to_cpu(data_transfer->data_length); 816 - if (wc->byte_len < data_offset || 817 - (u64)wc->byte_len < (u64)data_offset + data_length) 818 - goto error; 819 - 820 - if (remaining_data_length > sp->max_fragmented_recv_size || 821 - data_length > sp->max_fragmented_recv_size || 822 - (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size) 823 - goto error; 824 - 825 - if (data_length) { 826 - if (sc->recv_io.reassembly.full_packet_received) 827 - response->first_segment = true; 828 - 829 - if (le32_to_cpu(data_transfer->remaining_data_length)) 830 - sc->recv_io.reassembly.full_packet_received = false; 831 - else 832 - sc->recv_io.reassembly.full_packet_received = true; 833 - } 834 - 835 - atomic_dec(&sc->recv_io.posted.count); 836 - current_recv_credits = atomic_dec_return(&sc->recv_io.credits.count); 837 - 838 - old_recv_credit_target = sc->recv_io.credits.target; 839 - sc->recv_io.credits.target = 840 - le16_to_cpu(data_transfer->credits_requested); 841 - sc->recv_io.credits.target = 842 - min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 843 - sc->recv_io.credits.target = 844 - max_t(u16, sc->recv_io.credits.target, 1); 845 - if (le16_to_cpu(data_transfer->credits_granted)) { 846 - atomic_add(le16_to_cpu(data_transfer->credits_granted), 847 - &sc->send_io.credits.count); 848 - /* 849 - * We have new send credits granted from remote peer 850 - * If any sender is waiting for credits, unblock it 851 - */ 852 - wake_up(&sc->send_io.credits.wait_queue); 853 - } 854 - 855 - log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", 856 - le16_to_cpu(data_transfer->flags), 857 - le32_to_cpu(data_transfer->data_offset), 858 - le32_to_cpu(data_transfer->data_length), 859 - 
le32_to_cpu(data_transfer->remaining_data_length)); 860 - 861 - /* Send an immediate response right away if requested */ 862 - if (le16_to_cpu(data_transfer->flags) & 863 - SMBDIRECT_FLAG_RESPONSE_REQUESTED) { 864 - log_keep_alive(INFO, "schedule send of immediate response\n"); 865 - queue_work(sc->workqueue, &sc->idle.immediate_work); 866 - } 867 - 868 - /* 869 - * If this is a packet with data playload place the data in 870 - * reassembly queue and wake up the reading thread 871 - */ 872 - if (data_length) { 873 - if (current_recv_credits <= (sc->recv_io.credits.target / 4) || 874 - sc->recv_io.credits.target > old_recv_credit_target) 875 - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 876 - 877 - enqueue_reassembly(sc, response, data_length); 878 - wake_up(&sc->recv_io.reassembly.wait_queue); 879 - } else 880 - put_receive_buffer(sc, response); 881 - 882 - return; 883 - 884 - case SMBDIRECT_EXPECT_NEGOTIATE_REQ: 885 - /* Only server... */ 886 - break; 887 - } 888 - 889 - /* 890 - * This is an internal error! 891 - */ 892 - log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); 893 - WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); 894 - error: 895 - put_receive_buffer(sc, response); 896 - smbd_disconnect_rdma_connection(sc); 897 - } 898 - 899 - static struct rdma_cm_id *smbd_create_id( 900 - struct smbdirect_socket *sc, 901 - struct sockaddr *dstaddr, int port) 902 - { 903 - struct smbdirect_socket_parameters *sp = &sc->parameters; 904 - struct rdma_cm_id *id; 905 - u8 node_type = RDMA_NODE_UNSPECIFIED; 906 - int rc; 907 - __be16 *sport; 908 - 909 - id = rdma_create_id(&init_net, smbd_conn_upcall, sc, 910 - RDMA_PS_TCP, IB_QPT_RC); 911 - if (IS_ERR(id)) { 912 - rc = PTR_ERR(id); 913 - log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc); 914 - return id; 915 - } 916 - 917 - switch (port) { 918 - case SMBD_PORT: 919 - /* 920 - * only allow iWarp devices 921 - * for port 5445. 922 - */ 923 - node_type = RDMA_NODE_RNIC; 924 - break; 925 - case SMB_PORT: 926 - /* 927 - * only allow InfiniBand, RoCEv1 or RoCEv2 928 - * devices for port 445. 929 - * 930 - * (Basically don't allow iWarp devices) 931 - */ 932 - node_type = RDMA_NODE_IB_CA; 933 - break; 934 - } 935 - rc = rdma_restrict_node_type(id, node_type); 936 - if (rc) { 937 - log_rdma_event(ERR, "rdma_restrict_node_type(%u) failed %i\n", 938 - node_type, rc); 939 - goto out; 940 - } 941 - 942 - if (dstaddr->sa_family == AF_INET6) 943 - sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; 944 - else 945 - sport = &((struct sockaddr_in *)dstaddr)->sin_port; 946 - 947 - *sport = htons(port); 948 - 949 - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED); 950 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING; 951 - rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, 952 - sp->resolve_addr_timeout_msec); 953 - if (rc) { 954 - log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); 955 - goto out; 956 - } 957 - rc = wait_event_interruptible_timeout( 958 - sc->status_wait, 959 - sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, 960 - msecs_to_jiffies(sp->resolve_addr_timeout_msec)); 961 - /* e.g. 
if interrupted returns -ERESTARTSYS */ 962 - if (rc < 0) { 963 - log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 964 - goto out; 965 - } 966 - if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING) { 967 - rc = -ETIMEDOUT; 968 - log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); 969 - goto out; 970 - } 971 - if (sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED) { 972 - rc = -EHOSTUNREACH; 973 - log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); 974 - goto out; 975 - } 976 - 977 - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED); 978 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING; 979 - rc = rdma_resolve_route(id, sp->resolve_route_timeout_msec); 980 - if (rc) { 981 - log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); 982 - goto out; 983 - } 984 - rc = wait_event_interruptible_timeout( 985 - sc->status_wait, 986 - sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, 987 - msecs_to_jiffies(sp->resolve_route_timeout_msec)); 988 - /* e.g. if interrupted returns -ERESTARTSYS */ 989 - if (rc < 0) { 990 - log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); 991 - goto out; 992 - } 993 - if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING) { 994 - rc = -ETIMEDOUT; 995 - log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); 996 - goto out; 997 - } 998 - if (sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED) { 999 - rc = -ENETUNREACH; 1000 - log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); 1001 - goto out; 1002 - } 1003 - 1004 - return id; 1005 - 1006 - out: 1007 - rdma_destroy_id(id); 1008 - return ERR_PTR(rc); 1009 - } 1010 - 1011 - /* 1012 - * Test if FRWR (Fast Registration Work Requests) is supported on the device 1013 - * This implementation requires FRWR on RDMA read/write 1014 - * return value: true if it is supported 1015 - */ 1016 - static bool frwr_is_supported(struct ib_device_attr *attrs) 1017 - { 1018 - if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 1019 - return false; 1020 - if (attrs->max_fast_reg_page_list_len == 0) 1021 - return false; 1022 - return true; 1023 - } 1024 - 1025 - static int smbd_ia_open( 1026 - struct smbdirect_socket *sc, 1027 - struct sockaddr *dstaddr, int port) 1028 - { 1029 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1030 - int rc; 1031 - 1032 - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); 1033 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; 1034 - 1035 - sc->rdma.cm_id = smbd_create_id(sc, dstaddr, port); 1036 - if (IS_ERR(sc->rdma.cm_id)) { 1037 - rc = PTR_ERR(sc->rdma.cm_id); 1038 - goto out1; 1039 - } 1040 - sc->ib.dev = sc->rdma.cm_id->device; 1041 - 1042 - if (!frwr_is_supported(&sc->ib.dev->attrs)) { 1043 - log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); 1044 - log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", 1045 - sc->ib.dev->attrs.device_cap_flags, 1046 - sc->ib.dev->attrs.max_fast_reg_page_list_len); 1047 - rc = -EPROTONOSUPPORT; 1048 - goto out2; 1049 - } 1050 - sp->max_frmr_depth = min_t(u32, 1051 - sp->max_frmr_depth, 1052 - sc->ib.dev->attrs.max_fast_reg_page_list_len); 1053 - sc->mr_io.type = IB_MR_TYPE_MEM_REG; 1054 - if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 1055 - sc->mr_io.type = IB_MR_TYPE_SG_GAPS; 1056 - 1057 - return 0; 1058 - 1059 - out2: 1060 - rdma_destroy_id(sc->rdma.cm_id); 1061 - sc->rdma.cm_id = NULL; 1062 - 1063 - out1: 1064 - return rc; 1065 - } 1066 - 1067 - /* 1068 - * Send a 
negotiation request message to the peer 1069 - * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3 1070 - * After negotiation, the transport is connected and ready for 1071 - * carrying upper layer SMB payload 1072 - */ 1073 - static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) 1074 - { 1075 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1076 - int rc; 1077 - struct smbdirect_send_io *request; 1078 - struct smbdirect_negotiate_req *packet; 1079 - 1080 - request = smbd_alloc_send_io(sc); 1081 - if (IS_ERR(request)) 1082 - return PTR_ERR(request); 1083 - 1084 - packet = smbdirect_send_io_payload(request); 1085 - packet->min_version = cpu_to_le16(SMBDIRECT_V1); 1086 - packet->max_version = cpu_to_le16(SMBDIRECT_V1); 1087 - packet->reserved = 0; 1088 - packet->credits_requested = cpu_to_le16(sp->send_credit_target); 1089 - packet->preferred_send_size = cpu_to_le32(sp->max_send_size); 1090 - packet->max_receive_size = cpu_to_le32(sp->max_recv_size); 1091 - packet->max_fragmented_size = 1092 - cpu_to_le32(sp->max_fragmented_recv_size); 1093 - 1094 - request->sge[0].addr = ib_dma_map_single( 1095 - sc->ib.dev, (void *)packet, 1096 - sizeof(*packet), DMA_TO_DEVICE); 1097 - if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { 1098 - rc = -EIO; 1099 - goto dma_mapping_failed; 1100 - } 1101 - request->num_sge = 1; 1102 - 1103 - request->sge[0].length = sizeof(*packet); 1104 - request->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1105 - 1106 - rc = smbd_post_send(sc, NULL, request); 1107 - if (!rc) 1108 - return 0; 1109 - 1110 - if (rc == -EAGAIN) 1111 - rc = -EIO; 1112 - 1113 - dma_mapping_failed: 1114 - smbd_free_send_io(request); 1115 - return rc; 1116 - } 1117 - 1118 - /* 1119 - * Extend the credits to remote peer 1120 - * This implements [MS-SMBD] 3.1.5.9 1121 - * The idea is that we should extend credits to remote peer as quickly as 1122 - * it's allowed, to maintain data flow. We allocate as much receive 1123 - * buffer as possible, and extend the receive credits to remote peer 1124 - * return value: the new credtis being granted. 1125 - */ 1126 - static int manage_credits_prior_sending(struct smbdirect_socket *sc) 1127 - { 1128 - int missing; 1129 - int available; 1130 - int new_credits; 1131 - 1132 - if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) 1133 - return 0; 1134 - 1135 - missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); 1136 - available = atomic_xchg(&sc->recv_io.credits.available, 0); 1137 - new_credits = (u16)min3(U16_MAX, missing, available); 1138 - if (new_credits <= 0) { 1139 - /* 1140 - * If credits are available, but not granted 1141 - * we need to re-add them again. 1142 - */ 1143 - if (available) 1144 - atomic_add(available, &sc->recv_io.credits.available); 1145 - return 0; 1146 - } 1147 - 1148 - if (new_credits < available) { 1149 - /* 1150 - * Readd the remaining available again. 1151 - */ 1152 - available -= new_credits; 1153 - atomic_add(available, &sc->recv_io.credits.available); 1154 - } 1155 - 1156 - /* 1157 - * Remember we granted the credits 1158 - */ 1159 - atomic_add(new_credits, &sc->recv_io.credits.count); 1160 - return new_credits; 1161 - } 1162 - 1163 - /* 1164 - * Check if we need to send a KEEP_ALIVE message 1165 - * The idle connection timer triggers a KEEP_ALIVE message when expires 1166 - * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send 1167 - * back a response. 
1168 - * return value: 1169 - * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set 1170 - * 0: otherwise 1171 - */ 1172 - static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) 1173 - { 1174 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1175 - 1176 - if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { 1177 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; 1178 - /* 1179 - * Now use the keepalive timeout (instead of keepalive interval) 1180 - * in order to wait for a response 1181 - */ 1182 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 1183 - msecs_to_jiffies(sp->keepalive_timeout_msec)); 1184 - return 1; 1185 - } 1186 - return 0; 1187 - } 1188 - 1189 - static int smbd_ib_post_send(struct smbdirect_socket *sc, 1190 - struct ib_send_wr *wr) 1191 - { 1192 - int ret; 1193 - 1194 - atomic_inc(&sc->send_io.pending.count); 1195 - ret = ib_post_send(sc->ib.qp, wr, NULL); 1196 - if (ret) { 1197 - pr_err("failed to post send: %d\n", ret); 1198 - smbd_disconnect_rdma_connection(sc); 1199 - ret = -EAGAIN; 1200 - } 1201 - return ret; 1202 - } 1203 - 1204 - /* Post the send request */ 1205 - static int smbd_post_send(struct smbdirect_socket *sc, 1206 - struct smbdirect_send_batch *batch, 1207 - struct smbdirect_send_io *request) 1208 - { 1209 - int i; 1210 - 1211 - for (i = 0; i < request->num_sge; i++) { 1212 - log_rdma_send(INFO, 1213 - "rdma_request sge[%d] addr=0x%llx length=%u\n", 1214 - i, request->sge[i].addr, request->sge[i].length); 1215 - ib_dma_sync_single_for_device( 1216 - sc->ib.dev, 1217 - request->sge[i].addr, 1218 - request->sge[i].length, 1219 - DMA_TO_DEVICE); 1220 - } 1221 - 1222 - request->cqe.done = send_done; 1223 - request->wr.next = NULL; 1224 - request->wr.sg_list = request->sge; 1225 - request->wr.num_sge = request->num_sge; 1226 - request->wr.opcode = IB_WR_SEND; 1227 - 1228 - if (batch) { 1229 - request->wr.wr_cqe = NULL; 1230 - request->wr.send_flags = 0; 1231 - if (!list_empty(&batch->msg_list)) { 1232 - struct smbdirect_send_io *last; 1233 - 1234 - last = list_last_entry(&batch->msg_list, 1235 - struct smbdirect_send_io, 1236 - sibling_list); 1237 - last->wr.next = &request->wr; 1238 - } 1239 - list_add_tail(&request->sibling_list, &batch->msg_list); 1240 - batch->wr_cnt++; 1241 - return 0; 1242 - } 1243 - 1244 - request->wr.wr_cqe = &request->cqe; 1245 - request->wr.send_flags = IB_SEND_SIGNALED; 1246 - return smbd_ib_post_send(sc, &request->wr); 1247 - } 1248 - 1249 - static void smbd_send_batch_init(struct smbdirect_send_batch *batch, 1250 - bool need_invalidate_rkey, 1251 - unsigned int remote_key) 1252 - { 1253 - INIT_LIST_HEAD(&batch->msg_list); 1254 - batch->wr_cnt = 0; 1255 - batch->need_invalidate_rkey = need_invalidate_rkey; 1256 - batch->remote_key = remote_key; 1257 - batch->credit = 0; 1258 - } 1259 - 1260 - static int smbd_send_batch_flush(struct smbdirect_socket *sc, 1261 - struct smbdirect_send_batch *batch, 1262 - bool is_last) 1263 - { 1264 - struct smbdirect_send_io *first, *last; 1265 - int ret = 0; 1266 - 1267 - if (list_empty(&batch->msg_list)) 1268 - goto release_credit; 1269 - 1270 - first = list_first_entry(&batch->msg_list, 1271 - struct smbdirect_send_io, 1272 - sibling_list); 1273 - last = list_last_entry(&batch->msg_list, 1274 - struct smbdirect_send_io, 1275 - sibling_list); 1276 - 1277 - if (batch->need_invalidate_rkey) { 1278 - first->wr.opcode = IB_WR_SEND_WITH_INV; 1279 - first->wr.ex.invalidate_rkey = batch->remote_key; 1280 - batch->need_invalidate_rkey = false; 1281 - batch->remote_key 
= 0; 1282 - } 1283 - 1284 - last->wr.send_flags = IB_SEND_SIGNALED; 1285 - last->wr.wr_cqe = &last->cqe; 1286 - 1287 - /* 1288 - * Remove last from batch->msg_list 1289 - * and splice the rest of batch->msg_list 1290 - * to last->sibling_list. 1291 - * 1292 - * batch->msg_list is a valid empty list 1293 - * at the end. 1294 - */ 1295 - list_del_init(&last->sibling_list); 1296 - list_splice_tail_init(&batch->msg_list, &last->sibling_list); 1297 - batch->wr_cnt = 0; 1298 - 1299 - ret = smbd_ib_post_send(sc, &first->wr); 1300 - if (ret) { 1301 - struct smbdirect_send_io *sibling, *next; 1302 - 1303 - list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { 1304 - list_del_init(&sibling->sibling_list); 1305 - smbd_free_send_io(sibling); 1306 - } 1307 - smbd_free_send_io(last); 1308 - } 1309 - 1310 - release_credit: 1311 - if (is_last && !ret && batch->credit) { 1312 - atomic_add(batch->credit, &sc->send_io.bcredits.count); 1313 - batch->credit = 0; 1314 - wake_up(&sc->send_io.bcredits.wait_queue); 1315 - } 1316 - 1317 - return ret; 1318 - } 1319 - 1320 - static int wait_for_credits(struct smbdirect_socket *sc, 1321 - wait_queue_head_t *waitq, atomic_t *total_credits, 1322 - int needed) 1323 - { 1324 - int ret; 1325 - 1326 - do { 1327 - if (atomic_sub_return(needed, total_credits) >= 0) 1328 - return 0; 1329 - 1330 - atomic_add(needed, total_credits); 1331 - ret = wait_event_interruptible(*waitq, 1332 - atomic_read(total_credits) >= needed || 1333 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 1334 - 1335 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1336 - return -ENOTCONN; 1337 - else if (ret < 0) 1338 - return ret; 1339 - } while (true); 1340 - } 1341 - 1342 - static int wait_for_send_bcredit(struct smbdirect_socket *sc, 1343 - struct smbdirect_send_batch *batch) 1344 - { 1345 - int ret; 1346 - 1347 - if (batch->credit) 1348 - return 0; 1349 - 1350 - ret = wait_for_credits(sc, 1351 - &sc->send_io.bcredits.wait_queue, 1352 - &sc->send_io.bcredits.count, 1353 - 1); 1354 - if (ret) 1355 - return ret; 1356 - 1357 - batch->credit = 1; 1358 - return 0; 1359 - } 1360 - 1361 - static int wait_for_send_lcredit(struct smbdirect_socket *sc, 1362 - struct smbdirect_send_batch *batch) 1363 - { 1364 - if (batch && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { 1365 - int ret; 1366 - 1367 - ret = smbd_send_batch_flush(sc, batch, false); 1368 - if (ret) 1369 - return ret; 1370 - } 1371 - 1372 - return wait_for_credits(sc, 1373 - &sc->send_io.lcredits.wait_queue, 1374 - &sc->send_io.lcredits.count, 1375 - 1); 1376 - } 1377 - 1378 - static int wait_for_send_credits(struct smbdirect_socket *sc, 1379 - struct smbdirect_send_batch *batch) 1380 - { 1381 - if (batch && 1382 - (batch->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { 1383 - int ret; 1384 - 1385 - ret = smbd_send_batch_flush(sc, batch, false); 1386 - if (ret) 1387 - return ret; 1388 - } 1389 - 1390 - return wait_for_credits(sc, 1391 - &sc->send_io.credits.wait_queue, 1392 - &sc->send_io.credits.count, 1393 - 1); 1394 - } 1395 - 1396 - static int smbd_post_send_iter(struct smbdirect_socket *sc, 1397 - struct smbdirect_send_batch *batch, 1398 - struct iov_iter *iter, 1399 - int *_remaining_data_length) 1400 - { 1401 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1402 - int rc; 1403 - int header_length; 1404 - int data_length; 1405 - struct smbdirect_send_io *request; 1406 - struct smbdirect_data_transfer *packet; 1407 - int new_credits = 0; 1408 - struct smbdirect_send_batch _batch; 1409 - 1410 - if 
(!batch) { 1411 - smbd_send_batch_init(&_batch, false, 0); 1412 - batch = &_batch; 1413 - } 1414 - 1415 - rc = wait_for_send_bcredit(sc, batch); 1416 - if (rc) { 1417 - log_outgoing(ERR, "disconnected not sending on wait_bcredit\n"); 1418 - rc = -EAGAIN; 1419 - goto err_wait_bcredit; 1420 - } 1421 - 1422 - rc = wait_for_send_lcredit(sc, batch); 1423 - if (rc) { 1424 - log_outgoing(ERR, "disconnected not sending on wait_lcredit\n"); 1425 - rc = -EAGAIN; 1426 - goto err_wait_lcredit; 1427 - } 1428 - 1429 - rc = wait_for_send_credits(sc, batch); 1430 - if (rc) { 1431 - log_outgoing(ERR, "disconnected not sending on wait_credit\n"); 1432 - rc = -EAGAIN; 1433 - goto err_wait_credit; 1434 - } 1435 - 1436 - new_credits = manage_credits_prior_sending(sc); 1437 - if (new_credits == 0 && 1438 - atomic_read(&sc->send_io.credits.count) == 0 && 1439 - atomic_read(&sc->recv_io.credits.count) == 0) { 1440 - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 1441 - rc = wait_event_interruptible(sc->send_io.credits.wait_queue, 1442 - atomic_read(&sc->send_io.credits.count) >= 1 || 1443 - atomic_read(&sc->recv_io.credits.available) >= 1 || 1444 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 1445 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1446 - rc = -ENOTCONN; 1447 - if (rc < 0) { 1448 - log_outgoing(ERR, "disconnected not sending on last credit\n"); 1449 - rc = -EAGAIN; 1450 - goto err_wait_credit; 1451 - } 1452 - 1453 - new_credits = manage_credits_prior_sending(sc); 1454 - } 1455 - 1456 - request = smbd_alloc_send_io(sc); 1457 - if (IS_ERR(request)) { 1458 - rc = PTR_ERR(request); 1459 - goto err_alloc; 1460 - } 1461 - 1462 - memset(request->sge, 0, sizeof(request->sge)); 1463 - 1464 - /* Map the packet to DMA */ 1465 - header_length = sizeof(struct smbdirect_data_transfer); 1466 - /* If this is a packet without payload, don't send padding */ 1467 - if (!iter) 1468 - header_length = offsetof(struct smbdirect_data_transfer, padding); 1469 - 1470 - packet = smbdirect_send_io_payload(request); 1471 - request->sge[0].addr = ib_dma_map_single(sc->ib.dev, 1472 - (void *)packet, 1473 - header_length, 1474 - DMA_TO_DEVICE); 1475 - if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { 1476 - rc = -EIO; 1477 - goto err_dma; 1478 - } 1479 - 1480 - request->sge[0].length = header_length; 1481 - request->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1482 - request->num_sge = 1; 1483 - 1484 - /* Fill in the data payload to find out how much data we can add */ 1485 - if (iter) { 1486 - struct smb_extract_to_rdma extract = { 1487 - .nr_sge = request->num_sge, 1488 - .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, 1489 - .sge = request->sge, 1490 - .device = sc->ib.dev, 1491 - .local_dma_lkey = sc->ib.pd->local_dma_lkey, 1492 - .direction = DMA_TO_DEVICE, 1493 - }; 1494 - size_t payload_len = umin(*_remaining_data_length, 1495 - sp->max_send_size - sizeof(*packet)); 1496 - 1497 - rc = smb_extract_iter_to_rdma(iter, payload_len, 1498 - &extract); 1499 - if (rc < 0) 1500 - goto err_dma; 1501 - data_length = rc; 1502 - request->num_sge = extract.nr_sge; 1503 - *_remaining_data_length -= data_length; 1504 - } else { 1505 - data_length = 0; 1506 - } 1507 - 1508 - /* Fill in the packet header */ 1509 - packet->credits_requested = cpu_to_le16(sp->send_credit_target); 1510 - packet->credits_granted = cpu_to_le16(new_credits); 1511 - 1512 - packet->flags = 0; 1513 - if (manage_keep_alive_before_sending(sc)) 1514 - packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); 1515 - 1516 - packet->reserved = 0; 1517 - if 
(!data_length) 1518 - packet->data_offset = 0; 1519 - else 1520 - packet->data_offset = cpu_to_le32(24); 1521 - packet->data_length = cpu_to_le32(data_length); 1522 - packet->remaining_data_length = cpu_to_le32(*_remaining_data_length); 1523 - packet->padding = 0; 1524 - 1525 - log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1526 - le16_to_cpu(packet->credits_requested), 1527 - le16_to_cpu(packet->credits_granted), 1528 - le32_to_cpu(packet->data_offset), 1529 - le32_to_cpu(packet->data_length), 1530 - le32_to_cpu(packet->remaining_data_length)); 1531 - 1532 - rc = smbd_post_send(sc, batch, request); 1533 - if (!rc) { 1534 - /* 1535 - * From here request is moved to batch 1536 - * and we should not free it explicitly. 1537 - */ 1538 - 1539 - if (batch != &_batch) 1540 - return 0; 1541 - 1542 - rc = smbd_send_batch_flush(sc, batch, true); 1543 - if (!rc) 1544 - return 0; 1545 - 1546 - goto err_flush; 1547 - } 1548 - 1549 - err_dma: 1550 - smbd_free_send_io(request); 1551 - 1552 - err_flush: 1553 - err_alloc: 1554 - atomic_inc(&sc->send_io.credits.count); 1555 - wake_up(&sc->send_io.credits.wait_queue); 1556 - 1557 - err_wait_credit: 1558 - atomic_inc(&sc->send_io.lcredits.count); 1559 - wake_up(&sc->send_io.lcredits.wait_queue); 1560 - 1561 - err_wait_lcredit: 1562 - atomic_add(batch->credit, &sc->send_io.bcredits.count); 1563 - batch->credit = 0; 1564 - wake_up(&sc->send_io.bcredits.wait_queue); 1565 - 1566 - err_wait_bcredit: 1567 - return rc; 1568 - } 1569 - 1570 - /* 1571 - * Send an empty message 1572 - * Empty message is used to extend credits to peer to for keep live 1573 - * while there is no upper layer payload to send at the time 1574 - */ 1575 - static int smbd_post_send_empty(struct smbdirect_socket *sc) 1576 - { 1577 - int remaining_data_length = 0; 1578 - 1579 - sc->statistics.send_empty++; 1580 - return smbd_post_send_iter(sc, NULL, NULL, &remaining_data_length); 1581 - } 1582 - 1583 188 static int smbd_post_send_full_iter(struct smbdirect_socket *sc, 1584 189 struct smbdirect_send_batch *batch, 1585 190 struct iov_iter *iter, 1586 - int *_remaining_data_length) 191 + u32 remaining_data_length) 1587 192 { 1588 - int rc = 0; 193 + int bytes = 0; 1589 194 1590 195 /* 1591 - * smbd_post_send_iter() respects the 196 + * smbdirect_connection_send_single_iter() respects the 1592 197 * negotiated max_send_size, so we need to 1593 198 * loop until the full iter is posted 1594 199 */ 1595 200 1596 201 while (iov_iter_count(iter) > 0) { 1597 - rc = smbd_post_send_iter(sc, batch, iter, _remaining_data_length); 202 + int rc; 203 + 204 + rc = smbdirect_connection_send_single_iter(sc, 205 + batch, 206 + iter, 207 + 0, /* flags */ 208 + remaining_data_length); 1598 209 if (rc < 0) 1599 - break; 210 + return rc; 211 + remaining_data_length -= rc; 212 + bytes += rc; 1600 213 } 1601 214 1602 - return rc; 1603 - } 1604 - 1605 - /* 1606 - * Post a receive request to the transport 1607 - * The remote peer can only send data when a receive request is posted 1608 - * The interaction is controlled by send/receive credit system 1609 - */ 1610 - static int smbd_post_recv( 1611 - struct smbdirect_socket *sc, struct smbdirect_recv_io *response) 1612 - { 1613 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1614 - struct ib_recv_wr recv_wr; 1615 - int rc = -EIO; 1616 - 1617 - response->sge.addr = ib_dma_map_single( 1618 - sc->ib.dev, response->packet, 1619 - sp->max_recv_size, DMA_FROM_DEVICE); 1620 - if 
(ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) 1621 - return rc; 1622 - 1623 - response->sge.length = sp->max_recv_size; 1624 - response->sge.lkey = sc->ib.pd->local_dma_lkey; 1625 - 1626 - response->cqe.done = recv_done; 1627 - 1628 - recv_wr.wr_cqe = &response->cqe; 1629 - recv_wr.next = NULL; 1630 - recv_wr.sg_list = &response->sge; 1631 - recv_wr.num_sge = 1; 1632 - 1633 - rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); 1634 - if (rc) { 1635 - ib_dma_unmap_single(sc->ib.dev, response->sge.addr, 1636 - response->sge.length, DMA_FROM_DEVICE); 1637 - response->sge.length = 0; 1638 - smbd_disconnect_rdma_connection(sc); 1639 - log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); 1640 - } 1641 - 1642 - return rc; 1643 - } 1644 - 1645 - /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ 1646 - static int smbd_negotiate(struct smbdirect_socket *sc) 1647 - { 1648 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1649 - int rc; 1650 - struct smbdirect_recv_io *response = get_receive_buffer(sc); 1651 - 1652 - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); 1653 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; 1654 - 1655 - sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; 1656 - rc = smbd_post_recv(sc, response); 1657 - log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", 1658 - rc, response->sge.addr, 1659 - response->sge.length, response->sge.lkey); 1660 - if (rc) { 1661 - put_receive_buffer(sc, response); 1662 - return rc; 1663 - } 1664 - 1665 - rc = smbd_post_send_negotiate_req(sc); 1666 - if (rc) 1667 - return rc; 1668 - 1669 - rc = wait_event_interruptible_timeout( 1670 - sc->status_wait, 1671 - sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, 1672 - msecs_to_jiffies(sp->negotiate_timeout_msec)); 1673 - log_rdma_event(INFO, "wait_event_interruptible_timeout rc=%d\n", rc); 1674 - 1675 - if (sc->status == SMBDIRECT_SOCKET_CONNECTED) 1676 - return 0; 1677 - 1678 - if (rc == 0) 1679 - rc = -ETIMEDOUT; 1680 - else if (rc == -ERESTARTSYS) 1681 - rc = -EINTR; 1682 - else 1683 - rc = -ENOTCONN; 1684 - 1685 - return rc; 1686 - } 1687 - 1688 - /* 1689 - * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1 1690 - * This is a queue for reassembling upper layer payload and presenting it to 1691 - * the upper layer. All incoming payloads go to the reassembly queue, regardless 1692 - * of whether reassembly is required. The upper layer code reads from the queue 1693 - * for all incoming payloads. 1694 - * Put a received packet to the reassembly queue 1695 - * response: the packet received 1696 - * data_length: the size of payload in this packet 1697 - */ 1698 - static void enqueue_reassembly( 1699 - struct smbdirect_socket *sc, 1700 - struct smbdirect_recv_io *response, 1701 - int data_length) 1702 - { 1703 - unsigned long flags; 1704 - 1705 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 1706 - list_add_tail(&response->list, &sc->recv_io.reassembly.list); 1707 - sc->recv_io.reassembly.queue_length++; 1708 - /* 1709 - * Make sure reassembly_data_length is updated after list and 1710 - * reassembly_queue_length are updated.
On the dequeue side 1711 - * reassembly_data_length is checked without a lock to determine 1712 - * if reassembly_queue_length and list are up to date 1713 - */ 1714 - virt_wmb(); 1715 - sc->recv_io.reassembly.data_length += data_length; 1716 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 1717 - sc->statistics.enqueue_reassembly_queue++; 1718 - } 1719 - 1720 - /* 1721 - * Get the first entry at the front of reassembly queue 1722 - * Caller is responsible for locking 1723 - * return value: the first entry if any, NULL if queue is empty 1724 - */ 1725 - static struct smbdirect_recv_io *_get_first_reassembly(struct smbdirect_socket *sc) 1726 - { 1727 - struct smbdirect_recv_io *ret = NULL; 1728 - 1729 - if (!list_empty(&sc->recv_io.reassembly.list)) { 1730 - ret = list_first_entry( 1731 - &sc->recv_io.reassembly.list, 1732 - struct smbdirect_recv_io, list); 1733 - } 1734 - return ret; 1735 - } 1736 - 1737 - /* 1738 - * Get a receive buffer 1739 - * For each remote send, we need to post a receive. The receive buffers are 1740 - * pre-allocated in advance. 1741 - * return value: the receive buffer, NULL if none is available 1742 - */ 1743 - static struct smbdirect_recv_io *get_receive_buffer(struct smbdirect_socket *sc) 1744 - { 1745 - struct smbdirect_recv_io *ret = NULL; 1746 - unsigned long flags; 1747 - 1748 - spin_lock_irqsave(&sc->recv_io.free.lock, flags); 1749 - if (!list_empty(&sc->recv_io.free.list)) { 1750 - ret = list_first_entry( 1751 - &sc->recv_io.free.list, 1752 - struct smbdirect_recv_io, list); 1753 - list_del(&ret->list); 1754 - sc->statistics.get_receive_buffer++; 1755 - } 1756 - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 1757 - 1758 - return ret; 1759 - } 1760 - 1761 - /* 1762 - * Return a receive buffer 1763 - * Upon return of a receive buffer, we can post a new receive and extend 1764 - * more receive credits to the remote peer. This is done immediately after a 1765 - * receive buffer is returned.
1766 - */ 1767 - static void put_receive_buffer( 1768 - struct smbdirect_socket *sc, struct smbdirect_recv_io *response) 1769 - { 1770 - unsigned long flags; 1771 - 1772 - if (likely(response->sge.length != 0)) { 1773 - ib_dma_unmap_single(sc->ib.dev, 1774 - response->sge.addr, 1775 - response->sge.length, 1776 - DMA_FROM_DEVICE); 1777 - response->sge.length = 0; 1778 - } 1779 - 1780 - spin_lock_irqsave(&sc->recv_io.free.lock, flags); 1781 - list_add_tail(&response->list, &sc->recv_io.free.list); 1782 - sc->statistics.put_receive_buffer++; 1783 - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 1784 - 1785 - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 1786 - } 1787 - 1788 - /* Preallocate all receive buffers on transport establishment */ 1789 - static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) 1790 - { 1791 - struct smbdirect_recv_io *response; 1792 - int i; 1793 - 1794 - for (i = 0; i < num_buf; i++) { 1795 - response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); 1796 - if (!response) 1797 - goto allocate_failed; 1798 - 1799 - response->socket = sc; 1800 - response->sge.length = 0; 1801 - list_add_tail(&response->list, &sc->recv_io.free.list); 1802 - } 1803 - 1804 - return 0; 1805 - 1806 - allocate_failed: 1807 - while (!list_empty(&sc->recv_io.free.list)) { 1808 - response = list_first_entry( 1809 - &sc->recv_io.free.list, 1810 - struct smbdirect_recv_io, list); 1811 - list_del(&response->list); 1812 - 1813 - mempool_free(response, sc->recv_io.mem.pool); 1814 - } 1815 - return -ENOMEM; 1816 - } 1817 - 1818 - static void destroy_receive_buffers(struct smbdirect_socket *sc) 1819 - { 1820 - struct smbdirect_recv_io *response; 1821 - 1822 - while ((response = get_receive_buffer(sc))) 1823 - mempool_free(response, sc->recv_io.mem.pool); 1824 - } 1825 - 1826 - static void send_immediate_empty_message(struct work_struct *work) 1827 - { 1828 - struct smbdirect_socket *sc = 1829 - container_of(work, struct smbdirect_socket, idle.immediate_work); 1830 - 1831 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1832 - return; 1833 - 1834 - log_keep_alive(INFO, "send an empty message\n"); 1835 - smbd_post_send_empty(sc); 1836 - } 1837 - 1838 - /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ 1839 - static void idle_connection_timer(struct work_struct *work) 1840 - { 1841 - struct smbdirect_socket *sc = 1842 - container_of(work, struct smbdirect_socket, idle.timer_work.work); 1843 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1844 - 1845 - if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { 1846 - log_keep_alive(ERR, 1847 - "error status sc->idle.keepalive=%d\n", 1848 - sc->idle.keepalive); 1849 - smbd_disconnect_rdma_connection(sc); 1850 - return; 1851 - } 1852 - 1853 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1854 - return; 1855 - 1856 - /* 1857 - * Now use the keepalive timeout (instead of keepalive interval) 1858 - * in order to wait for a response 1859 - */ 1860 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 1861 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 1862 - msecs_to_jiffies(sp->keepalive_timeout_msec)); 1863 - log_keep_alive(INFO, "schedule send of empty idle message\n"); 1864 - queue_work(sc->workqueue, &sc->idle.immediate_work); 215 + return bytes; 1865 216 } 1866 217 1867 218 /* ··· 203 1892 void smbd_destroy(struct TCP_Server_Info *server) 204 1893 { 205 1894 struct smbd_connection *info = server->smbd_conn; 206 - struct smbdirect_socket *sc; 207 - struct smbdirect_recv_io *response; 208 -
unsigned long flags; 209 1895 210 1896 if (!info) { 211 1897 log_rdma_event(INFO, "rdma session already destroyed\n"); 212 1898 return; 213 1899 } 214 - sc = &info->socket; 215 1900 216 - log_rdma_event(INFO, "cancelling and disabling disconnect_work\n"); 217 - disable_work_sync(&sc->disconnect_work); 1901 + smbdirect_socket_release(info->socket); 218 1902 219 - log_rdma_event(INFO, "destroying rdma session\n"); 220 - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) 221 - smbd_disconnect_rdma_work(&sc->disconnect_work); 222 - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { 223 - log_rdma_event(INFO, "wait for transport being disconnected\n"); 224 - wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 225 - log_rdma_event(INFO, "waited for transport being disconnected\n"); 226 - } 227 - 228 - /* 229 - * Wake up all waiters in all wait queues 230 - * in order to notice the broken connection. 231 - * 232 - * Most likely this was already called via 233 - * smbd_disconnect_rdma_work(), but call it again... 234 - */ 235 - smbd_disconnect_wake_up_all(sc); 236 - 237 - log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); 238 - disable_work_sync(&sc->recv_io.posted.refill_work); 239 - 240 - log_rdma_event(INFO, "destroying qp\n"); 241 - ib_drain_qp(sc->ib.qp); 242 - rdma_destroy_qp(sc->rdma.cm_id); 243 - sc->ib.qp = NULL; 244 - 245 - log_rdma_event(INFO, "cancelling idle timer\n"); 246 - disable_delayed_work_sync(&sc->idle.timer_work); 247 - log_rdma_event(INFO, "cancelling send immediate work\n"); 248 - disable_work_sync(&sc->idle.immediate_work); 249 - 250 - /* It's not possible for upper layer to get to reassembly */ 251 - log_rdma_event(INFO, "drain the reassembly queue\n"); 252 - do { 253 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 254 - response = _get_first_reassembly(sc); 255 - if (response) { 256 - list_del(&response->list); 257 - spin_unlock_irqrestore( 258 - &sc->recv_io.reassembly.lock, flags); 259 - put_receive_buffer(sc, response); 260 - } else 261 - spin_unlock_irqrestore( 262 - &sc->recv_io.reassembly.lock, flags); 263 - } while (response); 264 - sc->recv_io.reassembly.data_length = 0; 265 - 266 - log_rdma_event(INFO, "free receive buffers\n"); 267 - destroy_receive_buffers(sc); 268 - 269 - log_rdma_event(INFO, "freeing mr list\n"); 270 - destroy_mr_list(sc); 271 - 272 - ib_free_cq(sc->ib.send_cq); 273 - ib_free_cq(sc->ib.recv_cq); 274 - ib_dealloc_pd(sc->ib.pd); 275 - rdma_destroy_id(sc->rdma.cm_id); 276 - 277 - /* free mempools */ 278 - mempool_destroy(sc->send_io.mem.pool); 279 - kmem_cache_destroy(sc->send_io.mem.cache); 280 - 281 - mempool_destroy(sc->recv_io.mem.pool); 282 - kmem_cache_destroy(sc->recv_io.mem.cache); 283 - 284 - sc->status = SMBDIRECT_SOCKET_DESTROYED; 285 - 286 - destroy_workqueue(sc->workqueue); 287 - log_rdma_event(INFO, "rdma session destroyed\n"); 288 1903 kfree(info); 289 1904 server->smbd_conn = NULL; 290 1905 } ··· 232 1995 * This is possible if transport is disconnected and we haven't received 233 1996 * notification from RDMA, but upper layer has detected timeout 234 1997 */ 235 - if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { 236 - log_rdma_event(INFO, "disconnecting transport\n"); 237 - smbd_destroy(server); 238 - } 1998 + log_rdma_event(INFO, "disconnecting transport\n"); 1999 + smbd_destroy(server); 239 2000 240 2001 create_conn: 241 2002 log_rdma_event(INFO, "creating rdma session\n"); ··· 249 2014 return -ENOENT; 250 2015 } 251 2016 252 - static void destroy_caches(struct
smbdirect_socket *sc) 253 - { 254 - destroy_receive_buffers(sc); 255 - mempool_destroy(sc->recv_io.mem.pool); 256 - kmem_cache_destroy(sc->recv_io.mem.cache); 257 - mempool_destroy(sc->send_io.mem.pool); 258 - kmem_cache_destroy(sc->send_io.mem.cache); 259 - } 260 - 261 - #define MAX_NAME_LEN 80 262 - static int allocate_caches(struct smbdirect_socket *sc) 263 - { 264 - struct smbdirect_socket_parameters *sp = &sc->parameters; 265 - char name[MAX_NAME_LEN]; 266 - int rc; 267 - 268 - if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) 269 - return -ENOMEM; 270 - 271 - scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", sc); 272 - sc->send_io.mem.cache = 273 - kmem_cache_create( 274 - name, 275 - sizeof(struct smbdirect_send_io) + 276 - sizeof(struct smbdirect_data_transfer), 277 - 0, SLAB_HWCACHE_ALIGN, NULL); 278 - if (!sc->send_io.mem.cache) 279 - return -ENOMEM; 280 - 281 - sc->send_io.mem.pool = 282 - mempool_create(sp->send_credit_target, mempool_alloc_slab, 283 - mempool_free_slab, sc->send_io.mem.cache); 284 - if (!sc->send_io.mem.pool) 285 - goto out1; 286 - 287 - scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", sc); 288 - 289 - struct kmem_cache_args response_args = { 290 - .align = __alignof__(struct smbdirect_recv_io), 291 - .useroffset = (offsetof(struct smbdirect_recv_io, packet) + 292 - sizeof(struct smbdirect_data_transfer)), 293 - .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), 294 - }; 295 - sc->recv_io.mem.cache = 296 - kmem_cache_create(name, 297 - sizeof(struct smbdirect_recv_io) + sp->max_recv_size, 298 - &response_args, SLAB_HWCACHE_ALIGN); 299 - if (!sc->recv_io.mem.cache) 300 - goto out2; 301 - 302 - sc->recv_io.mem.pool = 303 - mempool_create(sp->recv_credit_max, mempool_alloc_slab, 304 - mempool_free_slab, sc->recv_io.mem.cache); 305 - if (!sc->recv_io.mem.pool) 306 - goto out3; 307 - 308 - rc = allocate_receive_buffers(sc, sp->recv_credit_max); 309 - if (rc) { 310 - log_rdma_event(ERR, "failed to allocate receive buffers\n"); 311 - goto out4; 312 - } 313 - 314 - return 0; 315 - 316 - out4: 317 - mempool_destroy(sc->recv_io.mem.pool); 318 - out3: 319 - kmem_cache_destroy(sc->recv_io.mem.cache); 320 - out2: 321 - mempool_destroy(sc->send_io.mem.pool); 322 - out1: 323 - kmem_cache_destroy(sc->send_io.mem.cache); 324 - return -ENOMEM; 325 - } 326 - 327 2017 /* Create a SMBD connection, called by upper layer */ 328 2018 static struct smbd_connection *_smbd_get_connection( 329 2019 struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) 330 2020 { 331 - int rc; 2021 + struct net *net = cifs_net_ns(server); 332 2022 struct smbd_connection *info; 333 2023 struct smbdirect_socket *sc; 2024 + struct smbdirect_socket_parameters init_params = {}; 334 2025 struct smbdirect_socket_parameters *sp; 335 - struct rdma_conn_param conn_param; 336 - struct ib_qp_cap qp_cap; 337 - struct ib_qp_init_attr qp_attr; 338 - struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; 339 - struct ib_port_immutable port_immutable; 340 - __be32 ird_ord_hdr[2]; 341 - char wq_name[80]; 342 - struct workqueue_struct *workqueue; 2026 + __be16 *sport; 2027 + u64 port_flags = 0; 2028 + int ret; 343 2029 344 - info = kzalloc_obj(struct smbd_connection); 345 - if (!info) 346 - return NULL; 347 - sc = &info->socket; 348 - scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", sc); 349 - workqueue = create_workqueue(wq_name); 350 - if (!workqueue) 351 - goto create_wq_failed; 352 - smbdirect_socket_init(sc); 353 - sc->workqueue = 
workqueue; 354 - sp = &sc->parameters; 2030 + switch (port) { 2031 + case SMBD_PORT: 2032 + /* 2033 + * only allow iWarp devices 2034 + * for port 5445. 2035 + */ 2036 + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW; 2037 + break; 2038 + case SMB_PORT: 2039 + /* 2040 + * only allow InfiniBand, RoCEv1 or RoCEv2 2041 + * devices for port 445. 2042 + * 2043 + * (Basically don't allow iWarp devices) 2044 + */ 2045 + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB; 2046 + break; 2047 + } 355 2048 356 - INIT_WORK(&sc->disconnect_work, smbd_disconnect_rdma_work); 357 - 2049 + /* 2050 + * Create the initial parameters 2051 + */ 2052 + sp = &init_params; 2053 + sp->flags = port_flags; 358 2054 sp->resolve_addr_timeout_msec = RDMA_RESOLVE_TIMEOUT; 359 2055 sp->resolve_route_timeout_msec = RDMA_RESOLVE_TIMEOUT; 360 2056 sp->rdma_connect_timeout_msec = RDMA_RESOLVE_TIMEOUT; ··· 301 2135 sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; 302 2136 sp->keepalive_timeout_msec = KEEPALIVE_RECV_TIMEOUT * 1000; 303 2137 304 - rc = smbd_ia_open(sc, dstaddr, port); 305 - if (rc) { 306 - log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); 307 - goto create_id_failed; 2138 + info = kzalloc_obj(*info); 2139 + if (!info) 2140 + return NULL; 2141 + ret = smbdirect_socket_create_kern(net, &sc); 2142 + if (ret) 2143 + goto socket_init_failed; 2144 + smbdirect_socket_set_logging(sc, NULL, smbd_logging_needed, smbd_logging_vaprintf); 2145 + ret = smbdirect_socket_set_initial_parameters(sc, sp); 2146 + if (ret) 2147 + goto set_params_failed; 2148 + ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_SOFTIRQ, GFP_KERNEL); 2149 + if (ret) 2150 + goto set_settings_failed; 2151 + 2152 + if (dstaddr->sa_family == AF_INET6) 2153 + sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; 2154 + else 2155 + sport = &((struct sockaddr_in *)dstaddr)->sin_port; 2156 + 2157 + *sport = htons(port); 2158 + 2159 + ret = smbdirect_connect_sync(sc, dstaddr); 2160 + if (ret) { 2161 + log_rdma_event(ERR, "connect to %pISpsfc failed: %1pe\n", 2162 + dstaddr, ERR_PTR(ret)); 2163 + goto connect_failed; 308 2164 } 309 2165 310 - if (sp->send_credit_target > sc->ib.dev->attrs.max_cqe || 311 - sp->send_credit_target > sc->ib.dev->attrs.max_qp_wr) { 312 - log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 313 - sp->send_credit_target, 314 - sc->ib.dev->attrs.max_cqe, 315 - sc->ib.dev->attrs.max_qp_wr); 316 - goto config_failed; 317 - } 318 - 319 - if (sp->recv_credit_max > sc->ib.dev->attrs.max_cqe || 320 - sp->recv_credit_max > sc->ib.dev->attrs.max_qp_wr) { 321 - log_rdma_event(ERR, "consider lowering receive_credit_max = %d. 
Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", 322 - sp->recv_credit_max, 323 - sc->ib.dev->attrs.max_cqe, 324 - sc->ib.dev->attrs.max_qp_wr); 325 - goto config_failed; 326 - } 327 - 328 - if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || 329 - sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { 330 - log_rdma_event(ERR, 331 - "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", 332 - IB_DEVICE_NAME_MAX, 333 - sc->ib.dev->name, 334 - sc->ib.dev->attrs.max_send_sge, 335 - sc->ib.dev->attrs.max_recv_sge); 336 - goto config_failed; 337 - } 338 - 339 - sp->responder_resources = 340 - min_t(u8, sp->responder_resources, 341 - sc->ib.dev->attrs.max_qp_rd_atom); 342 - log_rdma_mr(INFO, "responder_resources=%d\n", 343 - sp->responder_resources); 344 - 345 - /* 346 - * We allocate sp->responder_resources * 2 MRs 347 - * and each MR needs WRs for REG and INV, so 348 - * we use '* 4'. 349 - * 350 - * +1 for ib_drain_qp() 351 - */ 352 - memset(&qp_cap, 0, sizeof(qp_cap)); 353 - qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; 354 - qp_cap.max_recv_wr = sp->recv_credit_max + 1; 355 - qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; 356 - qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; 357 - 358 - sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); 359 - if (IS_ERR(sc->ib.pd)) { 360 - rc = PTR_ERR(sc->ib.pd); 361 - sc->ib.pd = NULL; 362 - log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); 363 - goto alloc_pd_failed; 364 - } 365 - 366 - sc->ib.send_cq = 367 - ib_alloc_cq_any(sc->ib.dev, sc, 368 - qp_cap.max_send_wr, IB_POLL_SOFTIRQ); 369 - if (IS_ERR(sc->ib.send_cq)) { 370 - sc->ib.send_cq = NULL; 371 - goto alloc_cq_failed; 372 - } 373 - 374 - sc->ib.recv_cq = 375 - ib_alloc_cq_any(sc->ib.dev, sc, 376 - qp_cap.max_recv_wr, IB_POLL_SOFTIRQ); 377 - if (IS_ERR(sc->ib.recv_cq)) { 378 - sc->ib.recv_cq = NULL; 379 - goto alloc_cq_failed; 380 - } 381 - 382 - memset(&qp_attr, 0, sizeof(qp_attr)); 383 - qp_attr.event_handler = smbd_qp_async_error_upcall; 384 - qp_attr.qp_context = sc; 385 - qp_attr.cap = qp_cap; 386 - qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 387 - qp_attr.qp_type = IB_QPT_RC; 388 - qp_attr.send_cq = sc->ib.send_cq; 389 - qp_attr.recv_cq = sc->ib.recv_cq; 390 - qp_attr.port_num = ~0; 391 - 392 - rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); 393 - if (rc) { 394 - log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); 395 - goto create_qp_failed; 396 - } 397 - sc->ib.qp = sc->rdma.cm_id->qp; 398 - 399 - memset(&conn_param, 0, sizeof(conn_param)); 400 - conn_param.initiator_depth = sp->initiator_depth; 401 - conn_param.responder_resources = sp->responder_resources; 402 - 403 - /* Need to send IRD/ORD in private data for iWARP */ 404 - sc->ib.dev->ops.get_port_immutable( 405 - sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); 406 - if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { 407 - ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); 408 - ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); 409 - conn_param.private_data = ird_ord_hdr; 410 - conn_param.private_data_len = sizeof(ird_ord_hdr); 411 - } else { 412 - conn_param.private_data = NULL; 413 - conn_param.private_data_len = 0; 414 - } 415 - 416 - conn_param.retry_count = SMBD_CM_RETRY; 417 - conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; 418 - conn_param.flow_control = 0; 419 - 420 - log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", 421 - &addr_in->sin_addr, port); 422 - 423 - WARN_ON_ONCE(sc->status !=
SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); 424 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; 425 - rc = rdma_connect(sc->rdma.cm_id, &conn_param); 426 - if (rc) { 427 - log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); 428 - goto rdma_connect_failed; 429 - } 430 - 431 - wait_event_interruptible_timeout( 432 - sc->status_wait, 433 - sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, 434 - msecs_to_jiffies(sp->rdma_connect_timeout_msec)); 435 - 436 - if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) { 437 - log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); 438 - goto rdma_connect_failed; 439 - } 440 - 441 - log_rdma_event(INFO, "rdma_connect connected\n"); 442 - 443 - rc = allocate_caches(sc); 444 - if (rc) { 445 - log_rdma_event(ERR, "cache allocation failed\n"); 446 - goto allocate_cache_failed; 447 - } 448 - 449 - INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); 450 - INIT_DELAYED_WORK(&sc->idle.timer_work, idle_connection_timer); 451 - /* 452 - * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING 453 - * so that the timer will cause a disconnect. 454 - */ 455 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 456 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 457 - msecs_to_jiffies(sp->negotiate_timeout_msec)); 458 - 459 - INIT_WORK(&sc->recv_io.posted.refill_work, smbd_post_send_credits); 460 - 461 - rc = smbd_negotiate(sc); 462 - if (rc) { 463 - log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); 464 - goto negotiation_failed; 465 - } 466 - 467 - rc = allocate_mr_list(sc); 468 - if (rc) { 469 - log_rdma_mr(ERR, "memory registration allocation failed\n"); 470 - goto allocate_mr_failed; 471 - } 472 - 2166 + info->socket = sc; 473 2167 return info; 474 2168 475 - allocate_mr_failed: 476 - /* At this point, we need a full transport shutdown */ 477 - server->smbd_conn = info; 478 - smbd_destroy(server); 479 - return NULL; 480 - 481 - negotiation_failed: 482 - disable_delayed_work_sync(&sc->idle.timer_work); 483 - destroy_caches(sc); 484 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 485 - rdma_disconnect(sc->rdma.cm_id); 486 - wait_event(sc->status_wait, 487 - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 488 - 489 - allocate_cache_failed: 490 - rdma_connect_failed: 491 - rdma_destroy_qp(sc->rdma.cm_id); 492 - 493 - create_qp_failed: 494 - alloc_cq_failed: 495 - if (sc->ib.send_cq) 496 - ib_free_cq(sc->ib.send_cq); 497 - if (sc->ib.recv_cq) 498 - ib_free_cq(sc->ib.recv_cq); 499 - 500 - ib_dealloc_pd(sc->ib.pd); 501 - 502 - alloc_pd_failed: 503 - config_failed: 504 - rdma_destroy_id(sc->rdma.cm_id); 505 - 506 - create_id_failed: 507 - destroy_workqueue(sc->workqueue); 508 - create_wq_failed: 2169 + connect_failed: 2170 + set_settings_failed: 2171 + set_params_failed: 2172 + smbdirect_socket_release(sc); 2173 + socket_init_failed: 509 2174 kfree(info); 510 2175 return NULL; 2176 + } 2177 + 2178 + const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) 2179 + { 2180 + if (unlikely(!conn->socket)) { 2181 + static const struct smbdirect_socket_parameters zero_params; 2182 + 2183 + return &zero_params; 2184 + } 2185 + 2186 + return smbdirect_socket_get_current_parameters(conn->socket); 511 2187 } 512 2188 513 2189 struct smbd_connection *smbd_get_connection( ··· 370 2362 if (!ret) 371 2363 return NULL; 372 2364 373 - sp = &ret->socket.parameters; 2365 + sp = smbd_get_parameters(ret); 374 2366 375 2367 server->rdma_readwrite_threshold = 376 2368 rdma_readwrite_threshold >
sp->max_fragmented_send_size ? ··· 396 2388 */ 397 2389 int smbd_recv(struct smbd_connection *info, struct msghdr *msg) 398 2390 { 399 - struct smbdirect_socket *sc = &info->socket; 400 - struct smbdirect_recv_io *response; 401 - struct smbdirect_data_transfer *data_transfer; 402 - size_t size = iov_iter_count(&msg->msg_iter); 403 - int to_copy, to_read, data_read, offset; 404 - u32 data_length, remaining_data_length, data_offset; 405 - int rc; 2391 + struct smbdirect_socket *sc = info->socket; 406 2392 407 - if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) 408 - return -EINVAL; /* It's a bug in upper layer to get there */ 2393 + if (!smbdirect_connection_is_connected(sc)) 2394 + return -ENOTCONN; 409 2395 410 - again: 411 - /* 412 - * No need to hold the reassembly queue lock all the time as we are 413 - * the only one reading from the front of the queue. The transport 414 - * may add more entries to the back of the queue at the same time 415 - */ 416 - log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size, 417 - sc->recv_io.reassembly.data_length); 418 - if (sc->recv_io.reassembly.data_length >= size) { 419 - int queue_length; 420 - int queue_removed = 0; 421 - unsigned long flags; 422 - 423 - /* 424 - * Need to make sure reassembly_data_length is read before 425 - * reading reassembly_queue_length and calling 426 - * _get_first_reassembly. This call is lock free 427 - * as we never read at the end of the queue which is being 428 - * updated in SOFTIRQ as more data is received 429 - */ 430 - virt_rmb(); 431 - queue_length = sc->recv_io.reassembly.queue_length; 432 - data_read = 0; 433 - to_read = size; 434 - offset = sc->recv_io.reassembly.first_entry_offset; 435 - while (data_read < size) { 436 - response = _get_first_reassembly(sc); 437 - data_transfer = smbdirect_recv_io_payload(response); 438 - data_length = le32_to_cpu(data_transfer->data_length); 439 - remaining_data_length = 440 - le32_to_cpu( 441 - data_transfer->remaining_data_length); 442 - data_offset = le32_to_cpu(data_transfer->data_offset); 443 - 444 - /* 445 - * The upper layer expects RFC1002 length at the 446 - * beginning of the payload. Return it to indicate 447 - * the total length of the packet. This minimizes the 448 - * change to upper layer packet processing logic. This 449 - * will eventually be removed when an intermediate 450 - * transport layer is added 451 - */ 452 - if (response->first_segment && size == 4) { 453 - unsigned int rfc1002_len = 454 - data_length + remaining_data_length; 455 - __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); 456 - if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), 457 - &msg->msg_iter) != sizeof(rfc1002_hdr)) 458 - return -EFAULT; 459 - data_read = 4; 460 - response->first_segment = false; 461 - log_read(INFO, "returning rfc1002 length %d\n", 462 - rfc1002_len); 463 - goto read_rfc1002_done; 464 - } 465 - 466 - to_copy = min_t(int, data_length - offset, to_read); 467 - if (copy_to_iter((char *)data_transfer + data_offset + offset, 468 - to_copy, &msg->msg_iter) != to_copy) 469 - return -EFAULT; 470 - 471 - /* move on to the next buffer?
*/ 472 - if (to_copy == data_length - offset) { 473 - queue_length--; 474 - /* 475 - * No need to lock if we are not at the 476 - * end of the queue 477 - */ 478 - if (queue_length) 479 - list_del(&response->list); 480 - else { 481 - spin_lock_irqsave( 482 - &sc->recv_io.reassembly.lock, flags); 483 - list_del(&response->list); 484 - spin_unlock_irqrestore( 485 - &sc->recv_io.reassembly.lock, flags); 486 - } 487 - queue_removed++; 488 - sc->statistics.dequeue_reassembly_queue++; 489 - put_receive_buffer(sc, response); 490 - offset = 0; 491 - log_read(INFO, "put_receive_buffer offset=0\n"); 492 - } else 493 - offset += to_copy; 494 - 495 - to_read -= to_copy; 496 - data_read += to_copy; 497 - 498 - log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", 499 - to_copy, data_length - offset, 500 - to_read, data_read, offset); 501 - } 502 - 503 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 504 - sc->recv_io.reassembly.data_length -= data_read; 505 - sc->recv_io.reassembly.queue_length -= queue_removed; 506 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 507 - 508 - sc->recv_io.reassembly.first_entry_offset = offset; 509 - log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 510 - data_read, sc->recv_io.reassembly.data_length, 511 - sc->recv_io.reassembly.first_entry_offset); 512 - read_rfc1002_done: 513 - return data_read; 514 - } 515 - 516 - log_read(INFO, "wait_event on more data\n"); 517 - rc = wait_event_interruptible( 518 - sc->recv_io.reassembly.wait_queue, 519 - sc->recv_io.reassembly.data_length >= size || 520 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 521 - /* Don't return any data if interrupted */ 522 - if (rc) 523 - return rc; 524 - 525 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 526 - log_read(ERR, "disconnected\n"); 527 - return -ECONNABORTED; 528 - } 529 - 530 - goto again; 2396 + return smbdirect_connection_recvmsg(sc, msg, 0); 531 2397 } 532 2398 533 2399 /* ··· 414 2532 int num_rqst, struct smb_rqst *rqst_array) 415 2533 { 416 2534 struct smbd_connection *info = server->smbd_conn; 417 - struct smbdirect_socket *sc = &info->socket; 418 - struct smbdirect_socket_parameters *sp = &sc->parameters; 2535 + struct smbdirect_socket *sc = info->socket; 2536 + const struct smbdirect_socket_parameters *sp = smbd_get_parameters(info); 419 2537 struct smb_rqst *rqst; 420 2538 struct iov_iter iter; 421 - struct smbdirect_send_batch batch; 2539 + struct smbdirect_send_batch_storage bstorage; 2540 + struct smbdirect_send_batch *batch; 422 2541 unsigned int remaining_data_length, klen; 423 2542 int rc, i, rqst_idx; 424 2543 int error = 0; 425 2544 426 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 2545 + if (!smbdirect_connection_is_connected(sc)) 427 2546 return -EAGAIN; 428 2547 429 2548 /* ··· 447 2564 num_rqst, remaining_data_length); 448 2565 449 2566 rqst_idx = 0; 450 - smbd_send_batch_init(&batch, false, 0); 2567 + batch = smbdirect_init_send_batch_storage(&bstorage, false, 0); 451 2568 do { 452 2569 rqst = &rqst_array[rqst_idx]; 453 2570 ··· 466 2583 klen += rqst->rq_iov[i].iov_len; 467 2584 iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); 468 2585 469 - rc = smbd_post_send_full_iter(sc, &batch, &iter, &remaining_data_length); 2586 + rc = smbd_post_send_full_iter(sc, batch, &iter, remaining_data_length); 470 2587 if (rc < 0) { 471 2588 error = rc; 472 2589 break; 473 2590 } 2591 + remaining_data_length -= 
rc; 474 2592 475 2593 if (iov_iter_count(&rqst->rq_iter) > 0) { 476 2594 /* And then the data pages if there are any */ 477 - rc = smbd_post_send_full_iter(sc, &batch, &rqst->rq_iter, 478 - &remaining_data_length); 2595 + rc = smbd_post_send_full_iter(sc, batch, &rqst->rq_iter, 2596 + remaining_data_length); 479 2597 if (rc < 0) { 480 2598 error = rc; 481 2599 break; 482 2600 } 2601 + remaining_data_length -= rc; 483 2602 } 484 2603 485 2604 } while (++rqst_idx < num_rqst); 486 2605 487 - rc = smbd_send_batch_flush(sc, &batch, true); 2606 + rc = smbdirect_connection_send_batch_flush(sc, batch, true); 488 2607 if (unlikely(!rc && error)) 489 2608 rc = error; 490 2609 ··· 497 2612 * that means all the I/Os have gone out and we are good to return 498 2613 */ 499 2614 500 - wait_event(sc->send_io.pending.zero_wait_queue, 501 - atomic_read(&sc->send_io.pending.count) == 0 || 502 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 2615 + error = rc; 2616 + rc = smbdirect_connection_send_wait_zero_pending(sc); 2617 + if (unlikely(rc && !error)) 2618 + error = -EAGAIN; 503 2619 504 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) 505 - rc = -EAGAIN; 506 - 507 - return rc; 508 - } 509 - 510 - static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) 511 - { 512 - struct smbdirect_mr_io *mr = 513 - container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); 514 - struct smbdirect_socket *sc = mr->socket; 515 - 516 - if (wc->status) { 517 - log_rdma_mr(ERR, "status=%d\n", wc->status); 518 - smbd_disconnect_rdma_connection(sc); 519 - } 520 - } 521 - 522 - /* 523 - * The work queue function that recovers MRs 524 - * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used 525 - * again. Both calls are slow, so finish them in a workqueue. This will not 526 - * block the I/O path. 527 - * There is one workqueue that recovers MRs; there is no need to lock as the 528 - * I/O requests calling smbd_register_mr will never update the links in the 529 - * mr_list.
530 - */ 531 - static void smbd_mr_recovery_work(struct work_struct *work) 532 - { 533 - struct smbdirect_socket *sc = 534 - container_of(work, struct smbdirect_socket, mr_io.recovery_work); 535 - struct smbdirect_socket_parameters *sp = &sc->parameters; 536 - struct smbdirect_mr_io *smbdirect_mr; 537 - int rc; 538 - 539 - list_for_each_entry(smbdirect_mr, &sc->mr_io.all.list, list) { 540 - if (smbdirect_mr->state == SMBDIRECT_MR_ERROR) { 541 - 542 - /* recover this MR entry */ 543 - rc = ib_dereg_mr(smbdirect_mr->mr); 544 - if (rc) { 545 - log_rdma_mr(ERR, 546 - "ib_dereg_mr failed rc=%x\n", 547 - rc); 548 - smbd_disconnect_rdma_connection(sc); 549 - continue; 550 - } 551 - 552 - smbdirect_mr->mr = ib_alloc_mr( 553 - sc->ib.pd, sc->mr_io.type, 554 - sp->max_frmr_depth); 555 - if (IS_ERR(smbdirect_mr->mr)) { 556 - log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 557 - sc->mr_io.type, 558 - sp->max_frmr_depth); 559 - smbd_disconnect_rdma_connection(sc); 560 - continue; 561 - } 562 - } else 563 - /* This MR is being used, don't recover it */ 564 - continue; 565 - 566 - smbdirect_mr->state = SMBDIRECT_MR_READY; 567 - 568 - /* smbdirect_mr->state is updated by this function 569 - * and is read and updated by I/O issuing CPUs trying 570 - * to get a MR, the call to atomic_inc_return 571 - * implies a memory barrier and guarantees this 572 - * value is updated before waking up any calls to 573 - * get_mr() from the I/O issuing CPUs 574 - */ 575 - if (atomic_inc_return(&sc->mr_io.ready.count) == 1) 576 - wake_up(&sc->mr_io.ready.wait_queue); 577 - } 578 - } 579 - 580 - static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr) 581 - { 582 - struct smbdirect_socket *sc = mr->socket; 583 - 584 - lockdep_assert_held(&mr->mutex); 585 - 586 - if (mr->state == SMBDIRECT_MR_DISABLED) 587 - return; 588 - 589 - if (mr->mr) 590 - ib_dereg_mr(mr->mr); 591 - if (mr->sgt.nents) 592 - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 593 - kfree(mr->sgt.sgl); 594 - 595 - mr->mr = NULL; 596 - mr->sgt.sgl = NULL; 597 - mr->sgt.nents = 0; 598 - 599 - mr->state = SMBDIRECT_MR_DISABLED; 600 - } 601 - 602 - static void smbd_mr_free_locked(struct kref *kref) 603 - { 604 - struct smbdirect_mr_io *mr = 605 - container_of(kref, struct smbdirect_mr_io, kref); 606 - 607 - lockdep_assert_held(&mr->mutex); 608 - 609 - /* 610 - * smbd_mr_disable_locked() should already be called! 611 - */ 612 - if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) 613 - smbd_mr_disable_locked(mr); 614 - 615 - mutex_unlock(&mr->mutex); 616 - mutex_destroy(&mr->mutex); 617 - kfree(mr); 618 - } 619 - 620 - static void destroy_mr_list(struct smbdirect_socket *sc) 621 - { 622 - struct smbdirect_mr_io *mr, *tmp; 623 - LIST_HEAD(all_list); 624 - unsigned long flags; 625 - 626 - disable_work_sync(&sc->mr_io.recovery_work); 627 - 628 - spin_lock_irqsave(&sc->mr_io.all.lock, flags); 629 - list_splice_tail_init(&sc->mr_io.all.list, &all_list); 630 - spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 631 - 632 - list_for_each_entry_safe(mr, tmp, &all_list, list) { 633 - mutex_lock(&mr->mutex); 634 - 635 - smbd_mr_disable_locked(mr); 636 - list_del(&mr->list); 637 - mr->socket = NULL; 638 - 639 - /* 640 - * No kref_put_mutex() as it's already locked. 641 - * 642 - * If smbd_mr_free_locked() is called 643 - * and the mutex is unlocked and mr is gone, 644 - * in that case kref_put() returned 1. 645 - * 646 - * If kref_put() returned 0 we know that 647 - * smbd_mr_free_locked() didn't 648 - * run.
Not by us nor by anyone else, as we 649 - * still hold the mutex, so we need to unlock. 650 - * 651 - * If the mr is still registered it will 652 - * be dangling (detached from the connection), 653 - * waiting for smbd_deregister_mr() to be 654 - * called in order to free the memory. 655 - */ 656 - if (!kref_put(&mr->kref, smbd_mr_free_locked)) 657 - mutex_unlock(&mr->mutex); 658 - } 659 - } 660 - 661 - /* 662 - * Allocate MRs used for RDMA read/write 663 - * The number of MRs will not exceed hardware capability in responder_resources 664 - * All MRs are kept in mr_list. The MR can be recovered after it's used 665 - * Recovery is done in smbd_mr_recovery_work. The content of list entry changes 666 - * as MRs are used and recovered for I/O, but the list links will not change 667 - */ 668 - static int allocate_mr_list(struct smbdirect_socket *sc) 669 - { 670 - struct smbdirect_socket_parameters *sp = &sc->parameters; 671 - struct smbdirect_mr_io *mr; 672 - int ret; 673 - u32 i; 674 - 675 - if (sp->responder_resources == 0) { 676 - log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); 677 - return -EINVAL; 678 - } 679 - 680 - /* Allocate more MRs (2x) than hardware responder_resources */ 681 - for (i = 0; i < sp->responder_resources * 2; i++) { 682 - mr = kzalloc_obj(*mr); 683 - if (!mr) { 684 - ret = -ENOMEM; 685 - goto kzalloc_mr_failed; 686 - } 687 - 688 - kref_init(&mr->kref); 689 - mutex_init(&mr->mutex); 690 - 691 - mr->mr = ib_alloc_mr(sc->ib.pd, 692 - sc->mr_io.type, 693 - sp->max_frmr_depth); 694 - if (IS_ERR(mr->mr)) { 695 - ret = PTR_ERR(mr->mr); 696 - log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", 697 - sc->mr_io.type, sp->max_frmr_depth); 698 - goto ib_alloc_mr_failed; 699 - } 700 - 701 - mr->sgt.sgl = kzalloc_objs(struct scatterlist, 702 - sp->max_frmr_depth); 703 - if (!mr->sgt.sgl) { 704 - ret = -ENOMEM; 705 - log_rdma_mr(ERR, "failed to allocate sgl\n"); 706 - goto kcalloc_sgl_failed; 707 - } 708 - mr->state = SMBDIRECT_MR_READY; 709 - mr->socket = sc; 710 - 711 - list_add_tail(&mr->list, &sc->mr_io.all.list); 712 - atomic_inc(&sc->mr_io.ready.count); 713 - } 714 - 715 - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); 2620 + if (unlikely(error)) 2621 + return error; 716 2622 717 2623 return 0; 718 - 719 - kcalloc_sgl_failed: 720 - ib_dereg_mr(mr->mr); 721 - ib_alloc_mr_failed: 722 - mutex_destroy(&mr->mutex); 723 - kfree(mr); 724 - kzalloc_mr_failed: 725 - destroy_mr_list(sc); 726 - return ret; 727 - } 728 - 729 - /* 730 - * Get a MR from mr_list. This function waits until there is at least one 731 - * MR available in the list. It may access the list while the 732 - * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock 733 - * as they never modify the same places. However, there may be several CPUs 734 - * issuing I/O trying to get an MR at the same time; mr_list_lock is used to 735 - * protect this situation.
736 - */ 737 - static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc) 738 - { 739 - struct smbdirect_mr_io *ret; 740 - unsigned long flags; 741 - int rc; 742 - again: 743 - rc = wait_event_interruptible(sc->mr_io.ready.wait_queue, 744 - atomic_read(&sc->mr_io.ready.count) || 745 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 746 - if (rc) { 747 - log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); 748 - return NULL; 749 - } 750 - 751 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 752 - log_rdma_mr(ERR, "sc->status=%x\n", sc->status); 753 - return NULL; 754 - } 755 - 756 - spin_lock_irqsave(&sc->mr_io.all.lock, flags); 757 - list_for_each_entry(ret, &sc->mr_io.all.list, list) { 758 - if (ret->state == SMBDIRECT_MR_READY) { 759 - ret->state = SMBDIRECT_MR_REGISTERED; 760 - kref_get(&ret->kref); 761 - spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 762 - atomic_dec(&sc->mr_io.ready.count); 763 - atomic_inc(&sc->mr_io.used.count); 764 - return ret; 765 - } 766 - } 767 - 768 - spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 769 - /* 770 - * It is possible that we could fail to get MR because other processes may 771 - * try to acquire a MR at the same time. If this is the case, retry it. 772 - */ 773 - goto again; 774 - } 775 - 776 - /* 777 - * Transcribe the pages from an iterator into an MR scatterlist. 778 - */ 779 - static int smbd_iter_to_mr(struct iov_iter *iter, 780 - struct sg_table *sgt, 781 - unsigned int max_sg) 782 - { 783 - int ret; 784 - 785 - memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 786 - 787 - ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 788 - WARN_ON(ret < 0); 789 - if (sgt->nents > 0) 790 - sg_mark_end(&sgt->sgl[sgt->nents - 1]); 791 - return ret; 792 2624 } 793 2625 794 2626 /* ··· 519 2917 struct iov_iter *iter, 520 2918 bool writing, bool need_invalidate) 521 2919 { 522 - struct smbdirect_socket *sc = &info->socket; 523 - struct smbdirect_socket_parameters *sp = &sc->parameters; 524 - struct smbdirect_mr_io *mr; 525 - int rc, num_pages; 526 - struct ib_reg_wr *reg_wr; 2920 + struct smbdirect_socket *sc = info->socket; 527 2921 528 - num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); 529 - if (num_pages > sp->max_frmr_depth) { 530 - log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", 531 - num_pages, sp->max_frmr_depth); 532 - WARN_ON_ONCE(1); 2922 + if (!smbdirect_connection_is_connected(sc)) 533 2923 return NULL; 534 - } 535 2924 536 - mr = get_mr(sc); 537 - if (!mr) { 538 - log_rdma_mr(ERR, "get_mr returning NULL\n"); 539 - return NULL; 540 - } 541 - 542 - mutex_lock(&mr->mutex); 543 - 544 - mr->dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; 545 - mr->need_invalidate = need_invalidate; 546 - mr->sgt.nents = 0; 547 - mr->sgt.orig_nents = 0; 548 - 549 - log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", 550 - num_pages, iov_iter_count(iter), sp->max_frmr_depth); 551 - smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); 552 - 553 - rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 554 - if (!rc) { 555 - log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", 556 - num_pages, mr->dir, rc); 557 - goto dma_map_error; 558 - } 559 - 560 - rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); 561 - if (rc != mr->sgt.nents) { 562 - log_rdma_mr(ERR, 563 - "ib_map_mr_sg failed rc = %d nents = %x\n", 564 - rc, mr->sgt.nents); 565 - goto map_mr_error; 566 - } 567 - 568 - ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); 569 - reg_wr = &mr->wr; 570 - reg_wr->wr.opcode = IB_WR_REG_MR; 571 - mr->cqe.done = register_mr_done; 572 - reg_wr->wr.wr_cqe = &mr->cqe; 573 - reg_wr->wr.num_sge = 0; 574 - reg_wr->wr.send_flags = IB_SEND_SIGNALED; 575 - reg_wr->mr = mr->mr; 576 - reg_wr->key = mr->mr->rkey; 577 - reg_wr->access = writing ? 578 - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 579 - IB_ACCESS_REMOTE_READ; 580 - 581 - /* 582 - * There is no need for waiting for completion on ib_post_send 583 - * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution 584 - * on the next ib_post_send when we actually send I/O to remote peer 585 - */ 586 - rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); 587 - if (!rc) { 588 - /* 589 - * get_mr() gave us a reference 590 - * via kref_get(&mr->kref), we keep that and let 591 - * the caller use smbd_deregister_mr() 592 - * to remove it again. 593 - */ 594 - mutex_unlock(&mr->mutex); 595 - return mr; 596 - } 597 - 598 - log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", 599 - rc, reg_wr->key); 600 - 601 - /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR */ 602 - map_mr_error: 603 - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 604 - 605 - dma_map_error: 606 - mr->sgt.nents = 0; 607 - mr->state = SMBDIRECT_MR_ERROR; 608 - if (atomic_dec_and_test(&sc->mr_io.used.count)) 609 - wake_up(&sc->mr_io.cleanup.wait_queue); 610 - 611 - smbd_disconnect_rdma_connection(sc); 612 - 613 - /* 614 - * get_mr() gave us a reference 615 - * via kref_get(&mr->kref), we need to remove it again 616 - * on error. 617 - * 618 - * No kref_put_mutex() as it's already locked. 619 - * 620 - * If smbd_mr_free_locked() is called 621 - * and the mutex is unlocked and mr is gone, 622 - * in that case kref_put() returned 1. 623 - * 624 - * If kref_put() returned 0 we know that 625 - * smbd_mr_free_locked() didn't 626 - * run. Not by us nor by anyone else, as we 627 - * still hold the mutex, so we need to unlock.
628 - */ 629 - if (!kref_put(&mr->kref, smbd_mr_free_locked)) 630 - mutex_unlock(&mr->mutex); 631 - 632 - return NULL; 2925 + return smbdirect_connection_register_mr_io(sc, iter, writing, need_invalidate); 633 2926 } 634 2927 635 - static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) 2928 + void smbd_mr_fill_buffer_descriptor(struct smbdirect_mr_io *mr, 2929 + struct smbdirect_buffer_descriptor_v1 *v1) 636 2930 { 637 - struct smbdirect_mr_io *smbdirect_mr; 638 - struct ib_cqe *cqe; 639 - 640 - cqe = wc->wr_cqe; 641 - smbdirect_mr = container_of(cqe, struct smbdirect_mr_io, cqe); 642 - smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; 643 - if (wc->status != IB_WC_SUCCESS) { 644 - log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); 645 - smbdirect_mr->state = SMBDIRECT_MR_ERROR; 646 - } 647 - complete(&smbdirect_mr->invalidate_done); 2931 + smbdirect_mr_io_fill_buffer_descriptor(mr, v1); 648 2932 } 649 2933 650 2934 /* ··· 541 3053 */ 542 3054 void smbd_deregister_mr(struct smbdirect_mr_io *mr) 543 3055 { 544 - struct smbdirect_socket *sc = mr->socket; 545 - 546 - mutex_lock(&mr->mutex); 547 - if (mr->state == SMBDIRECT_MR_DISABLED) 548 - goto put_kref; 549 - 550 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 551 - smbd_mr_disable_locked(mr); 552 - goto put_kref; 553 - } 554 - 555 - if (mr->need_invalidate) { 556 - struct ib_send_wr *wr = &mr->inv_wr; 557 - int rc; 558 - 559 - /* Need to finish local invalidation before returning */ 560 - wr->opcode = IB_WR_LOCAL_INV; 561 - mr->cqe.done = local_inv_done; 562 - wr->wr_cqe = &mr->cqe; 563 - wr->num_sge = 0; 564 - wr->ex.invalidate_rkey = mr->mr->rkey; 565 - wr->send_flags = IB_SEND_SIGNALED; 566 - 567 - init_completion(&mr->invalidate_done); 568 - rc = ib_post_send(sc->ib.qp, wr, NULL); 569 - if (rc) { 570 - log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); 571 - smbd_mr_disable_locked(mr); 572 - smbd_disconnect_rdma_connection(sc); 573 - goto done; 574 - } 575 - wait_for_completion(&mr->invalidate_done); 576 - mr->need_invalidate = false; 577 - } else 578 - /* 579 - * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED 580 - * and defer to mr_recovery_work to recover the MR for next use 581 - */ 582 - mr->state = SMBDIRECT_MR_INVALIDATED; 583 - 584 - if (mr->sgt.nents) { 585 - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 586 - mr->sgt.nents = 0; 587 - } 588 - 589 - if (mr->state == SMBDIRECT_MR_INVALIDATED) { 590 - mr->state = SMBDIRECT_MR_READY; 591 - if (atomic_inc_return(&sc->mr_io.ready.count) == 1) 592 - wake_up(&sc->mr_io.ready.wait_queue); 593 - } else 594 - /* 595 - * Schedule the work to do MR recovery for future I/Os. MR 596 - * recovery is slow and we don't want it to block the current I/O 597 - */ 598 - queue_work(sc->workqueue, &sc->mr_io.recovery_work); 599 - 600 - done: 601 - if (atomic_dec_and_test(&sc->mr_io.used.count)) 602 - wake_up(&sc->mr_io.cleanup.wait_queue); 603 - 604 - put_kref: 605 - /* 606 - * No kref_put_mutex() as it's already locked. 607 - * 608 - * If smbd_mr_free_locked() is called 609 - * and the mutex is unlocked and mr is gone, 610 - * in that case kref_put() returned 1. 611 - * 612 - * If kref_put() returned 0 we know that 613 - * smbd_mr_free_locked() didn't 614 - * run. Not by us nor by anyone else, as we 615 - * still hold the mutex, so we need to unlock 616 - * and keep the mr in SMBDIRECT_MR_READY or 617 - * SMBDIRECT_MR_ERROR state.
618 - */ 619 - if (!kref_put(&mr->kref, smbd_mr_free_locked)) 620 - mutex_unlock(&mr->mutex); 3056 + smbdirect_connection_deregister_mr_io(mr); 621 3057 } 622 3058 623 - static bool smb_set_sge(struct smb_extract_to_rdma *rdma, 624 - struct page *lowest_page, size_t off, size_t len) 3059 + void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) 625 3060 { 626 - struct ib_sge *sge = &rdma->sge[rdma->nr_sge]; 627 - u64 addr; 3061 + if (!server->rdma) 3062 + return; 628 3063 629 - addr = ib_dma_map_page(rdma->device, lowest_page, 630 - off, len, rdma->direction); 631 - if (ib_dma_mapping_error(rdma->device, addr)) 632 - return false; 633 - 634 - sge->addr = addr; 635 - sge->length = len; 636 - sge->lkey = rdma->local_dma_lkey; 637 - rdma->nr_sge++; 638 - return true; 639 - } 640 - 641 - /* 642 - * Extract page fragments from a BVEC-class iterator and add them to an RDMA 643 - * element list. The pages are not pinned. 644 - */ 645 - static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter, 646 - struct smb_extract_to_rdma *rdma, 647 - ssize_t maxsize) 648 - { 649 - const struct bio_vec *bv = iter->bvec; 650 - unsigned long start = iter->iov_offset; 651 - unsigned int i; 652 - ssize_t ret = 0; 653 - 654 - for (i = 0; i < iter->nr_segs; i++) { 655 - size_t off, len; 656 - 657 - len = bv[i].bv_len; 658 - if (start >= len) { 659 - start -= len; 660 - continue; 661 - } 662 - 663 - len = min_t(size_t, maxsize, len - start); 664 - off = bv[i].bv_offset + start; 665 - 666 - if (!smb_set_sge(rdma, bv[i].bv_page, off, len)) 667 - return -EIO; 668 - 669 - ret += len; 670 - maxsize -= len; 671 - if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 672 - break; 673 - start = 0; 3064 + if (!server->smbd_conn) { 3065 + seq_puts(m, "\nSMBDirect transport not available"); 3066 + return; 674 3067 } 675 3068 676 - if (ret > 0) 677 - iov_iter_advance(iter, ret); 678 - return ret; 679 - } 680 - 681 - /* 682 - * Extract fragments from a KVEC-class iterator and add them to an RDMA list. 683 - * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. 684 - * The pages are not pinned. 
685 - */ 686 - static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter, 687 - struct smb_extract_to_rdma *rdma, 688 - ssize_t maxsize) 689 - { 690 - const struct kvec *kv = iter->kvec; 691 - unsigned long start = iter->iov_offset; 692 - unsigned int i; 693 - ssize_t ret = 0; 694 - 695 - for (i = 0; i < iter->nr_segs; i++) { 696 - struct page *page; 697 - unsigned long kaddr; 698 - size_t off, len, seg; 699 - 700 - len = kv[i].iov_len; 701 - if (start >= len) { 702 - start -= len; 703 - continue; 704 - } 705 - 706 - kaddr = (unsigned long)kv[i].iov_base + start; 707 - off = kaddr & ~PAGE_MASK; 708 - len = min_t(size_t, maxsize, len - start); 709 - kaddr &= PAGE_MASK; 710 - 711 - maxsize -= len; 712 - do { 713 - seg = min_t(size_t, len, PAGE_SIZE - off); 714 - 715 - if (is_vmalloc_or_module_addr((void *)kaddr)) 716 - page = vmalloc_to_page((void *)kaddr); 717 - else 718 - page = virt_to_page((void *)kaddr); 719 - 720 - if (!smb_set_sge(rdma, page, off, seg)) 721 - return -EIO; 722 - 723 - ret += seg; 724 - len -= seg; 725 - kaddr += PAGE_SIZE; 726 - off = 0; 727 - } while (len > 0 && rdma->nr_sge < rdma->max_sge); 728 - 729 - if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) 730 - break; 731 - start = 0; 732 - } 733 - 734 - if (ret > 0) 735 - iov_iter_advance(iter, ret); 736 - return ret; 737 - } 738 - 739 - /* 740 - * Extract folio fragments from a FOLIOQ-class iterator and add them to an RDMA 741 - * list. The folios are not pinned. 742 - */ 743 - static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, 744 - struct smb_extract_to_rdma *rdma, 745 - ssize_t maxsize) 746 - { 747 - const struct folio_queue *folioq = iter->folioq; 748 - unsigned int slot = iter->folioq_slot; 749 - ssize_t ret = 0; 750 - size_t offset = iter->iov_offset; 751 - 752 - BUG_ON(!folioq); 753 - 754 - if (slot >= folioq_nr_slots(folioq)) { 755 - folioq = folioq->next; 756 - if (WARN_ON_ONCE(!folioq)) 757 - return -EIO; 758 - slot = 0; 759 - } 760 - 761 - do { 762 - struct folio *folio = folioq_folio(folioq, slot); 763 - size_t fsize = folioq_folio_size(folioq, slot); 764 - 765 - if (offset < fsize) { 766 - size_t part = umin(maxsize, fsize - offset); 767 - 768 - if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) 769 - return -EIO; 770 - 771 - offset += part; 772 - ret += part; 773 - maxsize -= part; 774 - } 775 - 776 - if (offset >= fsize) { 777 - offset = 0; 778 - slot++; 779 - if (slot >= folioq_nr_slots(folioq)) { 780 - if (!folioq->next) { 781 - WARN_ON_ONCE(ret < iter->count); 782 - break; 783 - } 784 - folioq = folioq->next; 785 - slot = 0; 786 - } 787 - } 788 - } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); 789 - 790 - iter->folioq = folioq; 791 - iter->folioq_slot = slot; 792 - iter->iov_offset = offset; 793 - iter->count -= ret; 794 - return ret; 795 - } 796 - 797 - /* 798 - * Extract page fragments from up to the given amount of the source iterator 799 - * and build up an RDMA list that refers to all of those bits. The RDMA list 800 - * is appended to, up to the maximum number of elements set in the parameter 801 - * block. 802 - * 803 - * The extracted page fragments are not pinned or ref'd in any way; if an 804 - * IOVEC/UBUF-type iterator is to be used, it should be converted to a 805 - * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some 806 - * way. 
807 - */ 808 - static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, 809 - struct smb_extract_to_rdma *rdma) 810 - { 811 - ssize_t ret; 812 - int before = rdma->nr_sge; 813 - 814 - switch (iov_iter_type(iter)) { 815 - case ITER_BVEC: 816 - ret = smb_extract_bvec_to_rdma(iter, rdma, len); 817 - break; 818 - case ITER_KVEC: 819 - ret = smb_extract_kvec_to_rdma(iter, rdma, len); 820 - break; 821 - case ITER_FOLIOQ: 822 - ret = smb_extract_folioq_to_rdma(iter, rdma, len); 823 - break; 824 - default: 825 - WARN_ON_ONCE(1); 826 - return -EIO; 827 - } 828 - 829 - if (ret < 0) { 830 - while (rdma->nr_sge > before) { 831 - struct ib_sge *sge = &rdma->sge[rdma->nr_sge--]; 832 - 833 - ib_dma_unmap_single(rdma->device, sge->addr, sge->length, 834 - rdma->direction); 835 - sge->addr = 0; 836 - } 837 - } 838 - 839 - return ret; 3069 + smbdirect_connection_legacy_debug_proc_show(server->smbd_conn->socket, 3070 + server->rdma_readwrite_threshold, 3071 + m); 840 3072 }
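Taken together, the client conversion above shrinks the transport lifecycle in cifs.ko to a handful of smbdirect.ko entry points. The following is a condensed sketch of that call sequence, based only on the calls visible in _smbd_get_connection() and smbd_destroy() in this diff; the helper name example_connect is illustrative and error logging, address setup and parameter values are elided:

	/*
	 * Sketch of the converted client flow (illustrative only):
	 * create the socket, apply parameters and kernel settings,
	 * connect synchronously, and on any failure tear everything
	 * down with a single release call.
	 */
	static struct smbdirect_socket *example_connect(struct net *net,
							struct sockaddr *dstaddr,
							struct smbdirect_socket_parameters *sp)
	{
		struct smbdirect_socket *sc;

		if (smbdirect_socket_create_kern(net, &sc))
			return NULL;

		if (smbdirect_socket_set_initial_parameters(sc, sp) ||
		    smbdirect_socket_set_kernel_settings(sc, IB_POLL_SOFTIRQ, GFP_KERNEL) ||
		    smbdirect_connect_sync(sc, dstaddr)) {
			/* one call frees QP, CQs, PD, pools and workqueues */
			smbdirect_socket_release(sc);
			return NULL;
		}

		return sc;
	}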
+5 -14
fs/smb/client/smbdirect.h
··· 11 11 #define cifs_rdma_enabled(server) ((server)->rdma) 12 12 13 13 #include "cifsglob.h" 14 - #include <rdma/ib_verbs.h> 15 - #include <rdma/rdma_cm.h> 16 - #include <linux/mempool.h> 17 14 18 15 #include "../common/smbdirect/smbdirect.h" 19 - #include "../common/smbdirect/smbdirect_socket.h" 20 16 21 17 extern int rdma_readwrite_threshold; 22 18 extern int smbd_max_frmr_depth; ··· 23 27 extern int smbd_send_credit_target; 24 28 extern int smbd_receive_credit_max; 25 29 26 - /* 27 - * The context for the SMBDirect transport 28 - * Everything related to the transport is here. It has several logical parts 29 - * 1. RDMA related structures 30 - * 2. SMBDirect connection parameters 31 - * 3. Memory registrations 32 - * 4. Receive and reassembly queues for data receive path 33 - * 5. mempools for allocating packets 34 - */ 35 30 struct smbd_connection { 36 - struct smbdirect_socket socket; 31 + struct smbdirect_socket *socket; 37 32 }; 38 33 39 34 /* Create a SMBDirect session */ ··· 47 60 struct smbdirect_mr_io *smbd_register_mr( 48 61 struct smbd_connection *info, struct iov_iter *iter, 49 62 bool writing, bool need_invalidate); 63 + void smbd_mr_fill_buffer_descriptor(struct smbdirect_mr_io *mr, 64 + struct smbdirect_buffer_descriptor_v1 *v1); 50 65 void smbd_deregister_mr(struct smbdirect_mr_io *mr); 66 + 67 + void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m); 51 68 52 69 #else 53 70 #define cifs_rdma_enabled(server) 0
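With the embedded struct replaced by a pointer, cifs.ko now owns only the thin smbd_connection shell while smbdirect.ko owns the socket itself and all RDMA state behind it. A minimal sketch of what that ownership split means at teardown, mirroring smbd_destroy() in the previous file (the helper name is hypothetical):

	/* Hypothetical helper: the wrapper frees only its own allocation;
	 * the socket and all RDMA state are handed back to smbdirect.ko
	 * in one call. */
	static void example_teardown(struct smbd_connection *info)
	{
		smbdirect_socket_release(info->socket);	/* owned by smbdirect.ko */
		kfree(info);				/* owned by cifs.ko */
	}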
+1
fs/smb/common/Makefile
··· 4 4 # 5 5 6 6 obj-$(CONFIG_SMBFS) += cifs_md4.o 7 + obj-$(CONFIG_SMB_COMMON_SMBDIRECT) += smbdirect/
+9
fs/smb/common/smbdirect/Kconfig
··· 1 + # SPDX-License-Identifier: GPL-2.0-or-later 2 + # 3 + # smbdirect configuration 4 + 5 + config SMB_COMMON_SMBDIRECT 6 + def_tristate n 7 + depends on INFINIBAND && INFINIBAND_ADDR_TRANS 8 + depends on m || INFINIBAND=y 9 + select SG_POOL
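The "depends on m || INFINIBAND=y" line is the usual Kconfig idiom for "built-in only if INFINIBAND is built-in": with INFINIBAND=m the dependency expression evaluates to m, capping the symbol at m, so smbdirect.ko can never be linked into the kernel image while the RDMA core is a module. Since the symbol is def_tristate n with no prompt, it is only ever raised via select from one of the SMB client/server options. An illustrative .config fragment for a fully modular build:

CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_ADDR_TRANS=y
CONFIG_SMB_COMMON_SMBDIRECT=m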
+18
fs/smb/common/smbdirect/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0-or-later 2 + # 3 + # Makefile for smbdirect support 4 + # 5 + 6 + obj-$(CONFIG_SMB_COMMON_SMBDIRECT) += smbdirect.o 7 + 8 + smbdirect-y := \ 9 + smbdirect_socket.o \ 10 + smbdirect_connection.o \ 11 + smbdirect_mr.o \ 12 + smbdirect_rw.o \ 13 + smbdirect_debug.o \ 14 + smbdirect_connect.o \ 15 + smbdirect_listen.o \ 16 + smbdirect_accept.o \ 17 + smbdirect_devices.o \ 18 + smbdirect_main.o
+10 -4
fs/smb/common/smbdirect/smbdirect.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 2 /* 3 - * Copyright (C) 2017, Microsoft Corporation. 4 - * Copyright (C) 2018, LG Electronics. 3 + * Copyright (C) 2025 Stefan Metzmacher 5 4 */ 6 5 7 6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ ··· 24 25 * Some values are important for the upper layer. 25 26 */ 26 27 struct smbdirect_socket_parameters { 28 + __u64 flags; 29 + #define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) 30 + #define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) 27 31 __u32 resolve_addr_timeout_msec; 28 32 __u32 resolve_route_timeout_msec; 29 33 __u32 rdma_connect_timeout_msec; 30 34 __u32 negotiate_timeout_msec; 31 - __u8 initiator_depth; 32 - __u8 responder_resources; 35 + __u16 initiator_depth; /* limited to U8_MAX */ 36 + __u16 responder_resources; /* limited to U8_MAX */ 33 37 __u16 recv_credit_max; 34 38 __u16 send_credit_target; 35 39 __u32 max_send_size; ··· 44 42 __u32 keepalive_interval_msec; 45 43 __u32 keepalive_timeout_msec; 46 44 } __packed; 45 + 46 + #define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ 47 + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ 48 + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) 47 49 48 50 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */
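The new flags word lets a caller restrict a socket to one class of RDMA transport before connecting; smbdirect_connect_rdma_connect() below rejects devices that don't match. A minimal sketch, assuming sc is a freshly created struct smbdirect_socket whose parameters are still owned by the caller:

	struct smbdirect_socket_parameters *sp = &sc->parameters;

	/* Illustrative: drop any previous restriction, then allow iWarp only */
	sp->flags &= ~SMBDIRECT_FLAG_PORT_RANGE_MASK;
	sp->flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW;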
+857
fs/smb/common/smbdirect/smbdirect_accept.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later
2 + /*
3 + * Copyright (C) 2017, Microsoft Corporation.
4 + * Copyright (C) 2018, LG Electronics.
5 + * Copyright (c) 2025, Stefan Metzmacher
6 + */
7 +
8 + #include "smbdirect_internal.h"
9 + #include <net/sock.h>
10 + #include "../../common/smb2status.h"
11 +
12 + static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id,
13 + struct rdma_cm_event *event);
14 + static int smbdirect_accept_init_params(struct smbdirect_socket *sc);
15 + static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc);
16 + static void smbdirect_accept_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc);
17 +
18 + int smbdirect_accept_connect_request(struct smbdirect_socket *sc,
19 + const struct rdma_conn_param *param)
20 + {
21 + struct smbdirect_socket_parameters *sp = &sc->parameters;
22 + struct smbdirect_recv_io *recv_io;
23 + u8 peer_initiator_depth;
24 + u8 peer_responder_resources;
25 + struct rdma_conn_param conn_param;
26 + __be32 ird_ord_hdr[2];
27 + int ret;
28 +
29 + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_CREATED))
30 + return -EINVAL;
31 +
32 + /*
33 + * First set what we as the server are able to support
34 + */
35 + sp->initiator_depth = min_t(u8, sp->initiator_depth,
36 + sc->ib.dev->attrs.max_qp_rd_atom);
37 +
38 + peer_initiator_depth = param->initiator_depth;
39 + peer_responder_resources = param->responder_resources;
40 + smbdirect_connection_negotiate_rdma_resources(sc,
41 + peer_initiator_depth,
42 + peer_responder_resources,
43 + param);
44 +
45 + ret = smbdirect_accept_init_params(sc);
46 + if (ret) {
47 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
48 + "smbdirect_accept_init_params() failed %1pe\n",
49 + SMBDIRECT_DEBUG_ERR_PTR(ret));
50 + goto init_params_failed;
51 + }
52 +
53 + ret = smbdirect_connection_create_qp(sc);
54 + if (ret) {
55 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
56 + "smbdirect_connection_create_qp() failed %1pe\n",
57 + SMBDIRECT_DEBUG_ERR_PTR(ret));
58 + goto create_qp_failed;
59 + }
60 +
61 + ret = smbdirect_connection_create_mem_pools(sc);
62 + if (ret) {
63 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
64 + "smbdirect_connection_create_mem_pools() failed %1pe\n",
65 + SMBDIRECT_DEBUG_ERR_PTR(ret));
66 + goto create_mem_failed;
67 + }
68 +
69 + recv_io = smbdirect_connection_get_recv_io(sc);
70 + if (WARN_ON_ONCE(!recv_io)) {
71 + ret = -EINVAL;
72 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
73 + "smbdirect_connection_get_recv_io() failed %1pe\n",
74 + SMBDIRECT_DEBUG_ERR_PTR(ret));
75 + goto get_recv_io_failed;
76 + }
77 + recv_io->cqe.done = smbdirect_accept_negotiate_recv_done;
78 +
79 + /*
80 + * Now post the recv_io buffer in order to get
81 + * the negotiate request
82 + */
83 + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ;
84 + ret = smbdirect_connection_post_recv_io(recv_io);
85 + if (ret) {
86 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
87 + "smbdirect_connection_post_recv_io() failed %1pe\n",
88 + SMBDIRECT_DEBUG_ERR_PTR(ret));
89 + goto post_recv_io_failed;
90 + }
91 + /*
92 + * From here recv_io is known to the RDMA QP and needs ib_drain_qp and
93 + * smbdirect_accept_negotiate_recv_done to clean up...
94 + */
95 + recv_io = NULL;
96 +
97 + /* already checked with SMBDIRECT_CHECK_STATUS_WARN above */
98 + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED);
99 + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED;
100 +
101 + /*
102 + * We already negotiated sp->initiator_depth
103 + * and sp->responder_resources above.
104 + */
105 + memset(&conn_param, 0, sizeof(conn_param));
106 + conn_param.initiator_depth = sp->initiator_depth;
107 + conn_param.responder_resources = sp->responder_resources;
108 +
109 + if (sc->rdma.legacy_iwarp) {
110 + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
111 + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
112 + conn_param.private_data = ird_ord_hdr;
113 + conn_param.private_data_len = sizeof(ird_ord_hdr);
114 + } else {
115 + conn_param.private_data = NULL;
116 + conn_param.private_data_len = 0;
117 + }
118 + conn_param.retry_count = SMBDIRECT_RDMA_CM_RETRY;
119 + conn_param.rnr_retry_count = SMBDIRECT_RDMA_CM_RNR_RETRY;
120 + conn_param.flow_control = 0;
121 +
122 + /* explicitly set above */
123 + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED);
124 + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING;
125 + sc->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
126 + sc->rdma.cm_id->event_handler = smbdirect_accept_rdma_event_handler;
127 + ret = rdma_accept(sc->rdma.cm_id, &conn_param);
128 + if (ret) {
129 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
130 + "rdma_accept() failed %1pe\n",
131 + SMBDIRECT_DEBUG_ERR_PTR(ret));
132 + goto rdma_accept_failed;
133 + }
134 +
135 + /*
136 + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING
137 + * so that the timer will cause a disconnect.
138 + */
139 + INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work);
140 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
141 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work,
142 + msecs_to_jiffies(sp->negotiate_timeout_msec));
143 +
144 + return 0;
145 +
146 + rdma_accept_failed:
147 + /*
148 + * smbdirect_connection_destroy_qp() calls ib_drain_qp(),
149 + * so that smbdirect_accept_negotiate_recv_done() will
150 + * call smbdirect_connection_put_recv_io()
151 + */
152 + post_recv_io_failed:
153 + if (recv_io)
154 + smbdirect_connection_put_recv_io(recv_io);
155 + get_recv_io_failed:
156 + smbdirect_connection_destroy_mem_pools(sc);
157 + create_mem_failed:
158 + smbdirect_connection_destroy_qp(sc);
159 + create_qp_failed:
160 + init_params_failed:
161 + return ret;
162 + }
163 +
164 + static int smbdirect_accept_init_params(struct smbdirect_socket *sc)
165 + {
166 + const struct smbdirect_socket_parameters *sp = &sc->parameters;
167 + int max_send_sges;
168 + unsigned int maxpages;
169 +
170 + /* need 3 more sges, because a SMB_DIRECT header, SMB2 header
171 + * and SMB2 response could be mapped.
172 + */
173 + max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3;
174 + if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) {
175 + pr_err("max_send_size %d is too large\n", sp->max_send_size);
176 + return -EINVAL;
177 + }
178 +
179 + /*
180 + * There is only a single batch credit
181 + */
182 + atomic_set(&sc->send_io.bcredits.count, 1);
183 +
184 + /*
185 + * Initialize the local credits to post
186 + * IB_WR_SEND[_WITH_INV].
187 + */
188 + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);
189 +
190 + if (sp->max_read_write_size) {
191 + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE);
192 + sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev,
193 + sc->rdma.cm_id->port_num,
194 + maxpages);
195 + sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max);
196 + /* add one extra in order to handle unaligned pages */
197 + sc->rw_io.credits.max += 1;
198 + }
199 +
200 + sc->recv_io.credits.target = 1;
201 +
202 + atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max);
203 +
204 + return 0;
205 + }
206 +
207 + static void smbdirect_accept_negotiate_recv_work(struct work_struct *work);
208 +
209 + static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc)
210 + {
211 + struct smbdirect_recv_io *recv_io =
212 + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe);
213 + struct smbdirect_socket *sc = recv_io->socket;
214 + unsigned long flags;
215 +
216 + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) {
217 + if (wc->status != IB_WC_WR_FLUSH_ERR)
218 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR,
219 + "wc->status=%s (%d) wc->opcode=%d\n",
220 + ib_wc_status_msg(wc->status), wc->status, wc->opcode);
221 + goto error;
222 + }
223 +
224 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO,
225 + "smbdirect_recv_io completed. status='%s (%d)', opcode=%d\n",
226 + ib_wc_status_msg(wc->status), wc->status, wc->opcode);
227 +
228 + /*
229 + * This is an internal error!
230 + */
231 + if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ))
232 + goto error;
233 +
234 + /*
235 + * Don't reset the timer to the keepalive interval here;
236 + * this will be done in smbdirect_accept_negotiate_recv_work().
237 + */
238 +
239 + ib_dma_sync_single_for_cpu(sc->ib.dev,
240 + recv_io->sge.addr,
241 + recv_io->sge.length,
242 + DMA_FROM_DEVICE);
243 +
244 + /*
245 + * Only remember recv_io if it has enough bytes;
246 + * this gives smbdirect_accept_negotiate_recv_work enough
247 + * information in order to disconnect if it was not
248 + * valid.
249 + */
250 + sc->recv_io.reassembly.full_packet_received = true;
251 + if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req))
252 + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0);
253 + else
254 + smbdirect_connection_put_recv_io(recv_io);
255 +
256 + /*
257 + * Some drivers (at least mlx5_ib and irdma) might post a
258 + * recv completion before RDMA_CM_EVENT_ESTABLISHED,
259 + * so we need to adjust our expectation in that case.
260 + *
261 + * So we defer further processing of the negotiation
262 + * to smbdirect_accept_negotiate_recv_work().
263 + *
264 + * If we are already in SMBDIRECT_SOCKET_NEGOTIATE_NEEDED
265 + * we queue the work directly, otherwise
266 + * smbdirect_accept_rdma_event_handler() will do it when
267 + * RDMA_CM_EVENT_ESTABLISHED arrives.
268 + */
269 + spin_lock_irqsave(&sc->connect.lock, flags);
270 + if (!sc->first_error) {
271 + INIT_WORK(&sc->connect.work, smbdirect_accept_negotiate_recv_work);
272 + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)
273 + queue_work(sc->workqueues.accept, &sc->connect.work);
274 + }
275 + spin_unlock_irqrestore(&sc->connect.lock, flags);
276 +
277 + return;
278 +
279 + error:
280 + /*
281 + * recv_io.posted.refill_work is still disabled,
282 + * so smbdirect_connection_put_recv_io() won't
283 + * start it.
284 + */ 285 + smbdirect_connection_put_recv_io(recv_io); 286 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 287 + } 288 + 289 + static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) 290 + { 291 + struct smbdirect_socket *sc = 292 + container_of(work, struct smbdirect_socket, connect.work); 293 + struct smbdirect_socket_parameters *sp = &sc->parameters; 294 + struct smbdirect_recv_io *recv_io; 295 + struct smbdirect_negotiate_req *nreq; 296 + unsigned long flags; 297 + u16 min_version; 298 + u16 max_version; 299 + u16 credits_requested; 300 + u32 preferred_send_size; 301 + u32 max_receive_size; 302 + u32 max_fragmented_size; 303 + u32 ntstatus; 304 + 305 + if (sc->first_error) 306 + return; 307 + 308 + /* 309 + * make sure we won't start again... 310 + */ 311 + disable_work(work); 312 + 313 + /* 314 + * Reset timer to the keepalive interval in 315 + * order to trigger our next keepalive message. 316 + */ 317 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 318 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, 319 + msecs_to_jiffies(sp->keepalive_interval_msec)); 320 + 321 + /* 322 + * If smbdirect_accept_negotiate_recv_done() detected an 323 + * invalid request we want to disconnect. 324 + */ 325 + recv_io = smbdirect_connection_reassembly_first_recv_io(sc); 326 + if (!recv_io) { 327 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 328 + return; 329 + } 330 + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 331 + sc->recv_io.reassembly.queue_length--; 332 + list_del(&recv_io->list); 333 + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 334 + smbdirect_connection_put_recv_io(recv_io); 335 + 336 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) 337 + return; 338 + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; 339 + 340 + /* 341 + * Note recv_io is already part of the free list, 342 + * as we just called smbdirect_connection_put_recv_io(), 343 + * but it won't be reused before we call 344 + * smbdirect_connection_recv_io_refill() below. 
345 + */
346 +
347 + nreq = (struct smbdirect_negotiate_req *)recv_io->packet;
348 + min_version = le16_to_cpu(nreq->min_version);
349 + max_version = le16_to_cpu(nreq->max_version);
350 + credits_requested = le16_to_cpu(nreq->credits_requested);
351 + preferred_send_size = le32_to_cpu(nreq->preferred_send_size);
352 + max_receive_size = le32_to_cpu(nreq->max_receive_size);
353 + max_fragmented_size = le32_to_cpu(nreq->max_fragmented_size);
354 +
355 + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO,
356 + "ReqIn: %s%x, %s%x, %s%u, %s%u, %s%u, %s%u\n",
357 + "MinVersion=0x",
358 + le16_to_cpu(nreq->min_version),
359 + "MaxVersion=0x",
360 + le16_to_cpu(nreq->max_version),
361 + "CreditsRequested=",
362 + le16_to_cpu(nreq->credits_requested),
363 + "PreferredSendSize=",
364 + le32_to_cpu(nreq->preferred_send_size),
365 + "MaxRecvSize=",
366 + le32_to_cpu(nreq->max_receive_size),
367 + "MaxFragmentedSize=",
368 + le32_to_cpu(nreq->max_fragmented_size));
369 +
370 + if (!(min_version <= SMBDIRECT_V1 && max_version >= SMBDIRECT_V1)) {
371 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
372 + "invalid: min_version=0x%x max_version=0x%x\n",
373 + min_version, max_version);
374 + ntstatus = le32_to_cpu(STATUS_NOT_SUPPORTED);
375 + goto not_supported;
376 + }
377 +
378 + if (credits_requested == 0) {
379 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
380 + "invalid: credits_requested == 0\n");
381 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
382 + return;
383 + }
384 +
385 + if (max_receive_size < SMBDIRECT_MIN_RECEIVE_SIZE) {
386 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
387 + "invalid: max_receive_size=%u < %u\n",
388 + max_receive_size,
389 + SMBDIRECT_MIN_RECEIVE_SIZE);
390 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
391 + return;
392 + }
393 +
394 + if (max_fragmented_size < SMBDIRECT_MIN_FRAGMENTED_SIZE) {
395 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
396 + "invalid: max_fragmented_size=%u < %u\n",
397 + max_fragmented_size,
398 + SMBDIRECT_MIN_FRAGMENTED_SIZE);
399 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
400 + return;
401 + }
402 +
403 + /*
404 + * At least the value of SMBDIRECT_MIN_RECEIVE_SIZE is used.
405 + */
406 + sp->max_recv_size = min_t(u32, sp->max_recv_size, preferred_send_size);
407 + sp->max_recv_size = max_t(u32, sp->max_recv_size, SMBDIRECT_MIN_RECEIVE_SIZE);
408 +
409 + /*
410 + * The maximum fragmented upper-layer payload receive size supported
411 + *
412 + * Assume max_payload_per_credit is
413 + * smb_direct_max_receive_size - 24 = 1364 - 24 = 1340
414 + *
415 + * The maximum number would be
416 + * smb_direct_receive_credit_max * max_payload_per_credit
417 + *
418 + * 1340 * 255 = 341700 (0x536C4)
419 + *
420 + * The minimum value from the spec is 131072 (0x20000)
421 + *
422 + * For now we use the logic we used in ksmbd before:
423 + * (1364 * 255) / 2 = 173910 (0x2A756)
424 + *
425 + * We need to adjust this here in case the peer
426 + * lowered sp->max_recv_size.
427 + *
428 + * TODO: instead of adjusting max_fragmented_recv_size
429 + * we should adjust the number of available buffers,
430 + * but for now we keep the logic as it was used
431 + * in ksmbd before.
432 + */
433 + sp->max_fragmented_recv_size = (sp->recv_credit_max * sp->max_recv_size) / 2;
434 +
435 + /*
436 + * We take the value from the peer, which is checked to be higher than 0,
437 + * but we limit it to the max value we support in order to have
438 + * the main logic simpler.
439 + */ 440 + sc->recv_io.credits.target = credits_requested; 441 + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, 442 + sp->recv_credit_max); 443 + 444 + /* 445 + * Note nreq->max_receive_size was already checked against 446 + * SMBDIRECT_MIN_RECEIVE_SIZE above. 447 + */ 448 + sp->max_send_size = min_t(u32, sp->max_send_size, max_receive_size); 449 + 450 + /* 451 + * Note nreq->max_fragmented_size was already checked against 452 + * SMBDIRECT_MIN_FRAGMENTED_SIZE above. 453 + */ 454 + sp->max_fragmented_send_size = max_fragmented_size; 455 + 456 + if (sc->accept.listener) { 457 + struct smbdirect_socket *lsc = sc->accept.listener; 458 + unsigned long flags; 459 + 460 + spin_lock_irqsave(&lsc->listen.lock, flags); 461 + list_del(&sc->accept.list); 462 + list_add_tail(&sc->accept.list, &lsc->listen.ready); 463 + wake_up(&lsc->listen.wait_queue); 464 + spin_unlock_irqrestore(&lsc->listen.lock, flags); 465 + 466 + /* 467 + * smbdirect_socket_accept() will call 468 + * smbdirect_accept_negotiate_finish(nsc, 0); 469 + * 470 + * So that we don't send the negotiation 471 + * response that grants credits to the peer 472 + * before the socket is accepted by the 473 + * application. 474 + */ 475 + return; 476 + } 477 + 478 + ntstatus = le32_to_cpu(STATUS_SUCCESS); 479 + 480 + not_supported: 481 + smbdirect_accept_negotiate_finish(sc, ntstatus); 482 + } 483 + 484 + void smbdirect_accept_negotiate_finish(struct smbdirect_socket *sc, u32 ntstatus) 485 + { 486 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 487 + struct smbdirect_recv_io *recv_io; 488 + struct smbdirect_send_io *send_io; 489 + struct smbdirect_negotiate_resp *nrep; 490 + int posted; 491 + u16 new_credits; 492 + int ret; 493 + 494 + if (ntstatus) 495 + goto not_supported; 496 + 497 + /* 498 + * Prepare for receiving data_transfer messages 499 + */ 500 + sc->recv_io.reassembly.full_packet_received = true; 501 + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; 502 + list_for_each_entry(recv_io, &sc->recv_io.free.list, list) 503 + recv_io->cqe.done = smbdirect_connection_recv_io_done; 504 + recv_io = NULL; 505 + 506 + /* 507 + * We should at least post 1 smbdirect_recv_io! 508 + */ 509 + posted = smbdirect_connection_recv_io_refill(sc); 510 + if (posted < 1) { 511 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 512 + "smbdirect_connection_recv_io_refill() failed %1pe\n", 513 + SMBDIRECT_DEBUG_ERR_PTR(posted)); 514 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 515 + return; 516 + } 517 + 518 + /* 519 + * The response will grant credits for all posted 520 + * smbdirect_recv_io messages. 
521 + */ 522 + new_credits = smbdirect_connection_grant_recv_credits(sc); 523 + 524 + not_supported: 525 + send_io = smbdirect_connection_alloc_send_io(sc); 526 + if (IS_ERR(send_io)) { 527 + ret = PTR_ERR(send_io); 528 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 529 + "smbdirect_connection_alloc_send_io() failed %1pe\n", 530 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 531 + smbdirect_socket_schedule_cleanup(sc, ret); 532 + return; 533 + } 534 + send_io->cqe.done = smbdirect_accept_negotiate_send_done; 535 + 536 + nrep = (struct smbdirect_negotiate_resp *)send_io->packet; 537 + nrep->min_version = cpu_to_le16(SMBDIRECT_V1); 538 + nrep->max_version = cpu_to_le16(SMBDIRECT_V1); 539 + if (ntstatus == 0) { 540 + nrep->negotiated_version = cpu_to_le16(SMBDIRECT_V1); 541 + nrep->reserved = 0; 542 + nrep->credits_requested = cpu_to_le16(sp->send_credit_target); 543 + nrep->credits_granted = cpu_to_le16(new_credits); 544 + nrep->status = cpu_to_le32(ntstatus); 545 + nrep->max_readwrite_size = cpu_to_le32(sp->max_read_write_size); 546 + nrep->preferred_send_size = cpu_to_le32(sp->max_send_size); 547 + nrep->max_receive_size = cpu_to_le32(sp->max_recv_size); 548 + nrep->max_fragmented_size = cpu_to_le32(sp->max_fragmented_recv_size); 549 + } else { 550 + nrep->negotiated_version = 0; 551 + nrep->reserved = 0; 552 + nrep->credits_requested = 0; 553 + nrep->credits_granted = 0; 554 + nrep->status = cpu_to_le32(ntstatus); 555 + nrep->max_readwrite_size = 0; 556 + nrep->preferred_send_size = 0; 557 + nrep->max_receive_size = 0; 558 + nrep->max_fragmented_size = 0; 559 + } 560 + 561 + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, 562 + "RepOut: %s%x, %s%x, %s%x, %s%u, %s%u, %s%x, %s%u, %s%u, %s%u, %s%u\n", 563 + "MinVersion=0x", 564 + le16_to_cpu(nrep->min_version), 565 + "MaxVersion=0x", 566 + le16_to_cpu(nrep->max_version), 567 + "NegotiatedVersion=0x", 568 + le16_to_cpu(nrep->negotiated_version), 569 + "CreditsRequested=", 570 + le16_to_cpu(nrep->credits_requested), 571 + "CreditsGranted=", 572 + le16_to_cpu(nrep->credits_granted), 573 + "Status=0x", 574 + le32_to_cpu(nrep->status), 575 + "MaxReadWriteSize=", 576 + le32_to_cpu(nrep->max_readwrite_size), 577 + "PreferredSendSize=", 578 + le32_to_cpu(nrep->preferred_send_size), 579 + "MaxRecvSize=", 580 + le32_to_cpu(nrep->max_receive_size), 581 + "MaxFragmentedSize=", 582 + le32_to_cpu(nrep->max_fragmented_size)); 583 + 584 + send_io->sge[0].addr = ib_dma_map_single(sc->ib.dev, 585 + nrep, 586 + sizeof(*nrep), 587 + DMA_TO_DEVICE); 588 + ret = ib_dma_mapping_error(sc->ib.dev, send_io->sge[0].addr); 589 + if (ret) { 590 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 591 + "ib_dma_mapping_error() failed %1pe\n", 592 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 593 + smbdirect_connection_free_send_io(send_io); 594 + smbdirect_socket_schedule_cleanup(sc, ret); 595 + return; 596 + } 597 + 598 + send_io->sge[0].length = sizeof(*nrep); 599 + send_io->sge[0].lkey = sc->ib.pd->local_dma_lkey; 600 + send_io->num_sge = 1; 601 + 602 + ib_dma_sync_single_for_device(sc->ib.dev, 603 + send_io->sge[0].addr, 604 + send_io->sge[0].length, 605 + DMA_TO_DEVICE); 606 + 607 + send_io->wr.next = NULL; 608 + send_io->wr.wr_cqe = &send_io->cqe; 609 + send_io->wr.sg_list = send_io->sge; 610 + send_io->wr.num_sge = send_io->num_sge; 611 + send_io->wr.opcode = IB_WR_SEND; 612 + send_io->wr.send_flags = IB_SEND_SIGNALED; 613 + 614 + ret = smbdirect_connection_post_send_wr(sc, &send_io->wr); 615 + if (ret) { 616 + /* if we reach here, post send failed */ 617 + smbdirect_log_rdma_send(sc, 
SMBDIRECT_LOG_ERR, 618 + "smbdirect_connection_post_send_wr() failed %1pe\n", 619 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 620 + /* 621 + * Note smbdirect_connection_free_send_io() 622 + * does ib_dma_unmap_page() 623 + */ 624 + smbdirect_connection_free_send_io(send_io); 625 + smbdirect_socket_schedule_cleanup(sc, ret); 626 + return; 627 + } 628 + 629 + /* 630 + * smbdirect_accept_negotiate_send_done 631 + * will do all remaining work... 632 + */ 633 + } 634 + 635 + static void smbdirect_accept_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc) 636 + { 637 + struct smbdirect_send_io *send_io = 638 + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 639 + struct smbdirect_socket *sc = send_io->socket; 640 + struct smbdirect_negotiate_resp *nrep; 641 + u32 ntstatus; 642 + 643 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, 644 + "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", 645 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 646 + 647 + nrep = (struct smbdirect_negotiate_resp *)send_io->packet; 648 + ntstatus = le32_to_cpu(nrep->status); 649 + 650 + /* Note this frees wc->wr_cqe, but not wc */ 651 + smbdirect_connection_free_send_io(send_io); 652 + atomic_dec(&sc->send_io.pending.count); 653 + 654 + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { 655 + if (wc->status != IB_WC_WR_FLUSH_ERR) 656 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, 657 + "wc->status=%s (%d) wc->opcode=%d\n", 658 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 659 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 660 + return; 661 + } 662 + 663 + /* 664 + * If we send a smbdirect_negotiate_resp without NT_STATUS_OK (0) 665 + * we need to disconnect now. 666 + * 667 + * Otherwise smbdirect_connection_negotiation_done() 668 + * will setup all required things and wake up 669 + * the waiter. 670 + */ 671 + if (ntstatus) 672 + smbdirect_socket_schedule_cleanup(sc, -EOPNOTSUPP); 673 + else 674 + smbdirect_connection_negotiation_done(sc); 675 + } 676 + 677 + static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id, 678 + struct rdma_cm_event *event) 679 + { 680 + struct smbdirect_socket *sc = id->context; 681 + unsigned long flags; 682 + 683 + /* 684 + * cma_cm_event_handler() has 685 + * lockdep_assert_held(&id_priv->handler_mutex); 686 + * 687 + * Mutexes are not allowed in interrupts, 688 + * and we rely on not being in an interrupt here, 689 + * as we might sleep. 690 + * 691 + * We didn't timeout so we cancel our idle timer, 692 + * it will be scheduled again if needed. 
693 + */
694 + WARN_ON_ONCE(in_interrupt());
695 +
696 + if (event->status || event->event != sc->rdma.expected_event) {
697 + int ret = -ECONNABORTED;
698 +
699 + if (event->event == RDMA_CM_EVENT_REJECTED)
700 + ret = -ECONNREFUSED;
701 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
702 + ret = -ENETDOWN;
703 + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status)))
704 + ret = event->status;
705 +
706 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
707 + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n",
708 + smbdirect_socket_status_string(sc->status),
709 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
710 + rdma_event_msg(sc->rdma.expected_event),
711 + rdma_event_msg(event->event),
712 + event->status,
713 + SMBDIRECT_DEBUG_ERR_PTR(ret));
714 +
715 + smbdirect_socket_schedule_cleanup(sc, ret);
716 + return 0;
717 + }
718 +
719 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO,
720 + "%s (first_error=%1pe) event=%s\n",
721 + smbdirect_socket_status_string(sc->status),
722 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
723 + rdma_event_msg(event->event));
724 +
725 + if (sc->first_error)
726 + return 0;
727 +
728 + switch (event->event) {
729 + case RDMA_CM_EVENT_ESTABLISHED:
730 + smbdirect_connection_rdma_established(sc);
731 +
732 + /*
733 + * Some drivers (at least mlx5_ib and irdma) might post a
734 + * recv completion before RDMA_CM_EVENT_ESTABLISHED,
735 + * so we need to adjust our expectation in that case.
736 + *
737 + * If smbdirect_accept_negotiate_recv_done was called first,
738 + * it initialized sc->connect.work and left it for us to
739 + * queue, so that we have moved to
740 + * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED before
741 + * smbdirect_accept_negotiate_recv_work() runs.
742 + *
743 + * If smbdirect_accept_negotiate_recv_done didn't happen
744 + * yet, sc->connect.work is still disabled and
745 + * queue_work() is a no-op.
746 + */
747 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING))
748 + return 0;
749 + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
750 + spin_lock_irqsave(&sc->connect.lock, flags);
751 + if (!sc->first_error)
752 + queue_work(sc->workqueues.accept, &sc->connect.work);
753 + spin_unlock_irqrestore(&sc->connect.lock, flags);
754 +
755 + /*
756 + * wait for smbdirect_accept_negotiate_recv_done()
757 + * to get the negotiate request.
758 + */ 759 + return 0; 760 + 761 + default: 762 + break; 763 + } 764 + 765 + /* 766 + * This is an internal error 767 + */ 768 + WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_ESTABLISHED); 769 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 770 + return 0; 771 + } 772 + 773 + static long smbdirect_socket_wait_for_accept(struct smbdirect_socket *lsc, long timeo) 774 + { 775 + long ret; 776 + 777 + ret = wait_event_interruptible_timeout(lsc->listen.wait_queue, 778 + !list_empty_careful(&lsc->listen.ready) || 779 + lsc->status != SMBDIRECT_SOCKET_LISTENING || 780 + lsc->first_error, 781 + timeo); 782 + if (lsc->status != SMBDIRECT_SOCKET_LISTENING) 783 + return -EINVAL; 784 + if (lsc->first_error) 785 + return lsc->first_error; 786 + if (!ret) 787 + ret = -ETIMEDOUT; 788 + if (ret < 0) 789 + return ret; 790 + 791 + return 0; 792 + } 793 + 794 + struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, 795 + long timeo, 796 + struct proto_accept_arg *arg) 797 + { 798 + struct smbdirect_socket *nsc; 799 + unsigned long flags; 800 + 801 + if (lsc->status != SMBDIRECT_SOCKET_LISTENING) { 802 + arg->err = -EINVAL; 803 + return NULL; 804 + } 805 + 806 + if (lsc->first_error) { 807 + arg->err = lsc->first_error; 808 + return NULL; 809 + } 810 + 811 + if (list_empty_careful(&lsc->listen.ready)) { 812 + int ret; 813 + 814 + if (timeo == 0) { 815 + arg->err = -EAGAIN; 816 + return NULL; 817 + } 818 + 819 + ret = smbdirect_socket_wait_for_accept(lsc, timeo); 820 + if (ret) { 821 + arg->err = ret; 822 + return NULL; 823 + } 824 + } 825 + 826 + spin_lock_irqsave(&lsc->listen.lock, flags); 827 + nsc = list_first_entry_or_null(&lsc->listen.ready, 828 + struct smbdirect_socket, 829 + accept.list); 830 + if (nsc) { 831 + nsc->accept.listener = NULL; 832 + list_del_init_careful(&nsc->accept.list); 833 + arg->is_empty = list_empty_careful(&lsc->listen.ready); 834 + } 835 + spin_unlock_irqrestore(&lsc->listen.lock, flags); 836 + if (!nsc) { 837 + arg->err = -EAGAIN; 838 + return NULL; 839 + } 840 + 841 + /* 842 + * We did not send the negotiation response 843 + * yet, so we did not grant any credits to the client, 844 + * so it didn't grant any credits to us. 845 + * 846 + * The caller expects a connected socket 847 + * now as there are no credits anyway. 848 + * 849 + * Then we send the negotiation response in 850 + * order to grant credits to the peer. 851 + */ 852 + nsc->status = SMBDIRECT_SOCKET_CONNECTED; 853 + smbdirect_accept_negotiate_finish(nsc, 0); 854 + 855 + return nsc; 856 + } 857 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_accept);
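Taken together, the accept path above hands the application a socket that is connected but has not yet granted credits. An illustrative server-side accept loop, assuming lsc is a listening socket prepared by the listen-side helpers of this series (not shown in this hunk):

	struct proto_accept_arg arg = {};
	struct smbdirect_socket *nsc;

	nsc = smbdirect_socket_accept(lsc, msecs_to_jiffies(5000), &arg);
	if (!nsc)
		return arg.err;	/* -EAGAIN, -ETIMEDOUT, -EINVAL or first_error */

	/*
	 * nsc is connected; smbdirect_socket_accept() already called
	 * smbdirect_accept_negotiate_finish(nsc, 0), which posts the
	 * receive buffers and sends the response granting credits.
	 */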
+925
fs/smb/common/smbdirect/smbdirect_connect.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later
2 + /*
3 + * Copyright (c) 2012,2016,2017,2025 Stefan Metzmacher
4 + */
5 +
6 + #include "smbdirect_internal.h"
7 + #include "../../common/smb2status.h"
8 +
9 + static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc);
10 + static int smbdirect_connect_resolve_addr(struct smbdirect_socket *sc,
11 + const struct sockaddr *src,
12 + const struct sockaddr *dst);
13 + static int smbdirect_connect_rdma_event_handler(struct rdma_cm_id *id,
14 + struct rdma_cm_event *event);
15 + static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc);
16 + static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc);
17 + static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc);
18 +
19 + int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst)
20 + {
21 + const struct sockaddr *src = NULL;
22 + union {
23 + struct sockaddr sa;
24 + struct sockaddr_storage ss;
25 + } src_addr = {
26 + .sa = {
27 + .sa_family = AF_UNSPEC,
28 + },
29 + };
30 + int ret;
31 +
32 + if (sc->first_error)
33 + return -ENOTCONN;
34 +
35 + if (sc->status != SMBDIRECT_SOCKET_CREATED)
36 + return -EALREADY;
37 +
38 + if (WARN_ON_ONCE(!sc->rdma.cm_id))
39 + return -EINVAL;
40 +
41 + src_addr.ss = sc->rdma.cm_id->route.addr.src_addr;
42 + if (src_addr.sa.sa_family != AF_UNSPEC)
43 + src = &src_addr.sa;
44 +
45 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO,
46 + "connect: src: %pISpsfc dst: %pISpsfc\n",
47 + src, dst);
48 +
49 + ret = smbdirect_connect_setup_connection(sc);
50 + if (ret)
51 + return ret;
52 +
53 + ret = smbdirect_connect_resolve_addr(sc, src, dst);
54 + if (ret)
55 + return ret;
56 +
57 + /*
58 + * The rest happens asynchronously via smbdirect_connect_rdma_event_handler();
59 + * the caller will decide whether to wait or not.
60 + */
61 + return 0;
62 + }
63 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect);
64 +
65 + static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc)
66 + {
67 + rdma_lock_handler(sc->rdma.cm_id);
68 + sc->rdma.cm_id->event_handler = smbdirect_connect_rdma_event_handler;
69 + rdma_unlock_handler(sc->rdma.cm_id);
70 +
71 + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_CREATED))
72 + return -EINVAL;
73 + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED;
74 +
75 + return 0;
76 + }
77 +
78 + static int smbdirect_connect_resolve_addr(struct smbdirect_socket *sc,
79 + const struct sockaddr *src,
80 + const struct sockaddr *dst)
81 + {
82 + const struct smbdirect_socket_parameters *sp = &sc->parameters;
83 + struct sockaddr *src_addr = NULL;
84 + struct sockaddr *dst_addr = NULL;
85 + int ret;
86 +
87 + src_addr = (struct sockaddr *)src;
88 + if (src_addr && src_addr->sa_family == AF_UNSPEC)
89 + src_addr = NULL;
90 + dst_addr = (struct sockaddr *)dst;
91 +
92 + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED))
93 + return -EINVAL;
94 + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING;
95 + sc->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
96 + ret = rdma_resolve_addr(sc->rdma.cm_id, src_addr, dst_addr,
97 + sp->resolve_addr_timeout_msec);
98 + if (ret) {
99 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
100 + "rdma_resolve_addr() failed %1pe\n",
101 + SMBDIRECT_DEBUG_ERR_PTR(ret));
102 + return ret;
103 + }
104 +
105 + return 0;
106 + }
107 +
108 + static int smbdirect_connect_resolve_route(struct smbdirect_socket *sc)
109 + {
110 + const struct smbdirect_socket_parameters *sp = &sc->parameters;
111 + int ret;
112 +
113 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED))
114 + return sc->first_error;
115 + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING;
116 + sc->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
117 + ret = rdma_resolve_route(sc->rdma.cm_id, sp->resolve_route_timeout_msec);
118 + if (ret) {
119 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
120 + "rdma_resolve_route() failed %1pe\n",
121 + SMBDIRECT_DEBUG_ERR_PTR(ret));
122 + return ret;
123 + }
124 +
125 + return 0;
126 + }
127 +
128 + static int smbdirect_connect_rdma_connect(struct smbdirect_socket *sc)
129 + {
130 + struct smbdirect_socket_parameters *sp = &sc->parameters;
131 + struct rdma_conn_param conn_param;
132 + __be32 ird_ord_hdr[2];
133 + int ret;
134 +
135 + sc->ib.dev = sc->rdma.cm_id->device;
136 +
137 + if (!smbdirect_frwr_is_supported(&sc->ib.dev->attrs)) {
138 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
139 + "Fast Registration Work Requests (FRWR) is not supported by device %.*s\n",
140 + IB_DEVICE_NAME_MAX,
141 + sc->ib.dev->name);
142 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
143 + "Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
144 + sc->ib.dev->attrs.device_cap_flags,
145 + sc->ib.dev->attrs.max_fast_reg_page_list_len);
146 + return -EPROTONOSUPPORT;
147 + }
148 +
149 + if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB &&
150 + !rdma_ib_or_roce(sc->ib.dev, sc->rdma.cm_id->port_num)) {
151 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR,
152 + "Not IB: device: %.*s IW:%u local: %pISpsfc remote: %pISpsfc\n",
153 + IB_DEVICE_NAME_MAX,
154 + sc->ib.dev->name,
155 + rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num),
156 + &sc->rdma.cm_id->route.addr.src_addr,
157 + &sc->rdma.cm_id->route.addr.dst_addr);
158 + return -EPROTONOSUPPORT;
159 + }
160 + if (sp->flags &
SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW && 161 + !rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num)) { 162 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 163 + "Not IW: device: %.*s IB:%u local: %pISpsfc remote: %pISpsfc\n", 164 + IB_DEVICE_NAME_MAX, 165 + sc->ib.dev->name, 166 + rdma_ib_or_roce(sc->ib.dev, sc->rdma.cm_id->port_num), 167 + &sc->rdma.cm_id->route.addr.src_addr, 168 + &sc->rdma.cm_id->route.addr.dst_addr); 169 + return -EPROTONOSUPPORT; 170 + } 171 + 172 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 173 + "rdma connect: device: %.*s local: %pISpsfc remote: %pISpsfc\n", 174 + IB_DEVICE_NAME_MAX, 175 + sc->ib.dev->name, 176 + &sc->rdma.cm_id->route.addr.src_addr, 177 + &sc->rdma.cm_id->route.addr.dst_addr); 178 + 179 + sp->max_frmr_depth = min_t(u32, sp->max_frmr_depth, 180 + sc->ib.dev->attrs.max_fast_reg_page_list_len); 181 + sc->mr_io.type = IB_MR_TYPE_MEM_REG; 182 + if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) 183 + sc->mr_io.type = IB_MR_TYPE_SG_GAPS; 184 + 185 + sp->responder_resources = min_t(u8, sp->responder_resources, 186 + sc->ib.dev->attrs.max_qp_rd_atom); 187 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO, 188 + "responder_resources=%d\n", 189 + sp->responder_resources); 190 + 191 + ret = smbdirect_connection_create_qp(sc); 192 + if (ret) { 193 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 194 + "smbdirect_connection_create_qp() failed %1pe\n", 195 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 196 + return ret; 197 + } 198 + 199 + memset(&conn_param, 0, sizeof(conn_param)); 200 + conn_param.initiator_depth = sp->initiator_depth; 201 + conn_param.responder_resources = sp->responder_resources; 202 + 203 + /* Need to send IRD/ORD in private data for iWARP */ 204 + if (rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num)) { 205 + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); 206 + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); 207 + conn_param.private_data = ird_ord_hdr; 208 + conn_param.private_data_len = sizeof(ird_ord_hdr); 209 + } else { 210 + conn_param.private_data = NULL; 211 + conn_param.private_data_len = 0; 212 + } 213 + 214 + conn_param.retry_count = SMBDIRECT_RDMA_CM_RETRY; 215 + conn_param.rnr_retry_count = SMBDIRECT_RDMA_CM_RNR_RETRY; 216 + conn_param.flow_control = 0; 217 + 218 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED)) 219 + return sc->first_error; 220 + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; 221 + sc->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED; 222 + ret = rdma_connect_locked(sc->rdma.cm_id, &conn_param); 223 + if (ret) { 224 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 225 + "rdma_connect_locked() failed %1pe\n", 226 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 227 + return ret; 228 + } 229 + 230 + /* 231 + * start with the rdma connect timeout and SMBDIRECT_KEEPALIVE_PENDING 232 + * so that the timer will cause a disconnect. 
233 + */ 234 + INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); 235 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 236 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, 237 + msecs_to_jiffies(sp->rdma_connect_timeout_msec)); 238 + 239 + return 0; 240 + } 241 + 242 + static int smbdirect_connect_rdma_event_handler(struct rdma_cm_id *id, 243 + struct rdma_cm_event *event) 244 + { 245 + struct smbdirect_socket *sc = id->context; 246 + u8 peer_initiator_depth; 247 + u8 peer_responder_resources; 248 + int ret; 249 + 250 + /* 251 + * cma_cm_event_handler() has 252 + * lockdep_assert_held(&id_priv->handler_mutex); 253 + * 254 + * Mutexes are not allowed in interrupts, 255 + * and we rely on not being in an interrupt here, 256 + * as we might sleep. 257 + * 258 + * We didn't timeout so we cancel our idle timer, 259 + * it will be scheduled again if needed. 260 + */ 261 + WARN_ON_ONCE(in_interrupt()); 262 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 263 + cancel_delayed_work_sync(&sc->idle.timer_work); 264 + 265 + if (event->status || event->event != sc->rdma.expected_event) { 266 + int lvl = SMBDIRECT_LOG_ERR; 267 + 268 + ret = -ECONNABORTED; 269 + 270 + if (event->event == RDMA_CM_EVENT_REJECTED) 271 + ret = -ECONNREFUSED; 272 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 273 + ret = -ENETDOWN; 274 + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) 275 + ret = event->status; 276 + 277 + if (ret == -ENODEV) 278 + lvl = SMBDIRECT_LOG_INFO; 279 + 280 + smbdirect_log_rdma_event(sc, lvl, 281 + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", 282 + smbdirect_socket_status_string(sc->status), 283 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 284 + rdma_event_msg(sc->rdma.expected_event), 285 + rdma_event_msg(event->event), 286 + event->status, 287 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 288 + 289 + smbdirect_socket_schedule_cleanup_lvl(sc, 290 + lvl, 291 + ret); 292 + return 0; 293 + } 294 + 295 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 296 + "%s (first_error=%1pe) event=%s\n", 297 + smbdirect_socket_status_string(sc->status), 298 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 299 + rdma_event_msg(event->event)); 300 + 301 + if (sc->first_error) 302 + return 0; 303 + 304 + switch (event->event) { 305 + case RDMA_CM_EVENT_ADDR_RESOLVED: 306 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING)) 307 + return 0; 308 + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; 309 + 310 + ret = smbdirect_connect_resolve_route(sc); 311 + if (ret) 312 + smbdirect_socket_schedule_cleanup(sc, ret); 313 + return 0; 314 + 315 + case RDMA_CM_EVENT_ROUTE_RESOLVED: 316 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING)) 317 + return 0; 318 + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; 319 + 320 + ret = smbdirect_connect_rdma_connect(sc); 321 + if (ret) 322 + smbdirect_socket_schedule_cleanup(sc, ret); 323 + return 0; 324 + 325 + case RDMA_CM_EVENT_ESTABLISHED: 326 + smbdirect_connection_rdma_established(sc); 327 + 328 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) 329 + return 0; 330 + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; 331 + 332 + /* 333 + * Here we work around an inconsistency between 334 + * iWarp and other devices (at least rxe and irdma using RoCEv2) 335 + */ 336 + if (rdma_protocol_iwarp(id->device, id->port_num)) { 337 + /* 338 + * iWarp devices report the peer's values 339 + * with the perspective of the peer here. 
340 + * Tested with siw and irdma (in iwarp mode) 341 + * We need to change to our perspective here, 342 + * so we need to switch the values. 343 + */ 344 + peer_initiator_depth = event->param.conn.responder_resources; 345 + peer_responder_resources = event->param.conn.initiator_depth; 346 + } else { 347 + /* 348 + * Non iWarp devices report the peer's values 349 + * already changed to our perspective here. 350 + * Tested with rxe and irdma (in roce mode). 351 + */ 352 + peer_initiator_depth = event->param.conn.initiator_depth; 353 + peer_responder_resources = event->param.conn.responder_resources; 354 + } 355 + smbdirect_connection_negotiate_rdma_resources(sc, 356 + peer_initiator_depth, 357 + peer_responder_resources, 358 + &event->param.conn); 359 + 360 + ret = smbdirect_connect_negotiate_start(sc); 361 + if (ret) 362 + smbdirect_socket_schedule_cleanup(sc, ret); 363 + return 0; 364 + 365 + default: 366 + break; 367 + } 368 + 369 + /* 370 + * This is an internal error 371 + */ 372 + WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_ESTABLISHED); 373 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 374 + return 0; 375 + } 376 + 377 + static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc) 378 + { 379 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 380 + struct smbdirect_recv_io *recv_io = NULL; 381 + struct smbdirect_send_io *send_io = NULL; 382 + struct smbdirect_negotiate_req *nreq = NULL; 383 + int ret; 384 + 385 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) 386 + return sc->first_error; 387 + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; 388 + 389 + ret = smbdirect_connection_create_mem_pools(sc); 390 + if (ret) { 391 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 392 + "smbdirect_connection_create_mem_pools() failed %1pe\n", 393 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 394 + goto create_mem_pools_failed; 395 + } 396 + 397 + /* 398 + * There is only a single batch credit 399 + */ 400 + atomic_set(&sc->send_io.bcredits.count, 1); 401 + 402 + /* 403 + * Initialize the local credits to post 404 + * IB_WR_SEND[_WITH_INV]. 
405 + */ 406 + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); 407 + 408 + recv_io = smbdirect_connection_get_recv_io(sc); 409 + if (WARN_ON_ONCE(!recv_io)) { 410 + ret = -EINVAL; 411 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 412 + "smbdirect_connection_get_recv_io() failed %1pe\n", 413 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 414 + goto get_recv_io_failed; 415 + } 416 + recv_io->cqe.done = smbdirect_connect_negotiate_recv_done; 417 + 418 + send_io = smbdirect_connection_alloc_send_io(sc); 419 + if (IS_ERR(send_io)) { 420 + ret = PTR_ERR(send_io); 421 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 422 + "smbdirect_connection_alloc_send_io() failed %1pe\n", 423 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 424 + goto alloc_send_io_failed; 425 + } 426 + send_io->cqe.done = smbdirect_connect_negotiate_send_done; 427 + 428 + nreq = (struct smbdirect_negotiate_req *)send_io->packet; 429 + nreq->min_version = cpu_to_le16(SMBDIRECT_V1); 430 + nreq->max_version = cpu_to_le16(SMBDIRECT_V1); 431 + nreq->reserved = 0; 432 + nreq->credits_requested = cpu_to_le16(sp->send_credit_target); 433 + nreq->preferred_send_size = cpu_to_le32(sp->max_send_size); 434 + nreq->max_receive_size = cpu_to_le32(sp->max_recv_size); 435 + nreq->max_fragmented_size = cpu_to_le32(sp->max_fragmented_recv_size); 436 + 437 + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, 438 + "ReqOut: %s%x, %s%x, %s%u, %s%u, %s%u, %s%u\n", 439 + "MinVersion=0x", 440 + le16_to_cpu(nreq->min_version), 441 + "MaxVersion=0x", 442 + le16_to_cpu(nreq->max_version), 443 + "CreditsRequested=", 444 + le16_to_cpu(nreq->credits_requested), 445 + "PreferredSendSize=", 446 + le32_to_cpu(nreq->preferred_send_size), 447 + "MaxRecvSize=", 448 + le32_to_cpu(nreq->max_receive_size), 449 + "MaxFragmentedSize=", 450 + le32_to_cpu(nreq->max_fragmented_size)); 451 + 452 + send_io->sge[0].addr = ib_dma_map_single(sc->ib.dev, 453 + nreq, 454 + sizeof(*nreq), 455 + DMA_TO_DEVICE); 456 + ret = ib_dma_mapping_error(sc->ib.dev, send_io->sge[0].addr); 457 + if (ret) { 458 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 459 + "ib_dma_mapping_error() failed %1pe\n", 460 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 461 + goto dma_mapping_failed; 462 + } 463 + 464 + send_io->sge[0].length = sizeof(*nreq); 465 + send_io->sge[0].lkey = sc->ib.pd->local_dma_lkey; 466 + send_io->num_sge = 1; 467 + 468 + ib_dma_sync_single_for_device(sc->ib.dev, 469 + send_io->sge[0].addr, 470 + send_io->sge[0].length, 471 + DMA_TO_DEVICE); 472 + 473 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, 474 + "sge addr=0x%llx length=%u lkey=0x%x\n", 475 + send_io->sge[0].addr, 476 + send_io->sge[0].length, 477 + send_io->sge[0].lkey); 478 + 479 + /* 480 + * Now post the recv_io buffer in order to get 481 + * the negotiate response 482 + */ 483 + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; 484 + ret = smbdirect_connection_post_recv_io(recv_io); 485 + if (ret) { 486 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 487 + "smbdirect_connection_post_recv_io() failed %1pe\n", 488 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 489 + goto post_recv_io_failed; 490 + } 491 + 492 + send_io->wr.next = NULL; 493 + send_io->wr.wr_cqe = &send_io->cqe; 494 + send_io->wr.sg_list = send_io->sge; 495 + send_io->wr.num_sge = send_io->num_sge; 496 + send_io->wr.opcode = IB_WR_SEND; 497 + send_io->wr.send_flags = IB_SEND_SIGNALED; 498 + 499 + ret = smbdirect_connection_post_send_wr(sc, &send_io->wr); 500 + if (ret) { 501 + /* if we reach here, post send failed */ 502 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, 503 + 
"smbdirect_connection_post_send_wr() failed %1pe\n", 504 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 505 + goto post_send_wr_failed; 506 + } 507 + 508 + /* 509 + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING 510 + * so that the timer will cause a disconnect. 511 + */ 512 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 513 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, 514 + msecs_to_jiffies(sp->negotiate_timeout_msec)); 515 + 516 + return 0; 517 + 518 + post_send_wr_failed: 519 + /* 520 + * ib_dma_unmap_single is called in 521 + * smbdirect_connection_free_send_io() 522 + */ 523 + smbdirect_connection_free_send_io(send_io); 524 + /* 525 + * recv_io is given to the rdma layer, 526 + * we should not put it even on error 527 + * nor call smbdirect_connection_destroy_mem_pools() 528 + * it will be cleaned up during disconnect. 529 + */ 530 + return ret; 531 + 532 + post_recv_io_failed: 533 + /* 534 + * ib_dma_unmap_single is called in 535 + * smbdirect_connection_free_send_io() 536 + */ 537 + dma_mapping_failed: 538 + smbdirect_connection_free_send_io(send_io); 539 + 540 + alloc_send_io_failed: 541 + smbdirect_connection_put_recv_io(recv_io); 542 + 543 + get_recv_io_failed: 544 + smbdirect_connection_destroy_mem_pools(sc); 545 + 546 + create_mem_pools_failed: 547 + return ret; 548 + } 549 + 550 + static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc) 551 + { 552 + struct smbdirect_send_io *send_io = 553 + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 554 + struct smbdirect_socket *sc = send_io->socket; 555 + 556 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, 557 + "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", 558 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 559 + 560 + /* Note this frees wc->wr_cqe, but not wc */ 561 + smbdirect_connection_free_send_io(send_io); 562 + atomic_dec(&sc->send_io.pending.count); 563 + 564 + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { 565 + if (wc->status != IB_WC_WR_FLUSH_ERR) 566 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, 567 + "wc->status=%s (%d) wc->opcode=%d\n", 568 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 569 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 570 + return; 571 + } 572 + } 573 + 574 + static void smbdirect_connect_negotiate_recv_work(struct work_struct *work); 575 + 576 + static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) 577 + { 578 + struct smbdirect_recv_io *recv_io = 579 + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 580 + struct smbdirect_socket *sc = recv_io->socket; 581 + unsigned long flags; 582 + 583 + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) { 584 + if (wc->status != IB_WC_WR_FLUSH_ERR) 585 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, 586 + "wc->status=%s (%d) wc->opcode=%d\n", 587 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 588 + goto error; 589 + } 590 + 591 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO, 592 + "smbdirect_recv_io completed. status='%s (%d)', opcode=%d\n", 593 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 594 + 595 + /* 596 + * This is an internal error! 597 + */ 598 + if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REP)) 599 + goto error; 600 + 601 + /* 602 + * Don't reset timer to the keepalive interval in 603 + * this will be done in smbdirect_accept_direct_negotiate_recv_work. 
604 + */
605 +
606 + ib_dma_sync_single_for_cpu(sc->ib.dev,
607 + recv_io->sge.addr,
608 + recv_io->sge.length,
609 + DMA_FROM_DEVICE);
610 +
611 + /*
612 + * Only remember recv_io if it has enough bytes;
613 + * this gives smbdirect_connect_negotiate_recv_work enough
614 + * information in order to disconnect if it was not
615 + * valid.
616 + */
617 + sc->recv_io.reassembly.full_packet_received = true;
618 + if (wc->byte_len >= sizeof(struct smbdirect_negotiate_resp))
619 + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0);
620 + else
621 + smbdirect_connection_put_recv_io(recv_io);
622 +
623 + /*
624 + * We continue via the workqueue as we may have
625 + * complex work that might sleep.
626 + *
627 + * So we defer further processing of the negotiation
628 + * to smbdirect_connect_negotiate_recv_work().
629 + */
630 + spin_lock_irqsave(&sc->connect.lock, flags);
631 + if (!sc->first_error) {
632 + INIT_WORK(&sc->connect.work, smbdirect_connect_negotiate_recv_work);
633 + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)
634 + queue_work(sc->workqueues.connect, &sc->connect.work);
635 + }
636 + spin_unlock_irqrestore(&sc->connect.lock, flags);
637 +
638 + return;
639 +
640 + error:
641 + /*
642 + * recv_io.posted.refill_work is still disabled,
643 + * so smbdirect_connection_put_recv_io() won't
644 + * start it.
645 + */
646 + smbdirect_connection_put_recv_io(recv_io);
647 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
648 + }
649 +
650 + static void smbdirect_connect_negotiate_recv_work(struct work_struct *work)
651 + {
652 + struct smbdirect_socket *sc =
653 + container_of(work, struct smbdirect_socket, connect.work);
654 + struct smbdirect_socket_parameters *sp = &sc->parameters;
655 + struct smbdirect_recv_io *recv_io;
656 + struct smbdirect_negotiate_resp *nrep;
657 + unsigned long flags;
658 + u16 negotiated_version;
659 + u16 credits_requested;
660 + u16 credits_granted;
661 + u32 status;
662 + u32 max_readwrite_size;
663 + u32 preferred_send_size;
664 + u32 max_receive_size;
665 + u32 max_fragmented_size;
666 + int posted;
667 + int ret;
668 +
669 + if (sc->first_error)
670 + return;
671 +
672 + /*
673 + * make sure we won't start again...
674 + */
675 + disable_work(work);
676 +
677 + /*
678 + * Reset timer to the keepalive interval in
679 + * order to trigger our next keepalive message.
680 + */
681 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE;
682 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work,
683 + msecs_to_jiffies(sp->keepalive_interval_msec));
684 +
685 + /*
686 + * If smbdirect_connect_negotiate_recv_done() detected an
687 + * invalid request we want to disconnect.
688 + */
689 + recv_io = smbdirect_connection_reassembly_first_recv_io(sc);
690 + if (!recv_io) {
691 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);
692 + return;
693 + }
694 + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
695 + sc->recv_io.reassembly.queue_length--;
696 + list_del(&recv_io->list);
697 + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
698 + smbdirect_connection_put_recv_io(recv_io);
699 +
700 + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING))
701 + return;
702 +
703 + /*
704 + * Note recv_io is already part of the free list,
705 + * as we just called smbdirect_connection_put_recv_io(),
706 + * but it won't be reused before we call
707 + * smbdirect_connection_recv_io_refill() below.
708 + */ 709 + 710 + nrep = (struct smbdirect_negotiate_resp *)recv_io->packet; 711 + negotiated_version = le16_to_cpu(nrep->negotiated_version); 712 + credits_requested = le16_to_cpu(nrep->credits_requested); 713 + credits_granted = le16_to_cpu(nrep->credits_granted); 714 + status = le32_to_cpu(nrep->status); 715 + max_readwrite_size = le32_to_cpu(nrep->max_readwrite_size); 716 + preferred_send_size = le32_to_cpu(nrep->preferred_send_size); 717 + max_receive_size = le32_to_cpu(nrep->max_receive_size); 718 + max_fragmented_size = le32_to_cpu(nrep->max_fragmented_size); 719 + 720 + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, 721 + "RepIn: %s%x, %s%x, %s%x, %s%u, %s%u, %s%x, %s%u, %s%u, %s%u, %s%u\n", 722 + "MinVersion=0x", 723 + le16_to_cpu(nrep->min_version), 724 + "MaxVersion=0x", 725 + le16_to_cpu(nrep->max_version), 726 + "NegotiatedVersion=0x", 727 + le16_to_cpu(nrep->negotiated_version), 728 + "CreditsRequested=", 729 + le16_to_cpu(nrep->credits_requested), 730 + "CreditsGranted=", 731 + le16_to_cpu(nrep->credits_granted), 732 + "Status=0x", 733 + le32_to_cpu(nrep->status), 734 + "MaxReadWriteSize=", 735 + le32_to_cpu(nrep->max_readwrite_size), 736 + "PreferredSendSize=", 737 + le32_to_cpu(nrep->preferred_send_size), 738 + "MaxRecvSize=", 739 + le32_to_cpu(nrep->max_receive_size), 740 + "MaxFragmentedSize=", 741 + le32_to_cpu(nrep->max_fragmented_size)); 742 + 743 + if (negotiated_version != SMBDIRECT_V1) { 744 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 745 + "invalid: negotiated_version=0x%x\n", 746 + negotiated_version); 747 + smbdirect_socket_schedule_cleanup(sc, -ECONNREFUSED); 748 + return; 749 + } 750 + 751 + if (status != le32_to_cpu(STATUS_SUCCESS)) { 752 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 753 + "invalid: status=0x%x != 0x0\n", 754 + status); 755 + smbdirect_socket_schedule_cleanup(sc, -ECONNREFUSED); 756 + return; 757 + } 758 + 759 + if (max_receive_size < SMBDIRECT_MIN_RECEIVE_SIZE) { 760 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 761 + "invalid: max_receive_size=%u < %u\n", 762 + max_receive_size, 763 + SMBDIRECT_MIN_RECEIVE_SIZE); 764 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 765 + return; 766 + } 767 + 768 + if (max_fragmented_size < SMBDIRECT_MIN_FRAGMENTED_SIZE) { 769 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 770 + "invalid: max_fragmented_size=%u < %u\n", 771 + max_fragmented_size, 772 + SMBDIRECT_MIN_FRAGMENTED_SIZE); 773 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 774 + return; 775 + } 776 + 777 + if (credits_granted == 0) { 778 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 779 + "invalid: credits_granted == 0\n"); 780 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 781 + return; 782 + } 783 + 784 + if (credits_requested == 0) { 785 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 786 + "invalid: credits_requested == 0\n"); 787 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 788 + return; 789 + } 790 + 791 + if (preferred_send_size > sp->max_recv_size) { 792 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 793 + "invalid: preferred_send_size=%u > max_recv_size=%u\n", 794 + preferred_send_size, 795 + sp->max_recv_size); 796 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 797 + return; 798 + } 799 + 800 + /* 801 + * We take the value from the peer, which is checked to be higher than 0, 802 + * but we limit it to the max value we support in order to have 803 + * the main logic simpler. 
804 + */ 805 + sc->recv_io.credits.target = credits_requested; 806 + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, 807 + sp->recv_credit_max); 808 + 809 + /* 810 + * At least the value of SMBDIRECT_MIN_RECEIVE_SIZE is used. 811 + */ 812 + sp->max_recv_size = min_t(u32, sp->max_recv_size, preferred_send_size); 813 + sp->max_recv_size = max_t(u32, sp->max_recv_size, SMBDIRECT_MIN_RECEIVE_SIZE); 814 + 815 + /* 816 + * We already sent our sp->max_fragmented_recv_size 817 + * to the peer, so we can't lower it here any more. 818 + * 819 + * TODO: but if the peer lowered sp->max_recv_size 820 + * we will have to adjust our number of buffers. 821 + * 822 + * But for now we keep it as the cifs.ko code 823 + * worked before. 824 + */ 825 + 826 + /* 827 + * Note nrep->max_receive_size was already checked against 828 + * SMBDIRECT_MIN_RECEIVE_SIZE above. 829 + */ 830 + sp->max_send_size = min_t(u32, sp->max_send_size, max_receive_size); 831 + 832 + /* 833 + * Make sure the resulting max_frmr_depth is at least 1, 834 + * which means max_read_write_size needs to be at least PAGE_SIZE. 835 + */ 836 + sp->max_read_write_size = min_t(u32, sp->max_frmr_depth * PAGE_SIZE, 837 + max_readwrite_size); 838 + if (sp->max_read_write_size < PAGE_SIZE) { 839 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 840 + "invalid: max_readwrite_size=%u < PAGE_SIZE(%lu)\n", 841 + max_readwrite_size, 842 + PAGE_SIZE); 843 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 844 + return; 845 + } 846 + sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; 847 + 848 + /* 849 + * Note nrep->credits_granted was already checked against 0 above. 850 + */ 851 + atomic_set(&sc->send_io.credits.count, credits_granted); 852 + 853 + /* 854 + * Note nrep->max_fragmented_size was already checked against 855 + * SMBDIRECT_MIN_FRAGMENTED_SIZE above. 856 + */ 857 + sp->max_fragmented_send_size = max_fragmented_size; 858 + 859 + ret = smbdirect_connection_create_mr_list(sc); 860 + if (ret) { 861 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 862 + "smbdirect_connection_create_mr_list() failed %1pe\n", 863 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 864 + smbdirect_socket_schedule_cleanup(sc, ret); 865 + return; 866 + } 867 + 868 + /* 869 + * Prepare for receiving data_transfer messages 870 + */ 871 + sc->recv_io.reassembly.full_packet_received = true; 872 + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; 873 + list_for_each_entry(recv_io, &sc->recv_io.free.list, list) 874 + recv_io->cqe.done = smbdirect_connection_recv_io_done; 875 + recv_io = NULL; 876 + 877 + /* 878 + * We should at least post 1 smbdirect_recv_io! 879 + */ 880 + posted = smbdirect_connection_recv_io_refill(sc); 881 + if (posted < 1) { 882 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 883 + "smbdirect_connection_recv_io_refill() failed %1pe\n", 884 + SMBDIRECT_DEBUG_ERR_PTR(posted)); 885 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 886 + return; 887 + } 888 + 889 + /* 890 + * smbdirect_connection_negotiation_done() 891 + * will set up all required things and wake up 892 + * the waiter. 
893 + */ 894 + smbdirect_connection_negotiation_done(sc); 895 + } 896 + 897 + int smbdirect_connect_sync(struct smbdirect_socket *sc, 898 + const struct sockaddr *dst) 899 + { 900 + int ret; 901 + 902 + ret = smbdirect_connect(sc, dst); 903 + if (ret) { 904 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 905 + "smbdirect_connect(%pISpsfc) failed %1pe\n", 906 + dst, SMBDIRECT_DEBUG_ERR_PTR(ret)); 907 + return ret; 908 + } 909 + 910 + ret = smbdirect_connection_wait_for_connected(sc); 911 + if (ret) { 912 + int lvl = SMBDIRECT_LOG_ERR; 913 + 914 + if (ret == -ENODEV) 915 + lvl = SMBDIRECT_LOG_INFO; 916 + 917 + smbdirect_log_rdma_event(sc, lvl, 918 + "wait for smbdirect_connect(%pISpsfc) failed %1pe\n", 919 + dst, SMBDIRECT_DEBUG_ERR_PTR(ret)); 920 + return ret; 921 + } 922 + 923 + return 0; 924 + } 925 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect_sync);
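Editorial sketch: smbdirect_connect_sync() above simply chains the asynchronous connect with the wait helper. A minimal caller might look like the following; smbdirect_socket_create_connecting() is a hypothetical constructor (the allocation entry point is not part of this hunk), while smbdirect_socket_release() is the teardown helper referenced elsewhere in this series.

	static int example_connect(const struct sockaddr *dst)
	{
		struct smbdirect_socket *sc;
		int ret;

		/* hypothetical constructor, see note above */
		sc = smbdirect_socket_create_connecting();
		if (IS_ERR(sc))
			return PTR_ERR(sc);

		/* resolve, negotiate and wait for SMBDIRECT_SOCKET_CONNECTED */
		ret = smbdirect_connect_sync(sc, dst);
		if (ret) {
			smbdirect_socket_release(sc);
			return ret;
		}

		/* ... smbdirect_connection_send_iter() and friends ... */
		return 0;
	}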
+2181
fs/smb/common/smbdirect/smbdirect_connection.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (c) 2025, Stefan Metzmacher 5 + */ 6 + 7 + #include "smbdirect_internal.h" 8 + #include <linux/folio_queue.h> 9 + 10 + struct smbdirect_map_sges { 11 + struct ib_sge *sge; 12 + size_t num_sge; 13 + size_t max_sge; 14 + struct ib_device *device; 15 + u32 local_dma_lkey; 16 + enum dma_data_direction direction; 17 + }; 18 + 19 + static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, 20 + struct smbdirect_map_sges *state); 21 + 22 + static void smbdirect_connection_recv_io_refill_work(struct work_struct *work); 23 + static void smbdirect_connection_send_immediate_work(struct work_struct *work); 24 + 25 + static void smbdirect_connection_qp_event_handler(struct ib_event *event, void *context) 26 + { 27 + struct smbdirect_socket *sc = context; 28 + 29 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 30 + "%s on device %.*s socket %p (cm_id=%p) status %s first_error %1pe\n", 31 + ib_event_msg(event->event), 32 + IB_DEVICE_NAME_MAX, 33 + event->device->name, 34 + sc, sc->rdma.cm_id, 35 + smbdirect_socket_status_string(sc->status), 36 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 37 + 38 + switch (event->event) { 39 + case IB_EVENT_CQ_ERR: 40 + case IB_EVENT_QP_FATAL: 41 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 42 + break; 43 + 44 + default: 45 + break; 46 + } 47 + } 48 + 49 + static int smbdirect_connection_rdma_event_handler(struct rdma_cm_id *id, 50 + struct rdma_cm_event *event) 51 + { 52 + struct smbdirect_socket *sc = id->context; 53 + int ret = -ECONNRESET; 54 + 55 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 56 + ret = -ENETDOWN; 57 + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) 58 + ret = event->status; 59 + 60 + /* 61 + * cma_cm_event_handler() has 62 + * lockdep_assert_held(&id_priv->handler_mutex); 63 + * 64 + * Mutexes are not allowed in interrupts, 65 + * and we rely on not being in an interrupt here. 66 + */ 67 + WARN_ON_ONCE(in_interrupt()); 68 + 69 + if (event->event != sc->rdma.expected_event) { 70 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 71 + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", 72 + smbdirect_socket_status_string(sc->status), 73 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 74 + rdma_event_msg(sc->rdma.expected_event), 75 + rdma_event_msg(event->event), 76 + event->status, 77 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 78 + 79 + /* 80 + * If we get RDMA_CM_EVENT_DEVICE_REMOVAL, 81 + * we should change to SMBDIRECT_SOCKET_DISCONNECTED, 82 + * so that rdma_disconnect() is avoided later via 83 + * smbdirect_socket_schedule_cleanup[_status]() => 84 + * smbdirect_socket_cleanup_work(). 85 + * 86 + * As otherwise we'd set SMBDIRECT_SOCKET_DISCONNECTING, 87 + * but never ever get RDMA_CM_EVENT_DISCONNECTED and 88 + * never reach SMBDIRECT_SOCKET_DISCONNECTED. 
89 + */ 90 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 91 + smbdirect_socket_schedule_cleanup_status(sc, 92 + SMBDIRECT_LOG_ERR, 93 + ret, 94 + SMBDIRECT_SOCKET_DISCONNECTED); 95 + else 96 + smbdirect_socket_schedule_cleanup(sc, ret); 97 + if (sc->ib.qp) 98 + ib_drain_qp(sc->ib.qp); 99 + return 0; 100 + } 101 + 102 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 103 + "%s (first_error=%1pe) event=%s\n", 104 + smbdirect_socket_status_string(sc->status), 105 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 106 + rdma_event_msg(event->event)); 107 + 108 + switch (event->event) { 109 + case RDMA_CM_EVENT_DISCONNECTED: 110 + /* 111 + * We need to change to SMBDIRECT_SOCKET_DISCONNECTED, 112 + * so that rdma_disconnect() is avoided later via 113 + * smbdirect_socket_schedule_cleanup_status() => 114 + * smbdirect_socket_cleanup_work(). 115 + * 116 + * As otherwise we'd set SMBDIRECT_SOCKET_DISCONNECTING, 117 + * but never ever get RDMA_CM_EVENT_DISCONNECTED and 118 + * never reach SMBDIRECT_SOCKET_DISCONNECTED. 119 + * 120 + * This is also a normal disconnect so 121 + * SMBDIRECT_LOG_INFO should be good enough 122 + * and avoids spamming the default logs. 123 + */ 124 + smbdirect_socket_schedule_cleanup_status(sc, 125 + SMBDIRECT_LOG_INFO, 126 + ret, 127 + SMBDIRECT_SOCKET_DISCONNECTED); 128 + if (sc->ib.qp) 129 + ib_drain_qp(sc->ib.qp); 130 + return 0; 131 + 132 + default: 133 + break; 134 + } 135 + 136 + /* 137 + * This is an internal error, should be handled above via 138 + * event->event != sc->rdma.expected_event already. 139 + */ 140 + WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_DISCONNECTED); 141 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 142 + return 0; 143 + } 144 + 145 + void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) 146 + { 147 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 148 + "rdma established: device: %.*s local: %pISpsfc remote: %pISpsfc\n", 149 + IB_DEVICE_NAME_MAX, 150 + sc->ib.dev->name, 151 + &sc->rdma.cm_id->route.addr.src_addr, 152 + &sc->rdma.cm_id->route.addr.dst_addr); 153 + 154 + sc->rdma.cm_id->event_handler = smbdirect_connection_rdma_event_handler; 155 + sc->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED; 156 + } 157 + 158 + void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) 159 + { 160 + if (unlikely(sc->first_error)) 161 + return; 162 + 163 + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) 164 + /* 165 + * This is the accept case where 166 + * smbdirect_socket_accept() already sets 167 + * SMBDIRECT_SOCKET_CONNECTED 168 + */ 169 + goto done; 170 + 171 + if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) { 172 + /* 173 + * Something went wrong... 174 + */ 175 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 176 + "status=%s first_error=%1pe local: %pISpsfc remote: %pISpsfc\n", 177 + smbdirect_socket_status_string(sc->status), 178 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 179 + &sc->rdma.cm_id->route.addr.src_addr, 180 + &sc->rdma.cm_id->route.addr.dst_addr); 181 + return; 182 + } 183 + 184 + /* 185 + * We are done, so we can wake up the waiter. 186 + */ 187 + WARN_ONCE(sc->status == SMBDIRECT_SOCKET_CONNECTED, 188 + "status=%s first_error=%1pe", 189 + smbdirect_socket_status_string(sc->status), 190 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 191 + sc->status = SMBDIRECT_SOCKET_CONNECTED; 192 + 193 + /* 194 + * We need to setup the refill and send immediate work 195 + * in order to get a working connection. 
196 + */ 197 + done: 198 + INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); 199 + INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); 200 + 201 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 202 + "negotiated: local: %pISpsfc remote: %pISpsfc\n", 203 + &sc->rdma.cm_id->route.addr.src_addr, 204 + &sc->rdma.cm_id->route.addr.dst_addr); 205 + 206 + wake_up(&sc->status_wait); 207 + } 208 + 209 + static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, 210 + const struct ib_qp_init_attr *attr) 211 + { 212 + /* 213 + * This could be split out of rdma_rw_init_qp() 214 + * and be a helper function next to rdma_rw_mr_factor() 215 + * 216 + * We can't check unlikely(rdma_rw_force_mr) here, 217 + * but that is most likely 0 anyway. 218 + */ 219 + u32 factor; 220 + 221 + WARN_ON_ONCE(attr->port_num == 0); 222 + 223 + /* 224 + * Each context needs at least one RDMA READ or WRITE WR. 225 + * 226 + * For some hardware we might need more, eventually we should ask the 227 + * HCA driver for a multiplier here. 228 + */ 229 + factor = 1; 230 + 231 + /* 232 + * If the device needs MRs to perform RDMA READ or WRITE operations, 233 + * we'll need two additional MRs for the registrations and the 234 + * invalidation. 235 + */ 236 + if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) 237 + factor += 2; /* inv + reg */ 238 + 239 + return factor * attr->cap.max_rdma_ctxs; 240 + } 241 + 242 + int smbdirect_connection_create_qp(struct smbdirect_socket *sc) 243 + { 244 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 245 + struct ib_qp_init_attr qp_attr; 246 + struct ib_qp_cap qp_cap; 247 + u32 rdma_send_wr; 248 + u32 max_send_wr; 249 + int ret; 250 + 251 + /* 252 + * Note that {rdma,ib}_create_qp() will call 253 + * rdma_rw_init_qp() if max_rdma_ctxs is not 0. 254 + * It will adjust max_send_wr to the required 255 + * number of additional WRs for the RDMA RW operations. 256 + * It will cap max_send_wr to the device limit. 257 + * 258 + * We allocate sp->responder_resources * 2 MRs 259 + * and each MR needs WRs for REG and INV, so 260 + * we use '* 4'. 261 + * 262 + * +1 for ib_drain_qp() 263 + */ 264 + memset(&qp_cap, 0, sizeof(qp_cap)); 265 + qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; 266 + qp_cap.max_recv_wr = sp->recv_credit_max + 1; 267 + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; 268 + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; 269 + qp_cap.max_inline_data = 0; 270 + qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; 271 + 272 + /* 273 + * Find out the number of max_send_wr 274 + * after rdma_rw_init_qp() adjusted it. 275 + * 276 + * We only do it on a temporary variable, 277 + * as rdma_create_qp() will trigger 278 + * rdma_rw_init_qp() again. 
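 * (Hypothetical numbers for the budget above: with
 * send_credit_target = 255 and responder_resources = 32,
 * qp_cap.max_send_wr starts as 255 + 32 * 4 + 1 = 384, before
 * rdma_rw_init_qp() adds the RDMA R/W WRs computed here.)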
279 + */ 280 + memset(&qp_attr, 0, sizeof(qp_attr)); 281 + qp_attr.cap = qp_cap; 282 + qp_attr.port_num = sc->rdma.cm_id->port_num; 283 + rdma_send_wr = smbdirect_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); 284 + max_send_wr = qp_cap.max_send_wr + rdma_send_wr; 285 + 286 + if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || 287 + qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { 288 + pr_err("Possible CQE overrun: max_send_wr %d\n", 289 + qp_cap.max_send_wr); 290 + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", 291 + IB_DEVICE_NAME_MAX, 292 + sc->ib.dev->name, 293 + sc->ib.dev->attrs.max_cqe, 294 + sc->ib.dev->attrs.max_qp_wr); 295 + pr_err("consider lowering send_credit_target = %d\n", 296 + sp->send_credit_target); 297 + return -EINVAL; 298 + } 299 + 300 + if (qp_cap.max_rdma_ctxs && 301 + (max_send_wr >= sc->ib.dev->attrs.max_cqe || 302 + max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { 303 + pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", 304 + rdma_send_wr, qp_cap.max_send_wr, max_send_wr); 305 + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", 306 + IB_DEVICE_NAME_MAX, 307 + sc->ib.dev->name, 308 + sc->ib.dev->attrs.max_cqe, 309 + sc->ib.dev->attrs.max_qp_wr); 310 + pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", 311 + sp->send_credit_target, qp_cap.max_rdma_ctxs); 312 + return -EINVAL; 313 + } 314 + 315 + if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || 316 + qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { 317 + pr_err("Possible CQE overrun: max_recv_wr %d\n", 318 + qp_cap.max_recv_wr); 319 + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", 320 + IB_DEVICE_NAME_MAX, 321 + sc->ib.dev->name, 322 + sc->ib.dev->attrs.max_cqe, 323 + sc->ib.dev->attrs.max_qp_wr); 324 + pr_err("consider lowering receive_credit_max = %d\n", 325 + sp->recv_credit_max); 326 + return -EINVAL; 327 + } 328 + 329 + if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || 330 + qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { 331 + pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", 332 + IB_DEVICE_NAME_MAX, 333 + sc->ib.dev->name, 334 + sc->ib.dev->attrs.max_send_sge, 335 + sc->ib.dev->attrs.max_recv_sge); 336 + return -EINVAL; 337 + } 338 + 339 + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); 340 + if (IS_ERR(sc->ib.pd)) { 341 + pr_err("Can't create RDMA PD: %1pe\n", sc->ib.pd); 342 + ret = PTR_ERR(sc->ib.pd); 343 + sc->ib.pd = NULL; 344 + return ret; 345 + } 346 + 347 + sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, 348 + max_send_wr, 349 + sc->ib.poll_ctx); 350 + if (IS_ERR(sc->ib.send_cq)) { 351 + pr_err("Can't create RDMA send CQ: %1pe\n", sc->ib.send_cq); 352 + ret = PTR_ERR(sc->ib.send_cq); 353 + sc->ib.send_cq = NULL; 354 + goto err; 355 + } 356 + 357 + sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, 358 + qp_cap.max_recv_wr, 359 + sc->ib.poll_ctx); 360 + if (IS_ERR(sc->ib.recv_cq)) { 361 + pr_err("Can't create RDMA recv CQ: %1pe\n", sc->ib.recv_cq); 362 + ret = PTR_ERR(sc->ib.recv_cq); 363 + sc->ib.recv_cq = NULL; 364 + goto err; 365 + } 366 + 367 + /* 368 + * We reset completely here! 369 + * As the above use was just temporary 370 + * to calc max_send_wr and rdma_send_wr. 371 + * 372 + * rdma_create_qp() will trigger rdma_rw_init_qp() 373 + * again if max_rdma_ctxs is not 0. 
374 + */ 375 + memset(&qp_attr, 0, sizeof(qp_attr)); 376 + qp_attr.event_handler = smbdirect_connection_qp_event_handler; 377 + qp_attr.qp_context = sc; 378 + qp_attr.cap = qp_cap; 379 + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 380 + qp_attr.qp_type = IB_QPT_RC; 381 + qp_attr.send_cq = sc->ib.send_cq; 382 + qp_attr.recv_cq = sc->ib.recv_cq; 383 + qp_attr.port_num = ~0; 384 + 385 + ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); 386 + if (ret) { 387 + pr_err("Can't create RDMA QP: %1pe\n", 388 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 389 + goto err; 390 + } 391 + sc->ib.qp = sc->rdma.cm_id->qp; 392 + 393 + return 0; 394 + err: 395 + smbdirect_connection_destroy_qp(sc); 396 + return ret; 397 + } 398 + 399 + void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) 400 + { 401 + if (sc->ib.qp) { 402 + ib_drain_qp(sc->ib.qp); 403 + sc->ib.qp = NULL; 404 + rdma_destroy_qp(sc->rdma.cm_id); 405 + } 406 + if (sc->ib.recv_cq) { 407 + ib_destroy_cq(sc->ib.recv_cq); 408 + sc->ib.recv_cq = NULL; 409 + } 410 + if (sc->ib.send_cq) { 411 + ib_destroy_cq(sc->ib.send_cq); 412 + sc->ib.send_cq = NULL; 413 + } 414 + if (sc->ib.pd) { 415 + ib_dealloc_pd(sc->ib.pd); 416 + sc->ib.pd = NULL; 417 + } 418 + } 419 + 420 + int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) 421 + { 422 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 423 + char name[80]; 424 + size_t i; 425 + 426 + /* 427 + * We use sizeof(struct smbdirect_negotiate_resp) for the 428 + * payload size as it is larger than 429 + * sizeof(struct smbdirect_data_transfer). 430 + * 431 + * This will fit client and server usage for now. 432 + */ 433 + snprintf(name, sizeof(name), "smbdirect_send_io_cache_%p", sc); 434 + struct kmem_cache_args send_io_args = { 435 + .align = __alignof__(struct smbdirect_send_io), 436 + }; 437 + sc->send_io.mem.cache = kmem_cache_create(name, 438 + sizeof(struct smbdirect_send_io) + 439 + sizeof(struct smbdirect_negotiate_resp), 440 + &send_io_args, 441 + SLAB_HWCACHE_ALIGN); 442 + if (!sc->send_io.mem.cache) 443 + goto err; 444 + 445 + sc->send_io.mem.pool = mempool_create_slab_pool(sp->send_credit_target, 446 + sc->send_io.mem.cache); 447 + if (!sc->send_io.mem.pool) 448 + goto err; 449 + 450 + /* 451 + * A payload size of sp->max_recv_size should fit 452 + * any message. 453 + * 454 + * For smbdirect_data_transfer messages the whole 455 + * buffer might be exposed to userspace 456 + * (currently on the client side...) 457 + * The documentation says data_offset = 0 would be 458 + * strange but valid. 
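 * Hence the usercopy window declared below via .useroffset and
 * .usersize starts right after struct smbdirect_recv_io and
 * covers the sp->max_recv_size payload bytes.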
459 + */ 460 + snprintf(name, sizeof(name), "smbdirect_recv_io_cache_%p", sc); 461 + struct kmem_cache_args recv_io_args = { 462 + .align = __alignof__(struct smbdirect_recv_io), 463 + .useroffset = sizeof(struct smbdirect_recv_io), 464 + .usersize = sp->max_recv_size, 465 + }; 466 + sc->recv_io.mem.cache = kmem_cache_create(name, 467 + sizeof(struct smbdirect_recv_io) + 468 + sp->max_recv_size, 469 + &recv_io_args, 470 + SLAB_HWCACHE_ALIGN); 471 + if (!sc->recv_io.mem.cache) 472 + goto err; 473 + 474 + sc->recv_io.mem.pool = mempool_create_slab_pool(sp->recv_credit_max, 475 + sc->recv_io.mem.cache); 476 + if (!sc->recv_io.mem.pool) 477 + goto err; 478 + 479 + for (i = 0; i < sp->recv_credit_max; i++) { 480 + struct smbdirect_recv_io *recv_io; 481 + 482 + recv_io = mempool_alloc(sc->recv_io.mem.pool, 483 + sc->recv_io.mem.gfp_mask); 484 + if (!recv_io) 485 + goto err; 486 + recv_io->socket = sc; 487 + recv_io->sge.length = 0; 488 + list_add_tail(&recv_io->list, &sc->recv_io.free.list); 489 + } 490 + 491 + return 0; 492 + err: 493 + smbdirect_connection_destroy_mem_pools(sc); 494 + return -ENOMEM; 495 + } 496 + 497 + void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) 498 + { 499 + struct smbdirect_recv_io *recv_io, *next_io; 500 + 501 + list_for_each_entry_safe(recv_io, next_io, &sc->recv_io.free.list, list) { 502 + list_del(&recv_io->list); 503 + mempool_free(recv_io, sc->recv_io.mem.pool); 504 + } 505 + 506 + /* 507 + * Note mempool_destroy() and kmem_cache_destroy() 508 + * work fine with a NULL pointer 509 + */ 510 + 511 + mempool_destroy(sc->recv_io.mem.pool); 512 + sc->recv_io.mem.pool = NULL; 513 + 514 + kmem_cache_destroy(sc->recv_io.mem.cache); 515 + sc->recv_io.mem.cache = NULL; 516 + 517 + mempool_destroy(sc->send_io.mem.pool); 518 + sc->send_io.mem.pool = NULL; 519 + 520 + kmem_cache_destroy(sc->send_io.mem.cache); 521 + sc->send_io.mem.cache = NULL; 522 + } 523 + 524 + struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) 525 + { 526 + struct smbdirect_send_io *msg; 527 + 528 + msg = mempool_alloc(sc->send_io.mem.pool, sc->send_io.mem.gfp_mask); 529 + if (!msg) 530 + return ERR_PTR(-ENOMEM); 531 + msg->socket = sc; 532 + INIT_LIST_HEAD(&msg->sibling_list); 533 + msg->num_sge = 0; 534 + 535 + return msg; 536 + } 537 + 538 + void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) 539 + { 540 + struct smbdirect_socket *sc = msg->socket; 541 + size_t i; 542 + 543 + /* 544 + * The list needs to be empty! 545 + * The caller should take care of it. 546 + */ 547 + WARN_ON_ONCE(!list_empty(&msg->sibling_list)); 548 + 549 + /* 550 + * Note we call ib_dma_unmap_page(), even if some sges are mapped using 551 + * ib_dma_map_single(). 552 + * 553 + * The difference between _single() and _page() only matters for the 554 + * ib_dma_map_*() case. 555 + * 556 + * For the ib_dma_unmap_*() case it does not matter as both take the 557 + * dma_addr_t and dma_unmap_single_attrs() is just an alias to 558 + * dma_unmap_page_attrs(). 
559 + */ 560 + for (i = 0; i < msg->num_sge; i++) 561 + ib_dma_unmap_page(sc->ib.dev, 562 + msg->sge[i].addr, 563 + msg->sge[i].length, 564 + DMA_TO_DEVICE); 565 + 566 + mempool_free(msg, sc->send_io.mem.pool); 567 + } 568 + 569 + struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) 570 + { 571 + struct smbdirect_recv_io *msg = NULL; 572 + unsigned long flags; 573 + 574 + spin_lock_irqsave(&sc->recv_io.free.lock, flags); 575 + if (likely(!sc->first_error)) 576 + msg = list_first_entry_or_null(&sc->recv_io.free.list, 577 + struct smbdirect_recv_io, 578 + list); 579 + if (likely(msg)) { 580 + list_del(&msg->list); 581 + sc->statistics.get_receive_buffer++; 582 + } 583 + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 584 + 585 + return msg; 586 + } 587 + 588 + void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) 589 + { 590 + struct smbdirect_socket *sc = msg->socket; 591 + unsigned long flags; 592 + 593 + if (likely(msg->sge.length != 0)) { 594 + ib_dma_unmap_single(sc->ib.dev, 595 + msg->sge.addr, 596 + msg->sge.length, 597 + DMA_FROM_DEVICE); 598 + msg->sge.length = 0; 599 + } 600 + 601 + spin_lock_irqsave(&sc->recv_io.free.lock, flags); 602 + list_add_tail(&msg->list, &sc->recv_io.free.list); 603 + sc->statistics.put_receive_buffer++; 604 + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 605 + 606 + queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); 607 + } 608 + 609 + void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, 610 + struct smbdirect_recv_io *msg, 611 + u32 data_length) 612 + { 613 + unsigned long flags; 614 + 615 + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 616 + list_add_tail(&msg->list, &sc->recv_io.reassembly.list); 617 + sc->recv_io.reassembly.queue_length++; 618 + /* 619 + * Make sure reassembly_data_length is updated after list and 620 + * reassembly_queue_length are updated. On the dequeue side 621 + * reassembly_data_length is checked without a lock to determine 622 + * if reassembly_queue_length and list are up to date 623 + */ 624 + virt_wmb(); 625 + sc->recv_io.reassembly.data_length += data_length; 626 + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 627 + sc->statistics.enqueue_reassembly_queue++; 628 + } 629 + 630 + struct smbdirect_recv_io * 631 + smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) 632 + { 633 + struct smbdirect_recv_io *msg; 634 + 635 + msg = list_first_entry_or_null(&sc->recv_io.reassembly.list, 636 + struct smbdirect_recv_io, 637 + list); 638 + 639 + return msg; 640 + } 641 + 642 + void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, 643 + u8 peer_initiator_depth, 644 + u8 peer_responder_resources, 645 + const struct rdma_conn_param *param) 646 + { 647 + struct smbdirect_socket_parameters *sp = &sc->parameters; 648 + 649 + if (rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num) && 650 + param->private_data_len == 8) { 651 + /* 652 + * Legacy clients with only iWarp MPA v1 support 653 + * need a private blob in order to negotiate 654 + * the IRD/ORD values. 655 + */ 656 + const __be32 *ird_ord_hdr = param->private_data; 657 + u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); 658 + u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); 659 + 660 + /* 661 + * cifs.ko sends the legacy IRD/ORD negotiation 662 + * even if iWarp MPA v2 was used. 663 + * 664 + * Here we check that the values match and only 665 + * mark the client as legacy if they don't match. 
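 * (Hypothetical example: an MPA v1-only peer whose private blob
 * carries ird=16/ord=32 while param reports 0/0 does not match,
 * so it is marked legacy with depth 16 and resources 32.)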
666 + */ 667 + if ((u32)param->initiator_depth != ird32 || 668 + (u32)param->responder_resources != ord32) { 669 + /* 670 + * There are broken clients (old cifs.ko) 671 + * using little endian and also 672 + * struct rdma_conn_param only uses u8 673 + * for initiator_depth and responder_resources, 674 + * so we truncate the value to U8_MAX. 675 + * 676 + * smb_direct_accept_client() will then 677 + * do the real negotiation in order to 678 + * select the minimum between client and 679 + * server. 680 + */ 681 + ird32 = min_t(u32, ird32, U8_MAX); 682 + ord32 = min_t(u32, ord32, U8_MAX); 683 + 684 + sc->rdma.legacy_iwarp = true; 685 + peer_initiator_depth = (u8)ird32; 686 + peer_responder_resources = (u8)ord32; 687 + } 688 + } 689 + 690 + /* 691 + * negotiate the value by using the minimum 692 + * between client and server if the client provided 693 + * non 0 values. 694 + */ 695 + if (peer_initiator_depth != 0) 696 + sp->initiator_depth = min_t(u8, sp->initiator_depth, 697 + peer_initiator_depth); 698 + if (peer_responder_resources != 0) 699 + sp->responder_resources = min_t(u8, sp->responder_resources, 700 + peer_responder_resources); 701 + } 702 + 703 + bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) 704 + { 705 + if (unlikely(!sc || sc->first_error || sc->status != SMBDIRECT_SOCKET_CONNECTED)) 706 + return false; 707 + return true; 708 + } 709 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_is_connected); 710 + 711 + int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) 712 + { 713 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 714 + union { 715 + struct sockaddr sa; 716 + struct sockaddr_storage ss; 717 + } src_addr, dst_addr; 718 + const struct sockaddr *src = NULL; 719 + const struct sockaddr *dst = NULL; 720 + char _devname[IB_DEVICE_NAME_MAX] = { 0, }; 721 + const char *devname = NULL; 722 + int ret; 723 + 724 + if (sc->rdma.cm_id) { 725 + src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; 726 + if (src_addr.sa.sa_family != AF_UNSPEC) 727 + src = &src_addr.sa; 728 + dst_addr.ss = sc->rdma.cm_id->route.addr.dst_addr; 729 + if (dst_addr.sa.sa_family != AF_UNSPEC) 730 + dst = &dst_addr.sa; 731 + 732 + if (sc->ib.dev) { 733 + memcpy(_devname, sc->ib.dev->name, IB_DEVICE_NAME_MAX); 734 + devname = _devname; 735 + } 736 + } 737 + 738 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 739 + "waiting for connection: device: %.*s local: %pISpsfc remote: %pISpsfc\n", 740 + IB_DEVICE_NAME_MAX, devname, src, dst); 741 + 742 + ret = wait_event_interruptible_timeout(sc->status_wait, 743 + sc->status == SMBDIRECT_SOCKET_CONNECTED || 744 + sc->first_error, 745 + msecs_to_jiffies(sp->negotiate_timeout_msec)); 746 + if (sc->rdma.cm_id) { 747 + /* 748 + * Maybe src and dev are updated in the meantime. 
749 + */ 750 + src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; 751 + if (src_addr.sa.sa_family != AF_UNSPEC) 752 + src = &src_addr.sa; 753 + dst_addr.ss = sc->rdma.cm_id->route.addr.dst_addr; 754 + if (dst_addr.sa.sa_family != AF_UNSPEC) 755 + dst = &dst_addr.sa; 756 + 757 + if (sc->ib.dev) { 758 + memcpy(_devname, sc->ib.dev->name, IB_DEVICE_NAME_MAX); 759 + devname = _devname; 760 + } 761 + } 762 + if (ret == 0) 763 + ret = -ETIMEDOUT; 764 + if (ret < 0) 765 + smbdirect_socket_schedule_cleanup(sc, ret); 766 + if (sc->first_error) { 767 + int lvl = SMBDIRECT_LOG_ERR; 768 + 769 + ret = sc->first_error; 770 + if (ret == -ENODEV) 771 + lvl = SMBDIRECT_LOG_INFO; 772 + 773 + smbdirect_log_rdma_event(sc, lvl, 774 + "connection failed %1pe device: %.*s local: %pISpsfc remote: %pISpsfc\n", 775 + SMBDIRECT_DEBUG_ERR_PTR(ret), 776 + IB_DEVICE_NAME_MAX, devname, src, dst); 777 + return ret; 778 + } 779 + 780 + return 0; 781 + } 782 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); 783 + 784 + void smbdirect_connection_idle_timer_work(struct work_struct *work) 785 + { 786 + struct smbdirect_socket *sc = 787 + container_of(work, struct smbdirect_socket, idle.timer_work.work); 788 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 789 + 790 + if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { 791 + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_ERR, 792 + "%s => timeout sc->idle.keepalive=%s\n", 793 + smbdirect_socket_status_string(sc->status), 794 + sc->idle.keepalive == SMBDIRECT_KEEPALIVE_SENT ? 795 + "SENT" : "PENDING"); 796 + smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); 797 + return; 798 + } 799 + 800 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 801 + return; 802 + 803 + /* 804 + * Now use the keepalive timeout (instead of keepalive interval) 805 + * in order to wait for a response 806 + */ 807 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 808 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, 809 + msecs_to_jiffies(sp->keepalive_timeout_msec)); 810 + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, 811 + "schedule send of empty idle message\n"); 812 + queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); 813 + } 814 + 815 + u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) 816 + { 817 + int missing; 818 + int available; 819 + int new_credits; 820 + 821 + if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) 822 + return 0; 823 + 824 + missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); 825 + available = atomic_xchg(&sc->recv_io.credits.available, 0); 826 + new_credits = min3((int)U16_MAX, missing, available); 827 + if (new_credits <= 0) { 828 + /* 829 + * If credits are available, but not granted 830 + * we need to re-add them again. 831 + */ 832 + if (available) 833 + atomic_add(available, &sc->recv_io.credits.available); 834 + return 0; 835 + } 836 + 837 + if (new_credits < available) { 838 + /* 839 + * Readd the remaining available again. 
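 * (Illustrative arithmetic: with target = 128, count = 100 and
 * available = 40, missing = 28, so new_credits = 28 and the
 * remaining 12 are re-added below.)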
840 + */ 841 + available -= new_credits; 842 + atomic_add(available, &sc->recv_io.credits.available); 843 + } 844 + 845 + /* 846 + * Remember we granted the credits 847 + */ 848 + atomic_add(new_credits, &sc->recv_io.credits.count); 849 + return new_credits; 850 + } 851 + 852 + static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) 853 + { 854 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 855 + 856 + if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { 857 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; 858 + /* 859 + * Now use the keepalive timeout (instead of keepalive interval) 860 + * in order to wait for a response 861 + */ 862 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, 863 + msecs_to_jiffies(sp->keepalive_timeout_msec)); 864 + return true; 865 + } 866 + 867 + return false; 868 + } 869 + 870 + int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, 871 + struct ib_send_wr *wr) 872 + { 873 + int ret; 874 + 875 + if (unlikely(sc->first_error)) 876 + return sc->first_error; 877 + 878 + atomic_inc(&sc->send_io.pending.count); 879 + ret = ib_post_send(sc->ib.qp, wr, NULL); 880 + if (ret) { 881 + atomic_dec(&sc->send_io.pending.count); 882 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, 883 + "ib_post_send() failed %1pe\n", 884 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 885 + smbdirect_socket_schedule_cleanup(sc, ret); 886 + } 887 + 888 + return ret; 889 + } 890 + 891 + static void smbdirect_connection_send_batch_init(struct smbdirect_send_batch *batch, 892 + bool need_invalidate_rkey, 893 + unsigned int remote_key) 894 + { 895 + INIT_LIST_HEAD(&batch->msg_list); 896 + batch->wr_cnt = 0; 897 + batch->need_invalidate_rkey = need_invalidate_rkey; 898 + batch->remote_key = remote_key; 899 + batch->credit = 0; 900 + } 901 + 902 + int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, 903 + struct smbdirect_send_batch *batch, 904 + bool is_last) 905 + { 906 + struct smbdirect_send_io *first, *last; 907 + int ret = 0; 908 + 909 + if (list_empty(&batch->msg_list)) 910 + goto release_credit; 911 + 912 + first = list_first_entry(&batch->msg_list, 913 + struct smbdirect_send_io, 914 + sibling_list); 915 + last = list_last_entry(&batch->msg_list, 916 + struct smbdirect_send_io, 917 + sibling_list); 918 + 919 + if (batch->need_invalidate_rkey) { 920 + first->wr.opcode = IB_WR_SEND_WITH_INV; 921 + first->wr.ex.invalidate_rkey = batch->remote_key; 922 + batch->need_invalidate_rkey = false; 923 + batch->remote_key = 0; 924 + } 925 + 926 + last->wr.send_flags = IB_SEND_SIGNALED; 927 + last->wr.wr_cqe = &last->cqe; 928 + 929 + /* 930 + * Remove last from batch->msg_list 931 + * and splice the rest of batch->msg_list 932 + * to last->sibling_list. 933 + * 934 + * batch->msg_list is a valid empty list 935 + * at the end. 
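 *
 * Note the WRs were already chained via wr.next in
 * smbdirect_connection_post_send_io(), so posting first->wr
 * below submits the whole chain with only last signaled.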
936 + */ 937 + list_del_init(&last->sibling_list); 938 + list_splice_tail_init(&batch->msg_list, &last->sibling_list); 939 + batch->wr_cnt = 0; 940 + 941 + ret = smbdirect_connection_post_send_wr(sc, &first->wr); 942 + if (ret) { 943 + struct smbdirect_send_io *sibling, *next; 944 + 945 + list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { 946 + list_del_init(&sibling->sibling_list); 947 + smbdirect_connection_free_send_io(sibling); 948 + } 949 + smbdirect_connection_free_send_io(last); 950 + } 951 + 952 + release_credit: 953 + if (is_last && !ret && batch->credit) { 954 + atomic_add(batch->credit, &sc->send_io.bcredits.count); 955 + batch->credit = 0; 956 + wake_up(&sc->send_io.bcredits.wait_queue); 957 + } 958 + 959 + return ret; 960 + } 961 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_batch_flush); 962 + 963 + struct smbdirect_send_batch * 964 + smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, 965 + bool need_invalidate_rkey, 966 + unsigned int remote_key) 967 + { 968 + struct smbdirect_send_batch *batch = (struct smbdirect_send_batch *)storage; 969 + 970 + memset(storage, 0, sizeof(*storage)); 971 + BUILD_BUG_ON(sizeof(*batch) > sizeof(*storage)); 972 + 973 + smbdirect_connection_send_batch_init(batch, 974 + need_invalidate_rkey, 975 + remote_key); 976 + 977 + return batch; 978 + } 979 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_init_send_batch_storage); 980 + 981 + static int smbdirect_connection_wait_for_send_bcredit(struct smbdirect_socket *sc, 982 + struct smbdirect_send_batch *batch) 983 + { 984 + int ret; 985 + 986 + if (batch->credit) 987 + return 0; 988 + 989 + ret = smbdirect_socket_wait_for_credits(sc, 990 + SMBDIRECT_SOCKET_CONNECTED, 991 + -ENOTCONN, 992 + &sc->send_io.bcredits.wait_queue, 993 + &sc->send_io.bcredits.count, 994 + 1); 995 + if (ret) 996 + return ret; 997 + 998 + batch->credit = 1; 999 + return 0; 1000 + } 1001 + 1002 + static int smbdirect_connection_wait_for_send_lcredit(struct smbdirect_socket *sc, 1003 + struct smbdirect_send_batch *batch) 1004 + { 1005 + if (batch && atomic_read(&sc->send_io.lcredits.count) <= 1) { 1006 + int ret; 1007 + 1008 + ret = smbdirect_connection_send_batch_flush(sc, batch, false); 1009 + if (ret) 1010 + return ret; 1011 + } 1012 + 1013 + return smbdirect_socket_wait_for_credits(sc, 1014 + SMBDIRECT_SOCKET_CONNECTED, 1015 + -ENOTCONN, 1016 + &sc->send_io.lcredits.wait_queue, 1017 + &sc->send_io.lcredits.count, 1018 + 1); 1019 + } 1020 + 1021 + static int smbdirect_connection_wait_for_send_credits(struct smbdirect_socket *sc, 1022 + struct smbdirect_send_batch *batch) 1023 + { 1024 + if (batch && (batch->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { 1025 + int ret; 1026 + 1027 + ret = smbdirect_connection_send_batch_flush(sc, batch, false); 1028 + if (ret) 1029 + return ret; 1030 + } 1031 + 1032 + return smbdirect_socket_wait_for_credits(sc, 1033 + SMBDIRECT_SOCKET_CONNECTED, 1034 + -ENOTCONN, 1035 + &sc->send_io.credits.wait_queue, 1036 + &sc->send_io.credits.count, 1037 + 1); 1038 + } 1039 + 1040 + static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc); 1041 + 1042 + static int smbdirect_connection_post_send_io(struct smbdirect_socket *sc, 1043 + struct smbdirect_send_batch *batch, 1044 + struct smbdirect_send_io *msg) 1045 + { 1046 + int i; 1047 + 1048 + for (i = 0; i < msg->num_sge; i++) 1049 + ib_dma_sync_single_for_device(sc->ib.dev, 1050 + msg->sge[i].addr, msg->sge[i].length, 1051 + DMA_TO_DEVICE); 1052 + 1053 + 
msg->cqe.done = smbdirect_connection_send_io_done; 1054 + msg->wr.wr_cqe = &msg->cqe; 1055 + msg->wr.opcode = IB_WR_SEND; 1056 + msg->wr.sg_list = &msg->sge[0]; 1057 + msg->wr.num_sge = msg->num_sge; 1058 + msg->wr.next = NULL; 1059 + 1060 + if (batch) { 1061 + msg->wr.send_flags = 0; 1062 + if (!list_empty(&batch->msg_list)) { 1063 + struct smbdirect_send_io *last; 1064 + 1065 + last = list_last_entry(&batch->msg_list, 1066 + struct smbdirect_send_io, 1067 + sibling_list); 1068 + last->wr.next = &msg->wr; 1069 + } 1070 + list_add_tail(&msg->sibling_list, &batch->msg_list); 1071 + batch->wr_cnt++; 1072 + return 0; 1073 + } 1074 + 1075 + msg->wr.send_flags = IB_SEND_SIGNALED; 1076 + return smbdirect_connection_post_send_wr(sc, &msg->wr); 1077 + } 1078 + 1079 + int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, 1080 + struct smbdirect_send_batch *batch, 1081 + struct iov_iter *iter, 1082 + unsigned int flags, 1083 + u32 remaining_data_length) 1084 + { 1085 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 1086 + struct smbdirect_send_batch _batch; 1087 + struct smbdirect_send_io *msg; 1088 + struct smbdirect_data_transfer *packet; 1089 + size_t header_length; 1090 + u16 new_credits = 0; 1091 + u32 data_length = 0; 1092 + int ret; 1093 + 1094 + if (WARN_ON_ONCE(flags)) 1095 + return -EINVAL; /* no flags support for now */ 1096 + 1097 + if (iter) { 1098 + if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) 1099 + return -EINVAL; /* It's a bug in upper layer to get there */ 1100 + 1101 + header_length = sizeof(struct smbdirect_data_transfer); 1102 + if (WARN_ON_ONCE(remaining_data_length == 0 || 1103 + iov_iter_count(iter) > remaining_data_length)) 1104 + return -EINVAL; 1105 + } else { 1106 + /* If this is a packet without payload, don't send padding */ 1107 + header_length = offsetof(struct smbdirect_data_transfer, padding); 1108 + if (WARN_ON_ONCE(remaining_data_length)) 1109 + return -EINVAL; 1110 + } 1111 + 1112 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1113 + smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, 1114 + "status=%s first_error=%1pe => %1pe\n", 1115 + smbdirect_socket_status_string(sc->status), 1116 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 1117 + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); 1118 + return -ENOTCONN; 1119 + } 1120 + 1121 + if (!batch) { 1122 + smbdirect_connection_send_batch_init(&_batch, false, 0); 1123 + batch = &_batch; 1124 + } 1125 + 1126 + ret = smbdirect_connection_wait_for_send_bcredit(sc, batch); 1127 + if (ret) 1128 + goto bcredit_failed; 1129 + 1130 + ret = smbdirect_connection_wait_for_send_lcredit(sc, batch); 1131 + if (ret) 1132 + goto lcredit_failed; 1133 + 1134 + ret = smbdirect_connection_wait_for_send_credits(sc, batch); 1135 + if (ret) 1136 + goto credit_failed; 1137 + 1138 + new_credits = smbdirect_connection_grant_recv_credits(sc); 1139 + if (new_credits == 0 && 1140 + atomic_read(&sc->send_io.credits.count) == 0 && 1141 + atomic_read(&sc->recv_io.credits.count) == 0) { 1142 + /* 1143 + * queue the refill work in order to 1144 + * get some new recv credits we can grant to 1145 + * the peer. 
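 * (Without this, both peers could sit at zero send credits at
 * the same time; posting fresh receive buffers via the refill
 * work is what provides new credits to grant and breaks that
 * potential deadlock.)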
1146 + */ 1147 + queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); 1148 + 1149 + /* 1150 + * Wait until either the refill work completed or the peer 1151 + * granted new credits 1152 + */ 1153 + ret = wait_event_interruptible(sc->send_io.credits.wait_queue, 1154 + atomic_read(&sc->send_io.credits.count) >= 1 || 1155 + atomic_read(&sc->recv_io.credits.available) >= 1 || 1156 + sc->status != SMBDIRECT_SOCKET_CONNECTED); 1157 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1158 + ret = -ENOTCONN; 1159 + if (ret < 0) 1160 + goto credit_failed; 1161 + 1162 + new_credits = smbdirect_connection_grant_recv_credits(sc); 1163 + } 1164 + 1165 + msg = smbdirect_connection_alloc_send_io(sc); 1166 + if (IS_ERR(msg)) { 1167 + ret = PTR_ERR(msg); 1168 + goto alloc_failed; 1169 + } 1170 + 1171 + /* Map the packet to DMA */ 1172 + msg->sge[0].addr = ib_dma_map_single(sc->ib.dev, 1173 + msg->packet, 1174 + header_length, 1175 + DMA_TO_DEVICE); 1176 + ret = ib_dma_mapping_error(sc->ib.dev, msg->sge[0].addr); 1177 + if (ret) 1178 + goto err; 1179 + 1180 + msg->sge[0].length = header_length; 1181 + msg->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1182 + msg->num_sge = 1; 1183 + 1184 + if (iter) { 1185 + struct smbdirect_map_sges extract = { 1186 + .num_sge = msg->num_sge, 1187 + .max_sge = ARRAY_SIZE(msg->sge), 1188 + .sge = msg->sge, 1189 + .device = sc->ib.dev, 1190 + .local_dma_lkey = sc->ib.pd->local_dma_lkey, 1191 + .direction = DMA_TO_DEVICE, 1192 + }; 1193 + size_t payload_len = umin(iov_iter_count(iter), 1194 + sp->max_send_size - sizeof(*packet)); 1195 + 1196 + ret = smbdirect_map_sges_from_iter(iter, payload_len, &extract); 1197 + if (ret < 0) 1198 + goto err; 1199 + data_length = ret; 1200 + remaining_data_length -= data_length; 1201 + msg->num_sge = extract.num_sge; 1202 + } 1203 + 1204 + /* Fill in the packet header */ 1205 + packet = (struct smbdirect_data_transfer *)msg->packet; 1206 + packet->credits_requested = cpu_to_le16(sp->send_credit_target); 1207 + packet->credits_granted = cpu_to_le16(new_credits); 1208 + 1209 + packet->flags = 0; 1210 + if (smbdirect_connection_request_keep_alive(sc)) 1211 + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); 1212 + 1213 + packet->reserved = 0; 1214 + if (!data_length) 1215 + packet->data_offset = 0; 1216 + else 1217 + packet->data_offset = cpu_to_le32(24); 1218 + packet->data_length = cpu_to_le32(data_length); 1219 + packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1220 + packet->padding = 0; 1221 + 1222 + smbdirect_log_outgoing(sc, SMBDIRECT_LOG_INFO, 1223 + "DataOut: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n", 1224 + "CreditsRequested", 1225 + le16_to_cpu(packet->credits_requested), 1226 + "CreditsGranted", 1227 + le16_to_cpu(packet->credits_granted), 1228 + "Flags", 1229 + le16_to_cpu(packet->flags), 1230 + "RemainingDataLength", 1231 + le32_to_cpu(packet->remaining_data_length), 1232 + "DataOffset", 1233 + le32_to_cpu(packet->data_offset), 1234 + "DataLength", 1235 + le32_to_cpu(packet->data_length)); 1236 + 1237 + ret = smbdirect_connection_post_send_io(sc, batch, msg); 1238 + if (ret) 1239 + goto err; 1240 + 1241 + /* 1242 + * From here msg is owned by the batch 1243 + * and we should not free it explicitly. 
1244 + */ 1245 + 1246 + if (batch == &_batch) { 1247 + ret = smbdirect_connection_send_batch_flush(sc, batch, true); 1248 + if (ret) 1249 + goto flush_failed; 1250 + } 1251 + 1252 + return data_length; 1253 + err: 1254 + smbdirect_connection_free_send_io(msg); 1255 + flush_failed: 1256 + alloc_failed: 1257 + atomic_inc(&sc->send_io.credits.count); 1258 + credit_failed: 1259 + atomic_inc(&sc->send_io.lcredits.count); 1260 + lcredit_failed: 1261 + atomic_add(batch->credit, &sc->send_io.bcredits.count); 1262 + batch->credit = 0; 1263 + bcredit_failed: 1264 + return ret; 1265 + } 1266 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_single_iter); 1267 + 1268 + int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) 1269 + { 1270 + /* 1271 + * As an optimization, we don't wait for individual I/O to finish 1272 + * before sending the next one. 1273 + * Send them all and wait for pending send count to get to 0 1274 + * that means all the I/Os have been out and we are good to return 1275 + */ 1276 + 1277 + wait_event(sc->send_io.pending.zero_wait_queue, 1278 + atomic_read(&sc->send_io.pending.count) == 0 || 1279 + sc->status != SMBDIRECT_SOCKET_CONNECTED); 1280 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1281 + smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, 1282 + "status=%s first_error=%1pe => %1pe\n", 1283 + smbdirect_socket_status_string(sc->status), 1284 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 1285 + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); 1286 + return -ENOTCONN; 1287 + } 1288 + 1289 + return 0; 1290 + } 1291 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_wait_zero_pending); 1292 + 1293 + int smbdirect_connection_send_iter(struct smbdirect_socket *sc, 1294 + struct iov_iter *iter, 1295 + unsigned int flags, 1296 + bool need_invalidate, 1297 + unsigned int remote_key) 1298 + { 1299 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 1300 + struct smbdirect_send_batch batch; 1301 + int total_count = iov_iter_count(iter); 1302 + int ret; 1303 + int error = 0; 1304 + __be32 hdr; 1305 + 1306 + if (WARN_ONCE(flags, "unexpected flags=0x%x\n", flags)) 1307 + return -EINVAL; /* no flags support for now */ 1308 + 1309 + if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) 1310 + return -EINVAL; /* It's a bug in upper layer to get there */ 1311 + 1312 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1313 + smbdirect_log_write(sc, SMBDIRECT_LOG_INFO, 1314 + "status=%s first_error=%1pe => %1pe\n", 1315 + smbdirect_socket_status_string(sc->status), 1316 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 1317 + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); 1318 + return -ENOTCONN; 1319 + } 1320 + 1321 + /* 1322 + * For now we expect the iter to have the full 1323 + * message, including a 4 byte length header. 1324 + */ 1325 + if (iov_iter_count(iter) <= 4) 1326 + return -EINVAL; 1327 + if (!copy_from_iter_full(&hdr, sizeof(hdr), iter)) 1328 + return -EFAULT; 1329 + if (iov_iter_count(iter) != be32_to_cpu(hdr)) 1330 + return -EINVAL; 1331 + 1332 + /* 1333 + * The size must fit into the negotiated 1334 + * fragmented send size. 
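 * (Hypothetical example: a 70000-byte message arrives here as a
 * 4-byte big-endian length plus payload; the loop below then
 * emits it as several smbdirect_data_transfer fragments whose
 * remaining_data_length counts down to 0.)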
1335 + */ 1336 + if (iov_iter_count(iter) > sp->max_fragmented_send_size) 1337 + return -EMSGSIZE; 1338 + 1339 + smbdirect_log_write(sc, SMBDIRECT_LOG_INFO, 1340 + "Sending (RDMA): length=%zu\n", 1341 + iov_iter_count(iter)); 1342 + 1343 + smbdirect_connection_send_batch_init(&batch, need_invalidate, remote_key); 1344 + while (iov_iter_count(iter)) { 1345 + ret = smbdirect_connection_send_single_iter(sc, 1346 + &batch, 1347 + iter, 1348 + flags, 1349 + iov_iter_count(iter)); 1350 + if (unlikely(ret < 0)) { 1351 + error = ret; 1352 + break; 1353 + } 1354 + } 1355 + 1356 + ret = smbdirect_connection_send_batch_flush(sc, &batch, true); 1357 + if (unlikely(ret && !error)) 1358 + error = ret; 1359 + 1360 + /* 1361 + * As an optimization, we don't wait for individual I/O to finish 1362 + * before sending the next one. 1363 + * Send them all and wait for pending send count to get to 0 1364 + * that means all the I/Os have been out and we are good to return 1365 + */ 1366 + 1367 + ret = smbdirect_connection_send_wait_zero_pending(sc); 1368 + if (unlikely(ret && !error)) 1369 + error = ret; 1370 + 1371 + if (unlikely(error)) 1372 + return error; 1373 + 1374 + return total_count; 1375 + } 1376 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_iter); 1377 + 1378 + static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) 1379 + { 1380 + struct smbdirect_send_io *msg = 1381 + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 1382 + struct smbdirect_socket *sc = msg->socket; 1383 + struct smbdirect_send_io *sibling, *next; 1384 + int lcredits = 0; 1385 + 1386 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, 1387 + "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", 1388 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 1389 + 1390 + if (unlikely(!(msg->wr.send_flags & IB_SEND_SIGNALED))) { 1391 + /* 1392 + * This happens when smbdirect_send_io is a sibling 1393 + * before the final message, it is signaled on 1394 + * error anyway, so we need to skip 1395 + * smbdirect_connection_free_send_io here, 1396 + * otherwise it will destroy the memory 1397 + * of the siblings too, which will cause 1398 + * use-after-free problems for the others 1399 + * triggered from ib_drain_qp(). 1400 + */ 1401 + if (wc->status != IB_WC_SUCCESS) 1402 + goto skip_free; 1403 + 1404 + /* 1405 + * This should not happen! 1406 + * But we better just close the 1407 + * connection... 
1408 + */ 1409 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, 1410 + "unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", 1411 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 1412 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 1413 + return; 1414 + } 1415 + 1416 + /* 1417 + * Free possible siblings and then the main send_io 1418 + */ 1419 + list_for_each_entry_safe(sibling, next, &msg->sibling_list, sibling_list) { 1420 + list_del_init(&sibling->sibling_list); 1421 + smbdirect_connection_free_send_io(sibling); 1422 + lcredits += 1; 1423 + } 1424 + /* Note this frees wc->wr_cqe, but not wc */ 1425 + smbdirect_connection_free_send_io(msg); 1426 + lcredits += 1; 1427 + 1428 + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { 1429 + skip_free: 1430 + if (wc->status != IB_WC_WR_FLUSH_ERR) 1431 + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, 1432 + "wc->status=%s (%d) wc->opcode=%d\n", 1433 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 1434 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 1435 + return; 1436 + } 1437 + 1438 + atomic_add(lcredits, &sc->send_io.lcredits.count); 1439 + wake_up(&sc->send_io.lcredits.wait_queue); 1440 + 1441 + if (atomic_dec_and_test(&sc->send_io.pending.count)) 1442 + wake_up(&sc->send_io.pending.zero_wait_queue); 1443 + } 1444 + 1445 + static void smbdirect_connection_send_immediate_work(struct work_struct *work) 1446 + { 1447 + struct smbdirect_socket *sc = 1448 + container_of(work, struct smbdirect_socket, idle.immediate_work); 1449 + int ret; 1450 + 1451 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1452 + return; 1453 + 1454 + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, 1455 + "send an empty message\n"); 1456 + sc->statistics.send_empty++; 1457 + ret = smbdirect_connection_send_single_iter(sc, NULL, NULL, 0, 0); 1458 + if (ret < 0) { 1459 + smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, 1460 + "smbdirect_connection_send_single_iter ret=%1pe\n", 1461 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 1462 + smbdirect_socket_schedule_cleanup(sc, ret); 1463 + } 1464 + } 1465 + 1466 + int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) 1467 + { 1468 + struct smbdirect_socket *sc = msg->socket; 1469 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 1470 + struct ib_recv_wr recv_wr = { 1471 + .wr_cqe = &msg->cqe, 1472 + .sg_list = &msg->sge, 1473 + .num_sge = 1, 1474 + }; 1475 + int ret; 1476 + 1477 + if (unlikely(sc->first_error)) 1478 + return sc->first_error; 1479 + 1480 + msg->sge.addr = ib_dma_map_single(sc->ib.dev, 1481 + msg->packet, 1482 + sp->max_recv_size, 1483 + DMA_FROM_DEVICE); 1484 + ret = ib_dma_mapping_error(sc->ib.dev, msg->sge.addr); 1485 + if (ret) 1486 + return ret; 1487 + 1488 + msg->sge.length = sp->max_recv_size; 1489 + msg->sge.lkey = sc->ib.pd->local_dma_lkey; 1490 + 1491 + ret = ib_post_recv(sc->ib.qp, &recv_wr, NULL); 1492 + if (ret) { 1493 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, 1494 + "ib_post_recv failed ret=%d (%1pe)\n", 1495 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 1496 + ib_dma_unmap_single(sc->ib.dev, 1497 + msg->sge.addr, 1498 + msg->sge.length, 1499 + DMA_FROM_DEVICE); 1500 + msg->sge.length = 0; 1501 + smbdirect_socket_schedule_cleanup(sc, ret); 1502 + } 1503 + 1504 + return ret; 1505 + } 1506 + 1507 + void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) 1508 + { 1509 + struct smbdirect_recv_io *recv_io = 1510 + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 1511 + struct smbdirect_socket 
*sc = recv_io->socket; 1512 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 1513 + struct smbdirect_data_transfer *data_transfer; 1514 + int current_recv_credits; 1515 + u16 old_recv_credit_target; 1516 + u16 credits_requested; 1517 + u16 credits_granted; 1518 + u16 flags; 1519 + u32 data_offset; 1520 + u32 data_length; 1521 + u32 remaining_data_length; 1522 + 1523 + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) { 1524 + if (wc->status != IB_WC_WR_FLUSH_ERR) 1525 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, 1526 + "wc->status=%s (%d) wc->opcode=%d\n", 1527 + ib_wc_status_msg(wc->status), wc->status, wc->opcode); 1528 + goto error; 1529 + } 1530 + 1531 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO, 1532 + "recv_io=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", 1533 + recv_io, sc->recv_io.expected, 1534 + ib_wc_status_msg(wc->status), wc->opcode, 1535 + wc->byte_len, wc->pkey_index); 1536 + 1537 + /* 1538 + * Reset timer to the keepalive interval in 1539 + * order to trigger our next keepalive message. 1540 + */ 1541 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 1542 + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, 1543 + msecs_to_jiffies(sp->keepalive_interval_msec)); 1544 + 1545 + ib_dma_sync_single_for_cpu(sc->ib.dev, 1546 + recv_io->sge.addr, 1547 + recv_io->sge.length, 1548 + DMA_FROM_DEVICE); 1549 + 1550 + if (unlikely(wc->byte_len < 1551 + offsetof(struct smbdirect_data_transfer, padding))) { 1552 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 1553 + "wc->byte_len=%u < %zu\n", 1554 + wc->byte_len, 1555 + offsetof(struct smbdirect_data_transfer, padding)); 1556 + goto error; 1557 + } 1558 + 1559 + data_transfer = (struct smbdirect_data_transfer *)recv_io->packet; 1560 + credits_requested = le16_to_cpu(data_transfer->credits_requested); 1561 + credits_granted = le16_to_cpu(data_transfer->credits_granted); 1562 + flags = le16_to_cpu(data_transfer->flags); 1563 + remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 1564 + data_offset = le32_to_cpu(data_transfer->data_offset); 1565 + data_length = le32_to_cpu(data_transfer->data_length); 1566 + 1567 + smbdirect_log_incoming(sc, SMBDIRECT_LOG_INFO, 1568 + "DataIn: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n", 1569 + "CreditsRequested", 1570 + credits_requested, 1571 + "CreditsGranted", 1572 + credits_granted, 1573 + "Flags", 1574 + flags, 1575 + "RemainingDataLength", 1576 + remaining_data_length, 1577 + "DataOffset", 1578 + data_offset, 1579 + "DataLength", 1580 + data_length); 1581 + 1582 + if (unlikely(credits_requested == 0)) { 1583 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 1584 + "invalid: credits_requested == 0\n"); 1585 + goto error; 1586 + } 1587 + 1588 + if (unlikely(data_offset % 8 != 0)) { 1589 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 1590 + "invalid: data_offset=%u (0x%x) not aligned to 8\n", 1591 + data_offset, data_offset); 1592 + goto error; 1593 + } 1594 + 1595 + if (unlikely(wc->byte_len < data_offset || 1596 + (u64)wc->byte_len < (u64)data_offset + data_length)) { 1597 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 1598 + "wc->byte_len=%u < data_offset=%u + data_length=%u\n", 1599 + wc->byte_len, data_offset, data_length); 1600 + goto error; 1601 + } 1602 + 1603 + if (unlikely(remaining_data_length > sp->max_fragmented_recv_size || 1604 + data_length > sp->max_fragmented_recv_size || 1605 + (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size)) { 
1606 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 1607 + "remaining_data_length=%u + data_length=%u > max_fragmented=%u\n", 1608 + remaining_data_length, data_length, sp->max_fragmented_recv_size); 1609 + goto error; 1610 + } 1611 + 1612 + if (data_length) { 1613 + if (sc->recv_io.reassembly.full_packet_received) 1614 + recv_io->first_segment = true; 1615 + 1616 + if (remaining_data_length) 1617 + sc->recv_io.reassembly.full_packet_received = false; 1618 + else 1619 + sc->recv_io.reassembly.full_packet_received = true; 1620 + } 1621 + 1622 + atomic_dec(&sc->recv_io.posted.count); 1623 + current_recv_credits = atomic_dec_return(&sc->recv_io.credits.count); 1624 + 1625 + /* 1626 + * We take the value from the peer, which is checked to be higher than 0, 1627 + * but we limit it to the max value we support in order to keep 1628 + * the main logic simple. 1629 + */ 1630 + old_recv_credit_target = sc->recv_io.credits.target; 1631 + sc->recv_io.credits.target = credits_requested; 1632 + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, 1633 + sp->recv_credit_max); 1634 + if (credits_granted) { 1635 + atomic_add(credits_granted, &sc->send_io.credits.count); 1636 + /* 1637 + * We have new send credits granted from the remote peer. 1638 + * If any sender is waiting for credits, unblock it. 1639 + */ 1640 + wake_up(&sc->send_io.credits.wait_queue); 1641 + } 1642 + 1643 + /* Send an immediate response right away if requested */ 1644 + if (flags & SMBDIRECT_FLAG_RESPONSE_REQUESTED) { 1645 + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, 1646 + "schedule send of immediate response\n"); 1647 + queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); 1648 + } 1649 + 1650 + /* 1651 + * If this is a packet with a data payload, place the data in the 1652 + * reassembly queue and wake up the reading thread 1653 + */ 1654 + if (data_length) { 1655 + if (current_recv_credits <= (sc->recv_io.credits.target / 4) || 1656 + sc->recv_io.credits.target > old_recv_credit_target) 1657 + queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); 1658 + 1659 + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, data_length); 1660 + wake_up(&sc->recv_io.reassembly.wait_queue); 1661 + } else 1662 + smbdirect_connection_put_recv_io(recv_io); 1663 + 1664 + return; 1665 + 1666 + error: 1667 + /* 1668 + * Make sure smbdirect_connection_put_recv_io() does not 1669 + * start recv_io.posted.refill_work. 1670 + */ 1671 + disable_work(&sc->recv_io.posted.refill_work); 1672 + smbdirect_connection_put_recv_io(recv_io); 1673 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 1674 + } 1675 + 1676 + int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) 1677 + { 1678 + int missing; 1679 + int posted = 0; 1680 + 1681 + if (unlikely(sc->first_error)) 1682 + return sc->first_error; 1683 + 1684 + /* 1685 + * Find out how many smbdirect_recv_io buffers we should post. 1686 + * 1687 + * Note that sc->recv_io.credits.target is the value 1688 + * from the peer and it can in theory change over time, 1689 + * but it is forced to be at least 1 and at max 1690 + * sp->recv_credit_max. 1691 + * 1692 + * So it can happen that missing will be lower than 0, 1693 + * which means the peer has recently lowered its desired 1694 + * target, while we have already granted a higher number of credits. 1695 + * 1696 + * Note 'posted' is the number of smbdirect_recv_io buffers 1697 + * posted within this function, while sc->recv_io.posted.count 1698 + * is the overall number of posted smbdirect_recv_io buffers.
1699 + * 1700 + * We try to post as many buffers as are missing, but 1701 + * this is limited if a lot of smbdirect_recv_io buffers 1702 + * are still in the sc->recv_io.reassembly.list instead of 1703 + * the sc->recv_io.free.list. 1704 + * 1705 + */ 1706 + missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.posted.count); 1707 + while (posted < missing) { 1708 + struct smbdirect_recv_io *recv_io; 1709 + int ret; 1710 + 1711 + /* 1712 + * It's ok if smbdirect_connection_get_recv_io() 1713 + * returns NULL; it means the smbdirect_recv_io structures 1714 + * are still in the reassembly.list. 1715 + */ 1716 + recv_io = smbdirect_connection_get_recv_io(sc); 1717 + if (!recv_io) 1718 + break; 1719 + 1720 + recv_io->first_segment = false; 1721 + 1722 + ret = smbdirect_connection_post_recv_io(recv_io); 1723 + if (ret) { 1724 + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, 1725 + "smbdirect_connection_post_recv_io failed rc=%d (%1pe)\n", 1726 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 1727 + smbdirect_connection_put_recv_io(recv_io); 1728 + return ret; 1729 + } 1730 + 1731 + atomic_inc(&sc->recv_io.posted.count); 1732 + posted += 1; 1733 + } 1734 + 1735 + /* If nothing was posted we're done */ 1736 + if (posted == 0) 1737 + return 0; 1738 + 1739 + atomic_add(posted, &sc->recv_io.credits.available); 1740 + 1741 + /* 1742 + * If a sender holding the last send credit is waiting 1743 + * for credits it can grant, we need to wake it up 1744 + */ 1745 + if (atomic_read(&sc->send_io.bcredits.count) == 0 && 1746 + atomic_read(&sc->send_io.credits.count) == 0) 1747 + wake_up(&sc->send_io.credits.wait_queue); 1748 + 1749 + /* 1750 + * If we posted at least one smbdirect_recv_io buffer, 1751 + * we need to inform the peer about it and grant 1752 + * additional credits. 1753 + * 1754 + * However there is one case where we don't want to 1755 + * do that. 1756 + * 1757 + * If only a single credit was missing before 1758 + * reaching the requested target, we should not 1759 + * post an immediate send, as that would cause 1760 + * endless ping-pong once a keepalive exchange 1761 + * is started. 1762 + * 1763 + * However if sc->recv_io.credits.target is only 1, 1764 + * the peer has no credit left and we need to 1765 + * grant the credit anyway.
1766 + */ 1767 + if (missing == 1 && sc->recv_io.credits.target != 1) 1768 + return 0; 1769 + 1770 + return posted; 1771 + } 1772 + 1773 + static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) 1774 + { 1775 + struct smbdirect_socket *sc = 1776 + container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); 1777 + int posted; 1778 + 1779 + posted = smbdirect_connection_recv_io_refill(sc); 1780 + if (unlikely(posted < 0)) { 1781 + smbdirect_socket_schedule_cleanup(sc, posted); 1782 + return; 1783 + } 1784 + if (posted > 0) { 1785 + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, 1786 + "schedule send of an empty message\n"); 1787 + queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); 1788 + } 1789 + } 1790 + 1791 + int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, 1792 + struct msghdr *msg, 1793 + unsigned int flags) 1794 + { 1795 + struct smbdirect_recv_io *response; 1796 + struct smbdirect_data_transfer *data_transfer; 1797 + size_t size = iov_iter_count(&msg->msg_iter); 1798 + int to_copy, to_read, data_read, offset; 1799 + u32 data_length, remaining_data_length, data_offset; 1800 + int ret; 1801 + 1802 + if (WARN_ONCE(flags, "unexpected flags=0x%x\n", flags)) 1803 + return -EINVAL; /* no flags support for now */ 1804 + 1805 + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) != ITER_DEST)) 1806 + return -EINVAL; /* It's a bug in upper layer to get there */ 1807 + 1808 + again: 1809 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 1810 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1811 + "status=%s first_error=%1pe => %1pe\n", 1812 + smbdirect_socket_status_string(sc->status), 1813 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 1814 + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); 1815 + return -ENOTCONN; 1816 + } 1817 + 1818 + /* 1819 + * No need to hold the reassembly queue lock all the time as we are 1820 + * the only one reading from the front of the queue. The transport 1821 + * may add more entries to the back of the queue at the same time 1822 + */ 1823 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1824 + "size=%zd sc->recv_io.reassembly.data_length=%d\n", 1825 + size, sc->recv_io.reassembly.data_length); 1826 + if (sc->recv_io.reassembly.data_length >= size) { 1827 + int queue_length; 1828 + int queue_removed = 0; 1829 + unsigned long flags; 1830 + 1831 + /* 1832 + * Need to make sure reassembly_data_length is read before 1833 + * reading reassembly_queue_length and calling 1834 + * smbdirect_connection_reassembly_first_recv_io. This call is lock free 1835 + * as we never read at the end of the queue which are being 1836 + * updated in SOFTIRQ as more data is received 1837 + */ 1838 + virt_rmb(); 1839 + queue_length = sc->recv_io.reassembly.queue_length; 1840 + data_read = 0; 1841 + to_read = size; 1842 + offset = sc->recv_io.reassembly.first_entry_offset; 1843 + while (data_read < size) { 1844 + response = smbdirect_connection_reassembly_first_recv_io(sc); 1845 + data_transfer = (void *)response->packet; 1846 + data_length = le32_to_cpu(data_transfer->data_length); 1847 + remaining_data_length = 1848 + le32_to_cpu( 1849 + data_transfer->remaining_data_length); 1850 + data_offset = le32_to_cpu(data_transfer->data_offset); 1851 + 1852 + /* 1853 + * The upper layer expects RFC1002 length at the 1854 + * beginning of the payload. Return it to indicate 1855 + * the total length of the packet. This minimize the 1856 + * change to upper layer packet processing logic. 
This 1857 + will eventually be removed when an intermediate 1858 + transport layer is added. 1859 + */ 1860 + if (response->first_segment && size == 4) { 1861 + unsigned int rfc1002_len = 1862 + data_length + remaining_data_length; 1863 + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); 1864 + 1865 + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), 1866 + &msg->msg_iter) != sizeof(rfc1002_hdr)) 1867 + return -EFAULT; 1868 + data_read = 4; 1869 + response->first_segment = false; 1870 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1871 + "returning rfc1002 length %d\n", 1872 + rfc1002_len); 1873 + goto read_rfc1002_done; 1874 + } 1875 + 1876 + to_copy = min_t(int, data_length - offset, to_read); 1877 + if (copy_to_iter((u8 *)data_transfer + data_offset + offset, 1878 + to_copy, &msg->msg_iter) != to_copy) 1879 + return -EFAULT; 1880 + 1881 + /* move on to the next buffer? */ 1882 + if (to_copy == data_length - offset) { 1883 + queue_length--; 1884 + /* 1885 + * No need to lock if we are not at the 1886 + * end of the queue 1887 + */ 1888 + if (queue_length) 1889 + list_del(&response->list); 1890 + else { 1891 + spin_lock_irqsave( 1892 + &sc->recv_io.reassembly.lock, flags); 1893 + list_del(&response->list); 1894 + spin_unlock_irqrestore( 1895 + &sc->recv_io.reassembly.lock, flags); 1896 + } 1897 + queue_removed++; 1898 + sc->statistics.dequeue_reassembly_queue++; 1899 + smbdirect_connection_put_recv_io(response); 1900 + offset = 0; 1901 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1902 + "smbdirect_connection_put_recv_io offset=0\n"); 1903 + } else 1904 + offset += to_copy; 1905 + 1906 + to_read -= to_copy; 1907 + data_read += to_copy; 1908 + 1909 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1910 + "memcpy %d bytes len-ofs=%u => todo=%u done=%u ofs=%u\n", 1911 + to_copy, data_length - offset, 1912 + to_read, data_read, offset); 1913 + } 1914 + 1915 + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 1916 + sc->recv_io.reassembly.data_length -= data_read; 1917 + sc->recv_io.reassembly.queue_length -= queue_removed; 1918 + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 1919 + 1920 + sc->recv_io.reassembly.first_entry_offset = offset; 1921 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1922 + "returning data_read=%d reassembly_length=%d first_ofs=%u\n", 1923 + data_read, sc->recv_io.reassembly.data_length, 1924 + sc->recv_io.reassembly.first_entry_offset); 1925 + read_rfc1002_done: 1926 + return data_read; 1927 + } 1928 + 1929 + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, 1930 + "wait_event on more data\n"); 1931 + ret = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, 1932 + sc->recv_io.reassembly.data_length >= size || 1933 + sc->status != SMBDIRECT_SOCKET_CONNECTED); 1934 + /* Don't return any data if interrupted */ 1935 + if (ret) 1936 + return ret; 1937 + 1938 + goto again; 1939 + } 1940 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_recvmsg); 1941 + 1942 + static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, 1943 + struct page *page, size_t off, size_t len) 1944 + { 1945 + struct ib_sge *sge; 1946 + u64 addr; 1947 + 1948 + if (state->num_sge >= state->max_sge) 1949 + return false; 1950 + 1951 + addr = ib_dma_map_page(state->device, page, 1952 + off, len, state->direction); 1953 + if (ib_dma_mapping_error(state->device, addr)) 1954 + return false; 1955 + 1956 + sge = &state->sge[state->num_sge++]; 1957 + sge->addr = addr; 1958 + sge->length = len; 1959 + sge->lkey = state->local_dma_lkey; 1960 + 1961 + return true; 1962 + } 1963 +
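[Editor's note: a minimal usage sketch for smbdirect_connection_recvmsg() above. The caller below is hypothetical and not part of this series; what is grounded in the diff is that flags must be 0, the iterator must be ITER_DEST, and a 4-byte read on a first segment returns the synthesized RFC1002 length.]

/* Hypothetical caller (illustration only, not part of this diff). */
static int example_read_rfc1002_length(struct smbdirect_socket *sc, u32 *pdu_len)
{
	struct msghdr msg = {};
	__be32 hdr;
	struct kvec vec = { .iov_base = &hdr, .iov_len = sizeof(hdr) };
	int ret;

	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, sizeof(hdr));
	ret = smbdirect_connection_recvmsg(sc, &msg, 0);
	if (ret < 0)
		return ret;
	*pdu_len = be32_to_cpu(hdr);	/* length of the SMB2 PDU that follows */
	return 0;
}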
1964 + /* 1965 + * Extract page fragments from a BVEC-class iterator and add them to an ib_sge 1966 + * list. The pages are not pinned. 1967 + */ 1968 + static ssize_t smbdirect_map_sges_from_bvec(struct iov_iter *iter, 1969 + struct smbdirect_map_sges *state, 1970 + ssize_t maxsize) 1971 + { 1972 + const struct bio_vec *bv = iter->bvec; 1973 + unsigned long start = iter->iov_offset; 1974 + unsigned int i; 1975 + ssize_t ret = 0; 1976 + 1977 + for (i = 0; i < iter->nr_segs; i++) { 1978 + size_t off, len; 1979 + bool ok; 1980 + 1981 + len = bv[i].bv_len; 1982 + if (start >= len) { 1983 + start -= len; 1984 + continue; 1985 + } 1986 + 1987 + len = min_t(size_t, maxsize, len - start); 1988 + off = bv[i].bv_offset + start; 1989 + 1990 + ok = smbdirect_map_sges_single_page(state, 1991 + bv[i].bv_page, 1992 + off, 1993 + len); 1994 + if (!ok) 1995 + return -EIO; 1996 + 1997 + ret += len; 1998 + maxsize -= len; 1999 + if (state->num_sge >= state->max_sge || maxsize <= 0) 2000 + break; 2001 + start = 0; 2002 + } 2003 + 2004 + if (ret > 0) 2005 + iov_iter_advance(iter, ret); 2006 + return ret; 2007 + } 2008 + 2009 + /* 2010 + * Extract fragments from a KVEC-class iterator and add them to an ib_sge list. 2011 + * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. 2012 + * The pages are not pinned. 2013 + */ 2014 + static ssize_t smbdirect_map_sges_from_kvec(struct iov_iter *iter, 2015 + struct smbdirect_map_sges *state, 2016 + ssize_t maxsize) 2017 + { 2018 + const struct kvec *kv = iter->kvec; 2019 + unsigned long start = iter->iov_offset; 2020 + unsigned int i; 2021 + ssize_t ret = 0; 2022 + 2023 + for (i = 0; i < iter->nr_segs; i++) { 2024 + struct page *page; 2025 + unsigned long kaddr; 2026 + size_t off, len, seg; 2027 + 2028 + len = kv[i].iov_len; 2029 + if (start >= len) { 2030 + start -= len; 2031 + continue; 2032 + } 2033 + 2034 + kaddr = (unsigned long)kv[i].iov_base + start; 2035 + off = kaddr & ~PAGE_MASK; 2036 + len = min_t(size_t, maxsize, len - start); 2037 + kaddr &= PAGE_MASK; 2038 + 2039 + maxsize -= len; 2040 + do { 2041 + bool ok; 2042 + 2043 + seg = min_t(size_t, len, PAGE_SIZE - off); 2044 + 2045 + if (is_vmalloc_or_module_addr((void *)kaddr)) 2046 + page = vmalloc_to_page((void *)kaddr); 2047 + else 2048 + page = virt_to_page((void *)kaddr); 2049 + 2050 + ok = smbdirect_map_sges_single_page(state, page, off, seg); 2051 + if (!ok) 2052 + return -EIO; 2053 + 2054 + ret += seg; 2055 + len -= seg; 2056 + kaddr += PAGE_SIZE; 2057 + off = 0; 2058 + } while (len > 0 && state->num_sge < state->max_sge); 2059 + 2060 + if (state->num_sge >= state->max_sge || maxsize <= 0) 2061 + break; 2062 + start = 0; 2063 + } 2064 + 2065 + if (ret > 0) 2066 + iov_iter_advance(iter, ret); 2067 + return ret; 2068 + } 2069 + 2070 + /* 2071 + * Extract folio fragments from a FOLIOQ-class iterator and add them to an 2072 + * ib_sge list. The folios are not pinned. 
2073 + */ 2074 + static ssize_t smbdirect_map_sges_from_folioq(struct iov_iter *iter, 2075 + struct smbdirect_map_sges *state, 2076 + ssize_t maxsize) 2077 + { 2078 + const struct folio_queue *folioq = iter->folioq; 2079 + unsigned int slot = iter->folioq_slot; 2080 + ssize_t ret = 0; 2081 + size_t offset = iter->iov_offset; 2082 + 2083 + if (WARN_ON_ONCE(!folioq)) 2084 + return -EIO; 2085 + 2086 + if (slot >= folioq_nr_slots(folioq)) { 2087 + folioq = folioq->next; 2088 + if (WARN_ON_ONCE(!folioq)) 2089 + return -EIO; 2090 + slot = 0; 2091 + } 2092 + 2093 + do { 2094 + struct folio *folio = folioq_folio(folioq, slot); 2095 + size_t fsize = folioq_folio_size(folioq, slot); 2096 + 2097 + if (offset < fsize) { 2098 + size_t part = umin(maxsize, fsize - offset); 2099 + bool ok; 2100 + 2101 + ok = smbdirect_map_sges_single_page(state, 2102 + folio_page(folio, 0), 2103 + offset, 2104 + part); 2105 + if (!ok) 2106 + return -EIO; 2107 + 2108 + offset += part; 2109 + ret += part; 2110 + maxsize -= part; 2111 + } 2112 + 2113 + if (offset >= fsize) { 2114 + offset = 0; 2115 + slot++; 2116 + if (slot >= folioq_nr_slots(folioq)) { 2117 + if (!folioq->next) { 2118 + WARN_ON_ONCE(ret < iter->count); 2119 + break; 2120 + } 2121 + folioq = folioq->next; 2122 + slot = 0; 2123 + } 2124 + } 2125 + } while (state->num_sge < state->max_sge && maxsize > 0); 2126 + 2127 + iter->folioq = folioq; 2128 + iter->folioq_slot = slot; 2129 + iter->iov_offset = offset; 2130 + iter->count -= ret; 2131 + return ret; 2132 + } 2133 + 2134 + /* 2135 + * Extract page fragments from up to the given amount of the source iterator 2136 + * and build up an ib_sge list that refers to all of those bits. The ib_sge list 2137 + * is appended to, up to the maximum number of elements set in the parameter 2138 + * block. 2139 + * 2140 + * The extracted page fragments are not pinned or ref'd in any way; if an 2141 + * IOVEC/UBUF-type iterator is to be used, it should be converted to a 2142 + * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some 2143 + * way. 2144 + */ 2145 + static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, 2146 + struct smbdirect_map_sges *state) 2147 + { 2148 + ssize_t ret; 2149 + size_t before = state->num_sge; 2150 + 2151 + if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) 2152 + return -EIO; 2153 + 2154 + switch (iov_iter_type(iter)) { 2155 + case ITER_BVEC: 2156 + ret = smbdirect_map_sges_from_bvec(iter, state, len); 2157 + break; 2158 + case ITER_KVEC: 2159 + ret = smbdirect_map_sges_from_kvec(iter, state, len); 2160 + break; 2161 + case ITER_FOLIOQ: 2162 + ret = smbdirect_map_sges_from_folioq(iter, state, len); 2163 + break; 2164 + default: 2165 + WARN_ONCE(1, "iov_iter_type[%u]\n", iov_iter_type(iter)); 2166 + return -EIO; 2167 + } 2168 + 2169 + if (ret < 0) { 2170 + while (state->num_sge > before) { 2171 + struct ib_sge *sge = &state->sge[--state->num_sge]; /* last valid entry is at num_sge - 1 */ 2172 + 2173 + ib_dma_unmap_page(state->device, 2174 + sge->addr, 2175 + sge->length, 2176 + state->direction); 2177 + } 2178 + } 2179 + 2180 + return ret; 2181 + }
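[Editor's note: a sketch of how the send path is expected to drive the (static) dispatcher above. The wrapper name is hypothetical and the smbdirect_map_sges field assignments are inferred from their use in the helpers; the caller supplies the sge array.]

static ssize_t example_map_send_buffer(struct smbdirect_socket *sc,
				       struct smbdirect_map_sges *state,
				       struct iov_iter *iter, size_t len)
{
	/* caller provides state->sge[] and state->max_sge */
	state->device = sc->ib.dev;
	state->local_dma_lkey = sc->ib.pd->local_dma_lkey;
	state->direction = DMA_TO_DEVICE;	/* ITER_SOURCE data flows to the device */
	state->num_sge = 0;

	return smbdirect_map_sges_from_iter(iter, len, state);
}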
+88
fs/smb/common/smbdirect/smbdirect_debug.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (c) 2025, Stefan Metzmacher 5 + */ 6 + 7 + #include "smbdirect_internal.h" 8 + #include <linux/seq_file.h> 9 + 10 + void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, 11 + unsigned int rdma_readwrite_threshold, 12 + struct seq_file *m) 13 + { 14 + const struct smbdirect_socket_parameters *sp; 15 + 16 + if (!sc) 17 + return; 18 + sp = &sc->parameters; 19 + 20 + seq_puts(m, "\n"); 21 + seq_printf(m, "SMBDirect protocol version: 0x%x ", 22 + SMBDIRECT_V1); 23 + seq_printf(m, "transport status: %s (%u)", 24 + smbdirect_socket_status_string(sc->status), 25 + sc->status); 26 + 27 + seq_puts(m, "\n"); 28 + seq_printf(m, "Conn receive_credit_max: %u ", 29 + sp->recv_credit_max); 30 + seq_printf(m, "send_credit_target: %u max_send_size: %u", 31 + sp->send_credit_target, 32 + sp->max_send_size); 33 + 34 + seq_puts(m, "\n"); 35 + seq_printf(m, "Conn max_fragmented_recv_size: %u ", 36 + sp->max_fragmented_recv_size); 37 + seq_printf(m, "max_fragmented_send_size: %u max_receive_size:%u", 38 + sp->max_fragmented_send_size, 39 + sp->max_recv_size); 40 + 41 + seq_puts(m, "\n"); 42 + seq_printf(m, "Conn keep_alive_interval: %u ", 43 + sp->keepalive_interval_msec * 1000); 44 + seq_printf(m, "max_readwrite_size: %u rdma_readwrite_threshold: %u", 45 + sp->max_read_write_size, 46 + rdma_readwrite_threshold); 47 + 48 + seq_puts(m, "\n"); 49 + seq_printf(m, "Debug count_get_receive_buffer: %llu ", 50 + sc->statistics.get_receive_buffer); 51 + seq_printf(m, "count_put_receive_buffer: %llu count_send_empty: %llu", 52 + sc->statistics.put_receive_buffer, 53 + sc->statistics.send_empty); 54 + 55 + seq_puts(m, "\n"); 56 + seq_printf(m, "Read Queue count_enqueue_reassembly_queue: %llu ", 57 + sc->statistics.enqueue_reassembly_queue); 58 + seq_printf(m, "count_dequeue_reassembly_queue: %llu ", 59 + sc->statistics.dequeue_reassembly_queue); 60 + seq_printf(m, "reassembly_data_length: %u ", 61 + sc->recv_io.reassembly.data_length); 62 + seq_printf(m, "reassembly_queue_length: %u", 63 + sc->recv_io.reassembly.queue_length); 64 + 65 + seq_puts(m, "\n"); 66 + seq_printf(m, "Current Credits send_credits: %u ", 67 + atomic_read(&sc->send_io.credits.count)); 68 + seq_printf(m, "receive_credits: %u receive_credit_target: %u", 69 + atomic_read(&sc->recv_io.credits.count), 70 + sc->recv_io.credits.target); 71 + 72 + seq_puts(m, "\n"); 73 + seq_printf(m, "Pending send_pending: %u ", 74 + atomic_read(&sc->send_io.pending.count)); 75 + 76 + seq_puts(m, "\n"); 77 + seq_printf(m, "MR responder_resources: %u ", 78 + sp->responder_resources); 79 + seq_printf(m, "max_frmr_depth: %u mr_type: 0x%x", 80 + sp->max_frmr_depth, 81 + sc->mr_io.type); 82 + 83 + seq_puts(m, "\n"); 84 + seq_printf(m, "MR mr_ready_count: %u mr_used_count: %u", 85 + atomic_read(&sc->mr_io.ready.count), 86 + atomic_read(&sc->mr_io.used.count)); 87 + } 88 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_legacy_debug_proc_show);
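[Editor's note: a sketch of how a filesystem module might hook this exported helper into its own procfs output; the wrapper below is hypothetical and the "not available" message is illustrative only.]

static void example_debug_show(struct seq_file *m, struct smbdirect_socket *sc,
			       unsigned int rdma_readwrite_threshold)
{
	if (!sc) {
		seq_puts(m, "\nSMBDirect transport not available");
		return;
	}
	smbdirect_connection_legacy_debug_proc_show(sc, rdma_readwrite_threshold, m);
}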
+277
fs/smb/common/smbdirect/smbdirect_devices.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (C) 2018, LG Electronics. 5 + * Copyright (c) 2025 Stefan Metzmacher 6 + */ 7 + 8 + #include "smbdirect_internal.h" 9 + 10 + static u8 smbdirect_ib_device_rdma_capable_node_type(struct ib_device *ib_dev) 11 + { 12 + if (!smbdirect_frwr_is_supported(&ib_dev->attrs)) 13 + return RDMA_NODE_UNSPECIFIED; 14 + 15 + switch (ib_dev->node_type) { 16 + case RDMA_NODE_IB_CA: /* Infiniband, RoCE v1 and v2 */ 17 + case RDMA_NODE_RNIC: /* iWarp */ 18 + return ib_dev->node_type; 19 + } 20 + 21 + return RDMA_NODE_UNSPECIFIED; 22 + } 23 + 24 + static int smbdirect_ib_client_add(struct ib_device *ib_dev) 25 + { 26 + u8 node_type = smbdirect_ib_device_rdma_capable_node_type(ib_dev); 27 + struct smbdirect_device *sdev; 28 + const char *node_str; 29 + const char *action; 30 + u32 pidx; 31 + 32 + switch (node_type) { 33 + case RDMA_NODE_IB_CA: 34 + node_str = "IB_CA"; 35 + action = "added"; 36 + break; 37 + case RDMA_NODE_RNIC: 38 + node_str = "RNIC"; 39 + action = "added"; 40 + break; 41 + case RDMA_NODE_UNSPECIFIED: 42 + node_str = "UNSPECIFIED"; 43 + action = "ignored"; 44 + break; 45 + default: 46 + node_str = "UNKNOWN"; 47 + action = "ignored"; 48 + node_type = RDMA_NODE_UNSPECIFIED; 49 + break; 50 + } 51 + 52 + pr_info("ib_dev[%.*s]: %s: %s %s=%u %s=0x%llx %s=0x%llx %s=0x%llx\n", 53 + IB_DEVICE_NAME_MAX, 54 + ib_dev->name, 55 + action, 56 + node_str, 57 + "max_fast_reg_page_list_len", 58 + ib_dev->attrs.max_fast_reg_page_list_len, 59 + "device_cap_flags", 60 + ib_dev->attrs.device_cap_flags, 61 + "kernel_cap_flags", 62 + ib_dev->attrs.kernel_cap_flags, 63 + "page_size_cap", 64 + ib_dev->attrs.page_size_cap); 65 + 66 + if (node_type == RDMA_NODE_UNSPECIFIED) 67 + return 0; 68 + 69 + pr_info("ib_dev[%.*s]: %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u\n", 70 + IB_DEVICE_NAME_MAX, 71 + ib_dev->name, 72 + "num_ports", 73 + rdma_end_port(ib_dev), 74 + "max_qp_rd_atom", 75 + ib_dev->attrs.max_qp_rd_atom, 76 + "max_qp_init_rd_atom", 77 + ib_dev->attrs.max_qp_init_rd_atom, 78 + "max_sgl_rd", 79 + ib_dev->attrs.max_sgl_rd, 80 + "max_sge_rd", 81 + ib_dev->attrs.max_sge_rd, 82 + "max_cqe", 83 + ib_dev->attrs.max_cqe, 84 + "max_qp_wr", 85 + ib_dev->attrs.max_qp_wr, 86 + "max_send_sge", 87 + ib_dev->attrs.max_send_sge, 88 + "max_recv_sge", 89 + ib_dev->attrs.max_recv_sge); 90 + 91 + rdma_for_each_port(ib_dev, pidx) { 92 + const struct ib_port_immutable *ib_pi = 93 + ib_port_immutable_read(ib_dev, pidx); 94 + u32 core_cap_flags = ib_pi ? 
ib_pi->core_cap_flags : 0; 95 + 96 + pr_info("ib_dev[%.*s]PORT[%u]: %s=%u %s=%u %s=%u %s=%u %s=%u %s=0x%x\n", 97 + IB_DEVICE_NAME_MAX, 98 + ib_dev->name, 99 + pidx, 100 + "iwarp", 101 + rdma_protocol_iwarp(ib_dev, pidx), 102 + "ib", 103 + rdma_protocol_ib(ib_dev, pidx), 104 + "roce", 105 + rdma_protocol_roce(ib_dev, pidx), 106 + "v1", 107 + rdma_protocol_roce_eth_encap(ib_dev, pidx), 108 + "v2", 109 + rdma_protocol_roce_udp_encap(ib_dev, pidx), 110 + "core_cap_flags", 111 + core_cap_flags); 112 + } 113 + 114 + sdev = kzalloc_obj(*sdev); 115 + if (!sdev) 116 + return -ENOMEM; 117 + sdev->ib_dev = ib_dev; 118 + snprintf(sdev->ib_name, ARRAY_SIZE(sdev->ib_name), "%.*s", 119 + IB_DEVICE_NAME_MAX, ib_dev->name); 120 + 121 + write_lock(&smbdirect_globals.devices.lock); 122 + list_add(&sdev->list, &smbdirect_globals.devices.list); 123 + write_unlock(&smbdirect_globals.devices.lock); 124 + 125 + return 0; 126 + } 127 + 128 + static void smbdirect_ib_client_remove(struct ib_device *ib_dev, void *client_data) 129 + { 130 + struct smbdirect_device *sdev, *tmp; 131 + 132 + write_lock(&smbdirect_globals.devices.lock); 133 + list_for_each_entry_safe(sdev, tmp, &smbdirect_globals.devices.list, list) { 134 + if (sdev->ib_dev == ib_dev) { 135 + list_del(&sdev->list); 136 + pr_info("ib_dev[%.*s] removed\n", 137 + IB_DEVICE_NAME_MAX, sdev->ib_name); 138 + kfree(sdev); 139 + break; 140 + } 141 + } 142 + write_unlock(&smbdirect_globals.devices.lock); 143 + } 144 + 145 + static void smbdirect_ib_client_rename(struct ib_device *ib_dev, void *client_data) 146 + { 147 + struct smbdirect_device *sdev; 148 + 149 + write_lock(&smbdirect_globals.devices.lock); 150 + list_for_each_entry(sdev, &smbdirect_globals.devices.list, list) { 151 + if (sdev->ib_dev == ib_dev) { 152 + pr_info("ib_dev[%.*s] renamed to [%.*s]\n", 153 + IB_DEVICE_NAME_MAX, sdev->ib_name, 154 + IB_DEVICE_NAME_MAX, ib_dev->name); 155 + snprintf(sdev->ib_name, ARRAY_SIZE(sdev->ib_name), "%.*s", 156 + IB_DEVICE_NAME_MAX, ib_dev->name); 157 + break; 158 + } 159 + } 160 + write_unlock(&smbdirect_globals.devices.lock); 161 + } 162 + 163 + static struct ib_client smbdirect_ib_client = { 164 + .name = "smbdirect_ib_client", 165 + .add = smbdirect_ib_client_add, 166 + .remove = smbdirect_ib_client_remove, 167 + .rename = smbdirect_ib_client_rename, 168 + }; 169 + 170 + static u8 smbdirect_netdev_find_rdma_capable_node_type(struct net_device *netdev) 171 + { 172 + struct smbdirect_device *sdev; 173 + u8 node_type = RDMA_NODE_UNSPECIFIED; 174 + 175 + read_lock(&smbdirect_globals.devices.lock); 176 + list_for_each_entry(sdev, &smbdirect_globals.devices.list, list) { 177 + u32 pi; 178 + 179 + rdma_for_each_port(sdev->ib_dev, pi) { 180 + struct net_device *ndev; 181 + 182 + ndev = ib_device_get_netdev(sdev->ib_dev, pi); 183 + if (!ndev) 184 + continue; 185 + 186 + if (ndev == netdev) { 187 + dev_put(ndev); 188 + node_type = sdev->ib_dev->node_type; 189 + goto out; 190 + } 191 + dev_put(ndev); 192 + } 193 + } 194 + out: 195 + read_unlock(&smbdirect_globals.devices.lock); 196 + 197 + if (node_type == RDMA_NODE_UNSPECIFIED) { 198 + struct ib_device *ibdev; 199 + 200 + ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); 201 + if (ibdev) { 202 + node_type = smbdirect_ib_device_rdma_capable_node_type(ibdev); 203 + ib_device_put(ibdev); 204 + } 205 + } 206 + 207 + return node_type; 208 + } 209 + 210 + /* 211 + * Returns RDMA_NODE_UNSPECIFIED when the netdev has 212 + * no support for smbdirect capable rdma. 
213 + * 214 + * Otherwise RDMA_NODE_RNIC is returned for iwarp devices 215 + * and RDMA_NODE_IB_CA for Infiniband and RoCE (v1 and v2). 216 + */ 217 + u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev) 218 + { 219 + struct net_device *lower_dev; 220 + struct list_head *iter; 221 + u8 node_type = RDMA_NODE_UNSPECIFIED; 222 + 223 + node_type = smbdirect_netdev_find_rdma_capable_node_type(netdev); 224 + if (node_type != RDMA_NODE_UNSPECIFIED) 225 + return node_type; 226 + 227 + /* check if netdev is bridge or VLAN */ 228 + if (netif_is_bridge_master(netdev) || netdev->priv_flags & IFF_802_1Q_VLAN) 229 + netdev_for_each_lower_dev(netdev, lower_dev, iter) { 230 + node_type = smbdirect_netdev_find_rdma_capable_node_type(lower_dev); 231 + if (node_type != RDMA_NODE_UNSPECIFIED) 232 + return node_type; 233 + } 234 + 235 + /* check if netdev is IPoIB safely without layer violation */ 236 + if (netdev->type == ARPHRD_INFINIBAND) 237 + return RDMA_NODE_IB_CA; 238 + 239 + return RDMA_NODE_UNSPECIFIED; 240 + } 241 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_netdev_rdma_capable_node_type); 242 + 243 + __init int smbdirect_devices_init(void) 244 + { 245 + int ret; 246 + 247 + rwlock_init(&smbdirect_globals.devices.lock); 248 + INIT_LIST_HEAD(&smbdirect_globals.devices.list); 249 + 250 + ret = ib_register_client(&smbdirect_ib_client); 251 + if (ret) { 252 + pr_crit("failed to ib_register_client: %d %1pe\n", 253 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 254 + return ret; 255 + } 256 + 257 + return 0; 258 + } 259 + 260 + __exit void smbdirect_devices_exit(void) 261 + { 262 + struct smbdirect_device *sdev, *tmp; 263 + 264 + /* 265 + * On exit we just clean up so that 266 + * smbdirect_ib_client_remove() won't 267 + * print removals of devices. 268 + */ 269 + write_lock(&smbdirect_globals.devices.lock); 270 + list_for_each_entry_safe(sdev, tmp, &smbdirect_globals.devices.list, list) { 271 + list_del(&sdev->list); 272 + kfree(sdev); 273 + } 274 + write_unlock(&smbdirect_globals.devices.lock); 275 + 276 + ib_unregister_client(&smbdirect_ib_client); 277 + }
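[Editor's note: a sketch of a typical consumer of smbdirect_netdev_rdma_capable_node_type(); the helper name is hypothetical, but the return values are the ones documented above.]

static bool example_netdev_supports_smbdirect(struct net_device *netdev,
					      bool *is_iwarp)
{
	switch (smbdirect_netdev_rdma_capable_node_type(netdev)) {
	case RDMA_NODE_RNIC:	/* iWarp */
		*is_iwarp = true;
		return true;
	case RDMA_NODE_IB_CA:	/* Infiniband, RoCE v1/v2, IPoIB */
		*is_iwarp = false;
		return true;
	default:		/* RDMA_NODE_UNSPECIFIED */
		return false;
	}
}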
+141
fs/smb/common/smbdirect/smbdirect_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (c) 2025, Stefan Metzmacher 4 + */ 5 + 6 + #ifndef __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ 7 + #define __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ 8 + 9 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 + 11 + #include "smbdirect.h" 12 + #include "smbdirect_pdu.h" 13 + #include "smbdirect_public.h" 14 + 15 + #include <linux/mutex.h> 16 + 17 + struct smbdirect_module_state { 18 + struct mutex mutex; 19 + 20 + struct { 21 + struct workqueue_struct *accept; 22 + struct workqueue_struct *connect; 23 + struct workqueue_struct *idle; 24 + struct workqueue_struct *refill; 25 + struct workqueue_struct *immediate; 26 + struct workqueue_struct *cleanup; 27 + } workqueues; 28 + 29 + struct { 30 + rwlock_t lock; 31 + struct list_head list; 32 + } devices; 33 + }; 34 + 35 + extern struct smbdirect_module_state smbdirect_globals; 36 + 37 + #include "smbdirect_socket.h" 38 + 39 + struct smbdirect_device { 40 + struct list_head list; 41 + struct ib_device *ib_dev; 42 + /* 43 + * copy of ib_dev->name, 44 + * in order to print renames 45 + */ 46 + char ib_name[IB_DEVICE_NAME_MAX]; 47 + }; 48 + 49 + int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc); 50 + 51 + int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc); 52 + 53 + void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, 54 + const char *macro_name, 55 + unsigned int lvl, 56 + const char *func, 57 + unsigned int line, 58 + int error, 59 + enum smbdirect_socket_status *force_status); 60 + #define smbdirect_socket_schedule_cleanup(__sc, __error) \ 61 + __smbdirect_socket_schedule_cleanup(__sc, \ 62 + "smbdirect_socket_schedule_cleanup", SMBDIRECT_LOG_ERR, \ 63 + __func__, __LINE__, __error, NULL) 64 + #define smbdirect_socket_schedule_cleanup_lvl(__sc, __lvl, __error) \ 65 + __smbdirect_socket_schedule_cleanup(__sc, \ 66 + "smbdirect_socket_schedule_cleanup_lvl", __lvl, \ 67 + __func__, __LINE__, __error, NULL) 68 + #define smbdirect_socket_schedule_cleanup_status(__sc, __lvl, __error, __status) do { \ 69 + enum smbdirect_socket_status __force_status = __status; \ 70 + __smbdirect_socket_schedule_cleanup(__sc, \ 71 + "smbdirect_socket_schedule_cleanup_status", __lvl, \ 72 + __func__, __LINE__, __error, &__force_status); \ 73 + } while (0) 74 + 75 + void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc); 76 + 77 + int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, 78 + enum smbdirect_socket_status expected_status, 79 + int unexpected_errno, 80 + wait_queue_head_t *waitq, 81 + atomic_t *total_credits, 82 + int needed); 83 + 84 + void smbdirect_connection_rdma_established(struct smbdirect_socket *sc); 85 + 86 + void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc); 87 + 88 + int smbdirect_connection_create_qp(struct smbdirect_socket *sc); 89 + 90 + void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); 91 + 92 + int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc); 93 + 94 + void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); 95 + 96 + struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc); 97 + 98 + void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg); 99 + 100 + struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc); 101 + 102 + void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg); 103 + 104 + void 
smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, 105 + struct smbdirect_recv_io *msg, 106 + u32 data_length); 107 + 108 + struct smbdirect_recv_io * 109 + smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc); 110 + 111 + void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, 112 + u8 peer_initiator_depth, 113 + u8 peer_responder_resources, 114 + const struct rdma_conn_param *param); 115 + 116 + void smbdirect_connection_idle_timer_work(struct work_struct *work); 117 + 118 + u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc); 119 + 120 + int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, 121 + struct ib_send_wr *wr); 122 + 123 + int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg); 124 + 125 + void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc); 126 + 127 + int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc); 128 + 129 + int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc); 130 + 131 + void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); 132 + 133 + int smbdirect_accept_connect_request(struct smbdirect_socket *sc, 134 + const struct rdma_conn_param *param); 135 + 136 + void smbdirect_accept_negotiate_finish(struct smbdirect_socket *sc, u32 ntstatus); 137 + 138 + __init int smbdirect_devices_init(void); 139 + __exit void smbdirect_devices_exit(void); 140 + 141 + #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */
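[Editor's note: illustrative call sites for the three cleanup wrappers declared above; the error values are arbitrary examples. Each variant records the caller's __func__ and __LINE__ for the log message, and the _status variant additionally forces a socket status via the temporary variable in its do/while(0) body.]

	/* default variant, logged at SMBDIRECT_LOG_ERR */
	smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED);

	/* explicit log level */
	smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ENOTCONN);

	/* additionally force a specific socket status */
	smbdirect_socket_schedule_cleanup_status(sc, SMBDIRECT_LOG_INFO, -ECONNRESET,
						 SMBDIRECT_SOCKET_DISCONNECTED);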
+308
fs/smb/common/smbdirect/smbdirect_listen.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (C) 2018, LG Electronics. 5 + * Copyright (c) 2025, Stefan Metzmacher 6 + */ 7 + 8 + #include "smbdirect_internal.h" 9 + 10 + static int smbdirect_listen_rdma_event_handler(struct rdma_cm_id *id, 11 + struct rdma_cm_event *event); 12 + 13 + int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog) 14 + { 15 + int ret; 16 + 17 + if (backlog < 0) 18 + return -EINVAL; 19 + if (!backlog) 20 + backlog = 1; /* use 1 as default for now */ 21 + 22 + if (sc->first_error) 23 + return -EINVAL; 24 + 25 + if (sc->status != SMBDIRECT_SOCKET_CREATED) 26 + return -EINVAL; 27 + 28 + if (WARN_ON_ONCE(!sc->rdma.cm_id)) 29 + return -EINVAL; 30 + 31 + if (sc->rdma.cm_id->device) 32 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 33 + "try to listen on addr: %pISpsfc dev: %.*s\n", 34 + &sc->rdma.cm_id->route.addr.src_addr, 35 + IB_DEVICE_NAME_MAX, 36 + sc->rdma.cm_id->device->name); 37 + else 38 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 39 + "try to listen on addr: %pISpsfc\n", 40 + &sc->rdma.cm_id->route.addr.src_addr); 41 + 42 + /* already checked above */ 43 + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); 44 + sc->status = SMBDIRECT_SOCKET_LISTENING; 45 + sc->rdma.expected_event = RDMA_CM_EVENT_CONNECT_REQUEST; 46 + rdma_lock_handler(sc->rdma.cm_id); 47 + sc->rdma.cm_id->event_handler = smbdirect_listen_rdma_event_handler; 48 + rdma_unlock_handler(sc->rdma.cm_id); 49 + 50 + ret = rdma_listen(sc->rdma.cm_id, backlog); 51 + if (ret) { 52 + sc->first_error = ret; 53 + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 54 + if (sc->rdma.cm_id->device) 55 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 56 + "listening failed %1pe on addr: %pISpsfc dev: %.*s\n", 57 + SMBDIRECT_DEBUG_ERR_PTR(ret), 58 + &sc->rdma.cm_id->route.addr.src_addr, 59 + IB_DEVICE_NAME_MAX, 60 + sc->rdma.cm_id->device->name); 61 + else 62 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 63 + "listening failed %1pe on addr: %pISpsfc\n", 64 + SMBDIRECT_DEBUG_ERR_PTR(ret), 65 + &sc->rdma.cm_id->route.addr.src_addr); 66 + return ret; 67 + } 68 + 69 + /* 70 + * This is a value > 0, checked above, 71 + * so we are able to use sc->listen.backlog == -1, 72 + * as indication that the socket was never 73 + * a listener. 74 + */ 75 + sc->listen.backlog = backlog; 76 + 77 + if (sc->rdma.cm_id->device) 78 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 79 + "listening on addr: %pISpsfc dev: %.*s\n", 80 + &sc->rdma.cm_id->route.addr.src_addr, 81 + IB_DEVICE_NAME_MAX, 82 + sc->rdma.cm_id->device->name); 83 + else 84 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 85 + "listening on addr: %pISpsfc\n", 86 + &sc->rdma.cm_id->route.addr.src_addr); 87 + 88 + /* 89 + * The rest happens async via smbdirect_listen_rdma_event_handler() 90 + */ 91 + return 0; 92 + } 93 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_listen); 94 + 95 + static int smbdirect_new_rdma_event_handler(struct rdma_cm_id *new_id, 96 + struct rdma_cm_event *event) 97 + { 98 + int ret = -ESTALE; 99 + 100 + /* 101 + * This should be replaced before any real work 102 + * starts! So it should never be called! 103 + */ 104 + 105 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 106 + ret = -ENETDOWN; 107 + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) 108 + ret = event->status; 109 + WARN_ONCE(1, 110 + "%s should not be called! 
event=%s status=%d => ret=%1pe\n", 111 + __func__, 112 + rdma_event_msg(event->event), 113 + event->status, 114 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 115 + return -ESTALE; 116 + } 117 + 118 + static int smbdirect_listen_connect_request(struct smbdirect_socket *lsc, 119 + struct rdma_cm_id *new_id, 120 + const struct rdma_cm_event *event); 121 + 122 + static int smbdirect_listen_rdma_event_handler(struct rdma_cm_id *new_id, 123 + struct rdma_cm_event *event) 124 + { 125 + struct smbdirect_socket *lsc = new_id->context; 126 + int ret; 127 + 128 + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { 129 + new_id->context = NULL; 130 + new_id->event_handler = smbdirect_new_rdma_event_handler; 131 + } else 132 + new_id = NULL; 133 + 134 + /* 135 + * cma_cm_event_handler() has 136 + * lockdep_assert_held(&id_priv->handler_mutex); 137 + * 138 + * Mutexes are not allowed in interrupts, 139 + * and we rely on not being in an interrupt here, 140 + * as we might sleep. 141 + */ 142 + WARN_ON_ONCE(in_interrupt()); 143 + 144 + if (event->status || event->event != lsc->rdma.expected_event) { 145 + ret = -ECONNABORTED; 146 + 147 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 148 + ret = -ENETDOWN; 149 + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) 150 + ret = event->status; 151 + 152 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, 153 + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", 154 + smbdirect_socket_status_string(lsc->status), 155 + SMBDIRECT_DEBUG_ERR_PTR(lsc->first_error), 156 + rdma_event_msg(lsc->rdma.expected_event), 157 + rdma_event_msg(event->event), 158 + event->status, 159 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 160 + 161 + /* 162 + * In case of error return it and let the caller 163 + * destroy new_id 164 + */ 165 + smbdirect_socket_schedule_cleanup(lsc, ret); 166 + return new_id ? ret : 0; 167 + } 168 + 169 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_INFO, 170 + "%s (first_error=%1pe) event=%s\n", 171 + smbdirect_socket_status_string(lsc->status), 172 + SMBDIRECT_DEBUG_ERR_PTR(lsc->first_error), 173 + rdma_event_msg(event->event)); 174 + 175 + /* 176 + * In case of error return it and let the caller 177 + * destroy new_id 178 + */ 179 + if (lsc->first_error) 180 + return new_id ? 
lsc->first_error : 0; 181 + 182 + switch (event->event) { 183 + case RDMA_CM_EVENT_CONNECT_REQUEST: 184 + WARN_ON_ONCE(lsc->status != SMBDIRECT_SOCKET_LISTENING); 185 + 186 + /* 187 + * In case of error return it and let the caller 188 + * destroy new_id 189 + */ 190 + ret = smbdirect_listen_connect_request(lsc, new_id, event); 191 + if (ret) 192 + return ret; 193 + return 0; 194 + 195 + default: 196 + break; 197 + } 198 + 199 + /* 200 + * This is an internal error 201 + */ 202 + WARN_ON_ONCE(lsc->rdma.expected_event != RDMA_CM_EVENT_CONNECT_REQUEST); 203 + smbdirect_socket_schedule_cleanup(lsc, -EINVAL); 204 + return 0; 205 + } 206 + 207 + static int smbdirect_listen_connect_request(struct smbdirect_socket *lsc, 208 + struct rdma_cm_id *new_id, 209 + const struct rdma_cm_event *event) 210 + { 211 + const struct smbdirect_socket_parameters *lsp = &lsc->parameters; 212 + struct smbdirect_socket *nsc; 213 + unsigned long flags; 214 + size_t backlog = max_t(size_t, 1, lsc->listen.backlog); 215 + size_t psockets; 216 + size_t rsockets; 217 + int ret; 218 + 219 + if (!smbdirect_frwr_is_supported(&new_id->device->attrs)) { 220 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, 221 + "Fast Registration Work Requests (FRWR) are not supported by device %.*s\n", 222 + IB_DEVICE_NAME_MAX, 223 + new_id->device->name); 224 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, 225 + "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", 226 + new_id->device->attrs.device_cap_flags, 227 + new_id->device->attrs.max_fast_reg_page_list_len); 228 + return -EPROTONOSUPPORT; 229 + } 230 + 231 + if (lsp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB && 232 + !rdma_ib_or_roce(new_id->device, new_id->port_num)) { 233 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, 234 + "Not IB: device: %.*s IW:%u local: %pISpsfc remote: %pISpsfc\n", 235 + IB_DEVICE_NAME_MAX, 236 + new_id->device->name, 237 + rdma_protocol_iwarp(new_id->device, new_id->port_num), 238 + &new_id->route.addr.src_addr, 239 + &new_id->route.addr.dst_addr); 240 + return -EPROTONOSUPPORT; 241 + } 242 + if (lsp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW && 243 + !rdma_protocol_iwarp(new_id->device, new_id->port_num)) { 244 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, 245 + "Not IW: device: %.*s IB:%u local: %pISpsfc remote: %pISpsfc\n", 246 + IB_DEVICE_NAME_MAX, 247 + new_id->device->name, 248 + rdma_ib_or_roce(new_id->device, new_id->port_num), 249 + &new_id->route.addr.src_addr, 250 + &new_id->route.addr.dst_addr); 251 + return -EPROTONOSUPPORT; 252 + } 253 + 254 + spin_lock_irqsave(&lsc->listen.lock, flags); 255 + psockets = list_count_nodes(&lsc->listen.pending); 256 + rsockets = list_count_nodes(&lsc->listen.ready); 257 + spin_unlock_irqrestore(&lsc->listen.lock, flags); 258 + 259 + if (psockets > backlog || 260 + rsockets > backlog || 261 + (psockets + rsockets) > backlog) { 262 + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, 263 + "Backlog[%d][%zu] full pending[%zu] ready[%zu]\n", 264 + lsc->listen.backlog, backlog, psockets, rsockets); 265 + return -EBUSY; 266 + } 267 + 268 + ret = smbdirect_socket_create_accepting(new_id, &nsc); 269 + if (ret) 270 + goto socket_init_failed; 271 + 272 + nsc->logging = lsc->logging; 273 + ret = smbdirect_socket_set_initial_parameters(nsc, &lsc->parameters); 274 + if (ret) 275 + goto set_params_failed; 276 + ret = smbdirect_socket_set_kernel_settings(nsc, 277 + lsc->ib.poll_ctx, 278 + lsc->send_io.mem.gfp_mask); 279 + if (ret) 280 + goto set_settings_failed; 281 + 282 +
spin_lock_irqsave(&lsc->listen.lock, flags); 283 + list_add_tail(&nsc->accept.list, &lsc->listen.pending); 284 + nsc->accept.listener = lsc; 285 + spin_unlock_irqrestore(&lsc->listen.lock, flags); 286 + 287 + ret = smbdirect_accept_connect_request(nsc, &event->param.conn); 288 + if (ret) 289 + goto accept_connect_failed; 290 + 291 + return 0; 292 + 293 + accept_connect_failed: 294 + spin_lock_irqsave(&lsc->listen.lock, flags); 295 + list_del_init(&nsc->accept.list); 296 + nsc->accept.listener = NULL; 297 + spin_unlock_irqrestore(&lsc->listen.lock, flags); 298 + set_settings_failed: 299 + set_params_failed: 300 + /* 301 + * The caller will destroy new_id 302 + */ 303 + nsc->ib.dev = NULL; 304 + nsc->rdma.cm_id = NULL; 305 + smbdirect_socket_release(nsc); 306 + socket_init_failed: 307 + return ret; 308 + }
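[Editor's note: a sketch of the listener lifecycle from a server module's point of view. The setup helper below is hypothetical; smbdirect_socket_listen() and smbdirect_socket_destroy_sync() are the entry points used above and declared in smbdirect_internal.h.]

static int example_start_listener(struct smbdirect_socket *lsc)
{
	int ret;

	/* backlog 0 selects the default of 1 */
	ret = smbdirect_socket_listen(lsc, 0);
	if (ret) {
		smbdirect_socket_destroy_sync(lsc);
		return ret;
	}

	/*
	 * From here on, RDMA_CM_EVENT_CONNECT_REQUEST events are handled
	 * asynchronously by smbdirect_listen_rdma_event_handler().
	 */
	return 0;
}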
+121
fs/smb/common/smbdirect/smbdirect_main.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (c) 2025, Stefan Metzmacher 4 + */ 5 + 6 + #include "smbdirect_internal.h" 7 + #include <linux/module.h> 8 + 9 + struct smbdirect_module_state smbdirect_globals = { 10 + .mutex = __MUTEX_INITIALIZER(smbdirect_globals.mutex), 11 + }; 12 + 13 + static __init int smbdirect_module_init(void) 14 + { 15 + int ret = -ENOMEM; 16 + 17 + pr_notice("subsystem loading...\n"); 18 + mutex_lock(&smbdirect_globals.mutex); 19 + 20 + smbdirect_globals.workqueues.accept = alloc_workqueue("smbdirect-accept", 21 + WQ_SYSFS | 22 + WQ_PERCPU | 23 + WQ_POWER_EFFICIENT, 24 + 0); 25 + if (smbdirect_globals.workqueues.accept == NULL) 26 + goto alloc_accept_wq_failed; 27 + 28 + smbdirect_globals.workqueues.connect = alloc_workqueue("smbdirect-connect", 29 + WQ_SYSFS | 30 + WQ_PERCPU | 31 + WQ_POWER_EFFICIENT, 32 + 0); 33 + if (smbdirect_globals.workqueues.connect == NULL) 34 + goto alloc_connect_wq_failed; 35 + 36 + smbdirect_globals.workqueues.idle = alloc_workqueue("smbdirect-idle", 37 + WQ_SYSFS | 38 + WQ_PERCPU | 39 + WQ_POWER_EFFICIENT, 40 + 0); 41 + if (smbdirect_globals.workqueues.idle == NULL) 42 + goto alloc_idle_wq_failed; 43 + 44 + smbdirect_globals.workqueues.refill = alloc_workqueue("smbdirect-refill", 45 + WQ_HIGHPRI | 46 + WQ_SYSFS | 47 + WQ_PERCPU | 48 + WQ_POWER_EFFICIENT, 49 + 0); 50 + if (smbdirect_globals.workqueues.refill == NULL) 51 + goto alloc_refill_wq_failed; 52 + 53 + smbdirect_globals.workqueues.immediate = alloc_workqueue("smbdirect-immediate", 54 + WQ_HIGHPRI | 55 + WQ_SYSFS | 56 + WQ_PERCPU | 57 + WQ_POWER_EFFICIENT, 58 + 0); 59 + if (smbdirect_globals.workqueues.immediate == NULL) 60 + goto alloc_immediate_wq_failed; 61 + 62 + smbdirect_globals.workqueues.cleanup = alloc_workqueue("smbdirect-cleanup", 63 + WQ_MEM_RECLAIM | 64 + WQ_HIGHPRI | 65 + WQ_SYSFS | 66 + WQ_PERCPU | 67 + WQ_POWER_EFFICIENT, 68 + 0); 69 + if (smbdirect_globals.workqueues.cleanup == NULL) 70 + goto alloc_cleanup_wq_failed; 71 + 72 + ret = smbdirect_devices_init(); 73 + if (ret) 74 + goto devices_init_failed; 75 + 76 + mutex_unlock(&smbdirect_globals.mutex); 77 + pr_notice("subsystem loaded\n"); 78 + return 0; 79 + 80 + devices_init_failed: 81 + destroy_workqueue(smbdirect_globals.workqueues.cleanup); 82 + alloc_cleanup_wq_failed: 83 + destroy_workqueue(smbdirect_globals.workqueues.immediate); 84 + alloc_immediate_wq_failed: 85 + destroy_workqueue(smbdirect_globals.workqueues.refill); 86 + alloc_refill_wq_failed: 87 + destroy_workqueue(smbdirect_globals.workqueues.idle); 88 + alloc_idle_wq_failed: 89 + destroy_workqueue(smbdirect_globals.workqueues.connect); 90 + alloc_connect_wq_failed: 91 + destroy_workqueue(smbdirect_globals.workqueues.accept); 92 + alloc_accept_wq_failed: 93 + mutex_unlock(&smbdirect_globals.mutex); 94 + pr_crit("failed to load: %d (%1pe)\n", 95 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 96 + return ret; 97 + } 98 + 99 + static __exit void smbdirect_module_exit(void) 100 + { 101 + pr_notice("subsystem unloading...\n"); 102 + mutex_lock(&smbdirect_globals.mutex); 103 + 104 + smbdirect_devices_exit(); 105 + 106 + destroy_workqueue(smbdirect_globals.workqueues.accept); 107 + destroy_workqueue(smbdirect_globals.workqueues.connect); 108 + destroy_workqueue(smbdirect_globals.workqueues.idle); 109 + destroy_workqueue(smbdirect_globals.workqueues.refill); 110 + destroy_workqueue(smbdirect_globals.workqueues.immediate); 111 + destroy_workqueue(smbdirect_globals.workqueues.cleanup); 112 + 113 +
mutex_unlock(&smbdirect_globals.mutex); 114 + pr_notice("subsystem unloaded\n"); 115 + } 116 + 117 + module_init(smbdirect_module_init); 118 + module_exit(smbdirect_module_exit); 119 + 120 + MODULE_DESCRIPTION("smbdirect subsystem"); 121 + MODULE_LICENSE("GPL");
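[Editor's note: the init path above follows the usual allocate-then-unwind idiom; here is a generic reduced sketch of the pattern. All names below are illustrative and not part of this diff.]

static struct workqueue_struct *example_wq_a, *example_wq_b;

static int example_init(void)
{
	example_wq_a = alloc_workqueue("example-a", WQ_SYSFS | WQ_PERCPU, 0);
	if (!example_wq_a)
		goto fail_a;
	example_wq_b = alloc_workqueue("example-b", WQ_SYSFS | WQ_PERCPU, 0);
	if (!example_wq_b)
		goto fail_b;
	return 0;

fail_b:
	destroy_workqueue(example_wq_a);	/* unwind in reverse allocation order */
fail_a:
	return -ENOMEM;
}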
+493
fs/smb/common/smbdirect/smbdirect_mr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (c) 2025, Stefan Metzmacher 5 + */ 6 + 7 + #include "smbdirect_internal.h" 8 + 9 + /* 10 + * Allocate MRs used for RDMA read/write 11 + * The number of MRs will not exceed hardware capability in responder_resources 12 + * All MRs are kept in mr_io.all.list. An MR can be recovered after it's used; 13 + * recovery is done in smbdirect_connection_deregister_mr_io(). The content of a list entry changes 14 + * as MRs are used and recovered for I/O, but the list links will not change 15 + */ 16 + int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) 17 + { 18 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 19 + struct smbdirect_mr_io *mr; 20 + int ret; 21 + u32 i; 22 + 23 + if (sp->responder_resources == 0) { 24 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 25 + "responder_resources negotiated as 0\n"); 26 + return -EINVAL; 27 + } 28 + 29 + /* Allocate more MRs (2x) than hardware responder_resources */ 30 + for (i = 0; i < sp->responder_resources * 2; i++) { 31 + mr = kzalloc_obj(*mr); 32 + if (!mr) { 33 + ret = -ENOMEM; 34 + goto kzalloc_mr_failed; 35 + } 36 + 37 + kref_init(&mr->kref); 38 + mutex_init(&mr->mutex); 39 + 40 + mr->mr = ib_alloc_mr(sc->ib.pd, 41 + sc->mr_io.type, 42 + sp->max_frmr_depth); 43 + if (IS_ERR(mr->mr)) { 44 + ret = PTR_ERR(mr->mr); 45 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 46 + "ib_alloc_mr failed ret=%d (%1pe) type=0x%x max_frmr_depth=%u\n", 47 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret), 48 + sc->mr_io.type, sp->max_frmr_depth); 49 + goto ib_alloc_mr_failed; 50 + } 51 + mr->sgt.sgl = kzalloc_objs(struct scatterlist, sp->max_frmr_depth); 52 + if (!mr->sgt.sgl) { 53 + ret = -ENOMEM; 54 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 55 + "failed to allocate sgl, max_frmr_depth=%u\n", 56 + sp->max_frmr_depth); 57 + goto kcalloc_sgl_failed; 58 + } 59 + mr->state = SMBDIRECT_MR_READY; 60 + mr->socket = sc; 61 + 62 + list_add_tail(&mr->list, &sc->mr_io.all.list); 63 + atomic_inc(&sc->mr_io.ready.count); 64 + } 65 + 66 + return 0; 67 + 68 + kcalloc_sgl_failed: 69 + ib_dereg_mr(mr->mr); 70 + ib_alloc_mr_failed: 71 + mutex_destroy(&mr->mutex); 72 + kfree(mr); 73 + kzalloc_mr_failed: 74 + smbdirect_connection_destroy_mr_list(sc); 75 + return ret; 76 + } 77 + 78 + static void smbdirect_mr_io_disable_locked(struct smbdirect_mr_io *mr) 79 + { 80 + struct smbdirect_socket *sc = mr->socket; 81 + 82 + lockdep_assert_held(&mr->mutex); 83 + 84 + if (mr->state == SMBDIRECT_MR_DISABLED) 85 + return; 86 + 87 + if (mr->mr) 88 + ib_dereg_mr(mr->mr); 89 + if (mr->sgt.nents) 90 + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 91 + kfree(mr->sgt.sgl); 92 + 93 + mr->mr = NULL; 94 + mr->sgt.sgl = NULL; 95 + mr->sgt.nents = 0; 96 + 97 + mr->state = SMBDIRECT_MR_DISABLED; 98 + } 99 + 100 + static void smbdirect_mr_io_free_locked(struct kref *kref) 101 + { 102 + struct smbdirect_mr_io *mr = 103 + container_of(kref, struct smbdirect_mr_io, kref); 104 + 105 + lockdep_assert_held(&mr->mutex); 106 + 107 + /* 108 + * smbdirect_mr_io_disable_locked() should already be called!
109 + */ 110 + if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) 111 + smbdirect_mr_io_disable_locked(mr); 112 + 113 + mutex_unlock(&mr->mutex); 114 + mutex_destroy(&mr->mutex); 115 + kfree(mr); 116 + } 117 + 118 + void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) 119 + { 120 + struct smbdirect_mr_io *mr, *tmp; 121 + LIST_HEAD(all_list); 122 + unsigned long flags; 123 + 124 + spin_lock_irqsave(&sc->mr_io.all.lock, flags); 125 + list_splice_tail_init(&sc->mr_io.all.list, &all_list); 126 + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 127 + 128 + list_for_each_entry_safe(mr, tmp, &all_list, list) { 129 + mutex_lock(&mr->mutex); 130 + 131 + smbdirect_mr_io_disable_locked(mr); 132 + list_del(&mr->list); 133 + mr->socket = NULL; 134 + 135 + /* 136 + * No kref_put_mutex() as it's already locked. 137 + * 138 + * If smbdirect_mr_io_free_locked() is called 139 + * and the mutex is unlocked and mr is gone, 140 + * in that case kref_put() returned 1. 141 + * 142 + * If kref_put() returned 0 we know that 143 + * smbdirect_mr_io_free_locked() didn't 144 + * run. Not by us nor by anyone else, as we 145 + * still hold the mutex, so we need to unlock. 146 + * 147 + * If the mr is still registered it will 148 + * be dangling (detached from the connection), 149 + * waiting for smbdirect_connection_deregister_mr_io() to be 150 + * called in order to free the memory. 151 + */ 152 + if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) 153 + mutex_unlock(&mr->mutex); 154 + } 155 + } 156 + 157 + /* 158 + * Get an MR from the MR list. This function waits until there is at least one MR 159 + * available in the list. Several CPUs issuing I/O may try to get an MR 160 + * at the same time; sc->mr_io.all.lock is used to protect this situation. 161 + */ 162 + static struct smbdirect_mr_io * 163 + smbdirect_connection_get_mr_io(struct smbdirect_socket *sc) 164 + { 165 + struct smbdirect_mr_io *mr; 166 + unsigned long flags; 167 + int ret; 168 + 169 + again: 170 + ret = wait_event_interruptible(sc->mr_io.ready.wait_queue, 171 + atomic_read(&sc->mr_io.ready.count) || 172 + sc->status != SMBDIRECT_SOCKET_CONNECTED); 173 + if (ret) { 174 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 175 + "wait_event_interruptible ret=%d (%1pe)\n", 176 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 177 + return NULL; 178 + } 179 + 180 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 181 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 182 + "sc->status=%s sc->first_error=%1pe\n", 183 + smbdirect_socket_status_string(sc->status), 184 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 185 + return NULL; 186 + } 187 + 188 + spin_lock_irqsave(&sc->mr_io.all.lock, flags); 189 + list_for_each_entry(mr, &sc->mr_io.all.list, list) { 190 + if (mr->state == SMBDIRECT_MR_READY) { 191 + mr->state = SMBDIRECT_MR_REGISTERED; 192 + kref_get(&mr->kref); 193 + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 194 + atomic_dec(&sc->mr_io.ready.count); 195 + atomic_inc(&sc->mr_io.used.count); 196 + return mr; 197 + } 198 + } 199 + 200 + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); 201 + /* 202 + * It is possible that we could fail to get an MR because other processes may 203 + * try to acquire an MR at the same time. If this is the case, retry it.
204 + */ 205 + goto again; 206 + } 207 + 208 + static void smbdirect_connection_mr_io_register_done(struct ib_cq *cq, struct ib_wc *wc) 209 + { 210 + struct smbdirect_mr_io *mr = 211 + container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); 212 + struct smbdirect_socket *sc = mr->socket; 213 + 214 + if (wc->status != IB_WC_SUCCESS) { 215 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 216 + "wc->status=%s opcode=%d\n", 217 + ib_wc_status_msg(wc->status), wc->opcode); 218 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 219 + } 220 + } 221 + 222 + static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct ib_wc *wc) 223 + { 224 + struct smbdirect_mr_io *mr = 225 + container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); 226 + struct smbdirect_socket *sc = mr->socket; 227 + 228 + mr->state = SMBDIRECT_MR_INVALIDATED; 229 + if (wc->status != IB_WC_SUCCESS) { 230 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 231 + "invalidate failed status=%s\n", 232 + ib_wc_status_msg(wc->status)); 233 + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); 234 + } 235 + complete(&mr->invalidate_done); 236 + } 237 + 238 + /* 239 + * Transcribe the pages from an iterator into an MR scatterlist. 240 + */ 241 + static int smbdirect_iter_to_sgt(struct iov_iter *iter, 242 + struct sg_table *sgt, 243 + unsigned int max_sg) 244 + { 245 + int ret; 246 + 247 + memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); 248 + 249 + ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); 250 + WARN_ON(ret < 0); 251 + if (sgt->nents > 0) 252 + sg_mark_end(&sgt->sgl[sgt->nents - 1]); 253 + 254 + return ret; 255 + } 256 + 257 + /* 258 + * Register memory for RDMA read/write 259 + * iter: the buffer to register memory with 260 + * writing: true if this is a RDMA write (SMB read), false for RDMA read 261 + * need_invalidate: true if this MR needs to be locally invalidated after I/O 262 + * return value: the MR registered, NULL if failed. 263 + */ 264 + struct smbdirect_mr_io * 265 + smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, 266 + struct iov_iter *iter, 267 + bool writing, 268 + bool need_invalidate) 269 + { 270 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 271 + struct smbdirect_mr_io *mr; 272 + int ret, num_pages; 273 + struct ib_reg_wr *reg_wr; 274 + 275 + num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); 276 + if (num_pages > sp->max_frmr_depth) { 277 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 278 + "num_pages=%d max_frmr_depth=%d\n", 279 + num_pages, sp->max_frmr_depth); 280 + WARN_ON_ONCE(1); 281 + return NULL; 282 + } 283 + 284 + mr = smbdirect_connection_get_mr_io(sc); 285 + if (!mr) { 286 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 287 + "smbdirect_connection_get_mr_io returning NULL\n"); 288 + return NULL; 289 + } 290 + 291 + mutex_lock(&mr->mutex); 292 + 293 + mr->dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; 294 + mr->need_invalidate = need_invalidate; 295 + mr->sgt.nents = 0; 296 + mr->sgt.orig_nents = 0; 297 + 298 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO, 299 + "num_pages=%u count=%zu depth=%u\n", 300 + num_pages, iov_iter_count(iter), sp->max_frmr_depth); 301 + smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth); 302 + 303 + ret = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 304 + if (!ret) { 305 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 306 + "ib_dma_map_sg num_pages=%u dir=%x ret=%d (%1pe)\n", 307 + num_pages, mr->dir, ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 308 + goto dma_map_error; 309 + } 310 + 311 + ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); 312 + if (ret != mr->sgt.nents) { 313 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 314 + "ib_map_mr_sg failed ret = %d nents = %u\n", 315 + ret, mr->sgt.nents); 316 + goto map_mr_error; 317 + } 318 + 319 + ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); 320 + reg_wr = &mr->wr; 321 + reg_wr->wr.opcode = IB_WR_REG_MR; 322 + mr->cqe.done = smbdirect_connection_mr_io_register_done; 323 + reg_wr->wr.wr_cqe = &mr->cqe; 324 + reg_wr->wr.num_sge = 0; 325 + reg_wr->wr.send_flags = IB_SEND_SIGNALED; 326 + reg_wr->mr = mr->mr; 327 + reg_wr->key = mr->mr->rkey; 328 + reg_wr->access = writing ? 329 + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 330 + IB_ACCESS_REMOTE_READ; 331 + 332 + /* 333 + * There is no need to wait for completion on ib_post_send 334 + * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution 335 + * on the next ib_post_send when we actually send I/O to the remote peer 336 + */ 337 + ret = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); 338 + if (!ret) { 339 + /* 340 + * smbdirect_connection_get_mr_io() gave us a reference 341 + * via kref_get(&mr->kref), we keep that and let 342 + * the caller use smbdirect_connection_deregister_mr_io() 343 + * to remove it again. 344 + */ 345 + mutex_unlock(&mr->mutex); 346 + return mr; 347 + } 348 + 349 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 350 + "ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n", 351 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key); 352 + 353 + map_mr_error: 354 + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 355 + 356 + dma_map_error: 357 + mr->sgt.nents = 0; 358 + mr->state = SMBDIRECT_MR_ERROR; 359 + atomic_dec(&sc->mr_io.used.count); 360 + 361 + smbdirect_socket_schedule_cleanup(sc, ret); 362 + 363 + /* 364 + * smbdirect_connection_get_mr_io() gave us a reference 365 + * via kref_get(&mr->kref), we need to remove it again 366 + * on error. 367 + * 368 + * No kref_put_mutex() as it's already locked. 369 + * 370 + * If smbdirect_mr_io_free_locked() is called 371 + * and the mutex is unlocked and mr is gone, 372 + * in that case kref_put() returned 1. 373 + * 374 + * If kref_put() returned 0 we know that 375 + * smbdirect_mr_io_free_locked() didn't 376 + * run. Not by us nor by anyone else, as we 377 + * still hold the mutex, so we need to unlock. 
378 + */ 379 + if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) 380 + mutex_unlock(&mr->mutex); 381 + return NULL; 382 + } 383 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io); 384 + 385 + void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, 386 + struct smbdirect_buffer_descriptor_v1 *v1) 387 + { 388 + mutex_lock(&mr->mutex); 389 + if (mr->state == SMBDIRECT_MR_REGISTERED) { 390 + v1->offset = cpu_to_le64(mr->mr->iova); 391 + v1->token = cpu_to_le32(mr->mr->rkey); 392 + v1->length = cpu_to_le32(mr->mr->length); 393 + } else { 394 + v1->offset = cpu_to_le64(U64_MAX); 395 + v1->token = cpu_to_le32(U32_MAX); 396 + v1->length = cpu_to_le32(U32_MAX); 397 + } 398 + mutex_unlock(&mr->mutex); 399 + } 400 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor); 401 + 402 + /* 403 + * Deregister an MR after I/O is done. 404 + * This function may wait if remote invalidation is not used 405 + * and we have to locally invalidate the buffer to prevent the data 406 + * from being modified by the remote peer after the upper layer consumes it. 407 + */ 408 + void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) 409 + { 410 + struct smbdirect_socket *sc = mr->socket; 411 + int ret = 0; 412 + 413 + lock_again: 414 + mutex_lock(&mr->mutex); 415 + if (mr->state == SMBDIRECT_MR_DISABLED) 416 + goto put_kref; 417 + 418 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 419 + smbdirect_mr_io_disable_locked(mr); 420 + goto put_kref; 421 + } 422 + 423 + if (mr->need_invalidate) { 424 + struct ib_send_wr *wr = &mr->inv_wr; 425 + 426 + /* Need to finish local invalidation before returning */ 427 + wr->opcode = IB_WR_LOCAL_INV; 428 + mr->cqe.done = smbdirect_connection_mr_io_local_inv_done; 429 + wr->wr_cqe = &mr->cqe; 430 + wr->num_sge = 0; 431 + wr->ex.invalidate_rkey = mr->mr->rkey; 432 + wr->send_flags = IB_SEND_SIGNALED; 433 + 434 + init_completion(&mr->invalidate_done); 435 + ret = ib_post_send(sc->ib.qp, wr, NULL); 436 + if (ret) { 437 + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, 438 + "ib_post_send failed ret=%d (%1pe)\n", 439 + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); 440 + smbdirect_mr_io_disable_locked(mr); 441 + smbdirect_socket_schedule_cleanup(sc, ret); 442 + goto done; 443 + } 444 + 445 + /* 446 + * We still hold the reference to mr 447 + * so we can unlock while waiting. 448 + */ 449 + mutex_unlock(&mr->mutex); 450 + wait_for_completion(&mr->invalidate_done); 451 + mr->need_invalidate = false; 452 + goto lock_again; 453 + } else 454 + /* 455 + * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED 456 + * and defer to mr_recovery_work to recover the MR for next use 457 + */ 458 + mr->state = SMBDIRECT_MR_INVALIDATED; 459 + 460 + if (mr->sgt.nents) { 461 + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); 462 + mr->sgt.nents = 0; 463 + } 464 + 465 + WARN_ONCE(mr->state != SMBDIRECT_MR_INVALIDATED, 466 + "mr->state[%u] != SMBDIRECT_MR_INVALIDATED[%u]\n", 467 + mr->state, SMBDIRECT_MR_INVALIDATED); 468 + mr->state = SMBDIRECT_MR_READY; 469 + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) 470 + wake_up(&sc->mr_io.ready.wait_queue); 471 + 472 + done: 473 + atomic_dec(&sc->mr_io.used.count); 474 + 475 + put_kref: 476 + /* 477 + * No kref_put_mutex() as it's already locked. 478 + * 479 + * If smbdirect_mr_io_free_locked() is called 480 + * and the mutex is unlocked and mr is gone, 481 + * in that case kref_put() returned 1. 
482 + * 483 + * If kref_put() returned 0 we know that 484 + * smbdirect_mr_io_free_locked() didn't 485 + * run. Not by us nor by anyone else, as we 486 + * still hold the mutex, so we need to unlock 487 + * and keep the mr in SMBDIRECT_MR_READY or 488 + * SMBDIRECT_MR_ERROR state. 489 + */ 490 + if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) 491 + mutex_unlock(&mr->mutex); 492 + } 493 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_deregister_mr_io);
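The three exported functions above form a register/describe/deregister cycle. A minimal sketch of the intended caller pattern, assuming a connected socket and an iov_iter over the caller's pages; the wrapper function and the omitted PDU plumbing are hypothetical, only the smbdirect_* calls come from this series:

/*
 * Sketch only: register client memory so the peer can RDMA-write
 * into it (the SMB read case), publish it via a v1 buffer
 * descriptor, then deregister once the upper layer consumed the data.
 */
static int example_smb_read_setup(struct smbdirect_socket *sc,
				  struct iov_iter *iter,
				  struct smbdirect_buffer_descriptor_v1 *v1)
{
	struct smbdirect_mr_io *mr;

	/* writing=true: the remote peer writes, we receive */
	mr = smbdirect_connection_register_mr_io(sc, iter, true, true);
	if (!mr)
		return -EAGAIN;

	/* Fills offset/token/length, or all-ones on a failed registration */
	smbdirect_mr_io_fill_buffer_descriptor(mr, v1);

	/* ... send the request carrying *v1 and wait for the response ... */

	/* May sleep on IB_WR_LOCAL_INV since need_invalidate was true */
	smbdirect_connection_deregister_mr_io(mr);
	return 0;
}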
+4
fs/smb/common/smbdirect/smbdirect_pdu.h
··· 8 8 9 9 #define SMBDIRECT_V1 0x0100 10 10 11 + /* SMBD minimum receive size and fragmented size as defined in [MS-SMBD] */ 12 + #define SMBDIRECT_MIN_RECEIVE_SIZE 128 13 + #define SMBDIRECT_MIN_FRAGMENTED_SIZE 131072 14 + 11 15 /* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ 12 16 struct smbdirect_negotiate_req { 13 17 __le16 min_version;
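The two added constants are the hard floors from [MS-SMBD]. A hedged sketch of how a negotiate handler could apply them; the field names follow the request layout in this header, while the helper itself is hypothetical:

/* Sketch: refuse a peer offer below the [MS-SMBD] minimums. */
static bool example_negotiate_sizes_ok(const struct smbdirect_negotiate_req *req)
{
	if (le32_to_cpu(req->max_receive_size) < SMBDIRECT_MIN_RECEIVE_SIZE)
		return false;
	if (le32_to_cpu(req->max_fragmented_size) < SMBDIRECT_MIN_FRAGMENTED_SIZE)
		return false;
	return true;
}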
+148
fs/smb/common/smbdirect/smbdirect_public.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Copyright (C) 2025, Stefan Metzmacher 4 + */ 5 + 6 + #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ 7 + #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ 8 + 9 + struct smbdirect_buffer_descriptor_v1; 10 + struct smbdirect_socket_parameters; 11 + 12 + struct smbdirect_socket; 13 + struct smbdirect_send_batch; 14 + struct smbdirect_mr_io; 15 + 16 + #define __SMBDIRECT_EXPORT_SYMBOL__(__sym) EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") 17 + 18 + #include <rdma/rw.h> 19 + 20 + u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); 21 + 22 + bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); 23 + 24 + int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc); 25 + 26 + int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc); 27 + 28 + int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, 29 + const struct smbdirect_socket_parameters *sp); 30 + 31 + const struct smbdirect_socket_parameters * 32 + smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc); 33 + 34 + int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, 35 + enum ib_poll_context poll_ctx, 36 + gfp_t gfp_mask); 37 + 38 + #define SMBDIRECT_LOG_ERR 0x0 39 + #define SMBDIRECT_LOG_INFO 0x1 40 + 41 + #define SMBDIRECT_LOG_OUTGOING 0x1 42 + #define SMBDIRECT_LOG_INCOMING 0x2 43 + #define SMBDIRECT_LOG_READ 0x4 44 + #define SMBDIRECT_LOG_WRITE 0x8 45 + #define SMBDIRECT_LOG_RDMA_SEND 0x10 46 + #define SMBDIRECT_LOG_RDMA_RECV 0x20 47 + #define SMBDIRECT_LOG_KEEP_ALIVE 0x40 48 + #define SMBDIRECT_LOG_RDMA_EVENT 0x80 49 + #define SMBDIRECT_LOG_RDMA_MR 0x100 50 + #define SMBDIRECT_LOG_RDMA_RW 0x200 51 + #define SMBDIRECT_LOG_NEGOTIATE 0x400 52 + void smbdirect_socket_set_logging(struct smbdirect_socket *sc, 53 + void *private_ptr, 54 + bool (*needed)(struct smbdirect_socket *sc, 55 + void *private_ptr, 56 + unsigned int lvl, 57 + unsigned int cls), 58 + void (*vaprintf)(struct smbdirect_socket *sc, 59 + const char *func, 60 + unsigned int line, 61 + void *private_ptr, 62 + unsigned int lvl, 63 + unsigned int cls, 64 + struct va_format *vaf)); 65 + 66 + bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); 67 + 68 + int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); 69 + 70 + int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr); 71 + 72 + void smbdirect_socket_shutdown(struct smbdirect_socket *sc); 73 + 74 + void smbdirect_socket_release(struct smbdirect_socket *sc); 75 + 76 + int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, 77 + struct smbdirect_send_batch *batch, 78 + bool is_last); 79 + 80 + /* 81 + * This is only temporary and only needed 82 + * as long as the client still needs 83 + * to use smbdirect_connection_send_single_iter() 84 + */ 85 + struct smbdirect_send_batch_storage { 86 + union { 87 + struct list_head __msg_list; 88 + __aligned_u64 __space[5]; 89 + }; 90 + }; 91 + 92 + struct smbdirect_send_batch * 93 + smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, 94 + bool need_invalidate_rkey, 95 + unsigned int remote_key); 96 + 97 + int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, 98 + struct smbdirect_send_batch *batch, 99 + struct iov_iter *iter, 100 + unsigned int flags, 101 + u32 remaining_data_length); 102 + 103 + int smbdirect_connection_send_wait_zero_pending(struct 
smbdirect_socket *sc); 104 + 105 + int smbdirect_connection_send_iter(struct smbdirect_socket *sc, 106 + struct iov_iter *iter, 107 + unsigned int flags, 108 + bool need_invalidate, 109 + unsigned int remote_key); 110 + 111 + int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, 112 + struct msghdr *msg, 113 + unsigned int flags); 114 + 115 + int smbdirect_connect(struct smbdirect_socket *sc, 116 + const struct sockaddr *dst); 117 + 118 + int smbdirect_connect_sync(struct smbdirect_socket *sc, 119 + const struct sockaddr *dst); 120 + 121 + int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog); 122 + 123 + struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, 124 + long timeo, 125 + struct proto_accept_arg *arg); 126 + 127 + int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, 128 + void *buf, size_t buf_len, 129 + struct smbdirect_buffer_descriptor_v1 *desc, 130 + size_t desc_len, 131 + bool is_read); 132 + 133 + struct smbdirect_mr_io * 134 + smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, 135 + struct iov_iter *iter, 136 + bool writing, 137 + bool need_invalidate); 138 + 139 + void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, 140 + struct smbdirect_buffer_descriptor_v1 *v1); 141 + 142 + void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr); 143 + 144 + void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, 145 + unsigned int rdma_readwrite_threshold, 146 + struct seq_file *m); 147 + 148 + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ */
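Taken together the header reads like a small socket API. A hypothetical, minimal client flow built only from the declarations above, assuming the caller filled in params beforehand and with error unwinding abbreviated:

/* Sketch: create, connect, send once, then tear down. */
static int example_client(struct net *net, const struct sockaddr *dst,
			  const struct smbdirect_socket_parameters *params,
			  struct iov_iter *request)
{
	struct smbdirect_socket *sc;
	int ret;

	ret = smbdirect_socket_create_kern(net, &sc);
	if (ret)
		return ret;

	ret = smbdirect_socket_set_initial_parameters(sc, params);
	if (ret)
		goto out_release;

	ret = smbdirect_connect_sync(sc, dst);
	if (ret)
		goto out_release;

	/* need_invalidate=false and remote_key=0 for a plain send */
	ret = smbdirect_connection_send_iter(sc, request, 0, false, 0);

out_release:
	smbdirect_socket_release(sc);
	return ret;
}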
+255
fs/smb/common/smbdirect/smbdirect_rw.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (C) 2018, LG Electronics. 5 + * Copyright (c) 2025, Stefan Metzmacher 6 + */ 7 + 8 + #include "smbdirect_internal.h" 9 + 10 + static int smbdirect_connection_wait_for_rw_credits(struct smbdirect_socket *sc, 11 + int credits) 12 + { 13 + return smbdirect_socket_wait_for_credits(sc, 14 + SMBDIRECT_SOCKET_CONNECTED, 15 + -ENOTCONN, 16 + &sc->rw_io.credits.wait_queue, 17 + &sc->rw_io.credits.count, 18 + credits); 19 + } 20 + 21 + static int smbdirect_connection_calc_rw_credits(struct smbdirect_socket *sc, 22 + const void *buf, 23 + size_t len) 24 + { 25 + return DIV_ROUND_UP(smbdirect_get_buf_page_count(buf, len), 26 + sc->rw_io.credits.num_pages); 27 + } 28 + 29 + static int smbdirect_connection_rdma_get_sg_list(void *buf, 30 + size_t size, 31 + struct scatterlist *sg_list, 32 + size_t nentries) 33 + { 34 + bool high = is_vmalloc_addr(buf); 35 + struct page *page; 36 + size_t offset, len; 37 + int i = 0; 38 + 39 + if (size == 0 || nentries < smbdirect_get_buf_page_count(buf, size)) 40 + return -EINVAL; 41 + 42 + offset = offset_in_page(buf); 43 + buf -= offset; 44 + while (size > 0) { 45 + len = min_t(size_t, PAGE_SIZE - offset, size); 46 + if (high) 47 + page = vmalloc_to_page(buf); 48 + else 49 + page = kmap_to_page(buf); 50 + 51 + if (!sg_list) 52 + return -EINVAL; 53 + sg_set_page(sg_list, page, len, offset); 54 + sg_list = sg_next(sg_list); 55 + 56 + buf += PAGE_SIZE; 57 + size -= len; 58 + offset = 0; 59 + i++; 60 + } 61 + 62 + return i; 63 + } 64 + 65 + static void smbdirect_connection_rw_io_free(struct smbdirect_rw_io *msg, 66 + enum dma_data_direction dir) 67 + { 68 + struct smbdirect_socket *sc = msg->socket; 69 + 70 + rdma_rw_ctx_destroy(&msg->rdma_ctx, 71 + sc->ib.qp, 72 + sc->ib.qp->port, 73 + msg->sgt.sgl, 74 + msg->sgt.nents, 75 + dir); 76 + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 77 + kfree(msg); 78 + } 79 + 80 + static void smbdirect_connection_rdma_rw_done(struct ib_cq *cq, struct ib_wc *wc, 81 + enum dma_data_direction dir) 82 + { 83 + struct smbdirect_rw_io *msg = 84 + container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe); 85 + struct smbdirect_socket *sc = msg->socket; 86 + 87 + if (wc->status != IB_WC_SUCCESS) { 88 + msg->error = -EIO; 89 + pr_err("read/write error. opcode = %d, status = %s(%d)\n", 90 + wc->opcode, ib_wc_status_msg(wc->status), wc->status); 91 + if (wc->status != IB_WC_WR_FLUSH_ERR) 92 + smbdirect_socket_schedule_cleanup(sc, msg->error); 93 + } 94 + 95 + complete(msg->completion); 96 + } 97 + 98 + static void smbdirect_connection_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) 99 + { 100 + smbdirect_connection_rdma_rw_done(cq, wc, DMA_FROM_DEVICE); 101 + } 102 + 103 + static void smbdirect_connection_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) 104 + { 105 + smbdirect_connection_rdma_rw_done(cq, wc, DMA_TO_DEVICE); 106 + } 107 + 108 + int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, 109 + void *buf, size_t buf_len, 110 + struct smbdirect_buffer_descriptor_v1 *desc, 111 + size_t desc_len, 112 + bool is_read) 113 + { 114 + const struct smbdirect_socket_parameters *sp = &sc->parameters; 115 + enum dma_data_direction direction = is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; 116 + struct smbdirect_rw_io *msg, *next_msg; 117 + size_t i; 118 + int ret; 119 + DECLARE_COMPLETION_ONSTACK(completion); 120 + struct ib_send_wr *first_wr; 121 + LIST_HEAD(msg_list); 122 + u8 *desc_buf; 123 + int credits_needed; 124 + size_t desc_buf_len, desc_num = 0; 125 + 126 + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 127 + return -ENOTCONN; 128 + 129 + if (buf_len > sp->max_read_write_size) 130 + return -EINVAL; 131 + 132 + /* calculate needed credits */ 133 + credits_needed = 0; 134 + desc_buf = buf; 135 + for (i = 0; i < desc_len / sizeof(*desc); i++) { 136 + if (!buf_len) 137 + break; 138 + 139 + desc_buf_len = le32_to_cpu(desc[i].length); 140 + if (!desc_buf_len) 141 + return -EINVAL; 142 + 143 + if (desc_buf_len > buf_len) { 144 + desc_buf_len = buf_len; 145 + desc[i].length = cpu_to_le32(desc_buf_len); 146 + buf_len = 0; 147 + } 148 + 149 + credits_needed += smbdirect_connection_calc_rw_credits(sc, 150 + desc_buf, 151 + desc_buf_len); 152 + desc_buf += desc_buf_len; 153 + buf_len -= desc_buf_len; 154 + desc_num++; 155 + } 156 + 157 + smbdirect_log_rdma_rw(sc, SMBDIRECT_LOG_INFO, 158 + "RDMA %s, len %zu, needed credits %d\n", 159 + str_read_write(is_read), buf_len, credits_needed); 160 + 161 + ret = smbdirect_connection_wait_for_rw_credits(sc, credits_needed); 162 + if (ret < 0) 163 + return ret; 164 + 165 + /* build rdma_rw_ctx for each descriptor */ 166 + desc_buf = buf; 167 + for (i = 0; i < desc_num; i++) { 168 + size_t page_count; 169 + 170 + msg = kzalloc_flex(*msg, sg_list, SG_CHUNK_SIZE, 171 + sc->rw_io.mem.gfp_mask); 172 + if (!msg) { 173 + ret = -ENOMEM; 174 + goto out; 175 + } 176 + 177 + desc_buf_len = le32_to_cpu(desc[i].length); 178 + page_count = smbdirect_get_buf_page_count(desc_buf, desc_buf_len); 179 + 180 + msg->socket = sc; 181 + msg->cqe.done = is_read ? 
182 + smbdirect_connection_rdma_read_done : 183 + smbdirect_connection_rdma_write_done; 184 + msg->completion = &completion; 185 + 186 + msg->sgt.sgl = &msg->sg_list[0]; 187 + ret = sg_alloc_table_chained(&msg->sgt, 188 + page_count, 189 + msg->sg_list, 190 + SG_CHUNK_SIZE); 191 + if (ret) { 192 + ret = -ENOMEM; 193 + goto free_msg; 194 + } 195 + 196 + ret = smbdirect_connection_rdma_get_sg_list(desc_buf, 197 + desc_buf_len, 198 + msg->sgt.sgl, 199 + msg->sgt.orig_nents); 200 + if (ret < 0) 201 + goto free_table; 202 + 203 + ret = rdma_rw_ctx_init(&msg->rdma_ctx, 204 + sc->ib.qp, 205 + sc->ib.qp->port, 206 + msg->sgt.sgl, 207 + page_count, 208 + 0, 209 + le64_to_cpu(desc[i].offset), 210 + le32_to_cpu(desc[i].token), 211 + direction); 212 + if (ret < 0) { 213 + pr_err("failed to init rdma_rw_ctx: %d\n", ret); 214 + goto free_table; 215 + } 216 + 217 + list_add_tail(&msg->list, &msg_list); 218 + desc_buf += desc_buf_len; 219 + } 220 + 221 + /* concatenate work requests of rdma_rw_ctxs */ 222 + first_wr = NULL; 223 + list_for_each_entry_reverse(msg, &msg_list, list) { 224 + first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, 225 + sc->ib.qp, 226 + sc->ib.qp->port, 227 + &msg->cqe, 228 + first_wr); 229 + } 230 + 231 + ret = ib_post_send(sc->ib.qp, first_wr, NULL); 232 + if (ret) { 233 + pr_err("failed to post send wr for RDMA R/W: %d\n", ret); 234 + goto out; 235 + } 236 + 237 + msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list); 238 + wait_for_completion(&completion); 239 + ret = msg->error; 240 + out: 241 + list_for_each_entry_safe(msg, next_msg, &msg_list, list) { 242 + list_del(&msg->list); 243 + smbdirect_connection_rw_io_free(msg, direction); 244 + } 245 + atomic_add(credits_needed, &sc->rw_io.credits.count); 246 + wake_up(&sc->rw_io.credits.wait_queue); 247 + return ret; 248 + 249 + free_table: 250 + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); 251 + free_msg: 252 + kfree(msg); 253 + goto out; 254 + } 255 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_rdma_xmit);
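smbdirect_connection_rdma_xmit() walks the descriptor list once for credit accounting, once to build rdma_rw_ctxs, and then posts everything as a single chained send. A hedged server-side usage sketch, assuming desc/desc_len were taken from the client's request PDU; only the two smbdirect_* calls are from this series:

/*
 * Sketch: push a response payload into client-provided buffer
 * descriptors, as a server handling an RDMA-capable SMB2 READ would.
 */
static int example_rdma_write_payload(struct smbdirect_socket *sc,
				      void *payload, size_t payload_len,
				      struct smbdirect_buffer_descriptor_v1 *desc,
				      size_t desc_len)
{
	const struct smbdirect_socket_parameters *sp =
		smbdirect_socket_get_current_parameters(sc);

	if (payload_len > sp->max_read_write_size)
		return -EINVAL;

	/* is_read=false: the local buffer is the source, peer memory the sink */
	return smbdirect_connection_rdma_xmit(sc, payload, payload_len,
					      desc, desc_len, false);
}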
+743
fs/smb/common/smbdirect/smbdirect_socket.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright (C) 2017, Microsoft Corporation. 4 + * Copyright (c) 2025, Stefan Metzmacher 5 + */ 6 + 7 + #include "smbdirect_internal.h" 8 + 9 + bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) 10 + { 11 + /* 12 + * Test if FRWR (Fast Registration Work Requests) is supported on the 13 + * device. This implementation requires FRWR for RDMA read/write. 14 + * Return value: true if it is supported. 15 + */ 16 + 17 + if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 18 + return false; 19 + if (attrs->max_fast_reg_page_list_len == 0) 20 + return false; 21 + return true; 22 + } 23 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_frwr_is_supported); 24 + 25 + static void smbdirect_socket_cleanup_work(struct work_struct *work); 26 + 27 + static int smbdirect_socket_rdma_event_handler(struct rdma_cm_id *id, 28 + struct rdma_cm_event *event) 29 + { 30 + struct smbdirect_socket *sc = id->context; 31 + int ret = -ESTALE; 32 + 33 + /* 34 + * This should be replaced before any real work 35 + * starts! So it should never be called! 36 + */ 37 + 38 + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) 39 + ret = -ENETDOWN; 40 + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) 41 + ret = event->status; 42 + pr_err("%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", 43 + smbdirect_socket_status_string(sc->status), 44 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), 45 + rdma_event_msg(sc->rdma.expected_event), 46 + rdma_event_msg(event->event), 47 + event->status, 48 + SMBDIRECT_DEBUG_ERR_PTR(ret)); 49 + WARN_ONCE(1, "%s should not be called!\n", __func__); 50 + sc->rdma.cm_id = NULL; 51 + return -ESTALE; 52 + } 53 + 54 + int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) 55 + { 56 + struct rdma_cm_id *id; 57 + int ret; 58 + 59 + smbdirect_socket_init(sc); 60 + 61 + id = rdma_create_id(net, 62 + smbdirect_socket_rdma_event_handler, 63 + sc, 64 + RDMA_PS_TCP, 65 + IB_QPT_RC); 66 + if (IS_ERR(id)) { 67 + pr_err("%s: rdma_create_id() failed %1pe\n", __func__, id); 68 + return PTR_ERR(id); 69 + } 70 + 71 + ret = rdma_set_afonly(id, 1); 72 + if (ret) { 73 + rdma_destroy_id(id); 74 + pr_err("%s: rdma_set_afonly() failed %1pe\n", 75 + __func__, SMBDIRECT_DEBUG_ERR_PTR(ret)); 76 + return ret; 77 + } 78 + 79 + sc->rdma.cm_id = id; 80 + 81 + INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); 82 + 83 + return 0; 84 + } 85 + 86 + int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) 87 + { 88 + struct smbdirect_socket *sc; 89 + int ret; 90 + 91 + ret = -ENOMEM; 92 + sc = kzalloc_obj(*sc); 93 + if (!sc) 94 + goto alloc_failed; 95 + 96 + ret = smbdirect_socket_init_new(net, sc); 97 + if (ret) 98 + goto init_failed; 99 + 100 + kref_init(&sc->refs.destroy); 101 + 102 + *_sc = sc; 103 + return 0; 104 + 105 + init_failed: 106 + kfree(sc); 107 + alloc_failed: 108 + return ret; 109 + } 110 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_kern); 111 + 112 + int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) 113 + { 114 + smbdirect_socket_init(sc); 115 + 116 + sc->rdma.cm_id = id; 117 + sc->rdma.cm_id->context = sc; 118 + sc->rdma.cm_id->event_handler = smbdirect_socket_rdma_event_handler; 119 + 120 + sc->ib.dev = sc->rdma.cm_id->device; 121 + 122 + INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); 123 + 124 + return 0; 125 + } 126 + 127 + int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct 
smbdirect_socket **_sc) 128 + { 129 + struct smbdirect_socket *sc; 130 + int ret; 131 + 132 + ret = -ENOMEM; 133 + sc = kzalloc_obj(*sc); 134 + if (!sc) 135 + goto alloc_failed; 136 + 137 + ret = smbdirect_socket_init_accepting(id, sc); 138 + if (ret) 139 + goto init_failed; 140 + 141 + kref_init(&sc->refs.destroy); 142 + 143 + *_sc = sc; 144 + return 0; 145 + 146 + init_failed: 147 + kfree(sc); 148 + alloc_failed: 149 + return ret; 150 + } 151 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_accepting); 152 + 153 + int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, 154 + const struct smbdirect_socket_parameters *sp) 155 + { 156 + /* 157 + * This is only allowed before connect or accept 158 + */ 159 + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED, 160 + "status=%s first_error=%1pe", 161 + smbdirect_socket_status_string(sc->status), 162 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 163 + if (sc->status != SMBDIRECT_SOCKET_CREATED) 164 + return -EINVAL; 165 + 166 + if (sp->flags & ~SMBDIRECT_FLAG_PORT_RANGE_MASK) 167 + return -EINVAL; 168 + 169 + if (sp->initiator_depth > U8_MAX) 170 + return -EINVAL; 171 + if (sp->responder_resources > U8_MAX) 172 + return -EINVAL; 173 + 174 + if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB && 175 + sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) 176 + return -EINVAL; 177 + else if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB) 178 + rdma_restrict_node_type(sc->rdma.cm_id, RDMA_NODE_IB_CA); 179 + else if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) 180 + rdma_restrict_node_type(sc->rdma.cm_id, RDMA_NODE_RNIC); 181 + 182 + /* 183 + * Make a copy of the caller's parameters; 184 + * from here we only work on the copy 185 + * 186 + * TODO: do we want consistency checking? 187 + */ 188 + sc->parameters = *sp; 189 + 190 + return 0; 191 + } 192 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_initial_parameters); 193 + 194 + const struct smbdirect_socket_parameters * 195 + smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) 196 + { 197 + return &sc->parameters; 198 + } 199 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_get_current_parameters); 200 + 201 + int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, 202 + enum ib_poll_context poll_ctx, 203 + gfp_t gfp_mask) 204 + { 205 + /* 206 + * This is only allowed before connect or accept 207 + */ 208 + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED, 209 + "status=%s first_error=%1pe", 210 + smbdirect_socket_status_string(sc->status), 211 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 212 + if (sc->status != SMBDIRECT_SOCKET_CREATED) 213 + return -EINVAL; 214 + 215 + sc->ib.poll_ctx = poll_ctx; 216 + 217 + sc->send_io.mem.gfp_mask = gfp_mask; 218 + sc->recv_io.mem.gfp_mask = gfp_mask; 219 + sc->rw_io.mem.gfp_mask = gfp_mask; 220 + 221 + return 0; 222 + } 223 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_kernel_settings); 224 + 225 + void smbdirect_socket_set_logging(struct smbdirect_socket *sc, 226 + void *private_ptr, 227 + bool (*needed)(struct smbdirect_socket *sc, 228 + void *private_ptr, 229 + unsigned int lvl, 230 + unsigned int cls), 231 + void (*vaprintf)(struct smbdirect_socket *sc, 232 + const char *func, 233 + unsigned int line, 234 + void *private_ptr, 235 + unsigned int lvl, 236 + unsigned int cls, 237 + struct va_format *vaf)) 238 + { 239 + sc->logging.private_ptr = private_ptr; 240 + sc->logging.needed = needed; 241 + sc->logging.vaprintf = vaprintf; 242 + } 243 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_logging); 244 + 245 
+ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) 246 + { 247 + /* 248 + * Wake up all waiters in all wait queues 249 + * in order to notice the broken connection. 250 + */ 251 + wake_up_all(&sc->status_wait); 252 + wake_up_all(&sc->listen.wait_queue); 253 + wake_up_all(&sc->send_io.bcredits.wait_queue); 254 + wake_up_all(&sc->send_io.lcredits.wait_queue); 255 + wake_up_all(&sc->send_io.credits.wait_queue); 256 + wake_up_all(&sc->send_io.pending.zero_wait_queue); 257 + wake_up_all(&sc->recv_io.reassembly.wait_queue); 258 + wake_up_all(&sc->rw_io.credits.wait_queue); 259 + wake_up_all(&sc->mr_io.ready.wait_queue); 260 + } 261 + 262 + void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, 263 + const char *macro_name, 264 + unsigned int lvl, 265 + const char *func, 266 + unsigned int line, 267 + int error, 268 + enum smbdirect_socket_status *force_status) 269 + { 270 + struct smbdirect_socket *psc, *tsc; 271 + unsigned long flags; 272 + bool was_first = false; 273 + 274 + if (!sc->first_error) { 275 + ___smbdirect_log_generic(sc, func, line, 276 + lvl, 277 + SMBDIRECT_LOG_RDMA_EVENT, 278 + "%s(%1pe%s%s) called from %s in line=%u status=%s\n", 279 + macro_name, 280 + SMBDIRECT_DEBUG_ERR_PTR(error), 281 + force_status ? ", " : "", 282 + force_status ? smbdirect_socket_status_string(*force_status) : "", 283 + func, line, 284 + smbdirect_socket_status_string(sc->status)); 285 + if (error) 286 + sc->first_error = error; 287 + else 288 + sc->first_error = -ECONNABORTED; 289 + was_first = true; 290 + } 291 + 292 + /* 293 + * make sure other work (other than disconnect_work) 294 + * is not queued again; here we don't block and avoid 295 + * disable[_delayed]_work_sync() 296 + */ 297 + disable_work(&sc->connect.work); 298 + disable_work(&sc->recv_io.posted.refill_work); 299 + disable_work(&sc->idle.immediate_work); 300 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 301 + disable_delayed_work(&sc->idle.timer_work); 302 + 303 + /* 304 + * In case we were a listener we need to 305 + * disconnect all pending and ready sockets 306 + * 307 + * First we move ready sockets to pending again. 
308 + */ 309 + spin_lock_irqsave(&sc->listen.lock, flags); 310 + list_splice_init(&sc->listen.ready, &sc->listen.pending); 311 + list_for_each_entry_safe(psc, tsc, &sc->listen.pending, accept.list) 312 + smbdirect_socket_schedule_cleanup(psc, sc->first_error); 313 + spin_unlock_irqrestore(&sc->listen.lock, flags); 314 + 315 + switch (sc->status) { 316 + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 317 + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 318 + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 319 + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 320 + case SMBDIRECT_SOCKET_ERROR: 321 + case SMBDIRECT_SOCKET_DISCONNECTING: 322 + case SMBDIRECT_SOCKET_DISCONNECTED: 323 + case SMBDIRECT_SOCKET_DESTROYED: 324 + /* 325 + * Keep the current error status 326 + */ 327 + break; 328 + 329 + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 330 + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 331 + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 332 + break; 333 + 334 + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 335 + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 336 + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 337 + break; 338 + 339 + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 340 + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 341 + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 342 + break; 343 + 344 + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 345 + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 346 + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 347 + break; 348 + 349 + case SMBDIRECT_SOCKET_CREATED: 350 + case SMBDIRECT_SOCKET_LISTENING: 351 + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 352 + break; 353 + 354 + case SMBDIRECT_SOCKET_CONNECTED: 355 + sc->status = SMBDIRECT_SOCKET_ERROR; 356 + break; 357 + } 358 + 359 + if (force_status && (was_first || *force_status > sc->status)) 360 + sc->status = *force_status; 361 + 362 + /* 363 + * Wake up all waiters in all wait queues 364 + * in order to notice the broken connection. 365 + */ 366 + smbdirect_socket_wake_up_all(sc); 367 + 368 + queue_work(sc->workqueues.cleanup, &sc->disconnect_work); 369 + } 370 + 371 + static void smbdirect_socket_cleanup_work(struct work_struct *work) 372 + { 373 + struct smbdirect_socket *sc = 374 + container_of(work, struct smbdirect_socket, disconnect_work); 375 + struct smbdirect_socket *psc, *tsc; 376 + unsigned long flags; 377 + 378 + /* 379 + * This should never be called in an interrupt! 380 + */ 381 + WARN_ON_ONCE(in_interrupt()); 382 + 383 + if (!sc->first_error) { 384 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, 385 + "%s called with first_error==0\n", 386 + smbdirect_socket_status_string(sc->status)); 387 + 388 + sc->first_error = -ECONNABORTED; 389 + } 390 + 391 + /* 392 + * make sure this and other work is not queued again; 393 + * here we don't block and avoid 394 + * disable[_delayed]_work_sync() 395 + */ 396 + disable_work(&sc->disconnect_work); 397 + disable_work(&sc->connect.work); 398 + disable_work(&sc->recv_io.posted.refill_work); 399 + disable_work(&sc->idle.immediate_work); 400 + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 401 + disable_delayed_work(&sc->idle.timer_work); 402 + 403 + /* 404 + * In case we were a listener we need to 405 + * disconnect all pending and ready sockets 406 + * 407 + * First we move ready sockets to pending again. 
408 + */ 409 + spin_lock_irqsave(&sc->listen.lock, flags); 410 + list_splice_init(&sc->listen.ready, &sc->listen.pending); 411 + list_for_each_entry_safe(psc, tsc, &sc->listen.pending, accept.list) 412 + smbdirect_socket_schedule_cleanup(psc, sc->first_error); 413 + spin_unlock_irqrestore(&sc->listen.lock, flags); 414 + 415 + switch (sc->status) { 416 + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 417 + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 418 + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 419 + case SMBDIRECT_SOCKET_CONNECTED: 420 + case SMBDIRECT_SOCKET_ERROR: 421 + sc->status = SMBDIRECT_SOCKET_DISCONNECTING; 422 + /* 423 + * Make sure we hold the callback lock 424 + * in order to coordinate with the 425 + * rdma_event handlers, typically 426 + * smbdirect_connection_rdma_event_handler(), 427 + * and smbdirect_socket_destroy(). 428 + * 429 + * So that the order of ib_drain_qp() 430 + * and rdma_disconnect() is controlled 431 + * by the mutex. 432 + */ 433 + rdma_lock_handler(sc->rdma.cm_id); 434 + rdma_disconnect(sc->rdma.cm_id); 435 + rdma_unlock_handler(sc->rdma.cm_id); 436 + break; 437 + 438 + case SMBDIRECT_SOCKET_CREATED: 439 + case SMBDIRECT_SOCKET_LISTENING: 440 + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 441 + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 442 + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 443 + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 444 + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 445 + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 446 + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 447 + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 448 + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 449 + /* 450 + * rdma_{accept,connect}() never reached 451 + * RDMA_CM_EVENT_ESTABLISHED 452 + */ 453 + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 454 + break; 455 + 456 + case SMBDIRECT_SOCKET_DISCONNECTING: 457 + case SMBDIRECT_SOCKET_DISCONNECTED: 458 + case SMBDIRECT_SOCKET_DESTROYED: 459 + break; 460 + } 461 + 462 + /* 463 + * Wake up all waiters in all wait queues 464 + * in order to notice the broken connection. 465 + */ 466 + smbdirect_socket_wake_up_all(sc); 467 + } 468 + 469 + static void smbdirect_socket_destroy(struct smbdirect_socket *sc) 470 + { 471 + struct smbdirect_socket *psc, *tsc; 472 + size_t psockets; 473 + struct smbdirect_recv_io *recv_io; 474 + struct smbdirect_recv_io *recv_tmp; 475 + LIST_HEAD(all_list); 476 + unsigned long flags; 477 + 478 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 479 + "status=%s first_error=%1pe", 480 + smbdirect_socket_status_string(sc->status), 481 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 482 + 483 + /* 484 + * This should never be called in an interrupt! 485 + */ 486 + WARN_ON_ONCE(in_interrupt()); 487 + 488 + if (sc->status == SMBDIRECT_SOCKET_DESTROYED) 489 + return; 490 + 491 + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_DISCONNECTED, 492 + "status=%s first_error=%1pe", 493 + smbdirect_socket_status_string(sc->status), 494 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 495 + 496 + /* 497 + * The listener should clear this before we reach this 498 + */ 499 + WARN_ONCE(sc->accept.listener, 500 + "status=%s first_error=%1pe", 501 + smbdirect_socket_status_string(sc->status), 502 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 503 + 504 + /* 505 + * Wake up all waiters in all wait queues 506 + * in order to notice the broken connection. 507 + * 508 + * Most likely this was already called via 509 + * smbdirect_socket_cleanup_work(), but call it again... 
510 + */ 511 + smbdirect_socket_wake_up_all(sc); 512 + 513 + disable_work_sync(&sc->disconnect_work); 514 + disable_work_sync(&sc->connect.work); 515 + disable_work_sync(&sc->recv_io.posted.refill_work); 516 + disable_work_sync(&sc->idle.immediate_work); 517 + disable_delayed_work_sync(&sc->idle.timer_work); 518 + 519 + if (sc->rdma.cm_id) 520 + rdma_lock_handler(sc->rdma.cm_id); 521 + 522 + if (sc->ib.qp) { 523 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 524 + "drain qp\n"); 525 + ib_drain_qp(sc->ib.qp); 526 + } 527 + 528 + /* 529 + * In case we were a listener we need to 530 + * disconnect all pending and ready sockets 531 + * 532 + * We move ready sockets to pending again. 533 + */ 534 + spin_lock_irqsave(&sc->listen.lock, flags); 535 + list_splice_tail_init(&sc->listen.ready, &all_list); 536 + list_splice_tail_init(&sc->listen.pending, &all_list); 537 + spin_unlock_irqrestore(&sc->listen.lock, flags); 538 + psockets = list_count_nodes(&all_list); 539 + if (sc->listen.backlog != -1) /* was a listener */ 540 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 541 + "release %zu pending sockets\n", psockets); 542 + list_for_each_entry_safe(psc, tsc, &all_list, accept.list) { 543 + list_del_init(&psc->accept.list); 544 + psc->accept.listener = NULL; 545 + smbdirect_socket_release(psc); 546 + } 547 + if (sc->listen.backlog != -1) /* was a listener */ 548 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 549 + "released %zu pending sockets\n", psockets); 550 + INIT_LIST_HEAD(&all_list); 551 + 552 + /* It's not possible for upper layer to get to reassembly */ 553 + if (sc->listen.backlog == -1) /* was not a listener */ 554 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 555 + "drain the reassembly queue\n"); 556 + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 557 + list_splice_tail_init(&sc->recv_io.reassembly.list, &all_list); 558 + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 559 + list_for_each_entry_safe(recv_io, recv_tmp, &all_list, list) 560 + smbdirect_connection_put_recv_io(recv_io); 561 + sc->recv_io.reassembly.data_length = 0; 562 + 563 + if (sc->listen.backlog == -1) /* was not a listener */ 564 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 565 + "freeing mr list\n"); 566 + smbdirect_connection_destroy_mr_list(sc); 567 + 568 + if (sc->listen.backlog == -1) /* was not a listener */ 569 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 570 + "destroying qp\n"); 571 + smbdirect_connection_destroy_qp(sc); 572 + if (sc->rdma.cm_id) { 573 + rdma_unlock_handler(sc->rdma.cm_id); 574 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 575 + "destroying cm_id\n"); 576 + rdma_destroy_id(sc->rdma.cm_id); 577 + sc->rdma.cm_id = NULL; 578 + } 579 + 580 + if (sc->listen.backlog == -1) /* was not a listener */ 581 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 582 + "destroying mem pools\n"); 583 + smbdirect_connection_destroy_mem_pools(sc); 584 + 585 + sc->status = SMBDIRECT_SOCKET_DESTROYED; 586 + 587 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 588 + "rdma session destroyed\n"); 589 + } 590 + 591 + void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) 592 + { 593 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 594 + "status=%s first_error=%1pe", 595 + smbdirect_socket_status_string(sc->status), 596 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 597 + 598 + /* 599 + * This should never be called in an interrupt! 
600 + */ 601 + WARN_ON_ONCE(in_interrupt()); 602 + 603 + /* 604 + * First we try to disable the work 605 + * without disable_work_sync() in a 606 + * non-blocking way; if it's already 607 + * running it will be handled by 608 + * disable_work_sync() below. 609 + * 610 + * Here we just want to make sure queue_work() in 611 + * smbdirect_socket_schedule_cleanup_lvl() 612 + * is a no-op. 613 + */ 614 + disable_work(&sc->disconnect_work); 615 + 616 + if (!sc->first_error) 617 + /* 618 + * SMBDIRECT_LOG_INFO is enough here 619 + * as this is the typical case where 620 + * we terminate the connection ourselves. 621 + */ 622 + smbdirect_socket_schedule_cleanup_lvl(sc, 623 + SMBDIRECT_LOG_INFO, 624 + -ESHUTDOWN); 625 + 626 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 627 + "cancelling and disable disconnect_work\n"); 628 + disable_work_sync(&sc->disconnect_work); 629 + 630 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 631 + "destroying rdma session\n"); 632 + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) 633 + smbdirect_socket_cleanup_work(&sc->disconnect_work); 634 + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { 635 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 636 + "wait for transport being disconnected\n"); 637 + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 638 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 639 + "waited for transport being disconnected\n"); 640 + } 641 + 642 + /* 643 + * Once we reached SMBDIRECT_SOCKET_DISCONNECTED, 644 + * we should call smbdirect_socket_destroy() 645 + */ 646 + smbdirect_socket_destroy(sc); 647 + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, 648 + "status=%s first_error=%1pe", 649 + smbdirect_socket_status_string(sc->status), 650 + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); 651 + } 652 + 653 + int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr) 654 + { 655 + int ret; 656 + 657 + if (sc->status != SMBDIRECT_SOCKET_CREATED) 658 + return -EINVAL; 659 + 660 + ret = rdma_bind_addr(sc->rdma.cm_id, addr); 661 + if (ret) 662 + return ret; 663 + 664 + return 0; 665 + } 666 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_bind); 667 + 668 + void smbdirect_socket_shutdown(struct smbdirect_socket *sc) 669 + { 670 + smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); 671 + } 672 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_shutdown); 673 + 674 + static void smbdirect_socket_release_disconnect(struct kref *kref) 675 + { 676 + struct smbdirect_socket *sc = 677 + container_of(kref, struct smbdirect_socket, refs.disconnect); 678 + 679 + /* 680 + * For now do a sync disconnect/destroy 681 + */ 682 + smbdirect_socket_destroy_sync(sc); 683 + } 684 + 685 + static void smbdirect_socket_release_destroy(struct kref *kref) 686 + { 687 + struct smbdirect_socket *sc = 688 + container_of(kref, struct smbdirect_socket, refs.destroy); 689 + 690 + /* 691 + * Do a sync disconnect/destroy... 692 + * hopefully a no-op, as it should be already 693 + * in DESTROYED state, before we free the memory. 694 + */ 695 + smbdirect_socket_destroy_sync(sc); 696 + kfree(sc); 697 + } 698 + 699 + void smbdirect_socket_release(struct smbdirect_socket *sc) 700 + { 701 + /* 702 + * We expect only 1 disconnect reference 703 + * and if it is already 0, it's a use after free! 
704 + */ 705 + WARN_ON_ONCE(kref_read(&sc->refs.disconnect) != 1); 706 + WARN_ON(!kref_put(&sc->refs.disconnect, smbdirect_socket_release_disconnect)); 707 + 708 + /* 709 + * This may not trigger smbdirect_socket_release_destroy(), 710 + * if struct smbdirect_socket is embedded in another structure 711 + * indicated by REFCOUNT_MAX. 712 + */ 713 + kref_put(&sc->refs.destroy, smbdirect_socket_release_destroy); 714 + } 715 + __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_release); 716 + 717 + int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, 718 + enum smbdirect_socket_status expected_status, 719 + int unexpected_errno, 720 + wait_queue_head_t *waitq, 721 + atomic_t *total_credits, 722 + int needed) 723 + { 724 + int ret; 725 + 726 + if (WARN_ON_ONCE(needed < 0)) 727 + return -EINVAL; 728 + 729 + do { 730 + if (atomic_sub_return(needed, total_credits) >= 0) 731 + return 0; 732 + 733 + atomic_add(needed, total_credits); 734 + ret = wait_event_interruptible(*waitq, 735 + atomic_read(total_credits) >= needed || 736 + sc->status != expected_status); 737 + 738 + if (sc->status != expected_status) 739 + return unexpected_errno; 740 + else if (ret < 0) 741 + return ret; 742 + } while (true); 743 + }
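smbdirect_socket_wait_for_credits() at the end is the generic primitive behind the rw/send credit wrappers: it optimistically subtracts, backs the subtraction out on underflow, and sleeps until enough credits return or the socket leaves the expected state. A sketch of a matching take/return pair; the wrapper function is hypothetical, while the give-back mirrors how smbdirect_connection_rdma_xmit() returns its credits:

/* Sketch: take rw credits around an operation and hand them back. */
static int example_with_rw_credits(struct smbdirect_socket *sc, int needed)
{
	int ret;

	ret = smbdirect_socket_wait_for_credits(sc,
						SMBDIRECT_SOCKET_CONNECTED,
						-ENOTCONN,
						&sc->rw_io.credits.wait_queue,
						&sc->rw_io.credits.count,
						needed);
	if (ret < 0)
		return ret;

	/* ... post the RDMA work requests ... */

	/* Return the credits and wake any waiter. */
	atomic_add(needed, &sc->rw_io.credits.count);
	wake_up(&sc->rw_io.credits.wait_queue);
	return 0;
}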
+146 -37
fs/smb/common/smbdirect/smbdirect_socket.h
··· 6 6 #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ 7 7 #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ 8 8 9 + #include <linux/wait.h> 10 + #include <linux/workqueue.h> 11 + #include <linux/kref.h> 12 + #include <linux/mempool.h> 13 + #include <linux/spinlock.h> 14 + #include <linux/mutex.h> 15 + #include <linux/completion.h> 9 16 #include <rdma/rw.h> 10 17 11 18 enum smbdirect_socket_status { 12 19 SMBDIRECT_SOCKET_CREATED, 20 + SMBDIRECT_SOCKET_LISTENING, 13 21 SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED, 14 22 SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, 15 23 SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED, ··· 43 35 switch (status) { 44 36 case SMBDIRECT_SOCKET_CREATED: 45 37 return "CREATED"; 38 + case SMBDIRECT_SOCKET_LISTENING: 39 + return "LISTENING"; 46 40 case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 47 41 return "RESOLVE_ADDR_NEEDED"; 48 42 case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: ··· 109 99 int first_error; 110 100 111 101 /* 112 - * This points to the workqueue to 102 + * This points to the workqueues to 113 103 * be used for this socket. 114 - * It can be per socket (on the client) 115 - * or point to a global workqueue (on the server) 116 104 */ 117 - struct workqueue_struct *workqueue; 105 + struct { 106 + struct workqueue_struct *accept; 107 + struct workqueue_struct *connect; 108 + struct workqueue_struct *idle; 109 + struct workqueue_struct *refill; 110 + struct workqueue_struct *immediate; 111 + struct workqueue_struct *cleanup; 112 + } workqueues; 118 113 119 114 struct work_struct disconnect_work; 115 + 116 + /* 117 + * The reference counts. 118 + */ 119 + struct { 120 + /* 121 + * This holds the references by the 122 + * frontend, typically the smb layer. 123 + * 124 + * It is typically 1 and a disconnect 125 + * will happen if it reaches 0. 126 + */ 127 + struct kref disconnect; 128 + 129 + /* 130 + * This holds the reference by the 131 + * backend, the code that manages 132 + * the lifetime of the whole 133 + * struct smbdirect_socket, 134 + * if this reaches 0 it will 135 + * be freed. 136 + * 137 + * Can be REFCOUNT_MAX if part 138 + * of another structure. 139 + * 140 + * This is equal to or higher than 141 + * the disconnect refcount. 142 + */ 143 + struct kref destroy; 144 + } refs; 120 145 121 146 /* RDMA related */ 122 147 struct { 123 148 struct rdma_cm_id *cm_id; 149 + /* 150 + * The expected event in our current 151 + * cm_id->event_handler; all other events 152 + * are treated as an error. 153 + */ 154 + enum rdma_cm_event_type expected_event; 124 155 /* 125 156 * This is for iWarp MPA v1 126 157 */
175 + */ 176 + struct { 177 + struct smbdirect_socket *listener; 178 + struct list_head list; 179 + } accept; 180 + 181 + /* 205 182 * The state for posted send buffers 206 183 */ 207 184 struct { ··· 239 158 * smbdirect_send_io buffers 240 159 */ 241 160 struct { 242 - struct kmem_cache *cache; 243 - mempool_t *pool; 161 + struct kmem_cache *cache; 162 + mempool_t *pool; 163 + gfp_t gfp_mask; 244 164 } mem; 245 165 246 166 /* ··· 277 195 struct { 278 196 atomic_t count; 279 197 /* 280 - * woken when count is decremented 281 - */ 282 - wait_queue_head_t dec_wait_queue; 283 - /* 284 198 * woken when count reached zero 285 199 */ 286 200 wait_queue_head_t zero_wait_queue; ··· 301 223 * smbdirect_recv_io buffers 302 224 */ 303 225 struct { 304 - struct kmem_cache *cache; 305 - mempool_t *pool; 226 + struct kmem_cache *cache; 227 + mempool_t *pool; 228 + gfp_t gfp_mask; 306 229 } mem; 307 230 308 231 /* ··· 389 310 struct { 390 311 atomic_t count; 391 312 } used; 392 - 393 - struct work_struct recovery_work; 394 - 395 - /* Used by transport to wait until all MRs are returned */ 396 - struct { 397 - wait_queue_head_t wait_queue; 398 - } cleanup; 399 313 } mr_io; 400 314 401 315 /* 402 316 * The state for RDMA read/write requests on the server 403 317 */ 404 318 struct { 319 + /* 320 + * Memory hints for 321 + * smbdirect_rw_io structs 322 + */ 323 + struct { 324 + gfp_t gfp_mask; 325 + } mem; 326 + 405 327 /* 406 328 * The credit state for the send side 407 329 */ ··· 432 352 } statistics; 433 353 434 354 struct { 435 - #define SMBDIRECT_LOG_ERR 0x0 436 - #define SMBDIRECT_LOG_INFO 0x1 437 - 438 - #define SMBDIRECT_LOG_OUTGOING 0x1 439 - #define SMBDIRECT_LOG_INCOMING 0x2 440 - #define SMBDIRECT_LOG_READ 0x4 441 - #define SMBDIRECT_LOG_WRITE 0x8 442 - #define SMBDIRECT_LOG_RDMA_SEND 0x10 443 - #define SMBDIRECT_LOG_RDMA_RECV 0x20 444 - #define SMBDIRECT_LOG_KEEP_ALIVE 0x40 445 - #define SMBDIRECT_LOG_RDMA_EVENT 0x80 446 - #define SMBDIRECT_LOG_RDMA_MR 0x100 447 - #define SMBDIRECT_LOG_RDMA_RW 0x200 448 - #define SMBDIRECT_LOG_NEGOTIATE 0x400 449 355 void *private_ptr; 450 356 bool (*needed)(struct smbdirect_socket *sc, 451 357 void *private_ptr, ··· 559 493 560 494 init_waitqueue_head(&sc->status_wait); 561 495 496 + sc->workqueues.accept = smbdirect_globals.workqueues.accept; 497 + sc->workqueues.connect = smbdirect_globals.workqueues.connect; 498 + sc->workqueues.idle = smbdirect_globals.workqueues.idle; 499 + sc->workqueues.refill = smbdirect_globals.workqueues.refill; 500 + sc->workqueues.immediate = smbdirect_globals.workqueues.immediate; 501 + sc->workqueues.cleanup = smbdirect_globals.workqueues.cleanup; 502 + 562 503 INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); 563 504 disable_work_sync(&sc->disconnect_work); 505 + 506 + kref_init(&sc->refs.disconnect); 507 + sc->refs.destroy = (struct kref) KREF_INIT(REFCOUNT_MAX); 508 + 509 + sc->rdma.expected_event = RDMA_CM_EVENT_INTERNAL; 510 + 511 + sc->ib.poll_ctx = IB_POLL_UNBOUND_WORKQUEUE; 564 512 565 513 spin_lock_init(&sc->connect.lock); 566 514 INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work); ··· 584 504 disable_work_sync(&sc->idle.immediate_work); 585 505 INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); 586 506 disable_delayed_work_sync(&sc->idle.timer_work); 507 + 508 + spin_lock_init(&sc->listen.lock); 509 + INIT_LIST_HEAD(&sc->listen.pending); 510 + INIT_LIST_HEAD(&sc->listen.ready); 511 + sc->listen.backlog = -1; /* not a listener */ 512 + 
init_waitqueue_head(&sc->listen.wait_queue); 513 + 514 + INIT_LIST_HEAD(&sc->accept.list); 515 + 516 + sc->send_io.mem.gfp_mask = GFP_KERNEL; 587 517 588 518 atomic_set(&sc->send_io.bcredits.count, 0); 589 519 init_waitqueue_head(&sc->send_io.bcredits.wait_queue); ··· 605 515 init_waitqueue_head(&sc->send_io.credits.wait_queue); 606 516 607 517 atomic_set(&sc->send_io.pending.count, 0); 608 - init_waitqueue_head(&sc->send_io.pending.dec_wait_queue); 609 518 init_waitqueue_head(&sc->send_io.pending.zero_wait_queue); 519 + 520 + sc->recv_io.mem.gfp_mask = GFP_KERNEL; 610 521 611 522 INIT_LIST_HEAD(&sc->recv_io.free.list); 612 523 spin_lock_init(&sc->recv_io.free.lock); ··· 623 532 spin_lock_init(&sc->recv_io.reassembly.lock); 624 533 init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); 625 534 535 + sc->rw_io.mem.gfp_mask = GFP_KERNEL; 626 536 atomic_set(&sc->rw_io.credits.count, 0); 627 537 init_waitqueue_head(&sc->rw_io.credits.wait_queue); 628 538 ··· 632 540 atomic_set(&sc->mr_io.ready.count, 0); 633 541 init_waitqueue_head(&sc->mr_io.ready.wait_queue); 634 542 atomic_set(&sc->mr_io.used.count, 0); 635 - INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work); 636 - disable_work_sync(&sc->mr_io.recovery_work); 637 - init_waitqueue_head(&sc->mr_io.cleanup.wait_queue); 638 543 639 544 sc->logging.private_ptr = NULL; 640 545 sc->logging.needed = __smbdirect_log_needed; ··· 690 601 691 602 #define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \ 692 603 __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */) 604 + 605 + #ifndef __SMBDIRECT_SOCKET_DISCONNECT 606 + #define __SMBDIRECT_SOCKET_DISCONNECT(__sc) \ 607 + smbdirect_socket_schedule_cleanup(__sc, -ECONNABORTED) 608 + #endif /* ! __SMBDIRECT_SOCKET_DISCONNECT */ 693 609 694 610 #define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \ 695 611 __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \ ··· 813 719 struct sg_table sgt; 814 720 struct scatterlist sg_list[]; 815 721 }; 722 + 723 + static inline size_t smbdirect_get_buf_page_count(const void *buf, size_t size) 724 + { 725 + return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 726 + (uintptr_t)buf / PAGE_SIZE; 727 + } 728 + 729 + /* 730 + * Maximum number of retries on data transfer operations 731 + */ 732 + #define SMBDIRECT_RDMA_CM_RETRY 6 733 + /* 734 + * No need to retry on Receiver Not Ready since SMB_DIRECT manages credits 735 + */ 736 + #define SMBDIRECT_RDMA_CM_RNR_RETRY 0 816 737 817 738 #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
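smbdirect_get_buf_page_count() counts every page a byte range touches, including partial first and last pages: for a buffer starting 256 bytes into a page, DIV_ROUND_UP(256 + PAGE_SIZE, PAGE_SIZE) - 0 = 2, even though only PAGE_SIZE bytes are covered. A small, hypothetical self-check:

/* Sketch: sanity-check the page-count math at the page boundaries. */
static void example_page_count_check(void)
{
	/* One full page starting mid-page touches two pages. */
	WARN_ON(smbdirect_get_buf_page_count((void *)(PAGE_SIZE + 256),
					     PAGE_SIZE) != 2);
	/* A page-aligned full page touches exactly one. */
	WARN_ON(smbdirect_get_buf_page_count((void *)PAGE_SIZE,
					     PAGE_SIZE) != 1);
}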
+3 -2
fs/smb/server/Kconfig
··· 47 47 48 48 config SMB_SERVER_SMBDIRECT 49 49 bool "Support for SMB Direct protocol" 50 - depends on SMB_SERVER=m && INFINIBAND && INFINIBAND_ADDR_TRANS || SMB_SERVER=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y 51 - select SG_POOL 50 + depends on SMB_SERVER && INFINIBAND && INFINIBAND_ADDR_TRANS 51 + depends on SMB_SERVER=m || INFINIBAND=y 52 + select SMB_COMMON_SMBDIRECT 52 53 default n 53 54 54 55 help
-5
fs/smb/server/connection.c
··· 376 376 mutex_init(&conn->srv_mutex); 377 377 __module_get(THIS_MODULE); 378 378 379 - if (t->ops->prepare && t->ops->prepare(t)) 380 - goto out; 381 - 382 379 max_req = server_conf.max_inflight_req; 383 380 conn->last_active = jiffies; 384 381 set_freezable(); ··· 467 470 } 468 471 } 469 472 470 - out: 471 473 ksmbd_conn_set_releasing(conn); 472 474 /* Wait till all reference dropped to the Server object*/ 473 475 ksmbd_debug(CONN, "Wait for all pending requests(%d)\n", atomic_read(&conn->r_count)); ··· 562 566 ksmbd_tcp_destroy(); 563 567 ksmbd_rdma_stop_listening(); 564 568 stop_sessions(); 565 - ksmbd_rdma_destroy(); 566 569 mutex_unlock(&init_lock); 567 570 }
-1
fs/smb/server/connection.h
··· 127 127 }; 128 128 129 129 struct ksmbd_transport_ops { 130 - int (*prepare)(struct ksmbd_transport *t); 131 130 void (*disconnect)(struct ksmbd_transport *t); 132 131 void (*shutdown)(struct ksmbd_transport *t); 133 132 int (*read)(struct ksmbd_transport *t, char *buf,
-1
fs/smb/server/smb2pdu.c
··· 24 24 #include "asn1.h" 25 25 #include "connection.h" 26 26 #include "transport_ipc.h" 27 - #include "../common/smbdirect/smbdirect.h" 28 27 #include "transport_rdma.h" 29 28 #include "vfs.h" 30 29 #include "vfs_cache.h"
+226 -2740
fs/smb/server/transport_rdma.c
··· 11 11 12 12 #include <linux/kthread.h> 13 13 #include <linux/list.h> 14 - #include <linux/mempool.h> 15 - #include <linux/highmem.h> 16 - #include <linux/scatterlist.h> 17 14 #include <linux/string_choices.h> 18 - #include <rdma/ib_verbs.h> 19 - #include <rdma/rdma_cm.h> 20 - #include <rdma/rw.h> 21 - 22 - #define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smb_direct_disconnect_rdma_connection(__sc) 23 15 24 16 #include "glob.h" 25 17 #include "connection.h" 26 18 #include "smb_common.h" 27 19 #include "../common/smb2status.h" 28 - #include "../common/smbdirect/smbdirect.h" 29 - #include "../common/smbdirect/smbdirect_pdu.h" 30 - #include "../common/smbdirect/smbdirect_socket.h" 31 20 #include "transport_rdma.h" 21 + #include "../common/smbdirect/smbdirect_public.h" 22 + 32 23 33 24 #define SMB_DIRECT_PORT_IWARP 5445 34 25 #define SMB_DIRECT_PORT_INFINIBAND 445 35 - 36 - #define SMB_DIRECT_VERSION_LE cpu_to_le16(SMBDIRECT_V1) 37 26 38 27 /* SMB_DIRECT negotiation timeout (for the server) in seconds */ 39 28 #define SMB_DIRECT_NEGOTIATE_TIMEOUT 5 ··· 38 49 * This value is possibly decreased during QP creation on hardware limit 39 50 */ 40 51 #define SMB_DIRECT_CM_INITIATOR_DEPTH 8 41 - 42 - /* Maximum number of retries on data transfer operations */ 43 - #define SMB_DIRECT_CM_RETRY 6 44 - /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ 45 - #define SMB_DIRECT_CM_RNR_RETRY 0 46 52 47 53 /* 48 54 * User configurable initial values per SMB_DIRECT transport connection ··· 77 93 78 94 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; 79 95 80 - static LIST_HEAD(smb_direct_device_list); 81 - static DEFINE_RWLOCK(smb_direct_device_lock); 82 - 83 - struct smb_direct_device { 84 - struct ib_device *ib_dev; 85 - struct list_head list; 86 - }; 87 - 88 96 static struct smb_direct_listener { 89 97 int port; 90 - struct rdma_cm_id *cm_id; 91 - } smb_direct_ib_listener, smb_direct_iw_listener; 92 98 93 - static struct workqueue_struct *smb_direct_wq; 99 + struct task_struct *thread; 100 + 101 + struct smbdirect_socket *socket; 102 + } smb_direct_ib_listener, smb_direct_iw_listener; 94 103 95 104 struct smb_direct_transport { 96 105 struct ksmbd_transport transport; 97 106 98 - struct smbdirect_socket socket; 107 + struct smbdirect_socket *socket; 99 108 }; 109 + 110 + static bool smb_direct_logging_needed(struct smbdirect_socket *sc, 111 + void *private_ptr, 112 + unsigned int lvl, 113 + unsigned int cls) 114 + { 115 + if (lvl <= SMBDIRECT_LOG_ERR) 116 + return true; 117 + 118 + if (lvl > SMBDIRECT_LOG_INFO) 119 + return false; 120 + 121 + switch (cls) { 122 + /* 123 + * These were more or less also logged before 124 + * the move to common code. 125 + * 126 + * SMBDIRECT_LOG_RDMA_MR was not used, but 127 + * that's client only code and we should 128 + * notice if it's used on the server... 129 + */ 130 + case SMBDIRECT_LOG_RDMA_EVENT: 131 + case SMBDIRECT_LOG_RDMA_SEND: 132 + case SMBDIRECT_LOG_RDMA_RECV: 133 + case SMBDIRECT_LOG_WRITE: 134 + case SMBDIRECT_LOG_READ: 135 + case SMBDIRECT_LOG_NEGOTIATE: 136 + case SMBDIRECT_LOG_OUTGOING: 137 + case SMBDIRECT_LOG_RDMA_RW: 138 + case SMBDIRECT_LOG_RDMA_MR: 139 + return true; 140 + /* 141 + * These were not logged before the move 142 + * to common code. 
143 + */ 144 + case SMBDIRECT_LOG_KEEP_ALIVE: 145 + case SMBDIRECT_LOG_INCOMING: 146 + return false; 147 + } 148 + 149 + /* 150 + * Log all unknown messages 151 + */ 152 + return true; 153 + } 154 + 155 + static void smb_direct_logging_vaprintf(struct smbdirect_socket *sc, 156 + const char *func, 157 + unsigned int line, 158 + void *private_ptr, 159 + unsigned int lvl, 160 + unsigned int cls, 161 + struct va_format *vaf) 162 + { 163 + if (lvl <= SMBDIRECT_LOG_ERR) 164 + pr_err("%pV", vaf); 165 + else 166 + ksmbd_debug(RDMA, "%pV", vaf); 167 + } 100 168 101 169 #define KSMBD_TRANS(t) (&(t)->transport) 102 170 #define SMBD_TRANS(t) (container_of(t, \ ··· 165 129 unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) 166 130 { 167 131 struct smb_direct_transport *t; 168 - struct smbdirect_socket *sc; 169 - struct smbdirect_socket_parameters *sp; 132 + const struct smbdirect_socket_parameters *sp; 170 133 171 134 if (kt->ops != &ksmbd_smb_direct_transport_ops) 172 135 return 0; 173 136 174 137 t = SMBD_TRANS(kt); 175 - sc = &t->socket; 176 - sp = &sc->parameters; 138 + sp = smbdirect_socket_get_current_parameters(t->socket); 177 139 178 140 return sp->max_read_write_size; 179 141 } 180 142 181 - static inline int get_buf_page_count(void *buf, int size) 182 - { 183 - return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - 184 - (uintptr_t)buf / PAGE_SIZE; 185 - } 186 - 187 - static void smb_direct_destroy_pools(struct smbdirect_socket *sc); 188 - static void smb_direct_post_recv_credits(struct work_struct *work); 189 - static int smb_direct_post_send_data(struct smbdirect_socket *sc, 190 - struct smbdirect_send_batch *send_ctx, 191 - struct kvec *iov, int niov, 192 - int remaining_data_length); 193 - 194 - static inline void 195 - *smbdirect_recv_io_payload(struct smbdirect_recv_io *recvmsg) 196 - { 197 - return (void *)recvmsg->packet; 198 - } 199 - 200 - static struct 201 - smbdirect_recv_io *get_free_recvmsg(struct smbdirect_socket *sc) 202 - { 203 - struct smbdirect_recv_io *recvmsg = NULL; 204 - unsigned long flags; 205 - 206 - spin_lock_irqsave(&sc->recv_io.free.lock, flags); 207 - if (!list_empty(&sc->recv_io.free.list)) { 208 - recvmsg = list_first_entry(&sc->recv_io.free.list, 209 - struct smbdirect_recv_io, 210 - list); 211 - list_del(&recvmsg->list); 212 - } 213 - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 214 - return recvmsg; 215 - } 216 - 217 - static void put_recvmsg(struct smbdirect_socket *sc, 218 - struct smbdirect_recv_io *recvmsg) 219 - { 220 - unsigned long flags; 221 - 222 - if (likely(recvmsg->sge.length != 0)) { 223 - ib_dma_unmap_single(sc->ib.dev, 224 - recvmsg->sge.addr, 225 - recvmsg->sge.length, 226 - DMA_FROM_DEVICE); 227 - recvmsg->sge.length = 0; 228 - } 229 - 230 - spin_lock_irqsave(&sc->recv_io.free.lock, flags); 231 - list_add(&recvmsg->list, &sc->recv_io.free.list); 232 - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); 233 - 234 - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 235 - } 236 - 237 - static void enqueue_reassembly(struct smbdirect_socket *sc, 238 - struct smbdirect_recv_io *recvmsg, 239 - int data_length) 240 - { 241 - unsigned long flags; 242 - 243 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 244 - list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list); 245 - sc->recv_io.reassembly.queue_length++; 246 - /* 247 - * Make sure reassembly_data_length is updated after list and 248 - * reassembly_queue_length are updated. 
On the dequeue side 249 - * reassembly_data_length is checked without a lock to determine 250 - * if reassembly_queue_length and list is up to date 251 - */ 252 - virt_wmb(); 253 - sc->recv_io.reassembly.data_length += data_length; 254 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 255 - } 256 - 257 - static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *sc) 258 - { 259 - if (!list_empty(&sc->recv_io.reassembly.list)) 260 - return list_first_entry(&sc->recv_io.reassembly.list, 261 - struct smbdirect_recv_io, list); 262 - else 263 - return NULL; 264 - } 265 - 266 - static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) 267 - { 268 - /* 269 - * Wake up all waiters in all wait queues 270 - * in order to notice the broken connection. 271 - */ 272 - wake_up_all(&sc->status_wait); 273 - wake_up_all(&sc->send_io.bcredits.wait_queue); 274 - wake_up_all(&sc->send_io.lcredits.wait_queue); 275 - wake_up_all(&sc->send_io.credits.wait_queue); 276 - wake_up_all(&sc->send_io.pending.zero_wait_queue); 277 - wake_up_all(&sc->recv_io.reassembly.wait_queue); 278 - wake_up_all(&sc->rw_io.credits.wait_queue); 279 - } 280 - 281 - static void smb_direct_disconnect_rdma_work(struct work_struct *work) 282 - { 283 - struct smbdirect_socket *sc = 284 - container_of(work, struct smbdirect_socket, disconnect_work); 285 - 286 - if (sc->first_error == 0) 287 - sc->first_error = -ECONNABORTED; 288 - 289 - /* 290 - * make sure this and other work is not queued again 291 - * but here we don't block and avoid 292 - * disable[_delayed]_work_sync() 293 - */ 294 - disable_work(&sc->disconnect_work); 295 - disable_work(&sc->connect.work); 296 - disable_work(&sc->recv_io.posted.refill_work); 297 - disable_delayed_work(&sc->idle.timer_work); 298 - disable_work(&sc->idle.immediate_work); 299 - 300 - switch (sc->status) { 301 - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 302 - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 303 - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 304 - case SMBDIRECT_SOCKET_CONNECTED: 305 - case SMBDIRECT_SOCKET_ERROR: 306 - sc->status = SMBDIRECT_SOCKET_DISCONNECTING; 307 - rdma_disconnect(sc->rdma.cm_id); 308 - break; 309 - 310 - case SMBDIRECT_SOCKET_CREATED: 311 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 312 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 313 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 314 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 315 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 316 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 317 - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 318 - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 319 - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 320 - /* 321 - * rdma_accept() never reached 322 - * RDMA_CM_EVENT_ESTABLISHED 323 - */ 324 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 325 - break; 326 - 327 - case SMBDIRECT_SOCKET_DISCONNECTING: 328 - case SMBDIRECT_SOCKET_DISCONNECTED: 329 - case SMBDIRECT_SOCKET_DESTROYED: 330 - break; 331 - } 332 - 333 - /* 334 - * Wake up all waiters in all wait queues 335 - * in order to notice the broken connection. 
336 - */ 337 - smb_direct_disconnect_wake_up_all(sc); 338 - } 339 - 340 - static void 341 - smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) 342 - { 343 - if (sc->first_error == 0) 344 - sc->first_error = -ECONNABORTED; 345 - 346 - /* 347 - * make sure other work (than disconnect_work) is 348 - * not queued again but here we don't block and avoid 349 - * disable[_delayed]_work_sync() 350 - */ 351 - disable_work(&sc->connect.work); 352 - disable_work(&sc->recv_io.posted.refill_work); 353 - disable_work(&sc->idle.immediate_work); 354 - disable_delayed_work(&sc->idle.timer_work); 355 - 356 - switch (sc->status) { 357 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: 358 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: 359 - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: 360 - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: 361 - case SMBDIRECT_SOCKET_ERROR: 362 - case SMBDIRECT_SOCKET_DISCONNECTING: 363 - case SMBDIRECT_SOCKET_DISCONNECTED: 364 - case SMBDIRECT_SOCKET_DESTROYED: 365 - /* 366 - * Keep the current error status 367 - */ 368 - break; 369 - 370 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: 371 - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: 372 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; 373 - break; 374 - 375 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: 376 - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: 377 - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; 378 - break; 379 - 380 - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: 381 - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: 382 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; 383 - break; 384 - 385 - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: 386 - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: 387 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; 388 - break; 389 - 390 - case SMBDIRECT_SOCKET_CREATED: 391 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 392 - break; 393 - 394 - case SMBDIRECT_SOCKET_CONNECTED: 395 - sc->status = SMBDIRECT_SOCKET_ERROR; 396 - break; 397 - } 398 - 399 - /* 400 - * Wake up all waiters in all wait queues 401 - * in order to notice the broken connection. 
402 - */ 403 - smb_direct_disconnect_wake_up_all(sc); 404 - 405 - queue_work(sc->workqueue, &sc->disconnect_work); 406 - } 407 - 408 - static void smb_direct_send_immediate_work(struct work_struct *work) 409 - { 410 - struct smbdirect_socket *sc = 411 - container_of(work, struct smbdirect_socket, idle.immediate_work); 412 - 413 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 414 - return; 415 - 416 - smb_direct_post_send_data(sc, NULL, NULL, 0, 0); 417 - } 418 - 419 - static void smb_direct_idle_connection_timer(struct work_struct *work) 420 - { 421 - struct smbdirect_socket *sc = 422 - container_of(work, struct smbdirect_socket, idle.timer_work.work); 423 - struct smbdirect_socket_parameters *sp = &sc->parameters; 424 - 425 - if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { 426 - smb_direct_disconnect_rdma_connection(sc); 427 - return; 428 - } 429 - 430 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 431 - return; 432 - 433 - /* 434 - * Now use the keepalive timeout (instead of keepalive interval) 435 - * in order to wait for a response 436 - */ 437 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; 438 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 439 - msecs_to_jiffies(sp->keepalive_timeout_msec)); 440 - queue_work(sc->workqueue, &sc->idle.immediate_work); 441 - } 442 - 443 - static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) 143 + static struct smb_direct_transport *alloc_transport(struct smbdirect_socket *sc) 444 144 { 445 145 struct smb_direct_transport *t; 446 - struct smbdirect_socket *sc; 447 - struct smbdirect_socket_parameters *sp; 448 146 struct ksmbd_conn *conn; 449 147 450 148 t = kzalloc_obj(*t, KSMBD_DEFAULT_GFP); 451 149 if (!t) 452 150 return NULL; 453 - sc = &t->socket; 454 - smbdirect_socket_init(sc); 455 - sp = &sc->parameters; 456 - 457 - sc->workqueue = smb_direct_wq; 458 - 459 - INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work); 460 - 461 - sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000; 462 - sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH; 463 - sp->responder_resources = 1; 464 - sp->recv_credit_max = smb_direct_receive_credit_max; 465 - sp->send_credit_target = smb_direct_send_credit_target; 466 - sp->max_send_size = smb_direct_max_send_size; 467 - sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; 468 - sp->max_recv_size = smb_direct_max_receive_size; 469 - sp->max_read_write_size = smb_direct_max_read_write_size; 470 - sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000; 471 - sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000; 472 - 473 - sc->rdma.cm_id = cm_id; 474 - cm_id->context = sc; 475 - 476 - sc->ib.dev = sc->rdma.cm_id->device; 477 - 478 - INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer); 151 + t->socket = sc; 479 152 480 153 conn = ksmbd_conn_alloc(); 481 154 if (!conn) 482 - goto err; 155 + goto conn_alloc_failed; 483 156 484 157 down_write(&conn_list_lock); 485 158 hash_add(conn_list, &conn->hlist, 0); ··· 197 452 conn->transport = KSMBD_TRANS(t); 198 453 KSMBD_TRANS(t)->conn = conn; 199 454 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; 455 + 200 456 return t; 201 - err: 457 + 458 + conn_alloc_failed: 202 459 kfree(t); 203 460 return NULL; 204 461 } 205 462 206 463 static void smb_direct_free_transport(struct ksmbd_transport *kt) 207 464 { 208 - kfree(SMBD_TRANS(kt)); 465 + struct smb_direct_transport *t = SMBD_TRANS(kt); 466 + 467 + smbdirect_socket_release(t->socket); 468 + 
kfree(t); 209 469 } 210 470 211 471 static void free_transport(struct smb_direct_transport *t) 212 472 { 213 - struct smbdirect_socket *sc = &t->socket; 214 - struct smbdirect_recv_io *recvmsg; 215 - 216 - disable_work_sync(&sc->disconnect_work); 217 - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) 218 - smb_direct_disconnect_rdma_work(&sc->disconnect_work); 219 - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) 220 - wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); 221 - 222 - /* 223 - * Wake up all waiters in all wait queues 224 - * in order to notice the broken connection. 225 - * 226 - * Most likely this was already called via 227 - * smb_direct_disconnect_rdma_work(), but call it again... 228 - */ 229 - smb_direct_disconnect_wake_up_all(sc); 230 - 231 - disable_work_sync(&sc->connect.work); 232 - disable_work_sync(&sc->recv_io.posted.refill_work); 233 - disable_delayed_work_sync(&sc->idle.timer_work); 234 - disable_work_sync(&sc->idle.immediate_work); 235 - 236 - if (sc->rdma.cm_id) 237 - rdma_lock_handler(sc->rdma.cm_id); 238 - 239 - if (sc->ib.qp) { 240 - ib_drain_qp(sc->ib.qp); 241 - sc->ib.qp = NULL; 242 - rdma_destroy_qp(sc->rdma.cm_id); 243 - } 244 - 245 - ksmbd_debug(RDMA, "drain the reassembly queue\n"); 246 - do { 247 - unsigned long flags; 248 - 249 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 250 - recvmsg = get_first_reassembly(sc); 251 - if (recvmsg) { 252 - list_del(&recvmsg->list); 253 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 254 - put_recvmsg(sc, recvmsg); 255 - } else { 256 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 257 - } 258 - } while (recvmsg); 259 - sc->recv_io.reassembly.data_length = 0; 260 - 261 - if (sc->ib.send_cq) 262 - ib_free_cq(sc->ib.send_cq); 263 - if (sc->ib.recv_cq) 264 - ib_free_cq(sc->ib.recv_cq); 265 - if (sc->ib.pd) 266 - ib_dealloc_pd(sc->ib.pd); 267 - if (sc->rdma.cm_id) { 268 - rdma_unlock_handler(sc->rdma.cm_id); 269 - rdma_destroy_id(sc->rdma.cm_id); 270 - } 271 - 272 - smb_direct_destroy_pools(sc); 473 + smbdirect_socket_shutdown(t->socket); 273 474 ksmbd_conn_free(KSMBD_TRANS(t)->conn); 274 - } 275 - 276 - static struct smbdirect_send_io 277 - *smb_direct_alloc_sendmsg(struct smbdirect_socket *sc) 278 - { 279 - struct smbdirect_send_io *msg; 280 - 281 - msg = mempool_alloc(sc->send_io.mem.pool, KSMBD_DEFAULT_GFP); 282 - if (!msg) 283 - return ERR_PTR(-ENOMEM); 284 - msg->socket = sc; 285 - INIT_LIST_HEAD(&msg->sibling_list); 286 - msg->num_sge = 0; 287 - return msg; 288 - } 289 - 290 - static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, 291 - struct smbdirect_send_io *msg) 292 - { 293 - int i; 294 - 295 - /* 296 - * The list needs to be empty! 297 - * The caller should take care of it. 
298 - */ 299 - WARN_ON_ONCE(!list_empty(&msg->sibling_list)); 300 - 301 - if (msg->num_sge > 0) { 302 - ib_dma_unmap_single(sc->ib.dev, 303 - msg->sge[0].addr, msg->sge[0].length, 304 - DMA_TO_DEVICE); 305 - for (i = 1; i < msg->num_sge; i++) 306 - ib_dma_unmap_page(sc->ib.dev, 307 - msg->sge[i].addr, msg->sge[i].length, 308 - DMA_TO_DEVICE); 309 - } 310 - mempool_free(msg, sc->send_io.mem.pool); 311 - } 312 - 313 - static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg) 314 - { 315 - struct smbdirect_socket *sc = recvmsg->socket; 316 - 317 - switch (sc->recv_io.expected) { 318 - case SMBDIRECT_EXPECT_DATA_TRANSFER: { 319 - struct smbdirect_data_transfer *req = 320 - (struct smbdirect_data_transfer *)recvmsg->packet; 321 - struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet 322 - + le32_to_cpu(req->data_offset)); 323 - ksmbd_debug(RDMA, 324 - "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", 325 - le16_to_cpu(req->credits_granted), 326 - le16_to_cpu(req->credits_requested), 327 - req->data_length, req->remaining_data_length, 328 - hdr->ProtocolId, hdr->Command); 329 - return 0; 330 - } 331 - case SMBDIRECT_EXPECT_NEGOTIATE_REQ: { 332 - struct smbdirect_negotiate_req *req = 333 - (struct smbdirect_negotiate_req *)recvmsg->packet; 334 - ksmbd_debug(RDMA, 335 - "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n", 336 - le16_to_cpu(req->min_version), 337 - le16_to_cpu(req->max_version), 338 - le16_to_cpu(req->credits_requested), 339 - le32_to_cpu(req->preferred_send_size), 340 - le32_to_cpu(req->max_receive_size), 341 - le32_to_cpu(req->max_fragmented_size)); 342 - if (le16_to_cpu(req->min_version) > 0x0100 || 343 - le16_to_cpu(req->max_version) < 0x0100) 344 - return -EOPNOTSUPP; 345 - if (le16_to_cpu(req->credits_requested) <= 0 || 346 - le32_to_cpu(req->max_receive_size) <= 128 || 347 - le32_to_cpu(req->max_fragmented_size) <= 348 - 128 * 1024) 349 - return -ECONNABORTED; 350 - 351 - return 0; 352 - } 353 - case SMBDIRECT_EXPECT_NEGOTIATE_REP: 354 - /* client only */ 355 - break; 356 - } 357 - 358 - /* This is an internal error */ 359 - return -EINVAL; 360 - } 361 - 362 - static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 363 - { 364 - struct smbdirect_recv_io *recvmsg; 365 - struct smbdirect_socket *sc; 366 - struct smbdirect_socket_parameters *sp; 367 - 368 - recvmsg = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 369 - sc = recvmsg->socket; 370 - sp = &sc->parameters; 371 - 372 - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 373 - put_recvmsg(sc, recvmsg); 374 - if (wc->status != IB_WC_WR_FLUSH_ERR) { 375 - pr_err("Recv error. status='%s (%d)' opcode=%d\n", 376 - ib_wc_status_msg(wc->status), wc->status, 377 - wc->opcode); 378 - smb_direct_disconnect_rdma_connection(sc); 379 - } 380 - return; 381 - } 382 - 383 - ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", 384 - ib_wc_status_msg(wc->status), wc->status, 385 - wc->opcode); 386 - 387 - ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, 388 - recvmsg->sge.length, DMA_FROM_DEVICE); 389 - 390 - /* 391 - * Reset timer to the keepalive interval in 392 - * order to trigger our next keepalive message. 
393 - */ 394 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 395 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 396 - msecs_to_jiffies(sp->keepalive_interval_msec)); 397 - 398 - switch (sc->recv_io.expected) { 399 - case SMBDIRECT_EXPECT_NEGOTIATE_REQ: 400 - /* see smb_direct_negotiate_recv_done */ 401 - break; 402 - case SMBDIRECT_EXPECT_DATA_TRANSFER: { 403 - struct smbdirect_data_transfer *data_transfer = 404 - (struct smbdirect_data_transfer *)recvmsg->packet; 405 - u32 remaining_data_length, data_offset, data_length; 406 - int current_recv_credits; 407 - u16 old_recv_credit_target; 408 - 409 - if (wc->byte_len < 410 - offsetof(struct smbdirect_data_transfer, padding)) { 411 - put_recvmsg(sc, recvmsg); 412 - smb_direct_disconnect_rdma_connection(sc); 413 - return; 414 - } 415 - 416 - remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); 417 - data_length = le32_to_cpu(data_transfer->data_length); 418 - data_offset = le32_to_cpu(data_transfer->data_offset); 419 - if (wc->byte_len < data_offset || 420 - wc->byte_len < (u64)data_offset + data_length) { 421 - put_recvmsg(sc, recvmsg); 422 - smb_direct_disconnect_rdma_connection(sc); 423 - return; 424 - } 425 - if (remaining_data_length > sp->max_fragmented_recv_size || 426 - data_length > sp->max_fragmented_recv_size || 427 - (u64)remaining_data_length + (u64)data_length > 428 - (u64)sp->max_fragmented_recv_size) { 429 - put_recvmsg(sc, recvmsg); 430 - smb_direct_disconnect_rdma_connection(sc); 431 - return; 432 - } 433 - 434 - if (data_length) { 435 - if (sc->recv_io.reassembly.full_packet_received) 436 - recvmsg->first_segment = true; 437 - 438 - if (le32_to_cpu(data_transfer->remaining_data_length)) 439 - sc->recv_io.reassembly.full_packet_received = false; 440 - else 441 - sc->recv_io.reassembly.full_packet_received = true; 442 - } 443 - 444 - atomic_dec(&sc->recv_io.posted.count); 445 - current_recv_credits = atomic_dec_return(&sc->recv_io.credits.count); 446 - 447 - old_recv_credit_target = sc->recv_io.credits.target; 448 - sc->recv_io.credits.target = 449 - le16_to_cpu(data_transfer->credits_requested); 450 - sc->recv_io.credits.target = 451 - min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); 452 - sc->recv_io.credits.target = 453 - max_t(u16, sc->recv_io.credits.target, 1); 454 - atomic_add(le16_to_cpu(data_transfer->credits_granted), 455 - &sc->send_io.credits.count); 456 - 457 - if (le16_to_cpu(data_transfer->flags) & 458 - SMBDIRECT_FLAG_RESPONSE_REQUESTED) 459 - queue_work(sc->workqueue, &sc->idle.immediate_work); 460 - 461 - if (atomic_read(&sc->send_io.credits.count) > 0) 462 - wake_up(&sc->send_io.credits.wait_queue); 463 - 464 - if (data_length) { 465 - if (current_recv_credits <= (sc->recv_io.credits.target / 4) || 466 - sc->recv_io.credits.target > old_recv_credit_target) 467 - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 468 - 469 - enqueue_reassembly(sc, recvmsg, (int)data_length); 470 - wake_up(&sc->recv_io.reassembly.wait_queue); 471 - } else 472 - put_recvmsg(sc, recvmsg); 473 - 474 - return; 475 - } 476 - case SMBDIRECT_EXPECT_NEGOTIATE_REP: 477 - /* client only */ 478 - break; 479 - } 480 - 481 - /* 482 - * This is an internal error! 
483 - */ 484 - WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); 485 - put_recvmsg(sc, recvmsg); 486 - smb_direct_disconnect_rdma_connection(sc); 487 - } 488 - 489 - static void smb_direct_negotiate_recv_work(struct work_struct *work); 490 - 491 - static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) 492 - { 493 - struct smbdirect_recv_io *recv_io = 494 - container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); 495 - struct smbdirect_socket *sc = recv_io->socket; 496 - unsigned long flags; 497 - 498 - /* 499 - * reset the common recv_done for later reuse. 500 - */ 501 - recv_io->cqe.done = recv_done; 502 - 503 - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { 504 - put_recvmsg(sc, recv_io); 505 - if (wc->status != IB_WC_WR_FLUSH_ERR) { 506 - pr_err("Negotiate Recv error. status='%s (%d)' opcode=%d\n", 507 - ib_wc_status_msg(wc->status), wc->status, 508 - wc->opcode); 509 - smb_direct_disconnect_rdma_connection(sc); 510 - } 511 - return; 512 - } 513 - 514 - ksmbd_debug(RDMA, "Negotiate Recv completed. status='%s (%d)', opcode=%d\n", 515 - ib_wc_status_msg(wc->status), wc->status, 516 - wc->opcode); 517 - 518 - ib_dma_sync_single_for_cpu(sc->ib.dev, 519 - recv_io->sge.addr, 520 - recv_io->sge.length, 521 - DMA_FROM_DEVICE); 522 - 523 - /* 524 - * This is an internal error! 525 - */ 526 - if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) { 527 - put_recvmsg(sc, recv_io); 528 - smb_direct_disconnect_rdma_connection(sc); 529 - return; 530 - } 531 - 532 - /* 533 - * Don't reset timer to the keepalive interval in 534 - * this will be done in smb_direct_negotiate_recv_work. 535 - */ 536 - 537 - /* 538 - * Only remember the recv_io if it has enough bytes, 539 - * this gives smb_direct_negotiate_recv_work enough 540 - * information in order to disconnect if it was not 541 - * valid. 542 - */ 543 - sc->recv_io.reassembly.full_packet_received = true; 544 - if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req)) 545 - enqueue_reassembly(sc, recv_io, 0); 546 - else 547 - put_recvmsg(sc, recv_io); 548 - 549 - /* 550 - * Some drivers (at least mlx5_ib and irdma in roce mode) 551 - * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED, 552 - * we need to adjust our expectation in that case. 553 - * 554 - * So we defer further processing of the negotiation 555 - * to smb_direct_negotiate_recv_work(). 556 - * 557 - * If we are already in SMBDIRECT_SOCKET_NEGOTIATE_NEEDED 558 - * we queue the work directly otherwise 559 - * smb_direct_cm_handler() will do it, when 560 - * RDMA_CM_EVENT_ESTABLISHED arrived. 561 - */ 562 - spin_lock_irqsave(&sc->connect.lock, flags); 563 - if (!sc->first_error) { 564 - INIT_WORK(&sc->connect.work, smb_direct_negotiate_recv_work); 565 - if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) 566 - queue_work(sc->workqueue, &sc->connect.work); 567 - } 568 - spin_unlock_irqrestore(&sc->connect.lock, flags); 569 - } 570 - 571 - static void smb_direct_negotiate_recv_work(struct work_struct *work) 572 - { 573 - struct smbdirect_socket *sc = 574 - container_of(work, struct smbdirect_socket, connect.work); 575 - const struct smbdirect_socket_parameters *sp = &sc->parameters; 576 - struct smbdirect_recv_io *recv_io; 577 - 578 - if (sc->first_error) 579 - return; 580 - 581 - ksmbd_debug(RDMA, "Negotiate Recv Work running\n"); 582 - 583 - /* 584 - * Reset timer to the keepalive interval in 585 - * order to trigger our next keepalive message. 
586 - */ 587 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; 588 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 589 - msecs_to_jiffies(sp->keepalive_interval_msec)); 590 - 591 - /* 592 - * If smb_direct_negotiate_recv_done() detected an 593 - * invalid request we want to disconnect. 594 - */ 595 - recv_io = get_first_reassembly(sc); 596 - if (!recv_io) { 597 - smb_direct_disconnect_rdma_connection(sc); 598 - return; 599 - } 600 - 601 - if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) { 602 - smb_direct_disconnect_rdma_connection(sc); 603 - return; 604 - } 605 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; 606 - wake_up(&sc->status_wait); 607 - } 608 - 609 - static int smb_direct_post_recv(struct smbdirect_socket *sc, 610 - struct smbdirect_recv_io *recvmsg) 611 - { 612 - struct smbdirect_socket_parameters *sp = &sc->parameters; 613 - struct ib_recv_wr wr; 614 - int ret; 615 - 616 - recvmsg->sge.addr = ib_dma_map_single(sc->ib.dev, 617 - recvmsg->packet, 618 - sp->max_recv_size, 619 - DMA_FROM_DEVICE); 620 - ret = ib_dma_mapping_error(sc->ib.dev, recvmsg->sge.addr); 621 - if (ret) 622 - return ret; 623 - recvmsg->sge.length = sp->max_recv_size; 624 - recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey; 625 - 626 - wr.wr_cqe = &recvmsg->cqe; 627 - wr.next = NULL; 628 - wr.sg_list = &recvmsg->sge; 629 - wr.num_sge = 1; 630 - 631 - ret = ib_post_recv(sc->ib.qp, &wr, NULL); 632 - if (ret) { 633 - pr_err("Can't post recv: %d\n", ret); 634 - ib_dma_unmap_single(sc->ib.dev, 635 - recvmsg->sge.addr, recvmsg->sge.length, 636 - DMA_FROM_DEVICE); 637 - recvmsg->sge.length = 0; 638 - smb_direct_disconnect_rdma_connection(sc); 639 - return ret; 640 - } 641 - return ret; 642 475 } 643 476 644 477 static int smb_direct_read(struct ksmbd_transport *t, char *buf, 645 478 unsigned int size, int unused) 646 479 { 647 - struct smbdirect_recv_io *recvmsg; 648 - struct smbdirect_data_transfer *data_transfer; 649 - int to_copy, to_read, data_read, offset; 650 - u32 data_length, remaining_data_length, data_offset; 651 - int rc; 652 480 struct smb_direct_transport *st = SMBD_TRANS(t); 653 - struct smbdirect_socket *sc = &st->socket; 654 - 655 - again: 656 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { 657 - pr_err("disconnected\n"); 658 - return -ENOTCONN; 659 - } 660 - 661 - /* 662 - * No need to hold the reassembly queue lock all the time as we are 663 - * the only one reading from the front of the queue. The transport 664 - * may add more entries to the back of the queue at the same time 665 - */ 666 - if (sc->recv_io.reassembly.data_length >= size) { 667 - int queue_length; 668 - int queue_removed = 0; 669 - unsigned long flags; 670 - 671 - /* 672 - * Need to make sure reassembly_data_length is read before 673 - * reading reassembly_queue_length and calling 674 - * get_first_reassembly. 
This call is lock free 675 - * as we never read at the end of the queue which are being 676 - * updated in SOFTIRQ as more data is received 677 - */ 678 - virt_rmb(); 679 - queue_length = sc->recv_io.reassembly.queue_length; 680 - data_read = 0; 681 - to_read = size; 682 - offset = sc->recv_io.reassembly.first_entry_offset; 683 - while (data_read < size) { 684 - recvmsg = get_first_reassembly(sc); 685 - data_transfer = smbdirect_recv_io_payload(recvmsg); 686 - data_length = le32_to_cpu(data_transfer->data_length); 687 - remaining_data_length = 688 - le32_to_cpu(data_transfer->remaining_data_length); 689 - data_offset = le32_to_cpu(data_transfer->data_offset); 690 - 691 - /* 692 - * The upper layer expects RFC1002 length at the 693 - * beginning of the payload. Return it to indicate 694 - * the total length of the packet. This minimize the 695 - * change to upper layer packet processing logic. This 696 - * will be eventually remove when an intermediate 697 - * transport layer is added 698 - */ 699 - if (recvmsg->first_segment && size == 4) { 700 - unsigned int rfc1002_len = 701 - data_length + remaining_data_length; 702 - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); 703 - data_read = 4; 704 - recvmsg->first_segment = false; 705 - ksmbd_debug(RDMA, 706 - "returning rfc1002 length %d\n", 707 - rfc1002_len); 708 - goto read_rfc1002_done; 709 - } 710 - 711 - to_copy = min_t(int, data_length - offset, to_read); 712 - memcpy(buf + data_read, (char *)data_transfer + data_offset + offset, 713 - to_copy); 714 - 715 - /* move on to the next buffer? */ 716 - if (to_copy == data_length - offset) { 717 - queue_length--; 718 - /* 719 - * No need to lock if we are not at the 720 - * end of the queue 721 - */ 722 - if (queue_length) { 723 - list_del(&recvmsg->list); 724 - } else { 725 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 726 - list_del(&recvmsg->list); 727 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 728 - } 729 - queue_removed++; 730 - put_recvmsg(sc, recvmsg); 731 - offset = 0; 732 - } else { 733 - offset += to_copy; 734 - } 735 - 736 - to_read -= to_copy; 737 - data_read += to_copy; 738 - } 739 - 740 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); 741 - sc->recv_io.reassembly.data_length -= data_read; 742 - sc->recv_io.reassembly.queue_length -= queue_removed; 743 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); 744 - 745 - sc->recv_io.reassembly.first_entry_offset = offset; 746 - ksmbd_debug(RDMA, 747 - "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", 748 - data_read, sc->recv_io.reassembly.data_length, 749 - sc->recv_io.reassembly.first_entry_offset); 750 - read_rfc1002_done: 751 - return data_read; 752 - } 753 - 754 - ksmbd_debug(RDMA, "wait_event on more data\n"); 755 - rc = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, 756 - sc->recv_io.reassembly.data_length >= size || 757 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 758 - if (rc) 759 - return -EINTR; 760 - 761 - goto again; 762 - } 763 - 764 - static void smb_direct_post_recv_credits(struct work_struct *work) 765 - { 766 - struct smbdirect_socket *sc = 767 - container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); 768 - struct smbdirect_recv_io *recvmsg; 769 - int credits = 0; 481 + struct smbdirect_socket *sc = st->socket; 482 + struct msghdr msg = { .msg_flags = 0, }; 483 + struct kvec iov = { 484 + .iov_base = buf, 485 + .iov_len = size, 486 + }; 770 487 int ret; 771 488 772 - if 
(atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) { 773 - while (true) { 774 - recvmsg = get_free_recvmsg(sc); 775 - if (!recvmsg) 776 - break; 777 - 778 - recvmsg->first_segment = false; 779 - 780 - ret = smb_direct_post_recv(sc, recvmsg); 781 - if (ret) { 782 - pr_err("Can't post recv: %d\n", ret); 783 - put_recvmsg(sc, recvmsg); 784 - break; 785 - } 786 - credits++; 787 - 788 - atomic_inc(&sc->recv_io.posted.count); 789 - } 790 - } 791 - 792 - atomic_add(credits, &sc->recv_io.credits.available); 793 - 794 - /* 795 - * If the last send credit is waiting for credits 796 - * it can grant we need to wake it up 797 - */ 798 - if (credits && 799 - atomic_read(&sc->send_io.bcredits.count) == 0 && 800 - atomic_read(&sc->send_io.credits.count) == 0) 801 - wake_up(&sc->send_io.credits.wait_queue); 802 - 803 - if (credits) 804 - queue_work(sc->workqueue, &sc->idle.immediate_work); 805 - } 806 - 807 - static void send_done(struct ib_cq *cq, struct ib_wc *wc) 808 - { 809 - struct smbdirect_send_io *sendmsg, *sibling, *next; 810 - struct smbdirect_socket *sc; 811 - int lcredits = 0; 812 - 813 - sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); 814 - sc = sendmsg->socket; 815 - 816 - ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", 817 - ib_wc_status_msg(wc->status), wc->status, 818 - wc->opcode); 819 - 820 - if (unlikely(!(sendmsg->wr.send_flags & IB_SEND_SIGNALED))) { 821 - /* 822 - * This happens when smbdirect_send_io is a sibling 823 - * before the final message, it is signaled on 824 - * error anyway, so we need to skip 825 - * smbdirect_connection_free_send_io here, 826 - * otherwise is will destroy the memory 827 - * of the siblings too, which will cause 828 - * use after free problems for the others 829 - * triggered from ib_drain_qp(). 830 - */ 831 - if (wc->status != IB_WC_SUCCESS) 832 - goto skip_free; 833 - 834 - /* 835 - * This should not happen! 836 - * But we better just close the 837 - * connection... 838 - */ 839 - pr_err("unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", 840 - ib_wc_status_msg(wc->status), wc->status, wc->opcode); 841 - smb_direct_disconnect_rdma_connection(sc); 842 - return; 843 - } 844 - 845 - /* 846 - * Free possible siblings and then the main send_io 847 - */ 848 - list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { 849 - list_del_init(&sibling->sibling_list); 850 - smb_direct_free_sendmsg(sc, sibling); 851 - lcredits += 1; 852 - } 853 - /* Note this frees wc->wr_cqe, but not wc */ 854 - smb_direct_free_sendmsg(sc, sendmsg); 855 - lcredits += 1; 856 - 857 - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { 858 - skip_free: 859 - pr_err("Send error. 
status='%s (%d)', opcode=%d\n", 860 - ib_wc_status_msg(wc->status), wc->status, 861 - wc->opcode); 862 - smb_direct_disconnect_rdma_connection(sc); 863 - return; 864 - } 865 - 866 - atomic_add(lcredits, &sc->send_io.lcredits.count); 867 - wake_up(&sc->send_io.lcredits.wait_queue); 868 - 869 - if (atomic_dec_and_test(&sc->send_io.pending.count)) 870 - wake_up(&sc->send_io.pending.zero_wait_queue); 871 - } 872 - 873 - static int manage_credits_prior_sending(struct smbdirect_socket *sc) 874 - { 875 - int missing; 876 - int available; 877 - int new_credits; 878 - 879 - if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) 880 - return 0; 881 - 882 - missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); 883 - available = atomic_xchg(&sc->recv_io.credits.available, 0); 884 - new_credits = (u16)min3(U16_MAX, missing, available); 885 - if (new_credits <= 0) { 886 - /* 887 - * If credits are available, but not granted 888 - * we need to re-add them again. 889 - */ 890 - if (available) 891 - atomic_add(available, &sc->recv_io.credits.available); 892 - return 0; 893 - } 894 - 895 - if (new_credits < available) { 896 - /* 897 - * Readd the remaining available again. 898 - */ 899 - available -= new_credits; 900 - atomic_add(available, &sc->recv_io.credits.available); 901 - } 902 - 903 - /* 904 - * Remember we granted the credits 905 - */ 906 - atomic_add(new_credits, &sc->recv_io.credits.count); 907 - return new_credits; 908 - } 909 - 910 - static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) 911 - { 912 - struct smbdirect_socket_parameters *sp = &sc->parameters; 913 - 914 - if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { 915 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; 916 - /* 917 - * Now use the keepalive timeout (instead of keepalive interval) 918 - * in order to wait for a response 919 - */ 920 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, 921 - msecs_to_jiffies(sp->keepalive_timeout_msec)); 922 - return 1; 923 - } 924 - return 0; 925 - } 926 - 927 - static int smb_direct_post_send(struct smbdirect_socket *sc, 928 - struct ib_send_wr *wr) 929 - { 930 - int ret; 931 - 932 - atomic_inc(&sc->send_io.pending.count); 933 - ret = ib_post_send(sc->ib.qp, wr, NULL); 934 - if (ret) { 935 - pr_err("failed to post send: %d\n", ret); 936 - smb_direct_disconnect_rdma_connection(sc); 937 - } 938 - return ret; 939 - } 940 - 941 - static void smb_direct_send_ctx_init(struct smbdirect_send_batch *send_ctx, 942 - bool need_invalidate_rkey, 943 - unsigned int remote_key) 944 - { 945 - INIT_LIST_HEAD(&send_ctx->msg_list); 946 - send_ctx->wr_cnt = 0; 947 - send_ctx->need_invalidate_rkey = need_invalidate_rkey; 948 - send_ctx->remote_key = remote_key; 949 - send_ctx->credit = 0; 950 - } 951 - 952 - static int smb_direct_flush_send_list(struct smbdirect_socket *sc, 953 - struct smbdirect_send_batch *send_ctx, 954 - bool is_last) 955 - { 956 - struct smbdirect_send_io *first, *last; 957 - int ret = 0; 958 - 959 - if (list_empty(&send_ctx->msg_list)) 960 - goto release_credit; 961 - 962 - first = list_first_entry(&send_ctx->msg_list, 963 - struct smbdirect_send_io, 964 - sibling_list); 965 - last = list_last_entry(&send_ctx->msg_list, 966 - struct smbdirect_send_io, 967 - sibling_list); 968 - 969 - if (send_ctx->need_invalidate_rkey) { 970 - first->wr.opcode = IB_WR_SEND_WITH_INV; 971 - first->wr.ex.invalidate_rkey = send_ctx->remote_key; 972 - send_ctx->need_invalidate_rkey = false; 973 - send_ctx->remote_key = 0; 974 - } 975 - 
976 - last->wr.send_flags = IB_SEND_SIGNALED; 977 - last->wr.wr_cqe = &last->cqe; 978 - 979 - /* 980 - * Remove last from send_ctx->msg_list 981 - * and splice the rest of send_ctx->msg_list 982 - * to last->sibling_list. 983 - * 984 - * send_ctx->msg_list is a valid empty list 985 - * at the end. 986 - */ 987 - list_del_init(&last->sibling_list); 988 - list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list); 989 - send_ctx->wr_cnt = 0; 990 - 991 - ret = smb_direct_post_send(sc, &first->wr); 992 - if (ret) { 993 - struct smbdirect_send_io *sibling, *next; 994 - 995 - list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { 996 - list_del_init(&sibling->sibling_list); 997 - smb_direct_free_sendmsg(sc, sibling); 998 - } 999 - smb_direct_free_sendmsg(sc, last); 1000 - } 1001 - 1002 - release_credit: 1003 - if (is_last && !ret && send_ctx->credit) { 1004 - atomic_add(send_ctx->credit, &sc->send_io.bcredits.count); 1005 - send_ctx->credit = 0; 1006 - wake_up(&sc->send_io.bcredits.wait_queue); 1007 - } 1008 - 1009 - return ret; 1010 - } 1011 - 1012 - static int wait_for_credits(struct smbdirect_socket *sc, 1013 - wait_queue_head_t *waitq, atomic_t *total_credits, 1014 - int needed) 1015 - { 1016 - int ret; 1017 - 1018 - do { 1019 - if (atomic_sub_return(needed, total_credits) >= 0) 1020 - return 0; 1021 - 1022 - atomic_add(needed, total_credits); 1023 - ret = wait_event_interruptible(*waitq, 1024 - atomic_read(total_credits) >= needed || 1025 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 1026 - 1027 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1028 - return -ENOTCONN; 1029 - else if (ret < 0) 1030 - return ret; 1031 - } while (true); 1032 - } 1033 - 1034 - static int wait_for_send_bcredit(struct smbdirect_socket *sc, 1035 - struct smbdirect_send_batch *send_ctx) 1036 - { 1037 - int ret; 1038 - 1039 - if (send_ctx->credit) 1040 - return 0; 1041 - 1042 - ret = wait_for_credits(sc, 1043 - &sc->send_io.bcredits.wait_queue, 1044 - &sc->send_io.bcredits.count, 1045 - 1); 1046 - if (ret) 1047 - return ret; 1048 - 1049 - send_ctx->credit = 1; 1050 - return 0; 1051 - } 1052 - 1053 - static int wait_for_send_lcredit(struct smbdirect_socket *sc, 1054 - struct smbdirect_send_batch *send_ctx) 1055 - { 1056 - if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { 1057 - int ret; 1058 - 1059 - ret = smb_direct_flush_send_list(sc, send_ctx, false); 1060 - if (ret) 1061 - return ret; 1062 - } 1063 - 1064 - return wait_for_credits(sc, 1065 - &sc->send_io.lcredits.wait_queue, 1066 - &sc->send_io.lcredits.count, 1067 - 1); 1068 - } 1069 - 1070 - static int wait_for_send_credits(struct smbdirect_socket *sc, 1071 - struct smbdirect_send_batch *send_ctx) 1072 - { 1073 - int ret; 1074 - 1075 - if (send_ctx && 1076 - (send_ctx->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { 1077 - ret = smb_direct_flush_send_list(sc, send_ctx, false); 1078 - if (ret) 1079 - return ret; 1080 - } 1081 - 1082 - return wait_for_credits(sc, &sc->send_io.credits.wait_queue, &sc->send_io.credits.count, 1); 1083 - } 1084 - 1085 - static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits) 1086 - { 1087 - return wait_for_credits(sc, 1088 - &sc->rw_io.credits.wait_queue, 1089 - &sc->rw_io.credits.count, 1090 - credits); 1091 - } 1092 - 1093 - static int calc_rw_credits(struct smbdirect_socket *sc, 1094 - char *buf, unsigned int len) 1095 - { 1096 - return DIV_ROUND_UP(get_buf_page_count(buf, len), 1097 - sc->rw_io.credits.num_pages); 1098 - } 1099 - 1100 - static int 
smb_direct_create_header(struct smbdirect_socket *sc, 1101 - int size, int remaining_data_length, 1102 - int new_credits, 1103 - struct smbdirect_send_io **sendmsg_out) 1104 - { 1105 - struct smbdirect_socket_parameters *sp = &sc->parameters; 1106 - struct smbdirect_send_io *sendmsg; 1107 - struct smbdirect_data_transfer *packet; 1108 - int header_length; 1109 - int ret; 1110 - 1111 - sendmsg = smb_direct_alloc_sendmsg(sc); 1112 - if (IS_ERR(sendmsg)) 1113 - return PTR_ERR(sendmsg); 1114 - 1115 - /* Fill in the packet header */ 1116 - packet = (struct smbdirect_data_transfer *)sendmsg->packet; 1117 - packet->credits_requested = cpu_to_le16(sp->send_credit_target); 1118 - packet->credits_granted = cpu_to_le16(new_credits); 1119 - 1120 - packet->flags = 0; 1121 - if (manage_keep_alive_before_sending(sc)) 1122 - packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); 1123 - 1124 - packet->reserved = 0; 1125 - if (!size) 1126 - packet->data_offset = 0; 1127 - else 1128 - packet->data_offset = cpu_to_le32(24); 1129 - packet->data_length = cpu_to_le32(size); 1130 - packet->remaining_data_length = cpu_to_le32(remaining_data_length); 1131 - packet->padding = 0; 1132 - 1133 - ksmbd_debug(RDMA, 1134 - "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", 1135 - le16_to_cpu(packet->credits_requested), 1136 - le16_to_cpu(packet->credits_granted), 1137 - le32_to_cpu(packet->data_offset), 1138 - le32_to_cpu(packet->data_length), 1139 - le32_to_cpu(packet->remaining_data_length)); 1140 - 1141 - /* Map the packet to DMA */ 1142 - header_length = sizeof(struct smbdirect_data_transfer); 1143 - /* If this is a packet without payload, don't send padding */ 1144 - if (!size) 1145 - header_length = 1146 - offsetof(struct smbdirect_data_transfer, padding); 1147 - 1148 - sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, 1149 - (void *)packet, 1150 - header_length, 1151 - DMA_TO_DEVICE); 1152 - ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); 1153 - if (ret) { 1154 - smb_direct_free_sendmsg(sc, sendmsg); 1155 - return ret; 1156 - } 1157 - 1158 - sendmsg->num_sge = 1; 1159 - sendmsg->sge[0].length = header_length; 1160 - sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; 1161 - 1162 - *sendmsg_out = sendmsg; 1163 - return 0; 1164 - } 1165 - 1166 - static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) 1167 - { 1168 - bool high = is_vmalloc_addr(buf); 1169 - struct page *page; 1170 - int offset, len; 1171 - int i = 0; 1172 - 1173 - if (size <= 0 || nentries < get_buf_page_count(buf, size)) 1174 - return -EINVAL; 1175 - 1176 - offset = offset_in_page(buf); 1177 - buf -= offset; 1178 - while (size > 0) { 1179 - len = min_t(int, PAGE_SIZE - offset, size); 1180 - if (high) 1181 - page = vmalloc_to_page(buf); 1182 - else 1183 - page = kmap_to_page(buf); 1184 - 1185 - if (!sg_list) 1186 - return -EINVAL; 1187 - sg_set_page(sg_list, page, len, offset); 1188 - sg_list = sg_next(sg_list); 1189 - 1190 - buf += PAGE_SIZE; 1191 - size -= len; 1192 - offset = 0; 1193 - i++; 1194 - } 1195 - return i; 1196 - } 1197 - 1198 - static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, 1199 - struct scatterlist *sg_list, int nentries, 1200 - enum dma_data_direction dir, int *npages) 1201 - { 1202 - *npages = get_sg_list(buf, size, sg_list, nentries); 1203 - if (*npages < 0) 1204 - return -EINVAL; 1205 - return ib_dma_map_sg(device, sg_list, *npages, dir); 1206 - } 1207 - 1208 - static int post_sendmsg(struct 
smbdirect_socket *sc, 1209 - struct smbdirect_send_batch *send_ctx, 1210 - struct smbdirect_send_io *msg) 1211 - { 1212 - int i; 1213 - 1214 - for (i = 0; i < msg->num_sge; i++) 1215 - ib_dma_sync_single_for_device(sc->ib.dev, 1216 - msg->sge[i].addr, msg->sge[i].length, 1217 - DMA_TO_DEVICE); 1218 - 1219 - msg->cqe.done = send_done; 1220 - msg->wr.opcode = IB_WR_SEND; 1221 - msg->wr.sg_list = &msg->sge[0]; 1222 - msg->wr.num_sge = msg->num_sge; 1223 - msg->wr.next = NULL; 1224 - 1225 - if (send_ctx) { 1226 - msg->wr.wr_cqe = NULL; 1227 - msg->wr.send_flags = 0; 1228 - if (!list_empty(&send_ctx->msg_list)) { 1229 - struct smbdirect_send_io *last; 1230 - 1231 - last = list_last_entry(&send_ctx->msg_list, 1232 - struct smbdirect_send_io, 1233 - sibling_list); 1234 - last->wr.next = &msg->wr; 1235 - } 1236 - list_add_tail(&msg->sibling_list, &send_ctx->msg_list); 1237 - send_ctx->wr_cnt++; 1238 - return 0; 1239 - } 1240 - 1241 - msg->wr.wr_cqe = &msg->cqe; 1242 - msg->wr.send_flags = IB_SEND_SIGNALED; 1243 - return smb_direct_post_send(sc, &msg->wr); 1244 - } 1245 - 1246 - static int smb_direct_post_send_data(struct smbdirect_socket *sc, 1247 - struct smbdirect_send_batch *send_ctx, 1248 - struct kvec *iov, int niov, 1249 - int remaining_data_length) 1250 - { 1251 - int i, j, ret; 1252 - struct smbdirect_send_io *msg; 1253 - int data_length; 1254 - struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; 1255 - struct smbdirect_send_batch _send_ctx; 1256 - int new_credits; 1257 - 1258 - if (!send_ctx) { 1259 - smb_direct_send_ctx_init(&_send_ctx, false, 0); 1260 - send_ctx = &_send_ctx; 1261 - } 1262 - 1263 - ret = wait_for_send_bcredit(sc, send_ctx); 1264 - if (ret) 1265 - goto bcredit_failed; 1266 - 1267 - ret = wait_for_send_lcredit(sc, send_ctx); 1268 - if (ret) 1269 - goto lcredit_failed; 1270 - 1271 - ret = wait_for_send_credits(sc, send_ctx); 1272 - if (ret) 1273 - goto credit_failed; 1274 - 1275 - new_credits = manage_credits_prior_sending(sc); 1276 - if (new_credits == 0 && 1277 - atomic_read(&sc->send_io.credits.count) == 0 && 1278 - atomic_read(&sc->recv_io.credits.count) == 0) { 1279 - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); 1280 - ret = wait_event_interruptible(sc->send_io.credits.wait_queue, 1281 - atomic_read(&sc->send_io.credits.count) >= 1 || 1282 - atomic_read(&sc->recv_io.credits.available) >= 1 || 1283 - sc->status != SMBDIRECT_SOCKET_CONNECTED); 1284 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 1285 - ret = -ENOTCONN; 1286 - if (ret < 0) 1287 - goto credit_failed; 1288 - 1289 - new_credits = manage_credits_prior_sending(sc); 1290 - } 1291 - 1292 - data_length = 0; 1293 - for (i = 0; i < niov; i++) 1294 - data_length += iov[i].iov_len; 1295 - 1296 - ret = smb_direct_create_header(sc, data_length, remaining_data_length, 1297 - new_credits, &msg); 1298 - if (ret) 1299 - goto header_failed; 1300 - 1301 - for (i = 0; i < niov; i++) { 1302 - struct ib_sge *sge; 1303 - int sg_cnt; 1304 - int npages; 1305 - 1306 - sg_init_table(sg, SMBDIRECT_SEND_IO_MAX_SGE - 1); 1307 - sg_cnt = get_mapped_sg_list(sc->ib.dev, 1308 - iov[i].iov_base, iov[i].iov_len, 1309 - sg, SMBDIRECT_SEND_IO_MAX_SGE - 1, 1310 - DMA_TO_DEVICE, &npages); 1311 - if (sg_cnt <= 0) { 1312 - pr_err("failed to map buffer\n"); 1313 - ret = -ENOMEM; 1314 - goto err; 1315 - } else if (sg_cnt + msg->num_sge > SMBDIRECT_SEND_IO_MAX_SGE) { 1316 - pr_err("buffer not fitted into sges\n"); 1317 - ret = -E2BIG; 1318 - ib_dma_unmap_sg(sc->ib.dev, sg, npages, 1319 - DMA_TO_DEVICE); 1320 - goto err; 1321 - } 
1322 - 1323 - for (j = 0; j < sg_cnt; j++) { 1324 - sge = &msg->sge[msg->num_sge]; 1325 - sge->addr = sg_dma_address(&sg[j]); 1326 - sge->length = sg_dma_len(&sg[j]); 1327 - sge->lkey = sc->ib.pd->local_dma_lkey; 1328 - msg->num_sge++; 1329 - } 1330 - } 1331 - 1332 - ret = post_sendmsg(sc, send_ctx, msg); 1333 - if (ret) 1334 - goto err; 1335 - 1336 - /* 1337 - * From here msg is moved to send_ctx 1338 - * and we should not free it explicitly. 1339 - */ 1340 - 1341 - if (send_ctx == &_send_ctx) { 1342 - ret = smb_direct_flush_send_list(sc, send_ctx, true); 1343 - if (ret) 1344 - goto flush_failed; 1345 - } 1346 - 1347 - return 0; 1348 - err: 1349 - smb_direct_free_sendmsg(sc, msg); 1350 - flush_failed: 1351 - header_failed: 1352 - atomic_inc(&sc->send_io.credits.count); 1353 - credit_failed: 1354 - atomic_inc(&sc->send_io.lcredits.count); 1355 - lcredit_failed: 1356 - atomic_add(send_ctx->credit, &sc->send_io.bcredits.count); 1357 - send_ctx->credit = 0; 1358 - bcredit_failed: 489 + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size); 490 + 491 + ret = smbdirect_connection_recvmsg(sc, &msg, 0); 492 + if (ret == -ERESTARTSYS) 493 + ret = -EINTR; 1359 494 return ret; 1360 495 } 1361 496 ··· 244 1619 bool need_invalidate, unsigned int remote_key) 245 1620 { 246 1621 struct smb_direct_transport *st = SMBD_TRANS(t); 247 - struct smbdirect_socket *sc = &st->socket; 248 - struct smbdirect_socket_parameters *sp = &sc->parameters; 249 - size_t remaining_data_length; 250 - size_t iov_idx; 251 - size_t iov_ofs; 252 - size_t max_iov_size = sp->max_send_size - 253 - sizeof(struct smbdirect_data_transfer); 254 - int ret; 255 - struct smbdirect_send_batch send_ctx; 256 - int error = 0; 1622 + struct smbdirect_socket *sc = st->socket; 1623 + struct iov_iter iter; 257 1624 258 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) 259 - return -ENOTCONN; 1625 + iov_iter_kvec(&iter, ITER_SOURCE, iov, niovs, buflen); 260 1626 261 - //FIXME: skip RFC1002 header.. 262 - if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4)) 263 - return -EINVAL; 264 - buflen -= 4; 265 - iov_idx = 1; 266 - iov_ofs = 0; 267 - 268 - remaining_data_length = buflen; 269 - ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); 270 - 271 - smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key); 272 - while (remaining_data_length) { 273 - struct kvec vecs[SMBDIRECT_SEND_IO_MAX_SGE - 1]; /* minus smbdirect hdr */ 274 - size_t possible_bytes = max_iov_size; 275 - size_t possible_vecs; 276 - size_t bytes = 0; 277 - size_t nvecs = 0; 278 - 279 - /* 280 - * For the last message remaining_data_length should be 281 - * have been 0 already! 282 - */ 283 - if (WARN_ON_ONCE(iov_idx >= niovs)) { 284 - error = -EINVAL; 285 - goto done; 286 - } 287 - 288 - /* 289 - * We have 2 factors which limit the arguments we pass 290 - * to smb_direct_post_send_data(): 291 - * 292 - * 1. The number of supported sges for the send, 293 - * while one is reserved for the smbdirect header. 294 - * And we currently need one SGE per page. 295 - * 2. The number of negotiated payload bytes per send. 
296 - */ 297 - possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx); 298 - 299 - while (iov_idx < niovs && possible_vecs && possible_bytes) { 300 - struct kvec *v = &vecs[nvecs]; 301 - int page_count; 302 - 303 - v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs; 304 - v->iov_len = min_t(size_t, 305 - iov[iov_idx].iov_len - iov_ofs, 306 - possible_bytes); 307 - page_count = get_buf_page_count(v->iov_base, v->iov_len); 308 - if (page_count > possible_vecs) { 309 - /* 310 - * If the number of pages in the buffer 311 - * is to much (because we currently require 312 - * one SGE per page), we need to limit the 313 - * length. 314 - * 315 - * We know possible_vecs is at least 1, 316 - * so we always keep the first page. 317 - * 318 - * We need to calculate the number extra 319 - * pages (epages) we can also keep. 320 - * 321 - * We calculate the number of bytes in the 322 - * first page (fplen), this should never be 323 - * larger than v->iov_len because page_count is 324 - * at least 2, but adding a limitation feels 325 - * better. 326 - * 327 - * Then we calculate the number of bytes (elen) 328 - * we can keep for the extra pages. 329 - */ 330 - size_t epages = possible_vecs - 1; 331 - size_t fpofs = offset_in_page(v->iov_base); 332 - size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len); 333 - size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE); 334 - 335 - v->iov_len = fplen + elen; 336 - page_count = get_buf_page_count(v->iov_base, v->iov_len); 337 - if (WARN_ON_ONCE(page_count > possible_vecs)) { 338 - /* 339 - * Something went wrong in the above 340 - * logic... 341 - */ 342 - error = -EINVAL; 343 - goto done; 344 - } 345 - } 346 - possible_vecs -= page_count; 347 - nvecs += 1; 348 - possible_bytes -= v->iov_len; 349 - bytes += v->iov_len; 350 - 351 - iov_ofs += v->iov_len; 352 - if (iov_ofs >= iov[iov_idx].iov_len) { 353 - iov_idx += 1; 354 - iov_ofs = 0; 355 - } 356 - } 357 - 358 - remaining_data_length -= bytes; 359 - 360 - ret = smb_direct_post_send_data(sc, &send_ctx, 361 - vecs, nvecs, 362 - remaining_data_length); 363 - if (unlikely(ret)) { 364 - error = ret; 365 - goto done; 366 - } 367 - } 368 - 369 - done: 370 - ret = smb_direct_flush_send_list(sc, &send_ctx, true); 371 - if (unlikely(!ret && error)) 372 - ret = error; 373 - 374 - /* 375 - * As an optimization, we don't wait for individual I/O to finish 376 - * before sending the next one. 
377 - * Send them all and wait for pending send count to get to 0
378 - * that means all the I/Os have been out and we are good to return
379 - */
380 -
381 - wait_event(sc->send_io.pending.zero_wait_queue,
382 - atomic_read(&sc->send_io.pending.count) == 0 ||
383 - sc->status != SMBDIRECT_SOCKET_CONNECTED);
384 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED && ret == 0)
385 - ret = -ENOTCONN;
386 -
387 - return ret;
388 - }
389 -
390 - static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
391 - struct smbdirect_rw_io *msg,
392 - enum dma_data_direction dir)
393 - {
394 - struct smbdirect_socket *sc = &t->socket;
395 -
396 - rdma_rw_ctx_destroy(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
397 - msg->sgt.sgl, msg->sgt.nents, dir);
398 - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
399 - kfree(msg);
400 - }
401 -
402 - static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
403 - enum dma_data_direction dir)
404 - {
405 - struct smbdirect_rw_io *msg =
406 - container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe);
407 - struct smbdirect_socket *sc = msg->socket;
408 -
409 - if (wc->status != IB_WC_SUCCESS) {
410 - msg->error = -EIO;
411 - pr_err("read/write error. opcode = %d, status = %s(%d)\n",
412 - wc->opcode, ib_wc_status_msg(wc->status), wc->status);
413 - if (wc->status != IB_WC_WR_FLUSH_ERR)
414 - smb_direct_disconnect_rdma_connection(sc);
415 - }
416 -
417 - complete(msg->completion);
418 - }
419 -
420 - static void read_done(struct ib_cq *cq, struct ib_wc *wc)
421 - {
422 - read_write_done(cq, wc, DMA_FROM_DEVICE);
423 - }
424 -
425 - static void write_done(struct ib_cq *cq, struct ib_wc *wc)
426 - {
427 - read_write_done(cq, wc, DMA_TO_DEVICE);
428 - }
429 -
430 - static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
431 - void *buf, int buf_len,
432 - struct smbdirect_buffer_descriptor_v1 *desc,
433 - unsigned int desc_len,
434 - bool is_read)
435 - {
436 - struct smbdirect_socket *sc = &t->socket;
437 - struct smbdirect_socket_parameters *sp = &sc->parameters;
438 - struct smbdirect_rw_io *msg, *next_msg;
439 - int i, ret;
440 - DECLARE_COMPLETION_ONSTACK(completion);
441 - struct ib_send_wr *first_wr;
442 - LIST_HEAD(msg_list);
443 - char *desc_buf;
444 - int credits_needed;
445 - unsigned int desc_buf_len, desc_num = 0;
446 -
447 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
448 - return -ENOTCONN;
449 -
450 - if (buf_len > sp->max_read_write_size)
451 - return -EINVAL;
452 -
453 - /* calculate needed credits */
454 - credits_needed = 0;
455 - desc_buf = buf;
456 - for (i = 0; i < desc_len / sizeof(*desc); i++) {
457 - if (!buf_len)
458 - break;
459 -
460 - desc_buf_len = le32_to_cpu(desc[i].length);
461 - if (!desc_buf_len)
462 - return -EINVAL;
463 -
464 - if (desc_buf_len > buf_len) {
465 - desc_buf_len = buf_len;
466 - desc[i].length = cpu_to_le32(desc_buf_len);
467 - buf_len = 0;
468 - }
469 -
470 - credits_needed += calc_rw_credits(sc, desc_buf, desc_buf_len);
471 - desc_buf += desc_buf_len;
472 - buf_len -= desc_buf_len;
473 - desc_num++;
474 - }
475 -
476 - ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
477 - str_read_write(is_read), buf_len, credits_needed);
478 -
479 - ret = wait_for_rw_credits(sc, credits_needed);
480 - if (ret < 0)
481 - return ret;
482 -
483 - /* build rdma_rw_ctx for each descriptor */
484 - desc_buf = buf;
485 - for (i = 0; i < desc_num; i++) {
486 - msg = kzalloc_flex(*msg, sg_list, SG_CHUNK_SIZE,
487 - KSMBD_DEFAULT_GFP);
488 - if (!msg) {
489 - ret = -ENOMEM;
490 - goto out;
491 - }
492 -
493 - desc_buf_len = le32_to_cpu(desc[i].length);
494 -
495 - msg->socket = sc;
496 - msg->cqe.done = is_read ? read_done : write_done;
497 - msg->completion = &completion;
498 -
499 - msg->sgt.sgl = &msg->sg_list[0];
500 - ret = sg_alloc_table_chained(&msg->sgt,
501 - get_buf_page_count(desc_buf, desc_buf_len),
502 - msg->sg_list, SG_CHUNK_SIZE);
503 - if (ret) {
504 - ret = -ENOMEM;
505 - goto free_msg;
506 - }
507 -
508 - ret = get_sg_list(desc_buf, desc_buf_len,
509 - msg->sgt.sgl, msg->sgt.orig_nents);
510 - if (ret < 0)
511 - goto free_table;
512 -
513 - ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
514 - msg->sgt.sgl,
515 - get_buf_page_count(desc_buf, desc_buf_len),
516 - 0,
517 - le64_to_cpu(desc[i].offset),
518 - le32_to_cpu(desc[i].token),
519 - is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
520 - if (ret < 0) {
521 - pr_err("failed to init rdma_rw_ctx: %d\n", ret);
522 - goto free_table;
523 - }
524 -
525 - list_add_tail(&msg->list, &msg_list);
526 - desc_buf += desc_buf_len;
527 - }
528 -
529 - /* concatenate work requests of rdma_rw_ctxs */
530 - first_wr = NULL;
531 - list_for_each_entry_reverse(msg, &msg_list, list) {
532 - first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
533 - &msg->cqe, first_wr);
534 - }
535 -
536 - ret = ib_post_send(sc->ib.qp, first_wr, NULL);
537 - if (ret) {
538 - pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
539 - goto out;
540 - }
541 -
542 - msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list);
543 - wait_for_completion(&completion);
544 - ret = msg->error;
545 - out:
546 - list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
547 - list_del(&msg->list);
548 - smb_direct_free_rdma_rw_msg(t, msg,
549 - is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
550 - }
551 - atomic_add(credits_needed, &sc->rw_io.credits.count);
552 - wake_up(&sc->rw_io.credits.wait_queue);
553 - return ret;
554 -
555 - free_table:
556 - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
557 - free_msg:
558 - kfree(msg);
559 - goto out;
1627 + return smbdirect_connection_send_iter(sc, &iter, 0,
1628 + need_invalidate, remote_key);
560 1629 }
561 1630
562 1631 static int smb_direct_rdma_write(struct ksmbd_transport *t,
··· 258 1939 struct smbdirect_buffer_descriptor_v1 *desc,
259 1940 unsigned int desc_len)
260 1941 {
261 - return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen,
262 - desc, desc_len, false);
1942 + struct smb_direct_transport *st = SMBD_TRANS(t);
1943 + struct smbdirect_socket *sc = st->socket;
1944 +
1945 + return smbdirect_connection_rdma_xmit(sc, buf, buflen,
1946 + desc, desc_len, false);
263 1947 }
264 1948
265 1949 static int smb_direct_rdma_read(struct ksmbd_transport *t,
··· 270 1948 struct smbdirect_buffer_descriptor_v1 *desc,
271 1949 unsigned int desc_len)
272 1950 {
273 - return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen,
274 - desc, desc_len, true);
1951 + struct smb_direct_transport *st = SMBD_TRANS(t);
1952 + struct smbdirect_socket *sc = st->socket;
1953 +
1954 + return smbdirect_connection_rdma_xmit(sc, buf, buflen,
1955 + desc, desc_len, true);
275 1956 }
276 1957
277 1958 static void smb_direct_disconnect(struct ksmbd_transport *t)
278 1959 {
279 1960 struct smb_direct_transport *st = SMBD_TRANS(t);
280 - struct smbdirect_socket *sc = &st->socket;
1961 + struct smbdirect_socket *sc = st->socket;
281 1962
282 - ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", sc->rdma.cm_id);
1963 + ksmbd_debug(RDMA, "Disconnecting sc=%p\n", sc);
283 1964
284 1965 free_transport(st);
285 1966 }
··· 290 1965 static void smb_direct_shutdown(struct ksmbd_transport *t)
291 1966 {
292 1967 struct smb_direct_transport *st = SMBD_TRANS(t);
293 - struct smbdirect_socket *sc = &st->socket;
1968 + struct smbdirect_socket *sc = st->socket;
294 1969
295 - ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id);
1970 + ksmbd_debug(RDMA, "smb-direct shutdown sc=%p\n", sc);
296 1971
297 - smb_direct_disconnect_rdma_work(&sc->disconnect_work);
1972 + smbdirect_socket_shutdown(sc);
298 1973 }
299 1974
300 - static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
301 - struct rdma_cm_event *event)
1975 + static int smb_direct_new_connection(struct smb_direct_listener *listener,
1976 + struct smbdirect_socket *client_sc)
302 1977 {
303 - struct smbdirect_socket *sc = cm_id->context;
304 - unsigned long flags;
305 -
306 - ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
307 - cm_id, rdma_event_msg(event->event), event->event);
308 -
309 - switch (event->event) {
310 - case RDMA_CM_EVENT_ESTABLISHED: {
311 - /*
312 - * Some drivers (at least mlx5_ib and irdma in roce mode)
313 - * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED,
314 - * we need to adjust our expectation in that case.
315 - *
316 - * If smb_direct_negotiate_recv_done was called first
317 - * it initialized sc->connect.work only for us to
318 - * start, so that we turned into
319 - * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, before
320 - * smb_direct_negotiate_recv_work() runs.
321 - *
322 - * If smb_direct_negotiate_recv_done didn't happen
323 - * yet. sc->connect.work is still be disabled and
324 - * queue_work() is a no-op.
325 - */
326 - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING))
327 - break;
328 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
329 - spin_lock_irqsave(&sc->connect.lock, flags);
330 - if (!sc->first_error)
331 - queue_work(sc->workqueue, &sc->connect.work);
332 - spin_unlock_irqrestore(&sc->connect.lock, flags);
333 - wake_up(&sc->status_wait);
334 - break;
335 - }
336 - case RDMA_CM_EVENT_DEVICE_REMOVAL:
337 - case RDMA_CM_EVENT_DISCONNECTED: {
338 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
339 - smb_direct_disconnect_rdma_work(&sc->disconnect_work);
340 - if (sc->ib.qp)
341 - ib_drain_qp(sc->ib.qp);
342 - break;
343 - }
344 - case RDMA_CM_EVENT_CONNECT_ERROR: {
345 - sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
346 - smb_direct_disconnect_rdma_work(&sc->disconnect_work);
347 - break;
348 - }
349 - default:
350 - pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
351 - cm_id, rdma_event_msg(event->event),
352 - event->event);
353 - break;
354 - }
355 - return 0;
356 - }
357 -
358 - static void smb_direct_qpair_handler(struct ib_event *event, void *context)
359 - {
360 - struct smbdirect_socket *sc = context;
361 -
362 - ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
363 - sc->rdma.cm_id, ib_event_msg(event->event), event->event);
364 -
365 - switch (event->event) {
366 - case IB_EVENT_CQ_ERR:
367 - case IB_EVENT_QP_FATAL:
368 - smb_direct_disconnect_rdma_connection(sc);
369 - break;
370 - default:
371 - break;
372 - }
373 - }
374 -
375 - static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc,
376 - int failed)
377 - {
378 - struct smbdirect_socket_parameters *sp = &sc->parameters;
379 - struct smbdirect_send_io *sendmsg;
380 - struct smbdirect_negotiate_resp *resp;
381 - int ret;
382 -
383 - sendmsg = smb_direct_alloc_sendmsg(sc);
384 - if (IS_ERR(sendmsg))
385 - return -ENOMEM;
386 -
387 - resp = (struct smbdirect_negotiate_resp *)sendmsg->packet;
388 - if (failed) {
389 - memset(resp, 0, sizeof(*resp));
390 - resp->min_version = SMB_DIRECT_VERSION_LE;
391 - resp->max_version = SMB_DIRECT_VERSION_LE;
392 - resp->status = STATUS_NOT_SUPPORTED;
393 -
394 - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
395 - } else {
396 - resp->status = STATUS_SUCCESS;
397 - resp->min_version = SMB_DIRECT_VERSION_LE;
398 - resp->max_version = SMB_DIRECT_VERSION_LE;
399 - resp->negotiated_version = SMB_DIRECT_VERSION_LE;
400 - resp->reserved = 0;
401 - resp->credits_requested =
402 - cpu_to_le16(sp->send_credit_target);
403 - resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc));
404 - resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size);
405 - resp->preferred_send_size = cpu_to_le32(sp->max_send_size);
406 - resp->max_receive_size = cpu_to_le32(sp->max_recv_size);
407 - resp->max_fragmented_size =
408 - cpu_to_le32(sp->max_fragmented_recv_size);
409 -
410 - atomic_set(&sc->send_io.bcredits.count, 1);
411 - sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER;
412 - sc->status = SMBDIRECT_SOCKET_CONNECTED;
413 - }
414 -
415 - sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev,
416 - (void *)resp, sizeof(*resp),
417 - DMA_TO_DEVICE);
418 - ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr);
419 - if (ret) {
420 - smb_direct_free_sendmsg(sc, sendmsg);
421 - return ret;
422 - }
423 -
424 - sendmsg->num_sge = 1;
425 - sendmsg->sge[0].length = sizeof(*resp);
426 - sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey;
427 -
428 - ret = post_sendmsg(sc, NULL, sendmsg);
429 - if (ret) {
430 - smb_direct_free_sendmsg(sc, sendmsg);
431 - return ret;
432 - }
433 -
434 - wait_event(sc->send_io.pending.zero_wait_queue,
435 - atomic_read(&sc->send_io.pending.count) == 0 ||
436 - sc->status != SMBDIRECT_SOCKET_CONNECTED);
437 - if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
438 - return -ENOTCONN;
439 -
440 - return 0;
441 - }
442 -
443 - static int smb_direct_accept_client(struct smbdirect_socket *sc)
444 - {
445 - struct smbdirect_socket_parameters *sp = &sc->parameters;
446 - struct rdma_conn_param conn_param;
447 - __be32 ird_ord_hdr[2];
448 - int ret;
449 -
450 - /*
451 - * smb_direct_handle_connect_request()
452 - * already negotiated sp->initiator_depth
453 - * and sp->responder_resources
454 - */
455 - memset(&conn_param, 0, sizeof(conn_param));
456 - conn_param.initiator_depth = sp->initiator_depth;
457 - conn_param.responder_resources = sp->responder_resources;
458 -
459 - if (sc->rdma.legacy_iwarp) {
460 - ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
461 - ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
462 - conn_param.private_data = ird_ord_hdr;
463 - conn_param.private_data_len = sizeof(ird_ord_hdr);
464 - } else {
465 - conn_param.private_data = NULL;
466 - conn_param.private_data_len = 0;
467 - }
468 - conn_param.retry_count = SMB_DIRECT_CM_RETRY;
469 - conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
470 - conn_param.flow_control = 0;
471 -
472 - /*
473 - * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING
474 - * so that the timer will cause a disconnect.
475 - */
476 - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING;
477 - mod_delayed_work(sc->workqueue, &sc->idle.timer_work,
478 - msecs_to_jiffies(sp->negotiate_timeout_msec));
479 -
480 - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED);
481 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING;
482 - ret = rdma_accept(sc->rdma.cm_id, &conn_param);
483 - if (ret) {
484 - pr_err("error at rdma_accept: %d\n", ret);
485 - return ret;
486 - }
487 - return 0;
488 - }
489 -
490 - static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc)
491 - {
492 - struct smbdirect_recv_io *recvmsg;
493 - bool recv_posted = false;
494 - int ret;
495 -
496 - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED);
497 - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED;
498 -
499 - sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ;
500 -
501 - recvmsg = get_free_recvmsg(sc);
502 - if (!recvmsg)
503 - return -ENOMEM;
504 - recvmsg->cqe.done = smb_direct_negotiate_recv_done;
505 -
506 - ret = smb_direct_post_recv(sc, recvmsg);
507 - if (ret) {
508 - pr_err("Can't post recv: %d\n", ret);
509 - goto out_err;
510 - }
511 - recv_posted = true;
512 -
513 - ret = smb_direct_accept_client(sc);
514 - if (ret) {
515 - pr_err("Can't accept client\n");
516 - goto out_err;
517 - }
518 -
519 - return 0;
520 - out_err:
521 - /*
522 - * If the recv was never posted, return it to the free list.
523 - * If it was posted, leave it alone so disconnect teardown can
524 - * drain the QP and complete it (flush) and the completion path
525 - * will unmap it exactly once.
526 - */
527 - if (!recv_posted)
528 - put_recvmsg(sc, recvmsg);
529 - return ret;
530 - }
531 -
532 - static int smb_direct_init_params(struct smbdirect_socket *sc)
533 - {
534 - struct smbdirect_socket_parameters *sp = &sc->parameters;
535 - int max_send_sges;
536 - unsigned int maxpages;
537 -
538 - /* need 3 more sge. because a SMB_DIRECT header, SMB2 header,
539 - * SMB2 response could be mapped.
540 - */
541 - max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3;
542 - if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) {
543 - pr_err("max_send_size %d is too large\n", sp->max_send_size);
544 - return -EINVAL;
545 - }
546 -
547 - atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);
548 -
549 - maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE);
550 - sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev,
551 - sc->rdma.cm_id->port_num,
552 - maxpages);
553 - sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max);
554 - /* add one extra in order to handle unaligned pages */
555 - sc->rw_io.credits.max += 1;
556 -
557 - sc->recv_io.credits.target = 1;
558 -
559 - atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max);
560 -
561 - return 0;
562 - }
563 -
564 - static void smb_direct_destroy_pools(struct smbdirect_socket *sc)
565 - {
566 - struct smbdirect_recv_io *recvmsg;
567 -
568 - while ((recvmsg = get_free_recvmsg(sc)))
569 - mempool_free(recvmsg, sc->recv_io.mem.pool);
570 -
571 - mempool_destroy(sc->recv_io.mem.pool);
572 - sc->recv_io.mem.pool = NULL;
573 -
574 - kmem_cache_destroy(sc->recv_io.mem.cache);
575 - sc->recv_io.mem.cache = NULL;
576 -
577 - mempool_destroy(sc->send_io.mem.pool);
578 - sc->send_io.mem.pool = NULL;
579 -
580 - kmem_cache_destroy(sc->send_io.mem.cache);
581 - sc->send_io.mem.cache = NULL;
582 - }
583 -
584 - static int smb_direct_create_pools(struct smbdirect_socket *sc)
585 - {
586 - struct smbdirect_socket_parameters *sp = &sc->parameters;
587 - char name[80];
588 - int i;
589 - struct smbdirect_recv_io *recvmsg;
590 -
591 - snprintf(name, sizeof(name), "smbdirect_send_io_pool_%p", sc);
592 - sc->send_io.mem.cache = kmem_cache_create(name,
593 - sizeof(struct smbdirect_send_io) +
594 - sizeof(struct smbdirect_negotiate_resp),
595 - 0, SLAB_HWCACHE_ALIGN, NULL);
596 - if (!sc->send_io.mem.cache)
597 - return -ENOMEM;
598 -
599 - sc->send_io.mem.pool = mempool_create(sp->send_credit_target,
600 - mempool_alloc_slab, mempool_free_slab,
601 - sc->send_io.mem.cache);
602 - if (!sc->send_io.mem.pool)
603 - goto err;
604 -
605 - snprintf(name, sizeof(name), "smbdirect_recv_io_pool_%p", sc);
606 - sc->recv_io.mem.cache = kmem_cache_create(name,
607 - sizeof(struct smbdirect_recv_io) +
608 - sp->max_recv_size,
609 - 0, SLAB_HWCACHE_ALIGN, NULL);
610 - if (!sc->recv_io.mem.cache)
611 - goto err;
612 -
613 - sc->recv_io.mem.pool =
614 - mempool_create(sp->recv_credit_max, mempool_alloc_slab,
615 - mempool_free_slab, sc->recv_io.mem.cache);
616 - if (!sc->recv_io.mem.pool)
617 - goto err;
618 -
619 - for (i = 0; i < sp->recv_credit_max; i++) {
620 - recvmsg = mempool_alloc(sc->recv_io.mem.pool, KSMBD_DEFAULT_GFP);
621 - if (!recvmsg)
622 - goto err;
623 - recvmsg->socket = sc;
624 - recvmsg->sge.length = 0;
625 - list_add(&recvmsg->list, &sc->recv_io.free.list);
626 - }
627 -
628 - return 0;
629 - err:
630 - smb_direct_destroy_pools(sc);
631 - return -ENOMEM;
632 - }
633 -
634 - static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr)
635 - {
636 - /*
637 - * This could be split out of rdma_rw_init_qp()
638 - * and be a helper function next to rdma_rw_mr_factor()
639 - *
640 - * We can't check unlikely(rdma_rw_force_mr) here,
641 - * but that is most likely 0 anyway.
642 - */
643 - u32 factor;
644 -
645 - WARN_ON_ONCE(attr->port_num == 0);
646 -
647 - /*
648 - * Each context needs at least one RDMA READ or WRITE WR.
649 - *
650 - * For some hardware we might need more, eventually we should ask the
651 - * HCA driver for a multiplier here.
652 - */
653 - factor = 1;
654 -
655 - /*
656 - * If the device needs MRs to perform RDMA READ or WRITE operations,
657 - * we'll need two additional MRs for the registrations and the
658 - * invalidation.
659 - */
660 - if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd)
661 - factor += 2; /* inv + reg */
662 -
663 - return factor * attr->cap.max_rdma_ctxs;
664 - }
665 -
666 - static int smb_direct_create_qpair(struct smbdirect_socket *sc)
667 - {
668 - struct smbdirect_socket_parameters *sp = &sc->parameters;
669 - int ret;
670 - struct ib_qp_cap qp_cap;
671 - struct ib_qp_init_attr qp_attr;
672 - u32 max_send_wr;
673 - u32 rdma_send_wr;
674 -
675 - /*
676 - * Note that {rdma,ib}_create_qp() will call
677 - * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0.
678 - * It will adjust cap->max_send_wr to the required
679 - * number of additional WRs for the RDMA RW operations.
680 - * It will cap cap->max_send_wr to the device limit.
681 - *
682 - * +1 for ib_drain_qp
683 - */
684 - qp_cap.max_send_wr = sp->send_credit_target + 1;
685 - qp_cap.max_recv_wr = sp->recv_credit_max + 1;
686 - qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
687 - qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
688 - qp_cap.max_inline_data = 0;
689 - qp_cap.max_rdma_ctxs = sc->rw_io.credits.max;
690 -
691 - /*
692 - * Find out the number of max_send_wr
693 - * after rdma_rw_init_qp() adjusted it.
694 - *
695 - * We only do it on a temporary variable,
696 - * as rdma_create_qp() will trigger
697 - * rdma_rw_init_qp() again.
698 - */
699 - memset(&qp_attr, 0, sizeof(qp_attr));
700 - qp_attr.cap = qp_cap;
701 - qp_attr.port_num = sc->rdma.cm_id->port_num;
702 - rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr);
703 - max_send_wr = qp_cap.max_send_wr + rdma_send_wr;
704 -
705 - if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe ||
706 - qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) {
707 - pr_err("Possible CQE overrun: max_send_wr %d\n",
708 - qp_cap.max_send_wr);
709 - pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
710 - IB_DEVICE_NAME_MAX,
711 - sc->ib.dev->name,
712 - sc->ib.dev->attrs.max_cqe,
713 - sc->ib.dev->attrs.max_qp_wr);
714 - pr_err("consider lowering send_credit_target = %d\n",
715 - sp->send_credit_target);
716 - return -EINVAL;
717 - }
718 -
719 - if (qp_cap.max_rdma_ctxs &&
720 - (max_send_wr >= sc->ib.dev->attrs.max_cqe ||
721 - max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) {
722 - pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n",
723 - rdma_send_wr, qp_cap.max_send_wr, max_send_wr);
724 - pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
725 - IB_DEVICE_NAME_MAX,
726 - sc->ib.dev->name,
727 - sc->ib.dev->attrs.max_cqe,
728 - sc->ib.dev->attrs.max_qp_wr);
729 - pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n",
730 - sp->send_credit_target, qp_cap.max_rdma_ctxs);
731 - return -EINVAL;
732 - }
733 -
734 - if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe ||
735 - qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) {
736 - pr_err("Possible CQE overrun: max_recv_wr %d\n",
737 - qp_cap.max_recv_wr);
738 - pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
739 - IB_DEVICE_NAME_MAX,
740 - sc->ib.dev->name,
741 - sc->ib.dev->attrs.max_cqe,
742 - sc->ib.dev->attrs.max_qp_wr);
743 - pr_err("consider lowering receive_credit_max = %d\n",
744 - sp->recv_credit_max);
745 - return -EINVAL;
746 - }
747 -
748 - if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge ||
749 - qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) {
750 - pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
751 - IB_DEVICE_NAME_MAX,
752 - sc->ib.dev->name,
753 - sc->ib.dev->attrs.max_send_sge,
754 - sc->ib.dev->attrs.max_recv_sge);
755 - return -EINVAL;
756 - }
757 -
758 - sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
759 - if (IS_ERR(sc->ib.pd)) {
760 - pr_err("Can't create RDMA PD\n");
761 - ret = PTR_ERR(sc->ib.pd);
762 - sc->ib.pd = NULL;
763 - return ret;
764 - }
765 -
766 - sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc,
767 - max_send_wr,
768 - IB_POLL_WORKQUEUE);
769 - if (IS_ERR(sc->ib.send_cq)) {
770 - pr_err("Can't create RDMA send CQ\n");
771 - ret = PTR_ERR(sc->ib.send_cq);
772 - sc->ib.send_cq = NULL;
773 - goto err;
774 - }
775 -
776 - sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc,
777 - qp_cap.max_recv_wr,
778 - IB_POLL_WORKQUEUE);
779 - if (IS_ERR(sc->ib.recv_cq)) {
780 - pr_err("Can't create RDMA recv CQ\n");
781 - ret = PTR_ERR(sc->ib.recv_cq);
782 - sc->ib.recv_cq = NULL;
783 - goto err;
784 - }
785 -
786 - /*
787 - * We reset completely here!
788 - * As the above use was just temporary
789 - * to calc max_send_wr and rdma_send_wr.
790 - *
791 - * rdma_create_qp() will trigger rdma_rw_init_qp()
792 - * again if max_rdma_ctxs is not 0.
793 - */
794 - memset(&qp_attr, 0, sizeof(qp_attr));
795 - qp_attr.event_handler = smb_direct_qpair_handler;
796 - qp_attr.qp_context = sc;
797 - qp_attr.cap = qp_cap;
798 - qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
799 - qp_attr.qp_type = IB_QPT_RC;
800 - qp_attr.send_cq = sc->ib.send_cq;
801 - qp_attr.recv_cq = sc->ib.recv_cq;
802 - qp_attr.port_num = ~0;
803 -
804 - ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr);
805 - if (ret) {
806 - pr_err("Can't create RDMA QP: %d\n", ret);
807 - goto err;
808 - }
809 -
810 - sc->ib.qp = sc->rdma.cm_id->qp;
811 - sc->rdma.cm_id->event_handler = smb_direct_cm_handler;
812 -
813 - return 0;
814 - err:
815 - if (sc->ib.qp) {
816 - sc->ib.qp = NULL;
817 - rdma_destroy_qp(sc->rdma.cm_id);
818 - }
819 - if (sc->ib.recv_cq) {
820 - ib_destroy_cq(sc->ib.recv_cq);
821 - sc->ib.recv_cq = NULL;
822 - }
823 - if (sc->ib.send_cq) {
824 - ib_destroy_cq(sc->ib.send_cq);
825 - sc->ib.send_cq = NULL;
826 - }
827 - if (sc->ib.pd) {
828 - ib_dealloc_pd(sc->ib.pd);
829 - sc->ib.pd = NULL;
830 - }
831 - return ret;
832 - }
833 -
834 - static int smb_direct_prepare(struct ksmbd_transport *t)
835 - {
836 - struct smb_direct_transport *st = SMBD_TRANS(t);
837 - struct smbdirect_socket *sc = &st->socket;
838 - struct smbdirect_socket_parameters *sp = &sc->parameters;
839 - struct smbdirect_recv_io *recvmsg;
840 - struct smbdirect_negotiate_req *req;
841 - unsigned long flags;
842 - int ret;
843 -
844 - /*
845 - * We are waiting to pass the following states:
846 - *
847 - * SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED
848 - * SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING
849 - * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED
850 - *
851 - * To finally get to SMBDIRECT_SOCKET_NEGOTIATE_RUNNING
852 - * in order to continue below.
853 - *
854 - * Everything else is unexpected and an error.
855 - */
856 - ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
857 - ret = wait_event_interruptible_timeout(sc->status_wait,
858 - sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED &&
859 - sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING &&
860 - sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED,
861 - msecs_to_jiffies(sp->negotiate_timeout_msec));
862 - if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)
863 - return ret < 0 ? ret : -ETIMEDOUT;
864 -
865 - recvmsg = get_first_reassembly(sc);
866 - if (!recvmsg)
867 - return -ECONNABORTED;
868 -
869 - ret = smb_direct_check_recvmsg(recvmsg);
870 - if (ret)
871 - goto put;
872 -
873 - req = (struct smbdirect_negotiate_req *)recvmsg->packet;
874 - sp->max_recv_size = min_t(u32, sp->max_recv_size,
875 - le32_to_cpu(req->preferred_send_size));
876 - sp->max_send_size = min_t(u32, sp->max_send_size,
877 - le32_to_cpu(req->max_receive_size));
878 - sp->max_fragmented_send_size =
879 - le32_to_cpu(req->max_fragmented_size);
880 - /*
881 - * The maximum fragmented upper-layer payload receive size supported
882 - *
883 - * Assume max_payload_per_credit is
884 - * smb_direct_receive_credit_max - 24 = 1340
885 - *
886 - * The maximum number would be
887 - * smb_direct_receive_credit_max * max_payload_per_credit
888 - *
889 - * 1340 * 255 = 341700 (0x536C4)
890 - *
891 - * The minimum value from the spec is 131072 (0x20000)
892 - *
893 - * For now we use the logic we used before:
894 - * (1364 * 255) / 2 = 173910 (0x2A756)
895 - *
896 - * We need to adjust this here in case the peer
897 - * lowered sp->max_recv_size.
898 - *
899 - * TODO: instead of adjusting max_fragmented_recv_size
900 - * we should adjust the number of available buffers,
901 - * but for now we keep the current logic.
902 - */
903 - sp->max_fragmented_recv_size =
904 - (sp->recv_credit_max * sp->max_recv_size) / 2;
905 - sc->recv_io.credits.target = le16_to_cpu(req->credits_requested);
906 - sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max);
907 - sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1);
908 -
909 - put:
910 - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
911 - sc->recv_io.reassembly.queue_length--;
912 - list_del(&recvmsg->list);
913 - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
914 - put_recvmsg(sc, recvmsg);
915 -
916 - if (ret == -ECONNABORTED)
917 - return ret;
918 -
919 - if (ret)
920 - goto respond;
921 -
922 - /*
923 - * We negotiated with success, so we need to refill the recv queue.
924 - * We do that with sc->idle.immediate_work still being disabled
925 - * via smbdirect_socket_init(), so that queue_work(sc->workqueue,
926 - * &sc->idle.immediate_work) in smb_direct_post_recv_credits()
927 - * is a no-op.
928 - *
929 - * The message that grants the credits to the client is
930 - * the negotiate response.
931 - */
932 - INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits);
933 - smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work);
934 - if (unlikely(sc->first_error))
935 - return sc->first_error;
936 - INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work);
937 -
938 - respond:
939 - ret = smb_direct_send_negotiate_response(sc, ret);
940 -
941 - return ret;
942 - }
943 -
944 - static int smb_direct_connect(struct smbdirect_socket *sc)
945 - {
946 - struct smbdirect_recv_io *recv_io;
947 - int ret;
948 -
949 - ret = smb_direct_init_params(sc);
950 - if (ret) {
951 - pr_err("Can't configure RDMA parameters\n");
952 - return ret;
953 - }
954 -
955 - ret = smb_direct_create_pools(sc);
956 - if (ret) {
957 - pr_err("Can't init RDMA pool: %d\n", ret);
958 - return ret;
959 - }
960 -
961 - list_for_each_entry(recv_io, &sc->recv_io.free.list, list)
962 - recv_io->cqe.done = recv_done;
963 -
964 - ret = smb_direct_create_qpair(sc);
965 - if (ret) {
966 - pr_err("Can't accept RDMA client: %d\n", ret);
967 - return ret;
968 - }
969 -
970 - ret = smb_direct_prepare_negotiation(sc);
971 - if (ret) {
972 - pr_err("Can't negotiate: %d\n", ret);
973 - return ret;
974 - }
975 - return 0;
976 - }
977 -
978 - static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
979 - {
980 - if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
981 - return false;
982 - if (attrs->max_fast_reg_page_list_len == 0)
983 - return false;
984 - return true;
985 - }
986 -
987 - static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id,
988 - struct rdma_cm_event *event)
989 - {
990 - struct smb_direct_listener *listener = new_cm_id->context;
991 1978 struct smb_direct_transport *t;
992 - struct smbdirect_socket *sc;
993 - struct smbdirect_socket_parameters *sp;
994 1979 struct task_struct *handler;
995 - u8 peer_initiator_depth;
996 - u8 peer_responder_resources;
997 1980 int ret;
998 1981
999 - if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
1000 - ksmbd_debug(RDMA,
1001 - "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
1002 - new_cm_id->device->attrs.device_cap_flags);
1003 - return -EPROTONOSUPPORT;
1004 - }
1005 -
1006 - t = alloc_transport(new_cm_id);
1007 - if (!t)
1982 + t = alloc_transport(client_sc);
1983 + if (!t) {
1984 + smbdirect_socket_release(client_sc);
1008 1985 return -ENOMEM;
1009 - sc = &t->socket;
1010 - sp = &sc->parameters;
1011 -
1012 - peer_initiator_depth = event->param.conn.initiator_depth;
1013 - peer_responder_resources = event->param.conn.responder_resources;
1014 - if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) &&
1015 - event->param.conn.private_data_len == 8) {
1016 - /*
1017 - * Legacy clients with only iWarp MPA v1 support
1018 - * need a private blob in order to negotiate
1019 - * the IRD/ORD values.
1020 - */
1021 - const __be32 *ird_ord_hdr = event->param.conn.private_data;
1022 - u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
1023 - u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);
1024 -
1025 - /*
1026 - * cifs.ko sends the legacy IRD/ORD negotiation
1027 - * event if iWarp MPA v2 was used.
1028 - *
1029 - * Here we check that the values match and only
1030 - * mark the client as legacy if they don't match.
1031 - */
1032 - if ((u32)event->param.conn.initiator_depth != ird32 ||
1033 - (u32)event->param.conn.responder_resources != ord32) {
1034 - /*
1035 - * There are broken clients (old cifs.ko)
1036 - * using little endian and also
1037 - * struct rdma_conn_param only uses u8
1038 - * for initiator_depth and responder_resources,
1039 - * so we truncate the value to U8_MAX.
1040 - *
1041 - * smb_direct_accept_client() will then
1042 - * do the real negotiation in order to
1043 - * select the minimum between client and
1044 - * server.
1045 - */
1046 - ird32 = min_t(u32, ird32, U8_MAX);
1047 - ord32 = min_t(u32, ord32, U8_MAX);
1048 -
1049 - sc->rdma.legacy_iwarp = true;
1050 - peer_initiator_depth = (u8)ird32;
1051 - peer_responder_resources = (u8)ord32;
1052 - }
1053 1986 }
1054 -
1055 - /*
1056 - * First set what the we as server are able to support
1057 - */
1058 - sp->initiator_depth = min_t(u8, sp->initiator_depth,
1059 - new_cm_id->device->attrs.max_qp_rd_atom);
1060 -
1061 - /*
1062 - * negotiate the value by using the minimum
1063 - * between client and server if the client provided
1064 - * non 0 values.
1065 - */
1066 - if (peer_initiator_depth != 0)
1067 - sp->initiator_depth = min_t(u8, sp->initiator_depth,
1068 - peer_initiator_depth);
1069 - if (peer_responder_resources != 0)
1070 - sp->responder_resources = min_t(u8, sp->responder_resources,
1071 - peer_responder_resources);
1072 -
1073 - ret = smb_direct_connect(sc);
1074 - if (ret)
1075 - goto out_err;
1076 1987
1077 1988 handler = kthread_run(ksmbd_conn_handler_loop,
1078 1989 KSMBD_TRANS(t)->conn, "ksmbd:r%u",
··· 325 2764 return ret;
326 2765 }
327 2766
328 - static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
329 - struct rdma_cm_event *event)
2767 + static int smb_direct_listener_kthread_fn(void *p)
330 2768 {
331 - switch (event->event) {
332 - case RDMA_CM_EVENT_CONNECT_REQUEST: {
333 - int ret = smb_direct_handle_connect_request(cm_id, event);
2769 + struct smb_direct_listener *listener = (struct smb_direct_listener *)p;
2770 + struct smbdirect_socket *client_sc = NULL;
334 2771
335 - if (ret) {
336 - pr_err("Can't create transport: %d\n", ret);
337 - return ret;
338 - }
2772 + while (!kthread_should_stop()) {
2773 + struct proto_accept_arg arg = { .err = -EINVAL, };
2774 + long timeo = MAX_SCHEDULE_TIMEOUT;
339 2775
340 - ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
341 - cm_id);
342 - break;
2776 + if (!listener->socket)
2777 + break;
2778 + client_sc = smbdirect_socket_accept(listener->socket, timeo, &arg);
2779 + if (!client_sc && arg.err == -EINVAL)
2780 + break;
2781 + if (!client_sc)
2782 + continue;
2783 +
2784 + ksmbd_debug(CONN, "connect success: accepted new connection\n");
2785 + smb_direct_new_connection(listener, client_sc);
343 2786 }
344 - default:
345 - pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
346 - cm_id, rdma_event_msg(event->event), event->event);
347 - break;
348 - }
2787 +
2788 + ksmbd_debug(CONN, "releasing socket\n");
349 2789 return 0;
2790 + }
2791 +
2792 + static void smb_direct_listener_destroy(struct smb_direct_listener *listener)
2793 + {
2794 + int ret;
2795 +
2796 + if (listener->socket)
2797 + smbdirect_socket_shutdown(listener->socket);
2798 +
2799 + if (listener->thread) {
2800 + ret = kthread_stop(listener->thread);
2801 + if (ret)
2802 + pr_err("failed to stop forker thread\n");
2803 + listener->thread = NULL;
2804 + }
2805 +
2806 + if (listener->socket) {
2807 + smbdirect_socket_release(listener->socket);
2808 + listener->socket = NULL;
2809 + }
2810 +
2811 + listener->port = 0;
350 2812 }
351 2813
352 2814 static int smb_direct_listen(struct smb_direct_listener *listener,
353 2815 int port)
354 2816 {
355 - int ret;
356 - struct rdma_cm_id *cm_id;
357 - u8 node_type = RDMA_NODE_UNSPECIFIED;
2817 + struct net *net = current->nsproxy->net_ns;
2818 + struct task_struct *kthread;
358 2819 struct sockaddr_in sin = {
359 2820 .sin_family = AF_INET,
360 2821 .sin_addr.s_addr = htonl(INADDR_ANY),
361 2822 .sin_port = htons(port),
362 2823 };
2824 + struct smbdirect_socket_parameters init_params = {};
2825 + struct smbdirect_socket_parameters *sp;
2826 + struct smbdirect_socket *sc;
2827 + u64 port_flags = 0;
2828 + int ret;
363 2829
364 2830 switch (port) {
365 2831 case SMB_DIRECT_PORT_IWARP:
··· 394 2806 * only allow iWarp devices
395 2807 * for port 5445.
396 2808 */
397 - node_type = RDMA_NODE_RNIC;
2809 + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW;
398 2810 break;
399 2811 case SMB_DIRECT_PORT_INFINIBAND:
400 2812 /*
··· 403 2815 *
404 2816 * (Basically don't allow iWarp devices)
405 2817 */
406 - node_type = RDMA_NODE_IB_CA;
2818 + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB;
407 2819 break;
408 2820 default:
409 2821 pr_err("unsupported smbdirect port=%d!\n", port);
410 2822 return -ENODEV;
411 2823 }
412 2824
413 - cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
414 - listener, RDMA_PS_TCP, IB_QPT_RC);
415 - if (IS_ERR(cm_id)) {
416 - pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
417 - return PTR_ERR(cm_id);
2825 + ret = smbdirect_socket_create_kern(net, &sc);
2826 + if (ret) {
2827 + pr_err("smbdirect_socket_create_kern() failed: %d %1pe\n",
2828 + ret, ERR_PTR(ret));
2829 + return ret;
418 2830 }
419 2831
420 - ret = rdma_restrict_node_type(cm_id, node_type);
2832 + /*
2833 + * Create the initial parameters
2834 + */
2835 + sp = &init_params;
2836 + sp->flags |= port_flags;
2837 + sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000;
2838 + sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH;
2839 + sp->responder_resources = 1;
2840 + sp->recv_credit_max = smb_direct_receive_credit_max;
2841 + sp->send_credit_target = smb_direct_send_credit_target;
2842 + sp->max_send_size = smb_direct_max_send_size;
2843 + sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
2844 + sp->max_recv_size = smb_direct_max_receive_size;
2845 + sp->max_read_write_size = smb_direct_max_read_write_size;
2846 + sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000;
2847 + sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000;
2848 +
2849 + smbdirect_socket_set_logging(sc, NULL,
2850 + smb_direct_logging_needed,
2851 + smb_direct_logging_vaprintf);
2852 + ret = smbdirect_socket_set_initial_parameters(sc, sp);
421 2853 if (ret) {
422 - pr_err("rdma_restrict_node_type(%u) failed %d\n",
423 - node_type, ret);
2854 + pr_err("Failed smbdirect_socket_set_initial_parameters(): %d %1pe\n",
2855 + ret, ERR_PTR(ret));
2856 + goto err;
2857 + }
2858 + ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_WORKQUEUE, KSMBD_DEFAULT_GFP);
2859 + if (ret) {
2860 + pr_err("Failed smbdirect_socket_set_kernel_settings(): %d %1pe\n",
2861 + ret, ERR_PTR(ret));
424 2862 goto err;
425 2863 }
426 2864
427 - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
2865 + ret = smbdirect_socket_bind(sc, (struct sockaddr *)&sin);
428 2866 if (ret) {
429 - pr_err("Can't bind: %d\n", ret);
2867 + pr_err("smbdirect_socket_bind() failed: %d %1pe\n",
2868 + ret, ERR_PTR(ret));
430 2869 goto err;
431 2870 }
432 2871
433 - ret = rdma_listen(cm_id, 10);
2872 + ret = smbdirect_socket_listen(sc, 10);
434 2873 if (ret) {
435 - pr_err("Can't listen: %d\n", ret);
2874 + pr_err("Port[%d] smbdirect_socket_listen() failed: %d %1pe\n",
2875 + port, ret, ERR_PTR(ret));
436 2876 goto err;
437 2877 }
438 2878
439 2879 listener->port = port;
440 - listener->cm_id = cm_id;
2880 + listener->socket = sc;
441 2881
2882 + kthread = kthread_run(smb_direct_listener_kthread_fn,
2883 + listener,
2884 + "ksmbd-smbdirect-listener-%u", port);
2885 + if (IS_ERR(kthread)) {
2886 + ret = PTR_ERR(kthread);
2887 + pr_err("Can't start ksmbd listen kthread: %d %1pe\n",
2888 + ret, ERR_PTR(ret));
2889 + goto err;
2890 + }
2891 +
2892 + listener->thread = kthread;
442 2893 return 0;
443 2894 err:
444 - listener->port = 0;
445 - listener->cm_id = NULL;
446 - rdma_destroy_id(cm_id);
2895 + smb_direct_listener_destroy(listener);
447 2896 return ret;
448 2897 }
449 -
450 - static int smb_direct_ib_client_add(struct ib_device *ib_dev)
451 - {
452 - struct smb_direct_device *smb_dev;
453 -
454 - if (!rdma_frwr_is_supported(&ib_dev->attrs))
455 - return 0;
456 -
457 - smb_dev = kzalloc_obj(*smb_dev, KSMBD_DEFAULT_GFP);
458 - if (!smb_dev)
459 - return -ENOMEM;
460 - smb_dev->ib_dev = ib_dev;
461 -
462 - write_lock(&smb_direct_device_lock);
463 - list_add(&smb_dev->list, &smb_direct_device_list);
464 - write_unlock(&smb_direct_device_lock);
465 -
466 - ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
467 - return 0;
468 - }
469 -
470 - static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
471 - void *client_data)
472 - {
473 - struct smb_direct_device *smb_dev, *tmp;
474 -
475 - write_lock(&smb_direct_device_lock);
476 - list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
477 - if (smb_dev->ib_dev == ib_dev) {
478 - list_del(&smb_dev->list);
479 - kfree(smb_dev);
480 - break;
481 - }
482 - }
483 - write_unlock(&smb_direct_device_lock);
484 - }
485 -
486 - static struct ib_client smb_direct_ib_client = {
487 - .name = "ksmbd_smb_direct_ib",
488 - .add = smb_direct_ib_client_add,
489 - .remove = smb_direct_ib_client_remove,
490 - };
491 2898
492 2899 int ksmbd_rdma_init(void)
493 2900 {
494 2901 int ret;
495 2902
496 2903 smb_direct_ib_listener = smb_direct_iw_listener = (struct smb_direct_listener) {
497 - .cm_id = NULL,
2904 + .socket = NULL,
498 2905 };
499 -
500 - ret = ib_register_client(&smb_direct_ib_client);
501 - if (ret) {
502 - pr_err("failed to ib_register_client\n");
503 - return ret;
504 - }
505 -
506 - /* When a client is running out of send credits, the credits are
507 - * granted by the server's sending a packet using this queue.
508 - * This avoids the situation that a clients cannot send packets
509 - * for lack of credits
510 - */
511 - smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
512 - WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU,
513 - 0);
514 - if (!smb_direct_wq) {
515 - ret = -ENOMEM;
516 - goto err;
517 - }
518 2906
519 2907 ret = smb_direct_listen(&smb_direct_ib_listener,
520 2908 SMB_DIRECT_PORT_INFINIBAND);
··· 499 2935 goto err;
500 2936 }
501 2937
502 - ksmbd_debug(RDMA, "InfiniBand/RoCEv1/RoCEv2 RDMA listener. cm_id=%p\n",
503 - smb_direct_ib_listener.cm_id);
2938 + ksmbd_debug(RDMA, "InfiniBand/RoCEv1/RoCEv2 RDMA listener. socket=%p\n",
2939 + smb_direct_ib_listener.socket);
504 2940
505 2941 ret = smb_direct_listen(&smb_direct_iw_listener,
506 2942 SMB_DIRECT_PORT_IWARP);
··· 509 2945 goto err;
510 2946 }
511 2947
512 - ksmbd_debug(RDMA, "iWarp RDMA listener. cm_id=%p\n",
513 - smb_direct_iw_listener.cm_id);
2948 + ksmbd_debug(RDMA, "iWarp RDMA listener. socket=%p\n",
2949 + smb_direct_iw_listener.socket);
514 2950
515 2951 return 0;
516 2952 err:
517 2953 ksmbd_rdma_stop_listening();
518 - ksmbd_rdma_destroy();
519 2954 return ret;
520 2955 }
521 2956
522 2957 void ksmbd_rdma_stop_listening(void)
523 2958 {
524 - if (!smb_direct_ib_listener.cm_id && !smb_direct_iw_listener.cm_id)
525 - return;
526 -
527 - ib_unregister_client(&smb_direct_ib_client);
528 -
529 - if (smb_direct_ib_listener.cm_id)
530 - rdma_destroy_id(smb_direct_ib_listener.cm_id);
531 - if (smb_direct_iw_listener.cm_id)
532 - rdma_destroy_id(smb_direct_iw_listener.cm_id);
533 -
534 - smb_direct_ib_listener = smb_direct_iw_listener = (struct smb_direct_listener) {
535 - .cm_id = NULL,
536 - };
537 - }
538 -
539 - void ksmbd_rdma_destroy(void)
540 - {
541 - if (smb_direct_wq) {
542 - destroy_workqueue(smb_direct_wq);
543 - smb_direct_wq = NULL;
544 - }
545 - }
546 -
547 - static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev)
548 - {
549 - struct smb_direct_device *smb_dev;
550 - int i;
551 - bool rdma_capable = false;
552 -
553 - read_lock(&smb_direct_device_lock);
554 - list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
555 - for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
556 - struct net_device *ndev;
557 -
558 - ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1);
559 - if (!ndev)
560 - continue;
561 -
562 - if (ndev == netdev) {
563 - dev_put(ndev);
564 - rdma_capable = true;
565 - goto out;
566 - }
567 - dev_put(ndev);
568 - }
569 - }
570 - out:
571 - read_unlock(&smb_direct_device_lock);
572 -
573 - if (rdma_capable == false) {
574 - struct ib_device *ibdev;
575 -
576 - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
577 - if (ibdev) {
578 - rdma_capable = rdma_frwr_is_supported(&ibdev->attrs);
579 - ib_device_put(ibdev);
580 - }
581 - }
582 -
583 - ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n",
584 - netdev->name, str_true_false(rdma_capable));
585 -
586 - return rdma_capable;
2959 + smb_direct_listener_destroy(&smb_direct_ib_listener);
2960 + smb_direct_listener_destroy(&smb_direct_iw_listener);
587 2961 }
588 2962
589 2963 bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
590 2964 {
591 - struct net_device *lower_dev;
592 - struct list_head *iter;
2965 + u8 node_type = smbdirect_netdev_rdma_capable_node_type(netdev);
593 2966
594 - if (ksmbd_find_rdma_capable_netdev(netdev))
595 - return true;
596 -
597 - /* check if netdev is bridge or VLAN */
598 - if (netif_is_bridge_master(netdev) ||
599 - netdev->priv_flags & IFF_802_1Q_VLAN)
600 - netdev_for_each_lower_dev(netdev, lower_dev, iter)
601 - if (ksmbd_find_rdma_capable_netdev(lower_dev))
602 - return true;
603 -
604 - /* check if netdev is IPoIB safely without layer violation */
605 - if (netdev->type == ARPHRD_INFINIBAND)
606 - return true;
607 -
608 - return false;
2967 + return node_type != RDMA_NODE_UNSPECIFIED;
609 2968 }
610 2969
611 2970 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
612 - .prepare = smb_direct_prepare,
613 2971 .disconnect = smb_direct_disconnect,
614 2972 .shutdown = smb_direct_shutdown,
615 2973 .writev = smb_direct_writev,
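Condensed sketch (not part of the patch) of the listener bring-up that the hunks above converge on: create a kernel smbdirect socket, apply the initial parameters, bind and listen, then hand accepted client sockets to a per-listener kthread. Error logging is trimmed; all smbdirect_socket_* and smb_direct_* names are taken from the added lines, while the helper name example_listen_and_accept is hypothetical.

/* illustrative only; mirrors smb_direct_listen() from the + lines above */
static int example_listen_and_accept(struct smb_direct_listener *listener, int port)
{
	struct smbdirect_socket_parameters sp = {
		.recv_credit_max = smb_direct_receive_credit_max,
		.send_credit_target = smb_direct_send_credit_target,
		/* ... remaining limits as set up in smb_direct_listen() ... */
	};
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(port),
	};
	struct smbdirect_socket *sc;
	int ret;

	/* one smbdirect socket per listener, owned by the common smbdirect.ko */
	ret = smbdirect_socket_create_kern(current->nsproxy->net_ns, &sc);
	if (ret)
		return ret;
	ret = smbdirect_socket_set_initial_parameters(sc, &sp);
	if (!ret)
		ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_WORKQUEUE, GFP_KERNEL);
	if (!ret)
		ret = smbdirect_socket_bind(sc, (struct sockaddr *)&sin);
	if (!ret)
		ret = smbdirect_socket_listen(sc, 10);
	if (ret) {
		smbdirect_socket_release(sc);
		return ret;
	}
	listener->socket = sc;
	/*
	 * A dedicated kthread (smb_direct_listener_kthread_fn above) then
	 * loops in smbdirect_socket_accept() and hands each returned client
	 * socket to smb_direct_new_connection().
	 */
	return 0;
}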
+2 -2
fs/smb/server/transport_rdma.h
··· 14 14 #ifdef CONFIG_SMB_SERVER_SMBDIRECT
15 15 int ksmbd_rdma_init(void);
16 16 void ksmbd_rdma_stop_listening(void);
17 - void ksmbd_rdma_destroy(void);
18 17 bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
19 18 void init_smbd_max_io_size(unsigned int sz);
20 19 unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt);
21 20 #else
22 21 static inline int ksmbd_rdma_init(void) { return 0; }
23 22 static inline void ksmbd_rdma_stop_listening(void) { }
24 - static inline void ksmbd_rdma_destroy(void) { }
25 23 static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
26 24 static inline void init_smbd_max_io_size(unsigned int sz) { }
27 25 static inline unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { return 0; }
28 26 #endif
27 +
28 + #include "../common/smbdirect/smbdirect.h"
29 29
30 30 #endif /* __KSMBD_TRANSPORT_RDMA_H__ */
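Because the #else branch keeps static inline stubs, callers of this header need no CONFIG_SMB_SERVER_SMBDIRECT guards of their own. An illustrative caller (not from the patch; example_start_rdma is hypothetical):

#include "transport_rdma.h"

static int example_start_rdma(struct net_device *netdev)
{
	/* stub returns false when RDMA support is compiled out */
	if (!ksmbd_rdma_capable_netdev(netdev))
		return -EOPNOTSUPP;

	/* stub returns 0 when RDMA support is compiled out */
	return ksmbd_rdma_init();
}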