Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at master · 987 lines · 25 kB
// SPDX-License-Identifier: GPL-2.0-only
/*
 * virtio transport for vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 *
 * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
 * early virtio-vsock proof-of-concept bits.
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/atomic.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <linux/dma-mapping.h>
#include <net/sock.h>
#include <linux/mutex.h>
#include <net/af_vsock.h>

static struct workqueue_struct *virtio_vsock_workqueue;
static struct virtio_vsock __rcu *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
static struct virtio_transport virtio_transport; /* forward declaration */

struct virtio_vsock {
	struct virtio_device *vdev;
	struct virtqueue *vqs[VSOCK_VQ_MAX];

	/* Virtqueue processing is deferred to a workqueue */
	struct work_struct tx_work;
	struct work_struct rx_work;
	struct work_struct event_work;

	/* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX]
	 * must be accessed with tx_lock held.
	 */
	struct mutex tx_lock;
	bool tx_run;

	struct work_struct send_pkt_work;
	struct sk_buff_head send_pkt_queue;

	atomic_t queued_replies;

	/* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
	 * must be accessed with rx_lock held.
	 */
	struct mutex rx_lock;
	bool rx_run;
	int rx_buf_nr;
	int rx_buf_max_nr;

	u32 guest_cid;
	bool seqpacket_allow;

	/* These fields are used only in the tx path in
	 * 'virtio_transport_send_pkt_work()', so to save
	 * stack space there, place both of them here. Each
	 * pointer from 'out_sgs' points to the corresponding
	 * element in 'out_bufs' - this is initialized in
	 * 'virtio_vsock_probe()'. Both fields are protected
	 * by 'tx_lock'. +1 is needed for the packet header.
	 */
	struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1];
	struct scatterlist out_bufs[MAX_SKB_FRAGS + 1];

	/* The following fields are protected by event_lock.
	 * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
	 */
	struct mutex event_lock;
	bool event_run;
	__dma_from_device_group_begin();
	struct virtio_vsock_event event_list[8];
	__dma_from_device_group_end();
};

static u32 virtio_transport_get_local_cid(void)
{
	struct virtio_vsock *vsock;
	u32 ret;

	rcu_read_lock();
	vsock = rcu_dereference(the_virtio_vsock);
	if (!vsock) {
		ret = VMADDR_CID_ANY;
		goto out_rcu;
	}

	ret = vsock->guest_cid;
out_rcu:
	rcu_read_unlock();
	return ret;
}

/* Caller needs to hold vsock->tx_lock on vq */
static int virtio_transport_send_skb(struct sk_buff *skb, struct virtqueue *vq,
				     struct virtio_vsock *vsock, gfp_t gfp)
{
	int ret, in_sg = 0, out_sg = 0;
	struct scatterlist **sgs;

	sgs = vsock->out_sgs;
	sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
		    sizeof(*virtio_vsock_hdr(skb)));
	out_sg++;

	if (!skb_is_nonlinear(skb)) {
		if (skb->len > 0) {
			sg_init_one(sgs[out_sg], skb->data, skb->len);
			out_sg++;
		}
	} else {
		struct skb_shared_info *si;
		int i;

		/* If skb is nonlinear, then its buffer must contain
		 * only the header and nothing more. Data is stored in
		 * the fragged part.
		 */
		WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));

		si = skb_shinfo(skb);

		for (i = 0; i < si->nr_frags; i++) {
			skb_frag_t *skb_frag = &si->frags[i];
			void *va;

			/* We will use 'page_to_virt()' for the userspace page
			 * here, because virtio or dma-mapping layers will call
			 * 'virt_to_phys()' later to fill the buffer descriptor.
			 * We don't touch memory at the "virtual" address of this
			 * page.
			 */
			va = page_to_virt(skb_frag_page(skb_frag));
			sg_init_one(sgs[out_sg],
				    va + skb_frag_off(skb_frag),
				    skb_frag_size(skb_frag));
			out_sg++;
		}
	}

	ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, gfp);
	/* Usually this means that there is no more space available in
	 * the vq
	 */
	if (ret < 0)
		return ret;

	virtio_transport_deliver_tap_pkt(skb);
	return 0;
}

static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
	struct virtio_vsock *vsock =
		container_of(work, struct virtio_vsock, send_pkt_work);
	struct virtqueue *vq;
	bool added = false;
	bool restart_rx = false;

	mutex_lock(&vsock->tx_lock);

	if (!vsock->tx_run)
		goto out;

	vq = vsock->vqs[VSOCK_VQ_TX];

	for (;;) {
		struct sk_buff *skb;
		bool reply;
		int ret;

		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
		if (!skb)
			break;

		reply = virtio_vsock_skb_reply(skb);

		ret = virtio_transport_send_skb(skb, vq, vsock, GFP_KERNEL);
		if (ret < 0) {
			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
			break;
		}

		if (reply) {
			struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
			int val;

			val = atomic_dec_return(&vsock->queued_replies);

			/* Do we now have resources to resume rx processing? */
			if (val + 1 == virtqueue_get_vring_size(rx_vq))
				restart_rx = true;
		}

		added = true;
	}

	if (added)
		virtqueue_kick(vq);

out:
	mutex_unlock(&vsock->tx_lock);

	if (restart_rx)
		queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}

/* Caller needs to hold RCU for vsock.
 * Returns 0 if the packet is successfully put on the vq.
 */
static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struct sk_buff *skb)
{
	struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
	int ret;

	/* Inside RCU, can't sleep! */
	ret = mutex_trylock(&vsock->tx_lock);
	if (unlikely(ret == 0))
		return -EBUSY;

	ret = virtio_transport_send_skb(skb, vq, vsock, GFP_ATOMIC);
	if (ret == 0)
		virtqueue_kick(vq);

	mutex_unlock(&vsock->tx_lock);

	return ret;
}

static int
virtio_transport_send_pkt(struct sk_buff *skb, struct net *net)
{
	struct virtio_vsock_hdr *hdr;
	struct virtio_vsock *vsock;
	int len = skb->len;

	hdr = virtio_vsock_hdr(skb);

	rcu_read_lock();
	vsock = rcu_dereference(the_virtio_vsock);
	if (!vsock) {
		kfree_skb(skb);
		len = -ENODEV;
		goto out_rcu;
	}

	if (le64_to_cpu(hdr->dst_cid) == vsock->guest_cid) {
		kfree_skb(skb);
		len = -ENODEV;
		goto out_rcu;
	}

	/* If send_pkt_queue is empty, packet order is maintained, so we can
	 * safely bypass that queue and try to put the packet directly on the
	 * virtqueue using virtio_transport_send_skb_fast_path.
	 * If this fails, we simply put the packet on the intermediate
	 * queue and schedule the worker.
	 */
	if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) ||
	    virtio_transport_send_skb_fast_path(vsock, skb)) {
		if (virtio_vsock_skb_reply(skb))
			atomic_inc(&vsock->queued_replies);

		virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
		queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
	}

out_rcu:
	rcu_read_unlock();
	return len;
}

static int
virtio_transport_cancel_pkt(struct vsock_sock *vsk)
{
	struct virtio_vsock *vsock;
	int cnt = 0, ret;

	rcu_read_lock();
	vsock = rcu_dereference(the_virtio_vsock);
	if (!vsock) {
		ret = -ENODEV;
		goto out_rcu;
	}

	cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);

	if (cnt) {
		struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
		int new_cnt;

		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
		if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) &&
		    new_cnt < virtqueue_get_vring_size(rx_vq))
			queue_work(virtio_vsock_workqueue, &vsock->rx_work);
	}

	ret = 0;

out_rcu:
	rcu_read_unlock();
	return ret;
}

static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
	int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
	struct scatterlist pkt, *p;
	struct virtqueue *vq;
	struct sk_buff *skb;
	int ret;

	vq = vsock->vqs[VSOCK_VQ_RX];

	do {
		skb = virtio_vsock_alloc_linear_skb(total_len, GFP_KERNEL);
		if (!skb)
			break;

		memset(skb->head, 0, VIRTIO_VSOCK_SKB_HEADROOM);
		sg_init_one(&pkt, virtio_vsock_hdr(skb), total_len);
		p = &pkt;
		ret = virtqueue_add_sgs(vq, &p, 0, 1, skb, GFP_KERNEL);
		if (ret < 0) {
			kfree_skb(skb);
			break;
		}

		vsock->rx_buf_nr++;
	} while (vq->num_free);
	if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
		vsock->rx_buf_max_nr = vsock->rx_buf_nr;
	virtqueue_kick(vq);
}

static void virtio_transport_tx_work(struct work_struct *work)
{
	struct virtio_vsock *vsock =
		container_of(work, struct virtio_vsock, tx_work);
	struct virtqueue *vq;
	bool added = false;

	vq = vsock->vqs[VSOCK_VQ_TX];
	mutex_lock(&vsock->tx_lock);

	if (!vsock->tx_run)
		goto out;

	do {
		struct sk_buff *skb;
		unsigned int len;

		virtqueue_disable_cb(vq);
		while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
			virtio_transport_consume_skb_sent(skb, true);
			added = true;
		}
	} while (!virtqueue_enable_cb(vq));

out:
	mutex_unlock(&vsock->tx_lock);

	if (added)
		queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
}

/* Is there space left for replies to rx packets? */
static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
{
	struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX];
	int val;

	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
	val = atomic_read(&vsock->queued_replies);

	return val < virtqueue_get_vring_size(vq);
}

/* event_lock must be held */
static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
				       struct virtio_vsock_event *event)
{
	struct scatterlist sg;
	struct virtqueue *vq;

	vq = vsock->vqs[VSOCK_VQ_EVENT];

	sg_init_one(&sg, event, sizeof(*event));

	return virtqueue_add_inbuf_cache_clean(vq, &sg, 1, event, GFP_KERNEL);
}

/* event_lock must be held */
static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
		struct virtio_vsock_event *event = &vsock->event_list[i];

		virtio_vsock_event_fill_one(vsock, event);
	}

	virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
}

static void virtio_vsock_reset_sock(struct sock *sk)
{
	/* vmci_transport.c doesn't take sk_lock here either. At least we're
	 * under vsock_table_lock so the sock cannot disappear while we're
	 * executing.
	 */

	sk->sk_state = TCP_CLOSE;
	sk->sk_err = ECONNRESET;
	sk_error_report(sk);
}

static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
{
	struct virtio_device *vdev = vsock->vdev;
	__le64 guest_cid;

	vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
			  &guest_cid, sizeof(guest_cid));
	vsock->guest_cid = le64_to_cpu(guest_cid);
}

/* event_lock must be held */
static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
				      struct virtio_vsock_event *event)
{
	switch (le32_to_cpu(event->id)) {
	case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
		virtio_vsock_update_guest_cid(vsock);
		vsock_for_each_connected_socket(&virtio_transport.transport,
						virtio_vsock_reset_sock);
		break;
	}
}

static void virtio_transport_event_work(struct work_struct *work)
{
	struct virtio_vsock *vsock =
		container_of(work, struct virtio_vsock, event_work);
	struct virtqueue *vq;

	vq = vsock->vqs[VSOCK_VQ_EVENT];

	mutex_lock(&vsock->event_lock);

	if (!vsock->event_run)
		goto out;

	do {
		struct virtio_vsock_event *event;
		unsigned int len;

		virtqueue_disable_cb(vq);
		while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
			if (len == sizeof(*event))
				virtio_vsock_event_handle(vsock, event);

			virtio_vsock_event_fill_one(vsock, event);
		}
	} while (!virtqueue_enable_cb(vq));

	virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
out:
	mutex_unlock(&vsock->event_lock);
}

static void virtio_vsock_event_done(struct virtqueue *vq)
{
	struct virtio_vsock *vsock = vq->vdev->priv;

	if (!vsock)
		return;
	queue_work(virtio_vsock_workqueue, &vsock->event_work);
}

static void virtio_vsock_tx_done(struct virtqueue *vq)
{
	struct virtio_vsock *vsock = vq->vdev->priv;

	if (!vsock)
		return;
	queue_work(virtio_vsock_workqueue, &vsock->tx_work);
}

static void virtio_vsock_rx_done(struct virtqueue *vq)
{
	struct virtio_vsock *vsock = vq->vdev->priv;

	if (!vsock)
		return;
	queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}

static bool virtio_transport_can_msgzerocopy(int bufs_num)
{
	struct virtio_vsock *vsock;
	bool res = false;

	rcu_read_lock();

	vsock = rcu_dereference(the_virtio_vsock);
	if (vsock) {
		struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];

		/* Check that the tx queue is large enough to keep the whole
		 * data to send. This is needed, because when there is
		 * not enough free space in the queue, the current skb to
		 * send will be reinserted at the head of the tx list of
		 * the socket to retry transmission later, so if the skb
		 * is bigger than the whole queue, it will be reinserted
		 * again and again, thus blocking other skbs from being sent.
		 * Each page of the user-provided buffer will be added
		 * as a single buffer to the tx virtqueue, so compare
		 * the number of pages against the maximum capacity of the
		 * queue.
		 */
		if (bufs_num <= vq->num_max)
			res = true;
	}

	rcu_read_unlock();

	return res;
}

static bool virtio_transport_msgzerocopy_allow(void)
{
	return true;
}

bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port)
{
	return vsock_net_mode_global(vsk);
}

static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk,
					     u32 remote_cid);

static bool virtio_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid)
{
	/* The CID could be implemented by the host. Always assume it is. */
	return true;
}

static struct virtio_transport virtio_transport = {
	.transport = {
		.module = THIS_MODULE,

		.get_local_cid = virtio_transport_get_local_cid,
		.has_remote_cid = virtio_transport_has_remote_cid,

		.init = virtio_transport_do_socket_init,
		.destruct = virtio_transport_destruct,
		.release = virtio_transport_release,
		.connect = virtio_transport_connect,
		.shutdown = virtio_transport_shutdown,
		.cancel_pkt = virtio_transport_cancel_pkt,

		.dgram_bind = virtio_transport_dgram_bind,
		.dgram_dequeue = virtio_transport_dgram_dequeue,
		.dgram_enqueue = virtio_transport_dgram_enqueue,
		.dgram_allow = virtio_transport_dgram_allow,

		.stream_dequeue = virtio_transport_stream_dequeue,
		.stream_enqueue = virtio_transport_stream_enqueue,
		.stream_has_data = virtio_transport_stream_has_data,
		.stream_has_space = virtio_transport_stream_has_space,
		.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
		.stream_is_active = virtio_transport_stream_is_active,
		.stream_allow = virtio_transport_stream_allow,

		.seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
		.seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
		.seqpacket_allow = virtio_transport_seqpacket_allow,
		.seqpacket_has_data = virtio_transport_seqpacket_has_data,

		.msgzerocopy_allow = virtio_transport_msgzerocopy_allow,

		.notify_poll_in = virtio_transport_notify_poll_in,
		.notify_poll_out = virtio_transport_notify_poll_out,
		.notify_recv_init = virtio_transport_notify_recv_init,
		.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init = virtio_transport_notify_send_init,
		.notify_send_pre_block = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
		.notify_buffer_size = virtio_transport_notify_buffer_size,
		.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,

		.unsent_bytes = virtio_transport_unsent_bytes,

		.read_skb = virtio_transport_read_skb,
	},

	.send_pkt = virtio_transport_send_pkt,
	.can_msgzerocopy = virtio_transport_can_msgzerocopy,
};

static bool
virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
{
	struct virtio_vsock *vsock;
	bool seqpacket_allow;

	if (!vsock_net_mode_global(vsk))
		return false;

	seqpacket_allow = false;
	rcu_read_lock();
	vsock = rcu_dereference(the_virtio_vsock);
	if (vsock)
		seqpacket_allow = vsock->seqpacket_allow;
	rcu_read_unlock();

	return seqpacket_allow;
}

static void virtio_transport_rx_work(struct work_struct *work)
{
	struct virtio_vsock *vsock =
		container_of(work, struct virtio_vsock, rx_work);
	struct virtqueue *vq;

	vq = vsock->vqs[VSOCK_VQ_RX];

	mutex_lock(&vsock->rx_lock);

	if (!vsock->rx_run)
		goto out;

	do {
		virtqueue_disable_cb(vq);
		for (;;) {
			unsigned int len, payload_len;
			struct virtio_vsock_hdr *hdr;
			struct sk_buff *skb;

			if (!virtio_transport_more_replies(vsock)) {
				/* Stop rx until the device processes already
				 * pending replies. Leave rx virtqueue
				 * callbacks disabled.
				 */
				goto out;
			}

			skb = virtqueue_get_buf(vq, &len);
			if (!skb)
				break;

			vsock->rx_buf_nr--;

			/* Drop short/long packets */
			if (unlikely(len < sizeof(*hdr) ||
				     len > virtio_vsock_skb_len(skb))) {
				kfree_skb(skb);
				continue;
			}

			hdr = virtio_vsock_hdr(skb);
			payload_len = le32_to_cpu(hdr->len);
			if (unlikely(payload_len > len - sizeof(*hdr))) {
				kfree_skb(skb);
				continue;
			}

			if (payload_len)
				virtio_vsock_skb_put(skb, payload_len);

			virtio_transport_deliver_tap_pkt(skb);

			/* Force virtio-transport into global mode since it
			 * does not yet support local-mode namespacing.
			 */
			virtio_transport_recv_pkt(&virtio_transport, skb, NULL);
		}
	} while (!virtqueue_enable_cb(vq));

out:
	if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
		virtio_vsock_rx_fill(vsock);
	mutex_unlock(&vsock->rx_lock);
}

static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
{
	struct virtio_device *vdev = vsock->vdev;
	struct virtqueue_info vqs_info[] = {
		{ "rx", virtio_vsock_rx_done },
		{ "tx", virtio_vsock_tx_done },
		{ "event", virtio_vsock_event_done },
	};
	int ret;

	mutex_lock(&vsock->rx_lock);
	vsock->rx_buf_nr = 0;
	vsock->rx_buf_max_nr = 0;
	mutex_unlock(&vsock->rx_lock);

	atomic_set(&vsock->queued_replies, 0);

	ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
	if (ret < 0)
		return ret;

	virtio_vsock_update_guest_cid(vsock);

	virtio_device_ready(vdev);

	return 0;
}

static void virtio_vsock_vqs_start(struct virtio_vsock *vsock)
{
	mutex_lock(&vsock->tx_lock);
	vsock->tx_run = true;
	mutex_unlock(&vsock->tx_lock);

	mutex_lock(&vsock->rx_lock);
	virtio_vsock_rx_fill(vsock);
	vsock->rx_run = true;
	mutex_unlock(&vsock->rx_lock);

	mutex_lock(&vsock->event_lock);
	virtio_vsock_event_fill(vsock);
	vsock->event_run = true;
	mutex_unlock(&vsock->event_lock);

	/* virtio_transport_send_pkt() can queue packets once
	 * the_virtio_vsock is set, but they won't be processed until
	 * vsock->tx_run is set to true. We queue vsock->send_pkt_work
	 * when initialization finishes to send those packets queued
	 * earlier.
	 * We don't need to queue the other workers (rx, event) because
	 * as long as we don't fill the queues with empty buffers, the
	 * host can't send us any notification.
	 */
	queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
}

static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
{
	struct virtio_device *vdev = vsock->vdev;
	struct sk_buff *skb;

	/* Reset all connected sockets when the VQs disappear */
	vsock_for_each_connected_socket(&virtio_transport.transport,
					virtio_vsock_reset_sock);

	/* Stop all work handlers to make sure no one is accessing the device,
	 * so we can safely call virtio_reset_device().
	 */
	mutex_lock(&vsock->rx_lock);
	vsock->rx_run = false;
	mutex_unlock(&vsock->rx_lock);

	mutex_lock(&vsock->tx_lock);
	vsock->tx_run = false;
	mutex_unlock(&vsock->tx_lock);

	mutex_lock(&vsock->event_lock);
	vsock->event_run = false;
	mutex_unlock(&vsock->event_lock);

	/* Flush all device writes and interrupts, the device will not use any
	 * more buffers.
	 */
	virtio_reset_device(vdev);

	mutex_lock(&vsock->rx_lock);
	while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
		kfree_skb(skb);
	mutex_unlock(&vsock->rx_lock);

	mutex_lock(&vsock->tx_lock);
	while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
		kfree_skb(skb);
	mutex_unlock(&vsock->tx_lock);

	virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);

	/* Delete virtqueues and flush outstanding callbacks if any */
	vdev->config->del_vqs(vdev);
}

static int virtio_vsock_probe(struct virtio_device *vdev)
{
	struct virtio_vsock *vsock = NULL;
	int ret;
	int i;

	ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
	if (ret)
		return ret;

	/* Only one virtio-vsock device per guest is supported */
	if (rcu_dereference_protected(the_virtio_vsock,
				      lockdep_is_held(&the_virtio_vsock_mutex))) {
		ret = -EBUSY;
		goto out;
	}

	vsock = kzalloc_obj(*vsock);
	if (!vsock) {
		ret = -ENOMEM;
		goto out;
	}

	vsock->vdev = vdev;

	mutex_init(&vsock->tx_lock);
	mutex_init(&vsock->rx_lock);
	mutex_init(&vsock->event_lock);
	skb_queue_head_init(&vsock->send_pkt_queue);
	INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
	INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
	INIT_WORK(&vsock->event_work, virtio_transport_event_work);
	INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);

	if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
		vsock->seqpacket_allow = true;

	vdev->priv = vsock;

	ret = virtio_vsock_vqs_init(vsock);
	if (ret < 0)
		goto out;

	for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++)
		vsock->out_sgs[i] = &vsock->out_bufs[i];

	rcu_assign_pointer(the_virtio_vsock, vsock);
	virtio_vsock_vqs_start(vsock);

	mutex_unlock(&the_virtio_vsock_mutex);

	return 0;

out:
	kfree(vsock);
	mutex_unlock(&the_virtio_vsock_mutex);
	return ret;
}

static void virtio_vsock_remove(struct virtio_device *vdev)
{
	struct virtio_vsock *vsock = vdev->priv;

	mutex_lock(&the_virtio_vsock_mutex);

	vdev->priv = NULL;
	rcu_assign_pointer(the_virtio_vsock, NULL);
	synchronize_rcu();

	virtio_vsock_vqs_del(vsock);

	/* Other works can be queued before 'config->del_vqs()', so we flush
	 * all works before freeing the vsock object to avoid a use after
	 * free.
	 */
	flush_work(&vsock->rx_work);
	flush_work(&vsock->tx_work);
	flush_work(&vsock->event_work);
	flush_work(&vsock->send_pkt_work);

	mutex_unlock(&the_virtio_vsock_mutex);

	kfree(vsock);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_vsock_freeze(struct virtio_device *vdev)
{
	struct virtio_vsock *vsock = vdev->priv;

	mutex_lock(&the_virtio_vsock_mutex);

	rcu_assign_pointer(the_virtio_vsock, NULL);
	synchronize_rcu();

	virtio_vsock_vqs_del(vsock);

	mutex_unlock(&the_virtio_vsock_mutex);

	return 0;
}

static int virtio_vsock_restore(struct virtio_device *vdev)
{
	struct virtio_vsock *vsock = vdev->priv;
	int ret;

	mutex_lock(&the_virtio_vsock_mutex);

	/* Only one virtio-vsock device per guest is supported */
	if (rcu_dereference_protected(the_virtio_vsock,
				      lockdep_is_held(&the_virtio_vsock_mutex))) {
		ret = -EBUSY;
		goto out;
	}

	ret = virtio_vsock_vqs_init(vsock);
	if (ret < 0)
		goto out;

	rcu_assign_pointer(the_virtio_vsock, vsock);
	virtio_vsock_vqs_start(vsock);

out:
	mutex_unlock(&the_virtio_vsock_mutex);
	return ret;
}
#endif /* CONFIG_PM_SLEEP */

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_VSOCK_F_SEQPACKET
};

static struct virtio_driver virtio_vsock_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name = KBUILD_MODNAME,
	.id_table = id_table,
	.probe = virtio_vsock_probe,
	.remove = virtio_vsock_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze = virtio_vsock_freeze,
	.restore = virtio_vsock_restore,
#endif
};

static int __init virtio_vsock_init(void)
{
	int ret;

	virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", WQ_PERCPU, 0);
	if (!virtio_vsock_workqueue)
		return -ENOMEM;

	ret = vsock_core_register(&virtio_transport.transport,
				  VSOCK_TRANSPORT_F_G2H);
	if (ret)
		goto out_wq;

	ret = register_virtio_driver(&virtio_vsock_driver);
	if (ret)
		goto out_vci;

	return 0;

out_vci:
	vsock_core_unregister(&virtio_transport.transport);
out_wq:
	destroy_workqueue(virtio_vsock_workqueue);
	return ret;
}

static void __exit virtio_vsock_exit(void)
{
	unregister_virtio_driver(&virtio_vsock_driver);
	vsock_core_unregister(&virtio_transport.transport);
	destroy_workqueue(virtio_vsock_workqueue);
}

module_init(virtio_vsock_init);
module_exit(virtio_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("virtio transport for vsock");
MODULE_DEVICE_TABLE(virtio, id_table);
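
For context on how a guest exercises this transport from userspace, here is a minimal sketch of an AF_VSOCK client. The port number (1234) and the presence of a host-side listener (e.g. `socat - VSOCK-LISTEN:1234` on the hypervisor) are assumptions for illustration; AF_VSOCK, struct sockaddr_vm and VMADDR_CID_HOST are the standard vsock userspace interface from <linux/vm_sockets.h>, not part of the driver above.

/* Minimal guest-side AF_VSOCK client sketch (userspace, not kernel code).
 * Assumes the host (CID 2) is listening on port 1234, for example via:
 *   socat - VSOCK-LISTEN:1234
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_HOST,	/* CID 2: the host/hypervisor */
		.svm_port = 1234,		/* assumed listener port */
	};
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("vsock");
		return 1;
	}
	write(fd, "ping\n", 5);	/* data flows through the tx virtqueue above */
	close(fd);
	return 0;
}

SOCK_SEQPACKET can be used the same way when the device offers VIRTIO_VSOCK_F_SEQPACKET, which is exactly what the seqpacket_allow plumbing in this file gates.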