Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Virtio ring implementation.
3 *
4 * Copyright 2007 Rusty Russell IBM Corporation
5 */
6#include <linux/virtio.h>
7#include <linux/virtio_ring.h>
8#include <linux/virtio_config.h>
9#include <linux/device.h>
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/hrtimer.h>
13#include <linux/dma-mapping.h>
14#include <linux/kmsan.h>
15#include <linux/spinlock.h>
16#include <xen/xen.h>
17
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				(_vq)->last_add_time)) > 100);	\
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
/*
 * Non-debug variant: mark the ring broken instead of crashing.
 * Note: _vq is parenthesized everywhere (the old &_vq->... form was
 * not macro-hygienic for non-trivial arguments).
 */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
69
/*
 * Which ring layout this virtqueue uses: split or packed, each optionally
 * with the in-order optimization (buffers are used in the order they were
 * made available — see the IN_ORDER comments in struct vring_virtqueue).
 */
enum vq_layout {
	VQ_LAYOUT_SPLIT = 0,
	VQ_LAYOUT_PACKED,
	VQ_LAYOUT_SPLIT_IN_ORDER,
	VQ_LAYOUT_PACKED_IN_ORDER,
};
76
/* Per-descriptor driver-side state for the split ring. */
struct vring_desc_state_split {
	void *data;			/* Data for callback. */

	/* Indirect desc table and extra table, if any. These two will be
	 * allocated together. So we won't stress more to the memory allocator.
	 */
	struct vring_desc *indir_desc;
	/* Sum of device-writable lengths for this buffer; reported as the
	 * used length on in-order rings (see virtqueue_add_split()). */
	u32 total_in_len;
};
86
/* Per-descriptor driver-side state for the packed ring. */
struct vring_desc_state_packed {
	void *data;			/* Data for callback. */

	/* Indirect desc table and extra table, if any. These two will be
	 * allocated together. So we won't stress more to the memory allocator.
	 */
	struct vring_packed_desc *indir_desc;
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
	u32 total_in_len;		/* In length for the skipped buffer. */
};
98
/*
 * Driver-private shadow of one descriptor: keeps the DMA address, length
 * and flags needed for unmapping, plus the free-list link, since the
 * device-visible descriptor cannot be trusted/used for that.
 */
struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};
105
/* Split-ring specific portion of a vring_virtqueue. */
struct vring_virtqueue_split {
	/* Actual memory layout for this queue. */
	struct vring vring;

	/* Last written value to avail->flags */
	u16 avail_flags_shadow;

	/*
	 * Last written value to avail->idx in
	 * guest byte order.
	 */
	u16 avail_idx_shadow;

	/* Per-descriptor state. */
	struct vring_desc_state_split *desc_state;
	struct vring_desc_extra *desc_extra;

	/* DMA address and size information */
	dma_addr_t queue_dma_addr;
	size_t queue_size_in_bytes;

	/*
	 * The parameters for creating vrings are reserved for creating new
	 * vring.
	 */
	u32 vring_align;
	bool may_reduce_num;
};
134
/* Packed-ring specific portion of a vring_virtqueue. */
struct vring_virtqueue_packed {
	/* Actual memory layout for this queue. */
	struct {
		unsigned int num;
		struct vring_packed_desc *desc;
		struct vring_packed_desc_event *driver;
		struct vring_packed_desc_event *device;
	} vring;

	/* Driver ring wrap counter. */
	bool avail_wrap_counter;

	/* Avail used flags. */
	u16 avail_used_flags;

	/* Index of the next avail descriptor. */
	u16 next_avail_idx;

	/*
	 * Last written value to driver->flags in
	 * guest byte order.
	 */
	u16 event_flags_shadow;

	/* Per-descriptor state. */
	struct vring_desc_state_packed *desc_state;
	struct vring_desc_extra *desc_extra;

	/* DMA address and size information */
	dma_addr_t ring_dma_addr;
	dma_addr_t driver_event_dma_addr;
	dma_addr_t device_event_dma_addr;
	size_t ring_size_in_bytes;
	size_t event_size_in_bytes;
};
170
171struct vring_virtqueue;
172
/*
 * Layout-specific virtqueue operations: one implementation each for the
 * split and packed rings, selected when the queue is created.
 */
struct virtqueue_ops {
	/* Expose buffers (out sg lists followed by in sg lists) to the device. */
	int (*add)(struct vring_virtqueue *vq, struct scatterlist *sgs[],
		   unsigned int total_sg, unsigned int out_sgs,
		   unsigned int in_sgs, void *data,
		   void *ctx, bool premapped, gfp_t gfp,
		   unsigned long attr);
	/* Get the next used buffer's token (and written length / context). */
	void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx);
	/* Whether a device notification is needed after adding buffers. */
	bool (*kick_prepare)(struct vring_virtqueue *vq);
	void (*disable_cb)(struct vring_virtqueue *vq);
	bool (*enable_cb_delayed)(struct vring_virtqueue *vq);
	unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq);
	/* Whether used buffers appeared since the given snapshot. */
	bool (*poll)(const struct vring_virtqueue *vq,
		     unsigned int last_used_idx);
	void *(*detach_unused_buf)(struct vring_virtqueue *vq);
	bool (*more_used)(const struct vring_virtqueue *vq);
	int (*resize)(struct vring_virtqueue *vq, u32 num);
	void (*reset)(struct vring_virtqueue *vq);
};
191
/*
 * Driver-private representation of a virtqueue, wrapping the public
 * struct virtqueue handed out to virtio drivers.
 */
struct vring_virtqueue {
	struct virtqueue vq;

	/* Is DMA API used? */
	bool use_map_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Split vs packed, with or without in-order (see enum vq_layout). */
	enum vq_layout layout;

	/*
	 * Without IN_ORDER it's the head of free buffer list. With
	 * IN_ORDER and SPLIT, it's the next available buffer
	 * index. With IN_ORDER and PACKED, it's unused.
	 */
	unsigned int free_head;

	/*
	 * With IN_ORDER, once we see an in-order batch, this stores
	 * this last entry, and until we return the last buffer.
	 * After this, id is set to UINT_MAX to mark it invalid.
	 * Unused without IN_ORDER.
	 */
	struct used_entry {
		u32 id;
		u32 len;
	} batch_last;

	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen.
	 * for split ring, it just contains last used index
	 * for packed ring:
	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
	 */
	u16 last_used_idx;

	/* With IN_ORDER and SPLIT, last descriptor id we used to
	 * detach buffer.
	 */
	u16 last_used;

	/* Hint for event idx: already triggered no need to disable. */
	bool event_triggered;

	union {
		/* Available for split ring */
		struct vring_virtqueue_split split;

		/* Available for packed ring */
		struct vring_virtqueue_packed packed;
	};

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

	union virtio_map map;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};
274
275static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
276static void vring_free(struct virtqueue *_vq);
277
278/*
279 * Helpers.
280 */
281
282#define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
283
284
285static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq)
286{
287 return vq->layout == VQ_LAYOUT_PACKED ||
288 vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
289}
290
291static inline bool virtqueue_is_in_order(const struct vring_virtqueue *vq)
292{
293 return vq->layout == VQ_LAYOUT_SPLIT_IN_ORDER ||
294 vq->layout == VQ_LAYOUT_PACKED_IN_ORDER;
295}
296
297static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
298 unsigned int total_sg)
299{
300 /*
301 * If the host supports indirect descriptor tables, and we have multiple
302 * buffers, then go indirect. FIXME: tune this threshold
303 */
304 return (vq->indirect && total_sg > 1 && vq->vq.num_free);
305}
306
307/*
308 * Modern virtio devices have feature bits to specify whether they need a
309 * quirk and bypass the IOMMU. If not there, just use the DMA API.
310 *
311 * If there, the interaction between virtio and DMA API is messy.
312 *
313 * On most systems with virtio, physical addresses match bus addresses,
314 * and it doesn't particularly matter whether we use the DMA API.
315 *
316 * On some systems, including Xen and any system with a physical device
317 * that speaks virtio behind a physical IOMMU, we must use the DMA API
318 * for virtio DMA to work at all.
319 *
320 * On other systems, including SPARC and PPC64, virtio-pci devices are
321 * enumerated as though they are behind an IOMMU, but the virtio host
322 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
323 * there or somehow map everything as the identity.
324 *
325 * For the time being, we preserve historic behavior and bypass the DMA
326 * API.
327 *
328 * TODO: install a per-device DMA ops structure that does the right thing
329 * taking into account all the above quirks, and use the DMA API
330 * unconditionally on data path.
331 */
332
333static bool vring_use_map_api(const struct virtio_device *vdev)
334{
335 if (!virtio_has_dma_quirk(vdev))
336 return true;
337
338 /* Otherwise, we are left to guess. */
339 /*
340 * In theory, it's possible to have a buggy QEMU-supposed
341 * emulated Q35 IOMMU and Xen enabled at the same time. On
342 * such a configuration, virtio has never worked and will
343 * not work without an even larger kludge. Instead, enable
344 * the DMA API if we're a Xen guest, which at least allows
345 * all of the sensible Xen configurations to work correctly.
346 */
347 if (xen_domain())
348 return true;
349
350 return false;
351}
352
353static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
354 const struct vring_desc_extra *extra)
355{
356 return vring->use_map_api && (extra->addr != DMA_MAPPING_ERROR);
357}
358
359size_t virtio_max_dma_size(const struct virtio_device *vdev)
360{
361 size_t max_segment_size = SIZE_MAX;
362
363 if (vring_use_map_api(vdev)) {
364 if (vdev->map) {
365 max_segment_size =
366 vdev->map->max_mapping_size(vdev->vmap);
367 } else
368 max_segment_size =
369 dma_max_mapping_size(vdev->dev.parent);
370 }
371
372 return max_segment_size;
373}
374EXPORT_SYMBOL_GPL(virtio_max_dma_size);
375
/*
 * Allocate ring memory, either coherently through the mapping API or as
 * plain pages (in which case the "DMA" address is the physical address).
 * Returns NULL on allocation failure or unrepresentable address.
 */
static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *map_handle, gfp_t flag,
			       union virtio_map map)
{
	if (vring_use_map_api(vdev)) {
		return virtqueue_map_alloc_coherent(vdev, map, size,
						    map_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*map_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address. The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine. Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*map_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}
409
410static void vring_free_queue(struct virtio_device *vdev, size_t size,
411 void *queue, dma_addr_t map_handle,
412 union virtio_map map)
413{
414 if (vring_use_map_api(vdev))
415 virtqueue_map_free_coherent(vdev, map, size,
416 queue, map_handle);
417 else
418 free_pages_exact(queue, PAGE_ALIGN(size));
419}
420
421/*
422 * The DMA ops on various arches are rather gnarly right now, and
423 * making all of the arch DMA ops work on the vring device itself
424 * is a mess.
425 */
/* The struct device handed to the DMA API for this vring's mappings. */
static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->map.dma_dev;
}
430
/*
 * Check whether a mapping attempt failed. Without the mapping API,
 * addresses are raw physical addresses and can never fail.
 */
static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	struct virtio_device *vdev = vq->vq.vdev;

	if (!vq->use_map_api)
		return 0;

	/* Device-specific map ops take precedence over the generic DMA API. */
	if (vdev->map)
		return vdev->map->mapping_error(vq->map, addr);
	else
		return dma_mapping_error(vring_dma_dev(vq), addr);
}
444
/* Map one sg entry. Fills *addr/*len; returns 0 or -ENOMEM on mapping error. */
static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
			    enum dma_data_direction direction, dma_addr_t *addr,
			    u32 *len, bool premapped, unsigned long attr)
{
	/* Caller already mapped it: just copy out the DMA address/len. */
	if (premapped) {
		*addr = sg_dma_address(sg);
		*len = sg_dma_len(sg);
		return 0;
	}

	*len = sg->length;

	if (!vq->use_map_api) {
		/*
		 * If DMA is not used, KMSAN doesn't know that the scatterlist
		 * is initialized by the hardware. Explicitly check/unpoison it
		 * depending on the direction.
		 */
		kmsan_handle_dma(sg_phys(sg), sg->length, direction);
		*addr = (dma_addr_t)sg_phys(sg);
		return 0;
	}

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	*addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg),
					 sg->offset, sg->length,
					 direction, attr);

	if (vring_mapping_error(vq, *addr))
		return -ENOMEM;

	return 0;
}
483
/*
 * Map a kernel virtual buffer (used for indirect descriptor tables).
 * Falls back to the physical address when the mapping API is bypassed.
 */
static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_map_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return virtqueue_map_single_attrs(&vq->vq, cpu_addr,
					  size, direction, 0);
}
494
/* Reset the layout-independent bookkeeping for a (re)initialized queue. */
static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
{
	vq->vq.num_free = num;

	/* Packed rings fold the used wrap counter into last_used_idx. */
	if (virtqueue_is_packed(vq))
		vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	else
		vq->last_used_idx = 0;

	vq->last_used = 0;

	vq->event_triggered = false;
	vq->num_added = 0;

#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif
}
514
515
516/*
517 * Split ring specific functions - *_split().
518 */
519
/*
 * Unmap one descriptor's buffer (or indirect table) if we mapped it,
 * and return the next free-list index from its extra state.
 */
static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  struct vring_desc_extra *extra)
{
	u16 flags;

	flags = extra->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		/* Indirect tables are always mapped by us when the map API is on. */
		if (!vq->use_map_api)
			goto out;
	} else if (!vring_need_unmap_buffer(vq, extra))
		goto out;

	virtqueue_unmap_page_attrs(&vq->vq,
				   extra->addr,
				   extra->len,
				   (flags & VRING_DESC_F_WRITE) ?
				   DMA_FROM_DEVICE : DMA_TO_DEVICE,
				   0);

out:
	return extra->next;
}
543
/*
 * Allocate an indirect descriptor table plus its shadow extra array in
 * one allocation (the extras live right after the descriptors), with
 * the extra next-links pre-chained 0 -> 1 -> ... -> total_sg.
 */
static struct vring_desc *alloc_indirect_split(struct vring_virtqueue *vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc_extra *extra;
	struct vring_desc *desc;
	unsigned int i, size;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	size = sizeof(*desc) * total_sg + sizeof(*extra) * total_sg;

	desc = kmalloc(size, gfp);
	if (!desc)
		return NULL;

	extra = (struct vring_desc_extra *)&desc[total_sg];

	for (i = 0; i < total_sg; i++)
		extra[i].next = i + 1;

	return desc;
}
572
/*
 * Fill in one split-ring descriptor (and its shadow extra entry) at
 * index @i, returning the index of the next descriptor in the chain.
 * For premapped buffers, extra->addr is set to DMA_MAPPING_ERROR so the
 * unmap path knows to skip it.
 */
static inline unsigned int virtqueue_add_desc_split(struct vring_virtqueue *vq,
						    struct vring_desc *desc,
						    struct vring_desc_extra *extra,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags, bool premapped)
{
	struct virtio_device *vdev = vq->vq.vdev;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vdev, flags);
	desc[i].addr = cpu_to_virtio64(vdev, addr);
	desc[i].len = cpu_to_virtio32(vdev, len);

	extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
	extra[i].len = len;
	extra[i].flags = flags;

	next = extra[i].next;

	desc[i].next = cpu_to_virtio16(vdev, next);

	return next;
}
598
/*
 * virtqueue_add_split - expose buffers to the device on a split ring
 * @vq: the virtqueue
 * @sgs: array of terminated scatterlists (out lists first, then in lists)
 * @total_sg: total number of sg entries across all lists
 * @out_sgs: number of device-readable scatterlists
 * @in_sgs: number of device-writable scatterlists
 * @data: token returned by virtqueue_get_buf() (must not be NULL)
 * @ctx: extra per-buffer context (mutually exclusive with indirect)
 * @premapped: the buffers are already DMA-mapped by the caller
 * @gfp: allocation flags for an indirect table
 * @attr: DMA mapping attributes
 *
 * Returns 0 on success; -EIO if the ring is broken, -ENOSPC if there is
 * no room, -ENOMEM on mapping or allocation failure.
 */
static inline int virtqueue_add_split(struct vring_virtqueue *vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      bool premapped,
				      gfp_t gfp,
				      unsigned long attr)
{
	struct vring_desc_extra *extra;
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, err_idx, sg_count = 0;
	/* Total length for in-order */
	unsigned int total_in_len = 0;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(vq, total_sg))
		desc = alloc_indirect_split(vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
		/* Extras follow the descriptors (see alloc_indirect_split()). */
		extra = (struct vring_desc_extra *)&desc[total_sg];
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		extra = vq->split.desc_extra;
		i = head;
		descs_used = total_sg;
	}

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	/* Device-readable entries first ... */
	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;
			u16 flags = 0;

			/* Chain every descriptor except the very last. */
			if (++sg_count != total_sg)
				flags |= VRING_DESC_F_NEXT;

			if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr, &len,
					     premapped, attr))
				goto unmap_release;

			/* Note that we trust indirect descriptor
			 * table since it use stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
						     len, flags, premapped);
		}
	}
	/* ... then device-writable entries. */
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;
			u16 flags = VRING_DESC_F_WRITE;

			if (++sg_count != total_sg)
				flags |= VRING_DESC_F_NEXT;

			if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr, &len,
					     premapped, attr))
				goto unmap_release;

			/* Note that we trust indirect descriptor
			 * table since it use stream DMA mapping.
			 */
			i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
						     len, flags, premapped);
			total_in_len += len;
		}
	}

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		virtqueue_add_desc_split(vq, vq->split.vring.desc,
					 vq->split.desc_extra,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT, false);
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (virtqueue_is_in_order(vq)) {
		/* In-order: free_head advances as a ring cursor. */
		vq->free_head += descs_used;
		if (vq->free_head >= vq->split.vring.num)
			vq->free_head -= vq->split.vring.num;
		vq->split.desc_state[head].total_in_len = total_in_len;
	} else if (indirect)
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(vq->vq.vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev,
						     vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(&vq->vq);

	return 0;

unmap_release:
	/* Walk the partially-built chain and undo every mapping we made. */
	err_idx = i;

	if (indirect)
		i = 0;
	else
		i = head;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;

		i = vring_unmap_one_split(vq, &extra[i]);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
793
/*
 * Decide whether the device must be notified after the buffers added
 * since the last kick, honoring either VIRTIO_RING_F_EVENT_IDX or the
 * device's NO_NOTIFY flag.
 */
static bool virtqueue_kick_prepare_split(struct vring_virtqueue *vq)
{
	u16 new, old;
	bool needs_kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	old = vq->split.avail_idx_shadow - vq->num_added;
	new = vq->split.avail_idx_shadow;
	vq->num_added = 0;

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (vq->event) {
		/* Kick only if the device's avail event fell inside (old, new]. */
		needs_kick = vring_need_event(virtio16_to_cpu(vq->vq.vdev,
					vring_avail_event(&vq->split.vring)),
					      new, old);
	} else {
		needs_kick = !(vq->split.vring.used->flags &
					cpu_to_virtio16(vq->vq.vdev,
						VRING_USED_F_NO_NOTIFY));
	}
	END_USE(vq);
	return needs_kick;
}
823
824static void detach_indirect_split(struct vring_virtqueue *vq,
825 unsigned int head)
826{
827 struct vring_desc_extra *extra = vq->split.desc_extra;
828 struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc;
829 unsigned int j;
830 u32 len, num;
831
832 /* Free the indirect table, if any, now that it's unmapped. */
833 if (!indir_desc)
834 return;
835 len = vq->split.desc_extra[head].len;
836
837 BUG_ON(!(vq->split.desc_extra[head].flags &
838 VRING_DESC_F_INDIRECT));
839 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
840
841 num = len / sizeof(struct vring_desc);
842
843 extra = (struct vring_desc_extra *)&indir_desc[num];
844
845 if (vq->use_map_api) {
846 for (j = 0; j < num; j++)
847 vring_unmap_one_split(vq, &extra[j]);
848 }
849
850 kfree(indir_desc);
851 vq->split.desc_state[head].indir_desc = NULL;
852}
853
/*
 * Unmap and reclaim the descriptor chain starting at @head, returning
 * the index of the chain's last descriptor. Common to the in-order and
 * regular paths; the regular path additionally relinks the free list
 * (see detach_buf_split()).
 */
static unsigned detach_buf_split_in_order(struct vring_virtqueue *vq,
					  unsigned int head,
					  void **ctx)
{
	struct vring_desc_extra *extra;
	unsigned int i;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->split.desc_state[head].data = NULL;

	extra = vq->split.desc_extra;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		i = vring_unmap_one_split(vq, &extra[i]);
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, &extra[i]);

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect)
		detach_indirect_split(vq, head);
	else if (ctx)
		*ctx = vq->split.desc_state[head].indir_desc;

	return i;
}
887
888static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
889 void **ctx)
890{
891 unsigned int i = detach_buf_split_in_order(vq, head, ctx);
892
893 vq->split.desc_extra[i].next = vq->free_head;
894 vq->free_head = head;
895}
896
897static bool virtqueue_poll_split(const struct vring_virtqueue *vq,
898 unsigned int last_used_idx)
899{
900 return (u16)last_used_idx != virtio16_to_cpu(vq->vq.vdev,
901 vq->split.vring.used->idx);
902}
903
/* Any used buffers we have not consumed yet? */
static bool more_used_split(const struct vring_virtqueue *vq)
{
	return virtqueue_poll_split(vq, vq->last_used_idx);
}
908
909static bool more_used_split_in_order(const struct vring_virtqueue *vq)
910{
911 if (vq->batch_last.id != UINT_MAX)
912 return true;
913
914 return virtqueue_poll_split(vq, vq->last_used_idx);
915}
916
/*
 * Get the token for the next used buffer on a (non-in-order) split ring.
 * *len is set to the number of bytes the device wrote; *ctx (if non-NULL)
 * receives the per-buffer context. Returns NULL when there is nothing to
 * get or the ring is broken.
 */
static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq,
					 unsigned int *len,
					 void **ctx)
{
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(vq->vq.vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(vq->vq.vdev,
			vq->split.vring.used->ring[last_used].len);

	/* A bogus id from the device marks the whole ring broken. */
	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
973
/*
 * In-order variant of virtqueue_get_buf_ctx_split(): the device may
 * report a whole batch with a single used entry, so buffers are walked
 * in the order they were added (tracked via vq->last_used), with the
 * batch's final id/len cached in vq->batch_last until consumed.
 */
static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq,
						  unsigned int *len,
						  void **ctx)
{
	void *ret;
	unsigned int num = vq->split.vring.num;
	unsigned int num_free = vq->vq.num_free;
	u16 last_used, last_used_idx;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	last_used = vq->last_used & (num - 1);
	last_used_idx = vq->last_used_idx & (num - 1);

	/* No cached batch: fetch the next used entry from the device. */
	if (vq->batch_last.id == UINT_MAX) {
		if (!more_used_split_in_order(vq)) {
			pr_debug("No more buffers in queue\n");
			END_USE(vq);
			return NULL;
		}

		/*
		 * Only get used array entries after they have been
		 * exposed by host.
		 */
		virtio_rmb(vq->weak_barriers);

		vq->batch_last.id = virtio32_to_cpu(vq->vq.vdev,
			vq->split.vring.used->ring[last_used_idx].id);
		vq->batch_last.len = virtio32_to_cpu(vq->vq.vdev,
			vq->split.vring.used->ring[last_used_idx].len);
	}

	/* Last buffer of the batch uses the device-reported len; earlier
	 * ones fall back to the total in-length recorded at add time. */
	if (vq->batch_last.id == last_used) {
		vq->batch_last.id = UINT_MAX;
		*len = vq->batch_last.len;
	} else {
		*len = vq->split.desc_state[last_used].total_in_len;
	}

	if (unlikely(!vq->split.desc_state[last_used].data)) {
		BAD_RING(vq, "id %u is not a head!\n", last_used);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[last_used].data;
	detach_buf_split_in_order(vq, last_used, ctx);

	vq->last_used_idx++;
	/* Advance by however many descriptors the chain occupied. */
	vq->last_used += (vq->vq.num_free - num_free);
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
1043
/* Suppress used-buffer notifications (callbacks) from the device. */
static void virtqueue_disable_cb_split(struct vring_virtqueue *vq)
{
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

		/*
		 * If device triggered an event already it won't trigger one again:
		 * no need to disable.
		 */
		if (vq->event_triggered)
			return;

		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(vq->vq.vdev,
						vq->split.avail_flags_shadow);
	}
}
1065
/*
 * Re-enable notifications and return a snapshot of last_used_idx that
 * the caller can pass to the poll op to detect races with the device.
 */
static unsigned int virtqueue_enable_cb_prepare_split(struct vring_virtqueue *vq)
{
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(vq->vq.vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(vq->vq.vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}
1089
/*
 * Re-enable notifications, but ask the device to delay the interrupt
 * until ~3/4 of the outstanding buffers have been used. Returns false
 * if buffers slipped in meanwhile (caller should poll again).
 */
static bool virtqueue_enable_cb_delayed_split(struct vring_virtqueue *vq)
{
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(vq->vq.vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx + bufs));

	/* Race check: did the device use more than bufs entries already? */
	if (unlikely((u16)(virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}
1124
/*
 * Pull one still-pending (never used) buffer off the ring during
 * teardown, rolling back its avail entry. Returns its token, or NULL
 * once the ring is empty.
 */
static void *virtqueue_detach_unused_buf_split(struct vring_virtqueue *vq)
{
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		if (virtqueue_is_in_order(vq))
			detach_buf_split_in_order(vq, i, NULL);
		else
			detach_buf_split(vq, i, NULL);
		/* Undo the corresponding avail->idx publication. */
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(vq->vq.vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}
1153
1154static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
1155 struct vring_virtqueue *vq)
1156{
1157 struct virtio_device *vdev;
1158
1159 vdev = vq->vq.vdev;
1160
1161 vring_split->avail_flags_shadow = 0;
1162 vring_split->avail_idx_shadow = 0;
1163
1164 /* No callback? Tell other side not to bother us. */
1165 if (!vq->vq.callback) {
1166 vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
1167 if (!vq->event)
1168 vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1169 vring_split->avail_flags_shadow);
1170 }
1171}
1172
1173static void virtqueue_reset_split(struct vring_virtqueue *vq)
1174{
1175 int num;
1176
1177 num = vq->split.vring.num;
1178
1179 vq->split.vring.avail->flags = 0;
1180 vq->split.vring.avail->idx = 0;
1181
1182 /* reset avail event */
1183 vq->split.vring.avail->ring[num] = 0;
1184
1185 vq->split.vring.used->flags = 0;
1186 vq->split.vring.used->idx = 0;
1187
1188 /* reset used event */
1189 *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
1190
1191 virtqueue_init(vq, num);
1192
1193 virtqueue_vring_init_split(&vq->split, vq);
1194}
1195
/*
 * Adopt @vring_split as this virtqueue's active split ring and reset the
 * free-list head. batch_last.id == UINT_MAX marks "no cached used entry"
 * for the in-order fast path.
 */
static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
					 struct vring_virtqueue_split *vring_split)
{
	vq->split = *vring_split;

	/* Put everything in free lists. */
	vq->free_head = 0;
	vq->batch_last.id = UINT_MAX;
}
1205
1206static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1207{
1208 struct vring_desc_state_split *state;
1209 struct vring_desc_extra *extra;
1210 u32 num = vring_split->vring.num;
1211
1212 state = kmalloc_objs(struct vring_desc_state_split, num);
1213 if (!state)
1214 goto err_state;
1215
1216 extra = vring_alloc_desc_extra(num);
1217 if (!extra)
1218 goto err_extra;
1219
1220 memset(state, 0, num * sizeof(struct vring_desc_state_split));
1221
1222 vring_split->desc_state = state;
1223 vring_split->desc_extra = extra;
1224 return 0;
1225
1226err_extra:
1227 kfree(state);
1228err_state:
1229 return -ENOMEM;
1230}
1231
1232static void vring_free_split(struct vring_virtqueue_split *vring_split,
1233 struct virtio_device *vdev,
1234 union virtio_map map)
1235{
1236 vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1237 vring_split->vring.desc,
1238 vring_split->queue_dma_addr,
1239 map);
1240
1241 kfree(vring_split->desc_state);
1242 kfree(vring_split->desc_extra);
1243}
1244
1245static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1246 struct virtio_device *vdev,
1247 u32 num,
1248 unsigned int vring_align,
1249 bool may_reduce_num,
1250 union virtio_map map)
1251{
1252 void *queue = NULL;
1253 dma_addr_t dma_addr;
1254
1255 /* We assume num is a power of 2. */
1256 if (!is_power_of_2(num)) {
1257 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1258 return -EINVAL;
1259 }
1260
1261 /* TODO: allocate each queue chunk individually */
1262 for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1263 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1264 &dma_addr,
1265 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1266 map);
1267 if (queue)
1268 break;
1269 if (!may_reduce_num)
1270 return -ENOMEM;
1271 }
1272
1273 if (!num)
1274 return -ENOMEM;
1275
1276 if (!queue) {
1277 /* Try to get a single page. You are my only hope! */
1278 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1279 &dma_addr, GFP_KERNEL | __GFP_ZERO,
1280 map);
1281 }
1282 if (!queue)
1283 return -ENOMEM;
1284
1285 vring_init(&vring_split->vring, num, queue, vring_align);
1286
1287 vring_split->queue_dma_addr = dma_addr;
1288 vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1289
1290 vring_split->vring_align = vring_align;
1291 vring_split->may_reduce_num = may_reduce_num;
1292
1293 return 0;
1294}
1295
1296static const struct virtqueue_ops split_ops;
1297
/*
 * Wrap an already-allocated split ring in a struct vring_virtqueue,
 * negotiate per-queue feature state, and register the queue on the
 * device's vq list. Returns the embedded virtqueue, or NULL on
 * allocation failure (the ring pages are NOT freed here — the caller
 * retains ownership of @vring_split's memory on error).
 */
static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
					       struct vring_virtqueue_split *vring_split,
					       struct virtio_device *vdev,
					       bool weak_barriers,
					       bool context,
					       bool (*notify)(struct virtqueue *),
					       void (*callback)(struct virtqueue *),
					       const char *name,
					       union virtio_map map)
{
	struct vring_virtqueue *vq;
	int err;

	vq = kmalloc_obj(*vq);
	if (!vq)
		return NULL;

	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.index = index;
	vq->vq.reset = false;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
	/* Hardening: queue starts broken until explicitly un-broken later. */
	vq->broken = true;
#else
	vq->broken = false;
#endif
	vq->map = map;
	vq->use_map_api = vring_use_map_api(vdev);

	/* Indirect descriptors cannot be combined with per-buffer ctx. */
	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
	vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ?
		     VQ_LAYOUT_SPLIT_IN_ORDER : VQ_LAYOUT_SPLIT;

	/* Platform demands real memory barriers, not compiler-only ones. */
	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	err = vring_alloc_state_extra_split(vring_split);
	if (err) {
		kfree(vq);
		return NULL;
	}

	virtqueue_vring_init_split(vring_split, vq);

	virtqueue_init(vq, vring_split->vring.num);
	virtqueue_vring_attach_split(vq, vring_split);

	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;
}
1356
1357static struct virtqueue *vring_create_virtqueue_split(
1358 unsigned int index,
1359 unsigned int num,
1360 unsigned int vring_align,
1361 struct virtio_device *vdev,
1362 bool weak_barriers,
1363 bool may_reduce_num,
1364 bool context,
1365 bool (*notify)(struct virtqueue *),
1366 void (*callback)(struct virtqueue *),
1367 const char *name,
1368 union virtio_map map)
1369{
1370 struct vring_virtqueue_split vring_split = {};
1371 struct virtqueue *vq;
1372 int err;
1373
1374 err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1375 may_reduce_num, map);
1376 if (err)
1377 return NULL;
1378
1379 vq = __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
1380 context, notify, callback, name, map);
1381 if (!vq) {
1382 vring_free_split(&vring_split, vdev, map);
1383 return NULL;
1384 }
1385
1386 to_vvq(vq)->we_own_ring = true;
1387
1388 return vq;
1389}
1390
/*
 * Resize a split virtqueue to @num entries by allocating a fresh ring and
 * swapping it in. If either allocation fails, the existing ring is kept
 * (or re-initialized via virtqueue_reset_split()) so the queue stays
 * usable. Returns 0 or -ENOMEM.
 */
static int virtqueue_resize_split(struct vring_virtqueue *vq, u32 num)
{
	struct vring_virtqueue_split vring_split = {};
	struct virtio_device *vdev = vq->vq.vdev;
	int err;

	/* Reuse the alignment/shrink policy recorded at creation time. */
	err = vring_alloc_queue_split(&vring_split, vdev, num,
				      vq->split.vring_align,
				      vq->split.may_reduce_num,
				      vq->map);
	if (err)
		goto err;

	err = vring_alloc_state_extra_split(&vring_split);
	if (err)
		goto err_state_extra;

	/* Point of no return: the old ring and its state are released here. */
	vring_free(&vq->vq);

	virtqueue_vring_init_split(&vring_split, vq);

	virtqueue_init(vq, vring_split.vring.num);
	virtqueue_vring_attach_split(vq, &vring_split);

	return 0;

err_state_extra:
	vring_free_split(&vring_split, vdev, vq->map);
err:
	/* Old ring was never freed on these paths; re-init it and carry on. */
	virtqueue_reset_split(vq);
	return -ENOMEM;
}
1423
1424
1425/*
1426 * Packed ring specific functions - *_packed().
1427 */
1428static bool packed_used_wrap_counter(u16 last_used_idx)
1429{
1430 return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1431}
1432
1433static u16 packed_last_used(u16 last_used_idx)
1434{
1435 return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1436}
1437
1438static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1439 const struct vring_desc_extra *extra)
1440{
1441 u16 flags;
1442
1443 flags = extra->flags;
1444
1445 if (flags & VRING_DESC_F_INDIRECT) {
1446 if (!vq->use_map_api)
1447 return;
1448 } else if (!vring_need_unmap_buffer(vq, extra))
1449 return;
1450
1451 virtqueue_unmap_page_attrs(&vq->vq,
1452 extra->addr, extra->len,
1453 (flags & VRING_DESC_F_WRITE) ?
1454 DMA_FROM_DEVICE : DMA_TO_DEVICE,
1455 0);
1456}
1457
1458static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1459 gfp_t gfp)
1460{
1461 struct vring_desc_extra *extra;
1462 struct vring_packed_desc *desc;
1463 int i, size;
1464
1465 /*
1466 * We require lowmem mappings for the descriptors because
1467 * otherwise virt_to_phys will give us bogus addresses in the
1468 * virtqueue.
1469 */
1470 gfp &= ~__GFP_HIGHMEM;
1471
1472 size = (sizeof(*desc) + sizeof(*extra)) * total_sg;
1473
1474 desc = kmalloc(size, gfp);
1475 if (!desc)
1476 return NULL;
1477
1478 extra = (struct vring_desc_extra *)&desc[total_sg];
1479
1480 for (i = 0; i < total_sg; i++)
1481 extra[i].next = i + 1;
1482
1483 return desc;
1484}
1485
/*
 * Add a buffer to a packed ring using a single indirect descriptor.
 * The whole scatterlist is written into a separately allocated indirect
 * table, which is then mapped and published through one ring slot (@id
 * is the buffer id chosen by the caller).
 *
 * Returns 0, -ENOMEM (caller may fall back to direct descriptors) or
 * -ENOSPC. Calls END_USE() on every return path except the initial
 * allocation failure; the caller opened the START_USE() section.
 */
static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
					 struct scatterlist *sgs[],
					 unsigned int total_sg,
					 unsigned int out_sgs,
					 unsigned int in_sgs,
					 void *data,
					 bool premapped,
					 gfp_t gfp,
					 u16 id,
					 unsigned long attr)
{
	struct vring_desc_extra *extra;
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, err_idx, len, total_in_len = 0;
	u16 head;
	dma_addr_t addr;

	head = vq->packed.next_avail_idx;
	desc = alloc_indirect_packed(total_sg, gfp);
	if (!desc)
		return -ENOMEM;

	/* Shadow extra array lives right after the indirect table. */
	extra = (struct vring_desc_extra *)&desc[total_sg];

	/* An indirect buffer consumes exactly one ring slot. */
	if (unlikely(vq->vq.num_free < 1)) {
		pr_debug("Can't add buf len 1 - avail = 0\n");
		kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	i = 0;

	/* Fill the indirect table: out buffers first, then in buffers. */
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			if (vring_map_one_sg(vq, sg, n < out_sgs ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
					     &addr, &len, premapped, attr))
				goto unmap_release;

			desc[i].flags = cpu_to_le16(n < out_sgs ?
						0 : VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(len);

			/* Record unmap info; premapped buffers are not ours to unmap. */
			if (unlikely(vq->use_map_api)) {
				extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
				extra[i].len = len;
				extra[i].flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE;
			}

			if (n >= out_sgs)
				total_in_len += len;
			i++;
		}
	}

	/* Now that the indirect table is filled in, map it. */
	addr = vring_map_single(vq, desc,
			total_sg * sizeof(struct vring_packed_desc),
			DMA_TO_DEVICE);
	if (vring_mapping_error(vq, addr))
		goto unmap_release;

	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
				sizeof(struct vring_packed_desc));
	vq->packed.vring.desc[head].id = cpu_to_le16(id);

	if (vq->use_map_api) {
		vq->packed.desc_extra[id].addr = addr;
		vq->packed.desc_extra[id].len = total_sg *
				sizeof(struct vring_packed_desc);
		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
						  vq->packed.avail_used_flags;
	}

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
						vq->packed.avail_used_flags);

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= 1;

	/* Update free pointer */
	n = head + 1;
	if (n >= vq->packed.vring.num) {
		/* Wrapped: flip the avail wrap counter and the AVAIL/USED bits. */
		n = 0;
		vq->packed.avail_wrap_counter ^= 1;
		vq->packed.avail_used_flags ^=
				1 << VRING_PACKED_DESC_F_AVAIL |
				1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	/* In-order rings do not maintain an id free list. */
	if (!virtqueue_is_in_order(vq))
		vq->free_head = vq->packed.desc_extra[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = desc;
	vq->packed.desc_state[id].last = id;
	vq->packed.desc_state[id].total_in_len = total_in_len;

	vq->num_added += 1;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	/* Unmap only the table entries filled in before the failure. */
	err_idx = i;

	for (i = 0; i < err_idx; i++)
		vring_unmap_extra_packed(vq, &extra[i]);

	kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
1614
/*
 * Add a buffer to a packed ring using direct descriptors (one per
 * scatterlist entry), falling back from indirect mode only when the
 * indirect table allocation fails with -ENOMEM.
 *
 * The head descriptor's flags are published last, after a write barrier,
 * so the device never sees a partially written chain. Returns 0, a
 * negative error from the indirect path, -ENOSPC, or -EIO.
 */
static inline int virtqueue_add_packed(struct vring_virtqueue *vq,
				       struct scatterlist *sgs[],
				       unsigned int total_sg,
				       unsigned int out_sgs,
				       unsigned int in_sgs,
				       void *data,
				       void *ctx,
				       bool premapped,
				       gfp_t gfp,
				       unsigned long attr)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, c, descs_used, err_idx, len;
	__le16 head_flags, flags;
	u16 head, id, prev, curr, avail_used_flags;
	int err;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(vq, total_sg)) {
		id = vq->free_head;
		BUG_ON(id == vq->packed.vring.num);
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, premapped, gfp,
						    id, attr);
		/* Only -ENOMEM falls through to the direct path below. */
		if (err != -ENOMEM) {
			END_USE(vq);
			return err;
		}

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	/* Saved so the error path can roll back any mid-chain wrap flip. */
	avail_used_flags = vq->packed.avail_used_flags;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;
	descs_used = total_sg;

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	curr = id;
	c = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;

			if (vring_map_one_sg(vq, sg, n < out_sgs ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
					     &addr, &len, premapped, attr))
				goto unmap_release;

			flags = cpu_to_le16(vq->packed.avail_used_flags |
				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
			/* Head flags are held back and written after the barrier. */
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(len);
			desc[i].id = cpu_to_le16(id);

			if (unlikely(vq->use_map_api)) {
				vq->packed.desc_extra[curr].addr = premapped ?
					DMA_MAPPING_ERROR : addr;
				vq->packed.desc_extra[curr].len = len;
				vq->packed.desc_extra[curr].flags =
					le16_to_cpu(flags);
			}
			prev = curr;
			curr = vq->packed.desc_extra[curr].next;

			/* Ring wrapped mid-chain: flip the AVAIL/USED flag bits. */
			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
			}
		}
	}

	if (i <= head)
		vq->packed.avail_wrap_counter ^= 1;

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;
	vq->free_head = curr;

	/* Store token. */
	vq->packed.desc_state[id].num = descs_used;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = ctx;
	vq->packed.desc_state[id].last = prev;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += descs_used;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	/* Walk the partially built chain and undo its DMA mappings. */
	err_idx = i;
	i = head;
	curr = vq->free_head;

	vq->packed.avail_used_flags = avail_used_flags;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
		curr = vq->packed.desc_extra[curr].next;
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}
1772
/*
 * In-order variant of virtqueue_add_packed(): descriptors are used
 * sequentially, the buffer id is simply the head slot index, and no id
 * free list is maintained. Otherwise mirrors the direct-descriptor path,
 * including the barrier-then-publish of the head flags.
 *
 * Returns 0, a negative error from the indirect path, -ENOSPC, or -EIO.
 */
static inline int virtqueue_add_packed_in_order(struct vring_virtqueue *vq,
						struct scatterlist *sgs[],
						unsigned int total_sg,
						unsigned int out_sgs,
						unsigned int in_sgs,
						void *data,
						void *ctx,
						bool premapped,
						gfp_t gfp,
						unsigned long attr)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, sg_count, err_idx, total_in_len = 0;
	__le16 head_flags, flags;
	u16 head, avail_used_flags;
	bool avail_wrap_counter;
	int err;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	if (virtqueue_use_indirect(vq, total_sg)) {
		/* In order, the buffer id is the head slot index itself. */
		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
						    in_sgs, data, premapped, gfp,
						    vq->packed.next_avail_idx,
						    attr);
		if (err != -ENOMEM) {
			END_USE(vq);
			return err;
		}

		/* fall back on direct */
	}

	head = vq->packed.next_avail_idx;
	/* Saved so the error path can roll back any mid-chain wrap flip. */
	avail_used_flags = vq->packed.avail_used_flags;
	avail_wrap_counter = vq->packed.avail_wrap_counter;

	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);

	desc = vq->packed.vring.desc;
	i = head;

	if (unlikely(vq->vq.num_free < total_sg)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 total_sg, vq->vq.num_free);
		END_USE(vq);
		return -ENOSPC;
	}

	sg_count = 0;
	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr;
			u32 len;

			flags = 0;
			if (++sg_count != total_sg)
				flags |= cpu_to_le16(VRING_DESC_F_NEXT);
			if (n >= out_sgs)
				flags |= cpu_to_le16(VRING_DESC_F_WRITE);

			if (vring_map_one_sg(vq, sg, n < out_sgs ?
					     DMA_TO_DEVICE : DMA_FROM_DEVICE,
					     &addr, &len, premapped, attr))
				goto unmap_release;

			flags |= cpu_to_le16(vq->packed.avail_used_flags);

			/* Head flags are held back and written after the barrier. */
			if (i == head)
				head_flags = flags;
			else
				desc[i].flags = flags;

			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(len);
			desc[i].id = cpu_to_le16(head);

			if (unlikely(vq->use_map_api)) {
				vq->packed.desc_extra[i].addr = premapped ?
					DMA_MAPPING_ERROR : addr;
				vq->packed.desc_extra[i].len = len;
				vq->packed.desc_extra[i].flags =
					le16_to_cpu(flags);
			}

			/* Ring wrapped mid-chain: flip flag bits and wrap counter. */
			if ((unlikely(++i >= vq->packed.vring.num))) {
				i = 0;
				vq->packed.avail_used_flags ^=
					1 << VRING_PACKED_DESC_F_AVAIL |
					1 << VRING_PACKED_DESC_F_USED;
				vq->packed.avail_wrap_counter ^= 1;
			}

			if (n >= out_sgs)
				total_in_len += len;
		}
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= total_sg;

	/* Update free pointer */
	vq->packed.next_avail_idx = i;

	/* Store token. */
	vq->packed.desc_state[head].num = total_sg;
	vq->packed.desc_state[head].data = data;
	vq->packed.desc_state[head].indir_desc = ctx;
	vq->packed.desc_state[head].total_in_len = total_in_len;

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags;
	vq->num_added += total_sg;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	/* Undo the mappings of the slots filled before the failure. */
	err_idx = i;
	i = head;
	vq->packed.avail_used_flags = avail_used_flags;
	vq->packed.avail_wrap_counter = avail_wrap_counter;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[i]);
		i++;
		if (i >= vq->packed.vring.num)
			i = 0;
	}

	END_USE(vq);
	return -EIO;
}
1928
/*
 * Decide whether the device needs to be notified after buffers were made
 * available on a packed ring. The device event-suppression area is read
 * as one atomic 32-bit snapshot (off_wrap + flags) to avoid seeing the
 * two halves from different updates. Returns true if a kick is required.
 */
static bool virtqueue_kick_prepare_packed(struct vring_virtqueue *vq)
{
	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
	bool needs_kick;
	union {
		struct {
			__le16 off_wrap;
			__le16 flags;
		};
		u32 u32;
	} snapshot;

	START_USE(vq);

	/*
	 * We need to expose the new flags value before checking notification
	 * suppressions.
	 */
	virtio_mb(vq->weak_barriers);

	/* old/new bracket the range of entries added since the last kick. */
	old = vq->packed.next_avail_idx - vq->num_added;
	new = vq->packed.next_avail_idx;
	vq->num_added = 0;

	/* Single 32-bit read keeps off_wrap and flags mutually consistent. */
	snapshot.u32 = *(u32 *)vq->packed.vring.device;
	flags = le16_to_cpu(snapshot.flags);

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	/* Without descriptor-event mode, the flags field alone decides. */
	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
		goto out;
	}

	off_wrap = le16_to_cpu(snapshot.off_wrap);

	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	/* Normalize the event index into our current wrap's index space. */
	if (wrap_counter != vq->packed.avail_wrap_counter)
		event_idx -= vq->packed.vring.num;

	needs_kick = vring_need_event(event_idx, new, old);
out:
	END_USE(vq);
	return needs_kick;
}
1976
/*
 * Tear down one completed packed-ring buffer identified by @id: clear its
 * token, return its descriptors to the free count, unmap any DMA mappings,
 * and free the indirect table (or hand the per-buffer ctx back via @ctx).
 * Shared by the in-order and out-of-order paths; the out-of-order wrapper
 * additionally splices the id chain back onto the free list.
 */
static void detach_buf_packed_in_order(struct vring_virtqueue *vq,
				       unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = NULL;
	struct vring_packed_desc *desc;
	unsigned int i, curr;

	state = &vq->packed.desc_state[id];

	/* Clear data ptr. */
	state->data = NULL;

	vq->vq.num_free += state->num;

	/* Walk the desc_extra chain and undo each slot's DMA mapping. */
	if (unlikely(vq->use_map_api)) {
		curr = id;
		for (i = 0; i < state->num; i++) {
			vring_unmap_extra_packed(vq,
						 &vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_extra[curr].next;
		}
	}

	if (vq->indirect) {
		struct vring_desc_extra *extra;
		u32 len, num;

		/* Free the indirect table, if any, now that it's unmapped. */
		desc = state->indir_desc;
		if (!desc)
			return;

		if (vq->use_map_api) {
			/* Recover the entry count from the recorded table length. */
			len = vq->packed.desc_extra[id].len;
			num = len / sizeof(struct vring_packed_desc);

			/* Shadow extra array sits right after the descriptors. */
			extra = (struct vring_desc_extra *)&desc[num];

			for (i = 0; i < num; i++)
				vring_unmap_extra_packed(vq, &extra[i]);
		}
		kfree(desc);
		state->indir_desc = NULL;
	} else if (ctx) {
		/* Without indirect, indir_desc doubles as the ctx slot. */
		*ctx = state->indir_desc;
	}
}
2024
/*
 * Out-of-order detach: splice the buffer's id chain back onto the free
 * list (state->last is the chain's tail), then do the common teardown.
 */
static void detach_buf_packed(struct vring_virtqueue *vq,
			      unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = &vq->packed.desc_state[id];

	/* Link tail of this chain to the old free head, new head is @id. */
	vq->packed.desc_extra[state->last].next = vq->free_head;
	vq->free_head = id;

	detach_buf_packed_in_order(vq, id, ctx);
}
2035
2036static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
2037 u16 idx, bool used_wrap_counter)
2038{
2039 bool avail, used;
2040 u16 flags;
2041
2042 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
2043 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
2044 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
2045
2046 return avail == used && used == used_wrap_counter;
2047}
2048
2049static bool virtqueue_poll_packed(const struct vring_virtqueue *vq,
2050 unsigned int off_wrap)
2051{
2052 bool wrap_counter;
2053 u16 used_idx;
2054
2055 wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
2056 used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
2057
2058 return is_used_desc_packed(vq, used_idx, wrap_counter);
2059}
2060
/* Is there at least one used buffer pending at last_used_idx? */
static bool more_used_packed(const struct vring_virtqueue *vq)
{
	return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx));
}
2065
/*
 * Advance last_used_idx past the buffer @id just consumed (it occupied
 * desc_state[id].num slots starting at @last_used), flipping the used
 * wrap counter when the index wraps, and republish the event index if
 * descriptor-event mode is active.
 */
static void update_last_used_idx_packed(struct vring_virtqueue *vq,
					u16 id, u16 last_used,
					u16 used_wrap_counter)
{
	last_used += vq->packed.desc_state[id].num;
	if (unlikely(last_used >= vq->packed.vring.num)) {
		last_used -= vq->packed.vring.num;
		used_wrap_counter ^= 1;
	}

	/* Re-pack index and wrap counter into the single stored word. */
	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
	WRITE_ONCE(vq->last_used_idx, last_used);

	/*
	 * If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call.
	 */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
		virtio_store_mb(vq->weak_barriers,
				&vq->packed.vring.driver->off_wrap,
				cpu_to_le16(vq->last_used_idx));
}
2089
2090static bool more_used_packed_in_order(const struct vring_virtqueue *vq)
2091{
2092 if (vq->batch_last.id != UINT_MAX)
2093 return true;
2094
2095 return virtqueue_poll_packed(vq, READ_ONCE(vq->last_used_idx));
2096}
2097
/*
 * In-order get_buf: the device may report a whole batch of in-order
 * buffers with a single used entry, so the id/len of that entry are
 * cached in vq->batch_last and buffers are handed back one by one from
 * last_used up to the cached id. Returns the token of the next used
 * buffer (len filled in), or NULL if none is pending.
 */
static void *virtqueue_get_buf_ctx_packed_in_order(struct vring_virtqueue *vq,
						   unsigned int *len,
						   void **ctx)
{
	unsigned int num = vq->packed.vring.num;
	u16 last_used, last_used_idx;
	bool used_wrap_counter;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	last_used_idx = vq->last_used_idx;
	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
	last_used = packed_last_used(last_used_idx);

	/* No cached batch: consult the ring for a fresh used entry. */
	if (vq->batch_last.id == UINT_MAX) {
		if (!more_used_packed_in_order(vq)) {
			pr_debug("No more buffers in queue\n");
			END_USE(vq);
			return NULL;
		}
		/* Only get used elements after they have been exposed by host. */
		virtio_rmb(vq->weak_barriers);
		vq->batch_last.id =
			le16_to_cpu(vq->packed.vring.desc[last_used].id);
		vq->batch_last.len =
			le32_to_cpu(vq->packed.vring.desc[last_used].len);
	}

	/* Last buffer of the batch uses the device-written length. */
	if (vq->batch_last.id == last_used) {
		vq->batch_last.id = UINT_MAX;
		*len = vq->batch_last.len;
	} else {
		/* Earlier batch members fall back to the recorded in-length. */
		*len = vq->packed.desc_state[last_used].total_in_len;
	}

	if (unlikely(last_used >= num)) {
		BAD_RING(vq, "id %u out of range\n", last_used);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[last_used].data)) {
		BAD_RING(vq, "id %u is not a head!\n", last_used);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[last_used].data;
	detach_buf_packed_in_order(vq, last_used, ctx);

	update_last_used_idx_packed(vq, last_used, last_used,
				    used_wrap_counter);

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
2160
/*
 * Out-of-order get_buf for a packed ring: read the used entry at
 * last_used_idx (after a read barrier), validate the id, detach the
 * buffer and advance last_used_idx. Returns the buffer's token with
 * *len set, or NULL if nothing is pending or the ring is corrupt.
 */
static void *virtqueue_get_buf_ctx_packed(struct vring_virtqueue *vq,
					  unsigned int *len,
					  void **ctx)
{
	unsigned int num = vq->packed.vring.num;
	u16 last_used, id, last_used_idx;
	bool used_wrap_counter;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_packed(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used elements after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used_idx = READ_ONCE(vq->last_used_idx);
	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
	last_used = packed_last_used(last_used_idx);
	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

	if (unlikely(id >= num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[id].data)) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[id].data;
	detach_buf_packed(vq, id, ctx);

	update_last_used_idx_packed(vq, id, last_used, used_wrap_counter);

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}
2212
2213static void virtqueue_disable_cb_packed(struct vring_virtqueue *vq)
2214{
2215 if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
2216 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2217
2218 /*
2219 * If device triggered an event already it won't trigger one again:
2220 * no need to disable.
2221 */
2222 if (vq->event_triggered)
2223 return;
2224
2225 vq->packed.vring.driver->flags =
2226 cpu_to_le16(vq->packed.event_flags_shadow);
2227 }
2228}
2229
/*
 * Re-enable used-buffer notifications on a packed ring. Returns an opaque
 * value (the current last_used_idx) that the caller later hands to
 * virtqueue_poll_packed() to detect buffers that raced with enabling.
 */
static unsigned int virtqueue_enable_cb_prepare_packed(struct vring_virtqueue *vq)
{
	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		vq->packed.vring.driver->off_wrap =
			cpu_to_le16(vq->last_used_idx);
		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	/* Only flip the shared flags if they were previously disabled. */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
				cpu_to_le16(vq->packed.event_flags_shadow);
	}

	END_USE(vq);
	return vq->last_used_idx;
}
2260
/*
 * Re-enable notifications on a packed ring, asking the device (when
 * EVENT_IDX is negotiated) to delay the interrupt until ~3/4 of the
 * outstanding buffers are used. Returns false if a used buffer is
 * already pending, i.e. the caller should poll instead of sleeping.
 */
static bool virtqueue_enable_cb_delayed_packed(struct vring_virtqueue *vq)
{
	u16 used_idx, wrap_counter, last_used_idx;
	u16 bufs;

	START_USE(vq);

	/*
	 * We optimistically turn back on interrupts, then check if there was
	 * more to do.
	 */

	if (vq->event) {
		/* TODO: tune this threshold */
		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
		last_used_idx = READ_ONCE(vq->last_used_idx);
		wrap_counter = packed_used_wrap_counter(last_used_idx);

		/* Event index may land in the next wrap of the ring. */
		used_idx = packed_last_used(last_used_idx) + bufs;
		if (used_idx >= vq->packed.vring.num) {
			used_idx -= vq->packed.vring.num;
			wrap_counter ^= 1;
		}

		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));

		/*
		 * We need to update event offset and event wrap
		 * counter first before updating event flags.
		 */
		virtio_wmb(vq->weak_barriers);
	}

	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = vq->event ?
				VRING_PACKED_EVENT_FLAG_DESC :
				VRING_PACKED_EVENT_FLAG_ENABLE;
		vq->packed.vring.driver->flags =
				cpu_to_le16(vq->packed.event_flags_shadow);
	}

	/*
	 * We need to update event suppression structure first
	 * before re-checking for more used buffers.
	 */
	virtio_mb(vq->weak_barriers);

	/* A buffer was used while we were enabling: tell caller to poll. */
	last_used_idx = READ_ONCE(vq->last_used_idx);
	wrap_counter = packed_used_wrap_counter(last_used_idx);
	used_idx = packed_last_used(last_used_idx);
	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}
2320
/*
 * virtqueue_detach_unused_buf_packed - detach the first unused buffer
 *
 * Scans the descriptor state table for the first entry that still holds a
 * driver token and detaches it. Only valid on an inactive queue (shutdown
 * or reset). Returns the token, or NULL once nothing is left — at which
 * point every descriptor must be back on the free list.
 */
static void *virtqueue_detach_unused_buf_packed(struct vring_virtqueue *vq)
{
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->packed.vring.num; i++) {
		if (!vq->packed.desc_state[i].data)
			continue;
		/* detach_buf clears data, so grab it now. */
		buf = vq->packed.desc_state[i].data;
		if (virtqueue_is_in_order(vq))
			detach_buf_packed_in_order(vq, i, NULL);
		else
			detach_buf_packed(vq, i, NULL);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->packed.vring.num);

	END_USE(vq);
	return NULL;
}
2346
2347static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
2348{
2349 struct vring_desc_extra *desc_extra;
2350 unsigned int i;
2351
2352 desc_extra = kmalloc_objs(struct vring_desc_extra, num);
2353 if (!desc_extra)
2354 return NULL;
2355
2356 memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
2357
2358 for (i = 0; i < num - 1; i++)
2359 desc_extra[i].next = i + 1;
2360
2361 desc_extra[num - 1].next = 0;
2362
2363 return desc_extra;
2364}
2365
/*
 * vring_free_packed - release the memory behind a packed ring
 *
 * Each component is checked for NULL before being freed, so this is safe
 * to call on a partially-allocated ring and doubles as the error-unwind
 * path of vring_alloc_queue_packed().
 */
static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
			      struct virtio_device *vdev,
			      union virtio_map map)
{
	if (vring_packed->vring.desc)
		vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
				 vring_packed->vring.desc,
				 vring_packed->ring_dma_addr,
				 map);

	if (vring_packed->vring.driver)
		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
				 vring_packed->vring.driver,
				 vring_packed->driver_event_dma_addr,
				 map);

	if (vring_packed->vring.device)
		vring_free_queue(vdev, vring_packed->event_size_in_bytes,
				 vring_packed->vring.device,
				 vring_packed->device_event_dma_addr,
				 map);

	kfree(vring_packed->desc_state);
	kfree(vring_packed->desc_extra);
}
2391
/*
 * vring_alloc_queue_packed - allocate the device-visible parts of a packed ring
 * @vring_packed: output; filled in as each piece is allocated
 * @vdev: the virtio device
 * @num: ring size (number of descriptors)
 * @map: mapping token used for the allocations
 *
 * Allocates the descriptor ring plus the driver and device event
 * suppression structures, all zero-initialized. On failure, frees
 * whatever was already allocated (vring_free_packed() handles the
 * partially-filled state) and returns -ENOMEM.
 */
static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
				    struct virtio_device *vdev,
				    u32 num, union virtio_map map)
{
	struct vring_packed_desc *ring;
	struct vring_packed_desc_event *driver, *device;
	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
	size_t ring_size_in_bytes, event_size_in_bytes;

	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);

	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
				 &ring_dma_addr,
				 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				 map);
	if (!ring)
		goto err;

	/* Record each piece as soon as it exists so the err path can free it. */
	vring_packed->vring.desc = ring;
	vring_packed->ring_dma_addr = ring_dma_addr;
	vring_packed->ring_size_in_bytes = ring_size_in_bytes;

	event_size_in_bytes = sizeof(struct vring_packed_desc_event);

	driver = vring_alloc_queue(vdev, event_size_in_bytes,
				   &driver_event_dma_addr,
				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				   map);
	if (!driver)
		goto err;

	vring_packed->vring.driver = driver;
	vring_packed->event_size_in_bytes = event_size_in_bytes;
	vring_packed->driver_event_dma_addr = driver_event_dma_addr;

	device = vring_alloc_queue(vdev, event_size_in_bytes,
				   &device_event_dma_addr,
				   GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
				   map);
	if (!device)
		goto err;

	vring_packed->vring.device = device;
	vring_packed->device_event_dma_addr = device_event_dma_addr;

	vring_packed->vring.num = num;

	return 0;

err:
	vring_free_packed(vring_packed, vdev, map);
	return -ENOMEM;
}
2445
2446static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
2447{
2448 struct vring_desc_state_packed *state;
2449 struct vring_desc_extra *extra;
2450 u32 num = vring_packed->vring.num;
2451
2452 state = kmalloc_objs(struct vring_desc_state_packed, num);
2453 if (!state)
2454 goto err_desc_state;
2455
2456 memset(state, 0, num * sizeof(struct vring_desc_state_packed));
2457
2458 extra = vring_alloc_desc_extra(num);
2459 if (!extra)
2460 goto err_desc_extra;
2461
2462 vring_packed->desc_state = state;
2463 vring_packed->desc_extra = extra;
2464
2465 return 0;
2466
2467err_desc_extra:
2468 kfree(state);
2469err_desc_state:
2470 return -ENOMEM;
2471}
2472
2473static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2474 bool callback)
2475{
2476 vring_packed->next_avail_idx = 0;
2477 vring_packed->avail_wrap_counter = 1;
2478 vring_packed->event_flags_shadow = 0;
2479 vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
2480
2481 /* No callback? Tell other side not to bother us. */
2482 if (!callback) {
2483 vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2484 vring_packed->vring.driver->flags =
2485 cpu_to_le16(vring_packed->event_flags_shadow);
2486 }
2487}
2488
2489static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2490 struct vring_virtqueue_packed *vring_packed)
2491{
2492 vq->packed = *vring_packed;
2493
2494 if (virtqueue_is_in_order(vq)) {
2495 vq->batch_last.id = UINT_MAX;
2496 } else {
2497 /*
2498 * Put everything in free lists. Note that
2499 * next_avail_idx is sufficient with IN_ORDER so
2500 * free_head is unused.
2501 */
2502 vq->free_head = 0;
2503 }
2504}
/*
 * virtqueue_reset_packed - bring a packed virtqueue back to a pristine state
 *
 * Clears both event suppression structures and the whole descriptor ring,
 * then re-initializes the driver-side counters and event flags.
 */
static void virtqueue_reset_packed(struct vring_virtqueue *vq)
{
	memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
	memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);

	/* we need to reset the desc.flags. For more, see is_used_desc_packed() */
	memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
	virtqueue_init(vq, vq->packed.vring.num);
	virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
}
2515
2516static const struct virtqueue_ops packed_ops;
2517
/*
 * __vring_new_virtqueue_packed - wrap an already-allocated packed ring in a vq
 *
 * Allocates the struct vring_virtqueue, fills in its configuration from
 * the device's feature bits, attaches the given ring, and links the new
 * queue into vdev's vq list. Returns the new virtqueue, or NULL on OOM.
 * The caller retains ownership of the ring memory (we_own_ring = false).
 */
static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
					       struct vring_virtqueue_packed *vring_packed,
					       struct virtio_device *vdev,
					       bool weak_barriers,
					       bool context,
					       bool (*notify)(struct virtqueue *),
					       void (*callback)(struct virtqueue *),
					       const char *name,
					       union virtio_map map)
{
	struct vring_virtqueue *vq;
	int err;

	vq = kmalloc_obj(*vq);
	if (!vq)
		return NULL;

	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.index = index;
	vq->vq.reset = false;
	vq->we_own_ring = false;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	/* With notification hardening the vq stays broken until DRIVER_OK. */
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
	vq->broken = true;
#else
	vq->broken = false;
#endif
	vq->map = map;
	vq->use_map_api = vring_use_map_api(vdev);

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
	vq->layout = virtio_has_feature(vdev, VIRTIO_F_IN_ORDER) ?
		     VQ_LAYOUT_PACKED_IN_ORDER : VQ_LAYOUT_PACKED;

	/* ORDER_PLATFORM mandates full barriers regardless of the caller. */
	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
		vq->weak_barriers = false;

	err = vring_alloc_state_extra_packed(vring_packed);
	if (err) {
		kfree(vq);
		return NULL;
	}

	virtqueue_vring_init_packed(vring_packed, !!callback);

	virtqueue_init(vq, vring_packed->vring.num);
	virtqueue_vring_attach_packed(vq, vring_packed);

	spin_lock(&vdev->vqs_list_lock);
	list_add_tail(&vq->vq.list, &vdev->vqs);
	spin_unlock(&vdev->vqs_list_lock);
	return &vq->vq;
}
2576
2577static struct virtqueue *vring_create_virtqueue_packed(
2578 unsigned int index,
2579 unsigned int num,
2580 unsigned int vring_align,
2581 struct virtio_device *vdev,
2582 bool weak_barriers,
2583 bool may_reduce_num,
2584 bool context,
2585 bool (*notify)(struct virtqueue *),
2586 void (*callback)(struct virtqueue *),
2587 const char *name,
2588 union virtio_map map)
2589{
2590 struct vring_virtqueue_packed vring_packed = {};
2591 struct virtqueue *vq;
2592
2593 if (vring_alloc_queue_packed(&vring_packed, vdev, num, map))
2594 return NULL;
2595
2596 vq = __vring_new_virtqueue_packed(index, &vring_packed, vdev, weak_barriers,
2597 context, notify, callback, name, map);
2598 if (!vq) {
2599 vring_free_packed(&vring_packed, vdev, map);
2600 return NULL;
2601 }
2602
2603 to_vvq(vq)->we_own_ring = true;
2604
2605 return vq;
2606}
2607
/*
 * virtqueue_resize_packed - replace the ring of @vq with one of size @num
 *
 * The new ring and its bookkeeping are allocated first; only then is the
 * old ring freed and the new one attached. On any allocation failure the
 * old ring is kept (it has not been freed yet) and simply reset, so the
 * queue remains usable at its previous size; -ENOMEM is returned.
 */
static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num)
{
	struct vring_virtqueue_packed vring_packed = {};
	struct virtio_device *vdev = vq->vq.vdev;
	int err;

	if (vring_alloc_queue_packed(&vring_packed, vdev, num, vq->map))
		goto err_ring;

	err = vring_alloc_state_extra_packed(&vring_packed);
	if (err)
		goto err_state_extra;

	/* Point of no return: release the old ring, adopt the new one. */
	vring_free(&vq->vq);

	virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);

	virtqueue_init(vq, vring_packed.vring.num);
	virtqueue_vring_attach_packed(vq, &vring_packed);

	return 0;

err_state_extra:
	vring_free_packed(&vring_packed, vdev, vq->map);
err_ring:
	virtqueue_reset_packed(vq);
	return -ENOMEM;
}
2636
/* Dispatch table for split-layout virtqueues. */
static const struct virtqueue_ops split_ops = {
	.add = virtqueue_add_split,
	.get = virtqueue_get_buf_ctx_split,
	.kick_prepare = virtqueue_kick_prepare_split,
	.disable_cb = virtqueue_disable_cb_split,
	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
	.poll = virtqueue_poll_split,
	.detach_unused_buf = virtqueue_detach_unused_buf_split,
	.more_used = more_used_split,
	.resize = virtqueue_resize_split,
	.reset = virtqueue_reset_split,
};
2650
/* Dispatch table for packed-layout virtqueues. */
static const struct virtqueue_ops packed_ops = {
	.add = virtqueue_add_packed,
	.get = virtqueue_get_buf_ctx_packed,
	.kick_prepare = virtqueue_kick_prepare_packed,
	.disable_cb = virtqueue_disable_cb_packed,
	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
	.poll = virtqueue_poll_packed,
	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
	.more_used = more_used_packed,
	.resize = virtqueue_resize_packed,
	.reset = virtqueue_reset_packed,
};
2664
/* Split layout with VIRTIO_F_IN_ORDER: only .get and .more_used differ. */
static const struct virtqueue_ops split_in_order_ops = {
	.add = virtqueue_add_split,
	.get = virtqueue_get_buf_ctx_split_in_order,
	.kick_prepare = virtqueue_kick_prepare_split,
	.disable_cb = virtqueue_disable_cb_split,
	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
	.poll = virtqueue_poll_split,
	.detach_unused_buf = virtqueue_detach_unused_buf_split,
	.more_used = more_used_split_in_order,
	.resize = virtqueue_resize_split,
	.reset = virtqueue_reset_split,
};
2678
/* Packed layout with VIRTIO_F_IN_ORDER: .add, .get and .more_used differ. */
static const struct virtqueue_ops packed_in_order_ops = {
	.add = virtqueue_add_packed_in_order,
	.get = virtqueue_get_buf_ctx_packed_in_order,
	.kick_prepare = virtqueue_kick_prepare_packed,
	.disable_cb = virtqueue_disable_cb_packed,
	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
	.poll = virtqueue_poll_packed,
	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
	.more_used = more_used_packed_in_order,
	.resize = virtqueue_resize_packed,
	.reset = virtqueue_reset_packed,
};
2692
2693static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2694 void (*recycle)(struct virtqueue *vq, void *buf))
2695{
2696 struct vring_virtqueue *vq = to_vvq(_vq);
2697 struct virtio_device *vdev = vq->vq.vdev;
2698 void *buf;
2699 int err;
2700
2701 if (!vq->we_own_ring)
2702 return -EPERM;
2703
2704 if (!vdev->config->disable_vq_and_reset)
2705 return -ENOENT;
2706
2707 if (!vdev->config->enable_vq_after_reset)
2708 return -ENOENT;
2709
2710 err = vdev->config->disable_vq_and_reset(_vq);
2711 if (err)
2712 return err;
2713
2714 while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2715 recycle(_vq, buf);
2716
2717 return 0;
2718}
2719
2720static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2721{
2722 struct vring_virtqueue *vq = to_vvq(_vq);
2723 struct virtio_device *vdev = vq->vq.vdev;
2724
2725 if (vdev->config->enable_vq_after_reset(_vq))
2726 return -EBUSY;
2727
2728 return 0;
2729}
2730
2731/*
2732 * Generic functions and exported symbols.
2733 */
2734
/*
 * Dispatch a value-returning ops callback according to the vq layout.
 * The @vq argument is snapshotted into a local so it is evaluated
 * exactly once, whichever case is taken.
 */
#define VIRTQUEUE_CALL(vq, op, ...)					\
	({								\
		typeof(vq) __VIRTQUEUE_CALL_vq = (vq);			\
		typeof(split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__)) ret; \
									\
		switch (__VIRTQUEUE_CALL_vq->layout) {			\
		case VQ_LAYOUT_SPLIT:					\
			ret = split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		case VQ_LAYOUT_PACKED:					\
			ret = packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__);\
			break;						\
		case VQ_LAYOUT_SPLIT_IN_ORDER:				\
			ret = split_in_order_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		case VQ_LAYOUT_PACKED_IN_ORDER:				\
			ret = packed_in_order_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		default:						\
			BUG();						\
			break;						\
		}							\
		ret;							\
	})
2759
/*
 * Dispatch a void ops callback according to the vq layout.  As with
 * VIRTQUEUE_CALL(), @vq is snapshotted so it is evaluated exactly once.
 */
#define VOID_VIRTQUEUE_CALL(vq, op, ...)				\
	({								\
		typeof(vq) __VIRTQUEUE_CALL_vq = (vq);			\
									\
		switch (__VIRTQUEUE_CALL_vq->layout) {			\
		case VQ_LAYOUT_SPLIT:					\
			split_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		case VQ_LAYOUT_PACKED:					\
			packed_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		case VQ_LAYOUT_SPLIT_IN_ORDER:				\
			split_in_order_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		case VQ_LAYOUT_PACKED_IN_ORDER:				\
			packed_in_order_ops.op(__VIRTQUEUE_CALL_vq, ##__VA_ARGS__); \
			break;						\
		default:						\
			BUG();						\
			break;						\
		}							\
	})
2782
/*
 * virtqueue_add - common entry point for all virtqueue_add_* wrappers
 *
 * Dispatches to the layout-specific add implementation. @total_sg is the
 * total number of scatterlist entries across all lists in @sgs; @premapped
 * marks buffers the caller has already mapped; @attr carries DMA
 * attributes passed through to the mapping.
 */
static inline int virtqueue_add(struct virtqueue *_vq,
				struct scatterlist *sgs[],
				unsigned int total_sg,
				unsigned int out_sgs,
				unsigned int in_sgs,
				void *data,
				void *ctx,
				bool premapped,
				gfp_t gfp,
				unsigned long attr)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return VIRTQUEUE_CALL(vq, add, sgs, total_sg,
			      out_sgs, in_sgs, data,
			      ctx, premapped, gfp, attr);
}
2800
2801/**
2802 * virtqueue_add_sgs - expose buffers to other end
2803 * @_vq: the struct virtqueue we're talking about.
2804 * @sgs: array of terminated scatterlists.
2805 * @out_sgs: the number of scatterlists readable by other side
2806 * @in_sgs: the number of scatterlists which are writable (after readable ones)
2807 * @data: the token identifying the buffer.
2808 * @gfp: how to do memory allocations (if necessary).
2809 *
2810 * Caller must ensure we don't call this with other virtqueue operations
2811 * at the same time (except where noted).
2812 *
2813 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2814 *
2815 * NB: ENOSPC is a special code that is only returned on an attempt to add a
2816 * buffer to a full VQ. It indicates that some buffers are outstanding and that
2817 * the operation can be retried after some buffers have been used.
2818 */
2819int virtqueue_add_sgs(struct virtqueue *_vq,
2820 struct scatterlist *sgs[],
2821 unsigned int out_sgs,
2822 unsigned int in_sgs,
2823 void *data,
2824 gfp_t gfp)
2825{
2826 unsigned int i, total_sg = 0;
2827
2828 /* Count them first. */
2829 for (i = 0; i < out_sgs + in_sgs; i++) {
2830 struct scatterlist *sg;
2831
2832 for (sg = sgs[i]; sg; sg = sg_next(sg))
2833 total_sg++;
2834 }
2835 return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2836 data, NULL, false, gfp, 0);
2837}
2838EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2839
2840/**
2841 * virtqueue_add_outbuf - expose output buffers to other end
2842 * @vq: the struct virtqueue we're talking about.
2843 * @sg: scatterlist (must be well-formed and terminated!)
2844 * @num: the number of entries in @sg readable by other side
2845 * @data: the token identifying the buffer.
2846 * @gfp: how to do memory allocations (if necessary).
2847 *
2848 * Caller must ensure we don't call this with other virtqueue operations
2849 * at the same time (except where noted).
2850 *
2851 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2852 */
2853int virtqueue_add_outbuf(struct virtqueue *vq,
2854 struct scatterlist *sg, unsigned int num,
2855 void *data,
2856 gfp_t gfp)
2857{
2858 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, false, gfp, 0);
2859}
2860EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2861
2862/**
2863 * virtqueue_add_outbuf_premapped - expose output buffers to other end
2864 * @vq: the struct virtqueue we're talking about.
2865 * @sg: scatterlist (must be well-formed and terminated!)
2866 * @num: the number of entries in @sg readable by other side
2867 * @data: the token identifying the buffer.
2868 * @gfp: how to do memory allocations (if necessary).
2869 *
2870 * Caller must ensure we don't call this with other virtqueue operations
2871 * at the same time (except where noted).
2872 *
2873 * Return:
2874 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2875 */
2876int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
2877 struct scatterlist *sg, unsigned int num,
2878 void *data,
2879 gfp_t gfp)
2880{
2881 return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, true, gfp, 0);
2882}
2883EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
2884
2885/**
2886 * virtqueue_add_inbuf - expose input buffers to other end
2887 * @vq: the struct virtqueue we're talking about.
2888 * @sg: scatterlist (must be well-formed and terminated!)
2889 * @num: the number of entries in @sg writable by other side
2890 * @data: the token identifying the buffer.
2891 * @gfp: how to do memory allocations (if necessary).
2892 *
2893 * Caller must ensure we don't call this with other virtqueue operations
2894 * at the same time (except where noted).
2895 *
2896 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2897 */
2898int virtqueue_add_inbuf(struct virtqueue *vq,
2899 struct scatterlist *sg, unsigned int num,
2900 void *data,
2901 gfp_t gfp)
2902{
2903 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp, 0);
2904}
2905EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2906
2907/**
2908 * virtqueue_add_inbuf_cache_clean - expose input buffers with cache clean
2909 * @vq: the struct virtqueue we're talking about.
2910 * @sg: scatterlist (must be well-formed and terminated!)
2911 * @num: the number of entries in @sg writable by other side
2912 * @data: the token identifying the buffer.
2913 * @gfp: how to do memory allocations (if necessary).
2914 *
2915 * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
2916 * to indicate that the CPU will not dirty any cacheline overlapping this buffer
2917 * while it is available, and to suppress overlapping cacheline warnings in DMA
2918 * debug builds.
2919 *
2920 * Caller must ensure we don't call this with other virtqueue operations
2921 * at the same time (except where noted).
2922 *
2923 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2924 */
2925int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
2926 struct scatterlist *sg, unsigned int num,
2927 void *data,
2928 gfp_t gfp)
2929{
2930 return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
2931 DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
2932}
2933EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);
2934
2935/**
2936 * virtqueue_add_inbuf_ctx - expose input buffers to other end
2937 * @vq: the struct virtqueue we're talking about.
2938 * @sg: scatterlist (must be well-formed and terminated!)
2939 * @num: the number of entries in @sg writable by other side
2940 * @data: the token identifying the buffer.
2941 * @ctx: extra context for the token
2942 * @gfp: how to do memory allocations (if necessary).
2943 *
2944 * Caller must ensure we don't call this with other virtqueue operations
2945 * at the same time (except where noted).
2946 *
2947 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2948 */
2949int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2950 struct scatterlist *sg, unsigned int num,
2951 void *data,
2952 void *ctx,
2953 gfp_t gfp)
2954{
2955 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, false, gfp, 0);
2956}
2957EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2958
2959/**
2960 * virtqueue_add_inbuf_premapped - expose input buffers to other end
2961 * @vq: the struct virtqueue we're talking about.
2962 * @sg: scatterlist (must be well-formed and terminated!)
2963 * @num: the number of entries in @sg writable by other side
2964 * @data: the token identifying the buffer.
2965 * @ctx: extra context for the token
2966 * @gfp: how to do memory allocations (if necessary).
2967 *
2968 * Caller must ensure we don't call this with other virtqueue operations
2969 * at the same time (except where noted).
2970 *
2971 * Return:
2972 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2973 */
2974int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
2975 struct scatterlist *sg, unsigned int num,
2976 void *data,
2977 void *ctx,
2978 gfp_t gfp)
2979{
2980 return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, true, gfp, 0);
2981}
2982EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
2983
2984/**
2985 * virtqueue_dma_dev - get the dma dev
2986 * @_vq: the struct virtqueue we're talking about.
2987 *
2988 * Returns the dma dev. That can been used for dma api.
2989 */
2990struct device *virtqueue_dma_dev(struct virtqueue *_vq)
2991{
2992 struct vring_virtqueue *vq = to_vvq(_vq);
2993
2994 if (vq->use_map_api && !_vq->vdev->map)
2995 return vq->map.dma_dev;
2996 else
2997 return NULL;
2998}
2999EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
3000
3001/**
3002 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
3003 * @_vq: the struct virtqueue
3004 *
3005 * Instead of virtqueue_kick(), you can do:
3006 * if (virtqueue_kick_prepare(vq))
3007 * virtqueue_notify(vq);
3008 *
3009 * This is sometimes useful because the virtqueue_kick_prepare() needs
3010 * to be serialized, but the actual virtqueue_notify() call does not.
3011 */
3012bool virtqueue_kick_prepare(struct virtqueue *_vq)
3013{
3014 struct vring_virtqueue *vq = to_vvq(_vq);
3015
3016 return VIRTQUEUE_CALL(vq, kick_prepare);
3017}
3018EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
3019
3020/**
3021 * virtqueue_notify - second half of split virtqueue_kick call.
3022 * @_vq: the struct virtqueue
3023 *
3024 * This does not need to be serialized.
3025 *
3026 * Returns false if host notify failed or queue is broken, otherwise true.
3027 */
3028bool virtqueue_notify(struct virtqueue *_vq)
3029{
3030 struct vring_virtqueue *vq = to_vvq(_vq);
3031
3032 if (unlikely(vq->broken))
3033 return false;
3034
3035 /* Prod other side to tell it about changes. */
3036 if (!vq->notify(_vq)) {
3037 vq->broken = true;
3038 return false;
3039 }
3040 return true;
3041}
3042EXPORT_SYMBOL_GPL(virtqueue_notify);
3043
3044/**
3045 * virtqueue_kick - update after add_buf
3046 * @vq: the struct virtqueue
3047 *
3048 * After one or more virtqueue_add_* calls, invoke this to kick
3049 * the other side.
3050 *
3051 * Caller must ensure we don't call this with other virtqueue
3052 * operations at the same time (except where noted).
3053 *
3054 * Returns false if kick failed, otherwise true.
3055 */
3056bool virtqueue_kick(struct virtqueue *vq)
3057{
3058 if (virtqueue_kick_prepare(vq))
3059 return virtqueue_notify(vq);
3060 return true;
3061}
3062EXPORT_SYMBOL_GPL(virtqueue_kick);
3063
3064/**
3065 * virtqueue_get_buf_ctx - get the next used buffer
3066 * @_vq: the struct virtqueue we're talking about.
3067 * @len: the length written into the buffer
3068 * @ctx: extra context for the token
3069 *
3070 * If the device wrote data into the buffer, @len will be set to the
3071 * amount written. This means you don't need to clear the buffer
3072 * beforehand to ensure there's no data leakage in the case of short
3073 * writes.
3074 *
3075 * Caller must ensure we don't call this with other virtqueue
3076 * operations at the same time (except where noted).
3077 *
3078 * Returns NULL if there are no used buffers, or the "data" token
3079 * handed to virtqueue_add_*().
3080 */
void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
			    void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* Dispatch to the layout-specific get implementation. */
	return VIRTQUEUE_CALL(vq, get, len, ctx);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
3089
/**
 * virtqueue_get_buf - get the next used buffer
 * @_vq: the struct virtqueue we're talking about.
 * @len: the length written into the buffer
 *
 * Same as virtqueue_get_buf_ctx() without the extra context token.
 */
void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
	return virtqueue_get_buf_ctx(_vq, len, NULL);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);
3095/**
3096 * virtqueue_disable_cb - disable callbacks
3097 * @_vq: the struct virtqueue we're talking about.
3098 *
3099 * Note that this is not necessarily synchronous, hence unreliable and only
3100 * useful as an optimization.
3101 *
3102 * Unlike other operations, this need not be serialized.
3103 */
3104void virtqueue_disable_cb(struct virtqueue *_vq)
3105{
3106 struct vring_virtqueue *vq = to_vvq(_vq);
3107
3108 VOID_VIRTQUEUE_CALL(vq, disable_cb);
3109}
3110EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
3111
3112/**
3113 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
3114 * @_vq: the struct virtqueue we're talking about.
3115 *
3116 * This re-enables callbacks; it returns current queue state
3117 * in an opaque unsigned value. This value should be later tested by
3118 * virtqueue_poll, to detect a possible race between the driver checking for
3119 * more work, and enabling callbacks.
3120 *
3121 * Caller must ensure we don't call this with other virtqueue
3122 * operations at the same time (except where noted).
3123 */
3124unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
3125{
3126 struct vring_virtqueue *vq = to_vvq(_vq);
3127
3128 if (vq->event_triggered)
3129 vq->event_triggered = false;
3130
3131 return VIRTQUEUE_CALL(vq, enable_cb_prepare);
3132}
3133EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
3134
3135/**
3136 * virtqueue_poll - query pending used buffers
3137 * @_vq: the struct virtqueue we're talking about.
3138 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
3139 *
3140 * Returns "true" if there are pending used buffers in the queue.
3141 *
3142 * This does not need to be serialized.
3143 */
3144bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
3145{
3146 struct vring_virtqueue *vq = to_vvq(_vq);
3147
3148 if (unlikely(vq->broken))
3149 return false;
3150
3151 virtio_mb(vq->weak_barriers);
3152
3153 return VIRTQUEUE_CALL(vq, poll, last_used_idx);
3154}
3155EXPORT_SYMBOL_GPL(virtqueue_poll);
3156
3157/**
3158 * virtqueue_enable_cb - restart callbacks after disable_cb.
3159 * @_vq: the struct virtqueue we're talking about.
3160 *
3161 * This re-enables callbacks; it returns "false" if there are pending
3162 * buffers in the queue, to detect a possible race between the driver
3163 * checking for more work, and enabling callbacks.
3164 *
3165 * Caller must ensure we don't call this with other virtqueue
3166 * operations at the same time (except where noted).
3167 */
3168bool virtqueue_enable_cb(struct virtqueue *_vq)
3169{
3170 unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
3171
3172 return !virtqueue_poll(_vq, last_used_idx);
3173}
3174EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
3175
3176/**
3177 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
3178 * @_vq: the struct virtqueue we're talking about.
3179 *
3180 * This re-enables callbacks but hints to the other side to delay
3181 * interrupts until most of the available buffers have been processed;
3182 * it returns "false" if there are many pending buffers in the queue,
3183 * to detect a possible race between the driver checking for more work,
3184 * and enabling callbacks.
3185 *
3186 * Caller must ensure we don't call this with other virtqueue
3187 * operations at the same time (except where noted).
3188 */
3189bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
3190{
3191 struct vring_virtqueue *vq = to_vvq(_vq);
3192
3193 if (vq->event_triggered)
3194 data_race(vq->event_triggered = false);
3195
3196 return VIRTQUEUE_CALL(vq, enable_cb_delayed);
3197}
3198EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
3199
3200/**
3201 * virtqueue_detach_unused_buf - detach first unused buffer
3202 * @_vq: the struct virtqueue we're talking about.
3203 *
3204 * Returns NULL or the "data" token handed to virtqueue_add_*().
3205 * This is not valid on an active queue; it is useful for device
3206 * shutdown or the reset queue.
3207 */
3208void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
3209{
3210 struct vring_virtqueue *vq = to_vvq(_vq);
3211
3212 return VIRTQUEUE_CALL(vq, detach_unused_buf);
3213}
3214EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
3215
/* Returns true if the device has placed at least one buffer in the used ring. */
static inline bool more_used(const struct vring_virtqueue *vq)
{
	return VIRTQUEUE_CALL(vq, more_used);
}
3220
3221/**
3222 * vring_interrupt - notify a virtqueue on an interrupt
3223 * @irq: the IRQ number (ignored)
3224 * @_vq: the struct virtqueue to notify
3225 *
3226 * Calls the callback function of @_vq to process the virtqueue
3227 * notification.
3228 */
3229irqreturn_t vring_interrupt(int irq, void *_vq)
3230{
3231 struct vring_virtqueue *vq = to_vvq(_vq);
3232
3233 if (!more_used(vq)) {
3234 pr_debug("virtqueue interrupt with no work for %p\n", vq);
3235 return IRQ_NONE;
3236 }
3237
3238 if (unlikely(vq->broken)) {
3239#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
3240 dev_warn_once(&vq->vq.vdev->dev,
3241 "virtio vring IRQ raised before DRIVER_OK");
3242 return IRQ_NONE;
3243#else
3244 return IRQ_HANDLED;
3245#endif
3246 }
3247
3248 /* Just a hint for performance: so it's ok that this can be racy! */
3249 if (vq->event)
3250 data_race(vq->event_triggered = true);
3251
3252 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
3253 if (vq->vq.callback)
3254 vq->vq.callback(&vq->vq);
3255
3256 return IRQ_HANDLED;
3257}
3258EXPORT_SYMBOL_GPL(vring_interrupt);
3259
3260struct virtqueue *vring_create_virtqueue(
3261 unsigned int index,
3262 unsigned int num,
3263 unsigned int vring_align,
3264 struct virtio_device *vdev,
3265 bool weak_barriers,
3266 bool may_reduce_num,
3267 bool context,
3268 bool (*notify)(struct virtqueue *),
3269 void (*callback)(struct virtqueue *),
3270 const char *name)
3271{
3272 union virtio_map map = {.dma_dev = vdev->dev.parent};
3273
3274 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
3275 return vring_create_virtqueue_packed(index, num, vring_align,
3276 vdev, weak_barriers, may_reduce_num,
3277 context, notify, callback, name, map);
3278
3279 return vring_create_virtqueue_split(index, num, vring_align,
3280 vdev, weak_barriers, may_reduce_num,
3281 context, notify, callback, name, map);
3282}
3283EXPORT_SYMBOL_GPL(vring_create_virtqueue);
3284
/**
 * vring_create_virtqueue_map - create a vq with an explicit mapping token
 * @index: queue index
 * @num: requested ring size
 * @vring_align: required ring alignment
 * @vdev: the virtio device
 * @weak_barriers: whether weak memory barriers suffice
 * @may_reduce_num: allow shrinking @num if allocation fails
 * @context: whether per-buffer context tokens are used
 * @notify: transport notification hook
 * @callback: used-buffer callback
 * @name: queue name
 * @map: mapping token used for ring allocation
 *
 * Same as vring_create_virtqueue() except the caller supplies @map
 * instead of defaulting to DMA through the parent device.
 */
struct virtqueue *vring_create_virtqueue_map(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name,
	union virtio_map map)
{

	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
		return vring_create_virtqueue_packed(index, num, vring_align,
				vdev, weak_barriers, may_reduce_num,
				context, notify, callback, name, map);

	return vring_create_virtqueue_split(index, num, vring_align,
			vdev, weak_barriers, may_reduce_num,
			context, notify, callback, name, map);
}
EXPORT_SYMBOL_GPL(vring_create_virtqueue_map);
3309
/**
 * virtqueue_resize - resize the vring of vq
 * @_vq: the struct virtqueue we're talking about.
 * @num: new ring num
 * @recycle: callback to recycle unused buffers
 * @recycle_done: callback to be invoked when recycle for all unused buffers done
 *
 * When it is really necessary to create a new vring, it will set the current vq
 * into the reset state. Then call the passed callback to recycle the buffers
 * that are no longer used. Only after the new vring is successfully created is
 * the old vring released.
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error.
 * 0: success.
 * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
 *  vq can still work normally
 * -EBUSY: Failed to sync with device, vq may not work properly
 * -ENOENT: Transport or device not supported
 * -E2BIG/-EINVAL: num error
 * -EPERM: Operation not permitted
 *
 */
int virtqueue_resize(struct virtqueue *_vq, u32 num,
		     void (*recycle)(struct virtqueue *vq, void *buf),
		     void (*recycle_done)(struct virtqueue *vq))
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	int err, err_reset;

	/* The transport caps the usable ring size. */
	if (num > vq->vq.num_max)
		return -E2BIG;

	if (!num)
		return -EINVAL;

	/* Nothing to do if the ring already has the requested size. */
	if (virtqueue_get_vring_size(_vq) == num)
		return 0;

	/* Park the queue and hand all outstanding buffers back via @recycle. */
	err = virtqueue_disable_and_recycle(_vq, recycle);
	if (err)
		return err;
	if (recycle_done)
		recycle_done(_vq);

	/* Layout-specific (split/packed) resize; old ring kept on failure. */
	err = VIRTQUEUE_CALL(vq, resize, num);

	/*
	 * Re-enable the queue even when resize failed.  A failure to come
	 * back out of reset takes precedence over the resize result, since
	 * the vq may then not work at all.
	 */
	err_reset = virtqueue_enable_after_reset(_vq);
	if (err_reset)
		return err_reset;

	return err;
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
3366
3367/**
3368 * virtqueue_reset - detach and recycle all unused buffers
3369 * @_vq: the struct virtqueue we're talking about.
3370 * @recycle: callback to recycle unused buffers
3371 * @recycle_done: callback to be invoked when recycle for all unused buffers done
3372 *
3373 * Caller must ensure we don't call this with other virtqueue operations
3374 * at the same time (except where noted).
3375 *
3376 * Returns zero or a negative error.
3377 * 0: success.
3378 * -EBUSY: Failed to sync with device, vq may not work properly
3379 * -ENOENT: Transport or device not supported
3380 * -EPERM: Operation not permitted
3381 */
3382int virtqueue_reset(struct virtqueue *_vq,
3383 void (*recycle)(struct virtqueue *vq, void *buf),
3384 void (*recycle_done)(struct virtqueue *vq))
3385{
3386 struct vring_virtqueue *vq = to_vvq(_vq);
3387 int err;
3388
3389 err = virtqueue_disable_and_recycle(_vq, recycle);
3390 if (err)
3391 return err;
3392 if (recycle_done)
3393 recycle_done(_vq);
3394
3395 VOID_VIRTQUEUE_CALL(vq, reset);
3396
3397 return virtqueue_enable_after_reset(_vq);
3398}
3399EXPORT_SYMBOL_GPL(virtqueue_reset);
3400
3401struct virtqueue *vring_new_virtqueue(unsigned int index,
3402 unsigned int num,
3403 unsigned int vring_align,
3404 struct virtio_device *vdev,
3405 bool weak_barriers,
3406 bool context,
3407 void *pages,
3408 bool (*notify)(struct virtqueue *vq),
3409 void (*callback)(struct virtqueue *vq),
3410 const char *name)
3411{
3412 struct vring_virtqueue_split vring_split = {};
3413 union virtio_map map = {.dma_dev = vdev->dev.parent};
3414
3415 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3416 struct vring_virtqueue_packed vring_packed = {};
3417
3418 vring_packed.vring.num = num;
3419 vring_packed.vring.desc = pages;
3420 return __vring_new_virtqueue_packed(index, &vring_packed,
3421 vdev, weak_barriers,
3422 context, notify, callback,
3423 name, map);
3424 }
3425
3426 vring_init(&vring_split.vring, num, pages, vring_align);
3427 return __vring_new_virtqueue_split(index, &vring_split, vdev, weak_barriers,
3428 context, notify, callback, name,
3429 map);
3430}
3431EXPORT_SYMBOL_GPL(vring_new_virtqueue);
3432
3433static void vring_free(struct virtqueue *_vq)
3434{
3435 struct vring_virtqueue *vq = to_vvq(_vq);
3436
3437 if (vq->we_own_ring) {
3438 if (virtqueue_is_packed(vq)) {
3439 vring_free_queue(vq->vq.vdev,
3440 vq->packed.ring_size_in_bytes,
3441 vq->packed.vring.desc,
3442 vq->packed.ring_dma_addr,
3443 vq->map);
3444
3445 vring_free_queue(vq->vq.vdev,
3446 vq->packed.event_size_in_bytes,
3447 vq->packed.vring.driver,
3448 vq->packed.driver_event_dma_addr,
3449 vq->map);
3450
3451 vring_free_queue(vq->vq.vdev,
3452 vq->packed.event_size_in_bytes,
3453 vq->packed.vring.device,
3454 vq->packed.device_event_dma_addr,
3455 vq->map);
3456
3457 kfree(vq->packed.desc_state);
3458 kfree(vq->packed.desc_extra);
3459 } else {
3460 vring_free_queue(vq->vq.vdev,
3461 vq->split.queue_size_in_bytes,
3462 vq->split.vring.desc,
3463 vq->split.queue_dma_addr,
3464 vq->map);
3465 }
3466 }
3467 if (!virtqueue_is_packed(vq)) {
3468 kfree(vq->split.desc_state);
3469 kfree(vq->split.desc_extra);
3470 }
3471}
3472
3473void vring_del_virtqueue(struct virtqueue *_vq)
3474{
3475 struct vring_virtqueue *vq = to_vvq(_vq);
3476
3477 spin_lock(&vq->vq.vdev->vqs_list_lock);
3478 list_del(&_vq->list);
3479 spin_unlock(&vq->vq.vdev->vqs_list_lock);
3480
3481 vring_free(_vq);
3482
3483 kfree(vq);
3484}
3485EXPORT_SYMBOL_GPL(vring_del_virtqueue);
3486
3487u32 vring_notification_data(struct virtqueue *_vq)
3488{
3489 struct vring_virtqueue *vq = to_vvq(_vq);
3490 u16 next;
3491
3492 if (virtqueue_is_packed(vq))
3493 next = (vq->packed.next_avail_idx &
3494 ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) |
3495 vq->packed.avail_wrap_counter <<
3496 VRING_PACKED_EVENT_F_WRAP_CTR;
3497 else
3498 next = vq->split.avail_idx_shadow;
3499
3500 return next << 16 | _vq->index;
3501}
3502EXPORT_SYMBOL_GPL(vring_notification_data);
3503
3504/* Manipulates transport-specific feature bits. */
3505void vring_transport_features(struct virtio_device *vdev)
3506{
3507 unsigned int i;
3508
3509 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
3510 switch (i) {
3511 case VIRTIO_RING_F_INDIRECT_DESC:
3512 break;
3513 case VIRTIO_RING_F_EVENT_IDX:
3514 break;
3515 case VIRTIO_F_VERSION_1:
3516 break;
3517 case VIRTIO_F_ACCESS_PLATFORM:
3518 break;
3519 case VIRTIO_F_RING_PACKED:
3520 break;
3521 case VIRTIO_F_ORDER_PLATFORM:
3522 break;
3523 case VIRTIO_F_NOTIFICATION_DATA:
3524 break;
3525 case VIRTIO_F_IN_ORDER:
3526 break;
3527 default:
3528 /* We don't understand this bit. */
3529 __virtio_clear_bit(vdev, i);
3530 }
3531 }
3532}
3533EXPORT_SYMBOL_GPL(vring_transport_features);
3534
3535/**
3536 * virtqueue_get_vring_size - return the size of the virtqueue's vring
3537 * @_vq: the struct virtqueue containing the vring of interest.
3538 *
3539 * Returns the size of the vring. This is mainly used for boasting to
3540 * userspace. Unlike other operations, this need not be serialized.
3541 */
3542unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
3543{
3544
3545 const struct vring_virtqueue *vq = to_vvq(_vq);
3546
3547 return virtqueue_is_packed(vq) ? vq->packed.vring.num :
3548 vq->split.vring.num;
3549}
3550EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
3551
/*
 * Mark the virtqueue broken; subsequent operations on it will fail fast.
 *
 * This function should only be called by the core, not directly by the driver.
 */
void __virtqueue_break(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
	WRITE_ONCE(vq->broken, true);
}
EXPORT_SYMBOL_GPL(__virtqueue_break);
3563
/*
 * Clear the broken flag, making the virtqueue usable again.
 *
 * This function should only be called by the core, not directly by the driver.
 */
void __virtqueue_unbreak(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
	WRITE_ONCE(vq->broken, false);
}
EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3575
/* Report whether the virtqueue has been marked broken (lockless read). */
bool virtqueue_is_broken(const struct virtqueue *_vq)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);

	/* Pairs with the WRITE_ONCE() in __virtqueue_break()/unbreak(). */
	return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3583
3584/*
3585 * This should prevent the device from being used, allowing drivers to
3586 * recover. You may need to grab appropriate locks to flush.
3587 */
3588void virtio_break_device(struct virtio_device *dev)
3589{
3590 struct virtqueue *_vq;
3591
3592 spin_lock(&dev->vqs_list_lock);
3593 list_for_each_entry(_vq, &dev->vqs, list) {
3594 struct vring_virtqueue *vq = to_vvq(_vq);
3595
3596 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3597 WRITE_ONCE(vq->broken, true);
3598 }
3599 spin_unlock(&dev->vqs_list_lock);
3600}
3601EXPORT_SYMBOL_GPL(virtio_break_device);
3602
3603/*
3604 * This should allow the device to be used by the driver. You may
3605 * need to grab appropriate locks to flush the write to
3606 * vq->broken. This should only be used in some specific case e.g
3607 * (probing and restoring). This function should only be called by the
3608 * core, not directly by the driver.
3609 */
3610void __virtio_unbreak_device(struct virtio_device *dev)
3611{
3612 struct virtqueue *_vq;
3613
3614 spin_lock(&dev->vqs_list_lock);
3615 list_for_each_entry(_vq, &dev->vqs, list) {
3616 struct vring_virtqueue *vq = to_vvq(_vq);
3617
3618 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
3619 WRITE_ONCE(vq->broken, false);
3620 }
3621 spin_unlock(&dev->vqs_list_lock);
3622}
3623EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3624
3625dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3626{
3627 const struct vring_virtqueue *vq = to_vvq(_vq);
3628
3629 BUG_ON(!vq->we_own_ring);
3630
3631 if (virtqueue_is_packed(vq))
3632 return vq->packed.ring_dma_addr;
3633
3634 return vq->split.queue_dma_addr;
3635}
3636EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3637
3638dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3639{
3640 const struct vring_virtqueue *vq = to_vvq(_vq);
3641
3642 BUG_ON(!vq->we_own_ring);
3643
3644 if (virtqueue_is_packed(vq))
3645 return vq->packed.driver_event_dma_addr;
3646
3647 return vq->split.queue_dma_addr +
3648 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
3649}
3650EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3651
3652dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3653{
3654 const struct vring_virtqueue *vq = to_vvq(_vq);
3655
3656 BUG_ON(!vq->we_own_ring);
3657
3658 if (virtqueue_is_packed(vq))
3659 return vq->packed.device_event_dma_addr;
3660
3661 return vq->split.queue_dma_addr +
3662 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
3663}
3664EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
3665
/*
 * Only available for split ring: returns the driver-visible struct vring.
 * Packed rings use a different layout, so this accessor does not apply there.
 */
const struct vring *virtqueue_get_vring(const struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3672
3673/**
3674 * virtqueue_map_alloc_coherent - alloc coherent mapping
3675 * @vdev: the virtio device we are talking to
3676 * @map: metadata for performing mapping
3677 * @size: the size of the buffer
3678 * @map_handle: the pointer to the mapped address
3679 * @gfp: allocation flag (GFP_XXX)
3680 *
3681 * return virtual address or NULL on error
3682 */
3683void *virtqueue_map_alloc_coherent(struct virtio_device *vdev,
3684 union virtio_map map,
3685 size_t size, dma_addr_t *map_handle,
3686 gfp_t gfp)
3687{
3688 if (vdev->map)
3689 return vdev->map->alloc(map, size,
3690 map_handle, gfp);
3691 else
3692 return dma_alloc_coherent(map.dma_dev, size,
3693 map_handle, gfp);
3694}
3695EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent);
3696
3697/**
3698 * virtqueue_map_free_coherent - free coherent mapping
3699 * @vdev: the virtio device we are talking to
3700 * @map: metadata for performing mapping
3701 * @size: the size of the buffer
3702 * @vaddr: the virtual address that needs to be freed
3703 * @map_handle: the mapped address that needs to be freed
3704 *
3705 */
3706void virtqueue_map_free_coherent(struct virtio_device *vdev,
3707 union virtio_map map, size_t size, void *vaddr,
3708 dma_addr_t map_handle)
3709{
3710 if (vdev->map)
3711 vdev->map->free(map, size, vaddr,
3712 map_handle, 0);
3713 else
3714 dma_free_coherent(map.dma_dev, size, vaddr, map_handle);
3715}
3716EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent);
3717
3718/**
3719 * virtqueue_map_page_attrs - map a page to the device
3720 * @_vq: the virtqueue we are talking to
3721 * @page: the page that will be mapped by the device
3722 * @offset: the offset in the page for a buffer
3723 * @size: the buffer size
3724 * @dir: mapping direction
3725 * @attrs: mapping attributes
3726 *
3727 * Returns mapped address. Caller should check that by virtqueue_map_mapping_error().
3728 */
3729dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq,
3730 struct page *page,
3731 unsigned long offset,
3732 size_t size,
3733 enum dma_data_direction dir,
3734 unsigned long attrs)
3735{
3736 const struct vring_virtqueue *vq = to_vvq(_vq);
3737 struct virtio_device *vdev = _vq->vdev;
3738
3739 if (vdev->map)
3740 return vdev->map->map_page(vq->map,
3741 page, offset, size,
3742 dir, attrs);
3743
3744 return dma_map_page_attrs(vring_dma_dev(vq),
3745 page, offset, size,
3746 dir, attrs);
3747}
3748EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs);
3749
/**
 * virtqueue_unmap_page_attrs - unmap a page previously mapped for the device
 * @_vq: the virtqueue we are talking to
 * @map_handle: the mapped address returned by virtqueue_map_page_attrs()
 * @size: the buffer size
 * @dir: mapping direction
 * @attrs: unmapping attributes
 */
void virtqueue_unmap_page_attrs(const struct virtqueue *_vq,
				dma_addr_t map_handle,
				size_t size, enum dma_data_direction dir,
				unsigned long attrs)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);
	struct virtio_device *vdev = _vq->vdev;

	/* Transport-specific mapping ops take precedence over the DMA API. */
	if (vdev->map)
		vdev->map->unmap_page(vq->map,
				      map_handle, size, dir, attrs);
	else
		dma_unmap_page_attrs(vring_dma_dev(vq), map_handle,
				     size, dir, attrs);
}
EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs);
3774
/**
 * virtqueue_map_single_attrs - map DMA for _vq
 * @_vq: the struct virtqueue we're talking about.
 * @ptr: the pointer of the buffer to do dma
 * @size: the size of the buffer to do dma
 * @dir: DMA direction
 * @attrs: DMA Attrs
 *
 * The caller calls this to do dma mapping in advance. The DMA address can be
 * passed to this _vq when it is in pre-mapped mode.
 *
 * return mapped address. Caller should check that by virtqueue_map_mapping_error().
 */
dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr,
				      size_t size,
				      enum dma_data_direction dir,
				      unsigned long attrs)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);

	if (!vq->use_map_api) {
		/*
		 * No mapping layer in use: the device works on physical
		 * addresses directly.  Still inform KMSAN that the buffer
		 * is handed to a device.
		 */
		kmsan_handle_dma(virt_to_phys(ptr), size, dir);
		return (dma_addr_t)virt_to_phys(ptr);
	}

	/* DMA must never operate on areas that might be remapped. */
	if (dev_WARN_ONCE(&_vq->vdev->dev, is_vmalloc_addr(ptr),
			  "rejecting DMA map of vmalloc memory\n"))
		return DMA_MAPPING_ERROR;

	return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr),
					offset_in_page(ptr), size, dir, attrs);
}
EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs);
3809
3810/**
3811 * virtqueue_unmap_single_attrs - unmap map for _vq
3812 * @_vq: the struct virtqueue we're talking about.
3813 * @addr: the dma address to unmap
3814 * @size: the size of the buffer
3815 * @dir: DMA direction
3816 * @attrs: DMA Attrs
3817 *
3818 * Unmap the address that is mapped by the virtqueue_map_* APIs.
3819 *
3820 */
3821void virtqueue_unmap_single_attrs(const struct virtqueue *_vq,
3822 dma_addr_t addr,
3823 size_t size, enum dma_data_direction dir,
3824 unsigned long attrs)
3825{
3826 const struct vring_virtqueue *vq = to_vvq(_vq);
3827
3828 if (!vq->use_map_api)
3829 return;
3830
3831 virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs);
3832}
3833EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs);
3834
/**
 * virtqueue_map_mapping_error - check dma address
 * @_vq: the struct virtqueue we're talking about.
 * @addr: DMA address returned by a virtqueue_map_* API
 *
 * Returns 0 if the dma address is valid. Any other value means the mapping
 * failed.
 */
int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr)
{
	const struct vring_virtqueue *vq = to_vvq(_vq);

	return vring_mapping_error(vq, addr);
}
EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error);
3849
3850/**
3851 * virtqueue_map_need_sync - check a dma address needs sync
3852 * @_vq: the struct virtqueue we're talking about.
3853 * @addr: DMA address
3854 *
3855 * Check if the dma address mapped by the virtqueue_map_* APIs needs to be
3856 * synchronized
3857 *
3858 * return bool
3859 */
3860bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr)
3861{
3862 const struct vring_virtqueue *vq = to_vvq(_vq);
3863 struct virtio_device *vdev = _vq->vdev;
3864
3865 if (!vq->use_map_api)
3866 return false;
3867
3868 if (vdev->map)
3869 return vdev->map->need_sync(vq->map, addr);
3870 else
3871 return dma_need_sync(vring_dma_dev(vq), addr);
3872}
3873EXPORT_SYMBOL_GPL(virtqueue_map_need_sync);
3874
3875/**
3876 * virtqueue_map_sync_single_range_for_cpu - map sync for cpu
3877 * @_vq: the struct virtqueue we're talking about.
3878 * @addr: DMA address
3879 * @offset: DMA address offset
3880 * @size: buf size for sync
3881 * @dir: DMA direction
3882 *
3883 * Before calling this function, use virtqueue_map_need_sync() to confirm that
3884 * the DMA address really needs to be synchronized
3885 *
3886 */
3887void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq,
3888 dma_addr_t addr,
3889 unsigned long offset, size_t size,
3890 enum dma_data_direction dir)
3891{
3892 const struct vring_virtqueue *vq = to_vvq(_vq);
3893 struct virtio_device *vdev = _vq->vdev;
3894
3895 if (!vq->use_map_api)
3896 return;
3897
3898 if (vdev->map)
3899 vdev->map->sync_single_for_cpu(vq->map,
3900 addr + offset, size, dir);
3901 else
3902 dma_sync_single_range_for_cpu(vring_dma_dev(vq),
3903 addr, offset, size, dir);
3904}
3905EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu);
3906
3907/**
3908 * virtqueue_map_sync_single_range_for_device - map sync for device
3909 * @_vq: the struct virtqueue we're talking about.
3910 * @addr: DMA address
3911 * @offset: DMA address offset
3912 * @size: buf size for sync
3913 * @dir: DMA direction
3914 *
3915 * Before calling this function, use virtqueue_map_need_sync() to confirm that
3916 * the DMA address really needs to be synchronized
3917 */
3918void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq,
3919 dma_addr_t addr,
3920 unsigned long offset, size_t size,
3921 enum dma_data_direction dir)
3922{
3923 const struct vring_virtqueue *vq = to_vvq(_vq);
3924 struct virtio_device *vdev = _vq->vdev;
3925
3926 if (!vq->use_map_api)
3927 return;
3928
3929 if (vdev->map)
3930 vdev->map->sync_single_for_device(vq->map,
3931 addr + offset,
3932 size, dir);
3933 else
3934 dma_sync_single_range_for_device(vring_dma_dev(vq), addr,
3935 offset, size, dir);
3936}
3937EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device);
3938
3939MODULE_DESCRIPTION("Virtio ring implementation");
3940MODULE_LICENSE("GPL");