Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

vduse: add vq group asid support

Add support for assigning Address Space Identifiers (ASIDs) to each VQ
group. This enables mapping each group into a distinct memory space.

The vq group to ASID association is now protected by a rwlock. However,
the mutex domain_lock still protects the domains of all ASIDs, because
some operations, such as those related to the bounce buffer size, still
require locking all the ASIDs.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20260119143306.1818855-12-eperezma@redhat.com>

authored by

Eugenio Pérez and committed by
Michael S. Tsirkin
079212f6 f3dc3a8a

+315 -136
+252 -133
drivers/vdpa/vdpa_user/vduse_dev.c
··· 9 9 */ 10 10 11 11 #include "linux/virtio_net.h" 12 + #include <linux/cleanup.h> 12 13 #include <linux/init.h> 13 14 #include <linux/module.h> 14 15 #include <linux/cdev.h> ··· 42 41 43 42 #define VDUSE_DEV_MAX (1U << MINORBITS) 44 43 #define VDUSE_DEV_MAX_GROUPS 0xffff 44 + #define VDUSE_DEV_MAX_AS 0xffff 45 45 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024) 46 46 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024) 47 47 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) ··· 88 86 struct mm_struct *mm; 89 87 }; 90 88 89 + struct vduse_as { 90 + struct vduse_iova_domain *domain; 91 + struct vduse_umem *umem; 92 + struct mutex mem_lock; 93 + }; 94 + 91 95 struct vduse_vq_group { 96 + rwlock_t as_lock; 97 + struct vduse_as *as; /* Protected by as_lock */ 92 98 struct vduse_dev *dev; 93 99 }; 94 100 ··· 104 94 struct vduse_vdpa *vdev; 105 95 struct device *dev; 106 96 struct vduse_virtqueue **vqs; 107 - struct vduse_iova_domain *domain; 97 + struct vduse_as *as; 108 98 char *name; 109 99 struct mutex lock; 110 100 spinlock_t msg_lock; ··· 132 122 u32 vq_num; 133 123 u32 vq_align; 134 124 u32 ngroups; 135 - struct vduse_umem *umem; 125 + u32 nas; 136 126 struct vduse_vq_group *groups; 137 - struct mutex mem_lock; 138 127 unsigned int bounce_size; 139 128 struct mutex domain_lock; 140 129 }; ··· 323 314 return vduse_dev_msg_sync(dev, &msg); 324 315 } 325 316 326 - static int vduse_dev_update_iotlb(struct vduse_dev *dev, 317 + static int vduse_dev_update_iotlb(struct vduse_dev *dev, u32 asid, 327 318 u64 start, u64 last) 328 319 { 329 320 struct vduse_dev_msg msg = { 0 }; ··· 332 323 return -EINVAL; 333 324 334 325 msg.req.type = VDUSE_UPDATE_IOTLB; 335 - msg.req.iova.start = start; 336 - msg.req.iova.last = last; 326 + if (dev->api_version < VDUSE_API_VERSION_1) { 327 + msg.req.iova.start = start; 328 + msg.req.iova.last = last; 329 + } else { 330 + msg.req.iova_v2.start = start; 331 + msg.req.iova_v2.last = last; 332 + msg.req.iova_v2.asid = asid; 333 + } 337 334 338 335 
return vduse_dev_msg_sync(dev, &msg); 339 336 } ··· 454 439 static void vduse_dev_reset(struct vduse_dev *dev) 455 440 { 456 441 int i; 457 - struct vduse_iova_domain *domain = dev->domain; 458 442 459 443 /* The coherent mappings are handled in vduse_dev_free_coherent() */ 460 - if (domain && domain->bounce_map) 461 - vduse_domain_reset_bounce_map(domain); 444 + for (i = 0; i < dev->nas; i++) { 445 + struct vduse_iova_domain *domain = dev->as[i].domain; 446 + 447 + if (domain && domain->bounce_map) 448 + vduse_domain_reset_bounce_map(domain); 449 + } 462 450 463 451 down_write(&dev->rwsem); 464 452 ··· 640 622 return ret; 641 623 } 642 624 625 + DEFINE_GUARD(vq_group_as_read_lock, struct vduse_vq_group *, 626 + if (_T->dev->nas > 1) 627 + read_lock(&_T->as_lock), 628 + if (_T->dev->nas > 1) 629 + read_unlock(&_T->as_lock)) 630 + 631 + DEFINE_GUARD(vq_group_as_write_lock, struct vduse_vq_group *, 632 + if (_T->dev->nas > 1) 633 + write_lock(&_T->as_lock), 634 + if (_T->dev->nas > 1) 635 + write_unlock(&_T->as_lock)) 636 + 637 + static int vduse_set_group_asid(struct vdpa_device *vdpa, unsigned int group, 638 + unsigned int asid) 639 + { 640 + struct vduse_dev *dev = vdpa_to_vduse(vdpa); 641 + struct vduse_dev_msg msg = { 0 }; 642 + int r; 643 + 644 + if (dev->api_version < VDUSE_API_VERSION_1) 645 + return -EINVAL; 646 + 647 + msg.req.type = VDUSE_SET_VQ_GROUP_ASID; 648 + msg.req.vq_group_asid.group = group; 649 + msg.req.vq_group_asid.asid = asid; 650 + 651 + r = vduse_dev_msg_sync(dev, &msg); 652 + if (r < 0) 653 + return r; 654 + 655 + guard(vq_group_as_write_lock)(&dev->groups[group]); 656 + dev->groups[group].as = &dev->as[asid]; 657 + 658 + return 0; 659 + } 660 + 643 661 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, 644 662 struct vdpa_vq_state *state) 645 663 { ··· 847 793 struct vduse_dev *dev = vdpa_to_vduse(vdpa); 848 794 int ret; 849 795 850 - ret = vduse_domain_set_map(dev->domain, iotlb); 796 + ret = 
vduse_domain_set_map(dev->as[asid].domain, iotlb); 851 797 if (ret) 852 798 return ret; 853 799 854 - ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX); 800 + ret = vduse_dev_update_iotlb(dev, asid, 0ULL, ULLONG_MAX); 855 801 if (ret) { 856 - vduse_domain_clear_map(dev->domain, iotlb); 802 + vduse_domain_clear_map(dev->as[asid].domain, iotlb); 857 803 return ret; 858 804 } 859 805 ··· 896 842 .get_vq_affinity = vduse_vdpa_get_vq_affinity, 897 843 .reset = vduse_vdpa_reset, 898 844 .set_map = vduse_vdpa_set_map, 845 + .set_group_asid = vduse_set_group_asid, 899 846 .get_vq_map = vduse_get_vq_map, 900 847 .free = vduse_vdpa_free, 901 848 }; ··· 905 850 dma_addr_t dma_addr, size_t size, 906 851 enum dma_data_direction dir) 907 852 { 908 - struct vduse_dev *vdev; 909 853 struct vduse_iova_domain *domain; 910 854 911 855 if (!token.group) 912 856 return; 913 857 914 - vdev = token.group->dev; 915 - domain = vdev->domain; 916 - 858 + guard(vq_group_as_read_lock)(token.group); 859 + domain = token.group->as->domain; 917 860 vduse_domain_sync_single_for_device(domain, dma_addr, size, dir); 918 861 } 919 862 ··· 919 866 dma_addr_t dma_addr, size_t size, 920 867 enum dma_data_direction dir) 921 868 { 922 - struct vduse_dev *vdev; 923 869 struct vduse_iova_domain *domain; 924 870 925 871 if (!token.group) 926 872 return; 927 873 928 - vdev = token.group->dev; 929 - domain = vdev->domain; 930 - 874 + guard(vq_group_as_read_lock)(token.group); 875 + domain = token.group->as->domain; 931 876 vduse_domain_sync_single_for_cpu(domain, dma_addr, size, dir); 932 877 } 933 878 ··· 934 883 enum dma_data_direction dir, 935 884 unsigned long attrs) 936 885 { 937 - struct vduse_dev *vdev; 938 886 struct vduse_iova_domain *domain; 939 887 940 888 if (!token.group) 941 889 return DMA_MAPPING_ERROR; 942 890 943 - vdev = token.group->dev; 944 - domain = vdev->domain; 945 - 891 + guard(vq_group_as_read_lock)(token.group); 892 + domain = token.group->as->domain; 946 893 return 
vduse_domain_map_page(domain, page, offset, size, dir, attrs); 947 894 } 948 895 ··· 948 899 size_t size, enum dma_data_direction dir, 949 900 unsigned long attrs) 950 901 { 951 - struct vduse_dev *vdev; 952 902 struct vduse_iova_domain *domain; 953 903 954 904 if (!token.group) 955 905 return; 956 906 957 - vdev = token.group->dev; 958 - domain = vdev->domain; 959 - 960 - return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); 907 + guard(vq_group_as_read_lock)(token.group); 908 + domain = token.group->as->domain; 909 + vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); 961 910 } 962 911 963 912 static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size, 964 913 dma_addr_t *dma_addr, gfp_t flag) 965 914 { 966 - struct vduse_dev *vdev; 967 - struct vduse_iova_domain *domain; 968 915 void *addr; 969 916 970 917 *dma_addr = DMA_MAPPING_ERROR; ··· 971 926 if (!addr) 972 927 return NULL; 973 928 974 - vdev = token.group->dev; 975 - domain = vdev->domain; 976 - *dma_addr = vduse_domain_alloc_coherent(domain, size, addr); 977 - if (*dma_addr == DMA_MAPPING_ERROR) 978 - goto err; 929 + { 930 + struct vduse_iova_domain *domain; 931 + 932 + guard(vq_group_as_read_lock)(token.group); 933 + domain = token.group->as->domain; 934 + *dma_addr = vduse_domain_alloc_coherent(domain, size, addr); 935 + if (*dma_addr == DMA_MAPPING_ERROR) 936 + goto err; 937 + } 979 938 980 939 return addr; 981 940 ··· 992 943 void *vaddr, dma_addr_t dma_addr, 993 944 unsigned long attrs) 994 945 { 995 - struct vduse_dev *vdev; 996 - struct vduse_iova_domain *domain; 997 - 998 946 if (!token.group) 999 947 return; 1000 948 1001 - vdev = token.group->dev; 1002 - domain = vdev->domain; 949 + { 950 + struct vduse_iova_domain *domain; 1003 951 1004 - vduse_domain_free_coherent(domain, size, dma_addr, attrs); 952 + guard(vq_group_as_read_lock)(token.group); 953 + domain = token.group->as->domain; 954 + vduse_domain_free_coherent(domain, size, dma_addr, attrs); 955 + } 
956 + 1005 957 free_pages_exact(vaddr, size); 1006 958 } 1007 959 1008 960 static bool vduse_dev_need_sync(union virtio_map token, dma_addr_t dma_addr) 1009 961 { 1010 - struct vduse_dev *vdev; 1011 - struct vduse_iova_domain *domain; 1012 - 1013 962 if (!token.group) 1014 963 return false; 1015 964 1016 - vdev = token.group->dev; 1017 - domain = vdev->domain; 1018 - 1019 - return dma_addr < domain->bounce_size; 965 + guard(vq_group_as_read_lock)(token.group); 966 + return dma_addr < token.group->as->domain->bounce_size; 1020 967 } 1021 968 1022 969 static int vduse_dev_mapping_error(union virtio_map token, dma_addr_t dma_addr) ··· 1024 979 1025 980 static size_t vduse_dev_max_mapping_size(union virtio_map token) 1026 981 { 1027 - struct vduse_dev *vdev; 1028 - struct vduse_iova_domain *domain; 1029 - 1030 982 if (!token.group) 1031 983 return 0; 1032 984 1033 - vdev = token.group->dev; 1034 - domain = vdev->domain; 1035 - 1036 - return domain->bounce_size; 985 + guard(vq_group_as_read_lock)(token.group); 986 + return token.group->as->domain->bounce_size; 1037 987 } 1038 988 1039 989 static const struct virtio_map_ops vduse_map_ops = { ··· 1168 1128 return ret; 1169 1129 } 1170 1130 1171 - static int vduse_dev_dereg_umem(struct vduse_dev *dev, 1131 + static int vduse_dev_dereg_umem(struct vduse_dev *dev, u32 asid, 1172 1132 u64 iova, u64 size) 1173 1133 { 1174 1134 int ret; 1175 1135 1176 - mutex_lock(&dev->mem_lock); 1136 + mutex_lock(&dev->as[asid].mem_lock); 1177 1137 ret = -ENOENT; 1178 - if (!dev->umem) 1138 + if (!dev->as[asid].umem) 1179 1139 goto unlock; 1180 1140 1181 1141 ret = -EINVAL; 1182 - if (!dev->domain) 1142 + if (!dev->as[asid].domain) 1183 1143 goto unlock; 1184 1144 1185 - if (dev->umem->iova != iova || size != dev->domain->bounce_size) 1145 + if (dev->as[asid].umem->iova != iova || 1146 + size != dev->as[asid].domain->bounce_size) 1186 1147 goto unlock; 1187 1148 1188 - vduse_domain_remove_user_bounce_pages(dev->domain); 1189 - 
unpin_user_pages_dirty_lock(dev->umem->pages, 1190 - dev->umem->npages, true); 1191 - atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm); 1192 - mmdrop(dev->umem->mm); 1193 - vfree(dev->umem->pages); 1194 - kfree(dev->umem); 1195 - dev->umem = NULL; 1149 + vduse_domain_remove_user_bounce_pages(dev->as[asid].domain); 1150 + unpin_user_pages_dirty_lock(dev->as[asid].umem->pages, 1151 + dev->as[asid].umem->npages, true); 1152 + atomic64_sub(dev->as[asid].umem->npages, &dev->as[asid].umem->mm->pinned_vm); 1153 + mmdrop(dev->as[asid].umem->mm); 1154 + vfree(dev->as[asid].umem->pages); 1155 + kfree(dev->as[asid].umem); 1156 + dev->as[asid].umem = NULL; 1196 1157 ret = 0; 1197 1158 unlock: 1198 - mutex_unlock(&dev->mem_lock); 1159 + mutex_unlock(&dev->as[asid].mem_lock); 1199 1160 return ret; 1200 1161 } 1201 1162 1202 1163 static int vduse_dev_reg_umem(struct vduse_dev *dev, 1203 - u64 iova, u64 uaddr, u64 size) 1164 + u32 asid, u64 iova, u64 uaddr, u64 size) 1204 1165 { 1205 1166 struct page **page_list = NULL; 1206 1167 struct vduse_umem *umem = NULL; ··· 1209 1168 unsigned long npages, lock_limit; 1210 1169 int ret; 1211 1170 1212 - if (!dev->domain || !dev->domain->bounce_map || 1213 - size != dev->domain->bounce_size || 1171 + if (!dev->as[asid].domain || !dev->as[asid].domain->bounce_map || 1172 + size != dev->as[asid].domain->bounce_size || 1214 1173 iova != 0 || uaddr & ~PAGE_MASK) 1215 1174 return -EINVAL; 1216 1175 1217 - mutex_lock(&dev->mem_lock); 1176 + mutex_lock(&dev->as[asid].mem_lock); 1218 1177 ret = -EEXIST; 1219 - if (dev->umem) 1178 + if (dev->as[asid].umem) 1220 1179 goto unlock; 1221 1180 1222 1181 ret = -ENOMEM; ··· 1240 1199 goto out; 1241 1200 } 1242 1201 1243 - ret = vduse_domain_add_user_bounce_pages(dev->domain, 1202 + ret = vduse_domain_add_user_bounce_pages(dev->as[asid].domain, 1244 1203 page_list, pinned); 1245 1204 if (ret) 1246 1205 goto out; ··· 1253 1212 umem->mm = current->mm; 1254 1213 mmgrab(current->mm); 1255 1214 1256 - 
dev->umem = umem; 1215 + dev->as[asid].umem = umem; 1257 1216 out: 1258 1217 if (ret && pinned > 0) 1259 1218 unpin_user_pages(page_list, pinned); ··· 1264 1223 vfree(page_list); 1265 1224 kfree(umem); 1266 1225 } 1267 - mutex_unlock(&dev->mem_lock); 1226 + mutex_unlock(&dev->as[asid].mem_lock); 1268 1227 return ret; 1269 1228 } 1270 1229 ··· 1285 1244 } 1286 1245 1287 1246 static int vduse_dev_iotlb_entry(struct vduse_dev *dev, 1288 - struct vduse_iotlb_entry *entry, 1247 + struct vduse_iotlb_entry_v2 *entry, 1289 1248 struct file **f, uint64_t *capability) 1290 1249 { 1250 + u32 asid; 1291 1251 int r = -EINVAL; 1292 1252 struct vhost_iotlb_map *map; 1293 1253 1294 - if (entry->start > entry->last) 1254 + if (entry->v1.start > entry->v1.last || entry->asid >= dev->nas) 1295 1255 return -EINVAL; 1296 1256 1257 + asid = array_index_nospec(entry->asid, dev->nas); 1297 1258 mutex_lock(&dev->domain_lock); 1298 - if (!dev->domain) 1259 + 1260 + if (!dev->as[asid].domain) 1299 1261 goto out; 1300 1262 1301 - spin_lock(&dev->domain->iotlb_lock); 1302 - map = vhost_iotlb_itree_first(dev->domain->iotlb, entry->start, 1303 - entry->last); 1263 + spin_lock(&dev->as[asid].domain->iotlb_lock); 1264 + map = vhost_iotlb_itree_first(dev->as[asid].domain->iotlb, 1265 + entry->v1.start, entry->v1.last); 1304 1266 if (map) { 1305 1267 if (f) { 1306 1268 const struct vdpa_map_file *map_file; 1307 1269 1308 1270 map_file = (struct vdpa_map_file *)map->opaque; 1309 - entry->offset = map_file->offset; 1271 + entry->v1.offset = map_file->offset; 1310 1272 *f = get_file(map_file->file); 1311 1273 } 1312 - entry->start = map->start; 1313 - entry->last = map->last; 1314 - entry->perm = map->perm; 1274 + entry->v1.start = map->start; 1275 + entry->v1.last = map->last; 1276 + entry->v1.perm = map->perm; 1315 1277 if (capability) { 1316 1278 *capability = 0; 1317 1279 1318 - if (dev->domain->bounce_map && map->start == 0 && 1319 - map->last == dev->domain->bounce_size - 1) 1280 + if 
(dev->as[asid].domain->bounce_map && map->start == 0 && 1281 + map->last == dev->as[asid].domain->bounce_size - 1) 1320 1282 *capability |= VDUSE_IOVA_CAP_UMEM; 1321 1283 } 1322 1284 1323 1285 r = 0; 1324 1286 } 1325 - spin_unlock(&dev->domain->iotlb_lock); 1287 + spin_unlock(&dev->as[asid].domain->iotlb_lock); 1326 1288 1327 1289 out: 1328 1290 mutex_unlock(&dev->domain_lock); ··· 1343 1299 return -EPERM; 1344 1300 1345 1301 switch (cmd) { 1346 - case VDUSE_IOTLB_GET_FD: { 1347 - struct vduse_iotlb_entry entry; 1302 + case VDUSE_IOTLB_GET_FD: 1303 + case VDUSE_IOTLB_GET_FD2: { 1304 + struct vduse_iotlb_entry_v2 entry = {0}; 1348 1305 struct file *f = NULL; 1349 1306 1307 + ret = -ENOIOCTLCMD; 1308 + if (dev->api_version < VDUSE_API_VERSION_1 && 1309 + cmd == VDUSE_IOTLB_GET_FD2) 1310 + break; 1311 + 1350 1312 ret = -EFAULT; 1351 - if (copy_from_user(&entry, argp, sizeof(entry))) 1313 + if (cmd == VDUSE_IOTLB_GET_FD2) { 1314 + if (copy_from_user(&entry, argp, sizeof(entry))) 1315 + break; 1316 + } else { 1317 + if (copy_from_user(&entry.v1, argp, 1318 + sizeof(entry.v1))) 1319 + break; 1320 + } 1321 + 1322 + ret = -EINVAL; 1323 + if (!is_mem_zero((const char *)entry.reserved, 1324 + sizeof(entry.reserved))) 1352 1325 break; 1353 1326 1354 1327 ret = vduse_dev_iotlb_entry(dev, &entry, &f, NULL); ··· 1376 1315 if (!f) 1377 1316 break; 1378 1317 1379 - ret = -EFAULT; 1380 - if (copy_to_user(argp, &entry, sizeof(entry))) { 1318 + if (cmd == VDUSE_IOTLB_GET_FD2) 1319 + ret = copy_to_user(argp, &entry, 1320 + sizeof(entry)); 1321 + else 1322 + ret = copy_to_user(argp, &entry.v1, 1323 + sizeof(entry.v1)); 1324 + 1325 + if (ret) { 1326 + ret = -EFAULT; 1381 1327 fput(f); 1382 1328 break; 1383 1329 } 1384 - ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm)); 1330 + ret = receive_fd(f, NULL, perm_to_file_flags(entry.v1.perm)); 1385 1331 fput(f); 1386 1332 break; 1387 1333 } ··· 1533 1465 } 1534 1466 case VDUSE_IOTLB_REG_UMEM: { 1535 1467 struct vduse_iova_umem umem; 
1468 + u32 asid; 1536 1469 1537 1470 ret = -EFAULT; 1538 1471 if (copy_from_user(&umem, argp, sizeof(umem))) ··· 1541 1472 1542 1473 ret = -EINVAL; 1543 1474 if (!is_mem_zero((const char *)umem.reserved, 1544 - sizeof(umem.reserved))) 1475 + sizeof(umem.reserved)) || 1476 + (dev->api_version < VDUSE_API_VERSION_1 && 1477 + umem.asid != 0) || umem.asid >= dev->nas) 1545 1478 break; 1546 1479 1547 1480 mutex_lock(&dev->domain_lock); 1548 - ret = vduse_dev_reg_umem(dev, umem.iova, 1481 + asid = array_index_nospec(umem.asid, dev->nas); 1482 + ret = vduse_dev_reg_umem(dev, asid, umem.iova, 1549 1483 umem.uaddr, umem.size); 1550 1484 mutex_unlock(&dev->domain_lock); 1551 1485 break; 1552 1486 } 1553 1487 case VDUSE_IOTLB_DEREG_UMEM: { 1554 1488 struct vduse_iova_umem umem; 1489 + u32 asid; 1555 1490 1556 1491 ret = -EFAULT; 1557 1492 if (copy_from_user(&umem, argp, sizeof(umem))) ··· 1563 1490 1564 1491 ret = -EINVAL; 1565 1492 if (!is_mem_zero((const char *)umem.reserved, 1566 - sizeof(umem.reserved))) 1493 + sizeof(umem.reserved)) || 1494 + (dev->api_version < VDUSE_API_VERSION_1 && 1495 + umem.asid != 0) || 1496 + umem.asid >= dev->nas) 1567 1497 break; 1498 + 1568 1499 mutex_lock(&dev->domain_lock); 1569 - ret = vduse_dev_dereg_umem(dev, umem.iova, 1500 + asid = array_index_nospec(umem.asid, dev->nas); 1501 + ret = vduse_dev_dereg_umem(dev, asid, umem.iova, 1570 1502 umem.size); 1571 1503 mutex_unlock(&dev->domain_lock); 1572 1504 break; 1573 1505 } 1574 1506 case VDUSE_IOTLB_GET_INFO: { 1575 1507 struct vduse_iova_info info; 1576 - struct vduse_iotlb_entry entry; 1508 + struct vduse_iotlb_entry_v2 entry; 1577 1509 1578 1510 ret = -EFAULT; 1579 1511 if (copy_from_user(&info, argp, sizeof(info))) ··· 1588 1510 sizeof(info.reserved))) 1589 1511 break; 1590 1512 1591 - entry.start = info.start; 1592 - entry.last = info.last; 1513 + if (dev->api_version < VDUSE_API_VERSION_1) { 1514 + if (info.asid) 1515 + break; 1516 + } else if (info.asid >= dev->nas) 1517 + break; 
1518 + 1519 + entry.v1.start = info.start; 1520 + entry.v1.last = info.last; 1521 + entry.asid = info.asid; 1593 1522 ret = vduse_dev_iotlb_entry(dev, &entry, NULL, 1594 1523 &info.capability); 1595 1524 if (ret < 0) 1596 1525 break; 1597 1526 1598 - info.start = entry.start; 1599 - info.last = entry.last; 1527 + info.start = entry.v1.start; 1528 + info.last = entry.v1.last; 1529 + info.asid = entry.asid; 1600 1530 1601 1531 ret = -EFAULT; 1602 1532 if (copy_to_user(argp, &info, sizeof(info))) ··· 1626 1540 struct vduse_dev *dev = file->private_data; 1627 1541 1628 1542 mutex_lock(&dev->domain_lock); 1629 - if (dev->domain) 1630 - vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size); 1543 + for (int i = 0; i < dev->nas; i++) 1544 + if (dev->as[i].domain) 1545 + vduse_dev_dereg_umem(dev, i, 0, 1546 + dev->as[i].domain->bounce_size); 1631 1547 mutex_unlock(&dev->domain_lock); 1632 1548 spin_lock(&dev->msg_lock); 1633 1549 /* Make sure the inflight messages can processed after reconncection */ ··· 1848 1760 return NULL; 1849 1761 1850 1762 mutex_init(&dev->lock); 1851 - mutex_init(&dev->mem_lock); 1852 1763 mutex_init(&dev->domain_lock); 1853 1764 spin_lock_init(&dev->msg_lock); 1854 1765 INIT_LIST_HEAD(&dev->send_list); ··· 1898 1811 idr_remove(&vduse_idr, dev->minor); 1899 1812 kvfree(dev->config); 1900 1813 vduse_dev_deinit_vqs(dev); 1901 - if (dev->domain) 1902 - vduse_domain_destroy(dev->domain); 1814 + for (int i = 0; i < dev->nas; i++) { 1815 + if (dev->as[i].domain) 1816 + vduse_domain_destroy(dev->as[i].domain); 1817 + } 1818 + kfree(dev->as); 1903 1819 kfree(dev->name); 1904 1820 kfree(dev->groups); 1905 1821 vduse_dev_destroy(dev); ··· 1949 1859 sizeof(config->reserved))) 1950 1860 return false; 1951 1861 1952 - if (api_version < VDUSE_API_VERSION_1 && config->ngroups) 1862 + if (api_version < VDUSE_API_VERSION_1 && 1863 + (config->ngroups || config->nas)) 1953 1864 return false; 1954 1865 1955 - if (api_version >= VDUSE_API_VERSION_1 && 1956 - 
(!config->ngroups || config->ngroups > VDUSE_DEV_MAX_GROUPS)) 1957 - return false; 1866 + if (api_version >= VDUSE_API_VERSION_1) { 1867 + if (!config->ngroups || config->ngroups > VDUSE_DEV_MAX_GROUPS) 1868 + return false; 1869 + 1870 + if (!config->nas || config->nas > VDUSE_DEV_MAX_AS) 1871 + return false; 1872 + } 1958 1873 1959 1874 if (config->vq_align > PAGE_SIZE) 1960 1875 return false; ··· 2024 1929 2025 1930 ret = -EPERM; 2026 1931 mutex_lock(&dev->domain_lock); 2027 - if (dev->domain) 1932 + /* Assuming that if the first domain is allocated, all are allocated */ 1933 + if (dev->as[0].domain) 2028 1934 goto unlock; 2029 1935 2030 1936 ret = kstrtouint(buf, 10, &bounce_size); ··· 2077 1981 dev->device_features = config->features; 2078 1982 dev->device_id = config->device_id; 2079 1983 dev->vendor_id = config->vendor_id; 1984 + 1985 + dev->nas = (dev->api_version < VDUSE_API_VERSION_1) ? 1 : config->nas; 1986 + dev->as = kcalloc(dev->nas, sizeof(dev->as[0]), GFP_KERNEL); 1987 + if (!dev->as) 1988 + goto err_as; 1989 + for (int i = 0; i < dev->nas; i++) 1990 + mutex_init(&dev->as[i].mem_lock); 1991 + 2080 1992 dev->ngroups = (dev->api_version < VDUSE_API_VERSION_1) 2081 1993 ? 
1 2082 1994 : config->ngroups; ··· 2092 1988 GFP_KERNEL); 2093 1989 if (!dev->groups) 2094 1990 goto err_vq_groups; 2095 - for (u32 i = 0; i < dev->ngroups; ++i) 1991 + for (u32 i = 0; i < dev->ngroups; ++i) { 2096 1992 dev->groups[i].dev = dev; 1993 + rwlock_init(&dev->groups[i].as_lock); 1994 + dev->groups[i].as = &dev->as[0]; 1995 + } 2097 1996 2098 1997 dev->name = kstrdup(config->name, GFP_KERNEL); 2099 1998 if (!dev->name) ··· 2136 2029 err_str: 2137 2030 kfree(dev->groups); 2138 2031 err_vq_groups: 2032 + kfree(dev->as); 2033 + err_as: 2139 2034 vduse_dev_destroy(dev); 2140 2035 err: 2141 2036 return ret; ··· 2261 2152 2262 2153 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, 2263 2154 &vduse_vdpa_config_ops, &vduse_map_ops, 2264 - dev->ngroups, 1, name, true); 2155 + dev->ngroups, dev->nas, name, true); 2265 2156 if (IS_ERR(vdev)) 2266 2157 return PTR_ERR(vdev); 2267 2158 ··· 2276 2167 const struct vdpa_dev_set_config *config) 2277 2168 { 2278 2169 struct vduse_dev *dev; 2279 - int ret; 2170 + size_t domain_bounce_size; 2171 + int ret, i; 2280 2172 2281 2173 mutex_lock(&vduse_lock); 2282 2174 dev = vduse_find_dev(name); ··· 2291 2181 return ret; 2292 2182 2293 2183 mutex_lock(&dev->domain_lock); 2294 - if (!dev->domain) 2295 - dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1, 2296 - dev->bounce_size); 2297 - mutex_unlock(&dev->domain_lock); 2298 - if (!dev->domain) { 2299 - ret = -ENOMEM; 2300 - goto domain_err; 2184 + ret = 0; 2185 + 2186 + domain_bounce_size = dev->bounce_size / dev->nas; 2187 + for (i = 0; i < dev->nas; ++i) { 2188 + dev->as[i].domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1, 2189 + domain_bounce_size); 2190 + if (!dev->as[i].domain) { 2191 + ret = -ENOMEM; 2192 + goto err; 2193 + } 2301 2194 } 2302 2195 2196 + mutex_unlock(&dev->domain_lock); 2197 + 2303 2198 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); 2304 - if (ret) { 2305 - goto register_err; 2306 - } 2199 + if (ret) 2200 + goto err_register; 2307 
2201 2308 2202 return 0; 2309 2203 2310 - register_err: 2204 + err_register: 2311 2205 mutex_lock(&dev->domain_lock); 2312 - vduse_domain_destroy(dev->domain); 2313 - dev->domain = NULL; 2206 + 2207 + err: 2208 + for (int j = 0; j < i; j++) { 2209 + if (dev->as[j].domain) { 2210 + vduse_domain_destroy(dev->as[j].domain); 2211 + dev->as[j].domain = NULL; 2212 + } 2213 + } 2314 2214 mutex_unlock(&dev->domain_lock); 2315 2215 2316 - domain_err: 2317 2216 put_device(&dev->vdev->vdpa.dev); 2318 2217 2319 2218 return ret;
+63 -3
include/uapi/linux/vduse.h
··· 32 32 * @vq_num: the number of virtqueues 33 33 * @vq_align: the allocation alignment of virtqueue's metadata 34 34 * @ngroups: number of vq groups that VDUSE device declares 35 + * @nas: number of address spaces that VDUSE device declares 35 36 * @reserved: for future use, needs to be initialized to zero 36 37 * @config_size: the size of the configuration space 37 38 * @config: the buffer of the configuration space ··· 48 47 __u32 vq_num; 49 48 __u32 vq_align; 50 49 __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */ 51 - __u32 reserved[12]; 50 + __u32 nas; /* if VDUSE_API_VERSION >= 1 */ 51 + __u32 reserved[11]; 52 52 __u32 config_size; 53 53 __u8 config[]; 54 54 }; ··· 169 167 }; 170 168 171 169 /** 170 + * struct vduse_vq_group_asid - virtqueue group ASID 171 + * @group: Index of the virtqueue group 172 + * @asid: Address space ID of the group 173 + */ 174 + struct vduse_vq_group_asid { 175 + __u32 group; 176 + __u32 asid; 177 + }; 178 + 179 + /** 172 180 * struct vduse_vq_info - information of a virtqueue 173 181 * @index: virtqueue index 174 182 * @num: the size of virtqueue ··· 237 225 * @uaddr: start address of userspace memory, it must be aligned to page size 238 226 * @iova: start of the IOVA region 239 227 * @size: size of the IOVA region 228 + * @asid: Address space ID of the IOVA region 240 229 * @reserved: for future use, needs to be initialized to zero 241 230 * 242 231 * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM ··· 247 234 __u64 uaddr; 248 235 __u64 iova; 249 236 __u64 size; 250 - __u64 reserved[3]; 237 + __u32 asid; 238 + __u32 reserved[5]; 251 239 }; 252 240 253 241 /* Register userspace memory for IOVA regions */ ··· 262 248 * @start: start of the IOVA region 263 249 * @last: last of the IOVA region 264 250 * @capability: capability of the IOVA region 251 + * @asid: Address space ID of the IOVA region, only if device API version >= 1 265 252 * @reserved: for future use, needs to be initialized to zero 266 253 * 267 254 * 
Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of ··· 273 258 __u64 last; 274 259 #define VDUSE_IOVA_CAP_UMEM (1 << 0) 275 260 __u64 capability; 276 - __u64 reserved[3]; 261 + __u32 asid; /* Only if device API version >= 1 */ 262 + __u32 reserved[5]; 277 263 }; 278 264 279 265 /* ··· 282 266 * and return some information on it. Caller should set start and last fields. 283 267 */ 284 268 #define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) 269 + 270 + /** 271 + * struct vduse_iotlb_entry_v2 - entry of IOTLB to describe one IOVA region 272 + * 273 + * @v1: the original vduse_iotlb_entry 274 + * @asid: address space ID of the IOVA region 275 + * @reserved: for future use, needs to be initialized to zero 276 + * 277 + * Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region. 278 + */ 279 + struct vduse_iotlb_entry_v2 { 280 + struct vduse_iotlb_entry v1; 281 + __u32 asid; 282 + __u32 reserved[12]; 283 + }; 284 + 285 + /* 286 + * Same as VDUSE_IOTLB_GET_FD but with vduse_iotlb_entry_v2 argument that 287 + * support extra fields. 288 + */ 289 + #define VDUSE_IOTLB_GET_FD2 _IOWR(VDUSE_BASE, 0x1b, struct vduse_iotlb_entry_v2) 290 + 285 291 286 292 /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ 287 293 ··· 313 275 * @VDUSE_SET_STATUS: set the device status 314 276 * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for 315 277 * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl 278 + * @VDUSE_SET_VQ_GROUP_ASID: Notify userspace to update the address space of a 279 + * virtqueue group. 
316 280 */ 317 281 enum vduse_req_type { 318 282 VDUSE_GET_VQ_STATE, 319 283 VDUSE_SET_STATUS, 320 284 VDUSE_UPDATE_IOTLB, 285 + VDUSE_SET_VQ_GROUP_ASID, 321 286 }; 322 287 323 288 /** ··· 356 315 }; 357 316 358 317 /** 318 + * struct vduse_iova_range_v2 - IOVA range [start, last] if API_VERSION >= 1 319 + * @start: start of the IOVA range 320 + * @last: last of the IOVA range 321 + * @asid: address space ID of the IOVA range 322 + */ 323 + struct vduse_iova_range_v2 { 324 + __u64 start; 325 + __u64 last; 326 + __u32 asid; 327 + }; 328 + 329 + /** 359 330 * struct vduse_dev_request - control request 360 331 * @type: request type 361 332 * @request_id: request id ··· 375 322 * @vq_state: virtqueue state, only index field is available 376 323 * @s: device status 377 324 * @iova: IOVA range for updating 325 + * @iova_v2: IOVA range for updating if API_VERSION >= 1 326 + * @vq_group_asid: ASID of a virtqueue group 378 327 * @padding: padding 379 328 * 380 329 * Structure used by read(2) on /dev/vduse/$NAME. ··· 389 334 struct vduse_vq_state vq_state; 390 335 struct vduse_dev_status s; 391 336 struct vduse_iova_range iova; 337 + /* Following members but padding exist only if vduse api 338 + * version >= 1 339 + */ 340 + struct vduse_iova_range_v2 iova_v2; 341 + struct vduse_vq_group_asid vq_group_asid; 392 342 __u32 padding[32]; 393 343 }; 394 344 };