Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

vduse: Use fixed 4KB bounce pages for non-4KB page size

The allocation granularity of bounce pages is PAGE_SIZE. This can cause even
a small IO request to occupy an entire bounce page exclusively. This kind of
memory waste becomes more significant when PAGE_SIZE is larger than 4KB
(e.g. arm64 with 64KB pages).

So, optimize it by using a fixed 4KB granularity for bounce maps and iova
allocation. A single IO request then occupies at least one 4KB bounce page
instead of an entire PAGE_SIZE memory page.

Signed-off-by: Sheng Zhao <sheng.zhao@bytedance.com>
Message-Id: <20250925113516.60305-1-sheng.zhao@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
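
To make the waste concrete: on a 64KB-page arm64 system, a 512-byte IO request
previously pinned a full 64KB bounce page, while with the fixed 4KB granularity
it pins only 4KB. Below is a minimal userspace sketch of that arithmetic; the
64KB PAGE_SIZE and the 512-byte request are illustrative values, not taken from
the patch.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE	(64 * 1024)	/* assumed: arm64 with 64KB pages */
#define BOUNCE_MAP_SIZE	(4 * 1024)	/* fixed 4KB bounce granularity */

/* Space a request pins: its length rounded up to the allocation granularity. */
static size_t pinned(size_t len, size_t gran)
{
	return (len + gran - 1) / gran * gran;
}

int main(void)
{
	size_t io = 512;	/* a small IO request, e.g. one 512-byte sector */

	printf("before: %zu bytes pinned\n", pinned(io, PAGE_SIZE));		/* 65536 */
	printf("after:  %zu bytes pinned\n", pinned(io, BOUNCE_MAP_SIZE));	/* 4096 */
	return 0;
}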

Authored by Sheng Zhao, committed by Michael S. Tsirkin
3fc3068e 1c14b0e4

+94 -41 (total)

drivers/vdpa/vdpa_user/iova_domain.c (+89 -41)
···
 static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					 u64 iova, u64 size, u64 paddr)
 {
-	struct vduse_bounce_map *map;
+	struct vduse_bounce_map *map, *head_map;
+	struct page *tmp_page;
 	u64 last = iova + size - 1;

 	while (iova <= last) {
-		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		/*
+		 * When PAGE_SIZE is larger than 4KB, multiple adjacent bounce_maps will
+		 * point to the same memory page of PAGE_SIZE. Since bounce_maps originate
+		 * from IO requests, we may not be able to guarantee that the orig_phys
+		 * values of all IO requests within the same 64KB memory page are contiguous.
+		 * Therefore, we need to store them separately.
+		 *
+		 * Bounce pages are allocated on demand. As a result, it may occur that
+		 * multiple bounce pages corresponding to the same 64KB memory page attempt
+		 * to allocate memory simultaneously, so we use cmpxchg to handle this
+		 * concurrency.
+		 */
+		map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
 		if (!map->bounce_page) {
-			map->bounce_page = alloc_page(GFP_ATOMIC);
-			if (!map->bounce_page)
-				return -ENOMEM;
+			head_map = &domain->bounce_maps[(iova & PAGE_MASK) >> BOUNCE_MAP_SHIFT];
+			if (!head_map->bounce_page) {
+				tmp_page = alloc_page(GFP_ATOMIC);
+				if (!tmp_page)
+					return -ENOMEM;
+				if (cmpxchg(&head_map->bounce_page, NULL, tmp_page))
+					__free_page(tmp_page);
+			}
+			map->bounce_page = head_map->bounce_page;
 		}
 		map->orig_phys = paddr;
-		paddr += PAGE_SIZE;
-		iova += PAGE_SIZE;
+		paddr += BOUNCE_MAP_SIZE;
+		iova += BOUNCE_MAP_SIZE;
 	}
 	return 0;
 }
···
 	u64 last = iova + size - 1;

 	while (iova <= last) {
-		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
 		map->orig_phys = INVALID_PHYS_ADDR;
-		iova += PAGE_SIZE;
+		iova += BOUNCE_MAP_SIZE;
 	}
+}
+
+static unsigned int offset_in_bounce_page(dma_addr_t addr)
+{
+	return (addr & ~BOUNCE_MAP_MASK);
 }

 static void do_bounce(phys_addr_t orig, void *addr, size_t size,
···
 {
 	struct vduse_bounce_map *map;
 	struct page *page;
-	unsigned int offset;
+	unsigned int offset, head_offset;
 	void *addr;
 	size_t sz;

···
 		return;

 	while (size) {
-		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
-		offset = offset_in_page(iova);
-		sz = min_t(size_t, PAGE_SIZE - offset, size);
+		map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
+		head_offset = offset_in_page(iova);
+		offset = offset_in_bounce_page(iova);
+		sz = min_t(size_t, BOUNCE_MAP_SIZE - offset, size);

 		if (WARN_ON(!map->bounce_page ||
 			    map->orig_phys == INVALID_PHYS_ADDR))
···
 			map->user_bounce_page : map->bounce_page;

 		addr = kmap_local_page(page);
-		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
+		do_bounce(map->orig_phys + offset, addr + head_offset, sz, dir);
 		kunmap_local(addr);
 		size -= sz;
 		iova += sz;
···
 	struct page *page = NULL;

 	read_lock(&domain->bounce_lock);
-	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+	map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
 	if (domain->user_bounce_pages || !map->bounce_page)
 		goto out;

···
 	struct vduse_bounce_map *map;
 	unsigned long pfn, bounce_pfns;

-	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
+	bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;

 	for (pfn = 0; pfn < bounce_pfns; pfn++) {
 		map = &domain->bounce_maps[pfn];
···
 		if (!map->bounce_page)
 			continue;

-		__free_page(map->bounce_page);
+		if (!((pfn << BOUNCE_MAP_SHIFT) & ~PAGE_MASK))
+			__free_page(map->bounce_page);
 		map->bounce_page = NULL;
 	}
 }
···
 int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
 {
-	struct vduse_bounce_map *map;
-	int i, ret;
+	struct vduse_bounce_map *map, *head_map;
+	int i, j, ret;
+	int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
+	int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
+	struct page *head_page = NULL;
+	bool need_copy;

 	/* Now we don't support partial mapping */
 	if (count != (domain->bounce_size >> PAGE_SHIFT))
···
 		goto out;

 	for (i = 0; i < count; i++) {
-		map = &domain->bounce_maps[i];
-		if (map->bounce_page) {
+		need_copy = false;
+		head_map = &domain->bounce_maps[(i * inner_pages)];
+		head_page = head_map->bounce_page;
+		for (j = 0; j < inner_pages; j++) {
+			if ((i * inner_pages + j) >= bounce_pfns)
+				break;
+			map = &domain->bounce_maps[(i * inner_pages + j)];
 			/* Copy kernel page to user page if it's in use */
-			if (map->orig_phys != INVALID_PHYS_ADDR)
-				memcpy_to_page(pages[i], 0,
-					       page_address(map->bounce_page),
-					       PAGE_SIZE);
+			if ((head_page) && (map->orig_phys != INVALID_PHYS_ADDR))
+				need_copy = true;
+			map->user_bounce_page = pages[i];
 		}
-		map->user_bounce_page = pages[i];
 		get_page(pages[i]);
+		if ((head_page) && (need_copy))
+			memcpy_to_page(pages[i], 0,
+				       page_address(head_page),
+				       PAGE_SIZE);
 	}
 	domain->user_bounce_pages = true;
 	ret = 0;
···
 void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
 {
-	struct vduse_bounce_map *map;
-	unsigned long i, count;
+	struct vduse_bounce_map *map, *head_map;
+	unsigned long i, j, count;
+	int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
+	int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
+	struct page *head_page = NULL;
+	bool need_copy;

 	write_lock(&domain->bounce_lock);
 	if (!domain->user_bounce_pages)
···
 	count = domain->bounce_size >> PAGE_SHIFT;
 	for (i = 0; i < count; i++) {
-		struct page *page = NULL;
-
-		map = &domain->bounce_maps[i];
-		if (WARN_ON(!map->user_bounce_page))
+		need_copy = false;
+		head_map = &domain->bounce_maps[(i * inner_pages)];
+		if (WARN_ON(!head_map->user_bounce_page))
 			continue;
+		head_page = head_map->user_bounce_page;

-		/* Copy user page to kernel page if it's in use */
-		if (map->orig_phys != INVALID_PHYS_ADDR) {
-			page = map->bounce_page;
-			memcpy_from_page(page_address(page),
-					 map->user_bounce_page, 0, PAGE_SIZE);
+		for (j = 0; j < inner_pages; j++) {
+			if ((i * inner_pages + j) >= bounce_pfns)
+				break;
+			map = &domain->bounce_maps[(i * inner_pages + j)];
+			if (WARN_ON(!map->user_bounce_page))
+				continue;
+			/* Copy user page to kernel page if it's in use */
+			if ((map->orig_phys != INVALID_PHYS_ADDR) && (head_map->bounce_page))
+				need_copy = true;
+			map->user_bounce_page = NULL;
 		}
-		put_page(map->user_bounce_page);
-		map->user_bounce_page = NULL;
+		if (need_copy)
+			memcpy_from_page(page_address(head_map->bounce_page),
+					 head_page, 0, PAGE_SIZE);
+		put_page(head_page);
 	}
 	domain->user_bounce_pages = false;
 out:
···
 	unsigned long pfn, bounce_pfns;
 	int ret;

-	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
+	bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
 	if (iova_limit <= bounce_size)
 		return NULL;
···
 	rwlock_init(&domain->bounce_lock);
 	spin_lock_init(&domain->iotlb_lock);
 	init_iova_domain(&domain->stream_iovad,
-			 PAGE_SIZE, IOVA_START_PFN);
+			 BOUNCE_MAP_SIZE, IOVA_START_PFN);
 	ret = iova_domain_init_rcaches(&domain->stream_iovad);
 	if (ret)
 		goto err_iovad_stream;
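
The subtlest part of the diff above is the on-demand allocation of the shared
backing page: all 4KB bounce slots whose iova falls inside one PAGE_SIZE memory
page share the page owned by the first ("head") slot, and concurrent allocators
are reconciled with cmpxchg(), the loser freeing its page. Below is a minimal
userspace sketch of that idiom using C11 atomics in place of the kernel's
cmpxchg()/__free_page(); the names (bounce_slot, get_backing_page) and the 64KB
size are illustrative, not from the patch.

#include <stdatomic.h>
#include <stdlib.h>

struct bounce_slot {
	_Atomic(void *) backing;	/* shared PAGE_SIZE backing page */
};

/* Return the head slot's backing page, allocating it once even under races. */
static void *get_backing_page(struct bounce_slot *head)
{
	void *page = atomic_load(&head->backing);

	if (!page) {
		void *tmp = malloc(64 * 1024);	/* stands in for alloc_page() */
		void *expected = NULL;

		if (!tmp)
			return NULL;
		if (atomic_compare_exchange_strong(&head->backing, &expected, tmp)) {
			/* We won the race; our page is now installed. */
			page = tmp;
		} else {
			/* Another caller installed a page first; ours is redundant. */
			free(tmp);
			page = expected;
		}
	}
	return page;
}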
drivers/vdpa/vdpa_user/iova_domain.h (+5)
···
 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)

+#define BOUNCE_MAP_SHIFT	12
+#define BOUNCE_MAP_SIZE		(1 << BOUNCE_MAP_SHIFT)
+#define BOUNCE_MAP_MASK		(~(BOUNCE_MAP_SIZE - 1))
+#define BOUNCE_MAP_ALIGN(addr)	(((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1))
+
 struct vduse_bounce_map {
 	struct page *bounce_page;
 	struct page *user_bounce_page;
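
As a quick sanity check of the new macros: shifting by BOUNCE_MAP_SHIFT yields
the 4KB slot index, masking with ~BOUNCE_MAP_MASK extracts the intra-slot
offset (exactly what the new offset_in_bounce_page() helper computes), and
BOUNCE_MAP_ALIGN rounds up to the next 4KB boundary. A standalone sketch with
the macros copied out of the header; the sample iova value is arbitrary.

#include <stdio.h>

#define BOUNCE_MAP_SHIFT	12
#define BOUNCE_MAP_SIZE		(1 << BOUNCE_MAP_SHIFT)
#define BOUNCE_MAP_MASK		(~(BOUNCE_MAP_SIZE - 1))
#define BOUNCE_MAP_ALIGN(addr)	(((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1))

int main(void)
{
	unsigned long iova = 0x12345UL;	/* arbitrary example address */

	printf("slot index     = %lu\n", iova >> BOUNCE_MAP_SHIFT);	/* 18 */
	printf("offset in slot = %#lx\n", iova & ~BOUNCE_MAP_MASK);	/* 0x345 */
	printf("aligned up     = %#lx\n", BOUNCE_MAP_ALIGN(iova));	/* 0x13000 */
	return 0;
}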