// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
#include <linux/module.h>
#include <linux/sizes.h>

#include <linux/gpu_buddy.h>

/**
 * gpu_buddy_assert - assert a condition in the buddy allocator
 * @condition: condition expected to be true
 *
 * When CONFIG_KUNIT is enabled, evaluates @condition and, if false, triggers
 * a WARN_ON() and also calls kunit_fail_current_test() so that any running
 * kunit test is properly marked as failed. The stringified condition is
 * included in the failure message for easy identification.
 *
 * When CONFIG_KUNIT is not enabled, this reduces to WARN_ON() so production
 * builds retain the same warning semantics as before.
 */
#if IS_ENABLED(CONFIG_KUNIT)
#include <kunit/test-bug.h>
#define gpu_buddy_assert(condition) do { \
	if (WARN_ON(!(condition))) \
		kunit_fail_current_test("gpu_buddy_assert(" #condition ")"); \
} while (0)
#else
#define gpu_buddy_assert(condition) WARN_ON(!(condition))
#endif

static struct kmem_cache *slab_blocks;

static unsigned int
gpu_buddy_block_state(struct gpu_buddy_block *block)
{
	return block->header & GPU_BUDDY_HEADER_STATE;
}

static bool
gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}

static bool
gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}

static unsigned int gpu_buddy_block_offset_alignment(struct gpu_buddy_block *block)
{
	u64 offset = gpu_buddy_block_offset(block);

	if (!offset)
		/*
		 * __ffs64(0) is undefined; offset 0 is maximally aligned, so return
		 * a value greater than any possible alignment.
		 */
		return 64 + 1;

	return __ffs64(offset);
}

RB_DECLARE_CALLBACKS_MAX(static, gpu_buddy_augment_cb,
			 struct gpu_buddy_block, rb,
			 unsigned int, subtree_max_alignment,
			 gpu_buddy_block_offset_alignment);
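
/*
 * The free trees are augmented rbtrees: each node caches, in
 * subtree_max_alignment, the maximum gpu_buddy_block_offset_alignment()
 * found anywhere in its subtree. For example, a subtree holding free blocks
 * at offsets 0x3000, 0x4000 and 0x6000 (__ffs64() alignments 12, 14 and 13)
 * carries subtree_max_alignment == 14 at its root, which lets
 * gpu_buddy_find_block_aligned() skip whole subtrees that cannot satisfy a
 * requested alignment.
 */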

static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
					       struct gpu_buddy_block *parent,
					       unsigned int order,
					       u64 offset)
{
	struct gpu_buddy_block *block;

	BUG_ON(order > GPU_BUDDY_MAX_ORDER);

	block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
	if (!block)
		return NULL;

	block->header = offset;
	block->header |= order;
	block->parent = parent;

	RB_CLEAR_NODE(&block->rb);

	BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
	return block;
}

static void gpu_block_free(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	kmem_cache_free(slab_blocks, block);
}

static enum gpu_buddy_free_tree
get_block_tree(struct gpu_buddy_block *block)
{
	return gpu_buddy_block_is_clear(block) ?
	       GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}

static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
	return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}

static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
	return rbtree_get_free_block(rb_last(root));
}

static bool rbtree_is_empty(struct rb_root *root)
{
	return RB_EMPTY_ROOT(root);
}

static void rbtree_insert(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block,
			  enum gpu_buddy_free_tree tree)
{
	struct rb_node **link, *parent = NULL;
	unsigned int block_alignment, order;
	struct gpu_buddy_block *node;
	struct rb_root *root;

	order = gpu_buddy_block_order(block);
	block_alignment = gpu_buddy_block_offset_alignment(block);

	root = &mm->free_trees[tree][order];
	link = &root->rb_node;

	while (*link) {
		parent = *link;
		node = rbtree_get_free_block(parent);
		/*
		 * Manual augmentation update during insertion traversal. Required
		 * because rb_insert_augmented() only calls rotate callback during
		 * rotations. This ensures all ancestors on the insertion path have
		 * correct subtree_max_alignment values.
		 */
		if (node->subtree_max_alignment < block_alignment)
			node->subtree_max_alignment = block_alignment;

		if (gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node))
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	block->subtree_max_alignment = block_alignment;
	rb_link_node(&block->rb, parent, link);
	rb_insert_augmented(&block->rb, root, &gpu_buddy_augment_cb);
}

static void rbtree_remove(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	unsigned int order = gpu_buddy_block_order(block);
	enum gpu_buddy_free_tree tree;
	struct rb_root *root;

	tree = get_block_tree(block);
	root = &mm->free_trees[tree][order];

	rb_erase_augmented(&block->rb, root, &gpu_buddy_augment_cb);
	RB_CLEAR_NODE(&block->rb);
}

static void clear_reset(struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}

static void mark_cleared(struct gpu_buddy_block *block)
{
	block->header |= GPU_BUDDY_HEADER_CLEAR;
}

static void mark_allocated(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_ALLOCATED;

	rbtree_remove(mm, block);
}

static void mark_free(struct gpu_buddy *mm,
		      struct gpu_buddy_block *block)
{
	enum gpu_buddy_free_tree tree;

	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_FREE;

	tree = get_block_tree(block);
	rbtree_insert(mm, block, tree);
}

static void mark_split(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	block->header &= ~GPU_BUDDY_HEADER_STATE;
	block->header |= GPU_BUDDY_SPLIT;

	rbtree_remove(mm, block);
}

static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= e2 && e1 >= s2;
}

static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
{
	return s1 <= s2 && e1 >= e2;
}
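
/*
 * Both helpers above treat [s, e] as inclusive ranges, which is why callers
 * below convert an exclusive end into "end - 1" first. For example,
 * overlaps(0, 0xfff, 0x1000, 0x1fff) is false (the ranges only touch),
 * while contains(0, 0x1fff, 0x1000, 0x1fff) is true.
 */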

static struct gpu_buddy_block *
__get_buddy(struct gpu_buddy_block *block)
{
	struct gpu_buddy_block *parent;

	parent = block->parent;
	if (!parent)
		return NULL;

	if (parent->left == block)
		return parent->right;

	return parent->left;
}

static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
				     struct gpu_buddy_block *block,
				     bool force_merge)
{
	struct gpu_buddy_block *parent;
	unsigned int order;

	while ((parent = block->parent)) {
		struct gpu_buddy_block *buddy;

		buddy = __get_buddy(block);

		if (!gpu_buddy_block_is_free(buddy))
			break;

		if (!force_merge) {
			/*
			 * Compare the clear state of the block and its
			 * buddy and stop merging if the states differ.
			 */
			if (gpu_buddy_block_is_clear(block) !=
			    gpu_buddy_block_is_clear(buddy))
				break;

			if (gpu_buddy_block_is_clear(block))
				mark_cleared(parent);
		}

		rbtree_remove(mm, buddy);
		if (force_merge && gpu_buddy_block_is_clear(buddy))
			mm->clear_avail -= gpu_buddy_block_size(mm, buddy);

		gpu_block_free(mm, block);
		gpu_block_free(mm, buddy);

		block = parent;
	}

	order = gpu_buddy_block_order(block);
	mark_free(mm, block);

	return order;
}

static int __force_merge(struct gpu_buddy *mm,
			 u64 start,
			 u64 end,
			 unsigned int min_order)
{
	unsigned int tree, order;
	int i;

	if (!min_order)
		return -ENOMEM;

	if (min_order > mm->max_order)
		return -EINVAL;

	for_each_free_tree(tree) {
		for (i = min_order - 1; i >= 0; i--) {
			struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);

			while (iter) {
				struct gpu_buddy_block *block, *buddy;
				u64 block_start, block_end;

				block = rbtree_get_free_block(iter);
				iter = rb_prev(iter);

				if (!block || !block->parent)
					continue;

				block_start = gpu_buddy_block_offset(block);
				block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

				if (!contains(start, end, block_start, block_end))
					continue;

				buddy = __get_buddy(block);
				if (!gpu_buddy_block_is_free(buddy))
					continue;

				gpu_buddy_assert(gpu_buddy_block_is_clear(block) !=
						 gpu_buddy_block_is_clear(buddy));

				/*
				 * Advance the iterator when it currently points at the
				 * buddy, since freeing the block also removes its buddy
				 * from the tree.
				 */
				if (iter == &buddy->rb)
					iter = rb_prev(iter);

				rbtree_remove(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);

				order = __gpu_buddy_free(mm, block, true);
				if (order >= min_order)
					return 0;
			}
		}
	}

	return -ENOMEM;
}

/**
 * gpu_buddy_init - init memory manager
 *
 * @mm: GPU buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
	unsigned int i, j, root_count = 0;
	u64 offset = 0;

	if (size < chunk_size)
		return -EINVAL;

	if (chunk_size < SZ_4K)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);

	mm->size = size;
	mm->avail = size;
	mm->clear_avail = 0;
	mm->chunk_size = chunk_size;
	mm->max_order = ilog2(size) - ilog2(chunk_size);

	BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);

	mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
				       sizeof(*mm->free_trees),
				       GFP_KERNEL);
	if (!mm->free_trees)
		return -ENOMEM;

	for_each_free_tree(i) {
		mm->free_trees[i] = kmalloc_array(mm->max_order + 1,
						  sizeof(struct rb_root),
						  GFP_KERNEL);
		if (!mm->free_trees[i])
			goto out_free_tree;

		for (j = 0; j <= mm->max_order; ++j)
			mm->free_trees[i][j] = RB_ROOT;
	}

	mm->n_roots = hweight64(size);

	mm->roots = kmalloc_array(mm->n_roots,
				  sizeof(struct gpu_buddy_block *),
				  GFP_KERNEL);
	if (!mm->roots)
		goto out_free_tree;

	/*
	 * Split into power-of-two blocks, in case we are given a size that is
	 * not itself a power-of-two.
	 */
	do {
		struct gpu_buddy_block *root;
		unsigned int order;
		u64 root_size;

		order = ilog2(size) - ilog2(chunk_size);
		root_size = chunk_size << order;

		root = gpu_block_alloc(mm, NULL, order, offset);
		if (!root)
			goto out_free_roots;

		mark_free(mm, root);

		BUG_ON(root_count > mm->max_order);
		BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);

		mm->roots[root_count] = root;

		offset += root_size;
		size -= root_size;
		root_count++;
	} while (size);

	return 0;

out_free_roots:
	while (root_count--)
		gpu_block_free(mm, mm->roots[root_count]);
	kfree(mm->roots);
out_free_tree:
	while (i--)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	return -ENOMEM;
}
EXPORT_SYMBOL(gpu_buddy_init);
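
/*
 * A minimal usage sketch (not from this file): a driver managing 256 MiB of
 * VRAM with 4 KiB pages would set up and tear down the manager roughly as
 * follows; "vram_mm" is a hypothetical caller-owned instance.
 *
 *	struct gpu_buddy vram_mm;
 *	int err;
 *
 *	err = gpu_buddy_init(&vram_mm, SZ_256M, SZ_4K);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_fini(&vram_mm);
 */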

/**
 * gpu_buddy_fini - tear down the memory manager
 *
 * @mm: GPU buddy manager to free
 *
 * Cleans up the memory manager resources and the free trees.
 */
void gpu_buddy_fini(struct gpu_buddy *mm)
{
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;

	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		gpu_buddy_assert(gpu_buddy_block_is_free(mm->roots[i]));

		gpu_block_free(mm, mm->roots[i]);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	gpu_buddy_assert(mm->avail == mm->size);

	for_each_free_tree(i)
		kfree(mm->free_trees[i]);
	kfree(mm->free_trees);
	kfree(mm->roots);
}
EXPORT_SYMBOL(gpu_buddy_fini);

static int split_block(struct gpu_buddy *mm,
		       struct gpu_buddy_block *block)
{
	unsigned int block_order = gpu_buddy_block_order(block) - 1;
	u64 offset = gpu_buddy_block_offset(block);

	BUG_ON(!gpu_buddy_block_is_free(block));
	BUG_ON(!gpu_buddy_block_order(block));

	block->left = gpu_block_alloc(mm, block, block_order, offset);
	if (!block->left)
		return -ENOMEM;

	block->right = gpu_block_alloc(mm, block, block_order,
				       offset + (mm->chunk_size << block_order));
	if (!block->right) {
		gpu_block_free(mm, block->left);
		return -ENOMEM;
	}

	mark_split(mm, block);

	if (gpu_buddy_block_is_clear(block)) {
		mark_cleared(block->left);
		mark_cleared(block->right);
		clear_reset(block);
	}

	mark_free(mm, block->left);
	mark_free(mm, block->right);

	return 0;
}

/**
 * gpu_buddy_reset_clear - reset blocks clear state
 *
 * @mm: GPU buddy manager
 * @is_clear: blocks clear state
 *
 * Reset the clear state based on @is_clear value for each block
 * in the freetree.
 */
void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
	enum gpu_buddy_free_tree src_tree, dst_tree;
	u64 root_size, size, start;
	unsigned int order;
	int i;

	size = mm->size;
	for (i = 0; i < mm->n_roots; ++i) {
		order = ilog2(size) - ilog2(mm->chunk_size);
		start = gpu_buddy_block_offset(mm->roots[i]);
		__force_merge(mm, start, start + size, order);

		root_size = mm->chunk_size << order;
		size -= root_size;
	}

	src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
	dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (i = 0; i <= mm->max_order; ++i) {
		struct rb_root *root = &mm->free_trees[src_tree][i];
		struct gpu_buddy_block *block, *tmp;

		rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
			rbtree_remove(mm, block);
			if (is_clear) {
				mark_cleared(block);
				mm->clear_avail += gpu_buddy_block_size(mm, block);
			} else {
				clear_reset(block);
				mm->clear_avail -= gpu_buddy_block_size(mm, block);
			}

			rbtree_insert(mm, block, dst_tree);
		}
	}
}
EXPORT_SYMBOL(gpu_buddy_reset_clear);
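
/*
 * Usage note (a sketch, the trigger is driver policy and only assumed here):
 * a driver that has just wiped all of its backing memory in one pass, for
 * example after a full-device clear on resume, could mark every free block
 * accordingly:
 *
 *	gpu_buddy_reset_clear(&vram_mm, true);
 */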

/**
 * gpu_buddy_free_block - free a block
 *
 * @mm: GPU buddy manager
 * @block: block to be freed
 */
void gpu_buddy_free_block(struct gpu_buddy *mm,
			  struct gpu_buddy_block *block)
{
	BUG_ON(!gpu_buddy_block_is_allocated(block));
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	__gpu_buddy_free(mm, block, false);
}
EXPORT_SYMBOL(gpu_buddy_free_block);

static void __gpu_buddy_free_list(struct gpu_buddy *mm,
				  struct list_head *objects,
				  bool mark_clear,
				  bool mark_dirty)
{
	struct gpu_buddy_block *block, *on;

	gpu_buddy_assert(!(mark_dirty && mark_clear));

	list_for_each_entry_safe(block, on, objects, link) {
		if (mark_clear)
			mark_cleared(block);
		else if (mark_dirty)
			clear_reset(block);
		gpu_buddy_free_block(mm, block);
		cond_resched();
	}
	INIT_LIST_HEAD(objects);
}

static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
					 struct list_head *objects)
{
	/*
	 * Don't touch the clear/dirty bit, since allocation is still internal
	 * at this point. For example we might have just failed part of the
	 * allocation.
	 */
	__gpu_buddy_free_list(mm, objects, false, false);
}

/**
 * gpu_buddy_free_list - free blocks
 *
 * @mm: GPU buddy manager
 * @objects: list of blocks to free
 * @flags: optional flags such as GPU_BUDDY_CLEARED
 */
void gpu_buddy_free_list(struct gpu_buddy *mm,
			 struct list_head *objects,
			 unsigned int flags)
{
	bool mark_clear = flags & GPU_BUDDY_CLEARED;

	__gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
EXPORT_SYMBOL(gpu_buddy_free_list);

static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
	bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;

	return needs_clear != gpu_buddy_block_is_clear(block);
}

static struct gpu_buddy_block *
__alloc_range_bias(struct gpu_buddy *mm,
		   u64 start, u64 end,
		   unsigned int order,
		   unsigned long flags,
		   bool fallback)
{
	u64 req_size = mm->chunk_size << order;
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	LIST_HEAD(dfs);
	int err;
	int i;

	end = end - 1;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(&dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		if (gpu_buddy_block_order(block) < order)
			continue;

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block))
			continue;

		if (block_start < start || block_end > end) {
			u64 adjusted_start = max(block_start, start);
			u64 adjusted_end = min(block_end, end);

			if (round_down(adjusted_end + 1, req_size) <=
			    round_up(adjusted_start, req_size))
				continue;
		}

		if (!fallback && block_incompatible(block, flags))
			continue;

		if (contains(start, end, block_start, block_end) &&
		    order == gpu_buddy_block_order(block)) {
			/*
			 * Find the free block within the range.
			 */
			if (gpu_buddy_block_is_free(block))
				return block;

			continue;
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, &dfs);
		list_add(&block->left->tmp_link, &dfs);
	} while (1);

	return ERR_PTR(-ENOSPC);

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static struct gpu_buddy_block *
__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
			     u64 start, u64 end,
			     unsigned int order,
			     unsigned long flags)
{
	struct gpu_buddy_block *block;
	bool fallback = false;

	block = __alloc_range_bias(mm, start, end, order,
				   flags, fallback);
	if (IS_ERR(block))
		return __alloc_range_bias(mm, start, end, order,
					  flags, !fallback);

	return block;
}

static struct gpu_buddy_block *
get_maxblock(struct gpu_buddy *mm,
	     unsigned int order,
	     enum gpu_buddy_free_tree tree)
{
	struct gpu_buddy_block *max_block = NULL, *block = NULL;
	struct rb_root *root;
	unsigned int i;

	for (i = order; i <= mm->max_order; ++i) {
		root = &mm->free_trees[tree][i];
		block = rbtree_last_free_block(root);
		if (!block)
			continue;

		if (!max_block) {
			max_block = block;
			continue;
		}

		if (gpu_buddy_block_offset(block) >
		    gpu_buddy_block_offset(max_block)) {
			max_block = block;
		}
	}

	return max_block;
}

static struct gpu_buddy_block *
alloc_from_freetree(struct gpu_buddy *mm,
		    unsigned int order,
		    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	struct rb_root *root;
	enum gpu_buddy_free_tree tree;
	unsigned int tmp;
	int err;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
		block = get_maxblock(mm, order, tree);
		if (block)
			/* Store the obtained block order */
			tmp = gpu_buddy_block_order(block);
	} else {
		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			/* Get RB tree root for this order and tree */
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}
	}

	if (!block) {
		/* Try allocating from the other tree */
		tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
			GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;

		for (tmp = order; tmp <= mm->max_order; ++tmp) {
			root = &mm->free_trees[tree][tmp];
			block = rbtree_last_free_block(root);
			if (block)
				break;
		}

		if (!block)
			return ERR_PTR(-ENOSPC);
	}

	BUG_ON(!gpu_buddy_block_is_free(block));

	while (tmp != order) {
		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		block = block->right;
		tmp--;
	}
	return block;

err_undo:
	if (tmp != order)
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static bool
gpu_buddy_can_offset_align(u64 size, u64 min_block_size)
{
	return size < min_block_size && is_power_of_2(size);
}

static bool gpu_buddy_subtree_can_satisfy(struct rb_node *node,
					  unsigned int alignment)
{
	struct gpu_buddy_block *block;

	block = rbtree_get_free_block(node);
	return block->subtree_max_alignment >= alignment;
}

static struct gpu_buddy_block *
gpu_buddy_find_block_aligned(struct gpu_buddy *mm,
			     enum gpu_buddy_free_tree tree,
			     unsigned int order,
			     unsigned int alignment,
			     unsigned long flags)
{
	struct rb_root *root = &mm->free_trees[tree][order];
	struct rb_node *rb = root->rb_node;

	while (rb) {
		struct gpu_buddy_block *block = rbtree_get_free_block(rb);
		struct rb_node *left_node = rb->rb_left, *right_node = rb->rb_right;

		if (right_node) {
			if (gpu_buddy_subtree_can_satisfy(right_node, alignment)) {
				rb = right_node;
				continue;
			}
		}

		if (gpu_buddy_block_offset_alignment(block) >= alignment)
			return block;

		if (left_node) {
			if (gpu_buddy_subtree_can_satisfy(left_node, alignment)) {
				rb = left_node;
				continue;
			}
		}

		break;
	}

	return NULL;
}

static struct gpu_buddy_block *
gpu_buddy_offset_aligned_allocation(struct gpu_buddy *mm,
				    u64 size,
				    u64 min_block_size,
				    unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	unsigned int order, tmp, alignment;
	struct gpu_buddy_block *buddy;
	enum gpu_buddy_free_tree tree;
	unsigned long pages;
	int err;

	alignment = ilog2(min_block_size);
	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;

	tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
		GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;

	for (tmp = order; tmp <= mm->max_order; ++tmp) {
		block = gpu_buddy_find_block_aligned(mm, tree, tmp,
						     alignment, flags);
		if (!block) {
			tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
				GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
			block = gpu_buddy_find_block_aligned(mm, tree, tmp,
							     alignment, flags);
		}

		if (block)
			break;
	}

	if (!block)
		return ERR_PTR(-ENOSPC);

	while (gpu_buddy_block_order(block) > order) {
		struct gpu_buddy_block *left, *right;

		err = split_block(mm, block);
		if (unlikely(err))
			goto err_undo;

		left = block->left;
		right = block->right;

		if (gpu_buddy_block_offset_alignment(right) >= alignment)
			block = right;
		else
			block = left;
	}

	return block;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);
	return ERR_PTR(err);
}

static int __alloc_range(struct gpu_buddy *mm,
			 struct list_head *dfs,
			 u64 start, u64 size,
			 struct list_head *blocks,
			 u64 *total_allocated_on_err)
{
	struct gpu_buddy_block *block;
	struct gpu_buddy_block *buddy;
	u64 total_allocated = 0;
	LIST_HEAD(allocated);
	u64 end;
	int err;

	end = start + size - 1;

	do {
		u64 block_start;
		u64 block_end;

		block = list_first_entry_or_null(dfs,
						 struct gpu_buddy_block,
						 tmp_link);
		if (!block)
			break;

		list_del(&block->tmp_link);

		block_start = gpu_buddy_block_offset(block);
		block_end = block_start + gpu_buddy_block_size(mm, block) - 1;

		if (!overlaps(start, end, block_start, block_end))
			continue;

		if (gpu_buddy_block_is_allocated(block)) {
			err = -ENOSPC;
			goto err_free;
		}

		if (contains(start, end, block_start, block_end)) {
			if (gpu_buddy_block_is_free(block)) {
				mark_allocated(mm, block);
				total_allocated += gpu_buddy_block_size(mm, block);
				mm->avail -= gpu_buddy_block_size(mm, block);
				if (gpu_buddy_block_is_clear(block))
					mm->clear_avail -= gpu_buddy_block_size(mm, block);
				list_add_tail(&block->link, &allocated);
				continue;
			} else if (!mm->clear_avail) {
				err = -ENOSPC;
				goto err_free;
			}
		}

		if (!gpu_buddy_block_is_split(block)) {
			err = split_block(mm, block);
			if (unlikely(err))
				goto err_undo;
		}

		list_add(&block->right->tmp_link, dfs);
		list_add(&block->left->tmp_link, dfs);
	} while (1);

	if (total_allocated < size) {
		err = -ENOSPC;
		goto err_free;
	}

	list_splice_tail(&allocated, blocks);

	return 0;

err_undo:
	/*
	 * We really don't want to leave around a bunch of split blocks, since
	 * bigger is better, so make sure we merge everything back before we
	 * free the allocated blocks.
	 */
	buddy = __get_buddy(block);
	if (buddy &&
	    (gpu_buddy_block_is_free(block) &&
	     gpu_buddy_block_is_free(buddy)))
		__gpu_buddy_free(mm, block, false);

err_free:
	if (err == -ENOSPC && total_allocated_on_err) {
		list_splice_tail(&allocated, blocks);
		*total_allocated_on_err = total_allocated;
	} else {
		gpu_buddy_free_list_internal(mm, &allocated);
	}

	return err;
}

static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
				   u64 start,
				   u64 size,
				   u64 *total_allocated_on_err,
				   struct list_head *blocks)
{
	LIST_HEAD(dfs);
	int i;

	for (i = 0; i < mm->n_roots; ++i)
		list_add_tail(&mm->roots[i]->tmp_link, &dfs);

	return __alloc_range(mm, &dfs, start, size,
			     blocks, total_allocated_on_err);
}

static int __alloc_contig_try_harder(struct gpu_buddy *mm,
				     u64 size,
				     u64 min_block_size,
				     struct list_head *blocks)
{
	u64 rhs_offset, lhs_offset, lhs_size, filled;
	struct gpu_buddy_block *block;
	unsigned int tree, order;
	LIST_HEAD(blocks_lhs);
	unsigned long pages;
	u64 modify_size;
	int err;

	modify_size = rounddown_pow_of_two(size);
	pages = modify_size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	if (order == 0)
		return -ENOSPC;

	for_each_free_tree(tree) {
		struct rb_root *root;
		struct rb_node *iter;

		root = &mm->free_trees[tree][order];
		if (rbtree_is_empty(root))
			continue;

		iter = rb_last(root);
		while (iter) {
			block = rbtree_get_free_block(iter);

			/* Allocate blocks traversing RHS */
			rhs_offset = gpu_buddy_block_offset(block);
			err = __gpu_buddy_alloc_range(mm, rhs_offset, size,
						      &filled, blocks);
			if (!err || err != -ENOSPC)
				return err;

			lhs_size = max((size - filled), min_block_size);
			if (!IS_ALIGNED(lhs_size, min_block_size))
				lhs_size = round_up(lhs_size, min_block_size);

			/* Allocate blocks traversing LHS */
			lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
			err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
						      NULL, &blocks_lhs);
			if (!err) {
				list_splice(&blocks_lhs, blocks);
				return 0;
			} else if (err != -ENOSPC) {
				gpu_buddy_free_list_internal(mm, blocks);
				return err;
			}
			/* Free blocks for the next iteration */
			gpu_buddy_free_list_internal(mm, blocks);

			iter = rb_prev(iter);
		}
	}

	return -ENOSPC;
}

/**
 * gpu_buddy_block_trim - free unused pages
 *
 * @mm: GPU buddy manager
 * @start: start address to begin the trimming.
 * @new_size: original size requested
 * @blocks: Input and output list of allocated blocks.
 *	    MUST contain a single block as input to be trimmed.
 *	    On success will contain the newly allocated blocks
 *	    making up the @new_size. Blocks always appear in
 *	    ascending order
 *
 * For contiguous allocations, the size is rounded up to the nearest
 * power-of-two value; since drivers consume the *actual* size, the remaining
 * portions are unused and can optionally be freed with this function.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_block_trim(struct gpu_buddy *mm,
			 u64 *start,
			 u64 new_size,
			 struct list_head *blocks)
{
	struct gpu_buddy_block *parent;
	struct gpu_buddy_block *block;
	u64 block_start, block_end;
	LIST_HEAD(dfs);
	u64 new_start;
	int err;

	if (!list_is_singular(blocks))
		return -EINVAL;

	block = list_first_entry(blocks,
				 struct gpu_buddy_block,
				 link);

	block_start = gpu_buddy_block_offset(block);
	block_end = block_start + gpu_buddy_block_size(mm, block);

	if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
		return -EINVAL;

	if (new_size > gpu_buddy_block_size(mm, block))
		return -EINVAL;

	if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
		return -EINVAL;

	if (new_size == gpu_buddy_block_size(mm, block))
		return 0;

	new_start = block_start;
	if (start) {
		new_start = *start;

		if (new_start < block_start)
			return -EINVAL;

		if (!IS_ALIGNED(new_start, mm->chunk_size))
			return -EINVAL;

		if (range_overflows(new_start, new_size, block_end))
			return -EINVAL;
	}

	list_del(&block->link);
	mark_free(mm, block);
	mm->avail += gpu_buddy_block_size(mm, block);
	if (gpu_buddy_block_is_clear(block))
		mm->clear_avail += gpu_buddy_block_size(mm, block);

	/* Prevent recursively freeing this node */
	parent = block->parent;
	block->parent = NULL;

	list_add(&block->tmp_link, &dfs);
	err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
	if (err) {
		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		list_add(&block->link, blocks);
	}

	block->parent = parent;
	return err;
}
EXPORT_SYMBOL(gpu_buddy_block_trim);
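
/*
 * A minimal trimming sketch (hypothetical sizes, not from this file): after a
 * contiguous request of 300 KiB has been rounded up to a single 512 KiB
 * block, the unused tail could be returned like this, leaving "blocks"
 * describing only the first 300 KiB:
 *
 *	err = gpu_buddy_block_trim(&vram_mm, NULL, 300 * SZ_1K, &blocks);
 */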

static struct gpu_buddy_block *
__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			 u64 start, u64 end,
			 u64 size, u64 min_block_size,
			 unsigned int order,
			 unsigned long flags)
{
	if (flags & GPU_BUDDY_RANGE_ALLOCATION)
		/* Allocate traversing within the range */
		return __gpu_buddy_alloc_range_bias(mm, start, end,
						    order, flags);
	else if (size < min_block_size)
		/* Allocate from an offset-aligned region without size rounding */
		return gpu_buddy_offset_aligned_allocation(mm, size,
							   min_block_size,
							   flags);
	else
		/* Allocate from freetree */
		return alloc_from_freetree(mm, order, flags);
}

/**
 * gpu_buddy_alloc_blocks - allocate power-of-two blocks
 *
 * @mm: GPU buddy manager to allocate from
 * @start: start of the allowed range for this block
 * @end: end of the allowed range for this block
 * @size: size of the allocation in bytes
 * @min_block_size: alignment of the allocation
 * @blocks: output list head to add allocated blocks
 * @flags: GPU_BUDDY_*_ALLOCATION flags
 *
 * When range limitations are enforced, __gpu_buddy_alloc_range_bias() is
 * called; it traverses the tree and returns the desired block.
 *
 * When *no* range restrictions are enforced, alloc_from_freetree() is called
 * instead, which picks the block from the freetree.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
			   u64 start, u64 end, u64 size,
			   u64 min_block_size,
			   struct list_head *blocks,
			   unsigned long flags)
{
	struct gpu_buddy_block *block = NULL;
	u64 original_size, original_min_size;
	unsigned int min_order, order;
	LIST_HEAD(allocated);
	unsigned long pages;
	int err;

	if (size < mm->chunk_size)
		return -EINVAL;

	if (min_block_size < mm->chunk_size)
		return -EINVAL;

	if (!is_power_of_2(min_block_size))
		return -EINVAL;

	if (!IS_ALIGNED(start | end | size, mm->chunk_size))
		return -EINVAL;

	if (end > mm->size)
		return -EINVAL;

	if (range_overflows(start, size, mm->size))
		return -EINVAL;

	/* Actual range allocation */
	if (start + size == end) {
		if (!IS_ALIGNED(start | end, min_block_size))
			return -EINVAL;

		return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
	}

	original_size = size;
	original_min_size = min_block_size;

	/* Roundup the size to power of 2 */
	if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
		size = roundup_pow_of_two(size);
		min_block_size = size;
	/*
	 * Normalize the requested size to min_block_size for regular allocations.
	 * Offset-aligned allocations intentionally skip size rounding.
	 */
	} else if (!gpu_buddy_can_offset_align(size, min_block_size)) {
		size = round_up(size, min_block_size);
	}

	pages = size >> ilog2(mm->chunk_size);
	order = fls(pages) - 1;
	min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);

	if (order > mm->max_order || size > mm->size) {
		if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
		    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
			return __alloc_contig_try_harder(mm, original_size,
							 original_min_size, blocks);

		return -EINVAL;
	}

	do {
		order = min(order, (unsigned int)fls(pages) - 1);
		BUG_ON(order > mm->max_order);
		/*
		 * Regular allocations must not allocate blocks smaller than min_block_size.
		 * Offset-aligned allocations deliberately bypass this constraint.
		 */
		BUG_ON(size >= min_block_size && order < min_order);

		do {
			unsigned int fallback_order;

			block = __gpu_buddy_alloc_blocks(mm, start,
							 end,
							 size,
							 min_block_size,
							 order,
							 flags);
			if (!IS_ERR(block))
				break;

			if (size < min_block_size) {
				fallback_order = order;
			} else if (order == min_order) {
				fallback_order = min_order;
			} else {
				order--;
				continue;
			}

			/* Try allocation through force merge method */
			if (mm->clear_avail &&
			    !__force_merge(mm, start, end, fallback_order)) {
				block = __gpu_buddy_alloc_blocks(mm, start,
								 end,
								 size,
								 min_block_size,
								 fallback_order,
								 flags);
				if (!IS_ERR(block)) {
					order = fallback_order;
					break;
				}
			}

			/*
			 * Try contiguous block allocation through
			 * try harder method.
			 */
			if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
			    !(flags & GPU_BUDDY_RANGE_ALLOCATION))
				return __alloc_contig_try_harder(mm,
								 original_size,
								 original_min_size,
								 blocks);
			err = -ENOSPC;
			goto err_free;
		} while (1);

		mark_allocated(mm, block);
		mm->avail -= gpu_buddy_block_size(mm, block);
		if (gpu_buddy_block_is_clear(block))
			mm->clear_avail -= gpu_buddy_block_size(mm, block);
		kmemleak_update_trace(block);
		list_add_tail(&block->link, &allocated);

		pages -= BIT(order);

		if (!pages)
			break;
	} while (1);

	/* Trim the allocated block to the required size */
	if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
	    original_size != size) {
		struct list_head *trim_list;
		LIST_HEAD(temp);
		u64 trim_size;

		trim_list = &allocated;
		trim_size = original_size;

		if (!list_is_singular(&allocated)) {
			block = list_last_entry(&allocated, typeof(*block), link);
			list_move(&block->link, &temp);
			trim_list = &temp;
			trim_size = gpu_buddy_block_size(mm, block) -
				    (size - original_size);
		}

		gpu_buddy_block_trim(mm,
				     NULL,
				     trim_size,
				     trim_list);

		if (!list_empty(&temp))
			list_splice_tail(trim_list, &allocated);
	}

	list_splice_tail(&allocated, blocks);
	return 0;

err_free:
	gpu_buddy_free_list_internal(mm, &allocated);
	return err;
}
EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
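
/*
 * A minimal allocation sketch (hypothetical driver code): grab 1 MiB of
 * already-cleared memory, use it, then hand it back as dirty. "vram_mm" is
 * the hypothetical manager initialised in the sketch above.
 *
 *	LIST_HEAD(blocks);
 *	int err;
 *
 *	err = gpu_buddy_alloc_blocks(&vram_mm, 0, vram_mm.size, SZ_1M, SZ_4K,
 *				     &blocks, GPU_BUDDY_CLEAR_ALLOCATION);
 *	if (err)
 *		return err;
 *	...
 *	gpu_buddy_free_list(&vram_mm, &blocks, 0);
 */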

/**
 * gpu_buddy_block_print - print block information
 *
 * @mm: GPU buddy manager
 * @block: GPU buddy block
 */
void gpu_buddy_block_print(struct gpu_buddy *mm,
			   struct gpu_buddy_block *block)
{
	u64 start = gpu_buddy_block_offset(block);
	u64 size = gpu_buddy_block_size(mm, block);

	pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
}
EXPORT_SYMBOL(gpu_buddy_block_print);

/**
 * gpu_buddy_print - print allocator state
 *
 * @mm: GPU buddy manager
 */
1475{
1476 int order;
1477
1478 pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
1479 mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
1480
1481 for (order = mm->max_order; order >= 0; order--) {
1482 struct gpu_buddy_block *block, *tmp;
1483 struct rb_root *root;
1484 u64 count = 0, free;
1485 unsigned int tree;
1486
1487 for_each_free_tree(tree) {
1488 root = &mm->free_trees[tree][order];
1489
1490 rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
1491 BUG_ON(!gpu_buddy_block_is_free(block));
1492 count++;
1493 }
1494 }
1495
1496 free = count * (mm->chunk_size << order);
1497 if (free < SZ_1M)
1498 pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
1499 order, free >> 10, count);
1500 else
1501 pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
1502 order, free >> 20, count);
1503 }
1504}
1505EXPORT_SYMBOL(gpu_buddy_print);
1506
1507static void gpu_buddy_module_exit(void)
1508{
1509 kmem_cache_destroy(slab_blocks);
1510}
1511
1512static int __init gpu_buddy_module_init(void)
1513{
1514 slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
1515 if (!slab_blocks)
1516 return -ENOMEM;
1517
1518 return 0;
1519}
1520
1521module_init(gpu_buddy_module_init);
1522module_exit(gpu_buddy_module_exit);
1523
1524MODULE_DESCRIPTION("GPU Buddy Allocator");
1525MODULE_LICENSE("Dual MIT/GPL");