Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: MIT */
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#ifndef __GPU_BUDDY_H__
7#define __GPU_BUDDY_H__
8
9#include <linux/bitops.h>
10#include <linux/list.h>
11#include <linux/slab.h>
12#include <linux/sched.h>
13#include <linux/rbtree.h>
14#include <linux/rbtree_augmented.h>
15
16/**
17 * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
18 *
19 * When set, allocation is restricted to the range [start, end) specified
20 * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
21 * and allocation can use any free space.
22 */
23#define GPU_BUDDY_RANGE_ALLOCATION BIT(0)
24
25/**
26 * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
27 *
28 * Allocate starting from high addresses and working down. Useful for
29 * separating different allocation types (e.g., kernel vs userspace)
30 * to reduce fragmentation.
31 */
32#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1)
33
34/**
35 * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
36 *
37 * The allocation must be satisfied with a single contiguous block.
38 * If the requested size cannot be allocated contiguously, the
39 * allocation fails with -ENOSPC.
40 */
41#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
42
43/**
44 * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
45 *
46 * Attempt to allocate from the clear tree first. If insufficient clear
47 * memory is available, falls back to dirty memory. Useful when the
48 * caller needs zeroed memory and wants to avoid GPU clear operations.
49 */
50#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3)
51
52/**
53 * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
54 *
55 * Used with gpu_buddy_free_list() to indicate that the memory being
56 * freed has been cleared (zeroed). The blocks will be placed in the
57 * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
58 */
59#define GPU_BUDDY_CLEARED BIT(4)
60
61/**
62 * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
63 *
64 * By default, if an allocation is smaller than the allocated block,
65 * excess memory is trimmed and returned to the free pool. This flag
66 * disables trimming, keeping the full power-of-two block size.
67 */
68#define GPU_BUDDY_TRIM_DISABLE BIT(5)
69
70enum gpu_buddy_free_tree {
71 GPU_BUDDY_CLEAR_TREE = 0,
72 GPU_BUDDY_DIRTY_TREE,
73 GPU_BUDDY_MAX_FREE_TREES,
74};
75
76#define for_each_free_tree(tree) \
77 for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
78
79/**
80 * struct gpu_buddy_block - Block within a buddy allocator
81 *
82 * Each block in the buddy allocator is represented by this structure.
83 * Blocks are organized in a binary tree where each parent block can be
84 * split into two children (left and right buddies). The allocator manages
85 * blocks at various orders (power-of-2 sizes) from chunk_size up to the
86 * largest contiguous region.
87 *
88 * @private: Private data owned by the allocator user (e.g., driver-specific data)
89 * @link: List node for user ownership while block is allocated
90 */
91struct gpu_buddy_block {
92/* private: */
93 /*
94 * Header bit layout:
95 * - Bits 63:12: block offset within the address space
96 * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT)
97 * - Bit 9: clear bit (1 if memory is zeroed)
98 * - Bits 8:6: reserved
99 * - Bits 5:0: order (log2 of size relative to chunk_size)
100 */
101#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
102#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
103#define GPU_BUDDY_ALLOCATED (1 << 10)
104#define GPU_BUDDY_FREE (2 << 10)
105#define GPU_BUDDY_SPLIT (3 << 10)
106#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9)
107/* Free to be used, if needed in the future */
108#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
109#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0)
110 u64 header;
111
112 struct gpu_buddy_block *left;
113 struct gpu_buddy_block *right;
114 struct gpu_buddy_block *parent;
115/* public: */
116 void *private; /* owned by creator */
117
118 /*
119 * While the block is allocated by the user through gpu_buddy_alloc*,
120 * the user has ownership of the link, for example to maintain within
121 * a list, if so desired. As soon as the block is freed with
122 * gpu_buddy_free* ownership is given back to the mm.
123 */
124 union {
125/* private: */
126 struct rb_node rb;
127/* public: */
128 struct list_head link;
129 };
130/* private: */
131 struct list_head tmp_link;
132 unsigned int subtree_max_alignment;
133};
134
135/* Order-zero must be at least SZ_4K */
136#define GPU_BUDDY_MAX_ORDER (63 - 12)
137
138/**
139 * struct gpu_buddy - GPU binary buddy allocator
140 *
141 * The buddy allocator provides efficient power-of-two memory allocation
142 * with fast allocation and free operations. It is commonly used for GPU
143 * memory management where allocations can be split into power-of-two
144 * block sizes.
145 *
146 * Locking should be handled by the user; a simple mutex around
147 * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list()
148 * should suffice.
149 *
150 * @n_roots: Number of root blocks in the roots array.
151 * @max_order: Maximum block order (log2 of largest block size / chunk_size).
152 * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
153 * @size: Total size of the address space managed by this allocator in bytes.
154 * @avail: Total free space currently available for allocation in bytes.
155 * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
156 * This is a subset of @avail.
157 */
158struct gpu_buddy {
159/* private: */
160 /*
161 * Array of red-black trees for free block management.
162 * Indexed as free_trees[clear/dirty][order] where:
163 * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
164 * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
165 * Each tree holds free blocks of the corresponding order.
166 */
167 struct rb_root **free_trees;
168 /*
169 * Array of root blocks representing the top-level blocks of the
170 * binary tree(s). Multiple roots exist when the total size is not
171 * a power of two, with each root being the largest power-of-two
172 * that fits in the remaining space.
173 */
174 struct gpu_buddy_block **roots;
175/* public: */
176 unsigned int n_roots;
177 unsigned int max_order;
178 u64 chunk_size;
179 u64 size;
180 u64 avail;
181 u64 clear_avail;
182};
183
184static inline u64
185gpu_buddy_block_offset(const struct gpu_buddy_block *block)
186{
187 return block->header & GPU_BUDDY_HEADER_OFFSET;
188}
189
190static inline unsigned int
191gpu_buddy_block_order(struct gpu_buddy_block *block)
192{
193 return block->header & GPU_BUDDY_HEADER_ORDER;
194}
195
196static inline bool
197gpu_buddy_block_is_free(struct gpu_buddy_block *block)
198{
199 return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
200}
201
202static inline bool
203gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
204{
205 return block->header & GPU_BUDDY_HEADER_CLEAR;
206}
207
208static inline u64
209gpu_buddy_block_size(struct gpu_buddy *mm,
210 struct gpu_buddy_block *block)
211{
212 return mm->chunk_size << gpu_buddy_block_order(block);
213}
214
215int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
216
217void gpu_buddy_fini(struct gpu_buddy *mm);
218
219int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
220 u64 start, u64 end, u64 size,
221 u64 min_page_size,
222 struct list_head *blocks,
223 unsigned long flags);
224
225int gpu_buddy_block_trim(struct gpu_buddy *mm,
226 u64 *start,
227 u64 new_size,
228 struct list_head *blocks);
229
230void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
231
232void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
233
234void gpu_buddy_free_list(struct gpu_buddy *mm,
235 struct list_head *objects,
236 unsigned int flags);
237
238void gpu_buddy_print(struct gpu_buddy *mm);
239void gpu_buddy_block_print(struct gpu_buddy *mm,
240 struct gpu_buddy_block *block);
241#endif