Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at ee9dce44362b2d8132c32964656ab6dff7dfbc6a 241 lines 7.5 kB view raw
1/* SPDX-License-Identifier: MIT */ 2/* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6#ifndef __GPU_BUDDY_H__ 7#define __GPU_BUDDY_H__ 8 9#include <linux/bitops.h> 10#include <linux/list.h> 11#include <linux/slab.h> 12#include <linux/sched.h> 13#include <linux/rbtree.h> 14#include <linux/rbtree_augmented.h> 15 16/** 17 * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range 18 * 19 * When set, allocation is restricted to the range [start, end) specified 20 * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored 21 * and allocation can use any free space. 22 */ 23#define GPU_BUDDY_RANGE_ALLOCATION BIT(0) 24 25/** 26 * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space 27 * 28 * Allocate starting from high addresses and working down. Useful for 29 * separating different allocation types (e.g., kernel vs userspace) 30 * to reduce fragmentation. 31 */ 32#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1) 33 34/** 35 * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks 36 * 37 * The allocation must be satisfied with a single contiguous block. 38 * If the requested size cannot be allocated contiguously, the 39 * allocation fails with -ENOSPC. 40 */ 41#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) 42 43/** 44 * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory 45 * 46 * Attempt to allocate from the clear tree first. If insufficient clear 47 * memory is available, falls back to dirty memory. Useful when the 48 * caller needs zeroed memory and wants to avoid GPU clear operations. 49 */ 50#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3) 51 52/** 53 * GPU_BUDDY_CLEARED - Mark returned blocks as cleared 54 * 55 * Used with gpu_buddy_free_list() to indicate that the memory being 56 * freed has been cleared (zeroed). The blocks will be placed in the 57 * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests. 58 */ 59#define GPU_BUDDY_CLEARED BIT(4) 60 61/** 62 * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming 63 * 64 * By default, if an allocation is smaller than the allocated block, 65 * excess memory is trimmed and returned to the free pool. This flag 66 * disables trimming, keeping the full power-of-two block size. 67 */ 68#define GPU_BUDDY_TRIM_DISABLE BIT(5) 69 70enum gpu_buddy_free_tree { 71 GPU_BUDDY_CLEAR_TREE = 0, 72 GPU_BUDDY_DIRTY_TREE, 73 GPU_BUDDY_MAX_FREE_TREES, 74}; 75 76#define for_each_free_tree(tree) \ 77 for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++) 78 79/** 80 * struct gpu_buddy_block - Block within a buddy allocator 81 * 82 * Each block in the buddy allocator is represented by this structure. 83 * Blocks are organized in a binary tree where each parent block can be 84 * split into two children (left and right buddies). The allocator manages 85 * blocks at various orders (power-of-2 sizes) from chunk_size up to the 86 * largest contiguous region. 87 * 88 * @private: Private data owned by the allocator user (e.g., driver-specific data) 89 * @link: List node for user ownership while block is allocated 90 */ 91struct gpu_buddy_block { 92/* private: */ 93 /* 94 * Header bit layout: 95 * - Bits 63:12: block offset within the address space 96 * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT) 97 * - Bit 9: clear bit (1 if memory is zeroed) 98 * - Bits 8:6: reserved 99 * - Bits 5:0: order (log2 of size relative to chunk_size) 100 */ 101#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) 102#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) 103#define GPU_BUDDY_ALLOCATED (1 << 10) 104#define GPU_BUDDY_FREE (2 << 10) 105#define GPU_BUDDY_SPLIT (3 << 10) 106#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9) 107/* Free to be used, if needed in the future */ 108#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6) 109#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) 110 u64 header; 111 112 struct gpu_buddy_block *left; 113 struct gpu_buddy_block *right; 114 struct gpu_buddy_block *parent; 115/* public: */ 116 void *private; /* owned by creator */ 117 118 /* 119 * While the block is allocated by the user through gpu_buddy_alloc*, 120 * the user has ownership of the link, for example to maintain within 121 * a list, if so desired. As soon as the block is freed with 122 * gpu_buddy_free* ownership is given back to the mm. 123 */ 124 union { 125/* private: */ 126 struct rb_node rb; 127/* public: */ 128 struct list_head link; 129 }; 130/* private: */ 131 struct list_head tmp_link; 132 unsigned int subtree_max_alignment; 133}; 134 135/* Order-zero must be at least SZ_4K */ 136#define GPU_BUDDY_MAX_ORDER (63 - 12) 137 138/** 139 * struct gpu_buddy - GPU binary buddy allocator 140 * 141 * The buddy allocator provides efficient power-of-two memory allocation 142 * with fast allocation and free operations. It is commonly used for GPU 143 * memory management where allocations can be split into power-of-two 144 * block sizes. 145 * 146 * Locking should be handled by the user; a simple mutex around 147 * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list() 148 * should suffice. 149 * 150 * @n_roots: Number of root blocks in the roots array. 151 * @max_order: Maximum block order (log2 of largest block size / chunk_size). 152 * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K. 153 * @size: Total size of the address space managed by this allocator in bytes. 154 * @avail: Total free space currently available for allocation in bytes. 155 * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes. 156 * This is a subset of @avail. 157 */ 158struct gpu_buddy { 159/* private: */ 160 /* 161 * Array of red-black trees for free block management. 162 * Indexed as free_trees[clear/dirty][order] where: 163 * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content 164 * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content 165 * Each tree holds free blocks of the corresponding order. 166 */ 167 struct rb_root **free_trees; 168 /* 169 * Array of root blocks representing the top-level blocks of the 170 * binary tree(s). Multiple roots exist when the total size is not 171 * a power of two, with each root being the largest power-of-two 172 * that fits in the remaining space. 173 */ 174 struct gpu_buddy_block **roots; 175/* public: */ 176 unsigned int n_roots; 177 unsigned int max_order; 178 u64 chunk_size; 179 u64 size; 180 u64 avail; 181 u64 clear_avail; 182}; 183 184static inline u64 185gpu_buddy_block_offset(const struct gpu_buddy_block *block) 186{ 187 return block->header & GPU_BUDDY_HEADER_OFFSET; 188} 189 190static inline unsigned int 191gpu_buddy_block_order(struct gpu_buddy_block *block) 192{ 193 return block->header & GPU_BUDDY_HEADER_ORDER; 194} 195 196static inline bool 197gpu_buddy_block_is_free(struct gpu_buddy_block *block) 198{ 199 return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE; 200} 201 202static inline bool 203gpu_buddy_block_is_clear(struct gpu_buddy_block *block) 204{ 205 return block->header & GPU_BUDDY_HEADER_CLEAR; 206} 207 208static inline u64 209gpu_buddy_block_size(struct gpu_buddy *mm, 210 struct gpu_buddy_block *block) 211{ 212 return mm->chunk_size << gpu_buddy_block_order(block); 213} 214 215int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size); 216 217void gpu_buddy_fini(struct gpu_buddy *mm); 218 219int gpu_buddy_alloc_blocks(struct gpu_buddy *mm, 220 u64 start, u64 end, u64 size, 221 u64 min_page_size, 222 struct list_head *blocks, 223 unsigned long flags); 224 225int gpu_buddy_block_trim(struct gpu_buddy *mm, 226 u64 *start, 227 u64 new_size, 228 struct list_head *blocks); 229 230void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear); 231 232void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block); 233 234void gpu_buddy_free_list(struct gpu_buddy *mm, 235 struct list_head *objects, 236 unsigned int flags); 237 238void gpu_buddy_print(struct gpu_buddy *mm); 239void gpu_buddy_block_print(struct gpu_buddy *mm, 240 struct gpu_buddy_block *block); 241#endif