mirror of OpenBSD xenocara tree github.com/openbsd/xenocara
openbsd
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at jcs 200 lines 7.1 kB view raw
1/* 2 * Copyright © 2020 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_builder.h" 25 26#include <string.h> 27 28/** Returns the type to use for a copy of the given size. 29 * 30 * The actual type doesn't matter here all that much as we're just going to do 31 * a load/store on it and never any arithmetic. 32 */ 33static const struct glsl_type * 34copy_type_for_byte_size(unsigned size) 35{ 36 switch (size) { 37 case 1: 38 return glsl_vector_type(GLSL_TYPE_UINT8, 1); 39 case 2: 40 return glsl_vector_type(GLSL_TYPE_UINT16, 1); 41 case 4: 42 return glsl_vector_type(GLSL_TYPE_UINT, 1); 43 case 8: 44 return glsl_vector_type(GLSL_TYPE_UINT, 2); 45 case 16: 46 return glsl_vector_type(GLSL_TYPE_UINT, 4); 47 default: 48 unreachable("Unsupported size"); 49 } 50} 51 52static nir_def * 53memcpy_load_deref_elem(nir_builder *b, nir_deref_instr *parent, 54 nir_def *index) 55{ 56 nir_deref_instr *deref; 57 58 index = nir_i2iN(b, index, parent->def.bit_size); 59 assert(parent->deref_type == nir_deref_type_cast); 60 deref = nir_build_deref_ptr_as_array(b, parent, index); 61 62 return nir_load_deref(b, deref); 63} 64 65static nir_def * 66memcpy_load_deref_elem_imm(nir_builder *b, nir_deref_instr *parent, 67 uint64_t index) 68{ 69 nir_def *idx = nir_imm_intN_t(b, index, parent->def.bit_size); 70 return memcpy_load_deref_elem(b, parent, idx); 71} 72 73static void 74memcpy_store_deref_elem(nir_builder *b, nir_deref_instr *parent, 75 nir_def *index, nir_def *value) 76{ 77 nir_deref_instr *deref; 78 79 index = nir_i2iN(b, index, parent->def.bit_size); 80 assert(parent->deref_type == nir_deref_type_cast); 81 deref = nir_build_deref_ptr_as_array(b, parent, index); 82 nir_store_deref(b, deref, value, ~0); 83} 84 85static void 86memcpy_store_deref_elem_imm(nir_builder *b, nir_deref_instr *parent, 87 uint64_t index, nir_def *value) 88{ 89 nir_def *idx = nir_imm_intN_t(b, index, parent->def.bit_size); 90 memcpy_store_deref_elem(b, parent, idx, value); 91} 92 93static bool 94lower_memcpy_impl(nir_function_impl *impl) 95{ 96 nir_builder b = nir_builder_create(impl); 97 98 bool found_const_memcpy = false, found_non_const_memcpy = false; 99 100 nir_foreach_block_safe(block, impl) { 101 nir_foreach_instr_safe(instr, block) { 102 if (instr->type != nir_instr_type_intrinsic) 103 continue; 104 105 nir_intrinsic_instr *cpy = nir_instr_as_intrinsic(instr); 106 if (cpy->intrinsic != nir_intrinsic_memcpy_deref) 107 continue; 108 109 b.cursor = nir_instr_remove(&cpy->instr); 110 111 nir_deref_instr *dst = nir_src_as_deref(cpy->src[0]); 112 nir_deref_instr *src = nir_src_as_deref(cpy->src[1]); 113 if (nir_src_is_const(cpy->src[2])) { 114 found_const_memcpy = true; 115 uint64_t size = nir_src_as_uint(cpy->src[2]); 116 uint64_t offset = 0; 117 while (offset < size) { 118 uint64_t remaining = size - offset; 119 /* Find the largest chunk size power-of-two (MSB in remaining) 120 * and limit our chunk to 16B (a vec4). It's important to do as 121 * many 16B chunks as possible first so that the index 122 * computation is correct for 123 * memcpy_(load|store)_deref_elem_imm. 124 */ 125 unsigned copy_size = 1u << MIN2(util_last_bit64(remaining) - 1, 4); 126 const struct glsl_type *copy_type = 127 copy_type_for_byte_size(copy_size); 128 129 nir_deref_instr *copy_dst = 130 nir_build_deref_cast(&b, &dst->def, dst->modes, 131 copy_type, copy_size); 132 nir_deref_instr *copy_src = 133 nir_build_deref_cast(&b, &src->def, src->modes, 134 copy_type, copy_size); 135 136 uint64_t index = offset / copy_size; 137 nir_def *value = 138 memcpy_load_deref_elem_imm(&b, copy_src, index); 139 memcpy_store_deref_elem_imm(&b, copy_dst, index, value); 140 offset += copy_size; 141 } 142 } else { 143 found_non_const_memcpy = true; 144 nir_def *size = cpy->src[2].ssa; 145 146 /* In this case, we don't have any idea what the size is so we 147 * emit a loop which copies one byte at a time. 148 */ 149 nir_deref_instr *copy_dst = 150 nir_build_deref_cast(&b, &dst->def, dst->modes, 151 glsl_uint8_t_type(), 1); 152 nir_deref_instr *copy_src = 153 nir_build_deref_cast(&b, &src->def, src->modes, 154 glsl_uint8_t_type(), 1); 155 156 nir_variable *i = nir_local_variable_create(impl, 157 glsl_uintN_t_type(size->bit_size), NULL); 158 nir_store_var(&b, i, nir_imm_intN_t(&b, 0, size->bit_size), ~0); 159 nir_push_loop(&b); 160 { 161 nir_def *index = nir_load_var(&b, i); 162 nir_push_if(&b, nir_uge(&b, index, size)); 163 { 164 nir_jump(&b, nir_jump_break); 165 } 166 nir_pop_if(&b, NULL); 167 168 nir_def *value = 169 memcpy_load_deref_elem(&b, copy_src, index); 170 memcpy_store_deref_elem(&b, copy_dst, index, value); 171 nir_store_var(&b, i, nir_iadd_imm(&b, index, 1), ~0); 172 } 173 nir_pop_loop(&b, NULL); 174 } 175 } 176 } 177 178 if (found_non_const_memcpy) { 179 nir_metadata_preserve(impl, nir_metadata_none); 180 } else if (found_const_memcpy) { 181 nir_metadata_preserve(impl, nir_metadata_control_flow); 182 } else { 183 nir_metadata_preserve(impl, nir_metadata_all); 184 } 185 186 return found_const_memcpy || found_non_const_memcpy; 187} 188 189bool 190nir_lower_memcpy(nir_shader *shader) 191{ 192 bool progress = false; 193 194 nir_foreach_function_impl(impl, shader) { 195 if (lower_memcpy_impl(impl)) 196 progress = true; 197 } 198 199 return progress; 200}