mirror of OpenBSD xenocara tree github.com/openbsd/xenocara
openbsd
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at jcs 470 lines 14 kB view raw
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/*
 * This pass tries to reduce the bitsize of phi instructions by either
 * moving narrowing conversions from the phi's consumers to the phi's
 * sources, if all the uses of the phi are equivalent narrowing
 * instructions.  In other words, convert:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
 *        vec1 16 ssa_8 = i2imp ssa_155
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    vec1 16 ssa_156 = i2imp ssa_124
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *        vec1 16 ssa_157 = i2i16 ssa_53
 *    }
 *
 * Or failing that, tries to push widening conversion of phi srcs to
 * the phi def.  In this case, since load_const is frequently one
 * of the phi sources this pass checks if it can be narrowed without a
 * loss of precision:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *        vec1 32 ssa_19 = i2i32 ssa_18
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    vec1 16 ssa_22 = i2i16 ssa_0
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
 *        vec1 32 ssa_23 = i2i32 ssa_8
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *    }
 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
 * cannot see through phis.
 */

/* Sentinel opcode value meaning "no conversion op chosen / not a
 * conversion"; nir_num_opcodes is one past the last valid opcode.
 */
#define INVALID_OP nir_num_opcodes

/**
 * Get the corresponding exact conversion for a x2ymp conversion.
 *
 * Any opcode that is not one of the "mediump" (mp) conversions is
 * returned unchanged.
 */
static nir_op
concrete_conversion(nir_op op)
{
   switch (op) {
   case nir_op_i2imp:
      return nir_op_i2i16;
   case nir_op_i2fmp:
      return nir_op_i2f16;
   case nir_op_u2fmp:
      return nir_op_u2f16;
   case nir_op_f2fmp:
      return nir_op_f2f16;
   case nir_op_f2imp:
      return nir_op_f2i16;
   case nir_op_f2ump:
      return nir_op_f2u16;
   default:
      return op;
   }
}

/**
 * If @instr is a 32->16 narrowing conversion compatible with the
 * conversion op chosen so far (@current_op, or INVALID_OP if none has
 * been chosen yet), return the (possibly reconciled) conversion op.
 * Otherwise return INVALID_OP.
 */
static nir_op
narrowing_conversion_op(nir_instr *instr, nir_op current_op)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_op op = nir_instr_as_alu(instr)->op;
   switch (op) {
   case nir_op_i2imp:
   case nir_op_i2i16:
   case nir_op_i2fmp:
   case nir_op_i2f16:
   case nir_op_u2fmp:
   case nir_op_u2f16:
   case nir_op_f2fmp:
   case nir_op_f2f16:
   case nir_op_f2imp:
   case nir_op_f2i16:
   case nir_op_f2ump:
   case nir_op_f2u16:
   case nir_op_f2f16_rtne:
   case nir_op_f2f16_rtz:
      break;
   default:
      return INVALID_OP;
   }

   /* If we've already picked a conversion op from a previous phi use,
    * make sure it is compatible with the current use
    */
   if (current_op != INVALID_OP) {
      if (current_op != op) {
         /* If we have different conversions, but one can be converted
          * to the other, then let's do that:
          * (e.g. i2imp vs i2i16 reconcile to the exact i2i16)
          */
         if (concrete_conversion(current_op) == concrete_conversion(op)) {
            op = concrete_conversion(op);
         } else {
            return INVALID_OP;
         }
      }
   }

   return op;
}

/**
 * If @instr is a widening conversion (narrow -> 32b), return its opcode
 * and store the source bit size in *bit_size; otherwise return
 * INVALID_OP.
 */
static nir_op
widening_conversion_op(nir_instr *instr, unsigned *bit_size)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_i2i32:
   case nir_op_i2f32:
   case nir_op_u2f32:
   case nir_op_f2f32:
   case nir_op_f2i32:
   case nir_op_f2u32:
      break;
   default:
      return INVALID_OP;
   }

   *bit_size = nir_src_bit_size(alu->src[0].src);

   /* We also need to check that the conversion's dest was actually
    * wider:
    */
   if (alu->def.bit_size <= *bit_size)
      return INVALID_OP;

   return alu->op;
}

/* Base type (int/uint/float) of a conversion op's output. */
static nir_alu_type
op_to_type(nir_op op)
{
   return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
}

/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 *
 * Returns true (progress) if the phi was replaced by a narrower phi.
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use_including_if(use, &phi->def) {
      /* an if use means the phi is used directly in a conditional, ie.
       * without a conversion
       */
      if (nir_src_is_if(use))
         return false;

      op = narrowing_conversion_op(nir_src_parent_instr(use), op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components,
                nir_alu_type_get_type_size(nir_op_infos[op].output_type));

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src(src, phi) {
      /* insert new conversion instr in block of original phi src: */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_def *old_src = src->src.ssa;
      nir_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi
    */
   nir_foreach_use(use, &phi->def) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead.
       * (The subsequent rewrite below repoints them at the already-
       * narrow new phi, so the conversion becomes a no-op mov.)
       */
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
      alu->op = nir_op_mov;
   }
   nir_def_rewrite_uses(&phi->def, &new_phi->def);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}

/**
 * Check whether every component of a 32b load_const survives a
 * round-trip through a 16b narrowing of the given base type without
 * loss of precision.
 */
static bool
can_convert_load_const(nir_load_const_instr *lc, nir_op op)
{
   nir_alu_type type = op_to_type(op);

   /* Note that we only handle phi's with bit_size == 32: */
   assert(lc->def.bit_size == 32);

   for (unsigned i = 0; i < lc->def.num_components; i++) {
      switch (type) {
      case nir_type_int:
         if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
            return false;
         break;
      case nir_type_uint:
         if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
            return false;
         break;
      case nir_type_float:
         if (lc->value[i].f32 != _mesa_half_to_float(
                                    _mesa_float_to_half(lc->value[i].f32)))
            return false;
         break;
      default:
         unreachable("bad type");
         return false;
      }
   }

   return true;
}

/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi.
 *
 * Returns the common widening op (or INVALID_OP) and stores the common
 * pre-widening bit size in *bit_size.
 */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         /* Defer load_const checking to the second loop below, once the
          * common conversion op is known:
          */
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move widening sources, but load_const is
    * one of the phi sources (and does not have a widening conversion,
    * but could have a narrowing->widening sequence inserted without
    * loss of precision), then we could insert a narrowing->widening
    * sequence to make the rest of the transformation possible:
    */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}

/* Try to move widening conversions into the phi to the phi's output
 * to reduce the phi's precision:
 *
 * Returns true (progress) if the phi was replaced by a narrower phi
 * followed by a single widening conversion.
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components, bit_size);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      nir_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the sources source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = alu->def.num_components;
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi,
    * and re-write the original phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_def *def = nir_build_alu(b, op, &new_phi->def, NULL, NULL, NULL);

   nir_def_rewrite_uses(&phi->def, def);

   return true;
}

/* Apply the two transformations to a single phi; narrowing the dst is
 * preferred, widening the srcs is the fallback.
 */
static bool
lower_phi(nir_builder *b, nir_phi_instr *phi)
{
   bool progress = try_move_narrowing_dst(b, phi);
   if (!progress)
      progress = try_move_widening_src(b, phi);
   return progress;
}

/**
 * Pass entry point: reduce the precision of 32b phis whose sources or
 * uses are all 16b conversions.  Returns true if any phi was rewritten.
 */
bool
nir_opt_phi_precision(nir_shader *shader)
{
   bool progress = false;

   /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
   unsigned bit_sizes_used = shader->info.bit_sizes_float |
                             shader->info.bit_sizes_int;

   /* Note: if the info is zeroed, we conservatively run to avoid gathering
    * info, which doesn't work for libraries.
    */
   if (bit_sizes_used && !(bit_sizes_used & (8 | 16)))
      return false;

   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_phi_safe(phi, block)
            progress |= lower_phi(&b, phi);
      }

      if (progress) {
         nir_metadata_preserve(impl,
                               nir_metadata_control_flow);
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}