Mirror of the OpenBSD xenocara tree: github.com/openbsd/xenocara
openbsd
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at jcs 1431 lines 49 kB view raw
1/* 2 * Copyright © 2015 Thomas Helland 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_loop_analyze.h" 25#include "util/hash_table.h" 26#include "util/ralloc.h" 27#include "nir.h" 28#include "nir_constant_expressions.h" 29 30typedef struct { 31 /* The loop we store information for */ 32 nir_loop *loop; 33 34 nir_variable_mode indirect_mask; 35 36 bool force_unroll_sampler_indirect; 37} loop_info_state; 38 39static nir_loop_induction_variable * 40get_loop_var(nir_def *value, loop_info_state *state) 41{ 42 struct hash_entry *entry = _mesa_hash_table_search(state->loop->info->induction_vars, value); 43 if (entry) 44 return entry->data; 45 else 46 return NULL; 47} 48 49/** Calculate an estimated cost in number of instructions 50 * 51 * We do this so that we don't unroll loops which will later get massively 52 * inflated due to int64 or fp64 lowering. 
The estimates provided here don't 53 * have to be massively accurate; they just have to be good enough that loop 54 * unrolling doesn't cause things to blow up too much. 55 */ 56static unsigned 57instr_cost(loop_info_state *state, nir_instr *instr, 58 const nir_shader_compiler_options *options) 59{ 60 if (instr->type == nir_instr_type_intrinsic || 61 instr->type == nir_instr_type_tex) 62 return 1; 63 64 if (instr->type != nir_instr_type_alu) 65 return 0; 66 67 nir_alu_instr *alu = nir_instr_as_alu(instr); 68 const nir_op_info *info = &nir_op_infos[alu->op]; 69 unsigned cost = 1; 70 71 if (nir_op_is_selection(alu->op)) { 72 nir_scalar cond_scalar = { alu->src[0].src.ssa, 0 }; 73 if (nir_is_terminator_condition_with_two_inputs(cond_scalar)) { 74 nir_instr *sel_cond = alu->src[0].src.ssa->parent_instr; 75 nir_alu_instr *sel_alu = nir_instr_as_alu(sel_cond); 76 77 nir_scalar rhs, lhs; 78 lhs = nir_scalar_chase_alu_src(cond_scalar, 0); 79 rhs = nir_scalar_chase_alu_src(cond_scalar, 1); 80 81 /* If the selects condition is a comparision between a constant and 82 * a basic induction variable we know that it will be eliminated once 83 * the loop is unrolled so here we assign it a cost of 0. 84 */ 85 if ((nir_src_is_const(sel_alu->src[0].src) && 86 get_loop_var(rhs.def, state)) || 87 (nir_src_is_const(sel_alu->src[1].src) && 88 get_loop_var(lhs.def, state))) { 89 /* Also if the selects condition is only used by the select then 90 * remove that alu instructons cost from the cost total also. 91 */ 92 if (!list_is_singular(&sel_alu->def.uses) || 93 nir_def_used_by_if(&sel_alu->def)) 94 return 0; 95 else 96 return -1; 97 } 98 } 99 } 100 101 if (alu->op == nir_op_flrp) { 102 if ((options->lower_flrp16 && alu->def.bit_size == 16) || 103 (options->lower_flrp32 && alu->def.bit_size == 32) || 104 (options->lower_flrp64 && alu->def.bit_size == 64)) 105 cost *= 3; 106 } 107 108 /* Assume everything 16 or 32-bit is cheap. 
109 * 110 * There are no 64-bit ops that don't have a 64-bit thing as their 111 * destination or first source. 112 */ 113 if (alu->def.bit_size < 64 && 114 nir_src_bit_size(alu->src[0].src) < 64) 115 return cost; 116 117 bool is_fp64 = alu->def.bit_size == 64 && 118 nir_alu_type_get_base_type(info->output_type) == nir_type_float; 119 for (unsigned i = 0; i < info->num_inputs; i++) { 120 if (nir_src_bit_size(alu->src[i].src) == 64 && 121 nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float) 122 is_fp64 = true; 123 } 124 125 if (is_fp64) { 126 /* If it's something lowered normally, it's expensive. */ 127 if (options->lower_doubles_options & 128 nir_lower_doubles_op_to_options_mask(alu->op)) 129 cost *= 20; 130 131 /* If it's full software, it's even more expensive */ 132 if (options->lower_doubles_options & nir_lower_fp64_full_software) { 133 cost *= 100; 134 state->loop->info->has_soft_fp64 = true; 135 } 136 137 return cost; 138 } else { 139 if (options->lower_int64_options & 140 nir_lower_int64_op_to_options_mask(alu->op)) { 141 /* These require a doing the division algorithm. */ 142 if (alu->op == nir_op_idiv || alu->op == nir_op_udiv || 143 alu->op == nir_op_imod || alu->op == nir_op_umod || 144 alu->op == nir_op_irem) 145 return cost * 100; 146 147 /* Other int64 lowering isn't usually all that expensive */ 148 return cost * 5; 149 } 150 151 return cost; 152 } 153} 154 155/* If all of the instruction sources point to identical ALU instructions (as 156 * per nir_instrs_equal), return one of the ALU instructions. Otherwise, 157 * return NULL. 
158 */ 159static nir_alu_instr * 160phi_instr_as_alu(nir_phi_instr *phi) 161{ 162 nir_alu_instr *first = NULL; 163 nir_foreach_phi_src(src, phi) { 164 if (src->src.ssa->parent_instr->type != nir_instr_type_alu) 165 return NULL; 166 167 nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); 168 if (first == NULL) { 169 first = alu; 170 } else { 171 if (!nir_instrs_equal(&first->instr, &alu->instr)) 172 return NULL; 173 } 174 } 175 176 return first; 177} 178 179static bool 180alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx) 181{ 182 assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0); 183 for (unsigned i = 0; i < alu->def.num_components; i++) { 184 if (alu->src[src_idx].swizzle[i] != i) 185 return false; 186 } 187 188 return true; 189} 190 191static bool 192is_only_uniform_src(nir_src *src) 193{ 194 nir_instr *instr = src->ssa->parent_instr; 195 196 switch (instr->type) { 197 case nir_instr_type_alu: { 198 /* Return true if all sources return true. */ 199 nir_alu_instr *alu = nir_instr_as_alu(instr); 200 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 201 if (!is_only_uniform_src(&alu->src[i].src)) 202 return false; 203 } 204 return true; 205 } 206 207 case nir_instr_type_intrinsic: { 208 nir_intrinsic_instr *inst = nir_instr_as_intrinsic(instr); 209 /* current uniform inline only support load ubo */ 210 return inst->intrinsic == nir_intrinsic_load_ubo; 211 } 212 213 case nir_instr_type_load_const: 214 /* Always return true for constants. */ 215 return true; 216 217 default: 218 return false; 219 } 220} 221 222static bool 223compute_induction_information(loop_info_state *state) 224{ 225 bool progress = false; 226 227 /* We are only interested in checking phis for the basic induction 228 * variable case as its simple to detect. 
All basic induction variables 229 * have a phi node 230 */ 231 nir_block *header = nir_loop_first_block(state->loop); 232 nir_block *preheader = nir_block_cf_tree_prev(header); 233 234 nir_foreach_phi(phi, header) { 235 nir_loop_induction_variable var = { .basis = &phi->def }; 236 237 nir_foreach_phi_src(phi_src, phi) { 238 nir_def *src = phi_src->src.ssa; 239 240 if (phi_src->pred == preheader) { 241 var.init_src = &phi_src->src; 242 continue; 243 } 244 245 /* If one of the sources is in an if branch or nested loop then don't 246 * attempt to go any further. 247 */ 248 if (src->parent_instr->block->cf_node.parent != &state->loop->cf_node) 249 break; 250 251 /* Detect inductions variables that are incremented in both branches 252 * of an unnested if rather than in a loop block. 253 */ 254 if (src->parent_instr->type == nir_instr_type_phi) { 255 nir_phi_instr *src_phi = nir_instr_as_phi(src->parent_instr); 256 nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi); 257 if (src_phi_alu) { 258 src = &src_phi_alu->def; 259 } 260 } 261 262 if (src->parent_instr->type == nir_instr_type_alu && !var.update_src) { 263 var.def = src; 264 nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr); 265 266 /* Check for unsupported alu operations */ 267 if (alu->op != nir_op_iadd && alu->op != nir_op_fadd && 268 alu->op != nir_op_imul && alu->op != nir_op_fmul && 269 alu->op != nir_op_ishl && alu->op != nir_op_ishr && 270 alu->op != nir_op_ushr) 271 break; 272 273 if (nir_op_infos[alu->op].num_inputs == 2) { 274 for (unsigned i = 0; i < 2; i++) { 275 /* Is one of the operands const or uniform, and the other the phi. 276 * The phi source can't be swizzled in any way. 
277 */ 278 if (alu->src[1 - i].src.ssa == &phi->def && 279 alu_src_has_identity_swizzle(alu, 1 - i)) { 280 if (is_only_uniform_src(&alu->src[i].src)) 281 var.update_src = alu->src + i; 282 } 283 } 284 } 285 286 if (!var.update_src) 287 break; 288 } else { 289 var.update_src = NULL; 290 break; 291 } 292 } 293 294 if (var.update_src && var.init_src && 295 is_only_uniform_src(var.init_src)) { 296 /* Insert induction variable into hash table. */ 297 struct hash_table *vars = state->loop->info->induction_vars; 298 nir_loop_induction_variable *induction_var = ralloc(vars, nir_loop_induction_variable); 299 *induction_var = var; 300 _mesa_hash_table_insert(vars, induction_var->def, induction_var); 301 _mesa_hash_table_insert(vars, induction_var->basis, induction_var); 302 progress = true; 303 } 304 } 305 306 return progress; 307} 308 309static bool 310find_loop_terminators(loop_info_state *state) 311{ 312 bool success = false; 313 foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) { 314 if (node->type == nir_cf_node_if) { 315 nir_if *nif = nir_cf_node_as_if(node); 316 317 nir_block *break_blk = NULL; 318 nir_block *continue_from_blk = NULL; 319 bool continue_from_then = true; 320 321 nir_block *last_then = nir_if_last_then_block(nif); 322 nir_block *last_else = nir_if_last_else_block(nif); 323 if (nir_block_ends_in_break(last_then)) { 324 break_blk = last_then; 325 continue_from_blk = last_else; 326 continue_from_then = false; 327 } else if (nir_block_ends_in_break(last_else)) { 328 break_blk = last_else; 329 continue_from_blk = last_then; 330 } 331 332 /* If there is a break then we should find a terminator. 
If we can 333 * not find a loop terminator, but there is a break-statement then 334 * we should return false so that we do not try to find trip-count 335 */ 336 if (!nir_is_trivial_loop_if(nif, break_blk)) { 337 state->loop->info->complex_loop = true; 338 return false; 339 } 340 341 /* Continue if the if contained no jumps at all */ 342 if (!break_blk) 343 continue; 344 345 if (nif->condition.ssa->parent_instr->type == nir_instr_type_phi) { 346 state->loop->info->complex_loop = true; 347 return false; 348 } 349 350 nir_loop_terminator *terminator = 351 rzalloc(state->loop->info, nir_loop_terminator); 352 353 list_addtail(&terminator->loop_terminator_link, 354 &state->loop->info->loop_terminator_list); 355 356 terminator->nif = nif; 357 terminator->break_block = break_blk; 358 terminator->continue_from_block = continue_from_blk; 359 terminator->continue_from_then = continue_from_then; 360 terminator->conditional_instr = nif->condition.ssa->parent_instr; 361 362 success = true; 363 } 364 } 365 366 return success; 367} 368 369/* This function looks for an array access within a loop that uses an 370 * induction variable for the array index. If found it returns the size of the 371 * array, otherwise 0 is returned. If we find an induction var we pass it back 372 * to the caller via array_index_out. 
373 */ 374static unsigned 375find_array_access_via_induction(loop_info_state *state, 376 nir_deref_instr *deref, 377 nir_loop_induction_variable **array_index_out) 378{ 379 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 380 if (d->deref_type != nir_deref_type_array) 381 continue; 382 383 nir_loop_induction_variable *array_index = get_loop_var(d->arr.index.ssa, state); 384 385 if (!array_index) 386 continue; 387 388 if (array_index_out) 389 *array_index_out = array_index; 390 391 nir_deref_instr *parent = nir_deref_instr_parent(d); 392 393 if (glsl_type_is_array_or_matrix(parent->type)) { 394 return glsl_get_length(parent->type); 395 } else { 396 assert(glsl_type_is_vector(parent->type)); 397 return glsl_get_vector_elements(parent->type); 398 } 399 } 400 401 return 0; 402} 403 404static unsigned 405guess_loop_limit(loop_info_state *state) 406{ 407 unsigned min_array_size = UINT_MAX; 408 409 nir_foreach_block_in_cf_node(block, &state->loop->cf_node) { 410 nir_foreach_instr(instr, block) { 411 if (instr->type != nir_instr_type_intrinsic) 412 continue; 413 414 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 415 416 /* Check for arrays variably-indexed by a loop induction variable. 
*/ 417 if (intrin->intrinsic == nir_intrinsic_load_deref || 418 intrin->intrinsic == nir_intrinsic_store_deref || 419 intrin->intrinsic == nir_intrinsic_copy_deref) { 420 421 nir_loop_induction_variable *array_idx = NULL; 422 unsigned array_size = 423 find_array_access_via_induction(state, 424 nir_src_as_deref(intrin->src[0]), 425 &array_idx); 426 if (array_idx) 427 min_array_size = MIN2(min_array_size, array_size); 428 429 if (intrin->intrinsic != nir_intrinsic_copy_deref) 430 continue; 431 432 array_size = 433 find_array_access_via_induction(state, 434 nir_src_as_deref(intrin->src[1]), 435 &array_idx); 436 if (array_idx) 437 min_array_size = MIN2(min_array_size, array_size); 438 } 439 } 440 } 441 442 if (min_array_size != UINT_MAX) 443 return min_array_size; 444 else 445 return 0; 446} 447 448static nir_op invert_comparison_if_needed(nir_op alu_op, bool invert); 449 450/* Returns whether "limit_op(a, b) alu_op c" is equivalent to "(a alu_op c) || (b alu_op c)". */ 451static bool 452is_minmax_compatible(nir_op limit_op, nir_op alu_op, bool limit_rhs, bool invert_cond) 453{ 454 bool is_max; 455 switch (limit_op) { 456 case nir_op_imin: 457 case nir_op_fmin: 458 case nir_op_umin: 459 is_max = false; 460 break; 461 case nir_op_imax: 462 case nir_op_fmax: 463 case nir_op_umax: 464 is_max = true; 465 break; 466 default: 467 return false; 468 } 469 470 if (nir_op_infos[limit_op].input_types[0] != nir_op_infos[alu_op].input_types[0]) 471 return false; 472 473 /* Comparisons we can split are: 474 * - min(a, b) < c 475 * - c < max(a, b) 476 * - max(a, b) >= c 477 * - c >= min(a, b) 478 */ 479 switch (invert_comparison_if_needed(alu_op, invert_cond)) { 480 case nir_op_ilt: 481 case nir_op_flt: 482 case nir_op_ult: 483 return (!limit_rhs && !is_max) || (limit_rhs && is_max); 484 case nir_op_ige: 485 case nir_op_fge: 486 case nir_op_uge: 487 return (!limit_rhs && is_max) || (limit_rhs && !is_max); 488 default: 489 return false; 490 } 491} 492 493static bool 
494try_find_limit_of_alu(nir_scalar limit, nir_const_value *limit_val, nir_op alu_op, 495 bool invert_cond, nir_loop_terminator *terminator, 496 loop_info_state *state) 497{ 498 if (!nir_scalar_is_alu(limit)) 499 return false; 500 501 nir_op limit_op = nir_scalar_alu_op(limit); 502 if (is_minmax_compatible(limit_op, alu_op, !terminator->induction_rhs, invert_cond)) { 503 for (unsigned i = 0; i < 2; i++) { 504 nir_scalar src = nir_scalar_chase_alu_src(limit, i); 505 if (nir_scalar_is_const(src)) { 506 *limit_val = nir_scalar_as_const_value(src); 507 terminator->exact_trip_count_unknown = true; 508 return true; 509 } 510 } 511 } 512 513 return false; 514} 515 516static nir_const_value 517eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0, 518 unsigned execution_mode) 519{ 520 assert(nir_op_infos[op].num_inputs == 1); 521 nir_const_value dest; 522 nir_const_value *src[1] = { &src0 }; 523 nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode); 524 return dest; 525} 526 527static nir_const_value 528eval_const_binop(nir_op op, unsigned bit_size, 529 nir_const_value src0, nir_const_value src1, 530 unsigned execution_mode) 531{ 532 assert(nir_op_infos[op].num_inputs == 2); 533 nir_const_value dest; 534 nir_const_value *src[2] = { &src0, &src1 }; 535 nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode); 536 return dest; 537} 538 539static int 540find_replacement(const nir_scalar *originals, nir_scalar key, 541 unsigned num_replacements) 542{ 543 for (int i = 0; i < num_replacements; i++) { 544 if (nir_scalar_equal(originals[i], key)) 545 return i; 546 } 547 548 return -1; 549} 550 551/** 552 * Try to evaluate an ALU instruction as a constant with a replacement 553 * 554 * Much like \c nir_opt_constant_folding.c:try_fold_alu, this method attempts 555 * to evaluate an ALU instruction as a constant. There are two significant 556 * differences. 557 * 558 * First, this method performs the evaluation recursively. 
If any source of 559 * the ALU instruction is not itself a constant, it is first evaluated. 560 * 561 * Second, if the SSA value \c original is encountered as a source of the ALU 562 * instruction, the value \c replacement is substituted. 563 * 564 * The intended purpose of this function is to evaluate an arbitrary 565 * expression involving a loop induction variable. In this case, \c original 566 * would be the phi node associated with the induction variable, and 567 * \c replacement is the initial value of the induction variable. 568 * 569 * \returns true if the ALU instruction can be evaluated as constant (after 570 * applying the previously described substitution) or false otherwise. 571 */ 572static bool 573try_eval_const_alu(nir_const_value *dest, nir_scalar alu_s, const nir_scalar *originals, 574 const nir_const_value *replacements, 575 unsigned num_replacements, unsigned execution_mode) 576{ 577 nir_alu_instr *alu = nir_instr_as_alu(alu_s.def->parent_instr); 578 579 if (nir_op_infos[alu->op].output_size) 580 return false; 581 582 /* In the case that any outputs/inputs have unsized types, then we need to 583 * guess the bit-size. In this case, the validator ensures that all 584 * bit-sizes match so we can just take the bit-size from first 585 * output/input with an unsized type. If all the outputs/inputs are sized 586 * then we don't need to guess the bit-size at all because the code we 587 * generate for constant opcodes in this case already knows the sizes of 588 * the types involved and does not need the provided bit-size for anything 589 * (although it still requires to receive a valid bit-size). 
590 */ 591 unsigned bit_size = 0; 592 if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type)) { 593 bit_size = alu->def.bit_size; 594 } else { 595 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 596 if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i])) 597 bit_size = alu->src[i].src.ssa->bit_size; 598 } 599 600 if (bit_size == 0) 601 bit_size = 32; 602 } 603 604 nir_const_value src[NIR_MAX_VEC_COMPONENTS]; 605 nir_const_value *src_ptrs[NIR_MAX_VEC_COMPONENTS]; 606 607 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 608 nir_scalar src_s = nir_scalar_chase_alu_src(alu_s, i); 609 610 src_ptrs[i] = &src[i]; 611 if (nir_scalar_is_const(src_s)) { 612 src[i] = nir_scalar_as_const_value(src_s); 613 continue; 614 } 615 616 int r = find_replacement(originals, src_s, num_replacements); 617 if (r >= 0) { 618 src[i] = replacements[r]; 619 } else if (!nir_scalar_is_alu(src_s) || 620 !try_eval_const_alu(&src[i], src_s, 621 originals, replacements, 622 num_replacements, execution_mode)) { 623 return false; 624 } 625 } 626 627 nir_eval_const_opcode(alu->op, dest, 1, bit_size, src_ptrs, execution_mode); 628 629 return true; 630} 631 632static nir_op 633invert_comparison_if_needed(nir_op alu_op, bool invert) 634{ 635 if (!invert) 636 return alu_op; 637 638 switch (alu_op) { 639 case nir_op_fge: 640 return nir_op_flt; 641 case nir_op_ige: 642 return nir_op_ilt; 643 case nir_op_uge: 644 return nir_op_ult; 645 case nir_op_flt: 646 return nir_op_fge; 647 case nir_op_ilt: 648 return nir_op_ige; 649 case nir_op_ult: 650 return nir_op_uge; 651 case nir_op_feq: 652 return nir_op_fneu; 653 case nir_op_ieq: 654 return nir_op_ine; 655 case nir_op_fneu: 656 return nir_op_feq; 657 case nir_op_ine: 658 return nir_op_ieq; 659 default: 660 unreachable("Unsuported comparison!"); 661 } 662} 663 664static int32_t 665get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step, 666 nir_const_value limit, bool invert_cond, 
unsigned bit_size, 667 unsigned execution_mode) 668{ 669 nir_const_value span, iter; 670 unsigned iter_bit_size = bit_size; 671 672 switch (invert_comparison_if_needed(cond_op, invert_cond)) { 673 case nir_op_ine: 674 /* In order for execution to be here, limit must be the same as initial. 675 * Otherwise will_break_on_first_iteration would have returned false. 676 * If step is zero, the loop is infinite. Otherwise the loop will 677 * execute once. 678 */ 679 return step.u64 == 0 ? -1 : 1; 680 681 case nir_op_ige: 682 case nir_op_ilt: 683 case nir_op_ieq: 684 span = eval_const_binop(nir_op_isub, bit_size, limit, initial, 685 execution_mode); 686 iter = eval_const_binop(nir_op_idiv, bit_size, span, step, 687 execution_mode); 688 break; 689 690 case nir_op_uge: 691 case nir_op_ult: 692 span = eval_const_binop(nir_op_isub, bit_size, limit, initial, 693 execution_mode); 694 iter = eval_const_binop(nir_op_udiv, bit_size, span, step, 695 execution_mode); 696 break; 697 698 case nir_op_fneu: 699 /* In order for execution to be here, limit must be the same as initial. 700 * Otherwise will_break_on_first_iteration would have returned false. 701 * If step is zero, the loop is infinite. Otherwise the loop will 702 * execute once. 703 * 704 * This is a little more tricky for floating point since X-Y might still 705 * be X even if Y is not zero. Instead check that (initial + step) != 706 * initial. 707 */ 708 span = eval_const_binop(nir_op_fadd, bit_size, initial, step, 709 execution_mode); 710 iter = eval_const_binop(nir_op_feq, bit_size, initial, 711 span, execution_mode); 712 713 /* return (initial + step) == initial ? -1 : 1 */ 714 return iter.b ? 
-1 : 1; 715 716 case nir_op_fge: 717 case nir_op_flt: 718 case nir_op_feq: 719 span = eval_const_binop(nir_op_fsub, bit_size, limit, initial, 720 execution_mode); 721 iter = eval_const_binop(nir_op_fdiv, bit_size, span, 722 step, execution_mode); 723 iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode); 724 iter_bit_size = 64; 725 break; 726 727 default: 728 return -1; 729 } 730 731 uint64_t iter_u64 = nir_const_value_as_uint(iter, iter_bit_size); 732 return iter_u64 > u_intN_max(iter_bit_size) ? -1 : (int)iter_u64; 733} 734 735static int32_t 736get_iteration_empirical(nir_scalar cond, nir_alu_instr *incr_alu, 737 nir_scalar basis, nir_const_value initial, 738 nir_scalar limit_basis, nir_const_value limit, 739 bool invert_cond, unsigned execution_mode, 740 unsigned max_unroll_iterations) 741{ 742 int iter_count = 0; 743 nir_const_value result; 744 745 const nir_scalar incr = nir_get_scalar(&incr_alu->def, basis.comp); 746 747 const nir_scalar original[] = {basis, limit_basis}; 748 nir_const_value replacement[] = {initial, limit}; 749 750 while (iter_count <= max_unroll_iterations) { 751 bool success; 752 753 success = try_eval_const_alu(&result, cond, original, replacement, 754 2, execution_mode); 755 if (!success) 756 return -1; 757 758 const bool cond_succ = invert_cond ? 
!result.b : result.b; 759 if (cond_succ) 760 return iter_count; 761 762 iter_count++; 763 764 success = try_eval_const_alu(&result, incr, original, replacement, 765 2, execution_mode); 766 assert(success); 767 768 replacement[0] = result; 769 } 770 771 return -1; 772} 773 774static bool 775will_break_on_first_iteration(nir_scalar cond, nir_scalar basis, 776 nir_scalar limit_basis, 777 nir_const_value initial, nir_const_value limit, 778 bool invert_cond, unsigned execution_mode) 779{ 780 nir_const_value result; 781 782 const nir_scalar originals[2] = { basis, limit_basis }; 783 const nir_const_value replacements[2] = { initial, limit }; 784 785 ASSERTED bool success = try_eval_const_alu(&result, cond, originals, 786 replacements, 2, execution_mode); 787 788 assert(success); 789 790 return invert_cond ? !result.b : result.b; 791} 792 793static bool 794test_iterations(int32_t iter_int, nir_const_value step, 795 nir_const_value limit, nir_op cond_op, unsigned bit_size, 796 nir_alu_type induction_base_type, 797 nir_const_value initial, bool limit_rhs, bool invert_cond, 798 unsigned execution_mode) 799{ 800 assert(nir_op_infos[cond_op].num_inputs == 2); 801 802 nir_const_value iter_src; 803 nir_op mul_op; 804 nir_op add_op; 805 switch (induction_base_type) { 806 case nir_type_float: 807 iter_src = nir_const_value_for_float(iter_int, bit_size); 808 mul_op = nir_op_fmul; 809 add_op = nir_op_fadd; 810 break; 811 case nir_type_int: 812 case nir_type_uint: 813 iter_src = nir_const_value_for_int(iter_int, bit_size); 814 mul_op = nir_op_imul; 815 add_op = nir_op_iadd; 816 break; 817 default: 818 unreachable("Unhandled induction variable base type!"); 819 } 820 821 /* Multiple the iteration count we are testing by the number of times we 822 * step the induction variable each iteration. 
823 */ 824 nir_const_value mul_result = 825 eval_const_binop(mul_op, bit_size, iter_src, step, execution_mode); 826 827 /* Add the initial value to the accumulated induction variable total */ 828 nir_const_value add_result = 829 eval_const_binop(add_op, bit_size, mul_result, initial, execution_mode); 830 831 nir_const_value *src[2]; 832 src[limit_rhs ? 0 : 1] = &add_result; 833 src[limit_rhs ? 1 : 0] = &limit; 834 835 /* Evaluate the loop exit condition */ 836 nir_const_value result; 837 nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode); 838 839 return invert_cond ? !result.b : result.b; 840} 841 842static int 843calculate_iterations(nir_scalar basis, nir_scalar limit_basis, 844 nir_const_value initial, nir_const_value step, 845 nir_const_value limit, nir_alu_instr *alu, 846 nir_scalar cond, nir_op alu_op, bool limit_rhs, 847 bool invert_cond, unsigned execution_mode, 848 unsigned max_unroll_iterations) 849{ 850 /* nir_op_isub should have been lowered away by this point */ 851 assert(alu->op != nir_op_isub); 852 853 /* Make sure the alu type for our induction variable is compatible with the 854 * conditional alus input type. If its not something has gone really wrong. 855 */ 856 nir_alu_type induction_base_type = 857 nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type); 858 if (induction_base_type == nir_type_int || induction_base_type == nir_type_uint) { 859 assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_int || 860 nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_uint); 861 } else { 862 assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) == 863 induction_base_type); 864 } 865 866 /* do-while loops can increment the starting value before the condition is 867 * checked. e.g. 
868 * 869 * do { 870 * ndx++; 871 * } while (ndx < 3); 872 * 873 * Here we check if the induction variable is used directly by the loop 874 * condition and if so we assume we need to step the initial value. 875 */ 876 unsigned trip_offset = 0; 877 nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr); 878 if (cond_alu->src[0].src.ssa == &alu->def || 879 cond_alu->src[1].src.ssa == &alu->def) { 880 trip_offset = 1; 881 } 882 883 unsigned bit_size = nir_src_bit_size(alu->src[0].src); 884 885 /* get_iteration works under assumption that iterator will be 886 * incremented or decremented until it hits the limit, 887 * however if the loop condition is false on the first iteration 888 * get_iteration's assumption is broken. Handle such loops first. 889 */ 890 if (will_break_on_first_iteration(cond, basis, limit_basis, initial, 891 limit, invert_cond, execution_mode)) { 892 return 0; 893 } 894 895 /* For loops incremented with addition operation, it's easy to 896 * calculate the number of iterations theoretically. Even though it 897 * is possible for other operations as well, it is much more error 898 * prone, and doesn't cover all possible cases. So, we try to 899 * emulate the loop. 900 */ 901 int iter_int; 902 switch (alu->op) { 903 case nir_op_iadd: 904 case nir_op_fadd: 905 assert(nir_src_bit_size(alu->src[0].src) == 906 nir_src_bit_size(alu->src[1].src)); 907 908 iter_int = get_iteration(alu_op, initial, step, limit, invert_cond, 909 bit_size, execution_mode); 910 break; 911 case nir_op_fmul: 912 /* Detecting non-zero loop counts when the loop increment is floating 913 * point multiplication triggers a preexisting problem in 914 * glsl-fs-loop-unroll-mul-fp64.shader_test. See 915 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3445#note_1779438. 
916 */ 917 return -1; 918 case nir_op_imul: 919 case nir_op_ishl: 920 case nir_op_ishr: 921 case nir_op_ushr: 922 return get_iteration_empirical(cond, alu, basis, initial, 923 limit_basis, limit, invert_cond, 924 execution_mode, max_unroll_iterations); 925 default: 926 unreachable("Invalid induction variable increment operation."); 927 } 928 929 /* If iter_int is negative the loop is ill-formed or is the conditional is 930 * unsigned with a huge iteration count so don't bother going any further. 931 */ 932 if (iter_int < 0) 933 return -1; 934 935 nir_op actual_alu_op = invert_comparison_if_needed(alu_op, invert_cond); 936 if (actual_alu_op == nir_op_ine || actual_alu_op == nir_op_fneu) 937 return iter_int; 938 939 /* An explanation from the GLSL unrolling pass: 940 * 941 * Make sure that the calculated number of iterations satisfies the exit 942 * condition. This is needed to catch off-by-one errors and some types of 943 * ill-formed loops. For example, we need to detect that the following 944 * loop does not have a maximum iteration count. 
945 * 946 * for (float x = 0.0; x != 0.9; x += 0.2); 947 */ 948 for (int bias = -1; bias <= 1; bias++) { 949 const int iter_bias = iter_int + bias; 950 if (iter_bias < 1) 951 continue; 952 953 if (test_iterations(iter_bias, step, limit, alu_op, bit_size, 954 induction_base_type, initial, 955 limit_rhs, invert_cond, execution_mode)) { 956 return iter_bias - trip_offset; 957 } 958 } 959 960 return -1; 961} 962 963static bool 964get_induction_and_limit_vars(nir_scalar cond, 965 nir_scalar *ind, 966 nir_scalar *limit, 967 bool *limit_rhs, 968 loop_info_state *state) 969{ 970 nir_scalar rhs, lhs; 971 lhs = nir_scalar_chase_alu_src(cond, 0); 972 rhs = nir_scalar_chase_alu_src(cond, 1); 973 974 nir_loop_induction_variable *src0_lv = get_loop_var(lhs.def, state); 975 nir_loop_induction_variable *src1_lv = get_loop_var(rhs.def, state); 976 977 if (src0_lv) { 978 *ind = lhs; 979 *limit = rhs; 980 *limit_rhs = true; 981 return true; 982 } else if (src1_lv) { 983 *ind = rhs; 984 *limit = lhs; 985 *limit_rhs = false; 986 return true; 987 } else { 988 return false; 989 } 990} 991 992static bool 993try_find_trip_count_vars_in_logical_op(nir_scalar *cond, 994 nir_scalar *ind, 995 nir_scalar *limit, 996 bool *limit_rhs, 997 loop_info_state *state) 998{ 999 const nir_op alu_op = nir_scalar_alu_op(*cond); 1000 bool exit_loop_on_false = alu_op == nir_op_ieq || alu_op == nir_op_inot; 1001 nir_scalar logical_op = exit_loop_on_false ? 
1002 nir_scalar_chase_alu_src(*cond, 0) : *cond; 1003 1004 if (alu_op == nir_op_ieq) { 1005 nir_scalar zero = nir_scalar_chase_alu_src(*cond, 1); 1006 1007 if (!nir_scalar_is_alu(logical_op) || !nir_scalar_is_const(zero)) { 1008 /* Maybe we had it the wrong way, flip things around */ 1009 nir_scalar tmp = zero; 1010 zero = logical_op; 1011 logical_op = tmp; 1012 1013 /* If we still didn't find what we need then return */ 1014 if (!nir_scalar_is_const(zero)) 1015 return false; 1016 } 1017 1018 /* If the loop is not breaking on (x && y) == 0 then return */ 1019 if (nir_scalar_as_uint(zero) != 0) 1020 return false; 1021 } 1022 1023 if (!nir_scalar_is_alu(logical_op)) 1024 return false; 1025 1026 if ((exit_loop_on_false && (nir_scalar_alu_op(logical_op) != nir_op_iand)) || 1027 (!exit_loop_on_false && (nir_scalar_alu_op(logical_op) != nir_op_ior))) 1028 return false; 1029 1030 /* Check if iand src is a terminator condition and try get induction var 1031 * and trip limit var. 1032 */ 1033 bool found_induction_var = false; 1034 for (unsigned i = 0; i < 2; i++) { 1035 nir_scalar src = nir_scalar_chase_alu_src(logical_op, i); 1036 if (nir_is_terminator_condition_with_two_inputs(src) && 1037 get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) { 1038 *cond = src; 1039 found_induction_var = true; 1040 1041 /* If we've found one with a constant limit, stop. */ 1042 if (nir_scalar_is_const(*limit)) 1043 return true; 1044 } 1045 } 1046 1047 return found_induction_var; 1048} 1049 1050/* Run through each of the terminators of the loop and try to infer a possible 1051 * trip-count. We need to check them all, and set the lowest trip-count as the 1052 * trip-count of our loop. If one of the terminators has an undecidable 1053 * trip-count we can not safely assume anything about the duration of the 1054 * loop. 
 */
static void
find_trip_count(loop_info_state *state, unsigned execution_mode,
                unsigned max_unroll_iterations)
{
   bool trip_count_known = true;
   bool guessed_trip_count = false;
   nir_loop_terminator *limiting_terminator = NULL;
   int max_trip_count = -1;

   list_for_each_entry(nir_loop_terminator, terminator,
                       &state->loop->info->loop_terminator_list,
                       loop_terminator_link) {
      nir_scalar cond = { terminator->nif->condition.ssa, 0 };

      if (!nir_scalar_is_alu(cond)) {
         /* If we get here the loop is dead and will get cleaned up by the
          * nir_opt_dead_cf pass.
          */
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
         continue;
      }

      nir_op alu_op = nir_scalar_alu_op(cond);

      bool invert_cond = terminator->continue_from_then;

      bool limit_rhs;
      nir_scalar basic_ind = { NULL, 0 };
      nir_scalar limit;

      /* First try to match a comparison buried inside a logical op
       * (iand/ior, possibly negated via inot or == 0).
       */
      if ((alu_op == nir_op_inot || alu_op == nir_op_ieq || alu_op == nir_op_ior) &&
          try_find_trip_count_vars_in_logical_op(&cond, &basic_ind, &limit,
                                                 &limit_rhs, state)) {

         /* The loop is exiting on (x && y) == 0 so we need to get the
          * inverse of x or y (i.e. which ever contained the induction var) in
          * order to compute the trip count.
          */
         if (alu_op == nir_op_inot || alu_op == nir_op_ieq)
            invert_cond = !invert_cond;

         /* cond was rewritten to the inner comparison; refresh alu_op. */
         alu_op = nir_scalar_alu_op(cond);
         trip_count_known = false;
         terminator->conditional_instr = cond.def->parent_instr;
         terminator->exact_trip_count_unknown = true;
      }

      if (!basic_ind.def) {
         if (nir_is_supported_terminator_condition(cond)) {
            /* Extract and invert the comparison if it is wrapped in an inot.
             */
            if (alu_op == nir_op_inot) {
               cond = nir_scalar_chase_alu_src(cond, 0);
               alu_op = nir_scalar_alu_op(cond);
               invert_cond = !invert_cond;
            }

            get_induction_and_limit_vars(cond, &basic_ind,
                                         &limit, &limit_rhs, state);
         }
      }

      /* The comparison has to have a basic induction variable for us to be
       * able to find trip counts.
       */
      if (!basic_ind.def) {
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
         continue;
      }

      terminator->induction_rhs = !limit_rhs;

      /* Attempt to find a constant limit for the loop */
      nir_const_value limit_val;
      if (nir_scalar_is_const(limit)) {
         limit_val = nir_scalar_as_const_value(limit);
      } else {
         trip_count_known = false;

         if (!try_find_limit_of_alu(limit, &limit_val, alu_op, invert_cond, terminator, state)) {
            /* Guess loop limit based on array access */
            unsigned guessed_loop_limit = guess_loop_limit(state);
            if (guessed_loop_limit) {
               limit_val = nir_const_value_for_uint(guessed_loop_limit,
                                                    basic_ind.def->bit_size);
            } else {
               terminator->exact_trip_count_unknown = true;
               continue;
            }

            guessed_trip_count = true;
         }
      }

      /* We have determined that we have the following constants:
       * (With the typical int i = 0; i < x; i++; as an example)
       * - Upper limit.
       * - Starting value
       * - Step / iteration size
       * That's all that's needed to calculate the trip-count
       */

      nir_loop_induction_variable *lv = get_loop_var(basic_ind.def, state);

      /* The basic induction var might be a vector but, because we guarantee
       * earlier that the phi source has a scalar swizzle, we can take the
       * component from basic_ind.
       */
      nir_scalar initial_s = { lv->init_src->ssa, basic_ind.comp };
      nir_scalar alu_s = {
         lv->update_src->src.ssa,
         lv->update_src->swizzle[basic_ind.comp]
      };

      nir_alu_instr *step_alu =
         nir_instr_as_alu(nir_src_parent_instr(&lv->update_src->src));

      /* If the comparison is of unsigned type we don't necessarily need to
       * know the initial value to be able to calculate the max number of
       * iterations
       */
      bool can_find_max_trip_count = step_alu->op == nir_op_iadd &&
         ((alu_op == nir_op_uge && !invert_cond && limit_rhs) ||
          (alu_op == nir_op_ult && !invert_cond && !limit_rhs));

      /* nir_op_isub should have been lowered away by this point */
      assert(step_alu->op != nir_op_isub);

      /* For nir_op_uge as alu_op, the induction variable is [0,limit). For
       * nir_op_ult, it's [0,limit]. It must always be step_val larger in the
       * next iteration to use the can_find_max_trip_count=true path. This
       * check ensures that no unsigned overflow happens.
       * TODO: support for overflow could be added if a non-zero initial_val
       * is chosen.
       */
      if (can_find_max_trip_count && nir_scalar_is_const(alu_s)) {
         uint64_t uint_max = u_uintN_max(alu_s.def->bit_size);
         uint64_t max_step_val =
            uint_max - nir_const_value_as_uint(limit_val, alu_s.def->bit_size) +
            (alu_op == nir_op_uge ? 1 : 0);
         can_find_max_trip_count &= nir_scalar_as_uint(alu_s) <= max_step_val;
      }

      /* We are not guaranteed that one of these sources is a constant.
       * Try to find one.
       */
      if ((!nir_scalar_is_const(initial_s) && !can_find_max_trip_count) ||
          !nir_scalar_is_const(alu_s))
         continue;

      nir_const_value initial_val;
      if (nir_scalar_is_const(initial_s))
         initial_val = nir_scalar_as_const_value(initial_s);
      else {
         /* Unknown initial value: fall back to the unsigned max-trip-count
          * path with an assumed start of 0.
          */
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
         initial_val = nir_const_value_for_uint(0, 32);
         assert(can_find_max_trip_count);
      }
      nir_const_value step_val = nir_scalar_as_const_value(alu_s);

      int iterations = calculate_iterations(nir_get_scalar(lv->basis, basic_ind.comp), limit,
                                            initial_val, step_val, limit_val,
                                            step_alu, cond,
                                            alu_op, limit_rhs,
                                            invert_cond,
                                            execution_mode,
                                            max_unroll_iterations);

      /* If we were not able to calculate the iteration count */
      if (iterations == -1) {
         trip_count_known = false;
         guessed_trip_count = false;
         terminator->exact_trip_count_unknown = true;
         continue;
      }

      if (guessed_trip_count) {
         /* Record the smallest guess seen so far, but never treat a guess as
          * an exact trip count.
          */
         guessed_trip_count = false;
         terminator->exact_trip_count_unknown = true;
         if (state->loop->info->guessed_trip_count == 0 ||
             state->loop->info->guessed_trip_count > iterations)
            state->loop->info->guessed_trip_count = iterations;

         continue;
      }

      /* If this is the first run or we have found a smaller amount of
       * iterations than previously (we have identified a more limiting
       * terminator) set the trip count and limiting terminator.
       */
      if (max_trip_count == -1 || iterations < max_trip_count) {
         max_trip_count = iterations;
         limiting_terminator = terminator;
      }
   }

   state->loop->info->exact_trip_count_known = trip_count_known;
   if (max_trip_count > -1)
      state->loop->info->max_trip_count = max_trip_count;
   state->loop->info->limiting_terminator = limiting_terminator;
}

/* Decide whether an indirect array access should force a full unroll.
 *
 * Returns true when the access is driven by a loop induction variable and
 * either: the array length matches the loop's max trip count and lives in a
 * mode where unrolling makes all indices constant, the deref's mode is in
 * the caller-supplied indirect mask, or it is a sampler access and the
 * caller asked for sampler indirects to be unrolled.
 */
static bool
force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref,
                          bool contains_sampler)
{
   unsigned array_size = find_array_access_via_induction(state, deref, NULL);
   if (array_size) {
      if ((array_size == state->loop->info->max_trip_count) &&
          nir_deref_mode_must_be(deref, nir_var_shader_in |
                                        nir_var_shader_out |
                                        nir_var_shader_temp |
                                        nir_var_function_temp))
         return true;

      if (nir_deref_mode_must_be(deref, state->indirect_mask))
         return true;

      if (contains_sampler && state->force_unroll_sampler_indirect)
         return true;
   }

   return false;
}

/* Scan one block of the loop body for accesses that justify forcing an
 * unroll: sampler derefs and variably-indexed load/store/copy derefs.
 */
static bool
force_unroll_heuristics(loop_info_state *state, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_tex) {
         nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
         int sampler_idx =
            nir_tex_instr_src_index(tex_instr,
                                    nir_tex_src_sampler_deref);

         if (sampler_idx >= 0) {
            nir_deref_instr *deref =
               nir_instr_as_deref(tex_instr->src[sampler_idx].src.ssa->parent_instr);
            if (force_unroll_array_access(state, deref, true))
               return true;
         }
      }

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      /* Check for arrays variably-indexed by a loop induction variable.
       * Unrolling the loop may convert that access into constant-indexing.
       */
      if (intrin->intrinsic == nir_intrinsic_load_deref ||
          intrin->intrinsic == nir_intrinsic_store_deref ||
          intrin->intrinsic == nir_intrinsic_copy_deref) {
         if (force_unroll_array_access(state,
                                       nir_src_as_deref(intrin->src[0]),
                                       false))
            return true;

         /* copy_deref has a second deref source (the copy source). */
         if (intrin->intrinsic == nir_intrinsic_copy_deref &&
             force_unroll_array_access(state,
                                       nir_src_as_deref(intrin->src[1]),
                                       false))
            return true;
      }
   }

   return false;
}

/* Populate state->loop->info: terminators, induction variables, trip count,
 * per-instruction cost estimate and the force_unroll flag.
 */
static void
get_loop_info(loop_info_state *state, nir_function_impl *impl)
{
   nir_shader *shader = impl->function->shader;
   const nir_shader_compiler_options *options = shader->options;

   /* Try to find all simple terminators of the loop. If we can't find any,
    * or we find possible terminators that have side effects then bail.
    */
   if (!find_loop_terminators(state)) {
      /* Discard any partially-collected terminators before bailing. */
      list_for_each_entry_safe(nir_loop_terminator, terminator,
                               &state->loop->info->loop_terminator_list,
                               loop_terminator_link) {
         list_del(&terminator->loop_terminator_link);
         ralloc_free(terminator);
      }
      return;
   }

   if (!compute_induction_information(state))
      return;

   /* Run through each of the terminators and try to compute a trip-count */
   find_trip_count(state,
                   impl->function->shader->info.float_controls_execution_mode,
                   impl->function->shader->options->max_unroll_iterations);

   /* Accumulate the estimated cost of the loop body and apply the
    * force-unroll heuristics block by block.
    */
   nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
      nir_foreach_instr(instr, block) {
         state->loop->info->instr_cost += instr_cost(state, instr, options);
      }

      if (state->loop->info->force_unroll)
         continue;

      if (force_unroll_heuristics(state, block)) {
         state->loop->info->force_unroll = true;
      }
   }
}

/* Reset (or create) the loop's info structure, including the induction
 * variable hash table and the terminator list.
 */
static void
initialize_loop_info(nir_loop *loop)
{
   if (loop->info)
      ralloc_free(loop->info);

   loop->info = rzalloc(loop, nir_loop_info);
   loop->info->induction_vars = _mesa_pointer_hash_table_create(loop->info);

   list_inithead(&loop->info->loop_terminator_list);
}

/* Recursively walk the CF tree and analyze every loop, innermost loops
 * first (children are processed before the switch breaks out for the
 * enclosing loop).
 */
static void
process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask,
              bool force_unroll_sampler_indirect)
{
   switch (cf_node->type) {
   case nir_cf_node_block:
      return;
   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
         process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
         process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
      return;
   }
   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      assert(!nir_loop_has_continue_construct(loop));

      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
         process_loops(nested_node, indirect_mask, force_unroll_sampler_indirect);
      /* Fall through below to analyze this loop itself. */
      break;
   }
   default:
      unreachable("unknown cf node type");
   }

   /* Only the loop case reaches here; blocks and ifs returned above. */
   nir_loop *loop = nir_cf_node_as_loop(cf_node);
   nir_function_impl *impl = nir_cf_node_get_function(cf_node);
   loop_info_state state = {
      .loop = loop,
      .indirect_mask = indirect_mask,
      .force_unroll_sampler_indirect = force_unroll_sampler_indirect,
   };

   initialize_loop_info(loop);
   get_loop_info(&state, impl);
}

/* Public entry point: analyze every loop in the given function impl and
 * record the analysis parameters on the impl so later passes can tell which
 * settings the cached info corresponds to.
 */
void
nir_loop_analyze_impl(nir_function_impl *impl,
                      nir_variable_mode indirect_mask,
                      bool force_unroll_sampler_indirect)
{
   foreach_list_typed(nir_cf_node, node, node, &impl->body)
      process_loops(node, indirect_mask, force_unroll_sampler_indirect);

   impl->loop_analysis_indirect_mask = indirect_mask;
   impl->loop_analysis_force_unroll_sampler_indirect = force_unroll_sampler_indirect;
}