mirror of OpenBSD xenocara tree github.com/openbsd/xenocara

/*
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/set.h"
#include "nir.h"
#include "nir_builder.h"

/* This pass provides a way to move computations that are always the same for
 * an entire draw/compute dispatch into a "preamble" that runs before the main
 * entrypoint.
 *
 * We also expose a separate API to get or construct the preamble of a shader
 * in case backends want to insert their own code.
 */

nir_function_impl *
nir_shader_get_preamble(nir_shader *shader)
{
   nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
   if (entrypoint->preamble) {
      return entrypoint->preamble->impl;
   } else {
      nir_function *preamble = nir_function_create(shader, "@preamble");
      preamble->is_preamble = true;
      nir_function_impl *impl = nir_function_impl_create(preamble);
      entrypoint->preamble = preamble;
      return impl;
   }
}
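
/* A rough usage sketch, not taken from any particular driver: a backend
 * typically runs the pass with its own cost model and then lowers the
 * load_preamble/store_preamble intrinsics it emits. The my_* callbacks and
 * values below are hypothetical; only option fields actually referenced in
 * this file are shown.
 *
 *    unsigned preamble_size = 0;
 *    const nir_opt_preamble_options opts = {
 *       .drawid_uniform = true,
 *       .subgroup_size_uniform = true,
 *       .def_size = my_def_size,
 *       .instr_cost_cb = my_instr_cost,
 *       .rewrite_cost_cb = my_rewrite_cost,
 *       .avoid_instr_cb = my_avoid_instr,
 *       .preamble_storage_size = 64,
 *       .cb_data = my_compiler_ctx,
 *    };
 *
 *    if (nir_opt_preamble(shader, &opts, &preamble_size))
 *       my_lower_preamble_intrinsics(shader);
 */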

typedef struct {
   bool can_move;
   bool candidate;
   bool must_stay;
   bool replace;

   unsigned can_move_users;

   unsigned size, align;

   unsigned offset;

   /* Average the cost of a value among its users, to try to account for
    * values that have multiple can_move uses.
    */
   float value;

   /* Overall benefit, i.e. the value minus any cost of inserting
    * load_preamble.
    */
   float benefit;
} def_state;

typedef struct {
   /* Per-definition array of states */
   def_state *states;

   /* Number of levels of non-uniform control flow we're in. We don't
    * reconstruct loops, so loops count as non-uniform conservatively. If-else
    * is counted if the condition is not marked can_move.
    */
   unsigned nonuniform_cf_nesting;

   /* Set of nir_if's that must be reconstructed in the preamble. Note an if
    * may need reconstruction even when not entirely moved. This does not
    * account for nesting: the parent CF nodes of ifs in this set must be
    * reconstructed but may not be in this set, even if the parent is another
    * if.
    */
   struct set *reconstructed_ifs;

   /* Set of definitions that must be reconstructed in the preamble. This is
    * a subset of can_move instructions, determined after replacement.
    */
   BITSET_WORD *reconstructed_defs;

   nir_def *def;

   const nir_opt_preamble_options *options;
} opt_preamble_ctx;

static bool
instr_can_speculate(nir_instr *instr)
{
   /* Intrinsics with an ACCESS index can only be speculated if they are
    * explicitly CAN_SPECULATE.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (nir_intrinsic_has_access(intr))
         return nir_intrinsic_access(intr) & ACCESS_CAN_SPECULATE;
   }

   /* For now, everything else can be speculated. TODO: Bindless textures. */
   return true;
}

static float
get_instr_cost(nir_instr *instr, const nir_opt_preamble_options *options)
{
   /* No backend will want to hoist load_const or undef by itself, so handle
    * this for them.
    */
   if (instr->type == nir_instr_type_load_const ||
       instr->type == nir_instr_type_undef)
      return 0;

   return options->instr_cost_cb(instr, options->cb_data);
}
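
/* A minimal sketch of what a backend's instr_cost_cb might look like; the
 * weights are made up, and a real driver would use machine-specific
 * estimates:
 *
 *    static float
 *    my_instr_cost(nir_instr *instr, const void *data)
 *    {
 *       switch (instr->type) {
 *       case nir_instr_type_tex:
 *          return 5.0f;
 *       case nir_instr_type_intrinsic:
 *          return 2.0f;
 *       default:
 *          return 1.0f;
 *       }
 *    }
 */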

static bool
can_move_src(nir_src *src, void *state)
{
   opt_preamble_ctx *ctx = state;

   return ctx->states[src->ssa->index].can_move;
}

static bool
can_move_srcs(nir_instr *instr, opt_preamble_ctx *ctx)
{
   return nir_foreach_src(instr, can_move_src, ctx);
}

static bool
can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx)
{
   switch (instr->intrinsic) {
   /* Intrinsics which can always be moved */
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_fep_w_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
   case nir_intrinsic_load_cull_small_lines_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_triangle_precision_amd:
   case nir_intrinsic_load_vbo_base_agx:
      return true;

   /* Intrinsics which can be moved depending on hardware */
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
      return ctx->options->drawid_uniform;

   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_num_subgroups:
      return ctx->options->subgroup_size_uniform;

   case nir_intrinsic_load_workgroup_size:
      return ctx->options->load_workgroup_size_allowed;

   /* Intrinsics which can be moved if the sources can */
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_preamble:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_levels:
   case nir_intrinsic_image_deref_levels:
   case nir_intrinsic_bindless_image_levels:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_bindless_resource_ir3:
   case nir_intrinsic_load_const_ir3:
   case nir_intrinsic_load_constant_agx:
      return can_move_srcs(&instr->instr, ctx);

   /* Image/SSBO loads can be moved if they are CAN_REORDER and their
    * sources can be moved.
    */
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ssbo_ir3:
      return (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) &&
             can_move_srcs(&instr->instr, ctx);

   default:
      return false;
   }
}
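
/* Example of the speculation constraint enforced below: in
 *
 *    if (divergent_cond) {
 *       x = load_ssbo(...);
 *    }
 *
 * the load only runs when divergent_cond is true. Non-uniform control flow
 * is not reconstructed in the preamble, so hoisting x would execute the
 * load unconditionally, which is only safe when it is marked
 * ACCESS_CAN_SPECULATE.
 */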
static bool
can_move_instr(nir_instr *instr, opt_preamble_ctx *ctx)
{
   /* If we are only contained within uniform control flow, no speculation is
    * needed since the control flow will be reconstructed in the preamble.
    * But if we are not, we must be able to speculate instructions to move
    * them.
    */
   if (ctx->nonuniform_cf_nesting > 0 && !instr_can_speculate(instr))
      return false;

   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      /* See note below about derivatives. We have special code to convert
       * tex to txd, though, because it's a common case.
       */
      if (nir_tex_instr_has_implicit_derivative(tex) &&
          tex->op != nir_texop_tex) {
         return false;
      }
      return can_move_srcs(instr, ctx);
   }
   case nir_instr_type_alu:
      return can_move_srcs(instr, ctx);

   case nir_instr_type_intrinsic:
      return can_move_intrinsic(nir_instr_as_intrinsic(instr), ctx);

   case nir_instr_type_load_const:
   case nir_instr_type_undef:
      return true;

   case nir_instr_type_deref: {
      nir_deref_instr *deref = nir_instr_as_deref(instr);
      if (deref->deref_type == nir_deref_type_var) {
         switch (deref->modes) {
         case nir_var_uniform:
         case nir_var_mem_ubo:
            return true;
         default:
            return false;
         }
      } else {
         return can_move_srcs(instr, ctx);
      }
   }

   /* We can only move phis if all of their sources are movable, and it is a
    * phi for an if-else that is itself movable.
    */
   case nir_instr_type_phi: {
      nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);
      if (!prev_node)
         return false;

      if (prev_node->type != nir_cf_node_if) {
         assert(prev_node->type == nir_cf_node_loop);
         return false;
      }

      nir_if *nif = nir_cf_node_as_if(prev_node);
      if (!can_move_src(&nif->condition, ctx))
         return false;

      return can_move_srcs(instr, ctx);
   }

   default:
      return false;
   }
}

/* True if we should avoid making this a candidate. This is only called on
 * instructions we've already determined we can move; it just makes it so
 * that uses of this instruction cannot be rewritten. Typically this happens
 * because of static constraints on the IR, for example some deref chains
 * cannot be broken.
 */
static bool
avoid_instr(nir_instr *instr, const nir_opt_preamble_options *options)
{
   if (instr->type == nir_instr_type_deref)
      return true;

   return options->avoid_instr_cb(instr, options->cb_data);
}

static bool
update_src_value(nir_src *src, void *data)
{
   opt_preamble_ctx *ctx = data;

   def_state *state = &ctx->states[ctx->def->index];
   def_state *src_state = &ctx->states[src->ssa->index];

   assert(src_state->can_move);

   /* If an instruction has can_move and non-can_move users, it becomes a
    * candidate and its value shouldn't propagate downwards. For example,
    * imagine a chain like this:
    *
    *         -- F (cannot move)
    *        /
    * A <-- B <-- C <-- D <-- E (cannot move)
    *
    * B and D are marked candidates. Picking B removes A and B, picking D
    * removes C and D, and picking both removes all 4. Therefore B and D are
    * independent and B's value shouldn't flow into D.
    *
    * A similar argument holds for must_stay values.
    */
   if (!src_state->must_stay && !src_state->candidate)
      state->value += src_state->value;
   return true;
}

static int
candidate_sort(const void *data1, const void *data2)
{
   const def_state *state1 = *(def_state **)data1;
   const def_state *state2 = *(def_state **)data2;

   float value1 = state1->value / state1->size;
   float value2 = state2->value / state2->size;
   if (value1 < value2)
      return 1;
   else if (value1 > value2)
      return -1;
   else
      return 0;
}

static bool
calculate_can_move_for_block(opt_preamble_ctx *ctx, nir_block *block)
{
   bool all_can_move = true;

   nir_foreach_instr(instr, block) {
      nir_def *def = nir_instr_def(instr);
      if (!def)
         continue;

      def_state *state = &ctx->states[def->index];
      state->can_move = can_move_instr(instr, ctx);
      all_can_move &= state->can_move;
   }

   return all_can_move;
}

static bool
calculate_can_move_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
{
   bool all_can_move = true;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         all_can_move &=
            calculate_can_move_for_block(ctx, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);
         bool uniform = can_move_src(&nif->condition, ctx);

         if (!uniform)
            ctx->nonuniform_cf_nesting++;

         bool if_can_move = uniform;
         if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->then_list);
         if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->else_list);

         if (!uniform)
            ctx->nonuniform_cf_nesting--;

         all_can_move &= if_can_move;
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(node);

         /* Conservatively treat loops like conditional control flow, since
          * an instruction might be conditionally unreachable due to an
          * earlier break in a loop that executes only one iteration.
          */
         ctx->nonuniform_cf_nesting++;
         calculate_can_move_for_cf_list(ctx, &loop->body);
         ctx->nonuniform_cf_nesting--;
         all_can_move = false;
         break;
      }

      default:
         unreachable("Unexpected CF node type");
      }
   }

   return all_can_move;
}
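
/* A rough before/after sketch of the rewrite performed below (the offset and
 * intrinsic spellings are illustrative). Given a computation that is uniform
 * across the draw:
 *
 *    x = load_ubo(0, 0)
 *    y = fmul(x, 2.0)
 *
 * the preamble receives a clone of the computation plus a store:
 *
 *    x = load_ubo(0, 0)
 *    y = fmul(x, 2.0)
 *    store_preamble(y, .base = offset)
 *
 * while in the main shader y's uses are rewritten to:
 *
 *    y = load_preamble(.base = offset)
 */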
static void
replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
                  struct hash_table *remap_table, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      nir_def *def = nir_instr_def(instr);
      if (!def)
         continue;

      /* Only replace what we actually need. This is a micro-optimization for
       * compile-time performance of regular instructions, but it's required
       * for correctness with phi nodes, since we might not reconstruct the
       * corresponding if.
       */
      if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
         continue;

      def_state *state = &ctx->states[def->index];
      assert(state->can_move && "reconstructed => can_move");

      nir_instr *clone;

      if (instr->type == nir_instr_type_phi) {
         /* Phis are special since they can't be cloned with nir_instr_clone */
         nir_phi_instr *phi = nir_instr_as_phi(instr);

         nir_cf_node *nif_cf = nir_cf_node_prev(&block->cf_node);
         assert(nif_cf->type == nir_cf_node_if && "only if's are movable");
         nir_if *nif = nir_cf_node_as_if(nif_cf);

         nir_block *then_block = nir_if_last_then_block(nif);
         nir_block *else_block = nir_if_last_else_block(nif);

         nir_def *then_def = NULL, *else_def = NULL;

         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == then_block) {
               assert(then_def == NULL);
               then_def = phi_src->src.ssa;
            } else if (phi_src->pred == else_block) {
               assert(else_def == NULL);
               else_def = phi_src->src.ssa;
            } else {
               unreachable("Invalid predecessor for phi of if");
            }
         }

         assert(exec_list_length(&phi->srcs) == 2 && "only if's are movable");
         assert(then_def && else_def && "all sources seen");

         /* Remap */
         then_def = _mesa_hash_table_search(remap_table, then_def)->data;
         else_def = _mesa_hash_table_search(remap_table, else_def)->data;

         b->cursor =
            nir_before_block_after_phis(nir_cursor_current_block(b->cursor));

         nir_def *repl = nir_if_phi(b, then_def, else_def);
         clone = repl->parent_instr;

         _mesa_hash_table_insert(remap_table, &phi->def, repl);
      } else {
         clone = nir_instr_clone_deep(b->shader, instr, remap_table);
         nir_builder_instr_insert(b, clone);
      }

      if (clone->type == nir_instr_type_tex) {
         nir_tex_instr *tex = nir_instr_as_tex(clone);
         if (tex->op == nir_texop_tex) {
            /* For maximum compatibility, replace normal textures with
             * textureGrad with a gradient of 0.
             * TODO: Handle txb somehow.
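             *
             * For example, tex(coord) effectively becomes
             * txd(coord, ddx = 0, ddy = 0).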
             */
            b->cursor = nir_before_instr(clone);

            nir_def *zero =
               nir_imm_zero(b, tex->coord_components - tex->is_array, 32);
            nir_tex_instr_add_src(tex, nir_tex_src_ddx, zero);
            nir_tex_instr_add_src(tex, nir_tex_src_ddy, zero);
            tex->op = nir_texop_txd;

            b->cursor = nir_after_instr(clone);
         }
      }

      if (state->replace) {
         nir_def *clone_def = nir_instr_def(clone);
         nir_store_preamble(b, clone_def, .base = state->offset);
      }
   }
}

static void
replace_for_cf_list(nir_builder *b, opt_preamble_ctx *ctx,
                    struct hash_table *remap_table, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block: {
         replace_for_block(b, ctx, remap_table, nir_cf_node_as_block(node));
         break;
      }

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* If we moved something that requires reconstructing the if, do so */
         if (_mesa_set_search(ctx->reconstructed_ifs, nif)) {
            assert(can_move_src(&nif->condition, ctx));

            struct hash_entry *entry =
               _mesa_hash_table_search(remap_table, nif->condition.ssa);
            assert(entry != NULL && "can_move condition, def dominates use");
            nir_def *remap_cond = entry->data;

            nir_if *reconstructed_nif = nir_push_if(b, remap_cond);

            b->cursor = nir_before_cf_list(&reconstructed_nif->then_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->then_list);

            b->cursor = nir_before_cf_list(&reconstructed_nif->else_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->else_list);

            nir_pop_if(b, reconstructed_nif);
            b->cursor = nir_after_cf_node(&reconstructed_nif->cf_node);
         } else {
            replace_for_cf_list(b, ctx, remap_table, &nif->then_list);
            replace_for_cf_list(b, ctx, remap_table, &nif->else_list);
         }

         break;
      }

      case nir_cf_node_loop: {
         /* We don't try to reconstruct loops */
         nir_loop *loop = nir_cf_node_as_loop(node);
         replace_for_cf_list(b, ctx, remap_table, &loop->body);
         break;
      }

      default:
         unreachable("Unexpected CF node type");
      }
   }
}

/*
 * If an if-statement contains an instruction that cannot be speculated, the
 * if-statement must be reconstructed so we avoid the speculation. This
 * applies even for nested if-statements. Determine which if-statements must
 * be reconstructed for this reason by walking the program forward and
 * looking inside uniform if's.
 *
 * Returns whether the CF list contains a reconstructed instruction that
 * would otherwise be speculated, updating the reconstructed_ifs set. This
 * depends on reconstructed_defs being correctly set by analyze_reconstructed.
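 *
 * For example, assuming the load below is not CAN_SPECULATE:
 *
 *    if (uniform_cond) {
 *       x = load_ssbo(...);
 *    }
 *
 * then if x is moved into the preamble, the if must be reconstructed there
 * so the load still only executes when uniform_cond is true.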
 */
static bool
analyze_speculation_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
{
   bool reconstruct_cf_list = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block: {
         nir_foreach_instr(instr, nir_cf_node_as_block(node)) {
            nir_def *def = nir_instr_def(instr);
            if (!def)
               continue;

            if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
               continue;

            if (!instr_can_speculate(instr)) {
               reconstruct_cf_list = true;
               break;
            }
         }

         break;
      }

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(node);

         /* If we can move the if, we might need to reconstruct */
         if (can_move_src(&nif->condition, ctx)) {
            bool any = false;
            any |= analyze_speculation_for_cf_list(ctx, &nif->then_list);
            any |= analyze_speculation_for_cf_list(ctx, &nif->else_list);

            if (any)
               _mesa_set_add(ctx->reconstructed_ifs, nif);

            reconstruct_cf_list |= any;
         }

         break;
      }

      /* We don't reconstruct loops */
      default:
         break;
      }
   }

   return reconstruct_cf_list;
}

static bool
mark_reconstructed(nir_src *src, void *state)
{
   BITSET_WORD *reconstructed_defs = state;
   BITSET_SET(reconstructed_defs, src->ssa->index);
   return true;
}

/*
 * If a phi is moved into the preamble, then the if it depends on must also
 * be moved. However, it is not necessary to consider any nested control
 * flow. As an example, if we have a shader:
 *
 *    if (not movable condition) {
 *       if (movable condition) {
 *          x = movable
 *       }
 *       y = phi x, movable
 *       z = floor y
 *    }
 *
 * then if 'z' is in the replace set, we need to reconstruct the inner if,
 * but not the outer if, unless there's also speculation to worry about.
 *
 * We do this by marking defs that need to be reconstructed, with a backwards
 * sweep of the program (compatible with reverse dominance), and marking the
 * if's preceding reconstructed phis.
 */
static void
analyze_reconstructed(opt_preamble_ctx *ctx, nir_function_impl *impl)
{
   nir_foreach_block_reverse(block, impl) {
      /* If an if-statement is reconstructed, its condition must be as well */
      nir_if *nif = nir_block_get_following_if(block);
      if (nif && _mesa_set_search(ctx->reconstructed_ifs, nif))
         BITSET_SET(ctx->reconstructed_defs, nif->condition.ssa->index);

      nir_foreach_instr_reverse(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx->states[def->index];

         /* Anything that's replaced must be reconstructed */
         if (state->replace)
            BITSET_SET(ctx->reconstructed_defs, def->index);
         else if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
            continue;

         /* If it must be reconstructed, it had better be movable */
         assert(state->can_move);

         /* Anything that depends on something reconstructed is reconstructed */
         nir_foreach_src(instr, mark_reconstructed, ctx->reconstructed_defs);

         /* Reconstructed phis need their ifs reconstructed */
         if (instr->type == nir_instr_type_phi) {
            nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);

            /* Invariants guaranteed by can_move_instr */
            assert(prev_node != NULL);
            assert(prev_node->type == nir_cf_node_if);

            nir_if *nif = nir_cf_node_as_if(prev_node);
            assert(can_move_src(&nif->condition, ctx));

            /* Mark the if for reconstruction */
            _mesa_set_add(ctx->reconstructed_ifs, nif);
         }
      }
   }
}

bool
nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
                 unsigned *size)
{
   opt_preamble_ctx ctx = {
      .options = options,
   };

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   ctx.states = calloc(impl->ssa_alloc, sizeof(*ctx.states));

   /* Step 1: Calculate can_move */
   calculate_can_move_for_cf_list(&ctx, &impl->body);

   /* Step 2: Calculate is_candidate. This is complicated by the presence of
    * non-candidate instructions like derefs whose users cannot be rewritten.
    * If a deref chain is used at all by a non-can_move thing, then any
    * offset sources anywhere along the chain should be considered candidates
    * because the entire deref chain will never be deleted, but if it's only
    * used by can_move things then it becomes subsumed by its users and none
    * of the offset sources should be considered candidates, as they will be
    * removed when the users of the deref chain are moved. We need to replace
    * "are there any non-can_move users" with "are there any non-can_move
    * users, *recursing through non-candidate users*". We do this by walking
    * backward and marking when a non-candidate instruction must stay in the
    * final program because it has a non-can_move user, including recursively.
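    *
    * For example, if a deref chain a[i] feeds both a movable load and a
    * non-movable store, the chain must stay in the main shader, so i remains
    * a candidate; if every user of the chain is movable, the whole chain is
    * subsumed by its users and i is not a candidate on its own.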
    */
   unsigned num_candidates = 0;
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->can_move)
            continue;

         state->value = get_instr_cost(instr, options);
         bool is_candidate = !avoid_instr(instr, options);
         state->candidate = false;
         state->must_stay = false;
         nir_foreach_use_including_if(use, def) {
            bool is_can_move_user;

            if (nir_src_is_if(use)) {
               is_can_move_user = false;
            } else {
               nir_def *use_def = nir_instr_def(nir_src_parent_instr(use));
               is_can_move_user = use_def != NULL &&
                                  ctx.states[use_def->index].can_move &&
                                  !ctx.states[use_def->index].must_stay;
            }

            if (is_can_move_user) {
               state->can_move_users++;
            } else {
               if (is_candidate)
                  state->candidate = true;
               else
                  state->must_stay = true;
            }
         }

         if (state->candidate)
            num_candidates++;
      }
   }

   if (num_candidates == 0) {
      free(ctx.states);
      return false;
   }

   def_state **candidates = malloc(sizeof(*candidates) * num_candidates);
   unsigned candidate_idx = 0;
   unsigned total_size = 0;

   /* Step 3: Calculate the value of candidates by propagating downwards. We
    * try to share the value amongst can_move uses, in case there are
    * multiple. This won't always find the optimal solution, but is hopefully
    * a good heuristic.
    *
    * Note that we use the can_move adjusted in the last pass, because if a
    * can_move instruction cannot be moved because it's not a candidate and
    * it has a non-can_move source then we don't want to count it as a use.
    *
    * While we're here, also collect an array of candidates.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->can_move || state->must_stay)
            continue;

         ctx.def = def;
         nir_foreach_src(instr, update_src_value, &ctx);

         /* If this instruction is a candidate, its value shouldn't be
          * propagated, so we skip dividing it.
          *
          * Note: if it's can_move but not a candidate, then all its users
          * must be can_move, so if there are no users then it must be dead.
          */
         if (!state->candidate && !state->must_stay) {
            if (state->can_move_users > 0)
               state->value /= state->can_move_users;
            else
               state->value = 0;
         }

         if (state->candidate) {
            state->benefit = state->value -
                             options->rewrite_cost_cb(def, options->cb_data);

            if (state->benefit > 0) {
               options->def_size(def, &state->size, &state->align);
               total_size = ALIGN_POT(total_size, state->align);
               total_size += state->size;
               candidates[candidate_idx++] = state;
            }
         }
      }
   }

   assert(candidate_idx <= num_candidates);
   num_candidates = candidate_idx;

   if (num_candidates == 0) {
      free(ctx.states);
      free(candidates);
      return false;
   }

   /* Step 4: Figure out which candidates we're going to replace and assign
    * an offset for each. Assuming there is no expression sharing, this is
    * similar to the 0-1 knapsack problem, except when there is a gap
    * introduced by alignment. We use a well-known greedy approximation,
    * sorting by value divided by size.
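    *
    * For example, with 8 units of storage left, a candidate of value 6.0
    * and size 4 (density 1.5) is taken before one of value 8.0 and size 8
    * (density 1.0).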
    */
   if (((*size) + total_size) > options->preamble_storage_size) {
      qsort(candidates, num_candidates, sizeof(*candidates), candidate_sort);
   }

   unsigned offset = *size;
   for (unsigned i = 0; i < num_candidates; i++) {
      def_state *state = candidates[i];
      offset = ALIGN_POT(offset, state->align);

      if (offset + state->size > options->preamble_storage_size)
         break;

      state->replace = true;
      state->offset = offset;

      offset += state->size;
   }

   *size = offset;

   free(candidates);

   /* Determine which if's need to be reconstructed, based on the
    * replacements we did.
    */
   ctx.reconstructed_ifs = _mesa_pointer_set_create(NULL);
   ctx.reconstructed_defs = calloc(BITSET_WORDS(impl->ssa_alloc),
                                   sizeof(BITSET_WORD));
   analyze_reconstructed(&ctx, impl);

   /* If we make progress analyzing speculation, we need to re-analyze
    * reconstructed defs to get the if-conditions in there.
    */
   if (analyze_speculation_for_cf_list(&ctx, &impl->body))
      analyze_reconstructed(&ctx, impl);

   /* Step 5: Actually do the replacement. */
   struct hash_table *remap_table =
      _mesa_pointer_hash_table_create(NULL);
   nir_function_impl *preamble =
      nir_shader_get_preamble(impl->function->shader);
   nir_builder preamble_builder = nir_builder_at(nir_before_impl(preamble));
   nir_builder *b = &preamble_builder;

   replace_for_cf_list(b, &ctx, remap_table, &impl->body);

   nir_builder builder = nir_builder_create(impl);
   b = &builder;

   unsigned max_index = impl->ssa_alloc;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         nir_def *def = nir_instr_def(instr);
         if (!def)
            continue;

         /* Ignore new load_preamble instructions */
         if (def->index >= max_index)
            continue;

         def_state *state = &ctx.states[def->index];
         if (!state->replace)
            continue;

         b->cursor = nir_after_instr_and_phis(instr);

         nir_def *new_def =
            nir_load_preamble(b, def->num_components, def->bit_size,
                              .base = state->offset);

         nir_def_rewrite_uses(def, new_def);
         nir_instr_free_and_dce(instr);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   ralloc_free(remap_table);
   free(ctx.states);
   free(ctx.reconstructed_defs);
   _mesa_set_destroy(ctx.reconstructed_ifs, NULL);
   return true;
}