/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/hash_table.h"
#include "util/set.h"
#include "nir.h"
#include "nir_builder.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read or outputs_written bitfield
 * corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ? var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= 0);
   assert(location < 64);

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return BITFIELD64_MASK(slots) << location;
}

static bool
is_non_generic_patch_var(nir_variable *var)
{
   return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX1;
}

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  if (is_non_generic_patch_var(var))
                     continue;

                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}

static bool
remove_unused_io_access(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
{
   nir_variable_mode mode = *(nir_variable_mode *)cb_data;

   unsigned srcn = 0;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_vertex:
      break;
   case nir_intrinsic_copy_deref:
      srcn = mode == nir_var_shader_in ? 1 : 0;
      break;
   default:
      return false;
   }

   nir_variable *var = nir_intrinsic_get_var(intrin, srcn);
   if (!var || var->data.mode != mode || var->data.location != NUM_TOTAL_VARYING_SLOTS)
      return false;

   if (intrin->intrinsic != nir_intrinsic_store_deref &&
       intrin->intrinsic != nir_intrinsic_copy_deref) {
      b->cursor = nir_before_instr(&intrin->instr);
      nir_def *undef = nir_undef(b, intrin->num_components, intrin->def.bit_size);
      nir_def_rewrite_uses(&intrin->def, undef);
   }

   nir_instr_remove(&intrin->instr);
   nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[srcn]));

   return true;
}
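
/* Illustrative sketch, not part of the original file: how a caller could
 * accumulate an outputs_written mask with get_variable_io_mask() above.
 * The function name is hypothetical.
 */
#if 0
static uint64_t
example_outputs_written_mask(nir_shader *shader)
{
   uint64_t written = 0;
   nir_foreach_shader_out_variable(var, shader) {
      if (!var->data.patch)
         written |= get_variable_io_mask(var, shader->info.stage);
   }
   return written;
}
#endif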
/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *            progress;
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   uint64_t read[4] = { 0 };
   uint64_t patches_read[4] = { 0 };
   if (mode == nir_var_shader_out)
      add_output_reads(shader, read, patches_read);

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0 &&
          !(shader->info.stage == MESA_SHADER_MESH && var->data.location == VARYING_SLOT_PRIMITIVE_ID))
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = 0;
      uint64_t this_stage = 0;
      for (unsigned i = 0; i < get_num_components(var); i++) {
         other_stage |= used[var->data.location_frac + i];
         this_stage |= (var->data.patch ? patches_read : read)[var->data.location_frac + i];
      }

      uint64_t var_mask = get_variable_io_mask(var, shader->info.stage);
      if (!((other_stage | this_stage) & var_mask)) {
         /* Mark the variable as removed by setting the location to an invalid value. */
         var->data.location = NUM_TOTAL_VARYING_SLOTS;
         exec_node_remove(&var->node);
         progress = true;
      }
   }

   if (progress) {
      nir_shader_intrinsics_pass(shader, &remove_unused_io_access, nir_metadata_control_flow, &mode);
   } else {
      nir_shader_preserve_all_metadata(shader);
   }

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_shader_out_variable(var, producer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
                                        patches_written) ||
              progress;

   return progress;
}
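
/* Illustrative sketch, not part of the original file: a linker would call
 * the pass above once per adjacent stage pair; "vs", "gs" and "fs" are
 * hypothetical shaders from the caller's link step.
 */
#if 0
bool progress = false;
progress |= nir_remove_unused_varyings(vs, gs);
progress |= nir_remove_unused_varyings(gs, fs);
/* Demoted variables can then be eliminated by the caller's dead-code passes. */
#endif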
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (var->data.per_primitive)
      return INTERP_MODE_NONE;
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps {
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_mediump;
   bool is_per_primitive;
};

/* Packing arrays and dual slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type) &&
             !var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ? glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
            comps[location + i].is_mediump =
               var->data.precision == GLSL_PRECISION_MEDIUM ||
               var->data.precision == GLSL_PRECISION_LOW;
            comps[location + i].is_per_primitive = var->data.per_primitive;
         }
      }
   }
}
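
/* Worked example (added, not in the original): a dvec3 at location_frac 0
 * has elements = 3, dmul = 2 and occupies two slots. The first slot claims
 * (1 << 4) - 1 (xyzw); comps_slot2 = 3 * 2 - 4 = 2, so the second slot
 * claims (1 << 2) - 1 (xy), matching the ARB_enhanced_layouts packing of
 * doubles assumed above.
 */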
struct varying_loc {
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
                                          BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   const gl_shader_stage stage = shader->info.stage;
   uint64_t out_slots_read_tmp[2] = { 0 };
   uint64_t slots_used_tmp[2] = { 0 };

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}
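
/* Illustrative sketch, not in the original: the remap table above is indexed
 * by the old generic location and component. A hypothetical entry moving
 * VARYING_SLOT_VAR5.z to VARYING_SLOT_VAR2.x would be:
 */
#if 0
struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = { { { 0 }, { 0 } } };
remap[5][2].location = VARYING_SLOT_VAR2; /* new slot */
remap[5][2].component = 0;                /* new component (.x) */
#endif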
struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool is_per_primitive;
   bool is_mediump;
   bool is_intra_stage_only;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *)comp1_v;
   struct varying_component *comp2 = (struct varying_component *)comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* Sort per-primitive outputs after per-vertex ones to allow
    * better compaction when they are mixed in the shader's source.
    */
   if (comp1->is_per_primitive != comp2->is_per_primitive)
      return comp1->is_per_primitive ? 1 : -1;

   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
      return comp1->is_intra_stage_only ? 1 : -1;

   /* Group mediump varyings together. */
   if (comp1->is_mediump != comp2->is_mediump)
      return comp1->is_mediump ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   const struct nir_variable_data *const data1 = &comp1->var->data;
   const struct nir_variable_data *const data2 = &comp2->var->data;
   if (data1->location != data2->location)
      return data1->location - data2->location;
   return (int)data1->location_frac - (int)data2->location_frac;
}
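
/* Example ordering (added for clarity): after qsort with the comparator
 * above, components are sorted by is_patch, then is_per_primitive, then
 * is_intra_stage_only, then is_mediump, then interpolation type and
 * location, and finally by original (location, location_frac). So all
 * per-vertex, cross-stage, non-mediump components come first and patches
 * come last.
 */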
static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = { { 0 } };
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_shader_out_variable(var, producer) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, producer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(deref, nir_var_shader_in))
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         /* Do not remap per-vertex shader inputs because it's an array of
          * 3 elements and this isn't supported.
          */
         if (in_var->data.per_vertex)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_arrayed_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
            vc_info->is_per_primitive = in_var->data.per_primitive;
            vc_info->is_mediump = !(producer->options->io_options & nir_io_mediump_is_32bit) &&
                                  (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
                                   in_var->data.precision == GLSL_PRECISION_LOW);
            vc_info->is_intra_stage_only = false;
            vc_info->initialised = true;
         }
      }
   }

   /* Walk over the shader and populate the varying component info array
    * for varyings which are read by other TCS instances but are not consumed
    * by the TES.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      impl = nir_shader_get_entrypoint(producer);

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            /* We only remap things that aren't builtins. */
            nir_variable *out_var = nir_deref_instr_get_variable(deref);
            if (out_var->data.location < VARYING_SLOT_VAR0)
               continue;

            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
            if (location >= MAX_VARYINGS_INCL_PATCH)
               continue;

            unsigned var_info_idx =
               store_varying_info_idx[location][out_var->data.location_frac];
            if (!var_info_idx) {
               /* Something went wrong, the shader interfaces didn't match, so
                * abandon packing. This can happen for example when the
                * inputs are scalars but the outputs are struct members.
                */
               *varying_comp_info_size = 0;
               break;
            }

            struct varying_component *vc_info =
               &(*varying_comp_info)[var_info_idx - 1];

            if (!vc_info->initialised) {
               const struct glsl_type *type = out_var->type;
               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               vc_info->var = out_var;
               vc_info->interp_type =
                  get_interp_type(out_var, type, default_to_smooth_interp);
               vc_info->interp_loc = get_interp_loc(out_var);
               vc_info->is_32bit = glsl_type_is_32bit(type);
               vc_info->is_patch = out_var->data.patch;
               vc_info->is_per_primitive = out_var->data.per_primitive;
               vc_info->is_mediump = !(producer->options->io_options & nir_io_mediump_is_32bit) &&
                                     (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
                                      out_var->data.precision == GLSL_PRECISION_LOW);
               vc_info->is_intra_stage_only = true;
               vc_info->initialised = true;
            }
         }
      }
   }

   for (unsigned i = 0; i < *varying_comp_info_size; i++) {
      struct varying_component *vc_info = &(*varying_comp_info)[i];
      if (!vc_info->initialised) {
         /* Something went wrong, the shader interfaces didn't match, so
          * abandon packing. This can happen for example when the outputs are
          * scalars but the inputs are struct members.
          */
         *varying_comp_info_size = 0;
         break;
      }
   }
}
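
/* Note (added): store_varying_info_idx[] above stores 1-based indices so
 * that 0 can mean "not packable"; entry i of *varying_comp_info corresponds
 * to store_varying_info_idx[loc][frac] == i + 1.
 */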
static bool
allow_pack_interp_type(nir_io_options options, int type)
{
   switch (type) {
   case INTERP_MODE_NONE:
   case INTERP_MODE_SMOOTH:
   case INTERP_MODE_NOPERSPECTIVE:
      return options & nir_io_has_flexible_input_interpolation_except_flat;
   default:
      return false;
   }
}

static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location,
                       nir_io_options options)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* Don't pack per-primitive and per-vertex varyings together. */
         if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching precision. */
         if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching interpolation types,
          * unless the driver supports flexible interpolation for both of
          * them.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
             (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
              !allow_pack_interp_type(options, info->interp_type))) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching interpolation locations,
          * unless the driver supports flexible interpolation locations.
          */
         if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
             !(options & nir_io_has_flexible_input_interpolation_except_flat)) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
      assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}
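
/* Worked example (added, not in the original): suppose
 * assigned_comps[0].comps == 0x3 (x and y used) and the incoming component
 * is a compatible 32-bit varying. The inner while loop advances tmp_comp to
 * 2, so the component is assigned to generic slot 0 (VARYING_SLOT_VAR0),
 * component .z, and assigned_comps[0].comps becomes 0x7.
 */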
/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = { { { 0 }, { 0 } } };
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(producer, consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
                                consumer->options->io_options);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING,
                                consumer->options->io_options);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING,
                                   consumer->options->io_options);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(consumer, nir_var_shader_in, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(producer, nir_var_shader_out, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}
/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = { { 0 } };

   get_unmoveable_components_masks(producer, nir_var_shader_out,
                                   assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(consumer, nir_var_shader_in,
                                   assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
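
/* Illustrative sketch, not from this file: the expected link-time ordering
 * for a hypothetical producer/consumer pair, per the comment above.
 */
#if 0
if (nir_remove_unused_varyings(producer, consumer)) {
   /* Run the caller's dead-code/variable-removal passes here so that
    * inputs_read/outputs_written are up to date before compaction.
    */
}
nir_compact_varyings(producer, consumer, true /* default_to_smooth_interp */);
#endif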
/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING][4] = { 0 };

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location][var->data.location_frac] = var;
      }
   }

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location][var->data.location_frac]) {
            input_vars[location][var->data.location_frac]->data.always_active_io = true;
         }
      }
   }
}
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac &&
          in_var->type == out_var->type;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_shader_in_variable(var, consumer) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}
static bool
replace_varying_input_by_constant_load(nir_shader *shader,
                                       nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b = nir_builder_create(impl);

   nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                         intr->def.bit_size,
                                         out_const->value);

         nir_def_rewrite_uses(&intr->def, nconst);

         progress = true;
      }
   }

   return progress;
}

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b = nir_builder_create(impl);

   nir_variable *dup_out_var = nir_intrinsic_get_var(dup_store_intr, 0);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var) ||
             in_var->data.per_vertex)
            continue;

         b.cursor = nir_before_instr(instr);

         nir_def *load = nir_load_var(&b, input_var);
         nir_def_rewrite_uses(&intr->def, load);

         progress = true;
      }
   }

   return progress;
}
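
/* Before/after sketch (added, not in the original): if the producer's last
 * block ends with
 *
 *    store_deref(out_color, load_const (1.0))
 *
 * then replace_varying_input_by_constant_load() rewrites every matching
 * load_deref(in_color) in the consumer to the immediate 1.0, leaving the
 * varying itself dead for later removal.
 */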
static bool
is_direct_uniform_load(nir_def *def, nir_scalar *s)
{
   /* def is sure to be scalar, as can_replace_varying() filters out the
    * vector case.
    */
   assert(def->num_components == 1);

   /* Uniform load may hide behind some move instruction for converting
    * vector to scalar:
    *
    *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
    *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
    *     vec1 32 ssa_3 = mov ssa_2.x
    *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
    *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
    */
   *s = nir_scalar_resolved(def, 0);

   nir_def *ssa = s->def;
   if (ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   /* TODO: support nir_var_mem_ubo. */
   if (!nir_deref_mode_is(deref, nir_var_uniform))
      return false;

   /* Does not support indirect uniform load. */
   return !nir_deref_instr_has_indirect(deref);
}

/**
 * Add a uniform variable from one shader to a different shader.
 *
 * \param nir     The shader where to add the uniform
 * \param uniform The uniform that's declared in another shader.
 */
nir_variable *
nir_clone_uniform_variable(nir_shader *nir, nir_variable *uniform, bool spirv)
{
   /* Find if the uniform already exists in the consumer. */
   nir_variable *new_var = NULL;
   nir_foreach_variable_with_modes(v, nir, uniform->data.mode) {
      if ((spirv && uniform->data.mode & nir_var_mem_ubo &&
           v->data.binding == uniform->data.binding) ||
          (!spirv &&
           (!strcmp(uniform->name, v->name) &&
            uniform->data.explicit_binding == v->data.explicit_binding &&
            uniform->data.binding == v->data.binding))) {
         new_var = v;
         break;
      }
   }

   /* Create the variable if it doesn't exist. */
   if (!new_var) {
      new_var = nir_variable_clone(uniform, nir);
      nir_shader_add_variable(nir, new_var);
   }

   return new_var;
}

nir_deref_instr *
nir_clone_deref_instr(nir_builder *b, nir_variable *var,
                      nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var)
      return nir_build_deref_var(b, var);

   nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
   nir_deref_instr *parent = nir_clone_deref_instr(b, var, parent_deref);

   /* Build array and struct deref instructions.
    * The "deref" instr is sure to be direct (see is_direct_uniform_load()).
    */
   switch (deref->deref_type) {
   case nir_deref_type_array: {
      if (b->shader ==
          nir_cf_node_get_function(&deref->instr.block->cf_node)->function->shader) {
         /* Cloning within the same shader. */
         return nir_build_deref_array(b, parent, deref->arr.index.ssa);
      } else {
         /* Cloning to a different shader. The index must be constant because
          * we don't implement cloning the index SSA here.
          */
         nir_load_const_instr *index =
            nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
         return nir_build_deref_array_imm(b, parent, index->value->i64);
      }
   }
   case nir_deref_type_ptr_as_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      nir_def *ssa = nir_imm_intN_t(b, index->value->i64,
                                    parent->def.bit_size);
      return nir_build_deref_ptr_as_array(b, parent, ssa);
   }
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, parent, deref->strct.index);
   default:
      unreachable("invalid type");
      return NULL;
   }
}
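
/* Illustrative sketch, not in the original: cloning a direct deref chain
 * such as &color[2].member rebuilds var -> array(2) -> struct(member) on
 * top of the new variable, which is why is_direct_uniform_load() rejects
 * indirect (non-constant) indices before we ever get here.
 */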
static bool
replace_varying_input_by_uniform_load(nir_shader *shader,
                                      nir_intrinsic_instr *store_intr,
                                      nir_scalar *scalar)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b = nir_builder_create(impl);

   nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_variable *uni_var = nir_deref_instr_get_variable(deref);
   uni_var = nir_clone_uniform_variable(shader, uni_var, false);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         /* Clone instructions starting from the deref load down to the
          * variable deref.
          */
         nir_deref_instr *uni_deref = nir_clone_deref_instr(&b, uni_var, deref);
         nir_def *uni_def = nir_load_deref(&b, uni_deref);

         /* Add a vector to scalar move if the uniform is a vector. */
         if (uni_def->num_components > 1) {
            nir_alu_src src = { 0 };
            src.src = nir_src_for_ssa(uni_def);
            src.swizzle[0] = scalar->comp;
            uni_def = nir_mov_alu(&b, src, 1);
         }

         /* Replace the load input with the load uniform. */
         nir_def_rewrite_uses(&intr->def, uni_def);

         progress = true;
      }
   }

   return progress;
}

/* The GLSL ES 3.20 spec says:
 *
 * "The precision of a vertex output does not need to match the precision of
 * the corresponding fragment input. The minimum precision at which vertex
 * outputs are interpolated is the minimum of the vertex output precision and
 * the fragment input precision, with the exception that for highp,
 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
 * Output Matching by Name in Linked Programs")
 *
 * To implement this, when linking shaders we will take the minimum precision
 * qualifier (allowing drivers to interpolate at lower precision). For
 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
 * requires we use the *last* specified precision if there is a conflict.
 *
 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
 * NONE, we'll return the other precision, since there is no conflict.
 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
 * "backwards". For non-fragment stages, we'll pick the latter precision to
 * comply with the spec. (Note that the order matters.)
 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them in the
 * transform feedback memory store, but not the output store.
 */

static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
{
   if (producer == GLSL_PRECISION_NONE)
      return consumer;
   else if (consumer == GLSL_PRECISION_NONE)
      return producer;
   else
      return fs ? MAX2(producer, consumer) : consumer;
}
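
/* Worked example (added): assuming the raw enum order NONE < HIGH < MEDIUM
 * < LOW stated above, a highp vertex output feeding a mediump fragment
 * input gives MAX2(HIGH, MEDIUM) == MEDIUM, i.e. the lower of the two
 * precisions, as the spec requires.
 */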
static nir_variable *
find_consumer_variable(const nir_shader *consumer,
                       const nir_variable *producer_var)
{
   nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) {
      if (var->data.location == producer_var->data.location &&
          var->data.location_frac == producer_var->data.location_frac)
         return var;
   }
   return NULL;
}

void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
{
   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;

   nir_foreach_shader_out_variable(producer_var, producer) {
      /* Skip if the slot is not assigned */
      if (producer_var->data.location < 0)
         continue;

      nir_variable *consumer_var = find_consumer_variable(consumer,
                                                          producer_var);

      /* Skip if the variable will be eliminated */
      if (!consumer_var)
         continue;

      /* Now we have a pair of variables. Let's pick the smaller precision. */
      unsigned precision_1 = producer_var->data.precision;
      unsigned precision_2 = consumer_var->data.precision;
      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);

      /* Propagate the new precision */
      producer_var->data.precision = consumer_var->data.precision = minimum;
   }
}
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      nir_def *ssa = intr->src[1].ssa;
      if (ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_varying_input_by_constant_load(consumer, intr);
         continue;
      }

      nir_scalar uni_scalar;
      if (consumer->options->max_varying_expression_cost >= 2 &&
          is_direct_uniform_load(ssa, &uni_scalar)) {
         progress |= replace_varying_input_by_uniform_load(consumer, intr,
                                                           &uni_scalar);
         continue;
      }

      struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
      if (entry) {
         progress |= replace_duplicate_input(consumer,
                                             (nir_variable *)entry->data,
                                             intr);
      } else {
         nir_variable *in_var = get_matching_input_var(consumer, out_var);
         if (in_var) {
            _mesa_hash_table_insert(varying_values, ssa, in_var);
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}
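
/* Illustrative sketch, not in the original: a hypothetical driver call site
 * for the pass above ("vs" and "fs" are the linked shaders).
 */
#if 0
if (nir_link_opt_varyings(vs, fs)) {
   /* Constant-folded/duplicated inputs in fs are now dead; clean them up
    * with the caller's usual optimization loop.
    */
}
#endif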
/* TODO any better helper somewhere to sort a list? */

static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      /* Use the `per_primitive` bool to sort per-primitive variables
       * to the end of the list, so they get the last driver locations
       * by nir_assign_io_var_locations.
       *
       * This is done because AMD HW requires that per-primitive outputs
       * are the last params.
       * In the future we can add an option for this, if needed by other HW.
       */
      if (new_var->data.per_primitive < var->data.per_primitive ||
          (new_var->data.per_primitive == var->data.per_primitive &&
           (var->data.location > new_var->data.location ||
            (var->data.location == new_var->data.location &&
             var->data.location_frac > new_var->data.location_frac)))) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
{
   exec_list_make_empty(sorted_list);
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      exec_node_remove(&var->node);
      insert_sorted(sorted_list, var);
   }
}

void
nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode)
{
   struct exec_list vars;

   sort_varyings(shader, mode, &vars);
   exec_list_append(&shader->variables, &vars);
}
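
/* Example (added for clarity): given inputs declared at VAR1.x, VAR0.z and
 * VAR0.x, sort_varyings() yields VAR0.x, VAR0.z, VAR1.x; any per-primitive
 * variables are placed after all per-vertex ones regardless of location.
 */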
void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX][2];
   uint64_t processed_locs[2] = { 0 };

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int ASSERTED last_loc = 0;
   bool ASSERTED last_per_prim = false;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If we are inside a partial compact,
          * don't allow another compact to be in this slot
          * if it starts at component 0.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* compact variables must be arrays of scalars */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         var_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view &&
             shader->options->per_view_unique_driver_locations) {
            /* per-view variables have an extra array dimension, which is
             * ignored when counting user-facing slots (var->data.location),
             * but *not* with driver slots (var->data.driver_location). That
             * is, each user slot maps to multiple driver slots.
             */
            const struct glsl_type *array_type = var->type;
            driver_size = glsl_count_attribute_slots(array_type, false);
         } else {
            driver_size = var_size;
         }
      }

      /* Builtins don't allow component packing so we only need to worry about
       * user-defined varyings sharing the same location.
       */
      bool processed = false;
      if (var->data.location >= base) {
         unsigned glsl_location = var->data.location - base;

         for (unsigned i = 0; i < var_size; i++) {
            if (processed_locs[var->data.index] &
                ((uint64_t)1 << (glsl_location + i)))
               processed = true;
            else
               processed_locs[var->data.index] |=
                  ((uint64_t)1 << (glsl_location + i));
         }
      }

      /* Because component packing allows varyings to share the same location
       * we may already have processed this location.
       */
      if (processed) {
         /* TODO handle overlapping per-view variables */
         assert(!var->data.per_view);
         unsigned driver_location = assigned_locations[var->data.location][var->data.index];
         var->data.driver_location = driver_location;

         /* An array may be packed such that it crosses multiple other arrays
          * or variables; we need to make sure we have allocated the elements
          * consecutively if the previously processed var was shorter than
          * the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order, but per-vertex/per-primitive outputs may be
          * grouped separately.
          */
         assert(last_loc <= var->data.location ||
                last_per_prim != var->data.per_primitive);
         last_loc = var->data.location;
         last_per_prim = var->data.per_primitive;
         unsigned last_slot_location = driver_location + var_size;
         if (last_slot_location > location) {
            unsigned num_unallocated_slots = last_slot_location - location;
            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
               assigned_locations[var->data.location + i][var->data.index] = location;
               location++;
            }
         }
         continue;
      }

      for (unsigned i = 0; i < var_size; i++) {
         assigned_locations[var->data.location + i][var->data.index] = location + i;
      }

      var->data.driver_location = location;
      location += driver_size;
   }

   if (last_partial)
      location++;

   exec_list_append(&shader->variables, &io_vars);
   *size = location;
}
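
/* Illustrative sketch, not in the original: a backend might assign driver
 * locations for its fragment inputs like this ("fs" and num_inputs are
 * hypothetical).
 */
#if 0
unsigned num_inputs = 0;
nir_assign_io_var_locations(fs, nir_var_shader_in, &num_inputs,
                            MESA_SHADER_FRAGMENT);
/* num_inputs now holds the number of driver-side input slots. */
#endif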