/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#ifndef NIR_H
#define NIR_H

#include <stdint.h>
#include "compiler/glsl_types.h"
#include "compiler/glsl/list.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/compiler.h"
#include "util/enum_operators.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/u_math.h"
#include "util/u_printf.h"
#include "nir_defines.h"
#define XXH_INLINE_ALL
#include <stdio.h>
#include "util/xxhash.h"

#ifndef NDEBUG
#include "util/u_debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

extern uint32_t nir_debug;
extern bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1];

#ifndef NDEBUG
#define NIR_DEBUG(flag) unlikely(nir_debug & (NIR_DEBUG_##flag))
#else
#define NIR_DEBUG(flag) false
#endif

#define NIR_DEBUG_CLONE                  (1u << 0)
#define NIR_DEBUG_SERIALIZE              (1u << 1)
#define NIR_DEBUG_NOVALIDATE             (1u << 2)
#define NIR_DEBUG_TGSI                   (1u << 4)
#define NIR_DEBUG_PRINT_VS               (1u << 5)
#define NIR_DEBUG_PRINT_TCS              (1u << 6)
#define NIR_DEBUG_PRINT_TES              (1u << 7)
#define NIR_DEBUG_PRINT_GS               (1u << 8)
#define NIR_DEBUG_PRINT_FS               (1u << 9)
#define NIR_DEBUG_PRINT_CS               (1u << 10)
#define NIR_DEBUG_PRINT_TS               (1u << 11)
#define NIR_DEBUG_PRINT_MS               (1u << 12)
#define NIR_DEBUG_PRINT_RGS              (1u << 13)
#define NIR_DEBUG_PRINT_AHS              (1u << 14)
#define NIR_DEBUG_PRINT_CHS              (1u << 15)
#define NIR_DEBUG_PRINT_MHS              (1u << 16)
#define NIR_DEBUG_PRINT_IS               (1u << 17)
#define NIR_DEBUG_PRINT_CBS              (1u << 18)
#define NIR_DEBUG_PRINT_KS               (1u << 19)
#define NIR_DEBUG_PRINT_NO_INLINE_CONSTS (1u << 20)
#define NIR_DEBUG_PRINT_INTERNAL         (1u << 21)
#define NIR_DEBUG_PRINT_PASS_FLAGS       (1u << 22)

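/*
 * Usage sketch (illustrative, not part of the upstream header): NIR_DEBUG()
 * tests one of the flags above against the nir_debug bitfield, so debug-only
 * work is typically gated like:
 *
 *    if (NIR_DEBUG(CLONE))
 *       ... run the extra clone-and-validate path ...
 *
 * In release builds (NDEBUG defined) the macro folds to false and the whole
 * branch is compiled away.
 */
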
#define NIR_DEBUG_PRINT (NIR_DEBUG_PRINT_VS |  \
                         NIR_DEBUG_PRINT_TCS | \
                         NIR_DEBUG_PRINT_TES | \
                         NIR_DEBUG_PRINT_GS |  \
                         NIR_DEBUG_PRINT_FS |  \
                         NIR_DEBUG_PRINT_CS |  \
                         NIR_DEBUG_PRINT_TS |  \
                         NIR_DEBUG_PRINT_MS |  \
                         NIR_DEBUG_PRINT_RGS | \
                         NIR_DEBUG_PRINT_AHS | \
                         NIR_DEBUG_PRINT_CHS | \
                         NIR_DEBUG_PRINT_MHS | \
                         NIR_DEBUG_PRINT_IS |  \
                         NIR_DEBUG_PRINT_CBS | \
                         NIR_DEBUG_PRINT_KS)

#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
#define NIR_STREAM_PACKED (1 << 8)
typedef uint16_t nir_component_mask_t;

static inline bool
nir_num_components_valid(unsigned num_components)
{
   return (num_components >= 1 &&
           num_components <= 5) ||
          num_components == 8 ||
          num_components == 16;
}

/*
 * Round up a vector size to a vector size that's valid in NIR. At present, NIR
 * supports only vec2-5, vec8, and vec16. Attempting to generate other sizes
 * will fail validation.
 */
static inline unsigned
nir_round_up_components(unsigned n)
{
   return (n > 5) ? util_next_power_of_two(n) : n;
}

static inline nir_component_mask_t
nir_component_mask(unsigned num_components)
{
   assert(nir_num_components_valid(num_components));
   return (1u << num_components) - 1;
}

void
nir_process_debug_variable(void);

bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                        unsigned old_bit_size,
                                        unsigned new_bit_size);
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size);

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field,   \
                        type_field, type_value)           \
   static inline out_type *                               \
   name(const in_type *parent)                            \
   {                                                      \
      assert(parent && parent->type_field == type_value); \
      return exec_node_data(out_type, parent, field);     \
   }

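/*
 * Usage sketch (illustrative, not part of the upstream header): instantiating
 * the macro yields a checked downcast. For example, a cast from the base
 * nir_instr to nir_alu_instr (types declared later in this file) would be
 * generated by:
 *
 *    NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr,
 *                    instr, type, nir_instr_type_alu)
 *
 * which expands to a nir_instr_as_alu() helper that asserts
 * parent->type == nir_instr_type_alu before casting.
 */
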
struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;
struct nir_xfb_info;

/**
 * Description of built-in state associated with a uniform
 *
 * :c:member:`nir_variable.state_slots`
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
} nir_state_slot;

/* clang-format off */
typedef enum {
   nir_var_system_value = (1 << 0),
   nir_var_uniform      = (1 << 1),
   nir_var_shader_in    = (1 << 2),
   nir_var_shader_out   = (1 << 3),
   nir_var_image        = (1 << 4),
   /** Incoming call or ray payload data for ray-tracing shaders */
   nir_var_shader_call_data = (1 << 5),
   /** Ray hit attributes */
   nir_var_ray_hit_attrib = (1 << 6),

   /* Modes named nir_var_mem_* have explicit data layout */
   nir_var_mem_ubo             = (1 << 7),
   nir_var_mem_push_const      = (1 << 8),
   nir_var_mem_ssbo            = (1 << 9),
   nir_var_mem_constant        = (1 << 10),
   nir_var_mem_task_payload    = (1 << 11),
   nir_var_mem_node_payload    = (1 << 12),
   nir_var_mem_node_payload_in = (1 << 13),

   nir_var_function_in    = (1 << 14),
   nir_var_function_out   = (1 << 15),
   nir_var_function_inout = (1 << 16),

   /* Generic modes intentionally come last. See encode_deref_modes() in
    * nir_serialize.c for more details.
    */
   nir_var_shader_temp   = (1 << 17),
   nir_var_function_temp = (1 << 18),
   nir_var_mem_shared    = (1 << 19),
   nir_var_mem_global    = (1 << 20),

   nir_var_mem_generic = (nir_var_shader_temp |
                          nir_var_function_temp |
                          nir_var_mem_shared |
                          nir_var_mem_global),

   nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform |
                             nir_var_system_value | nir_var_mem_constant |
                             nir_var_mem_ubo,
   /* Modes where vector derefs can be indexed as arrays. nir_var_shader_out
    * is only for mesh stages. nir_var_system_value is only for kernel stages.
    */
   nir_var_vec_indexable_modes = nir_var_shader_temp | nir_var_function_temp |
                                 nir_var_mem_ubo | nir_var_mem_ssbo |
                                 nir_var_mem_shared | nir_var_mem_global |
                                 nir_var_mem_push_const | nir_var_mem_task_payload |
                                 nir_var_shader_out | nir_var_system_value,
   nir_num_variable_modes = 21,
   nir_var_all = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
/* clang-format on */

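/*
 * Illustrative note (not part of the upstream header): nir_variable_mode is a
 * bitmask, so sets of modes compose with | and membership tests are plain
 * mask checks, e.g.:
 *
 *    bool read_only = (var->data.mode & nir_var_read_only_modes) != 0;
 */
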
/**
 * Rounding modes.
 */
typedef enum {
   nir_rounding_mode_undef = 0,
   nir_rounding_mode_rtne = 1, /* round to nearest even */
   nir_rounding_mode_ru = 2,   /* round up */
   nir_rounding_mode_rd = 3,   /* round down */
   nir_rounding_mode_rtz = 4,  /* round towards zero */
} nir_rounding_mode;

/**
 * Ray query values that can be read from a RayQueryKHR object.
 */
typedef enum {
   nir_ray_query_value_intersection_type,
   nir_ray_query_value_intersection_t,
   nir_ray_query_value_intersection_instance_custom_index,
   nir_ray_query_value_intersection_instance_id,
   nir_ray_query_value_intersection_instance_sbt_index,
   nir_ray_query_value_intersection_geometry_index,
   nir_ray_query_value_intersection_primitive_index,
   nir_ray_query_value_intersection_barycentrics,
   nir_ray_query_value_intersection_front_face,
   nir_ray_query_value_intersection_object_ray_direction,
   nir_ray_query_value_intersection_object_ray_origin,
   nir_ray_query_value_intersection_object_to_world,
   nir_ray_query_value_intersection_world_to_object,
   nir_ray_query_value_intersection_candidate_aabb_opaque,
   nir_ray_query_value_tmin,
   nir_ray_query_value_flags,
   nir_ray_query_value_world_ray_direction,
   nir_ray_query_value_world_ray_origin,
   nir_ray_query_value_intersection_triangle_vertex_positions
} nir_ray_query_value;

/**
 * Intel resource flags
 */
typedef enum {
   nir_resource_intel_bindless = 1u << 0,
   nir_resource_intel_pushable = 1u << 1,
   nir_resource_intel_sampler = 1u << 2,
   nir_resource_intel_non_uniform = 1u << 3,
   nir_resource_intel_sampler_embedded = 1u << 4,
} nir_resource_data_intel;

/**
 * Which components to interpret as signed in cmat_muladd.
 * See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix.
 */
typedef enum {
   NIR_CMAT_A_SIGNED = 1u << 0,
   NIR_CMAT_B_SIGNED = 1u << 1,
   NIR_CMAT_C_SIGNED = 1u << 2,
   NIR_CMAT_RESULT_SIGNED = 1u << 3,
} nir_cmat_signed;

typedef union {
   bool b;
   float f32;
   double f64;
   int8_t i8;
   uint8_t u8;
   int16_t i16;
   uint16_t u16;
   int32_t i32;
   uint32_t u32;
   int64_t i64;
   uint64_t u64;
} nir_const_value;

#define nir_const_value_to_array(arr, c, components, m) \
   do {                                                 \
      for (unsigned i = 0; i < components; ++i)         \
         arr[i] = c[i].m;                               \
   } while (false)

static inline nir_const_value
nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 1:  v.b   = x;           break;
   case 8:  v.u8  = (uint8_t)x;  break;
   case 16: v.u16 = (uint16_t)x; break;
   case 32: v.u32 = (uint32_t)x; break;
   case 64: v.u64 = x;           break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

static inline nir_const_value
nir_const_value_for_int(int64_t i, unsigned bit_size)
{
   assert(bit_size <= 64);
   if (bit_size < 64) {
      assert(i >= (-(1ll << (bit_size - 1))));
      assert(i < (1ll << (bit_size - 1)));
   }

   return nir_const_value_for_raw_uint(i, bit_size);
}

static inline nir_const_value
nir_const_value_for_uint(uint64_t u, unsigned bit_size)
{
   assert(bit_size <= 64);
   if (bit_size < 64)
      assert(u < (1ull << bit_size));

   return nir_const_value_for_raw_uint(u, bit_size);
}

static inline nir_const_value
nir_const_value_for_bool(bool b, unsigned bit_size)
{
   /* Booleans use a 0/-1 convention */
   return nir_const_value_for_int(-(int)b, bit_size);
}

/* This one isn't inline because it requires half-float conversion */
nir_const_value nir_const_value_for_float(double b, unsigned bit_size);

static inline int64_t
nir_const_value_as_int(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)value.b;
   case 8:  return value.i8;
   case 16: return value.i16;
   case 32: return value.i32;
   case 64: return value.i64;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */
}

static inline uint64_t
nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   case 1:  return value.b;
   case 8:  return value.u8;
   case 16: return value.u16;
   case 32: return value.u32;
   case 64: return value.u64;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */
}

static inline bool
nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
{
   int64_t i = nir_const_value_as_int(value, bit_size);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

/* This one isn't inline because it requires half-float conversion */
double nir_const_value_as_float(nir_const_value value, unsigned bit_size);

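/*
 * Usage sketch (illustrative, not part of the upstream header): the
 * nir_const_value_for_* and nir_const_value_as_* helpers round-trip values
 * at a given bit size:
 *
 *    nir_const_value v = nir_const_value_for_int(-1, 16);
 *    assert(nir_const_value_as_int(v, 16) == -1);
 *    assert(nir_const_value_as_uint(v, 16) == 0xffff);
 */
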
typedef struct nir_constant {
   /**
    * Value of the constant.
    *
    * The field used to back the values supplied by the constant is determined
    * by the type associated with the ``nir_variable``. Constants may be
    * scalars, vectors, or matrices.
    */
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];

   /* Indicates all the values are 0s which can enable some optimizations */
   bool is_null_constant;

   /* we could get this from the var->type but makes clone *much* easier to
    * not have to care about the type.
    */
   unsigned num_elements;

   /* Array elements / Structure Fields */
   struct nir_constant **elements;
} nir_constant;

/**
 * Layout qualifiers for gl_FragDepth.
 *
 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
 * with a layout qualifier.
 */
typedef enum {
   /** No depth layout is specified. */
   nir_depth_layout_none,
   nir_depth_layout_any,
   nir_depth_layout_greater,
   nir_depth_layout_less,
   nir_depth_layout_unchanged
} nir_depth_layout;

/**
 * Enum keeping track of how a variable was declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is an implicitly declared built-in that has not been explicitly
    * re-declared by the shader.
    */
   nir_var_declared_implicitly,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;

/**
 * Either a uniform, global variable, shader input, or shader output. Based on
 * ir_variable - it should be easy to translate between the two.
 */

typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * :c:struct:`nir_variable_mode`
       */
      unsigned mode : 21;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as ``const``, shader inputs,
       * and uniforms.
       */
      unsigned read_only : 1;
      unsigned centroid : 1;
      unsigned sample : 1;
      unsigned patch : 1;
      unsigned invariant : 1;

      /**
       * Was an 'invariant' qualifier explicitly set in the shader?
       *
       * This is used to cross validate glsl qualifiers.
       */
      unsigned explicit_invariant : 1;

      /**
       * Is the variable a ray query?
       */
      unsigned ray_query : 1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all, in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision : 2;

      /**
       * Has this variable been statically assigned?
       *
       * This answers whether the variable was assigned in any path of
       * the shader during ast_to_hir. This doesn't answer whether it is
       * still written after dead code removal, nor is it maintained in
       * non-ast_to_hir.cpp (GLSL parsing) paths.
       */
      unsigned assigned : 1;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put.
       * Propagating it can duplicate the resulting load/store, which is not
       * wanted, and may result in a load/store of the variable with an
       * indirect offset which the backend may not be able to handle.
       */
      unsigned cannot_coalesce : 1;

      /**
       * When separate shader programs are enabled, only input/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other input/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io : 1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * :c:enum:`glsl_interp_mode`
       */
      unsigned interpolation : 3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components. For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by ``location``.
       */
      unsigned location_frac : 2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed. In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact : 1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output : 1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless : 1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding : 1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it **cannot** be changed
       * by the linker or by the API (e.g., calls to ``glBindAttribLocation`` have
       * no effect).
       */
      unsigned explicit_location : 1;

      /* Was the array implicitly sized during linking */
      unsigned implicit_sized_array : 1;

      /**
       * Highest element accessed with a constant array index
       *
       * Not used for non-array variables. -1 is never accessed.
       */
      int max_array_access;

      /**
       * Does this variable have an initializer?
       *
       * This is used by the linker to cross-validate initializers of global
       * variables.
       */
      unsigned has_initializer : 1;

      /**
       * Is the initializer created by the compiler (glsl_zero_init)
       */
      unsigned is_implicit_initializer : 1;

      /**
       * Is this varying used by transform feedback?
       *
       * This is used by the linker to decide if it's safe to pack the varying.
       */
      unsigned is_xfb : 1;

      /**
       * Is this varying used only by transform feedback?
       *
       * This is used by the linker to decide if it's safe to pack the varying.
       */
      unsigned is_xfb_only : 1;

      /**
       * Was a transform feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer : 1;

      /**
       * Was a transform feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride : 1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset : 1;

      /**
       * Layout of the matrix. Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout : 2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block : 1;

      /**
       * Unsized array buffer variable.
       */
      unsigned from_ssbo_unsized_array : 1;

      /**
       * Non-zero if the variable must be a shader input. This is useful for
       * constraints on function parameters.
       */
      unsigned must_be_shader_input : 1;

      /**
       * Has this variable been used for reading or writing?
       *
       * Several GLSL semantic checks require knowledge of whether or not a
       * variable has been used. For example, it is an error to redeclare a
       * variable as invariant after it has been used.
       */
      unsigned used : 1;

      /**
       * How the variable was declared. See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, which
       * should not be visible via the API.
       */
      unsigned how_declared : 2;

      /**
       * Is this variable per-view? If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view : 1;

      /**
       * Whether the variable is per-primitive.
       * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
       */
      unsigned per_primitive : 1;

      /**
       * Whether the variable is declared to indicate that a fragment shader
       * input will not have interpolated values.
       */
      unsigned per_vertex : 1;

      /**
       * Layout qualifier for gl_FragDepth. See nir_depth_layout.
       *
       * This is not equal to ``ir_depth_layout_none`` if and only if this
       * variable is ``gl_FragDepth`` and a layout qualifier is specified.
       */
      unsigned depth_layout : 3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream : 9;

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access : 9;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set : 5;

#define NIR_VARIABLE_NO_INDEX ~0

      /**
       * Output index for dual source blending or input attachment index. If
       * it is not declared it is NIR_VARIABLE_NO_INDEX.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       * - Vertex shader input: one of the values from ``gl_vert_attrib``.
       * - Vertex shader output: one of the values from ``gl_varying_slot``.
       * - Geometry shader input: one of the values from ``gl_varying_slot``.
       * - Geometry shader output: one of the values from ``gl_varying_slot``.
       * - Fragment shader input: one of the values from ``gl_varying_slot``.
       * - Fragment shader output: one of the values from ``gl_frag_result``.
       * - Task shader output: one of the values from ``gl_varying_slot``.
       * - Mesh shader input: one of the values from ``gl_varying_slot``.
       * - Mesh shader output: one of the values from ``gl_varying_slot``.
       * - Uniforms: Per-stage uniform slot number for default uniform block.
       * - Uniforms: Index within the uniform block definition for UBO members.
       * - Non-UBO Uniforms: uniform slot number.
       * - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /** Required alignment of this variable */
      unsigned alignment;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, uniforms (including samplers and images), and for UBO and SSBO
       * variables in GLSL.
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer : 2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };

      /** Name of the node this payload will be enqueued to. */
      const char *node_name;
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * For variables with a non-NULL interface_type, this points to an array of
    * integers such that if the ith member of the interface block is an array,
    * max_ifc_array_access[i] is the maximum array element of that member that
    * has been accessed. If the ith member of the interface block is not an
    * array, max_ifc_array_access[i] is unused.
    *
    * For variables whose type is not an interface block, this pointer is
    * NULL.
    */
   int *max_ifc_array_access;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, ``state_slots`` must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree. In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, ``num_state_slots`` will be zero and
    * ``state_slots`` will be ``NULL``.
    *
    * Number of state slots used.
    */
   uint16_t num_state_slots;
   /** State descriptors. */
   nir_state_slot *state_slots;

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    * This field should only be used temporarily by creators of NIR shaders
    * and then nir_lower_variable_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the ``GLSL_TYPE_INTERFACE`` type for that block.
    *
    * ``ir_variable.location``
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier. This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;

static inline bool
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
{
   /* This isn't a shader variable */
   assert(!(modes & nir_var_function_temp));
   return var->data.mode & modes;
}

#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader)              \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader)              \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)

#define nir_foreach_image_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_image)

#define nir_foreach_image_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_image)

static inline bool
nir_variable_is_global(const nir_variable *var)
{
   return var->data.mode != nir_var_function_temp;
}

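/*
 * Usage sketch (illustrative, not part of the upstream header): the foreach
 * macros above iterate variables filtered by mode, e.g. walking every shader
 * input of a nir_shader:
 *
 *    nir_foreach_shader_in_variable(var, shader) {
 *       if (var->data.location == VARYING_SLOT_POS)
 *          ...;
 *    }
 */
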
typedef enum ENUM_PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
   nir_instr_type_debug_info,
} nir_instr_type;

typedef struct nir_instr {
   struct exec_node node;
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags. For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index. */
   uint32_t index;
} nir_instr;

static inline nir_instr *
nir_instr_next(nir_instr *instr)
{
   struct exec_node *next = exec_node_get_next(&instr->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_instr, next, node);
}

static inline nir_instr *
nir_instr_prev(nir_instr *instr)
{
   struct exec_node *prev = exec_node_get_prev(&instr->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_instr, prev, node);
}

static inline bool
nir_instr_is_first(const nir_instr *instr)
{
   return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
}

static inline bool
nir_instr_is_last(const nir_instr *instr)
{
   return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
}

typedef struct nir_def {
   /** Instruction which produces this SSA value. */
   nir_instr *parent_instr;

   /** set of nir_instrs where this register is used (read from) */
   struct list_head uses;

   /** generic SSA definition index. */
   unsigned index;

   uint8_t num_components;

   /* The bit-size of each channel; must be one of 1, 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this SSA value may have different values in different SIMD
    * invocations of the shader. This is set by nir_divergence_analysis.
    */
   bool divergent;

   /**
    * True if this SSA value is loop invariant w.r.t. the innermost parent
    * loop. This is set by nir_divergence_analysis and used to determine
    * the divergence of a nir_src.
    */
   bool loop_invariant;
} nir_def;

struct nir_src;
struct nir_if;

typedef struct nir_src {
   /* Instruction or if-statement that consumes this value as a source. This
    * should only be accessed through nir_src_* helpers.
    *
    * Internally, it is a tagged pointer to a nir_instr or nir_if.
    */
   uintptr_t _parent;

   struct list_head use_link;
   nir_def *ssa;
} nir_src;

/* Layout of the _parent pointer. Bottom bit is set for nir_if parents (clear
 * for nir_instr parents). Remaining bits are the pointer.
 */
#define NIR_SRC_PARENT_IS_IF (0x1)
#define NIR_SRC_PARENT_MASK  (~((uintptr_t) NIR_SRC_PARENT_IS_IF))

static inline bool
nir_src_is_if(const nir_src *src)
{
   return src->_parent & NIR_SRC_PARENT_IS_IF;
}

static inline nir_instr *
nir_src_parent_instr(const nir_src *src)
{
   assert(!nir_src_is_if(src));

   /* Because it is not an if, the tag is 0, therefore we do not need to mask */
   return (nir_instr *)(src->_parent);
}

static inline struct nir_if *
nir_src_parent_if(const nir_src *src)
{
   assert(nir_src_is_if(src));

   /* Because it is an if, the tag is 1, so we need to mask */
   return (struct nir_if *)(src->_parent & NIR_SRC_PARENT_MASK);
}

static inline void
_nir_src_set_parent(nir_src *src, void *parent, bool is_if)
{
   uintptr_t ptr = (uintptr_t) parent;
   assert((ptr & ~NIR_SRC_PARENT_MASK) == 0 && "pointer must be aligned");

   if (is_if)
      ptr |= NIR_SRC_PARENT_IS_IF;

   src->_parent = ptr;
}

static inline void
nir_src_set_parent_instr(nir_src *src, nir_instr *parent_instr)
{
   _nir_src_set_parent(src, parent_instr, false);
}

static inline void
nir_src_set_parent_if(nir_src *src, struct nir_if *parent_if)
{
   _nir_src_set_parent(src, parent_if, true);
}

static inline nir_src
nir_src_init(void)
{
   nir_src src = { 0 };
   return src;
}

#define NIR_SRC_INIT nir_src_init()

#define nir_foreach_use_including_if(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_use(src, reg_or_ssa_def)         \
   nir_foreach_use_including_if(src, reg_or_ssa_def) \
      if (!nir_src_is_if(src))

#define nir_foreach_use_safe(src, reg_or_ssa_def)         \
   nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
      if (!nir_src_is_if(src))

#define nir_foreach_if_use(src, reg_or_ssa_def)      \
   nir_foreach_use_including_if(src, reg_or_ssa_def) \
      if (nir_src_is_if(src))

#define nir_foreach_if_use_safe(src, reg_or_ssa_def)      \
   nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
      if (nir_src_is_if(src))

static inline bool
nir_def_used_by_if(const nir_def *def)
{
   nir_foreach_if_use(_, def)
      return true;

   return false;
}

static inline bool
nir_def_only_used_by_if(const nir_def *def)
{
   nir_foreach_use(_, def)
      return false;

   return true;
}

static inline nir_src
nir_src_for_ssa(nir_def *def)
{
   nir_src src = NIR_SRC_INIT;

   src.ssa = def;

   return src;
}

static inline unsigned
nir_src_bit_size(nir_src src)
{
   return src.ssa->bit_size;
}

static inline unsigned
nir_src_num_components(nir_src src)
{
   return src.ssa->num_components;
}

static inline bool
nir_src_is_const(nir_src src)
{
   return src.ssa->parent_instr->type == nir_instr_type_load_const;
}

static inline bool
nir_src_is_undef(nir_src src)
{
   return src.ssa->parent_instr->type == nir_instr_type_undef;
}

bool nir_src_is_divergent(nir_src *src);

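/*
 * Usage sketch (illustrative, not part of the upstream header): because every
 * nir_src is linked into its def's use list, counting the instruction
 * (non-if) uses of an SSA value is a simple walk:
 *
 *    unsigned uses = 0;
 *    nir_foreach_use(use, def)
 *       uses++;
 */
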
/* Are all components the same, i.e. .xxxx */
static inline bool
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != swiz[0])
         return false;
   return true;
}

/* Are all components sequential, i.e. .yzw */
static inline bool
nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != (swiz[0] + i))
         return false;
   return true;
}

/***/
typedef struct nir_alu_src {
   /** Base source */
   nir_src src;

   /**
    * For each input component, says which component of the register it is
    * chosen from.
    *
    * Note that which elements of the swizzle are used and which are ignored
    * are based on the write mask for most opcodes - for example, a statement
    * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
    * of {2, 1, x, 0} where x means "don't care."
    */
   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
} nir_alu_src;

nir_alu_type
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type);

static inline nir_alu_type
nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
{
   return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
}

enum glsl_base_type
nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type);

nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
                              nir_rounding_mode rnd);

/**
 * Atomic intrinsics perform different operations depending on the value of
 * their atomic_op constant index. nir_atomic_op defines the operations.
 */
typedef enum {
   nir_atomic_op_iadd,
   nir_atomic_op_imin,
   nir_atomic_op_umin,
   nir_atomic_op_imax,
   nir_atomic_op_umax,
   nir_atomic_op_iand,
   nir_atomic_op_ior,
   nir_atomic_op_ixor,
   nir_atomic_op_xchg,
   nir_atomic_op_fadd,
   nir_atomic_op_fmin,
   nir_atomic_op_fmax,
   nir_atomic_op_cmpxchg,
   nir_atomic_op_fcmpxchg,
   nir_atomic_op_inc_wrap,
   nir_atomic_op_dec_wrap,
   nir_atomic_op_ordered_add_gfx12_amd,
} nir_atomic_op;

static inline nir_alu_type
nir_atomic_op_type(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_imin:
   case nir_atomic_op_imax:
      return nir_type_int;

   case nir_atomic_op_fadd:
   case nir_atomic_op_fmin:
   case nir_atomic_op_fmax:
   case nir_atomic_op_fcmpxchg:
      return nir_type_float;

   case nir_atomic_op_iadd:
   case nir_atomic_op_iand:
   case nir_atomic_op_ior:
   case nir_atomic_op_ixor:
   case nir_atomic_op_xchg:
   case nir_atomic_op_cmpxchg:
   case nir_atomic_op_umin:
   case nir_atomic_op_umax:
   case nir_atomic_op_inc_wrap:
   case nir_atomic_op_dec_wrap:
   case nir_atomic_op_ordered_add_gfx12_amd:
      return nir_type_uint;
   }

   unreachable("Invalid nir_atomic_op");
}

nir_op
nir_atomic_op_to_alu(nir_atomic_op op);

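/*
 * Illustrative note (not part of the upstream header): nir_atomic_op_type()
 * gives the type an atomic operates on, which is how a pass can tell, e.g.,
 * signed from unsigned min:
 *
 *    assert(nir_atomic_op_type(nir_atomic_op_imin) == nir_type_int);
 *    assert(nir_atomic_op_type(nir_atomic_op_umin) == nir_type_uint);
 */
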
/** Returns nir_op_vec<num_components> or nir_op_mov if num_components == 1
 *
 * This is subtly different from nir_op_is_vec() which returns false for
 * nir_op_mov. Returning nir_op_mov from nir_op_vec() when num_components == 1
 * makes sense under the assumption that the num_components of the resulting
 * nir_def will be the same as what is passed in here, because a
 * single-component mov is effectively a vec1. However, if
 * alu->def.num_components > 1, nir_op_mov has different semantics from
 * nir_op_vec*, so code which detects "is this a vec?" typically needs to
 * handle nir_op_mov separately from nir_op_vecN.
 *
 * In the unlikely case where you can handle nir_op_vecN and nir_op_mov
 * together, use nir_op_is_vec_or_mov().
 */
nir_op
nir_op_vec(unsigned num_components);

/** Returns true if this op is one of nir_op_vec*
 *
 * Returns false for nir_op_mov. See nir_op_vec() for more details.
 */
bool
nir_op_is_vec(nir_op op);

static inline bool
nir_op_is_vec_or_mov(nir_op op)
{
   return op == nir_op_mov || nir_op_is_vec(op);
}

static inline bool
nir_is_float_control_signed_zero_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
}

static inline bool
nir_is_float_control_inf_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP64);
}

static inline bool
nir_is_float_control_nan_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
}

static inline bool
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
}

static inline bool
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
}

static inline bool
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
}

static inline bool
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline bool
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline nir_rounding_mode
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
                                          nir_alu_type type)
{
   if (nir_alu_type_get_base_type(type) != nir_type_float)
      return nir_rounding_mode_undef;

   unsigned bit_size = nir_alu_type_get_type_size(type);

   if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
      return nir_rounding_mode_rtz;
   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
      return nir_rounding_mode_rtne;
   return nir_rounding_mode_undef;
}

static inline bool
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
{
   bool result =
      nir_has_any_rounding_mode_rtne(execution_mode) ||
      nir_has_any_rounding_mode_rtz(execution_mode);
   return result;
}

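/*
 * Usage sketch (illustrative, not part of the upstream header): combining the
 * helpers above to pick a conversion opcode that honors the shader's
 * float-controls execution mode. Here execution_mode is assumed to be the
 * shader's float_controls execution-mode bitfield:
 *
 *    nir_rounding_mode rnd =
 *       nir_get_rounding_mode_from_float_controls(execution_mode,
 *                                                 nir_type_float16);
 *    nir_op op = nir_type_conversion_op(nir_type_float32,
 *                                       nir_type_float16, rnd);
 */
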
typedef enum {
   /**
    * Operation where the first two sources are commutative.
    *
    * For 2-source operations, this is just mathematical commutativity. Some
    * 3-source operations, like ffma, are only commutative in the first two
    * sources.
    */
   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),

   /**
    * Operation is associative
    */
   NIR_OP_IS_ASSOCIATIVE = (1 << 1),

   /**
    * Operation where src[0] is used to select src[1] when true or src[2]
    * when false. src[0] may be Boolean, or it may be another type used in
    * an implicit comparison.
    */
   NIR_OP_IS_SELECTION = (1 << 2),
} nir_op_algebraic_property;

/* vec16 is the widest ALU op in NIR, making the maximum number of ALU
 * instruction inputs the same as NIR_MAX_VEC_COMPONENTS.
 */
#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS

/***/
typedef struct nir_op_info {
   /** Name of the NIR ALU opcode */
   const char *name;

   /** Number of inputs (sources) */
   uint8_t num_inputs;

   /**
    * The number of components in the output
    *
    * If non-zero, this is the size of the output and input sizes are
    * explicitly given; swizzle and writemask are still in effect, but if
    * the output component is masked out, then the input component may
    * still be in use.
    *
    * If zero, the opcode acts in the standard, per-component manner; the
    * operation is performed on each component (except the ones that are
    * masked out) with the input being taken from the input swizzle for
    * that component.
    *
    * The size of some of the inputs may be given (i.e. non-zero) even
    * though output_size is zero; in that case, the inputs with a zero
    * size act per-component, while the inputs with non-zero size don't.
    */
   uint8_t output_size;

   /**
    * The type of vector that the instruction outputs.
    */
   nir_alu_type output_type;

   /**
    * The number of components in each input
    *
    * See nir_op_infos::output_size for more detail about the relationship
    * between input and output sizes.
    */
   uint8_t input_sizes[NIR_ALU_MAX_INPUTS];

   /**
    * The type of vector that each input takes.
    */
   nir_alu_type input_types[NIR_ALU_MAX_INPUTS];

   /** Algebraic properties of this opcode */
   nir_op_algebraic_property algebraic_properties;

   /** Whether this represents a numeric conversion opcode */
   bool is_conversion;
} nir_op_info;

/** Metadata for each nir_op, indexed by opcode */
extern const nir_op_info nir_op_infos[nir_num_opcodes];

static inline bool
nir_op_is_selection(nir_op op)
{
   return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
}

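/*
 * Usage sketch (illustrative, not part of the upstream header): the metadata
 * table drives generic opcode queries; e.g. ffma has three sources and is
 * commutative only in its first two:
 *
 *    const nir_op_info *info = &nir_op_infos[nir_op_ffma];
 *    assert(info->num_inputs == 3);
 *    assert(info->algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE);
 */
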
/***/
typedef struct nir_alu_instr {
   /** Base instruction */
   nir_instr instr;

   /** Opcode */
   nir_op op;

   /** Indicates that this ALU instruction generates an exact value
    *
    * This is kind of a mixture of GLSL "precise" and "invariant" and not
    * really equivalent to either. This indicates that the value generated by
    * this operation is high-precision and any code transformations that touch
    * it must ensure that the resulting value is bit-for-bit identical to the
    * original.
    */
   bool exact : 1;

   /**
    * Indicates that this instruction does not cause signed integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_signed_wrap : 1;

   /**
    * Indicates that this instruction does not cause unsigned integer wrapping
    * to occur, in the form of overflow or underflow.
    */
   bool no_unsigned_wrap : 1;

   /**
    * The float controls bits float_controls2 cares about. That is,
    * NAN/INF/SIGNED_ZERO_PRESERVE only. Allow{Contract,Reassoc,Transform} are
    * still handled through the exact bit, and the other float controls bits
    * (rounding mode and denorm handling) remain in the execution mode only.
    */
   uint32_t fp_fast_math : 9;

   /** Destination */
   nir_def def;

   /** Sources
    *
    * The size of the array is given by :c:member:`nir_op_info.num_inputs`.
    */
   nir_alu_src src[];
} nir_alu_instr;

static inline bool
nir_alu_instr_is_signed_zero_preserve(nir_alu_instr *alu)
{
   return nir_is_float_control_signed_zero_preserve(alu->fp_fast_math, alu->def.bit_size);
}

static inline bool
nir_alu_instr_is_inf_preserve(nir_alu_instr *alu)
{
   return nir_is_float_control_inf_preserve(alu->fp_fast_math, alu->def.bit_size);
}

static inline bool
nir_alu_instr_is_nan_preserve(nir_alu_instr *alu)
{
   return nir_is_float_control_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
}

static inline bool
nir_alu_instr_is_signed_zero_inf_nan_preserve(nir_alu_instr *alu)
{
   return nir_is_float_control_signed_zero_inf_nan_preserve(alu->fp_fast_math, alu->def.bit_size);
}

void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);

nir_component_mask_t
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src);
/**
 * Get the number of channels used for a source
 */
unsigned
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src);

/* is this source channel used? */
static inline bool
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
                           unsigned channel)
{
   return channel < nir_ssa_alu_instr_src_components(instr, src);
}

bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr);

bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
                                    nir_alu_type full_type);

bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                        unsigned src1, unsigned src2);

bool nir_alu_srcs_negative_equal_typed(const nir_alu_instr *alu1,
                                       const nir_alu_instr *alu2,
                                       unsigned src1, unsigned src2,
                                       nir_alu_type base_type);
bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                                 const nir_alu_instr *alu2,
                                 unsigned src1, unsigned src2);

bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);

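/*
 * Usage sketch (illustrative, not part of the upstream header): per-component
 * analysis of an ALU source only needs to look at channels the opcode reads,
 * and the swizzle maps each of those to an input component (alu and i are
 * placeholders):
 *
 *    for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
 *       if (!nir_alu_instr_channel_used(alu, i, c))
 *          continue;
 *       unsigned input_comp = alu->src[i].swizzle[c];
 *       ...
 *    }
 */
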
typedef enum {
   nir_deref_type_var,
   nir_deref_type_array,
   nir_deref_type_array_wildcard,
   nir_deref_type_ptr_as_array,
   nir_deref_type_struct,
   nir_deref_type_cast,
} nir_deref_type;

typedef struct {
   nir_instr instr;

   /** The type of this deref instruction */
   nir_deref_type deref_type;

   /** Bitmask of the modes the underlying variable might have
    *
    * For OpenCL-style generic pointers, we may not know exactly what mode it
    * is at any given point in time in the compile process. This bitfield
    * contains the set of modes which it MAY be.
    *
    * Generally, this field should not be accessed directly. Use one of the
    * nir_deref_mode_ helpers instead.
    */
   nir_variable_mode modes;

   /** The dereferenced type of the resulting pointer value */
   const struct glsl_type *type;

   union {
      /** Variable being dereferenced if deref_type is a deref_var */
      nir_variable *var;

      /** Parent deref if deref_type is not deref_var */
      nir_src parent;
   };

   /** Additional deref parameters */
   union {
      struct {
         nir_src index;
         bool in_bounds;
      } arr;

      struct {
         unsigned index;
      } strct;

      struct {
         unsigned ptr_stride;
         unsigned align_mul;
         unsigned align_offset;
      } cast;
   };

   /** Destination to store the resulting "pointer" */
   nir_def def;
} nir_deref_instr;

/**
 * Returns true if the cast is trivial, i.e. the source and destination types
 * are the same.
 */
bool nir_deref_cast_is_trivial(nir_deref_instr *cast);

/** Returns true if deref might have one of the given modes
 *
 * For multi-mode derefs, this returns true if any of the possible modes of
 * the deref match any of the specified modes. This function returning true
 * does NOT mean that the deref definitely has one of those modes. It simply
 * means that, with the best information we have at the time, it might.
 */
static inline bool
nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return deref->modes & modes;
}

/** Returns true if deref must have one of the given modes
 *
 * For multi-mode derefs, this returns true if NIR can prove that the given
 * deref has one of the specified modes. This function returning false does
 * NOT mean that the deref doesn't have one of the given modes. It very well
 * may have one of those modes, we just don't have enough information to
 * prove that it does for sure.
 */
static inline bool
nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return !(deref->modes & ~modes);
}

/** Returns true if deref has the given mode
 *
 * This returns true if the deref has exactly the mode specified. If the
 * deref may have that mode but may also have a different mode (i.e. modes has
 * multiple bits set), this will assert-fail.
 *
 * If you're confused about which nir_deref_mode_ helper to use, use this one
 * or nir_deref_mode_is_one_of below.
 */
static inline bool
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
{
   assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
   assert(deref->modes != 0);

   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, mode)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(deref->modes == mode);
   }

   return deref->modes == mode;
}

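/*
 * Illustrative note (not part of the upstream header): for a multi-mode deref
 * whose modes are nir_var_mem_shared | nir_var_mem_global,
 *
 *    nir_deref_mode_may_be(deref, nir_var_mem_shared)    -> true
 *    nir_deref_mode_must_be(deref, nir_var_mem_shared)   -> false
 *    nir_deref_mode_must_be(deref, nir_var_mem_generic)  -> true
 *
 * while nir_deref_mode_is(deref, nir_var_mem_shared) would assert-fail
 * because the deref does not have a single mode.
 */
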
/** Returns true if deref has one of the given modes
 *
 * This returns true if the deref has exactly one possible mode and that mode
 * is one of the modes specified. If the deref may have one of those modes
 * but may also have a different mode (i.e. modes has multiple bits set), this
 * will assert-fail.
 */
static inline bool
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
{
   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, modes)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(nir_deref_mode_must_be(deref, modes));
   }

   return nir_deref_mode_may_be(deref, modes);
}

/** Returns true if deref's possible modes lie in the given set of modes
 *
 * This returns true if the deref's modes lie in the given set of modes. If
 * the deref's modes overlap with the specified modes but aren't entirely
 * contained in the specified set of modes, this will assert-fail. In
 * particular, if this is used in a generic pointers scenario, the specified
 * modes has to contain all or none of the possible generic pointer modes.
 *
 * This is intended mostly for mass-lowering of derefs which might have
 * generic pointers.
 */
static inline bool
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
{
   if (nir_deref_mode_may_be(deref, modes))
      assert(nir_deref_mode_must_be(deref, modes));

   return nir_deref_mode_may_be(deref, modes);
}

static inline nir_deref_instr *nir_src_as_deref(nir_src src);

static inline nir_deref_instr *
nir_deref_instr_parent(const nir_deref_instr *instr)
{
   if (instr->deref_type == nir_deref_type_var)
      return NULL;
   else
      return nir_src_as_deref(instr->parent);
}

static inline nir_variable *
nir_deref_instr_get_variable(const nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      if (instr->deref_type == nir_deref_type_cast)
         return NULL;

      instr = nir_deref_instr_parent(instr);
   }

   return instr->var;
}

bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);

typedef enum {
   nir_deref_instr_has_complex_use_allow_memcpy_src = (1 << 0),
   nir_deref_instr_has_complex_use_allow_memcpy_dst = (1 << 1),
   nir_deref_instr_has_complex_use_allow_atomics = (1 << 2),
} nir_deref_instr_has_complex_use_options;

bool nir_deref_instr_has_complex_use(nir_deref_instr *instr,
                                     nir_deref_instr_has_complex_use_options opts);

bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);

unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);

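/*
 * Usage sketch (illustrative, not part of the upstream header): walking a
 * deref chain back to its variable. A NULL result means the chain passed
 * through a cast, so the underlying variable is unknown:
 *
 *    nir_variable *var = nir_deref_instr_get_variable(deref);
 *    if (var != NULL && var->data.mode == nir_var_shader_temp)
 *       ...;
 */
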
Even though texturing more-or-less falls
1895 * under this category, texturing is its own instruction type because
1896 * trying to represent texturing with intrinsics would lead to a
1897 * combinatorial explosion of intrinsic opcodes.
1898 *
1899 * By having a single instruction type for handling a lot of different
1900 * cases, optimization passes can look for intrinsics and, for the most
1901 * part, completely ignore them. Each intrinsic type also has a few
1902 * possible flags that govern whether or not they can be reordered or
1903 * eliminated. That way passes like dead code elimination can still work
1904 * on intrinsics without understanding the meaning of each.
1905 *
1906 * Each intrinsic has some number of constant indices, some number of
1907 * variables, and some number of sources. What these sources, variables,
1908 * and indices mean depends on the intrinsic and is documented with the
1909 * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
1910 * instructions are the only types of instruction that can operate on
1911 * variables.
1912 */
1913typedef struct {
1914 nir_instr instr;
1915
1916 nir_intrinsic_op intrinsic;
1917
1918 nir_def def;
1919
1920 /** number of components if this is a vectorized intrinsic
1921 *
1922 * Similarly to ALU operations, some intrinsics are vectorized.
1923 * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
1924 * For vectorized intrinsics, the num_components field specifies the
1925 * number of destination components and the number of source components
1926 * for all sources with nir_intrinsic_infos.src_components[i] == 0.
1927 */
1928 uint8_t num_components;
1929
1930 int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
1931
1932 /* a variable name associated with this instr; cannot be modified or freed */
1933 const char *name;
1934
1935 nir_src src[];
1936} nir_intrinsic_instr;
1937
1938static inline nir_variable *
1939nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i)
1940{
1941 return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
1942}
1943
1944typedef enum {
1945 /* Memory ordering. */
1946 NIR_MEMORY_ACQUIRE = 1 << 0,
1947 NIR_MEMORY_RELEASE = 1 << 1,
1948 NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,
1949
1950 /* Memory visibility operations. */
1951 NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
1952 NIR_MEMORY_MAKE_VISIBLE = 1 << 3,
1953} nir_memory_semantics;
1954
1955/**
1956 * NIR intrinsics semantic flags
1957 *
1958 * Information about what the compiler can do with the intrinsic.
1959 *
1960 * :c:member:`nir_intrinsic_info.flags`
1961 */
1962typedef enum {
1963 /**
1964 * Whether the intrinsic can be safely eliminated if none of its output
1965 * values are used.
1966 */
1967 NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
1968
1969 /**
1970 * Whether the intrinsic can be reordered with respect to any other
1971 * intrinsic, i.e. whether the only reordering dependencies of the
1972 * intrinsic are due to the register reads/writes.
1973 */
1974 NIR_INTRINSIC_CAN_REORDER = (1 << 1),
1975} nir_intrinsic_semantic_flag;
1976
1977/**
1978 * Maximum valid value for a nir align_mul value (in intrinsics or derefs).
1979 *
1980 * Offsets can be signed, so this is the largest power of two in int32_t.
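 *
 * As a worked example of the mul+offset scheme this value bounds: an offset
 * known to be of the form 16*i + 4 is described by align_mul=16 and
 * align_offset=4, and every such offset X satisfies X % 16 == 4. See
 * nir_combined_align() below for collapsing the pair into a single simple
 * alignment.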
1981 */ 1982#define NIR_ALIGN_MUL_MAX 0x40000000 1983 1984typedef struct nir_io_semantics { 1985 unsigned location : 7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */ 1986 unsigned num_slots : 6; /* max 32, may be pessimistic with const indexing */ 1987 unsigned dual_source_blend_index : 1; 1988 unsigned fb_fetch_output : 1; /* for GL_KHR_blend_equation_advanced */ 1989 unsigned fb_fetch_output_coherent : 1; 1990 unsigned gs_streams : 8; /* xxyyzzww: 2-bit stream index for each component */ 1991 unsigned medium_precision : 1; /* GLSL mediump qualifier */ 1992 unsigned per_view : 1; 1993 unsigned high_16bits : 1; /* whether accessing low or high half of the slot */ 1994 unsigned invariant : 1; /* The variable has the invariant flag set */ 1995 unsigned high_dvec2 : 1; /* whether accessing the high half of dvec3/dvec4 */ 1996 /* CLIP_DISTn, LAYER, VIEWPORT, and TESS_LEVEL_* have up to 3 uses: 1997 * - an output consumed by the next stage 1998 * - a system value output affecting fixed-func hardware, e.g. the clipper 1999 * - a transform feedback output written to memory 2000 * The following fields disable the first two. Transform feedback is disabled 2001 * by transform feedback info. 2002 */ 2003 unsigned no_varying : 1; /* whether this output isn't consumed by the next stage */ 2004 unsigned no_sysval_output : 1; /* whether this system value output has no 2005 effect due to current pipeline states */ 2006 unsigned interp_explicit_strict : 1; /* preserve original vertex order */ 2007} nir_io_semantics; 2008 2009/* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains 2010 * this structure twice to support up to 4 outputs. The structure is limited 2011 * to 32 bits because it's stored in nir_intrinsic_instr::const_index[]. 2012 */ 2013typedef struct nir_io_xfb { 2014 struct { 2015 /* start_component is equal to the index of out[]; add 2 for io_xfb2 */ 2016 /* start_component is not relative to nir_intrinsic_component */ 2017 /* get the stream index from nir_io_semantics */ 2018 uint8_t num_components : 4; /* max 4; if this is 0, xfb is disabled */ 2019 uint8_t buffer : 4; /* buffer index, max 3 */ 2020 uint8_t offset; /* transform feedback buffer offset in dwords, 2021 max (1K - 4) bytes */ 2022 } out[2]; 2023} nir_io_xfb; 2024 2025unsigned 2026nir_instr_xfb_write_mask(nir_intrinsic_instr *instr); 2027 2028#define NIR_INTRINSIC_MAX_INPUTS 11 2029 2030typedef struct { 2031 const char *name; 2032 2033 /** number of register/SSA inputs */ 2034 uint8_t num_srcs; 2035 2036 /** number of components of each input register 2037 * 2038 * If this value is 0, the number of components is given by the 2039 * num_components field of nir_intrinsic_instr. If this value is -1, the 2040 * intrinsic consumes however many components are provided and it is not 2041 * validated at all. 2042 */ 2043 int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; 2044 2045 bool has_dest; 2046 2047 /** number of components of the output register 2048 * 2049 * If this value is 0, the number of components is given by the 2050 * num_components field of nir_intrinsic_instr. 2051 */ 2052 uint8_t dest_components; 2053 2054 /** bitfield of legal bit sizes */ 2055 uint8_t dest_bit_sizes; 2056 2057 /** source which the destination bit size must match 2058 * 2059 * Some intrinsics, such as subgroup intrinsics, are data manipulation 2060 * intrinsics and they have similar bit-size rules to ALU ops. 
This enables
2061 * validation to check a bit more and enables auto-generated builder code
2062 * to determine destination bit sizes automatically.
2063 */
2064 int8_t bit_size_src;
2065
2066 /** the number of constant indices used by the intrinsic */
2067 uint8_t num_indices;
2068
2069 /** list of indices */
2070 uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];
2071
2072 /** indicates the usage of intr->const_index[n] */
2073 uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
2074
2075 /** semantic flags for calls to this intrinsic */
2076 nir_intrinsic_semantic_flag flags;
2077} nir_intrinsic_info;
2078
2079extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
2080
2081unsigned
2082nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn);
2083
2084unsigned
2085nir_intrinsic_dest_components(nir_intrinsic_instr *intr);
2086
2087nir_alu_type
2088nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src);
2089
2090nir_alu_type
2091nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin);
2092
2093/**
2094 * Helper to copy const_index[] from src to dst, without assuming they
2095 * match in order.
2096 */
2097void nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src);
2098
2099#include "nir_intrinsics_indices.h"
2100
2101static inline void
2102nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
2103 unsigned align_mul, unsigned align_offset)
2104{
2105 assert(util_is_power_of_two_nonzero(align_mul));
2106 assert(align_offset < align_mul);
2107 nir_intrinsic_set_align_mul(intrin, align_mul);
2108 nir_intrinsic_set_align_offset(intrin, align_offset);
2109}
2110
2111/** Returns a simple alignment for an align_mul/offset pair
2112 *
2113 * This helper converts from the full mul+offset alignment scheme used by
2114 * most NIR intrinsics to a simple alignment. The returned value is the
2115 * largest power of two which divides both align_mul and align_offset.
2116 * For any offset X which satisfies the complex alignment described by
2117 * align_mul/offset, X % align == 0.
2118 */
2119static inline uint32_t
2120nir_combined_align(uint32_t align_mul, uint32_t align_offset)
2121{
2122 assert(util_is_power_of_two_nonzero(align_mul));
2123 assert(align_offset < align_mul);
2124 return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
2125}
2126
2127/** Returns a simple alignment for a load/store intrinsic offset
2128 *
2129 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
2130 * and ALIGN_OFFSET parameters, this helper takes both into account and
2131 * provides a single simple alignment parameter. The offset X is guaranteed
2132 * to satisfy X % align == 0.
2133 */
2134static inline unsigned
2135nir_intrinsic_align(const nir_intrinsic_instr *intrin)
2136{
2137 return nir_combined_align(nir_intrinsic_align_mul(intrin),
2138 nir_intrinsic_align_offset(intrin));
2139}
2140
2141static inline bool
2142nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
2143{
2144 return nir_intrinsic_has_align_mul(intrin) &&
2145 nir_intrinsic_has_align_offset(intrin);
2146}
2147
2148unsigned
2149nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
2150
2151/* Converts an image_deref_* intrinsic into an image_* one */
2152void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
2153 nir_def *handle, bool bindless);
2154
2155/* Determine if an intrinsic can be arbitrarily reordered and eliminated.
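 *
 * A sketch of typical caller code (hypothetical hoisting pass;
 * hoist_out_of_loop is illustrative, not part of NIR):
 *
 *    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 *    if (nir_intrinsic_can_reorder(intrin))
 *       hoist_out_of_loop(&intrin->instr);
 *
 * Unlike testing NIR_INTRINSIC_CAN_REORDER directly, this helper can also
 * take per-instruction state (e.g. access flags) into account.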
*/ 2156bool nir_intrinsic_can_reorder(nir_intrinsic_instr *instr); 2157 2158bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr); 2159 2160static inline bool 2161nir_intrinsic_is_ray_query(nir_intrinsic_op intrinsic) 2162{ 2163 switch (intrinsic) { 2164 case nir_intrinsic_rq_confirm_intersection: 2165 case nir_intrinsic_rq_generate_intersection: 2166 case nir_intrinsic_rq_initialize: 2167 case nir_intrinsic_rq_load: 2168 case nir_intrinsic_rq_proceed: 2169 case nir_intrinsic_rq_terminate: 2170 return true; 2171 default: 2172 return false; 2173 } 2174} 2175 2176/** Texture instruction source type */ 2177typedef enum nir_tex_src_type { 2178 /** Texture coordinate 2179 * 2180 * Must have :c:member:`nir_tex_instr.coord_components` components. 2181 */ 2182 nir_tex_src_coord, 2183 2184 /** Projector 2185 * 2186 * The texture coordinate (except for the array component, if any) is 2187 * divided by this value before LOD computation and sampling. 2188 * 2189 * Must be a float scalar. 2190 */ 2191 nir_tex_src_projector, 2192 2193 /** Shadow comparator 2194 * 2195 * For shadow sampling, the fetched texel values are compared against the 2196 * shadow comparator using the compare op specified by the sampler object 2197 * and converted to 1.0 if the comparison succeeds and 0.0 if it fails. 2198 * Interpolation happens after this conversion so the actual result may be 2199 * anywhere in the range [0.0, 1.0]. 2200 * 2201 * Only valid if :c:member:`nir_tex_instr.is_shadow` and must be a float 2202 * scalar. 2203 */ 2204 nir_tex_src_comparator, 2205 2206 /** Coordinate offset 2207 * 2208 * An integer value that is added to the texel address before sampling. 2209 * This is only allowed with operations that take an explicit LOD as it is 2210 * applied in integer texel space after LOD selection and not normalized 2211 * coordinate space. 2212 */ 2213 nir_tex_src_offset, 2214 2215 /** LOD bias 2216 * 2217 * This value is added to the computed LOD before mip-mapping. 2218 */ 2219 nir_tex_src_bias, 2220 2221 /** Explicit LOD */ 2222 nir_tex_src_lod, 2223 2224 /** Min LOD 2225 * 2226 * The computed LOD is clamped to be at least as large as min_lod before 2227 * mip-mapping. 2228 */ 2229 nir_tex_src_min_lod, 2230 2231 /** MSAA sample index */ 2232 nir_tex_src_ms_index, 2233 2234 /** Intel-specific MSAA compression data */ 2235 nir_tex_src_ms_mcs_intel, 2236 2237 /** Explicit horizontal (X-major) coordinate derivative */ 2238 nir_tex_src_ddx, 2239 2240 /** Explicit vertical (Y-major) coordinate derivative */ 2241 nir_tex_src_ddy, 2242 2243 /** Texture variable dereference */ 2244 nir_tex_src_texture_deref, 2245 2246 /** Sampler variable dereference */ 2247 nir_tex_src_sampler_deref, 2248 2249 /** Texture index offset 2250 * 2251 * This is added to :c:member:`nir_tex_instr.texture_index`. Unless 2252 * :c:member:`nir_tex_instr.texture_non_uniform` is set, this is guaranteed 2253 * to be dynamically uniform. 2254 */ 2255 nir_tex_src_texture_offset, 2256 2257 /** Dynamically uniform sampler index offset 2258 * 2259 * This is added to :c:member:`nir_tex_instr.sampler_index`. Unless 2260 * :c:member:`nir_tex_instr.sampler_non_uniform` is set, this is guaranteed to be 2261 * dynamically uniform. This should not be present until GLSL ES 3.20, GLSL 2262 * 4.00, or ARB_gpu_shader5, because in ES 3.10 and GL 3.30 samplers said 2263 * "When aggregated into arrays within a shader, samplers can only be indexed 2264 * with a constant integral expression." 
2265 */ 2266 nir_tex_src_sampler_offset, 2267 2268 /** Bindless texture handle 2269 * 2270 * This is, unfortunately, a bit overloaded at the moment. There are 2271 * generally two types of bindless handles: 2272 * 2273 * 1. For GL_ARB_bindless bindless handles. These are part of the 2274 * GL/Gallium-level API and are always a 64-bit integer. 2275 * 2276 * 2. HW-specific handles. GL_ARB_bindless handles may be lowered to 2277 * these. Also, these are used by many Vulkan drivers to implement 2278 * descriptor sets, especially for UPDATE_AFTER_BIND descriptors. 2279 * The details of hardware handles (bit size, format, etc.) is 2280 * HW-specific. 2281 * 2282 * Because of this overloading and the resulting ambiguity, we currently 2283 * don't validate anything for these. 2284 */ 2285 nir_tex_src_texture_handle, 2286 2287 /** Bindless sampler handle 2288 * 2289 * See nir_tex_src_texture_handle, 2290 */ 2291 nir_tex_src_sampler_handle, 2292 2293 /** Tex src intrinsic 2294 * 2295 * This is an intrinsic used before function inlining i.e. before we know 2296 * if a bindless value has been given as function param for use as a tex 2297 * src. 2298 */ 2299 nir_tex_src_sampler_deref_intrinsic, 2300 nir_tex_src_texture_deref_intrinsic, 2301 2302 /** Plane index for multi-plane YCbCr textures */ 2303 nir_tex_src_plane, 2304 2305 /** 2306 * Backend-specific vec4 tex src argument. 2307 * 2308 * Can be used to have NIR optimization (copy propagation, lower_vec_to_regs) 2309 * apply to the packing of the tex srcs. This lowering must only happen 2310 * after nir_lower_tex(). 2311 * 2312 * The nir_tex_instr_src_type() of this argument is float, so no lowering 2313 * will happen if nir_lower_int_to_float is used. 2314 */ 2315 nir_tex_src_backend1, 2316 2317 /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */ 2318 nir_tex_src_backend2, 2319 2320 nir_num_tex_src_types 2321} nir_tex_src_type; 2322 2323/** A texture instruction source */ 2324typedef struct nir_tex_src { 2325 /** Base source */ 2326 nir_src src; 2327 2328 /** Type of this source */ 2329 nir_tex_src_type src_type; 2330} nir_tex_src; 2331 2332/** Texture instruction opcode */ 2333typedef enum nir_texop { 2334 /** Regular texture look-up */ 2335 nir_texop_tex, 2336 /** Texture look-up with LOD bias */ 2337 nir_texop_txb, 2338 /** Texture look-up with explicit LOD */ 2339 nir_texop_txl, 2340 /** Texture look-up with partial derivatives */ 2341 nir_texop_txd, 2342 /** Texel fetch with explicit LOD */ 2343 nir_texop_txf, 2344 /** Multisample texture fetch */ 2345 nir_texop_txf_ms, 2346 /** Multisample texture fetch from framebuffer */ 2347 nir_texop_txf_ms_fb, 2348 /** Multisample compression value fetch */ 2349 nir_texop_txf_ms_mcs_intel, 2350 /** Texture size */ 2351 nir_texop_txs, 2352 /** Texture lod query */ 2353 nir_texop_lod, 2354 /** Texture gather */ 2355 nir_texop_tg4, 2356 /** Texture levels query */ 2357 nir_texop_query_levels, 2358 /** Texture samples query */ 2359 nir_texop_texture_samples, 2360 /** Query whether all samples are definitely identical. */ 2361 nir_texop_samples_identical, 2362 /** Regular texture look-up, eligible for pre-dispatch */ 2363 nir_texop_tex_prefetch, 2364 /** Multisample fragment color texture fetch */ 2365 nir_texop_fragment_fetch_amd, 2366 /** Multisample fragment mask texture fetch */ 2367 nir_texop_fragment_mask_fetch_amd, 2368 /** Returns a buffer or image descriptor. */ 2369 nir_texop_descriptor_amd, 2370 /** Returns a sampler descriptor. 
*/ 2371 nir_texop_sampler_descriptor_amd, 2372 /** Returns the sampler's LOD bias */ 2373 nir_texop_lod_bias_agx, 2374 /** Returns a bool indicating that the sampler uses a custom border colour */ 2375 nir_texop_has_custom_border_color_agx, 2376 /** Returns the sampler's custom border colour (if has_custom_border_agx) */ 2377 nir_texop_custom_border_color_agx, 2378 /** Maps to TXQ.DIMENSION */ 2379 nir_texop_hdr_dim_nv, 2380 /** Maps to TXQ.TEXTURE_TYPE */ 2381 nir_texop_tex_type_nv, 2382} nir_texop; 2383 2384/** Represents a texture instruction */ 2385typedef struct nir_tex_instr { 2386 /** Base instruction */ 2387 nir_instr instr; 2388 2389 /** Dimensionality of the texture operation 2390 * 2391 * This will typically match the dimensionality of the texture deref type 2392 * if a nir_tex_src_texture_deref is present. However, it may not if 2393 * texture lowering has occurred. 2394 */ 2395 enum glsl_sampler_dim sampler_dim; 2396 2397 /** ALU type of the destination 2398 * 2399 * This is the canonical sampled type for this texture operation and may 2400 * not exactly match the sampled type of the deref type when a 2401 * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of 2402 * the texture deref will be GLSL_TYPE_VOID and this is allowed to be 2403 * anything. With SPIR-V, the signedness of integer types is allowed to 2404 * differ. For all APIs, the bit size may differ if the driver has done 2405 * any sort of mediump or similar lowering since texture types always have 2406 * 32-bit sampled types. 2407 */ 2408 nir_alu_type dest_type; 2409 2410 /** Texture opcode */ 2411 nir_texop op; 2412 2413 /** Destination */ 2414 nir_def def; 2415 2416 /** Array of sources 2417 * 2418 * This array has :c:member:`nir_tex_instr.num_srcs` elements 2419 */ 2420 nir_tex_src *src; 2421 2422 /** Number of sources */ 2423 unsigned num_srcs; 2424 2425 /** Number of components in the coordinate, if any */ 2426 unsigned coord_components; 2427 2428 /** True if the texture instruction acts on an array texture */ 2429 bool is_array; 2430 2431 /** True if the texture instruction performs a shadow comparison 2432 * 2433 * If this is true, the texture instruction must have a 2434 * nir_tex_src_comparator. 2435 */ 2436 bool is_shadow; 2437 2438 /** 2439 * If is_shadow is true, whether this is the old-style shadow that outputs 2440 * 4 components or the new-style shadow that outputs 1 component. 2441 */ 2442 bool is_new_style_shadow; 2443 2444 /** 2445 * True if this texture instruction should return a sparse residency code. 2446 * The code is in the last component of the result. 2447 */ 2448 bool is_sparse; 2449 2450 /** nir_texop_tg4 component selector 2451 * 2452 * This determines which RGBA component is gathered. 2453 */ 2454 unsigned component : 2; 2455 2456 /** Validation needs to know this for gradient component count */ 2457 unsigned array_is_lowered_cube : 1; 2458 2459 /** True if this tg4 instruction has an implicit LOD or LOD bias, instead of using level 0 */ 2460 unsigned is_gather_implicit_lod : 1; 2461 2462 /** Gather offsets */ 2463 int8_t tg4_offsets[4][2]; 2464 2465 /** True if the texture index or handle is not dynamically uniform */ 2466 bool texture_non_uniform; 2467 2468 /** True if the sampler index or handle is not dynamically uniform. 2469 * 2470 * This may be set when VK_EXT_descriptor_indexing is supported and the 2471 * appropriate capability is enabled. 
2472 *
2473 * This should always be false in GLSL (GLSL ES 3.20 says "When aggregated
2474 * into arrays within a shader, opaque types can only be indexed with a
2475 * dynamically uniform integral expression", and GLSL 4.60 says "When
2476 * aggregated into arrays within a shader, [texture, sampler, and
2477 * samplerShadow] types can only be indexed with a dynamically uniform
2478 * expression, or texture lookup will result in undefined values.").
2479 */
2480 bool sampler_non_uniform;
2481
2482 /** The texture index
2483 *
2484 * If this texture instruction has a nir_tex_src_texture_offset source,
2485 * then the texture index is given by texture_index + texture_offset.
2486 */
2487 unsigned texture_index;
2488
2489 /** The sampler index
2490 *
2491 * The following operations do not require a sampler and, as such, this
2492 * field should be ignored:
2493 *
2494 * - nir_texop_txf
2495 * - nir_texop_txf_ms
2496 * - nir_texop_txs
2497 * - nir_texop_query_levels
2498 * - nir_texop_texture_samples
2499 * - nir_texop_samples_identical
2500 *
2501 * If this texture instruction has a nir_tex_src_sampler_offset source,
2502 * then the sampler index is given by sampler_index + sampler_offset.
2503 */
2504 unsigned sampler_index;
2505
2506 /* Back-end specific flags, intended to be used in combination with
2507 * nir_tex_src_backend1/2 to provide additional hw-specific information
2508 * to the back-end compiler.
2509 */
2510 uint32_t backend_flags;
2511} nir_tex_instr;
2512
2513/**
2514 * Returns true if the texture operation requires a sampler as a general rule
2515 *
2516 * Note that the specific hw/driver backend could still require a sampler
2517 * object/configuration packet in any case, for some other reason.
2518 *
2519 * See also :c:member:`nir_tex_instr.sampler_index`.
2520 */
2521bool nir_tex_instr_need_sampler(const nir_tex_instr *instr);
2522
2523/** Returns the number of components returned by this nir_tex_instr
2524 *
2525 * Useful for code building texture instructions when you don't want to think
2526 * about how many components a particular texture op returns. This does not
2527 * include the sparse residency code.
2528 */
2529unsigned
2530nir_tex_instr_result_size(const nir_tex_instr *instr);
2531
2532/**
2533 * Returns the destination size of this nir_tex_instr including the sparse
2534 * residency code, if any.
2535 */
2536static inline unsigned
2537nir_tex_instr_dest_size(const nir_tex_instr *instr)
2538{
2539 /* One more component is needed for the residency code. */
2540 return nir_tex_instr_result_size(instr) + instr->is_sparse;
2541}
2542
2543/**
2544 * Returns true if this texture operation queries something about the texture
2545 * rather than actually sampling it.
2546 */
2547bool
2548nir_tex_instr_is_query(const nir_tex_instr *instr);
2549
2550/** Returns true if this texture instruction does implicit derivatives
2551 *
2552 * This is important as there are extra control-flow rules around derivatives
2553 * and texture instructions which perform them implicitly.
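 *
 * For example, a code-motion pass must not sink such an instruction into
 * divergent control flow. A sketch of the guard (pass-specific code, not
 * part of NIR):
 *
 *    if (instr->type == nir_instr_type_tex &&
 *        nir_tex_instr_has_implicit_derivative(nir_instr_as_tex(instr)))
 *       return false;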
2554 */
2555bool
2556nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr);
2557
2558/** Returns the ALU type of the given texture instruction source */
2559nir_alu_type
2560nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src);
2561
2562/**
2563 * Returns the number of components required by the given texture instruction
2564 * source
2565 */
2566unsigned
2567nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src);
2568
2569/**
2570 * Returns the index of the texture instruction source with the given
2571 * nir_tex_src_type or -1 if no such source exists.
2572 */
2573static inline int
2574nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
2575{
2576 for (unsigned i = 0; i < instr->num_srcs; i++)
2577 if (instr->src[i].src_type == type)
2578 return (int)i;
2579
2580 return -1;
2581}
2582
2583/** Adds a source to a texture instruction */
2584void nir_tex_instr_add_src(nir_tex_instr *tex,
2585 nir_tex_src_type src_type,
2586 nir_def *src);
2587
2588/** Removes a source from a texture instruction */
2589void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
2590
2591bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
2592
2593typedef struct {
2594 nir_instr instr;
2595
2596 nir_def def;
2597
2598 nir_const_value value[];
2599} nir_load_const_instr;
2600
2601typedef enum {
2602 /** Return from a function
2603 *
2604 * This instruction is a classic function return. It jumps to
2605 * nir_function_impl::end_block. No return value is provided in this
2606 * instruction. Instead, the function is expected to write any return
2607 * data to a deref passed in from the caller.
2608 */
2609 nir_jump_return,
2610
2611 /** Immediately exit the current shader
2612 *
2613 * This instruction is roughly the equivalent of C's "exit()" in that it
2614 * immediately terminates the current shader invocation. From a CFG
2615 * perspective, it looks like a jump to nir_function_impl::end_block but
2616 * it actually jumps to the end block of the shader entrypoint. A halt
2617 * instruction in the shader entrypoint itself is semantically identical
2618 * to a return.
2619 *
2620 * For shaders with built-in I/O, any outputs written prior to a halt
2621 * instruction remain written and any outputs not written prior to the
2622 * halt have undefined values. It does NOT cause an implicit discard of
2623 * written results. If one wants to discard results in a fragment shader,
2624 * for instance, a discard or demote intrinsic is required.
2625 */
2626 nir_jump_halt,
2627
2628 /** Break out of the inner-most loop
2629 *
2630 * This has the same semantics as C's "break" statement.
2631 */
2632 nir_jump_break,
2633
2634 /** Jump back to the top of the inner-most loop
2635 *
2636 * This has the same semantics as C's "continue" statement assuming that a
2637 * NIR loop is implemented as "while (1) { body }".
2638 */
2639 nir_jump_continue,
2640
2641 /** Jumps for unstructured CFG.
2642 *
2643 * As within an unstructured CFG we can't rely on block ordering, we need
2644 * to place explicit jumps at the end of every block.
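 *
 * For example, a block that falls through unconditionally ends in a
 * nir_jump_goto with a single target, while a two-way branch ends in a
 * nir_jump_goto_if whose condition selects between target and else_target
 * (see nir_jump_instr below).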
2645 */ 2646 nir_jump_goto, 2647 nir_jump_goto_if, 2648} nir_jump_type; 2649 2650typedef struct { 2651 nir_instr instr; 2652 nir_jump_type type; 2653 nir_src condition; 2654 struct nir_block *target; 2655 struct nir_block *else_target; 2656} nir_jump_instr; 2657 2658/* creates a new SSA variable in an undefined state */ 2659 2660typedef struct { 2661 nir_instr instr; 2662 nir_def def; 2663} nir_undef_instr; 2664 2665typedef struct { 2666 struct exec_node node; 2667 2668 /* The predecessor block corresponding to this source */ 2669 struct nir_block *pred; 2670 2671 nir_src src; 2672} nir_phi_src; 2673 2674#define nir_foreach_phi_src(phi_src, phi) \ 2675 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 2676#define nir_foreach_phi_src_safe(phi_src, phi) \ 2677 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 2678 2679typedef struct { 2680 nir_instr instr; 2681 2682 /** list of nir_phi_src */ 2683 struct exec_list srcs; 2684 2685 nir_def def; 2686} nir_phi_instr; 2687 2688static inline nir_phi_src * 2689nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block) 2690{ 2691 nir_foreach_phi_src(src, phi) { 2692 if (src->pred == block) 2693 return src; 2694 } 2695 2696 assert(!"Block is not a predecessor of phi."); 2697 return NULL; 2698} 2699 2700typedef struct { 2701 struct exec_node node; 2702 bool src_is_reg; 2703 bool dest_is_reg; 2704 nir_src src; 2705 union { 2706 nir_def def; 2707 nir_src reg; 2708 } dest; 2709} nir_parallel_copy_entry; 2710 2711#define nir_foreach_parallel_copy_entry(entry, pcopy) \ 2712 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 2713 2714typedef struct { 2715 nir_instr instr; 2716 2717 /* A list of nir_parallel_copy_entrys. The sources of all of the 2718 * entries are copied to the corresponding destinations "in parallel". 2719 * In other words, if we have two entries: a -> b and b -> a, the values 2720 * get swapped. 2721 */ 2722 struct exec_list entries; 2723} nir_parallel_copy_instr; 2724 2725typedef enum nir_debug_info_type { 2726 nir_debug_info_src_loc, 2727 nir_debug_info_string, 2728} nir_debug_info_type; 2729 2730typedef enum nir_debug_info_source { 2731 nir_debug_info_spirv, 2732 nir_debug_info_nir, 2733} nir_debug_info_source; 2734 2735typedef struct nir_debug_info_instr { 2736 nir_instr instr; 2737 2738 nir_debug_info_type type; 2739 2740 union { 2741 struct { 2742 nir_src filename; 2743 /* 0 if only the spirv_offset is available. 
*/ 2744 uint32_t line; 2745 uint32_t column; 2746 2747 uint32_t spirv_offset; 2748 2749 nir_debug_info_source source; 2750 } src_loc; 2751 2752 uint16_t string_length; 2753 }; 2754 2755 nir_def def; 2756 2757 char string[]; 2758} nir_debug_info_instr; 2759 2760NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 2761 type, nir_instr_type_alu) 2762NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, 2763 type, nir_instr_type_deref) 2764NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 2765 type, nir_instr_type_call) 2766NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 2767 type, nir_instr_type_jump) 2768NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 2769 type, nir_instr_type_tex) 2770NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 2771 type, nir_instr_type_intrinsic) 2772NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 2773 type, nir_instr_type_load_const) 2774NIR_DEFINE_CAST(nir_instr_as_undef, nir_instr, nir_undef_instr, instr, 2775 type, nir_instr_type_undef) 2776NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 2777 type, nir_instr_type_phi) 2778NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 2779 nir_parallel_copy_instr, instr, 2780 type, nir_instr_type_parallel_copy) 2781NIR_DEFINE_CAST(nir_instr_as_debug_info, nir_instr, 2782 nir_debug_info_instr, instr, 2783 type, nir_instr_type_debug_info) 2784 2785#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \ 2786 static inline type \ 2787 nir_src_comp_as_##suffix(nir_src src, unsigned comp) \ 2788 { \ 2789 assert(nir_src_is_const(src)); \ 2790 nir_load_const_instr *load = \ 2791 nir_instr_as_load_const(src.ssa->parent_instr); \ 2792 assert(comp < load->def.num_components); \ 2793 return nir_const_value_as_##suffix(load->value[comp], \ 2794 load->def.bit_size); \ 2795 } \ 2796 \ 2797 static inline type \ 2798 nir_src_as_##suffix(nir_src src) \ 2799 { \ 2800 assert(nir_src_num_components(src) == 1); \ 2801 return nir_src_comp_as_##suffix(src, 0); \ 2802 } 2803 2804NIR_DEFINE_SRC_AS_CONST(int64_t, int) 2805NIR_DEFINE_SRC_AS_CONST(uint64_t, uint) 2806NIR_DEFINE_SRC_AS_CONST(bool, bool) 2807NIR_DEFINE_SRC_AS_CONST(double, float) 2808 2809#undef NIR_DEFINE_SRC_AS_CONST 2810 2811typedef struct { 2812 nir_def *def; 2813 unsigned comp; 2814} nir_scalar; 2815 2816static inline bool 2817nir_scalar_is_const(nir_scalar s) 2818{ 2819 return s.def->parent_instr->type == nir_instr_type_load_const; 2820} 2821 2822static inline bool 2823nir_scalar_is_undef(nir_scalar s) 2824{ 2825 return s.def->parent_instr->type == nir_instr_type_undef; 2826} 2827 2828static inline nir_const_value 2829nir_scalar_as_const_value(nir_scalar s) 2830{ 2831 assert(s.comp < s.def->num_components); 2832 nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); 2833 return load->value[s.comp]; 2834} 2835 2836#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ 2837 static inline type \ 2838 nir_scalar_as_##suffix(nir_scalar s) \ 2839 { \ 2840 return nir_const_value_as_##suffix( \ 2841 nir_scalar_as_const_value(s), s.def->bit_size); \ 2842 } 2843 2844NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) 2845NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) 2846NIR_DEFINE_SCALAR_AS_CONST(bool, bool) 2847NIR_DEFINE_SCALAR_AS_CONST(double, float) 2848 2849#undef NIR_DEFINE_SCALAR_AS_CONST 2850 2851static inline bool 2852nir_scalar_is_alu(nir_scalar s) 2853{ 2854 return s.def->parent_instr->type == nir_instr_type_alu; 
2855} 2856 2857static inline nir_op 2858nir_scalar_alu_op(nir_scalar s) 2859{ 2860 return nir_instr_as_alu(s.def->parent_instr)->op; 2861} 2862 2863static inline bool 2864nir_scalar_is_intrinsic(nir_scalar s) 2865{ 2866 return s.def->parent_instr->type == nir_instr_type_intrinsic; 2867} 2868 2869static inline nir_intrinsic_op 2870nir_scalar_intrinsic_op(nir_scalar s) 2871{ 2872 return nir_instr_as_intrinsic(s.def->parent_instr)->intrinsic; 2873} 2874 2875static inline nir_scalar 2876nir_scalar_chase_alu_src(nir_scalar s, unsigned alu_src_idx) 2877{ 2878 nir_scalar out = { NULL, 0 }; 2879 2880 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 2881 assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); 2882 2883 /* Our component must be written */ 2884 assert(s.comp < s.def->num_components); 2885 2886 out.def = alu->src[alu_src_idx].src.ssa; 2887 2888 if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 2889 /* The ALU src is unsized so the source component follows the 2890 * destination component. 2891 */ 2892 out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 2893 } else { 2894 /* This is a sized source so all source components work together to 2895 * produce all the destination components. Since we need to return a 2896 * scalar, this only works if the source is a scalar. 2897 */ 2898 assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 2899 out.comp = alu->src[alu_src_idx].swizzle[0]; 2900 } 2901 assert(out.comp < out.def->num_components); 2902 2903 return out; 2904} 2905 2906nir_scalar nir_scalar_chase_movs(nir_scalar s); 2907 2908static inline nir_scalar 2909nir_get_scalar(nir_def *def, unsigned channel) 2910{ 2911 nir_scalar s = { def, channel }; 2912 return s; 2913} 2914 2915/** Returns a nir_scalar where we've followed the bit-exact mov/vec use chain to the original definition */ 2916static inline nir_scalar 2917nir_scalar_resolved(nir_def *def, unsigned channel) 2918{ 2919 return nir_scalar_chase_movs(nir_get_scalar(def, channel)); 2920} 2921 2922static inline bool 2923nir_scalar_equal(nir_scalar s1, nir_scalar s2) 2924{ 2925 return s1.def == s2.def && s1.comp == s2.comp; 2926} 2927 2928static inline uint64_t 2929nir_alu_src_as_uint(nir_alu_src src) 2930{ 2931 nir_scalar scalar = nir_get_scalar(src.src.ssa, src.swizzle[0]); 2932 return nir_scalar_as_uint(scalar); 2933} 2934 2935typedef struct { 2936 bool success; 2937 2938 nir_variable *var; 2939 unsigned desc_set; 2940 unsigned binding; 2941 unsigned num_indices; 2942 nir_src indices[4]; 2943 bool read_first_invocation; 2944} nir_binding; 2945 2946nir_binding nir_chase_binding(nir_src rsrc); 2947nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding); 2948 2949/* 2950 * Control flow 2951 * 2952 * Control flow consists of a tree of control flow nodes, which include 2953 * if-statements and loops. The leaves of the tree are basic blocks, lists of 2954 * instructions that always run start-to-finish. Each basic block also keeps 2955 * track of its successors (blocks which may run immediately after the current 2956 * block) and predecessors (blocks which could have run immediately before the 2957 * current block). Each function also has a start block and an end block which 2958 * all return statements point to (which is always empty). Together, all the 2959 * blocks with their predecessors and successors make up the control flow 2960 * graph (CFG) of the function. 
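 * For example, "a; if (c) { b; } else { d; } e;" becomes block {a}, an if
 * node whose branches hold block {b} and block {d}, and block {e}; block
 * {a} has successors {b} and {d}, and each of those has the single
 * successor {e}.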
There are helpers that modify the tree of
2961 * control flow nodes while modifying the CFG appropriately; these should be
2962 * used instead of modifying the tree directly.
2963 */
2964
2965typedef enum {
2966 nir_cf_node_block,
2967 nir_cf_node_if,
2968 nir_cf_node_loop,
2969 nir_cf_node_function
2970} nir_cf_node_type;
2971
2972typedef struct nir_cf_node {
2973 struct exec_node node;
2974 nir_cf_node_type type;
2975 struct nir_cf_node *parent;
2976} nir_cf_node;
2977
2978typedef struct nir_block {
2979 nir_cf_node cf_node;
2980
2981 /** list of nir_instr */
2982 struct exec_list instr_list;
2983
2984 /** generic block index; generated by nir_index_blocks */
2985 unsigned index;
2986
2987 /* This indicates whether the block or any parent block is executed
2988 * conditionally and whether the condition uses a divergent value.
2989 */
2990 bool divergent;
2991
2992 /*
2993 * Each block can only have up to 2 successors, so we put them in a simple
2994 * array - no need for anything more complicated.
2995 */
2996 struct nir_block *successors[2];
2997
2998 /* Set of nir_block predecessors in the CFG */
2999 struct set *predecessors;
3000
3001 /*
3002 * this node's immediate dominator in the dominance tree - set to NULL for
3003 * the start block and any unreachable blocks.
3004 */
3005 struct nir_block *imm_dom;
3006
3007 /* This node's children in the dominance tree */
3008 unsigned num_dom_children;
3009 struct nir_block **dom_children;
3010
3011 /* Set of nir_blocks on the dominance frontier of this block */
3012 struct set *dom_frontier;
3013
3014 /*
3015 * These two indices have the property that dom_{pre,post}_index for each
3016 * child of this block in the dominance tree will always be between
3017 * dom_pre_index and dom_post_index for this block, which makes testing if
3018 * a given block is dominated by another block an O(1) operation.
3019 */
3020 uint32_t dom_pre_index, dom_post_index;
3021
3022 /**
3023 * Value just before the first nir_instr->index in the block, but after
3024 * the end_ip of any predecessor block.
3025 */
3026 uint32_t start_ip;
3027 /**
3028 * Value just after the last nir_instr->index in the block, but before the
3029 * start_ip of any successor block.
3030 */
3031 uint32_t end_ip;
3032
3033 /* SSA def live in and out for this block; used for liveness analysis.
3034 * Indexed by ssa_def->index 3035 */ 3036 BITSET_WORD *live_in; 3037 BITSET_WORD *live_out; 3038} nir_block; 3039 3040static inline bool 3041nir_block_is_reachable(nir_block *b) 3042{ 3043 /* See also nir_block_dominates */ 3044 return b->dom_post_index != 0; 3045} 3046 3047static inline nir_instr * 3048nir_block_first_instr(nir_block *block) 3049{ 3050 struct exec_node *head = exec_list_get_head(&block->instr_list); 3051 return exec_node_data(nir_instr, head, node); 3052} 3053 3054static inline nir_instr * 3055nir_block_last_instr(nir_block *block) 3056{ 3057 struct exec_node *tail = exec_list_get_tail(&block->instr_list); 3058 return exec_node_data(nir_instr, tail, node); 3059} 3060 3061static inline bool 3062nir_block_ends_in_jump(nir_block *block) 3063{ 3064 return !exec_list_is_empty(&block->instr_list) && 3065 nir_block_last_instr(block)->type == nir_instr_type_jump; 3066} 3067 3068static inline bool 3069nir_block_ends_in_return_or_halt(nir_block *block) 3070{ 3071 if (exec_list_is_empty(&block->instr_list)) 3072 return false; 3073 3074 nir_instr *instr = nir_block_last_instr(block); 3075 if (instr->type != nir_instr_type_jump) 3076 return false; 3077 3078 nir_jump_instr *jump_instr = nir_instr_as_jump(instr); 3079 return jump_instr->type == nir_jump_return || 3080 jump_instr->type == nir_jump_halt; 3081} 3082 3083static inline bool 3084nir_block_ends_in_break(nir_block *block) 3085{ 3086 if (exec_list_is_empty(&block->instr_list)) 3087 return false; 3088 3089 nir_instr *instr = nir_block_last_instr(block); 3090 return instr->type == nir_instr_type_jump && 3091 nir_instr_as_jump(instr)->type == nir_jump_break; 3092} 3093 3094bool nir_block_contains_work(nir_block *block); 3095 3096#define nir_foreach_instr(instr, block) \ 3097 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 3098#define nir_foreach_instr_reverse(instr, block) \ 3099 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 3100#define nir_foreach_instr_safe(instr, block) \ 3101 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 3102#define nir_foreach_instr_reverse_safe(instr, block) \ 3103 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 3104 3105/* Phis come first in the block */ 3106static inline nir_phi_instr * 3107nir_first_phi_in_block(nir_block *block) 3108{ 3109 nir_foreach_instr(instr, block) { 3110 if (instr->type == nir_instr_type_phi) 3111 return nir_instr_as_phi(instr); 3112 else 3113 return NULL; 3114 } 3115 3116 return NULL; 3117} 3118 3119static inline nir_phi_instr * 3120nir_next_phi(nir_phi_instr *phi) 3121{ 3122 nir_instr *next = nir_instr_next(&phi->instr); 3123 3124 if (next && next->type == nir_instr_type_phi) 3125 return nir_instr_as_phi(next); 3126 else 3127 return NULL; 3128} 3129 3130#define nir_foreach_phi(instr, block) \ 3131 for (nir_phi_instr *instr = nir_first_phi_in_block(block); instr != NULL; \ 3132 instr = nir_next_phi(instr)) 3133 3134#define nir_foreach_phi_safe(instr, block) \ 3135 for (nir_phi_instr *instr = nir_first_phi_in_block(block), \ 3136 *__next = instr ? nir_next_phi(instr) : NULL; \ 3137 instr != NULL; \ 3138 instr = __next, __next = instr ? 
nir_next_phi(instr) : NULL)
3139
3140static inline nir_phi_instr *
3141nir_block_last_phi_instr(nir_block *block)
3142{
3143 nir_phi_instr *last_phi = NULL;
3144 nir_foreach_phi(instr, block)
3145 last_phi = instr;
3146
3147 return last_phi;
3148}
3149
3150typedef enum {
3151 nir_selection_control_none = 0x0,
3152
3153 /**
3154 * Defined by SPIR-V spec 3.22 "Selection Control".
3155 * The application prefers to remove control flow.
3156 */
3157 nir_selection_control_flatten = 0x1,
3158
3159 /**
3160 * Defined by SPIR-V spec 3.22 "Selection Control".
3161 * The application prefers to keep control flow.
3162 */
3163 nir_selection_control_dont_flatten = 0x2,
3164
3165 /**
3166 * May be applied by the compiler stack when it knows
3167 * that a branch is divergent, and:
3168 * - either both the if and else are always taken
3169 * - the if or else is empty and the other is always taken
3170 */
3171 nir_selection_control_divergent_always_taken = 0x3,
3172} nir_selection_control;
3173
3174typedef struct nir_if {
3175 nir_cf_node cf_node;
3176 nir_src condition;
3177 nir_selection_control control;
3178
3179 /** list of nir_cf_node */
3180 struct exec_list then_list;
3181
3182 /** list of nir_cf_node */
3183 struct exec_list else_list;
3184} nir_if;
3185
3186typedef struct {
3187 nir_if *nif;
3188
3189 /** Condition instruction that contains the induction variable */
3190 nir_instr *conditional_instr;
3191
3192 /** Block within ::nif that has the break instruction. */
3193 nir_block *break_block;
3194
3195 /** Last block for the then- or else-path that does not contain the break. */
3196 nir_block *continue_from_block;
3197
3198 /** True when ::break_block is in the else-path of ::nif. */
3199 bool continue_from_then;
3200 bool induction_rhs;
3201
3202 /* This is true if the terminator's exact trip count is unknown. For
3203 * example:
3204 *
3205 * for (int i = 0; i < imin(x, 4); i++)
3206 * ...
3207 *
3208 * Here loop analysis would have set a max_trip_count of 4; however, we
3209 * don't know for sure that this is the exact trip count.
3210 */
3211 bool exact_trip_count_unknown;
3212
3213 struct list_head loop_terminator_link;
3214} nir_loop_terminator;
3215
3216typedef struct {
3217 /* SSA def of the phi-node associated with this induction variable. */
3218 nir_def *basis;
3219
3220 /* SSA def of the increment of the induction variable. */
3221 nir_def *def;
3222
3223 /* Init statement */
3224 nir_src *init_src;
3225
3226 /* Update statement */
3227 nir_alu_src *update_src;
3228} nir_loop_induction_variable;
3229
3230typedef struct {
3231 /* Estimated cost (in number of instructions) of the loop */
3232 unsigned instr_cost;
3233
3234 /* Contains fp64 ops that will be lowered */
3235 bool has_soft_fp64;
3236
3237 /* Guessed trip count based on array indexing */
3238 unsigned guessed_trip_count;
3239
3240 /* Maximum number of times the loop is run (if known) */
3241 unsigned max_trip_count;
3242
3243 /* Do we know the exact number of times the loop will be run */
3244 bool exact_trip_count_known;
3245
3246 /* Unroll the loop regardless of its size */
3247 bool force_unroll;
3248
3249 /* Does the loop contain complex loop terminators, continues or other
3250 * complex behaviours? If this is true we can't rely on
3251 * loop_terminator_list to be complete or accurate.
3252 */
3253 bool complex_loop;
3254
3255 nir_loop_terminator *limiting_terminator;
3256
3257 /* A list of loop_terminators terminating this loop.
 */
3258 struct list_head loop_terminator_list;
3259
3260 /* hash table of induction variables for this loop */
3261 struct hash_table *induction_vars;
3262} nir_loop_info;
3263
3264typedef enum {
3265 nir_loop_control_none = 0x0,
3266 nir_loop_control_unroll = 0x1,
3267 nir_loop_control_dont_unroll = 0x2,
3268} nir_loop_control;
3269
3270typedef struct {
3271 nir_cf_node cf_node;
3272
3273 /** list of nir_cf_node */
3274 struct exec_list body;
3275
3276 /** (optional) list of nir_cf_node */
3277 struct exec_list continue_list;
3278
3279 nir_loop_info *info;
3280 nir_loop_control control;
3281 bool partially_unrolled;
3282
3283 /**
3284 * Whether some loop-active invocations might take a different control-flow path:
3285 * divergent_continue indicates that a continue statement might be taken by
3286 * only some of the loop-active invocations. A subsequent break is always
3287 * considered divergent.
3288 */
3289 bool divergent_continue;
3290 bool divergent_break;
3291} nir_loop;
3292
3293static inline bool
3294nir_loop_is_divergent(nir_loop *loop)
3295{
3296 return loop->divergent_continue || loop->divergent_break;
3297}
3298
3299/**
3300 * Various bits of metadata that may be created or required by
3301 * optimization and analysis passes
3302 */
3303typedef enum {
3304 nir_metadata_none = 0x0,
3305
3306 /** Indicates that nir_block::index values are valid.
3307 *
3308 * The start block has index 0 and they increase through a natural walk of
3309 * the CFG. nir_function_impl::num_blocks is the number of blocks and
3310 * every block index is in the range [0, nir_function_impl::num_blocks].
3311 *
3312 * A pass can preserve this metadata type if it doesn't touch the CFG.
3313 */
3314 nir_metadata_block_index = 0x1,
3315
3316 /** Indicates that block dominance information is valid
3317 *
3318 * This includes:
3319 *
3320 * - nir_block::num_dom_children
3321 * - nir_block::dom_children
3322 * - nir_block::dom_frontier
3323 * - nir_block::dom_pre_index
3324 * - nir_block::dom_post_index
3325 *
3326 * A pass can preserve this metadata type if it doesn't touch the CFG.
3327 */
3328 nir_metadata_dominance = 0x2,
3329
3330 /** Indicates that SSA def data-flow liveness information is valid
3331 *
3332 * This includes:
3333 *
3334 * - nir_block::live_in
3335 * - nir_block::live_out
3336 *
3337 * A pass can preserve this metadata type if it never adds or removes any
3338 * SSA defs or uses of SSA defs (most passes shouldn't preserve this
3339 * metadata type).
3340 */
3341 nir_metadata_live_defs = 0x4,
3342
3343 /** A dummy metadata value to track when a pass forgot to call
3344 * nir_metadata_preserve.
3345 *
3346 * A pass should always clear this value even if it doesn't make any
3347 * progress to indicate that it thought about preserving metadata.
3348 */
3349 nir_metadata_not_properly_reset = 0x8,
3350
3351 /** Indicates that loop analysis information is valid.
3352 *
3353 * This includes everything pointed to by nir_loop::info.
3354 *
3355 * A pass can preserve this metadata type if it is guaranteed to not affect
3356 * any loop metadata. However, since loop metadata includes things like
3357 * loop counts which depend on arithmetic in the loop, this is very hard to
3358 * determine. Most passes shouldn't preserve this metadata type.
3359 */
3360 nir_metadata_loop_analysis = 0x10,
3361
3362 /** Indicates that nir_instr::index values are valid.
3363 *
3364 * The start instruction has index 0 and they increase through a natural
3365 * walk of instructions in blocks in the CFG.
The indices may have holes
3366 * after passes such as DCE.
3367 *
3368 * A pass can preserve this metadata type if it never adds or moves any
3369 * instructions (most passes shouldn't preserve this metadata type), but
3370 * can preserve it if it only removes instructions.
3371 */
3372 nir_metadata_instr_index = 0x20,
3373
3374 /** All control flow metadata
3375 *
3376 * This includes all metadata preserved by a pass that preserves control flow
3377 * but modifies instructions. For example, a pass using
3378 * nir_shader_instructions_pass will typically preserve this if it does not
3379 * insert control flow.
3380 *
3381 * This is the most common metadata set to preserve, so it has its own alias.
3382 */
3383 nir_metadata_control_flow = nir_metadata_block_index |
3384 nir_metadata_dominance,
3385
3386 /** All metadata
3387 *
3388 * This includes all nir_metadata flags except not_properly_reset. Passes
3389 * which do not change the shader in any way should call
3390 *
3391 * nir_metadata_preserve(impl, nir_metadata_all);
3392 */
3393 nir_metadata_all = ~nir_metadata_not_properly_reset,
3394} nir_metadata;
3395MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
3396
3397typedef struct {
3398 nir_cf_node cf_node;
3399
3400 /** pointer to the function of which this is an implementation */
3401 struct nir_function *function;
3402
3403 /**
3404 * For entrypoints, a pointer to a nir_function_impl which runs before
3405 * it, once per draw or dispatch, communicating via store_preamble and
3406 * load_preamble intrinsics. If NULL then there is no preamble.
3407 */
3408 struct nir_function *preamble;
3409
3410 /** list of nir_cf_node */
3411 struct exec_list body;
3412
3413 nir_block *end_block;
3414
3415 /** list for all local variables in the function */
3416 struct exec_list locals;
3417
3418 /** next available SSA value index */
3419 unsigned ssa_alloc;
3420
3421 /* total number of basic blocks, only valid when block_index_dirty = false */
3422 unsigned num_blocks;
3423
3424 /** True if this nir_function_impl uses structured control-flow
3425 *
3426 * Structured nir_function_impls have different validation rules.
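 *
 * For example, OpenCL-style SPIR-V can produce an unstructured impl that
 * ends every block in nir_jump_goto/nir_jump_goto_if; a structurizing pass
 * (e.g. nir_lower_goto_ifs) rewrites it into nir_if/nir_loop nodes, after
 * which this flag is set again.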
3427 */ 3428 bool structured; 3429 3430 nir_metadata valid_metadata; 3431 nir_variable_mode loop_analysis_indirect_mask; 3432 bool loop_analysis_force_unroll_sampler_indirect; 3433} nir_function_impl; 3434 3435#define nir_foreach_function_temp_variable(var, impl) \ 3436 foreach_list_typed(nir_variable, var, node, &(impl)->locals) 3437 3438#define nir_foreach_function_temp_variable_safe(var, impl) \ 3439 foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals) 3440 3441ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 3442nir_start_block(nir_function_impl *impl) 3443{ 3444 return (nir_block *)impl->body.head_sentinel.next; 3445} 3446 3447ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 3448nir_impl_last_block(nir_function_impl *impl) 3449{ 3450 return (nir_block *)impl->body.tail_sentinel.prev; 3451} 3452 3453static inline nir_cf_node * 3454nir_cf_node_next(nir_cf_node *node) 3455{ 3456 struct exec_node *next = exec_node_get_next(&node->node); 3457 if (exec_node_is_tail_sentinel(next)) 3458 return NULL; 3459 else 3460 return exec_node_data(nir_cf_node, next, node); 3461} 3462 3463static inline nir_cf_node * 3464nir_cf_node_prev(nir_cf_node *node) 3465{ 3466 struct exec_node *prev = exec_node_get_prev(&node->node); 3467 if (exec_node_is_head_sentinel(prev)) 3468 return NULL; 3469 else 3470 return exec_node_data(nir_cf_node, prev, node); 3471} 3472 3473static inline bool 3474nir_cf_node_is_first(const nir_cf_node *node) 3475{ 3476 return exec_node_is_head_sentinel(node->node.prev); 3477} 3478 3479static inline bool 3480nir_cf_node_is_last(const nir_cf_node *node) 3481{ 3482 return exec_node_is_tail_sentinel(node->node.next); 3483} 3484 3485NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node, 3486 type, nir_cf_node_block) 3487NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node, 3488 type, nir_cf_node_if) 3489NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node, 3490 type, nir_cf_node_loop) 3491NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, 3492 nir_function_impl, cf_node, type, nir_cf_node_function) 3493 3494static inline nir_block * 3495nir_if_first_then_block(nir_if *if_stmt) 3496{ 3497 struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 3498 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3499} 3500 3501static inline nir_block * 3502nir_if_last_then_block(nir_if *if_stmt) 3503{ 3504 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 3505 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3506} 3507 3508static inline nir_block * 3509nir_if_first_else_block(nir_if *if_stmt) 3510{ 3511 struct exec_node *head = exec_list_get_head(&if_stmt->else_list); 3512 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3513} 3514 3515static inline nir_block * 3516nir_if_last_else_block(nir_if *if_stmt) 3517{ 3518 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 3519 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3520} 3521 3522static inline nir_block * 3523nir_loop_first_block(nir_loop *loop) 3524{ 3525 struct exec_node *head = exec_list_get_head(&loop->body); 3526 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3527} 3528 3529static inline nir_block * 3530nir_loop_last_block(nir_loop *loop) 3531{ 3532 struct exec_node *tail = exec_list_get_tail(&loop->body); 3533 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3534} 3535 3536static inline bool 
3537nir_loop_has_continue_construct(const nir_loop *loop) 3538{ 3539 return !exec_list_is_empty(&loop->continue_list); 3540} 3541 3542static inline nir_block * 3543nir_loop_first_continue_block(nir_loop *loop) 3544{ 3545 assert(nir_loop_has_continue_construct(loop)); 3546 struct exec_node *head = exec_list_get_head(&loop->continue_list); 3547 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3548} 3549 3550static inline nir_block * 3551nir_loop_last_continue_block(nir_loop *loop) 3552{ 3553 assert(nir_loop_has_continue_construct(loop)); 3554 struct exec_node *tail = exec_list_get_tail(&loop->continue_list); 3555 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3556} 3557 3558/** 3559 * Return the target block of a nir_jump_continue statement 3560 */ 3561static inline nir_block * 3562nir_loop_continue_target(nir_loop *loop) 3563{ 3564 if (nir_loop_has_continue_construct(loop)) 3565 return nir_loop_first_continue_block(loop); 3566 else 3567 return nir_loop_first_block(loop); 3568} 3569 3570/** 3571 * Return true if this list of cf_nodes contains a single empty block. 3572 */ 3573static inline bool 3574nir_cf_list_is_empty_block(struct exec_list *cf_list) 3575{ 3576 if (exec_list_is_singular(cf_list)) { 3577 struct exec_node *head = exec_list_get_head(cf_list); 3578 nir_block *block = 3579 nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3580 return exec_list_is_empty(&block->instr_list); 3581 } 3582 return false; 3583} 3584 3585typedef struct { 3586 uint8_t num_components; 3587 uint8_t bit_size; 3588 3589 /* True if this parameter is a deref used for returning values */ 3590 bool is_return; 3591 3592 bool implicit_conversion_prohibited; 3593 3594 /* True if this parameter is not divergent. This is inverted to make 3595 * parameters divergent by default unless explicitly specified 3596 * otherwise. 3597 */ 3598 bool is_uniform; 3599 3600 nir_variable_mode mode; 3601 3602 /* Drivers may optionally stash flags here describing the parameter. 3603 * For example, this might encode whether the driver expects the value 3604 * to be uniform or divergent, if the driver handles divergent parameters 3605 * differently from uniform ones. 3606 * 3607 * NIR will preserve this value but does not interpret it in any way. 3608 */ 3609 uint32_t driver_attributes; 3610 3611 /* The type of the function param */ 3612 const struct glsl_type *type; 3613 3614 /* Name if known, null if unknown */ 3615 const char *name; 3616} nir_parameter; 3617 3618typedef struct nir_function { 3619 struct exec_node node; 3620 3621 const char *name; 3622 struct nir_shader *shader; 3623 3624 unsigned num_params; 3625 nir_parameter *params; 3626 3627 /** The implementation of this function. 3628 * 3629 * If the function is only declared and not implemented, this is NULL. 3630 * 3631 * Unless setting to NULL or NIR_SERIALIZE_FUNC_HAS_IMPL, set with 3632 * nir_function_set_impl to maintain IR invariants. 3633 */ 3634 nir_function_impl *impl; 3635 3636 /* Drivers may optionally stash flags here describing the function call. 3637 * For example, this might encode the ABI used for the call if a driver 3638 * supports multiple ABIs. 3639 * 3640 * NIR will preserve this value but does not interpret it in any way. 
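 *
 * A sketch of hypothetical driver code (MY_ABI_FAST_CALL is an illustrative
 * driver-side flag, not a NIR name):
 *
 *    func->driver_attributes |= MY_ABI_FAST_CALL;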
3641 */ 3642 uint32_t driver_attributes; 3643 3644 bool is_entrypoint; 3645 /* from SPIR-V linkage, only for libraries */ 3646 bool is_exported; 3647 bool is_preamble; 3648 /* from SPIR-V function control */ 3649 bool should_inline; 3650 bool dont_inline; /* from SPIR-V */ 3651 3652 /* Static workgroup size, if this is a kernel function in a library of OpenCL 3653 * kernels. Normally, the size in the shader info is used instead. 3654 */ 3655 unsigned workgroup_size[3]; 3656 3657 /** 3658 * Is this function a subroutine type declaration 3659 * e.g. subroutine void type1(float arg1); 3660 */ 3661 bool is_subroutine; 3662 3663 /* Temporary function created to wrap global instructions before they can 3664 * be inlined into the main function. 3665 */ 3666 bool is_tmp_globals_wrapper; 3667 3668 /** 3669 * Is this function associated to a subroutine type 3670 * e.g. subroutine (type1, type2) function_name { function_body }; 3671 * would have num_subroutine_types 2, 3672 * and pointers to the type1 and type2 types. 3673 */ 3674 int num_subroutine_types; 3675 const struct glsl_type **subroutine_types; 3676 3677 int subroutine_index; 3678 3679 /* A temporary for passes to use for storing flags. */ 3680 uint32_t pass_flags; 3681} nir_function; 3682 3683typedef enum { 3684 nir_lower_imul64 = (1 << 0), 3685 nir_lower_isign64 = (1 << 1), 3686 /** Lower all int64 modulus and division opcodes */ 3687 nir_lower_divmod64 = (1 << 2), 3688 /** Lower all 64-bit umul_high and imul_high opcodes */ 3689 nir_lower_imul_high64 = (1 << 3), 3690 nir_lower_bcsel64 = (1 << 4), 3691 nir_lower_icmp64 = (1 << 5), 3692 nir_lower_iadd64 = (1 << 6), 3693 nir_lower_iabs64 = (1 << 7), 3694 nir_lower_ineg64 = (1 << 8), 3695 nir_lower_logic64 = (1 << 9), 3696 nir_lower_minmax64 = (1 << 10), 3697 nir_lower_shift64 = (1 << 11), 3698 nir_lower_imul_2x32_64 = (1 << 12), 3699 nir_lower_extract64 = (1 << 13), 3700 nir_lower_ufind_msb64 = (1 << 14), 3701 nir_lower_bit_count64 = (1 << 15), 3702 nir_lower_subgroup_shuffle64 = (1 << 16), 3703 nir_lower_scan_reduce_bitwise64 = (1 << 17), 3704 nir_lower_scan_reduce_iadd64 = (1 << 18), 3705 nir_lower_vote_ieq64 = (1 << 19), 3706 nir_lower_usub_sat64 = (1 << 20), 3707 nir_lower_iadd_sat64 = (1 << 21), 3708 nir_lower_find_lsb64 = (1 << 22), 3709 nir_lower_conv64 = (1 << 23), 3710 nir_lower_uadd_sat64 = (1 << 24), 3711 nir_lower_iadd3_64 = (1 << 25), 3712} nir_lower_int64_options; 3713 3714typedef enum { 3715 nir_lower_drcp = (1 << 0), 3716 nir_lower_dsqrt = (1 << 1), 3717 nir_lower_drsq = (1 << 2), 3718 nir_lower_dtrunc = (1 << 3), 3719 nir_lower_dfloor = (1 << 4), 3720 nir_lower_dceil = (1 << 5), 3721 nir_lower_dfract = (1 << 6), 3722 nir_lower_dround_even = (1 << 7), 3723 nir_lower_dmod = (1 << 8), 3724 nir_lower_dsub = (1 << 9), 3725 nir_lower_ddiv = (1 << 10), 3726 nir_lower_dsign = (1 << 11), 3727 nir_lower_dminmax = (1 << 12), 3728 nir_lower_dsat = (1 << 13), 3729 nir_lower_fp64_full_software = (1 << 14), 3730} nir_lower_doubles_options; 3731 3732typedef enum { 3733 nir_divergence_single_prim_per_subgroup = (1 << 0), 3734 nir_divergence_single_patch_per_tcs_subgroup = (1 << 1), 3735 nir_divergence_single_patch_per_tes_subgroup = (1 << 2), 3736 nir_divergence_view_index_uniform = (1 << 3), 3737 nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4), 3738 nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5), 3739 nir_divergence_shader_record_ptr_uniform = (1 << 6), 3740 nir_divergence_uniform_load_tears = (1 << 7), 3741 /* If used, this allows phis for divergent 
merges with undef and a uniform source to be considered uniform */ 3742 nir_divergence_ignore_undef_if_phi_srcs = (1 << 8), 3743} nir_divergence_options; 3744 3745typedef enum { 3746 /** 3747 * Whether a fragment shader can interpolate the same input multiple times 3748 * with different modes (smooth, noperspective) and locations (pixel, 3749 * centroid, sample, at_offset, at_sample), excluding the flat mode. 3750 * 3751 * This matches AMD GPU flexibility and limitations and is a superset of 3752 * the GL4 requirement that each input can be interpolated at its specified 3753 * location, and then also as centroid, at_offset, and at_sample. 3754 */ 3755 nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0), 3756 3757 /** 3758 * nir_opt_varyings compacts (relocates) components of varyings by 3759 * rewriting their locations completely, effectively moving components of 3760 * varyings between slots. This option forces nir_opt_varyings to make 3761 * VARYING_SLOT_POS unused by moving its contents to VARn if the consumer 3762 * is not FS. If this option is not set and POS is unused, it moves 3763 * components of VARn to POS until it's fully used. 3764 */ 3765 nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1), 3766 3767 nir_io_16bit_input_output_support = BITFIELD_BIT(2), 3768 3769 /** 3770 * Implement mediump inputs and outputs as normal 32-bit IO. 3771 * Causes the mediump flag to be not set for IO semantics, essentially 3772 * destroying any mediump-related IO information in the shader. 3773 */ 3774 nir_io_mediump_is_32bit = BITFIELD_BIT(3), 3775 3776 /** 3777 * Whether nir_opt_vectorize_io should ignore FS inputs. 3778 */ 3779 nir_io_prefer_scalar_fs_inputs = BITFIELD_BIT(4), 3780 3781 /** 3782 * Whether interpolated fragment shader vec4 slots can use load_input for 3783 * a subset of its components to skip interpolation for those components. 3784 * The result of such load_input is a value from a random (not necessarily 3785 * provoking) vertex. If a value from the provoking vertex is required, 3786 * the vec4 slot should have no load_interpolated_input instructions. 3787 * 3788 * This exposes the AMD capability that allows packing flat inputs with 3789 * interpolated inputs in a limited number of cases. Normally, flat 3790 * components must be in a separate vec4 slot to get the value from 3791 * the provoking vertex. If the compiler can prove that all per-vertex 3792 * values are equal (convergent, i.e. the provoking vertex doesn't matter), 3793 * it can put such flat components into any interpolated vec4 slot. 3794 * 3795 * It should also be set if the hw can mix flat and interpolated components 3796 * in the same vec4 slot. 3797 * 3798 * This causes nir_opt_varyings to skip interpolation for all varyings 3799 * that are convergent, and enables better compaction and inter-shader code 3800 * motion for convergent varyings. 3801 */ 3802 nir_io_mix_convergent_flat_with_interpolated = BITFIELD_BIT(5), 3803 3804 /** 3805 * Whether src_type and dest_type of IO intrinsics are irrelevant and 3806 * should be ignored by nir_opt_vectorize_io. All drivers that always treat 3807 * load_input and store_output as untyped and load_interpolated_input as 3808 * float##bit_size should set this. 3809 */ 3810 nir_io_vectorizer_ignores_types = BITFIELD_BIT(6), 3811 3812 /** 3813 * Whether nir_opt_varyings should never promote convergent FS inputs 3814 * to flat. 
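 *
 * A driver opts in through its static compiler options, e.g. (sketch;
 * my_options is a hypothetical driver-owned struct):
 *
 *    static const nir_shader_compiler_options my_options = {
 *       .io_options = nir_io_always_interpolate_convergent_fs_inputs,
 *    };
 *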
3815 */ 3816 nir_io_always_interpolate_convergent_fs_inputs = BITFIELD_BIT(7), 3817 3818 /** 3819 * Whether the first assigned color channel component should be equal to 3820 * the first unused VARn component. 3821 * 3822 * For example, if the first unused VARn channel is VAR0.z, color channels 3823 * are assigned in this order: 3824 * COL0.z, COL0.w, COL0.x, COL0.y, COL1.z, COL1.w, COL1.x, COL1.y 3825 * 3826 * This allows certain drivers to merge outputs if each output sets 3827 * different components, for example 2 outputs writing VAR0.xy and COL0.z 3828 * will only use 1 HW output. 3829 */ 3830 nir_io_compaction_rotates_color_channels = BITFIELD_BIT(8), 3831 3832 /* Options affecting the GLSL compiler or Gallium are below. */ 3833 3834 /** 3835 * Lower load_deref/store_deref to load_input/store_output/etc. intrinsics. 3836 * This only affects GLSL compilation and Gallium. 3837 */ 3838 nir_io_has_intrinsics = BITFIELD_BIT(16), 3839 3840 /** 3841 * Don't run nir_opt_varyings and nir_opt_vectorize_io. 3842 * 3843 * This option is deprecated and is a hack. DO NOT USE. 3844 * Use MESA_GLSL_DISABLE_IO_OPT=1 instead. 3845 */ 3846 nir_io_dont_optimize = BITFIELD_BIT(17), 3847 3848 /** 3849 * Whether clip and cull distance arrays should be separate. If this is not 3850 * set, cull distances will be moved into VARYING_SLOT_CLIP_DISTn after clip 3851 * distances, and shader_info::clip_distance_array_size will be the index 3852 * of the first cull distance. nir_lower_clip_cull_distance_arrays does 3853 * that. 3854 */ 3855 nir_io_separate_clip_cull_distance_arrays = BITFIELD_BIT(18), 3856} nir_io_options; 3857 3858typedef enum { 3859 nir_lower_packing_op_pack_64_2x32, 3860 nir_lower_packing_op_unpack_64_2x32, 3861 nir_lower_packing_op_pack_64_4x16, 3862 nir_lower_packing_op_unpack_64_4x16, 3863 nir_lower_packing_op_pack_32_2x16, 3864 nir_lower_packing_op_unpack_32_2x16, 3865 nir_lower_packing_op_pack_32_4x8, 3866 nir_lower_packing_op_unpack_32_4x8, 3867 nir_lower_packing_num_ops, 3868} nir_lower_packing_op; 3869 3870/** An instruction filtering callback 3871 * 3872 * Returns true if the instruction should be processed and false otherwise. 3873 */ 3874typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *); 3875 3876/* Like nir_instr_filter_cb but specialized to intrinsics */ 3877typedef bool (*nir_intrin_filter_cb)(const nir_intrinsic_instr *, const void *); 3878 3879/** A vectorization width callback 3880 * 3881 * Returns the maximum vectorization width per instruction. 3882 * 0, if the instruction must not be modified. 3883 * 3884 * The vectorization width must be a power of 2. 3885 */ 3886typedef uint8_t (*nir_vectorize_cb)(const nir_instr *, const void *); 3887 3888typedef struct nir_shader_compiler_options { 3889 bool lower_fdiv; 3890 bool lower_ffma16; 3891 bool lower_ffma32; 3892 bool lower_ffma64; 3893 bool fuse_ffma16; 3894 bool fuse_ffma32; 3895 bool fuse_ffma64; 3896 bool lower_flrp16; 3897 bool lower_flrp32; 3898 /** Lowers flrp64 when the backend does not support doubles */ 3899 bool lower_flrp64; 3900 bool lower_fpow; 3901 bool lower_fsat; 3902 bool lower_fsqrt; 3903 bool lower_sincos; 3904 bool lower_fmod; 3905 /** Lowers ibitfield_extract/ubitfield_extract. */ 3906 bool lower_bitfield_extract; 3907 /** Lowers bitfield_insert. */ 3908 bool lower_bitfield_insert; 3909 /** Lowers bitfield_reverse to shifts. */ 3910 bool lower_bitfield_reverse; 3911 /** Lowers bit_count to shifts. */ 3912 bool lower_bit_count; 3913 /** Lowers ifind_msb.
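 *
 * When set, ifind_msb is lowered in terms of ufind_msb; a sketch of the
 * equivalent computation for a 32-bit source x, following the GLSL
 * findMSB() rule that negative inputs search for the highest 0 bit:
 *
 *    ufind_msb(x < 0 ? ~x : x)
 *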
*/ 3914 bool lower_ifind_msb; 3915 /** Lowers ufind_msb. */ 3916 bool lower_ufind_msb; 3917 /** Lowers find_lsb to ufind_msb and logic ops */ 3918 bool lower_find_lsb; 3919 bool lower_uadd_carry; 3920 bool lower_usub_borrow; 3921 /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ 3922 bool lower_mul_high; 3923 /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */ 3924 bool lower_fneg; 3925 /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */ 3926 bool lower_ineg; 3927 /** lowers fisnormal to alu ops. */ 3928 bool lower_fisnormal; 3929 3930 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ 3931 bool lower_scmp; 3932 3933 /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ 3934 bool lower_vector_cmp; 3935 3936 /** enable rules to avoid bit ops */ 3937 bool lower_bitops; 3938 3939 /** enables rules to lower isign to imin+imax */ 3940 bool lower_isign; 3941 3942 /** enables rules to lower fsign to fsub and flt */ 3943 bool lower_fsign; 3944 3945 /** enables rules to lower iabs to ineg+imax */ 3946 bool lower_iabs; 3947 3948 /** enable rules that avoid generating umax from signed integer ops */ 3949 bool lower_umax; 3950 3951 /** enable rules that avoid generating umin from signed integer ops */ 3952 bool lower_umin; 3953 3954 /* lower fmin/fmax with signed zero preserve to fmin/fmax with 3955 * no_signed_zero, for backends whose fmin/fmax implementations do not 3956 * implement IEEE-754-2019 semantics for signed zero. 3957 */ 3958 bool lower_fminmax_signed_zero; 3959 3960 /* lower fdph to fdot4 */ 3961 bool lower_fdph; 3962 3963 /** lower fdot to fmul and fsum/fadd. */ 3964 bool lower_fdot; 3965 3966 /* Does the native fdot instruction replicate its result for four 3967 * components? If so, then opt_algebraic_late will turn all fdotN 3968 * instructions into fdotN_replicated instructions. 3969 */ 3970 bool fdot_replicates; 3971 3972 /** lowers ffloor to fsub+ffract: */ 3973 bool lower_ffloor; 3974 3975 /** lowers ffract to fsub+ffloor: */ 3976 bool lower_ffract; 3977 3978 /** lowers fceil to fneg+ffloor+fneg: */ 3979 bool lower_fceil; 3980 3981 bool lower_ftrunc; 3982 3983 /** Lowers fround_even to ffract+feq+csel. 3984 * 3985 * Not correct in that it doesn't correctly handle the "_even" part of the 3986 * rounding, but good enough for DX9 array indexing handling on DX9-class 3987 * hardware. 3988 */ 3989 bool lower_fround_even; 3990 3991 bool lower_ldexp; 3992 3993 bool lower_pack_half_2x16; 3994 bool lower_pack_unorm_2x16; 3995 bool lower_pack_snorm_2x16; 3996 bool lower_pack_unorm_4x8; 3997 bool lower_pack_snorm_4x8; 3998 bool lower_pack_64_2x32; 3999 bool lower_pack_64_4x16; 4000 bool lower_pack_32_2x16; 4001 bool lower_pack_64_2x32_split; 4002 bool lower_pack_32_2x16_split; 4003 bool lower_unpack_half_2x16; 4004 bool lower_unpack_unorm_2x16; 4005 bool lower_unpack_snorm_2x16; 4006 bool lower_unpack_unorm_4x8; 4007 bool lower_unpack_snorm_4x8; 4008 bool lower_unpack_64_2x32_split; 4009 bool lower_unpack_32_2x16_split; 4010 4011 bool lower_pack_split; 4012 4013 bool lower_extract_byte; 4014 bool lower_extract_word; 4015 bool lower_insert_byte; 4016 bool lower_insert_word; 4017 4018 /* TODO: this flag is potentially useless, remove? 
*/ 4019 bool lower_all_io_to_temps; 4020 4021 /* Indicates that the driver only has zero-based vertex id */ 4022 bool vertex_id_zero_based; 4023 4024 /** 4025 * If enabled, gl_BaseVertex will be lowered as: 4026 * is_indexed_draw (~0/0) & firstvertex 4027 */ 4028 bool lower_base_vertex; 4029 4030 /** 4031 * If enabled, gl_HelperInvocation will be lowered as: 4032 * 4033 * !((1 << sample_id) & sample_mask_in)) 4034 * 4035 * This depends on some possibly hw implementation details, which may 4036 * not be true for all hw. In particular that the FS is only executed 4037 * for covered samples or for helper invocations. So, do not blindly 4038 * enable this option. 4039 * 4040 * Note: See also issue #22 in ARB_shader_image_load_store 4041 */ 4042 bool lower_helper_invocation; 4043 4044 /** 4045 * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: 4046 * 4047 * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 4048 * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 4049 */ 4050 bool optimize_sample_mask_in; 4051 4052 /** 4053 * Optimize load_front_face ? a : -a to load_front_face_fsign * a 4054 */ 4055 bool optimize_load_front_face_fsign; 4056 4057 /** 4058 * Optimize boolean reductions of quad broadcasts. This should only be enabled if 4059 * nir_intrinsic_reduce supports INCLUDE_HELPERS. 4060 */ 4061 bool optimize_quad_vote_to_reduce; 4062 4063 bool lower_cs_local_index_to_id; 4064 bool lower_cs_local_id_to_index; 4065 4066 /* Prevents lowering global_invocation_id to be in terms of workgroup_id */ 4067 bool has_cs_global_id; 4068 4069 bool lower_device_index_to_zero; 4070 4071 /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord. 4072 * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN 4073 * is GL_LOWER_LEFT. 4074 */ 4075 bool lower_wpos_pntc; 4076 4077 /** 4078 * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be 4079 * lowered to simple arithmetic. 4080 * 4081 * If this flag is set, the lowering will be applied to all bit-sizes of 4082 * these instructions. 4083 * 4084 * :c:member:`lower_hadd64` 4085 */ 4086 bool lower_hadd; 4087 4088 /** 4089 * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions 4090 * should be lowered to simple arithmetic. 4091 * 4092 * If this flag is set, the lowering will be applied to only 64-bit 4093 * versions of these instructions. 4094 * 4095 * :c:member:`lower_hadd` 4096 */ 4097 bool lower_hadd64; 4098 4099 /** 4100 * Set if nir_op_uadd_sat should be lowered to simple arithmetic. 4101 * 4102 * If this flag is set, the lowering will be applied to all bit-sizes of 4103 * these instructions. 4104 */ 4105 bool lower_uadd_sat; 4106 4107 /** 4108 * Set if nir_op_usub_sat should be lowered to simple arithmetic. 4109 * 4110 * If this flag is set, the lowering will be applied to all bit-sizes of 4111 * these instructions. 4112 */ 4113 bool lower_usub_sat; 4114 4115 /** 4116 * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple 4117 * arithmetic. 4118 * 4119 * If this flag is set, the lowering will be applied to all bit-sizes of 4120 * these instructions. 4121 */ 4122 bool lower_iadd_sat; 4123 4124 /** 4125 * Set if imul_32x16 and umul_32x16 should be lowered to simple 4126 * arithmetic. 4127 */ 4128 bool lower_mul_32x16; 4129 4130 bool vectorize_tess_levels; 4131 bool lower_to_scalar; 4132 nir_instr_filter_cb lower_to_scalar_filter; 4133 4134 /** 4135 * Disables potentially harmful algebraic transformations for architectures 4136 * with SIMD-within-a-register semantics. 
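 *
 * A width callback for nir_opt_vectorize() (see the note below) might
 * look like this sketch (hypothetical driver code), requesting 2-wide
 * vectorization of 16-bit ALU instructions and leaving the rest scalar:
 *
 *    static uint8_t
 *    my_vectorize_width(const nir_instr *instr, const void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 0;
 *       return nir_instr_as_alu(instr)->def.bit_size == 16 ? 2 : 1;
 *    }
 *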
4137 * 4138 * Note, to actually vectorize 16bit instructions, use nir_opt_vectorize() 4139 * with a suitable callback function. 4140 */ 4141 bool vectorize_vec2_16bit; 4142 4143 /** 4144 * Should the linker unify inputs_read/outputs_written between adjacent 4145 * shader stages which are linked into a single program? 4146 */ 4147 bool unify_interfaces; 4148 4149 /** 4150 * Whether nir_lower_io() will lower interpolateAt functions to 4151 * load_interpolated_input intrinsics. 4152 * 4153 * Unlike nir_lower_io_use_interpolated_input_intrinsics this will only 4154 * lower these functions and leave input load intrinsics untouched. 4155 */ 4156 bool lower_interpolate_at; 4157 4158 /* Lowers when 32x32->64 bit multiplication is not supported */ 4159 bool lower_mul_2x32_64; 4160 4161 /* Indicates that urol and uror are supported */ 4162 bool has_rotate8; 4163 bool has_rotate16; 4164 bool has_rotate32; 4165 4166 /** Backend supports shfr */ 4167 bool has_shfr32; 4168 4169 /** Backend supports ternary addition */ 4170 bool has_iadd3; 4171 4172 /** 4173 * Backend supports amul and would like them generated whenever 4174 * possible. This is stronger than has_imul24 for amul, but does not imply 4175 * support for imul24. 4176 */ 4177 bool has_amul; 4178 4179 /** 4180 * Backend supports imul24, and would like to use it (when possible) 4181 * for address/offset calculation. If true, driver should call 4182 * nir_lower_amul(). (If not set, amul will automatically be lowered 4183 * to imul.) 4184 */ 4185 bool has_imul24; 4186 4187 /** Backend supports umul24, if not set umul24 will automatically be lowered 4188 * to imul with masked inputs */ 4189 bool has_umul24; 4190 4191 /** Backend supports 32-bit imad */ 4192 bool has_imad32; 4193 4194 /** Backend supports umad24, if not set umad24 will automatically be lowered 4195 * to imul with masked inputs and iadd */ 4196 bool has_umad24; 4197 4198 /* Backend supports fused compare against zero and csel */ 4199 bool has_fused_comp_and_csel; 4200 /* Backend supports fused int eq/ne against zero and csel. */ 4201 bool has_icsel_eqz64; 4202 bool has_icsel_eqz32; 4203 bool has_icsel_eqz16; 4204 4205 /* Backend supports fneo, fequ, fltu, fgeu. */ 4206 bool has_fneo_fcmpu; 4207 4208 /* Backend supports ford and funord. */ 4209 bool has_ford_funord; 4210 4211 /** Backend supports fsub, if not set fsub will automatically be lowered to 4212 * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */ 4213 bool has_fsub; 4214 4215 /** Backend supports isub, if not set isub will automatically be lowered to 4216 * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */ 4217 bool has_isub; 4218 4219 /** Backend supports pack_32_4x8 or pack_32_4x8_split. */ 4220 bool has_pack_32_4x8; 4221 4222 /** Backend supports nir_load_texture_scale and prefers it over txs for nir 4223 * lowerings. */ 4224 bool has_texture_scaling; 4225 4226 /** Backend supports sdot_4x8_iadd. */ 4227 bool has_sdot_4x8; 4228 4229 /** Backend supports udot_4x8_uadd. */ 4230 bool has_udot_4x8; 4231 4232 /** Backend supports sudot_4x8_iadd. */ 4233 bool has_sudot_4x8; 4234 4235 /** Backend supports sdot_4x8_iadd_sat. */ 4236 bool has_sdot_4x8_sat; 4237 4238 /** Backend supports udot_4x8_uadd_sat. */ 4239 bool has_udot_4x8_sat; 4240 4241 /** Backend supports sudot_4x8_iadd_sat. */ 4242 bool has_sudot_4x8_sat; 4243 4244 /** Backend supports sdot_2x16 and udot_2x16 opcodes. 
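 *
 * For reference, sdot_2x16(a, b, acc) treats each 32-bit source as two
 * packed signed 16-bit values and computes:
 *
 *    a.lo * b.lo + a.hi * b.hi + acc
 *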
*/ 4245 bool has_dot_2x16; 4246 4247 /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */ 4248 bool has_fmulz; 4249 4250 /** 4251 * Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if 4252 * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set 4253 */ 4254 bool has_fmulz_no_denorms; 4255 4256 /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */ 4257 bool has_find_msb_rev; 4258 4259 /** Backend supports pack_half_2x16_rtz_split. */ 4260 bool has_pack_half_2x16_rtz; 4261 4262 /** Backend supports bitz/bitnz. */ 4263 bool has_bit_test; 4264 4265 /** Backend supports ubfe/ibfe. */ 4266 bool has_bfe; 4267 4268 /** Backend supports bfm. */ 4269 bool has_bfm; 4270 4271 /** Backend supports bfi. */ 4272 bool has_bfi; 4273 4274 /** Backend supports bitfield_select. */ 4275 bool has_bitfield_select; 4276 4277 /** Backend supports uclz. */ 4278 bool has_uclz; 4279 4280 /** Backend supports msad_u4x8. */ 4281 bool has_msad; 4282 4283 /** 4284 * Is this the Intel vec4 backend? 4285 * 4286 * Used to inhibit algebraic optimizations that are known to be harmful on 4287 * the Intel vec4 backend. This is generally applicable to any 4288 * optimization that might cause more immediate values to be used in 4289 * 3-source (e.g., ffma and flrp) instructions. 4290 */ 4291 bool intel_vec4; 4292 4293 /** 4294 * For most Intel GPUs, all ternary operations such as FMA and BFE cannot 4295 * have immediates, so two to three instructions may eventually be needed. 4296 */ 4297 bool avoid_ternary_with_two_constants; 4298 4299 /** Whether 8-bit ALU is supported. */ 4300 bool support_8bit_alu; 4301 4302 /** Whether 16-bit ALU is supported. */ 4303 bool support_16bit_alu; 4304 4305 unsigned max_unroll_iterations; 4306 unsigned max_unroll_iterations_aggressive; 4307 unsigned max_unroll_iterations_fp64; 4308 4309 bool lower_uniforms_to_ubo; 4310 4311 /* Specifies if indirect sampler array access will trigger forced loop 4312 * unrolling. 4313 */ 4314 bool force_indirect_unrolling_sampler; 4315 4316 /* Some older drivers don't support GLSL versions with the concept of flat 4317 * varyings and also don't support integers. This setting helps us avoid 4318 * marking varyings as flat and potentially having them changed to ints via 4319 * varying packing. 4320 */ 4321 bool no_integers; 4322 4323 /** 4324 * Specifies which type of indirectly accessed variables should force 4325 * loop unrolling. 4326 */ 4327 nir_variable_mode force_indirect_unrolling; 4328 4329 bool driver_functions; 4330 4331 /** 4332 * If true, the driver will call nir_lower_int64 itself and the frontend 4333 * should not do so. This may enable better optimization around address 4334 * modes. 4335 */ 4336 bool late_lower_int64; 4337 nir_lower_int64_options lower_int64_options; 4338 nir_lower_doubles_options lower_doubles_options; 4339 nir_divergence_options divergence_analysis_options; 4340 4341 /** 4342 * The masks of shader stages that support indirect indexing with 4343 * load_input and store_output intrinsics. It's used by 4344 * nir_lower_io_passes. 4345 */ 4346 uint8_t support_indirect_inputs; 4347 uint8_t support_indirect_outputs; 4348 4349 /** store the variable offset into the intrinsic range_base instead 4350 * of adding it to the image index. 4351 */ 4352 bool lower_image_offset_to_range_base; 4353 4354 /** store the variable offset into the intrinsic range_base instead 4355 * of adding it to the atomic source 4356 */ 4357 bool lower_atomic_offset_to_range_base; 4358 4359 /** Don't convert medium-precision casts (e.g.
f2fmp) into concrete 4360 * type casts (e.g. f2f16). 4361 */ 4362 bool preserve_mediump; 4363 4364 /** lowers fquantize2f16 to alu ops. */ 4365 bool lower_fquantize2f16; 4366 4367 /** Lower f2f16 to f2f16_rtz when execution mode is not rtne. */ 4368 bool force_f2f16_rtz; 4369 4370 /** Lower VARYING_SLOT_LAYER in FS to SYSTEM_VALUE_LAYER_ID. */ 4371 bool lower_layer_fs_input_to_sysval; 4372 4373 /** clip/cull distance and tess level arrays use compact semantics */ 4374 bool compact_arrays; 4375 4376 /** 4377 * Whether discard gets emitted as nir_intrinsic_demote. 4378 * Otherwise, nir_intrinsic_terminate is being used. 4379 */ 4380 bool discard_is_demote; 4381 4382 /** 4383 * Whether the new-style derivative intrinsics are supported. If false, 4384 * legacy ALU derivative ops will be emitted. This transitional option will 4385 * be removed once all drivers are converted to derivative intrinsics. 4386 */ 4387 bool has_ddx_intrinsics; 4388 4389 /** Whether derivative intrinsics must be scalarized. */ 4390 bool scalarize_ddx; 4391 4392 /** 4393 * Assign a range of driver locations to per-view outputs, with unique 4394 * slots for each view. If unset, per-view outputs will be treated 4395 * similarly to other arrayed IO, and only slots for one view will be 4396 * assigned. Regardless of this setting, per-view outputs are only assigned 4397 * slots for one value in var->data.location. 4398 */ 4399 bool per_view_unique_driver_locations; 4400 4401 /** 4402 * Emit nir_intrinsic_store_per_view_output with compacted view indices 4403 * rather than absolute view indices. When using compacted indices, the Nth 4404 * index refers to the Nth enabled view, not the Nth absolute view. For 4405 * example, with view mask 0b1010, compacted index 0 is absolute index 1, 4406 * and compacted index 1 is absolute index 3. Note that compacted view 4407 * indices do not correspond directly to gl_ViewIndex. 4408 * 4409 * If compact_view_index is unset, per-view indices must be constant before 4410 * nir_lower_io. This can be guaranteed by calling nir_lower_io_temporaries 4411 * first. 4412 */ 4413 bool compact_view_index; 4414 4415 /** Options determining lowering and behavior of inputs and outputs. */ 4416 nir_io_options io_options; 4417 4418 /** 4419 * Bit mask of nir_lower_packing_op to skip lowering some nir ops in 4420 * nir_lower_packing(). 4421 */ 4422 unsigned skip_lower_packing_ops; 4423 4424 /** Driver callback where drivers can define how to lower mediump. 4425 * Used by nir_lower_io_passes. 4426 */ 4427 void (*lower_mediump_io)(struct nir_shader *nir); 4428 4429 /** 4430 * Return the maximum cost of an expression that's written to a shader 4431 * output that can be moved into the next shader to remove that output. 4432 * 4433 * Currently only uniform expressions are moved. A uniform expression is 4434 * any ALU expression sourcing only constants, uniforms, and UBO loads. 4435 * 4436 * Set to NULL or return 0 if you only want to propagate constants from 4437 * outputs to inputs. 4438 * 4439 * Drivers can set the maximum cost based on the types of consecutive 4440 * shaders or shader SHA1s. 4441 * 4442 * Drivers should also set "varying_estimate_instr_cost". 4443 */ 4444 unsigned (*varying_expression_max_cost)(struct nir_shader *consumer, 4445 struct nir_shader *producer); 4446 4447 /** 4448 * Return the cost of an instruction that could be moved into the next 4449 * shader. If the cost of all instructions in an expression is <= 4450 * varying_expression_max_cost(), the instruction is moved. 
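 *
 * A driver-provided estimate could look like this sketch (hypothetical
 * costs; expensive ops get a higher cost so they are less likely to be
 * moved):
 *
 *    static unsigned
 *    my_estimate_instr_cost(struct nir_instr *instr)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 1;
 *       switch (nir_instr_as_alu(instr)->op) {
 *       case nir_op_fsin:
 *       case nir_op_fcos:
 *          return 4;
 *       default:
 *          return 1;
 *       }
 *    }
 *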
4451 * 4452 * When this callback isn't set, nir_opt_varyings uses its own version. 4453 */ 4454 unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr); 4455 4456 /** 4457 * When the varying_expression_max_cost callback isn't set, this specifies 4458 * the maximum cost of a uniform expression that is allowed to be moved 4459 * from output stores into the next shader stage to eliminate those output 4460 * stores and corresponding inputs. 4461 * 4462 * 0 only allows propagating constants written to output stores to 4463 * the next shader. 4464 * 4465 * At least 2 is required for moving a uniform stored in an output into 4466 * the next shader according to default_varying_estimate_instr_cost. 4467 */ 4468 unsigned max_varying_expression_cost; 4469} nir_shader_compiler_options; 4470 4471typedef struct nir_shader { 4472 gc_ctx *gctx; 4473 4474 /** list of uniforms (nir_variable) */ 4475 struct exec_list variables; 4476 4477 /** Set of driver-specific options for the shader. 4478 * 4479 * The memory for the options is expected to be kept in a single static 4480 * copy by the driver. 4481 */ 4482 const struct nir_shader_compiler_options *options; 4483 4484 /** Various bits of compile-time information about a given shader */ 4485 struct shader_info info; 4486 4487 /** list of nir_function */ 4488 struct exec_list functions; 4489 4490 /** 4491 * The size of the variable space for load_input_*, load_uniform_*, etc. 4492 * intrinsics. This is in back-end specific units which is likely one of 4493 * bytes, dwords, or vec4s depending on context and back-end. 4494 */ 4495 unsigned num_inputs, num_uniforms, num_outputs; 4496 4497 /** Size in bytes of required implicitly bound global memory */ 4498 unsigned global_mem_size; 4499 4500 /** Size in bytes of required scratch space */ 4501 unsigned scratch_size; 4502 4503 /** Constant data associated with this shader. 4504 * 4505 * Constant data is loaded through load_constant intrinsics (as compared to 4506 * the NIR load_const instructions which have the constant value inlined 4507 * into them). This is usually generated by nir_opt_large_constants (so 4508 * shaders don't have to load_const into a temporary array when they want 4509 * to indirect on a const array). 
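 *
 * A driver typically uploads this blob verbatim and implements the
 * load_constant intrinsic as a read from that buffer at the given
 * offset (sketch; upload_buffer() is a hypothetical driver hook):
 *
 *    upload_buffer(dev, nir->constant_data, nir->constant_data_size);
 *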
4510 */ 4511 void *constant_data; 4512 /** Size of the constant data associated with the shader, in bytes */ 4513 unsigned constant_data_size; 4514 4515 struct nir_xfb_info *xfb_info; 4516 4517 unsigned printf_info_count; 4518 u_printf_info *printf_info; 4519} nir_shader; 4520 4521#define nir_foreach_function(func, shader) \ 4522 foreach_list_typed(nir_function, func, node, &(shader)->functions) 4523 4524#define nir_foreach_function_safe(func, shader) \ 4525 foreach_list_typed_safe(nir_function, func, node, &(shader)->functions) 4526 4527#define nir_foreach_entrypoint(func, lib) \ 4528 nir_foreach_function(func, lib) \ 4529 if (func->is_entrypoint) 4530 4531#define nir_foreach_entrypoint_safe(func, lib) \ 4532 nir_foreach_function_safe(func, lib) \ 4533 if (func->is_entrypoint) 4534 4535static inline nir_function * 4536nir_foreach_function_with_impl_first(const nir_shader *shader) 4537{ 4538 foreach_list_typed(nir_function, func, node, &shader->functions) { 4539 if (func->impl != NULL) 4540 return func; 4541 } 4542 4543 return NULL; 4544} 4545 4546static inline nir_function_impl * 4547nir_foreach_function_with_impl_next(nir_function **it) 4548{ 4549 foreach_list_typed_from(nir_function, func, node, _, (*it)->node.next) { 4550 if (func->impl != NULL) { 4551 *it = func; 4552 return func->impl; 4553 } 4554 } 4555 4556 return NULL; 4557} 4558 4559#define nir_foreach_function_with_impl(it, impl_it, shader) \ 4560 for (nir_function *it = nir_foreach_function_with_impl_first(shader); \ 4561 it != NULL; \ 4562 it = NULL) \ 4563 \ 4564 for (nir_function_impl *impl_it = it->impl; \ 4565 impl_it != NULL; \ 4566 impl_it = nir_foreach_function_with_impl_next(&it)) 4567 4568/* Equivalent to 4569 * 4570 * nir_foreach_function(func, shader) { 4571 * if (func->impl != NULL) { 4572 * ... 4573 * } 4574 * } 4575 * 4576 * Carefully written to ensure break/continue work in the user code. 4577 */ 4578 4579#define nir_foreach_function_impl(it, shader) \ 4580 nir_foreach_function_with_impl(_func_##it, it, shader) 4581 4582static inline nir_function_impl * 4583nir_shader_get_entrypoint(const nir_shader *shader) 4584{ 4585 nir_function *func = NULL; 4586 4587 nir_foreach_function(function, shader) { 4588 assert(func == NULL); 4589 if (function->is_entrypoint) { 4590 func = function; 4591#ifndef NDEBUG 4592 break; 4593#endif 4594 } 4595 } 4596 4597 if (!func) 4598 return NULL; 4599 4600 assert(func->num_params == 0); 4601 assert(func->impl); 4602 return func->impl; 4603} 4604 4605static inline nir_function * 4606nir_shader_get_function_for_name(const nir_shader *shader, const char *name) 4607{ 4608 nir_foreach_function(func, shader) { 4609 if (func->name && strcmp(func->name, name) == 0) 4610 return func; 4611 } 4612 4613 return NULL; 4614} 4615 4616/* 4617 * After all functions are forcibly inlined, these passes remove redundant 4618 * functions from a shader and library respectively. 
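 *
 * Typical usage for a monolithic shader looks roughly like this (the
 * exact inlining sequence varies by driver):
 *
 *    NIR_PASS(_, nir, nir_inline_functions);
 *    nir_remove_non_entrypoints(nir);
 *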
4619 */ 4620void nir_remove_non_entrypoints(nir_shader *shader); 4621void nir_remove_non_exported(nir_shader *shader); 4622void nir_remove_entrypoints(nir_shader *shader); 4623void nir_fixup_is_exported(nir_shader *shader); 4624 4625nir_shader *nir_shader_create(void *mem_ctx, 4626 gl_shader_stage stage, 4627 const nir_shader_compiler_options *options, 4628 shader_info *si); 4629 4630/** Adds a variable to the appropriate list in nir_shader */ 4631void nir_shader_add_variable(nir_shader *shader, nir_variable *var); 4632 4633static inline void 4634nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 4635{ 4636 assert(var->data.mode == nir_var_function_temp); 4637 exec_list_push_tail(&impl->locals, &var->node); 4638} 4639 4640/** creates a variable, sets a few defaults, and adds it to the list */ 4641nir_variable *nir_variable_create(nir_shader *shader, 4642 nir_variable_mode mode, 4643 const struct glsl_type *type, 4644 const char *name); 4645/** creates a local variable and adds it to the list */ 4646nir_variable *nir_local_variable_create(nir_function_impl *impl, 4647 const struct glsl_type *type, 4648 const char *name); 4649 4650/** Creates a uniform builtin state variable. */ 4651nir_variable * 4652nir_state_variable_create(nir_shader *shader, 4653 const struct glsl_type *type, 4654 const char *name, 4655 const gl_state_index16 tokens[STATE_LENGTH]); 4656 4657/* Gets the variable for the given mode and location, creating it (with the given 4658 * type) if necessary. 4659 */ 4660nir_variable * 4661nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location, 4662 const struct glsl_type *type); 4663 4664/* Creates a variable for the given mode and location. 4665 */ 4666nir_variable * 4667nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location, 4668 const struct glsl_type *type); 4669 4670nir_variable *nir_find_variable_with_location(nir_shader *shader, 4671 nir_variable_mode mode, 4672 unsigned location); 4673 4674nir_variable *nir_find_variable_with_driver_location(nir_shader *shader, 4675 nir_variable_mode mode, 4676 unsigned location); 4677 4678nir_variable *nir_find_state_variable(nir_shader *s, 4679 gl_state_index16 tokens[STATE_LENGTH]); 4680 4681nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader, 4682 unsigned texture_index); 4683 4684void nir_sort_variables_with_modes(nir_shader *shader, 4685 int (*compar)(const nir_variable *, 4686 const nir_variable *), 4687 nir_variable_mode modes); 4688 4689/** creates a function and adds it to the shader's list of functions */ 4690nir_function *nir_function_create(nir_shader *shader, const char *name); 4691 4692static inline void 4693nir_function_set_impl(nir_function *func, nir_function_impl *impl) 4694{ 4695 func->impl = impl; 4696 impl->function = func; 4697} 4698 4699nir_function_impl *nir_function_impl_create(nir_function *func); 4700/** creates a function_impl that isn't tied to any particular function */ 4701nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 4702 4703nir_block *nir_block_create(nir_shader *shader); 4704nir_if *nir_if_create(nir_shader *shader); 4705nir_loop *nir_loop_create(nir_shader *shader); 4706 4707nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 4708 4709/** requests that the given pieces of metadata be generated */ 4710void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 4711/** dirties all but the preserved metadata */ 4712void 
nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 4713/** Preserves all metadata for the given shader */ 4714void nir_shader_preserve_all_metadata(nir_shader *shader); 4715 4716/** creates an instruction with default swizzle/writemask/etc. with NULL registers */ 4717nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 4718 4719nir_deref_instr *nir_deref_instr_create(nir_shader *shader, 4720 nir_deref_type deref_type); 4721 4722nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 4723 4724nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 4725 unsigned num_components, 4726 unsigned bit_size); 4727 4728nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 4729 nir_intrinsic_op op); 4730 4731nir_call_instr *nir_call_instr_create(nir_shader *shader, 4732 nir_function *callee); 4733 4734/** Creates a NIR texture instruction */ 4735nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 4736 4737nir_phi_instr *nir_phi_instr_create(nir_shader *shader); 4738nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, 4739 nir_block *pred, nir_def *src); 4740 4741nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 4742 4743nir_debug_info_instr *nir_debug_info_instr_create(nir_shader *shader, 4744 nir_debug_info_type type, 4745 uint32_t string_length); 4746 4747nir_undef_instr *nir_undef_instr_create(nir_shader *shader, 4748 unsigned num_components, 4749 unsigned bit_size); 4750 4751nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size); 4752 4753/** 4754 * NIR Cursors and Instruction Insertion API 4755 * @{ 4756 * 4757 * A tiny struct representing a point to insert/extract instructions or 4758 * control flow nodes. Helps reduce the combinatorial explosion of possible 4759 * points to insert/extract. 
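 *
 * For example, given a freshly created nir_alu_instr *alu, inserting it
 * at the top of a function can be written as (sketch):
 *
 *    nir_instr_insert(nir_before_impl(impl), &alu->instr);
 *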
4760 * 4761 * \sa nir_control_flow.h 4762 */ 4763typedef enum { 4764 nir_cursor_before_block, 4765 nir_cursor_after_block, 4766 nir_cursor_before_instr, 4767 nir_cursor_after_instr, 4768} nir_cursor_option; 4769 4770typedef struct { 4771 nir_cursor_option option; 4772 union { 4773 nir_block *block; 4774 nir_instr *instr; 4775 }; 4776} nir_cursor; 4777 4778static inline nir_block * 4779nir_cursor_current_block(nir_cursor cursor) 4780{ 4781 if (cursor.option == nir_cursor_before_instr || 4782 cursor.option == nir_cursor_after_instr) { 4783 return cursor.instr->block; 4784 } else { 4785 return cursor.block; 4786 } 4787} 4788 4789bool nir_cursors_equal(nir_cursor a, nir_cursor b); 4790 4791static inline nir_cursor 4792nir_before_block(nir_block *block) 4793{ 4794 nir_cursor cursor; 4795 cursor.option = nir_cursor_before_block; 4796 cursor.block = block; 4797 return cursor; 4798} 4799 4800static inline nir_cursor 4801nir_after_block(nir_block *block) 4802{ 4803 nir_cursor cursor; 4804 cursor.option = nir_cursor_after_block; 4805 cursor.block = block; 4806 return cursor; 4807} 4808 4809static inline nir_cursor 4810nir_before_instr(nir_instr *instr) 4811{ 4812 nir_cursor cursor; 4813 cursor.option = nir_cursor_before_instr; 4814 cursor.instr = instr; 4815 return cursor; 4816} 4817 4818static inline nir_cursor 4819nir_after_instr(nir_instr *instr) 4820{ 4821 nir_cursor cursor; 4822 cursor.option = nir_cursor_after_instr; 4823 cursor.instr = instr; 4824 return cursor; 4825} 4826 4827static inline nir_cursor 4828nir_before_block_after_phis(nir_block *block) 4829{ 4830 nir_phi_instr *last_phi = nir_block_last_phi_instr(block); 4831 if (last_phi) 4832 return nir_after_instr(&last_phi->instr); 4833 else 4834 return nir_before_block(block); 4835} 4836 4837static inline nir_cursor 4838nir_after_block_before_jump(nir_block *block) 4839{ 4840 nir_instr *last_instr = nir_block_last_instr(block); 4841 if (last_instr && last_instr->type == nir_instr_type_jump) { 4842 return nir_before_instr(last_instr); 4843 } else { 4844 return nir_after_block(block); 4845 } 4846} 4847 4848static inline nir_cursor 4849nir_before_src(nir_src *src) 4850{ 4851 if (nir_src_is_if(src)) { 4852 nir_block *prev_block = 4853 nir_cf_node_as_block(nir_cf_node_prev(&nir_src_parent_if(src)->cf_node)); 4854 return nir_after_block(prev_block); 4855 } else if (nir_src_parent_instr(src)->type == nir_instr_type_phi) { 4856#ifndef NDEBUG 4857 nir_phi_instr *cond_phi = nir_instr_as_phi(nir_src_parent_instr(src)); 4858 bool found = false; 4859 nir_foreach_phi_src(phi_src, cond_phi) { 4860 if (phi_src->src.ssa == src->ssa) { 4861 found = true; 4862 break; 4863 } 4864 } 4865 assert(found); 4866#endif 4867 /* The list_entry() macro is a generic container-of macro, it just happens 4868 * to have a more specific name. 
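 *
 * Equivalently, it recovers the enclosing nir_phi_src from the address
 * of its embedded "src" member, roughly:
 *
 *    (nir_phi_src *)((char *)src - offsetof(nir_phi_src, src))
 *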
4869 */ 4870 nir_phi_src *phi_src = list_entry(src, nir_phi_src, src); 4871 return nir_after_block_before_jump(phi_src->pred); 4872 } else { 4873 return nir_before_instr(nir_src_parent_instr(src)); 4874 } 4875} 4876 4877static inline nir_cursor 4878nir_before_cf_node(nir_cf_node *node) 4879{ 4880 if (node->type == nir_cf_node_block) 4881 return nir_before_block(nir_cf_node_as_block(node)); 4882 4883 return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); 4884} 4885 4886static inline nir_cursor 4887nir_after_cf_node(nir_cf_node *node) 4888{ 4889 if (node->type == nir_cf_node_block) 4890 return nir_after_block(nir_cf_node_as_block(node)); 4891 4892 return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); 4893} 4894 4895static inline nir_cursor 4896nir_after_phis(nir_block *block) 4897{ 4898 nir_foreach_instr(instr, block) { 4899 if (instr->type != nir_instr_type_phi) 4900 return nir_before_instr(instr); 4901 } 4902 return nir_after_block(block); 4903} 4904 4905static inline nir_cursor 4906nir_after_instr_and_phis(nir_instr *instr) 4907{ 4908 if (instr->type == nir_instr_type_phi) 4909 return nir_after_phis(instr->block); 4910 else 4911 return nir_after_instr(instr); 4912} 4913 4914static inline nir_cursor 4915nir_after_cf_node_and_phis(nir_cf_node *node) 4916{ 4917 if (node->type == nir_cf_node_block) 4918 return nir_after_block(nir_cf_node_as_block(node)); 4919 4920 nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); 4921 4922 return nir_after_phis(block); 4923} 4924 4925static inline nir_cursor 4926nir_before_cf_list(struct exec_list *cf_list) 4927{ 4928 nir_cf_node *first_node = exec_node_data(nir_cf_node, 4929 exec_list_get_head(cf_list), node); 4930 return nir_before_cf_node(first_node); 4931} 4932 4933static inline nir_cursor 4934nir_after_cf_list(struct exec_list *cf_list) 4935{ 4936 nir_cf_node *last_node = exec_node_data(nir_cf_node, 4937 exec_list_get_tail(cf_list), node); 4938 return nir_after_cf_node(last_node); 4939} 4940 4941static inline nir_cursor 4942nir_before_impl(nir_function_impl *impl) 4943{ 4944 return nir_before_cf_list(&impl->body); 4945} 4946 4947static inline nir_cursor 4948nir_after_impl(nir_function_impl *impl) 4949{ 4950 return nir_after_cf_list(&impl->body); 4951} 4952 4953/** 4954 * Insert a NIR instruction at the given cursor. 4955 * 4956 * Note: This does not update the cursor. 
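 *
 * A caller inserting a sequence of instructions must advance the cursor
 * itself, e.g. (sketch):
 *
 *    nir_instr_insert(cursor, &a->instr);
 *    cursor = nir_after_instr(&a->instr);
 *
 * or use the nir_builder API, which does track its cursor.
 *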
4957 */ 4958void nir_instr_insert(nir_cursor cursor, nir_instr *instr); 4959 4960bool nir_instr_move(nir_cursor cursor, nir_instr *instr); 4961 4962static inline void 4963nir_instr_insert_before(nir_instr *instr, nir_instr *before) 4964{ 4965 nir_instr_insert(nir_before_instr(instr), before); 4966} 4967 4968static inline void 4969nir_instr_insert_after(nir_instr *instr, nir_instr *after) 4970{ 4971 nir_instr_insert(nir_after_instr(instr), after); 4972} 4973 4974static inline void 4975nir_instr_insert_before_block(nir_block *block, nir_instr *before) 4976{ 4977 nir_instr_insert(nir_before_block(block), before); 4978} 4979 4980static inline void 4981nir_instr_insert_after_block(nir_block *block, nir_instr *after) 4982{ 4983 nir_instr_insert(nir_after_block(block), after); 4984} 4985 4986static inline void 4987nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) 4988{ 4989 nir_instr_insert(nir_before_cf_node(node), before); 4990} 4991 4992static inline void 4993nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) 4994{ 4995 nir_instr_insert(nir_after_cf_node(node), after); 4996} 4997 4998static inline void 4999nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) 5000{ 5001 nir_instr_insert(nir_before_cf_list(list), before); 5002} 5003 5004static inline void 5005nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) 5006{ 5007 nir_instr_insert(nir_after_cf_list(list), after); 5008} 5009 5010void nir_instr_remove_v(nir_instr *instr); 5011void nir_instr_free(nir_instr *instr); 5012void nir_instr_free_list(struct exec_list *list); 5013 5014static inline nir_cursor 5015nir_instr_remove(nir_instr *instr) 5016{ 5017 nir_cursor cursor; 5018 nir_instr *prev = nir_instr_prev(instr); 5019 if (prev) { 5020 cursor = nir_after_instr(prev); 5021 } else { 5022 cursor = nir_before_block(instr->block); 5023 } 5024 nir_instr_remove_v(instr); 5025 return cursor; 5026} 5027 5028nir_cursor nir_instr_free_and_dce(nir_instr *instr); 5029 5030/** @} */ 5031 5032nir_def *nir_instr_def(nir_instr *instr); 5033 5034typedef bool (*nir_foreach_def_cb)(nir_def *def, void *state); 5035typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); 5036static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); 5037bool nir_foreach_phi_src_leaving_block(nir_block *instr, 5038 nir_foreach_src_cb cb, 5039 void *state); 5040 5041nir_const_value *nir_src_as_const_value(nir_src src); 5042 5043#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \ 5044 static inline c_type * \ 5045 nir_src_as_##name(nir_src src) \ 5046 { \ 5047 return src.ssa->parent_instr->type == type_enum \ 5048 ? cast_macro(src.ssa->parent_instr) \ 5049 : NULL; \ 5050 } 5051 5052NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 5053NIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 5054 nir_instr_type_intrinsic, nir_instr_as_intrinsic) 5055NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 5056NIR_SRC_AS_(debug_info, nir_debug_info_instr, nir_instr_type_debug_info, nir_instr_as_debug_info) 5057 5058const char *nir_src_as_string(nir_src src); 5059 5060bool nir_src_is_always_uniform(nir_src src); 5061bool nir_srcs_equal(nir_src src1, nir_src src2); 5062bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 5063nir_block *nir_src_get_block(nir_src *src); 5064 5065static inline void 5066nir_src_rewrite(nir_src *src, nir_def *new_ssa) 5067{ 5068 assert(src->ssa); 5069 assert(nir_src_is_if(src) ? 
(nir_src_parent_if(src) != NULL) : (nir_src_parent_instr(src) != NULL)); 5070 list_del(&src->use_link); 5071 src->ssa = new_ssa; 5072 list_addtail(&src->use_link, &new_ssa->uses); 5073} 5074 5075/** Initialize a nir_src 5076 * 5077 * This is almost never the helper you want to use. This helper assumes that 5078 * the source is uninitialized garbage and blasts over it without doing any 5079 * tear-down of the existing source, including removing it from uses lists. 5080 * Using this helper on a source that currently exists in any uses list will 5081 * result in linked list corruption. It also assumes that the instruction is 5082 * currently live in the IR and adds the source to the uses list for the given 5083 * nir_def as part of setup. 5084 * 5085 * This is pretty much only useful for adding sources to extant instructions 5086 * or manipulating parallel copy instructions as part of out-of-SSA. 5087 * 5088 * When in doubt, use nir_src_rewrite() instead. 5089 */ 5090void nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def); 5091 5092/** Clear a nir_src 5093 * 5094 * This helper clears a nir_src by removing it from any uses lists and 5095 * resetting its contents to NIR_SRC_INIT. This is typically used as a 5096 * precursor to removing the source from the instruction by adjusting a 5097 * num_srcs parameter somewhere or overwriting it with nir_instr_move_src(). 5098 */ 5099void nir_instr_clear_src(nir_instr *instr, nir_src *src); 5100 5101void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 5102 5103/** Returns true if first comes before second in a block. */ 5104bool nir_instr_is_before(nir_instr *first, nir_instr *second); 5105 5106void nir_def_init(nir_instr *instr, nir_def *def, 5107 unsigned num_components, unsigned bit_size); 5108static inline void 5109nir_def_init_for_type(nir_instr *instr, nir_def *def, 5110 const struct glsl_type *type) 5111{ 5112 assert(glsl_type_is_vector_or_scalar(type)); 5113 nir_def_init(instr, def, glsl_get_components(type), 5114 glsl_get_bit_size(type)); 5115} 5116void nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa); 5117void nir_def_rewrite_uses_src(nir_def *def, nir_src new_src); 5118void nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa, 5119 nir_instr *after_me); 5120 5121static inline void 5122nir_def_replace(nir_def *def, nir_def *new_ssa) 5123{ 5124 nir_def_rewrite_uses(def, new_ssa); 5125 nir_instr_remove(def->parent_instr); 5126} 5127 5128nir_component_mask_t nir_src_components_read(const nir_src *src); 5129nir_component_mask_t nir_def_components_read(const nir_def *def); 5130bool nir_def_all_uses_are_fsat(const nir_def *def); 5131bool nir_def_all_uses_ignore_sign_bit(const nir_def *def); 5132 5133static inline int 5134nir_def_first_component_read(nir_def *def) 5135{ 5136 return (int)ffs(nir_def_components_read(def)) - 1; 5137} 5138 5139static inline int 5140nir_def_last_component_read(nir_def *def) 5141{ 5142 return (int)util_last_bit(nir_def_components_read(def)) - 1; 5143} 5144 5145static inline bool 5146nir_def_is_unused(nir_def *ssa) 5147{ 5148 return list_is_empty(&ssa->uses); 5149} 5150 5151/** Sorts unstructured blocks 5152 * 5153 * NIR requires that unstructured blocks be sorted in reverse post 5154 * depth-first-search order. This is the standard ordering used in the 5155 * compiler literature, which guarantees dominance. In particular, reverse 5156 * post-DFS order guarantees that dominators occur in the list before the 5157 * blocks they dominate.
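 *
 * For a diamond CFG A -> {B, C} -> D, any reverse post-DFS order lists
 * A first and D last (e.g. A, B, C, D), so each dominator precedes all
 * blocks it dominates.
 *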
5158 * 5159 * NOTE: This function also implicitly deletes any unreachable blocks. 5160 */ 5161void nir_sort_unstructured_blocks(nir_function_impl *impl); 5162 5163/** Returns the next block 5164 * 5165 * For structured control-flow, this follows the same order as 5166 * nir_block_cf_tree_next(). For unstructured control-flow the blocks are in 5167 * reverse post-DFS order. (See nir_sort_unstructured_blocks() above.) 5168 */ 5169nir_block *nir_block_unstructured_next(nir_block *block); 5170nir_block *nir_unstructured_start_block(nir_function_impl *impl); 5171 5172#define nir_foreach_block_unstructured(block, impl) \ 5173 for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \ 5174 block = nir_block_unstructured_next(block)) 5175 5176#define nir_foreach_block_unstructured_safe(block, impl) \ 5177 for (nir_block *block = nir_unstructured_start_block(impl), \ 5178 *next = nir_block_unstructured_next(block); \ 5179 block != NULL; \ 5180 block = next, next = nir_block_unstructured_next(block)) 5181 5182/* 5183 * finds the next basic block in source-code order, returns NULL if there is 5184 * none 5185 */ 5186 5187nir_block *nir_block_cf_tree_next(nir_block *block); 5188 5189/* Performs the opposite of nir_block_cf_tree_next() */ 5190 5191nir_block *nir_block_cf_tree_prev(nir_block *block); 5192 5193/* Gets the first block in a CF node in source-code order */ 5194 5195nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 5196 5197/* Gets the last block in a CF node in source-code order */ 5198 5199nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node); 5200 5201/* Gets the next block after a CF node in source-code order */ 5202 5203nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 5204 5205/* Gets the block before a CF node in source-code order */ 5206 5207nir_block *nir_cf_node_cf_tree_prev(nir_cf_node *node); 5208 5209/* Macros for loops that visit blocks in source-code order */ 5210 5211#define nir_foreach_block(block, impl) \ 5212 for (nir_block *block = nir_start_block(impl); block != NULL; \ 5213 block = nir_block_cf_tree_next(block)) 5214 5215#define nir_foreach_block_safe(block, impl) \ 5216 for (nir_block *block = nir_start_block(impl), \ 5217 *next = nir_block_cf_tree_next(block); \ 5218 block != NULL; \ 5219 block = next, next = nir_block_cf_tree_next(block)) 5220 5221#define nir_foreach_block_reverse(block, impl) \ 5222 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 5223 block = nir_block_cf_tree_prev(block)) 5224 5225#define nir_foreach_block_reverse_safe(block, impl) \ 5226 for (nir_block *block = nir_impl_last_block(impl), \ 5227 *prev = nir_block_cf_tree_prev(block); \ 5228 block != NULL; \ 5229 block = prev, prev = nir_block_cf_tree_prev(block)) 5230 5231#define nir_foreach_block_in_cf_node(block, node) \ 5232 for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 5233 block != nir_cf_node_cf_tree_next(node); \ 5234 block = nir_block_cf_tree_next(block)) 5235 5236#define nir_foreach_block_in_cf_node_safe(block, node) \ 5237 for (nir_block *block = nir_cf_node_cf_tree_first(node), \ 5238 *next = nir_block_cf_tree_next(block); \ 5239 block != nir_cf_node_cf_tree_next(node); \ 5240 block = next, next = nir_block_cf_tree_next(block)) 5241 5242#define nir_foreach_block_in_cf_node_reverse(block, node) \ 5243 for (nir_block *block = nir_cf_node_cf_tree_last(node); \ 5244 block != nir_cf_node_cf_tree_prev(node); \ 5245 block = nir_block_cf_tree_prev(block)) 5246 5247#define nir_foreach_block_in_cf_node_reverse_safe(block, node) \ 
5248 for (nir_block *block = nir_cf_node_cf_tree_last(node), \ 5249 *prev = nir_block_cf_tree_prev(block); \ 5250 block != nir_cf_node_cf_tree_prev(node); \ 5251 block = prev, prev = nir_block_cf_tree_prev(block)) 5252 5253/* If the following CF node is an if, this function returns that if. 5254 * Otherwise, it returns NULL. 5255 */ 5256nir_if *nir_block_get_following_if(nir_block *block); 5257 5258nir_loop *nir_block_get_following_loop(nir_block *block); 5259 5260nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx); 5261 5262void nir_index_ssa_defs(nir_function_impl *impl); 5263unsigned nir_index_instrs(nir_function_impl *impl); 5264 5265void nir_index_blocks(nir_function_impl *impl); 5266 5267void nir_shader_clear_pass_flags(nir_shader *shader); 5268 5269unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes); 5270unsigned nir_function_impl_index_vars(nir_function_impl *impl); 5271 5272void nir_print_shader(nir_shader *shader, FILE *fp); 5273void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 5274void nir_print_instr(const nir_instr *instr, FILE *fp); 5275void nir_print_deref(const nir_deref_instr *deref, FILE *fp); 5276void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations); 5277#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL) 5278#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL) 5279#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL) 5280#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations) 5281 5282char *nir_shader_as_str(nir_shader *nir, void *mem_ctx); 5283char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx); 5284char *nir_instr_as_str(const nir_instr *instr, void *mem_ctx); 5285 5286/** Adds debug information to the shader. The line numbers point to 5287 * the corresponding lines in the printed NIR, starting at first_line. 5288 */ 5289char *nir_shader_gather_debug_info(nir_shader *shader, const char *filename, uint32_t first_line); 5290 5291/** Shallow clone of a single instruction. */ 5292nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig); 5293 5294/** Clone a single instruction, including a remap table to rewrite sources. */ 5295nir_instr *nir_instr_clone_deep(nir_shader *s, const nir_instr *orig, 5296 struct hash_table *remap_table); 5297 5298/** Shallow clone of a single ALU instruction.
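 *
 * The clone's sources still point at the original defs. A typical
 * pattern (sketch; replacement_def is a hypothetical nir_def, and orig
 * is assumed non-const here) is to insert the copy and then redirect a
 * source:
 *
 *    nir_alu_instr *copy = nir_alu_instr_clone(s, orig);
 *    nir_instr_insert(nir_after_instr(&orig->instr), &copy->instr);
 *    nir_src_rewrite(&copy->src[0].src, replacement_def);
 *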
/** Shallow clone of a single ALU instruction. */
nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);

nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
nir_function *nir_function_clone(nir_shader *ns, const nir_function *fxn);
nir_function_impl *nir_function_impl_clone(nir_shader *shader,
                                           const nir_function_impl *fi);
nir_function_impl *
nir_function_impl_clone_remap_globals(nir_shader *shader,
                                      const nir_function_impl *fi,
                                      struct hash_table *remap_table);
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);

void nir_shader_replace(nir_shader *dest, nir_shader *src);

void nir_shader_serialize_deserialize(nir_shader *s);

#ifndef NDEBUG
void nir_validate_shader(nir_shader *shader, const char *when);
void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
void nir_metadata_set_validation_flag(nir_shader *shader);
void nir_metadata_check_validation_flag(nir_shader *shader);

static inline bool
should_skip_nir(const char *name)
{
   static const char *list = NULL;
   if (!list) {
      /* Comma separated list of names to skip. */
      list = getenv("NIR_SKIP");
      if (!list)
         list = "";
   }

   if (!list[0])
      return false;

   return comma_separated_list_contains(list, name);
}

static inline bool
should_print_nir(nir_shader *shader)
{
   if ((shader->info.internal && !NIR_DEBUG(PRINT_INTERNAL)) ||
       shader->info.stage < 0 ||
       shader->info.stage > MESA_SHADER_KERNEL)
      return false;

   return unlikely(nir_debug_print_shader[shader->info.stage]);
}
#else
static inline void
nir_validate_shader(nir_shader *shader, const char *when)
{
   (void)shader;
   (void)when;
}
static inline void
nir_validate_ssa_dominance(nir_shader *shader, const char *when)
{
   (void)shader;
   (void)when;
}
static inline void
nir_metadata_set_validation_flag(nir_shader *shader)
{
   (void)shader;
}
static inline void
nir_metadata_check_validation_flag(nir_shader *shader)
{
   (void)shader;
}
static inline bool
should_skip_nir(UNUSED const char *pass_name)
{
   return false;
}
static inline bool
should_print_nir(UNUSED nir_shader *shader)
{
   return false;
}
#endif /* NDEBUG */

#define _PASS(pass, nir, do_pass)                                        \
   do {                                                                  \
      if (should_skip_nir(#pass)) {                                      \
         printf("skipping %s\n", #pass);                                 \
         break;                                                          \
      }                                                                  \
      do_pass if (NIR_DEBUG(CLONE))                                      \
      {                                                                  \
         nir_shader *_clone = nir_shader_clone(ralloc_parent(nir), nir); \
         nir_shader_replace(nir, _clone);                                \
      }                                                                  \
      if (NIR_DEBUG(SERIALIZE)) {                                        \
         nir_shader_serialize_deserialize(nir);                          \
      }                                                                  \
   } while (0)

#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, {      \
   nir_metadata_set_validation_flag(nir);                          \
   if (should_print_nir(nir))                                      \
      printf("%s\n", #pass);                                       \
   if (pass(nir, ##__VA_ARGS__)) {                                 \
      nir_validate_shader(nir, "after " #pass " in " __FILE__);    \
      UNUSED bool _;                                               \
      progress = true;                                             \
      if (should_print_nir(nir))                                   \
         nir_print_shader(nir, stdout);                            \
      nir_metadata_check_validation_flag(nir);                     \
   }                                                               \
})
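/* Example (illustrative sketch): running passes with NIR_PASS and tracking
 * progress. The passes named here are real, but the surrounding driver code
 * is hypothetical.
 *
 *    bool progress = false;
 *    NIR_PASS(progress, nir, nir_opt_dce);
 *    NIR_PASS(progress, nir, nir_opt_cse);
 *    if (progress) {
 *       // At least one pass changed the shader; validation and debug
 *       // printing already happened inside the macro.
 *    }
 *
 * NIR_PASS_V (below) is the same but for passes whose return value is
 * ignored.
 */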
#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, {             \
   if (should_print_nir(nir))                                     \
      printf("%s\n", #pass);                                      \
   pass(nir, ##__VA_ARGS__);                                      \
   nir_validate_shader(nir, "after " #pass " in " __FILE__);      \
   if (should_print_nir(nir))                                     \
      nir_print_shader(nir, stdout);                              \
})

#define _NIR_LOOP_PASS(progress, idempotent, skip, nir, pass, ...)   \
   do {                                                              \
      bool nir_loop_pass_progress = false;                           \
      if (!_mesa_set_search(skip, (void (*)())&pass))                \
         NIR_PASS(nir_loop_pass_progress, nir, pass, ##__VA_ARGS__); \
      if (nir_loop_pass_progress)                                    \
         _mesa_set_clear(skip, NULL);                                \
      if (idempotent || !nir_loop_pass_progress)                     \
         _mesa_set_add(skip, (void (*)())&pass);                     \
      UNUSED bool _ = false;                                         \
      progress |= nir_loop_pass_progress;                            \
   } while (0)

/* Helper to skip a pass if no different passes have made progress since it
 * was previously run. Note that two passes are considered the same if they
 * have the same function pointer, even if they were given different options.
 *
 * The usage of this is mostly identical to NIR_PASS. "skip" is a
 * "struct set *" (created by _mesa_pointer_set_create) which the macro uses
 * to keep track of already-run passes.
 *
 * Example:
 * bool progress = true;
 * struct set *skip = _mesa_pointer_set_create(NULL);
 * while (progress) {
 *    progress = false;
 *    NIR_LOOP_PASS(progress, skip, nir, pass1);
 *    NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, nir, nir_opt_algebraic);
 *    NIR_LOOP_PASS(progress, skip, nir, pass2);
 *    ...
 * }
 * _mesa_set_destroy(skip, NULL);
 *
 * You shouldn't mix usage of this with the NIR_PASS set of helpers without
 * using a new "skip" set in between.
 */
#define NIR_LOOP_PASS(progress, skip, nir, pass, ...) \
   _NIR_LOOP_PASS(progress, true, skip, nir, pass, ##__VA_ARGS__)

/* Like NIR_LOOP_PASS, but use this for passes which may make further
 * progress when repeated.
 */
#define NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, nir, pass, ...) \
   _NIR_LOOP_PASS(progress, false, skip, nir, pass, ##__VA_ARGS__)

#define NIR_SKIP(name) should_skip_nir(#name)

/** An instruction filtering callback with writemask
 *
 * Returns true if the instruction should be processed with the associated
 * writemask and false otherwise.
 */
typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
                                              unsigned writemask, const void *);

/** A simple instruction lowering callback
 *
 * Many instruction lowering passes can be written as a simple function which
 * takes an instruction as its input and returns a sequence of instructions
 * that implement the consumed instruction. This function type represents
 * such a lowering function. When called, a function with this prototype
 * should either return NULL, indicating that no lowering needs to be done,
 * or emit a sequence of instructions using the provided builder (whose
 * cursor will already be placed after the instruction to be lowered) and
 * return the resulting nir_def.
 */
typedef nir_def *(*nir_lower_instr_cb)(struct nir_builder *,
                                       nir_instr *, void *);
/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * (like changing an input to the instr) that didn't result in a replacement
 * SSA def being generated.
 */
#define NIR_LOWER_INSTR_PROGRESS ((nir_def *)(uintptr_t)1)

/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * that should remove the current instruction because it doesn't create an
 * output (like a store).
 */
#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_def *)(uintptr_t)2)

/** Iterate over all the instructions in a nir_function_impl and lower them
 * using the provided callbacks
 *
 * This function implements the guts of a standard lowering pass for you. It
 * iterates over all of the instructions in a nir_function_impl and calls the
 * filter callback on each one. If the filter callback returns true, it then
 * calls the lowering callback on the instruction. (Splitting it this way
 * allows us to avoid some save/restore work for instructions we know won't
 * be lowered.) If the instruction is dead after the lowering is complete, it
 * will be removed. If new instructions are added, the lowering callback will
 * also be called on them in case multiple lowerings are required.
 *
 * If the callback indicates that the original instruction is replaced (either
 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the
 * instruction is removed along with any now-dead SSA defs it used.
 *
 * The metadata for the nir_function_impl will also be updated. If any blocks
 * are added (they cannot be removed), dominance and block indices will be
 * invalidated.
 */
bool nir_function_impl_lower_instructions(nir_function_impl *impl,
                                          nir_instr_filter_cb filter,
                                          nir_lower_instr_cb lower,
                                          void *cb_data);
bool nir_shader_lower_instructions(nir_shader *shader,
                                   nir_instr_filter_cb filter,
                                   nir_lower_instr_cb lower,
                                   void *cb_data);
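/* Example (illustrative sketch): a minimal lowering pass built on
 * nir_shader_lower_instructions. The filter picks out fsin ALU ops and the
 * lowering callback rebuilds them with the builder; the pass and helper
 * names here are hypothetical.
 *
 *    static bool
 *    is_fsin(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu((nir_instr *)instr)->op == nir_op_fsin;
 *    }
 *
 *    static nir_def *
 *    lower_fsin(struct nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       nir_def *x = alu->src[0].src.ssa;
 *       nir_def *approx = ...;  // build the replacement with the builder
 *       return approx;          // or NIR_LOWER_INSTR_PROGRESS[_REPLACE]
 *    }
 *
 *    bool
 *    my_lower_fsin(nir_shader *shader)
 *    {
 *       return nir_shader_lower_instructions(shader, is_fsin, lower_fsin,
 *                                            NULL);
 *    }
 */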
void nir_calc_dominance_impl(nir_function_impl *impl);
void nir_calc_dominance(nir_shader *shader);

nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
bool nir_block_dominates(nir_block *parent, nir_block *child);
bool nir_block_is_unreachable(nir_block *block);

void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_dom_tree(nir_shader *shader, FILE *fp);

void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);

void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);

void nir_gs_count_vertices_and_primitives(const nir_shader *shader,
                                          int *out_vtxcnt,
                                          int *out_prmcnt,
                                          int *out_decomposed_prmcnt,
                                          unsigned num_streams);

typedef enum {
   nir_group_all,
   nir_group_same_resource_only,
} nir_load_grouping;

void nir_group_loads(nir_shader *shader, nir_load_grouping grouping,
                     unsigned max_distance);

bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_var_copies(nir_shader *shader);
bool nir_split_per_member_structs(nir_shader *shader);
bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);

bool nir_lower_returns_impl(nir_function_impl *impl);
bool nir_lower_returns(nir_shader *shader);

void nir_inline_function_impl(struct nir_builder *b,
                              const nir_function_impl *impl,
                              nir_def **params,
                              struct hash_table *shader_var_remap);
bool nir_inline_functions(nir_shader *shader);
void nir_cleanup_functions(nir_shader *shader);
bool nir_link_shader_functions(nir_shader *shader,
                               const nir_shader *link_shader);
bool nir_lower_calls_to_builtins(nir_shader *s);

void nir_find_inlinable_uniforms(nir_shader *shader);
void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
                         const uint32_t *uniform_values,
                         const uint16_t *uniform_dw_offsets);
bool nir_collect_src_uniforms(const nir_src *src, int component,
                              uint32_t *uni_offsets, uint8_t *num_offsets,
                              unsigned max_num_bo, unsigned max_offset);
void nir_add_inlinable_uniforms(const nir_src *cond, nir_loop_info *info,
                                uint32_t *uni_offsets, uint8_t *num_offsets,
                                unsigned max_num_bo, unsigned max_offset);

bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim);

void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
void nir_lower_deref_copy_instr(struct nir_builder *b,
                                nir_intrinsic_instr *copy);
bool nir_lower_var_copies(nir_shader *shader);

bool nir_opt_memcpy(nir_shader *shader);
bool nir_lower_memcpy(nir_shader *shader);

void nir_fixup_deref_modes(nir_shader *shader);
void nir_fixup_deref_types(nir_shader *shader);

bool nir_lower_global_vars_to_local(nir_shader *shader);
void nir_lower_constant_to_temp(nir_shader *shader);

typedef enum {
   nir_lower_direct_array_deref_of_vec_load = (1 << 0),
   nir_lower_indirect_array_deref_of_vec_load = (1 << 1),
   nir_lower_direct_array_deref_of_vec_store = (1 << 2),
   nir_lower_indirect_array_deref_of_vec_store = (1 << 3),
} nir_lower_array_deref_of_vec_options;

bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
                                  bool (*filter)(nir_variable *),
                                  nir_lower_array_deref_of_vec_options options);

bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
                               uint32_t max_lower_array_len);

bool nir_lower_indirect_var_derefs(nir_shader *shader,
                                   const struct set *vars);

bool nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize);

bool nir_lower_io_to_temporaries(nir_shader *shader,
                                 nir_function_impl *entrypoint,
                                 bool outputs, bool inputs);

bool nir_lower_vars_to_scratch(nir_shader *shader,
                               nir_variable_mode modes,
                               int size_threshold,
                               glsl_type_size_align_func variable_size_align,
                               glsl_type_size_align_func scratch_layout_size_align);

bool nir_lower_scratch_to_var(nir_shader *nir);

void nir_lower_clip_halfz(nir_shader *shader);

void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);

void nir_gather_types(nir_function_impl *impl,
                      BITSET_WORD *float_types,
                      BITSET_WORD *int_types);
typedef struct {
   /* Whether all invocations write tess level outputs.
    *
    * This is useful when a pass wants to read tess level values at the end
    * of the shader. If this is true, the pass doesn't have to insert a
    * barrier and use output loads; it can just use the SSA defs that are
    * being stored (or phis thereof) to get the tess level output values.
    */
   bool all_invocations_define_tess_levels;

   /* Whether any of the outer tess level components is effectively 0,
    * meaning that the shader discards the patch. NaNs and negative values
    * are included in this. If the patch is discarded, inner tess levels
    * have no effect.
    */
   bool all_tess_levels_are_effectively_zero;

   /* Whether all tess levels are effectively 1, meaning that the tessellator
    * behaves as if they were 1. There is a range of values that lead to that
    * behavior depending on the tessellation spacing.
    */
   bool all_tess_levels_are_effectively_one;

   /* Whether the shader uses a barrier synchronizing TCS output stores.
    * For example, passes that write an output at the beginning of the shader
    * and load it at the end can use this to determine whether they have to
    * insert a barrier or whether the shader already contains one.
    */
   bool always_executes_barrier;

   /* Whether outer tess levels <= 0 are written anywhere in the shader. */
   bool discards_patches;
} nir_tcs_info;

void
nir_gather_tcs_info(const nir_shader *nir, nir_tcs_info *info,
                    enum tess_primitive_mode prim,
                    enum gl_tess_spacing spacing);

void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                              unsigned *size,
                              int (*type_size)(const struct glsl_type *, bool));

/* Some helpers to do very simple linking */
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
                               uint64_t *used_by_other_stage,
                               uint64_t *used_by_other_stage_patches);
void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                          bool default_to_smooth_interp);
void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer);
nir_variable *nir_clone_uniform_variable(nir_shader *nir,
                                         nir_variable *uniform, bool spirv);
nir_deref_instr *nir_clone_deref_instr(struct nir_builder *b,
                                       nir_variable *var,
                                       nir_deref_instr *deref);
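/* Example (illustrative sketch): simple cross-stage linking with the helpers
 * above, e.g. between a vertex and a fragment shader. The order shown is a
 * plausible one, not a requirement documented by this header.
 *
 *    nir_shader *vs = ..., *fs = ...;
 *    nir_remove_unused_varyings(vs, fs);
 *    nir_compact_varyings(vs, fs, true);   // default_to_smooth_interp
 *    nir_link_varying_precision(vs, fs);
 */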
/* Return status from nir_opt_varyings. */
typedef enum {
   /* Whether the IR changed such that NIR optimizations should be run, such
    * as due to removal of loads and stores. IO semantic changes such as
    * compaction don't count as IR changes because they don't affect NIR
    * optimizations.
    */
   nir_progress_producer = BITFIELD_BIT(0),
   nir_progress_consumer = BITFIELD_BIT(1),
} nir_opt_varyings_progress;

nir_opt_varyings_progress
nir_opt_varyings(nir_shader *producer, nir_shader *consumer, bool spirv,
                 unsigned max_uniform_components, unsigned max_ubos_per_stage);

bool nir_slot_is_sysval_output(gl_varying_slot slot,
                               gl_shader_stage next_shader);
bool nir_slot_is_varying(gl_varying_slot slot, gl_shader_stage next_shader);
bool nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
                                           gl_shader_stage next_shader);
bool nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader);
bool nir_remove_sysval_output(nir_intrinsic_instr *intr, gl_shader_stage next_shader);
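/* Example (illustrative sketch): checking which side nir_opt_varyings
 * changed so that only the affected shader is re-optimized. The limits
 * passed here are hypothetical.
 *
 *    nir_opt_varyings_progress p =
 *       nir_opt_varyings(producer, consumer, false, 16, 8);
 *    if (p & nir_progress_producer)
 *       ...re-run the optimization loop on the producer...
 *    if (p & nir_progress_consumer)
 *       ...re-run the optimization loop on the consumer...
 */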
bool nir_lower_amul(nir_shader *shader,
                    int (*type_size)(const struct glsl_type *, bool));

bool nir_lower_ubo_vec4(nir_shader *shader);

void nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode);
void nir_assign_io_var_locations(nir_shader *shader,
                                 nir_variable_mode mode,
                                 unsigned *size,
                                 gl_shader_stage stage);

bool nir_opt_clip_cull_const(nir_shader *shader);

typedef enum {
   /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
    * to 32-bit operations. This is only valid for nir_var_shader_in/out
    * modes.
    *
    * Note that this destroys dual-slot information, i.e. whether an input
    * occupies the low or high half of dvec4. Instead, it adds an offset of 1
    * to the load (which is ambiguous) and expects driver locations of inputs
    * to be final, which prevents any further optimizations.
    *
    * TODO: remove this in favor of nir_lower_io_lower_64bit_to_32_new.
    */
   nir_lower_io_lower_64bit_to_32 = (1 << 0),

   /* If set, this causes the subset of 64-bit IO operations involving floats
    * to be lowered on-the-fly to 32-bit operations. This is only valid for
    * nir_var_shader_in/out modes.
    */
   nir_lower_io_lower_64bit_float_to_32 = (1 << 1),

   /* This causes all 64-bit IO operations to be lowered to 32-bit operations.
    * This is only valid for nir_var_shader_in/out modes.
    *
    * Only VS inputs: Dual-slot information is preserved as
    * nir_io_semantics::high_dvec2 and gathered into
    * shader_info::dual_slot_inputs, so that the shader can be arbitrarily
    * optimized and the low or high half of dvec4 can be DCE'd independently
    * without affecting the other half.
    */
   nir_lower_io_lower_64bit_to_32_new = (1 << 2),

   /**
    * Should nir_lower_io() create load_interpolated_input intrinsics?
    *
    * If not, it generates regular load_input intrinsics and interpolation
    * information must be inferred from the list of input nir_variables.
    */
   nir_lower_io_use_interpolated_input_intrinsics = (1 << 3),
} nir_lower_io_options;
bool nir_lower_io(nir_shader *shader,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options);

bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);
bool nir_lower_color_inputs(nir_shader *nir);
void nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs);
bool nir_io_add_intrinsic_xfb_info(nir_shader *nir);

bool
nir_lower_vars_to_explicit_types(nir_shader *shader,
                                 nir_variable_mode modes,
                                 glsl_type_size_align_func type_info);
void
nir_gather_explicit_io_initializers(nir_shader *shader,
                                    void *dst, size_t dst_size,
                                    nir_variable_mode mode);

bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);

typedef enum {
   /**
    * An address format which is a simple 32-bit global GPU address.
    */
   nir_address_format_32bit_global,

   /**
    * An address format which is a simple 64-bit global GPU address.
    */
   nir_address_format_64bit_global,

   /**
    * An address format which is a 64-bit global GPU address encoded as a
    * 2x32-bit vector.
    */
   nir_address_format_2x32bit_global,

   /**
    * An address format which is a 64-bit global base address and a 32-bit
    * offset.
    *
    * This is identical to 64bit_bounded_global except that bounds checking
    * is not applied when lowering to global access. Even though the size is
    * never used for an actual bounds check, it needs to be valid so we can
    * lower deref_buffer_array_length properly.
    */
   nir_address_format_64bit_global_32bit_offset,

   /**
    * An address format which is a bounds-checked 64-bit global GPU address.
    *
    * The address is composed of a 32-bit vec4 where .xy are a uint64_t base
    * address stored with the low bits in .x and high bits in .y, .z is a
    * size, and .w is an offset. When the final I/O operation is lowered, .w
    * is checked against .z and the operation is predicated on the result.
    */
   nir_address_format_64bit_bounded_global,

   /**
    * An address format which is comprised of a vec2 where the first
    * component is a buffer index and the second is an offset.
    */
   nir_address_format_32bit_index_offset,

   /**
    * An address format which is a 64-bit value, where the high 32 bits
    * are a buffer index, and the low 32 bits are an offset.
    */
   nir_address_format_32bit_index_offset_pack64,

   /**
    * An address format which is comprised of a vec3 where the first two
    * components specify the buffer and the third is an offset.
    */
   nir_address_format_vec2_index_32bit_offset,

   /**
    * An address format which represents generic pointers with a 62-bit
    * pointer and a 2-bit enum in the top two bits. The top two bits have
    * the following meanings:
    *
    *  - 0x0: Global memory
    *  - 0x1: Shared memory
    *  - 0x2: Scratch memory
    *  - 0x3: Global memory
    *
    * The redundancy between 0x0 and 0x3 is because of Intel sign-extension
    * of addresses. Valid global memory addresses may naturally have either
    * 0 or ~0 as their high bits.
    *
    * Shared and scratch pointers are represented as 32-bit offsets with the
    * top 32 bits only being used for the enum. This allows us to avoid
    * 64-bit address calculations in a bunch of cases.
    */
   nir_address_format_62bit_generic,

   /**
    * An address format which is a simple 32-bit offset.
    */
   nir_address_format_32bit_offset,

   /**
    * An address format which is a simple 32-bit offset cast to 64-bit.
    */
   nir_address_format_32bit_offset_as_64bit,

   /**
    * An address format representing a purely logical addressing model. In
    * this model, all deref chains must be complete from the dereference
    * operation to the variable. Cast derefs are not allowed. These
    * addresses will be 32-bit scalars, but the format is immaterial because
    * you can always chase the chain.
    */
   nir_address_format_logical,
} nir_address_format;
unsigned
nir_address_format_bit_size(nir_address_format addr_format);

unsigned
nir_address_format_num_components(nir_address_format addr_format);

static inline const struct glsl_type *
nir_address_format_to_glsl_type(nir_address_format addr_format)
{
   unsigned bit_size = nir_address_format_bit_size(addr_format);
   assert(bit_size == 32 || bit_size == 64);
   return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
                           nir_address_format_num_components(addr_format));
}

const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);

nir_def *nir_build_addr_iadd(struct nir_builder *b, nir_def *addr,
                             nir_address_format addr_format,
                             nir_variable_mode modes,
                             nir_def *offset);

nir_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_def *addr,
                                 nir_address_format addr_format,
                                 nir_variable_mode modes,
                                 int64_t offset);

nir_def *nir_build_addr_ieq(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
                            nir_address_format addr_format);

nir_def *nir_build_addr_isub(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
                             nir_address_format addr_format);

nir_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
                                            nir_deref_instr *deref,
                                            nir_def *base_addr,
                                            nir_address_format addr_format);

bool nir_get_explicit_deref_align(nir_deref_instr *deref,
                                  bool default_to_type_align,
                                  uint32_t *align_mul,
                                  uint32_t *align_offset);

void nir_lower_explicit_io_instr(struct nir_builder *b,
                                 nir_intrinsic_instr *io_instr,
                                 nir_def *addr,
                                 nir_address_format addr_format);

bool nir_lower_explicit_io(nir_shader *shader,
                           nir_variable_mode modes,
                           nir_address_format);
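/* Example (illustrative sketch): a typical driver sequence that assigns
 * explicit layouts to shared-memory variables and then lowers their derefs
 * to offset-based access. The size/align callback is driver-provided and
 * the choice of address format is hypothetical; real drivers pick formats
 * matching their hardware.
 *
 *    nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared,
 *                                     shared_type_info);  // driver callback
 *    nir_lower_explicit_io(shader, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);
 */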
typedef enum {
   /* Use open-coded funnel shifts for each component. */
   nir_mem_access_shift_method_scalar,
   /* Prefer to use 64-bit shifts to do the same with fewer instructions.
    * Useful if 64-bit shifts are cheap.
    */
   nir_mem_access_shift_method_shift64,
   /* If nir_op_alignbyte_amd can be used, this is the best option, with just
    * a single nir_op_alignbyte_amd for each 32-bit component.
    */
   nir_mem_access_shift_method_bytealign_amd,
} nir_mem_access_shift_method;

typedef struct {
   uint8_t num_components;
   uint8_t bit_size;
   uint16_t align;
   /* If a load's alignment is increased, this specifies how the data should
    * be shifted before converting to the original bit size.
    */
   nir_mem_access_shift_method shift;
} nir_mem_access_size_align;

/* clang-format off */
typedef nir_mem_access_size_align
   (*nir_lower_mem_access_bit_sizes_cb)(nir_intrinsic_op intrin,
                                        uint8_t bytes,
                                        uint8_t bit_size,
                                        uint32_t align_mul,
                                        uint32_t align_offset,
                                        bool offset_is_const,
                                        enum gl_access_qualifier,
                                        const void *cb_data);
/* clang-format on */

typedef struct {
   nir_lower_mem_access_bit_sizes_cb callback;
   nir_variable_mode modes;
   bool may_lower_unaligned_stores_to_atomics;
   void *cb_data;
} nir_lower_mem_access_bit_sizes_options;

bool nir_lower_mem_access_bit_sizes(nir_shader *shader,
                                    const nir_lower_mem_access_bit_sizes_options *options);

bool nir_lower_robust_access(nir_shader *s,
                             nir_intrin_filter_cb filter, const void *data);

/* clang-format off */
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
                                              unsigned align_offset,
                                              unsigned bit_size,
                                              unsigned num_components,
                                              /* The hole between low and
                                               * high if they are not adjacent. */
                                              int64_t hole_size,
                                              nir_intrinsic_instr *low,
                                              nir_intrinsic_instr *high,
                                              void *data);
/* clang-format on */

typedef struct {
   nir_should_vectorize_mem_func callback;
   nir_variable_mode modes;
   nir_variable_mode robust_modes;
   void *cb_data;
   bool has_shared2_amd;
} nir_load_store_vectorize_options;

bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
bool nir_opt_load_store_update_alignments(nir_shader *shader);
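/* Example (illustrative sketch): a nir_lower_mem_access_bit_sizes_cb that
 * caps every access at a 32-bit vec4 and splits anything not 32-bit aligned
 * down to single bytes. The hardware limits assumed here are hypothetical.
 *
 *    static nir_mem_access_size_align
 *    my_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes,
 *                     uint8_t bit_size, uint32_t align_mul,
 *                     uint32_t align_offset, bool offset_is_const,
 *                     enum gl_access_qualifier access, const void *cb_data)
 *    {
 *       if (nir_combined_align(align_mul, align_offset) < 4) {
 *          return (nir_mem_access_size_align){
 *             .num_components = 1, .bit_size = 8, .align = 1,
 *             .shift = nir_mem_access_shift_method_scalar,
 *          };
 *       }
 *       return (nir_mem_access_size_align){
 *          .num_components = MIN2(DIV_ROUND_UP(bytes, 4), 4),
 *          .bit_size = 32, .align = 4,
 *          .shift = nir_mem_access_shift_method_scalar,
 *       };
 *    }
 */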
typedef bool (*nir_lower_shader_calls_should_remat_func)(nir_instr *instr, void *data);

typedef struct nir_lower_shader_calls_options {
   /* Address format used for load/store operations on the call stack. */
   nir_address_format address_format;

   /* Stack alignment */
   unsigned stack_alignment;

   /* Put loads from the stack as close as possible to where they're needed.
    * You might want to disable combined_loads for best results.
    */
   bool localized_loads;

   /* If this function pointer is not NULL, lower_shader_calls will run
    * nir_opt_load_store_vectorize for stack load/store operations. Otherwise
    * the optimization is not run.
    */
   nir_should_vectorize_mem_func vectorizer_callback;

   /* Data passed to vectorizer_callback */
   void *vectorizer_data;

   /* If this function pointer is not NULL, lower_shader_calls will call this
    * function on instructions that require spill/fill/rematerialization of
    * their value. If this function returns true, lower_shader_calls will
    * ensure that the instruction is rematerialized, adding the sources of
    * the instruction to be spilled/filled.
    */
   nir_lower_shader_calls_should_remat_func should_remat_callback;

   /* Data passed to should_remat_callback */
   void *should_remat_data;
} nir_lower_shader_calls_options;

bool
nir_lower_shader_calls(nir_shader *shader,
                       const nir_lower_shader_calls_options *options,
                       nir_shader ***resume_shaders_out,
                       uint32_t *num_resume_shaders_out,
                       void *mem_ctx);

int nir_get_io_offset_src_number(const nir_intrinsic_instr *instr);
int nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr);

nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr);
nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);

bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);

bool nir_lower_reg_intrinsics_to_ssa_impl(nir_function_impl *impl);
bool nir_lower_reg_intrinsics_to_ssa(nir_shader *shader);
bool nir_lower_vars_to_ssa(nir_shader *shader);

bool nir_remove_dead_derefs(nir_shader *shader);
bool nir_remove_dead_derefs_impl(nir_function_impl *impl);

typedef struct nir_remove_dead_variables_options {
   bool (*can_remove_var)(nir_variable *var, void *data);
   void *can_remove_var_data;
} nir_remove_dead_variables_options;

bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
                               const nir_remove_dead_variables_options *options);
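/* Example (illustrative sketch): keeping variables alive through
 * nir_remove_dead_variables with the can_remove_var callback. The predicate
 * here (protecting explicitly-bound variables) is hypothetical.
 *
 *    static bool
 *    can_remove_var(nir_variable *var, void *data)
 *    {
 *       return !var->data.explicit_binding;
 *    }
 *
 *    const nir_remove_dead_variables_options opts = {
 *       .can_remove_var = can_remove_var,
 *    };
 *    nir_remove_dead_variables(shader, nir_var_uniform, &opts);
 */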
bool nir_lower_variable_initializers(nir_shader *shader,
                                     nir_variable_mode modes);
bool nir_zero_initialize_shared_memory(nir_shader *shader,
                                       const unsigned shared_size,
                                       const unsigned chunk_size);
bool nir_clear_shared_memory(nir_shader *shader,
                             const unsigned shared_size,
                             const unsigned chunk_size);

bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
bool nir_move_output_stores_to_end(nir_shader *nir);
bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
                           const void *_data);
bool nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
                          bool alpha_to_one,
                          const gl_state_index16 *alpha_ref_state_tokens);
bool nir_lower_alu(nir_shader *shader);

bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
                    bool always_precise);

bool nir_scale_fdiv(nir_shader *shader);

bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data);
bool nir_lower_alu_vec8_16_srcs(nir_shader *shader);
bool nir_lower_bool_to_bitsize(nir_shader *shader);
bool nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne);
bool nir_lower_bool_to_int32(nir_shader *shader);
bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
bool nir_lower_const_arrays_to_uniforms(nir_shader *shader,
                                        unsigned max_uniform_components);
bool nir_lower_convert_alu_types(nir_shader *shader,
                                 bool (*should_lower)(nir_intrinsic_instr *));
bool nir_lower_constant_convert_alu_types(nir_shader *shader);
bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader);
bool nir_lower_int_to_float(nir_shader *shader);
bool nir_lower_load_const_to_scalar(nir_shader *shader);
bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all);
void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
bool nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
                                                  bool outputs_only);
bool nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask, nir_instr_filter_cb filter, void *filter_data);
bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
bool nir_vectorize_tess_levels(nir_shader *shader);
nir_shader *nir_create_passthrough_tcs_impl(const nir_shader_compiler_options *options,
                                            unsigned *locations, unsigned num_locations,
                                            uint8_t patch_vertices);
nir_shader *nir_create_passthrough_tcs(const nir_shader_compiler_options *options,
                                       const nir_shader *vs, uint8_t patch_vertices);
nir_shader *nir_create_passthrough_gs(const nir_shader_compiler_options *options,
                                      const nir_shader *prev_stage,
                                      enum mesa_prim primitive_type,
                                      enum mesa_prim output_primitive_type,
                                      bool emulate_edgeflags,
                                      bool force_line_strip_out,
                                      bool passthrough_prim_id);

bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
bool nir_lower_fragcoord_wtrans(nir_shader *shader);
bool nir_opt_frag_coord_to_pixel_coord(nir_shader *shader);
bool nir_lower_frag_coord_to_pixel_coord(nir_shader *shader);
bool nir_lower_viewport_transform(nir_shader *shader);
bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);

bool nir_lower_is_helper_invocation(nir_shader *shader);

bool nir_lower_single_sampled(nir_shader *shader);

bool nir_lower_atomics(nir_shader *shader, nir_instr_filter_cb filter);

typedef struct nir_lower_subgroups_options {
   /* In addition to the boolean lowering options below, this optional
    * callback will filter instructions for lowering if non-NULL. The data
    * passed will be filter_data.
    */
   nir_instr_filter_cb filter;

   /* Extra data passed to the filter. */
   const void *filter_data;

   /* In case the exact subgroup size is not known, subgroup_size should be
    * set to 0. In that case, the maximum subgroup size will be calculated by
    * ballot_components * ballot_bit_size.
    */
   uint8_t subgroup_size;
   uint8_t ballot_bit_size;
   uint8_t ballot_components;
   bool lower_to_scalar : 1;
   bool lower_vote_trivial : 1;
   bool lower_vote_eq : 1;
   bool lower_vote_bool_eq : 1;
   bool lower_first_invocation_to_ballot : 1;
   bool lower_read_first_invocation : 1;
   bool lower_subgroup_masks : 1;
   bool lower_relative_shuffle : 1;
   bool lower_shuffle_to_32bit : 1;
   bool lower_shuffle_to_swizzle_amd : 1;
   bool lower_shuffle : 1;
   bool lower_quad : 1;
   bool lower_quad_broadcast_dynamic : 1;
   bool lower_quad_broadcast_dynamic_to_const : 1;
   bool lower_quad_vote : 1;
   bool lower_elect : 1;
   bool lower_read_invocation_to_cond : 1;
   bool lower_rotate_to_shuffle : 1;
   bool lower_rotate_clustered_to_shuffle : 1;
   bool lower_ballot_bit_count_to_mbcnt_amd : 1;
   bool lower_inverse_ballot : 1;
   bool lower_reduce : 1;
   bool lower_boolean_reduce : 1;
   bool lower_boolean_shuffle : 1;
} nir_lower_subgroups_options;

bool nir_lower_subgroups(nir_shader *shader,
                         const nir_lower_subgroups_options *options);
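/* Example (illustrative sketch): a backend with a fixed subgroup size of 32
 * that wants ballots as a single 32-bit value and no native shuffle. This
 * particular option combination is hypothetical.
 *
 *    const nir_lower_subgroups_options opts = {
 *       .subgroup_size = 32,
 *       .ballot_bit_size = 32,
 *       .ballot_components = 1,
 *       .lower_shuffle = true,
 *       .lower_subgroup_masks = true,
 *    };
 *    nir_lower_subgroups(shader, &opts);
 */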
bool nir_lower_system_values(nir_shader *shader);

nir_def *
nir_build_lowered_load_helper_invocation(struct nir_builder *b);

typedef struct nir_lower_compute_system_values_options {
   bool has_base_global_invocation_id : 1;
   bool has_base_workgroup_id : 1;
   bool has_global_size : 1;
   bool shuffle_local_ids_for_quad_derivatives : 1;
   bool lower_local_invocation_index : 1;
   bool lower_cs_local_id_to_index : 1;
   bool lower_workgroup_id_to_index : 1;
   bool global_id_is_32bit : 1;
   /* At shader execution time, check if WorkGroupId should be 1D
    * and compute it quickly. Fall back to slow computation if not.
    */
   bool shortcut_1d_workgroup_id : 1;
   uint32_t num_workgroups[3]; /* Compile-time-known dispatch sizes, or 0 if unknown. */
} nir_lower_compute_system_values_options;

bool nir_lower_compute_system_values(nir_shader *shader,
                                     const nir_lower_compute_system_values_options *options);

struct nir_lower_sysvals_to_varyings_options {
   bool frag_coord : 1;
   bool front_face : 1;
   bool point_coord : 1;
};

bool
nir_lower_sysvals_to_varyings(nir_shader *shader,
                              const struct nir_lower_sysvals_to_varyings_options *options);

/***/
enum ENUM_PACKED nir_lower_tex_packing {
   /** No packing */
   nir_lower_tex_packing_none = 0,
   /**
    * The sampler returns up to 2 32-bit words of half floats or 16-bit
    * signed or unsigned ints based on the sampler type.
    */
   nir_lower_tex_packing_16,
   /** The sampler returns 1 32-bit word of 4x8 unorm */
   nir_lower_tex_packing_8,
};
/***/
typedef struct nir_lower_tex_options {
   /**
    * Bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
    * sampler types a texture projector is lowered.
    */
   unsigned lower_txp;

   /**
    * If true, lower texture projector for any array sampler dims
    */
   bool lower_txp_array;

   /**
    * If true, lower away nir_tex_src_offset for all texelfetch instructions.
    */
   bool lower_txf_offset;

   /**
    * If true, lower away nir_tex_src_offset for all rect textures.
    */
   bool lower_rect_offset;

   /**
    * If not NULL, this filter will return true for tex instructions that
    * should lower away nir_tex_src_offset.
    */
   nir_instr_filter_cb lower_offset_filter;

   /**
    * If true, lower rect textures to 2D, using txs to fetch the
    * texture dimensions and dividing the texture coords by the
    * texture dims to normalize.
    */
   bool lower_rect;

   /**
    * If true, lower 1D textures to 2D. This requires the GL/VK driver to
    * map 1D textures to 2D textures with height=1.
    *
    * lower_1d_shadow does this lowering for shadow textures only.
    */
   bool lower_1d;
   bool lower_1d_shadow;

   /**
    * If true, convert yuv to rgb.
    */
   unsigned lower_y_uv_external;
   unsigned lower_y_vu_external;
   unsigned lower_y_u_v_external;
   unsigned lower_yx_xuxv_external;
   unsigned lower_yx_xvxu_external;
   unsigned lower_xy_uxvx_external;
   unsigned lower_xy_vxux_external;
   unsigned lower_ayuv_external;
   unsigned lower_xyuv_external;
   unsigned lower_yuv_external;
   unsigned lower_yu_yv_external;
   unsigned lower_yv_yu_external;
   unsigned lower_y41x_external;
   unsigned bt709_external;
   unsigned bt2020_external;
   unsigned yuv_full_range_external;

   /**
    * To emulate certain texture wrap modes, this can be used
    * to saturate the specified tex coord to [0.0, 1.0]. The
    * bits are according to sampler #, i.e. if, for example:
    *
    *   (conf->saturate_s & (1 << n))
    *
    * is true, then the s coord for sampler n is saturated.
    *
    * Note that clamping must happen *after* projector lowering
    * so any projected texture sample instruction with a clamped
    * coordinate gets automatically lowered, regardless of the
    * 'lower_txp' setting.
    */
   unsigned saturate_s;
   unsigned saturate_t;
   unsigned saturate_r;

   /* Bitmask of textures that need swizzling.
    *
    * If (swizzle_result & (1 << texture_index)), then the swizzle in
    * swizzles[texture_index] is applied to the result of the texturing
    * operation.
    */
   unsigned swizzle_result;

   /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles
    * while 4 and 5 represent 0 and 1 respectively.
    *
    * Indexed by texture-id.
    */
   uint8_t swizzles[32][4];

   /* Can be used to scale sampled values in range required by the
    * format.
    *
    * Indexed by texture-id.
    */
   float scale_factors[32];

   /**
    * Bitmap of textures that need srgb to linear conversion. If
    * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
    * of the texture are lowered to linear.
    */
   unsigned lower_srgb;

   /**
    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
    */
   bool lower_txd_cube_map;

   /**
    * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
    */
   bool lower_txd_3d;

   /**
    * If true, lower nir_texop_txd on any array surfaces with nir_texop_txl.
    */
   bool lower_txd_array;

   /**
    * If true, lower nir_texop_txd on shadow samplers (except cube maps)
    * with nir_texop_txl. Notice that cube map shadow samplers are lowered
    * with lower_txd_cube_map.
    */
   bool lower_txd_shadow;

   /**
    * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
    * Implies lower_txd_cube_map and lower_txd_shadow.
    */
   bool lower_txd;

   /**
    * If true, lower nir_texop_txd when it uses min_lod.
    */
   bool lower_txd_clamp;

   /**
    * If true, lower nir_texop_txb that try to use shadow compare and
    * min_lod at the same time to a nir_texop_lod, some math, and
    * nir_texop_tex.
    */
   bool lower_txb_shadow_clamp;

   /**
    * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
    * with nir_texop_txl. This includes cube maps.
    */
   bool lower_txd_shadow_clamp;

   /**
    * If true, lower nir_texop_txd when it uses both offset and min_lod
    * with nir_texop_txl. This includes cube maps.
    */
   bool lower_txd_offset_clamp;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler is bindless.
    */
   bool lower_txd_clamp_bindless_sampler;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler index is not statically determinable to be less than 16.
    */
   bool lower_txd_clamp_if_sampler_index_not_lt_16;

   /**
    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
    * 0-lod followed by a nir_ishr.
    */
   bool lower_txs_lod;

   /**
    * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a
    * 2D array type followed by a nir_idiv by 6.
    */
   bool lower_txs_cube_array;

   /**
    * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
    * mixed-up tg4 locations.
    */
   bool lower_tg4_broadcom_swizzle;

   /**
    * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
    */
   bool lower_tg4_offsets;

   /**
    * Lower txf_ms to fragment_mask_fetch and fragment_fetch and
    * samples_identical to fragment_mask_fetch.
    */
   bool lower_to_fragment_fetch_amd;

   /**
    * To lower packed sampler return formats. This will be called for all
    * tex instructions.
    */
   enum nir_lower_tex_packing (*lower_tex_packing_cb)(const nir_tex_instr *tex, const void *data);
   const void *lower_tex_packing_data;

   /**
    * If true, lower nir_texop_lod to return -FLT_MAX if the sum of the
    * absolute values of derivatives is 0 for all coordinates.
    */
   bool lower_lod_zero_width;

   /* Turns nir_texop_tex and other ops with an implicit derivative, in
    * stages without implicit derivatives (like the vertex shader), into ops
    * with an explicit LOD with a value of 0.
    */
   bool lower_invalid_implicit_lod;

   /* If true, texture_index (sampler_index) will be zero if a texture_offset
    * (sampler_offset) source is present. This is convenient for backends
    * that support indirect indexing of textures (samplers) but not
    * offsetting it.
    */
   bool lower_index_to_offset;

   /**
    * Payload data to be sent to callback / filter functions.
    */
   void *callback_data;
} nir_lower_tex_options;

/** Lowers complex texture instructions to simpler ones */
bool nir_lower_tex(nir_shader *shader,
                   const nir_lower_tex_options *options);
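/* Example (illustrative sketch): lowering gradients on cube maps and
 * rectangle textures for hardware that lacks them. The combination of
 * options is hypothetical.
 *
 *    const nir_lower_tex_options tex_opts = {
 *       .lower_txd_cube_map = true,
 *       .lower_rect = true,
 *       .lower_invalid_implicit_lod = true,
 *    };
 *    nir_lower_tex(shader, &tex_opts);
 */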
typedef struct nir_lower_tex_shadow_swizzle {
   unsigned swizzle_r : 3;
   unsigned swizzle_g : 3;
   unsigned swizzle_b : 3;
   unsigned swizzle_a : 3;
} nir_lower_tex_shadow_swizzle;

bool
nir_lower_tex_shadow(nir_shader *s,
                     unsigned n_states,
                     enum compare_func *compare_func,
                     nir_lower_tex_shadow_swizzle *tex_swizzles,
                     bool is_fixed_point_format);

typedef struct nir_lower_image_options {
   /**
    * If true, lower cube size operations.
    */
   bool lower_cube_size;

   /**
    * Lower multi sample image load and samples_identical to use
    * fragment_mask_load.
    */
   bool lower_to_fragment_mask_load_amd;

   /**
    * Lower image_samples to a constant in case the driver doesn't support
    * multisampled images.
    */
   bool lower_image_samples_to_one;
} nir_lower_image_options;

bool nir_lower_image(nir_shader *nir,
                     const nir_lower_image_options *options);

bool
nir_lower_image_atomics_to_global(nir_shader *s);

bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);

enum nir_lower_non_uniform_access_type {
   nir_lower_non_uniform_ubo_access = (1 << 0),
   nir_lower_non_uniform_ssbo_access = (1 << 1),
   nir_lower_non_uniform_texture_access = (1 << 2),
   nir_lower_non_uniform_image_access = (1 << 3),
   nir_lower_non_uniform_get_ssbo_size = (1 << 4),
   nir_lower_non_uniform_access_type_count = 5,
};
/* Given the nir_src used for the resource, return the channels which might
 * be non-uniform.
 */
typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);

typedef struct nir_lower_non_uniform_access_options {
   enum nir_lower_non_uniform_access_type types;
   nir_lower_non_uniform_access_callback callback;
   void *callback_data;
} nir_lower_non_uniform_access_options;

bool nir_has_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_access_type types);
bool nir_opt_non_uniform_access(nir_shader *shader);
bool nir_lower_non_uniform_access(nir_shader *shader,
                                  const nir_lower_non_uniform_access_options *options);
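/* Example (illustrative sketch): lowering non-uniform UBO and texture
 * indexing for hardware that requires uniform resource indices. A NULL
 * callback leaves the decision about non-uniform channels to the pass's
 * default handling.
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_ubo_access |
 *                nir_lower_non_uniform_texture_access,
 *    };
 *    if (nir_has_non_uniform_access(shader, opts.types))
 *       nir_lower_non_uniform_access(shader, &opts);
 */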
typedef struct {
   /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
    * division lowering
    */
   bool allow_fp16;
} nir_lower_idiv_options;

bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);

typedef struct nir_input_attachment_options {
   bool use_fragcoord_sysval;
   bool use_layer_id_sysval;
   bool use_view_id_for_layer;
   bool unscaled_depth_stencil_ir3;
   uint32_t unscaled_input_attachment_ir3;
} nir_input_attachment_options;

bool nir_lower_input_attachments(nir_shader *shader,
                                 const nir_input_attachment_options *options);

bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
                       bool use_vars,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array, bool use_load_interp);

bool nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader);
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);

bool nir_lower_point_size_mov(nir_shader *shader,
                              const gl_state_index16 *pointsize_state_tokens);

bool nir_lower_frexp(nir_shader *nir);

bool nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);

bool nir_lower_clamp_color_outputs(nir_shader *shader);

bool nir_lower_flatshade(nir_shader *shader);

bool nir_lower_passthrough_edgeflags(nir_shader *shader);
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
                              const gl_state_index16 *uniform_state_tokens);

typedef struct nir_lower_wpos_ytransform_options {
   gl_state_index16 state_tokens[STATE_LENGTH];
   bool fs_coord_origin_upper_left : 1;
   bool fs_coord_origin_lower_left : 1;
   bool fs_coord_pixel_center_integer : 1;
   bool fs_coord_pixel_center_half_integer : 1;
} nir_lower_wpos_ytransform_options;

bool nir_lower_wpos_ytransform(nir_shader *shader,
                               const nir_lower_wpos_ytransform_options *options);
bool nir_lower_wpos_center(nir_shader *shader);

bool nir_lower_pntc_ytransform(nir_shader *shader,
                               const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);

bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);

bool nir_lower_fb_read(nir_shader *shader);
typedef struct nir_lower_drawpixels_options {
   gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
   gl_state_index16 scale_state_tokens[STATE_LENGTH];
   gl_state_index16 bias_state_tokens[STATE_LENGTH];
   unsigned drawpix_sampler;
   unsigned pixelmap_sampler;
   bool pixel_maps : 1;
   bool scale_and_bias : 1;
} nir_lower_drawpixels_options;

bool nir_lower_drawpixels(nir_shader *shader,
                          const nir_lower_drawpixels_options *options);

typedef struct nir_lower_bitmap_options {
   unsigned sampler;
   bool swizzle_xxxx;
} nir_lower_bitmap_options;

bool nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);

bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned offset_align_state);

typedef enum {
   nir_lower_gs_intrinsics_per_stream = 1 << 0,
   nir_lower_gs_intrinsics_count_primitives = 1 << 1,
   nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
   nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
   nir_lower_gs_intrinsics_always_end_primitive = 1 << 4,
   nir_lower_gs_intrinsics_count_decomposed_primitives = 1 << 5,
} nir_lower_gs_intrinsics_flags;

bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);

bool nir_lower_tess_coord_z(nir_shader *shader, bool triangles);

typedef struct {
   bool payload_to_shared_for_atomics : 1;
   bool payload_to_shared_for_small_types : 1;
   uint32_t payload_offset_in_bytes;
} nir_lower_task_shader_options;

bool nir_lower_task_shader(nir_shader *shader, nir_lower_task_shader_options options);

typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);

bool nir_lower_bit_size(nir_shader *shader,
                        nir_lower_bit_size_callback callback,
                        void *callback_data);
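/* Example (illustrative sketch): a nir_lower_bit_size_callback that promotes
 * all sub-32-bit (non-boolean) ALU arithmetic to 32 bits. The callback
 * returns the bit size to lower to, or 0 to leave the instruction alone.
 *
 *    static unsigned
 *    lower_bit_size_cb(const nir_instr *instr, void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 0;
 *       const nir_alu_instr *alu = nir_instr_as_alu((nir_instr *)instr);
 *       if (alu->def.bit_size > 1 && alu->def.bit_size < 32)
 *          return 32;
 *       return 0;
 *    }
 *
 *    nir_lower_bit_size(shader, lower_bit_size_cb, NULL);
 */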
bool nir_lower_64bit_phis(nir_shader *shader);

bool nir_split_64bit_vec3_and_vec4(nir_shader *shader);

nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
bool nir_lower_int64(nir_shader *shader);
bool nir_lower_int64_float_conversions(nir_shader *shader);

nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                       nir_lower_doubles_options options);
bool nir_lower_pack(nir_shader *shader);

bool nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes);
bool nir_lower_mediump_vars(nir_shader *nir, nir_variable_mode modes);
bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          uint64_t varying_mask, bool use_16bit_slots);
bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
                          nir_alu_type types);
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);

struct nir_opt_tex_srcs_options {
   unsigned sampler_dims;
   unsigned src_types;
};

struct nir_opt_16bit_tex_image_options {
   nir_rounding_mode rounding_mode;
   nir_alu_type opt_tex_dest_types;
   nir_alu_type opt_image_dest_types;
   bool integer_dest_saturates;
   bool opt_image_store_data;
   bool opt_image_srcs;
   unsigned opt_srcs_options_count;
   struct nir_opt_tex_srcs_options *opt_srcs_options;
};

bool nir_opt_16bit_tex_image(nir_shader *nir,
                             struct nir_opt_16bit_tex_image_options *options);

typedef struct {
   bool legalize_type;         /* whether this src should be legalized */
   uint8_t bit_size;           /* bit_size to enforce */
   nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];

bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
                                     nir_tex_src_type_constraints constraints);

bool nir_lower_point_size(nir_shader *shader, float min, float max);

void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
                                bool point_coord_is_sysval, bool yinvert);

bool nir_lower_texcoord_replace_late(nir_shader *s, unsigned coord_replace,
                                     bool point_coord_is_sysval);

typedef enum {
   nir_lower_interpolation_at_sample = (1 << 1),
   nir_lower_interpolation_at_offset = (1 << 2),
   nir_lower_interpolation_centroid = (1 << 3),
   nir_lower_interpolation_pixel = (1 << 4),
   nir_lower_interpolation_sample = (1 << 5),
} nir_lower_interpolation_options;

bool nir_lower_interpolation(nir_shader *shader,
                             nir_lower_interpolation_options options);

typedef enum {
   nir_lower_discard_if_to_cf = (1 << 0),
   nir_lower_demote_if_to_cf = (1 << 1),
   nir_lower_terminate_if_to_cf = (1 << 2),
} nir_lower_discard_if_options;

bool nir_lower_discard_if(nir_shader *shader, nir_lower_discard_if_options options);

bool nir_lower_terminate_to_demote(nir_shader *nir);

bool nir_lower_memory_model(nir_shader *shader);

bool nir_lower_goto_ifs(nir_shader *shader);
bool nir_lower_continue_constructs(nir_shader *shader);

typedef struct nir_lower_multiview_options {
   uint32_t view_mask;

   /**
    * Bitfield of output locations that may be converted to a per-view array.
    *
    * If a variable exists in an allowed location, it will be converted to an
    * array even if its value does not depend on the view index.
    */
   uint64_t allowed_per_view_outputs;
} nir_lower_multiview_options;

bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader, nir_lower_multiview_options options);
bool nir_lower_multiview(nir_shader *shader, nir_lower_multiview_options options);

bool nir_lower_view_index_to_device_index(nir_shader *shader);

typedef enum {
   nir_lower_fp16_rtz = (1 << 0),
   nir_lower_fp16_rtne = (1 << 1),
   nir_lower_fp16_ru = (1 << 2),
   nir_lower_fp16_rd = (1 << 3),
   nir_lower_fp16_all = 0xf,
   nir_lower_fp16_split_fp64 = (1 << 4),
} nir_lower_fp16_cast_options;
bool nir_lower_fp16_casts(nir_shader *shader, nir_lower_fp16_cast_options options);
bool nir_normalize_cubemap_coords(nir_shader *shader);

bool nir_shader_supports_implicit_lod(nir_shader *shader);

void nir_live_defs_impl(nir_function_impl *impl);

const BITSET_WORD *nir_get_live_defs(nir_cursor cursor, void *mem_ctx);

void nir_loop_analyze_impl(nir_function_impl *impl,
                           nir_variable_mode indirect_mask,
                           bool force_unroll_sampler_indirect);

/* This requires both nir_metadata_live_defs and nir_metadata_instr_index. */
bool nir_defs_interfere(nir_def *a, nir_def *b);

bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);

void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
void nir_divergence_analysis_impl(nir_function_impl *impl, nir_divergence_options options);
void nir_divergence_analysis(nir_shader *shader);
void nir_vertex_divergence_analysis(nir_shader *shader);
bool nir_has_divergent_loop(nir_shader *shader);

void
nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_def *old,
                             nir_def *reg);

/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
 * registers. If false, convert all values (even those not involved in a phi
 * node) to registers.
 * If consider_divergence is true, this pass will use divergence information
 * in order to not coalesce copies from uniform to divergent registers.
 */
bool nir_convert_from_ssa(nir_shader *shader,
                          bool phi_webs_only, bool consider_divergence);

bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);

bool nir_rematerialize_deref_in_use_blocks(nir_deref_instr *instr);
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);

bool nir_lower_samplers(nir_shader *shader);
bool nir_lower_cl_images(nir_shader *shader, bool lower_image_derefs, bool lower_sampler_derefs);
bool nir_dedup_inline_samplers(nir_shader *shader);

typedef struct nir_lower_ssbo_options {
   bool native_loads;
   bool native_offset;
} nir_lower_ssbo_options;

bool nir_lower_ssbo(nir_shader *shader, const nir_lower_ssbo_options *opts);

bool nir_lower_helper_writes(nir_shader *shader, bool lower_plain_stores);

typedef struct nir_lower_printf_options {
   unsigned max_buffer_size;
   unsigned ptr_bit_size;
   bool use_printf_base_identifier;
   bool hash_format_strings;
} nir_lower_printf_options;

bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
bool nir_lower_printf_buffer(nir_shader *nir, uint64_t address, uint32_t size);

/* This is here for unit tests. */
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);

bool nir_opt_comparison_pre(nir_shader *shader);

typedef struct nir_opt_access_options {
   bool is_vulkan;
} nir_opt_access_options;

bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_before_lower_int64(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);

/* Try to combine a and b into a. Return true if combination was possible,
 * which will result in b being removed by the pass. Return false if
 * combination wasn't possible.
 */
typedef bool (*nir_combine_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);

bool nir_opt_combine_barriers(nir_shader *shader,
                              nir_combine_barrier_cb combine_cb,
                              void *data);
bool nir_opt_barrier_modes(nir_shader *shader);
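/* Example (illustrative sketch): a conservative nir_combine_barrier_cb that
 * merges two barriers by widening the first one's semantics and modes. Real
 * callbacks must also account for memory scopes and any hardware-specific
 * restrictions before merging.
 *
 *    static bool
 *    combine_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
 *                     void *data)
 *    {
 *       if (nir_intrinsic_execution_scope(a) !=
 *           nir_intrinsic_execution_scope(b))
 *          return false;
 *       nir_intrinsic_set_memory_semantics(
 *          a, nir_intrinsic_memory_semantics(a) |
 *             nir_intrinsic_memory_semantics(b));
 *       nir_intrinsic_set_memory_modes(
 *          a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
 *       return true;
 *    }
 */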
/* Try to combine a and b into a. Return true if the combination was
 * possible, in which case b will be removed by the pass; return false
 * otherwise.
 */
typedef bool (*nir_combine_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);

bool nir_opt_combine_barriers(nir_shader *shader,
                              nir_combine_barrier_cb combine_cb,
                              void *data);
bool nir_opt_barrier_modes(nir_shader *shader);

bool nir_minimize_call_live_states(nir_shader *shader);

bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

bool nir_copy_prop_impl(nir_function_impl *impl);
bool nir_copy_prop(nir_shader *shader);

bool nir_opt_copy_prop_vars(nir_shader *shader);

bool nir_opt_cse(nir_shader *shader);

bool nir_opt_dce(nir_shader *shader);

bool nir_opt_dead_cf(nir_shader *shader);

bool nir_opt_dead_write_vars(nir_shader *shader);

bool nir_opt_deref_impl(nir_function_impl *impl);
bool nir_opt_deref(nir_shader *shader);

bool nir_opt_find_array_copies(nir_shader *shader);

bool nir_def_is_frag_coord_z(nir_def *def);
bool nir_opt_fragdepth(nir_shader *shader);

bool nir_opt_gcm(nir_shader *shader, bool value_number);

bool nir_opt_generate_bfi(nir_shader *shader);

bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);

bool nir_opt_mqsad(nir_shader *shader);

typedef enum {
   nir_opt_if_optimize_phi_true_false = (1 << 0),
   nir_opt_if_avoid_64bit_phis = (1 << 1),
} nir_opt_if_options;

bool nir_opt_if(nir_shader *shader, nir_opt_if_options options);

bool nir_opt_intrinsics(nir_shader *shader);

bool nir_opt_large_constants(nir_shader *shader,
                             glsl_type_size_align_func size_align,
                             unsigned threshold);

bool nir_opt_licm(nir_shader *shader);
bool nir_opt_loop(nir_shader *shader);

bool nir_opt_loop_unroll(nir_shader *shader);

typedef enum {
   nir_move_const_undef = (1 << 0),
   nir_move_load_ubo = (1 << 1),
   nir_move_load_input = (1 << 2),
   nir_move_comparisons = (1 << 3),
   nir_move_copies = (1 << 4),
   nir_move_load_ssbo = (1 << 5),
   nir_move_load_uniform = (1 << 6),
   nir_move_alu = (1 << 7),
} nir_move_options;

bool nir_can_move_instr(nir_instr *instr, nir_move_options options);

bool nir_opt_sink(nir_shader *shader, nir_move_options options);

bool nir_opt_move(nir_shader *shader, nir_move_options options);
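/* Example (illustrative sketch): nir_move_options is a bitmask, so a backend
 * that wants constants and comparisons placed near their uses might run:
 *
 *    nir_opt_sink(shader, nir_move_const_undef | nir_move_comparisons);
 *    nir_opt_move(shader, nir_move_const_undef | nir_move_comparisons);
 *
 * Like most NIR passes, both return true iff they made progress.
 */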
6977 */ 6978 bool allow_offset_wrap; 6979} nir_opt_offsets_options; 6980 6981bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options); 6982 6983bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, 6984 bool indirect_load_ok, bool expensive_alu_ok); 6985 6986bool nir_opt_reassociate_bfi(nir_shader *shader); 6987 6988bool nir_opt_rematerialize_compares(nir_shader *shader); 6989 6990bool nir_opt_remove_phis(nir_shader *shader); 6991bool nir_remove_single_src_phis_block(nir_block *block); 6992 6993bool nir_opt_phi_precision(nir_shader *shader); 6994 6995bool nir_opt_shrink_stores(nir_shader *shader, bool shrink_image_store); 6996 6997bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_start); 6998 6999bool nir_opt_undef(nir_shader *shader); 7000 7001bool nir_lower_undef_to_zero(nir_shader *shader); 7002 7003bool nir_opt_uniform_atomics(nir_shader *shader, bool fs_atomics_predicated); 7004 7005bool nir_opt_uniform_subgroup(nir_shader *shader, 7006 const nir_lower_subgroups_options *); 7007 7008bool nir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter, 7009 void *data); 7010bool nir_opt_vectorize_io(nir_shader *shader, nir_variable_mode modes); 7011 7012bool nir_opt_conditional_discard(nir_shader *shader); 7013bool nir_opt_move_discards_to_top(nir_shader *shader); 7014 7015bool nir_opt_ray_queries(nir_shader *shader); 7016 7017bool nir_opt_ray_query_ranges(nir_shader *shader); 7018 7019void nir_sweep(nir_shader *shader); 7020 7021nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 7022gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 7023 7024static inline bool 7025nir_variable_is_in_ubo(const nir_variable *var) 7026{ 7027 return (var->data.mode == nir_var_mem_ubo && 7028 var->interface_type != NULL); 7029} 7030 7031static inline bool 7032nir_variable_is_in_ssbo(const nir_variable *var) 7033{ 7034 return (var->data.mode == nir_var_mem_ssbo && 7035 var->interface_type != NULL); 7036} 7037 7038static inline bool 7039nir_variable_is_in_block(const nir_variable *var) 7040{ 7041 return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); 7042} 7043 7044static inline unsigned 7045nir_variable_count_slots(const nir_variable *var, const struct glsl_type *type) 7046{ 7047 return var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4) : glsl_count_attribute_slots(type, false); 7048} 7049 7050static inline unsigned 7051nir_deref_count_slots(nir_deref_instr *deref, nir_variable *var) 7052{ 7053 if (var->data.compact) { 7054 switch (deref->deref_type) { 7055 case nir_deref_type_array: 7056 return 1; 7057 case nir_deref_type_var: 7058 return nir_variable_count_slots(var, deref->type); 7059 default: 7060 unreachable("illegal deref type"); 7061 } 7062 } 7063 return glsl_count_attribute_slots(deref->type, false); 7064} 7065 7066/* See default_ub_config in nir_range_analysis.c for documentation. 
/* See default_ub_config in nir_range_analysis.c for documentation. */
typedef struct nir_unsigned_upper_bound_config {
   unsigned min_subgroup_size;
   unsigned max_subgroup_size;
   unsigned max_workgroup_invocations;
   unsigned max_workgroup_count[3];
   unsigned max_workgroup_size[3];

   uint32_t vertex_attrib_max[32];
} nir_unsigned_upper_bound_config;

uint32_t
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
                         nir_scalar scalar,
                         const nir_unsigned_upper_bound_config *config);

bool
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
                            nir_scalar ssa, unsigned const_val,
                            const nir_unsigned_upper_bound_config *config);
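/* Example (illustrative sketch): both helpers above memoize results in a
 * caller-owned hash table; a typical query (assuming a pointer hash table
 * and a filled-in config) might look like:
 *
 *    struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
 *    uint32_t bound = nir_unsigned_upper_bound(shader, range_ht,
 *                                              nir_get_scalar(def, 0),
 *                                              &config);
 *    _mesa_hash_table_destroy(range_ht, NULL);
 */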
7124 */ 7125 nir_instr_filter_cb avoid_instr_cb; 7126 7127 const void *cb_data; 7128} nir_opt_preamble_options; 7129 7130bool 7131nir_opt_preamble(nir_shader *shader, 7132 const nir_opt_preamble_options *options, 7133 unsigned *size); 7134 7135nir_function_impl *nir_shader_get_preamble(nir_shader *shader); 7136 7137bool nir_lower_point_smooth(nir_shader *shader, bool set_barycentrics); 7138bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample); 7139 7140bool nir_mod_analysis(nir_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod); 7141 7142bool 7143nir_remove_tex_shadow(nir_shader *shader, unsigned textures_bitmask); 7144 7145void 7146nir_trivialize_registers(nir_shader *s); 7147 7148unsigned 7149nir_static_workgroup_size(const nir_shader *s); 7150 7151static inline nir_intrinsic_instr * 7152nir_reg_get_decl(nir_def *reg) 7153{ 7154 assert(reg->parent_instr->type == nir_instr_type_intrinsic); 7155 nir_intrinsic_instr *decl = nir_instr_as_intrinsic(reg->parent_instr); 7156 assert(decl->intrinsic == nir_intrinsic_decl_reg); 7157 7158 return decl; 7159} 7160 7161static inline nir_intrinsic_instr * 7162nir_next_decl_reg(nir_intrinsic_instr *prev, nir_function_impl *impl) 7163{ 7164 nir_instr *start; 7165 if (prev != NULL) 7166 start = nir_instr_next(&prev->instr); 7167 else if (impl != NULL) 7168 start = nir_block_first_instr(nir_start_block(impl)); 7169 else 7170 return NULL; 7171 7172 for (nir_instr *instr = start; instr; instr = nir_instr_next(instr)) { 7173 if (instr->type != nir_instr_type_intrinsic) 7174 continue; 7175 7176 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 7177 if (intrin->intrinsic == nir_intrinsic_decl_reg) 7178 return intrin; 7179 } 7180 7181 return NULL; 7182} 7183 7184#define nir_foreach_reg_decl(reg, impl) \ 7185 for (nir_intrinsic_instr *reg = nir_next_decl_reg(NULL, impl); \ 7186 reg; reg = nir_next_decl_reg(reg, NULL)) 7187 7188#define nir_foreach_reg_decl_safe(reg, impl) \ 7189 for (nir_intrinsic_instr *reg = nir_next_decl_reg(NULL, impl), \ 7190 *next_ = nir_next_decl_reg(reg, NULL); \ 7191 reg; reg = next_, next_ = nir_next_decl_reg(next_, NULL)) 7192 7193static inline nir_cursor 7194nir_after_reg_decls(nir_function_impl *impl) 7195{ 7196 nir_intrinsic_instr *last_reg_decl = NULL; 7197 nir_foreach_reg_decl(reg_decl, impl) 7198 last_reg_decl = reg_decl; 7199 7200 if (last_reg_decl != NULL) 7201 return nir_after_instr(&last_reg_decl->instr); 7202 return nir_before_impl(impl); 7203} 7204 7205static inline bool 7206nir_is_load_reg(nir_intrinsic_instr *intr) 7207{ 7208 return intr->intrinsic == nir_intrinsic_load_reg || 7209 intr->intrinsic == nir_intrinsic_load_reg_indirect; 7210} 7211 7212static inline bool 7213nir_is_store_reg(nir_intrinsic_instr *intr) 7214{ 7215 return intr->intrinsic == nir_intrinsic_store_reg || 7216 intr->intrinsic == nir_intrinsic_store_reg_indirect; 7217} 7218 7219#define nir_foreach_reg_load(load, reg) \ 7220 assert(reg->intrinsic == nir_intrinsic_decl_reg); \ 7221 \ 7222 nir_foreach_use(load, &reg->def) \ 7223 if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load)))) 7224 7225#define nir_foreach_reg_load_safe(load, reg) \ 7226 assert(reg->intrinsic == nir_intrinsic_decl_reg); \ 7227 \ 7228 nir_foreach_use_safe(load, &reg->def) \ 7229 if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load)))) 7230 7231#define nir_foreach_reg_store(store, reg) \ 7232 assert(reg->intrinsic == nir_intrinsic_decl_reg); \ 7233 \ 7234 nir_foreach_use(store, &reg->def) \ 7235 if 
#define nir_foreach_reg_load(load, reg)               \
   assert(reg->intrinsic == nir_intrinsic_decl_reg);  \
                                                      \
   nir_foreach_use(load, &reg->def)                   \
      if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load))))

#define nir_foreach_reg_load_safe(load, reg)          \
   assert(reg->intrinsic == nir_intrinsic_decl_reg);  \
                                                      \
   nir_foreach_use_safe(load, &reg->def)              \
      if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load))))

#define nir_foreach_reg_store(store, reg)             \
   assert(reg->intrinsic == nir_intrinsic_decl_reg);  \
                                                      \
   nir_foreach_use(store, &reg->def)                  \
      if (nir_is_store_reg(nir_instr_as_intrinsic(nir_src_parent_instr(store))))

#define nir_foreach_reg_store_safe(store, reg)        \
   assert(reg->intrinsic == nir_intrinsic_decl_reg);  \
                                                      \
   nir_foreach_use_safe(store, &reg->def)             \
      if (nir_is_store_reg(nir_instr_as_intrinsic(nir_src_parent_instr(store))))

static inline nir_intrinsic_instr *
nir_load_reg_for_def(const nir_def *def)
{
   if (def->parent_instr->type != nir_instr_type_intrinsic)
      return NULL;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
   if (!nir_is_load_reg(intr))
      return NULL;

   return intr;
}

static inline nir_intrinsic_instr *
nir_store_reg_for_def(const nir_def *def)
{
   /* Look for the trivial store: a single use of our destination by a
    * store_reg/store_reg_indirect intrinsic.
    */
   if (!list_is_singular(&def->uses))
      return NULL;

   nir_src *src = list_first_entry(&def->uses, nir_src, use_link);
   if (nir_src_is_if(src))
      return NULL;

   nir_instr *parent = nir_src_parent_instr(src);
   if (parent->type != nir_instr_type_intrinsic)
      return NULL;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
   if (!nir_is_store_reg(intr))
      return NULL;

   /* The first source is the data; the third is the indirect index, which
    * we ignore here.
    */
   if (&intr->src[0] != src)
      return NULL;

   return intr;
}

struct nir_use_dominance_state;

struct nir_use_dominance_state *
nir_calc_use_dominance_impl(nir_function_impl *impl, bool post_dominance);

nir_instr *
nir_get_immediate_use_dominator(struct nir_use_dominance_state *state,
                                nir_instr *instr);
nir_instr *nir_use_dominance_lca(struct nir_use_dominance_state *state,
                                 nir_instr *i1, nir_instr *i2);
bool nir_instr_dominates_use(struct nir_use_dominance_state *state,
                             nir_instr *parent, nir_instr *child);
void nir_print_use_dominators(struct nir_use_dominance_state *state,
                              nir_instr **instructions,
                              unsigned num_instructions);

#include "nir_inline_helpers.h"

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* NIR_H */