Mirror of the OpenBSD xenocara tree: github.com/openbsd/xenocara
openbsd
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at jcs 1819 lines 62 kB view raw
1/* 2 * Copyright © 2023 Valve Corporation 3 * Copyright © 2015 Broadcom 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25/* 26 * This lowering pass supports (as configured via nir_lower_tex_options) 27 * various texture related conversions: 28 * + texture projector lowering: converts the coordinate division for 29 * texture projection to be done in ALU instructions instead of 30 * asking the texture operation to do so. 31 * + lowering RECT: converts the un-normalized RECT texture coordinates 32 * to normalized coordinates with txs plus ALU instructions 33 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes, 34 * inserts instructions to clamp specified coordinates to [0.0, 1.0]. 35 * Note that this automatically triggers texture projector lowering if 36 * needed, since clamping must happen after projector lowering. 
37 * + YUV-to-RGB conversion: to allow sampling YUV values as RGB values 38 * according to a specific YUV color space and range. 39 */ 40 41#include "nir.h" 42#include "nir_builder.h" 43#include "nir_builtin_builder.h" 44#include "nir_format_convert.h" 45 46typedef struct nir_const_value_3_4 { 47 nir_const_value v[3][4]; 48} nir_const_value_3_4; 49 50static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { { 51 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 52 { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } }, 53 { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } }, 54} }; 55static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { { 56 { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } }, 57 { { .f32 = 0.0f }, { .f32 = -0.34413629f }, { .f32 = 1.772f } }, 58 { { .f32 = 1.402f }, { .f32 = -0.71413629f }, { .f32 = 0.0f } }, 59} }; 60static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { { 61 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 62 { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } }, 63 { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } }, 64} }; 65static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { { 66 { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } }, 67 { { .f32 = 0.0f }, { .f32 = -0.18732427f }, { .f32 = 1.8556f } }, 68 { { .f32 = 1.5748f }, { .f32 = -0.46812427f }, { .f32 = 0.0f } }, 69} }; 70static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { { 71 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 72 { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } }, 73 { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f } }, 74} }; 75static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { { 76 { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } }, 77 { { .f32 = 0.0f }, { .f32 = -0.16455313f }, { .f32 = 
1.88140000f } }, 78 { { .f32 = 1.4747f }, { .f32 = -0.57139187f }, { .f32 = 0.0f } }, 79} }; 80 81static const float bt601_limited_range_csc_offsets[3] = { 82 -0.874202218f, 0.531667823f, -1.085630789f 83}; 84static const float bt601_full_range_csc_offsets[3] = { 85 -0.701000000f, 0.529136286f, -0.886000000f 86}; 87static const float bt709_limited_range_csc_offsets[3] = { 88 -0.972945075f, 0.301482665f, -1.133402218f 89}; 90static const float bt709_full_range_csc_offsets[3] = { 91 -0.787400000f, 0.327724273f, -0.927800000f 92}; 93static const float bt2020_limited_range_csc_offsets[3] = { 94 -0.915745075f, 0.347480639f, -1.148145075f 95}; 96static const float bt2020_full_range_csc_offsets[3] = { 97 -0.737350000f, 0.367972500f, -0.940700000f 98}; 99 100static bool 101project_src(nir_builder *b, nir_tex_instr *tex) 102{ 103 nir_def *proj = nir_steal_tex_src(tex, nir_tex_src_projector); 104 if (!proj) 105 return false; 106 107 b->cursor = nir_before_instr(&tex->instr); 108 nir_def *inv_proj = nir_frcp(b, proj); 109 110 /* Walk through the sources projecting the arguments. */ 111 for (unsigned i = 0; i < tex->num_srcs; i++) { 112 switch (tex->src[i].src_type) { 113 case nir_tex_src_coord: 114 case nir_tex_src_comparator: 115 break; 116 default: 117 continue; 118 } 119 nir_def *unprojected = 120 tex->src[i].src.ssa; 121 nir_def *projected = nir_fmul(b, unprojected, inv_proj); 122 123 /* Array indices don't get projected, so make an new vector with the 124 * coordinate's array index untouched. 
125 */ 126 if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { 127 switch (tex->coord_components) { 128 case 4: 129 projected = nir_vec4(b, 130 nir_channel(b, projected, 0), 131 nir_channel(b, projected, 1), 132 nir_channel(b, projected, 2), 133 nir_channel(b, unprojected, 3)); 134 break; 135 case 3: 136 projected = nir_vec3(b, 137 nir_channel(b, projected, 0), 138 nir_channel(b, projected, 1), 139 nir_channel(b, unprojected, 2)); 140 break; 141 case 2: 142 projected = nir_vec2(b, 143 nir_channel(b, projected, 0), 144 nir_channel(b, unprojected, 1)); 145 break; 146 default: 147 unreachable("bad texture coord count for array"); 148 break; 149 } 150 } 151 152 nir_src_rewrite(&tex->src[i].src, projected); 153 } 154 155 return true; 156} 157 158static bool 159lower_offset(nir_builder *b, nir_tex_instr *tex) 160{ 161 nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset); 162 if (!offset) 163 return false; 164 165 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 166 assert(coord_index >= 0); 167 168 nir_def *coord = tex->src[coord_index].src.ssa; 169 170 b->cursor = nir_before_instr(&tex->instr); 171 172 nir_def *offset_coord; 173 if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) { 174 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 175 offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset)); 176 } else { 177 nir_def *scale = NULL; 178 179 if (b->shader->options->has_texture_scaling) { 180 nir_def *idx = nir_imm_int(b, tex->texture_index); 181 scale = nir_load_texture_scale(b, 32, idx); 182 } else { 183 nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 184 scale = nir_frcp(b, txs); 185 } 186 187 offset_coord = nir_fadd(b, coord, 188 nir_fmul(b, 189 nir_i2f32(b, offset), 190 scale)); 191 } 192 } else { 193 offset_coord = nir_iadd(b, coord, offset); 194 } 195 196 if (tex->is_array) { 197 /* The offset is not applied to the array index */ 198 if (tex->coord_components == 2) { 199 offset_coord = nir_vec2(b, 
nir_channel(b, offset_coord, 0), 200 nir_channel(b, coord, 1)); 201 } else if (tex->coord_components == 3) { 202 offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0), 203 nir_channel(b, offset_coord, 1), 204 nir_channel(b, coord, 2)); 205 } else { 206 unreachable("Invalid number of components"); 207 } 208 } 209 210 nir_src_rewrite(&tex->src[coord_index].src, offset_coord); 211 212 return true; 213} 214 215static void 216lower_rect(nir_builder *b, nir_tex_instr *tex) 217{ 218 /* Set the sampler_dim to 2D here so that get_texture_size picks up the 219 * right dimensionality. 220 */ 221 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 222 223 nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 224 nir_def *scale = nir_frcp(b, txs); 225 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 226 227 if (coord_index != -1) { 228 nir_def *coords = 229 tex->src[coord_index].src.ssa; 230 nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale)); 231 } 232} 233 234static void 235lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex) 236{ 237 b->cursor = nir_before_instr(&tex->instr); 238 239 nir_def *idx = nir_imm_int(b, tex->texture_index); 240 nir_def *scale = nir_load_texture_scale(b, 32, idx); 241 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 242 243 if (coord_index != -1) { 244 nir_def *coords = 245 tex->src[coord_index].src.ssa; 246 nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale)); 247 } 248} 249 250static void 251lower_1d(nir_builder *b, nir_tex_instr *tex) 252{ 253 b->cursor = nir_before_instr(&tex->instr); 254 255 nir_def *coords = nir_steal_tex_src(tex, nir_tex_src_coord); 256 nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset); 257 nir_def *ddx = nir_steal_tex_src(tex, nir_tex_src_ddx); 258 nir_def *ddy = nir_steal_tex_src(tex, nir_tex_src_ddy); 259 260 /* Add in 2D sources to become a 2D operation */ 261 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 262 263 if (coords) { 264 /* We want 
to fetch texel 0 along the Y-axis. To do so, we sample at 0.5 265 * to get texel 0 with correct handling of wrap modes. 266 */ 267 nir_def *y = nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5, 268 coords->bit_size); 269 270 tex->coord_components++; 271 272 if (tex->is_array && tex->op != nir_texop_lod) { 273 assert(tex->coord_components == 3); 274 275 nir_def *x = nir_channel(b, coords, 0); 276 nir_def *idx = nir_channel(b, coords, 1); 277 coords = nir_vec3(b, x, y, idx); 278 } else { 279 assert(tex->coord_components == 2); 280 coords = nir_vec2(b, coords, y); 281 } 282 283 nir_tex_instr_add_src(tex, nir_tex_src_coord, coords); 284 } 285 286 if (offset) { 287 nir_tex_instr_add_src(tex, nir_tex_src_offset, 288 nir_pad_vector_imm_int(b, offset, 0, 2)); 289 } 290 291 if (ddx || ddy) { 292 nir_tex_instr_add_src(tex, nir_tex_src_ddx, 293 nir_pad_vector_imm_int(b, ddx, 0, 2)); 294 295 nir_tex_instr_add_src(tex, nir_tex_src_ddy, 296 nir_pad_vector_imm_int(b, ddy, 0, 2)); 297 } 298 299 /* Handle destination component mismatch for txs. 
*/ 300 if (tex->op == nir_texop_txs) { 301 b->cursor = nir_after_instr(&tex->instr); 302 303 nir_def *dst; 304 if (tex->is_array) { 305 assert(tex->def.num_components == 2); 306 tex->def.num_components = 3; 307 308 /* For array, we take .xz to skip the newly added height */ 309 dst = nir_channels(b, &tex->def, (1 << 0) | (1 << 2)); 310 } else { 311 assert(tex->def.num_components == 1); 312 tex->def.num_components = 2; 313 314 dst = nir_channel(b, &tex->def, 0); 315 } 316 317 nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr); 318 } 319} 320 321static void 322lower_lod(nir_builder *b, nir_tex_instr *tex, nir_def *lod) 323{ 324 assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb); 325 assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0); 326 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); 327 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); 328 329 /* If we have a bias, add it in */ 330 nir_def *bias = nir_steal_tex_src(tex, nir_tex_src_bias); 331 if (bias) 332 lod = nir_fadd(b, lod, bias); 333 334 /* If we have a minimum LOD, clamp LOD accordingly */ 335 nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod); 336 if (min_lod) 337 lod = nir_fmax(b, lod, min_lod); 338 339 nir_tex_instr_add_src(tex, nir_tex_src_lod, lod); 340 tex->op = nir_texop_txl; 341} 342 343static void 344lower_implicit_lod(nir_builder *b, nir_tex_instr *tex) 345{ 346 b->cursor = nir_before_instr(&tex->instr); 347 lower_lod(b, tex, nir_get_texture_lod(b, tex)); 348} 349 350static void 351lower_zero_lod(nir_builder *b, nir_tex_instr *tex) 352{ 353 b->cursor = nir_before_instr(&tex->instr); 354 355 if (tex->op == nir_texop_lod) { 356 nir_def_replace(&tex->def, nir_imm_int(b, 0)); 357 return; 358 } 359 360 lower_lod(b, tex, nir_imm_int(b, 0)); 361} 362 363static nir_def * 364sample_plane(nir_builder *b, nir_tex_instr *tex, int plane, 365 const nir_lower_tex_options *options) 366{ 367 assert(nir_tex_instr_dest_size(tex) == 4); 368 
assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 369 assert(tex->op == nir_texop_tex); 370 assert(tex->coord_components == 2); 371 372 nir_tex_instr *plane_tex = 373 nir_tex_instr_create(b->shader, tex->num_srcs + 1); 374 for (unsigned i = 0; i < tex->num_srcs; i++) { 375 plane_tex->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa); 376 plane_tex->src[i].src_type = tex->src[i].src_type; 377 } 378 plane_tex->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_plane, 379 nir_imm_int(b, plane)); 380 plane_tex->op = nir_texop_tex; 381 plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 382 plane_tex->dest_type = nir_type_float | tex->def.bit_size; 383 plane_tex->coord_components = 2; 384 385 plane_tex->texture_index = tex->texture_index; 386 plane_tex->sampler_index = tex->sampler_index; 387 388 nir_def_init(&plane_tex->instr, &plane_tex->def, 4, 389 tex->def.bit_size); 390 391 nir_builder_instr_insert(b, &plane_tex->instr); 392 393 /* If scaling_factor is set, return a scaled value. 
*/ 394 if (options->scale_factors[tex->texture_index]) 395 return nir_fmul_imm(b, &plane_tex->def, 396 options->scale_factors[tex->texture_index]); 397 398 return &plane_tex->def; 399} 400 401static void 402convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, 403 nir_def *y, nir_def *u, nir_def *v, 404 nir_def *a, 405 const nir_lower_tex_options *options, 406 unsigned texture_index) 407{ 408 409 const float *offset_vals; 410 const nir_const_value_3_4 *m; 411 assert((options->bt709_external & options->bt2020_external) == 0); 412 if (options->yuv_full_range_external & (1u << texture_index)) { 413 if (options->bt709_external & (1u << texture_index)) { 414 m = &bt709_full_range_csc_coeffs; 415 offset_vals = bt709_full_range_csc_offsets; 416 } else if (options->bt2020_external & (1u << texture_index)) { 417 m = &bt2020_full_range_csc_coeffs; 418 offset_vals = bt2020_full_range_csc_offsets; 419 } else { 420 m = &bt601_full_range_csc_coeffs; 421 offset_vals = bt601_full_range_csc_offsets; 422 } 423 } else { 424 if (options->bt709_external & (1u << texture_index)) { 425 m = &bt709_limited_range_csc_coeffs; 426 offset_vals = bt709_limited_range_csc_offsets; 427 } else if (options->bt2020_external & (1u << texture_index)) { 428 m = &bt2020_limited_range_csc_coeffs; 429 offset_vals = bt2020_limited_range_csc_offsets; 430 } else { 431 m = &bt601_limited_range_csc_coeffs; 432 offset_vals = bt601_limited_range_csc_offsets; 433 } 434 } 435 436 unsigned bit_size = tex->def.bit_size; 437 438 nir_def *offset = 439 nir_vec4(b, 440 nir_imm_floatN_t(b, offset_vals[0], a->bit_size), 441 nir_imm_floatN_t(b, offset_vals[1], a->bit_size), 442 nir_imm_floatN_t(b, offset_vals[2], a->bit_size), 443 a); 444 445 offset = nir_f2fN(b, offset, bit_size); 446 447 nir_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size); 448 nir_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size); 449 nir_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size); 450 451 nir_def 
*result = 452 nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset))); 453 454 nir_def_rewrite_uses(&tex->def, result); 455} 456 457static void 458lower_y_uv_external(nir_builder *b, nir_tex_instr *tex, 459 const nir_lower_tex_options *options, 460 unsigned texture_index) 461{ 462 b->cursor = nir_after_instr(&tex->instr); 463 464 nir_def *y = sample_plane(b, tex, 0, options); 465 nir_def *uv = sample_plane(b, tex, 1, options); 466 467 convert_yuv_to_rgb(b, tex, 468 nir_channel(b, y, 0), 469 nir_channel(b, uv, 0), 470 nir_channel(b, uv, 1), 471 nir_imm_float(b, 1.0f), 472 options, 473 texture_index); 474} 475 476static void 477lower_y_vu_external(nir_builder *b, nir_tex_instr *tex, 478 const nir_lower_tex_options *options, 479 unsigned texture_index) 480{ 481 b->cursor = nir_after_instr(&tex->instr); 482 483 nir_def *y = sample_plane(b, tex, 0, options); 484 nir_def *vu = sample_plane(b, tex, 1, options); 485 486 convert_yuv_to_rgb(b, tex, 487 nir_channel(b, y, 0), 488 nir_channel(b, vu, 1), 489 nir_channel(b, vu, 0), 490 nir_imm_float(b, 1.0f), 491 options, 492 texture_index); 493} 494 495static void 496lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex, 497 const nir_lower_tex_options *options, 498 unsigned texture_index) 499{ 500 b->cursor = nir_after_instr(&tex->instr); 501 502 nir_def *y = sample_plane(b, tex, 0, options); 503 nir_def *u = sample_plane(b, tex, 1, options); 504 nir_def *v = sample_plane(b, tex, 2, options); 505 506 convert_yuv_to_rgb(b, tex, 507 nir_channel(b, y, 0), 508 nir_channel(b, u, 0), 509 nir_channel(b, v, 0), 510 nir_imm_float(b, 1.0f), 511 options, 512 texture_index); 513} 514 515static void 516lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex, 517 const nir_lower_tex_options *options, 518 unsigned texture_index) 519{ 520 b->cursor = nir_after_instr(&tex->instr); 521 522 nir_def *y = sample_plane(b, tex, 0, options); 523 nir_def *xuxv = sample_plane(b, tex, 1, options); 524 525 convert_yuv_to_rgb(b, tex, 526 
nir_channel(b, y, 0), 527 nir_channel(b, xuxv, 1), 528 nir_channel(b, xuxv, 3), 529 nir_imm_float(b, 1.0f), 530 options, 531 texture_index); 532} 533 534static void 535lower_yx_xvxu_external(nir_builder *b, nir_tex_instr *tex, 536 const nir_lower_tex_options *options, 537 unsigned texture_index) 538{ 539 b->cursor = nir_after_instr(&tex->instr); 540 541 nir_def *y = sample_plane(b, tex, 0, options); 542 nir_def *xvxu = sample_plane(b, tex, 1, options); 543 544 convert_yuv_to_rgb(b, tex, 545 nir_channel(b, y, 0), 546 nir_channel(b, xvxu, 3), 547 nir_channel(b, xvxu, 1), 548 nir_imm_float(b, 1.0f), 549 options, 550 texture_index); 551} 552 553static void 554lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex, 555 const nir_lower_tex_options *options, 556 unsigned texture_index) 557{ 558 b->cursor = nir_after_instr(&tex->instr); 559 560 nir_def *y = sample_plane(b, tex, 0, options); 561 nir_def *uxvx = sample_plane(b, tex, 1, options); 562 563 convert_yuv_to_rgb(b, tex, 564 nir_channel(b, y, 1), 565 nir_channel(b, uxvx, 0), 566 nir_channel(b, uxvx, 2), 567 nir_imm_float(b, 1.0f), 568 options, 569 texture_index); 570} 571 572static void 573lower_xy_vxux_external(nir_builder *b, nir_tex_instr *tex, 574 const nir_lower_tex_options *options, 575 unsigned texture_index) 576{ 577 b->cursor = nir_after_instr(&tex->instr); 578 579 nir_def *y = sample_plane(b, tex, 0, options); 580 nir_def *vxux = sample_plane(b, tex, 1, options); 581 582 convert_yuv_to_rgb(b, tex, 583 nir_channel(b, y, 1), 584 nir_channel(b, vxux, 2), 585 nir_channel(b, vxux, 0), 586 nir_imm_float(b, 1.0f), 587 options, 588 texture_index); 589} 590 591static void 592lower_ayuv_external(nir_builder *b, nir_tex_instr *tex, 593 const nir_lower_tex_options *options, 594 unsigned texture_index) 595{ 596 b->cursor = nir_after_instr(&tex->instr); 597 598 nir_def *ayuv = sample_plane(b, tex, 0, options); 599 600 convert_yuv_to_rgb(b, tex, 601 nir_channel(b, ayuv, 2), 602 nir_channel(b, ayuv, 1), 603 
nir_channel(b, ayuv, 0), 604 nir_channel(b, ayuv, 3), 605 options, 606 texture_index); 607} 608 609static void 610lower_y41x_external(nir_builder *b, nir_tex_instr *tex, 611 const nir_lower_tex_options *options, 612 unsigned texture_index) 613{ 614 b->cursor = nir_after_instr(&tex->instr); 615 616 nir_def *y41x = sample_plane(b, tex, 0, options); 617 618 convert_yuv_to_rgb(b, tex, 619 nir_channel(b, y41x, 1), 620 nir_channel(b, y41x, 0), 621 nir_channel(b, y41x, 2), 622 nir_channel(b, y41x, 3), 623 options, 624 texture_index); 625} 626 627static void 628lower_xyuv_external(nir_builder *b, nir_tex_instr *tex, 629 const nir_lower_tex_options *options, 630 unsigned texture_index) 631{ 632 b->cursor = nir_after_instr(&tex->instr); 633 634 nir_def *xyuv = sample_plane(b, tex, 0, options); 635 636 convert_yuv_to_rgb(b, tex, 637 nir_channel(b, xyuv, 2), 638 nir_channel(b, xyuv, 1), 639 nir_channel(b, xyuv, 0), 640 nir_imm_float(b, 1.0f), 641 options, 642 texture_index); 643} 644 645static void 646lower_yuv_external(nir_builder *b, nir_tex_instr *tex, 647 const nir_lower_tex_options *options, 648 unsigned texture_index) 649{ 650 b->cursor = nir_after_instr(&tex->instr); 651 652 nir_def *yuv = sample_plane(b, tex, 0, options); 653 654 convert_yuv_to_rgb(b, tex, 655 nir_channel(b, yuv, 0), 656 nir_channel(b, yuv, 1), 657 nir_channel(b, yuv, 2), 658 nir_imm_float(b, 1.0f), 659 options, 660 texture_index); 661} 662 663static void 664lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex, 665 const nir_lower_tex_options *options, 666 unsigned texture_index) 667{ 668 b->cursor = nir_after_instr(&tex->instr); 669 670 nir_def *yuv = sample_plane(b, tex, 0, options); 671 672 convert_yuv_to_rgb(b, tex, 673 nir_channel(b, yuv, 1), 674 nir_channel(b, yuv, 2), 675 nir_channel(b, yuv, 0), 676 nir_imm_float(b, 1.0f), 677 options, 678 texture_index); 679} 680 681static void 682lower_yv_yu_external(nir_builder *b, nir_tex_instr *tex, 683 const nir_lower_tex_options *options, 684 unsigned 
texture_index) 685{ 686 b->cursor = nir_after_instr(&tex->instr); 687 688 nir_def *yuv = sample_plane(b, tex, 0, options); 689 690 convert_yuv_to_rgb(b, tex, 691 nir_channel(b, yuv, 2), 692 nir_channel(b, yuv, 1), 693 nir_channel(b, yuv, 0), 694 nir_imm_float(b, 1.0f), 695 options, 696 texture_index); 697} 698 699/* 700 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod 701 * computed from the gradients. 702 */ 703static void 704replace_gradient_with_lod(nir_builder *b, nir_def *lod, nir_tex_instr *tex) 705{ 706 assert(tex->op == nir_texop_txd); 707 708 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx)); 709 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy)); 710 711 /* If we have a minimum LOD, clamp LOD accordingly */ 712 nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod); 713 if (min_lod) 714 lod = nir_fmax(b, lod, min_lod); 715 716 nir_tex_instr_add_src(tex, nir_tex_src_lod, lod); 717 tex->op = nir_texop_txl; 718} 719 720static void 721lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) 722{ 723 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); 724 assert(tex->op == nir_texop_txd); 725 726 /* Use textureSize() to get the width and height of LOD 0 */ 727 nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); 728 729 /* Cubemap texture lookups first generate a texture coordinate normalized 730 * to [-1, 1] on the appropiate face. The appropiate face is determined 731 * by which component has largest magnitude and its sign. The texture 732 * coordinate is the quotient of the remaining texture coordinates against 733 * that absolute value of the component of largest magnitude. This 734 * division requires that the computing of the derivative of the texel 735 * coordinate must use the quotient rule. 
The high level GLSL code is as 736 * follows: 737 * 738 * Step 1: selection 739 * 740 * vec3 abs_p, Q, dQdx, dQdy; 741 * abs_p = abs(ir->coordinate); 742 * if (abs_p.x >= max(abs_p.y, abs_p.z)) { 743 * Q = ir->coordinate.yzx; 744 * dQdx = ir->lod_info.grad.dPdx.yzx; 745 * dQdy = ir->lod_info.grad.dPdy.yzx; 746 * } 747 * if (abs_p.y >= max(abs_p.x, abs_p.z)) { 748 * Q = ir->coordinate.xzy; 749 * dQdx = ir->lod_info.grad.dPdx.xzy; 750 * dQdy = ir->lod_info.grad.dPdy.xzy; 751 * } 752 * if (abs_p.z >= max(abs_p.x, abs_p.y)) { 753 * Q = ir->coordinate; 754 * dQdx = ir->lod_info.grad.dPdx; 755 * dQdy = ir->lod_info.grad.dPdy; 756 * } 757 * 758 * Step 2: use quotient rule to compute derivative. The normalized to 759 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are 760 * only concerned with the magnitudes of the derivatives whose values are 761 * not affected by the sign. We drop the sign from the computation. 762 * 763 * vec2 dx, dy; 764 * float recip; 765 * 766 * recip = 1.0 / Q.z; 767 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) ); 768 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) ); 769 * 770 * Step 3: compute LOD. At this point we have the derivatives of the 771 * texture coordinates normalized to [-1,1]. We take the LOD to be 772 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L) 773 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L) 774 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L) 775 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy)))) 776 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy))) 777 * where L is the dimension of the cubemap. 
The code is: 778 * 779 * float M, result; 780 * M = max(dot(dx, dx), dot(dy, dy)); 781 * L = textureSize(sampler, 0).x; 782 * result = -1.0 + 0.5 * log2(L * L * M); 783 */ 784 785 /* coordinate */ 786 nir_def *p = 787 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa; 788 789 /* unmodified dPdx, dPdy values */ 790 nir_def *dPdx = 791 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 792 nir_def *dPdy = 793 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 794 795 nir_def *abs_p = nir_fabs(b, p); 796 nir_def *abs_p_x = nir_channel(b, abs_p, 0); 797 nir_def *abs_p_y = nir_channel(b, abs_p, 1); 798 nir_def *abs_p_z = nir_channel(b, abs_p, 2); 799 800 /* 1. compute selector */ 801 nir_def *Q, *dQdx, *dQdy; 802 803 nir_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y)); 804 nir_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z)); 805 806 unsigned yzx[3] = { 1, 2, 0 }; 807 unsigned xzy[3] = { 0, 2, 1 }; 808 809 Q = nir_bcsel(b, cond_z, 810 p, 811 nir_bcsel(b, cond_y, 812 nir_swizzle(b, p, xzy, 3), 813 nir_swizzle(b, p, yzx, 3))); 814 815 dQdx = nir_bcsel(b, cond_z, 816 dPdx, 817 nir_bcsel(b, cond_y, 818 nir_swizzle(b, dPdx, xzy, 3), 819 nir_swizzle(b, dPdx, yzx, 3))); 820 821 dQdy = nir_bcsel(b, cond_z, 822 dPdy, 823 nir_bcsel(b, cond_y, 824 nir_swizzle(b, dPdy, xzy, 3), 825 nir_swizzle(b, dPdy, yzx, 3))); 826 827 /* 2. 
quotient rule */ 828 829 /* tmp = Q.xy * recip; 830 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) ); 831 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) ); 832 */ 833 nir_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2)); 834 835 nir_def *Q_xy = nir_trim_vector(b, Q, 2); 836 nir_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z); 837 838 nir_def *dQdx_xy = nir_trim_vector(b, dQdx, 2); 839 nir_def *dQdx_z = nir_channel(b, dQdx, 2); 840 nir_def *dx = 841 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z))); 842 843 nir_def *dQdy_xy = nir_trim_vector(b, dQdy, 2); 844 nir_def *dQdy_z = nir_channel(b, dQdy, 2); 845 nir_def *dy = 846 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z))); 847 848 /* M = max(dot(dx, dx), dot(dy, dy)); */ 849 nir_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy)); 850 851 /* size has textureSize() of LOD 0 */ 852 nir_def *L = nir_channel(b, size, 0); 853 854 /* lod = -1.0 + 0.5 * log2(L * L * M); */ 855 nir_def *lod = 856 nir_fadd(b, 857 nir_imm_float(b, -1.0f), 858 nir_fmul(b, 859 nir_imm_float(b, 0.5f), 860 nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M))))); 861 862 /* 3. 
Replace the gradient instruction with an equivalent lod instruction */ 863 replace_gradient_with_lod(b, lod, tex); 864} 865 866static void 867lower_gradient(nir_builder *b, nir_tex_instr *tex) 868{ 869 /* Cubes are more complicated and have their own function */ 870 if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 871 lower_gradient_cube_map(b, tex); 872 return; 873 } 874 875 assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE); 876 assert(tex->op == nir_texop_txd); 877 878 /* Use textureSize() to get the width and height of LOD 0 */ 879 unsigned component_mask; 880 switch (tex->sampler_dim) { 881 case GLSL_SAMPLER_DIM_3D: 882 component_mask = 7; 883 break; 884 case GLSL_SAMPLER_DIM_1D: 885 component_mask = 1; 886 break; 887 default: 888 component_mask = 3; 889 break; 890 } 891 892 nir_def *size = 893 nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)), 894 component_mask); 895 896 /* Scale the gradients by width and height. Effectively, the incoming 897 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the 898 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y). 899 */ 900 nir_def *ddx = 901 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 902 nir_def *ddy = 903 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 904 905 nir_def *dPdx = nir_fmul(b, ddx, size); 906 nir_def *dPdy = nir_fmul(b, ddy, size); 907 908 nir_def *rho; 909 if (dPdx->num_components == 1) { 910 rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy)); 911 } else { 912 rho = nir_fmax(b, 913 nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)), 914 nir_fsqrt(b, nir_fdot(b, dPdy, dPdy))); 915 } 916 917 /* lod = log2(rho). We're ignoring GL state biases for now. 
*/ 918 nir_def *lod = nir_flog2(b, rho); 919 920 /* Replace the gradient instruction with an equivalent lod instruction */ 921 replace_gradient_with_lod(b, lod, tex); 922} 923 924/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */ 925static nir_tex_instr * 926lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex) 927{ 928 b->cursor = nir_after_instr(&tex->instr); 929 nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2); 930 931 txd->op = nir_texop_txd; 932 txd->sampler_dim = tex->sampler_dim; 933 txd->dest_type = tex->dest_type; 934 txd->coord_components = tex->coord_components; 935 txd->texture_index = tex->texture_index; 936 txd->sampler_index = tex->sampler_index; 937 txd->is_array = tex->is_array; 938 txd->is_shadow = tex->is_shadow; 939 txd->is_new_style_shadow = tex->is_new_style_shadow; 940 941 /* reuse existing srcs */ 942 for (unsigned i = 0; i < tex->num_srcs; i++) { 943 txd->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa); 944 txd->src[i].src_type = tex->src[i].src_type; 945 } 946 int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); 947 assert(coord_idx >= 0); 948 nir_def *coord = tex->src[coord_idx].src.ssa; 949 /* don't take the derivative of the array index */ 950 if (tex->is_array) 951 coord = nir_channels(b, coord, nir_component_mask(coord->num_components - 1)); 952 nir_def *dfdx = nir_ddx(b, coord); 953 nir_def *dfdy = nir_ddy(b, coord); 954 txd->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_ddx, dfdx); 955 txd->src[tex->num_srcs + 1] = nir_tex_src_for_ssa(nir_tex_src_ddy, dfdy); 956 957 nir_def_init(&txd->instr, &txd->def, 958 tex->def.num_components, 959 tex->def.bit_size); 960 nir_builder_instr_insert(b, &txd->instr); 961 nir_def_replace(&tex->def, &txd->def); 962 return txd; 963} 964 965/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */ 966static nir_tex_instr * 967lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex) 968{ 969 b->cursor = nir_after_instr(&tex->instr); 970 
nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs); 971 972 txl->op = nir_texop_txl; 973 txl->sampler_dim = tex->sampler_dim; 974 txl->dest_type = tex->dest_type; 975 txl->coord_components = tex->coord_components; 976 txl->texture_index = tex->texture_index; 977 txl->sampler_index = tex->sampler_index; 978 txl->is_array = tex->is_array; 979 txl->is_shadow = tex->is_shadow; 980 txl->is_new_style_shadow = tex->is_new_style_shadow; 981 982 /* reuse all but bias src */ 983 for (int i = 0; i < tex->num_srcs; i++) { 984 if (tex->src[i].src_type != nir_tex_src_bias) { 985 txl->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa); 986 txl->src[i].src_type = tex->src[i].src_type; 987 } 988 } 989 nir_def *lod = nir_get_texture_lod(b, tex); 990 991 int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); 992 assert(bias_idx >= 0); 993 lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa); 994 txl->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_lod, lod); 995 996 nir_def_init(&txl->instr, &txl->def, 997 tex->def.num_components, 998 tex->def.bit_size); 999 nir_builder_instr_insert(b, &txl->instr); 1000 nir_def_replace(&tex->def, &txl->def); 1001 return txl; 1002} 1003 1004static nir_tex_instr * 1005saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) 1006{ 1007 if (tex->op == nir_texop_tex) 1008 tex = lower_tex_to_txd(b, tex); 1009 else if (tex->op == nir_texop_txb) 1010 tex = lower_txb_to_txl(b, tex); 1011 1012 b->cursor = nir_before_instr(&tex->instr); 1013 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 1014 1015 if (coord_index != -1) { 1016 nir_def *src = 1017 tex->src[coord_index].src.ssa; 1018 1019 /* split src into components: */ 1020 nir_def *comp[4]; 1021 1022 assume(tex->coord_components >= 1); 1023 1024 for (unsigned j = 0; j < tex->coord_components; j++) 1025 comp[j] = nir_channel(b, src, j); 1026 1027 /* clamp requested components, array index does not get clamped: */ 1028 unsigned ncomp = 
tex->coord_components; 1029 if (tex->is_array) 1030 ncomp--; 1031 1032 for (unsigned j = 0; j < ncomp; j++) { 1033 if ((1 << j) & sat_mask) { 1034 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 1035 /* non-normalized texture coords, so clamp to texture 1036 * size rather than [0.0, 1.0] 1037 */ 1038 nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 1039 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); 1040 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); 1041 } else { 1042 comp[j] = nir_fsat(b, comp[j]); 1043 } 1044 } 1045 } 1046 1047 /* and move the result back into a single vecN: */ 1048 src = nir_vec(b, comp, tex->coord_components); 1049 1050 nir_src_rewrite(&tex->src[coord_index].src, src); 1051 } 1052 return tex; 1053} 1054 1055static nir_def * 1056get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) 1057{ 1058 nir_const_value v[4]; 1059 1060 memset(&v, 0, sizeof(v)); 1061 1062 if (swizzle_val == 4) { 1063 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0; 1064 } else { 1065 assert(swizzle_val == 5); 1066 if (type == nir_type_float32) 1067 v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0; 1068 else 1069 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1; 1070 } 1071 1072 return nir_build_imm(b, 4, 32, v); 1073} 1074 1075static void 1076swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex) 1077{ 1078 b->cursor = nir_after_instr(&tex->instr); 1079 1080 assert(nir_tex_instr_dest_size(tex) == 4); 1081 unsigned swiz[4] = { 2, 3, 1, 0 }; 1082 nir_def *swizzled = nir_swizzle(b, &tex->def, swiz, 4); 1083 1084 nir_def_rewrite_uses_after(&tex->def, swizzled, 1085 swizzled->parent_instr); 1086} 1087 1088static void 1089swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) 1090{ 1091 b->cursor = nir_after_instr(&tex->instr); 1092 1093 nir_def *swizzled; 1094 if (tex->op == nir_texop_tg4) { 1095 if (swizzle[tex->component] < 4) { 1096 /* This one's easy */ 1097 tex->component = swizzle[tex->component]; 1098 return; 
1099 } else { 1100 swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); 1101 } 1102 } else { 1103 assert(nir_tex_instr_dest_size(tex) == 4); 1104 if (swizzle[0] < 4 && swizzle[1] < 4 && 1105 swizzle[2] < 4 && swizzle[3] < 4) { 1106 unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; 1107 /* We have no 0s or 1s, just emit a swizzling MOV */ 1108 swizzled = nir_swizzle(b, &tex->def, swiz, 4); 1109 } else { 1110 nir_scalar srcs[4]; 1111 for (unsigned i = 0; i < 4; i++) { 1112 if (swizzle[i] < 4) { 1113 srcs[i] = nir_get_scalar(&tex->def, swizzle[i]); 1114 } else { 1115 srcs[i] = nir_get_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0); 1116 } 1117 } 1118 swizzled = nir_vec_scalars(b, srcs, 4); 1119 } 1120 } 1121 1122 nir_def_rewrite_uses_after(&tex->def, swizzled, 1123 swizzled->parent_instr); 1124} 1125 1126static void 1127linearize_srgb_result(nir_builder *b, nir_tex_instr *tex) 1128{ 1129 assert(nir_tex_instr_dest_size(tex) == 4); 1130 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 1131 1132 b->cursor = nir_after_instr(&tex->instr); 1133 1134 nir_def *rgb = 1135 nir_format_srgb_to_linear(b, nir_trim_vector(b, &tex->def, 3)); 1136 1137 /* alpha is untouched: */ 1138 nir_def *result = nir_vec4(b, 1139 nir_channel(b, rgb, 0), 1140 nir_channel(b, rgb, 1), 1141 nir_channel(b, rgb, 2), 1142 nir_channel(b, &tex->def, 3)); 1143 1144 nir_def_rewrite_uses_after(&tex->def, result, 1145 result->parent_instr); 1146} 1147 1148/** 1149 * Lowers texture instructions from giving a vec4 result to a vec2 of f16, 1150 * i16, or u16, or a single unorm4x8 value. 1151 * 1152 * Note that we don't change the destination num_components, because 1153 * nir_tex_instr_dest_size() will still return 4. The driver is just expected 1154 * to not store the other channels, given that nothing at the NIR level will 1155 * read them. 
1156 */ 1157static bool 1158lower_tex_packing(nir_builder *b, nir_tex_instr *tex, 1159 const nir_lower_tex_options *options) 1160{ 1161 nir_def *color = &tex->def; 1162 1163 b->cursor = nir_after_instr(&tex->instr); 1164 1165 assert(options->lower_tex_packing_cb); 1166 enum nir_lower_tex_packing packing = 1167 options->lower_tex_packing_cb(tex, options->lower_tex_packing_data); 1168 1169 switch (packing) { 1170 case nir_lower_tex_packing_none: 1171 return false; 1172 1173 case nir_lower_tex_packing_16: { 1174 static const unsigned bits[4] = { 16, 16, 16, 16 }; 1175 1176 switch (nir_alu_type_get_base_type(tex->dest_type)) { 1177 case nir_type_float: 1178 switch (nir_tex_instr_dest_size(tex)) { 1179 case 1: 1180 assert(tex->is_shadow && tex->is_new_style_shadow); 1181 color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0)); 1182 break; 1183 case 2: { 1184 nir_def *rg = nir_channel(b, color, 0); 1185 color = nir_vec2(b, 1186 nir_unpack_half_2x16_split_x(b, rg), 1187 nir_unpack_half_2x16_split_y(b, rg)); 1188 break; 1189 } 1190 case 4: { 1191 nir_def *rg = nir_channel(b, color, 0); 1192 nir_def *ba = nir_channel(b, color, 1); 1193 color = nir_vec4(b, 1194 nir_unpack_half_2x16_split_x(b, rg), 1195 nir_unpack_half_2x16_split_y(b, rg), 1196 nir_unpack_half_2x16_split_x(b, ba), 1197 nir_unpack_half_2x16_split_y(b, ba)); 1198 break; 1199 } 1200 default: 1201 unreachable("wrong dest_size"); 1202 } 1203 break; 1204 1205 case nir_type_int: 1206 color = nir_format_unpack_sint(b, color, bits, 4); 1207 break; 1208 1209 case nir_type_uint: 1210 color = nir_format_unpack_uint(b, color, bits, 4); 1211 break; 1212 1213 default: 1214 unreachable("unknown base type"); 1215 } 1216 break; 1217 } 1218 1219 case nir_lower_tex_packing_8: 1220 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 1221 color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0)); 1222 break; 1223 } 1224 1225 nir_def_rewrite_uses_after(&tex->def, color, 1226 color->parent_instr); 1227 
return true; 1228} 1229 1230static bool 1231sampler_index_lt(nir_tex_instr *tex, unsigned max) 1232{ 1233 assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1); 1234 1235 unsigned sampler_index = tex->sampler_index; 1236 1237 int sampler_offset_idx = 1238 nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset); 1239 if (sampler_offset_idx >= 0) { 1240 if (!nir_src_is_const(tex->src[sampler_offset_idx].src)) 1241 return false; 1242 1243 sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src); 1244 } 1245 1246 return sampler_index < max; 1247} 1248 1249static bool 1250lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex) 1251{ 1252 assert(tex->op == nir_texop_tg4); 1253 assert(nir_tex_instr_has_explicit_tg4_offsets(tex)); 1254 assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1); 1255 1256 b->cursor = nir_after_instr(&tex->instr); 1257 1258 nir_scalar dest[5] = { 0 }; 1259 nir_def *residency = NULL; 1260 for (unsigned i = 0; i < 4; ++i) { 1261 nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1); 1262 tex_copy->op = tex->op; 1263 tex_copy->coord_components = tex->coord_components; 1264 tex_copy->sampler_dim = tex->sampler_dim; 1265 tex_copy->is_array = tex->is_array; 1266 tex_copy->is_shadow = tex->is_shadow; 1267 tex_copy->is_new_style_shadow = tex->is_new_style_shadow; 1268 tex_copy->is_sparse = tex->is_sparse; 1269 tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod; 1270 tex_copy->component = tex->component; 1271 tex_copy->dest_type = tex->dest_type; 1272 tex_copy->texture_index = tex->texture_index; 1273 tex_copy->sampler_index = tex->sampler_index; 1274 tex_copy->backend_flags = tex->backend_flags; 1275 1276 for (unsigned j = 0; j < tex->num_srcs; ++j) { 1277 tex_copy->src[j].src = nir_src_for_ssa(tex->src[j].src.ssa); 1278 tex_copy->src[j].src_type = tex->src[j].src_type; 1279 } 1280 1281 nir_def *offset = nir_imm_ivec2(b, tex->tg4_offsets[i][0], 1282 tex->tg4_offsets[i][1]); 1283 
nir_tex_src src = nir_tex_src_for_ssa(nir_tex_src_offset, offset); 1284 tex_copy->src[tex_copy->num_srcs - 1] = src; 1285 1286 nir_def_init(&tex_copy->instr, &tex_copy->def, 1287 nir_tex_instr_dest_size(tex), 32); 1288 1289 nir_builder_instr_insert(b, &tex_copy->instr); 1290 1291 dest[i] = nir_get_scalar(&tex_copy->def, 3); 1292 if (tex->is_sparse) { 1293 nir_def *code = nir_channel(b, &tex_copy->def, 4); 1294 if (residency) 1295 residency = nir_sparse_residency_code_and(b, residency, code); 1296 else 1297 residency = code; 1298 } 1299 } 1300 dest[4] = nir_get_scalar(residency, 0); 1301 1302 nir_def *res = nir_vec_scalars(b, dest, tex->def.num_components); 1303 nir_def_replace(&tex->def, res); 1304 1305 return true; 1306} 1307 1308static bool 1309nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex) 1310{ 1311 int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); 1312 if (lod_idx < 0 || 1313 (nir_src_is_const(tex->src[lod_idx].src) && 1314 nir_src_as_int(tex->src[lod_idx].src) == 0)) 1315 return false; 1316 1317 unsigned dest_size = nir_tex_instr_dest_size(tex); 1318 1319 b->cursor = nir_before_instr(&tex->instr); 1320 nir_def *lod = tex->src[lod_idx].src.ssa; 1321 1322 /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */ 1323 nir_src_rewrite(&tex->src[lod_idx].src, nir_imm_int(b, 0)); 1324 1325 /* TXS(LOD) = max(TXS(0) >> LOD, 1) 1326 * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface, 1327 * which should return 0, not 1. 1328 */ 1329 b->cursor = nir_after_instr(&tex->instr); 1330 nir_def *minified = nir_imin(b, &tex->def, 1331 nir_imax(b, nir_ushr(b, &tex->def, lod), 1332 nir_imm_int(b, 1))); 1333 1334 /* Make sure the component encoding the array size (if any) is not 1335 * minified. 
1336 */ 1337 if (tex->is_array) { 1338 nir_def *comp[3]; 1339 1340 assert(dest_size <= ARRAY_SIZE(comp)); 1341 for (unsigned i = 0; i < dest_size - 1; i++) 1342 comp[i] = nir_channel(b, minified, i); 1343 1344 comp[dest_size - 1] = nir_channel(b, &tex->def, dest_size - 1); 1345 minified = nir_vec(b, comp, dest_size); 1346 } 1347 1348 nir_def_rewrite_uses_after(&tex->def, minified, 1349 minified->parent_instr); 1350 return true; 1351} 1352 1353static void 1354nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex) 1355{ 1356 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array); 1357 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 1358 1359 b->cursor = nir_after_instr(&tex->instr); 1360 1361 assert(tex->def.num_components == 3); 1362 nir_def *size = &tex->def; 1363 size = nir_vec3(b, nir_channel(b, size, 1), 1364 nir_channel(b, size, 1), 1365 nir_idiv(b, nir_channel(b, size, 2), 1366 nir_imm_int(b, 6))); 1367 1368 nir_def_rewrite_uses_after(&tex->def, size, size->parent_instr); 1369} 1370 1371/* Adjust the sample index according to AMD FMASK (fragment mask). 1372 * 1373 * For uncompressed MSAA surfaces, FMASK should return 0x76543210, 1374 * which is the identity mapping. Each nibble says which physical sample 1375 * should be fetched to get that sample. 1376 * 1377 * For example, 0x11111100 means there are only 2 samples stored and 1378 * the second sample covers 3/4 of the pixel. When reading samples 0 1379 * and 1, return physical sample 0 (determined by the first two 0s 1380 * in FMASK), otherwise return physical sample 1. 1381 * 1382 * The sample index should be adjusted as follows: 1383 * sample_index = ubfe(fmask, sample_index * 4, 3); 1384 * 1385 * Only extract 3 bits because EQAA can generate number 8 in FMASK, which 1386 * means the physical sample index is unknown. We can map 8 to any valid 1387 * sample index, and extracting only 3 bits will map it to 0, which works 1388 * with all MSAA modes. 
1389 */ 1390static void 1391nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex) 1392{ 1393 lower_offset(b, tex); 1394 1395 b->cursor = nir_before_instr(&tex->instr); 1396 1397 /* Create FMASK fetch. */ 1398 assert(tex->texture_index == 0); 1399 nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1); 1400 fmask_fetch->op = nir_texop_fragment_mask_fetch_amd; 1401 fmask_fetch->coord_components = tex->coord_components; 1402 fmask_fetch->sampler_dim = tex->sampler_dim; 1403 fmask_fetch->is_array = tex->is_array; 1404 fmask_fetch->texture_non_uniform = tex->texture_non_uniform; 1405 fmask_fetch->dest_type = nir_type_uint32; 1406 nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32); 1407 1408 fmask_fetch->num_srcs = 0; 1409 for (unsigned i = 0; i < tex->num_srcs; i++) { 1410 if (tex->src[i].src_type == nir_tex_src_ms_index) 1411 continue; 1412 nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++]; 1413 src->src = nir_src_for_ssa(tex->src[i].src.ssa); 1414 src->src_type = tex->src[i].src_type; 1415 } 1416 1417 nir_builder_instr_insert(b, &fmask_fetch->instr); 1418 1419 /* Obtain new sample index. */ 1420 int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index); 1421 assert(ms_index >= 0); 1422 nir_def *sample = tex->src[ms_index].src.ssa; 1423 nir_def *new_sample = nir_ubfe(b, &fmask_fetch->def, 1424 nir_u2u32(b, nir_ishl_imm(b, sample, 2)), 1425 nir_imm_int(b, 3)); 1426 1427 /* Update instruction. 
*/ 1428 tex->op = nir_texop_fragment_fetch_amd; 1429 nir_src_rewrite(&tex->src[ms_index].src, 1430 nir_u2uN(b, new_sample, sample->bit_size)); 1431} 1432 1433static void 1434nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex) 1435{ 1436 b->cursor = nir_after_instr(&tex->instr); 1437 1438 nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr)); 1439 fmask_fetch->op = nir_texop_fragment_mask_fetch_amd; 1440 fmask_fetch->dest_type = nir_type_uint32; 1441 nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32); 1442 nir_builder_instr_insert(b, &fmask_fetch->instr); 1443 1444 nir_def_rewrite_uses(&tex->def, nir_ieq_imm(b, &fmask_fetch->def, 0)); 1445 nir_instr_remove_v(&tex->instr); 1446} 1447 1448static void 1449nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex) 1450{ 1451 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 1452 assert(coord_index >= 0); 1453 1454 b->cursor = nir_after_instr(&tex->instr); 1455 1456 nir_def *is_zero = nir_imm_true(b); 1457 for (unsigned i = 0; i < tex->coord_components; i++) { 1458 nir_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i); 1459 1460 /* Compute the sum of the absolute values of derivatives. */ 1461 nir_def *dfdx = nir_ddx(b, coord); 1462 nir_def *dfdy = nir_ddy(b, coord); 1463 nir_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy)); 1464 1465 /* Check if the sum is 0. */ 1466 is_zero = nir_iand(b, is_zero, nir_feq_imm(b, fwidth, 0.0)); 1467 } 1468 1469 /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. 
*/ 1470 nir_def *adjusted_lod = 1471 nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX), 1472 nir_channel(b, &tex->def, 1)); 1473 1474 nir_def *def = 1475 nir_vec2(b, nir_channel(b, &tex->def, 0), adjusted_lod); 1476 1477 nir_def_rewrite_uses_after(&tex->def, def, def->parent_instr); 1478} 1479 1480static bool 1481lower_index_to_offset(nir_builder *b, nir_tex_instr *tex) 1482{ 1483 bool progress = false; 1484 b->cursor = nir_before_instr(&tex->instr); 1485 1486 for (unsigned i = 0; i < tex->num_srcs; i++) { 1487 unsigned *index; 1488 switch (tex->src[i].src_type) { 1489 case nir_tex_src_texture_offset: 1490 index = &tex->texture_index; 1491 break; 1492 case nir_tex_src_sampler_offset: 1493 index = &tex->sampler_index; 1494 break; 1495 default: 1496 continue; 1497 } 1498 1499 /* If there's no base index, there's nothing to lower */ 1500 if ((*index) == 0) 1501 continue; 1502 1503 nir_def *sum = nir_iadd_imm(b, tex->src[i].src.ssa, *index); 1504 nir_src_rewrite(&tex->src[i].src, sum); 1505 *index = 0; 1506 progress = true; 1507 } 1508 1509 return progress; 1510} 1511 1512static bool 1513nir_lower_tex_block(nir_block *block, nir_builder *b, 1514 const nir_lower_tex_options *options, 1515 const struct nir_shader_compiler_options *compiler_options) 1516{ 1517 bool progress = false; 1518 1519 nir_foreach_instr_safe(instr, block) { 1520 if (instr->type != nir_instr_type_tex) 1521 continue; 1522 1523 nir_tex_instr *tex = nir_instr_as_tex(instr); 1524 bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim)); 1525 1526 /* mask of src coords to saturate (clamp): */ 1527 unsigned sat_mask = 0; 1528 /* ignore saturate for txf ops: these don't use samplers and can't GL_CLAMP */ 1529 if (nir_tex_instr_need_sampler(tex)) { 1530 if ((1 << tex->sampler_index) & options->saturate_r) 1531 sat_mask |= (1 << 2); /* .z */ 1532 if ((1 << tex->sampler_index) & options->saturate_t) 1533 sat_mask |= (1 << 1); /* .y */ 1534 if ((1 << tex->sampler_index) & options->saturate_s) 1535 
sat_mask |= (1 << 0); /* .x */ 1536 } 1537 1538 if (options->lower_index_to_offset) 1539 progress |= lower_index_to_offset(b, tex); 1540 1541 /* If we are clamping any coords, we must lower projector first 1542 * as clamping happens *after* projection: 1543 */ 1544 if (lower_txp || sat_mask || 1545 (options->lower_txp_array && tex->is_array)) { 1546 progress |= project_src(b, tex); 1547 } 1548 1549 if ((tex->op == nir_texop_txf && options->lower_txf_offset) || 1550 (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) || 1551 (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT && 1552 options->lower_rect_offset) || 1553 (options->lower_offset_filter && 1554 options->lower_offset_filter(instr, options->callback_data))) { 1555 progress = lower_offset(b, tex) || progress; 1556 } 1557 1558 if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect && 1559 tex->op != nir_texop_txf) { 1560 if (nir_tex_instr_is_query(tex)) 1561 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 1562 else if (compiler_options->has_texture_scaling) 1563 lower_rect_tex_scale(b, tex); 1564 else 1565 lower_rect(b, tex); 1566 1567 progress = true; 1568 } 1569 1570 if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D && 1571 (options->lower_1d || (tex->is_shadow && options->lower_1d_shadow))) { 1572 lower_1d(b, tex); 1573 progress = true; 1574 } 1575 1576 unsigned texture_index = tex->texture_index; 1577 uint32_t texture_mask = 1u << texture_index; 1578 int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); 1579 if (tex_index >= 0) { 1580 nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src); 1581 nir_variable *var = nir_deref_instr_get_variable(deref); 1582 texture_index = var ? var->data.binding : 0; 1583 texture_mask = var && texture_index < 32 ? 
(1u << texture_index) : 0u; 1584 } 1585 1586 if (texture_mask & options->lower_y_uv_external) { 1587 lower_y_uv_external(b, tex, options, texture_index); 1588 progress = true; 1589 } 1590 1591 if (texture_mask & options->lower_y_vu_external) { 1592 lower_y_vu_external(b, tex, options, texture_index); 1593 progress = true; 1594 } 1595 1596 if (texture_mask & options->lower_y_u_v_external) { 1597 lower_y_u_v_external(b, tex, options, texture_index); 1598 progress = true; 1599 } 1600 1601 if (texture_mask & options->lower_yx_xuxv_external) { 1602 lower_yx_xuxv_external(b, tex, options, texture_index); 1603 progress = true; 1604 } 1605 1606 if (texture_mask & options->lower_yx_xvxu_external) { 1607 lower_yx_xvxu_external(b, tex, options, texture_index); 1608 progress = true; 1609 } 1610 1611 if (texture_mask & options->lower_xy_uxvx_external) { 1612 lower_xy_uxvx_external(b, tex, options, texture_index); 1613 progress = true; 1614 } 1615 1616 if (texture_mask & options->lower_xy_vxux_external) { 1617 lower_xy_vxux_external(b, tex, options, texture_index); 1618 progress = true; 1619 } 1620 1621 if (texture_mask & options->lower_ayuv_external) { 1622 lower_ayuv_external(b, tex, options, texture_index); 1623 progress = true; 1624 } 1625 1626 if (texture_mask & options->lower_xyuv_external) { 1627 lower_xyuv_external(b, tex, options, texture_index); 1628 progress = true; 1629 } 1630 1631 if (texture_mask & options->lower_yuv_external) { 1632 lower_yuv_external(b, tex, options, texture_index); 1633 progress = true; 1634 } 1635 1636 if (texture_mask & options->lower_yu_yv_external) { 1637 lower_yu_yv_external(b, tex, options, texture_index); 1638 progress = true; 1639 } 1640 1641 if (texture_mask & options->lower_yv_yu_external) { 1642 lower_yv_yu_external(b, tex, options, texture_index); 1643 progress = true; 1644 } 1645 1646 if (texture_mask & options->lower_y41x_external) { 1647 lower_y41x_external(b, tex, options, texture_index); 1648 progress = true; 1649 } 1650 1651 if 
(sat_mask) { 1652 tex = saturate_src(b, tex, sat_mask); 1653 progress = true; 1654 } 1655 1656 if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) { 1657 swizzle_tg4_broadcom(b, tex); 1658 progress = true; 1659 } 1660 1661 if ((texture_mask & options->swizzle_result) && 1662 !nir_tex_instr_is_query(tex) && 1663 !(tex->is_shadow && tex->is_new_style_shadow)) { 1664 swizzle_result(b, tex, options->swizzles[tex->texture_index]); 1665 progress = true; 1666 } 1667 1668 /* should be after swizzle so we know which channels are rgb: */ 1669 if ((texture_mask & options->lower_srgb) && 1670 !nir_tex_instr_is_query(tex) && !tex->is_shadow) { 1671 linearize_srgb_result(b, tex); 1672 progress = true; 1673 } 1674 1675 const bool has_min_lod = 1676 nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0; 1677 const bool has_offset = 1678 nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0; 1679 1680 if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod && 1681 options->lower_txb_shadow_clamp) { 1682 lower_implicit_lod(b, tex); 1683 progress = true; 1684 } 1685 1686 if (options->lower_tex_packing_cb && 1687 tex->op != nir_texop_txs && 1688 tex->op != nir_texop_query_levels && 1689 tex->op != nir_texop_texture_samples) { 1690 progress |= lower_tex_packing(b, tex, options); 1691 } 1692 1693 if (tex->op == nir_texop_txd && 1694 (options->lower_txd || 1695 (options->lower_txd_clamp && has_min_lod) || 1696 (options->lower_txd_shadow && tex->is_shadow) || 1697 (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) || 1698 (options->lower_txd_offset_clamp && has_offset && has_min_lod) || 1699 (options->lower_txd_clamp_bindless_sampler && has_min_lod && 1700 nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) || 1701 (options->lower_txd_clamp_if_sampler_index_not_lt_16 && 1702 has_min_lod && !sampler_index_lt(tex, 16)) || 1703 (options->lower_txd_cube_map && 1704 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) || 1705 (options->lower_txd_3d && 
1706 tex->sampler_dim == GLSL_SAMPLER_DIM_3D) || 1707 (options->lower_txd_array && tex->is_array))) { 1708 lower_gradient(b, tex); 1709 progress = true; 1710 continue; 1711 } 1712 1713 /* TXF, TXS and TXL require a LOD but not everything we implement using those 1714 * three opcodes provides one. Provide a default LOD of 0. 1715 */ 1716 if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) && 1717 (tex->op == nir_texop_txf || tex->op == nir_texop_txs || 1718 tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) { 1719 b->cursor = nir_before_instr(&tex->instr); 1720 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(b, 0)); 1721 progress = true; 1722 continue; 1723 } 1724 1725 /* Only fragment and compute (in some cases) support implicit 1726 * derivatives. Lower those opcodes which use implicit derivatives to 1727 * use an explicit LOD of 0. 1728 * But don't touch RECT samplers because they don't have mips. 1729 */ 1730 if (options->lower_invalid_implicit_lod && 1731 nir_tex_instr_has_implicit_derivative(tex) && 1732 tex->sampler_dim != GLSL_SAMPLER_DIM_RECT && 1733 !nir_shader_supports_implicit_lod(b->shader)) { 1734 lower_zero_lod(b, tex); 1735 progress = true; 1736 } 1737 1738 if (options->lower_txs_lod && tex->op == nir_texop_txs) { 1739 progress |= nir_lower_txs_lod(b, tex); 1740 continue; 1741 } 1742 1743 if (options->lower_txs_cube_array && tex->op == nir_texop_txs && 1744 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) { 1745 nir_lower_txs_cube_array(b, tex); 1746 progress = true; 1747 continue; 1748 } 1749 1750 /* has to happen after all the other lowerings as the original tg4 gets 1751 * replaced by 4 tg4 instructions. 
1752 */ 1753 if (tex->op == nir_texop_tg4 && 1754 nir_tex_instr_has_explicit_tg4_offsets(tex) && 1755 options->lower_tg4_offsets) { 1756 progress |= lower_tg4_offsets(b, tex); 1757 continue; 1758 } 1759 1760 if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) { 1761 nir_lower_ms_txf_to_fragment_fetch(b, tex); 1762 progress = true; 1763 continue; 1764 } 1765 1766 if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) { 1767 nir_lower_samples_identical_to_fragment_fetch(b, tex); 1768 progress = true; 1769 continue; 1770 } 1771 1772 if (options->lower_lod_zero_width && tex->op == nir_texop_lod) { 1773 nir_lower_lod_zero_width(b, tex); 1774 progress = true; 1775 continue; 1776 } 1777 } 1778 1779 return progress; 1780} 1781 1782static bool 1783nir_lower_tex_impl(nir_function_impl *impl, 1784 const nir_lower_tex_options *options, 1785 const struct nir_shader_compiler_options *compiler_options) 1786{ 1787 bool progress = false; 1788 nir_builder builder = nir_builder_create(impl); 1789 1790 nir_foreach_block(block, impl) { 1791 progress |= nir_lower_tex_block(block, &builder, options, compiler_options); 1792 } 1793 1794 nir_metadata_preserve(impl, nir_metadata_control_flow); 1795 return progress; 1796} 1797 1798bool 1799nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) 1800{ 1801 bool progress = false; 1802 1803 /* lower_tg4_offsets injects new tg4 instructions that won't be lowered 1804 * if lower_tg4_broadcom_swizzle is also requested so when both are set 1805 * we want to run lower_tg4_offsets in a separate pass first. 1806 */ 1807 if (options->lower_tg4_offsets && options->lower_tg4_broadcom_swizzle) { 1808 nir_lower_tex_options _options = { 1809 .lower_tg4_offsets = true, 1810 }; 1811 progress = nir_lower_tex(shader, &_options); 1812 } 1813 1814 nir_foreach_function_impl(impl, shader) { 1815 progress |= nir_lower_tex_impl(impl, options, shader->options); 1816 } 1817 1818 return progress; 1819}