Mirror of the OpenBSD xenocara tree: github.com/openbsd/xenocara
openbsd
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at jcs 2258 lines 70 kB view raw
1/* 2 * Copyright © 2017 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_serialize.h" 25#include "util/u_dynarray.h" 26#include "util/u_math.h" 27#include "nir_control_flow.h" 28#include "nir_xfb_info.h" 29 30#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) 31#define MAX_OBJECT_IDS (1 << 20) 32 33typedef struct { 34 size_t blob_offset; 35 nir_def *src; 36 nir_block *block; 37} write_phi_fixup; 38 39typedef struct { 40 const nir_shader *nir; 41 42 struct blob *blob; 43 44 /* maps pointer to index */ 45 struct hash_table *remap_table; 46 47 /* the next index to assign to a NIR in-memory object */ 48 uint32_t next_idx; 49 50 /* Array of write_phi_fixup structs representing phi sources that need to 51 * be resolved in the second pass. 52 */ 53 struct util_dynarray phi_fixups; 54 55 /* The last serialized type. 
*/ 56 const struct glsl_type *last_type; 57 const struct glsl_type *last_interface_type; 58 struct nir_variable_data last_var_data; 59 60 /* For skipping equal ALU headers (typical after scalarization). */ 61 nir_instr_type last_instr_type; 62 uintptr_t last_alu_header_offset; 63 uint32_t last_alu_header; 64 65 /* Don't write optional data such as variable names. */ 66 bool strip; 67} write_ctx; 68 69typedef struct { 70 nir_shader *nir; 71 72 struct blob_reader *blob; 73 74 /* the next index to assign to a NIR in-memory object */ 75 uint32_t next_idx; 76 77 /* The length of the index -> object table */ 78 uint32_t idx_table_len; 79 80 /* map from index to deserialized pointer */ 81 void **idx_table; 82 83 /* List of phi sources. */ 84 struct list_head phi_srcs; 85 86 /* The last deserialized type. */ 87 const struct glsl_type *last_type; 88 const struct glsl_type *last_interface_type; 89 struct nir_variable_data last_var_data; 90} read_ctx; 91 92static void 93write_add_object(write_ctx *ctx, const void *obj) 94{ 95 uint32_t index = ctx->next_idx++; 96 assert(index != MAX_OBJECT_IDS); 97 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t)index); 98} 99 100static uint32_t 101write_lookup_object(write_ctx *ctx, const void *obj) 102{ 103 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); 104 assert(entry); 105 return (uint32_t)(uintptr_t)entry->data; 106} 107 108static void 109read_add_object(read_ctx *ctx, void *obj) 110{ 111 assert(ctx->next_idx < ctx->idx_table_len); 112 ctx->idx_table[ctx->next_idx++] = obj; 113} 114 115static void * 116read_lookup_object(read_ctx *ctx, uint32_t idx) 117{ 118 assert(idx < ctx->idx_table_len); 119 return ctx->idx_table[idx]; 120} 121 122static void * 123read_object(read_ctx *ctx) 124{ 125 return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); 126} 127 128static uint32_t 129encode_bit_size_3bits(uint8_t bit_size) 130{ 131 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. 
*/ 132 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size)); 133 if (bit_size) 134 return util_logbase2(bit_size) + 1; 135 return 0; 136} 137 138static uint8_t 139decode_bit_size_3bits(uint8_t bit_size) 140{ 141 if (bit_size) 142 return 1 << (bit_size - 1); 143 return 0; 144} 145 146#define NUM_COMPONENTS_IS_SEPARATE_7 7 147 148static uint8_t 149encode_num_components_in_3bits(uint8_t num_components) 150{ 151 if (num_components <= 4) 152 return num_components; 153 if (num_components == 8) 154 return 5; 155 if (num_components == 16) 156 return 6; 157 158 /* special value indicating that num_components is in the next uint32 */ 159 return NUM_COMPONENTS_IS_SEPARATE_7; 160} 161 162static uint8_t 163decode_num_components_in_3bits(uint8_t value) 164{ 165 if (value <= 4) 166 return value; 167 if (value == 5) 168 return 8; 169 if (value == 6) 170 return 16; 171 172 unreachable("invalid num_components encoding"); 173 return 0; 174} 175 176static void 177write_constant(write_ctx *ctx, const nir_constant *c) 178{ 179 blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); 180 blob_write_uint32(ctx->blob, c->num_elements); 181 for (unsigned i = 0; i < c->num_elements; i++) 182 write_constant(ctx, c->elements[i]); 183} 184 185static nir_constant * 186read_constant(read_ctx *ctx, nir_variable *nvar) 187{ 188 nir_constant *c = ralloc(nvar, nir_constant); 189 190 static const nir_const_value zero_vals[ARRAY_SIZE(c->values)] = { 0 }; 191 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); 192 c->is_null_constant = memcmp(c->values, zero_vals, sizeof(c->values)) == 0; 193 c->num_elements = blob_read_uint32(ctx->blob); 194 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 195 for (unsigned i = 0; i < c->num_elements; i++) { 196 c->elements[i] = read_constant(ctx, nvar); 197 c->is_null_constant &= c->elements[i]->is_null_constant; 198 } 199 200 return c; 201} 202 203enum var_data_encoding { 204 var_encode_full, 205 
var_encode_location_diff, 206}; 207 208union packed_var { 209 uint32_t u32; 210 struct { 211 unsigned has_name : 1; 212 unsigned has_constant_initializer : 1; 213 unsigned has_pointer_initializer : 1; 214 unsigned has_interface_type : 1; 215 unsigned num_state_slots : 7; 216 unsigned data_encoding : 2; 217 unsigned type_same_as_last : 1; 218 unsigned interface_type_same_as_last : 1; 219 unsigned ray_query : 1; 220 unsigned num_members : 16; 221 } u; 222}; 223 224union packed_var_data_diff { 225 uint32_t u32; 226 struct { 227 int location : 13; 228 int location_frac : 3; 229 int driver_location : 16; 230 } u; 231}; 232 233static void 234write_variable(write_ctx *ctx, const nir_variable *var) 235{ 236 write_add_object(ctx, var); 237 238 assert(var->num_state_slots < (1 << 7)); 239 240 STATIC_ASSERT(sizeof(union packed_var) == 4); 241 union packed_var flags; 242 flags.u32 = 0; 243 244 flags.u.has_name = !ctx->strip && var->name; 245 flags.u.has_constant_initializer = !!(var->constant_initializer); 246 flags.u.has_pointer_initializer = !!(var->pointer_initializer); 247 flags.u.has_interface_type = !!(var->interface_type); 248 flags.u.type_same_as_last = var->type == ctx->last_type; 249 flags.u.interface_type_same_as_last = 250 var->interface_type && var->interface_type == ctx->last_interface_type; 251 flags.u.num_state_slots = var->num_state_slots; 252 flags.u.num_members = var->num_members; 253 254 struct nir_variable_data data = var->data; 255 256 /* When stripping, we expect that the location is no longer needed, 257 * which is typically after shaders are linked. 
258 */ 259 if (ctx->strip && 260 data.mode != nir_var_system_value && 261 data.mode != nir_var_shader_in && 262 data.mode != nir_var_shader_out) 263 data.location = 0; 264 265 struct nir_variable_data tmp = data; 266 267 tmp.location = ctx->last_var_data.location; 268 tmp.location_frac = ctx->last_var_data.location_frac; 269 tmp.driver_location = ctx->last_var_data.driver_location; 270 271 /* See if we can encode only the difference in locations from the last 272 * variable. 273 */ 274 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 && 275 abs((int)data.location - 276 (int)ctx->last_var_data.location) < (1 << 12) && 277 abs((int)data.driver_location - 278 (int)ctx->last_var_data.driver_location) < (1 << 15)) 279 flags.u.data_encoding = var_encode_location_diff; 280 else 281 flags.u.data_encoding = var_encode_full; 282 283 flags.u.ray_query = var->data.ray_query; 284 285 blob_write_uint32(ctx->blob, flags.u32); 286 287 if (!flags.u.type_same_as_last) { 288 encode_type_to_blob(ctx->blob, var->type); 289 ctx->last_type = var->type; 290 } 291 292 if (var->interface_type && !flags.u.interface_type_same_as_last) { 293 encode_type_to_blob(ctx->blob, var->interface_type); 294 ctx->last_interface_type = var->interface_type; 295 } 296 297 if (flags.u.has_name) 298 blob_write_string(ctx->blob, var->name); 299 300 if (flags.u.data_encoding == var_encode_full) { 301 blob_write_bytes(ctx->blob, &data, sizeof(data)); 302 } else { 303 /* Serialize only the difference in locations from the last variable. 
304 */ 305 union packed_var_data_diff diff; 306 307 diff.u.location = data.location - ctx->last_var_data.location; 308 diff.u.location_frac = data.location_frac - 309 ctx->last_var_data.location_frac; 310 diff.u.driver_location = data.driver_location - 311 ctx->last_var_data.driver_location; 312 313 blob_write_uint32(ctx->blob, diff.u32); 314 } 315 316 ctx->last_var_data = data; 317 318 for (unsigned i = 0; i < var->num_state_slots; i++) { 319 blob_write_bytes(ctx->blob, &var->state_slots[i], 320 sizeof(var->state_slots[i])); 321 } 322 if (var->constant_initializer) 323 write_constant(ctx, var->constant_initializer); 324 if (var->pointer_initializer) 325 blob_write_uint32(ctx->blob, 326 write_lookup_object(ctx, var->pointer_initializer)); 327 if (var->num_members > 0) { 328 blob_write_bytes(ctx->blob, (uint8_t *)var->members, 329 var->num_members * sizeof(*var->members)); 330 } 331} 332 333static nir_variable * 334read_variable(read_ctx *ctx) 335{ 336 nir_variable *var = rzalloc(ctx->nir, nir_variable); 337 read_add_object(ctx, var); 338 339 union packed_var flags; 340 flags.u32 = blob_read_uint32(ctx->blob); 341 342 if (flags.u.type_same_as_last) { 343 var->type = ctx->last_type; 344 } else { 345 var->type = decode_type_from_blob(ctx->blob); 346 ctx->last_type = var->type; 347 } 348 349 if (flags.u.has_interface_type) { 350 if (flags.u.interface_type_same_as_last) { 351 var->interface_type = ctx->last_interface_type; 352 } else { 353 var->interface_type = decode_type_from_blob(ctx->blob); 354 ctx->last_interface_type = var->interface_type; 355 } 356 } 357 358 if (flags.u.has_name) { 359 const char *name = blob_read_string(ctx->blob); 360 var->name = ralloc_strdup(var, name); 361 } else { 362 var->name = NULL; 363 } 364 365 if (flags.u.data_encoding == var_encode_full) { 366 blob_copy_bytes(ctx->blob, (uint8_t *)&var->data, sizeof(var->data)); 367 ctx->last_var_data = var->data; 368 } else { /* var_encode_location_diff */ 369 union packed_var_data_diff diff; 370 
diff.u32 = blob_read_uint32(ctx->blob); 371 372 var->data = ctx->last_var_data; 373 var->data.location += diff.u.location; 374 var->data.location_frac += diff.u.location_frac; 375 var->data.driver_location += diff.u.driver_location; 376 377 ctx->last_var_data = var->data; 378 } 379 380 var->data.ray_query = flags.u.ray_query; 381 382 var->num_state_slots = flags.u.num_state_slots; 383 if (var->num_state_slots != 0) { 384 var->state_slots = ralloc_array(var, nir_state_slot, 385 var->num_state_slots); 386 for (unsigned i = 0; i < var->num_state_slots; i++) { 387 blob_copy_bytes(ctx->blob, &var->state_slots[i], 388 sizeof(var->state_slots[i])); 389 } 390 } 391 if (flags.u.has_constant_initializer) 392 var->constant_initializer = read_constant(ctx, var); 393 else 394 var->constant_initializer = NULL; 395 396 if (flags.u.has_pointer_initializer) 397 var->pointer_initializer = read_object(ctx); 398 else 399 var->pointer_initializer = NULL; 400 401 var->num_members = flags.u.num_members; 402 if (var->num_members > 0) { 403 var->members = ralloc_array(var, struct nir_variable_data, 404 var->num_members); 405 blob_copy_bytes(ctx->blob, (uint8_t *)var->members, 406 var->num_members * sizeof(*var->members)); 407 } 408 409 return var; 410} 411 412static void 413write_var_list(write_ctx *ctx, const struct exec_list *src) 414{ 415 blob_write_uint32(ctx->blob, exec_list_length(src)); 416 foreach_list_typed(nir_variable, var, node, src) { 417 write_variable(ctx, var); 418 } 419} 420 421static void 422read_var_list(read_ctx *ctx, struct exec_list *dst) 423{ 424 exec_list_make_empty(dst); 425 unsigned num_vars = blob_read_uint32(ctx->blob); 426 for (unsigned i = 0; i < num_vars; i++) { 427 nir_variable *var = read_variable(ctx); 428 exec_list_push_tail(dst, &var->node); 429 } 430} 431 432union packed_src { 433 uint32_t u32; 434 struct { 435 unsigned _pad : 2; /* <-- Header */ 436 unsigned object_idx : 20; 437 unsigned _footer : 10; /* <-- Footer */ 438 } any; 439 struct { 440 
unsigned _header : 22; /* <-- Header */ 441 unsigned _pad : 2; /* <-- Footer */ 442 unsigned swizzle_x : 2; 443 unsigned swizzle_y : 2; 444 unsigned swizzle_z : 2; 445 unsigned swizzle_w : 2; 446 } alu; 447 struct { 448 unsigned _header : 22; /* <-- Header */ 449 unsigned src_type : 5; /* <-- Footer */ 450 unsigned _pad : 5; 451 } tex; 452}; 453 454static void 455write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) 456{ 457 header.any.object_idx = write_lookup_object(ctx, src->ssa); 458 blob_write_uint32(ctx->blob, header.u32); 459} 460 461static void 462write_src(write_ctx *ctx, const nir_src *src) 463{ 464 union packed_src header = { 0 }; 465 write_src_full(ctx, src, header); 466} 467 468static union packed_src 469read_src(read_ctx *ctx, nir_src *src) 470{ 471 STATIC_ASSERT(sizeof(union packed_src) == 4); 472 union packed_src header; 473 header.u32 = blob_read_uint32(ctx->blob); 474 475 src->ssa = read_lookup_object(ctx, header.any.object_idx); 476 return header; 477} 478 479union packed_def { 480 uint8_t u8; 481 struct { 482 uint8_t num_components : 3; 483 uint8_t bit_size : 3; 484 uint8_t divergent : 1; 485 uint8_t loop_invariant : 1; 486 }; 487}; 488 489enum intrinsic_const_indices_encoding { 490 /* Use packed_const_indices to store tightly packed indices. 491 * 492 * The common case for load_ubo is 0, 0, 0, which is trivially represented. 493 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3 494 */ 495 const_indices_all_combined, 496 497 const_indices_8bit, /* 8 bits per element */ 498 const_indices_16bit, /* 16 bits per element */ 499 const_indices_32bit, /* 32 bits per element */ 500}; 501 502enum load_const_packing { 503 /* Constants are not packed and are stored in following dwords. 
*/ 504 load_const_full, 505 506 /* packed_value contains high 19 bits, low bits are 0, 507 * good for floating-point decimals 508 */ 509 load_const_scalar_hi_19bits, 510 511 /* packed_value contains low 19 bits, high bits are sign-extended */ 512 load_const_scalar_lo_19bits_sext, 513}; 514 515union packed_instr { 516 uint32_t u32; 517 struct { 518 unsigned instr_type : 4; /* always present */ 519 unsigned _pad : 20; 520 unsigned def : 8; /* always last */ 521 } any; 522 struct { 523 unsigned instr_type : 4; 524 unsigned exact : 1; 525 unsigned no_signed_wrap : 1; 526 unsigned no_unsigned_wrap : 1; 527 unsigned padding : 1; 528 /* Swizzles for 2 srcs */ 529 unsigned two_swizzles : 4; 530 unsigned op : 9; 531 unsigned packed_src_ssa_16bit : 1; 532 /* Scalarized ALUs always have the same header. */ 533 unsigned num_followup_alu_sharing_header : 2; 534 unsigned def : 8; 535 } alu; 536 struct { 537 unsigned instr_type : 4; 538 unsigned deref_type : 3; 539 unsigned cast_type_same_as_last : 1; 540 unsigned modes : 6; /* See (de|en)code_deref_modes() */ 541 unsigned _pad : 8; 542 unsigned in_bounds : 1; 543 unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */ 544 unsigned def : 8; 545 } deref; 546 struct { 547 unsigned instr_type : 4; 548 unsigned deref_type : 3; 549 unsigned _pad : 1; 550 unsigned object_idx : 16; /* if 0, the object ID is a separate uint32 */ 551 unsigned def : 8; 552 } deref_var; 553 struct { 554 unsigned instr_type : 4; 555 unsigned intrinsic : 10; 556 unsigned const_indices_encoding : 2; 557 unsigned packed_const_indices : 8; 558 unsigned def : 8; 559 } intrinsic; 560 struct { 561 unsigned instr_type : 4; 562 unsigned last_component : 4; 563 unsigned bit_size : 3; 564 unsigned packing : 2; /* enum load_const_packing */ 565 unsigned packed_value : 19; /* meaning determined by packing */ 566 } load_const; 567 struct { 568 unsigned instr_type : 4; 569 unsigned last_component : 4; 570 unsigned bit_size : 3; 571 unsigned _pad : 21; 572 } 
undef; 573 struct { 574 unsigned instr_type : 4; 575 unsigned num_srcs : 4; 576 unsigned op : 5; 577 unsigned _pad : 11; 578 unsigned def : 8; 579 } tex; 580 struct { 581 unsigned instr_type : 4; 582 unsigned num_srcs : 20; 583 unsigned def : 8; 584 } phi; 585 struct { 586 unsigned instr_type : 4; 587 unsigned type : 2; 588 unsigned _pad : 26; 589 } jump; 590 struct { 591 unsigned instr_type : 4; 592 unsigned type : 4; 593 unsigned string_length : 16; 594 unsigned def : 8; 595 } debug_info; 596}; 597 598/* Write "lo24" as low 24 bits in the first uint32. */ 599static void 600write_def(write_ctx *ctx, const nir_def *def, union packed_instr header, 601 nir_instr_type instr_type) 602{ 603 STATIC_ASSERT(sizeof(union packed_def) == 1); 604 union packed_def pdef; 605 pdef.u8 = 0; 606 607 pdef.num_components = 608 encode_num_components_in_3bits(def->num_components); 609 pdef.bit_size = encode_bit_size_3bits(def->bit_size); 610 pdef.divergent = def->divergent; 611 pdef.loop_invariant = def->loop_invariant; 612 header.any.def = pdef.u8; 613 614 /* Check if the current ALU instruction has the same header as the previous 615 * instruction that is also ALU. If it is, we don't have to write 616 * the current header. This is a typical occurence after scalarization. 617 */ 618 if (instr_type == nir_instr_type_alu) { 619 bool equal_header = false; 620 621 if (ctx->last_instr_type == nir_instr_type_alu) { 622 assert(ctx->last_alu_header_offset); 623 union packed_instr last_header; 624 last_header.u32 = ctx->last_alu_header; 625 626 /* Clear the field that counts ALUs with equal headers. */ 627 union packed_instr clean_header; 628 clean_header.u32 = last_header.u32; 629 clean_header.alu.num_followup_alu_sharing_header = 0; 630 631 /* There can be at most 4 consecutive ALU instructions 632 * sharing the same header. 
633 */ 634 if (last_header.alu.num_followup_alu_sharing_header < 3 && 635 header.u32 == clean_header.u32) { 636 last_header.alu.num_followup_alu_sharing_header++; 637 blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, 638 last_header.u32); 639 ctx->last_alu_header = last_header.u32; 640 equal_header = true; 641 } 642 } 643 644 if (!equal_header) { 645 ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob); 646 blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32); 647 ctx->last_alu_header = header.u32; 648 } 649 } else { 650 blob_write_uint32(ctx->blob, header.u32); 651 } 652 653 if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 654 blob_write_uint32(ctx->blob, def->num_components); 655 656 write_add_object(ctx, def); 657} 658 659static void 660read_def(read_ctx *ctx, nir_def *def, nir_instr *instr, 661 union packed_instr header) 662{ 663 union packed_def pdef; 664 pdef.u8 = header.any.def; 665 666 unsigned bit_size = decode_bit_size_3bits(pdef.bit_size); 667 unsigned num_components; 668 if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 669 num_components = blob_read_uint32(ctx->blob); 670 else 671 num_components = decode_num_components_in_3bits(pdef.num_components); 672 nir_def_init(instr, def, num_components, bit_size); 673 def->divergent = pdef.divergent; 674 def->loop_invariant = pdef.loop_invariant; 675 read_add_object(ctx, def); 676} 677 678static bool 679are_object_ids_16bit(write_ctx *ctx) 680{ 681 /* Check the highest object ID, because they are monotonic. 
*/ 682 return ctx->next_idx < (1 << 16); 683} 684 685static bool 686is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) 687{ 688 unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 689 690 for (unsigned i = 0; i < num_srcs; i++) { 691 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 692 693 for (unsigned chan = 0; chan < src_components; chan++) { 694 /* The swizzles for src0.x and src1.x are stored 695 * in two_swizzles for SSA ALUs. 696 */ 697 if (i < 2 && chan == 0 && alu->src[i].swizzle[chan] < 4) 698 continue; 699 700 if (alu->src[i].swizzle[chan] != chan) 701 return false; 702 } 703 } 704 705 return are_object_ids_16bit(ctx); 706} 707 708static void 709write_alu(write_ctx *ctx, const nir_alu_instr *alu) 710{ 711 unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 712 713 /* 9 bits for nir_op */ 714 STATIC_ASSERT(nir_num_opcodes <= 512); 715 union packed_instr header; 716 header.u32 = 0; 717 718 header.alu.instr_type = alu->instr.type; 719 header.alu.exact = alu->exact; 720 header.alu.no_signed_wrap = alu->no_signed_wrap; 721 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap; 722 header.alu.op = alu->op; 723 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); 724 725 if (header.alu.packed_src_ssa_16bit) { 726 /* For packed srcs of SSA ALUs, this field stores the swizzles. 
*/ 727 header.alu.two_swizzles = alu->src[0].swizzle[0]; 728 if (num_srcs > 1) 729 header.alu.two_swizzles |= alu->src[1].swizzle[0] << 2; 730 } 731 732 write_def(ctx, &alu->def, header, alu->instr.type); 733 blob_write_uint32(ctx->blob, alu->fp_fast_math); 734 735 if (header.alu.packed_src_ssa_16bit) { 736 for (unsigned i = 0; i < num_srcs; i++) { 737 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa); 738 assert(idx < (1 << 16)); 739 blob_write_uint16(ctx->blob, idx); 740 } 741 } else { 742 for (unsigned i = 0; i < num_srcs; i++) { 743 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 744 unsigned src_components = nir_src_num_components(alu->src[i].src); 745 union packed_src src; 746 bool packed = src_components <= 4 && src_channels <= 4; 747 src.u32 = 0; 748 749 if (packed) { 750 src.alu.swizzle_x = alu->src[i].swizzle[0]; 751 src.alu.swizzle_y = alu->src[i].swizzle[1]; 752 src.alu.swizzle_z = alu->src[i].swizzle[2]; 753 src.alu.swizzle_w = alu->src[i].swizzle[3]; 754 } 755 756 write_src_full(ctx, &alu->src[i].src, src); 757 758 /* Store swizzles for vec8 and vec16. 
*/ 759 if (!packed) { 760 for (unsigned o = 0; o < src_channels; o += 8) { 761 unsigned value = 0; 762 763 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 764 value |= (uint32_t)alu->src[i].swizzle[o + j] << (4 * j); /* 4 bits per swizzle */ 765 } 766 767 blob_write_uint32(ctx->blob, value); 768 } 769 } 770 } 771 } 772} 773 774static nir_alu_instr * 775read_alu(read_ctx *ctx, union packed_instr header) 776{ 777 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs; 778 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op); 779 780 alu->exact = header.alu.exact; 781 alu->no_signed_wrap = header.alu.no_signed_wrap; 782 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap; 783 784 read_def(ctx, &alu->def, &alu->instr, header); 785 alu->fp_fast_math = blob_read_uint32(ctx->blob); 786 787 if (header.alu.packed_src_ssa_16bit) { 788 for (unsigned i = 0; i < num_srcs; i++) { 789 nir_alu_src *src = &alu->src[i]; 790 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 791 792 memset(&src->swizzle, 0, sizeof(src->swizzle)); 793 794 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 795 796 for (unsigned chan = 0; chan < src_components; chan++) 797 src->swizzle[chan] = chan; 798 } 799 } else { 800 for (unsigned i = 0; i < num_srcs; i++) { 801 union packed_src src = read_src(ctx, &alu->src[i].src); 802 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 803 unsigned src_components = nir_src_num_components(alu->src[i].src); 804 bool packed = src_components <= 4 && src_channels <= 4; 805 806 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle)); 807 808 if (packed) { 809 alu->src[i].swizzle[0] = src.alu.swizzle_x; 810 alu->src[i].swizzle[1] = src.alu.swizzle_y; 811 alu->src[i].swizzle[2] = src.alu.swizzle_z; 812 alu->src[i].swizzle[3] = src.alu.swizzle_w; 813 } else { 814 /* Load swizzles for vec8 and vec16. 
*/ 815 for (unsigned o = 0; o < src_channels; o += 8) { 816 unsigned value = blob_read_uint32(ctx->blob); 817 818 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 819 alu->src[i].swizzle[o + j] = 820 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */ 821 } 822 } 823 } 824 } 825 } 826 827 if (header.alu.packed_src_ssa_16bit) { 828 alu->src[0].swizzle[0] = header.alu.two_swizzles & 0x3; 829 if (num_srcs > 1) 830 alu->src[1].swizzle[0] = header.alu.two_swizzles >> 2; 831 } 832 833 return alu; 834} 835 836#define NUM_GENERIC_MODES 4 837#define MODE_ENC_GENERIC_BIT (1 << 5) 838 839static nir_variable_mode 840decode_deref_modes(unsigned modes) 841{ 842 if (modes & MODE_ENC_GENERIC_BIT) { 843 modes &= ~MODE_ENC_GENERIC_BIT; 844 return modes << (ffs(nir_var_mem_generic) - 1); 845 } else { 846 return 1 << modes; 847 } 848} 849 850static unsigned 851encode_deref_modes(nir_variable_mode modes) 852{ 853 /* Mode sets on derefs generally come in two forms. For certain OpenCL 854 * cases, we can have more than one of the generic modes set. In this 855 * case, we need the full bitfield. Fortunately, there are only 4 of 856 * these. For all other modes, we can only have one mode at a time so we 857 * can compress them by only storing the bit position. This, plus one bit 858 * to select encoding, lets us pack the entire bitfield in 6 bits. 
859 */ 860 861 /* Assert that the modes we are compressing fit along with the generic bit 862 */ 863 STATIC_ASSERT((nir_num_variable_modes - NUM_GENERIC_MODES) < 864 MODE_ENC_GENERIC_BIT); 865 866 /* Assert that the generic modes are defined at the end of the modes enum 867 */ 868 STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) < 869 (1 << (nir_num_variable_modes - NUM_GENERIC_MODES))); 870 871 unsigned enc; 872 if (modes == 0 || (modes & nir_var_mem_generic)) { 873 assert(!(modes & ~nir_var_mem_generic)); 874 enc = modes >> (ffs(nir_var_mem_generic) - 1); 875 assert(enc < MODE_ENC_GENERIC_BIT); 876 enc |= MODE_ENC_GENERIC_BIT; 877 } else { 878 assert(util_is_power_of_two_nonzero(modes)); 879 enc = ffs(modes) - 1; 880 assert(enc < MODE_ENC_GENERIC_BIT); 881 } 882 assert(modes == decode_deref_modes(enc)); 883 return enc; 884} 885 886static void 887write_deref(write_ctx *ctx, const nir_deref_instr *deref) 888{ 889 assert(deref->deref_type < 8); 890 891 union packed_instr header; 892 header.u32 = 0; 893 894 header.deref.instr_type = deref->instr.type; 895 header.deref.deref_type = deref->deref_type; 896 897 if (deref->deref_type == nir_deref_type_cast) { 898 header.deref.modes = encode_deref_modes(deref->modes); 899 header.deref.cast_type_same_as_last = deref->type == ctx->last_type; 900 } 901 902 unsigned var_idx = 0; 903 if (deref->deref_type == nir_deref_type_var) { 904 var_idx = write_lookup_object(ctx, deref->var); 905 if (var_idx && var_idx < (1 << 16)) 906 header.deref_var.object_idx = var_idx; 907 } 908 909 if (deref->deref_type == nir_deref_type_array || 910 deref->deref_type == nir_deref_type_ptr_as_array) { 911 header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx); 912 913 header.deref.in_bounds = deref->arr.in_bounds; 914 } 915 916 write_def(ctx, &deref->def, header, deref->instr.type); 917 918 switch (deref->deref_type) { 919 case nir_deref_type_var: 920 if (!header.deref_var.object_idx) 921 blob_write_uint32(ctx->blob, var_idx); 922 break; 923 
924 case nir_deref_type_struct: 925 write_src(ctx, &deref->parent); 926 blob_write_uint32(ctx->blob, deref->strct.index); 927 break; 928 929 case nir_deref_type_array: 930 case nir_deref_type_ptr_as_array: 931 if (header.deref.packed_src_ssa_16bit) { 932 blob_write_uint16(ctx->blob, 933 write_lookup_object(ctx, deref->parent.ssa)); 934 blob_write_uint16(ctx->blob, 935 write_lookup_object(ctx, deref->arr.index.ssa)); 936 } else { 937 write_src(ctx, &deref->parent); 938 write_src(ctx, &deref->arr.index); 939 } 940 break; 941 942 case nir_deref_type_cast: 943 write_src(ctx, &deref->parent); 944 blob_write_uint32(ctx->blob, deref->cast.ptr_stride); 945 blob_write_uint32(ctx->blob, deref->cast.align_mul); 946 blob_write_uint32(ctx->blob, deref->cast.align_offset); 947 if (!header.deref.cast_type_same_as_last) { 948 encode_type_to_blob(ctx->blob, deref->type); 949 ctx->last_type = deref->type; 950 } 951 break; 952 953 case nir_deref_type_array_wildcard: 954 write_src(ctx, &deref->parent); 955 break; 956 957 default: 958 unreachable("Invalid deref type"); 959 } 960} 961 962static nir_deref_instr * 963read_deref(read_ctx *ctx, union packed_instr header) 964{ 965 nir_deref_type deref_type = header.deref.deref_type; 966 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); 967 968 read_def(ctx, &deref->def, &deref->instr, header); 969 970 nir_deref_instr *parent; 971 972 switch (deref->deref_type) { 973 case nir_deref_type_var: 974 if (header.deref_var.object_idx) 975 deref->var = read_lookup_object(ctx, header.deref_var.object_idx); 976 else 977 deref->var = read_object(ctx); 978 979 deref->type = deref->var->type; 980 break; 981 982 case nir_deref_type_struct: 983 read_src(ctx, &deref->parent); 984 parent = nir_src_as_deref(deref->parent); 985 deref->strct.index = blob_read_uint32(ctx->blob); 986 deref->type = glsl_get_struct_field(parent->type, deref->strct.index); 987 break; 988 989 case nir_deref_type_array: 990 case nir_deref_type_ptr_as_array: 991 if 
(header.deref.packed_src_ssa_16bit) { 992 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 993 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 994 } else { 995 read_src(ctx, &deref->parent); 996 read_src(ctx, &deref->arr.index); 997 } 998 999 deref->arr.in_bounds = header.deref.in_bounds; 1000 1001 parent = nir_src_as_deref(deref->parent); 1002 if (deref->deref_type == nir_deref_type_array) 1003 deref->type = glsl_get_array_element(parent->type); 1004 else 1005 deref->type = parent->type; 1006 break; 1007 1008 case nir_deref_type_cast: 1009 read_src(ctx, &deref->parent); 1010 deref->cast.ptr_stride = blob_read_uint32(ctx->blob); 1011 deref->cast.align_mul = blob_read_uint32(ctx->blob); 1012 deref->cast.align_offset = blob_read_uint32(ctx->blob); 1013 if (header.deref.cast_type_same_as_last) { 1014 deref->type = ctx->last_type; 1015 } else { 1016 deref->type = decode_type_from_blob(ctx->blob); 1017 ctx->last_type = deref->type; 1018 } 1019 break; 1020 1021 case nir_deref_type_array_wildcard: 1022 read_src(ctx, &deref->parent); 1023 parent = nir_src_as_deref(deref->parent); 1024 deref->type = glsl_get_array_element(parent->type); 1025 break; 1026 1027 default: 1028 unreachable("Invalid deref type"); 1029 } 1030 1031 if (deref_type == nir_deref_type_var) { 1032 deref->modes = deref->var->data.mode; 1033 } else if (deref->deref_type == nir_deref_type_cast) { 1034 deref->modes = decode_deref_modes(header.deref.modes); 1035 } else { 1036 deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes; 1037 } 1038 1039 return deref; 1040} 1041 1042static void 1043write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) 1044{ 1045 /* 10 bits for nir_intrinsic_op */ 1046 STATIC_ASSERT(nir_num_intrinsics <= 1024); 1047 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; 1048 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; 1049 assert(intrin->intrinsic < 
1024);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      /* Best case: all indices together fit in the header's 8 spare bits,
       * so no extra blob bytes are needed at all. */
      if (max_bits * num_indices <= 8) {
         header.intrinsic.const_indices_encoding = const_indices_all_combined;

         /* Pack all const indices into 8 bits. */
         unsigned bit_size = 8 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   /* write_def emits the header itself when there is a destination. */
   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_def(ctx, &intrin->def, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   /* const_indices_all_combined already lives in the header; the other
    * encodings append one fixed-width value per index. */
   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

/* Deserialize one intrinsic; inverse of write_intrinsic(). */
static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx,
               union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_def(ctx, &intrin->def, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i]);

   /* Vectorized instrinsics have num_components same as dst or src that has
    * 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = intrin->def.num_components;
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   /* Decode constant indices in whichever of the four encodings the
    * writer chose (see write_intrinsic). */
   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_all_combined: {
         /* All indices were packed into the header's 8 spare bits. */
         unsigned bit_size = 8 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

/* Serialize a load_const.  Scalar constants are packed into the header's
 * spare bits when possible; otherwise values follow at their bit size. */
static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            /* High 19 bits of a 32-bit value; low 13 bits are zero. */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      /* <= 16-bit scalars always fit in 19 bits. */
      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   /* Only the full encoding needs the values written out after the header. */
   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

/* Deserialize a load_const; inverse of write_load_const(). */
static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));
   lc->def.divergent = false;
   lc->def.loop_invariant = true;

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      /* Header held the high 19 bits; shift back, low bits are zero. */
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      /* Header held the low 19 bits; sign-extend from bit 18 when set. */
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = header.load_const.packed_value;
         if (lc->value[0].u64 >> 18)
            lc->value[0].u64 |= UINT64_C(0xfffffffffff80000);
         break;
      case 32:
         lc->value[0].u32 = header.load_const.packed_value;
         if (lc->value[0].u32 >> 18)
            lc->value[0].u32 |= 0xfff80000;
         break;
      case 16:
         lc->value[0].u16 =
header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      /* Values follow the header at their natural width. */
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

/* An undef carries no payload beyond its header (size/bit_size). */
static void
write_ssa_undef(write_ctx *ctx, const nir_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

/* Recreate an undef entirely from its header; inverse of write_ssa_undef(). */
static nir_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_undef_instr *undef =
      nir_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                             decode_bit_size_3bits(header.undef.bit_size));

   undef->def.divergent = false;
   undef->def.loop_invariant = true;

   read_add_object(ctx, &undef->def);
   return undef;
}

/* All of nir_tex_instr's boolean/small-enum state packed into one u32. */
union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim : 4;
      unsigned dest_type : 8;
      unsigned coord_components : 3;
      unsigned is_array : 1;
      unsigned is_shadow : 1;
      unsigned is_new_style_shadow : 1;
      unsigned is_sparse : 1;
      unsigned component : 2;
      unsigned texture_non_uniform : 1;
      unsigned sampler_non_uniform : 1;
      unsigned array_is_lowered_cube : 1;
      unsigned is_gather_implicit_lod : 1;
      unsigned unused : 5; /* Mark unused for valgrind. */
   } u;
};

/* Serialize a texture instruction: header, indices/flags, packed bools,
 * then each src with its src_type riding in the packed_src word. */
static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   /* Field widths in the packed tex header. */
   assert(tex->num_srcs < 16);
   assert(tex->op < 32);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_def(ctx, &tex->def, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   blob_write_uint32(ctx->blob, tex->backend_flags);
   /* tg4 gather offsets are only meaningful (and only written) for tg4. */
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.is_sparse = tex->is_sparse,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
      .u.is_gather_implicit_lod = tex->is_gather_implicit_lod,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

/* Deserialize a texture instruction; inverse of write_tex(). */
static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_def(ctx, &tex->def, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   tex->backend_flags = blob_read_uint32(ctx->blob);
   /* Written only for tg4 (see write_tex). */
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   /* Unpack the single u32 of boolean/small-enum state. */
   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->is_sparse = packed.u.is_sparse;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
   tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

/* Serialize a phi.  Sources may refer forward to objects not yet written,
 * so their slots are reserved here and patched in write_fixup_phis(). */
static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet.
We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_def(ctx, &phi->def, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      /* Reserve two consecutive u32 slots: [ssa object id, block object id]. */
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

/* Second pass: now that every def/block has an object id, overwrite the
 * slots reserved by write_phi() with the real ids. */
static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
                            write_lookup_object(ctx, fixup->src));
      blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
                            write_lookup_object(ctx, fixup->block));
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

/* Deserialize a phi; sources are left holding raw object indices until
 * read_fixup_phis() resolves them at end of read_function_impl. */
static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_def(ctx, &phi->def, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      /* Stash raw blob indices in the pointer slots; resolved later. */
      nir_def *def = (nir_def *)(uintptr_t)blob_read_uint32(ctx->blob);
      nir_block *pred = (nir_block *)(uintptr_t)blob_read_uint32(ctx->blob);
      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, def);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      nir_src_set_parent_instr(&src->src, &phi->instr);

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);
   }

   return phi;
}

/* Resolve the raw indices stashed by read_phi() into real pointers and
 * link each source into its def's use list. */
static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}

/* A jump is fully described by its header (type only). */
static void
write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
{
   /* These aren't handled because they require special block linking */
   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);

   /* 2-bit field in the packed header. */
   assert(jmp->type < 4);

   union packed_instr header;
   header.u32 = 0;

   header.jump.instr_type = jmp->instr.type;
   header.jump.type = jmp->type;

   blob_write_uint32(ctx->blob, header.u32);
}

/* Recreate a jump from its header; inverse of write_jump(). */
static nir_jump_instr *
read_jump(read_ctx *ctx, union packed_instr header)
{
   /* These aren't handled because they require special block linking */
   assert(header.jump.type != nir_jump_goto &&
          header.jump.type != nir_jump_goto_if);

   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
   return jmp;
}

/* Serialize a call: callee object id, then one src per parameter. */
static void
write_call(write_ctx *ctx, const nir_call_instr *call)
{
   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));

   for (unsigned i = 0; i < call->num_params; i++)
      write_src(ctx, &call->params[i]);
}

/* Deserialize a call; inverse of write_call(). */
static nir_call_instr *
read_call(read_ctx *ctx)
{
   nir_function *callee = read_object(ctx);
   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);

   for (unsigned i = 0; i < call->num_params; i++)
      read_src(ctx, &call->params[i]);

   return call;
}

/* Serialize a debug-info instruction; payload depends on di->type. */
static void
write_debug_info(write_ctx *ctx, const nir_debug_info_instr *di)
{
   union packed_instr header;
   header.u32 = 0;

   header.debug_info.instr_type = nir_instr_type_debug_info;
   header.debug_info.type = di->type;
   header.debug_info.string_length = di->string_length;

   switch (di->type) {
   case nir_debug_info_src_loc:
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, di->src_loc.line);
      blob_write_uint32(ctx->blob, di->src_loc.column);
      blob_write_uint32(ctx->blob, di->src_loc.spirv_offset);
      blob_write_uint8(ctx->blob, di->src_loc.source);
      /* The filename src is only present when a line number is set;
       * the reader applies the same condition. */
      if (di->src_loc.line)
         write_src(ctx, &di->src_loc.filename);
      return;
   case nir_debug_info_string:
      write_def(ctx, &di->def, header, di->instr.type);
      blob_write_bytes(ctx->blob, di->string, di->string_length);
      return;
   }

   unreachable("Unimplemented nir_debug_info_type");
}

/* Deserialize a debug-info instruction; inverse of write_debug_info(). */
static nir_debug_info_instr *
read_debug_info(read_ctx *ctx, union packed_instr header)
{
   nir_debug_info_type type = header.debug_info.type;

   switch (type) {
   case nir_debug_info_src_loc: {
      nir_debug_info_instr *di = nir_debug_info_instr_create(ctx->nir, type, 0);
      di->src_loc.line = blob_read_uint32(ctx->blob);
      di->src_loc.column = blob_read_uint32(ctx->blob);
      di->src_loc.spirv_offset = blob_read_uint32(ctx->blob);
      di->src_loc.source = blob_read_uint8(ctx->blob);
      /* Filename src was written only when line != 0 (see write side). */
      if (di->src_loc.line)
         read_src(ctx, &di->src_loc.filename);
      return di;
   }
   case nir_debug_info_string: {
      nir_debug_info_instr *di =
         nir_debug_info_instr_create(ctx->nir, type, header.debug_info.string_length);
      read_def(ctx, &di->def, &di->instr, header);
      memcpy(di->string, blob_read_bytes(ctx->blob, di->string_length), di->string_length);
      return di;
   }
   }

   unreachable("Unimplemented nir_debug_info_type");
}

/* Dispatch serialization of a single instruction by type.  Each write_*
 * emits its own packed header; the bare call case writes the type word
 * itself because write_call has no header of its own. */
static void
write_instr(write_ctx *ctx, const nir_instr *instr)
{
   /* We have only 4 bits for the instruction type. */
   assert(instr->type < 16);

   switch (instr->type) {
   case nir_instr_type_alu:
      write_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      write_deref(ctx, nir_instr_as_deref(instr));
      break;
   case nir_instr_type_intrinsic:
      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      write_load_const(ctx, nir_instr_as_load_const(instr));
      break;
   case nir_instr_type_undef:
      write_ssa_undef(ctx, nir_instr_as_undef(instr));
      break;
   case nir_instr_type_tex:
      write_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_phi:
      write_phi(ctx, nir_instr_as_phi(instr));
      break;
   case nir_instr_type_jump:
      write_jump(ctx, nir_instr_as_jump(instr));
      break;
   case nir_instr_type_call:
      blob_write_uint32(ctx->blob, instr->type);
      write_call(ctx, nir_instr_as_call(instr));
      break;
   case nir_instr_type_debug_info:
      write_debug_info(ctx, nir_instr_as_debug_info(instr));
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot write parallel copies");
   default:
      unreachable("bad instr type");
   }
}

/* Return the number of instructions read. */
static unsigned
read_instr(read_ctx *ctx, nir_block *block)
{
   STATIC_ASSERT(sizeof(union packed_instr) == 4);
   union packed_instr header;
   header.u32 = blob_read_uint32(ctx->blob);
   nir_instr *instr;

   switch (header.any.instr_type) {
   case nir_instr_type_alu:
      /* One ALU header may cover several follow-up ALU instrs that share it. */
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_debug_info:
      instr = &read_debug_info(ctx, header)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

/* Serialize a basic block: divergence flag, instruction count, then each
 * instruction.  The per-block write state (last instr type / last ALU
 * header offset) is reset so cross-instruction sharing stays block-local. */
static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint8(ctx->blob, block->divergent);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; It should be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   block->divergent = blob_read_uint8(ctx->blob);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   /* read_instr may consume several instructions at once (shared ALU
    * headers), so advance by its return value rather than by one. */
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

/* Forward declarations: if/loop bodies recurse through CF lists. */
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

/* Serialize an if: condition, control hint, then both branches. */
static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);
   blob_write_uint8(ctx->blob, nif->control);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

/* Deserialize an if; inverse of write_if(). */
static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition);
   nif->control = blob_read_uint8(ctx->blob);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

/* Serialize a loop: flags, body, and (if present) the continue construct. */
static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   blob_write_uint8(ctx->blob, loop->control);
   blob_write_uint8(ctx->blob, loop->divergent_continue);
   blob_write_uint8(ctx->blob, loop->divergent_break);
   bool has_continue_construct = nir_loop_has_continue_construct(loop);
   blob_write_uint8(ctx->blob, has_continue_construct);

   write_cf_list(ctx, &loop->body);
   if (has_continue_construct) {
      write_cf_list(ctx, &loop->continue_list);
   }
}

/* Deserialize a loop; inverse of write_loop(). */
static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   loop->control = blob_read_uint8(ctx->blob);
   loop->divergent_continue = blob_read_uint8(ctx->blob);
   loop->divergent_break =
blob_read_uint8(ctx->blob);
   bool has_continue_construct = blob_read_uint8(ctx->blob);

   read_cf_list(ctx, &loop->body);
   if (has_continue_construct) {
      /* The construct must exist before its CF list can be filled in. */
      nir_loop_add_continue_construct(loop);
      read_cf_list(ctx, &loop->continue_list);
   }
}

/* Serialize one control-flow node, tagged by its type. */
static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

/* Deserialize one control-flow node; inverse of write_cf_node(). */
static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

/* Serialize a CF list: node count, then each node in order. */
static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

/* Deserialize a CF list; inverse of write_cf_list(). */
static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

/* Serialize a function implementation: flags, optional preamble ref,
 * locals, body CF list, then the deferred phi fixups. */
static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);
   blob_write_uint8(ctx->blob, !!fi->preamble);

   if (fi->preamble)
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));

   write_var_list(ctx, &fi->locals);
   /* Body first, then patch the phi source slots reserved during it. */
   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

/* Deserialize a function implementation; inverse of write_function_impl(). */
static nir_function_impl *
read_function_impl(read_ctx *ctx)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);

   fi->structured = blob_read_uint8(ctx->blob);
   bool preamble = blob_read_uint8(ctx->blob);

   if (preamble)
      fi->preamble = read_object(ctx);

   read_var_list(ctx, &fi->locals);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   /* Metadata is not serialized; callers must recompute what they need. */
   fi->valid_metadata = 0;

   return fi;
}

/* Serialize a function's declaration (not its impl — see the comment at
 * the bottom).  Boolean properties are packed into a flags word that the
 * reader decodes with the same bit assignments. */
static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = 0;
   if (fxn->is_entrypoint)
      flags |= 0x1;
   if (fxn->is_preamble)
      flags |= 0x2;
   if (fxn->name && !ctx->strip)
      flags |= 0x4;
   if (fxn->impl)
      flags |= 0x8;
   if (fxn->should_inline)
      flags |= 0x10;
   if (fxn->dont_inline)
      flags |= 0x20;
   if (fxn->is_subroutine)
      flags |= 0x40;
   if (fxn->is_tmp_globals_wrapper)
      flags |= 0x80;
   if (fxn->workgroup_size[0] || fxn->workgroup_size[1] || fxn->workgroup_size[2])
      flags |= 0x100;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name && !ctx->strip)
      blob_write_string(ctx->blob, fxn->name);

   /* Workgroup size is only written when any component is non-zero. */
   if (flags & 0x100) {
      blob_write_uint32(ctx->blob, fxn->workgroup_size[0]);
      blob_write_uint32(ctx->blob, fxn->workgroup_size[1]);
      blob_write_uint32(ctx->blob, fxn->workgroup_size[2]);
   }

   blob_write_uint32(ctx->blob, fxn->driver_attributes);

   blob_write_uint32(ctx->blob, fxn->subroutine_index);
   blob_write_uint32(ctx->blob, fxn->num_subroutine_types);
   for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
      encode_type_to_blob(ctx->blob, fxn->subroutine_types[i]);
   }

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         /* Per-parameter word: components in bits 0-7, bit size in 8-15,
          * has_name at bit 16, is_return at 17, is_uniform at 18. */
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;

      bool has_name = fxn->params[i].name && !ctx->strip;
      if (has_name)
         val |= 0x10000;

      if (fxn->params[i].is_return)
         val |= (1u << 17);
      if (fxn->params[i].is_uniform)
         val |= (1u << 18);
      blob_write_uint32(ctx->blob, val);
      if (has_name)
         blob_write_string(ctx->blob, fxn->params[i].name);

      encode_type_to_blob(ctx->blob, fxn->params[i].type);
      blob_write_uint32(ctx->blob, encode_deref_modes(fxn->params[i].mode));
      blob_write_uint32(ctx->blob, fxn->params[i].driver_attributes);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

/* Deserialize a function declaration; inverse of write_function().  The
 * impl pointer is set to a sentinel here and replaced when the impls are
 * read in the second pass. */
static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);

   bool has_name = flags & 0x4;
   char *name = has_name ?
blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   /* Present only when the writer saw a non-zero workgroup size. */
   if (flags & 0x100) {
      fxn->workgroup_size[0] = blob_read_uint32(ctx->blob);
      fxn->workgroup_size[1] = blob_read_uint32(ctx->blob);
      fxn->workgroup_size[2] = blob_read_uint32(ctx->blob);
   }

   fxn->driver_attributes = blob_read_uint32(ctx->blob);
   fxn->subroutine_index = blob_read_uint32(ctx->blob);
   fxn->num_subroutine_types = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
      fxn->subroutine_types[i] = decode_type_from_blob(ctx->blob);
   }

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = rzalloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      /* Unpack the per-parameter word (layout documented in write side). */
      uint32_t val = blob_read_uint32(ctx->blob);
      bool has_name = (val & 0x10000);
      if (has_name) {
         char *name = blob_read_string(ctx->blob);
         fxn->params[i].name = ralloc_strdup(ctx->nir, name);
      }

      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
      fxn->params[i].is_return = val & (1u << 17);
      fxn->params[i].is_uniform = val & (1u << 18);
      fxn->params[i].type = decode_type_from_blob(ctx->blob);
      fxn->params[i].mode = decode_deref_modes(blob_read_uint32(ctx->blob));
      fxn->params[i].driver_attributes = blob_read_uint32(ctx->blob);
   }

   fxn->is_entrypoint = flags & 0x1;
   fxn->is_preamble = flags & 0x2;
   /* Sentinel: the real impl is attached when impls are read later. */
   if (flags & 0x8)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
   fxn->should_inline = flags & 0x10;
   fxn->dont_inline = flags & 0x20;
   fxn->is_subroutine = flags & 0x40;
   fxn->is_tmp_globals_wrapper = flags & 0x80;
}

/* Serialize transform-feedback info as a size-prefixed byte blob;
 * size 0 means "no xfb info". */
static void
write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
{
   if (xfb == NULL) {
      blob_write_uint32(ctx->blob, 0);
   } else {
      size_t size =
nir_xfb_info_size(xfb->output_count); 2087 assert(size <= UINT32_MAX); 2088 blob_write_uint32(ctx->blob, size); 2089 blob_write_bytes(ctx->blob, xfb, size); 2090 } 2091} 2092 2093static nir_xfb_info * 2094read_xfb_info(read_ctx *ctx) 2095{ 2096 uint32_t size = blob_read_uint32(ctx->blob); 2097 if (size == 0) 2098 return NULL; 2099 2100 struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size); 2101 blob_copy_bytes(ctx->blob, (void *)xfb, size); 2102 2103 return xfb; 2104} 2105 2106/** 2107 * Serialize NIR into a binary blob. 2108 * 2109 * \param strip Don't serialize information only useful for debugging, 2110 * such as variable names, making cache hits from similar 2111 * shaders more likely. 2112 */ 2113void 2114nir_serialize(struct blob *blob, const nir_shader *nir, bool strip) 2115{ 2116 write_ctx ctx = { 0 }; 2117 ctx.remap_table = _mesa_pointer_hash_table_create(NULL); 2118 ctx.blob = blob; 2119 ctx.nir = nir; 2120 ctx.strip = strip; 2121 util_dynarray_init(&ctx.phi_fixups, NULL); 2122 2123 size_t idx_size_offset = blob_reserve_uint32(blob); 2124 2125 struct shader_info info = nir->info; 2126 uint32_t strings = 0; 2127 if (!strip && info.name) 2128 strings |= 0x1; 2129 if (!strip && info.label) 2130 strings |= 0x2; 2131 blob_write_uint32(blob, strings); 2132 if (!strip && info.name) 2133 blob_write_string(blob, info.name); 2134 if (!strip && info.label) 2135 blob_write_string(blob, info.label); 2136 info.name = info.label = NULL; 2137 blob_write_bytes(blob, (uint8_t *)&info, sizeof(info)); 2138 2139 write_var_list(&ctx, &nir->variables); 2140 2141 blob_write_uint32(blob, nir->num_inputs); 2142 blob_write_uint32(blob, nir->num_uniforms); 2143 blob_write_uint32(blob, nir->num_outputs); 2144 blob_write_uint32(blob, nir->scratch_size); 2145 2146 blob_write_uint32(blob, exec_list_length(&nir->functions)); 2147 nir_foreach_function(fxn, nir) { 2148 write_function(&ctx, fxn); 2149 } 2150 2151 nir_foreach_function_impl(impl, nir) { 2152 write_function_impl(&ctx, impl); 
2153 } 2154 2155 blob_write_uint32(blob, nir->constant_data_size); 2156 if (nir->constant_data_size > 0) 2157 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size); 2158 2159 write_xfb_info(&ctx, nir->xfb_info); 2160 2161 if (nir->info.uses_printf) 2162 u_printf_serialize_info(blob, nir->printf_info, nir->printf_info_count); 2163 2164 blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx); 2165 2166 _mesa_hash_table_destroy(ctx.remap_table, NULL); 2167 util_dynarray_fini(&ctx.phi_fixups); 2168} 2169 2170nir_shader * 2171nir_deserialize(void *mem_ctx, 2172 const struct nir_shader_compiler_options *options, 2173 struct blob_reader *blob) 2174{ 2175 read_ctx ctx = { 0 }; 2176 ctx.blob = blob; 2177 list_inithead(&ctx.phi_srcs); 2178 ctx.idx_table_len = blob_read_uint32(blob); 2179 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t)); 2180 2181 uint32_t strings = blob_read_uint32(blob); 2182 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL; 2183 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL; 2184 2185 struct shader_info info; 2186 blob_copy_bytes(blob, (uint8_t *)&info, sizeof(info)); 2187 2188 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL); 2189 2190 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL; 2191 info.label = label ? 
ralloc_strdup(ctx.nir, label) : NULL; 2192 2193 ctx.nir->info = info; 2194 2195 read_var_list(&ctx, &ctx.nir->variables); 2196 2197 ctx.nir->num_inputs = blob_read_uint32(blob); 2198 ctx.nir->num_uniforms = blob_read_uint32(blob); 2199 ctx.nir->num_outputs = blob_read_uint32(blob); 2200 ctx.nir->scratch_size = blob_read_uint32(blob); 2201 2202 unsigned num_functions = blob_read_uint32(blob); 2203 for (unsigned i = 0; i < num_functions; i++) 2204 read_function(&ctx); 2205 2206 nir_foreach_function(fxn, ctx.nir) { 2207 if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL) 2208 nir_function_set_impl(fxn, read_function_impl(&ctx)); 2209 } 2210 2211 ctx.nir->constant_data_size = blob_read_uint32(blob); 2212 if (ctx.nir->constant_data_size > 0) { 2213 ctx.nir->constant_data = 2214 ralloc_size(ctx.nir, ctx.nir->constant_data_size); 2215 blob_copy_bytes(blob, ctx.nir->constant_data, 2216 ctx.nir->constant_data_size); 2217 } 2218 2219 ctx.nir->xfb_info = read_xfb_info(&ctx); 2220 2221 if (ctx.nir->info.uses_printf) { 2222 ctx.nir->printf_info = 2223 u_printf_deserialize_info(ctx.nir, blob, 2224 &ctx.nir->printf_info_count); 2225 } 2226 2227 free(ctx.idx_table); 2228 2229 nir_validate_shader(ctx.nir, "after deserialize"); 2230 2231 return ctx.nir; 2232} 2233 2234void 2235nir_shader_serialize_deserialize(nir_shader *shader) 2236{ 2237 const struct nir_shader_compiler_options *options = shader->options; 2238 2239 struct blob writer; 2240 blob_init(&writer); 2241 nir_serialize(&writer, shader, false); 2242 2243 /* Delete all of dest's ralloc children but leave dest alone */ 2244 void *dead_ctx = ralloc_context(NULL); 2245 ralloc_adopt(dead_ctx, shader); 2246 ralloc_free(dead_ctx); 2247 2248 dead_ctx = ralloc_context(NULL); 2249 2250 struct blob_reader reader; 2251 blob_reader_init(&reader, writer.data, writer.size); 2252 nir_shader *copy = nir_deserialize(dead_ctx, options, &reader); 2253 2254 blob_finish(&writer); 2255 2256 nir_shader_replace(shader, copy); 2257 ralloc_free(dead_ctx); 
2258}