/*
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include <ctype.h>
#include "nir.h"
#include "nir_builder.h"
#include "nir_serialize.h"

/*
 * This file contains helpers for precompiling OpenCL kernels with a Mesa driver
 * and dispatching them from within the driver. It is a grab bag of utility
 * functions, rather than an all-in-one solution, to give drivers flexibility to
 * customize the compile pipeline. See asahi_clc for how the pieces fit
 * together, and see libagx for real world examples of this infrastructure.
 *
 * Why OpenCL C?
 *
 * 1. Mesa drivers are generally written in C. OpenCL C is close enough to C11
 *    that we can share driver code between host and device. This is the
 *    "killer feature" and enables implementing device-generated commands in a
 *    sane way. Both generated (e.g. GenXML) headers and entire complex driver
 *    logic may be shared for a major maintenance win.
 *
 * 2. OpenCL C has significantly better ergonomics than GLSL, particularly
 *    around raw pointers. Plainly, GLSL was never designed as a systems
 *    language. What we need for implementing driver features on-device is a
 *    systems language, not a shading language.
 *
 * 3. OpenCL is the compute standard, and it is supported in Mesa via rusticl.
 *    Using OpenCL in our drivers is a way of "eating our own dog food". If
 *    Mesa based OpenCL isn't good enough for us, it's not good enough for our
 *    users either.
 *
 * 4. OpenCL C has enough affordances for GPUs that it is suitable for GPU use,
 *    unlike pure C11.
 *
 * Why precompile?
 *
 * 1. Precompiling lets us do build-time reflection on internal shaders to
 *    generate data layouts and dispatch macros automatically. The precompile
 *    pipeline implemented in this file offers significantly better ergonomics
 *    than handrolling kernels at runtime.
 *
 * 2. Compiling internal shaders at draw-time can introduce jank. Compiling
 *    internal shaders with application shaders slows down application shader
 *    compile time (and might still introduce jank in a hash-and-cache scheme).
 *    Compiling shaders at device creation time slows down initialization. The
 *    only time we can compile with no performance impact is when building the
 *    driver ahead-of-time.
 *
 * 3. Mesa is built (on developer and packager machines) far less often than it
 *    is run (on user machines). Compiling at build-time is simply more
 *    efficient in a global sense.
 *
 * 4. Compiling /all/ internal shaders with the Mesa build can turn runtime
 *    assertion fails into build failures, allowing backend compilers to be
 *    smoke-tested without hardware testing and hence allowing regressions to
 *    be caught sooner.
 *
 * At a high level, a library of kernels is compiled to SPIR-V. That SPIR-V is
 * then translated to NIR and optimized, leaving many entrypoints. Each NIR
 * entrypoint represents one `kernel` to be precompiled.
 *
 * Kernels generally have arguments. Arguments may be either scalars or
 * pointers. It is not necessary to explicitly define a data layout for the
 * arguments. You simply declare arguments to the OpenCL side kernel:
 *
 *    KERNEL(1) void foo(int x, int y) { .. }
 *
 * The data layout is automatically derived from the function signature
 * (nir_precomp_derive_layout). The data layout is exposed to the CPU as
 * structures (nir_precomp_print_layout_struct).
 *
 *    struct foo_args {
 *       uint32_t x;
 *       uint32_t y;
 *    } PACKED;
 *
 * The data is expected to be mapped to something like Vulkan push constants in
 * the hardware. The driver defines a callback to load an argument given a byte
 * offset (e.g. via load_push_constant intrinsics).
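 *
 * For illustration only, such a load_arg callback might look like the
 * following hypothetical sketch (the intrinsic choice and its index operands
 * are driver-specific and not prescribed by this file):
 *
 *    static nir_def *
 *    load_arg(nir_builder *b, unsigned num_components, unsigned bit_size,
 *             unsigned offset_B)
 *    {
 *       return nir_load_push_constant(b, num_components, bit_size,
 *                                     nir_imm_int(b, offset_B));
 *    }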
 * When building a variant, nir_precomp_build_variant will load the arguments
 * according to the chosen layout:
 *
 *    %0 = load_push_constant 0
 *    %1 = load_push_constant 4
 *    ...
 *
 * This ensures that data layouts match between CPU and GPU, without any
 * boilerplate, while giving drivers control over exactly how arguments are
 * passed. (This can save an indirection compared to stuffing in a UBO.)
 *
 * To dispatch kernels from the driver, the kernel is "called" like a function:
 *
 *    foo(cmdbuf, grid(4, 4, 1), x, y);
 *
 * This resolves to generated dispatch macros
 * (nir_precomp_print_dispatch_macros), which lay out their arguments according
 * to the derived layout and then call the driver-specific dispatch. To
 * implement that mechanism, a driver must implement the following function
 * signature:
 *
 *    MESA_DISPATCH_PRECOMP(context, grid, barrier, kernel index,
 *                          argument pointer, size of arguments)
 *
 * The exact types used are determined by the driver. context is something like
 * a Vulkan command buffer. grid represents the 3D dispatch size. barrier
 * describes the synchronization and cache flushing required before and after
 * the dispatch. kernel index is the index of the precompiled kernel
 * (nir_precomp_index). argument pointer is a host pointer to the sized
 * argument structure, which the driver must upload and bind (e.g. as push
 * constants).
 *
 * Because the types are ambiguous here, the same mechanism works for both
 * Gallium and Vulkan drivers.
 *
 * Although the generated header could be consumed by OpenCL code,
 * MESA_DISPATCH_PRECOMP is not intended to be implemented on the device side.
 * Instead, an analogous mechanism can be implemented for device-side enqueue
 * with automatic data layout handling.
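 *
 * Purely as a hypothetical sketch, a driver could satisfy that contract with
 * a thin forwarding macro (all names here are invented for illustration):
 *
 *    #define MESA_DISPATCH_PRECOMP(cmdbuf, grid, barrier, idx, argp, size)  \
 *       foo_cmd_dispatch_precomp((cmdbuf), &(grid), (barrier), (idx),       \
 *                                (argp), (size))
 *
 * where foo_cmd_dispatch_precomp would upload the argument struct (e.g. as
 * push constants), bind the precompiled binary at index idx, and record the
 * dispatch with the requested barrier.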
 * Device-side enqueue of precompiled kernels has various applications, most
 * obviously for implementing device-generated commands.
 *
 * All precompiled kernels for a given target are zero-indexed and referenced
 * in an array of binaries. These indices are enum values, generated by
 * nir_precomp_print_program_enum. The array of kernels is generated by
 * nir_precomp_print_binary_map. There is generally an array for each hardware
 * target supported by a driver. On device creation, the driver would select
 * the array of binaries for the probed hardware.
 *
 * Sometimes a single binary can be used for multiple targets. In this case,
 * the driver should compile it only once and remap the binary arrays with the
 * callback passed to nir_precomp_print_binary_map.
 *
 * A single entrypoint may have multiple variants, acting as a small shader
 * key. To support this, kernel parameters suffixed with __n will automatically
 * vary from 0 to n - 1. This mechanism is controlled by
 * nir_precomp_parse_variant_param. For example:
 *
 *    KERNEL(1) void bar(uchar *x, int variant__4) {
 *       for (uint i = 0; i <= variant__4; ++i)
 *          x[i]++;
 *    }
 *
 * will generate 4 binaries with 1, 2, 3, and 4 additions respectively. This
 * mechanism (sigil suffixing) is kinda ugly, but I can't figure out a nicer
 * way to attach metadata to the argument in standard OpenCL.
 *
 * Internally, all variants of a given kernel have a flat index. The bijection
 * between n variant parameters and 1 flat index is given in the
 * nir_precomp_decode_variant_index comment.
 *
 * Kernels must declare their workgroup size with
 * __attribute__((reqd_work_group_size(...))) for two reasons. First, variable
 * workgroup sizes have tricky register allocation problems in several
 * backends, avoided here. Second, it makes more sense to attach the workgroup
 * size to the kernel than to the caller, so this improves the ergonomics of
 * the dispatch macros.
 */

#define NIR_PRECOMP_MAX_ARGS (64)

struct nir_precomp_opts {
   /* If nonzero, minimum (power-of-two) alignment required for kernel
    * arguments. Kernel arguments will be naturally aligned regardless, but
    * this models a minimum alignment required by some hardware.
    */
   unsigned arg_align_B;
};

struct nir_precomp_layout {
   unsigned size_B;
   unsigned offset_B[NIR_PRECOMP_MAX_ARGS];
   bool prepadded[NIR_PRECOMP_MAX_ARGS];
};

static inline unsigned
nir_precomp_parse_variant_param(const nir_function *f, unsigned p)
{
   assert(p < f->num_params);

   const char *token = "__";
   const char *q = strstr(f->params[p].name, token);
   if (q == NULL)
      return 0;

   int n = atoi(q + strlen(token));

   /* Ensure the number is something reasonable */
   assert(n > 1 && n < 32 && "sanity check");
   return n;
}

static inline bool
nir_precomp_is_variant_param(const nir_function *f, unsigned p)
{
   return nir_precomp_parse_variant_param(f, p) != 0;
}

#define nir_precomp_foreach_arg(f, p)                                          \
   for (unsigned p = 0; p < f->num_params; ++p)                                \
      if (!nir_precomp_is_variant_param(f, p))

#define nir_precomp_foreach_variant_param(f, p)                                \
   for (unsigned p = 0; p < f->num_params; ++p)                                \
      if (nir_precomp_is_variant_param(f, p))

static inline unsigned
nir_precomp_nr_variants(const nir_function *f)
{
   unsigned nr = 1;

   nir_precomp_foreach_variant_param(f, p) {
      nr *= nir_precomp_parse_variant_param(f, p);
   }

   return nr;
}

static inline bool
nir_precomp_has_variants(const nir_function *f)
{
   return nir_precomp_nr_variants(f) > 1;
}

static inline struct nir_precomp_layout
nir_precomp_derive_layout(const struct nir_precomp_opts *opt,
                          const nir_function *f)
{
   struct nir_precomp_layout l = { 0 };

   nir_precomp_foreach_arg(f, a) {
      nir_parameter param = f->params[a];
      assert(a < ARRAY_SIZE(l.offset_B));

      /* Align members naturally */
      l.offset_B[a] = ALIGN_POT(l.size_B, param.bit_size / 8);

      /* Align arguments to driver minimum */
      if (opt->arg_align_B) {
         l.offset_B[a] = ALIGN_POT(l.offset_B[a], opt->arg_align_B);
      }

      l.prepadded[a] = (l.offset_B[a] != l.size_B);
      l.size_B = l.offset_B[a] + (param.num_components * param.bit_size) / 8;
   }

   return l;
}

static inline unsigned
nir_precomp_index(const nir_shader *lib, const nir_function *func)
{
   unsigned i = 0;

   nir_foreach_entrypoint(candidate, lib) {
      if (candidate == func)
         return i;

      i += nir_precomp_nr_variants(candidate);
   }

   unreachable("function must be in library");
}

static inline void
nir_print_uppercase(FILE *fp, const char *str)
{
   for (unsigned i = 0; i < strlen(str); ++i) {
      fputc(toupper(str[i]), fp);
   }
}

static inline void
nir_precomp_print_enum_value(FILE *fp, const nir_function *func)
{
   nir_print_uppercase(fp, func->name);
}

static inline void
nir_precomp_print_enum_variant_value(FILE *fp, const nir_function *func,
                                     unsigned v)
{
   nir_precomp_print_enum_value(fp, func);

   if (nir_precomp_has_variants(func)) {
      fprintf(fp, "_%u", v);
   } else {
      assert(v == 0);
   }
}

static inline void
nir_precomp_print_variant_params(FILE *fp, nir_function *func, bool with_types)
{
   if (nir_precomp_has_variants(func)) {
      fprintf(fp, "(");

      bool first = true;
      nir_precomp_foreach_variant_param(func, p) {
         fprintf(fp, "%s%s%s", first ? "" : ", ", with_types ? "unsigned " : "",
                 func->params[p].name);
         first = false;
      }

      fprintf(fp, ")");
   }
}

/*
 * Given a flattened 1D index, extract the i'th coordinate of the original N-D
 * vector. The forward map is:
 *
 *    I = sum(t=1...n) [x_t product(j=1...(t-1)) [k_j]]
 *
 * It can be shown that
 *
 *    I < product(j=1...n) [k_j]
 *
 *    x_i = floor(I / product(j=1...(i-1)) [k_j]) mod k_i
 *
 * The inequality is proved by induction on n. The equivalence follows from the
 * inequality by splitting the sum of I at t=i, showing the smaller terms get
 * killed by the floor and the higher terms get killed by the modulus, leaving
 * just x_i.
 *
 * The forward map is emitted in nir_precomp_print_program_enum. The inverse is
 * calculated here.
 */
static inline unsigned
nir_precomp_decode_variant_index(const nir_function *func, unsigned I,
                                 unsigned i)
{
   unsigned product = 1;

   nir_precomp_foreach_variant_param(func, j) {
      if (j >= i)
         break;

      unsigned k_j = nir_precomp_parse_variant_param(func, j);
      product *= k_j;
   }

   unsigned k_i = nir_precomp_parse_variant_param(func, i);
   return (I / product) % k_i;
}

static inline void
nir_precomp_print_program_enum(FILE *fp, const nir_shader *lib,
                               const char *prefix)
{
   /* Generate an enum indexing all binaries */
   fprintf(fp, "enum %s_program {\n", prefix);
   nir_foreach_entrypoint(func, lib) {
      unsigned index = nir_precomp_index(lib, func);

      for (unsigned v = 0; v < nir_precomp_nr_variants(func); ++v) {
         fprintf(fp, "   ");
         nir_precomp_print_enum_variant_value(fp, func, v);
         fprintf(fp, " = %u,\n", index + v);
      }
   }
   fprintf(fp, "   ");
   nir_print_uppercase(fp, prefix);
   fprintf(fp, "_NUM_PROGRAMS,\n");
   fprintf(fp, "};\n\n");

   /* Generate indexing variants */
   nir_foreach_entrypoint(func, lib) {
      if (nir_precomp_has_variants(func)) {
         fprintf(fp, "static inline unsigned\n");
         nir_precomp_print_enum_value(fp, func);
         nir_precomp_print_variant_params(fp, func, true);
         fprintf(fp, "\n");
         fprintf(fp, "{\n");

         nir_precomp_foreach_variant_param(func, p) {
            /* Assert indices are in bounds. These provide some safety. */
            fprintf(fp, "   assert(%s < %u);\n", func->params[p].name,
                    nir_precomp_parse_variant_param(func, p));
         }

         /* Flatten an N-D index into a 1D index using the standard mapping.
          *
          * We iterate parameters backwards so we can do a single multiply-add
          * each step for simplicity (similar to Horner's method).
          */
         fprintf(fp, "\n");
         bool first = true;
         for (signed p = func->num_params - 1; p >= 0; --p) {
            if (!nir_precomp_is_variant_param(func, p))
               continue;

            if (first) {
               fprintf(fp, "   unsigned idx = %s;\n", func->params[p].name);
            } else {
               fprintf(fp, "   idx = (idx * %u) + %s;\n",
                       nir_precomp_parse_variant_param(func, p),
                       func->params[p].name);
            }

            first = false;
         }

         /* Post-condition: flattened index is in bounds. */
         fprintf(fp, "\n");
         fprintf(fp, "   assert(idx < %u);\n", nir_precomp_nr_variants(func));

         fprintf(fp, "   return ");
         nir_precomp_print_enum_variant_value(fp, func, 0);
         fprintf(fp, " + idx;\n");
         fprintf(fp, "}\n\n");
      }
   }
   fprintf(fp, "\n");
}

static inline void
nir_precomp_print_layout_struct(FILE *fp, const struct nir_precomp_opts *opt,
                                const nir_function *func)
{
   struct nir_precomp_layout layout = nir_precomp_derive_layout(opt, func);

   /* Generate a C struct matching the data layout we chose. This is how
    * the CPU will pack arguments.
    */
   unsigned offset_B = 0;

   fprintf(fp, "struct %s_args {\n", func->name);
   nir_precomp_foreach_arg(func, a) {
      nir_parameter param = func->params[a];
      assert(param.name != NULL && "kernel args must be named");

      assert(layout.offset_B[a] >= offset_B);
      unsigned pad = layout.offset_B[a] - offset_B;
      assert((pad > 0) == layout.prepadded[a]);

      if (pad > 0) {
         fprintf(fp, "   uint8_t _pad%u[%u];\n", a, pad);
         offset_B += pad;
      }

      /* After padding, the layout will match. */
      assert(layout.offset_B[a] == offset_B);

      fprintf(fp, "   uint%u_t %s", param.bit_size, param.name);
      if (param.num_components > 1) {
         fprintf(fp, "[%u]", param.num_components);
      }
      fprintf(fp, ";\n");

      offset_B += param.num_components * (param.bit_size / 8);
   }
   fprintf(fp, "} PACKED;\n\n");

   /* Assert that the layout on the CPU matches the layout on the GPU. Because
    * of the asserts above, these are mostly just sanity checking the compiler.
    * But better to err on the side of defensiveness, because alignment bugs
    * are REALLY painful to track down and we don't pay for the static asserts.
    */
   nir_precomp_foreach_arg(func, a) {
      nir_parameter param = func->params[a];

      fprintf(fp, "static_assert(offsetof(struct %s_args, %s) == %u, \"\");\n",
              func->name, param.name, layout.offset_B[a]);
   }
   fprintf(fp, "static_assert(sizeof(struct %s_args) == %u, \"\");\n",
           func->name, layout.size_B);

   fprintf(fp, "\n");
}

static inline void
nir_precomp_print_dispatch_macros(FILE *fp, const struct nir_precomp_opts *opt,
                                  const nir_shader *nir)
{
   nir_foreach_entrypoint(func, nir) {
      struct nir_precomp_layout layout = nir_precomp_derive_layout(opt, func);

      for (unsigned i = 0; i < 2; ++i) {
         bool is_struct = i == 0;

         fprintf(fp, "#define %s%s(_context, _grid, _barrier%s", func->name,
                 is_struct ? "_struct" : "", is_struct ? ", _data" : "");

         /* Add the arguments, including variant parameters. For struct macros,
          * we include only the variant parameters; the kernel arguments are
          * taken from the struct.
          */
         for (unsigned p = 0; p < func->num_params; ++p) {
            if (!is_struct || nir_precomp_is_variant_param(func, p))
               fprintf(fp, ", %s", func->params[p].name);
         }

         fprintf(fp, ") do { \\\n");

         fprintf(fp, "   struct %s_args _args = ", func->name);

         if (is_struct) {
            fprintf(fp, "_data");
         } else {
            fprintf(fp, "{");

            nir_precomp_foreach_arg(func, a) {
               /* We need to zero out the padding between members. We cannot
                * use a designated initializer without prefixing the macro
                * arguments, which would add noise to the macro signature
                * reported in IDEs (which should ideally match the actual
                * signature as closely as possible).
                */
               if (layout.prepadded[a]) {
                  assert(a > 0 && "first argument is never prepadded");
                  fprintf(fp, ", {0}");
               }

               fprintf(fp, "%s%s", a == 0 ? "" : ", ", func->params[a].name);
            }

            fprintf(fp, "}");
         }

         fprintf(fp, ";\\\n");

         /* Dispatch via MESA_DISPATCH_PRECOMP, which the driver must #define
          * suitably before #include-ing this file.
          */
         fprintf(fp, "   MESA_DISPATCH_PRECOMP(_context, _grid, _barrier, ");
         nir_precomp_print_enum_value(fp, func);
         nir_precomp_print_variant_params(fp, func, false);
         fprintf(fp, ", &_args, sizeof(_args)); \\\n");
         fprintf(fp, "} while(0);\n\n");
      }
   }
   fprintf(fp, "\n");
}

static inline void
nir_precomp_print_extern_binary_map(FILE *fp, const char *prefix,
                                    const char *target)
{
   fprintf(fp, "extern const uint32_t *%s_%s[", prefix, target);
   nir_print_uppercase(fp, prefix);
   fprintf(fp, "_NUM_PROGRAMS];\n");
}

static inline void
nir_precomp_print_binary_map(FILE *fp, const nir_shader *nir,
                             const char *prefix, const char *target,
                             const char *(*map)(nir_function *func,
                                                unsigned variant,
                                                const char *target))
{
   fprintf(fp, "const uint32_t *%s_%s[", prefix, target);
   nir_print_uppercase(fp, prefix);
   fprintf(fp, "_NUM_PROGRAMS] = {\n");

   nir_foreach_entrypoint(func, nir) {
      for (unsigned v = 0; v < nir_precomp_nr_variants(func); ++v) {
         fprintf(fp, "   [");
         nir_precomp_print_enum_variant_value(fp, func, v);
         fprintf(fp, "] = %s_%u_%s,\n", func->name, v,
                 map ? map(func, v, target) : target);
      }
   }

   fprintf(fp, "};\n\n");
}

static inline nir_shader *
nir_precompiled_build_variant(const nir_function *libfunc, unsigned variant,
                              const nir_shader_compiler_options *opts,
                              const struct nir_precomp_opts *precomp_opt,
                              nir_def *(*load_arg)(nir_builder *b,
                                                   unsigned num_components,
                                                   unsigned bit_size,
                                                   unsigned offset_B))
{
   bool has_variants = nir_precomp_has_variants(libfunc);
   struct nir_precomp_layout layout =
      nir_precomp_derive_layout(precomp_opt, libfunc);

   nir_builder b;
   if (has_variants) {
      b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, opts,
                                         "%s variant %u", libfunc->name,
                                         variant);
   } else {
      b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, opts, "%s",
                                         libfunc->name);
   }

   assert(libfunc->workgroup_size[0] != 0 && "must set workgroup size");

   b.shader->info.workgroup_size[0] = libfunc->workgroup_size[0];
   b.shader->info.workgroup_size[1] = libfunc->workgroup_size[1];
   b.shader->info.workgroup_size[2] = libfunc->workgroup_size[2];

   nir_function *func = nir_function_clone(b.shader, libfunc);
   func->is_entrypoint = false;

   nir_def *args[NIR_PRECOMP_MAX_ARGS] = { NULL };

   /* Some parameters are variant indices and others are kernel arguments */
   for (unsigned a = 0; a < libfunc->num_params; ++a) {
      nir_parameter p = func->params[a];

      if (nir_precomp_is_variant_param(libfunc, a)) {
         unsigned idx = nir_precomp_decode_variant_index(libfunc, variant, a);
         args[a] = nir_imm_intN_t(&b, idx, p.bit_size);
      } else {
         args[a] = load_arg(&b, p.num_components, p.bit_size,
                            layout.offset_B[a]);
      }
   }

   nir_build_call(&b, func, func->num_params, args);
   return b.shader;
}

static inline void
nir_precomp_print_blob(FILE *fp, const char *arr_name, const char *suffix,
                       uint32_t variant, const uint32_t *data, size_t len,
                       bool is_static)
{
   fprintf(fp, "%sconst uint32_t %s_%u_%s[%zu] = {",
           is_static ? "static " : "", arr_name, variant, suffix,
           DIV_ROUND_UP(len, 4));
   for (unsigned i = 0; i < (len / 4); i++) {
      if (i % 4 == 0)
         fprintf(fp, "\n  ");

      fprintf(fp, " 0x%08" PRIx32 ",", data[i]);
   }

   if (len % 4) {
      const uint8_t *data_u8 = (const uint8_t *)data;
      uint32_t last = 0;
      unsigned last_offs = ROUND_DOWN_TO(len, 4);
      for (unsigned i = 0; i < len % 4; ++i) {
         last |= (uint32_t)data_u8[last_offs + i] << (i * 8);
      }

      fprintf(fp, " 0x%08" PRIx32 ",", last);
   }

   fprintf(fp, "\n};\n");
}

static inline void
nir_precomp_print_nir(FILE *fp_c, FILE *fp_h, const nir_shader *nir,
                      const char *name, const char *suffix)
{
   struct blob blob;
   blob_init(&blob);
   nir_serialize(&blob, nir, true /* strip */);

   nir_precomp_print_blob(fp_c, name, suffix, 0, (const uint32_t *)blob.data,
                          blob.size, false);

   fprintf(fp_h, "extern const uint32_t %s_0_%s[%zu];\n", name, suffix,
           DIV_ROUND_UP(blob.size, 4));

   blob_finish(&blob);
}

static inline void
nir_precomp_print_header(FILE *fp_c, FILE *fp_h, const char *copyright,
                         const char *h_name)
{
   for (unsigned i = 0; i < 2; ++i) {
      FILE *fp = i ? fp_c : fp_h;
      fprintf(fp, "/*\n");
      fprintf(fp, " * Copyright %s\n", copyright);
      fprintf(fp, " * SPDX-License-Identifier: MIT\n");
      fprintf(fp, " *\n");
      fprintf(fp, " * Autogenerated file, do not edit\n");
      fprintf(fp, " */\n\n");

      /* uint32_t types are used throughout */
      fprintf(fp, "#include <stdint.h>\n\n");
   }

   /* The generated C code depends on the header we will generate */
   fprintf(fp_c, "#include \"%s\"\n", h_name);

   /* Include guard the header. This relies on a grown up compiler. If you're
    * doing precompiled, you have one.
    */
   fprintf(fp_h, "#pragma once\n");

   /* The generated header uses unprefixed static_assert, which seemingly
    * needs an #include.
    */
   fprintf(fp_h, "#include \"util/macros.h\"\n\n");
}