mirror of OpenBSD xenocara tree github.com/openbsd/xenocara
openbsd
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at jcs 470 lines 14 kB view raw
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/*
 * This pass tries to reduce the bitsize of phi instructions by either
 * moving narrowing conversions from the phi's consumers to the phi's
 * sources, if all the uses of the phi are equivalent narrowing
 * instructions.  In other words, convert:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
 *        vec1 16 ssa_8 = i2imp ssa_155
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    vec1 16 ssa_156 = i2imp ssa_124
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *        vec1 16 ssa_157 = i2i16 ssa_53
 *    }
 *
 * Or failing that, tries to push widening conversion of phi srcs to
 * the phi def.  In this case, since load_const is frequently one
 * of the phi sources this pass checks if it can be narrowed without a
 * loss of precision:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *        vec1 32 ssa_19 = i2i32 ssa_18
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    vec1 16 ssa_22 = i2i16 ssa_0
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
 *        vec1 32 ssa_23 = i2i32 ssa_8
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *    }
 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
 * cannot see through phis.
 */

/* Sentinel opcode value meaning "no conversion op chosen / not a
 * conversion"; nir_num_opcodes is one past the last valid opcode.
 */
#define INVALID_OP nir_num_opcodes

/**
 * Get the corresponding exact conversion for a x2ymp conversion.
 *
 * Any opcode that is not one of the "mediump" (mp) conversions is
 * returned unchanged.
 */
static nir_op
concrete_conversion(nir_op op)
{
   switch (op) {
   case nir_op_i2imp:
      return nir_op_i2i16;
   case nir_op_i2fmp:
      return nir_op_i2f16;
   case nir_op_u2fmp:
      return nir_op_u2f16;
   case nir_op_f2fmp:
      return nir_op_f2f16;
   case nir_op_f2imp:
      return nir_op_f2i16;
   case nir_op_f2ump:
      return nir_op_f2u16;
   default:
      return op;
   }
}

/**
 * If @instr is a 32->16 narrowing conversion compatible with the
 * conversion op chosen so far (@current_op, or INVALID_OP if none has
 * been chosen yet), return the (possibly reconciled) conversion op.
 * Otherwise return INVALID_OP.
 */
static nir_op
narrowing_conversion_op(nir_instr *instr, nir_op current_op)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_op op = nir_instr_as_alu(instr)->op;
   switch (op) {
   case nir_op_i2imp:
   case nir_op_i2i16:
   case nir_op_i2fmp:
   case nir_op_i2f16:
   case nir_op_u2fmp:
   case nir_op_u2f16:
   case nir_op_f2fmp:
   case nir_op_f2f16:
   case nir_op_f2imp:
   case nir_op_f2i16:
   case nir_op_f2ump:
   case nir_op_f2u16:
   case nir_op_f2f16_rtne:
   case nir_op_f2f16_rtz:
      break;
   default:
      return INVALID_OP;
   }

   /* If we've already picked a conversion op from a previous phi use,
    * make sure it is compatible with the current use
    */
   if (current_op != INVALID_OP) {
      if (current_op != op) {
         /* If we have different conversions, but one can be converted
          * to the other, then let's do that:
          * (e.g. i2imp vs i2i16 reconcile to the exact i2i16)
          */
         if (concrete_conversion(current_op) == concrete_conversion(op)) {
            op = concrete_conversion(op);
         } else {
            return INVALID_OP;
         }
      }
   }

   return op;
}

/**
 * If @instr is a widening conversion (narrow -> 32b), return its opcode
 * and store the source bit size in *bit_size; otherwise return
 * INVALID_OP.
 */
static nir_op
widening_conversion_op(nir_instr *instr, unsigned *bit_size)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_i2i32:
   case nir_op_i2f32:
   case nir_op_u2f32:
   case nir_op_f2f32:
   case nir_op_f2i32:
   case nir_op_f2u32:
      break;
   default:
      return INVALID_OP;
   }

   *bit_size = nir_src_bit_size(alu->src[0].src);

   /* We also need to check that the conversion's dest was actually
    * wider:
    */
   if (alu->def.bit_size <= *bit_size)
      return INVALID_OP;

   return alu->op;
}

/* Base type (int/uint/float) of a conversion op's output. */
static nir_alu_type
op_to_type(nir_op op)
{
   return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
}

/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 *
 * Returns true (progress) if the phi was replaced by a narrower phi.
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use_including_if(use, &phi->def) {
      /* an if use means the phi is used directly in a conditional, ie.
       * without a conversion
       */
      if (nir_src_is_if(use))
         return false;

      op = narrowing_conversion_op(nir_src_parent_instr(use), op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components,
                nir_alu_type_get_type_size(nir_op_infos[op].output_type));

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src(src, phi) {
      /* insert new conversion instr in block of original phi src: */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_def *old_src = src->src.ssa;
      nir_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi
    */
   nir_foreach_use(use, &phi->def) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead.
       * (The subsequent rewrite below repoints them at the already-
       * narrow new phi, so the conversion becomes a no-op mov.)
       */
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
      alu->op = nir_op_mov;
   }
   nir_def_rewrite_uses(&phi->def, &new_phi->def);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}

/**
 * Check whether every component of a 32b load_const survives a
 * round-trip through a 16b narrowing of the given base type without
 * loss of precision.
 */
static bool
can_convert_load_const(nir_load_const_instr *lc, nir_op op)
{
   nir_alu_type type = op_to_type(op);

   /* Note that we only handle phi's with bit_size == 32: */
   assert(lc->def.bit_size == 32);

   for (unsigned i = 0; i < lc->def.num_components; i++) {
      switch (type) {
      case nir_type_int:
         if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
            return false;
         break;
      case nir_type_uint:
         if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
            return false;
         break;
      case nir_type_float:
         if (lc->value[i].f32 != _mesa_half_to_float(
                                    _mesa_float_to_half(lc->value[i].f32)))
            return false;
         break;
      default:
         unreachable("bad type");
         return false;
      }
   }

   return true;
}

/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi.
 *
 * Returns the common widening op (or INVALID_OP) and stores the common
 * pre-widening bit size in *bit_size.
 */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         /* Defer load_const checking to the second loop below, once the
          * common conversion op is known:
          */
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move widening sources, but load_const is
    * one of the phi sources (and does not have a widening conversion,
    * but could have a narrowing->widening sequence inserted without
    * loss of precision), then we could insert a narrowing->widening
    * sequence to make the rest of the transformation possible:
    */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}

/* Try to move widening conversions into the phi to the phi's output
 * to reduce the phi's precision:
 *
 * Returns true (progress) if the phi was replaced by a narrower phi
 * followed by a single widening conversion.
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components, bit_size);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      nir_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the sources source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = alu->def.num_components;
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi,
    * and re-write the original phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_def *def = nir_build_alu(b, op, &new_phi->def, NULL, NULL, NULL);

   nir_def_rewrite_uses(&phi->def, def);

   return true;
}

/* Apply the two transformations to a single phi; narrowing the dst is
 * preferred, widening the srcs is the fallback.
 */
static bool
lower_phi(nir_builder *b, nir_phi_instr *phi)
{
   bool progress = try_move_narrowing_dst(b, phi);
   if (!progress)
      progress = try_move_widening_src(b, phi);
   return progress;
}

/**
 * Pass entry point: reduce the precision of 32b phis whose sources or
 * uses are all 16b conversions.  Returns true if any phi was rewritten.
 */
bool
nir_opt_phi_precision(nir_shader *shader)
{
   bool progress = false;

   /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
   unsigned bit_sizes_used = shader->info.bit_sizes_float |
                             shader->info.bit_sizes_int;

   /* Note: if the info is zeroed, we conservatively run to avoid gathering
    * info, which doesn't work for libraries.
    */
   if (bit_sizes_used && !(bit_sizes_used & (8 | 16)))
      return false;

   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_phi_safe(phi, block)
            progress |= lower_phi(&b, phi);
      }

      if (progress) {
         nir_metadata_preserve(impl,
                               nir_metadata_control_flow);
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}