lib/mesa/src/compiler/nir/nir_conversion_builder.h at jcs

jcs.org / openbsd-xenocara
fork
mirror of OpenBSD xenocara tree github.com/openbsd/xenocara
openbsd
fork
openbsd-xenocara / lib / mesa / src / compiler / nir / nir_conversion_builder.h
at jcs 524 lines 18 kB view raw
wrap content
jsg Import Mesa 25.0.7 11mo ago
67d6f117
  1/*
  2 * Copyright © 2020 Collabora Ltd.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice (including the next
 12 * paragraph) shall be included in all copies or substantial portions of the
 13 * Software.
 14 *
 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 21 * IN THE SOFTWARE.
 22 */
 23
 24#ifndef NIR_CONVERSION_BUILDER_H
 25#define NIR_CONVERSION_BUILDER_H
 26
 27#include "util/u_math.h"
 28#include "nir_builder.h"
 29#include "nir_builtin_builder.h"
 30
 31#ifdef __cplusplus
 32extern "C" {
 33#endif
 34
 35static inline nir_def *
 36nir_round_float_to_int(nir_builder *b, nir_def *src,
 37                       nir_rounding_mode round)
 38{
 39   switch (round) {
 40   case nir_rounding_mode_ru:
 41      return nir_fceil(b, src);
 42
 43   case nir_rounding_mode_rd:
 44      return nir_ffloor(b, src);
 45
 46   case nir_rounding_mode_rtne:
 47      return nir_fround_even(b, src);
 48
 49   case nir_rounding_mode_undef:
 50   case nir_rounding_mode_rtz:
 51      break;
 52   }
 53   unreachable("unexpected rounding mode");
 54}
 55
 56static inline nir_def *
 57nir_round_float_to_float(nir_builder *b, nir_def *src,
 58                         unsigned dest_bit_size,
 59                         nir_rounding_mode round)
 60{
 61   unsigned src_bit_size = src->bit_size;
 62   if (dest_bit_size > src_bit_size)
 63      return src; /* No rounding is needed for an up-convert */
 64
 65   nir_op low_conv = nir_type_conversion_op(nir_type_float | src_bit_size,
 66                                            nir_type_float | dest_bit_size,
 67                                            nir_rounding_mode_undef);
 68   nir_op high_conv = nir_type_conversion_op(nir_type_float | dest_bit_size,
 69                                             nir_type_float | src_bit_size,
 70                                             nir_rounding_mode_undef);
 71
 72   switch (round) {
 73   case nir_rounding_mode_ru: {
 74      /* If lower-precision conversion results in a lower value, push it
 75       * up one ULP. */
 76      nir_def *lower_prec =
 77         nir_build_alu(b, low_conv, src, NULL, NULL, NULL);
 78      nir_def *roundtrip =
 79         nir_build_alu(b, high_conv, lower_prec, NULL, NULL, NULL);
 80      nir_def *cmp = nir_flt(b, roundtrip, src);
 81      nir_def *inf = nir_imm_floatN_t(b, INFINITY, dest_bit_size);
 82      return nir_bcsel(b, cmp, nir_nextafter(b, lower_prec, inf), lower_prec);
 83   }
 84   case nir_rounding_mode_rd: {
 85      /* If lower-precision conversion results in a higher value, push it
 86       * down one ULP. */
 87      nir_def *lower_prec =
 88         nir_build_alu(b, low_conv, src, NULL, NULL, NULL);
 89      nir_def *roundtrip =
 90         nir_build_alu(b, high_conv, lower_prec, NULL, NULL, NULL);
 91      nir_def *cmp = nir_flt(b, src, roundtrip);
 92      nir_def *neg_inf = nir_imm_floatN_t(b, -INFINITY, dest_bit_size);
 93      return nir_bcsel(b, cmp, nir_nextafter(b, lower_prec, neg_inf), lower_prec);
 94   }
 95   case nir_rounding_mode_rtz:
 96      return nir_bcsel(b, nir_flt_imm(b, src, 1),
 97                       nir_round_float_to_float(b, src, dest_bit_size,
 98                                                nir_rounding_mode_ru),
 99                       nir_round_float_to_float(b, src, dest_bit_size,
100                                                nir_rounding_mode_rd));
101   case nir_rounding_mode_rtne:
102   case nir_rounding_mode_undef:
103      break;
104   }
105   unreachable("unexpected rounding mode");
106}
107
108static inline nir_def *
109nir_round_int_to_float(nir_builder *b, nir_def *src,
110                       nir_alu_type src_type,
111                       unsigned dest_bit_size,
112                       nir_rounding_mode round)
113{
114   /* We only care whether or not its signed */
115   src_type = nir_alu_type_get_base_type(src_type);
116
117   unsigned mantissa_bits;
118   switch (dest_bit_size) {
119   case 16:
120      mantissa_bits = 10;
121      break;
122   case 32:
123      mantissa_bits = 23;
124      break;
125   case 64:
126      mantissa_bits = 52;
127      break;
128   default:
129      unreachable("Unsupported bit size");
130   }
131
132   if (src->bit_size < mantissa_bits)
133      return src;
134
135   if (src_type == nir_type_int) {
136      nir_def *sign =
137         nir_i2b(b, nir_ishr(b, src, nir_imm_int(b, src->bit_size - 1)));
138      nir_def *abs = nir_iabs(b, src);
139      nir_def *positive_rounded =
140         nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, round);
141      nir_def *max_positive =
142         nir_imm_intN_t(b, (1ull << (src->bit_size - 1)) - 1, src->bit_size);
143      switch (round) {
144      case nir_rounding_mode_rtz:
145         return nir_bcsel(b, sign, nir_ineg(b, positive_rounded),
146                          positive_rounded);
147         break;
148      case nir_rounding_mode_ru:
149         return nir_bcsel(b, sign,
150                          nir_ineg(b, nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, nir_rounding_mode_rd)),
151                          nir_umin(b, positive_rounded, max_positive));
152         break;
153      case nir_rounding_mode_rd:
154         return nir_bcsel(b, sign,
155                          nir_ineg(b,
156                                   nir_umin(b, max_positive,
157                                            nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, nir_rounding_mode_ru))),
158                          positive_rounded);
159      case nir_rounding_mode_rtne:
160      case nir_rounding_mode_undef:
161         break;
162      }
163      unreachable("unexpected rounding mode");
164   } else {
165      nir_def *mantissa_bit_size = nir_imm_int(b, mantissa_bits);
166      nir_def *msb = nir_imax(b, nir_ufind_msb(b, src), mantissa_bit_size);
167      nir_def *bits_to_lose = nir_isub(b, msb, mantissa_bit_size);
168      nir_def *one = nir_imm_intN_t(b, 1, src->bit_size);
169      nir_def *adjust = nir_ishl(b, one, bits_to_lose);
170      nir_def *mask = nir_inot(b, nir_isub(b, adjust, one));
171      nir_def *truncated = nir_iand(b, src, mask);
172      switch (round) {
173      case nir_rounding_mode_rtz:
174      case nir_rounding_mode_rd:
175         return truncated;
176         break;
177      case nir_rounding_mode_ru:
178         return nir_bcsel(b, nir_ieq(b, src, truncated),
179                          src, nir_uadd_sat(b, truncated, adjust));
180      case nir_rounding_mode_rtne:
181      case nir_rounding_mode_undef:
182         break;
183      }
184      unreachable("unexpected rounding mode");
185   }
186}
187
188/** Returns true if the representable range of a contains the representable
189 * range of b.
190 */
191static inline bool
192nir_alu_type_range_contains_type_range(nir_alu_type a, nir_alu_type b)
193{
194   /* Split types from bit sizes */
195   nir_alu_type a_base_type = nir_alu_type_get_base_type(a);
196   nir_alu_type b_base_type = nir_alu_type_get_base_type(b);
197   unsigned a_bit_size = nir_alu_type_get_type_size(a);
198   unsigned b_bit_size = nir_alu_type_get_type_size(b);
199
200   /* This requires sized types */
201   assert(a_bit_size > 0 && b_bit_size > 0);
202
203   if (a_base_type == b_base_type && a_bit_size >= b_bit_size)
204      return true;
205
206   if (a_base_type == nir_type_int && b_base_type == nir_type_uint &&
207       a_bit_size > b_bit_size)
208      return true;
209
210   /* 16-bit floats fit in 32-bit integers */
211   if (a_base_type == nir_type_int && a_bit_size >= 32 &&
212       b == nir_type_float16)
213      return true;
214
215   /* All signed or unsigned ints can fit in float or above. A uint8 can fit
216    * in a float16.
217    */
218   if (a_base_type == nir_type_float && b_base_type != nir_type_float &&
219       (a_bit_size >= 32 || b_bit_size == 8))
220      return true;
221
222   return false;
223}
224
225/**
226 * Retrieves limits used for clamping a value of the src type into
227 * the widest representable range of the dst type via cmp + bcsel
228 */
229static inline void
230nir_get_clamp_limits(nir_builder *b,
231                     nir_alu_type src_type,
232                     nir_alu_type dest_type,
233                     nir_def **low, nir_def **high)
234{
235   /* Split types from bit sizes */
236   nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type);
237   nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
238   unsigned src_bit_size = nir_alu_type_get_type_size(src_type);
239   unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
240   assert(dest_bit_size != 0 && src_bit_size != 0);
241
242   *low = NULL;
243   *high = NULL;
244
245   /* limits of the destination type, expressed in the source type */
246   switch (dest_base_type) {
247   case nir_type_int: {
248      int64_t ilow, ihigh;
249      if (dest_bit_size == 64) {
250         ilow = INT64_MIN;
251         ihigh = INT64_MAX;
252      } else {
253         ilow = -(1ll << (dest_bit_size - 1));
254         ihigh = (1ll << (dest_bit_size - 1)) - 1;
255      }
256
257      if (src_base_type == nir_type_int) {
258         *low = nir_imm_intN_t(b, ilow, src_bit_size);
259         *high = nir_imm_intN_t(b, ihigh, src_bit_size);
260      } else if (src_base_type == nir_type_uint) {
261         assert(src_bit_size >= dest_bit_size);
262         *high = nir_imm_intN_t(b, ihigh, src_bit_size);
263      } else {
264         *low = nir_imm_floatN_t(b, ilow, src_bit_size);
265         *high = nir_imm_floatN_t(b, ihigh, src_bit_size);
266      }
267      break;
268   }
269   case nir_type_uint: {
270      uint64_t uhigh = dest_bit_size == 64 ? ~0ull : (1ull << dest_bit_size) - 1;
271      if (src_base_type != nir_type_float) {
272         /* for uint->uint conversions, no need to clamp negatives */
273         if (src_base_type != nir_type_uint)
274            *low = nir_imm_intN_t(b, 0, src_bit_size);
275
276         if (src_base_type == nir_type_uint || src_bit_size > dest_bit_size)
277            *high = nir_imm_intN_t(b, uhigh, src_bit_size);
278      } else {
279         *low = nir_imm_floatN_t(b, 0.0f, src_bit_size);
280         *high = nir_imm_floatN_t(b, uhigh, src_bit_size);
281      }
282      break;
283   }
284   case nir_type_float: {
285      double flow, fhigh;
286      switch (dest_bit_size) {
287      case 16:
288         flow = -65504.0f;
289         fhigh = 65504.0f;
290         break;
291      case 32:
292         flow = -FLT_MAX;
293         fhigh = FLT_MAX;
294         break;
295      case 64:
296         flow = -DBL_MAX;
297         fhigh = DBL_MAX;
298         break;
299      default:
300         unreachable("Unhandled bit size");
301      }
302
303      switch (src_base_type) {
304      case nir_type_int: {
305         int64_t src_ilow, src_ihigh;
306         if (src_bit_size == 64) {
307            src_ilow = INT64_MIN;
308            src_ihigh = INT64_MAX;
309         } else {
310            src_ilow = -(1ll << (src_bit_size - 1));
311            src_ihigh = (1ll << (src_bit_size - 1)) - 1;
312         }
313         if (src_ilow < flow)
314            *low = nir_imm_intN_t(b, flow, src_bit_size);
315         if (src_ihigh > fhigh)
316            *high = nir_imm_intN_t(b, fhigh, src_bit_size);
317         break;
318      }
319      case nir_type_uint: {
320         uint64_t src_uhigh = src_bit_size == 64 ? ~0ull : (1ull << src_bit_size) - 1;
321         if (src_uhigh > fhigh)
322            *high = nir_imm_intN_t(b, fhigh, src_bit_size);
323         break;
324      }
325      case nir_type_float:
326         *low = nir_imm_floatN_t(b, flow, src_bit_size);
327         *high = nir_imm_floatN_t(b, fhigh, src_bit_size);
328         break;
329      default:
330         unreachable("Clamping from unknown type");
331      }
332      break;
333   }
334   default:
335      unreachable("clamping to unknown type");
336      break;
337   }
338}
339
340/**
341 * Clamp the value into the widest representatble range of the
342 * destination type with cmp + bcsel.
343 *
344 * val/val_type: The variables used for bcsel
345 * src/src_type: The variables used for comparison
346 * dest_type: The type which determines the range used for comparison
347 */
348static inline nir_def *
349nir_clamp_to_type_range(nir_builder *b,
350                        nir_def *val, nir_alu_type val_type,
351                        nir_def *src, nir_alu_type src_type,
352                        nir_alu_type dest_type)
353{
354   assert(nir_alu_type_get_type_size(src_type) == 0 ||
355          nir_alu_type_get_type_size(src_type) == src->bit_size);
356   src_type |= src->bit_size;
357   if (nir_alu_type_range_contains_type_range(dest_type, src_type))
358      return val;
359
360   /* limits of the destination type, expressed in the source type */
361   nir_def *low = NULL, *high = NULL;
362   nir_get_clamp_limits(b, src_type, dest_type, &low, &high);
363
364   nir_def *low_cond = NULL, *high_cond = NULL;
365   switch (nir_alu_type_get_base_type(src_type)) {
366   case nir_type_int:
367      low_cond = low ? nir_ilt(b, src, low) : NULL;
368      high_cond = high ? nir_ilt(b, high, src) : NULL;
369      break;
370   case nir_type_uint:
371      low_cond = low ? nir_ult(b, src, low) : NULL;
372      high_cond = high ? nir_ult(b, high, src) : NULL;
373      break;
374   case nir_type_float:
375      low_cond = low ? nir_fge(b, low, src) : NULL;
376      high_cond = high ? nir_fge(b, src, high) : NULL;
377      break;
378   default:
379      unreachable("clamping from unknown type");
380   }
381
382   nir_def *val_low = low, *val_high = high;
383   if (val_type != src_type) {
384      nir_get_clamp_limits(b, val_type, dest_type, &val_low, &val_high);
385   }
386
387   nir_def *res = val;
388   if (low_cond && val_low)
389      res = nir_bcsel(b, low_cond, val_low, res);
390   if (high_cond && val_high)
391      res = nir_bcsel(b, high_cond, val_high, res);
392
393   return res;
394}
395
396static inline nir_rounding_mode
397nir_simplify_conversion_rounding(nir_alu_type src_type,
398                                 nir_alu_type dest_type,
399                                 nir_rounding_mode rounding)
400{
401   nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type);
402   nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
403   unsigned src_bit_size = nir_alu_type_get_type_size(src_type);
404   unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
405   assert(src_bit_size > 0 && dest_bit_size > 0);
406
407   if (rounding == nir_rounding_mode_undef)
408      return rounding;
409
410   /* Pure integer conversion doesn't have any rounding */
411   if (src_base_type != nir_type_float &&
412       dest_base_type != nir_type_float)
413      return nir_rounding_mode_undef;
414
415   /* Float down-casts don't round */
416   if (src_base_type == nir_type_float &&
417       dest_base_type == nir_type_float &&
418       dest_bit_size >= src_bit_size)
419      return nir_rounding_mode_undef;
420
421   /* Regular float to int conversions are RTZ */
422   if (src_base_type == nir_type_float &&
423       dest_base_type != nir_type_float &&
424       rounding == nir_rounding_mode_rtz)
425      return nir_rounding_mode_undef;
426
427   /* The CL spec requires regular conversions to float to be RTNE */
428   if (dest_base_type == nir_type_float &&
429       rounding == nir_rounding_mode_rtne)
430      return nir_rounding_mode_undef;
431
432   /* Couldn't simplify */
433   return rounding;
434}
435
436static inline nir_def *
437nir_convert_with_rounding(nir_builder *b,
438                          nir_def *src, nir_alu_type src_type,
439                          nir_alu_type dest_type,
440                          nir_rounding_mode round,
441                          bool clamp)
442{
443   /* Some stuff wants sized types */
444   assert(nir_alu_type_get_type_size(src_type) == 0 ||
445          nir_alu_type_get_type_size(src_type) == src->bit_size);
446   src_type |= src->bit_size;
447
448   /* Split types from bit sizes */
449   nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type);
450   nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
451   unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
452
453   /* Try to simplify the conversion if we can */
454   clamp = clamp &&
455           !nir_alu_type_range_contains_type_range(dest_type, src_type);
456   round = nir_simplify_conversion_rounding(src_type, dest_type, round);
457
458   /* For float -> int/uint conversions, we might not be able to represent
459    * the destination range in the source float accurately. For these cases,
460    * do the comparison in float range, but the bcsel in the destination range.
461    */
462   bool clamp_after_conversion = clamp &&
463                                 src_base_type == nir_type_float &&
464                                 dest_base_type != nir_type_float;
465
466   /*
467    * If we don't care about rounding and clamping, we can just use NIR's
468    * built-in ops. There is also a special case for SPIR-V in shaders, where
469    * f32/f64 -> f16 conversions can have one of two rounding modes applied,
470    * which NIR has built-in opcodes for.
471    *
472    * For the rest, we have our own implementation of rounding and clamping.
473    */
474   bool trivial_convert;
475   if (!clamp && round == nir_rounding_mode_undef) {
476      trivial_convert = true;
477   } else if (!clamp && src_type == nir_type_float32 &&
478              dest_type == nir_type_float16 &&
479              (round == nir_rounding_mode_rtne ||
480               round == nir_rounding_mode_rtz)) {
481      trivial_convert = true;
482   } else {
483      trivial_convert = false;
484   }
485
486   if (trivial_convert)
487      return nir_type_convert(b, src, src_type, dest_type, round);
488
489   nir_def *dest = src;
490
491   /* clamp the result into range */
492   if (clamp && !clamp_after_conversion)
493      dest = nir_clamp_to_type_range(b, src, src_type, src, src_type, dest_type);
494
495   /* round with selected rounding mode */
496   if (!trivial_convert && round != nir_rounding_mode_undef) {
497      if (src_base_type == nir_type_float) {
498         if (dest_base_type == nir_type_float) {
499            dest = nir_round_float_to_float(b, dest, dest_bit_size, round);
500         } else {
501            dest = nir_round_float_to_int(b, dest, round);
502         }
503      } else {
504         dest = nir_round_int_to_float(b, dest, src_type, dest_bit_size, round);
505      }
506
507      round = nir_rounding_mode_undef;
508   }
509
510   /* now we can convert the value */
511   nir_op op = nir_type_conversion_op(src_type, dest_type, round);
512   dest = nir_build_alu(b, op, dest, NULL, NULL, NULL);
513
514   if (clamp_after_conversion)
515      dest = nir_clamp_to_type_range(b, dest, dest_type, src, src_type, dest_type);
516
517   return dest;
518}
519
520#ifdef __cplusplus
521}
522#endif
523
524#endif /* NIR_CONVERSION_BUILDER_H */
Configure Feed

Configure Feed