this repo has no description
1// long double nearbyintl(long double x)
2//
3// returns the argument rounded to an integral value using the prevailing
4// rounding mode, and without raising the inexact flag (C99 7.12.9.4).
5//
6// Original version due to Ian Ollmann, 2007
7//
8// Edits by Steve Canon January 2010:
9// rearrange branches to match nearbyint[f]
10// minimal tune-up to get sign of zero right
11
12#include <machine/asm.h>
13#include "abi.h"
14
15ENTRY( nearbyintl )
16 SUBP $(32-FRAME_SIZE), STACKP
17
18 //write the current rounding mode to the stack. Using the mxcsr instead cost an extra 20 cycles.
19 fnstcw 16(STACKP)
20
21 fldz // { 0 }
22 movq 32(STACKP), %xmm0 //Load the mantissa
23 fldt 32(STACKP) // { x, 0 }
24 pxor %xmm1, %xmm1 // set our default truncation mask to 0
25 xorl %ecx, %ecx
26 movw 40(STACKP), %cx //Load the signed exponent
27 movl %ecx, %edx // save signed exponent
28 movw %cx, 8(STACKP) // write out exponent of truncated value
29 andl $0x8000, %edx // sign
30 andl $0x7fff, %ecx // biased exponent
31 shll $16, %edx // sign << 16
32 addl $(16384-62), %ecx // add 16384-62 to exponent. This forces Inf, NaN, numbers >= 2**63 to be negative
33 cmpw $(16383+16384-62), %cx // values with exponents less than 16384-62 are fractional numbers only
34 jl L_xIsLargeOrSmall // branch for |x| < 1.0L, |x| >= 0x1.0p63L, NaN
35
36 //Now we know we have a number 1.0 <= |x| < 2**63
37
38 fstp %st(1) // { x }
39 //create a mask of non-fractional bits to calculate trunc(x)
40 subl $(16383+16384-62+63), %ecx // unbias the exponent, undo our magic above
41 negl %ecx // bits to shift -1ULL left to create a truncation mask
42 pcmpeqb %xmm1, %xmm1 // -1ULL
43 movd %ecx, %xmm7 // bits to shift -1ULL left to create a truncation mask
44 psllq %xmm7, %xmm1 // left shift -1ULL by number of fractional bits in x
45
46 //Round. We get here in one of two ways:
47 // fall through from above: xmm2 has a truncation mask
48 // jmp from |x| < 1.0: xmm2 holds the sign of x (stored as +-0.0f), which rounds x to zero of the correct sign in this step
49L_reentryForSmallX:
50 pand %xmm0, %xmm1 // trunc(mantissa)
51 movq %xmm1, (STACKP)
52 fldt (STACKP) // { trunc(x), x }
53 fucomi %st(1), %st(0) // trunc(x) == x ?
54 je L_truncate // if the value was an integer, we are done, so return x (satisfies x==0, small integer cases )
55
56 //find our what our rounding mode is
57 movl 16(STACKP), %ecx
58 andl $0xC00, %ecx // isolate the RC field
59 subl $0x400, %ecx // move one of the possible values negative
60 cmpw $0x400, %cx // test the RC value
61
62 // Rounding modes in eax:
63 // -0x400 Round to nearest even
64 // 0 Round to -Inf
65 // 0x400 Round to +inf
66 // 0x800 Round to zero
67
68 jg L_truncate //Round to zero mode. Note: signed compare here
69 je L_roundUp //Round to +Inf, Go there
70 jb L_roundDown //Round to -Inf, Go there. Note: unsigned compare here
71
72 //Round to nearest even is the fall through path, because it is most common
73 fsubr %st(0), %st(1) // { trunc(x), fract }
74
75 orl $0x3f000000, %edx // copysign( 0.5f, x )
76 movl %edx, 16(STACKP)
77 flds 16(STACKP) // { copysign( 0.5L, x ), trunc(x), fract }
78
79 //check for fract == copysign( 0.5f, x )
80
81 fld %st(2) // { fract, copysign( 0.5L, x ), trunc(x), fract }
82 fabs // { |fract|, copysign( 0.5L, x ), trunc(x), fract }
83 fld %st(1) // { copysign( 0.5L, x ), |fract|, copysign( 0.5L, x ), trunc(x), fract }
84 fabs // { 0.5L, |fract|, copysign( 0.5L, x ), trunc(x), fract }
85 fucomip %st(1), %st(0) // { |fract|, copysign( 0.5L, x ), trunc(x), fract }
86 fstp %st(0) // { copysign( 0.5L, x ), trunc(x), fract }
87 je L_exactHalfway // |fract| == 0.5, figure out which way to round in the comfort and safety of a separate code branch
88
89 fldz // { 0.0L, copysign( 0.5L, x ), trunc(x), fract }
90 fcmovb %st(1), %st(0) // { 0.5L < |fract| ? copysign( 0.5L, x ) : 0.0L, copysign( 0.5L, x ), trunc(x), fract }
91 fstp %st(1) // { 0.5L < |fract| ? copysign( 0.5L, x ) : 0.0L, trunc(x), fract }
92 fadd %st(0), %st(0) // { 0.5L < |fract| ? copysign( 1.0L, x ) : 0.0L, trunc(x), fract }
93 faddp // { correctly rounded x, fract }
94 fstp %st(1)
95 fabs // { |result| }
96 cmp $0, %edx
97 jge 1f
98 fchs // flip the sign if x is negative
991: ADDP $(32-FRAME_SIZE), STACKP
100 ret
101
102L_exactHalfway: // { copysign( 0.5L, x ), trunc(x), fract }
103 addl $0x20000000, %edx // copysign( 0x1.0p63f, x )
104 movl %edx, 16(STACKP) // copysign( 0x1.0p63f, x )
105 flds 16(STACKP) // { copysign( 0x1.0p63L, x ), copysign( 0.5L, x ), trunc(x), fract }
106 fadd %st(2), %st(0) // { trunc(x) + copysign( 0x1.0p63L, x ), copysign( 0.5L, x ), trunc(x), fract }
107 fstpt (STACKP) // { copysign( 0.5L, x ), trunc(x), fract }
108 fstp %st(0) // { trunc(x), fract }
109 fxch // { fract, trunc(x) }
110 fadd %st(0), %st(0) // { copysign( 1.0L, x ), trunc(x) }
111 movl $1, %eax //
112 fldz // { 0, copysign( 1.0L, x ), trunc(x) }
113 andl (STACKP), %eax // (mantissa & 1ULL) == 0
114 fcmovne %st(1), %st(0) // { rounding value, copysign( 1.0L, x ), trunc(x) }
115 fstp %st(1) // { rounding value, trunc(x) }
116 faddp // { result }
117 fabs // { |result| }
118 cmp $0, %edx
119 jge 1f
120 fchs // flip the sign if x is negative
1211: ADDP $(32-FRAME_SIZE), STACKP
122 ret
123
124L_truncate:
125 fstp %st(1)
126 ADDP $(32-FRAME_SIZE), STACKP
127 ret
128
129L_roundUp: // { trunc(x), x }
130 fsubr %st(0), %st(1) // { trunc(x), fract }
131 fldz // { 0.0L, trunc(x), fract }
132 fucomi %st(2), %st(0) // { 0.0L, trunc(x), fract } 0.0L < fract ?
133 fld1 // { 1.0L, 0.0L, trunc(x), fract }
134 fcmovnbe %st(1), %st(0) // { 0.0L or 1.0L, 0.0L, trunc(x), fract }
135 fstp %st(3) // { 0.0L, trunc(x), 0.0L or 1.0L }
136 fstp %st(0) // { trunc(x), 0.0L or 1.0L }
137 fadd %st(1), %st(0) // { correctly rounded x, 0.0L or 1.0L }
138 fstp %st(1) // { correctly rounded x }
139 fabs // { |result| }
140 cmp $0, %edx
141 jge 1f
142 fchs // flip the sign if x is negative
1431: ADDP $(32-FRAME_SIZE), STACKP
144 ret
145
146L_roundDown: // { trunc(x), x }
147 fsubr %st(0), %st(1) // { trunc(x), fract }
148 fldz // { 0.0L, trunc(x), fract }
149 fucomi %st(2), %st(0) // { 0.0L, trunc(x), fract } 0.0L > fract ?
150 fld1 // { 1.0L, 0.0L, trunc(x), fract }
151 fcmovb %st(1), %st(0) // { 0.0L or 1.0L, 0.0L, trunc(x), fract }
152 fstp %st(3) // { 0.0L, trunc(x), 0.0L or 1.0L }
153 fstp %st(0) // { trunc(x), 0.0L or 1.0L }
154 fsub %st(1), %st(0) // { correctly rounded x, 0.0L or 1.0L }
155 fstp %st(1) // { correctly rounded x }
156 fabs // { |result| }
157 cmp $0, %edx
158 jge 1f
159 fchs // flip the sign if x is negative
1601: ADDP $(32-FRAME_SIZE), STACKP
161 ret
162
163L_xIsLargeOrSmall:
164 jae L_truncate // NaN, |x| >= 2**63 we are done
165
166 // |x| < 1.0f skip to rounding
167 fstp %st(1) // { x }
168 andw $0x8000, 8(STACKP) // zero exponent of truncated value, but preserve sign
169 jmp L_reentryForSmallX