this repo has no description
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at fixPythonPipStalling 169 lines 9.9 kB view raw
1// long double nearbyintl(long double x) 2// 3// returns the argument rounded to an integral value using the prevailing 4// rounding mode, and without raising the inexact flag (C99 7.12.9.4). 5// 6// Original version due to Ian Ollmann, 2007 7// 8// Edits by Steve Canon January 2010: 9// rearrange branches to match nearbyint[f] 10// minimal tune-up to get sign of zero right 11 12#include <machine/asm.h> 13#include "abi.h" 14 15ENTRY( nearbyintl ) 16 SUBP $(32-FRAME_SIZE), STACKP 17 18 //write the current rounding mode to the stack. Using the mxcsr instead cost an extra 20 cycles. 19 fnstcw 16(STACKP) 20 21 fldz // { 0 } 22 movq 32(STACKP), %xmm0 //Load the mantissa 23 fldt 32(STACKP) // { x, 0 } 24 pxor %xmm1, %xmm1 // set our default truncation mask to 0 25 xorl %ecx, %ecx 26 movw 40(STACKP), %cx //Load the signed exponent 27 movl %ecx, %edx // save signed exponent 28 movw %cx, 8(STACKP) // write out exponent of truncated value 29 andl $0x8000, %edx // sign 30 andl $0x7fff, %ecx // biased exponent 31 shll $16, %edx // sign << 16 32 addl $(16384-62), %ecx // add 16384-62 to exponent. This forces Inf, NaN, numbers >= 2**63 to be negative 33 cmpw $(16383+16384-62), %cx // values with exponents less than 16384-62 are fractional numbers only 34 jl L_xIsLargeOrSmall // branch for |x| < 1.0L, |x| >= 0x1.0p63L, NaN 35 36 //Now we know we have a number 1.0 <= |x| < 2**63 37 38 fstp %st(1) // { x } 39 //create a mask of non-fractional bits to calculate trunc(x) 40 subl $(16383+16384-62+63), %ecx // unbias the exponent, undo our magic above 41 negl %ecx // bits to shift -1ULL left to create a truncation mask 42 pcmpeqb %xmm1, %xmm1 // -1ULL 43 movd %ecx, %xmm7 // bits to shift -1ULL left to create a truncation mask 44 psllq %xmm7, %xmm1 // left shift -1ULL by number of fractional bits in x 45 46 //Round. We get here in one of two ways: 47 // fall through from above: xmm2 has a truncation mask 48 // jmp from |x| < 1.0: xmm2 holds the sign of x (stored as +-0.0f), which rounds x to zero of the correct sign in this step 49L_reentryForSmallX: 50 pand %xmm0, %xmm1 // trunc(mantissa) 51 movq %xmm1, (STACKP) 52 fldt (STACKP) // { trunc(x), x } 53 fucomi %st(1), %st(0) // trunc(x) == x ? 54 je L_truncate // if the value was an integer, we are done, so return x (satisfies x==0, small integer cases ) 55 56 //find our what our rounding mode is 57 movl 16(STACKP), %ecx 58 andl $0xC00, %ecx // isolate the RC field 59 subl $0x400, %ecx // move one of the possible values negative 60 cmpw $0x400, %cx // test the RC value 61 62 // Rounding modes in eax: 63 // -0x400 Round to nearest even 64 // 0 Round to -Inf 65 // 0x400 Round to +inf 66 // 0x800 Round to zero 67 68 jg L_truncate //Round to zero mode. Note: signed compare here 69 je L_roundUp //Round to +Inf, Go there 70 jb L_roundDown //Round to -Inf, Go there. Note: unsigned compare here 71 72 //Round to nearest even is the fall through path, because it is most common 73 fsubr %st(0), %st(1) // { trunc(x), fract } 74 75 orl $0x3f000000, %edx // copysign( 0.5f, x ) 76 movl %edx, 16(STACKP) 77 flds 16(STACKP) // { copysign( 0.5L, x ), trunc(x), fract } 78 79 //check for fract == copysign( 0.5f, x ) 80 81 fld %st(2) // { fract, copysign( 0.5L, x ), trunc(x), fract } 82 fabs // { |fract|, copysign( 0.5L, x ), trunc(x), fract } 83 fld %st(1) // { copysign( 0.5L, x ), |fract|, copysign( 0.5L, x ), trunc(x), fract } 84 fabs // { 0.5L, |fract|, copysign( 0.5L, x ), trunc(x), fract } 85 fucomip %st(1), %st(0) // { |fract|, copysign( 0.5L, x ), trunc(x), fract } 86 fstp %st(0) // { copysign( 0.5L, x ), trunc(x), fract } 87 je L_exactHalfway // |fract| == 0.5, figure out which way to round in the comfort and safety of a separate code branch 88 89 fldz // { 0.0L, copysign( 0.5L, x ), trunc(x), fract } 90 fcmovb %st(1), %st(0) // { 0.5L < |fract| ? copysign( 0.5L, x ) : 0.0L, copysign( 0.5L, x ), trunc(x), fract } 91 fstp %st(1) // { 0.5L < |fract| ? copysign( 0.5L, x ) : 0.0L, trunc(x), fract } 92 fadd %st(0), %st(0) // { 0.5L < |fract| ? copysign( 1.0L, x ) : 0.0L, trunc(x), fract } 93 faddp // { correctly rounded x, fract } 94 fstp %st(1) 95 fabs // { |result| } 96 cmp $0, %edx 97 jge 1f 98 fchs // flip the sign if x is negative 991: ADDP $(32-FRAME_SIZE), STACKP 100 ret 101 102L_exactHalfway: // { copysign( 0.5L, x ), trunc(x), fract } 103 addl $0x20000000, %edx // copysign( 0x1.0p63f, x ) 104 movl %edx, 16(STACKP) // copysign( 0x1.0p63f, x ) 105 flds 16(STACKP) // { copysign( 0x1.0p63L, x ), copysign( 0.5L, x ), trunc(x), fract } 106 fadd %st(2), %st(0) // { trunc(x) + copysign( 0x1.0p63L, x ), copysign( 0.5L, x ), trunc(x), fract } 107 fstpt (STACKP) // { copysign( 0.5L, x ), trunc(x), fract } 108 fstp %st(0) // { trunc(x), fract } 109 fxch // { fract, trunc(x) } 110 fadd %st(0), %st(0) // { copysign( 1.0L, x ), trunc(x) } 111 movl $1, %eax // 112 fldz // { 0, copysign( 1.0L, x ), trunc(x) } 113 andl (STACKP), %eax // (mantissa & 1ULL) == 0 114 fcmovne %st(1), %st(0) // { rounding value, copysign( 1.0L, x ), trunc(x) } 115 fstp %st(1) // { rounding value, trunc(x) } 116 faddp // { result } 117 fabs // { |result| } 118 cmp $0, %edx 119 jge 1f 120 fchs // flip the sign if x is negative 1211: ADDP $(32-FRAME_SIZE), STACKP 122 ret 123 124L_truncate: 125 fstp %st(1) 126 ADDP $(32-FRAME_SIZE), STACKP 127 ret 128 129L_roundUp: // { trunc(x), x } 130 fsubr %st(0), %st(1) // { trunc(x), fract } 131 fldz // { 0.0L, trunc(x), fract } 132 fucomi %st(2), %st(0) // { 0.0L, trunc(x), fract } 0.0L < fract ? 133 fld1 // { 1.0L, 0.0L, trunc(x), fract } 134 fcmovnbe %st(1), %st(0) // { 0.0L or 1.0L, 0.0L, trunc(x), fract } 135 fstp %st(3) // { 0.0L, trunc(x), 0.0L or 1.0L } 136 fstp %st(0) // { trunc(x), 0.0L or 1.0L } 137 fadd %st(1), %st(0) // { correctly rounded x, 0.0L or 1.0L } 138 fstp %st(1) // { correctly rounded x } 139 fabs // { |result| } 140 cmp $0, %edx 141 jge 1f 142 fchs // flip the sign if x is negative 1431: ADDP $(32-FRAME_SIZE), STACKP 144 ret 145 146L_roundDown: // { trunc(x), x } 147 fsubr %st(0), %st(1) // { trunc(x), fract } 148 fldz // { 0.0L, trunc(x), fract } 149 fucomi %st(2), %st(0) // { 0.0L, trunc(x), fract } 0.0L > fract ? 150 fld1 // { 1.0L, 0.0L, trunc(x), fract } 151 fcmovb %st(1), %st(0) // { 0.0L or 1.0L, 0.0L, trunc(x), fract } 152 fstp %st(3) // { 0.0L, trunc(x), 0.0L or 1.0L } 153 fstp %st(0) // { trunc(x), 0.0L or 1.0L } 154 fsub %st(1), %st(0) // { correctly rounded x, 0.0L or 1.0L } 155 fstp %st(1) // { correctly rounded x } 156 fabs // { |result| } 157 cmp $0, %edx 158 jge 1f 159 fchs // flip the sign if x is negative 1601: ADDP $(32-FRAME_SIZE), STACKP 161 ret 162 163L_xIsLargeOrSmall: 164 jae L_truncate // NaN, |x| >= 2**63 we are done 165 166 // |x| < 1.0f skip to rounding 167 fstp %st(1) // { x } 168 andw $0x8000, 8(STACKP) // zero exponent of truncated value, but preserve sign 169 jmp L_reentryForSmallX