src/libm/Source/Intel/xmm_erfgamma.c at fixPythonPipStalling

overby.me / darling-nix
fork
this repo has no description
fork
darling-nix / src / libm / Source / Intel / xmm_erfgamma.c
at fixPythonPipStalling 1639 lines 62 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
   1/*
   2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
   3 *
   4 * @APPLE_LICENSE_HEADER_START@
   5 * 
   6 * The contents of this file constitute Original Code as defined in and
   7 * are subject to the Apple Public Source License Version 1.1 (the
   8 * "License").  You may not use this file except in compliance with the
   9 * License.  Please obtain a copy of the License at
  10 * http://www.apple.com/publicsource and read it before using this file.
  11 * 
  12 * This Original Code and all software distributed under the License are
  13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17 * License for the specific language governing rights and limitations
  18 * under the License.
  19 * 
  20 * @APPLE_LICENSE_HEADER_END@
  21 */
  22/*******************************************************************************
  23*                                                                              *
  24*      File gamma.c,                                                           *
  25*      Functions gamma(x),                                                     *
  26*      Implementation of the gamma function for the PowerPC.                   *
  27*                                                                              *
  28*      Copyright c 1993 Apple Computer, Inc.  All rights reserved.             *
  29*                                                                              *
  30*      Written by Ali Sazegari, started on January 1993,                       *
  31*		Ported by Ian Ollmann to i386, October 2005                            *
  32*                                                                              *
  33*      based on FORTRAN routines in SpecFun by J. W. Cody and L. Stoltz of     *
  34*      Applied Mathematics Division of Argonne National Laboratory,            *
  35*      Argonne, IL 60439.                                                      *
  36*                                                                              *
  37*      W A R N I N G:  This routine expects a 64 bit double model.             *
  38*                                                                              *
  39*      January  29  1993: First C implementation for PowerPC.                  *
  40*      April    26  1993: fixed a few bugs in the interval [xbig,inf].         *
  41*      July     14  1993: added #pragma fenv_access. This function is now      *
  42*                         using the the string oriented �nan�.  replaced       *
  43*                         feholdexcept by _feprocentry to guard rounding.      *
  44*      July     29  1993: corrected the string nan.                            *
  45*      October  07  1993: removed the raising of the overflow flag for arg= �. *
  46*      September19  1994: changed all environemental funtions to __setflm,     *
  47*                         changed HUGE_VAL to Huge.d for perfrormance.         *
  48*      January  11  1995: a humilating error corrected.  in the interval       *
  49*                         [12,178], the nonexistant array element c[7] is      *
  50*                         addressed.  it should be c[6].                       *
  51*                         a short error analysis reveals that in double        *
  52*                         precision referencing c[7] instead of c[6] has no    *
  53*                         impact on the accuracy of the result, provided that  *
  54*                         the compiler assigns 0.0 to c[7], which the ibm xlc  *
  55*                         does.  this explains why the double precision        *
  56*                         gamma function passed accuracy tests.  the relative  *
  57*                         error in extended is at most 5.56E-17 and occurs     *
  58*                         for x=12.0.  the calculation is no longer affected   *
  59*                         for arguments x�19.                                  *
  60*                                                                              *
  61********************************************************************************
  62*                                                                              *
  63*                              G  A  M  M  A  (  X  )                          *
  64*                                                                              *
  65*      The gamma function is an increasing function over [2,inf].  For large   *
  66*      enough x, it behaves like [ e^( x ln ( x/ e ) ] / sqrt(x/pi).  It may   *
  67*      be more appropriate to work with the logorithm of Gamma.                *
  68*                                                                              *
  69*      This routine calculates the gamma function for a real argument x.       *
  70*      Computation is based on an algorithm outlined in reference 1 below.     *
  71*      The program uses rational functions that approximate the gamma          *
  72*      function to at least 20 significant decimal digits.  Coefficients       *
  73*      for the approximation over the interval (1,2) are unpublished.          *
  74*      Those for the approximation for x >= 12 are from reference 2 below.     *
  75*                                                                              *
  76********************************************************************************
  77*                                                                              *
  78*      Explanation of machine-dependent constants:                             *
  79*                                                                              *
  80*      xbig     - the largest argument for which gamma(x) is representable     *
  81*                 in the machine, i.e., the solution to the equation           *
  82*                 gamma ( xbig ) = 2 ** MaxExp, where MaxExp is the maximum    *
  83*                 power of 2 before infinity;                                  *
  84*      xinf     - the largest machine representable floating-point number      *
  85*                 before infinity, approximately 2 ** MaxExp;                  *
  86*      eps      - the smallest positive floating-point number such that        *
  87*                 1.0 + eps > 1.0;                                             *
  88*      MinimumX - the smallest positive floating-point number such that        *
  89*                 1/MinimumX is machine representable.                         *
  90*                                                                              *
  91*      Approximate values for the macintosh and the powerpc are:               *
  92*                                                                              *
  93*                                base       MaxExp        xbig                 *
  94*                                                                              *
  95*      Macintosh     extended     2         16382        1755.36               *
  96*      PowerPC        double      2          1023        171.624               *
  97*                                                                              *
  98*                                 xinf         eps        MinimumX             *
  99*                                                                              *
 100*      Macintosh     extended   1.19x+4932   5.42x-20    8.40x-4933            *
 101*      PowerPC        double    1.79d+308    2.22d-16    2.23d-308             *
 102*                                                                              *
 103********************************************************************************
 104*                                                                              *
 105*      The program returns a quiet NaN for singularities and infinity when     *
 106*      overflow occurs.  The computation is believed to be free of undeserved  *
 107*      underflow and overflow.                                                 *
 108*                                                                              *
 109*      References:                                                             *
 110*                                                                              *
 111*      [1] "An Overview of Software Development for Special Functions",        *
 112*          W. J. Cody, Lecture Notes in Mathematics, 506, Numerical Analysis   *
 113*          Dundee, 1975, G. A. Watson (ed.), Springer Verlag, Berlin, 1976.    *
 114*                                                                              *
 115*      [2] Computer Approximations, Hart, Et. Al., Wiley and sons, New York    *
 116*          1968.                                                               *
 117*                                                                              *
 118*******************************************************************************/
 119
 120#include      "math.h"
 121#include	  "float.h"
 122#include      "fenv.h"
 123#include      "fenv_private.h"
 124
 125/*******************************************************************************
 126*            Functions needed for the computation.                             *
 127*******************************************************************************/
 128
 129/*     the following fp.h functions are used:                                 */
 130/*     exp, log, sin, __fpclassifyd and nan;                                  */
 131/*     the following environmental functions are used:                        */
 132/*     __setflm.                                                              */
 133
 134#include "xmmLibm_prefix.h"
 135
 136/*******************************************************************************
 137*     Coefficients for P in gamma approximation over [1,2] in decreasing order.*
 138*******************************************************************************/
 139
 140static const double p[8] = { -1.71618513886549492533811e+0,
 141                        2.47656508055759199108314e+1,
 142                       -3.79804256470945635097577e+2,
 143                        6.29331155312818442661052e+2,
 144                        8.66966202790413211295064e+2,
 145                       -3.14512729688483675254357e+4,
 146                       -3.61444134186911729807069e+4,
 147                        6.64561438202405440627855e+4 };
 148
 149/*******************************************************************************
 150*     Coefficients for Q in gamma approximation over [1,2] in decreasing order.*
 151*******************************************************************************/
 152
 153static const double q[8] = { -3.08402300119738975254353e+1,
 154                        3.15350626979604161529144e+2,
 155                       -1.01515636749021914166146e+3,
 156                       -3.10777167157231109440444e+3,
 157                        2.25381184209801510330112e+4,
 158                        4.75584627752788110767815e+3,
 159                       -1.34659959864969306392456e+5,
 160                       -1.15132259675553483497211e+5 };
 161                         
 162/*******************************************************************************
 163*     Coefficients for Stirling's series  for ln(Gamma) over [12, INF].        *
 164*******************************************************************************/
 165
 166static const long double c[7] = {	0xa.aaaaaaaaaaaaaabp-7,
 167	-0xb.60b60b60b60b60bp-12,
 168	0xd.00d00d00d00d00dp-14,
 169	-0x9.c09c09c09c09c0ap-14,
 170	0xd.ca8f158c7f91ab8p-14,
 171	-0xf.b5586ccc9e3e41p-13,
 1720xd.20d20d20d20d20dp-11 };
 173
 174static const double LogSqrt2pi = 0.9189385332046727417803297e+0;
 175static const double pi         = 3.1415926535897932384626434e+0;
 176static const double xbig       = 171.624e+0;
 177static const double MinimumX   = 2.23e-308;
 178static const double eps        = 2.22e-16;
 179
 180#define      GAMMA_NAN      "42"
 181#define      SET_INVALID    0x01000000
 182
 183
 184static inline double _tgamma ( double x )   ALWAYS_INLINE;
 185static inline double _tgamma ( double x )
 186{
 187      register int n, parity, i;
 188      register double y, y1, result, fact, fraction, z, numerator, denominator, ysquared, sum; 
 189    
 190	
 191/*******************************************************************************
 192*     The next switch will decipher what sort of argument we have. If argument *
 193*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
 194*******************************************************************************/
 195
 196	if( x != x )
 197		return x + x;		//silence NaN
 198	
 199	if( x == 0.0 )
 200		return LogSqrt2pi / x;
 201	
 202	if( __builtin_fabs(x) == __builtin_inf() )
 203	{
 204		if( x < 0 )
 205		{
 206			SET_INVALID_FLAG();
 207			return __builtin_nan( GAMMA_NAN );
 208		}
 209	
 210		return x;
 211	}
 212
 213      
 214	parity = 0;
 215	fact = 1.0;
 216	n = 0;
 217	y = x;
 218
 219/*******************************************************************************
 220*     The argument is negative.                                                *
 221*******************************************************************************/
 222
 223	if ( y <= 0.0 )
 224	{
 225		y = - x;
 226		if ( y < MinimumX )
 227			return 1.0 / x;
 228
 229		y1 = trunc ( y );
 230		fraction = y - y1;
 231		if ( fraction != 0.0 )                   /* is it an integer?   */
 232	    {                                   /* is it odd or even?  */
 233			if ( y1 != trunc ( y1 * 0.5 ) * 2.0 ) 
 234				parity = 1;
 235			fact = - pi / sin ( pi * fraction );
 236			y += 1.0;
 237		}
 238        else
 239        {
 240			SET_INVALID_FLAG();
 241			return __builtin_nan( GAMMA_NAN );
 242		}
 243	}
 244
 245/*******************************************************************************
 246*     The argument is positive.                                                *
 247*******************************************************************************/
 248
 249	if ( y < eps )                         /* argument is less than epsilon. */
 250	{
 251		result = 1.0 / y;
 252	}
 253	else if ( y < 12.0 )                 /* argument x is eps < x < 12.0.  */
 254	{
 255		y1 = y;
 256		if ( y < 1.0 )                 /* x is in (eps, 1.0).            */
 257		{
 258			z = y;
 259			y += 1.0;
 260		}
 261		else                           /* x is in [1.0,12.0].            */
 262		{
 263			n = ( int ) y - 1;
 264			y -= ( double ) n;
 265			z = y - 1.0;
 266		}
 267		numerator = 0.0;
 268		denominator = 1.0;
 269		for ( i = 0; i < 8; i++ )
 270		{
 271			numerator = ( numerator + p[i] ) * z;
 272			denominator = denominator * z + q[i];
 273		}
 274		result = numerator / denominator + 1.0;
 275		if ( y1 < y )
 276			result /= y1;
 277		else if ( y1 > y )
 278		{
 279			for ( i = 0; i < n; i++ )
 280			{
 281				result *= y;
 282				y += 1.0;
 283			}
 284		}
 285	}
 286	else
 287	{
 288		if ( x <= xbig )
 289		{
 290			ysquared = y * y;
 291			sum = c[6];
 292			for ( i=5; i >= 0; i-- )
 293			sum = sum / ysquared + c[i];
 294			sum = sum / y - y + LogSqrt2pi;
 295			sum += ( y - 0.5 ) * log ( y );
 296			result = exp ( sum );
 297		}
 298		else
 299			return x * 0x1.0p1023;			//set overflow, return inf
 300	}
 301	
 302	if ( parity ) 
 303		result = - result;
 304	if ( fact != 1.0 ) 
 305		result = fact / result;
 306
 307	return result;
 308}
 309
 310double tgamma ( double x )
 311{
 312	return _tgamma( x );
 313}
 314
 315double gamma ( double x )   //legacy -- required for various calculators in the OS and 3rd party
 316{
 317    return _tgamma( x );
 318}
 319
 320float tgammaf( float x )
 321{
 322	return (float) _tgamma( x );
 323}
 324
 325/*******************************************************************************
 326*     Coefficients for P in gamma approximation over [1,2] in decreasing order.*
 327*******************************************************************************/
 328
 329static const long double pL[8] = { -1.71618513886549492533811e+0L,
 330									2.47656508055759199108314e+1L,
 331								   -3.79804256470945635097577e+2L,
 332									6.29331155312818442661052e+2L,
 333									8.66966202790413211295064e+2L,
 334								   -3.14512729688483675254357e+4L,
 335								   -3.61444134186911729807069e+4L,
 336									6.64561438202405440627855e+4L };
 337
 338/*******************************************************************************
 339*     Coefficients for Q in gamma approximation over [1,2] in decreasing order.*
 340*******************************************************************************/
 341
 342static const long double qL[8] = { -3.08402300119738975254353e+1L,
 343									3.15350626979604161529144e+2L,
 344								   -1.01515636749021914166146e+3L,
 345								   -3.10777167157231109440444e+3L,
 346									2.25381184209801510330112e+4L,
 347									4.75584627752788110767815e+3L,
 348								   -1.34659959864969306392456e+5L,
 349								   -1.15132259675553483497211e+5L };
 350                         
 351/*******************************************************************************
 352*     Coefficients for Stirling's Approximation to ln(Gamma) on [12,inf]       *
 353*******************************************************************************/
 354
 355static const long double cL[7] = {	0xa.aaaaaaaaaaaaaabp-7L,
 356																	 -0xb.60b60b60b60b60bp-12L,
 357																		0xd.00d00d00d00d00dp-14L,
 358																	 -0x9.c09c09c09c09c0ap-14L,
 359																		0xd.ca8f158c7f91ab8p-14L,
 360																	 -0xf.b5586ccc9e3e41p-13L,
 361																		0xd.20d20d20d20d20dp-11L };
 362
 363static const long double LogSqrt2piL = 0.9189385332046727417803297e+0L;		//Ln( sqrt( 2*pi))
 364static const long double piL         = 3.1415926535897932384626434e+0L;		//pi
 365static const long double xbigL       = 0xd.b718c066b352e21p+7L;						//cutoff for overflow condition = 1755.54...  
 366static const long double MinimumXL   = 1.0022L * LDBL_MIN;					//
 367static const long double epsL        = 0.9998L * LDBL_EPSILON;
 368
 369
 370long double tgammal( long double x )
 371{
 372      register int n, parity, i;
 373      register long double y, y1, result, fact, fraction, z, numerator, denominator, ysquared, sum; 
 374    
 375	
 376/*******************************************************************************
 377*     The next switch will decipher what sort of argument we have. If argument *
 378*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
 379*******************************************************************************/
 380
 381	if( x != x )
 382		return x + x;		//silence NaN
 383	
 384	if( x == 0.0 )
 385		return LogSqrt2piL / x;
 386	
 387	if( __builtin_fabsl(x) == __builtin_infl() )
 388	{
 389		if( x < 0.0L )
 390		{
 391			SET_INVALID_FLAG();
 392			return __builtin_nanl( GAMMA_NAN ); 
 393		}
 394	
 395		return x;
 396	}
 397
 398      
 399	parity = 0;
 400	fact = 1.0L;
 401	n = 0;
 402	y = x;
 403
 404/*******************************************************************************
 405*     The argument is negative.                                                *
 406*******************************************************************************/
 407
 408	if ( y <= 0.0L )
 409	{
 410		y = - x;
 411		if ( y < MinimumXL )
 412			return 1.0L / x;
 413
 414		y1 = truncl( y );
 415		fraction = y - y1;
 416		if ( fraction != 0.0L )             /* is it an integer?   */
 417	    {                                   /* is it odd or even?  */
 418			if ( y1 != truncl ( y1 * 0.5L ) * 2.0L ) 
 419				parity = 1;
 420			fact = - piL / sinl ( piL * fraction );
 421			y += 1.0L;
 422		}
 423        else
 424        {
 425			volatile long double err = __builtin_nanl( GAMMA_NAN );
 426			return err + (int) err;
 427		}
 428	}
 429
 430/*******************************************************************************
 431*     The argument is positive.                                                *
 432*******************************************************************************/
 433
 434	if ( y < epsL )                         /* argument is less than epsilon. */
 435	{
 436		result = 1.0L / y;
 437	}
 438	else if ( y < 12.0L )                 /* argument x is eps < x < 12.0.  */
 439	{
 440		y1 = y;
 441		if ( y < 1.0L )                 /* x is in (eps, 1.0).            */
 442		{
 443			z = y;
 444			y += 1.0L;
 445		}
 446		else                           /* x is in [1.0,12.0].            */
 447		{
 448			n = ( int ) y - 1;
 449			y -= ( long double ) n;
 450			z = y - 1.0L;
 451		}
 452		numerator = 0.0L;
 453		denominator = 1.0L;
 454		for ( i = 0; i < 8; i++ )
 455		{
 456			numerator = ( numerator + pL[i] ) * z;
 457			denominator = denominator * z + qL[i];
 458		}
 459		result = numerator / denominator + 1.0L;
 460		if ( y1 < y )
 461			result /= y1;
 462		else if ( y1 > y )
 463		{
 464			for ( i = 0; i < n; i++ )
 465			{
 466				result *= y;
 467				y += 1.0L;
 468			}
 469		}
 470	}
 471	else
 472	{
 473		if ( x <= xbigL )
 474		{
 475			ysquared = y * y;
 476			sum = cL[6];
 477			for ( i = 5; i >= 0; i-- )
 478				sum = sum / ysquared + cL[i];
 479			sum = sum / y - y + LogSqrt2piL;
 480			sum += ( y - 0.5L ) * logl( y );
 481			result = expl ( sum );
 482		}
 483		else
 484			return x * 0x1.0p16383L;			//set overflow, return inf
 485	}
 486	
 487	if ( parity ) 
 488		result = - result;
 489	if ( fact != 1.0L ) 
 490		result = fact / result;
 491
 492	return result;
 493}
 494
 495#pragma mark -
 496
 497/*******************************************************************************
 498*     Coefficients for P1 in lgamma approximation over [0.5,1.5) in decreasing *
 499*     order.                                                                   *
 500*******************************************************************************/
 501
 502static const double d1 = -5.772156649015328605195174e-1;
 503
 504static const double p1[8] = { 4.945235359296727046734888e+0,
 505                        2.018112620856775083915565e+2,
 506                        2.290838373831346393026739e+3,
 507                        1.131967205903380828685045e+4,
 508                        2.855724635671635335736389e+4,
 509                        3.848496228443793359990269e+4,
 510                        2.637748787624195437963534e+4,
 511                        7.225813979700288197698961e+3 };
 512                       
 513/*******************************************************************************
 514*     Coefficients for Q1 in gamma approximation over [0.5,1.5) in decreasing  *
 515*     order.                                                                   *
 516*******************************************************************************/
 517
 518static const double q1[8] = { 6.748212550303777196073036e+1,
 519                        1.113332393857199323513008e+3,
 520                        7.738757056935398733233834e+3,
 521                        2.763987074403340708898585e+4,
 522                        5.499310206226157329794414e+4,
 523                        6.161122180066002127833352e+4,
 524                        3.635127591501940507276287e+4,
 525                        8.785536302431013170870835e+3 };
 526                        
 527/*******************************************************************************
 528*     Coefficients for P2 in lgamma approximation over [1.5,4) in decreasing   *
 529*     order.                                                                   *
 530*******************************************************************************/
 531
 532static const double d2 = 4.227843350984671393993777e-1;
 533
 534static const double p2[8] = { 4.974607845568932035012064e+0,
 535                        5.424138599891070494101986e+2,
 536                        1.550693864978364947665077e+4,
 537                        1.847932904445632425417223e+5,
 538                        1.088204769468828767498470e+6,
 539                        3.338152967987029735917223e+6,
 540                        5.106661678927352456275255e+6,
 541                        3.074109054850539556250927e+6 };
 542
 543/*******************************************************************************
 544*     Coefficients for Q2 in gamma approximation over [1.5,4) in decreasing    *
 545*     order.                                                                   *
 546*******************************************************************************/
 547
 548static const double q2[8] = { 1.830328399370592604055942e+2,
 549                        7.765049321445005871323047e+3,
 550                        1.331903827966074194402448e+5,
 551                        1.136705821321969608938755e+6,
 552                        5.267964117437946917577538e+6,
 553                        1.346701454311101692290052e+7,
 554                        1.782736530353274213975932e+7,
 555                        9.533095591844353613395747e+6 };
 556
 557/*******************************************************************************
 558*     Coefficients for P4 in lgamma approximation over [4,12) in decreasing    *
 559*     order.                                                                   *
 560*******************************************************************************/
 561
 562static const double d4 = 1.791759469228055000094023e+0;
 563
 564static const double p4[8] = { 1.474502166059939948905062e+04,
 565                        2.426813369486704502836312e+06,
 566                        1.214755574045093227939592e+08,
 567                        2.663432449630976949898078e+09,
 568                        2.940378956634553899906876e+10,
 569                        1.702665737765398868392998e+11,
 570                        4.926125793377430887588120e+11,
 571                        5.606251856223951465078242e+11 };
 572
 573/*******************************************************************************
 574*     Coefficients for Q4 in gamma approximation over [4,12) in decreasing     *
 575*     order.                                                                   *
 576*******************************************************************************/
 577
 578static const double q4[8] = { 2.690530175870899333379843e+03,
 579                        6.393885654300092398984238e+05,
 580                        4.135599930241388052042842e+07,
 581                        1.120872109616147941376570e+09,
 582                        1.488613728678813811542398e+10,
 583                        1.016803586272438228077304e+11,
 584                        3.417476345507377132798597e+11,
 585                        4.463158187419713286462081e+11 };
 586                        
 587/*******************************************************************************
 588*     Coefficients for minimax approximation over [12, xbig].                  *
 589*******************************************************************************/
 590
 591static const double cc[7] = { -1.910444077728e-03,
 592                        8.4171387781295e-04,
 593                       -5.952379913043012e-04,
 594                        7.93650793500350248e-04,
 595                       -2.777777777777681622553e-03,
 596                        8.333333333333333331554247e-02,
 597                        5.7083835261e-03 };
 598
 599static const double xbigger       = 2.55e+305;
 600static const double Root4xbig  = 2.25e+76;
 601static const double pnt68      = 0.6796875e+0;
 602
 603static const double twoTo52      = 0x1.0p+52; // 4503599627370496.0;
 604
 605/*******************************************************************************
 606*            Value of special function NaN.                                    *
 607*******************************************************************************/
 608
 609#define      SET_INVALID    0x01000000
 610#define      GAMMA_NAN      "42"
 611
 612/* Note: The use of signgam is not thread safe */
 613/* The value of signgam is not meaningful if the result is NaN, but will be 1 */
 614int signgam; /* global return value by lgamma of the sign of gamma(x). */
 615
 616static inline double lgammaApprox ( double x, int *psigngam );
 617static inline double lgammaApprox ( double x, int *psigngam )
 618{
 619      register int i;
 620      register double y, result, numerator, denominator, ysquared, 
 621                      corrector, xMinus1, xMinus2, xMinus4; 
 622      
 623	  *psigngam = 1; 
 624      
 625/*******************************************************************************
 626*     The next switch will decipher what sort of argument we have. If argument *
 627*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
 628*******************************************************************************/
 629	
 630	if( x != x )
 631		return x + x;
 632		
 633	if( x == 0.0 )
 634		return 1.0 / __builtin_fabs( x );			//set div/0 return inf
 635
 636	if( __builtin_fabs( x ) == __builtin_inf() )
 637		return __builtin_fabs( x );
 638      
 639/*******************************************************************************
 640 *      For negative x, since (G is gamma function)
 641 *		-x*G(-x)*G(x) = pi/sin(pi*x),
 642 * 	we have
 643 * 		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
 644 *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
 645 *	Hence, for x<0, signgam = sign(sin(pi*x)) and
 646 *		lgamma(x) = log(|Gamma(x)|)
 647 *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
 648 *******************************************************************************/
 649
 650	if ( x < 0.0 )
 651	{
 652		int dummy = 1; 	
 653		register double a, y1, fraction;
 654            
 655		if ( x <= -twoTo52 ) // big negative integer?
 656			return x / -0.0;
 657                
 658		y = - x;
 659		y1 = trunc ( y );
 660		fraction = y - y1; // excess over the boundary
 661            
 662		if ( fraction == 0.0 ) // negative integer?
 663			return x / -0.0;
 664		else {
 665			a = sin ( pi * fraction );
 666			if ( y1 == trunc ( y1 * 0.5 ) * 2.0 ) // 0, 2, 4, ...
 667				{
 668				*psigngam = -1; /* Gamma(x) < 0 */ 
 669				} // otherwise leave psigngam = 1
 670            }
 671			
 672		return log ( pi / __builtin_fabs ( a * x ) ) - lgammaApprox ( -x, &dummy );
 673	}
 674      
 675/*******************************************************************************
 676*     The argument is positive, if it is bigger than xbigger = 2.55e+305 then     *
 677*     overflow.                                                                *
 678*******************************************************************************/
 679
 680	if ( x > xbigger )
 681		return x * 0x1.0p1023;	//return inf, set overflow
 682
 683	y = x;
 684
 685/*******************************************************************************
 686*     x <= eps then return the approximation log(x).                           *
 687*******************************************************************************/
 688
 689	if ( y <= eps )
 690		return ( - log ( y ) );
 691
 692/*******************************************************************************
 693*     x is in (eps,1.5] then use d1, p1 and q1 coefficients.                   *
 694*******************************************************************************/
 695
 696	else if ( y <= 1.5 )
 697	{
 698		if ( y < pnt68 )
 699		{
 700			corrector = - log ( y );
 701			xMinus1 = y;
 702		}
 703		else
 704	    {
 705			corrector = 0.0;
 706			xMinus1 = ( y - 0.5 ) - 0.5;
 707		}
 708		
 709		if ( ( y <= 0.5 ) || ( y >= pnt68 ) )
 710		{
 711			denominator = 1.0;
 712			numerator = 0.0;
 713			for ( i = 0; i < 8; i++ )
 714			{
 715				numerator = numerator * xMinus1 + p1[i];
 716				denominator = denominator * xMinus1 + q1[i];
 717			}
 718			result = corrector + ( xMinus1 * ( d1 + xMinus1 * ( numerator / denominator ) ) );
 719		}
 720		else
 721		{
 722			xMinus2 = ( y - 0.5 ) - 0.5;
 723			denominator = 1.0;
 724			numerator = 0.0;
 725			for ( i = 0; i < 8; i++ )
 726			{
 727				numerator = numerator * xMinus2 + p2[i];
 728				denominator = denominator * xMinus2 + q2[i];
 729			}
 730			result = corrector + ( xMinus2 * ( d2 + xMinus2 * ( numerator / denominator ) ) );
 731		}
 732	}
 733
 734/*******************************************************************************
 735*     x is in (1.5,4.0] then use d2, p2 and q2 coefficients.                   *
 736*******************************************************************************/
 737
 738	else if ( y <= 4.0 )
 739	{
 740		xMinus2 = y - 2.0;
 741		denominator = 1.0;
 742		numerator = 0.0;
 743		for ( i = 0; i < 8; i++ )
 744		{
 745			numerator = numerator * xMinus2 + p2[i];
 746			denominator = denominator * xMinus2 + q2[i];
 747		}
 748		result = xMinus2 * ( d2 + xMinus2 * ( numerator / denominator ) );
 749	}
 750            
 751/*******************************************************************************
 752*     x is in (4.0,12.0] then use d4, p4 and q4 coefficients.                  *
 753*******************************************************************************/
 754
 755	else if ( y <= 12.0 )
 756	{
 757		xMinus4 = y - 4.0;
 758		denominator = - 1.0;
 759		numerator = 0.0;
 760		for ( i = 0; i < 8; i++ )
 761		{
 762			numerator = numerator * xMinus4 + p4[i];
 763			denominator = denominator * xMinus4 + q4[i];
 764		}
 765		result = d4 + xMinus4 * ( numerator / denominator );
 766	}
 767	else  /* ( y >= 12.0 ) */
 768	{
 769		result = 0.0;
 770		if ( y <= Root4xbig )
 771		{
 772			result = cc[6];
 773			ysquared = y * y;
 774			for ( i = 0; i < 6; i++ )
 775				result = result / ysquared + cc[i];
 776		}
 777		result /= y;
 778		corrector = log ( y );
 779		result += LogSqrt2pi - 0.5 * corrector;
 780		result += y * ( corrector - 1.0 );
 781	}
 782      
 783	x = rint ( x ); // INEXACT set as a side effect for non integer x  
 784	return result;
 785}
 786      
 787double lgamma ( double x ) //sets signgam as side effect
 788{
 789    return lgammaApprox ( x, &signgam );
 790}
 791
 792double lgamma_r( double , int * );
 793double lgamma_r( double x , int *psigngam ) // threadsafe.
 794{
 795	return lgammaApprox(x, psigngam);
 796}
 797
 798float lgammaf( float x ) //sets signgam as side effect
 799{
 800    return (float) lgammaApprox ( x, &signgam );
 801}
 802
 803float lgammaf_r( float , int * );
 804float lgammaf_r( float x , int *psigngam ) // threadsafe.
 805{
 806	double lg = lgammaApprox((double)x, psigngam);
 807	return (float)lg;
 808}
 809
 810/*******************************************************************************
 811*     Coefficients for P1 in lgamma approximation over [0.5,1.5) in decreasing *
 812*     order.                                                                   *
 813*******************************************************************************/
 814
 815static const long double d1L = -5.772156649015328605195174e-1L;
 816
 817static const long double p1L[8] = { 4.945235359296727046734888e+0L,
 818									2.018112620856775083915565e+2L,
 819									2.290838373831346393026739e+3L,
 820									1.131967205903380828685045e+4L,
 821									2.855724635671635335736389e+4L,
 822									3.848496228443793359990269e+4L,
 823									2.637748787624195437963534e+4L,
 824									7.225813979700288197698961e+3L };
 825                       
 826/*******************************************************************************
 827*     Coefficients for Q1 in gamma approximation over [0.5,1.5) in decreasing  *
 828*     order.                                                                   *
 829*******************************************************************************/
 830
 831static const long double q1L[8] = { 6.748212550303777196073036e+1L,
 832									1.113332393857199323513008e+3L,
 833									7.738757056935398733233834e+3L,
 834									2.763987074403340708898585e+4L,
 835									5.499310206226157329794414e+4L,
 836									6.161122180066002127833352e+4L,
 837									3.635127591501940507276287e+4L,
 838									8.785536302431013170870835e+3L };
 839                        
 840/*******************************************************************************
 841*     Coefficients for P2 in lgamma approximation over [1.5,4) in decreasing   *
 842*     order.                                                                   *
 843*******************************************************************************/
 844
 845static const long double d2L = 4.227843350984671393993777e-1L;
 846
 847static const long double p2L[8] = { 4.974607845568932035012064e+0L,
 848									5.424138599891070494101986e+2L,
 849									1.550693864978364947665077e+4L,
 850									1.847932904445632425417223e+5L,
 851									1.088204769468828767498470e+6L,
 852									3.338152967987029735917223e+6L,
 853									5.106661678927352456275255e+6L,
 854									3.074109054850539556250927e+6L };
 855
 856/*******************************************************************************
 857*     Coefficients for Q2 in gamma approximation over [1.5,4) in decreasing    *
 858*     order.                                                                   *
 859*******************************************************************************/
 860
 861static const long double q2L[8] = {	1.830328399370592604055942e+2L,
 862									7.765049321445005871323047e+3L,
 863									1.331903827966074194402448e+5L,
 864									1.136705821321969608938755e+6L,
 865									5.267964117437946917577538e+6L,
 866									1.346701454311101692290052e+7L,
 867									1.782736530353274213975932e+7L,
 868									9.533095591844353613395747e+6L };
 869
 870/*******************************************************************************
 871*     Coefficients for P4 in lgamma approximation over [4,12) in decreasing    *
 872*     order.                                                                   *
 873*******************************************************************************/
 874
 875static const long double d4L = 1.791759469228055000094023e+0L;
 876
 877static const long double p4L[8] = { 1.474502166059939948905062e+04L,
 878									2.426813369486704502836312e+06L,
 879									1.214755574045093227939592e+08L,
 880									2.663432449630976949898078e+09L,
 881									2.940378956634553899906876e+10L,
 882									1.702665737765398868392998e+11L,
 883									4.926125793377430887588120e+11L,
 884									5.606251856223951465078242e+11L };
 885
 886/*******************************************************************************
 887*     Coefficients for Q4 in gamma approximation over [4,12) in decreasing     *
 888*     order.                                                                   *
 889*******************************************************************************/
 890
 891static const long double q4L[8] = { 2.690530175870899333379843e+03L,
 892									6.393885654300092398984238e+05L,
 893									4.135599930241388052042842e+07L,
 894									1.120872109616147941376570e+09L,
 895									1.488613728678813811542398e+10L,
 896									1.016803586272438228077304e+11L,
 897									3.417476345507377132798597e+11L,
 898									4.463158187419713286462081e+11L };
 899                        
 900/*******************************************************************************
 901*     Coefficients for minimax approximation over [12, xbig].                  *
 902*******************************************************************************/
 903
 904static const long double ccL[7] = { -1.910444077728e-03L,
 905									8.4171387781295e-04L,
 906								   -5.952379913043012e-04L,
 907									7.93650793500350248e-04L,
 908								   -2.777777777777681622553e-03L,
 909									8.333333333333333331554247e-02L,
 910									5.7083835261e-03L };
 911
 912static const long double xbiggerL		= 2.55e+305L;		// ????
 913static const long double Root4xbigL		= 2.25e+76L;		// ????  //pow( xbiggerL, 0.25 )
 914static const long double pnt68L			= 0.6796875e+0L;	
 915
 916static const long double twoTo63		= 0x1.0p+63L; // 4503599627370496.0;
 917
 918
 919static inline long double lgammaApproxL ( long double x, int *psigngam );
 920static inline long double lgammaApproxL ( long double x, int *psigngam )
 921{
 922      register int i;
 923      register long double y, result, numerator, denominator, ysquared, 
 924                      corrector, xMinus1, xMinus2, xMinus4; 
 925      
 926	  *psigngam = 1; 
 927      
 928/*******************************************************************************
 929*     The next switch will decipher what sort of argument we have. If argument *
 930*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
 931*******************************************************************************/
 932	
 933	if( x != x )
 934		return x + x;
 935		
 936	if( x == 0.0L )
 937		return 1.0L / __builtin_fabsl( x );			//set div/0 return inf
 938
 939	if( __builtin_fabsl( x ) == __builtin_infl() )
 940		return __builtin_fabsl( x );
 941      
 942/*******************************************************************************
 943 *      For negative x, since (G is gamma function)
 944 *		-x*G(-x)*G(x) = pi/sin(pi*x),
 945 * 	we have
 946 * 		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
 947 *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
 948 *	Hence, for x<0, signgam = sign(sin(pi*x)) and
 949 *		lgamma(x) = log(|Gamma(x)|)
 950 *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
 951 *******************************************************************************/
 952
 953	if ( x < 0.0L )
 954	{
 955		int dummy = 1; 	
 956		register long double a, y1, fraction;
 957            
 958		if ( x <= -twoTo63 ) // big negative integer?
 959			return x / -0.0L;
 960                
 961		y = - x;
 962		y1 = truncl( y );
 963		fraction = y - y1; // excess over the boundary
 964            
 965		if ( fraction == 0.0L ) // negative integer?
 966			return x / -0.0L;
 967		else {
 968			a = sinl ( pi * fraction );
 969			if ( y1 == truncl ( y1 * 0.5 ) * 2.0 ) // 0, 2, 4, ...
 970				{
 971				*psigngam = -1; /* Gamma(x) < 0 */ 
 972				} // otherwise leave psigngam = 1
 973            }
 974			
 975		return logl ( pi / __builtin_fabsl ( a * x ) ) - lgammaApproxL ( -x, &dummy );
 976	}
 977      
 978/*******************************************************************************
 979*     The argument is positive, if it is bigger than xbigger = 2.55e+305 then     *
 980*     overflow.                                                                *
 981*******************************************************************************/
 982
 983	if ( x > xbiggerL )
 984		return x * 0x1.0p16383L;	//return inf, set overflow
 985
 986	y = x;
 987
 988/*******************************************************************************
 989*     x <= eps then return the approximation log(x).                           *
 990*******************************************************************************/
 991
 992	if ( y <= epsL )
 993		return ( - logl ( y ) );
 994
 995/*******************************************************************************
 996*     x is in (eps,1.5] then use d1, p1 and q1 coefficients.                   *
 997*******************************************************************************/
 998
 999	else if ( y <= 1.5L )
1000	{
1001		if ( y < pnt68L )
1002		{
1003			corrector = - logl ( y );
1004			xMinus1 = y;
1005		}
1006		else
1007	    {
1008			corrector = 0.0L;
1009			xMinus1 = ( y - 0.5L ) - 0.5L;
1010		}
1011		
1012		if ( ( y <= 0.5 ) || ( y >= pnt68L ) )
1013		{
1014			denominator = 1.0L;
1015			numerator = 0.0L;
1016			for ( i = 0; i < 8; i++ )
1017			{
1018				numerator = numerator * xMinus1 + p1L[i];
1019				denominator = denominator * xMinus1 + q1L[i];
1020			}
1021			result = corrector + ( xMinus1 * ( d1L + xMinus1 * ( numerator / denominator ) ) );
1022		}
1023		else
1024		{
1025			xMinus2 = ( y - 0.5L ) - 0.5L;
1026			denominator = 1.0L;
1027			numerator = 0.0L;
1028			for ( i = 0; i < 8; i++ )
1029			{
1030				numerator = numerator * xMinus2 + p2L[i];
1031				denominator = denominator * xMinus2 + q2L[i];
1032			}
1033			result = corrector + ( xMinus2 * ( d2L + xMinus2 * ( numerator / denominator ) ) );
1034		}
1035	}
1036
1037/*******************************************************************************
1038*     x is in (1.5,4.0] then use d2, p2 and q2 coefficients.                   *
1039*******************************************************************************/
1040
1041	else if ( y <= 4.0L )
1042	{
1043		xMinus2 = y - 2.0L;
1044		denominator = 1.0L;
1045		numerator = 0.0L;
1046		for ( i = 0; i < 8; i++ )
1047		{
1048			numerator = numerator * xMinus2 + p2L[i];
1049			denominator = denominator * xMinus2 + q2L[i];
1050		}
1051		result = xMinus2 * ( d2L + xMinus2 * ( numerator / denominator ) );
1052	}
1053            
1054/*******************************************************************************
1055*     x is in (4.0,12.0] then use d4, p4 and q4 coefficients.                  *
1056*******************************************************************************/
1057
1058	else if ( y <= 12.0L )
1059	{
1060		xMinus4 = y - 4.0L;
1061		denominator = - 1.0L;
1062		numerator = 0.0L;
1063		for ( i = 0; i < 8; i++ )
1064		{
1065			numerator = numerator * xMinus4 + p4L[i];
1066			denominator = denominator * xMinus4 + q4L[i];
1067		}
1068		result = d4L + xMinus4 * ( numerator / denominator );
1069	}
1070	else  /* ( y >= 12.0 ) */
1071	{
1072		result = 0.0L;
1073		if ( y <= Root4xbigL )
1074		{
1075			result = ccL[6];
1076			ysquared = y * y;
1077			for ( i = 0; i < 6; i++ )
1078				result = result / ysquared + ccL[i];
1079		}
1080		result /= y;
1081		corrector = logl( y );
1082		result += LogSqrt2piL - 0.5L * corrector;
1083		result += y * ( corrector - 1.0L );
1084	}
1085      
1086	x = rintl ( x ); // INEXACT set as a side effect for non integer x  
1087	return result;
1088}
1089
1090long double lgammal ( long double x ) //sets signgam as side effect
1091{
1092    return lgammaApproxL ( x, &signgam );
1093}
1094
1095long double lgammal_r ( long double, int * );
1096long double lgammal_r ( long double x, int *psigngam )
1097{
1098	return lgammaApproxL ( x, psigngam );
1099}
1100
1101#pragma mark -
1102
1103/*******************************************************************************
1104*     Coefficients for approximation to erf in [ -0.46875, 0.46875] in         *
1105*     decreasing order.                                                        *
1106*******************************************************************************/
1107
1108static const double a[5] = { 3.16112374387056560e+0,
1109                       1.13864154151050156e+2,
1110                       3.77485237685302021e+2,
1111                       3.20937758913846947e+3,
1112                       1.85777706184603153e-1 };
1113                       
1114static const double b[4] = { 2.36012909523441209e+1,
1115                       2.44024637934444173e+2,
1116                       1.28261652607737228e+3,
1117                       2.84423683343917062e+3 };
1118     
1119/*******************************************************************************
1120*     Coefficients for approximation to erfc in [-4.0,-0.46875)U(0.46875,4.0]  *
1121*     in decreasing order.                                                     *
1122*******************************************************************************/
1123
1124static const double ccc[9] = { 5.64188496988670089e-1,
1125                       8.88314979438837594e+0,
1126                       6.61191906371416295e+1,
1127                       2.98635138197400131e+2,
1128                       8.81952221241769090e+2,
1129                       1.71204761263407058e+3,
1130                       2.05107837782607147e+3,
1131                       1.23033935479799725e+3,
1132                       2.15311535474403846e-8 };
1133
1134static const double d[8] = { 1.57449261107098347e+1,
1135                       1.17693950891312499e+2,
1136                       5.37181101862009858e+2,
1137                       1.62138957456669019e+3,
1138                       3.29079923573345963e+3,
1139                       4.36261909014324716e+3,
1140                       3.43936767414372164e+3,
1141                       1.23033935480374942e+3 };
1142                       
1143/*******************************************************************************
1144*    Coefficients for approximation to  erfc in [-inf,-4.0)U(4.0,inf] in       *
1145*    decreasing order.                                                         *
1146*******************************************************************************/
1147
1148static const double pp[6] = { 3.05326634961232344e-1,
1149                       3.60344899949804439e-1,
1150                       1.25781726111229246e-1,
1151                       1.60837851487422766e-2,
1152                       6.58749161529837803e-4,
1153                       1.63153871373020978e-2 };
1154
1155static const double qq[5] = { 2.56852019228982242e+0,
1156                       1.87295284992346047e+0,
1157                       5.27905102951428412e-1,
1158                       6.05183413124413191e-2,
1159                       2.33520497626869185e-3 };
1160
1161static const double InvSqrtPI = 5.6418958354775628695e-1;
1162static const double xxbig      = 27.2e+0;		
1163static const double Maximum   = 2.53e+307;
1164static const double _HUGE      = 6.71e+7;
1165
1166static inline double ErrFunApprox ( double arg, double result, int which ) ALWAYS_INLINE;
1167
1168/*******************************************************************************
1169*              E   R   R   O   R      F   U   N   C   T   I   O   N            *
1170*******************************************************************************/
1171
1172double erf ( double x )
1173{
1174	register int which = 0;
1175	register double result = 0.0;
1176      
1177/*******************************************************************************
1178*     The next switch will decipher what sort of argument we have. If argument *
1179*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
1180*******************************************************************************/
1181
1182	if( x != x )
1183		return x + x;
1184
1185	if( x == 0.0 )
1186		return x;
1187
1188	if( __builtin_fabs(x) == __builtin_inf() )
1189		return x > 0.0 ? 1.0 : -1.0;
1190
1191      result = 1.0;
1192      result = ErrFunApprox ( x, result, which );
1193
1194/*******************************************************************************
1195*      Take care of the negative argument.                                     *
1196*******************************************************************************/
1197
1198	return x < 0 ? -result : result;
1199}
1200
1201float erff( float x )
1202{
1203	register int which = 0;
1204	register float result = 0.0f;
1205      
1206/*******************************************************************************
1207*     The next switch will decipher what sort of argument we have. If argument *
1208*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
1209*******************************************************************************/
1210
1211	if( x != x )
1212		return x + x;
1213
1214	if( x == 0.0f )
1215		return x;
1216
1217	if( __builtin_fabsf(x) == __builtin_inff() )
1218		return x > 0.0f ? 1.0f : -1.0f;
1219
1220      result = 1.0f;
1221      result = (float) ErrFunApprox ( x, result, which );
1222
1223/*******************************************************************************
1224*      Take care of the negative argument.                                     *
1225*******************************************************************************/
1226
1227	return x < 0 ? -result : result;
1228}
1229
1230/*******************************************************************************
1231*        C O M P L E M E N T A R Y    E R R O R    F U N C T I O N             *
1232*******************************************************************************/
1233
1234double erfc ( double x )
1235{
1236	register int which = 1;
1237	register double result = 0.0;
1238      
1239      
1240/*******************************************************************************
1241*     The next switch will decipher what sort of argument we have. If argument *
1242*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
1243*******************************************************************************/
1244
1245	if( x != x )
1246		return x + x;			//silence NaNs
1247		
1248	if( x == 0.0 )
1249		return 1.0;
1250	
1251	if( __builtin_fabs(x) == __builtin_inf() )
1252		return x > 0.0 ? 0.0 : 2.0;
1253
1254	
1255	result = 0.0;
1256	result = ErrFunApprox ( x, result, which );
1257
1258/*******************************************************************************
1259*      Take care of the negative argument.                                     *
1260*******************************************************************************/
1261
1262	if ( x < 0.0 )
1263		result = 2.0 - result;
1264      
1265	return ( result );
1266}
1267
1268
1269float erfcf ( float x )
1270{
1271	register int which = 1;
1272	register float result = 0.0f;
1273      
1274      
1275/*******************************************************************************
1276*     The next switch will decipher what sort of argument we have. If argument *
1277*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
1278*******************************************************************************/
1279
1280	if( x != x )
1281		return x + x;			//silence NaNs
1282		
1283	if( x == 0.0f )
1284		return 1.0f;
1285	
1286	if( __builtin_fabsf(x) == __builtin_inff() )
1287		return x > 0.0f ? 0.0f : 2.0f;
1288
1289	
1290	result = 0.0f;
1291	result = (float) ErrFunApprox ( x, result, which );
1292
1293/*******************************************************************************
1294*      Take care of the negative argument.                                     *
1295*******************************************************************************/
1296
1297	if ( x < 0.0f )
1298		result = 2.0f - result;
1299      
1300	return ( result );
1301}
1302
1303
1304/*******************************************************************************
1305*            C  O  R  E    A  P  P  R  O  X  I  M  A  T  I  O  N               *
1306*******************************************************************************/
1307
1308static inline double ErrFunApprox ( double arg, double result, int which )
1309{
1310	register int i;
1311	register double x, y, ysquared, numerator, denominator, del; 
1312
1313	x = arg;
1314	y = __builtin_fabs ( x );
1315
1316/*******************************************************************************
1317*      Evaluate  erfc  for |x| <= 0.46875.                                     *
1318*******************************************************************************/
1319
1320	if ( y <= 0.46875e+0 )
1321	{
1322		ysquared = 0.0;
1323		if ( y > 1.11e-16 )
1324			ysquared = y * y;
1325		numerator = a[4] * ysquared;
1326		denominator = ysquared;
1327		for ( i = 0; i < 3; i++ )
1328		{
1329			numerator = ( numerator + a[i] ) * ysquared;
1330			denominator = ( denominator + b[i] ) * ysquared;
1331		}
1332		result = y * ( numerator + a[3] ) / ( denominator + b[3] );
1333		if ( which )
1334			result = 1.0 - result;
1335		return result;
1336	}
1337
1338/*******************************************************************************
1339*      Evaluate  erfc  for 0.46875 < |x| <= 4.0                                *
1340*******************************************************************************/
1341      
1342	else if ( y <= 4.0 )
1343	{
1344		numerator = ccc[8] * y;
1345		denominator = y;
1346		for ( i = 0; i < 7; i++ )
1347		{
1348			numerator = ( numerator + ccc[i] ) * y;
1349			denominator = ( denominator + d[i] ) * y;
1350		}
1351		result = ( numerator + ccc[7] ) / ( denominator + d[7] );
1352		ysquared = trunc ( y * 16.0 ) / 16.0;
1353		del = ( y - ysquared ) * ( y + ysquared );
1354		result = exp ( - ysquared * ysquared ) * exp ( - del ) * result;
1355	}
1356
1357/*******************************************************************************
1358*      Evaluate  erfc  for |x| > 4.0                                           *
1359*******************************************************************************/
1360      
1361	else
1362	{
1363		if ( y >= xxbig )
1364		{
1365			if ( ( which != 2 ) || ( y >= Maximum ) )
1366			{
1367				if ( which == 1 )
1368				{
1369					double oldresult = result;
1370					result *= 0x1.0000000000001p-1022;
1371					result *= 0x1.0000000000001p-1022;
1372					result *= 0x1.0000000000001p-1022;			//set underflow
1373					result += oldresult;
1374				}
1375
1376				return result;
1377			}
1378			if ( y >= _HUGE )
1379			{
1380				result = InvSqrtPI / y;
1381				return result;
1382			}
1383		}
1384		ysquared = 1.0 / ( y * y );
1385		numerator = pp[5] * ysquared;
1386		denominator = ysquared;
1387		for ( i = 0; i < 4; i++ )
1388		{
1389			numerator = ( numerator + pp[i] ) * ysquared;
1390			denominator = ( denominator + qq[i] ) * ysquared;
1391		}
1392		result = ysquared * ( numerator + pp[4] ) / ( denominator + qq[4] );
1393		result = ( InvSqrtPI - result ) / y;
1394		ysquared = trunc ( y * 16.0 ) / 16.0;
1395		del = ( y - ysquared ) * ( y + ysquared );
1396		result = exp ( - ysquared * ysquared ) * exp ( - del ) * result;
1397	}
1398      
1399/*******************************************************************************
1400*     if the calling function is erf instead of erfc, take care of the		 *
1401*     underserved underflow.  otherwise, the computation will produce the	 *
1402*	exception for erfc.                                                      *
1403*******************************************************************************/
1404
1405		
1406	return ( which ) ? result : ( 0.5 - result ) + 0.5;
1407}
1408
1409
1410/*******************************************************************************
1411*     Coefficients for approximation to erf in [ -0.46875, 0.46875] in         *
1412*     decreasing order.                                                        *
1413*******************************************************************************/
1414
1415static const long double aL[5] = { 3.16112374387056560e+0L,
1416								   1.13864154151050156e+2L,
1417								   3.77485237685302021e+2L,
1418								   3.20937758913846947e+3L,
1419								   1.85777706184603153e-1L };
1420                       
1421static const long double bL[4] = {  2.36012909523441209e+1L,
1422								   2.44024637934444173e+2L,
1423								   1.28261652607737228e+3L,
1424								   2.84423683343917062e+3L };
1425				 
1426/*******************************************************************************
1427*     Coefficients for approximation to erfc in [-4.0,-0.46875)U(0.46875,4.0]  *
1428*     in decreasing order.                                                     *
1429*******************************************************************************/
1430
1431static const long double cccL[9] = {   5.64188496988670089e-1L,
1432									   8.88314979438837594e+0L,
1433									   6.61191906371416295e+1L,
1434									   2.98635138197400131e+2L,
1435									   8.81952221241769090e+2L,
1436									   1.71204761263407058e+3L,
1437									   2.05107837782607147e+3L,
1438									   1.23033935479799725e+3L,
1439									   2.15311535474403846e-8L };
1440
1441static const long double dL[8] = { 1.57449261107098347e+1L,
1442								   1.17693950891312499e+2L,
1443								   5.37181101862009858e+2L,
1444								   1.62138957456669019e+3L,
1445								   3.29079923573345963e+3L,
1446								   4.36261909014324716e+3L,
1447								   3.43936767414372164e+3L,
1448								   1.23033935480374942e+3L };
1449                       
1450/*******************************************************************************
1451*    Coefficients for approximation to  erfc in [-inf,-4.0)U(4.0,inf] in       *
1452*    decreasing order.                                                         *
1453*******************************************************************************/
1454
1455static const long double ppL[6] = { 3.05326634961232344e-1L,
1456								   3.60344899949804439e-1L,
1457								   1.25781726111229246e-1L,
1458								   1.60837851487422766e-2L,
1459								   6.58749161529837803e-4L,
1460								   1.63153871373020978e-2L };
1461
1462static const long double qqL[5] = { 2.56852019228982242e+0L,
1463								   1.87295284992346047e+0L,
1464								   5.27905102951428412e-1L,
1465								   6.05183413124413191e-2L,
1466								   2.33520497626869185e-3L };
1467
1468static const long double InvSqrtPIL = 5.6418958354775628695e-1L;
1469static const long double xxbigL      = 108.7;						// a bit larger than sqrt( ln( LDBL_MAX) )
1470static const long double MaximumL   = ( 2.53e+307L / DBL_MAX ) * LDBL_MAX;
1471static const long double _HUGEL      = 6.71e+7L;			// This appears to not be used because which is always 0 or 1
1472
1473static inline long double ErrFunApproxL ( long double arg, long double result, int which ) ALWAYS_INLINE;
1474
1475static inline long double ErrFunApproxL ( long double arg, long double result, int which ) 
1476{
1477	register int i;
1478	register long double x, y, ysquared, numerator, denominator, del; 
1479
1480	x = arg;
1481	y = __builtin_fabsl ( x );
1482
1483/*******************************************************************************
1484*      Evaluate  erfc  for |x| <= 0.46875.                                     *
1485*******************************************************************************/
1486
1487	if ( y <= 0.46875e+0L )
1488	{
1489		ysquared = 0.0L;
1490		if ( y > LDBL_EPSILON / 2.0L )
1491			ysquared = y * y;
1492		numerator = aL[4] * ysquared;
1493		denominator = ysquared;
1494		for ( i = 0; i < 3; i++ )
1495		{
1496			numerator = ( numerator + aL[i] ) * ysquared;
1497			denominator = ( denominator + bL[i] ) * ysquared;
1498		}
1499		result = y * ( numerator + aL[3] ) / ( denominator + bL[3] );
1500		if ( which )
1501			result = 1.0L - result;
1502		return result;
1503	}
1504
1505/*******************************************************************************
1506*      Evaluate  erfc  for 0.46875 < |x| <= 4.0                                *
1507*******************************************************************************/
1508      
1509	else if ( y <= 4.0 )
1510	{
1511		numerator = cccL[8] * y;
1512		denominator = y;
1513		for ( i = 0; i < 7; i++ )
1514		{
1515			numerator = ( numerator + cccL[i] ) * y;
1516			denominator = ( denominator + dL[i] ) * y;
1517		}
1518		result = ( numerator + cccL[7] ) / ( denominator + dL[7] );
1519		ysquared = trunc ( y * 16.0L ) / 16.0L;
1520		del = ( y - ysquared ) * ( y + ysquared );
1521		result = expl ( - ysquared * ysquared ) * expl ( - del ) * result;
1522	}
1523
1524/*******************************************************************************
1525*      Evaluate  erfc  for |x| > 4.0                                           *
1526*******************************************************************************/
1527      
1528	else
1529	{
1530		if ( y >= xxbigL )
1531		{
1532			if ( ( which != 2 ) || ( y >= MaximumL ) )
1533			{
1534				if ( which == 1 )
1535				{
1536					long double oldresult = result;
1537					result *= 0x1.0000000000001p-16382L;
1538					result *= 0x1.0000000000001p-16382L;
1539					result *= 0x1.0000000000001p-16382L;			//set underflow
1540					result += oldresult;
1541				}
1542
1543				return result;
1544			}
1545			if ( y >= _HUGEL )
1546			{
1547				result = InvSqrtPIL / y;
1548				return result;
1549			}
1550		}
1551		ysquared = 1.0L / ( y * y );
1552		numerator = ppL[5] * ysquared;
1553		denominator = ysquared;
1554		for ( i = 0; i < 4; i++ )
1555		{
1556			numerator = ( numerator + ppL[i] ) * ysquared;
1557			denominator = ( denominator + qqL[i] ) * ysquared;
1558		}
1559		result = ysquared * ( numerator + ppL[4] ) / ( denominator + qqL[4] );
1560		result = ( InvSqrtPIL - result ) / y;
1561		ysquared = trunc ( y * 16.0L ) / 16.0L;
1562		del = ( y - ysquared ) * ( y + ysquared );
1563		result = expl ( - ysquared * ysquared ) * expl ( - del ) * result;
1564	}
1565      
1566/*******************************************************************************
1567*     if the calling function is erf instead of erfc, take care of the		 *
1568*     underserved underflow.  otherwise, the computation will produce the	 *
1569*	exception for erfc.                                                      *
1570*******************************************************************************/
1571
1572		
1573	return ( which ) ? result : ( 0.5L - result ) + 0.5L;
1574}
1575
1576long double erfl ( long double x )
1577{
1578	register int which = 0;
1579	register long double result = 0.0L;
1580      
1581/*******************************************************************************
1582*     The next switch will decipher what sort of argument we have. If argument *
1583*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
1584*******************************************************************************/
1585
1586	if( x != x )
1587		return x + x;
1588
1589	if( x == 0.0L )
1590		return x;
1591
1592	if( __builtin_fabsl(x) == __builtin_infl() )
1593		return x > 0.0L ? 1.0L : -1.0L;
1594
1595      result = 1.0L;
1596      result = ErrFunApproxL ( x, result, which );
1597
1598/*******************************************************************************
1599*      Take care of the negative argument.                                     *
1600*******************************************************************************/
1601
1602	return x < 0 ? -result : result;
1603}
1604
1605
1606long double erfcl ( long double x )
1607{
1608	register int which = 1;
1609	register long double result = 0.0L;
1610      
1611      
1612/*******************************************************************************
1613*     The next switch will decipher what sort of argument we have. If argument *
1614*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
1615*******************************************************************************/
1616
1617	if( x != x )
1618		return x + x;			//silence NaNs
1619		
1620	if( x == 0.0L )
1621		return 1.0L;
1622	
1623	if( __builtin_fabsl(x) == __builtin_infl() )
1624		return x > 0.0L ? 0.0L : 2.0L;
1625
1626	
1627	result = 0.0L;
1628	result = ErrFunApproxL ( x, result, which );
1629
1630/*******************************************************************************
1631*      Take care of the negative argument.                                     *
1632*******************************************************************************/
1633
1634	if ( x < 0.0L )
1635		result = 2.0L - result;
1636      
1637	return ( result );
1638}
1639
Configure Feed

Configure Feed