/*filename: intpow.c       revision date: 12-12-2013  */

/*"Computing x^n where n is an integer" by Gary Knott and Daniel Kerner */

/* This file contains the routine intpow(x,n) where x is a 64-bit
double floating-point value and n is a 16-bit integer.  intpow
computes the best approximation of x^n and returns this 64-bit
floating-point value.

The code below depends on being linked with appropriate
overflow-exception handling code such as that found in MLAB.
Specifically, when a floating-point arithmetic operation produces
a result whose magnitude is too great to represent as a 64-bit 
floating-point value (also known as a 64-bit 'double',)
we expect the overflow handler to "patch-in" the correctly-signed
largest-magnitude representable value, either MAXPOS or MAXNEG.

[MAXPOS = the positive largest magnitude 64-bit double 
        = (2^1023)*(1+(2^52-1)/2^52) 
        = approx 1.797...E+308 
        = (0 | 11111111110 | 1111...1111) 
              as (sign|biased-exponent|fraction)

 MAXNEG = the negative largest magnitude 64-bit double 
        = -MAXPOS
        = (1 | 11111111110 | 1111...1111)
]

If we have a "hard"-underflow, the floating-point chip acceptably
produces 0 "automatically" as the result, so the underflow exception
is to be "masked-off".

The detailed anatomy of an IEEE-standard 64-bit (double)
floating-point number is as follows.

A 64-bit floating-point value is of the form (g b f) where g is the
sign bit (0 for + and 1 for -,) b is the 11-bit biased-exponent, and f
is the 52-bit fraction-part.  The actual exponent e represented by b
is e= b-1023, unless b= 11111111111 or unless b = 00000000000. (1023
is the "bias".) The biased-exponent b = 00000000000 corresponds to 
e=-1022, the same as b = 00000000001.  Also, the largest biased-
exponent is b = 11111111110, corresponding to e = 1023.

The 64-bit floating-point value (g b f) represents the real value
s*(2^(b-1023))*[1 +f/2^52] when 00000000001 <= b <= 11111111110, where
s = 1 when g=0 and s= -1 when g=1. (Note f is treated as an integer.)
These values are called the normalized floating-point numbers.  The
largest positive normalized number is (2^1023)[2-1/2^52]. The smallest
positive normalized number is 2^-1022.

When b = 0000000000, the 64-bit floating-point value (g b f)
represents the real value s*(2^-1022))*[f/2^52], where s = 1 when g=0
and s= -1 when g=1.  These values are called the denormalized
floating-point numbers.  The largest positive denormalized number is
(2^-1022)[1-1/2^52]. The smallest positive denormalized number is
2^-1074. (Denormalized floating-point numbers have successively lower
precision as they decrease.)

The value 0 is represented with b = 00000000000 and f=0; g may be
either 0 or 1.

When b = 11111111111, the 64-bit floating-point value (g b f)
represents a NaN value (NaN stands for "not a number".)  The NaNs
where f=0 are often called 'infinity' NaNs. The floating-point
arithmetic unit has special behavior associated with NaNs, none of
which we want to use.

------------------

We have the following constants of interest.

   maxpos  = the positive largest magnitude 64-bit double 
           = (2^1023)*(1+(2^52-1)/2^52)) = (2^1023)*(2-2^-52)
           = approx 1.797...E+308.
           = (0 | 11111111110  | 1111...1111)

   maxneg  = the negative largest magnitude 64-bit double
           = -(2^1023)*(1+(2^52-1)/2^52))
           = approx -1.797...E+308.
           = (1 | 11111111110 | 1111...1111)

   minpos  = the positive smallest magnitude 64-bit normalized double
           = 2^(-1022)
           = approx 2.225073858507e-308.
           = (0 | 00000000001 | 0000...0000)

   minneg  = the negative smallest magnitude 64-bit normalized double 
           = -2^(-1022)
           = approx -2.225073858507e-308.
           = (1 | 00000000001 | 0000...0000)

   dminpos = the positive smallest magnitude 64-bit denormal double 
           = 2^(-1022)*2^(-52)
           = approx 4.940656458412465e-324
           = (0 | 00000000000 | 0000...0001)  
 
   dminneg = the negative smallest magnitude 64-bit denormal double
           = -2^(-1022)*2^(-52)
           = approx -4.940656458412465e-324
           = (1 | 00000000000 |  0000...0001)  

   one     = 1.0
           = (2^0)*(1+0)
           = (0 | 01111111111 | 0000...0000)

   meps    = "machine epsilon".  This is the least positive value
             such that 1+meps > 1.
           = 2^-52
           = (0 | 01111001011 | 0000...0000)

   NaN    = (g | 11111111111 |  xxxx...xxxx) (NaN means "not-a-number".)

*/

/****************************************************************
   This file contains the functions:
 (private)     basetwoexp()  double's base-2 exponent
 (private)     pintpow()     positive integer power of a double
 (export)      intpow()      integer power of a double
*****************************************************************/

/* stdcsi.h contains the definitions of the macros EQ, AND, OR,
   LITTLEENDIAN, int32, etc. used herein, as well as all other needed
   definitions and declarations such as the function warning(). */
#include "stdcsi.h"

/* The parity macro defined here is used in intpow().  This macro uses
   the fmod() C-library function: fmod(x,n) = x - iround(x/n)*n where
   iround(a) = floor(a) when a>=0 and iround(a) = ceil(a) when a<0.
   This definition holds for all real x and n.  When n=2, then
   fmod(x,2) = 0 only when x is an even integer.x  */
#define parity(y)   ((fmod(y,2.)==0)?0:1))

/*===================================================================*/
private int16 basetwoexp(double d)
/*--------------------------------------------------------------------
 Compute the (unbiased) base-2 exponent of the 64-bit floating-point
 value d.  The result is the correct integer value (the bias has
 been removed, except if d is a denormal, -1023 will be returned
 instead of the correct smaller value.
 *--------------------------------------------------------------------*/
{int32 x,*lptr,i;
 double z;

 /* Set lptr to the address of the first byte of the 8-byte argument.*/
 lptr = (int32 *)&d;

 /* Set x to the 32-bit field of the 64-bit double that contains the
    biased exponent.  This is either the first 4 bytes if we have a
    big-endian machine, or the second 4 bytes if we have a little-endian
    machine (e.g. an Intel 80x86).*/
#ifdef BIGENDIAN
 /* This is for Motorola 68K and PowerPC chips, among others.*/
 x = *lptr;
#endif /* BIGENDIAN */

#ifdef LITTLEENDIAN
 /* This is for Intel 80x86-based chips, among others.*/
 x = *(lptr+1);
#endif /* LITTLEENDIAN */

 /* Zero all bits in x that are not biased exponent bits to extract
    the 11-bit biased-exponent field.*/
 x = x&0x7ff00000;

 /* Right-justify (shift down) the exponent in the 32-bit field.*/
 x = x>>20;

 /* Subtract the exponent bias 1023 to obtain the true exponent.
    Note, the biased-exponent 00000000000 really corresponds to -1022
    (specifying 2^(-1022)) rather than -1023. (Such a zero-valued
    biased-exponent specifies a "Denormal" value where the implicit
    integer digit is 0!)  Thus, to indicate the biased-exponent
    00000000000 was seen, we return -1023 (!)

    The exponent field 00000000001 represents -1022, 01111111110 =
    represents -1, 01111111111 = represents 0, 10000000000 represents
    1, 10000000001 represents 2, and 11111111110 represents
    1023. (11111111111 represents a NaN.)
*/
 return(x-1023);
}

/*====================================================================*/
private double pintpow(double x, int32 n)
/* double x; base
 * int32 n;  exponent
 */
/*----------------------------------------------------------------------
 Compute x^n by shifting n and accumulating factors of x raised to the
 powers of 2 corresponding to the one-bits in n.  When this routine is
 called, we will have: x != 0, x != 1, and n > 1 with n an integer.
 Note, x^n may overflow if x > 1, or x^n may be a denormal or
 underflow to 0 if x < 1.  As discussed above, this code needs an
 appropriate overflow-handler to work properly.

 Note, the argument n will be the absolute value of an int16 
 in the range [-32768,32767], but an int32 is required to hold 
 abs(-32768).
 ----------------------------------------------------------------------*/
{double r;

  /* Since this function is never called with x = 0, if we return 0, it
     must be due to an underflow to 0.  If we wish, we could instead
     return (s^n)*DMINPOS which is the least-magnitude correctly-signed
     representable non-zero floating-point value, where s = sign(x).
     Returning 0 values accuracy more, but returning the denormal
     (s^n)*DMINPOS considers returning a result of the correct sign
     more important than minimizing the absolute error in the result.
     Both points-of-view are valid.  
  */
 r = 1.0;
 for (;;)
    {if ((n&1) != 0) r *= x;
     n >>= 1; /* n = n >> 1; i.e., n = floor(n/2) */
     if (n EQ 0) return(r);
     x *= x; /* Here x is replaced by one of  x^2, x^4, x^8, .... */
    }
}

/*=======================================================================*/
export double intpow(double x, int16 n)
/* double x; base
 * int16 n;  exponent
 */
/*-----------------------------------------------------------------------
n is an integer and x is a 64-bit double floating-point value.  We
compute x^n and return its 64-bit double value. The magnitude of this
value is either 0, or lies in the range [DMINPOS, MAXPOS] where
DMINPOS is the least representable positive 64-bit double value
(DMINPOS = (2^(-1022))*(2^(-52)) = approx 4.940656458412465e-324,) and
MAXPOS = the largest representable positive 64-bit double value ( =
(2^1023)* (1+(2^52-1)/2^52) = approx 1.797...E+308.) (DMINPOS and
MAXPOS are not reciprocals because our floating-point number system
admits so-called "denormals".)

Except for various special cases, x^n is computed by shifting n and
accumulating a product of factors composed of x raised to the powers
of 2 corresponding to one-bits in n.

As discussed above, this code needs an appropriate overflow-handler
to work.

Let m = -n, a = abs(x), and let s = sign(x) (=1 or 0 or -1). 

We have the following sequential cases: 

   x=0 and n>0: Return 0.

   x=0 and n<=0: Return error-this is 0^0 or 1/(0^m).

   x=1 or n=0: Return 1.

   n=-1: Return 1./x

   n>0: Compute x^n by accumulating products of powers of x.  An
        overflow in x^n results in (s^n)*MAXPOS.  Note when abs(x)<1, x^n
        might be a denormal or underflow to 0.

   a>=1 and n<0: Compute 1/(x^m) where m = -n.  We can only do this if
        a^m < MAXPOS; otherwise we must compute (1/x)^m and accept 0 if
        we underflow. We prefer to compute 1/(x^m) rather than (1/x)^m,
        because the latter has more chance of greater round-off error in
        the result, especially when a is a not-too-large positive
        integer.

   a<1 and n<0: Compute 1/(x^m).  Note, x^m might be a denormal.  If
       so, 1/(x^m) will overflow to MAXPOS or MAXNEG, as appropriate.
       If x^m underflows to 0, then we must generate the
       correctly-signed result, MAXPOS or MAXNEG, explicitly.

--------------------------------------------------------------------*/
{int32 m;
 int16 k;
 double v,a;

#define LOG2MAXPOS   1023.99

 if (x EQ 0.) 
   {if (n > 0) return(0.);
    if (n EQ 0) 
      {warning("Trying to compute 0^0, 1 is returned.");
       return(1.); /* This is a 0^0 error.*/
      }
    warning("Trying to compute 1/(0^m), MAXPOS is returned.");
    return(MAXPOS); /* This is a 1/(0^m) error.*/
   }

 /* Here, x != 0.*/
 if ((x EQ 1.) OR (n EQ 0)) return(1.);
 if (n EQ -1) return(1./x);
 if (n > 0) return(pintpow(x,(int32)n));
 /* When x<1, pintpow(x,n) may be a denormal or underflow to 0. */

 /* Here we have n < 0.  Note, here -32768 <= n <= -2.*/
 a = fabs(x); m = -(int32)n;

 if (a >= 1.) 
    {/* If a^m <= MAXPOS return 1./pintpow(x,m). This is equivalent
        to: if (m*log2(a) <= log2(MAXPOS)).  We must err on the
        conservative side: a^m <= MAXPOS must never be decided to be
        true when it is false, but we may decide a^m > MAXPOS when
        really a^m <= MAXPOS.

        Let a = (2^k)*f where 0 < f < 2.  Then log2(a) = k+log2(f).
        Also log2(f) < 1.  Thus log2(a) < k+1.  Therefore, the test
        m*(k+1) <= log2(MAXPOS) will be appropriate when log2(MAXPOS)
        is greater than or equal to the true log2(MAXPOS) value.*/
      k = basetwoexp(a); /* Here a is a normalized positive double. */

     if ((m*(k+1)) <= LOG2MAXPOS) return(1./pintpow(x,m));
        /* Here the value pintpow(a,m) will never be greater than
           MAXPOS, so computing 1./pintpow(x,m) will never involve an
           overflow. Below, x^m might overflow so we compute
           (1/x)^m. Note pintpow(1./x,m) may be a denormal or 
           underflow to 0.  See the remark in pintpow.*/
     return(pintpow(1./x,m)); 
    }

 /* Here, a<1, the values a and x might be denormals, and pintpow(x,m)
    might be a denormal or 0. See the remark in pintpow. */
 v = pintpow(x,m);
 /* pintpow(x,m) might be denormal, or even underflowed to 0.*/

 if (v EQ 0.) {if (parity(n) EQ 1) return(MAXNEG); return(MAXPOS);}
 return(1./v); 
 /* Note, if v is denormal, 1./v will overflow and MAXPOS or MAXNEG, as
    appropriate, will be returned courtesy of our overflow handler.*/
}
/* end of file intpow.c */