URL https://opencores.org/ocsvn/plasma/plasma/trunk

Subversion Repositories plasma

[/] [plasma/] [tags/] [V3_0/] [kernel/] [math.c] - Blame information for rev 393

Go to most recent revision | Details | Compare with Previous | View Log


/*--------------------------------------------------------------------
 * TITLE: Plasma Floating Point Library
 * AUTHOR: Steve Rhoads (rhoadss@yahoo.com)
 * DATE CREATED: 3/2/06
 * FILENAME: math.c
 * PROJECT: Plasma CPU core
 * COPYRIGHT: Software placed into the public domain by the author.
 *    Software 'as is' without warranty.  Author liable for nothing.
 * DESCRIPTION:
 *    Plasma Floating Point Library
 *--------------------------------------------------------------------
 * IEEE_fp = sign(1) | exponent(8) | fraction(23)
 * cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...
 * exp(x)=1+x+x^2/2!+x^3/3!+...
 * ln(1+x)=x-x^2/2+x^3/3-x^4/4+...
 * atan(x)=x-x^3/3+x^5/5-x^7/7+...
 * pow(x,y)=exp(y*ln(x))
 * x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))
 * atan(x)=b+atan((x-atan(b))/(1+x*atan(b)))
 * ln(a*x)=ln(a)+ln(x); ln(x^n)=n*ln(x)
 *--------------------------------------------------------------------*/
#include "rtos.h"
 
#define PI ((float)3.1415926)
#define PI_2 ((float)(PI/2.0))
#define PI2 ((float)(PI*2.0))
 
#define FtoL(X) (*(unsigned long*)&(X))
#define LtoF(X) (*(float*)&(X))
 
 
float FP_Neg(float a_fp)
{
   unsigned long a;
   a = FtoL(a_fp);
   a ^= 0x80000000;
   return LtoF(a);
}
 
 
float FP_Add(float a_fp, float b_fp)
{
   unsigned long a, b, c;
   unsigned long as, bs, cs;     //sign
   long ae, af, be, bf, ce, cf;  //exponent and fraction
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   as = a >> 31;                        //sign
   ae = (a >> 23) & 0xff;               //exponent
   af = 0x00800000 | (a & 0x007fffff);  //fraction
   bs = b >> 31;
   be = (b >> 23) & 0xff;
   bf = 0x00800000 | (b & 0x007fffff);
   if(ae > be)
   {
      if(ae - be < 30)
         bf >>= ae - be;
      else
         bf = 0;
      ce = ae;
   }
   else
   {
      if(be - ae < 30)
         af >>= be - ae;
      else
         af = 0;
      ce = be;
   }
   cf = (as ? -af : af) + (bs ? -bf : bf);
   cs = cf < 0;
   cf = cf>=0 ? cf : -cf;
   if(cf == 0)
      return LtoF(cf);
   while(cf & 0xff000000)
   {
      ++ce;
      cf >>= 1;
   }
   while((cf & 0xff800000) == 0)
   {
      --ce;
      cf <<= 1;
   }
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
   if(ce < 1)
      c = 0;
   return LtoF(c);
}
 
 
float FP_Sub(float a_fp, float b_fp)
{
   return FP_Add(a_fp, FP_Neg(b_fp));
}
 
 
float FP_Mult(float a_fp, float b_fp)
{
   unsigned long a, b, c;
   unsigned long as, af, bs, bf, cs, cf;
   long ae, be, ce;
#ifdef WIN32
   unsigned long a2, a1, b2, b1, med1, med2;
#endif
   unsigned long hi, lo;
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   as = a >> 31;
   ae = (a >> 23) & 0xff;
   af = 0x00800000 | (a & 0x007fffff);
   bs = b >> 31;
   be = (b >> 23) & 0xff;
   bf = 0x00800000 | (b & 0x007fffff);
   cs = as ^ bs;
#ifdef WIN32
   a1 = af & 0xffff;
   a2 = af >> 16;
   b1 = bf & 0xffff;
   b2 = bf >> 16;
   lo = a1 * b1;
   med1 = a2 * b1 + (lo >> 16);
   med2 = a1 * b2;
   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
   med1 = (med1 & 0xffff) + (med2 & 0xffff);
   hi += (med1 >> 16);
   lo = (med1 << 16) | (lo & 0xffff);
#else
   lo = OS_AsmMult(af, bf, &hi);
#endif
   cf = (hi << 9) | (lo >> 23);
   ce = ae + be - 0x80 + 1;
   if(cf == 0)
      return LtoF(cf);
   while(cf & 0xff000000)
   {
      ++ce;
      cf >>= 1;
   }
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
   if(ce < 1)
      c = 0;
   return LtoF(c);
}
 
 
float FP_Div(float a_fp, float b_fp)
{
   unsigned long a, b, c;
   unsigned long as, af, bs, bf, cs, cf;
   unsigned long a1, b1;
#if WIN32
   unsigned long a2, b2, med1, med2;
#endif
   unsigned long hi, lo;
   long ae, be, ce, d;
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   as = a >> 31;
   ae = (a >> 23) & 0xff;
   af = 0x00800000 | (a & 0x007fffff);
   bs = b >> 31;
   be = (b >> 23) & 0xff;
   bf = 0x00800000 | (b & 0x007fffff);
   cs = as ^ bs;
   ce = ae - (be - 0x80) + 6 - 8;
   a1 = af << 4; //8
   b1 = bf >> 8;
   cf = a1 / b1;
   cf <<= 12; //8
#if 1                  /*non-quick*/
#if WIN32
   a1 = cf & 0xffff;
   a2 = cf >> 16;
   b1 = bf & 0xffff;
   b2 = bf >> 16;
   lo = a1 * b1;
   med1 =a2 * b1 + (lo >> 16);
   med2 = a1 * b2;
   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
   med1 = (med1 & 0xffff) + (med2 & 0xffff);
   hi += (med1 >> 16);
   lo = (med1 << 16) | (lo & 0xffff);
#else
   lo = OS_AsmMult(cf, bf, &hi);
#endif
   lo = (hi << 8) | (lo >> 24);
   d = af - lo;    //remainder
   assert(-0xffff < d && d < 0xffff);
   d <<= 16;
   b1 = bf >> 8;
   d = d / (long)b1;
   cf += d;
#endif
   if(cf == 0)
      return LtoF(cf);
   while(cf & 0xff000000)
   {
      ++ce;
      cf >>= 1;
   }
   if(ce < 0)
      ce = 0;
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
   if(ce < 1)
      c = 0;
   return LtoF(c);
}
 
 
long FP_ToLong(float a_fp)
{
   unsigned long a;
   unsigned long as;
   long ae;
   long af, shift;
   a = FtoL(a_fp);
   as = a >> 31;
   ae = (a >> 23) & 0xff;
   af = 0x00800000 | (a & 0x007fffff);
   af <<= 7;
   shift = -(ae - 0x80 - 29);
   if(shift > 0)
   {
      if(shift < 31)
         af >>= shift;
      else
         af = 0;
   }
   af = as ? -af: af;
   return af;
}
 
 
float FP_ToFloat(long af)
{
   unsigned long a;
   unsigned long as, ae;
   as = af>=0 ? 0: 1;
   af = af>=0 ? af: -af;
   ae = 0x80 + 22;
   if(af == 0)
      return LtoF(af);
   while(af & 0xff000000)
   {
      ++ae;
      af >>= 1;
   }
   while((af & 0xff800000) == 0)
   {
      --ae;
      af <<= 1;
   }
   a = (as << 31) | (ae << 23) | (af & 0x007fffff);
   return LtoF(a);
}
 
 
//0 iff a==b; 1 iff a>b; -1 iff a<b
int FP_Cmp(float a_fp, float b_fp)
{
   unsigned long a, b;
   unsigned long as, ae, af, bs, be, bf;
   int gt;
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   if(a == b)
      return 0;
   as = a >> 31;
   bs = b >> 31;
   if(as > bs)
      return -1;
   if(as < bs)
      return 1;
   gt = as ? -1 : 1;
   ae = (a >> 23) & 0xff;
   be = (b >> 23) & 0xff;
   if(ae > be)
      return gt;
   if(ae < be)
      return -gt;
   af = 0x00800000 | (a & 0x007fffff);
   bf = 0x00800000 | (b & 0x007fffff);
   if(af > bf)
      return gt;
   return -gt;
}
 
 
int __ltsf2(float a, float b)
{
   return FP_Cmp(a, b);
}
 
int __lesf2(float a, float b)
{
   return FP_Cmp(a, b);
}
 
int __gtsf2(float a, float b)
{
   return FP_Cmp(a, b);
}
 
int __gesf2(float a, float b)
{
   return FP_Cmp(a, b);
}
 
int __eqsf2(float a, float b)
{
   return FtoL(a) != FtoL(b);
}
 
int __nesf2(float a, float b)
{
   return FtoL(a) != FtoL(b);
}
 
 
float FP_Sqrt(float a)
{
   float x1, y1, x2, y2, x3;
   long i;
   x1 = FP_ToFloat(1);
   y1 = FP_Sub(FP_Mult(x1, x1), a);  //y1=x1*x1-a;
   x2 = FP_ToFloat(100);
   y2 = FP_Sub(FP_Mult(x2, x2), a);
   for(i = 0; i < 10; ++i)
   {
      if(FtoL(y1) == FtoL(y2))
         return x2;
      //x3=x2-(x1-x2)*y2/(y1-y2);
      x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));
      x1 = x2;
      y1 = y2;
      x2 = x3;
      y2 = FP_Sub(FP_Mult(x2, x2), a);
   }
   return x2;
}
 
 
float FP_Cos(float rad)
{
   int n;
   float answer, x2, top, bottom, sign;
   while(FP_Cmp(rad, PI2) > 0)
      rad = FP_Sub(rad, PI2);
   while(FP_Cmp(rad, (float)0.0) < 0)
      rad = FP_Add(rad, PI2);
   answer = 1.0;
   sign = 1.0;
   if(FP_Cmp(rad, PI) >= 0)
   {
      rad = FP_Sub(rad, PI);
      sign = FP_ToFloat(-1);
   }
   if(FP_Cmp(rad, PI_2) >= 0)
   {
      rad = FP_Sub(PI, rad);
      sign = FP_Neg(sign);
   }
   x2 = FP_Mult(rad, rad);
   top = 1.0;
   bottom = 1.0;
   for(n = 2; n < 12; n += 2)
   {
      top = FP_Mult(top, FP_Neg(x2));
      bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));
      answer = FP_Add(answer, FP_Div(top, bottom));
   }
   return FP_Mult(answer, sign);
}
 
 
float FP_Sin(float rad)
{
   const float pi_2=(float)(PI/2.0);
   return FP_Cos(FP_Sub(rad, pi_2));
}
 
 
float FP_Atan(float x)
{
   const float b=(float)(PI/8.0);
   const float atan_b=(float)0.37419668; //atan(b);
   int n;
   float answer, x2, top;
   if(FP_Cmp(x, (float)0.0) >= 0)
   {
      if(FP_Cmp(x, (float)1.0) > 0)
         return FP_Sub(PI_2, FP_Atan(FP_Div((float)1.0, x)));
   }
   else
   {
      if(FP_Cmp(x, (float)-1.0) > 0)
         return FP_Sub(-PI_2, FP_Atan(FP_Div((float)1.0, x)));
   }
   if(FP_Cmp(x, (float)0.45) > 0)
   {
      //answer = (x - atan_b) / (1 + x * atan_b);
      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
      //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME fudge?*/
      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
      return answer;
   }
   if(FP_Cmp(x, (float)-0.45) < 0)
   {
      x = FP_Neg(x);
      //answer = (x - atan_b) / (1 + x * atan_b);
      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
      //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME*/
      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
      return FP_Neg(answer);
   }
   answer = x;
   x2 = FP_Mult(FP_Neg(x), x);
   top = x;
   for(n = 3; n < 14; n += 2)
   {
      top = FP_Mult(top, x2);
      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
   }
   return answer;
}
 
 
float FP_Atan2(float y, float x)
{
   float answer,r;
   r = y / x;
   answer = FP_Atan(r);
   if(FP_Cmp(x, (float)0.0) < 0)
   {
      if(FP_Cmp(y, (float)0.0) > 0)
         answer = FP_Add(answer, PI);
      else
         answer = FP_Sub(answer, PI);
   }
   return answer;
}
 
 
float FP_Exp(float x)
{
   const float e2=(float)7.389056099;
   const float inv_e2=(float)0.135335283;
   float answer, top, bottom, mult;
   int n;
 
   mult = 1.0;
   while(FP_Cmp(x, (float)2.0) > 0)
   {
      mult = FP_Mult(mult, e2);
      x = FP_Add(x, (float)-2.0);
   }
   while(FP_Cmp(x, (float)-2.0) < 0)
   {
      mult = FP_Mult(mult, inv_e2);
      x = FP_Add(x, (float)2.0);
   }
   answer = FP_Add((float)1.0, x);
   top = x;
   bottom = 1.0;
   for(n = 2; n < 15; ++n)
   {
      top = FP_Mult(top, x);
      bottom = FP_Mult(bottom, FP_ToFloat(n));
      answer = FP_Add(answer, FP_Div(top, bottom));
   }
   return FP_Mult(answer, mult);
}
 
 
float FP_Log(float x)
{
   const float log_2=(float)0.69314718; /*log(2.0)*/
   int n;
   float answer, top, add;
   add = 0.0;
   while(FP_Cmp(x, 16.0) > 0)
   {
      x = FP_Mult(x, (float)0.0625);
      add = FP_Add(add, (float)(log_2 * 4));
   }
   while(FP_Cmp(x, 1.5) > 0)
   {
      x = FP_Mult(x, 0.5);
      add = FP_Add(add, log_2);
   }
   while(FP_Cmp(x, 0.5) < 0)
   {
      x = FP_Mult(x, 2.0);
      add = FP_Sub(add, log_2);
   }
   x = FP_Sub(x, 1.0);
   answer = 0.0;
   top = -1.0;
   for(n = 1; n < 14; ++n)
   {
      top = FP_Mult(top, FP_Neg(x));
      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
   }
   return FP_Add(answer, add);
}
 
 
float FP_Pow(float x, float y)
{
   return FP_Exp(y * FP_Log(x));
}
 
 
/*************** Test *****************/
#ifdef WIN32
#include <math.h>
#undef printf
int printf(const char *, ...);
struct {
   char *name;
   float low, high;
   double (*func1)(double);
   float (*func2)(float);
} test_info[]={
   {"cos", -2*PI, 2*PI, cos, FP_Cos},
   {"sin", -2*PI, 2*PI, sin, FP_Sin},
   {"atan", -3.0, 2.0, atan, FP_Atan},
   {"log", (float)0.01, (float)4.0, log, FP_Log},
   {"exp", (float)-5.01, (float)30.0, exp, FP_Exp},
   {"sqrt", (float)0.01, (float)1000.0, sqrt, FP_Sqrt}
};
 
 
void TestMathFull(void)
{
   float a, b, c, d;
   float error1, error2, error3, error4, error5;
   int test;
 
   a = PI * PI;
   b = PI;
   c = FP_Div(a, b);
   printf("%10f %10f %10f %10f %10f\n",
      (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
   a = a * 200;
   for(b = -(float)2.718281828*100; b < 300; b += (float)23.678)
   {
      c = FP_Div(a, b);
      d = a / b - c;
      printf("%10f %10f %10f %10f %10f\n",
         (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
   }
   getch();
 
   for(test = 0; test < 6; ++test)
   {
      printf("\nTesting %s\n", test_info[test].name);
      for(a = test_info[test].low;
          a <= test_info[test].high;
          a += (test_info[test].high-test_info[test].low)/(float)20.0)
      {
         b = (float)test_info[test].func1(a);
         c = test_info[test].func2(a);
         d = b - c;
         printf("%s %10f %10f %10f %10f\n", test_info[test].name, a, b, c, d);
      }
      getch();
   }
 
   a = FP_ToFloat((long)6.0);
   b = FP_ToFloat((long)2.0);
   printf("%f %f\n", (double)a, (double)b);
   c = FP_Add(a, b);
   printf("add %f %f\n", (double)(a + b), (double)c);
   c = FP_Sub(a, b);
   printf("sub %f %f\n", (double)(a - b), (double)c);
   c = FP_Mult(a, b);
   printf("mult %f %f\n", (double)(a * b), (double)c);
   c = FP_Div(a, b);
   printf("div %f %f\n", (double)(a / b), (double)c);
   getch();
 
   for(a = (float)-13756.54; a < (float)17400.0; a += (float)64.45)
   {
      for(b = (float)-875.36; b < (float)935.8; b += (float)36.7)
      {
         error1 = (float)1.0 - (a + b) / FP_Add(a, b);
         error2 = (float)1.0 - (a * b) / FP_Mult(a, b);
         error3 = (float)1.0 - (a / b) / FP_Div(a, b);
         error4 = (float)1.0 - a / FP_ToFloat(FP_ToLong(a));
         error5 = error1 + error2 + error3 + error4;
         if(error5 < 0.00005)
            continue;
         printf("ERROR!\n");
         printf("a=%f b=%f\n", (double)a, (double)b);
         printf("  a+b=%f %f\n", (double)(a+b), (double)FP_Add(a, b));
         printf("  a*b=%f %f\n", (double)(a*b), (double)FP_Mult(a, b));
         printf("  a/b=%f %f\n", (double)(a/b), (double)FP_Div(a, b));
         printf("  a=%f %ld %f\n", (double)a, FP_ToLong(a),
                                   (double)FP_ToFloat((long)a));
         printf("  %f %f %f %f\n", (double)error1, (double)error2,
            (double)error3, (double)error4);
         if(error5 > 0.001)
            getch();
      }
   }
   printf("done.\n");
   getch();
}
#endif
 
 

Browse

Tools

Subversion Repositories plasma

[/] [plasma/] [tags/] [V3_0/] [kernel/] [math.c] - Blame information for rev 393

Line No.	Rev	Author	Line
1	138	rhoads	`/*--------------------------------------------------------------------`
2			`* TITLE: Plasma Floating Point Library`
3			`* AUTHOR: Steve Rhoads (rhoadss@yahoo.com)`
4			`* DATE CREATED: 3/2/06`
5			`* FILENAME: math.c`
6			`* PROJECT: Plasma CPU core`
7			`* COPYRIGHT: Software placed into the public domain by the author.`
8			`* Software 'as is' without warranty. Author liable for nothing.`
9			`* DESCRIPTION:`
10			`* Plasma Floating Point Library`
11			`*--------------------------------------------------------------------`
12			`* IEEE_fp = sign(1) \| exponent(8) \| fraction(23)`
13			`* cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...`
14			`* exp(x)=1+x+x^2/2!+x^3/3!+...`
15			`* ln(1+x)=x-x^2/2+x^3/3-x^4/4+...`
16			`* atan(x)=x-x^3/3+x^5/5-x^7/7+...`
17			`* pow(x,y)=exp(y*ln(x))`
18			`* x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))`
19			`* atan(x)=b+atan((x-atan(b))/(1+x*atan(b)))`
20			`* ln(ax)=ln(a)+ln(x); ln(x^n)=nln(x)`
21			`--------------------------------------------------------------------/`
22			`#include "rtos.h"`
23
24			`#define PI ((float)3.1415926)`
25			`#define PI_2 ((float)(PI/2.0))`
26			`#define PI2 ((float)(PI*2.0))`
27
28			`#define FtoL(X) ((unsigned long)&(X))`
29			`#define LtoF(X) ((float)&(X))`
30
31
32			`float FP_Neg(float a_fp)`
33			`{`
34			`unsigned long a;`
35			`a = FtoL(a_fp);`
36			`a ^= 0x80000000;`
37			`return LtoF(a);`
38			`}`
39
40
41			`float FP_Add(float a_fp, float b_fp)`
42			`{`
43			`unsigned long a, b, c;`
44			`unsigned long as, bs, cs; //sign`
45			`long ae, af, be, bf, ce, cf; //exponent and fraction`
46			`a = FtoL(a_fp);`
47			`b = FtoL(b_fp);`
48			`as = a >> 31; //sign`
49			`ae = (a >> 23) & 0xff; //exponent`
50			`af = 0x00800000 \| (a & 0x007fffff); //fraction`
51			`bs = b >> 31;`
52			`be = (b >> 23) & 0xff;`
53			`bf = 0x00800000 \| (b & 0x007fffff);`
54			`if(ae > be)`
55			`{`
56			`if(ae - be < 30)`
57			`bf >>= ae - be;`
58			`else`
59			`bf = 0;`
60			`ce = ae;`
61			`}`
62			`else`
63			`{`
64			`if(be - ae < 30)`
65			`af >>= be - ae;`
66			`else`
67			`af = 0;`
68			`ce = be;`
69			`}`
70			`cf = (as ? -af : af) + (bs ? -bf : bf);`
71			`cs = cf < 0;`
72			`cf = cf>=0 ? cf : -cf;`
73			`if(cf == 0)`
74			`return LtoF(cf);`
75			`while(cf & 0xff000000)`
76			`{`
77			`++ce;`
78			`cf >>= 1;`
79			`}`
80			`while((cf & 0xff800000) == 0)`
81			`{`
82			`--ce;`
83			`cf <<= 1;`
84			`}`
85			`c = (cs << 31) \| (ce << 23) \| (cf & 0x007fffff);`
86			`if(ce < 1)`
87			`c = 0;`
88			`return LtoF(c);`
89			`}`
90
91
92			`float FP_Sub(float a_fp, float b_fp)`
93			`{`
94			`return FP_Add(a_fp, FP_Neg(b_fp));`
95			`}`
96
97
98			`float FP_Mult(float a_fp, float b_fp)`
99			`{`
100			`unsigned long a, b, c;`
101			`unsigned long as, af, bs, bf, cs, cf;`
102			`long ae, be, ce;`
103			`#ifdef WIN32`
104			`unsigned long a2, a1, b2, b1, med1, med2;`
105			`#endif`
106			`unsigned long hi, lo;`
107			`a = FtoL(a_fp);`
108			`b = FtoL(b_fp);`
109			`as = a >> 31;`
110			`ae = (a >> 23) & 0xff;`
111			`af = 0x00800000 \| (a & 0x007fffff);`
112			`bs = b >> 31;`
113			`be = (b >> 23) & 0xff;`
114			`bf = 0x00800000 \| (b & 0x007fffff);`
115			`cs = as ^ bs;`
116			`#ifdef WIN32`
117			`a1 = af & 0xffff;`
118			`a2 = af >> 16;`
119			`b1 = bf & 0xffff;`
120			`b2 = bf >> 16;`
121			`lo = a1 * b1;`
122			`med1 = a2 * b1 + (lo >> 16);`
123			`med2 = a1 * b2;`
124			`hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);`
125			`med1 = (med1 & 0xffff) + (med2 & 0xffff);`
126			`hi += (med1 >> 16);`
127			`lo = (med1 << 16) \| (lo & 0xffff);`
128			`#else`
129			`lo = OS_AsmMult(af, bf, &hi);`
130			`#endif`
131			`cf = (hi << 9) \| (lo >> 23);`
132			`ce = ae + be - 0x80 + 1;`
133			`if(cf == 0)`
134			`return LtoF(cf);`
135			`while(cf & 0xff000000)`
136			`{`
137			`++ce;`
138			`cf >>= 1;`
139			`}`
140			`c = (cs << 31) \| (ce << 23) \| (cf & 0x007fffff);`
141			`if(ce < 1)`
142			`c = 0;`
143			`return LtoF(c);`
144			`}`
145
146
147			`float FP_Div(float a_fp, float b_fp)`
148			`{`
149			`unsigned long a, b, c;`
150			`unsigned long as, af, bs, bf, cs, cf;`
151			`unsigned long a1, b1;`
152			`#if WIN32`
153			`unsigned long a2, b2, med1, med2;`
154			`#endif`
155			`unsigned long hi, lo;`
156			`long ae, be, ce, d;`
157			`a = FtoL(a_fp);`
158			`b = FtoL(b_fp);`
159			`as = a >> 31;`
160			`ae = (a >> 23) & 0xff;`
161			`af = 0x00800000 \| (a & 0x007fffff);`
162			`bs = b >> 31;`
163			`be = (b >> 23) & 0xff;`
164			`bf = 0x00800000 \| (b & 0x007fffff);`
165			`cs = as ^ bs;`
166			`ce = ae - (be - 0x80) + 6 - 8;`
167			`a1 = af << 4; //8`
168			`b1 = bf >> 8;`
169			`cf = a1 / b1;`
170			`cf <<= 12; //8`
171			`#if 1 /non-quick/`
172			`#if WIN32`
173			`a1 = cf & 0xffff;`
174			`a2 = cf >> 16;`
175			`b1 = bf & 0xffff;`
176			`b2 = bf >> 16;`
177			`lo = a1 * b1;`
178			`med1 =a2 * b1 + (lo >> 16);`
179			`med2 = a1 * b2;`
180			`hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);`
181			`med1 = (med1 & 0xffff) + (med2 & 0xffff);`
182			`hi += (med1 >> 16);`
183			`lo = (med1 << 16) \| (lo & 0xffff);`
184			`#else`
185			`lo = OS_AsmMult(cf, bf, &hi);`
186			`#endif`
187			`lo = (hi << 8) \| (lo >> 24);`
188			`d = af - lo; //remainder`
189			`assert(-0xffff < d && d < 0xffff);`
190			`d <<= 16;`
191			`b1 = bf >> 8;`
192			`d = d / (long)b1;`
193			`cf += d;`
194			`#endif`
195			`if(cf == 0)`
196			`return LtoF(cf);`
197			`while(cf & 0xff000000)`
198			`{`
199			`++ce;`
200			`cf >>= 1;`
201			`}`
202			`if(ce < 0)`
203			`ce = 0;`
204			`c = (cs << 31) \| (ce << 23) \| (cf & 0x007fffff);`
205			`if(ce < 1)`
206			`c = 0;`
207			`return LtoF(c);`
208			`}`
209
210
211			`long FP_ToLong(float a_fp)`
212			`{`
213			`unsigned long a;`
214			`unsigned long as;`
215			`long ae;`
216			`long af, shift;`
217			`a = FtoL(a_fp);`
218			`as = a >> 31;`
219			`ae = (a >> 23) & 0xff;`
220			`af = 0x00800000 \| (a & 0x007fffff);`
221			`af <<= 7;`
222			`shift = -(ae - 0x80 - 29);`
223			`if(shift > 0)`
224			`{`
225			`if(shift < 31)`
226			`af >>= shift;`
227			`else`
228			`af = 0;`
229			`}`
230			`af = as ? -af: af;`
231			`return af;`
232			`}`
233
234
235			`float FP_ToFloat(long af)`
236			`{`
237			`unsigned long a;`
238			`unsigned long as, ae;`
239			`as = af>=0 ? 0: 1;`
240			`af = af>=0 ? af: -af;`
241			`ae = 0x80 + 22;`
242			`if(af == 0)`
243			`return LtoF(af);`
244			`while(af & 0xff000000)`
245			`{`
246			`++ae;`
247			`af >>= 1;`
248			`}`
249			`while((af & 0xff800000) == 0)`
250			`{`
251			`--ae;`
252			`af <<= 1;`
253			`}`
254			`a = (as << 31) \| (ae << 23) \| (af & 0x007fffff);`
255			`return LtoF(a);`
256			`}`
257
258
259			`//0 iff a==b; 1 iff a>b; -1 iff a<b`
260			`int FP_Cmp(float a_fp, float b_fp)`
261			`{`
262			`unsigned long a, b;`
263			`unsigned long as, ae, af, bs, be, bf;`
264			`int gt;`
265			`a = FtoL(a_fp);`
266			`b = FtoL(b_fp);`
267			`if(a == b)`
268			`return 0;`
269			`as = a >> 31;`
270			`bs = b >> 31;`
271			`if(as > bs)`
272			`return -1;`
273			`if(as < bs)`
274			`return 1;`
275			`gt = as ? -1 : 1;`
276			`ae = (a >> 23) & 0xff;`
277			`be = (b >> 23) & 0xff;`
278			`if(ae > be)`
279			`return gt;`
280			`if(ae < be)`
281			`return -gt;`
282			`af = 0x00800000 \| (a & 0x007fffff);`
283			`bf = 0x00800000 \| (b & 0x007fffff);`
284			`if(af > bf)`
285			`return gt;`
286			`return -gt;`
287			`}`
288
289
290			`int __ltsf2(float a, float b)`
291			`{`
292			`return FP_Cmp(a, b);`
293			`}`
294
295			`int __lesf2(float a, float b)`
296			`{`
297			`return FP_Cmp(a, b);`
298			`}`
299
300			`int __gtsf2(float a, float b)`
301			`{`
302			`return FP_Cmp(a, b);`
303			`}`
304
305			`int __gesf2(float a, float b)`
306			`{`
307			`return FP_Cmp(a, b);`
308			`}`
309
310			`int __eqsf2(float a, float b)`
311			`{`
312			`return FtoL(a) != FtoL(b);`
313			`}`
314
315			`int __nesf2(float a, float b)`
316			`{`
317			`return FtoL(a) != FtoL(b);`
318			`}`
319
320
321			`float FP_Sqrt(float a)`
322			`{`
323			`float x1, y1, x2, y2, x3;`
324			`long i;`
325			`x1 = FP_ToFloat(1);`
326			`y1 = FP_Sub(FP_Mult(x1, x1), a); //y1=x1*x1-a;`
327			`x2 = FP_ToFloat(100);`
328			`y2 = FP_Sub(FP_Mult(x2, x2), a);`
329			`for(i = 0; i < 10; ++i)`
330			`{`
331			`if(FtoL(y1) == FtoL(y2))`
332			`return x2;`
333			`//x3=x2-(x1-x2)*y2/(y1-y2);`
334			`x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));`
335			`x1 = x2;`
336			`y1 = y2;`
337			`x2 = x3;`
338			`y2 = FP_Sub(FP_Mult(x2, x2), a);`
339			`}`
340			`return x2;`
341			`}`
342
343
344			`float FP_Cos(float rad)`
345			`{`
346			`int n;`
347			`float answer, x2, top, bottom, sign;`
348			`while(FP_Cmp(rad, PI2) > 0)`
349			`rad = FP_Sub(rad, PI2);`
350			`while(FP_Cmp(rad, (float)0.0) < 0)`
351			`rad = FP_Add(rad, PI2);`
352			`answer = 1.0;`
353			`sign = 1.0;`
354			`if(FP_Cmp(rad, PI) >= 0)`
355			`{`
356			`rad = FP_Sub(rad, PI);`
357			`sign = FP_ToFloat(-1);`
358			`}`
359			`if(FP_Cmp(rad, PI_2) >= 0)`
360			`{`
361			`rad = FP_Sub(PI, rad);`
362			`sign = FP_Neg(sign);`
363			`}`
364			`x2 = FP_Mult(rad, rad);`
365			`top = 1.0;`
366			`bottom = 1.0;`
367			`for(n = 2; n < 12; n += 2)`
368			`{`
369			`top = FP_Mult(top, FP_Neg(x2));`
370			`bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));`
371			`answer = FP_Add(answer, FP_Div(top, bottom));`
372			`}`
373			`return FP_Mult(answer, sign);`
374			`}`
375
376
377			`float FP_Sin(float rad)`
378			`{`
379			`const float pi_2=(float)(PI/2.0);`
380			`return FP_Cos(FP_Sub(rad, pi_2));`
381			`}`
382
383
384			`float FP_Atan(float x)`
385			`{`
386			`const float b=(float)(PI/8.0);`
387			`const float atan_b=(float)0.37419668; //atan(b);`
388			`int n;`
389			`float answer, x2, top;`
390			`if(FP_Cmp(x, (float)0.0) >= 0)`
391			`{`
392			`if(FP_Cmp(x, (float)1.0) > 0)`
393			`return FP_Sub(PI_2, FP_Atan(FP_Div((float)1.0, x)));`
394			`}`
395			`else`
396			`{`
397			`if(FP_Cmp(x, (float)-1.0) > 0)`
398			`return FP_Sub(-PI_2, FP_Atan(FP_Div((float)1.0, x)));`
399			`}`
400			`if(FP_Cmp(x, (float)0.45) > 0)`
401			`{`
402			`//answer = (x - atan_b) / (1 + x * atan_b);`
403			`answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));`
404			`//answer = b + FP_Atan(answer) - (float)0.034633; /FIXME fudge?/`
405			`answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);`
406			`return answer;`
407			`}`
408			`if(FP_Cmp(x, (float)-0.45) < 0)`
409			`{`
410			`x = FP_Neg(x);`
411			`//answer = (x - atan_b) / (1 + x * atan_b);`
412			`answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));`
413			`//answer = b + FP_Atan(answer) - (float)0.034633; /FIXME/`
414			`answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);`
415			`return FP_Neg(answer);`
416			`}`
417			`answer = x;`
418			`x2 = FP_Mult(FP_Neg(x), x);`
419			`top = x;`
420			`for(n = 3; n < 14; n += 2)`
421			`{`
422			`top = FP_Mult(top, x2);`
423			`answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));`
424			`}`
425			`return answer;`
426			`}`
427
428
429			`float FP_Atan2(float y, float x)`
430			`{`
431			`float answer,r;`
432			`r = y / x;`
433			`answer = FP_Atan(r);`
434			`if(FP_Cmp(x, (float)0.0) < 0)`
435			`{`
436			`if(FP_Cmp(y, (float)0.0) > 0)`
437			`answer = FP_Add(answer, PI);`
438			`else`
439			`answer = FP_Sub(answer, PI);`
440			`}`
441			`return answer;`
442			`}`
443
444
445			`float FP_Exp(float x)`
446			`{`
447			`const float e2=(float)7.389056099;`
448			`const float inv_e2=(float)0.135335283;`
449			`float answer, top, bottom, mult;`
450			`int n;`
451
452			`mult = 1.0;`
453			`while(FP_Cmp(x, (float)2.0) > 0)`
454			`{`
455			`mult = FP_Mult(mult, e2);`
456			`x = FP_Add(x, (float)-2.0);`
457			`}`
458			`while(FP_Cmp(x, (float)-2.0) < 0)`
459			`{`
460			`mult = FP_Mult(mult, inv_e2);`
461			`x = FP_Add(x, (float)2.0);`
462			`}`
463			`answer = FP_Add((float)1.0, x);`
464			`top = x;`
465			`bottom = 1.0;`
466			`for(n = 2; n < 15; ++n)`
467			`{`
468			`top = FP_Mult(top, x);`
469			`bottom = FP_Mult(bottom, FP_ToFloat(n));`
470			`answer = FP_Add(answer, FP_Div(top, bottom));`
471			`}`
472			`return FP_Mult(answer, mult);`
473			`}`
474
475
476			`float FP_Log(float x)`
477			`{`
478			`const float log_2=(float)0.69314718; /log(2.0)/`
479			`int n;`
480			`float answer, top, add;`
481			`add = 0.0;`
482			`while(FP_Cmp(x, 16.0) > 0)`
483			`{`
484			`x = FP_Mult(x, (float)0.0625);`
485			`add = FP_Add(add, (float)(log_2 * 4));`
486			`}`
487			`while(FP_Cmp(x, 1.5) > 0)`
488			`{`
489			`x = FP_Mult(x, 0.5);`
490			`add = FP_Add(add, log_2);`
491			`}`
492			`while(FP_Cmp(x, 0.5) < 0)`
493			`{`
494			`x = FP_Mult(x, 2.0);`
495			`add = FP_Sub(add, log_2);`
496			`}`
497			`x = FP_Sub(x, 1.0);`
498			`answer = 0.0;`
499			`top = -1.0;`
500			`for(n = 1; n < 14; ++n)`
501			`{`
502			`top = FP_Mult(top, FP_Neg(x));`
503			`answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));`
504			`}`
505			`return FP_Add(answer, add);`
506			`}`
507
508
509			`float FP_Pow(float x, float y)`
510			`{`
511			`return FP_Exp(y * FP_Log(x));`
512			`}`
513
514
515			`/************* Test ***************/`
516			`#ifdef WIN32`
517			`#include <math.h>`
518			`#undef printf`
519			`int printf(const char *, ...);`
520			`struct {`
521			`char *name;`
522			`float low, high;`
523			`double (*func1)(double);`
524			`float (*func2)(float);`
525			`} test_info[]={`
526			`{"cos", -2PI, 2PI, cos, FP_Cos},`
527			`{"sin", -2PI, 2PI, sin, FP_Sin},`
528			`{"atan", -3.0, 2.0, atan, FP_Atan},`
529			`{"log", (float)0.01, (float)4.0, log, FP_Log},`
530			`{"exp", (float)-5.01, (float)30.0, exp, FP_Exp},`
531			`{"sqrt", (float)0.01, (float)1000.0, sqrt, FP_Sqrt}`
532			`};`
533
534
535			`void TestMathFull(void)`
536			`{`
537			`float a, b, c, d;`
538			`float error1, error2, error3, error4, error5;`
539			`int test;`
540
541			`a = PI * PI;`
542			`b = PI;`
543			`c = FP_Div(a, b);`
544			`printf("%10f %10f %10f %10f %10f\n",`
545			`(double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));`
546			`a = a * 200;`
547			`for(b = -(float)2.718281828*100; b < 300; b += (float)23.678)`
548			`{`
549			`c = FP_Div(a, b);`
550			`d = a / b - c;`
551			`printf("%10f %10f %10f %10f %10f\n",`
552			`(double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));`
553			`}`
554			`getch();`
555
556			`for(test = 0; test < 6; ++test)`
557			`{`
558			`printf("\nTesting %s\n", test_info[test].name);`
559			`for(a = test_info[test].low;`
560			`a <= test_info[test].high;`
561			`a += (test_info[test].high-test_info[test].low)/(float)20.0)`
562			`{`
563			`b = (float)test_info[test].func1(a);`
564			`c = test_info[test].func2(a);`
565			`d = b - c;`
566			`printf("%s %10f %10f %10f %10f\n", test_info[test].name, a, b, c, d);`
567			`}`
568			`getch();`
569			`}`
570
571			`a = FP_ToFloat((long)6.0);`
572			`b = FP_ToFloat((long)2.0);`
573			`printf("%f %f\n", (double)a, (double)b);`
574			`c = FP_Add(a, b);`
575			`printf("add %f %f\n", (double)(a + b), (double)c);`
576			`c = FP_Sub(a, b);`
577			`printf("sub %f %f\n", (double)(a - b), (double)c);`
578			`c = FP_Mult(a, b);`
579			`printf("mult %f %f\n", (double)(a * b), (double)c);`
580			`c = FP_Div(a, b);`
581			`printf("div %f %f\n", (double)(a / b), (double)c);`
582			`getch();`
583
584			`for(a = (float)-13756.54; a < (float)17400.0; a += (float)64.45)`
585			`{`
586			`for(b = (float)-875.36; b < (float)935.8; b += (float)36.7)`
587			`{`
588			`error1 = (float)1.0 - (a + b) / FP_Add(a, b);`
589			`error2 = (float)1.0 - (a * b) / FP_Mult(a, b);`
590			`error3 = (float)1.0 - (a / b) / FP_Div(a, b);`
591			`error4 = (float)1.0 - a / FP_ToFloat(FP_ToLong(a));`
592			`error5 = error1 + error2 + error3 + error4;`
593			`if(error5 < 0.00005)`
594			`continue;`
595			`printf("ERROR!\n");`
596			`printf("a=%f b=%f\n", (double)a, (double)b);`
597			`printf(" a+b=%f %f\n", (double)(a+b), (double)FP_Add(a, b));`
598			`printf(" ab=%f %f\n", (double)(ab), (double)FP_Mult(a, b));`
599			`printf(" a/b=%f %f\n", (double)(a/b), (double)FP_Div(a, b));`
600			`printf(" a=%f %ld %f\n", (double)a, FP_ToLong(a),`
601			`(double)FP_ToFloat((long)a));`
602			`printf(" %f %f %f %f\n", (double)error1, (double)error2,`
603			`(double)error3, (double)error4);`
604			`if(error5 > 0.001)`
605			`getch();`
606			`}`
607			`}`
608			`printf("done.\n");`
609			`getch();`
610			`}`
611			`#endif`
612
613