OpenCores
URL https://opencores.org/ocsvn/plasma/plasma/trunk

Subversion Repositories plasma

[/] [plasma/] [tags/] [V3_0/] [kernel/] [math.c] - Blame information for rev 393

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 138 rhoads
/*--------------------------------------------------------------------
2
 * TITLE: Plasma Floating Point Library
3
 * AUTHOR: Steve Rhoads (rhoadss@yahoo.com)
4
 * DATE CREATED: 3/2/06
5
 * FILENAME: math.c
6
 * PROJECT: Plasma CPU core
7
 * COPYRIGHT: Software placed into the public domain by the author.
8
 *    Software 'as is' without warranty.  Author liable for nothing.
9
 * DESCRIPTION:
10
 *    Plasma Floating Point Library
11
 *--------------------------------------------------------------------
12
 * IEEE_fp = sign(1) | exponent(8) | fraction(23)
13
 * cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...
14
 * exp(x)=1+x+x^2/2!+x^3/3!+...
15
 * ln(1+x)=x-x^2/2+x^3/3-x^4/4+...
16
 * atan(x)=x-x^3/3+x^5/5-x^7/7+...
17
 * pow(x,y)=exp(y*ln(x))
18
 * x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))
19
 * atan(x)=b+atan((x-atan(b))/(1+x*atan(b)))
20
 * ln(a*x)=ln(a)+ln(x); ln(x^n)=n*ln(x)
21
 *--------------------------------------------------------------------*/
22
#include "rtos.h"
23
 
24
#define PI ((float)3.1415926)
25
#define PI_2 ((float)(PI/2.0))
26
#define PI2 ((float)(PI*2.0))
27
 
28
#define FtoL(X) (*(unsigned long*)&(X))
29
#define LtoF(X) (*(float*)&(X))
30
 
31
 
32
float FP_Neg(float a_fp)
33
{
34
   unsigned long a;
35
   a = FtoL(a_fp);
36
   a ^= 0x80000000;
37
   return LtoF(a);
38
}
39
 
40
 
41
float FP_Add(float a_fp, float b_fp)
42
{
43
   unsigned long a, b, c;
44
   unsigned long as, bs, cs;     //sign
45
   long ae, af, be, bf, ce, cf;  //exponent and fraction
46
   a = FtoL(a_fp);
47
   b = FtoL(b_fp);
48
   as = a >> 31;                        //sign
49
   ae = (a >> 23) & 0xff;               //exponent
50
   af = 0x00800000 | (a & 0x007fffff);  //fraction
51
   bs = b >> 31;
52
   be = (b >> 23) & 0xff;
53
   bf = 0x00800000 | (b & 0x007fffff);
54
   if(ae > be)
55
   {
56
      if(ae - be < 30)
57
         bf >>= ae - be;
58
      else
59
         bf = 0;
60
      ce = ae;
61
   }
62
   else
63
   {
64
      if(be - ae < 30)
65
         af >>= be - ae;
66
      else
67
         af = 0;
68
      ce = be;
69
   }
70
   cf = (as ? -af : af) + (bs ? -bf : bf);
71
   cs = cf < 0;
72
   cf = cf>=0 ? cf : -cf;
73
   if(cf == 0)
74
      return LtoF(cf);
75
   while(cf & 0xff000000)
76
   {
77
      ++ce;
78
      cf >>= 1;
79
   }
80
   while((cf & 0xff800000) == 0)
81
   {
82
      --ce;
83
      cf <<= 1;
84
   }
85
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
86
   if(ce < 1)
87
      c = 0;
88
   return LtoF(c);
89
}
90
 
91
 
92
float FP_Sub(float a_fp, float b_fp)
93
{
94
   return FP_Add(a_fp, FP_Neg(b_fp));
95
}
96
 
97
 
98
float FP_Mult(float a_fp, float b_fp)
99
{
100
   unsigned long a, b, c;
101
   unsigned long as, af, bs, bf, cs, cf;
102
   long ae, be, ce;
103
#ifdef WIN32
104
   unsigned long a2, a1, b2, b1, med1, med2;
105
#endif
106
   unsigned long hi, lo;
107
   a = FtoL(a_fp);
108
   b = FtoL(b_fp);
109
   as = a >> 31;
110
   ae = (a >> 23) & 0xff;
111
   af = 0x00800000 | (a & 0x007fffff);
112
   bs = b >> 31;
113
   be = (b >> 23) & 0xff;
114
   bf = 0x00800000 | (b & 0x007fffff);
115
   cs = as ^ bs;
116
#ifdef WIN32
117
   a1 = af & 0xffff;
118
   a2 = af >> 16;
119
   b1 = bf & 0xffff;
120
   b2 = bf >> 16;
121
   lo = a1 * b1;
122
   med1 = a2 * b1 + (lo >> 16);
123
   med2 = a1 * b2;
124
   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
125
   med1 = (med1 & 0xffff) + (med2 & 0xffff);
126
   hi += (med1 >> 16);
127
   lo = (med1 << 16) | (lo & 0xffff);
128
#else
129
   lo = OS_AsmMult(af, bf, &hi);
130
#endif
131
   cf = (hi << 9) | (lo >> 23);
132
   ce = ae + be - 0x80 + 1;
133
   if(cf == 0)
134
      return LtoF(cf);
135
   while(cf & 0xff000000)
136
   {
137
      ++ce;
138
      cf >>= 1;
139
   }
140
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
141
   if(ce < 1)
142
      c = 0;
143
   return LtoF(c);
144
}
145
 
146
 
147
float FP_Div(float a_fp, float b_fp)
148
{
149
   unsigned long a, b, c;
150
   unsigned long as, af, bs, bf, cs, cf;
151
   unsigned long a1, b1;
152
#if WIN32
153
   unsigned long a2, b2, med1, med2;
154
#endif
155
   unsigned long hi, lo;
156
   long ae, be, ce, d;
157
   a = FtoL(a_fp);
158
   b = FtoL(b_fp);
159
   as = a >> 31;
160
   ae = (a >> 23) & 0xff;
161
   af = 0x00800000 | (a & 0x007fffff);
162
   bs = b >> 31;
163
   be = (b >> 23) & 0xff;
164
   bf = 0x00800000 | (b & 0x007fffff);
165
   cs = as ^ bs;
166
   ce = ae - (be - 0x80) + 6 - 8;
167
   a1 = af << 4; //8
168
   b1 = bf >> 8;
169
   cf = a1 / b1;
170
   cf <<= 12; //8
171
#if 1                  /*non-quick*/
172
#if WIN32
173
   a1 = cf & 0xffff;
174
   a2 = cf >> 16;
175
   b1 = bf & 0xffff;
176
   b2 = bf >> 16;
177
   lo = a1 * b1;
178
   med1 =a2 * b1 + (lo >> 16);
179
   med2 = a1 * b2;
180
   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
181
   med1 = (med1 & 0xffff) + (med2 & 0xffff);
182
   hi += (med1 >> 16);
183
   lo = (med1 << 16) | (lo & 0xffff);
184
#else
185
   lo = OS_AsmMult(cf, bf, &hi);
186
#endif
187
   lo = (hi << 8) | (lo >> 24);
188
   d = af - lo;    //remainder
189
   assert(-0xffff < d && d < 0xffff);
190
   d <<= 16;
191
   b1 = bf >> 8;
192
   d = d / (long)b1;
193
   cf += d;
194
#endif
195
   if(cf == 0)
196
      return LtoF(cf);
197
   while(cf & 0xff000000)
198
   {
199
      ++ce;
200
      cf >>= 1;
201
   }
202
   if(ce < 0)
203
      ce = 0;
204
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
205
   if(ce < 1)
206
      c = 0;
207
   return LtoF(c);
208
}
209
 
210
 
211
long FP_ToLong(float a_fp)
212
{
213
   unsigned long a;
214
   unsigned long as;
215
   long ae;
216
   long af, shift;
217
   a = FtoL(a_fp);
218
   as = a >> 31;
219
   ae = (a >> 23) & 0xff;
220
   af = 0x00800000 | (a & 0x007fffff);
221
   af <<= 7;
222
   shift = -(ae - 0x80 - 29);
223
   if(shift > 0)
224
   {
225
      if(shift < 31)
226
         af >>= shift;
227
      else
228
         af = 0;
229
   }
230
   af = as ? -af: af;
231
   return af;
232
}
233
 
234
 
235
float FP_ToFloat(long af)
236
{
237
   unsigned long a;
238
   unsigned long as, ae;
239
   as = af>=0 ? 0: 1;
240
   af = af>=0 ? af: -af;
241
   ae = 0x80 + 22;
242
   if(af == 0)
243
      return LtoF(af);
244
   while(af & 0xff000000)
245
   {
246
      ++ae;
247
      af >>= 1;
248
   }
249
   while((af & 0xff800000) == 0)
250
   {
251
      --ae;
252
      af <<= 1;
253
   }
254
   a = (as << 31) | (ae << 23) | (af & 0x007fffff);
255
   return LtoF(a);
256
}
257
 
258
 
259
//0 iff a==b; 1 iff a>b; -1 iff a<b
260
int FP_Cmp(float a_fp, float b_fp)
261
{
262
   unsigned long a, b;
263
   unsigned long as, ae, af, bs, be, bf;
264
   int gt;
265
   a = FtoL(a_fp);
266
   b = FtoL(b_fp);
267
   if(a == b)
268
      return 0;
269
   as = a >> 31;
270
   bs = b >> 31;
271
   if(as > bs)
272
      return -1;
273
   if(as < bs)
274
      return 1;
275
   gt = as ? -1 : 1;
276
   ae = (a >> 23) & 0xff;
277
   be = (b >> 23) & 0xff;
278
   if(ae > be)
279
      return gt;
280
   if(ae < be)
281
      return -gt;
282
   af = 0x00800000 | (a & 0x007fffff);
283
   bf = 0x00800000 | (b & 0x007fffff);
284
   if(af > bf)
285
      return gt;
286
   return -gt;
287
}
288
 
289
 
290
int __ltsf2(float a, float b)
291
{
292
   return FP_Cmp(a, b);
293
}
294
 
295
int __lesf2(float a, float b)
296
{
297
   return FP_Cmp(a, b);
298
}
299
 
300
int __gtsf2(float a, float b)
301
{
302
   return FP_Cmp(a, b);
303
}
304
 
305
int __gesf2(float a, float b)
306
{
307
   return FP_Cmp(a, b);
308
}
309
 
310
int __eqsf2(float a, float b)
311
{
312
   return FtoL(a) != FtoL(b);
313
}
314
 
315
int __nesf2(float a, float b)
316
{
317
   return FtoL(a) != FtoL(b);
318
}
319
 
320
 
321
float FP_Sqrt(float a)
322
{
323
   float x1, y1, x2, y2, x3;
324
   long i;
325
   x1 = FP_ToFloat(1);
326
   y1 = FP_Sub(FP_Mult(x1, x1), a);  //y1=x1*x1-a;
327
   x2 = FP_ToFloat(100);
328
   y2 = FP_Sub(FP_Mult(x2, x2), a);
329
   for(i = 0; i < 10; ++i)
330
   {
331
      if(FtoL(y1) == FtoL(y2))
332
         return x2;
333
      //x3=x2-(x1-x2)*y2/(y1-y2);
334
      x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));
335
      x1 = x2;
336
      y1 = y2;
337
      x2 = x3;
338
      y2 = FP_Sub(FP_Mult(x2, x2), a);
339
   }
340
   return x2;
341
}
342
 
343
 
344
float FP_Cos(float rad)
345
{
346
   int n;
347
   float answer, x2, top, bottom, sign;
348
   while(FP_Cmp(rad, PI2) > 0)
349
      rad = FP_Sub(rad, PI2);
350
   while(FP_Cmp(rad, (float)0.0) < 0)
351
      rad = FP_Add(rad, PI2);
352
   answer = 1.0;
353
   sign = 1.0;
354
   if(FP_Cmp(rad, PI) >= 0)
355
   {
356
      rad = FP_Sub(rad, PI);
357
      sign = FP_ToFloat(-1);
358
   }
359
   if(FP_Cmp(rad, PI_2) >= 0)
360
   {
361
      rad = FP_Sub(PI, rad);
362
      sign = FP_Neg(sign);
363
   }
364
   x2 = FP_Mult(rad, rad);
365
   top = 1.0;
366
   bottom = 1.0;
367
   for(n = 2; n < 12; n += 2)
368
   {
369
      top = FP_Mult(top, FP_Neg(x2));
370
      bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));
371
      answer = FP_Add(answer, FP_Div(top, bottom));
372
   }
373
   return FP_Mult(answer, sign);
374
}
375
 
376
 
377
float FP_Sin(float rad)
378
{
379
   const float pi_2=(float)(PI/2.0);
380
   return FP_Cos(FP_Sub(rad, pi_2));
381
}
382
 
383
 
384
float FP_Atan(float x)
385
{
386
   const float b=(float)(PI/8.0);
387
   const float atan_b=(float)0.37419668; //atan(b);
388
   int n;
389
   float answer, x2, top;
390
   if(FP_Cmp(x, (float)0.0) >= 0)
391
   {
392
      if(FP_Cmp(x, (float)1.0) > 0)
393
         return FP_Sub(PI_2, FP_Atan(FP_Div((float)1.0, x)));
394
   }
395
   else
396
   {
397
      if(FP_Cmp(x, (float)-1.0) > 0)
398
         return FP_Sub(-PI_2, FP_Atan(FP_Div((float)1.0, x)));
399
   }
400
   if(FP_Cmp(x, (float)0.45) > 0)
401
   {
402
      //answer = (x - atan_b) / (1 + x * atan_b);
403
      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
404
      //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME fudge?*/
405
      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
406
      return answer;
407
   }
408
   if(FP_Cmp(x, (float)-0.45) < 0)
409
   {
410
      x = FP_Neg(x);
411
      //answer = (x - atan_b) / (1 + x * atan_b);
412
      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
413
      //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME*/
414
      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
415
      return FP_Neg(answer);
416
   }
417
   answer = x;
418
   x2 = FP_Mult(FP_Neg(x), x);
419
   top = x;
420
   for(n = 3; n < 14; n += 2)
421
   {
422
      top = FP_Mult(top, x2);
423
      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
424
   }
425
   return answer;
426
}
427
 
428
 
429
float FP_Atan2(float y, float x)
430
{
431
   float answer,r;
432
   r = y / x;
433
   answer = FP_Atan(r);
434
   if(FP_Cmp(x, (float)0.0) < 0)
435
   {
436
      if(FP_Cmp(y, (float)0.0) > 0)
437
         answer = FP_Add(answer, PI);
438
      else
439
         answer = FP_Sub(answer, PI);
440
   }
441
   return answer;
442
}
443
 
444
 
445
float FP_Exp(float x)
446
{
447
   const float e2=(float)7.389056099;
448
   const float inv_e2=(float)0.135335283;
449
   float answer, top, bottom, mult;
450
   int n;
451
 
452
   mult = 1.0;
453
   while(FP_Cmp(x, (float)2.0) > 0)
454
   {
455
      mult = FP_Mult(mult, e2);
456
      x = FP_Add(x, (float)-2.0);
457
   }
458
   while(FP_Cmp(x, (float)-2.0) < 0)
459
   {
460
      mult = FP_Mult(mult, inv_e2);
461
      x = FP_Add(x, (float)2.0);
462
   }
463
   answer = FP_Add((float)1.0, x);
464
   top = x;
465
   bottom = 1.0;
466
   for(n = 2; n < 15; ++n)
467
   {
468
      top = FP_Mult(top, x);
469
      bottom = FP_Mult(bottom, FP_ToFloat(n));
470
      answer = FP_Add(answer, FP_Div(top, bottom));
471
   }
472
   return FP_Mult(answer, mult);
473
}
474
 
475
 
476
float FP_Log(float x)
477
{
478
   const float log_2=(float)0.69314718; /*log(2.0)*/
479
   int n;
480
   float answer, top, add;
481
   add = 0.0;
482
   while(FP_Cmp(x, 16.0) > 0)
483
   {
484
      x = FP_Mult(x, (float)0.0625);
485
      add = FP_Add(add, (float)(log_2 * 4));
486
   }
487
   while(FP_Cmp(x, 1.5) > 0)
488
   {
489
      x = FP_Mult(x, 0.5);
490
      add = FP_Add(add, log_2);
491
   }
492
   while(FP_Cmp(x, 0.5) < 0)
493
   {
494
      x = FP_Mult(x, 2.0);
495
      add = FP_Sub(add, log_2);
496
   }
497
   x = FP_Sub(x, 1.0);
498
   answer = 0.0;
499
   top = -1.0;
500
   for(n = 1; n < 14; ++n)
501
   {
502
      top = FP_Mult(top, FP_Neg(x));
503
      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
504
   }
505
   return FP_Add(answer, add);
506
}
507
 
508
 
509
float FP_Pow(float x, float y)
510
{
511
   return FP_Exp(y * FP_Log(x));
512
}
513
 
514
 
515
/*************** Test *****************/
516
#ifdef WIN32
517
#include <math.h>
518
#undef printf
519
int printf(const char *, ...);
520
struct {
521
   char *name;
522
   float low, high;
523
   double (*func1)(double);
524
   float (*func2)(float);
525
} test_info[]={
526
   {"cos", -2*PI, 2*PI, cos, FP_Cos},
527
   {"sin", -2*PI, 2*PI, sin, FP_Sin},
528
   {"atan", -3.0, 2.0, atan, FP_Atan},
529
   {"log", (float)0.01, (float)4.0, log, FP_Log},
530
   {"exp", (float)-5.01, (float)30.0, exp, FP_Exp},
531
   {"sqrt", (float)0.01, (float)1000.0, sqrt, FP_Sqrt}
532
};
533
 
534
 
535
void TestMathFull(void)
536
{
537
   float a, b, c, d;
538
   float error1, error2, error3, error4, error5;
539
   int test;
540
 
541
   a = PI * PI;
542
   b = PI;
543
   c = FP_Div(a, b);
544
   printf("%10f %10f %10f %10f %10f\n",
545
      (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
546
   a = a * 200;
547
   for(b = -(float)2.718281828*100; b < 300; b += (float)23.678)
548
   {
549
      c = FP_Div(a, b);
550
      d = a / b - c;
551
      printf("%10f %10f %10f %10f %10f\n",
552
         (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
553
   }
554
   getch();
555
 
556
   for(test = 0; test < 6; ++test)
557
   {
558
      printf("\nTesting %s\n", test_info[test].name);
559
      for(a = test_info[test].low;
560
          a <= test_info[test].high;
561
          a += (test_info[test].high-test_info[test].low)/(float)20.0)
562
      {
563
         b = (float)test_info[test].func1(a);
564
         c = test_info[test].func2(a);
565
         d = b - c;
566
         printf("%s %10f %10f %10f %10f\n", test_info[test].name, a, b, c, d);
567
      }
568
      getch();
569
   }
570
 
571
   a = FP_ToFloat((long)6.0);
572
   b = FP_ToFloat((long)2.0);
573
   printf("%f %f\n", (double)a, (double)b);
574
   c = FP_Add(a, b);
575
   printf("add %f %f\n", (double)(a + b), (double)c);
576
   c = FP_Sub(a, b);
577
   printf("sub %f %f\n", (double)(a - b), (double)c);
578
   c = FP_Mult(a, b);
579
   printf("mult %f %f\n", (double)(a * b), (double)c);
580
   c = FP_Div(a, b);
581
   printf("div %f %f\n", (double)(a / b), (double)c);
582
   getch();
583
 
584
   for(a = (float)-13756.54; a < (float)17400.0; a += (float)64.45)
585
   {
586
      for(b = (float)-875.36; b < (float)935.8; b += (float)36.7)
587
      {
588
         error1 = (float)1.0 - (a + b) / FP_Add(a, b);
589
         error2 = (float)1.0 - (a * b) / FP_Mult(a, b);
590
         error3 = (float)1.0 - (a / b) / FP_Div(a, b);
591
         error4 = (float)1.0 - a / FP_ToFloat(FP_ToLong(a));
592
         error5 = error1 + error2 + error3 + error4;
593
         if(error5 < 0.00005)
594
            continue;
595
         printf("ERROR!\n");
596
         printf("a=%f b=%f\n", (double)a, (double)b);
597
         printf("  a+b=%f %f\n", (double)(a+b), (double)FP_Add(a, b));
598
         printf("  a*b=%f %f\n", (double)(a*b), (double)FP_Mult(a, b));
599
         printf("  a/b=%f %f\n", (double)(a/b), (double)FP_Div(a, b));
600
         printf("  a=%f %ld %f\n", (double)a, FP_ToLong(a),
601
                                   (double)FP_ToFloat((long)a));
602
         printf("  %f %f %f %f\n", (double)error1, (double)error2,
603
            (double)error3, (double)error4);
604
         if(error5 > 0.001)
605
            getch();
606
      }
607
   }
608
   printf("done.\n");
609
   getch();
610
}
611
#endif
612
 
613
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.