OpenCores
URL https://opencores.org/ocsvn/eco32/eco32/trunk

Subversion Repositories eco32

[/] [eco32/] [trunk/] [fp/] [implementation/] [arith/] [mmix-arith.w] - Blame information for rev 232

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 15 hellwig
% This file is part of the MMIXware package (c) Donald E Knuth 1999
2
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
3
 
4
\def\title{MMIX-ARITH}
5
 
6
\def\MMIX{\.{MMIX}}
7
\def\MMIXAL{\.{MMIXAL}}
8
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
9
\def\dts{\mathinner{\ldotp\ldotp}}
10
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
11
\def\ff{\\{ff\kern-.05em}}
12
@s ff TeX
13
@s bool normal @q unreserve a C++ keyword @>
14
@s xor normal @q unreserve a C++ keyword @>
15
 
16
@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\
17
arithmetic on an old-fashioned 32-bit computer---like the one the author
18
had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.
19
All operations are fabricated from 32-bit arithmetic, including
20
a full implementation of the IEEE floating point standard,
21
assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.
22
 
23
Some day 64-bit machines will be commonplace and the awkward manipulations of
24
the present program will look quite archaic. Interested readers who have such
25
computers will be able to convert the code to a pure 64-bit form without
26
difficulty, thereby obtaining much faster and simpler routines. Meanwhile,
27
however, we can simulate the future and hope for continued progress.
28
 
29
This program module has a simple structure, intended to make it
30
suitable for loading with \MMIX\ simulators and assemblers.
31
 
32
@c
33
#include 
34
#include 
35
#include 
36
@@;
37
typedef enum{@+false,true@+} bool;
38
@@;
39
@@;
40
@@;
41
@
42
 
43
@ Subroutines of this program are declared first with a prototype,
44
as in {\mc ANSI C}, then with an old-style \CEE/ function definition.
45
Here are some preprocessor commands that make this work correctly with both
46
new-style and old-style compilers.
47
@^prototypes for functions@>
48
 
49
@=
50
#ifdef __STDC__
51
#define ARGS(list) list
52
#else
53
#define ARGS(list) ()
54
#endif
55
 
56
@ The definition of type \&{tetra} should be changed, if necessary, so that
57
it represents an unsigned 32-bit integer.
58
@^system dependencies@>
59
 
60
@=
61
typedef unsigned int tetra;
62
 /* for systems conforming to the LP-64 data model */
63
typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */
64
 
65
@ @d sign_bit ((unsigned)0x80000000)
66
 
67
@=
68
octa zero_octa; /* |zero_octa.h=zero_octa.l=0| */
69
octa neg_one={-1,-1}; /* |neg_one.h=neg_one.l=-1| */
70
octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */
71
octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */
72
octa aux; /* auxiliary output of a subroutine */
73
bool overflow; /* set by certain subroutines for signed arithmetic */
74
 
75
@ It's easy to add and subtract octabytes, if we aren't terribly
76
worried about speed.
77
 
78
@=
79
octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
80
octa oplus(y,z) /* compute $y+z$ */
81
  octa y,z;
82
{@+ octa x;
83
  x.h=y.h+z.h;@+
84
  x.l=y.l+z.l;
85
  if (x.l
86
  return x;
87
}
88
@#
89
octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
90
octa ominus(y,z) /* compute $y-z$ */
91
  octa y,z;
92
{@+ octa x;
93
  x.h=y.h-z.h;@+
94
  x.l=y.l-z.l;
95
  if (x.l>y.l) x.h--;
96
  return x;
97
}
98
 
99
@ In the following subroutine, |delta| is a signed quantity that is
100
assumed to fit in a signed tetrabyte.
101
 
102
@=
103
octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>
104
octa incr(y,delta) /* compute $y+\delta$ */
105
  octa y;
106
  int delta;
107
{@+ octa x;
108
  x.h=y.h;@+ x.l=y.l+delta;
109
  if (delta>=0) {
110
    if (x.l
111
  }@+else if (x.l>y.l) x.h--;
112
  return x;
113
}
114
 
115
@ Left and right shifts are only a bit more difficult.
116
 
117
@=
118
octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>
119
octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */
120
  octa y;
121
  int s;
122
{
123
  while (s>=32) y.h=y.l,y.l=0,s-=32;
124
  if (s) {@+register tetra yhl=y.h<>(32-s);
125
    y.h=yhl+ylh;@+ y.l<<=s;
126
  }
127
  return y;
128
}
129
@#
130
octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>
131
octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */
132
  octa y;
133
  int s,u;
134
{
135
  while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;
136
  if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;
137
    y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;
138
  }
139
  return y;
140
}
141
 
142
@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining
143
an unsigned 128-bit product. It is easy to do this on a 32-bit machine
144
by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.
145
@^multiprecision multiplication@>
146
 
147
The following subroutine returns the lower half of the product, and
148
puts the upper half into a global octabyte called |aux|.
149
 
150
@=
151
octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
152
octa omult(y,z)
153
  octa y,z;
154
{
155
  register int i,j,k;
156
  tetra u[4],v[4],w[8];
157
  register tetra t;
158
  octa acc;
159
  @;
160
  for (j=0;j<4;j++) w[j]=0;
161
  for (j=0;j<4;j++)
162
    if (!v[j]) w[j+4]=0;
163
    else {
164
      for (i=k=0;i<4;i++) {
165
        t=u[i]*v[j]+w[i+j]+k;
166
        w[i+j]=t&0xffff, k=t>>16;
167
      }
168
      w[j+4]=k;
169
    }
170
  @;
171
  return acc;
172
}
173
 
174
@ @=
175
extern octa aux; /* secondary output of subroutines with multiple outputs */
176
extern bool overflow;
177
 
178
@ @=
179
u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;
180
v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;
181
 
182
@ @=
183
aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];
184
acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];
185
 
186
@ Signed multiplication has the same lower half product as unsigned
187
multiplication. The signed upper half product is obtained with at most two
188
further subtractions, after which the result has overflowed if and only if
189
the upper half is unequal to 64 copies of the sign bit in the lower half.
190
 
191
@=
192
octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
193
octa signed_omult(y,z)
194
  octa y,z;
195
{
196
  octa acc;
197
  acc=omult(y,z);
198
  if (y.h&sign_bit) aux=ominus(aux,z);
199
  if (z.h&sign_bit) aux=ominus(aux,y);
200
  overflow=(aux.h!=aux.l || (aux.h^(aux.h>>1)^(acc.h&sign_bit)));
201
  return acc;
202
}
203
 
204
@* Division. Long division of an unsigned 128-bit integer by an unsigned
205
64-bit integer is, of course, one of the most challenging routines
206
needed for \MMIX\ arithmetic. The following program, based on
207
Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes
208
octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r
209
given octabytes $x$, $y$, and~$z$, assuming that $x
210
(If $x\ge z$, it simply sets $q=x$ and $r=y$.)
211
The quotient~$q$ is returned by the subroutine;
212
the remainder~$r$ is stored in |aux|.
213
@^multiprecision division@>
214
 
215
@=
216
octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>
217
octa odiv(x,y,z)
218
  octa x,y,z;
219
{
220
  register int i,j,k,n,d;
221
  tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;
222
  register tetra t;
223
  octa acc;
224
  @;
225
  @;
226
  @;
227
  @;
228
  for (j=3;j>=0;j--) @;
229
  @;
230
  @;
231
  return acc;
232
}
233
 
234
@ @=
235
if (x.h>z.h || (x.h==z.h && x.l>=z.l)) {
236
  aux=y;@+ return x;
237
}
238
 
239
@ @=
240
u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;
241
u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;
242
v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;
243
 
244
@ @=
245
for (n=4;v[n-1]==0;n--);
246
 
247
@ We shift |u| and |v| left by |d| places, where |d| is chosen to
248
make $2^{15}\le v_{n-1}<2^{16}$.
249
 
250
@=
251
vh=v[n-1];
252
for (d=0;vh<0x8000;d++,vh<<=1);
253
for (j=k=0; j
254
  t=(u[j]<
255
  u[j]=t&0xffff, k=t>>16;
256
}
257
for (j=k=0; j
258
  t=(v[j]<
259
  v[j]=t&0xffff, k=t>>16;
260
}
261
vh=v[n-1];
262
vmh=(n>1? v[n-2]: 0);
263
 
264
@ @=
265
mask=(1<
266
for (j=3; j>=n; j--) u[j]=0;
267
for (k=0;j>=0;j--) {
268
  t=(k<<16)+u[j];
269
  u[j]=t>>d, k=t&mask;
270
}
271
 
272
@ @=
273
acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];
274
aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];
275
 
276
@ @=
277
{
278
  @;
279
  @;
280
  @;
281
  q[j]=qhat;
282
}
283
 
284
@ @=
285
t=(u[j+n]<<16)+u[j+n-1];
286
qhat=t/vh, rhat=t-vh*qhat;
287
if (n>1) while (qhat==0x10000 || qhat*vmh>(rhat<<16)+u[j+n-2]) {
288
  qhat--, rhat+=vh;
289
  if (rhat>=0x10000) break;
290
}
291
 
292
@ After this step, |u[j+n]| will either equal |k| or |k-1|. The
293
true value of~|u| would be obtained by subtracting~|k| from |u[j+n]|;
294
but we don't have to fuss over |u[j+n]|, because it won't be examined later.
295
 
296
@=
297
for (i=k=0; i
298
  t=u[i+j]+0xffff0000-k-qhat*v[i];
299
  u[i+j]=t&0xffff, k=0xffff-(t>>16);
300
}
301
 
302
@ The correction here occurs only rarely, but it can be necessary---for
303
example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.
304
 
305
@=
306
if (u[j+n]!=k) {
307
  qhat--;
308
  for (i=k=0; i
309
    t=u[i+j]+v[i]+k;
310
    u[i+j]=t&0xffff, k=t>>16;
311
  }
312
}
313
 
314
@ Signed division can be reduced to unsigned division in a tedious
315
but straightforward manner. We assume that the divisor isn't zero.
316
 
317
@=
318
octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
319
octa signed_odiv(y,z)
320
  octa y,z;
321
{
322
  octa yy,zz,q;
323
  register int sy,sz;
324
  if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);
325
  else sy=0, yy=y;
326
  if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);
327
  else sz=0, zz=z;
328
  q=odiv(zero_octa,yy,zz);
329
  overflow=false;
330
  switch (sy+sz) {
331
 case 2+1: aux=ominus(zero_octa,aux);
332
  if (q.h==sign_bit) overflow=true;
333
 case 0+0: return q;
334
 case 2+0:@+ if (aux.h || aux.l) aux=ominus(zz,aux);
335
  goto negate_q;
336
 case 0+1:@+ if (aux.h || aux.l) aux=ominus(aux,zz);
337
 negate_q:@+ if (aux.h || aux.l) return ominus(neg_one,q);
338
  else return ominus(zero_octa,q);
339
  }
340
}
341
 
342
@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to
343
implement directly, but three of them occur often enough to deserve
344
packaging as subroutines.
345
 
346
@=
347
octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
348
octa oand(y,z) /* compute $y\land z$ */
349
  octa y,z;
350
{@+ octa x;
351
  x.h=y.h&z.h;@+ x.l=y.l&z.l;
352
  return x;
353
}
354
@#
355
octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
356
octa oandn(y,z) /* compute $y\land\bar z$ */
357
  octa y,z;
358
{@+ octa x;
359
  x.h=y.h&~z.h;@+ x.l=y.l&~z.l;
360
  return x;
361
}
362
@#
363
octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
364
octa oxor(y,z) /* compute $y\oplus z$ */
365
  octa y,z;
366
{@+ octa x;
367
  x.h=y.h^z.h;@+ x.l=y.l^z.l;
368
  return x;
369
}
370
 
371
@ Here's a fun way to count the number of bits in a tetrabyte.
372
[This classical trick is called the ``Gillies--Miller method
373
for sideways addition'' in {\sl The Preparation of Programs
374
for an Electronic Digital Computer\/} by Wilkes, Wheeler, and
375
Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),
376
191--193. Some of the tricks used here were suggested by
377
Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]
378
@^Gillies, Donald Bruce@>
379
@^Miller, Jeffrey Charles Percy@>
380
@^Wilkes, Maurice Vincent@>
381
@^Wheeler, David John@>
382
@^Gill, Stanley@>
383
@^Singh, Balbir@>
384
@^Rossmanith, Peter@>
385
@^Schwoon, Stefan@>
386
 
387
@=
388
int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>
389
int count_bits(x)
390
  tetra x;
391
{
392
  register int xx=x;
393
  xx=xx-((xx>>1)&0x55555555);
394
  xx=(xx&0x33333333)+((xx>>2)&0x33333333);
395
  xx=(xx+(xx>>4))&0x0f0f0f0f;
396
  xx=xx+(xx>>8);
397
  return (xx+(xx>>16)) & 0xff;
398
}
399
 
400
@ To compute the nonnegative byte differences of two given tetrabytes,
401
we can carry out the following 20-step branchless computation:
402
 
403
@=
404
tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>
405
tetra byte_diff(y,z)
406
  tetra y,z;
407
{
408
  register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);
409
  register tetra m=d&0x01000100;
410
  register tetra x=d&(m-(m>>8));
411
  d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);
412
  m=d&0x01000100;
413
  return x+((d&(m-(m>>8)))<<8);
414
}
415
 
416
@ To compute the nonnegative wyde differences of two tetrabytes,
417
another trick leads to a 15-step branchless computation.
418
(Research problem: Can |count_bits|, |byte_diff|, or |wyde_diff| be done
419
with fewer operations?)
420
 
421
@=
422
tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>
423
tetra wyde_diff(y,z)
424
  tetra y,z;
425
{
426
  register tetra a=((y>>16)-(z>>16))&0x10000;
427
  register tetra b=((y&0xffff)-(z&0xffff))&0x10000;
428
  return y-(z^((y^z)&(b-a-(b>>16))));
429
}
430
 
431
@ The last bitwise subroutine we need is the most interesting:
432
It implements \MMIX's \.{MOR} and \.{MXOR} operations.
433
 
434
@=
435
octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>
436
octa bool_mult(y,z,xor)
437
  octa y,z; /* the operands */
438
  bool xor; /* do we do xor instead of or? */
439
{
440
  octa o,x;
441
  register tetra a,b,c;
442
  register int k;
443
  for (k=0,o=y,x=zero_octa;o.h||o.l;k++,o=shift_right(o,8,1))
444
    if (o.l&0xff) {
445
      a=((z.h>>k)&0x01010101)*0xff;
446
      b=((z.l>>k)&0x01010101)*0xff;
447
      c=(o.l&0xff)*0x01010101;
448
      if (xor) x.h^=a&c, x.l^=b&c;
449
      else x.h|=a&c, x.l|=b&c;
450
    }
451
  return x;
452
}
453
 
454
@* Floating point packing and unpacking. Standard IEEE floating binary
455
numbers pack a sign, exponent, and fraction into a tetrabyte
456
or octabyte. In this section we consider basic subroutines that
457
convert between IEEE format and the separate unpacked components.
458
 
459
@d ROUND_OFF 1
460
@d ROUND_UP 2
461
@d ROUND_DOWN 3
462
@d ROUND_NEAR 4
463
 
464
@=
465
int cur_round; /* the current rounding mode */
466
 
467
@ The |fpack| routine takes an octabyte $f$, a raw exponent~$e$,
468
and a sign~|s|, and packs them
469
into the floating binary number that corresponds to
470
$\pm2^{e-1076}f$, using a given rounding mode.
471
The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.
472
 
473
Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$
474
is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and |s='+'|.
475
The raw exponent~$e$ is usually one less than
476
the final exponent value; the leading bit of~$f$ is essentially added
477
to the exponent. (This trick works nicely for subnormal numbers, when
478
$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)
479
 
480
Exceptional events are noted by oring appropriate bits into
481
the global variable |exceptions|. Special considerations apply to
482
underflow, which is not fully specified by Section 7.4 of the IEEE standard:
483
Implementations of the standard are free to choose between two definitions
484
of ``tininess'' and two definitions of ``accuracy loss.''
485
\MMIX\ determines tininess {\it after\/} rounding, hence a result with
486
$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent
487
to inexactness. Thus, a result underflows if and only if
488
it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.
489
The |fpack| routine sets |U_BIT| in |exceptions| if and only if the result is
490
tiny, |X_BIT| if and only if the result is inexact.
491
@^underflow@>
492
 
493
@d X_BIT (1<<8) /* floating inexact */
494
@d Z_BIT (1<<9) /* floating division by zero */
495
@d U_BIT (1<<10) /* floating underflow */
496
@d O_BIT (1<<11) /* floating overflow */
497
@d I_BIT (1<<12) /* floating invalid operation */
498
@d W_BIT (1<<13) /* float-to-fix overflow */
499
@d V_BIT (1<<14) /* integer overflow */
500
@d D_BIT (1<<15) /* integer divide check */
501
@d E_BIT (1<<18) /* external (dynamic) trap bit */
502
 
503
@=
504
octa fpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>
505
octa fpack(f,e,s,r)
506
  octa f; /* the normalized fraction part */
507
  int e; /* the raw exponent */
508
  char s; /* the sign */
509
  int r; /* the rounding mode */
510
{
511
  octa o;
512
  if (e>0x7fd) e=0x7ff, o=zero_octa;
513
  else {
514
    if (e<0) {
515
      if (e<-54) o.h=0, o.l=1;
516
      else {@+octa oo;
517
        o=shift_right(f,-e,1);
518
        oo=shift_left(o,-e);
519
        if (oo.l!=f.l || oo.h!=f.h) o.l |= 1; /* sticky bit */
520
@^sticky bit@>
521
      }
522
      e=0;
523
    }@+else o=f;
524
  }
525
  @;
526
}
527
 
528
@ @=
529
int exceptions; /* bits possibly destined for rA */
530
 
531
@ Everything falls together so nicely here, it's almost too good to be true!
532
 
533
@=
534
if (o.l&3) exceptions |= X_BIT;
535
switch (r) {
536
 case ROUND_DOWN:@+ if (s=='-') o=incr(o,3);@+break;
537
 case ROUND_UP:@+ if (s!='-') o=incr(o,3);
538
 case ROUND_OFF: break;
539
 case ROUND_NEAR: o=incr(o, o.l&4? 2: 1);@+break;
540
}
541
o = shift_right(o,2,1);
542
o.h += e<<20;
543
if (o.h>=0x7ff00000) exceptions |= O_BIT+X_BIT; /* overflow */
544
else if (o.h<0x100000) exceptions |= U_BIT; /* tininess */
545
if (s=='-') o.h |= sign_bit;
546
return o;
547
 
548
@ Similarly, |sfpack| packs a short float, from inputs
549
having the same conventions as |fpack|.
550
 
551
@=
552
tetra sfpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>
553
tetra sfpack(f,e,s,r)
554
  octa f; /* the fraction part */
555
  int e; /* the raw exponent */
556
  char s; /* the sign */
557
  int r; /* the rounding mode */
558
{
559
  register tetra o;
560
  if (e>0x47d) e=0x47f, o=0;
561
  else {
562
    o=shift_left(f,3).h;
563
    if (f.l&0x1fffffff) o|=1;
564
    if (e<0x380) {
565
      if (e<0x380-25) o=1;
566
      else {@+register tetra o0,oo;
567
        o0 = o;
568
        o = o>>(0x380-e);
569
        oo = o<<(0x380-e);
570
        if (oo!=o0) o |= 1; /* sticky bit */
571
@^sticky bit@>
572
      }
573
      e=0x380;
574
    }
575
  }
576
  @;
577
}
578
 
579
@ @=
580
if (o&3) exceptions |= X_BIT;
581
switch (r) {
582
 case ROUND_DOWN:@+ if (s=='-') o+=3;@+break;
583
 case ROUND_UP:@+ if (s!='-') o+=3;
584
 case ROUND_OFF: break;
585
 case ROUND_NEAR: o+=(o&4? 2: 1);@+break;
586
}
587
o = o>>2;
588
o += (e-0x380)<<23;
589
if (o>=0x7f800000) exceptions |= O_BIT+X_BIT; /* overflow */
590
else if (o<0x100000) exceptions |= U_BIT; /* tininess */
591
if (s=='-') o |= sign_bit;
592
return o;
593
 
594
@ The |funpack| routine is, roughly speaking, the opposite of |fpack|.
595
It takes a given floating point number~$x$ and separates out its
596
fraction part~$f$, exponent~$e$, and sign~$s$. It clears |exceptions|
597
to zero. It returns the type of value found: |zro|, |num|, |inf|,
598
or |nan|. When it returns |num|,
599
it will have set $f$, $e$, and~$s$
600
to the values from which |fpack| would produce the original number~$x$
601
without exceptions.
602
 
603
@d zero_exponent (-1000) /* zero is assumed to have this exponent */
604
 
605
@=
606
typedef enum {@!zro,@!num,@!inf,@!nan}@+ftype;
607
 
608
@ @=
609
ftype funpack @,@,@[ARGS((octa,octa*,int*,char*))@];@+@t}\6{@>
610
ftype funpack(x,f,e,s)
611
  octa x; /* the given floating point value */
612
  octa *f; /* address where the fraction part should be stored */
613
  int *e; /* address where the exponent part should be stored */
614
  char *s; /* address where the sign should be stored */
615
{
616
  register int ee;
617
  exceptions=0;
618
  *s=(x.h&sign_bit? '-': '+');
619
  *f=shift_left(x,2);
620
  f->h &= 0x3fffff;
621
  ee=(x.h>>20)&0x7ff;
622
  if (ee) {
623
    *e=ee-1;
624
    f->h |= 0x400000;
625
    return (ee<0x7ff? num: f->h==0x400000 && !f->l? inf: nan);
626
  }
627
  if (!x.l && !f->h) {
628
    *e=zero_exponent;@+ return zro;
629
  }
630
  do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));
631
  *e=ee;@+ return num;
632
}
633
 
634
@ @=
635
ftype sfunpack @,@,@[ARGS((tetra,octa*,int*,char*))@];@+@t}\6{@>
636
ftype sfunpack(x,f,e,s)
637
  tetra x; /* the given floating point value */
638
  octa *f; /* address where the fraction part should be stored */
639
  int *e; /* address where the exponent part should be stored */
640
  char *s; /* address where the sign should be stored */
641
{
642
  register int ee;
643
  exceptions=0;
644
  *s=(x&sign_bit? '-': '+');
645
  f->h=(x>>1)&0x3fffff, f->l=x<<31;
646
  ee=(x>>23)&0xff;
647
  if (ee) {
648
    *e=ee+0x380-1;
649
    f->h |= 0x400000;
650
    return (ee<0xff? num: (x&0x7fffffff)==0x7f800000? inf: nan);
651
  }
652
  if (!(x&0x7fffffff)) {
653
    *e=zero_exponent;@+return zro;
654
  }
655
  do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));
656
  *e=ee+0x380;@+ return num;
657
}
658
 
659
@ Since \MMIX\ downplays 32-bit operations, it uses |sfpack| and |sfunpack|
660
only when loading and storing short floats, or when converting
661
from fixed point to floating point.
662
 
663
@=
664
octa load_sf @,@,@[ARGS((tetra))@];@+@t}\6{@>
665
octa load_sf(z)
666
  tetra z; /* 32 bits to be loaded into a 64-bit register */
667
{
668
  octa f,x;@+int e;@+char s;@+ftype t;
669
  t=sfunpack(z,&f,&e,&s);
670
  switch (t) {
671
 case zro: x=zero_octa;@+break;
672
 case num: return fpack(f,e,s,ROUND_OFF);
673
 case inf: x=inf_octa;@+break;
674
 case nan: x=shift_right(f,2,1);@+x.h|=0x7ff00000;@+break;
675
  }
676
  if (s=='-') x.h|=sign_bit;
677
  return x;
678
}
679
 
680
@ @=
681
tetra store_sf @,@,@[ARGS((octa))@];@+@t}\6{@>
682
tetra store_sf(x)
683
  octa x; /* 64 bits to be loaded into a 32-bit word */
684
{
685
  octa f;@+tetra z;@+int e;@+char s;@+ftype t;
686
  t=funpack(x,&f,&e,&s);
687
  switch (t) {
688
 case zro: z=0;@+break;
689
 case num: return sfpack(f,e,s,cur_round);
690
 case inf: z=0x7f800000;@+break;
691
 case nan:@+ if (!(f.h&0x200000)) {
692
      f.h|=0x200000;@+exceptions|=I_BIT; /* NaN was signaling */
693
    }
694
    z=0x7f800000|(f.h<<1)|(f.l>>31);@+break;
695
  }
696
  if (s=='-') z|=sign_bit;
697
  return z;
698
}
699
 
700
@* Floating multiplication and division.
701
The hardest fixed point operations were multiplication and division;
702
but these two operations are the {\it easiest\/} to implement in floating point
703
arithmetic, once their fixed point counterparts are available.
704
 
705
@=
706
octa fmult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
707
octa fmult(y,z)
708
  octa y,z;
709
{
710
  ftype yt,zt;
711
  int ye,ze;
712
  char ys,zs;
713
  octa x,xf,yf,zf;
714
  register int xe;
715
  register char xs;
716
  yt=funpack(y,&yf,&ye,&ys);
717
  zt=funpack(z,&zf,&ze,&zs);
718
  xs=ys+zs-'+'; /* will be |'-'| when the result is negative */
719
  switch (4*yt+zt) {
720
 @t\4@>@;
721
 case 4*zro+zro: case 4*zro+num: case 4*num+zro: x=zero_octa;@+break;
722
 case 4*num+inf: case 4*inf+num: case 4*inf+inf: x=inf_octa;@+break;
723
 case 4*zro+inf: case 4*inf+zro: x=standard_NaN;
724
  exceptions|=I_BIT;@+break;
725
 case 4*num+num: @;
726
  }
727
  if (xs=='-') x.h|=sign_bit;
728
  return x;
729
}
730
 
731
@ @=
732
case 4*nan+nan:@+if (!(y.h&0x80000)) exceptions|=I_BIT; /* |y| is signaling */
733
case 4*zro+nan: case 4*num+nan: case 4*inf+nan:
734
  if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;
735
  return z;
736
case 4*nan+zro: case 4*nan+num: case 4*nan+inf:
737
  if (!(y.h&0x80000)) exceptions|=I_BIT, y.h|=0x80000;
738
  return y;
739
 
740
@ @=
741
xe=ye+ze-0x3fd; /* the raw exponent */
742
x=omult(yf,shift_left(zf,9));
743
if (aux.h>=0x400000) xf=aux;
744
else xf=shift_left(aux,1), xe--;
745
if (x.h||x.l) xf.l|=1; /* adjust the sticky bit */
746
return fpack(xf,xe,xs,cur_round);
747
 
748
@ @=
749
octa fdivide @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
750
octa fdivide(y,z)
751
  octa y,z;
752
{
753
  ftype yt,zt;
754
  int ye,ze;
755
  char ys,zs;
756
  octa x,xf,yf,zf;
757
  register int xe;
758
  register char xs;
759
  yt=funpack(y,&yf,&ye,&ys);
760
  zt=funpack(z,&zf,&ze,&zs);
761
  xs=ys+zs-'+'; /* will be |'-'| when the result is negative */
762
  switch (4*yt+zt) {
763
 @t\4@>@;
764
 case 4*zro+inf: case 4*zro+num: case 4*num+inf: x=zero_octa;@+break;
765
 case 4*num+zro: exceptions|=Z_BIT;
766
 case 4*inf+num: case 4*inf+zro: x=inf_octa;@+break;
767
 case 4*zro+zro: case 4*inf+inf: x=standard_NaN;
768
  exceptions|=I_BIT;@+break;
769
 case 4*num+num: @;
770
  }
771
  if (xs=='-') x.h|=sign_bit;
772
  return x;
773
}
774
 
775
@ @=
776
xe=ye-ze+0x3fd; /* the raw exponent */
777
xf=odiv(yf,zero_octa,shift_left(zf,9));
778
if (xf.h>=0x800000) {
779
  aux.l|=xf.l&1;
780
  xf=shift_right(xf,1,1);
781
  xe++;
782
}
783
if (aux.h||aux.l) xf.l|=1; /* adjust the sticky bit */
784
return fpack(xf,xe,xs,cur_round);
785
 
786
@*Floating addition and subtraction. Now for the bread-and-butter
787
operation, the sum of two floating point numbers.
788
It is not terribly difficult, but many cases need to be handled carefully.
789
 
790
@=
791
octa fplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
792
octa fplus(y,z)
793
  octa y,z;
794
{
795
  ftype yt,zt;
796
  int ye,ze;
797
  char ys,zs;
798
  octa x,xf,yf,zf;
799
  register int xe,d;
800
  register char xs;
801
  yt=funpack(y,&yf,&ye,&ys);
802
  zt=funpack(z,&zf,&ze,&zs);
803
  switch (4*yt+zt) {
804
 @t\4@>@;
805
 case 4*zro+num: return fpack(zf,ze,zs,ROUND_OFF);@+break; /* may underflow */
806
 case 4*num+zro: return fpack(yf,ye,ys,ROUND_OFF);@+break; /* may underflow */
807
 case 4*inf+inf:@+if (ys!=zs) {
808
    exceptions|=I_BIT;@+x=standard_NaN;@+xs=zs;@+break;
809
  }
810
 case 4*num+inf: case 4*zro+inf: x=inf_octa;@+xs=zs;@+break;
811
 case 4*inf+num: case 4*inf+zro: x=inf_octa;@+xs=ys;@+break;
812
 case 4*num+num:@+ if (y.h!=(z.h^0x80000000) || y.l!=z.l)
813
   @;
814
 case 4*zro+zro: x=zero_octa;
815
  xs=(ys==zs? ys: cur_round==ROUND_DOWN? '-': '+');@+break;
816
  }
817
  if (xs=='-') x.h|=sign_bit;
818
  return x;
819
}
820
 
821
@ @=
822
{@+octa o,oo;
823
  if (ye
824
    @;
825
  d=ye-ze;
826
  xs=ys, xe=ye;
827
  if (d) @;
828
  if (ys==zs) {
829
    xf=oplus(yf,zf);
830
    if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;
831
  }@+else {
832
    xf=ominus(yf,zf);
833
    if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;
834
    else@+ while (xf.h<0x400000) xe--, xf=shift_left(xf,1);
835
  }
836
  return fpack(xf,xe,xs,cur_round);
837
}
838
 
839
@ @=
840
{
841
  o=yf, yf=zf, zf=o;
842
  d=ye, ye=ze, ze=d;
843
  d=ys, ys=zs, zs=d;
844
}
845
 
846
@ Proper rounding requires two bits to the right of the fraction delivered
847
to~|fpack|. The first is the true next bit of the result;
848
the other is a ``sticky'' bit, which is nonzero if any further bits of the
849
true result are nonzero. Sticky rounding to an integer takes
850
$x$ into the number $\lfloor x/2\rfloor+\lceil x/2\rceil$.
851
@^sticky bit@>
852
 
853
Some subtleties need to be observed here, in order to
854
prevent the sticky bit from being shifted left. If we did not
855
shift |yf| left~1 before shifting |zf| to the right, an incorrect
856
answer would be obtained in certain cases---for example, if
857
$|yf|=2^{54}$, $|zf|=2^{54}+2^{53}-1$, $d=52$.
858
 
859
@=
860
{
861
  if (d<=2) zf=shift_right(zf,d,1); /* exact result */
862
  else if (d>53) zf.h=0, zf.l=1; /* tricky but OK */
863
  else {
864
    if (ys!=zs) d--,xe--,yf=shift_left(yf,1);
865
    o=zf;
866
    zf=shift_right(o,d,1);
867
    oo=shift_left(zf,d);
868
    if (oo.l!=o.l || oo.h!=o.h) zf.l|=1;
869
  }
870
}
871
 
872
@ The comparison of floating point numbers with respect to $\epsilon$
873
shares some of the characteristics of floating point addition/subtraction.
874
In some ways it is simpler, and in other ways it is more difficult;
875
we might as well deal with it now. % anyways
876
 
877
Subroutine |fepscomp(y,z,e,s)| returns 2 if |y|, |z|, or |e| is a NaN
878
or |e| is negative. It returns 1 if |s=0| and $y\approx z\ (e)$ or if
879
|s!=0| and $y\sim z\ (e)$,
880
as defined in Section~4.2.2 of {\sl Seminumerical Algorithms\/};
881
otherwise it returns~0.
882
 
883
@=
884
int fepscomp @,@,@[ARGS((octa,octa,octa,int))@];@+@t}\6{@>
885
int fepscomp(y,z,e,s)
886
  octa y,z,e; /* the operands */
887
  int s; /* test similarity? */
888
{
889
  octa yf,zf,ef,o,oo;
890
  int ye,ze,ee;
891
  char ys,zs,es;
892
  register int yt,zt,et,d;
893
  et=funpack(e,&ef,&ee,&es);
894
  if (es=='-') return 2;
895
  switch (et) {
896
 case nan: return 2;
897
 case inf: ee=10000;
898
 case num: case zro: break;
899
  }
900
  yt=funpack(y,&yf,&ye,&ys);
901
  zt=funpack(z,&zf,&ze,&zs);
902
  switch (4*yt+zt) {
903
 case 4*nan+nan: case 4*nan+inf: case 4*nan+num: case 4*nan+zro:
904
 case 4*inf+nan: case 4*num+nan: case 4*zro+nan: return 2;
905
 case 4*inf+inf: return (ys==zs || ee>=1023);
906
 case 4*inf+num: case 4*inf+zro: case 4*num+inf: case 4*zro+inf:
907
   return (s && ee>=1022);
908
 case 4*zro+zro: return 1;
909
 case 4*zro+num: case 4*num+zro:@+ if (!s) return 0;
910
 case 4*num+num: break;
911
  }
912
  @;
913
}
914
 
915
@ The relation $y\approx z\ (\epsilon)$ reduces to
916
$y\sim z\ (\epsilon/2^d)$, if $d$~is the difference between the
917
larger and smaller exponents of $y$ and~$z$.
918
 
919
@=
920
@;
921
if (ye
922
  @;
923
if (ze==zero_exponent) ze=ye;
924
d=ye-ze;
925
if (!s) ee-=d;
926
if (ee>=1023) return 1; /* if $\epsilon\ge2$, $z\in N_\epsilon(y)$ */
927
@;
928
if (!o.h && !o.l) return 1;
929
if (ee<968) return 0; /* if $y\ne z$ and $\epsilon<2^{-54}$, $y\not\sim z$ */
930
if (ee>=1021) ef=shift_left(ef,ee-1021);
931
else ef=shift_right(ef,1021-ee,1);
932
return o.h
933
 
934
@ @=
935
if (ye<0 && yt!=zro) yf=shift_left(y,2), ye=0;
936
if (ze<0 && zt!=zro) zf=shift_left(z,2), ze=0;
937
 
938
@ At this point $y\sim z$ if and only if
939
$$|yf|+(-1)^{[ys=zs]}|zf|/2^d\le 2^{ee-1021}|ef|=2^{55}\epsilon.$$
940
We need to evaluate this relation without overstepping the bounds of
941
our simulated 64-bit registers.
942
 
943
When $d>2$, the difference of fraction parts might not fit exactly
944
in an octabyte;
945
in that case the numbers are not similar unless $\epsilon>3/8$,
946
and we replace the difference by the ceiling of the
947
true result. When $\epsilon<1/8$, our program essentially replaces
948
$2^{55}\epsilon$ by $\lfloor2^{55}\epsilon\rfloor$. These
949
truncations are not needed simultaneously. Therefore the logic
950
is justified by the facts that, if $n$ is an integer, we have
951
$x\le n$ if and only if $\lceil x\rceil\le n$;
952
$n\le x$ if and only if $n\le\lfloor x\rfloor$. (Notice that the
953
concept of ``sticky bit'' is {\it not\/} appropriate here.)
954
@^sticky bit@>
955
 
956
@=
957
if (d>54) o=zero_octa,oo=zf;
958
else o=shift_right(zf,d,1),oo=shift_left(o,d);
959
if (oo.h!=zf.h || oo.l!=zf.l) { /* truncated result, hence $d>2$ */
960
  if (ee<1020) return 0; /* difference is too large for similarity */
961
  o=incr(o,ys==zs? 0: 1); /* adjust for ceiling */
962
}
963
o=(ys==zs? ominus(yf,o): oplus(yf,o));
964
 
965
@*Floating point output conversion.
966
The |print_float| routine converts an octabyte to a floating decimal
967
representation that will be input as precisely the same value.
968
@^binary-to-decimal conversion@>
969
@^radix conversion@>
970
@^multiprecision conversion@>
971
 
972
@=
973
static void bignum_times_ten @,@,@[ARGS((bignum*))@];
974
static void bignum_dec @,@,@[ARGS((bignum*,bignum*,tetra))@];
975
static int bignum_compare @,@,@[ARGS((bignum*,bignum*))@];
976
void print_float @,@,@[ARGS((octa))@];@+@t}\6{@>
977
void print_float(x)
978
  octa x;
979
{
980
  @;
981
  if (x.h&sign_bit) printf("-");
982
  @
983
       fraction interval $[f\dts g]$ or $(f\dts g)$@>;
984
  @;
985
  @;
986
  @;
987
}
988
 
989
@ One way to visualize the problem being solved here is to consider
990
the vastly simpler case in which there are only 2-bit exponents
991
and 2-bit fractions. Then the sixteen possible 4-bit combinations
992
have the following interpretations:
993
$$\def\\{\;\dts\;}
994
\vbox{\halign{#\qquad&$#$\hfil\cr
995
0000&[0\\0.125]\cr
996
0001&(0.125\\0.375)\cr
997
0010&[0.375\\0.625]\cr
998
0011&(0.625\\0.875)\cr
999
0100&[0.875\\1.125]\cr
1000
0101&(1.125\\1.375)\cr
1001
0110&[1.375\\1.625]\cr
1002
0111&(1.625\\1.875)\cr
1003
1000&[1.875\\2.25]\cr
1004
1001&(2.25\\2.75)\cr
1005
1010&[2.75\\3.25]\cr
1006
1011&(3.25\\3.75)\cr
1007
1100&[3.75\\\infty]\cr
1008
1101&\rm NaN(0\\0.375)\cr
1009
1110&\rm NaN[0.375\\0.625]\cr
1010
1111&\rm NaN(0.625\\1)\cr}}$$
1011
Notice that the interval is closed, $[f\dts g]$, when the fraction part
1012
is even; it is open, $(f\dts g)$, when the fraction part is odd.
1013
The printed outputs for these sixteen values, if we actually were
1014
dealing with such short exponents and fractions, would be
1015
\.{0.}, \.{.2}, \.{.5}, \.{.7}, \.{1.}, \.{1.2}, \.{1.5}, \.{1.7},
1016
\.{2.}, \.{2.5}, \.{3.}, \.{3.5}, \.{Inf}, \.{NaN.2}, \.{NaN}, \.{NaN.8},
1017
respectively.
1018
 
1019
@=
1020
f=shift_left(x,1);
1021
e=f.h>>21;
1022
f.h&=0x1fffff;
1023
if (!f.h && !f.l) @@;
1024
else {
1025
  g=incr(f,1);
1026
  f=incr(f,-1);
1027
  if (!e) e=1; /* subnormal */
1028
  else if (e==0x7ff) {
1029
    printf("NaN");
1030
    if (g.h==0x100000 && g.l==1) return; /* the ``standard'' NaN */
1031
    e=0x3ff; /* extreme NaNs come out OK even without adjusting |f| or |g| */
1032
  }@+else f.h|=0x200000, g.h|=0x200000;
1033
}
1034
 
1035
@ @=
1036
octa f,g; /* lower and upper bounds on the fraction part */
1037
register int e; /* exponent part */
1038
register int j,k; /* all purpose indices */
1039
 
1040
@ The transition points between exponents correspond to powers of~2. At
1041
such points the interval extends only half as far to the left of that
1042
power of~2 as it does to the right. For example, in the 4-bit minifloat numbers
1043
considered above, case 1000 corresponds to the interval $[1.875\;\dts\;2.25]$.
1044
 
1045
@=
1046
{
1047
  if (!e) {
1048
    printf("0.");@+return;
1049
  }
1050
  if (e==0x7ff) {
1051
    printf("Inf");@+return;
1052
  }
1053
  e--;
1054
  f.h=0x3fffff, f.l=0xffffffff;
1055
  g.h=0x400000, g.l=2;
1056
}
1057
 
1058
@ We want to find the ``simplest'' value in the interval corresponding
1059
to the given number, in the sense that it has fewest significant
1060
digits when expressed in decimal notation. Thus, for example,
1061
if the floating point number can be described by a relatively
1062
short string such as `\.{.1}' or `\.{37e100}', we want to discover that
1063
representation.
1064
 
1065
The basic idea is to generate the decimal representations of the
1066
two endpoints of the interval, outputting the leading digits where
1067
both endpoints agree, then making a final decision at the first place where
1068
they disagree.
1069
 
1070
The ``simplest'' value is not always unique. For example, in the
1071
case of 4-bit minifloat numbers we could represent the bit pattern 0001 as
1072
either \.{.2} or \.{.3}, and we could represent 1001 in five equally short
1073
ways: \.{2.3} or \.{2.4} or \.{2.5} or \.{2.6} or \.{2.7}. The
1074
algorithm below tries to choose the middle possibility in such cases.
1075
 
1076
[A solution to the analogous problem for fixed-point representations,
1077
without the additional complication of round-to-even, was used by
1078
the author in the program for \TeX; see {\sl Beauty is Our Business\/}
1079
(Springer, 1990), 233--242.]
1080
@^Knuth, Donald Ervin@>
1081
 
1082
Suppose we are given two fractions $f$ and $g$, where $0\le f
1083
we want to compute the shortest decimal in the closed interval $[f\dts g]$.
1084
If $f=0$, we are done. Otherwise let $10f=d+f'$ and $10g=e+g'$, where
1085
$0\le f'<1$ and $0\le g'<1$. If $d
1086
any of the digits $d+1$, \dots,~$e$; otherwise we output the
1087
common digit $d=e$, and repeat the process on the fractions $0\le f'
1088
A similar procedure works with respect to the open interval $(f\dts g)$.
1089
 
1090
@ The program below carries out the stated algorithm by using multiprecision
1091
arithmetic on 77-place integers with 28 bits each. This choice
1092
facilitates multiplication by~10, and allows us to deal with the whole range of
1093
floating binary numbers using fixed point arithmetic. We keep track of
1094
the leading and trailing digit positions so that trivial operations on
1095
zeros are avoided.
1096
 
1097
If |f| points to a \&{bignum}, its radix-$2^{28}$ digits are
1098
|f->dat[0]| through |f->dat[76]|, from most significant to least significant.
1099
We assume that all digit positions are zero unless they lie in the
1100
subarray between indices |f->a| and |f->b|, inclusive.
1101
Furthermore, both |f->dat[f->a]| and |f->dat[f->b]| are nonzero,
1102
unless |f->a=f->b=bignum_prec-1|.
1103
 
1104
The \&{bignum} data type can be used with any radix less than
1105
$2^{32}$; we will use it later with radix~$10^9$. The |dat| array
1106
is made large enough to accommodate both applications.
1107
 
1108
@d bignum_prec 157 /* would be 77 if we cared only about |print_float| */
1109
 
1110
@=
1111
typedef struct {
1112
  int a; /* index of the most significant digit */
1113
  int b; /* index of the least significant digit; must be $\ge a$ */
1114
  tetra dat[bignum_prec]; /* the digits; undefined except between |a| and |b| */
1115
} bignum;
1116
 
1117
@ Here, for example, is how we go from $f$ to $10f$, assuming that
1118
overflow will not occur and that the radix is $2^{28}$:
1119
 
1120
@=
1121
static void bignum_times_ten(f)
1122
  bignum *f;
1123
{
1124
  register tetra *p,*q; register tetra x,carry;
1125
  for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {
1126
    x=*p*10+carry;
1127
    *p=x&0xfffffff;
1128
    carry=x>>28;
1129
  }
1130
  *p=carry;
1131
  if (carry) f->a--;
1132
  if (f->dat[f->b]==0 && f->b>f->a) f->b--;
1133
}
1134
 
1135
@ And here is how we test whether $fg$, using any
1136
radix whatever:
1137
 
1138
@=
1139
static int bignum_compare(f,g)
1140
  bignum *f,*g;
1141
{
1142
  register tetra *p,*pp,*q,*qq;
1143
  if (f->a!=g->a) return f->a > g->a? -1: 1;
1144
  pp=&f->dat[f->b], qq=&g->dat[g->b];
1145
  for (p=&f->dat[f->a],q=&g->dat[g->a]; p<=pp; p++,q++) {
1146
    if (*p!=*q) return *p<*q? -1: 1;
1147
    if (q==qq) return p
1148
  }
1149
  return -1;
1150
}
1151
 
1152
@ The following subroutine subtracts $g$ from~$f$, assuming that
1153
$f\ge g>0$ and using a given radix.
1154
 
1155
@=
1156
static void bignum_dec(f,g,r)
1157
  bignum *f,*g;
1158
  tetra r; /* the radix */
1159
{
1160
  register tetra *p,*q,*qq;
1161
  register int x,borrow;
1162
  while (g->b>f->b) f->dat[++f->b]=0;
1163
  qq=&g->dat[g->a];
1164
  for (p=&f->dat[g->b],q=&g->dat[g->b],borrow=0;q>=qq;p--,q--) {
1165
    x=*p - *q - borrow;
1166
    if (x>=0) borrow=0, *p=x;
1167
    else borrow=1, *p=x+r;
1168
  }
1169
  for (;borrow;p--)
1170
    if (*p) borrow=0, *p=*p-1;
1171
    else *p=r-1;
1172
  while (f->dat[f->a]==0) {
1173
    if (f->a==f->b) { /* the result is zero */
1174
      f->a=f->b=bignum_prec-1, f->dat[bignum_prec-1]=0;
1175
      return;
1176
    }
1177
    f->a++;
1178
  }
1179
  while (f->dat[f->b]==0) f->b--;
1180
}
1181
 
1182
@ Armed with these subroutines, we are ready to solve the problem.
1183
The first task is to put the numbers into \&{bignum} form.
1184
If the exponent is |e|, the number destined for digit |dat[k]| will
1185
consist of the rightmost 28 bits of the given fraction after it has
1186
been shifted right $c-e-28k$ bits, for some constant~$c$.
1187
We choose $c$ so that,
1188
when $e$ has its maximum value \Hex{7ff}, the leading digit will
1189
go into position |dat[1]|, and so that when the number to be printed
1190
is exactly~1 the integer part of~$g$ will also be exactly~1.
1191
 
1192
@d magic_offset 2112 /* the constant $c$ that makes it work */
1193
@d origin 37 /* the radix point follows |dat[37]| */
1194
 
1195
@=
1196
k=(magic_offset-e)/28;
1197
ff.dat[k-1]=shift_right(f,magic_offset+28-e-28*k,1).l&0xfffffff;
1198
gg.dat[k-1]=shift_right(g,magic_offset+28-e-28*k,1).l&0xfffffff;
1199
ff.dat[k]=shift_right(f,magic_offset-e-28*k,1).l&0xfffffff;
1200
gg.dat[k]=shift_right(g,magic_offset-e-28*k,1).l&0xfffffff;
1201
ff.dat[k+1]=shift_left(f,e+28*k-(magic_offset-28)).l&0xfffffff;
1202
gg.dat[k+1]=shift_left(g,e+28*k-(magic_offset-28)).l&0xfffffff;
1203
ff.a=(ff.dat[k-1]? k-1: k);
1204
ff.b=(ff.dat[k+1]? k+1: k);
1205
gg.a=(gg.dat[k-1]? k-1: k);
1206
gg.b=(gg.dat[k+1]? k+1: k);
1207
 
1208
@ If $e$ is sufficiently small, the fractions $f$ and $g$ will be less than~1,
1209
and we can use the stated algorithm directly. Of course, if $e$ is
1210
extremely small, a lot of leading zeros need to be lopped off; in the
1211
worst case, we may have to multiply $f$ and~$g$ by~10 more than 300 times.
1212
But hey, we don't need to do that extremely often, and computers are
1213
pretty fast nowadays.
1214
 
1215
In the small-exponent case, the computation always terminates before
1216
$f$ becomes zero, because the interval endpoints are fractions with
1217
denominator $2^t$ for some $t>50$.
1218
 
1219
The invariant relations |ff.dat[ff.a]!=0| and |gg.dat[gg.a]!=0| are
1220
not maintained by the computation here, when |ff.a=origin| or |gg.a=origin|.
1221
But no harm is done, because |bignum_compare| is not used.
1222
 
1223
@=
1224
if (e>0x401) @@;
1225
else@+{ /* if |e<=0x401| we have |gg.a>=origin| and |gg.dat[origin]<=8| */
1226
  if (ff.a>origin) ff.dat[origin]=0;
1227
  for (e=1, p=s; gg.a>origin || ff.dat[origin]==gg.dat[origin]; ) {
1228
    if (gg.a>origin) e--;
1229
    else *p++=ff.dat[origin]+'0', ff.dat[origin]=0, gg.dat[origin]=0;
1230
    bignum_times_ten(&ff);
1231
    bignum_times_ten(&gg);
1232
  }
1233
  *p++=((ff.dat[origin]+1+gg.dat[origin])>>1)+'0'; /* the middle digit */
1234
}
1235
*p='\0'; /* terminate the string |s| */
1236
 
1237
@ When |e| is large, we use the stated algorithm by considering $f$ and
1238
$g$ to be fractions whose denominator is a power of~10.
1239
 
1240
An interesting case arises when the number to be converted is
1241
\Hex{44ada56a4b0835bf}, since the interval turns out to be
1242
$$ (69999999999999991611392\ \ \dts\ \ 70000000000000000000000).$$
1243
If this were a closed interval, we could simply give the answer
1244
\.{7e22}; but the number \.{7e22} actually corresponds to
1245
\Hex{44ada56a4b0835c0}
1246
because of the round-to-even rule. Therefore the correct answer is, say,
1247
\.{6.9999999999999995e22}. This example shows that we need a slightly
1248
different strategy in the case of open intervals; we cannot simply
1249
look at the first position in which the endpoints have different
1250
decimal digits. Therefore we change the invariant relation to $0\le f
1251
when open intervals are involved,
1252
and we do not terminate the process when $f=0$ or $g=1$.
1253
 
1254
@=
1255
{@+register int open=x.l&1;
1256
  tt.dat[origin]=10;
1257
  tt.a=tt.b=origin;
1258
  for (e=1;bignum_compare(&gg,&tt)>=open;e++)
1259
    bignum_times_ten(&tt);
1260
  p=s;
1261
  while (1) {
1262
    bignum_times_ten(&ff);
1263
    bignum_times_ten(&gg);
1264
    for (j='0';bignum_compare(&ff,&tt)>=0;j++)
1265
      bignum_dec(&ff,&tt,0x10000000),bignum_dec(&gg,&tt,0x10000000);
1266
    if (bignum_compare(&gg,&tt)>=open) break;
1267
    *p++=j;
1268
    if (ff.a==bignum_prec-1 && !open)
1269
      goto done; /* $f=0$ in a closed interval */
1270
  }
1271
  for (k=j;bignum_compare(&gg,&tt)>=open;k++) bignum_dec(&gg,&tt,0x10000000);
1272
  *p++=(j+1+k)>>1; /* the middle digit */
1273
 done:;
1274
}
1275
 
1276
@ The length of string~|s| will be at most 17. For if $f$ and $g$
1277
agree to 17 places, we have $g/f<1+10^{-16}$; but the
1278
ratio $g/f$ is always $\ge(1+2^{-52}+2^{-53})/(1+2^{-52}-2^{-53})
1279
>1+2\times10^{-16}$.
1280
 
1281
@=
1282
bignum ff,gg; /* fractions or numerators of fractions */
1283
bignum tt; /* power of ten (used as the denominator) */
1284
char s[18];
1285
register char *p;
1286
 
1287
@ At this point the significant digits are in string |s|, and |s[0]!='0'|.
1288
If we put a decimal point at the left of~|s|, the result should
1289
be multiplied by $10^e$.
1290
 
1291
We prefer the output `\.{300.}' to the form `\.{3e2}', and we prefer
1292
`\.{.03}' to `\.{3e-2}'. In general, the output will use an
1293
explicit exponent only if the alternative would take more than
1294
18~characters.
1295
 
1296
@=
1297
if (e>17 || e<(int)strlen(s)-17)
1298
  printf("%c%s%se%d",s[0],(s[1]? ".": ""),s+1,e-1);
1299
else if (e<0) printf(".%0*d%s",-e,0,s);
1300
else if (strlen(s)>=e) printf("%.*s.%s",e,s,s+e);
1301
else printf("%s%0*d.",s,e-(int)strlen(s),0);
1302
 
1303
@*Floating point input conversion. Going the other way, we want to
1304
be able to convert a given decimal number into its floating binary
1305
@^decimal-to-binary conversion@>
1306
@^radix conversion@>
1307
@^multiprecision conversion@>
1308
equivalent. The following syntax is supported:
1309
$$\vbox{\halign{$#$\hfil\cr
1310
\\is\.0\mid\.1\mid\.2\mid\.3\mid\.4\mid
1311
        \.5\mid\.6\mid\.7\mid\.8\mid\.9\cr
1312
\\is\\mid\\\cr
1313
\\is\\..\mid\..\\mid
1314
                      \\..\\cr
1315
\\is\\mid\.+\mid\.-\cr
1316
\\is\.e\\\cr
1317
\\is\\mid\\cr
1318
\\is\\\mid
1319
                    \\\mid\cr
1320
\hskip12em          \.{Inf}\mid\.{NaN}\mid\.{NaN.}\\cr
1321
\\is\\\cr
1322
\\is\\\cr
1323
}}$$
1324
For example, `\.{-3.}' is the floating constant \Hex{c008000000000000}\thinspace;
1325
`\.{1e3}' and `\.{1000}' are both equivalent to \Hex{408f400000000000}\thinspace;
1326
`\.{NaN}' and `\.{+NaN.5}' are both equivalent to \Hex{7ff8000000000000}.
1327
 
1328
The |scan_const| routine looks at a given string and finds the
1329
longest initial substring that matches the syntax of either \
1330
constant> or \. It puts the corresponding value
1331
into the global octabyte variable~|val|; it also puts the position of the first
1332
unscanned character in the global pointer variable |next_char|.
1333
It returns 1 if a floating constant was found, 0~if a decimal constant
1334
was found, $-1$ if nothing was found. A decimal constant that doesn't
1335
fit in an octabyte is computed modulo~$2^{64}$.
1336
@^syntax of floating point constants@>
1337
 
1338
The value of |exceptions| set by |scan_const| is not necessarily correct.
1339
 
1340
@=
1341
static void bignum_double @,@,@[ARGS((bignum*))@];
1342
int scan_const @,@,@[ARGS((char*))@];@+@t}\6{@>
1343
int scan_const(s)
1344
  char *s;
1345
{
1346
  @;
1347
  val.h=val.l=0;
1348
  p=s;
1349
  if (*p=='+' || *p=='-') sign=*p++;@+else sign='+';
1350
  if (strncmp(p,"NaN",3)==0) NaN=true, p+=3;
1351
  else NaN=false;
1352
  if ((isdigit(*p)&&!NaN) || (*p=='.' && isdigit(*(p+1))))
1353
    @;
1354
  if (NaN) @;
1355
  if (strncmp(p,"Inf",3)==0) @;
1356
 no_const_found: next_char=s;@+return -1;
1357
}
1358
 
1359
@ @=
1360
octa val; /* value returned by |scan_const| */
1361
char *next_char; /* pointer returned by |scan_const| */
1362
 
1363
@ @=
1364
register char *p,*q; /* for string manipulations */
1365
register bool NaN; /* are we processing a NaN? */
1366
int sign; /* |'+'| or |'-'| */
1367
 
1368
@ @=
1369
{
1370
  next_char=p;
1371
  val.h=0x600000, exp=0x3fe;
1372
  goto packit;
1373
}
1374
 
1375
@ @=
1376
{
1377
  next_char=p+3;
1378
  goto make_it_infinite;
1379
}
1380
 
1381
@ We saw above that a string of at most 17 digits is enough to characterize
1382
a floating point number, for purposes of output. But a much longer buffer
1383
for digits is needed when we're doing input. For example, consider the
1384
borderline quantity $(1+2^{-53})/2^{1022}$; its decimal expansion, when
1385
written out exactly, is a number with more than 750 significant digits:
1386
\.{2.2250738585...8125e-308}.
1387
If {\it any one\/} of those digits is increased, or if
1388
additional nonzero digits are added as in
1389
\.{2.2250738585...81250000001e-308},
1390
the rounded value is supposed to change from \Hex{0010000000000000}
1391
to \Hex{0010000000000001}.
1392
 
1393
We assume here that the user prefers a perfectly correct answer to
1394
a speedy almost-correct one, so we implement the most general case.
1395
 
1396
@=
1397
{
1398
  for (q=buf0,dec_pt=(char*)0;isdigit(*p);p++) {
1399
    val=oplus(val,shift_left(val,2)); /* multiply by 5 */
1400
    val=incr(shift_left(val,1),*p-'0');
1401
    if (q>buf0 || *p!='0')
1402
       if (q
1403
       else if (*(q-1)=='0') *(q-1)=*p;
1404
  }
1405
  if (NaN) *q++='1';
1406
  if (*p=='.') @;
1407
  next_char=p;
1408
  if (*p=='e' && !NaN) @@;
1409
  else exp=0;
1410
  if (dec_pt) @;
1411
  if (sign=='-') val=ominus(zero_octa,val);
1412
  return 0;
1413
}
1414
 
1415
@ @=
1416
{
1417
  dec_pt=q;
1418
  p++;
1419
  for (zeros=0;isdigit(*p);p++)
1420
    if (*p=='0' && q==buf0) zeros++;
1421
    else if (q
1422
    else if (*(q-1)=='0') *(q-1)=*p;
1423
}
1424
 
1425
@ The buffer needs room for eight digits of padding at the left, followed
1426
by up to $1022+53-307$ significant digits, followed by a ``sticky'' digit
1427
at position |buf_max-1|, and eight more digits of padding.
1428
 
1429
@d buf0 (buf+8)
1430
@d buf_max (buf+777)
1431
 
1432
@=
1433
static char buf[785]="00000000"; /* where we put significant input digits */
1434
 
1435
@ @=
1436
register char* dec_pt; /* position of decimal point in |buf| */
1437
register int exp; /* scanned exponent; later used for raw binary exponent */
1438
register int zeros; /* leading zeros removed after decimal point */
1439
 
1440
@ Here we don't advance |next_char| and force a decimal point until we
1441
know that a syntactically correct exponent exists.
1442
 
1443
The code here will convert extra-large inputs like
1444
`\.{9e+9999999999999999}' into $\infty$ and extra-small inputs into zero.
1445
Strange inputs like `\.{-00.0e9999999}' must also be accommodated.
1446
 
1447
@=
1448
{@+register char exp_sign;
1449
  p++;
1450
  if (*p=='+' || *p=='-') exp_sign=*p++;@+else exp_sign='+';
1451
  if (isdigit(*p)) {
1452
    for (exp=*p++ -'0';isdigit(*p);p++)
1453
      if (exp<1000) exp = 10*exp + *p - '0';
1454
    if (!dec_pt) dec_pt=q, zeros=0;
1455
    if (exp_sign=='-') exp=-exp;
1456
    next_char=p;
1457
  }
1458
}
1459
 
1460
@ @=
1461
{
1462
  @;
1463
  @;
1464
packit:  @;
1465
  return 1;
1466
}
1467
 
1468
@ Now we get ready to compute the binary fraction bits, by putting the
1469
scanned input digits into a multiprecision fixed-point
1470
accumulator |ff| that spans the full necessary range.
1471
After this step, the number that we want to convert to floating binary
1472
will appear in |ff.dat[ff.a]|, |ff.dat[ff.a+1]|, \dots,
1473
|ff.dat[ff.b]|.
1474
The radix-$10^9$ digit in ${\it ff}[36-k]$ is understood to be multiplied
1475
by $10^{9k}$, for $36\ge k\ge-120$.
1476
 
1477
@=
1478
x=buf+341+zeros-dec_pt-exp;
1479
if (q==buf0 || x>=1413) {
1480
 make_it_zero: exp=-99999;@+ goto packit;
1481
}
1482
if (x<0) {
1483
 make_it_infinite: exp=99999;@+ goto packit;
1484
}
1485
ff.a=x/9;
1486
for (p=q;p
1487
q=q-1-(q+341+zeros-dec_pt-exp)%9; /* compute stopping place in |buf| */
1488
for (p=buf0-x%9,k=ff.a;p<=q && k<=156; p+=9, k++)
1489
  @
1490
       into |ff.dat[k]|@>;
1491
ff.b=k-1;
1492
for (x=0;p<=q;p+=9) if (strncmp(p,"000000000",9)!=0) x=1;
1493
ff.dat[156]+=x; /* nonzero digits that fall off the right are sticky */
1494
@^sticky bit@>
1495
while (ff.dat[ff.b]==0) ff.b--;
1496
 
1497
@ @=
1498
{
1499
  for (x=*p-'0',pp=p+1;pp
1500
  ff.dat[k]=x;
1501
}
1502
 
1503
@ @=
1504
register int k,x;
1505
register char *pp;
1506
bignum ff,tt;
1507
 
1508
@ Here's a subroutine that is dual to |bignum_times_ten|. It changes $f$
1509
to~$2f$, assuming that overflow will not occur and that the radix is $10^9$.
1510
 
1511
@=
1512
static void bignum_double(f)
1513
  bignum *f;
1514
{
1515
  register tetra *p,*q; register int x,carry;
1516
  for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {
1517
    x = *p + *p + carry;
1518
    if (x>=1000000000) carry=1, *p=x-1000000000;
1519
    else carry=0, *p=x;
1520
  }
1521
  *p=carry;
1522
  if (carry) f->a--;
1523
  if (f->dat[f->b]==0 && f->b>f->a) f->b--;
1524
}
1525
 
1526
@ @=
1527
val=zero_octa;
1528
if (ff.a>36) {
1529
  for (exp=0x3fe;ff.a>36;exp--) bignum_double(&ff);
1530
  for (k=54;k;k--) {
1531
    if (ff.dat[36]) {
1532
      if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<
1533
      ff.dat[36]=0;
1534
      if (ff.b==36) break; /* break if |ff| now zero */
1535
    }
1536
    bignum_double(&ff);
1537
  }
1538
}@+else {
1539
  tt.a=tt.b=36, tt.dat[36]=2;
1540
  for (exp=0x3fe;bignum_compare(&ff,&tt)>=0;exp++) bignum_double(&tt);
1541
  for (k=54;k;k--) {
1542
    bignum_double(&ff);
1543
    if (bignum_compare(&ff,&tt)>=0) {
1544
      if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<
1545
      bignum_dec(&ff,&tt,1000000000);
1546
      if (ff.a==bignum_prec-1) break; /* break if |ff| now zero */
1547
    }
1548
  }
1549
}
1550
if (k==0) val.l |= 1; /* add sticky bit if |ff| nonzero */
1551
 
1552
@ We need to be careful that the input `\.{NaN.999999999999999999999}' doesn't
1553
get rounded up; it is supposed to yield \Hex{7fffffffffffffff}.
1554
 
1555
Although the input `\.{NaN.0}' is illegal, strictly speaking, we silently
1556
convert it to \Hex{7ff0000000000001}---a number that would be
1557
output as `\.{NaN.0000000000000002}'.
1558
 
1559
@=
1560
val=fpack(val,exp,sign,ROUND_NEAR);
1561
if (NaN) {
1562
  if ((val.h&0x7fffffff)==0x40000000) val.h |= 0x7fffffff, val.l=0xffffffff;
1563
  else if ((val.h&0x7fffffff)==0x3ff00000 && !val.l) val.h|=0x40000000,val.l=1;
1564
  else val.h |= 0x40000000;
1565
}
1566
 
1567
@*Floating point remainders. In this section we implement the remainder
1568
of the floating point operations---one of which happens to be the
1569
operation of taking the remainder.
1570
 
1571
The easiest task remaining is to compare two floating point quantities.
1572
Routine |fcomp| returns $-1$~if~$yz$, and
1573
$+2$~if $y$ and~$z$ are unordered.
1574
 
1575
@=
1576
int fcomp @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
1577
int fcomp(y,z)
1578
  octa y,z;
1579
{
1580
  ftype yt,zt;
1581
  int ye,ze;
1582
  char ys,zs;
1583
  octa yf,zf;
1584
  register int x;
1585
  yt=funpack(y,&yf,&ye,&ys);
1586
  zt=funpack(z,&zf,&ze,&zs);
1587
  switch (4*yt+zt) {
1588
 case 4*nan+nan: case 4*zro+nan: case 4*num+nan: case 4*inf+nan:
1589
 case 4*nan+zro: case 4*nan+num: case 4*nan+inf: return 2;
1590
 case 4*zro+zro: return 0;
1591
 case 4*zro+num: case 4*num+zro: case 4*zro+inf: case 4*inf+zro:
1592
 case 4*num+num: case 4*num+inf: case 4*inf+num: case 4*inf+inf:
1593
  if (ys!=zs) x=1;
1594
  else if (y.h>z.h) x=1;
1595
  else if (y.h
1596
  else if (y.l>z.l) x=1;
1597
  else if (y.l
1598
  else return 0;
1599
  break;
1600
  }
1601
  return (ys=='-'? -x: x);
1602
}
1603
 
1604
@ Several \MMIX\ operations act on a single floating point number and
1605
accept an arbitrary rounding mode. For example, consider the
1606
operation of rounding to the nearest floating point integer:
1607
 
1608
@=
1609
octa fintegerize @,@,@[ARGS((octa,int))@];@+@t}\6{@>
1610
octa fintegerize(z,r)
1611
  octa z; /* the operand */
1612
  int r; /* the rounding mode */
1613
{
1614
  ftype zt;
1615
  int ze;
1616
  char zs;
1617
  octa xf,zf;
1618
  zt=funpack(z,&zf,&ze,&zs);
1619
  if (!r) r=cur_round;
1620
  switch (zt) {
1621
 case nan:@+if (!(z.h&0x80000)) {@+exceptions|=I_BIT;@+z.h|=0x80000;@+}
1622
 case inf: case zro: return z;
1623
 case num: @;
1624
  }
1625
}
1626
 
1627
@ @=
1628
if (ze>=1074) return fpack(zf,ze,zs,ROUND_OFF); /* already an integer */
1629
if (ze<=1020) xf.h=0,xf.l=1;
1630
else {@+octa oo;
1631
  xf=shift_right(zf,1074-ze,1);
1632
  oo=shift_left(xf,1074-ze);
1633
  if (oo.l!=zf.l || oo.h!=zf.h) xf.l|=1; /* sticky bit */
1634
@^sticky bit@>
1635
}
1636
switch (r) {
1637
 case ROUND_DOWN:@+ if (zs=='-') xf=incr(xf,3);@+break;
1638
 case ROUND_UP:@+ if (zs!='-') xf=incr(xf,3);
1639
 case ROUND_OFF: break;
1640
 case ROUND_NEAR: xf=incr(xf, xf.l&4? 2: 1);@+break;
1641
}
1642
xf.l&=0xfffffffc;
1643
if (ze>=1022) return fpack(shift_left(xf,1074-ze),ze,zs,ROUND_OFF);
1644
if (xf.l) xf.h=0x3ff00000, xf.l=0;
1645
if (zs=='-') xf.h|=sign_bit;
1646
return xf;
1647
 
1648
@ To convert floating point to fixed point, we use |fixit|.
1649
 
1650
@=
1651
octa fixit @,@,@[ARGS((octa,int))@];@+@t}\6{@>
1652
octa fixit(z,r)
1653
  octa z; /* the operand */
1654
  int r; /* the rounding mode */
1655
{
1656
  ftype zt;
1657
  int ze;
1658
  char zs;
1659
  octa zf,o;
1660
  zt=funpack(z,&zf,&ze,&zs);
1661
  if (!r) r=cur_round;
1662
  switch (zt) {
1663
 case nan: case inf: exceptions|=I_BIT;@+return z;
1664
 case zro: return zero_octa;
1665
 case num:@+if (funpack(fintegerize(z,r),&zf,&ze,&zs)==zro) return zero_octa;
1666
   if (ze<=1076) o=shift_right(zf,1076-ze,1);
1667
   else {
1668
     if (ze>1085 || (ze==1085 && (zf.h>0x400000 || @|
1669
           (zf.h==0x400000 && (zf.l || zs!='-'))))) exceptions|=W_BIT;
1670
     if (ze>=1140) return zero_octa;
1671
     o=shift_left(zf,ze-1076);
1672
    }
1673
  return (zs=='-'? ominus(zero_octa,o): o);
1674
  }
1675
}
1676
 
1677
@ Going the other way, we can specify not only a rounding mode but whether
1678
the given fixed point octabyte is signed or unsigned, and whether the
1679
result should be rounded to short precision.
1680
 
1681
@=
1682
octa floatit @,@,@[ARGS((octa,int,int,int))@];@+@t}\6{@>
1683
octa floatit(z,r,u,p)
1684
  octa z; /* octabyte to float */
1685
  int r; /* rounding mode */
1686
  int u; /* unsigned? */
1687
  int p; /* short precision? */
1688
{
1689
  int e;@+char s;
1690
  register int t;
1691
  exceptions=0;
1692
  if (!z.h && !z.l) return zero_octa;
1693
  if (!r) r=cur_round;
1694
  if (!u && (z.h&sign_bit)) s='-', z=ominus(zero_octa,z);@+ else s='+';
1695
  e=1076;
1696
  while (z.h<0x400000) e--,z=shift_left(z,1);
1697
  while (z.h>=0x800000) {
1698
    e++;
1699
    t=z.l&1;
1700
    z=shift_right(z,1,1);
1701
    z.l|=t;
1702
  }
1703
  if (p) @;
1704
  return fpack(z,e,s,r);
1705
}
1706
 
1707
@ @=
1708
{
1709
  register int ex;@+register tetra t;
1710
  t=sfpack(z,e,s,r);
1711
  ex=exceptions;
1712
  sfunpack(t,&z,&e,&s);
1713
  exceptions=ex;
1714
}
1715
 
1716
@ The square root operation is more interesting.
1717
 
1718
@=
1719
octa froot @,@,@[ARGS((octa,int))@];@+@t}\6{@>
1720
octa froot(z,r)
1721
  octa z; /* the operand */
1722
  int r; /* the rounding mode */
1723
{
1724
  ftype zt;
1725
  int ze;
1726
  char zs;
1727
  octa x,xf,rf,zf;
1728
  register int xe,k;
1729
  if (!r) r=cur_round;
1730
  zt=funpack(z,&zf,&ze,&zs);
1731
  if (zs=='-' && zt!=zro) exceptions|=I_BIT, x=standard_NaN;
1732
  else@+switch (zt) {
1733
 case nan:@+ if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;
1734
  return z;
1735
 case inf: case zro: x=z;@+break;
1736
 case num: @;
1737
  }
1738
  if (zs=='-') x.h|=sign_bit;
1739
  return x;
1740
}
1741
 
1742
@ The square root can be found by an adaptation of the old pencil-and-paper
1743
method. If $n=\lfloor\sqrt s\rfloor$, where $s$ is an integer,
1744
we have $s=n^2+r$ where $0\le r\le2n$;
1745
this invariant can be maintained if we replace $s$ by $4s+(0,1,2,3)$
1746
and $n$ by $2n+(0,1)$. The following code implements this idea with
1747
$2n$ in~|xf| and $r$ in~|rf|. (It could easily be made to run about
1748
twice as fast.)
1749
 
1750
@=
1751
xf.h=0, xf.l=2;
1752
xe=(ze+0x3fe)>>1;
1753
if (ze&1) zf=shift_left(zf,1);
1754
rf.h=0, rf.l=(zf.h>>22)-1;
1755
for (k=53;k;k--) {
1756
  rf=shift_left(rf,2);@+ xf=shift_left(xf,1);
1757
  if (k>=43) rf=incr(rf,(zf.h>>(2*(k-43)))&3);
1758
  else if (k>=27) rf=incr(rf,(zf.l>>(2*(k-27)))&3);
1759
  if ((rf.l>xf.l && rf.h>=xf.h) || rf.h>xf.h) {
1760
    xf.l++;@+rf=ominus(rf,xf);@+xf.l++;
1761
  }
1762
}
1763
if (rf.h || rf.l) xf.l++; /* sticky bit */
1764
return fpack(xf,xe,'+',r);
1765
 
1766
@ And finally, the genuine floating point remainder. Subroutine |fremstep|
1767
either calculates $y\,{\rm rem}\,z$ or reduces $y$ to a smaller number
1768
having the same remainder with respect to~$z$. In the latter case
1769
the |E_BIT| is set in |exceptions|. A third parameter, |delta|,
1770
gives a decrease in exponent that is acceptable for incomplete results;
1771
if |delta| is sufficiently large, say 2500, the correct result will
1772
always be obtained in one step of |fremstep|.
1773
 
1774
@=
1775
octa fremstep @,@,@[ARGS((octa,octa,int))@];@+@t}\6{@>
1776
octa fremstep(y,z,delta)
1777
  octa y,z;
1778
  int delta;
1779
{
1780
  ftype yt,zt;
1781
  int ye,ze;
1782
  char xs,ys,zs;
1783
  octa x,xf,yf,zf;
1784
  register int xe,thresh,odd;
1785
  yt=funpack(y,&yf,&ye,&ys);
1786
  zt=funpack(z,&zf,&ze,&zs);
1787
  switch (4*yt+zt) {
1788
 @t\4@>@;
1789
 case 4*zro+zro: case 4*num+zro: case 4*inf+zro:
1790
 case 4*inf+num: case 4*inf+inf: x=standard_NaN;
1791
  exceptions|=I_BIT;@+break;
1792
 case 4*zro+num: case 4*zro+inf: case 4*num+inf: return y;
1793
 case 4*num+num: @;
1794
 zero_out: x=zero_octa;
1795
  }
1796
  if (ys=='-') x.h|=sign_bit;
1797
  return x;
1798
}
1799
 
1800
@ If there's a huge difference in exponents and the remainder is nonzero,
1801
this computation will take a long time. One could compute
1802
$(2^ny)\,{\rm rem}\,z$ much more quickly for large~$n$ by using $O(\log n)$
1803
multiplications modulo~$z$, but the floating remainder operation isn't
1804
important enough to justify such expensive hardware.
1805
 
1806
Results of floating remainder are always exact, so the rounding mode
1807
is immaterial.
1808
 
1809
@=
1810
odd=0; /* will be 1 if we've subtracted an odd multiple of~$z$ from $y$ */
1811
thresh=ye-delta;
1812
if (thresh
1813
while (ye>=thresh) @
1814
   |goto zero_out| if the remainder is zero,
1815
   |goto try_complement| if appropriate@>;
1816
if (ye>=ze) {
1817
  exceptions|=E_BIT;@+return fpack(yf,ye,ys,ROUND_OFF);
1818
}
1819
if (ye
1820
yf=shift_right(yf,1,1);
1821
try_complement: xf=ominus(zf,yf), xe=ze, xs='+' + '-' - ys;
1822
if (xf.h>yf.h || (xf.h==yf.h && (xf.l>yf.l || (xf.l==yf.l && !odd))))
1823
  xf=yf, xs=ys;
1824
while (xf.h<0x400000) xe--, xf=shift_left(xf,1);
1825
return fpack(xf,xe,xs,ROUND_OFF);
1826
 
1827
@ Here we are careful not to change the sign of |y|, because a remainder
1828
of~0 is supposed to inherit the original sign of~|y|.
1829
 
1830
@=
1831
{
1832
  if (yf.h==zf.h && yf.l==zf.l) goto zero_out;
1833
  if (yf.h
1834
    if (ye==ze) goto try_complement;
1835
    ye--, yf=shift_left(yf,1);
1836
  }
1837
  yf=ominus(yf,zf);
1838
  if (ye==ze) odd=1;
1839
  while (yf.h<0x400000) ye--, yf=shift_left(yf,1);
1840
}
1841
 
1842
@* Index.
1843
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.