1 |
15 |
hellwig |
% This file is part of the MMIXware package (c) Donald E Knuth 1999
|
2 |
|
|
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
|
3 |
|
|
|
4 |
|
|
\def\title{MMIX-ARITH}
|
5 |
|
|
|
6 |
|
|
\def\MMIX{\.{MMIX}}
|
7 |
|
|
\def\MMIXAL{\.{MMIXAL}}
|
8 |
|
|
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
|
9 |
|
|
\def\dts{\mathinner{\ldotp\ldotp}}
|
10 |
|
|
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
|
11 |
|
|
\def\ff{\\{ff\kern-.05em}}
|
12 |
|
|
@s ff TeX
|
13 |
|
|
@s bool normal @q unreserve a C++ keyword @>
|
14 |
|
|
@s xor normal @q unreserve a C++ keyword @>
|
15 |
|
|
|
16 |
|
|
@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\
|
17 |
|
|
arithmetic on an old-fashioned 32-bit computer---like the one the author
|
18 |
|
|
had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.
|
19 |
|
|
All operations are fabricated from 32-bit arithmetic, including
|
20 |
|
|
a full implementation of the IEEE floating point standard,
|
21 |
|
|
assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.
|
22 |
|
|
|
23 |
|
|
Some day 64-bit machines will be commonplace and the awkward manipulations of
|
24 |
|
|
the present program will look quite archaic. Interested readers who have such
|
25 |
|
|
computers will be able to convert the code to a pure 64-bit form without
|
26 |
|
|
difficulty, thereby obtaining much faster and simpler routines. Meanwhile,
|
27 |
|
|
however, we can simulate the future and hope for continued progress.
|
28 |
|
|
|
29 |
|
|
This program module has a simple structure, intended to make it
|
30 |
|
|
suitable for loading with \MMIX\ simulators and assemblers.
|
31 |
|
|
|
32 |
|
|
@c
|
33 |
|
|
#include
|
34 |
|
|
#include
|
35 |
|
|
#include
|
36 |
|
|
@@;
|
37 |
|
|
typedef enum{@+false,true@+} bool;
|
38 |
|
|
@@;
|
39 |
|
|
@@;
|
40 |
|
|
@@;
|
41 |
|
|
@
|
42 |
|
|
|
43 |
|
|
@ Subroutines of this program are declared first with a prototype,
|
44 |
|
|
as in {\mc ANSI C}, then with an old-style \CEE/ function definition.
|
45 |
|
|
Here are some preprocessor commands that make this work correctly with both
|
46 |
|
|
new-style and old-style compilers.
|
47 |
|
|
@^prototypes for functions@>
|
48 |
|
|
|
49 |
|
|
@=
|
50 |
|
|
#ifdef __STDC__
|
51 |
|
|
#define ARGS(list) list
|
52 |
|
|
#else
|
53 |
|
|
#define ARGS(list) ()
|
54 |
|
|
#endif
|
55 |
|
|
|
56 |
|
|
@ The definition of type \&{tetra} should be changed, if necessary, so that
|
57 |
|
|
it represents an unsigned 32-bit integer.
|
58 |
|
|
@^system dependencies@>
|
59 |
|
|
|
60 |
|
|
@=
|
61 |
|
|
typedef unsigned int tetra;
|
62 |
|
|
/* for systems conforming to the LP-64 data model */
|
63 |
|
|
typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */
|
64 |
|
|
|
65 |
|
|
@ @d sign_bit ((unsigned)0x80000000)
|
66 |
|
|
|
67 |
|
|
@=
|
68 |
|
|
octa zero_octa; /* |zero_octa.h=zero_octa.l=0| */
|
69 |
|
|
octa neg_one={-1,-1}; /* |neg_one.h=neg_one.l=-1| */
|
70 |
|
|
octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */
|
71 |
|
|
octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */
|
72 |
|
|
octa aux; /* auxiliary output of a subroutine */
|
73 |
|
|
bool overflow; /* set by certain subroutines for signed arithmetic */
|
74 |
|
|
|
75 |
|
|
@ It's easy to add and subtract octabytes, if we aren't terribly
|
76 |
|
|
worried about speed.
|
77 |
|
|
|
78 |
|
|
@=
|
79 |
|
|
octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
80 |
|
|
octa oplus(y,z) /* compute $y+z$ */
|
81 |
|
|
octa y,z;
|
82 |
|
|
{@+ octa x;
|
83 |
|
|
x.h=y.h+z.h;@+
|
84 |
|
|
x.l=y.l+z.l;
|
85 |
|
|
if (x.l
|
86 |
|
|
return x;
|
87 |
|
|
}
|
88 |
|
|
@#
|
89 |
|
|
octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
90 |
|
|
octa ominus(y,z) /* compute $y-z$ */
|
91 |
|
|
octa y,z;
|
92 |
|
|
{@+ octa x;
|
93 |
|
|
x.h=y.h-z.h;@+
|
94 |
|
|
x.l=y.l-z.l;
|
95 |
|
|
if (x.l>y.l) x.h--;
|
96 |
|
|
return x;
|
97 |
|
|
}
|
98 |
|
|
|
99 |
|
|
@ In the following subroutine, |delta| is a signed quantity that is
|
100 |
|
|
assumed to fit in a signed tetrabyte.
|
101 |
|
|
|
102 |
|
|
@=
|
103 |
|
|
octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>
|
104 |
|
|
octa incr(y,delta) /* compute $y+\delta$ */
|
105 |
|
|
octa y;
|
106 |
|
|
int delta;
|
107 |
|
|
{@+ octa x;
|
108 |
|
|
x.h=y.h;@+ x.l=y.l+delta;
|
109 |
|
|
if (delta>=0) {
|
110 |
|
|
if (x.l
|
111 |
|
|
}@+else if (x.l>y.l) x.h--;
|
112 |
|
|
return x;
|
113 |
|
|
}
|
114 |
|
|
|
115 |
|
|
@ Left and right shifts are only a bit more difficult.
|
116 |
|
|
|
117 |
|
|
@=
|
118 |
|
|
octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>
|
119 |
|
|
octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */
|
120 |
|
|
octa y;
|
121 |
|
|
int s;
|
122 |
|
|
{
|
123 |
|
|
while (s>=32) y.h=y.l,y.l=0,s-=32;
|
124 |
|
|
if (s) {@+register tetra yhl=y.h<>(32-s);
|
125 |
|
|
y.h=yhl+ylh;@+ y.l<<=s;
|
126 |
|
|
}
|
127 |
|
|
return y;
|
128 |
|
|
}
|
129 |
|
|
@#
|
130 |
|
|
octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>
|
131 |
|
|
octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */
|
132 |
|
|
octa y;
|
133 |
|
|
int s,u;
|
134 |
|
|
{
|
135 |
|
|
while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;
|
136 |
|
|
if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;
|
137 |
|
|
y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;
|
138 |
|
|
}
|
139 |
|
|
return y;
|
140 |
|
|
}
|
141 |
|
|
|
142 |
|
|
@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining
|
143 |
|
|
an unsigned 128-bit product. It is easy to do this on a 32-bit machine
|
144 |
|
|
by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.
|
145 |
|
|
@^multiprecision multiplication@>
|
146 |
|
|
|
147 |
|
|
The following subroutine returns the lower half of the product, and
|
148 |
|
|
puts the upper half into a global octabyte called |aux|.
|
149 |
|
|
|
150 |
|
|
@=
|
151 |
|
|
octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
152 |
|
|
octa omult(y,z)
|
153 |
|
|
octa y,z;
|
154 |
|
|
{
|
155 |
|
|
register int i,j,k;
|
156 |
|
|
tetra u[4],v[4],w[8];
|
157 |
|
|
register tetra t;
|
158 |
|
|
octa acc;
|
159 |
|
|
@;
|
160 |
|
|
for (j=0;j<4;j++) w[j]=0;
|
161 |
|
|
for (j=0;j<4;j++)
|
162 |
|
|
if (!v[j]) w[j+4]=0;
|
163 |
|
|
else {
|
164 |
|
|
for (i=k=0;i<4;i++) {
|
165 |
|
|
t=u[i]*v[j]+w[i+j]+k;
|
166 |
|
|
w[i+j]=t&0xffff, k=t>>16;
|
167 |
|
|
}
|
168 |
|
|
w[j+4]=k;
|
169 |
|
|
}
|
170 |
|
|
@;
|
171 |
|
|
return acc;
|
172 |
|
|
}
|
173 |
|
|
|
174 |
|
|
@ @=
|
175 |
|
|
extern octa aux; /* secondary output of subroutines with multiple outputs */
|
176 |
|
|
extern bool overflow;
|
177 |
|
|
|
178 |
|
|
@ @=
|
179 |
|
|
u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;
|
180 |
|
|
v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;
|
181 |
|
|
|
182 |
|
|
@ @=
|
183 |
|
|
aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];
|
184 |
|
|
acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];
|
185 |
|
|
|
186 |
|
|
@ Signed multiplication has the same lower half product as unsigned
|
187 |
|
|
multiplication. The signed upper half product is obtained with at most two
|
188 |
|
|
further subtractions, after which the result has overflowed if and only if
|
189 |
|
|
the upper half is unequal to 64 copies of the sign bit in the lower half.
|
190 |
|
|
|
191 |
|
|
@=
|
192 |
|
|
octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
193 |
|
|
octa signed_omult(y,z)
|
194 |
|
|
octa y,z;
|
195 |
|
|
{
|
196 |
|
|
octa acc;
|
197 |
|
|
acc=omult(y,z);
|
198 |
|
|
if (y.h&sign_bit) aux=ominus(aux,z);
|
199 |
|
|
if (z.h&sign_bit) aux=ominus(aux,y);
|
200 |
|
|
overflow=(aux.h!=aux.l || (aux.h^(aux.h>>1)^(acc.h&sign_bit)));
|
201 |
|
|
return acc;
|
202 |
|
|
}
|
203 |
|
|
|
204 |
|
|
@* Division. Long division of an unsigned 128-bit integer by an unsigned
|
205 |
|
|
64-bit integer is, of course, one of the most challenging routines
|
206 |
|
|
needed for \MMIX\ arithmetic. The following program, based on
|
207 |
|
|
Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes
|
208 |
|
|
octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r
|
209 |
|
|
given octabytes $x$, $y$, and~$z$, assuming that $x
|
210 |
|
|
(If $x\ge z$, it simply sets $q=x$ and $r=y$.)
|
211 |
|
|
The quotient~$q$ is returned by the subroutine;
|
212 |
|
|
the remainder~$r$ is stored in |aux|.
|
213 |
|
|
@^multiprecision division@>
|
214 |
|
|
|
215 |
|
|
@=
|
216 |
|
|
octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>
|
217 |
|
|
octa odiv(x,y,z)
|
218 |
|
|
octa x,y,z;
|
219 |
|
|
{
|
220 |
|
|
register int i,j,k,n,d;
|
221 |
|
|
tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;
|
222 |
|
|
register tetra t;
|
223 |
|
|
octa acc;
|
224 |
|
|
@;
|
225 |
|
|
@;
|
226 |
|
|
@;
|
227 |
|
|
@;
|
228 |
|
|
for (j=3;j>=0;j--) @;
|
229 |
|
|
@;
|
230 |
|
|
@;
|
231 |
|
|
return acc;
|
232 |
|
|
}
|
233 |
|
|
|
234 |
|
|
@ @=
|
235 |
|
|
if (x.h>z.h || (x.h==z.h && x.l>=z.l)) {
|
236 |
|
|
aux=y;@+ return x;
|
237 |
|
|
}
|
238 |
|
|
|
239 |
|
|
@ @=
|
240 |
|
|
u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;
|
241 |
|
|
u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;
|
242 |
|
|
v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;
|
243 |
|
|
|
244 |
|
|
@ @=
|
245 |
|
|
for (n=4;v[n-1]==0;n--);
|
246 |
|
|
|
247 |
|
|
@ We shift |u| and |v| left by |d| places, where |d| is chosen to
|
248 |
|
|
make $2^{15}\le v_{n-1}<2^{16}$.
|
249 |
|
|
|
250 |
|
|
@=
|
251 |
|
|
vh=v[n-1];
|
252 |
|
|
for (d=0;vh<0x8000;d++,vh<<=1);
|
253 |
|
|
for (j=k=0; j
|
254 |
|
|
t=(u[j]<
|
255 |
|
|
u[j]=t&0xffff, k=t>>16;
|
256 |
|
|
}
|
257 |
|
|
for (j=k=0; j
|
258 |
|
|
t=(v[j]<
|
259 |
|
|
v[j]=t&0xffff, k=t>>16;
|
260 |
|
|
}
|
261 |
|
|
vh=v[n-1];
|
262 |
|
|
vmh=(n>1? v[n-2]: 0);
|
263 |
|
|
|
264 |
|
|
@ @=
|
265 |
|
|
mask=(1<
|
266 |
|
|
for (j=3; j>=n; j--) u[j]=0;
|
267 |
|
|
for (k=0;j>=0;j--) {
|
268 |
|
|
t=(k<<16)+u[j];
|
269 |
|
|
u[j]=t>>d, k=t&mask;
|
270 |
|
|
}
|
271 |
|
|
|
272 |
|
|
@ @=
|
273 |
|
|
acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];
|
274 |
|
|
aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];
|
275 |
|
|
|
276 |
|
|
@ @=
|
277 |
|
|
{
|
278 |
|
|
@;
|
279 |
|
|
@;
|
280 |
|
|
@;
|
281 |
|
|
q[j]=qhat;
|
282 |
|
|
}
|
283 |
|
|
|
284 |
|
|
@ @=
|
285 |
|
|
t=(u[j+n]<<16)+u[j+n-1];
|
286 |
|
|
qhat=t/vh, rhat=t-vh*qhat;
|
287 |
|
|
if (n>1) while (qhat==0x10000 || qhat*vmh>(rhat<<16)+u[j+n-2]) {
|
288 |
|
|
qhat--, rhat+=vh;
|
289 |
|
|
if (rhat>=0x10000) break;
|
290 |
|
|
}
|
291 |
|
|
|
292 |
|
|
@ After this step, |u[j+n]| will either equal |k| or |k-1|. The
|
293 |
|
|
true value of~|u| would be obtained by subtracting~|k| from |u[j+n]|;
|
294 |
|
|
but we don't have to fuss over |u[j+n]|, because it won't be examined later.
|
295 |
|
|
|
296 |
|
|
@=
|
297 |
|
|
for (i=k=0; i
|
298 |
|
|
t=u[i+j]+0xffff0000-k-qhat*v[i];
|
299 |
|
|
u[i+j]=t&0xffff, k=0xffff-(t>>16);
|
300 |
|
|
}
|
301 |
|
|
|
302 |
|
|
@ The correction here occurs only rarely, but it can be necessary---for
|
303 |
|
|
example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.
|
304 |
|
|
|
305 |
|
|
@=
|
306 |
|
|
if (u[j+n]!=k) {
|
307 |
|
|
qhat--;
|
308 |
|
|
for (i=k=0; i
|
309 |
|
|
t=u[i+j]+v[i]+k;
|
310 |
|
|
u[i+j]=t&0xffff, k=t>>16;
|
311 |
|
|
}
|
312 |
|
|
}
|
313 |
|
|
|
314 |
|
|
@ Signed division can be reduced to unsigned division in a tedious
|
315 |
|
|
but straightforward manner. We assume that the divisor isn't zero.
|
316 |
|
|
|
317 |
|
|
@=
|
318 |
|
|
octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
319 |
|
|
octa signed_odiv(y,z)
|
320 |
|
|
octa y,z;
|
321 |
|
|
{
|
322 |
|
|
octa yy,zz,q;
|
323 |
|
|
register int sy,sz;
|
324 |
|
|
if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);
|
325 |
|
|
else sy=0, yy=y;
|
326 |
|
|
if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);
|
327 |
|
|
else sz=0, zz=z;
|
328 |
|
|
q=odiv(zero_octa,yy,zz);
|
329 |
|
|
overflow=false;
|
330 |
|
|
switch (sy+sz) {
|
331 |
|
|
case 2+1: aux=ominus(zero_octa,aux);
|
332 |
|
|
if (q.h==sign_bit) overflow=true;
|
333 |
|
|
case 0+0: return q;
|
334 |
|
|
case 2+0:@+ if (aux.h || aux.l) aux=ominus(zz,aux);
|
335 |
|
|
goto negate_q;
|
336 |
|
|
case 0+1:@+ if (aux.h || aux.l) aux=ominus(aux,zz);
|
337 |
|
|
negate_q:@+ if (aux.h || aux.l) return ominus(neg_one,q);
|
338 |
|
|
else return ominus(zero_octa,q);
|
339 |
|
|
}
|
340 |
|
|
}
|
341 |
|
|
|
342 |
|
|
@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to
|
343 |
|
|
implement directly, but three of them occur often enough to deserve
|
344 |
|
|
packaging as subroutines.
|
345 |
|
|
|
346 |
|
|
@=
|
347 |
|
|
octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
348 |
|
|
octa oand(y,z) /* compute $y\land z$ */
|
349 |
|
|
octa y,z;
|
350 |
|
|
{@+ octa x;
|
351 |
|
|
x.h=y.h&z.h;@+ x.l=y.l&z.l;
|
352 |
|
|
return x;
|
353 |
|
|
}
|
354 |
|
|
@#
|
355 |
|
|
octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
356 |
|
|
octa oandn(y,z) /* compute $y\land\bar z$ */
|
357 |
|
|
octa y,z;
|
358 |
|
|
{@+ octa x;
|
359 |
|
|
x.h=y.h&~z.h;@+ x.l=y.l&~z.l;
|
360 |
|
|
return x;
|
361 |
|
|
}
|
362 |
|
|
@#
|
363 |
|
|
octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
364 |
|
|
octa oxor(y,z) /* compute $y\oplus z$ */
|
365 |
|
|
octa y,z;
|
366 |
|
|
{@+ octa x;
|
367 |
|
|
x.h=y.h^z.h;@+ x.l=y.l^z.l;
|
368 |
|
|
return x;
|
369 |
|
|
}
|
370 |
|
|
|
371 |
|
|
@ Here's a fun way to count the number of bits in a tetrabyte.
|
372 |
|
|
[This classical trick is called the ``Gillies--Miller method
|
373 |
|
|
for sideways addition'' in {\sl The Preparation of Programs
|
374 |
|
|
for an Electronic Digital Computer\/} by Wilkes, Wheeler, and
|
375 |
|
|
Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),
|
376 |
|
|
191--193. Some of the tricks used here were suggested by
|
377 |
|
|
Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]
|
378 |
|
|
@^Gillies, Donald Bruce@>
|
379 |
|
|
@^Miller, Jeffrey Charles Percy@>
|
380 |
|
|
@^Wilkes, Maurice Vincent@>
|
381 |
|
|
@^Wheeler, David John@>
|
382 |
|
|
@^Gill, Stanley@>
|
383 |
|
|
@^Singh, Balbir@>
|
384 |
|
|
@^Rossmanith, Peter@>
|
385 |
|
|
@^Schwoon, Stefan@>
|
386 |
|
|
|
387 |
|
|
@=
|
388 |
|
|
int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>
|
389 |
|
|
int count_bits(x)
|
390 |
|
|
tetra x;
|
391 |
|
|
{
|
392 |
|
|
register int xx=x;
|
393 |
|
|
xx=xx-((xx>>1)&0x55555555);
|
394 |
|
|
xx=(xx&0x33333333)+((xx>>2)&0x33333333);
|
395 |
|
|
xx=(xx+(xx>>4))&0x0f0f0f0f;
|
396 |
|
|
xx=xx+(xx>>8);
|
397 |
|
|
return (xx+(xx>>16)) & 0xff;
|
398 |
|
|
}
|
399 |
|
|
|
400 |
|
|
@ To compute the nonnegative byte differences of two given tetrabytes,
|
401 |
|
|
we can carry out the following 20-step branchless computation:
|
402 |
|
|
|
403 |
|
|
@=
|
404 |
|
|
tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>
|
405 |
|
|
tetra byte_diff(y,z)
|
406 |
|
|
tetra y,z;
|
407 |
|
|
{
|
408 |
|
|
register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);
|
409 |
|
|
register tetra m=d&0x01000100;
|
410 |
|
|
register tetra x=d&(m-(m>>8));
|
411 |
|
|
d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);
|
412 |
|
|
m=d&0x01000100;
|
413 |
|
|
return x+((d&(m-(m>>8)))<<8);
|
414 |
|
|
}
|
415 |
|
|
|
416 |
|
|
@ To compute the nonnegative wyde differences of two tetrabytes,
|
417 |
|
|
another trick leads to a 15-step branchless computation.
|
418 |
|
|
(Research problem: Can |count_bits|, |byte_diff|, or |wyde_diff| be done
|
419 |
|
|
with fewer operations?)
|
420 |
|
|
|
421 |
|
|
@=
|
422 |
|
|
tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>
|
423 |
|
|
tetra wyde_diff(y,z)
|
424 |
|
|
tetra y,z;
|
425 |
|
|
{
|
426 |
|
|
register tetra a=((y>>16)-(z>>16))&0x10000;
|
427 |
|
|
register tetra b=((y&0xffff)-(z&0xffff))&0x10000;
|
428 |
|
|
return y-(z^((y^z)&(b-a-(b>>16))));
|
429 |
|
|
}
|
430 |
|
|
|
431 |
|
|
@ The last bitwise subroutine we need is the most interesting:
|
432 |
|
|
It implements \MMIX's \.{MOR} and \.{MXOR} operations.
|
433 |
|
|
|
434 |
|
|
@=
|
435 |
|
|
octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>
|
436 |
|
|
octa bool_mult(y,z,xor)
|
437 |
|
|
octa y,z; /* the operands */
|
438 |
|
|
bool xor; /* do we do xor instead of or? */
|
439 |
|
|
{
|
440 |
|
|
octa o,x;
|
441 |
|
|
register tetra a,b,c;
|
442 |
|
|
register int k;
|
443 |
|
|
for (k=0,o=y,x=zero_octa;o.h||o.l;k++,o=shift_right(o,8,1))
|
444 |
|
|
if (o.l&0xff) {
|
445 |
|
|
a=((z.h>>k)&0x01010101)*0xff;
|
446 |
|
|
b=((z.l>>k)&0x01010101)*0xff;
|
447 |
|
|
c=(o.l&0xff)*0x01010101;
|
448 |
|
|
if (xor) x.h^=a&c, x.l^=b&c;
|
449 |
|
|
else x.h|=a&c, x.l|=b&c;
|
450 |
|
|
}
|
451 |
|
|
return x;
|
452 |
|
|
}
|
453 |
|
|
|
454 |
|
|
@* Floating point packing and unpacking. Standard IEEE floating binary
|
455 |
|
|
numbers pack a sign, exponent, and fraction into a tetrabyte
|
456 |
|
|
or octabyte. In this section we consider basic subroutines that
|
457 |
|
|
convert between IEEE format and the separate unpacked components.
|
458 |
|
|
|
459 |
|
|
@d ROUND_OFF 1
|
460 |
|
|
@d ROUND_UP 2
|
461 |
|
|
@d ROUND_DOWN 3
|
462 |
|
|
@d ROUND_NEAR 4
|
463 |
|
|
|
464 |
|
|
@=
|
465 |
|
|
int cur_round; /* the current rounding mode */
|
466 |
|
|
|
467 |
|
|
@ The |fpack| routine takes an octabyte $f$, a raw exponent~$e$,
|
468 |
|
|
and a sign~|s|, and packs them
|
469 |
|
|
into the floating binary number that corresponds to
|
470 |
|
|
$\pm2^{e-1076}f$, using a given rounding mode.
|
471 |
|
|
The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.
|
472 |
|
|
|
473 |
|
|
Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$
|
474 |
|
|
is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and |s='+'|.
|
475 |
|
|
The raw exponent~$e$ is usually one less than
|
476 |
|
|
the final exponent value; the leading bit of~$f$ is essentially added
|
477 |
|
|
to the exponent. (This trick works nicely for subnormal numbers, when
|
478 |
|
|
$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)
|
479 |
|
|
|
480 |
|
|
Exceptional events are noted by oring appropriate bits into
|
481 |
|
|
the global variable |exceptions|. Special considerations apply to
|
482 |
|
|
underflow, which is not fully specified by Section 7.4 of the IEEE standard:
|
483 |
|
|
Implementations of the standard are free to choose between two definitions
|
484 |
|
|
of ``tininess'' and two definitions of ``accuracy loss.''
|
485 |
|
|
\MMIX\ determines tininess {\it after\/} rounding, hence a result with
|
486 |
|
|
$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent
|
487 |
|
|
to inexactness. Thus, a result underflows if and only if
|
488 |
|
|
it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.
|
489 |
|
|
The |fpack| routine sets |U_BIT| in |exceptions| if and only if the result is
|
490 |
|
|
tiny, |X_BIT| if and only if the result is inexact.
|
491 |
|
|
@^underflow@>
|
492 |
|
|
|
493 |
|
|
@d X_BIT (1<<8) /* floating inexact */
|
494 |
|
|
@d Z_BIT (1<<9) /* floating division by zero */
|
495 |
|
|
@d U_BIT (1<<10) /* floating underflow */
|
496 |
|
|
@d O_BIT (1<<11) /* floating overflow */
|
497 |
|
|
@d I_BIT (1<<12) /* floating invalid operation */
|
498 |
|
|
@d W_BIT (1<<13) /* float-to-fix overflow */
|
499 |
|
|
@d V_BIT (1<<14) /* integer overflow */
|
500 |
|
|
@d D_BIT (1<<15) /* integer divide check */
|
501 |
|
|
@d E_BIT (1<<18) /* external (dynamic) trap bit */
|
502 |
|
|
|
503 |
|
|
@=
|
504 |
|
|
octa fpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>
|
505 |
|
|
octa fpack(f,e,s,r)
|
506 |
|
|
octa f; /* the normalized fraction part */
|
507 |
|
|
int e; /* the raw exponent */
|
508 |
|
|
char s; /* the sign */
|
509 |
|
|
int r; /* the rounding mode */
|
510 |
|
|
{
|
511 |
|
|
octa o;
|
512 |
|
|
if (e>0x7fd) e=0x7ff, o=zero_octa;
|
513 |
|
|
else {
|
514 |
|
|
if (e<0) {
|
515 |
|
|
if (e<-54) o.h=0, o.l=1;
|
516 |
|
|
else {@+octa oo;
|
517 |
|
|
o=shift_right(f,-e,1);
|
518 |
|
|
oo=shift_left(o,-e);
|
519 |
|
|
if (oo.l!=f.l || oo.h!=f.h) o.l |= 1; /* sticky bit */
|
520 |
|
|
@^sticky bit@>
|
521 |
|
|
}
|
522 |
|
|
e=0;
|
523 |
|
|
}@+else o=f;
|
524 |
|
|
}
|
525 |
|
|
@;
|
526 |
|
|
}
|
527 |
|
|
|
528 |
|
|
@ @=
|
529 |
|
|
int exceptions; /* bits possibly destined for rA */
|
530 |
|
|
|
531 |
|
|
@ Everything falls together so nicely here, it's almost too good to be true!
|
532 |
|
|
|
533 |
|
|
@=
|
534 |
|
|
if (o.l&3) exceptions |= X_BIT;
|
535 |
|
|
switch (r) {
|
536 |
|
|
case ROUND_DOWN:@+ if (s=='-') o=incr(o,3);@+break;
|
537 |
|
|
case ROUND_UP:@+ if (s!='-') o=incr(o,3);
|
538 |
|
|
case ROUND_OFF: break;
|
539 |
|
|
case ROUND_NEAR: o=incr(o, o.l&4? 2: 1);@+break;
|
540 |
|
|
}
|
541 |
|
|
o = shift_right(o,2,1);
|
542 |
|
|
o.h += e<<20;
|
543 |
|
|
if (o.h>=0x7ff00000) exceptions |= O_BIT+X_BIT; /* overflow */
|
544 |
|
|
else if (o.h<0x100000) exceptions |= U_BIT; /* tininess */
|
545 |
|
|
if (s=='-') o.h |= sign_bit;
|
546 |
|
|
return o;
|
547 |
|
|
|
548 |
|
|
@ Similarly, |sfpack| packs a short float, from inputs
|
549 |
|
|
having the same conventions as |fpack|.
|
550 |
|
|
|
551 |
|
|
@=
|
552 |
|
|
tetra sfpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>
|
553 |
|
|
tetra sfpack(f,e,s,r)
|
554 |
|
|
octa f; /* the fraction part */
|
555 |
|
|
int e; /* the raw exponent */
|
556 |
|
|
char s; /* the sign */
|
557 |
|
|
int r; /* the rounding mode */
|
558 |
|
|
{
|
559 |
|
|
register tetra o;
|
560 |
|
|
if (e>0x47d) e=0x47f, o=0;
|
561 |
|
|
else {
|
562 |
|
|
o=shift_left(f,3).h;
|
563 |
|
|
if (f.l&0x1fffffff) o|=1;
|
564 |
|
|
if (e<0x380) {
|
565 |
|
|
if (e<0x380-25) o=1;
|
566 |
|
|
else {@+register tetra o0,oo;
|
567 |
|
|
o0 = o;
|
568 |
|
|
o = o>>(0x380-e);
|
569 |
|
|
oo = o<<(0x380-e);
|
570 |
|
|
if (oo!=o0) o |= 1; /* sticky bit */
|
571 |
|
|
@^sticky bit@>
|
572 |
|
|
}
|
573 |
|
|
e=0x380;
|
574 |
|
|
}
|
575 |
|
|
}
|
576 |
|
|
@;
|
577 |
|
|
}
|
578 |
|
|
|
579 |
|
|
@ @=
|
580 |
|
|
if (o&3) exceptions |= X_BIT;
|
581 |
|
|
switch (r) {
|
582 |
|
|
case ROUND_DOWN:@+ if (s=='-') o+=3;@+break;
|
583 |
|
|
case ROUND_UP:@+ if (s!='-') o+=3;
|
584 |
|
|
case ROUND_OFF: break;
|
585 |
|
|
case ROUND_NEAR: o+=(o&4? 2: 1);@+break;
|
586 |
|
|
}
|
587 |
|
|
o = o>>2;
|
588 |
|
|
o += (e-0x380)<<23;
|
589 |
|
|
if (o>=0x7f800000) exceptions |= O_BIT+X_BIT; /* overflow */
|
590 |
|
|
else if (o<0x100000) exceptions |= U_BIT; /* tininess */
|
591 |
|
|
if (s=='-') o |= sign_bit;
|
592 |
|
|
return o;
|
593 |
|
|
|
594 |
|
|
@ The |funpack| routine is, roughly speaking, the opposite of |fpack|.
|
595 |
|
|
It takes a given floating point number~$x$ and separates out its
|
596 |
|
|
fraction part~$f$, exponent~$e$, and sign~$s$. It clears |exceptions|
|
597 |
|
|
to zero. It returns the type of value found: |zro|, |num|, |inf|,
|
598 |
|
|
or |nan|. When it returns |num|,
|
599 |
|
|
it will have set $f$, $e$, and~$s$
|
600 |
|
|
to the values from which |fpack| would produce the original number~$x$
|
601 |
|
|
without exceptions.
|
602 |
|
|
|
603 |
|
|
@d zero_exponent (-1000) /* zero is assumed to have this exponent */
|
604 |
|
|
|
605 |
|
|
@=
|
606 |
|
|
typedef enum {@!zro,@!num,@!inf,@!nan}@+ftype;
|
607 |
|
|
|
608 |
|
|
@ @=
|
609 |
|
|
ftype funpack @,@,@[ARGS((octa,octa*,int*,char*))@];@+@t}\6{@>
|
610 |
|
|
ftype funpack(x,f,e,s)
|
611 |
|
|
octa x; /* the given floating point value */
|
612 |
|
|
octa *f; /* address where the fraction part should be stored */
|
613 |
|
|
int *e; /* address where the exponent part should be stored */
|
614 |
|
|
char *s; /* address where the sign should be stored */
|
615 |
|
|
{
|
616 |
|
|
register int ee;
|
617 |
|
|
exceptions=0;
|
618 |
|
|
*s=(x.h&sign_bit? '-': '+');
|
619 |
|
|
*f=shift_left(x,2);
|
620 |
|
|
f->h &= 0x3fffff;
|
621 |
|
|
ee=(x.h>>20)&0x7ff;
|
622 |
|
|
if (ee) {
|
623 |
|
|
*e=ee-1;
|
624 |
|
|
f->h |= 0x400000;
|
625 |
|
|
return (ee<0x7ff? num: f->h==0x400000 && !f->l? inf: nan);
|
626 |
|
|
}
|
627 |
|
|
if (!x.l && !f->h) {
|
628 |
|
|
*e=zero_exponent;@+ return zro;
|
629 |
|
|
}
|
630 |
|
|
do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));
|
631 |
|
|
*e=ee;@+ return num;
|
632 |
|
|
}
|
633 |
|
|
|
634 |
|
|
@ @=
|
635 |
|
|
ftype sfunpack @,@,@[ARGS((tetra,octa*,int*,char*))@];@+@t}\6{@>
|
636 |
|
|
ftype sfunpack(x,f,e,s)
|
637 |
|
|
tetra x; /* the given floating point value */
|
638 |
|
|
octa *f; /* address where the fraction part should be stored */
|
639 |
|
|
int *e; /* address where the exponent part should be stored */
|
640 |
|
|
char *s; /* address where the sign should be stored */
|
641 |
|
|
{
|
642 |
|
|
register int ee;
|
643 |
|
|
exceptions=0;
|
644 |
|
|
*s=(x&sign_bit? '-': '+');
|
645 |
|
|
f->h=(x>>1)&0x3fffff, f->l=x<<31;
|
646 |
|
|
ee=(x>>23)&0xff;
|
647 |
|
|
if (ee) {
|
648 |
|
|
*e=ee+0x380-1;
|
649 |
|
|
f->h |= 0x400000;
|
650 |
|
|
return (ee<0xff? num: (x&0x7fffffff)==0x7f800000? inf: nan);
|
651 |
|
|
}
|
652 |
|
|
if (!(x&0x7fffffff)) {
|
653 |
|
|
*e=zero_exponent;@+return zro;
|
654 |
|
|
}
|
655 |
|
|
do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));
|
656 |
|
|
*e=ee+0x380;@+ return num;
|
657 |
|
|
}
|
658 |
|
|
|
659 |
|
|
@ Since \MMIX\ downplays 32-bit operations, it uses |sfpack| and |sfunpack|
|
660 |
|
|
only when loading and storing short floats, or when converting
|
661 |
|
|
from fixed point to floating point.
|
662 |
|
|
|
663 |
|
|
@=
|
664 |
|
|
octa load_sf @,@,@[ARGS((tetra))@];@+@t}\6{@>
|
665 |
|
|
octa load_sf(z)
|
666 |
|
|
tetra z; /* 32 bits to be loaded into a 64-bit register */
|
667 |
|
|
{
|
668 |
|
|
octa f,x;@+int e;@+char s;@+ftype t;
|
669 |
|
|
t=sfunpack(z,&f,&e,&s);
|
670 |
|
|
switch (t) {
|
671 |
|
|
case zro: x=zero_octa;@+break;
|
672 |
|
|
case num: return fpack(f,e,s,ROUND_OFF);
|
673 |
|
|
case inf: x=inf_octa;@+break;
|
674 |
|
|
case nan: x=shift_right(f,2,1);@+x.h|=0x7ff00000;@+break;
|
675 |
|
|
}
|
676 |
|
|
if (s=='-') x.h|=sign_bit;
|
677 |
|
|
return x;
|
678 |
|
|
}
|
679 |
|
|
|
680 |
|
|
@ @=
|
681 |
|
|
tetra store_sf @,@,@[ARGS((octa))@];@+@t}\6{@>
|
682 |
|
|
tetra store_sf(x)
|
683 |
|
|
octa x; /* 64 bits to be loaded into a 32-bit word */
|
684 |
|
|
{
|
685 |
|
|
octa f;@+tetra z;@+int e;@+char s;@+ftype t;
|
686 |
|
|
t=funpack(x,&f,&e,&s);
|
687 |
|
|
switch (t) {
|
688 |
|
|
case zro: z=0;@+break;
|
689 |
|
|
case num: return sfpack(f,e,s,cur_round);
|
690 |
|
|
case inf: z=0x7f800000;@+break;
|
691 |
|
|
case nan:@+ if (!(f.h&0x200000)) {
|
692 |
|
|
f.h|=0x200000;@+exceptions|=I_BIT; /* NaN was signaling */
|
693 |
|
|
}
|
694 |
|
|
z=0x7f800000|(f.h<<1)|(f.l>>31);@+break;
|
695 |
|
|
}
|
696 |
|
|
if (s=='-') z|=sign_bit;
|
697 |
|
|
return z;
|
698 |
|
|
}
|
699 |
|
|
|
700 |
|
|
@* Floating multiplication and division.
|
701 |
|
|
The hardest fixed point operations were multiplication and division;
|
702 |
|
|
but these two operations are the {\it easiest\/} to implement in floating point
|
703 |
|
|
arithmetic, once their fixed point counterparts are available.
|
704 |
|
|
|
705 |
|
|
@=
|
706 |
|
|
octa fmult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
707 |
|
|
octa fmult(y,z)
|
708 |
|
|
octa y,z;
|
709 |
|
|
{
|
710 |
|
|
ftype yt,zt;
|
711 |
|
|
int ye,ze;
|
712 |
|
|
char ys,zs;
|
713 |
|
|
octa x,xf,yf,zf;
|
714 |
|
|
register int xe;
|
715 |
|
|
register char xs;
|
716 |
|
|
yt=funpack(y,&yf,&ye,&ys);
|
717 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
718 |
|
|
xs=ys+zs-'+'; /* will be |'-'| when the result is negative */
|
719 |
|
|
switch (4*yt+zt) {
|
720 |
|
|
@t\4@>@;
|
721 |
|
|
case 4*zro+zro: case 4*zro+num: case 4*num+zro: x=zero_octa;@+break;
|
722 |
|
|
case 4*num+inf: case 4*inf+num: case 4*inf+inf: x=inf_octa;@+break;
|
723 |
|
|
case 4*zro+inf: case 4*inf+zro: x=standard_NaN;
|
724 |
|
|
exceptions|=I_BIT;@+break;
|
725 |
|
|
case 4*num+num: @;
|
726 |
|
|
}
|
727 |
|
|
if (xs=='-') x.h|=sign_bit;
|
728 |
|
|
return x;
|
729 |
|
|
}
|
730 |
|
|
|
731 |
|
|
@ @=
|
732 |
|
|
case 4*nan+nan:@+if (!(y.h&0x80000)) exceptions|=I_BIT; /* |y| is signaling */
|
733 |
|
|
case 4*zro+nan: case 4*num+nan: case 4*inf+nan:
|
734 |
|
|
if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;
|
735 |
|
|
return z;
|
736 |
|
|
case 4*nan+zro: case 4*nan+num: case 4*nan+inf:
|
737 |
|
|
if (!(y.h&0x80000)) exceptions|=I_BIT, y.h|=0x80000;
|
738 |
|
|
return y;
|
739 |
|
|
|
740 |
|
|
@ @=
|
741 |
|
|
xe=ye+ze-0x3fd; /* the raw exponent */
|
742 |
|
|
x=omult(yf,shift_left(zf,9));
|
743 |
|
|
if (aux.h>=0x400000) xf=aux;
|
744 |
|
|
else xf=shift_left(aux,1), xe--;
|
745 |
|
|
if (x.h||x.l) xf.l|=1; /* adjust the sticky bit */
|
746 |
|
|
return fpack(xf,xe,xs,cur_round);
|
747 |
|
|
|
748 |
|
|
@ @=
|
749 |
|
|
octa fdivide @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
750 |
|
|
octa fdivide(y,z)
|
751 |
|
|
octa y,z;
|
752 |
|
|
{
|
753 |
|
|
ftype yt,zt;
|
754 |
|
|
int ye,ze;
|
755 |
|
|
char ys,zs;
|
756 |
|
|
octa x,xf,yf,zf;
|
757 |
|
|
register int xe;
|
758 |
|
|
register char xs;
|
759 |
|
|
yt=funpack(y,&yf,&ye,&ys);
|
760 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
761 |
|
|
xs=ys+zs-'+'; /* will be |'-'| when the result is negative */
|
762 |
|
|
switch (4*yt+zt) {
|
763 |
|
|
@t\4@>@;
|
764 |
|
|
case 4*zro+inf: case 4*zro+num: case 4*num+inf: x=zero_octa;@+break;
|
765 |
|
|
case 4*num+zro: exceptions|=Z_BIT;
|
766 |
|
|
case 4*inf+num: case 4*inf+zro: x=inf_octa;@+break;
|
767 |
|
|
case 4*zro+zro: case 4*inf+inf: x=standard_NaN;
|
768 |
|
|
exceptions|=I_BIT;@+break;
|
769 |
|
|
case 4*num+num: @;
|
770 |
|
|
}
|
771 |
|
|
if (xs=='-') x.h|=sign_bit;
|
772 |
|
|
return x;
|
773 |
|
|
}
|
774 |
|
|
|
775 |
|
|
@ @=
|
776 |
|
|
xe=ye-ze+0x3fd; /* the raw exponent */
|
777 |
|
|
xf=odiv(yf,zero_octa,shift_left(zf,9));
|
778 |
|
|
if (xf.h>=0x800000) {
|
779 |
|
|
aux.l|=xf.l&1;
|
780 |
|
|
xf=shift_right(xf,1,1);
|
781 |
|
|
xe++;
|
782 |
|
|
}
|
783 |
|
|
if (aux.h||aux.l) xf.l|=1; /* adjust the sticky bit */
|
784 |
|
|
return fpack(xf,xe,xs,cur_round);
|
785 |
|
|
|
786 |
|
|
@*Floating addition and subtraction. Now for the bread-and-butter
|
787 |
|
|
operation, the sum of two floating point numbers.
|
788 |
|
|
It is not terribly difficult, but many cases need to be handled carefully.
|
789 |
|
|
|
790 |
|
|
@=
|
791 |
|
|
octa fplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
792 |
|
|
octa fplus(y,z)
|
793 |
|
|
octa y,z;
|
794 |
|
|
{
|
795 |
|
|
ftype yt,zt;
|
796 |
|
|
int ye,ze;
|
797 |
|
|
char ys,zs;
|
798 |
|
|
octa x,xf,yf,zf;
|
799 |
|
|
register int xe,d;
|
800 |
|
|
register char xs;
|
801 |
|
|
yt=funpack(y,&yf,&ye,&ys);
|
802 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
803 |
|
|
switch (4*yt+zt) {
|
804 |
|
|
@t\4@>@;
|
805 |
|
|
case 4*zro+num: return fpack(zf,ze,zs,ROUND_OFF);@+break; /* may underflow */
|
806 |
|
|
case 4*num+zro: return fpack(yf,ye,ys,ROUND_OFF);@+break; /* may underflow */
|
807 |
|
|
case 4*inf+inf:@+if (ys!=zs) {
|
808 |
|
|
exceptions|=I_BIT;@+x=standard_NaN;@+xs=zs;@+break;
|
809 |
|
|
}
|
810 |
|
|
case 4*num+inf: case 4*zro+inf: x=inf_octa;@+xs=zs;@+break;
|
811 |
|
|
case 4*inf+num: case 4*inf+zro: x=inf_octa;@+xs=ys;@+break;
|
812 |
|
|
case 4*num+num:@+ if (y.h!=(z.h^0x80000000) || y.l!=z.l)
|
813 |
|
|
@;
|
814 |
|
|
case 4*zro+zro: x=zero_octa;
|
815 |
|
|
xs=(ys==zs? ys: cur_round==ROUND_DOWN? '-': '+');@+break;
|
816 |
|
|
}
|
817 |
|
|
if (xs=='-') x.h|=sign_bit;
|
818 |
|
|
return x;
|
819 |
|
|
}
|
820 |
|
|
|
821 |
|
|
@ @=
|
822 |
|
|
{@+octa o,oo;
|
823 |
|
|
if (ye
|
824 |
|
|
@;
|
825 |
|
|
d=ye-ze;
|
826 |
|
|
xs=ys, xe=ye;
|
827 |
|
|
if (d) @;
|
828 |
|
|
if (ys==zs) {
|
829 |
|
|
xf=oplus(yf,zf);
|
830 |
|
|
if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;
|
831 |
|
|
}@+else {
|
832 |
|
|
xf=ominus(yf,zf);
|
833 |
|
|
if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;
|
834 |
|
|
else@+ while (xf.h<0x400000) xe--, xf=shift_left(xf,1);
|
835 |
|
|
}
|
836 |
|
|
return fpack(xf,xe,xs,cur_round);
|
837 |
|
|
}
|
838 |
|
|
|
839 |
|
|
@ @=
|
840 |
|
|
{
|
841 |
|
|
o=yf, yf=zf, zf=o;
|
842 |
|
|
d=ye, ye=ze, ze=d;
|
843 |
|
|
d=ys, ys=zs, zs=d;
|
844 |
|
|
}
|
845 |
|
|
|
846 |
|
|
@ Proper rounding requires two bits to the right of the fraction delivered
|
847 |
|
|
to~|fpack|. The first is the true next bit of the result;
|
848 |
|
|
the other is a ``sticky'' bit, which is nonzero if any further bits of the
|
849 |
|
|
true result are nonzero. Sticky rounding to an integer takes
|
850 |
|
|
$x$ into the number $\lfloor x/2\rfloor+\lceil x/2\rceil$.
|
851 |
|
|
@^sticky bit@>
|
852 |
|
|
|
853 |
|
|
Some subtleties need to be observed here, in order to
|
854 |
|
|
prevent the sticky bit from being shifted left. If we did not
|
855 |
|
|
shift |yf| left~1 before shifting |zf| to the right, an incorrect
|
856 |
|
|
answer would be obtained in certain cases---for example, if
|
857 |
|
|
$|yf|=2^{54}$, $|zf|=2^{54}+2^{53}-1$, $d=52$.
|
858 |
|
|
|
859 |
|
|
@=
|
860 |
|
|
{
|
861 |
|
|
if (d<=2) zf=shift_right(zf,d,1); /* exact result */
|
862 |
|
|
else if (d>53) zf.h=0, zf.l=1; /* tricky but OK */
|
863 |
|
|
else {
|
864 |
|
|
if (ys!=zs) d--,xe--,yf=shift_left(yf,1);
|
865 |
|
|
o=zf;
|
866 |
|
|
zf=shift_right(o,d,1);
|
867 |
|
|
oo=shift_left(zf,d);
|
868 |
|
|
if (oo.l!=o.l || oo.h!=o.h) zf.l|=1;
|
869 |
|
|
}
|
870 |
|
|
}
|
871 |
|
|
|
872 |
|
|
@ The comparison of floating point numbers with respect to $\epsilon$
|
873 |
|
|
shares some of the characteristics of floating point addition/subtraction.
|
874 |
|
|
In some ways it is simpler, and in other ways it is more difficult;
|
875 |
|
|
we might as well deal with it now. % anyways
|
876 |
|
|
|
877 |
|
|
Subroutine |fepscomp(y,z,e,s)| returns 2 if |y|, |z|, or |e| is a NaN
|
878 |
|
|
or |e| is negative. It returns 1 if |s=0| and $y\approx z\ (e)$ or if
|
879 |
|
|
|s!=0| and $y\sim z\ (e)$,
|
880 |
|
|
as defined in Section~4.2.2 of {\sl Seminumerical Algorithms\/};
|
881 |
|
|
otherwise it returns~0.
|
882 |
|
|
|
883 |
|
|
@=
|
884 |
|
|
int fepscomp @,@,@[ARGS((octa,octa,octa,int))@];@+@t}\6{@>
|
885 |
|
|
int fepscomp(y,z,e,s)
|
886 |
|
|
octa y,z,e; /* the operands */
|
887 |
|
|
int s; /* test similarity? */
|
888 |
|
|
{
|
889 |
|
|
octa yf,zf,ef,o,oo;
|
890 |
|
|
int ye,ze,ee;
|
891 |
|
|
char ys,zs,es;
|
892 |
|
|
register int yt,zt,et,d;
|
893 |
|
|
et=funpack(e,&ef,&ee,&es);
|
894 |
|
|
if (es=='-') return 2;
|
895 |
|
|
switch (et) {
|
896 |
|
|
case nan: return 2;
|
897 |
|
|
case inf: ee=10000;
|
898 |
|
|
case num: case zro: break;
|
899 |
|
|
}
|
900 |
|
|
yt=funpack(y,&yf,&ye,&ys);
|
901 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
902 |
|
|
switch (4*yt+zt) {
|
903 |
|
|
case 4*nan+nan: case 4*nan+inf: case 4*nan+num: case 4*nan+zro:
|
904 |
|
|
case 4*inf+nan: case 4*num+nan: case 4*zro+nan: return 2;
|
905 |
|
|
case 4*inf+inf: return (ys==zs || ee>=1023);
|
906 |
|
|
case 4*inf+num: case 4*inf+zro: case 4*num+inf: case 4*zro+inf:
|
907 |
|
|
return (s && ee>=1022);
|
908 |
|
|
case 4*zro+zro: return 1;
|
909 |
|
|
case 4*zro+num: case 4*num+zro:@+ if (!s) return 0;
|
910 |
|
|
case 4*num+num: break;
|
911 |
|
|
}
|
912 |
|
|
@;
|
913 |
|
|
}
|
914 |
|
|
|
915 |
|
|
@ The relation $y\approx z\ (\epsilon)$ reduces to
|
916 |
|
|
$y\sim z\ (\epsilon/2^d)$, if $d$~is the difference between the
|
917 |
|
|
larger and smaller exponents of $y$ and~$z$.
|
918 |
|
|
|
919 |
|
|
@=
|
920 |
|
|
@;
|
921 |
|
|
if (ye
|
922 |
|
|
@;
|
923 |
|
|
if (ze==zero_exponent) ze=ye;
|
924 |
|
|
d=ye-ze;
|
925 |
|
|
if (!s) ee-=d;
|
926 |
|
|
if (ee>=1023) return 1; /* if $\epsilon\ge2$, $z\in N_\epsilon(y)$ */
|
927 |
|
|
@;
|
928 |
|
|
if (!o.h && !o.l) return 1;
|
929 |
|
|
if (ee<968) return 0; /* if $y\ne z$ and $\epsilon<2^{-54}$, $y\not\sim z$ */
|
930 |
|
|
if (ee>=1021) ef=shift_left(ef,ee-1021);
|
931 |
|
|
else ef=shift_right(ef,1021-ee,1);
|
932 |
|
|
return o.h
|
933 |
|
|
|
934 |
|
|
@ @=
|
935 |
|
|
if (ye<0 && yt!=zro) yf=shift_left(y,2), ye=0;
|
936 |
|
|
if (ze<0 && zt!=zro) zf=shift_left(z,2), ze=0;
|
937 |
|
|
|
938 |
|
|
@ At this point $y\sim z$ if and only if
|
939 |
|
|
$$|yf|+(-1)^{[ys=zs]}|zf|/2^d\le 2^{ee-1021}|ef|=2^{55}\epsilon.$$
|
940 |
|
|
We need to evaluate this relation without overstepping the bounds of
|
941 |
|
|
our simulated 64-bit registers.
|
942 |
|
|
|
943 |
|
|
When $d>2$, the difference of fraction parts might not fit exactly
|
944 |
|
|
in an octabyte;
|
945 |
|
|
in that case the numbers are not similar unless $\epsilon>3/8$,
|
946 |
|
|
and we replace the difference by the ceiling of the
|
947 |
|
|
true result. When $\epsilon<1/8$, our program essentially replaces
|
948 |
|
|
$2^{55}\epsilon$ by $\lfloor2^{55}\epsilon\rfloor$. These
|
949 |
|
|
truncations are not needed simultaneously. Therefore the logic
|
950 |
|
|
is justified by the facts that, if $n$ is an integer, we have
|
951 |
|
|
$x\le n$ if and only if $\lceil x\rceil\le n$;
|
952 |
|
|
$n\le x$ if and only if $n\le\lfloor x\rfloor$. (Notice that the
|
953 |
|
|
concept of ``sticky bit'' is {\it not\/} appropriate here.)
|
954 |
|
|
@^sticky bit@>
|
955 |
|
|
|
956 |
|
|
@=
|
957 |
|
|
if (d>54) o=zero_octa,oo=zf;
|
958 |
|
|
else o=shift_right(zf,d,1),oo=shift_left(o,d);
|
959 |
|
|
if (oo.h!=zf.h || oo.l!=zf.l) { /* truncated result, hence $d>2$ */
|
960 |
|
|
if (ee<1020) return 0; /* difference is too large for similarity */
|
961 |
|
|
o=incr(o,ys==zs? 0: 1); /* adjust for ceiling */
|
962 |
|
|
}
|
963 |
|
|
o=(ys==zs? ominus(yf,o): oplus(yf,o));
|
964 |
|
|
|
965 |
|
|
@*Floating point output conversion.
|
966 |
|
|
The |print_float| routine converts an octabyte to a floating decimal
|
967 |
|
|
representation that will be input as precisely the same value.
|
968 |
|
|
@^binary-to-decimal conversion@>
|
969 |
|
|
@^radix conversion@>
|
970 |
|
|
@^multiprecision conversion@>
|
971 |
|
|
|
972 |
|
|
@=
|
973 |
|
|
static void bignum_times_ten @,@,@[ARGS((bignum*))@];
|
974 |
|
|
static void bignum_dec @,@,@[ARGS((bignum*,bignum*,tetra))@];
|
975 |
|
|
static int bignum_compare @,@,@[ARGS((bignum*,bignum*))@];
|
976 |
|
|
void print_float @,@,@[ARGS((octa))@];@+@t}\6{@>
|
977 |
|
|
void print_float(x)
|
978 |
|
|
octa x;
|
979 |
|
|
{
|
980 |
|
|
@;
|
981 |
|
|
if (x.h&sign_bit) printf("-");
|
982 |
|
|
@
|
983 |
|
|
fraction interval $[f\dts g]$ or $(f\dts g)$@>;
|
984 |
|
|
@;
|
985 |
|
|
@;
|
986 |
|
|
@;
|
987 |
|
|
}
|
988 |
|
|
|
989 |
|
|
@ One way to visualize the problem being solved here is to consider
|
990 |
|
|
the vastly simpler case in which there are only 2-bit exponents
|
991 |
|
|
and 2-bit fractions. Then the sixteen possible 4-bit combinations
|
992 |
|
|
have the following interpretations:
|
993 |
|
|
$$\def\\{\;\dts\;}
|
994 |
|
|
\vbox{\halign{#\qquad&$#$\hfil\cr
|
995 |
|
|
0000&[0\\0.125]\cr
|
996 |
|
|
0001&(0.125\\0.375)\cr
|
997 |
|
|
0010&[0.375\\0.625]\cr
|
998 |
|
|
0011&(0.625\\0.875)\cr
|
999 |
|
|
0100&[0.875\\1.125]\cr
|
1000 |
|
|
0101&(1.125\\1.375)\cr
|
1001 |
|
|
0110&[1.375\\1.625]\cr
|
1002 |
|
|
0111&(1.625\\1.875)\cr
|
1003 |
|
|
1000&[1.875\\2.25]\cr
|
1004 |
|
|
1001&(2.25\\2.75)\cr
|
1005 |
|
|
1010&[2.75\\3.25]\cr
|
1006 |
|
|
1011&(3.25\\3.75)\cr
|
1007 |
|
|
1100&[3.75\\\infty]\cr
|
1008 |
|
|
1101&\rm NaN(0\\0.375)\cr
|
1009 |
|
|
1110&\rm NaN[0.375\\0.625]\cr
|
1010 |
|
|
1111&\rm NaN(0.625\\1)\cr}}$$
|
1011 |
|
|
Notice that the interval is closed, $[f\dts g]$, when the fraction part
|
1012 |
|
|
is even; it is open, $(f\dts g)$, when the fraction part is odd.
|
1013 |
|
|
The printed outputs for these sixteen values, if we actually were
|
1014 |
|
|
dealing with such short exponents and fractions, would be
|
1015 |
|
|
\.{0.}, \.{.2}, \.{.5}, \.{.7}, \.{1.}, \.{1.2}, \.{1.5}, \.{1.7},
|
1016 |
|
|
\.{2.}, \.{2.5}, \.{3.}, \.{3.5}, \.{Inf}, \.{NaN.2}, \.{NaN}, \.{NaN.8},
|
1017 |
|
|
respectively.
|
1018 |
|
|
|
1019 |
|
|
@=
|
1020 |
|
|
f=shift_left(x,1);
|
1021 |
|
|
e=f.h>>21;
|
1022 |
|
|
f.h&=0x1fffff;
|
1023 |
|
|
if (!f.h && !f.l) @@;
|
1024 |
|
|
else {
|
1025 |
|
|
g=incr(f,1);
|
1026 |
|
|
f=incr(f,-1);
|
1027 |
|
|
if (!e) e=1; /* subnormal */
|
1028 |
|
|
else if (e==0x7ff) {
|
1029 |
|
|
printf("NaN");
|
1030 |
|
|
if (g.h==0x100000 && g.l==1) return; /* the ``standard'' NaN */
|
1031 |
|
|
e=0x3ff; /* extreme NaNs come out OK even without adjusting |f| or |g| */
|
1032 |
|
|
}@+else f.h|=0x200000, g.h|=0x200000;
|
1033 |
|
|
}
|
1034 |
|
|
|
1035 |
|
|
@ @=
|
1036 |
|
|
octa f,g; /* lower and upper bounds on the fraction part */
|
1037 |
|
|
register int e; /* exponent part */
|
1038 |
|
|
register int j,k; /* all purpose indices */
|
1039 |
|
|
|
1040 |
|
|
@ The transition points between exponents correspond to powers of~2. At
|
1041 |
|
|
such points the interval extends only half as far to the left of that
|
1042 |
|
|
power of~2 as it does to the right. For example, in the 4-bit minifloat numbers
|
1043 |
|
|
considered above, case 1000 corresponds to the interval $[1.875\;\dts\;2.25]$.
|
1044 |
|
|
|
1045 |
|
|
@=
|
1046 |
|
|
{
|
1047 |
|
|
if (!e) {
|
1048 |
|
|
printf("0.");@+return;
|
1049 |
|
|
}
|
1050 |
|
|
if (e==0x7ff) {
|
1051 |
|
|
printf("Inf");@+return;
|
1052 |
|
|
}
|
1053 |
|
|
e--;
|
1054 |
|
|
f.h=0x3fffff, f.l=0xffffffff;
|
1055 |
|
|
g.h=0x400000, g.l=2;
|
1056 |
|
|
}
|
1057 |
|
|
|
1058 |
|
|
@ We want to find the ``simplest'' value in the interval corresponding
|
1059 |
|
|
to the given number, in the sense that it has fewest significant
|
1060 |
|
|
digits when expressed in decimal notation. Thus, for example,
|
1061 |
|
|
if the floating point number can be described by a relatively
|
1062 |
|
|
short string such as `\.{.1}' or `\.{37e100}', we want to discover that
|
1063 |
|
|
representation.
|
1064 |
|
|
|
1065 |
|
|
The basic idea is to generate the decimal representations of the
|
1066 |
|
|
two endpoints of the interval, outputting the leading digits where
|
1067 |
|
|
both endpoints agree, then making a final decision at the first place where
|
1068 |
|
|
they disagree.
|
1069 |
|
|
|
1070 |
|
|
The ``simplest'' value is not always unique. For example, in the
|
1071 |
|
|
case of 4-bit minifloat numbers we could represent the bit pattern 0001 as
|
1072 |
|
|
either \.{.2} or \.{.3}, and we could represent 1001 in five equally short
|
1073 |
|
|
ways: \.{2.3} or \.{2.4} or \.{2.5} or \.{2.6} or \.{2.7}. The
|
1074 |
|
|
algorithm below tries to choose the middle possibility in such cases.
|
1075 |
|
|
|
1076 |
|
|
[A solution to the analogous problem for fixed-point representations,
|
1077 |
|
|
without the additional complication of round-to-even, was used by
|
1078 |
|
|
the author in the program for \TeX; see {\sl Beauty is Our Business\/}
|
1079 |
|
|
(Springer, 1990), 233--242.]
|
1080 |
|
|
@^Knuth, Donald Ervin@>
|
1081 |
|
|
|
1082 |
|
|
Suppose we are given two fractions $f$ and $g$, where $0\le f
|
1083 |
|
|
we want to compute the shortest decimal in the closed interval $[f\dts g]$.
|
1084 |
|
|
If $f=0$, we are done. Otherwise let $10f=d+f'$ and $10g=e+g'$, where
|
1085 |
|
|
$0\le f'<1$ and $0\le g'<1$. If $d
|
1086 |
|
|
any of the digits $d+1$, \dots,~$e$; otherwise we output the
|
1087 |
|
|
common digit $d=e$, and repeat the process on the fractions $0\le f'
|
1088 |
|
|
A similar procedure works with respect to the open interval $(f\dts g)$.
|
1089 |
|
|
|
1090 |
|
|
@ The program below carries out the stated algorithm by using multiprecision
|
1091 |
|
|
arithmetic on 77-place integers with 28 bits each. This choice
|
1092 |
|
|
facilitates multiplication by~10, and allows us to deal with the whole range of
|
1093 |
|
|
floating binary numbers using fixed point arithmetic. We keep track of
|
1094 |
|
|
the leading and trailing digit positions so that trivial operations on
|
1095 |
|
|
zeros are avoided.
|
1096 |
|
|
|
1097 |
|
|
If |f| points to a \&{bignum}, its radix-$2^{28}$ digits are
|
1098 |
|
|
|f->dat[0]| through |f->dat[76]|, from most significant to least significant.
|
1099 |
|
|
We assume that all digit positions are zero unless they lie in the
|
1100 |
|
|
subarray between indices |f->a| and |f->b|, inclusive.
|
1101 |
|
|
Furthermore, both |f->dat[f->a]| and |f->dat[f->b]| are nonzero,
|
1102 |
|
|
unless |f->a=f->b=bignum_prec-1|.
|
1103 |
|
|
|
1104 |
|
|
The \&{bignum} data type can be used with any radix less than
|
1105 |
|
|
$2^{32}$; we will use it later with radix~$10^9$. The |dat| array
|
1106 |
|
|
is made large enough to accommodate both applications.
|
1107 |
|
|
|
1108 |
|
|
@d bignum_prec 157 /* would be 77 if we cared only about |print_float| */
|
1109 |
|
|
|
1110 |
|
|
@=
|
1111 |
|
|
typedef struct {
|
1112 |
|
|
int a; /* index of the most significant digit */
|
1113 |
|
|
int b; /* index of the least significant digit; must be $\ge a$ */
|
1114 |
|
|
tetra dat[bignum_prec]; /* the digits; undefined except between |a| and |b| */
|
1115 |
|
|
} bignum;
|
1116 |
|
|
|
1117 |
|
|
@ Here, for example, is how we go from $f$ to $10f$, assuming that
|
1118 |
|
|
overflow will not occur and that the radix is $2^{28}$:
|
1119 |
|
|
|
1120 |
|
|
@=
|
1121 |
|
|
static void bignum_times_ten(f)
|
1122 |
|
|
bignum *f;
|
1123 |
|
|
{
|
1124 |
|
|
register tetra *p,*q; register tetra x,carry;
|
1125 |
|
|
for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {
|
1126 |
|
|
x=*p*10+carry;
|
1127 |
|
|
*p=x&0xfffffff;
|
1128 |
|
|
carry=x>>28;
|
1129 |
|
|
}
|
1130 |
|
|
*p=carry;
|
1131 |
|
|
if (carry) f->a--;
|
1132 |
|
|
if (f->dat[f->b]==0 && f->b>f->a) f->b--;
|
1133 |
|
|
}
|
1134 |
|
|
|
1135 |
|
|
@ And here is how we test whether $fg$, using any
|
1136 |
|
|
radix whatever:
|
1137 |
|
|
|
1138 |
|
|
@=
|
1139 |
|
|
static int bignum_compare(f,g)
|
1140 |
|
|
bignum *f,*g;
|
1141 |
|
|
{
|
1142 |
|
|
register tetra *p,*pp,*q,*qq;
|
1143 |
|
|
if (f->a!=g->a) return f->a > g->a? -1: 1;
|
1144 |
|
|
pp=&f->dat[f->b], qq=&g->dat[g->b];
|
1145 |
|
|
for (p=&f->dat[f->a],q=&g->dat[g->a]; p<=pp; p++,q++) {
|
1146 |
|
|
if (*p!=*q) return *p<*q? -1: 1;
|
1147 |
|
|
if (q==qq) return p
|
1148 |
|
|
}
|
1149 |
|
|
return -1;
|
1150 |
|
|
}
|
1151 |
|
|
|
1152 |
|
|
@ The following subroutine subtracts $g$ from~$f$, assuming that
|
1153 |
|
|
$f\ge g>0$ and using a given radix.
|
1154 |
|
|
|
1155 |
|
|
@=
|
1156 |
|
|
static void bignum_dec(f,g,r)
|
1157 |
|
|
bignum *f,*g;
|
1158 |
|
|
tetra r; /* the radix */
|
1159 |
|
|
{
|
1160 |
|
|
register tetra *p,*q,*qq;
|
1161 |
|
|
register int x,borrow;
|
1162 |
|
|
while (g->b>f->b) f->dat[++f->b]=0;
|
1163 |
|
|
qq=&g->dat[g->a];
|
1164 |
|
|
for (p=&f->dat[g->b],q=&g->dat[g->b],borrow=0;q>=qq;p--,q--) {
|
1165 |
|
|
x=*p - *q - borrow;
|
1166 |
|
|
if (x>=0) borrow=0, *p=x;
|
1167 |
|
|
else borrow=1, *p=x+r;
|
1168 |
|
|
}
|
1169 |
|
|
for (;borrow;p--)
|
1170 |
|
|
if (*p) borrow=0, *p=*p-1;
|
1171 |
|
|
else *p=r-1;
|
1172 |
|
|
while (f->dat[f->a]==0) {
|
1173 |
|
|
if (f->a==f->b) { /* the result is zero */
|
1174 |
|
|
f->a=f->b=bignum_prec-1, f->dat[bignum_prec-1]=0;
|
1175 |
|
|
return;
|
1176 |
|
|
}
|
1177 |
|
|
f->a++;
|
1178 |
|
|
}
|
1179 |
|
|
while (f->dat[f->b]==0) f->b--;
|
1180 |
|
|
}
|
1181 |
|
|
|
1182 |
|
|
@ Armed with these subroutines, we are ready to solve the problem.
|
1183 |
|
|
The first task is to put the numbers into \&{bignum} form.
|
1184 |
|
|
If the exponent is |e|, the number destined for digit |dat[k]| will
|
1185 |
|
|
consist of the rightmost 28 bits of the given fraction after it has
|
1186 |
|
|
been shifted right $c-e-28k$ bits, for some constant~$c$.
|
1187 |
|
|
We choose $c$ so that,
|
1188 |
|
|
when $e$ has its maximum value \Hex{7ff}, the leading digit will
|
1189 |
|
|
go into position |dat[1]|, and so that when the number to be printed
|
1190 |
|
|
is exactly~1 the integer part of~$g$ will also be exactly~1.
|
1191 |
|
|
|
1192 |
|
|
@d magic_offset 2112 /* the constant $c$ that makes it work */
|
1193 |
|
|
@d origin 37 /* the radix point follows |dat[37]| */
|
1194 |
|
|
|
1195 |
|
|
@=
|
1196 |
|
|
k=(magic_offset-e)/28;
|
1197 |
|
|
ff.dat[k-1]=shift_right(f,magic_offset+28-e-28*k,1).l&0xfffffff;
|
1198 |
|
|
gg.dat[k-1]=shift_right(g,magic_offset+28-e-28*k,1).l&0xfffffff;
|
1199 |
|
|
ff.dat[k]=shift_right(f,magic_offset-e-28*k,1).l&0xfffffff;
|
1200 |
|
|
gg.dat[k]=shift_right(g,magic_offset-e-28*k,1).l&0xfffffff;
|
1201 |
|
|
ff.dat[k+1]=shift_left(f,e+28*k-(magic_offset-28)).l&0xfffffff;
|
1202 |
|
|
gg.dat[k+1]=shift_left(g,e+28*k-(magic_offset-28)).l&0xfffffff;
|
1203 |
|
|
ff.a=(ff.dat[k-1]? k-1: k);
|
1204 |
|
|
ff.b=(ff.dat[k+1]? k+1: k);
|
1205 |
|
|
gg.a=(gg.dat[k-1]? k-1: k);
|
1206 |
|
|
gg.b=(gg.dat[k+1]? k+1: k);
|
1207 |
|
|
|
1208 |
|
|
@ If $e$ is sufficiently small, the fractions $f$ and $g$ will be less than~1,
|
1209 |
|
|
and we can use the stated algorithm directly. Of course, if $e$ is
|
1210 |
|
|
extremely small, a lot of leading zeros need to be lopped off; in the
|
1211 |
|
|
worst case, we may have to multiply $f$ and~$g$ by~10 more than 300 times.
|
1212 |
|
|
But hey, we don't need to do that extremely often, and computers are
|
1213 |
|
|
pretty fast nowadays.
|
1214 |
|
|
|
1215 |
|
|
In the small-exponent case, the computation always terminates before
|
1216 |
|
|
$f$ becomes zero, because the interval endpoints are fractions with
|
1217 |
|
|
denominator $2^t$ for some $t>50$.
|
1218 |
|
|
|
1219 |
|
|
The invariant relations |ff.dat[ff.a]!=0| and |gg.dat[gg.a]!=0| are
|
1220 |
|
|
not maintained by the computation here, when |ff.a=origin| or |gg.a=origin|.
|
1221 |
|
|
But no harm is done, because |bignum_compare| is not used.
|
1222 |
|
|
|
1223 |
|
|
@=
|
1224 |
|
|
if (e>0x401) @@;
|
1225 |
|
|
else@+{ /* if |e<=0x401| we have |gg.a>=origin| and |gg.dat[origin]<=8| */
|
1226 |
|
|
if (ff.a>origin) ff.dat[origin]=0;
|
1227 |
|
|
for (e=1, p=s; gg.a>origin || ff.dat[origin]==gg.dat[origin]; ) {
|
1228 |
|
|
if (gg.a>origin) e--;
|
1229 |
|
|
else *p++=ff.dat[origin]+'0', ff.dat[origin]=0, gg.dat[origin]=0;
|
1230 |
|
|
bignum_times_ten(&ff);
|
1231 |
|
|
bignum_times_ten(&gg);
|
1232 |
|
|
}
|
1233 |
|
|
*p++=((ff.dat[origin]+1+gg.dat[origin])>>1)+'0'; /* the middle digit */
|
1234 |
|
|
}
|
1235 |
|
|
*p='\0'; /* terminate the string |s| */
|
1236 |
|
|
|
1237 |
|
|
@ When |e| is large, we use the stated algorithm by considering $f$ and
|
1238 |
|
|
$g$ to be fractions whose denominator is a power of~10.
|
1239 |
|
|
|
1240 |
|
|
An interesting case arises when the number to be converted is
|
1241 |
|
|
\Hex{44ada56a4b0835bf}, since the interval turns out to be
|
1242 |
|
|
$$ (69999999999999991611392\ \ \dts\ \ 70000000000000000000000).$$
|
1243 |
|
|
If this were a closed interval, we could simply give the answer
|
1244 |
|
|
\.{7e22}; but the number \.{7e22} actually corresponds to
|
1245 |
|
|
\Hex{44ada56a4b0835c0}
|
1246 |
|
|
because of the round-to-even rule. Therefore the correct answer is, say,
|
1247 |
|
|
\.{6.9999999999999995e22}. This example shows that we need a slightly
|
1248 |
|
|
different strategy in the case of open intervals; we cannot simply
|
1249 |
|
|
look at the first position in which the endpoints have different
|
1250 |
|
|
decimal digits. Therefore we change the invariant relation to $0\le f
|
1251 |
|
|
when open intervals are involved,
|
1252 |
|
|
and we do not terminate the process when $f=0$ or $g=1$.
|
1253 |
|
|
|
1254 |
|
|
@=
|
1255 |
|
|
{@+register int open=x.l&1;
|
1256 |
|
|
tt.dat[origin]=10;
|
1257 |
|
|
tt.a=tt.b=origin;
|
1258 |
|
|
for (e=1;bignum_compare(&gg,&tt)>=open;e++)
|
1259 |
|
|
bignum_times_ten(&tt);
|
1260 |
|
|
p=s;
|
1261 |
|
|
while (1) {
|
1262 |
|
|
bignum_times_ten(&ff);
|
1263 |
|
|
bignum_times_ten(&gg);
|
1264 |
|
|
for (j='0';bignum_compare(&ff,&tt)>=0;j++)
|
1265 |
|
|
bignum_dec(&ff,&tt,0x10000000),bignum_dec(&gg,&tt,0x10000000);
|
1266 |
|
|
if (bignum_compare(&gg,&tt)>=open) break;
|
1267 |
|
|
*p++=j;
|
1268 |
|
|
if (ff.a==bignum_prec-1 && !open)
|
1269 |
|
|
goto done; /* $f=0$ in a closed interval */
|
1270 |
|
|
}
|
1271 |
|
|
for (k=j;bignum_compare(&gg,&tt)>=open;k++) bignum_dec(&gg,&tt,0x10000000);
|
1272 |
|
|
*p++=(j+1+k)>>1; /* the middle digit */
|
1273 |
|
|
done:;
|
1274 |
|
|
}
|
1275 |
|
|
|
1276 |
|
|
@ The length of string~|s| will be at most 17. For if $f$ and $g$
|
1277 |
|
|
agree to 17 places, we have $g/f<1+10^{-16}$; but the
|
1278 |
|
|
ratio $g/f$ is always $\ge(1+2^{-52}+2^{-53})/(1+2^{-52}-2^{-53})
|
1279 |
|
|
>1+2\times10^{-16}$.
|
1280 |
|
|
|
1281 |
|
|
@=
|
1282 |
|
|
bignum ff,gg; /* fractions or numerators of fractions */
|
1283 |
|
|
bignum tt; /* power of ten (used as the denominator) */
|
1284 |
|
|
char s[18];
|
1285 |
|
|
register char *p;
|
1286 |
|
|
|
1287 |
|
|
@ At this point the significant digits are in string |s|, and |s[0]!='0'|.
|
1288 |
|
|
If we put a decimal point at the left of~|s|, the result should
|
1289 |
|
|
be multiplied by $10^e$.
|
1290 |
|
|
|
1291 |
|
|
We prefer the output `\.{300.}' to the form `\.{3e2}', and we prefer
|
1292 |
|
|
`\.{.03}' to `\.{3e-2}'. In general, the output will use an
|
1293 |
|
|
explicit exponent only if the alternative would take more than
|
1294 |
|
|
18~characters.
|
1295 |
|
|
|
1296 |
|
|
@=
|
1297 |
|
|
if (e>17 || e<(int)strlen(s)-17)
|
1298 |
|
|
printf("%c%s%se%d",s[0],(s[1]? ".": ""),s+1,e-1);
|
1299 |
|
|
else if (e<0) printf(".%0*d%s",-e,0,s);
|
1300 |
|
|
else if (strlen(s)>=e) printf("%.*s.%s",e,s,s+e);
|
1301 |
|
|
else printf("%s%0*d.",s,e-(int)strlen(s),0);
|
1302 |
|
|
|
1303 |
|
|
@*Floating point input conversion. Going the other way, we want to
|
1304 |
|
|
be able to convert a given decimal number into its floating binary
|
1305 |
|
|
@^decimal-to-binary conversion@>
|
1306 |
|
|
@^radix conversion@>
|
1307 |
|
|
@^multiprecision conversion@>
|
1308 |
|
|
equivalent. The following syntax is supported:
|
1309 |
|
|
$$\vbox{\halign{$#$\hfil\cr
|
1310 |
|
|
\\is\.0\mid\.1\mid\.2\mid\.3\mid\.4\mid
|
1311 |
|
|
\.5\mid\.6\mid\.7\mid\.8\mid\.9\cr
|
1312 |
|
|
\\is\\mid\\\cr
|
1313 |
|
|
\\is\\..\mid\..\\mid
|
1314 |
|
|
\\..\\cr
|
1315 |
|
|
\\is\\mid\.+\mid\.-\cr
|
1316 |
|
|
\\is\.e\\\cr
|
1317 |
|
|
\\is\\mid\\cr
|
1318 |
|
|
\\is\\\mid
|
1319 |
|
|
\\\mid\cr
|
1320 |
|
|
\hskip12em \.{Inf}\mid\.{NaN}\mid\.{NaN.}\\cr
|
1321 |
|
|
\\is\\\cr
|
1322 |
|
|
\\is\\\cr
|
1323 |
|
|
}}$$
|
1324 |
|
|
For example, `\.{-3.}' is the floating constant \Hex{c008000000000000}\thinspace;
|
1325 |
|
|
`\.{1e3}' and `\.{1000}' are both equivalent to \Hex{408f400000000000}\thinspace;
|
1326 |
|
|
`\.{NaN}' and `\.{+NaN.5}' are both equivalent to \Hex{7ff8000000000000}.
|
1327 |
|
|
|
1328 |
|
|
The |scan_const| routine looks at a given string and finds the
|
1329 |
|
|
longest initial substring that matches the syntax of either \
|
1330 |
|
|
constant> or \. It puts the corresponding value
|
1331 |
|
|
into the global octabyte variable~|val|; it also puts the position of the first
|
1332 |
|
|
unscanned character in the global pointer variable |next_char|.
|
1333 |
|
|
It returns 1 if a floating constant was found, 0~if a decimal constant
|
1334 |
|
|
was found, $-1$ if nothing was found. A decimal constant that doesn't
|
1335 |
|
|
fit in an octabyte is computed modulo~$2^{64}$.
|
1336 |
|
|
@^syntax of floating point constants@>
|
1337 |
|
|
|
1338 |
|
|
The value of |exceptions| set by |scan_const| is not necessarily correct.
|
1339 |
|
|
|
1340 |
|
|
@=
|
1341 |
|
|
static void bignum_double @,@,@[ARGS((bignum*))@];
|
1342 |
|
|
int scan_const @,@,@[ARGS((char*))@];@+@t}\6{@>
|
1343 |
|
|
int scan_const(s)
|
1344 |
|
|
char *s;
|
1345 |
|
|
{
|
1346 |
|
|
@;
|
1347 |
|
|
val.h=val.l=0;
|
1348 |
|
|
p=s;
|
1349 |
|
|
if (*p=='+' || *p=='-') sign=*p++;@+else sign='+';
|
1350 |
|
|
if (strncmp(p,"NaN",3)==0) NaN=true, p+=3;
|
1351 |
|
|
else NaN=false;
|
1352 |
|
|
if ((isdigit(*p)&&!NaN) || (*p=='.' && isdigit(*(p+1))))
|
1353 |
|
|
@;
|
1354 |
|
|
if (NaN) @;
|
1355 |
|
|
if (strncmp(p,"Inf",3)==0) @;
|
1356 |
|
|
no_const_found: next_char=s;@+return -1;
|
1357 |
|
|
}
|
1358 |
|
|
|
1359 |
|
|
@ @=
|
1360 |
|
|
octa val; /* value returned by |scan_const| */
|
1361 |
|
|
char *next_char; /* pointer returned by |scan_const| */
|
1362 |
|
|
|
1363 |
|
|
@ @=
|
1364 |
|
|
register char *p,*q; /* for string manipulations */
|
1365 |
|
|
register bool NaN; /* are we processing a NaN? */
|
1366 |
|
|
int sign; /* |'+'| or |'-'| */
|
1367 |
|
|
|
1368 |
|
|
@ @=
|
1369 |
|
|
{
|
1370 |
|
|
next_char=p;
|
1371 |
|
|
val.h=0x600000, exp=0x3fe;
|
1372 |
|
|
goto packit;
|
1373 |
|
|
}
|
1374 |
|
|
|
1375 |
|
|
@ @=
|
1376 |
|
|
{
|
1377 |
|
|
next_char=p+3;
|
1378 |
|
|
goto make_it_infinite;
|
1379 |
|
|
}
|
1380 |
|
|
|
1381 |
|
|
@ We saw above that a string of at most 17 digits is enough to characterize
|
1382 |
|
|
a floating point number, for purposes of output. But a much longer buffer
|
1383 |
|
|
for digits is needed when we're doing input. For example, consider the
|
1384 |
|
|
borderline quantity $(1+2^{-53})/2^{1022}$; its decimal expansion, when
|
1385 |
|
|
written out exactly, is a number with more than 750 significant digits:
|
1386 |
|
|
\.{2.2250738585...8125e-308}.
|
1387 |
|
|
If {\it any one\/} of those digits is increased, or if
|
1388 |
|
|
additional nonzero digits are added as in
|
1389 |
|
|
\.{2.2250738585...81250000001e-308},
|
1390 |
|
|
the rounded value is supposed to change from \Hex{0010000000000000}
|
1391 |
|
|
to \Hex{0010000000000001}.
|
1392 |
|
|
|
1393 |
|
|
We assume here that the user prefers a perfectly correct answer to
|
1394 |
|
|
a speedy almost-correct one, so we implement the most general case.
|
1395 |
|
|
|
1396 |
|
|
@=
|
1397 |
|
|
{
|
1398 |
|
|
for (q=buf0,dec_pt=(char*)0;isdigit(*p);p++) {
|
1399 |
|
|
val=oplus(val,shift_left(val,2)); /* multiply by 5 */
|
1400 |
|
|
val=incr(shift_left(val,1),*p-'0');
|
1401 |
|
|
if (q>buf0 || *p!='0')
|
1402 |
|
|
if (q
|
1403 |
|
|
else if (*(q-1)=='0') *(q-1)=*p;
|
1404 |
|
|
}
|
1405 |
|
|
if (NaN) *q++='1';
|
1406 |
|
|
if (*p=='.') @;
|
1407 |
|
|
next_char=p;
|
1408 |
|
|
if (*p=='e' && !NaN) @@;
|
1409 |
|
|
else exp=0;
|
1410 |
|
|
if (dec_pt) @;
|
1411 |
|
|
if (sign=='-') val=ominus(zero_octa,val);
|
1412 |
|
|
return 0;
|
1413 |
|
|
}
|
1414 |
|
|
|
1415 |
|
|
@ @=
|
1416 |
|
|
{
|
1417 |
|
|
dec_pt=q;
|
1418 |
|
|
p++;
|
1419 |
|
|
for (zeros=0;isdigit(*p);p++)
|
1420 |
|
|
if (*p=='0' && q==buf0) zeros++;
|
1421 |
|
|
else if (q
|
1422 |
|
|
else if (*(q-1)=='0') *(q-1)=*p;
|
1423 |
|
|
}
|
1424 |
|
|
|
1425 |
|
|
@ The buffer needs room for eight digits of padding at the left, followed
|
1426 |
|
|
by up to $1022+53-307$ significant digits, followed by a ``sticky'' digit
|
1427 |
|
|
at position |buf_max-1|, and eight more digits of padding.
|
1428 |
|
|
|
1429 |
|
|
@d buf0 (buf+8)
|
1430 |
|
|
@d buf_max (buf+777)
|
1431 |
|
|
|
1432 |
|
|
@=
|
1433 |
|
|
static char buf[785]="00000000"; /* where we put significant input digits */
|
1434 |
|
|
|
1435 |
|
|
@ @=
|
1436 |
|
|
register char* dec_pt; /* position of decimal point in |buf| */
|
1437 |
|
|
register int exp; /* scanned exponent; later used for raw binary exponent */
|
1438 |
|
|
register int zeros; /* leading zeros removed after decimal point */
|
1439 |
|
|
|
1440 |
|
|
@ Here we don't advance |next_char| and force a decimal point until we
|
1441 |
|
|
know that a syntactically correct exponent exists.
|
1442 |
|
|
|
1443 |
|
|
The code here will convert extra-large inputs like
|
1444 |
|
|
`\.{9e+9999999999999999}' into $\infty$ and extra-small inputs into zero.
|
1445 |
|
|
Strange inputs like `\.{-00.0e9999999}' must also be accommodated.
|
1446 |
|
|
|
1447 |
|
|
@=
|
1448 |
|
|
{@+register char exp_sign;
|
1449 |
|
|
p++;
|
1450 |
|
|
if (*p=='+' || *p=='-') exp_sign=*p++;@+else exp_sign='+';
|
1451 |
|
|
if (isdigit(*p)) {
|
1452 |
|
|
for (exp=*p++ -'0';isdigit(*p);p++)
|
1453 |
|
|
if (exp<1000) exp = 10*exp + *p - '0';
|
1454 |
|
|
if (!dec_pt) dec_pt=q, zeros=0;
|
1455 |
|
|
if (exp_sign=='-') exp=-exp;
|
1456 |
|
|
next_char=p;
|
1457 |
|
|
}
|
1458 |
|
|
}
|
1459 |
|
|
|
1460 |
|
|
@ @=
|
1461 |
|
|
{
|
1462 |
|
|
@;
|
1463 |
|
|
@;
|
1464 |
|
|
packit: @;
|
1465 |
|
|
return 1;
|
1466 |
|
|
}
|
1467 |
|
|
|
1468 |
|
|
@ Now we get ready to compute the binary fraction bits, by putting the
|
1469 |
|
|
scanned input digits into a multiprecision fixed-point
|
1470 |
|
|
accumulator |ff| that spans the full necessary range.
|
1471 |
|
|
After this step, the number that we want to convert to floating binary
|
1472 |
|
|
will appear in |ff.dat[ff.a]|, |ff.dat[ff.a+1]|, \dots,
|
1473 |
|
|
|ff.dat[ff.b]|.
|
1474 |
|
|
The radix-$10^9$ digit in ${\it ff}[36-k]$ is understood to be multiplied
|
1475 |
|
|
by $10^{9k}$, for $36\ge k\ge-120$.
|
1476 |
|
|
|
1477 |
|
|
@=
|
1478 |
|
|
x=buf+341+zeros-dec_pt-exp;
|
1479 |
|
|
if (q==buf0 || x>=1413) {
|
1480 |
|
|
make_it_zero: exp=-99999;@+ goto packit;
|
1481 |
|
|
}
|
1482 |
|
|
if (x<0) {
|
1483 |
|
|
make_it_infinite: exp=99999;@+ goto packit;
|
1484 |
|
|
}
|
1485 |
|
|
ff.a=x/9;
|
1486 |
|
|
for (p=q;p
|
1487 |
|
|
q=q-1-(q+341+zeros-dec_pt-exp)%9; /* compute stopping place in |buf| */
|
1488 |
|
|
for (p=buf0-x%9,k=ff.a;p<=q && k<=156; p+=9, k++)
|
1489 |
|
|
@
|
1490 |
|
|
into |ff.dat[k]|@>;
|
1491 |
|
|
ff.b=k-1;
|
1492 |
|
|
for (x=0;p<=q;p+=9) if (strncmp(p,"000000000",9)!=0) x=1;
|
1493 |
|
|
ff.dat[156]+=x; /* nonzero digits that fall off the right are sticky */
|
1494 |
|
|
@^sticky bit@>
|
1495 |
|
|
while (ff.dat[ff.b]==0) ff.b--;
|
1496 |
|
|
|
1497 |
|
|
@ @=
|
1498 |
|
|
{
|
1499 |
|
|
for (x=*p-'0',pp=p+1;pp
|
1500 |
|
|
ff.dat[k]=x;
|
1501 |
|
|
}
|
1502 |
|
|
|
1503 |
|
|
@ @=
|
1504 |
|
|
register int k,x;
|
1505 |
|
|
register char *pp;
|
1506 |
|
|
bignum ff,tt;
|
1507 |
|
|
|
1508 |
|
|
@ Here's a subroutine that is dual to |bignum_times_ten|. It changes $f$
|
1509 |
|
|
to~$2f$, assuming that overflow will not occur and that the radix is $10^9$.
|
1510 |
|
|
|
1511 |
|
|
@=
|
1512 |
|
|
static void bignum_double(f)
|
1513 |
|
|
bignum *f;
|
1514 |
|
|
{
|
1515 |
|
|
register tetra *p,*q; register int x,carry;
|
1516 |
|
|
for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {
|
1517 |
|
|
x = *p + *p + carry;
|
1518 |
|
|
if (x>=1000000000) carry=1, *p=x-1000000000;
|
1519 |
|
|
else carry=0, *p=x;
|
1520 |
|
|
}
|
1521 |
|
|
*p=carry;
|
1522 |
|
|
if (carry) f->a--;
|
1523 |
|
|
if (f->dat[f->b]==0 && f->b>f->a) f->b--;
|
1524 |
|
|
}
|
1525 |
|
|
|
1526 |
|
|
@ @=
|
1527 |
|
|
val=zero_octa;
|
1528 |
|
|
if (ff.a>36) {
|
1529 |
|
|
for (exp=0x3fe;ff.a>36;exp--) bignum_double(&ff);
|
1530 |
|
|
for (k=54;k;k--) {
|
1531 |
|
|
if (ff.dat[36]) {
|
1532 |
|
|
if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<
|
1533 |
|
|
ff.dat[36]=0;
|
1534 |
|
|
if (ff.b==36) break; /* break if |ff| now zero */
|
1535 |
|
|
}
|
1536 |
|
|
bignum_double(&ff);
|
1537 |
|
|
}
|
1538 |
|
|
}@+else {
|
1539 |
|
|
tt.a=tt.b=36, tt.dat[36]=2;
|
1540 |
|
|
for (exp=0x3fe;bignum_compare(&ff,&tt)>=0;exp++) bignum_double(&tt);
|
1541 |
|
|
for (k=54;k;k--) {
|
1542 |
|
|
bignum_double(&ff);
|
1543 |
|
|
if (bignum_compare(&ff,&tt)>=0) {
|
1544 |
|
|
if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<
|
1545 |
|
|
bignum_dec(&ff,&tt,1000000000);
|
1546 |
|
|
if (ff.a==bignum_prec-1) break; /* break if |ff| now zero */
|
1547 |
|
|
}
|
1548 |
|
|
}
|
1549 |
|
|
}
|
1550 |
|
|
if (k==0) val.l |= 1; /* add sticky bit if |ff| nonzero */
|
1551 |
|
|
|
1552 |
|
|
@ We need to be careful that the input `\.{NaN.999999999999999999999}' doesn't
|
1553 |
|
|
get rounded up; it is supposed to yield \Hex{7fffffffffffffff}.
|
1554 |
|
|
|
1555 |
|
|
Although the input `\.{NaN.0}' is illegal, strictly speaking, we silently
|
1556 |
|
|
convert it to \Hex{7ff0000000000001}---a number that would be
|
1557 |
|
|
output as `\.{NaN.0000000000000002}'.
|
1558 |
|
|
|
1559 |
|
|
@=
|
1560 |
|
|
val=fpack(val,exp,sign,ROUND_NEAR);
|
1561 |
|
|
if (NaN) {
|
1562 |
|
|
if ((val.h&0x7fffffff)==0x40000000) val.h |= 0x7fffffff, val.l=0xffffffff;
|
1563 |
|
|
else if ((val.h&0x7fffffff)==0x3ff00000 && !val.l) val.h|=0x40000000,val.l=1;
|
1564 |
|
|
else val.h |= 0x40000000;
|
1565 |
|
|
}
|
1566 |
|
|
|
1567 |
|
|
@*Floating point remainders. In this section we implement the remainder
|
1568 |
|
|
of the floating point operations---one of which happens to be the
|
1569 |
|
|
operation of taking the remainder.
|
1570 |
|
|
|
1571 |
|
|
The easiest task remaining is to compare two floating point quantities.
|
1572 |
|
|
Routine |fcomp| returns $-1$~if~$yz$, and
|
1573 |
|
|
$+2$~if $y$ and~$z$ are unordered.
|
1574 |
|
|
|
1575 |
|
|
@=
|
1576 |
|
|
int fcomp @,@,@[ARGS((octa,octa))@];@+@t}\6{@>
|
1577 |
|
|
int fcomp(y,z)
|
1578 |
|
|
octa y,z;
|
1579 |
|
|
{
|
1580 |
|
|
ftype yt,zt;
|
1581 |
|
|
int ye,ze;
|
1582 |
|
|
char ys,zs;
|
1583 |
|
|
octa yf,zf;
|
1584 |
|
|
register int x;
|
1585 |
|
|
yt=funpack(y,&yf,&ye,&ys);
|
1586 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
1587 |
|
|
switch (4*yt+zt) {
|
1588 |
|
|
case 4*nan+nan: case 4*zro+nan: case 4*num+nan: case 4*inf+nan:
|
1589 |
|
|
case 4*nan+zro: case 4*nan+num: case 4*nan+inf: return 2;
|
1590 |
|
|
case 4*zro+zro: return 0;
|
1591 |
|
|
case 4*zro+num: case 4*num+zro: case 4*zro+inf: case 4*inf+zro:
|
1592 |
|
|
case 4*num+num: case 4*num+inf: case 4*inf+num: case 4*inf+inf:
|
1593 |
|
|
if (ys!=zs) x=1;
|
1594 |
|
|
else if (y.h>z.h) x=1;
|
1595 |
|
|
else if (y.h
|
1596 |
|
|
else if (y.l>z.l) x=1;
|
1597 |
|
|
else if (y.l
|
1598 |
|
|
else return 0;
|
1599 |
|
|
break;
|
1600 |
|
|
}
|
1601 |
|
|
return (ys=='-'? -x: x);
|
1602 |
|
|
}
|
1603 |
|
|
|
1604 |
|
|
@ Several \MMIX\ operations act on a single floating point number and
|
1605 |
|
|
accept an arbitrary rounding mode. For example, consider the
|
1606 |
|
|
operation of rounding to the nearest floating point integer:
|
1607 |
|
|
|
1608 |
|
|
@=
|
1609 |
|
|
octa fintegerize @,@,@[ARGS((octa,int))@];@+@t}\6{@>
|
1610 |
|
|
octa fintegerize(z,r)
|
1611 |
|
|
octa z; /* the operand */
|
1612 |
|
|
int r; /* the rounding mode */
|
1613 |
|
|
{
|
1614 |
|
|
ftype zt;
|
1615 |
|
|
int ze;
|
1616 |
|
|
char zs;
|
1617 |
|
|
octa xf,zf;
|
1618 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
1619 |
|
|
if (!r) r=cur_round;
|
1620 |
|
|
switch (zt) {
|
1621 |
|
|
case nan:@+if (!(z.h&0x80000)) {@+exceptions|=I_BIT;@+z.h|=0x80000;@+}
|
1622 |
|
|
case inf: case zro: return z;
|
1623 |
|
|
case num: @;
|
1624 |
|
|
}
|
1625 |
|
|
}
|
1626 |
|
|
|
1627 |
|
|
@ @=
|
1628 |
|
|
if (ze>=1074) return fpack(zf,ze,zs,ROUND_OFF); /* already an integer */
|
1629 |
|
|
if (ze<=1020) xf.h=0,xf.l=1;
|
1630 |
|
|
else {@+octa oo;
|
1631 |
|
|
xf=shift_right(zf,1074-ze,1);
|
1632 |
|
|
oo=shift_left(xf,1074-ze);
|
1633 |
|
|
if (oo.l!=zf.l || oo.h!=zf.h) xf.l|=1; /* sticky bit */
|
1634 |
|
|
@^sticky bit@>
|
1635 |
|
|
}
|
1636 |
|
|
switch (r) {
|
1637 |
|
|
case ROUND_DOWN:@+ if (zs=='-') xf=incr(xf,3);@+break;
|
1638 |
|
|
case ROUND_UP:@+ if (zs!='-') xf=incr(xf,3);
|
1639 |
|
|
case ROUND_OFF: break;
|
1640 |
|
|
case ROUND_NEAR: xf=incr(xf, xf.l&4? 2: 1);@+break;
|
1641 |
|
|
}
|
1642 |
|
|
xf.l&=0xfffffffc;
|
1643 |
|
|
if (ze>=1022) return fpack(shift_left(xf,1074-ze),ze,zs,ROUND_OFF);
|
1644 |
|
|
if (xf.l) xf.h=0x3ff00000, xf.l=0;
|
1645 |
|
|
if (zs=='-') xf.h|=sign_bit;
|
1646 |
|
|
return xf;
|
1647 |
|
|
|
1648 |
|
|
@ To convert floating point to fixed point, we use |fixit|.
|
1649 |
|
|
|
1650 |
|
|
@=
|
1651 |
|
|
octa fixit @,@,@[ARGS((octa,int))@];@+@t}\6{@>
|
1652 |
|
|
octa fixit(z,r)
|
1653 |
|
|
octa z; /* the operand */
|
1654 |
|
|
int r; /* the rounding mode */
|
1655 |
|
|
{
|
1656 |
|
|
ftype zt;
|
1657 |
|
|
int ze;
|
1658 |
|
|
char zs;
|
1659 |
|
|
octa zf,o;
|
1660 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
1661 |
|
|
if (!r) r=cur_round;
|
1662 |
|
|
switch (zt) {
|
1663 |
|
|
case nan: case inf: exceptions|=I_BIT;@+return z;
|
1664 |
|
|
case zro: return zero_octa;
|
1665 |
|
|
case num:@+if (funpack(fintegerize(z,r),&zf,&ze,&zs)==zro) return zero_octa;
|
1666 |
|
|
if (ze<=1076) o=shift_right(zf,1076-ze,1);
|
1667 |
|
|
else {
|
1668 |
|
|
if (ze>1085 || (ze==1085 && (zf.h>0x400000 || @|
|
1669 |
|
|
(zf.h==0x400000 && (zf.l || zs!='-'))))) exceptions|=W_BIT;
|
1670 |
|
|
if (ze>=1140) return zero_octa;
|
1671 |
|
|
o=shift_left(zf,ze-1076);
|
1672 |
|
|
}
|
1673 |
|
|
return (zs=='-'? ominus(zero_octa,o): o);
|
1674 |
|
|
}
|
1675 |
|
|
}
|
1676 |
|
|
|
1677 |
|
|
@ Going the other way, we can specify not only a rounding mode but whether
|
1678 |
|
|
the given fixed point octabyte is signed or unsigned, and whether the
|
1679 |
|
|
result should be rounded to short precision.
|
1680 |
|
|
|
1681 |
|
|
@=
|
1682 |
|
|
octa floatit @,@,@[ARGS((octa,int,int,int))@];@+@t}\6{@>
|
1683 |
|
|
octa floatit(z,r,u,p)
|
1684 |
|
|
octa z; /* octabyte to float */
|
1685 |
|
|
int r; /* rounding mode */
|
1686 |
|
|
int u; /* unsigned? */
|
1687 |
|
|
int p; /* short precision? */
|
1688 |
|
|
{
|
1689 |
|
|
int e;@+char s;
|
1690 |
|
|
register int t;
|
1691 |
|
|
exceptions=0;
|
1692 |
|
|
if (!z.h && !z.l) return zero_octa;
|
1693 |
|
|
if (!r) r=cur_round;
|
1694 |
|
|
if (!u && (z.h&sign_bit)) s='-', z=ominus(zero_octa,z);@+ else s='+';
|
1695 |
|
|
e=1076;
|
1696 |
|
|
while (z.h<0x400000) e--,z=shift_left(z,1);
|
1697 |
|
|
while (z.h>=0x800000) {
|
1698 |
|
|
e++;
|
1699 |
|
|
t=z.l&1;
|
1700 |
|
|
z=shift_right(z,1,1);
|
1701 |
|
|
z.l|=t;
|
1702 |
|
|
}
|
1703 |
|
|
if (p) @;
|
1704 |
|
|
return fpack(z,e,s,r);
|
1705 |
|
|
}
|
1706 |
|
|
|
1707 |
|
|
@ @=
|
1708 |
|
|
{
|
1709 |
|
|
register int ex;@+register tetra t;
|
1710 |
|
|
t=sfpack(z,e,s,r);
|
1711 |
|
|
ex=exceptions;
|
1712 |
|
|
sfunpack(t,&z,&e,&s);
|
1713 |
|
|
exceptions=ex;
|
1714 |
|
|
}
|
1715 |
|
|
|
1716 |
|
|
@ The square root operation is more interesting.
|
1717 |
|
|
|
1718 |
|
|
@=
|
1719 |
|
|
octa froot @,@,@[ARGS((octa,int))@];@+@t}\6{@>
|
1720 |
|
|
octa froot(z,r)
|
1721 |
|
|
octa z; /* the operand */
|
1722 |
|
|
int r; /* the rounding mode */
|
1723 |
|
|
{
|
1724 |
|
|
ftype zt;
|
1725 |
|
|
int ze;
|
1726 |
|
|
char zs;
|
1727 |
|
|
octa x,xf,rf,zf;
|
1728 |
|
|
register int xe,k;
|
1729 |
|
|
if (!r) r=cur_round;
|
1730 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
1731 |
|
|
if (zs=='-' && zt!=zro) exceptions|=I_BIT, x=standard_NaN;
|
1732 |
|
|
else@+switch (zt) {
|
1733 |
|
|
case nan:@+ if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;
|
1734 |
|
|
return z;
|
1735 |
|
|
case inf: case zro: x=z;@+break;
|
1736 |
|
|
case num: @;
|
1737 |
|
|
}
|
1738 |
|
|
if (zs=='-') x.h|=sign_bit;
|
1739 |
|
|
return x;
|
1740 |
|
|
}
|
1741 |
|
|
|
1742 |
|
|
@ The square root can be found by an adaptation of the old pencil-and-paper
|
1743 |
|
|
method. If $n=\lfloor\sqrt s\rfloor$, where $s$ is an integer,
|
1744 |
|
|
we have $s=n^2+r$ where $0\le r\le2n$;
|
1745 |
|
|
this invariant can be maintained if we replace $s$ by $4s+(0,1,2,3)$
|
1746 |
|
|
and $n$ by $2n+(0,1)$. The following code implements this idea with
|
1747 |
|
|
$2n$ in~|xf| and $r$ in~|rf|. (It could easily be made to run about
|
1748 |
|
|
twice as fast.)
|
1749 |
|
|
|
1750 |
|
|
@=
|
1751 |
|
|
xf.h=0, xf.l=2;
|
1752 |
|
|
xe=(ze+0x3fe)>>1;
|
1753 |
|
|
if (ze&1) zf=shift_left(zf,1);
|
1754 |
|
|
rf.h=0, rf.l=(zf.h>>22)-1;
|
1755 |
|
|
for (k=53;k;k--) {
|
1756 |
|
|
rf=shift_left(rf,2);@+ xf=shift_left(xf,1);
|
1757 |
|
|
if (k>=43) rf=incr(rf,(zf.h>>(2*(k-43)))&3);
|
1758 |
|
|
else if (k>=27) rf=incr(rf,(zf.l>>(2*(k-27)))&3);
|
1759 |
|
|
if ((rf.l>xf.l && rf.h>=xf.h) || rf.h>xf.h) {
|
1760 |
|
|
xf.l++;@+rf=ominus(rf,xf);@+xf.l++;
|
1761 |
|
|
}
|
1762 |
|
|
}
|
1763 |
|
|
if (rf.h || rf.l) xf.l++; /* sticky bit */
|
1764 |
|
|
return fpack(xf,xe,'+',r);
|
1765 |
|
|
|
1766 |
|
|
@ And finally, the genuine floating point remainder. Subroutine |fremstep|
|
1767 |
|
|
either calculates $y\,{\rm rem}\,z$ or reduces $y$ to a smaller number
|
1768 |
|
|
having the same remainder with respect to~$z$. In the latter case
|
1769 |
|
|
the |E_BIT| is set in |exceptions|. A third parameter, |delta|,
|
1770 |
|
|
gives a decrease in exponent that is acceptable for incomplete results;
|
1771 |
|
|
if |delta| is sufficiently large, say 2500, the correct result will
|
1772 |
|
|
always be obtained in one step of |fremstep|.
|
1773 |
|
|
|
1774 |
|
|
@=
|
1775 |
|
|
octa fremstep @,@,@[ARGS((octa,octa,int))@];@+@t}\6{@>
|
1776 |
|
|
octa fremstep(y,z,delta)
|
1777 |
|
|
octa y,z;
|
1778 |
|
|
int delta;
|
1779 |
|
|
{
|
1780 |
|
|
ftype yt,zt;
|
1781 |
|
|
int ye,ze;
|
1782 |
|
|
char xs,ys,zs;
|
1783 |
|
|
octa x,xf,yf,zf;
|
1784 |
|
|
register int xe,thresh,odd;
|
1785 |
|
|
yt=funpack(y,&yf,&ye,&ys);
|
1786 |
|
|
zt=funpack(z,&zf,&ze,&zs);
|
1787 |
|
|
switch (4*yt+zt) {
|
1788 |
|
|
@t\4@>@;
|
1789 |
|
|
case 4*zro+zro: case 4*num+zro: case 4*inf+zro:
|
1790 |
|
|
case 4*inf+num: case 4*inf+inf: x=standard_NaN;
|
1791 |
|
|
exceptions|=I_BIT;@+break;
|
1792 |
|
|
case 4*zro+num: case 4*zro+inf: case 4*num+inf: return y;
|
1793 |
|
|
case 4*num+num: @;
|
1794 |
|
|
zero_out: x=zero_octa;
|
1795 |
|
|
}
|
1796 |
|
|
if (ys=='-') x.h|=sign_bit;
|
1797 |
|
|
return x;
|
1798 |
|
|
}
|
1799 |
|
|
|
1800 |
|
|
@ If there's a huge difference in exponents and the remainder is nonzero,
|
1801 |
|
|
this computation will take a long time. One could compute
|
1802 |
|
|
$(2^ny)\,{\rm rem}\,z$ much more quickly for large~$n$ by using $O(\log n)$
|
1803 |
|
|
multiplications modulo~$z$, but the floating remainder operation isn't
|
1804 |
|
|
important enough to justify such expensive hardware.
|
1805 |
|
|
|
1806 |
|
|
Results of floating remainder are always exact, so the rounding mode
|
1807 |
|
|
is immaterial.
|
1808 |
|
|
|
1809 |
|
|
@=
|
1810 |
|
|
odd=0; /* will be 1 if we've subtracted an odd multiple of~$z$ from $y$ */
|
1811 |
|
|
thresh=ye-delta;
|
1812 |
|
|
if (thresh
|
1813 |
|
|
while (ye>=thresh) @
|
1814 |
|
|
|goto zero_out| if the remainder is zero,
|
1815 |
|
|
|goto try_complement| if appropriate@>;
|
1816 |
|
|
if (ye>=ze) {
|
1817 |
|
|
exceptions|=E_BIT;@+return fpack(yf,ye,ys,ROUND_OFF);
|
1818 |
|
|
}
|
1819 |
|
|
if (ye
|
1820 |
|
|
yf=shift_right(yf,1,1);
|
1821 |
|
|
try_complement: xf=ominus(zf,yf), xe=ze, xs='+' + '-' - ys;
|
1822 |
|
|
if (xf.h>yf.h || (xf.h==yf.h && (xf.l>yf.l || (xf.l==yf.l && !odd))))
|
1823 |
|
|
xf=yf, xs=ys;
|
1824 |
|
|
while (xf.h<0x400000) xe--, xf=shift_left(xf,1);
|
1825 |
|
|
return fpack(xf,xe,xs,ROUND_OFF);
|
1826 |
|
|
|
1827 |
|
|
@ Here we are careful not to change the sign of |y|, because a remainder
|
1828 |
|
|
of~0 is supposed to inherit the original sign of~|y|.
|
1829 |
|
|
|
1830 |
|
|
@=
|
1831 |
|
|
{
|
1832 |
|
|
if (yf.h==zf.h && yf.l==zf.l) goto zero_out;
|
1833 |
|
|
if (yf.h
|
1834 |
|
|
if (ye==ze) goto try_complement;
|
1835 |
|
|
ye--, yf=shift_left(yf,1);
|
1836 |
|
|
}
|
1837 |
|
|
yf=ominus(yf,zf);
|
1838 |
|
|
if (ye==ze) odd=1;
|
1839 |
|
|
while (yf.h<0x400000) ye--, yf=shift_left(yf,1);
|
1840 |
|
|
}
|
1841 |
|
|
|
1842 |
|
|
@* Index.
|
1843 |
|
|
|