OpenCores

Rev 15	Rev 21
`% This file is part of the MMIXware package (c) Donald E Knuth 1999`	`% This file is part of the MMIXware package (c) Donald E Knuth 1999`
`@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!`	`@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!`

`\def\title{MMIX-ARITH}`	`\def\title{MMIX-ARITH}`

`\def\MMIX{\.{MMIX}}`	`\def\MMIX{\.{MMIX}}`
`\def\MMIXAL{\.{MMIXAL}}`	`\def\MMIXAL{\.{MMIXAL}}`
`\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant`	`\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant`
`\def\dts{\mathinner{\ldotp\ldotp}}`	`\def\dts{\mathinner{\ldotp\ldotp}}`
`\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow`	`\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow`
`\def\ff{\\{ff\kern-.05em}}`	`\def\ff{\\{ff\kern-.05em}}`
`@s ff TeX`	`@s ff TeX`
`@s bool normal @q unreserve a C++ keyword @>`	`@s bool normal @q unreserve a C++ keyword @>`
`@s xor normal @q unreserve a C++ keyword @>`	`@s xor normal @q unreserve a C++ keyword @>`

`@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\`	`@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\`
`arithmetic on an old-fashioned 32-bit computer---like the one the author`	`arithmetic on an old-fashioned 32-bit computer---like the one the author`
`had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.`	`had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.`
`All operations are fabricated from 32-bit arithmetic, including`	`All operations are fabricated from 32-bit arithmetic, including`
`a full implementation of the IEEE floating point standard,`	`a full implementation of the IEEE floating point standard,`
`assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.`	`assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.`

`Some day 64-bit machines will be commonplace and the awkward manipulations of`	`Some day 64-bit machines will be commonplace and the awkward manipulations of`
`the present program will look quite archaic. Interested readers who have such`	`the present program will look quite archaic. Interested readers who have such`
`computers will be able to convert the code to a pure 64-bit form without`	`computers will be able to convert the code to a pure 64-bit form without`
`difficulty, thereby obtaining much faster and simpler routines. Meanwhile,`	`difficulty, thereby obtaining much faster and simpler routines. Meanwhile,`
`however, we can simulate the future and hope for continued progress.`	`however, we can simulate the future and hope for continued progress.`

`This program module has a simple structure, intended to make it`	`This program module has a simple structure, intended to make it`
`suitable for loading with \MMIX\ simulators and assemblers.`	`suitable for loading with \MMIX\ simulators and assemblers.`

`@c`	`@c`
`#include`	`#include`
`#include`	`#include`
`#include`	`#include`
`@@;`	`@@;`
`typedef enum{@+false,true@+} bool;`	`typedef enum{@+false,true@+} bool;`
`@@;`	`@@;`
`@@;`	`@@;`
`@@;`	`@@;`
`@`	`@`

`@ Subroutines of this program are declared first with a prototype,`	`@ Subroutines of this program are declared first with a prototype,`
`as in {\mc ANSI C}, then with an old-style \CEE/ function definition.`	`as in {\mc ANSI C}, then with an old-style \CEE/ function definition.`
`Here are some preprocessor commands that make this work correctly with both`	`Here are some preprocessor commands that make this work correctly with both`
`new-style and old-style compilers.`	`new-style and old-style compilers.`
`@^prototypes for functions@>`	`@^prototypes for functions@>`

`@=`	`@=`
`#ifdef __STDC__`	`#ifdef __STDC__`
`#define ARGS(list) list`	`#define ARGS(list) list`
`#else`	`#else`
`#define ARGS(list) ()`	`#define ARGS(list) ()`
`#endif`	`#endif`

`@ The definition of type \&{tetra} should be changed, if necessary, so that`	`@ The definition of type \&{tetra} should be changed, if necessary, so that`
`it represents an unsigned 32-bit integer.`	`it represents an unsigned 32-bit integer.`
`@^system dependencies@>`	`@^system dependencies@>`

`@=`	`@=`
`typedef unsigned int tetra;`	`typedef unsigned int tetra;`
`/* for systems conforming to the LP-64 data model */`	`/* for systems conforming to the LP-64 data model */`
`typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */`	`typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */`

`@ @d sign_bit ((unsigned)0x80000000)`	`@ @d sign_bit ((unsigned)0x80000000)`

`@=`	`@=`
`octa zero_octa; /* \|zero_octa.h=zero_octa.l=0\| */`	`octa zero_octa; /* \|zero_octa.h=zero_octa.l=0\| */`
`octa neg_one={-1,-1}; /* \|neg_one.h=neg_one.l=-1\| */`	`octa neg_one={-1,-1}; /* \|neg_one.h=neg_one.l=-1\| */`
`octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */`	`octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */`
`octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */`	`octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */`
`octa aux; /* auxiliary output of a subroutine */`	`octa aux; /* auxiliary output of a subroutine */`
`bool overflow; /* set by certain subroutines for signed arithmetic */`	`bool overflow; /* set by certain subroutines for signed arithmetic */`

`@ It's easy to add and subtract octabytes, if we aren't terribly`	`@ It's easy to add and subtract octabytes, if we aren't terribly`
`worried about speed.`	`worried about speed.`

`@=`	`@=`
`octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa oplus(y,z) /* compute $y+z$ */`	`octa oplus(y,z) /* compute $y+z$ */`
`octa y,z;`	`octa y,z;`
`{@+ octa x;`	`{@+ octa x;`
`x.h=y.h+z.h;@+`	`x.h=y.h+z.h;@+`
`x.l=y.l+z.l;`	`x.l=y.l+z.l;`
`if (x.l`	`if (x.l`
`return x;`	`return x;`
`}`	`}`
`@#`	`@#`
`octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa ominus(y,z) /* compute $y-z$ */`	`octa ominus(y,z) /* compute $y-z$ */`
`octa y,z;`	`octa y,z;`
`{@+ octa x;`	`{@+ octa x;`
`x.h=y.h-z.h;@+`	`x.h=y.h-z.h;@+`
`x.l=y.l-z.l;`	`x.l=y.l-z.l;`
`if (x.l>y.l) x.h--;`	`if (x.l>y.l) x.h--;`
`return x;`	`return x;`
`}`	`}`

`@ In the following subroutine, \|delta\| is a signed quantity that is`	`@ In the following subroutine, \|delta\| is a signed quantity that is`
`assumed to fit in a signed tetrabyte.`	`assumed to fit in a signed tetrabyte.`

`@=`	`@=`
`octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>`	`octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>`
`octa incr(y,delta) /* compute $y+\delta$ */`	`octa incr(y,delta) /* compute $y+\delta$ */`
`octa y;`	`octa y;`
`int delta;`	`int delta;`
`{@+ octa x;`	`{@+ octa x;`
`x.h=y.h;@+ x.l=y.l+delta;`	`x.h=y.h;@+ x.l=y.l+delta;`
`if (delta>=0) {`	`if (delta>=0) {`
`if (x.l`	`if (x.l`
`}@+else if (x.l>y.l) x.h--;`	`}@+else if (x.l>y.l) x.h--;`
`return x;`	`return x;`
`}`	`}`

`@ Left and right shifts are only a bit more difficult.`	`@ Left and right shifts are only a bit more difficult.`

`@=`	`@=`
`octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>`	`octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>`
`octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */`	`octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */`
`octa y;`	`octa y;`
`int s;`	`int s;`
`{`	`{`
`while (s>=32) y.h=y.l,y.l=0,s-=32;`	`while (s>=32) y.h=y.l,y.l=0,s-=32;`
`if (s) {@+register tetra yhl=y.h<>(32-s);`	`if (s) {@+register tetra yhl=y.h<>(32-s);`
`y.h=yhl+ylh;@+ y.l<<=s;`	`y.h=yhl+ylh;@+ y.l<<=s;`
`}`	`}`
`return y;`	`return y;`
`}`	`}`
`@#`	`@#`
`octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>`	`octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>`
`octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */`	`octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */`
`octa y;`	`octa y;`
`int s,u;`	`int s,u;`
`{`	`{`
`while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;`	`while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;`
`if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;`	`if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;`
`y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;`	`y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;`
`}`	`}`
`return y;`	`return y;`
`}`	`}`

`@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining`	`@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining`
`an unsigned 128-bit product. It is easy to do this on a 32-bit machine`	`an unsigned 128-bit product. It is easy to do this on a 32-bit machine`
`by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.`	`by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.`
`@^multiprecision multiplication@>`	`@^multiprecision multiplication@>`

`The following subroutine returns the lower half of the product, and`	`The following subroutine returns the lower half of the product, and`
`puts the upper half into a global octabyte called \|aux\|.`	`puts the upper half into a global octabyte called \|aux\|.`

`@=`	`@=`
`octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa omult(y,z)`	`octa omult(y,z)`
`octa y,z;`	`octa y,z;`
`{`	`{`
`register int i,j,k;`	`register int i,j,k;`
`tetra u[4],v[4],w[8];`	`tetra u[4],v[4],w[8];`
`register tetra t;`	`register tetra t;`
`octa acc;`	`octa acc;`
`@;`	`@;`
`for (j=0;j<4;j++) w[j]=0;`	`for (j=0;j<4;j++) w[j]=0;`
`for (j=0;j<4;j++)`	`for (j=0;j<4;j++)`
`if (!v[j]) w[j+4]=0;`	`if (!v[j]) w[j+4]=0;`
`else {`	`else {`
`for (i=k=0;i<4;i++) {`	`for (i=k=0;i<4;i++) {`
`t=u[i]*v[j]+w[i+j]+k;`	`t=u[i]*v[j]+w[i+j]+k;`
`w[i+j]=t&0xffff, k=t>>16;`	`w[i+j]=t&0xffff, k=t>>16;`
`}`	`}`
`w[j+4]=k;`	`w[j+4]=k;`
`}`	`}`
`@;`	`@;`
`return acc;`	`return acc;`
`}`	`}`

`@ @=`	`@ @=`
`extern octa aux; /* secondary output of subroutines with multiple outputs */`	`extern octa aux; /* secondary output of subroutines with multiple outputs */`
`extern bool overflow;`	`extern bool overflow;`

`@ @=`	`@ @=`
`u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;`	`u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;`
`v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;`	`v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;`

`@ @=`	`@ @=`
`aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];`	`aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];`
`acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];`	`acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];`

`@ Signed multiplication has the same lower half product as unsigned`	`@ Signed multiplication has the same lower half product as unsigned`
`multiplication. The signed upper half product is obtained with at most two`	`multiplication. The signed upper half product is obtained with at most two`
`further subtractions, after which the result has overflowed if and only if`	`further subtractions, after which the result has overflowed if and only if`
`the upper half is unequal to 64 copies of the sign bit in the lower half.`	`the upper half is unequal to 64 copies of the sign bit in the lower half.`

`@=`	`@=`
`octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa signed_omult(y,z)`	`octa signed_omult(y,z)`
`octa y,z;`	`octa y,z;`
`{`	`{`
`octa acc;`	`octa acc;`
`acc=omult(y,z);`	`acc=omult(y,z);`
`if (y.h&sign_bit) aux=ominus(aux,z);`	`if (y.h&sign_bit) aux=ominus(aux,z);`
`if (z.h&sign_bit) aux=ominus(aux,y);`	`if (z.h&sign_bit) aux=ominus(aux,y);`
`overflow=(aux.h!=aux.l \|\| (aux.h^(aux.h>>1)^(acc.h&sign_bit)));`	`overflow=(aux.h!=aux.l \|\| (aux.h^(aux.h>>1)^(acc.h&sign_bit)));`
`return acc;`	`return acc;`
`}`	`}`

`@* Division. Long division of an unsigned 128-bit integer by an unsigned`	`@* Division. Long division of an unsigned 128-bit integer by an unsigned`
`64-bit integer is, of course, one of the most challenging routines`	`64-bit integer is, of course, one of the most challenging routines`
`needed for \MMIX\ arithmetic. The following program, based on`	`needed for \MMIX\ arithmetic. The following program, based on`
`Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes`	`Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes`
`octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r`	`octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r`
`given octabytes $x$, $y$, and~$z$, assuming that $x`	`given octabytes $x$, $y$, and~$z$, assuming that $x`
`(If $x\ge z$, it simply sets $q=x$ and $r=y$.)`	`(If $x\ge z$, it simply sets $q=x$ and $r=y$.)`
`The quotient~$q$ is returned by the subroutine;`	`The quotient~$q$ is returned by the subroutine;`
`the remainder~$r$ is stored in \|aux\|.`	`the remainder~$r$ is stored in \|aux\|.`
`@^multiprecision division@>`	`@^multiprecision division@>`

`@=`	`@=`
`octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>`	`octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>`
`octa odiv(x,y,z)`	`octa odiv(x,y,z)`
`octa x,y,z;`	`octa x,y,z;`
`{`	`{`
`register int i,j,k,n,d;`	`register int i,j,k,n,d;`
`tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;`	`tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;`
`register tetra t;`	`register tetra t;`
`octa acc;`	`octa acc;`
`@;`	`@;`
`@;`	`@;`
`@;`	`@;`
`@;`	`@;`
`for (j=3;j>=0;j--) @;`	`for (j=3;j>=0;j--) @;`
`@;`	`@;`
`@;`	`@;`
`return acc;`	`return acc;`
`}`	`}`

`@ @=`	`@ @=`
`if (x.h>z.h \|\| (x.h==z.h && x.l>=z.l)) {`	`if (x.h>z.h \|\| (x.h==z.h && x.l>=z.l)) {`
`aux=y;@+ return x;`	`aux=y;@+ return x;`
`}`	`}`

`@ @=`	`@ @=`
`u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;`	`u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;`
`u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;`	`u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;`
`v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;`	`v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;`

`@ @=`	`@ @=`
`for (n=4;v[n-1]==0;n--);`	`for (n=4;v[n-1]==0;n--);`

`@ We shift \|u\| and \|v\| left by \|d\| places, where \|d\| is chosen to`	`@ We shift \|u\| and \|v\| left by \|d\| places, where \|d\| is chosen to`
`make $2^{15}\le v_{n-1}<2^{16}$.`	`make $2^{15}\le v_{n-1}<2^{16}$.`

`@=`	`@=`
`vh=v[n-1];`	`vh=v[n-1];`
`for (d=0;vh<0x8000;d++,vh<<=1);`	`for (d=0;vh<0x8000;d++,vh<<=1);`
`for (j=k=0; j`	`for (j=k=0; j`
`t=(u[j]<`	`t=(u[j]<`
`u[j]=t&0xffff, k=t>>16;`	`u[j]=t&0xffff, k=t>>16;`
`}`	`}`
`for (j=k=0; j`	`for (j=k=0; j`
`t=(v[j]<`	`t=(v[j]<`
`v[j]=t&0xffff, k=t>>16;`	`v[j]=t&0xffff, k=t>>16;`
`}`	`}`
`vh=v[n-1];`	`vh=v[n-1];`
`vmh=(n>1? v[n-2]: 0);`	`vmh=(n>1? v[n-2]: 0);`

`@ @=`	`@ @=`
`mask=(1<`	`mask=(1<`
`for (j=3; j>=n; j--) u[j]=0;`	`for (j=3; j>=n; j--) u[j]=0;`
`for (k=0;j>=0;j--) {`	`for (k=0;j>=0;j--) {`
`t=(k<<16)+u[j];`	`t=(k<<16)+u[j];`
`u[j]=t>>d, k=t&mask;`	`u[j]=t>>d, k=t&mask;`
`}`	`}`

`@ @=`	`@ @=`
`acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];`	`acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];`
`aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];`	`aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];`

`@ @=`	`@ @=`
`{`	`{`
`@;`	`@;`
`@;`	`@;`
`@;`	`@;`
`q[j]=qhat;`	`q[j]=qhat;`
`}`	`}`

`@ @=`	`@ @=`
`t=(u[j+n]<<16)+u[j+n-1];`	`t=(u[j+n]<<16)+u[j+n-1];`
`qhat=t/vh, rhat=t-vh*qhat;`	`qhat=t/vh, rhat=t-vh*qhat;`
`if (n>1) while (qhat==0x10000 \|\| qhat*vmh>(rhat<<16)+u[j+n-2]) {`	`if (n>1) while (qhat==0x10000 \|\| qhat*vmh>(rhat<<16)+u[j+n-2]) {`
`qhat--, rhat+=vh;`	`qhat--, rhat+=vh;`
`if (rhat>=0x10000) break;`	`if (rhat>=0x10000) break;`
`}`	`}`

`@ After this step, \|u[j+n]\| will either equal \|k\| or \|k-1\|. The`	`@ After this step, \|u[j+n]\| will either equal \|k\| or \|k-1\|. The`
`true value of~\|u\| would be obtained by subtracting~\|k\| from \|u[j+n]\|;`	`true value of~\|u\| would be obtained by subtracting~\|k\| from \|u[j+n]\|;`
`but we don't have to fuss over \|u[j+n]\|, because it won't be examined later.`	`but we don't have to fuss over \|u[j+n]\|, because it won't be examined later.`

`@=`	`@=`
`for (i=k=0; i`	`for (i=k=0; i`
`t=u[i+j]+0xffff0000-k-qhat*v[i];`	`t=u[i+j]+0xffff0000-k-qhat*v[i];`
`u[i+j]=t&0xffff, k=0xffff-(t>>16);`	`u[i+j]=t&0xffff, k=0xffff-(t>>16);`
`}`	`}`

`@ The correction here occurs only rarely, but it can be necessary---for`	`@ The correction here occurs only rarely, but it can be necessary---for`
`example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.`	`example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.`

`@=`	`@=`
`if (u[j+n]!=k) {`	`if (u[j+n]!=k) {`
`qhat--;`	`qhat--;`
`for (i=k=0; i`	`for (i=k=0; i`
`t=u[i+j]+v[i]+k;`	`t=u[i+j]+v[i]+k;`
`u[i+j]=t&0xffff, k=t>>16;`	`u[i+j]=t&0xffff, k=t>>16;`
`}`	`}`
`}`	`}`

`@ Signed division can be reduced to unsigned division in a tedious`	`@ Signed division can be reduced to unsigned division in a tedious`
`but straightforward manner. We assume that the divisor isn't zero.`	`but straightforward manner. We assume that the divisor isn't zero.`

`@=`	`@=`
`octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa signed_odiv(y,z)`	`octa signed_odiv(y,z)`
`octa y,z;`	`octa y,z;`
`{`	`{`
`octa yy,zz,q;`	`octa yy,zz,q;`
`register int sy,sz;`	`register int sy,sz;`
`if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);`	`if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);`
`else sy=0, yy=y;`	`else sy=0, yy=y;`
`if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);`	`if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);`
`else sz=0, zz=z;`	`else sz=0, zz=z;`
`q=odiv(zero_octa,yy,zz);`	`q=odiv(zero_octa,yy,zz);`
`overflow=false;`	`overflow=false;`
`switch (sy+sz) {`	`switch (sy+sz) {`
`case 2+1: aux=ominus(zero_octa,aux);`	`case 2+1: aux=ominus(zero_octa,aux);`
`if (q.h==sign_bit) overflow=true;`	`if (q.h==sign_bit) overflow=true;`
`case 0+0: return q;`	`case 0+0: return q;`
`case 2+0:@+ if (aux.h \|\| aux.l) aux=ominus(zz,aux);`	`case 2+0:@+ if (aux.h \|\| aux.l) aux=ominus(zz,aux);`
`goto negate_q;`	`goto negate_q;`
`case 0+1:@+ if (aux.h \|\| aux.l) aux=ominus(aux,zz);`	`case 0+1:@+ if (aux.h \|\| aux.l) aux=ominus(aux,zz);`
`negate_q:@+ if (aux.h \|\| aux.l) return ominus(neg_one,q);`	`negate_q:@+ if (aux.h \|\| aux.l) return ominus(neg_one,q);`
`else return ominus(zero_octa,q);`	`else return ominus(zero_octa,q);`
`}`	`}`
`}`	`}`

`@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to`	`@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to`
`implement directly, but three of them occur often enough to deserve`	`implement directly, but three of them occur often enough to deserve`
`packaging as subroutines.`	`packaging as subroutines.`

`@=`	`@=`
`octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa oand(y,z) /* compute $y\land z$ */`	`octa oand(y,z) /* compute $y\land z$ */`
`octa y,z;`	`octa y,z;`
`{@+ octa x;`	`{@+ octa x;`
`x.h=y.h&z.h;@+ x.l=y.l&z.l;`	`x.h=y.h&z.h;@+ x.l=y.l&z.l;`
`return x;`	`return x;`
`}`	`}`
`@#`	`@#`
`octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa oandn(y,z) /* compute $y\land\bar z$ */`	`octa oandn(y,z) /* compute $y\land\bar z$ */`
`octa y,z;`	`octa y,z;`
`{@+ octa x;`	`{@+ octa x;`
`x.h=y.h&~z.h;@+ x.l=y.l&~z.l;`	`x.h=y.h&~z.h;@+ x.l=y.l&~z.l;`
`return x;`	`return x;`
`}`	`}`
`@#`	`@#`
`octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`	`octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>`
`octa oxor(y,z) /* compute $y\oplus z$ */`	`octa oxor(y,z) /* compute $y\oplus z$ */`
`octa y,z;`	`octa y,z;`
`{@+ octa x;`	`{@+ octa x;`
`x.h=y.h^z.h;@+ x.l=y.l^z.l;`	`x.h=y.h^z.h;@+ x.l=y.l^z.l;`
`return x;`	`return x;`
`}`	`}`

`@ Here's a fun way to count the number of bits in a tetrabyte.`	`@ Here's a fun way to count the number of bits in a tetrabyte.`
[This classical trick is called the ``Gillies--Miller method	[This classical trick is called the ``Gillies--Miller method
`for sideways addition'' in {\sl The Preparation of Programs`	`for sideways addition'' in {\sl The Preparation of Programs`
`for an Electronic Digital Computer\/} by Wilkes, Wheeler, and`	`for an Electronic Digital Computer\/} by Wilkes, Wheeler, and`
`Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),`	`Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),`
`191--193. Some of the tricks used here were suggested by`	`191--193. Some of the tricks used here were suggested by`
`Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]`	`Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]`
`@^Gillies, Donald Bruce@>`	`@^Gillies, Donald Bruce@>`
`@^Miller, Jeffrey Charles Percy@>`	`@^Miller, Jeffrey Charles Percy@>`
`@^Wilkes, Maurice Vincent@>`	`@^Wilkes, Maurice Vincent@>`
`@^Wheeler, David John@>`	`@^Wheeler, David John@>`
`@^Gill, Stanley@>`	`@^Gill, Stanley@>`
`@^Singh, Balbir@>`	`@^Singh, Balbir@>`
`@^Rossmanith, Peter@>`	`@^Rossmanith, Peter@>`
`@^Schwoon, Stefan@>`	`@^Schwoon, Stefan@>`

`@=`	`@=`
`int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>`	`int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>`
`int count_bits(x)`	`int count_bits(x)`
`tetra x;`	`tetra x;`
`{`	`{`
`register int xx=x;`	`register int xx=x;`
`xx=xx-((xx>>1)&0x55555555);`	`xx=xx-((xx>>1)&0x55555555);`
`xx=(xx&0x33333333)+((xx>>2)&0x33333333);`	`xx=(xx&0x33333333)+((xx>>2)&0x33333333);`
`xx=(xx+(xx>>4))&0x0f0f0f0f;`	`xx=(xx+(xx>>4))&0x0f0f0f0f;`
`xx=xx+(xx>>8);`	`xx=xx+(xx>>8);`
`return (xx+(xx>>16)) & 0xff;`	`return (xx+(xx>>16)) & 0xff;`
`}`	`}`

`@ To compute the nonnegative byte differences of two given tetrabytes,`	`@ To compute the nonnegative byte differences of two given tetrabytes,`
`we can carry out the following 20-step branchless computation:`	`we can carry out the following 20-step branchless computation:`

`@=`	`@=`
`tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>`	`tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>`
`tetra byte_diff(y,z)`	`tetra byte_diff(y,z)`
`tetra y,z;`	`tetra y,z;`
`{`	`{`
`register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);`	`register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);`
`register tetra m=d&0x01000100;`	`register tetra m=d&0x01000100;`
`register tetra x=d&(m-(m>>8));`	`register tetra x=d&(m-(m>>8));`
`d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);`	`d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);`
`m=d&0x01000100;`	`m=d&0x01000100;`
`return x+((d&(m-(m>>8)))<<8);`	`return x+((d&(m-(m>>8)))<<8);`
`}`	`}`

`@ To compute the nonnegative wyde differences of two tetrabytes,`	`@ To compute the nonnegative wyde differences of two tetrabytes,`
`another trick leads to a 15-step branchless computation.`	`another trick leads to a 15-step branchless computation.`
`(Research problem: Can \|count_bits\|, \|byte_diff\|, or \|wyde_diff\| be done`	`(Research problem: Can \|count_bits\|, \|byte_diff\|, or \|wyde_diff\| be done`
`with fewer operations?)`	`with fewer operations?)`

`@=`	`@=`
`tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>`	`tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>`
`tetra wyde_diff(y,z)`	`tetra wyde_diff(y,z)`
`tetra y,z;`	`tetra y,z;`
`{`	`{`
`register tetra a=((y>>16)-(z>>16))&0x10000;`	`register tetra a=((y>>16)-(z>>16))&0x10000;`
`register tetra b=((y&0xffff)-(z&0xffff))&0x10000;`	`register tetra b=((y&0xffff)-(z&0xffff))&0x10000;`
`return y-(z^((y^z)&(b-a-(b>>16))));`	`return y-(z^((y^z)&(b-a-(b>>16))));`
`}`	`}`

`@ The last bitwise subroutine we need is the most interesting:`	`@ The last bitwise subroutine we need is the most interesting:`
`It implements \MMIX's \.{MOR} and \.{MXOR} operations.`	`It implements \MMIX's \.{MOR} and \.{MXOR} operations.`

`@=`	`@=`
`octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>`	`octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>`
`octa bool_mult(y,z,xor)`	`octa bool_mult(y,z,xor)`
`octa y,z; /* the operands */`	`octa y,z; /* the operands */`
`bool xor; /* do we do xor instead of or? */`	`bool xor; /* do we do xor instead of or? */`
`{`	`{`
`octa o,x;`	`octa o,x;`
`register tetra a,b,c;`	`register tetra a,b,c;`
`register int k;`	`register int k;`
`for (k=0,o=y,x=zero_octa;o.h\|\|o.l;k++,o=shift_right(o,8,1))`	`for (k=0,o=y,x=zero_octa;o.h\|\|o.l;k++,o=shift_right(o,8,1))`
`if (o.l&0xff) {`	`if (o.l&0xff) {`
`a=((z.h>>k)&0x01010101)*0xff;`	`a=((z.h>>k)&0x01010101)*0xff;`
`b=((z.l>>k)&0x01010101)*0xff;`	`b=((z.l>>k)&0x01010101)*0xff;`
`c=(o.l&0xff)*0x01010101;`	`c=(o.l&0xff)*0x01010101;`
`if (xor) x.h^=a&c, x.l^=b&c;`	`if (xor) x.h^=a&c, x.l^=b&c;`
`else x.h\|=a&c, x.l\|=b&c;`	`else x.h\|=a&c, x.l\|=b&c;`
`}`	`}`
`return x;`	`return x;`
`}`	`}`

`@* Floating point packing and unpacking. Standard IEEE floating binary`	`@* Floating point packing and unpacking. Standard IEEE floating binary`
`numbers pack a sign, exponent, and fraction into a tetrabyte`	`numbers pack a sign, exponent, and fraction into a tetrabyte`
`or octabyte. In this section we consider basic subroutines that`	`or octabyte. In this section we consider basic subroutines that`
`convert between IEEE format and the separate unpacked components.`	`convert between IEEE format and the separate unpacked components.`

`@d ROUND_OFF 1`	`@d ROUND_OFF 1`
`@d ROUND_UP 2`	`@d ROUND_UP 2`
`@d ROUND_DOWN 3`	`@d ROUND_DOWN 3`
`@d ROUND_NEAR 4`	`@d ROUND_NEAR 4`

`@=`	`@=`
`int cur_round; /* the current rounding mode */`	`int cur_round; /* the current rounding mode */`

`@ The \|fpack\| routine takes an octabyte $f$, a raw exponent~$e$,`	`@ The \|fpack\| routine takes an octabyte $f$, a raw exponent~$e$,`
`and a sign~\|s\|, and packs them`	`and a sign~\|s\|, and packs them`
`into the floating binary number that corresponds to`	`into the floating binary number that corresponds to`
`$\pm2^{e-1076}f$, using a given rounding mode.`	`$\pm2^{e-1076}f$, using a given rounding mode.`
`The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.`	`The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.`

`Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$`	`Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$`
`is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and \|s='+'\|.`	`is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and \|s='+'\|.`
`The raw exponent~$e$ is usually one less than`	`The raw exponent~$e$ is usually one less than`
`the final exponent value; the leading bit of~$f$ is essentially added`	`the final exponent value; the leading bit of~$f$ is essentially added`
`to the exponent. (This trick works nicely for subnormal numbers, when`	`to the exponent. (This trick works nicely for subnormal numbers, when`
`$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)`	`$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)`

`Exceptional events are noted by oring appropriate bits into`	`Exceptional events are noted by oring appropriate bits into`
`the global variable \|exceptions\|. Special considerations apply to`	`the global variable \|exceptions\|. Special considerations apply to`
`underflow, which is not fully specified by Section 7.4 of the IEEE standard:`	`underflow, which is not fully specified by Section 7.4 of the IEEE standard:`
`Implementations of the standard are free to choose between two definitions`	`Implementations of the standard are free to choose between two definitions`
of ``tininess'' and two definitions of ``accuracy loss.''	of ``tininess'' and two definitions of ``accuracy loss.''
`\MMIX\ determines tininess {\it after\/} rounding, hence a result with`	`\MMIX\ determines tininess {\it after\/} rounding, hence a result with`
`$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent`	`$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent`
`to inexactness. Thus, a result underflows if and only if`	`to inexactness. Thus, a result underflows if and only if`
`it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.`	`it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.`
`The \|fpack\| routine sets \|U_BIT\| in \|exceptions\| if and only if the result is`	`The \|fpack\| routine sets \|U_BIT\| in \|exceptions\| if and only if the result is`
`tiny, \|X_BIT\| if and only if the result is inexact.`	`tiny, \|X_BIT\| if and only if the result is inexact.`
`@^underflow@>`	`@^underflow@>`

`@d X_BIT (1<<8) /* floating inexact */`	`@d X_BIT (1<<8) /* floating inexact */`
`@d Z_BIT (1<<9) /* floating division by zero */`	`@d Z_BIT (1<<9) /* floating division by zero */`
`@d U_BIT (1<<10) /* floating underflow */`	`@d U_BIT (1<<10) /* floating underflow */`
`@d O_BIT (1<<11) /* floating overflow */`	`@d O_BIT (1<<11) /* floating overflow */`
`@d I_BIT (1<<12) /* floating invalid operation */`	`@d I_BIT (1<<12) /* floating invalid operation */`
`@d W_BIT (1<<13) /* float-to-fix overflow */`	`@d W_BIT (1<<13) /* float-to-fix overflow */`
`@d V_BIT (1<<14) /* integer overflow */`	`@d V_BIT (1<<14) /* integer overflow */`
`@d D_BIT (1<<15) /* integer divide check */`	`@d D_BIT (1<<15) /* integer divide check */`

% This file is part of the MMIXware package (c) Donald E Knuth 1999

% This file is part of the MMIXware package (c) Donald E Knuth 1999

@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!

@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!

\def\title{MMIX-ARITH}

\def\title{MMIX-ARITH}

\def\MMIX{\.{MMIX}}

\def\MMIX{\.{MMIX}}

\def\MMIXAL{\.{MMIXAL}}

\def\MMIXAL{\.{MMIXAL}}

\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant

\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant

\def\dts{\mathinner{\ldotp\ldotp}}

\def\dts{\mathinner{\ldotp\ldotp}}

\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow

\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow

\def\ff{\\{ff\kern-.05em}}

\def\ff{\\{ff\kern-.05em}}

@s ff TeX

@s ff TeX

@s bool normal @q unreserve a C++ keyword @>

@s bool normal @q unreserve a C++ keyword @>

@s xor normal @q unreserve a C++ keyword @>

@s xor normal @q unreserve a C++ keyword @>

@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\

@* Introduction. The subroutines below are used to simulate 64-bit \MMIX\

arithmetic on an old-fashioned 32-bit computer---like the one the author

arithmetic on an old-fashioned 32-bit computer---like the one the author

had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.

had when he wrote \MMIXAL\ and the first \MMIX\ simulators in 1998 and 1999.

All operations are fabricated from 32-bit arithmetic, including

All operations are fabricated from 32-bit arithmetic, including

a full implementation of the IEEE floating point standard,

a full implementation of the IEEE floating point standard,

assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.

assuming only that the \CEE/ compiler has a 32-bit unsigned integer type.

Some day 64-bit machines will be commonplace and the awkward manipulations of

Some day 64-bit machines will be commonplace and the awkward manipulations of

the present program will look quite archaic. Interested readers who have such

the present program will look quite archaic. Interested readers who have such

computers will be able to convert the code to a pure 64-bit form without

computers will be able to convert the code to a pure 64-bit form without

difficulty, thereby obtaining much faster and simpler routines. Meanwhile,

difficulty, thereby obtaining much faster and simpler routines. Meanwhile,

however, we can simulate the future and hope for continued progress.

however, we can simulate the future and hope for continued progress.

This program module has a simple structure, intended to make it

This program module has a simple structure, intended to make it

suitable for loading with \MMIX\ simulators and assemblers.

suitable for loading with \MMIX\ simulators and assemblers.

@c

@c

#include

#include

#include

#include

#include

#include

@@;

@@;

typedef enum{@+false,true@+} bool;

typedef enum{@+false,true@+} bool;

@@;

@@;

@@;

@@;

@@;

@@;

@ Subroutines of this program are declared first with a prototype,

@ Subroutines of this program are declared first with a prototype,

as in {\mc ANSI C}, then with an old-style \CEE/ function definition.

as in {\mc ANSI C}, then with an old-style \CEE/ function definition.

Here are some preprocessor commands that make this work correctly with both

Here are some preprocessor commands that make this work correctly with both

new-style and old-style compilers.

new-style and old-style compilers.

@^prototypes for functions@>

@^prototypes for functions@>

@=

@=

#ifdef __STDC__

#ifdef __STDC__

#define ARGS(list) list

#define ARGS(list) list

#else

#else

#define ARGS(list) ()

#define ARGS(list) ()

#endif

#endif

@ The definition of type \&{tetra} should be changed, if necessary, so that

@ The definition of type \&{tetra} should be changed, if necessary, so that

it represents an unsigned 32-bit integer.

it represents an unsigned 32-bit integer.

@^system dependencies@>

@^system dependencies@>

@=

@=

typedef unsigned int tetra;

typedef unsigned int tetra;

 /* for systems conforming to the LP-64 data model */

 /* for systems conforming to the LP-64 data model */

typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */

typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte */

@ @d sign_bit ((unsigned)0x80000000)

@ @d sign_bit ((unsigned)0x80000000)

@=

@=

octa zero_octa; /* |zero_octa.h=zero_octa.l=0| */

octa zero_octa; /* |zero_octa.h=zero_octa.l=0| */

octa neg_one={-1,-1}; /* |neg_one.h=neg_one.l=-1| */

octa neg_one={-1,-1}; /* |neg_one.h=neg_one.l=-1| */

octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */

octa inf_octa={0x7ff00000,0}; /* floating point $+\infty$ */

octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */

octa standard_NaN={0x7ff80000,0}; /* floating point NaN(.5) */

octa aux; /* auxiliary output of a subroutine */

octa aux; /* auxiliary output of a subroutine */

bool overflow; /* set by certain subroutines for signed arithmetic */

bool overflow; /* set by certain subroutines for signed arithmetic */

@ It's easy to add and subtract octabytes, if we aren't terribly

@ It's easy to add and subtract octabytes, if we aren't terribly

worried about speed.

worried about speed.

@=

@=

octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oplus(y,z) /* compute $y+z$ */

octa oplus(y,z) /* compute $y+z$ */

  octa y,z;

  octa y,z;

{@+ octa x;

{@+ octa x;

  x.h=y.h+z.h;@+

  x.h=y.h+z.h;@+

  x.l=y.l+z.l;

  x.l=y.l+z.l;

  if (x.l

  if (x.l

  return x;

  return x;

@#

@#

octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa ominus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa ominus(y,z) /* compute $y-z$ */

octa ominus(y,z) /* compute $y-z$ */

  octa y,z;

  octa y,z;

{@+ octa x;

{@+ octa x;

  x.h=y.h-z.h;@+

  x.h=y.h-z.h;@+

  x.l=y.l-z.l;

  x.l=y.l-z.l;

  if (x.l>y.l) x.h--;

  if (x.l>y.l) x.h--;

  return x;

  return x;

@ In the following subroutine, |delta| is a signed quantity that is

@ In the following subroutine, |delta| is a signed quantity that is

assumed to fit in a signed tetrabyte.

assumed to fit in a signed tetrabyte.

@=

@=

octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa incr @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa incr(y,delta) /* compute $y+\delta$ */

octa incr(y,delta) /* compute $y+\delta$ */

  octa y;

  octa y;

  int delta;

  int delta;

{@+ octa x;

{@+ octa x;

  x.h=y.h;@+ x.l=y.l+delta;

  x.h=y.h;@+ x.l=y.l+delta;

  if (delta>=0) {

  if (delta>=0) {

    if (x.l

    if (x.l

  }@+else if (x.l>y.l) x.h--;

  }@+else if (x.l>y.l) x.h--;

  return x;

  return x;

@ Left and right shifts are only a bit more difficult.

@ Left and right shifts are only a bit more difficult.

@=

@=

octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa shift_left @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */

octa shift_left(y,s) /* shift left by $s$ bits, where $0\le s\le64$ */

  octa y;

  octa y;

  int s;

  int s;

  while (s>=32) y.h=y.l,y.l=0,s-=32;

  while (s>=32) y.h=y.l,y.l=0,s-=32;

  if (s) {@+register tetra yhl=y.h<>(32-s);

  if (s) {@+register tetra yhl=y.h<>(32-s);

    y.h=yhl+ylh;@+ y.l<<=s;

    y.h=yhl+ylh;@+ y.l<<=s;

  return y;

  return y;

@#

@#

octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>

octa shift_right @,@,@[ARGS((octa,int,int))@];@+@t}\6{@>

octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */

octa shift_right(y,s,u) /* shift right, arithmetically if $u=0$ */

  octa y;

  octa y;

  int s,u;

  int s,u;

  while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;

  while (s>=32) y.l=y.h, y.h=(u?0: -(y.h>>31)), s-=32;

  if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;

  if (s) {@+register tetra yhl=y.h<<(32-s),ylh=y.l>>s;

    y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;

    y.h=(u? 0:(-(y.h>>31))<<(32-s))+(y.h>>s);@+ y.l=yhl+ylh;

  return y;

  return y;

@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining

@* Multiplication. We need to multiply two unsigned 64-bit integers, obtaining

an unsigned 128-bit product. It is easy to do this on a 32-bit machine

an unsigned 128-bit product. It is easy to do this on a 32-bit machine

by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.

by using Algorithm 4.3.1M of {\sl Seminumerical Algorithms}, with $b=2^{16}$.

@^multiprecision multiplication@>

@^multiprecision multiplication@>

The following subroutine returns the lower half of the product, and

The following subroutine returns the lower half of the product, and

puts the upper half into a global octabyte called |aux|.

puts the upper half into a global octabyte called |aux|.

@=

@=

octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa omult(y,z)

octa omult(y,z)

  octa y,z;

  octa y,z;

  register int i,j,k;

  register int i,j,k;

  tetra u[4],v[4],w[8];

  tetra u[4],v[4],w[8];

  register tetra t;

  register tetra t;

  octa acc;

  octa acc;

@;

@;

  for (j=0;j<4;j++) w[j]=0;

  for (j=0;j<4;j++) w[j]=0;

  for (j=0;j<4;j++)

  for (j=0;j<4;j++)

    if (!v[j]) w[j+4]=0;

    if (!v[j]) w[j+4]=0;

    else {

    else {

      for (i=k=0;i<4;i++) {

      for (i=k=0;i<4;i++) {

        t=u[i]*v[j]+w[i+j]+k;

        t=u[i]*v[j]+w[i+j]+k;

        w[i+j]=t&0xffff, k=t>>16;

        w[i+j]=t&0xffff, k=t>>16;

      w[j+4]=k;

      w[j+4]=k;

@;

@;

  return acc;

  return acc;

@ @=

@ @=

extern octa aux; /* secondary output of subroutines with multiple outputs */

extern octa aux; /* secondary output of subroutines with multiple outputs */

extern bool overflow;

extern bool overflow;

@ @=

@ @=

u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;

u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]= y.l>>16, u[0]=y.l&0xffff;

v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;

v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]= z.l>>16, v[0]=z.l&0xffff;

@ @=

@ @=

aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];

aux.h=(w[7]<<16)+w[6], aux.l=(w[5]<<16)+w[4];

acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];

acc.h=(w[3]<<16)+w[2], acc.l=(w[1]<<16)+w[0];

@ Signed multiplication has the same lower half product as unsigned

@ Signed multiplication has the same lower half product as unsigned

multiplication. The signed upper half product is obtained with at most two

multiplication. The signed upper half product is obtained with at most two

further subtractions, after which the result has overflowed if and only if

further subtractions, after which the result has overflowed if and only if

the upper half is unequal to 64 copies of the sign bit in the lower half.

the upper half is unequal to 64 copies of the sign bit in the lower half.

@=

@=

octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa signed_omult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa signed_omult(y,z)

octa signed_omult(y,z)

  octa y,z;

  octa y,z;

  octa acc;

  octa acc;

  acc=omult(y,z);

  acc=omult(y,z);

  if (y.h&sign_bit) aux=ominus(aux,z);

  if (y.h&sign_bit) aux=ominus(aux,z);

  if (z.h&sign_bit) aux=ominus(aux,y);

  if (z.h&sign_bit) aux=ominus(aux,y);

  overflow=(aux.h!=aux.l || (aux.h^(aux.h>>1)^(acc.h&sign_bit)));

  overflow=(aux.h!=aux.l || (aux.h^(aux.h>>1)^(acc.h&sign_bit)));

  return acc;

  return acc;

@* Division. Long division of an unsigned 128-bit integer by an unsigned

@* Division. Long division of an unsigned 128-bit integer by an unsigned

64-bit integer is, of course, one of the most challenging routines

64-bit integer is, of course, one of the most challenging routines

needed for \MMIX\ arithmetic. The following program, based on

needed for \MMIX\ arithmetic. The following program, based on

Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes

Algorithm 4.3.1D of {\sl Seminumerical Algorithms}, computes

octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r

octabytes $q$ and $r$ such that $(2^{64}x+y)=qz+r$ and $0\le r

given octabytes $x$, $y$, and~$z$, assuming that $x

given octabytes $x$, $y$, and~$z$, assuming that $x

(If $x\ge z$, it simply sets $q=x$ and $r=y$.)

(If $x\ge z$, it simply sets $q=x$ and $r=y$.)

The quotient~$q$ is returned by the subroutine;

The quotient~$q$ is returned by the subroutine;

the remainder~$r$ is stored in |aux|.

the remainder~$r$ is stored in |aux|.

@^multiprecision division@>

@^multiprecision division@>

@=

@=

octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>

octa odiv @,@,@[ARGS((octa,octa,octa))@];@+@t}\6{@>

octa odiv(x,y,z)

octa odiv(x,y,z)

  octa x,y,z;

  octa x,y,z;

  register int i,j,k,n,d;

  register int i,j,k,n,d;

  tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;

  tetra u[8],v[4],q[4],mask,qhat,rhat,vh,vmh;

  register tetra t;

  register tetra t;

  octa acc;

  octa acc;

@;

@;

@;

@;

@;

@;

@;

@;

  for (j=3;j>=0;j--) @;

  for (j=3;j>=0;j--) @;

@;

@;

@;

@;

  return acc;

  return acc;

@ @=

@ @=

if (x.h>z.h || (x.h==z.h && x.l>=z.l)) {

if (x.h>z.h || (x.h==z.h && x.l>=z.l)) {

  aux=y;@+ return x;

  aux=y;@+ return x;

@ @=

@ @=

u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;

u[7]=x.h>>16, u[6]=x.h&0xffff, u[5]=x.l>>16, u[4]=x.l&0xffff;

u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;

u[3]=y.h>>16, u[2]=y.h&0xffff, u[1]=y.l>>16, u[0]=y.l&0xffff;

v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;

v[3]=z.h>>16, v[2]=z.h&0xffff, v[1]=z.l>>16, v[0]=z.l&0xffff;

@ @=

@ @=

for (n=4;v[n-1]==0;n--);

for (n=4;v[n-1]==0;n--);

@ We shift |u| and |v| left by |d| places, where |d| is chosen to

@ We shift |u| and |v| left by |d| places, where |d| is chosen to

make $2^{15}\le v_{n-1}<2^{16}$.

make $2^{15}\le v_{n-1}<2^{16}$.

@=

@=

vh=v[n-1];

vh=v[n-1];

for (d=0;vh<0x8000;d++,vh<<=1);

for (d=0;vh<0x8000;d++,vh<<=1);

for (j=k=0; j

for (j=k=0; j

  t=(u[j]<

  t=(u[j]<

  u[j]=t&0xffff, k=t>>16;

  u[j]=t&0xffff, k=t>>16;

for (j=k=0; j

for (j=k=0; j

  t=(v[j]<

  t=(v[j]<

  v[j]=t&0xffff, k=t>>16;

  v[j]=t&0xffff, k=t>>16;

vh=v[n-1];

vh=v[n-1];

vmh=(n>1? v[n-2]: 0);

vmh=(n>1? v[n-2]: 0);

@ @=

@ @=

mask=(1<

mask=(1<

for (j=3; j>=n; j--) u[j]=0;

for (j=3; j>=n; j--) u[j]=0;

for (k=0;j>=0;j--) {

for (k=0;j>=0;j--) {

  t=(k<<16)+u[j];

  t=(k<<16)+u[j];

  u[j]=t>>d, k=t&mask;

  u[j]=t>>d, k=t&mask;

@ @=

@ @=

acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];

acc.h=(q[3]<<16)+q[2], acc.l=(q[1]<<16)+q[0];

aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];

aux.h=(u[3]<<16)+u[2], aux.l=(u[1]<<16)+u[0];

@ @=

@ @=

@;

@;

@;

@;

@;

@;

  q[j]=qhat;

  q[j]=qhat;

@ @=

@ @=

t=(u[j+n]<<16)+u[j+n-1];

t=(u[j+n]<<16)+u[j+n-1];

qhat=t/vh, rhat=t-vh*qhat;

qhat=t/vh, rhat=t-vh*qhat;

if (n>1) while (qhat==0x10000 || qhat*vmh>(rhat<<16)+u[j+n-2]) {

if (n>1) while (qhat==0x10000 || qhat*vmh>(rhat<<16)+u[j+n-2]) {

  qhat--, rhat+=vh;

  qhat--, rhat+=vh;

  if (rhat>=0x10000) break;

  if (rhat>=0x10000) break;

@ After this step, |u[j+n]| will either equal |k| or |k-1|. The

@ After this step, |u[j+n]| will either equal |k| or |k-1|. The

true value of~|u| would be obtained by subtracting~|k| from |u[j+n]|;

true value of~|u| would be obtained by subtracting~|k| from |u[j+n]|;

but we don't have to fuss over |u[j+n]|, because it won't be examined later.

but we don't have to fuss over |u[j+n]|, because it won't be examined later.

@=

@=

for (i=k=0; i

for (i=k=0; i

  t=u[i+j]+0xffff0000-k-qhat*v[i];

  t=u[i+j]+0xffff0000-k-qhat*v[i];

  u[i+j]=t&0xffff, k=0xffff-(t>>16);

  u[i+j]=t&0xffff, k=0xffff-(t>>16);

@ The correction here occurs only rarely, but it can be necessary---for

@ The correction here occurs only rarely, but it can be necessary---for

example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.

example, when dividing the number \Hex{7fff800100000000} by \Hex{800080020005}.

@=

@=

if (u[j+n]!=k) {

if (u[j+n]!=k) {

  qhat--;

  qhat--;

  for (i=k=0; i

  for (i=k=0; i

    t=u[i+j]+v[i]+k;

    t=u[i+j]+v[i]+k;

    u[i+j]=t&0xffff, k=t>>16;

    u[i+j]=t&0xffff, k=t>>16;

@ Signed division can be reduced to unsigned division in a tedious

@ Signed division can be reduced to unsigned division in a tedious

but straightforward manner. We assume that the divisor isn't zero.

but straightforward manner. We assume that the divisor isn't zero.

@=

@=

octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa signed_odiv @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa signed_odiv(y,z)

octa signed_odiv(y,z)

  octa y,z;

  octa y,z;

  octa yy,zz,q;

  octa yy,zz,q;

  register int sy,sz;

  register int sy,sz;

  if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);

  if (y.h&sign_bit) sy=2, yy=ominus(zero_octa,y);

  else sy=0, yy=y;

  else sy=0, yy=y;

  if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);

  if (z.h&sign_bit) sz=1, zz=ominus(zero_octa,z);

  else sz=0, zz=z;

  else sz=0, zz=z;

  q=odiv(zero_octa,yy,zz);

  q=odiv(zero_octa,yy,zz);

  overflow=false;

  overflow=false;

  switch (sy+sz) {

  switch (sy+sz) {

 case 2+1: aux=ominus(zero_octa,aux);

 case 2+1: aux=ominus(zero_octa,aux);

  if (q.h==sign_bit) overflow=true;

  if (q.h==sign_bit) overflow=true;

 case 0+0: return q;

 case 0+0: return q;

 case 2+0:@+ if (aux.h || aux.l) aux=ominus(zz,aux);

 case 2+0:@+ if (aux.h || aux.l) aux=ominus(zz,aux);

  goto negate_q;

  goto negate_q;

 case 0+1:@+ if (aux.h || aux.l) aux=ominus(aux,zz);

 case 0+1:@+ if (aux.h || aux.l) aux=ominus(aux,zz);

 negate_q:@+ if (aux.h || aux.l) return ominus(neg_one,q);

 negate_q:@+ if (aux.h || aux.l) return ominus(neg_one,q);

  else return ominus(zero_octa,q);

  else return ominus(zero_octa,q);

@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to

@* Bit fiddling. The bitwise operators of \MMIX\ are fairly easy to

implement directly, but three of them occur often enough to deserve

implement directly, but three of them occur often enough to deserve

packaging as subroutines.

packaging as subroutines.

@=

@=

octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oand @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oand(y,z) /* compute $y\land z$ */

octa oand(y,z) /* compute $y\land z$ */

  octa y,z;

  octa y,z;

{@+ octa x;

{@+ octa x;

  x.h=y.h&z.h;@+ x.l=y.l&z.l;

  x.h=y.h&z.h;@+ x.l=y.l&z.l;

  return x;

  return x;

@#

@#

octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oandn @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oandn(y,z) /* compute $y\land\bar z$ */

octa oandn(y,z) /* compute $y\land\bar z$ */

  octa y,z;

  octa y,z;

{@+ octa x;

{@+ octa x;

  x.h=y.h&~z.h;@+ x.l=y.l&~z.l;

  x.h=y.h&~z.h;@+ x.l=y.l&~z.l;

  return x;

  return x;

@#

@#

octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oxor @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa oxor(y,z) /* compute $y\oplus z$ */

octa oxor(y,z) /* compute $y\oplus z$ */

  octa y,z;

  octa y,z;

{@+ octa x;

{@+ octa x;

  x.h=y.h^z.h;@+ x.l=y.l^z.l;

  x.h=y.h^z.h;@+ x.l=y.l^z.l;

  return x;

  return x;

@ Here's a fun way to count the number of bits in a tetrabyte.

@ Here's a fun way to count the number of bits in a tetrabyte.

[This classical trick is called the ``Gillies--Miller method

[This classical trick is called the ``Gillies--Miller method

for sideways addition'' in {\sl The Preparation of Programs

for sideways addition'' in {\sl The Preparation of Programs

for an Electronic Digital Computer\/} by Wilkes, Wheeler, and

for an Electronic Digital Computer\/} by Wilkes, Wheeler, and

Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),

Gill, second edition (Reading, Mass.:\ Addison--Wesley, 1957),

191--193. Some of the tricks used here were suggested by

191--193. Some of the tricks used here were suggested by

Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]

Balbir Singh, Peter Rossmanith, and Stefan Schwoon.]

@^Gillies, Donald Bruce@>

@^Gillies, Donald Bruce@>

@^Miller, Jeffrey Charles Percy@>

@^Miller, Jeffrey Charles Percy@>

@^Wilkes, Maurice Vincent@>

@^Wilkes, Maurice Vincent@>

@^Wheeler, David John@>

@^Wheeler, David John@>

@^Gill, Stanley@>

@^Gill, Stanley@>

@^Singh, Balbir@>

@^Singh, Balbir@>

@^Rossmanith, Peter@>

@^Rossmanith, Peter@>

@^Schwoon, Stefan@>

@^Schwoon, Stefan@>

@=

@=

int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>

int count_bits @,@,@[ARGS((tetra))@];@+@t}\6{@>

int count_bits(x)

int count_bits(x)

  tetra x;

  tetra x;

  register int xx=x;

  register int xx=x;

  xx=xx-((xx>>1)&0x55555555);

  xx=xx-((xx>>1)&0x55555555);

  xx=(xx&0x33333333)+((xx>>2)&0x33333333);

  xx=(xx&0x33333333)+((xx>>2)&0x33333333);

  xx=(xx+(xx>>4))&0x0f0f0f0f;

  xx=(xx+(xx>>4))&0x0f0f0f0f;

  xx=xx+(xx>>8);

  xx=xx+(xx>>8);

  return (xx+(xx>>16)) & 0xff;

  return (xx+(xx>>16)) & 0xff;

@ To compute the nonnegative byte differences of two given tetrabytes,

@ To compute the nonnegative byte differences of two given tetrabytes,

we can carry out the following 20-step branchless computation:

we can carry out the following 20-step branchless computation:

@=

@=

tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>

tetra byte_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>

tetra byte_diff(y,z)

tetra byte_diff(y,z)

  tetra y,z;

  tetra y,z;

  register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);

  register tetra d=(y&0x00ff00ff)+0x01000100-(z&0x00ff00ff);

  register tetra m=d&0x01000100;

  register tetra m=d&0x01000100;

  register tetra x=d&(m-(m>>8));

  register tetra x=d&(m-(m>>8));

  d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);

  d=((y>>8)&0x00ff00ff)+0x01000100-((z>>8)&0x00ff00ff);

  m=d&0x01000100;

  m=d&0x01000100;

  return x+((d&(m-(m>>8)))<<8);

  return x+((d&(m-(m>>8)))<<8);

@ To compute the nonnegative wyde differences of two tetrabytes,

@ To compute the nonnegative wyde differences of two tetrabytes,

another trick leads to a 15-step branchless computation.

another trick leads to a 15-step branchless computation.

(Research problem: Can |count_bits|, |byte_diff|, or |wyde_diff| be done

(Research problem: Can |count_bits|, |byte_diff|, or |wyde_diff| be done

with fewer operations?)

with fewer operations?)

@=

@=

tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>

tetra wyde_diff @,@,@[ARGS((tetra,tetra))@];@+@t}\6{@>

tetra wyde_diff(y,z)

tetra wyde_diff(y,z)

  tetra y,z;

  tetra y,z;

  register tetra a=((y>>16)-(z>>16))&0x10000;

  register tetra a=((y>>16)-(z>>16))&0x10000;

  register tetra b=((y&0xffff)-(z&0xffff))&0x10000;

  register tetra b=((y&0xffff)-(z&0xffff))&0x10000;

  return y-(z^((y^z)&(b-a-(b>>16))));

  return y-(z^((y^z)&(b-a-(b>>16))));

@ The last bitwise subroutine we need is the most interesting:

@ The last bitwise subroutine we need is the most interesting:

It implements \MMIX's \.{MOR} and \.{MXOR} operations.

It implements \MMIX's \.{MOR} and \.{MXOR} operations.

@=

@=

octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>

octa bool_mult @,@,@[ARGS((octa,octa,bool))@];@+@t}\6{@>

octa bool_mult(y,z,xor)

octa bool_mult(y,z,xor)

  octa y,z; /* the operands */

  octa y,z; /* the operands */

  bool xor; /* do we do xor instead of or? */

  bool xor; /* do we do xor instead of or? */

  octa o,x;

  octa o,x;

  register tetra a,b,c;

  register tetra a,b,c;

  register int k;

  register int k;

  for (k=0,o=y,x=zero_octa;o.h||o.l;k++,o=shift_right(o,8,1))

  for (k=0,o=y,x=zero_octa;o.h||o.l;k++,o=shift_right(o,8,1))

    if (o.l&0xff) {

    if (o.l&0xff) {

      a=((z.h>>k)&0x01010101)*0xff;

      a=((z.h>>k)&0x01010101)*0xff;

      b=((z.l>>k)&0x01010101)*0xff;

      b=((z.l>>k)&0x01010101)*0xff;

      c=(o.l&0xff)*0x01010101;

      c=(o.l&0xff)*0x01010101;

      if (xor) x.h^=a&c, x.l^=b&c;

      if (xor) x.h^=a&c, x.l^=b&c;

      else x.h|=a&c, x.l|=b&c;

      else x.h|=a&c, x.l|=b&c;

  return x;

  return x;

@* Floating point packing and unpacking. Standard IEEE floating binary

@* Floating point packing and unpacking. Standard IEEE floating binary

numbers pack a sign, exponent, and fraction into a tetrabyte

numbers pack a sign, exponent, and fraction into a tetrabyte

or octabyte. In this section we consider basic subroutines that

or octabyte. In this section we consider basic subroutines that

convert between IEEE format and the separate unpacked components.

convert between IEEE format and the separate unpacked components.

@d ROUND_OFF 1

@d ROUND_OFF 1

@d ROUND_UP 2

@d ROUND_UP 2

@d ROUND_DOWN 3

@d ROUND_DOWN 3

@d ROUND_NEAR 4

@d ROUND_NEAR 4

@=

@=

int cur_round; /* the current rounding mode */

int cur_round; /* the current rounding mode */

@ The |fpack| routine takes an octabyte $f$, a raw exponent~$e$,

@ The |fpack| routine takes an octabyte $f$, a raw exponent~$e$,

and a sign~|s|, and packs them

and a sign~|s|, and packs them

into the floating binary number that corresponds to

into the floating binary number that corresponds to

$\pm2^{e-1076}f$, using a given rounding mode.

$\pm2^{e-1076}f$, using a given rounding mode.

The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.

The value of $f$ should satisfy $2^{54}\le f\le 2^{55}$.

Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$

Thus, for example, the floating binary number $+1.0=\Hex{3ff0000000000000}$

is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and |s='+'|.

is obtained when $f=2^{54}$, $e=\Hex{3fe}$, and |s='+'|.

The raw exponent~$e$ is usually one less than

The raw exponent~$e$ is usually one less than

the final exponent value; the leading bit of~$f$ is essentially added

the final exponent value; the leading bit of~$f$ is essentially added

to the exponent. (This trick works nicely for subnormal numbers, when

to the exponent. (This trick works nicely for subnormal numbers, when

$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)

$e<0$, or in cases where the value of $f$ is rounded upwards to $2^{55}$.)

Exceptional events are noted by oring appropriate bits into

Exceptional events are noted by oring appropriate bits into

the global variable |exceptions|. Special considerations apply to

the global variable |exceptions|. Special considerations apply to

underflow, which is not fully specified by Section 7.4 of the IEEE standard:

underflow, which is not fully specified by Section 7.4 of the IEEE standard:

Implementations of the standard are free to choose between two definitions

Implementations of the standard are free to choose between two definitions

of ``tininess'' and two definitions of ``accuracy loss.''

of ``tininess'' and two definitions of ``accuracy loss.''

\MMIX\ determines tininess {\it after\/} rounding, hence a result with

\MMIX\ determines tininess {\it after\/} rounding, hence a result with

$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent

$e<0$ is not necessarily tiny; \MMIX\ treats accuracy loss as equivalent

to inexactness. Thus, a result underflows if and only if

to inexactness. Thus, a result underflows if and only if

it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.

it is tiny and either (i)~it is inexact or (ii)~the underflow trap is enabled.

The |fpack| routine sets |U_BIT| in |exceptions| if and only if the result is

The |fpack| routine sets |U_BIT| in |exceptions| if and only if the result is

tiny, |X_BIT| if and only if the result is inexact.

tiny, |X_BIT| if and only if the result is inexact.

@^underflow@>

@^underflow@>

@d X_BIT (1<<8) /* floating inexact */

@d X_BIT (1<<8) /* floating inexact */

@d Z_BIT (1<<9) /* floating division by zero */

@d Z_BIT (1<<9) /* floating division by zero */

@d U_BIT (1<<10) /* floating underflow */

@d U_BIT (1<<10) /* floating underflow */

@d O_BIT (1<<11) /* floating overflow */

@d O_BIT (1<<11) /* floating overflow */

@d I_BIT (1<<12) /* floating invalid operation */

@d I_BIT (1<<12) /* floating invalid operation */

@d W_BIT (1<<13) /* float-to-fix overflow */

@d W_BIT (1<<13) /* float-to-fix overflow */

@d V_BIT (1<<14) /* integer overflow */

@d V_BIT (1<<14) /* integer overflow */

@d D_BIT (1<<15) /* integer divide check */

@d D_BIT (1<<15) /* integer divide check */

@d E_BIT (1<<18) /* external (dynamic) trap bit */

@d E_BIT (1<<18) /* external (dynamic) trap bit */

@=

@=

octa fpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>

octa fpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>

octa fpack(f,e,s,r)

octa fpack(f,e,s,r)

  octa f; /* the normalized fraction part */

  octa f; /* the normalized fraction part */

  int e; /* the raw exponent */

  int e; /* the raw exponent */

  char s; /* the sign */

  char s; /* the sign */

  int r; /* the rounding mode */

  int r; /* the rounding mode */

  octa o;

  octa o;

  if (e>0x7fd) e=0x7ff, o=zero_octa;

  if (e>0x7fd) e=0x7ff, o=zero_octa;

  else {

  else {

    if (e<0) {

    if (e<0) {

      if (e<-54) o.h=0, o.l=1;

      if (e<-54) o.h=0, o.l=1;

      else {@+octa oo;

      else {@+octa oo;

        o=shift_right(f,-e,1);

        o=shift_right(f,-e,1);

        oo=shift_left(o,-e);

        oo=shift_left(o,-e);

        if (oo.l!=f.l || oo.h!=f.h) o.l |= 1; /* sticky bit */

        if (oo.l!=f.l || oo.h!=f.h) o.l |= 1; /* sticky bit */

@^sticky bit@>

@^sticky bit@>

      e=0;

      e=0;

    }@+else o=f;

    }@+else o=f;

@;

@;

@ @=

@ @=

int exceptions; /* bits possibly destined for rA */

int exceptions; /* bits possibly destined for rA */

@ Everything falls together so nicely here, it's almost too good to be true!

@ Everything falls together so nicely here, it's almost too good to be true!

@=

@=

if (o.l&3) exceptions |= X_BIT;

if (o.l&3) exceptions |= X_BIT;

switch (r) {

switch (r) {

 case ROUND_DOWN:@+ if (s=='-') o=incr(o,3);@+break;

 case ROUND_DOWN:@+ if (s=='-') o=incr(o,3);@+break;

 case ROUND_UP:@+ if (s!='-') o=incr(o,3);

 case ROUND_UP:@+ if (s!='-') o=incr(o,3);

 case ROUND_OFF: break;

 case ROUND_OFF: break;

 case ROUND_NEAR: o=incr(o, o.l&4? 2: 1);@+break;

 case ROUND_NEAR: o=incr(o, o.l&4? 2: 1);@+break;

o = shift_right(o,2,1);

o = shift_right(o,2,1);

o.h += e<<20;

o.h += e<<20;

if (o.h>=0x7ff00000) exceptions |= O_BIT+X_BIT; /* overflow */

if (o.h>=0x7ff00000) exceptions |= O_BIT+X_BIT; /* overflow */

else if (o.h<0x100000) exceptions |= U_BIT; /* tininess */

else if (o.h<0x100000) exceptions |= U_BIT; /* tininess */

if (s=='-') o.h |= sign_bit;

if (s=='-') o.h |= sign_bit;

return o;

return o;

@ Similarly, |sfpack| packs a short float, from inputs

@ Similarly, |sfpack| packs a short float, from inputs

having the same conventions as |fpack|.

having the same conventions as |fpack|.

@=

@=

tetra sfpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>

tetra sfpack @,@,@[ARGS((octa,int,char,int))@];@+@t}\6{@>

tetra sfpack(f,e,s,r)

tetra sfpack(f,e,s,r)

  octa f; /* the fraction part */

  octa f; /* the fraction part */

  int e; /* the raw exponent */

  int e; /* the raw exponent */

  char s; /* the sign */

  char s; /* the sign */

  int r; /* the rounding mode */

  int r; /* the rounding mode */

  register tetra o;

  register tetra o;

  if (e>0x47d) e=0x47f, o=0;

  if (e>0x47d) e=0x47f, o=0;

  else {

  else {

    o=shift_left(f,3).h;

    o=shift_left(f,3).h;

    if (f.l&0x1fffffff) o|=1;

    if (f.l&0x1fffffff) o|=1;

    if (e<0x380) {

    if (e<0x380) {

      if (e<0x380-25) o=1;

      if (e<0x380-25) o=1;

      else {@+register tetra o0,oo;

      else {@+register tetra o0,oo;

        o0 = o;

        o0 = o;

        o = o>>(0x380-e);

        o = o>>(0x380-e);

        oo = o<<(0x380-e);

        oo = o<<(0x380-e);

        if (oo!=o0) o |= 1; /* sticky bit */

        if (oo!=o0) o |= 1; /* sticky bit */

@^sticky bit@>

@^sticky bit@>

      e=0x380;

      e=0x380;

@;

@;

@ @=

@ @=

if (o&3) exceptions |= X_BIT;

if (o&3) exceptions |= X_BIT;

switch (r) {

switch (r) {

 case ROUND_DOWN:@+ if (s=='-') o+=3;@+break;

 case ROUND_DOWN:@+ if (s=='-') o+=3;@+break;

 case ROUND_UP:@+ if (s!='-') o+=3;

 case ROUND_UP:@+ if (s!='-') o+=3;

 case ROUND_OFF: break;

 case ROUND_OFF: break;

 case ROUND_NEAR: o+=(o&4? 2: 1);@+break;

 case ROUND_NEAR: o+=(o&4? 2: 1);@+break;

o = o>>2;

o = o>>2;

o += (e-0x380)<<23;

o += (e-0x380)<<23;

if (o>=0x7f800000) exceptions |= O_BIT+X_BIT; /* overflow */

if (o>=0x7f800000) exceptions |= O_BIT+X_BIT; /* overflow */

else if (o<0x100000) exceptions |= U_BIT; /* tininess */

else if (o<0x100000) exceptions |= U_BIT; /* tininess */

if (s=='-') o |= sign_bit;

if (s=='-') o |= sign_bit;

return o;

return o;

@ The |funpack| routine is, roughly speaking, the opposite of |fpack|.

@ The |funpack| routine is, roughly speaking, the opposite of |fpack|.

It takes a given floating point number~$x$ and separates out its

It takes a given floating point number~$x$ and separates out its

fraction part~$f$, exponent~$e$, and sign~$s$. It clears |exceptions|

fraction part~$f$, exponent~$e$, and sign~$s$. It clears |exceptions|

to zero. It returns the type of value found: |zro|, |num|, |inf|,

to zero. It returns the type of value found: |zro|, |num|, |inf|,

or |nan|. When it returns |num|,

or |nan|. When it returns |num|,

it will have set $f$, $e$, and~$s$

it will have set $f$, $e$, and~$s$

to the values from which |fpack| would produce the original number~$x$

to the values from which |fpack| would produce the original number~$x$

without exceptions.

without exceptions.

@d zero_exponent (-1000) /* zero is assumed to have this exponent */

@d zero_exponent (-1000) /* zero is assumed to have this exponent */

@=

@=

typedef enum {@!zro,@!num,@!inf,@!nan}@+ftype;

typedef enum {@!zro,@!num,@!inf,@!nan}@+ftype;

@ @=

@ @=

ftype funpack @,@,@[ARGS((octa,octa*,int*,char*))@];@+@t}\6{@>

ftype funpack @,@,@[ARGS((octa,octa*,int*,char*))@];@+@t}\6{@>

ftype funpack(x,f,e,s)

ftype funpack(x,f,e,s)

  octa x; /* the given floating point value */

  octa x; /* the given floating point value */

  octa *f; /* address where the fraction part should be stored */

  octa *f; /* address where the fraction part should be stored */

  int *e; /* address where the exponent part should be stored */

  int *e; /* address where the exponent part should be stored */

  char *s; /* address where the sign should be stored */

  char *s; /* address where the sign should be stored */

  register int ee;

  register int ee;

  exceptions=0;

  exceptions=0;

  *s=(x.h&sign_bit? '-': '+');

  *s=(x.h&sign_bit? '-': '+');

  *f=shift_left(x,2);

  *f=shift_left(x,2);

  f->h &= 0x3fffff;

  f->h &= 0x3fffff;

  ee=(x.h>>20)&0x7ff;

  ee=(x.h>>20)&0x7ff;

  if (ee) {

  if (ee) {

    *e=ee-1;

    *e=ee-1;

    f->h |= 0x400000;

    f->h |= 0x400000;

    return (ee<0x7ff? num: f->h==0x400000 && !f->l? inf: nan);

    return (ee<0x7ff? num: f->h==0x400000 && !f->l? inf: nan);

  if (!x.l && !f->h) {

  if (!x.l && !f->h) {

    *e=zero_exponent;@+ return zro;

    *e=zero_exponent;@+ return zro;

  do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));

  do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));

  *e=ee;@+ return num;

  *e=ee;@+ return num;

@ @=

@ @=

ftype sfunpack @,@,@[ARGS((tetra,octa*,int*,char*))@];@+@t}\6{@>

ftype sfunpack @,@,@[ARGS((tetra,octa*,int*,char*))@];@+@t}\6{@>

ftype sfunpack(x,f,e,s)

ftype sfunpack(x,f,e,s)

  tetra x; /* the given floating point value */

  tetra x; /* the given floating point value */

  octa *f; /* address where the fraction part should be stored */

  octa *f; /* address where the fraction part should be stored */

  int *e; /* address where the exponent part should be stored */

  int *e; /* address where the exponent part should be stored */

  char *s; /* address where the sign should be stored */

  char *s; /* address where the sign should be stored */

  register int ee;

  register int ee;

  exceptions=0;

  exceptions=0;

  *s=(x&sign_bit? '-': '+');

  *s=(x&sign_bit? '-': '+');

  f->h=(x>>1)&0x3fffff, f->l=x<<31;

  f->h=(x>>1)&0x3fffff, f->l=x<<31;

  ee=(x>>23)&0xff;

  ee=(x>>23)&0xff;

  if (ee) {

  if (ee) {

    *e=ee+0x380-1;

    *e=ee+0x380-1;

    f->h |= 0x400000;

    f->h |= 0x400000;

    return (ee<0xff? num: (x&0x7fffffff)==0x7f800000? inf: nan);

    return (ee<0xff? num: (x&0x7fffffff)==0x7f800000? inf: nan);

  if (!(x&0x7fffffff)) {

  if (!(x&0x7fffffff)) {

    *e=zero_exponent;@+return zro;

    *e=zero_exponent;@+return zro;

  do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));

  do {@+ ee--;@+ *f=shift_left(*f,1);@+} while (!(f->h&0x400000));

  *e=ee+0x380;@+ return num;

  *e=ee+0x380;@+ return num;

@ Since \MMIX\ downplays 32-bit operations, it uses |sfpack| and |sfunpack|

@ Since \MMIX\ downplays 32-bit operations, it uses |sfpack| and |sfunpack|

only when loading and storing short floats, or when converting

only when loading and storing short floats, or when converting

from fixed point to floating point.

from fixed point to floating point.

@=

@=

octa load_sf @,@,@[ARGS((tetra))@];@+@t}\6{@>

octa load_sf @,@,@[ARGS((tetra))@];@+@t}\6{@>

octa load_sf(z)

octa load_sf(z)

  tetra z; /* 32 bits to be loaded into a 64-bit register */

  tetra z; /* 32 bits to be loaded into a 64-bit register */

  octa f,x;@+int e;@+char s;@+ftype t;

  octa f,x;@+int e;@+char s;@+ftype t;

  t=sfunpack(z,&f,&e,&s);

  t=sfunpack(z,&f,&e,&s);

  switch (t) {

  switch (t) {

 case zro: x=zero_octa;@+break;

 case zro: x=zero_octa;@+break;

 case num: return fpack(f,e,s,ROUND_OFF);

 case num: return fpack(f,e,s,ROUND_OFF);

 case inf: x=inf_octa;@+break;

 case inf: x=inf_octa;@+break;

 case nan: x=shift_right(f,2,1);@+x.h|=0x7ff00000;@+break;

 case nan: x=shift_right(f,2,1);@+x.h|=0x7ff00000;@+break;

  if (s=='-') x.h|=sign_bit;

  if (s=='-') x.h|=sign_bit;

  return x;

  return x;

@ @=

@ @=

tetra store_sf @,@,@[ARGS((octa))@];@+@t}\6{@>

tetra store_sf @,@,@[ARGS((octa))@];@+@t}\6{@>

tetra store_sf(x)

tetra store_sf(x)

  octa x; /* 64 bits to be loaded into a 32-bit word */

  octa x; /* 64 bits to be loaded into a 32-bit word */

  octa f;@+tetra z;@+int e;@+char s;@+ftype t;

  octa f;@+tetra z;@+int e;@+char s;@+ftype t;

  t=funpack(x,&f,&e,&s);

  t=funpack(x,&f,&e,&s);

  switch (t) {

  switch (t) {

 case zro: z=0;@+break;

 case zro: z=0;@+break;

 case num: return sfpack(f,e,s,cur_round);

 case num: return sfpack(f,e,s,cur_round);

 case inf: z=0x7f800000;@+break;

 case inf: z=0x7f800000;@+break;

 case nan:@+ if (!(f.h&0x200000)) {

 case nan:@+ if (!(f.h&0x200000)) {

      f.h|=0x200000;@+exceptions|=I_BIT; /* NaN was signaling */

      f.h|=0x200000;@+exceptions|=I_BIT; /* NaN was signaling */

    z=0x7f800000|(f.h<<1)|(f.l>>31);@+break;

    z=0x7f800000|(f.h<<1)|(f.l>>31);@+break;

  if (s=='-') z|=sign_bit;

  if (s=='-') z|=sign_bit;

  return z;

  return z;

@* Floating multiplication and division.

@* Floating multiplication and division.

The hardest fixed point operations were multiplication and division;

The hardest fixed point operations were multiplication and division;

but these two operations are the {\it easiest\/} to implement in floating point

but these two operations are the {\it easiest\/} to implement in floating point

arithmetic, once their fixed point counterparts are available.

arithmetic, once their fixed point counterparts are available.

@=

@=

octa fmult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa fmult @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa fmult(y,z)

octa fmult(y,z)

  octa y,z;

  octa y,z;

  ftype yt,zt;

  ftype yt,zt;

  int ye,ze;

  int ye,ze;

  char ys,zs;

  char ys,zs;

  octa x,xf,yf,zf;

  octa x,xf,yf,zf;

  register int xe;

  register int xe;

  register char xs;

  register char xs;

  yt=funpack(y,&yf,&ye,&ys);

  yt=funpack(y,&yf,&ye,&ys);

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  xs=ys+zs-'+'; /* will be |'-'| when the result is negative */

  xs=ys+zs-'+'; /* will be |'-'| when the result is negative */

  switch (4*yt+zt) {

  switch (4*yt+zt) {

 @t\4@>@;

 @t\4@>@;

 case 4*zro+zro: case 4*zro+num: case 4*num+zro: x=zero_octa;@+break;

 case 4*zro+zro: case 4*zro+num: case 4*num+zro: x=zero_octa;@+break;

 case 4*num+inf: case 4*inf+num: case 4*inf+inf: x=inf_octa;@+break;

 case 4*num+inf: case 4*inf+num: case 4*inf+inf: x=inf_octa;@+break;

 case 4*zro+inf: case 4*inf+zro: x=standard_NaN;

 case 4*zro+inf: case 4*inf+zro: x=standard_NaN;

  exceptions|=I_BIT;@+break;

  exceptions|=I_BIT;@+break;

 case 4*num+num: @;

 case 4*num+num: @;

  if (xs=='-') x.h|=sign_bit;

  if (xs=='-') x.h|=sign_bit;

  return x;

  return x;

@ @=

@ @=

case 4*nan+nan:@+if (!(y.h&0x80000)) exceptions|=I_BIT; /* |y| is signaling */

case 4*nan+nan:@+if (!(y.h&0x80000)) exceptions|=I_BIT; /* |y| is signaling */

case 4*zro+nan: case 4*num+nan: case 4*inf+nan:

case 4*zro+nan: case 4*num+nan: case 4*inf+nan:

  if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;

  if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;

  return z;

  return z;

case 4*nan+zro: case 4*nan+num: case 4*nan+inf:

case 4*nan+zro: case 4*nan+num: case 4*nan+inf:

  if (!(y.h&0x80000)) exceptions|=I_BIT, y.h|=0x80000;

  if (!(y.h&0x80000)) exceptions|=I_BIT, y.h|=0x80000;

  return y;

  return y;

@ @=

@ @=

xe=ye+ze-0x3fd; /* the raw exponent */

xe=ye+ze-0x3fd; /* the raw exponent */

x=omult(yf,shift_left(zf,9));

x=omult(yf,shift_left(zf,9));

if (aux.h>=0x400000) xf=aux;

if (aux.h>=0x400000) xf=aux;

else xf=shift_left(aux,1), xe--;

else xf=shift_left(aux,1), xe--;

if (x.h||x.l) xf.l|=1; /* adjust the sticky bit */

if (x.h||x.l) xf.l|=1; /* adjust the sticky bit */

return fpack(xf,xe,xs,cur_round);

return fpack(xf,xe,xs,cur_round);

@ @=

@ @=

octa fdivide @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa fdivide @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa fdivide(y,z)

octa fdivide(y,z)

  octa y,z;

  octa y,z;

  ftype yt,zt;

  ftype yt,zt;

  int ye,ze;

  int ye,ze;

  char ys,zs;

  char ys,zs;

  octa x,xf,yf,zf;

  octa x,xf,yf,zf;

  register int xe;

  register int xe;

  register char xs;

  register char xs;

  yt=funpack(y,&yf,&ye,&ys);

  yt=funpack(y,&yf,&ye,&ys);

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  xs=ys+zs-'+'; /* will be |'-'| when the result is negative */

  xs=ys+zs-'+'; /* will be |'-'| when the result is negative */

  switch (4*yt+zt) {

  switch (4*yt+zt) {

 @t\4@>@;

 @t\4@>@;

 case 4*zro+inf: case 4*zro+num: case 4*num+inf: x=zero_octa;@+break;

 case 4*zro+inf: case 4*zro+num: case 4*num+inf: x=zero_octa;@+break;

 case 4*num+zro: exceptions|=Z_BIT;

 case 4*num+zro: exceptions|=Z_BIT;

 case 4*inf+num: case 4*inf+zro: x=inf_octa;@+break;

 case 4*inf+num: case 4*inf+zro: x=inf_octa;@+break;

 case 4*zro+zro: case 4*inf+inf: x=standard_NaN;

 case 4*zro+zro: case 4*inf+inf: x=standard_NaN;

  exceptions|=I_BIT;@+break;

  exceptions|=I_BIT;@+break;

 case 4*num+num: @;

 case 4*num+num: @;

  if (xs=='-') x.h|=sign_bit;

  if (xs=='-') x.h|=sign_bit;

  return x;

  return x;

@ @=

@ @=

xe=ye-ze+0x3fd; /* the raw exponent */

xe=ye-ze+0x3fd; /* the raw exponent */

xf=odiv(yf,zero_octa,shift_left(zf,9));

xf=odiv(yf,zero_octa,shift_left(zf,9));

if (xf.h>=0x800000) {

if (xf.h>=0x800000) {

  aux.l|=xf.l&1;

  aux.l|=xf.l&1;

  xf=shift_right(xf,1,1);

  xf=shift_right(xf,1,1);

  xe++;

  xe++;

if (aux.h||aux.l) xf.l|=1; /* adjust the sticky bit */

if (aux.h||aux.l) xf.l|=1; /* adjust the sticky bit */

return fpack(xf,xe,xs,cur_round);

return fpack(xf,xe,xs,cur_round);

@*Floating addition and subtraction. Now for the bread-and-butter

@*Floating addition and subtraction. Now for the bread-and-butter

operation, the sum of two floating point numbers.

operation, the sum of two floating point numbers.

It is not terribly difficult, but many cases need to be handled carefully.

It is not terribly difficult, but many cases need to be handled carefully.

@=

@=

octa fplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa fplus @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

octa fplus(y,z)

octa fplus(y,z)

  octa y,z;

  octa y,z;

  ftype yt,zt;

  ftype yt,zt;

  int ye,ze;

  int ye,ze;

  char ys,zs;

  char ys,zs;

  octa x,xf,yf,zf;

  octa x,xf,yf,zf;

  register int xe,d;

  register int xe,d;

  register char xs;

  register char xs;

  yt=funpack(y,&yf,&ye,&ys);

  yt=funpack(y,&yf,&ye,&ys);

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  switch (4*yt+zt) {

  switch (4*yt+zt) {

 @t\4@>@;

 @t\4@>@;

 case 4*zro+num: return fpack(zf,ze,zs,ROUND_OFF);@+break; /* may underflow */

 case 4*zro+num: return fpack(zf,ze,zs,ROUND_OFF);@+break; /* may underflow */

 case 4*num+zro: return fpack(yf,ye,ys,ROUND_OFF);@+break; /* may underflow */

 case 4*num+zro: return fpack(yf,ye,ys,ROUND_OFF);@+break; /* may underflow */

 case 4*inf+inf:@+if (ys!=zs) {

 case 4*inf+inf:@+if (ys!=zs) {

    exceptions|=I_BIT;@+x=standard_NaN;@+xs=zs;@+break;

    exceptions|=I_BIT;@+x=standard_NaN;@+xs=zs;@+break;

 case 4*num+inf: case 4*zro+inf: x=inf_octa;@+xs=zs;@+break;

 case 4*num+inf: case 4*zro+inf: x=inf_octa;@+xs=zs;@+break;

 case 4*inf+num: case 4*inf+zro: x=inf_octa;@+xs=ys;@+break;

 case 4*inf+num: case 4*inf+zro: x=inf_octa;@+xs=ys;@+break;

 case 4*num+num:@+ if (y.h!=(z.h^0x80000000) || y.l!=z.l)

 case 4*num+num:@+ if (y.h!=(z.h^0x80000000) || y.l!=z.l)

@;

@;

 case 4*zro+zro: x=zero_octa;

 case 4*zro+zro: x=zero_octa;

  xs=(ys==zs? ys: cur_round==ROUND_DOWN? '-': '+');@+break;

  xs=(ys==zs? ys: cur_round==ROUND_DOWN? '-': '+');@+break;

  if (xs=='-') x.h|=sign_bit;

  if (xs=='-') x.h|=sign_bit;

  return x;

  return x;

@ @=

@ @=

{@+octa o,oo;

{@+octa o,oo;

  if (ye

  if (ye

@;

@;

  d=ye-ze;

  d=ye-ze;

  xs=ys, xe=ye;

  xs=ys, xe=ye;

  if (d) @;

  if (d) @;

  if (ys==zs) {

  if (ys==zs) {

    xf=oplus(yf,zf);

    xf=oplus(yf,zf);

    if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;

    if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;

  }@+else {

  }@+else {

    xf=ominus(yf,zf);

    xf=ominus(yf,zf);

    if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;

    if (xf.h>=0x800000) xe++, d=xf.l&1, xf=shift_right(xf,1,1), xf.l|=d;

    else@+ while (xf.h<0x400000) xe--, xf=shift_left(xf,1);

    else@+ while (xf.h<0x400000) xe--, xf=shift_left(xf,1);

  return fpack(xf,xe,xs,cur_round);

  return fpack(xf,xe,xs,cur_round);

@ @=

@ @=

  o=yf, yf=zf, zf=o;

  o=yf, yf=zf, zf=o;

  d=ye, ye=ze, ze=d;

  d=ye, ye=ze, ze=d;

  d=ys, ys=zs, zs=d;

  d=ys, ys=zs, zs=d;

@ Proper rounding requires two bits to the right of the fraction delivered

@ Proper rounding requires two bits to the right of the fraction delivered

to~|fpack|. The first is the true next bit of the result;

to~|fpack|. The first is the true next bit of the result;

the other is a ``sticky'' bit, which is nonzero if any further bits of the

the other is a ``sticky'' bit, which is nonzero if any further bits of the

true result are nonzero. Sticky rounding to an integer takes

true result are nonzero. Sticky rounding to an integer takes

$x$ into the number $\lfloor x/2\rfloor+\lceil x/2\rceil$.

$x$ into the number $\lfloor x/2\rfloor+\lceil x/2\rceil$.

@^sticky bit@>

@^sticky bit@>

Some subtleties need to be observed here, in order to

Some subtleties need to be observed here, in order to

prevent the sticky bit from being shifted left. If we did not

prevent the sticky bit from being shifted left. If we did not

shift |yf| left~1 before shifting |zf| to the right, an incorrect

shift |yf| left~1 before shifting |zf| to the right, an incorrect

answer would be obtained in certain cases---for example, if

answer would be obtained in certain cases---for example, if

$|yf|=2^{54}$, $|zf|=2^{54}+2^{53}-1$, $d=52$.

$|yf|=2^{54}$, $|zf|=2^{54}+2^{53}-1$, $d=52$.

@=

@=

  if (d<=2) zf=shift_right(zf,d,1); /* exact result */

  if (d<=2) zf=shift_right(zf,d,1); /* exact result */

  else if (d>53) zf.h=0, zf.l=1; /* tricky but OK */

  else if (d>53) zf.h=0, zf.l=1; /* tricky but OK */

  else {

  else {

    if (ys!=zs) d--,xe--,yf=shift_left(yf,1);

    if (ys!=zs) d--,xe--,yf=shift_left(yf,1);

    o=zf;

    o=zf;

    zf=shift_right(o,d,1);

    zf=shift_right(o,d,1);

    oo=shift_left(zf,d);

    oo=shift_left(zf,d);

    if (oo.l!=o.l || oo.h!=o.h) zf.l|=1;

    if (oo.l!=o.l || oo.h!=o.h) zf.l|=1;

@ The comparison of floating point numbers with respect to $\epsilon$

@ The comparison of floating point numbers with respect to $\epsilon$

shares some of the characteristics of floating point addition/subtraction.

shares some of the characteristics of floating point addition/subtraction.

In some ways it is simpler, and in other ways it is more difficult;

In some ways it is simpler, and in other ways it is more difficult;

we might as well deal with it now. % anyways

we might as well deal with it now. % anyways

Subroutine |fepscomp(y,z,e,s)| returns 2 if |y|, |z|, or |e| is a NaN

Subroutine |fepscomp(y,z,e,s)| returns 2 if |y|, |z|, or |e| is a NaN

or |e| is negative. It returns 1 if |s=0| and $y\approx z\ (e)$ or if

or |e| is negative. It returns 1 if |s=0| and $y\approx z\ (e)$ or if

|s!=0| and $y\sim z\ (e)$,

|s!=0| and $y\sim z\ (e)$,

as defined in Section~4.2.2 of {\sl Seminumerical Algorithms\/};

as defined in Section~4.2.2 of {\sl Seminumerical Algorithms\/};

otherwise it returns~0.

otherwise it returns~0.

@=

@=

int fepscomp @,@,@[ARGS((octa,octa,octa,int))@];@+@t}\6{@>

int fepscomp @,@,@[ARGS((octa,octa,octa,int))@];@+@t}\6{@>

int fepscomp(y,z,e,s)

int fepscomp(y,z,e,s)

  octa y,z,e; /* the operands */

  octa y,z,e; /* the operands */

  int s; /* test similarity? */

  int s; /* test similarity? */

  octa yf,zf,ef,o,oo;

  octa yf,zf,ef,o,oo;

  int ye,ze,ee;

  int ye,ze,ee;

  char ys,zs,es;

  char ys,zs,es;

  register int yt,zt,et,d;

  register int yt,zt,et,d;

  et=funpack(e,&ef,&ee,&es);

  et=funpack(e,&ef,&ee,&es);

  if (es=='-') return 2;

  if (es=='-') return 2;

  switch (et) {

  switch (et) {

 case nan: return 2;

 case nan: return 2;

 case inf: ee=10000;

 case inf: ee=10000;

 case num: case zro: break;

 case num: case zro: break;

  yt=funpack(y,&yf,&ye,&ys);

  yt=funpack(y,&yf,&ye,&ys);

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  switch (4*yt+zt) {

  switch (4*yt+zt) {

 case 4*nan+nan: case 4*nan+inf: case 4*nan+num: case 4*nan+zro:

 case 4*nan+nan: case 4*nan+inf: case 4*nan+num: case 4*nan+zro:

 case 4*inf+nan: case 4*num+nan: case 4*zro+nan: return 2;

 case 4*inf+nan: case 4*num+nan: case 4*zro+nan: return 2;

 case 4*inf+inf: return (ys==zs || ee>=1023);

 case 4*inf+inf: return (ys==zs || ee>=1023);

 case 4*inf+num: case 4*inf+zro: case 4*num+inf: case 4*zro+inf:

 case 4*inf+num: case 4*inf+zro: case 4*num+inf: case 4*zro+inf:

   return (s && ee>=1022);

   return (s && ee>=1022);

 case 4*zro+zro: return 1;

 case 4*zro+zro: return 1;

 case 4*zro+num: case 4*num+zro:@+ if (!s) return 0;

 case 4*zro+num: case 4*num+zro:@+ if (!s) return 0;

 case 4*num+num: break;

 case 4*num+num: break;

@;

@;

@ The relation $y\approx z\ (\epsilon)$ reduces to

@ The relation $y\approx z\ (\epsilon)$ reduces to

$y\sim z\ (\epsilon/2^d)$, if $d$~is the difference between the

$y\sim z\ (\epsilon/2^d)$, if $d$~is the difference between the

larger and smaller exponents of $y$ and~$z$.

larger and smaller exponents of $y$ and~$z$.

@=

@=

@;

@;

if (ye

if (ye

@;

@;

if (ze==zero_exponent) ze=ye;

if (ze==zero_exponent) ze=ye;

d=ye-ze;

d=ye-ze;

if (!s) ee-=d;

if (!s) ee-=d;

if (ee>=1023) return 1; /* if $\epsilon\ge2$, $z\in N_\epsilon(y)$ */

if (ee>=1023) return 1; /* if $\epsilon\ge2$, $z\in N_\epsilon(y)$ */

@;

@;

if (!o.h && !o.l) return 1;

if (!o.h && !o.l) return 1;

if (ee<968) return 0; /* if $y\ne z$ and $\epsilon<2^{-54}$, $y\not\sim z$ */

if (ee<968) return 0; /* if $y\ne z$ and $\epsilon<2^{-54}$, $y\not\sim z$ */

if (ee>=1021) ef=shift_left(ef,ee-1021);

if (ee>=1021) ef=shift_left(ef,ee-1021);

else ef=shift_right(ef,1021-ee,1);

else ef=shift_right(ef,1021-ee,1);

return o.h

return o.h

@ @=

@ @=

if (ye<0 && yt!=zro) yf=shift_left(y,2), ye=0;

if (ye<0 && yt!=zro) yf=shift_left(y,2), ye=0;

if (ze<0 && zt!=zro) zf=shift_left(z,2), ze=0;

if (ze<0 && zt!=zro) zf=shift_left(z,2), ze=0;

@ At this point $y\sim z$ if and only if

@ At this point $y\sim z$ if and only if

$$|yf|+(-1)^{[ys=zs]}|zf|/2^d\le 2^{ee-1021}|ef|=2^{55}\epsilon.$$

$$|yf|+(-1)^{[ys=zs]}|zf|/2^d\le 2^{ee-1021}|ef|=2^{55}\epsilon.$$

We need to evaluate this relation without overstepping the bounds of

We need to evaluate this relation without overstepping the bounds of

our simulated 64-bit registers.

our simulated 64-bit registers.

When $d>2$, the difference of fraction parts might not fit exactly

When $d>2$, the difference of fraction parts might not fit exactly

in an octabyte;

in an octabyte;

in that case the numbers are not similar unless $\epsilon>3/8$,

in that case the numbers are not similar unless $\epsilon>3/8$,

and we replace the difference by the ceiling of the

and we replace the difference by the ceiling of the

true result. When $\epsilon<1/8$, our program essentially replaces

true result. When $\epsilon<1/8$, our program essentially replaces

$2^{55}\epsilon$ by $\lfloor2^{55}\epsilon\rfloor$. These

$2^{55}\epsilon$ by $\lfloor2^{55}\epsilon\rfloor$. These

truncations are not needed simultaneously. Therefore the logic

truncations are not needed simultaneously. Therefore the logic

is justified by the facts that, if $n$ is an integer, we have

is justified by the facts that, if $n$ is an integer, we have

$x\le n$ if and only if $\lceil x\rceil\le n$;

$x\le n$ if and only if $\lceil x\rceil\le n$;

$n\le x$ if and only if $n\le\lfloor x\rfloor$. (Notice that the

$n\le x$ if and only if $n\le\lfloor x\rfloor$. (Notice that the

concept of ``sticky bit'' is {\it not\/} appropriate here.)

concept of ``sticky bit'' is {\it not\/} appropriate here.)

@^sticky bit@>

@^sticky bit@>

@=

@=

if (d>54) o=zero_octa,oo=zf;

if (d>54) o=zero_octa,oo=zf;

else o=shift_right(zf,d,1),oo=shift_left(o,d);

else o=shift_right(zf,d,1),oo=shift_left(o,d);

if (oo.h!=zf.h || oo.l!=zf.l) { /* truncated result, hence $d>2$ */

if (oo.h!=zf.h || oo.l!=zf.l) { /* truncated result, hence $d>2$ */

  if (ee<1020) return 0; /* difference is too large for similarity */

  if (ee<1020) return 0; /* difference is too large for similarity */

  o=incr(o,ys==zs? 0: 1); /* adjust for ceiling */

  o=incr(o,ys==zs? 0: 1); /* adjust for ceiling */

o=(ys==zs? ominus(yf,o): oplus(yf,o));

o=(ys==zs? ominus(yf,o): oplus(yf,o));

@*Floating point output conversion.

@*Floating point output conversion.

The |print_float| routine converts an octabyte to a floating decimal

The |print_float| routine converts an octabyte to a floating decimal

representation that will be input as precisely the same value.

representation that will be input as precisely the same value.

@^binary-to-decimal conversion@>

@^binary-to-decimal conversion@>

@^radix conversion@>

@^radix conversion@>

@^multiprecision conversion@>

@^multiprecision conversion@>

@=

@=

static void bignum_times_ten @,@,@[ARGS((bignum*))@];

static void bignum_times_ten @,@,@[ARGS((bignum*))@];

static void bignum_dec @,@,@[ARGS((bignum*,bignum*,tetra))@];

static void bignum_dec @,@,@[ARGS((bignum*,bignum*,tetra))@];

static int bignum_compare @,@,@[ARGS((bignum*,bignum*))@];

static int bignum_compare @,@,@[ARGS((bignum*,bignum*))@];

void print_float @,@,@[ARGS((octa))@];@+@t}\6{@>

void print_float @,@,@[ARGS((octa))@];@+@t}\6{@>

void print_float(x)

void print_float(x)

  octa x;

  octa x;

@;

@;

  if (x.h&sign_bit) printf("-");

  if (x.h&sign_bit) printf("-");

       fraction interval $[f\dts g]$ or $(f\dts g)$@>;

       fraction interval $[f\dts g]$ or $(f\dts g)$@>;

@;

@;

@;

@;

@;

@;

@ One way to visualize the problem being solved here is to consider

@ One way to visualize the problem being solved here is to consider

the vastly simpler case in which there are only 2-bit exponents

the vastly simpler case in which there are only 2-bit exponents

and 2-bit fractions. Then the sixteen possible 4-bit combinations

and 2-bit fractions. Then the sixteen possible 4-bit combinations

have the following interpretations:

have the following interpretations:

$$\def\\{\;\dts\;}

$$\def\\{\;\dts\;}

\vbox{\halign{#\qquad&$#$\hfil\cr

\vbox{\halign{#\qquad&$#$\hfil\cr

0000&[0\\0.125]\cr

0000&[0\\0.125]\cr

0001&(0.125\\0.375)\cr

0001&(0.125\\0.375)\cr

0010&[0.375\\0.625]\cr

0010&[0.375\\0.625]\cr

0011&(0.625\\0.875)\cr

0011&(0.625\\0.875)\cr

0100&[0.875\\1.125]\cr

0100&[0.875\\1.125]\cr

0101&(1.125\\1.375)\cr

0101&(1.125\\1.375)\cr

0110&[1.375\\1.625]\cr

0110&[1.375\\1.625]\cr

0111&(1.625\\1.875)\cr

0111&(1.625\\1.875)\cr

1000&[1.875\\2.25]\cr

1000&[1.875\\2.25]\cr

1001&(2.25\\2.75)\cr

1001&(2.25\\2.75)\cr

1010&[2.75\\3.25]\cr

1010&[2.75\\3.25]\cr

1011&(3.25\\3.75)\cr

1011&(3.25\\3.75)\cr

1100&[3.75\\\infty]\cr

1100&[3.75\\\infty]\cr

1101&\rm NaN(0\\0.375)\cr

1101&\rm NaN(0\\0.375)\cr

1110&\rm NaN[0.375\\0.625]\cr

1110&\rm NaN[0.375\\0.625]\cr

1111&\rm NaN(0.625\\1)\cr}}$$

1111&\rm NaN(0.625\\1)\cr}}$$

Notice that the interval is closed, $[f\dts g]$, when the fraction part

Notice that the interval is closed, $[f\dts g]$, when the fraction part

is even; it is open, $(f\dts g)$, when the fraction part is odd.

is even; it is open, $(f\dts g)$, when the fraction part is odd.

The printed outputs for these sixteen values, if we actually were

The printed outputs for these sixteen values, if we actually were

dealing with such short exponents and fractions, would be

dealing with such short exponents and fractions, would be

\.{0.}, \.{.2}, \.{.5}, \.{.7}, \.{1.}, \.{1.2}, \.{1.5}, \.{1.7},

\.{0.}, \.{.2}, \.{.5}, \.{.7}, \.{1.}, \.{1.2}, \.{1.5}, \.{1.7},

\.{2.}, \.{2.5}, \.{3.}, \.{3.5}, \.{Inf}, \.{NaN.2}, \.{NaN}, \.{NaN.8},

\.{2.}, \.{2.5}, \.{3.}, \.{3.5}, \.{Inf}, \.{NaN.2}, \.{NaN}, \.{NaN.8},

respectively.

respectively.

@=

@=

f=shift_left(x,1);

f=shift_left(x,1);

e=f.h>>21;

e=f.h>>21;

f.h&=0x1fffff;

f.h&=0x1fffff;

if (!f.h && !f.l) @@;

if (!f.h && !f.l) @@;

else {

else {

  g=incr(f,1);

  g=incr(f,1);

  f=incr(f,-1);

  f=incr(f,-1);

  if (!e) e=1; /* subnormal */

  if (!e) e=1; /* subnormal */

  else if (e==0x7ff) {

  else if (e==0x7ff) {

    printf("NaN");

    printf("NaN");

    if (g.h==0x100000 && g.l==1) return; /* the ``standard'' NaN */

    if (g.h==0x100000 && g.l==1) return; /* the ``standard'' NaN */

    e=0x3ff; /* extreme NaNs come out OK even without adjusting |f| or |g| */

    e=0x3ff; /* extreme NaNs come out OK even without adjusting |f| or |g| */

  }@+else f.h|=0x200000, g.h|=0x200000;

  }@+else f.h|=0x200000, g.h|=0x200000;

@ @=

@ @=

octa f,g; /* lower and upper bounds on the fraction part */

octa f,g; /* lower and upper bounds on the fraction part */

register int e; /* exponent part */

register int e; /* exponent part */

register int j,k; /* all purpose indices */

register int j,k; /* all purpose indices */

@ The transition points between exponents correspond to powers of~2. At

@ The transition points between exponents correspond to powers of~2. At

such points the interval extends only half as far to the left of that

such points the interval extends only half as far to the left of that

power of~2 as it does to the right. For example, in the 4-bit minifloat numbers

power of~2 as it does to the right. For example, in the 4-bit minifloat numbers

considered above, case 1000 corresponds to the interval $[1.875\;\dts\;2.25]$.

considered above, case 1000 corresponds to the interval $[1.875\;\dts\;2.25]$.

@=

@=

  if (!e) {

  if (!e) {

    printf("0.");@+return;

    printf("0.");@+return;

  if (e==0x7ff) {

  if (e==0x7ff) {

    printf("Inf");@+return;

    printf("Inf");@+return;

  e--;

  e--;

  f.h=0x3fffff, f.l=0xffffffff;

  f.h=0x3fffff, f.l=0xffffffff;

  g.h=0x400000, g.l=2;

  g.h=0x400000, g.l=2;

@ We want to find the ``simplest'' value in the interval corresponding

@ We want to find the ``simplest'' value in the interval corresponding

to the given number, in the sense that it has fewest significant

to the given number, in the sense that it has fewest significant

digits when expressed in decimal notation. Thus, for example,

digits when expressed in decimal notation. Thus, for example,

if the floating point number can be described by a relatively

if the floating point number can be described by a relatively

short string such as `\.{.1}' or `\.{37e100}', we want to discover that

short string such as `\.{.1}' or `\.{37e100}', we want to discover that

representation.

representation.

The basic idea is to generate the decimal representations of the

The basic idea is to generate the decimal representations of the

two endpoints of the interval, outputting the leading digits where

two endpoints of the interval, outputting the leading digits where

both endpoints agree, then making a final decision at the first place where

both endpoints agree, then making a final decision at the first place where

they disagree.

they disagree.

The ``simplest'' value is not always unique. For example, in the

The ``simplest'' value is not always unique. For example, in the

case of 4-bit minifloat numbers we could represent the bit pattern 0001 as

case of 4-bit minifloat numbers we could represent the bit pattern 0001 as

either \.{.2} or \.{.3}, and we could represent 1001 in five equally short

either \.{.2} or \.{.3}, and we could represent 1001 in five equally short

ways: \.{2.3} or \.{2.4} or \.{2.5} or \.{2.6} or \.{2.7}. The

ways: \.{2.3} or \.{2.4} or \.{2.5} or \.{2.6} or \.{2.7}. The

algorithm below tries to choose the middle possibility in such cases.

algorithm below tries to choose the middle possibility in such cases.

[A solution to the analogous problem for fixed-point representations,

[A solution to the analogous problem for fixed-point representations,

without the additional complication of round-to-even, was used by

without the additional complication of round-to-even, was used by

the author in the program for \TeX; see {\sl Beauty is Our Business\/}

the author in the program for \TeX; see {\sl Beauty is Our Business\/}

(Springer, 1990), 233--242.]

(Springer, 1990), 233--242.]

@^Knuth, Donald Ervin@>

@^Knuth, Donald Ervin@>

Suppose we are given two fractions $f$ and $g$, where $0\le f

Suppose we are given two fractions $f$ and $g$, where $0\le f

we want to compute the shortest decimal in the closed interval $[f\dts g]$.

we want to compute the shortest decimal in the closed interval $[f\dts g]$.

If $f=0$, we are done. Otherwise let $10f=d+f'$ and $10g=e+g'$, where

If $f=0$, we are done. Otherwise let $10f=d+f'$ and $10g=e+g'$, where

$0\le f'<1$ and $0\le g'<1$. If $d

$0\le f'<1$ and $0\le g'<1$. If $d

any of the digits $d+1$, \dots,~$e$; otherwise we output the

any of the digits $d+1$, \dots,~$e$; otherwise we output the

common digit $d=e$, and repeat the process on the fractions $0\le f'

common digit $d=e$, and repeat the process on the fractions $0\le f'

A similar procedure works with respect to the open interval $(f\dts g)$.

A similar procedure works with respect to the open interval $(f\dts g)$.

@ The program below carries out the stated algorithm by using multiprecision

@ The program below carries out the stated algorithm by using multiprecision

arithmetic on 77-place integers with 28 bits each. This choice

arithmetic on 77-place integers with 28 bits each. This choice

facilitates multiplication by~10, and allows us to deal with the whole range of

facilitates multiplication by~10, and allows us to deal with the whole range of

floating binary numbers using fixed point arithmetic. We keep track of

floating binary numbers using fixed point arithmetic. We keep track of

the leading and trailing digit positions so that trivial operations on

the leading and trailing digit positions so that trivial operations on

zeros are avoided.

zeros are avoided.

If |f| points to a \&{bignum}, its radix-$2^{28}$ digits are

If |f| points to a \&{bignum}, its radix-$2^{28}$ digits are

|f->dat[0]| through |f->dat[76]|, from most significant to least significant.

|f->dat[0]| through |f->dat[76]|, from most significant to least significant.

We assume that all digit positions are zero unless they lie in the

We assume that all digit positions are zero unless they lie in the

subarray between indices |f->a| and |f->b|, inclusive.

subarray between indices |f->a| and |f->b|, inclusive.

Furthermore, both |f->dat[f->a]| and |f->dat[f->b]| are nonzero,

Furthermore, both |f->dat[f->a]| and |f->dat[f->b]| are nonzero,

unless |f->a=f->b=bignum_prec-1|.

unless |f->a=f->b=bignum_prec-1|.

The \&{bignum} data type can be used with any radix less than

The \&{bignum} data type can be used with any radix less than

$2^{32}$; we will use it later with radix~$10^9$. The |dat| array

$2^{32}$; we will use it later with radix~$10^9$. The |dat| array

is made large enough to accommodate both applications.

is made large enough to accommodate both applications.

@d bignum_prec 157 /* would be 77 if we cared only about |print_float| */

@d bignum_prec 157 /* would be 77 if we cared only about |print_float| */

@=

@=

typedef struct {

typedef struct {

  int a; /* index of the most significant digit */

  int a; /* index of the most significant digit */

  int b; /* index of the least significant digit; must be $\ge a$ */

  int b; /* index of the least significant digit; must be $\ge a$ */

  tetra dat[bignum_prec]; /* the digits; undefined except between |a| and |b| */

  tetra dat[bignum_prec]; /* the digits; undefined except between |a| and |b| */

} bignum;

} bignum;

@ Here, for example, is how we go from $f$ to $10f$, assuming that

@ Here, for example, is how we go from $f$ to $10f$, assuming that

overflow will not occur and that the radix is $2^{28}$:

overflow will not occur and that the radix is $2^{28}$:

@=

@=

static void bignum_times_ten(f)

static void bignum_times_ten(f)

  bignum *f;

  bignum *f;

  register tetra *p,*q; register tetra x,carry;

  register tetra *p,*q; register tetra x,carry;

  for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {

  for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {

    x=*p*10+carry;

    x=*p*10+carry;

    *p=x&0xfffffff;

    *p=x&0xfffffff;

    carry=x>>28;

    carry=x>>28;

  *p=carry;

  *p=carry;

  if (carry) f->a--;

  if (carry) f->a--;

  if (f->dat[f->b]==0 && f->b>f->a) f->b--;

  if (f->dat[f->b]==0 && f->b>f->a) f->b--;

@ And here is how we test whether $fg$, using any

@ And here is how we test whether $fg$, using any

radix whatever:

radix whatever:

@=

@=

static int bignum_compare(f,g)

static int bignum_compare(f,g)

  bignum *f,*g;

  bignum *f,*g;

  register tetra *p,*pp,*q,*qq;

  register tetra *p,*pp,*q,*qq;

  if (f->a!=g->a) return f->a > g->a? -1: 1;

  if (f->a!=g->a) return f->a > g->a? -1: 1;

  pp=&f->dat[f->b], qq=&g->dat[g->b];

  pp=&f->dat[f->b], qq=&g->dat[g->b];

  for (p=&f->dat[f->a],q=&g->dat[g->a]; p<=pp; p++,q++) {

  for (p=&f->dat[f->a],q=&g->dat[g->a]; p<=pp; p++,q++) {

    if (*p!=*q) return *p<*q? -1: 1;

    if (*p!=*q) return *p<*q? -1: 1;

    if (q==qq) return p

    if (q==qq) return p

  return -1;

  return -1;

@ The following subroutine subtracts $g$ from~$f$, assuming that

@ The following subroutine subtracts $g$ from~$f$, assuming that

$f\ge g>0$ and using a given radix.

$f\ge g>0$ and using a given radix.

@=

@=

static void bignum_dec(f,g,r)

static void bignum_dec(f,g,r)

  bignum *f,*g;

  bignum *f,*g;

  tetra r; /* the radix */

  tetra r; /* the radix */

  register tetra *p,*q,*qq;

  register tetra *p,*q,*qq;

  register int x,borrow;

  register int x,borrow;

  while (g->b>f->b) f->dat[++f->b]=0;

  while (g->b>f->b) f->dat[++f->b]=0;

  qq=&g->dat[g->a];

  qq=&g->dat[g->a];

  for (p=&f->dat[g->b],q=&g->dat[g->b],borrow=0;q>=qq;p--,q--) {

  for (p=&f->dat[g->b],q=&g->dat[g->b],borrow=0;q>=qq;p--,q--) {

    x=*p - *q - borrow;

    x=*p - *q - borrow;

    if (x>=0) borrow=0, *p=x;

    if (x>=0) borrow=0, *p=x;

    else borrow=1, *p=x+r;

    else borrow=1, *p=x+r;

  for (;borrow;p--)

  for (;borrow;p--)

    if (*p) borrow=0, *p=*p-1;

    if (*p) borrow=0, *p=*p-1;

    else *p=r-1;

    else *p=r-1;

  while (f->dat[f->a]==0) {

  while (f->dat[f->a]==0) {

    if (f->a==f->b) { /* the result is zero */

    if (f->a==f->b) { /* the result is zero */

      f->a=f->b=bignum_prec-1, f->dat[bignum_prec-1]=0;

      f->a=f->b=bignum_prec-1, f->dat[bignum_prec-1]=0;

      return;

      return;

    f->a++;

    f->a++;

  while (f->dat[f->b]==0) f->b--;

  while (f->dat[f->b]==0) f->b--;

@ Armed with these subroutines, we are ready to solve the problem.

@ Armed with these subroutines, we are ready to solve the problem.

The first task is to put the numbers into \&{bignum} form.

The first task is to put the numbers into \&{bignum} form.

If the exponent is |e|, the number destined for digit |dat[k]| will

If the exponent is |e|, the number destined for digit |dat[k]| will

consist of the rightmost 28 bits of the given fraction after it has

consist of the rightmost 28 bits of the given fraction after it has

been shifted right $c-e-28k$ bits, for some constant~$c$.

been shifted right $c-e-28k$ bits, for some constant~$c$.

We choose $c$ so that,

We choose $c$ so that,

when $e$ has its maximum value \Hex{7ff}, the leading digit will

when $e$ has its maximum value \Hex{7ff}, the leading digit will

go into position |dat[1]|, and so that when the number to be printed

go into position |dat[1]|, and so that when the number to be printed

is exactly~1 the integer part of~$g$ will also be exactly~1.

is exactly~1 the integer part of~$g$ will also be exactly~1.

@d magic_offset 2112 /* the constant $c$ that makes it work */

@d magic_offset 2112 /* the constant $c$ that makes it work */

@d origin 37 /* the radix point follows |dat[37]| */

@d origin 37 /* the radix point follows |dat[37]| */

@=

@=

k=(magic_offset-e)/28;

k=(magic_offset-e)/28;

ff.dat[k-1]=shift_right(f,magic_offset+28-e-28*k,1).l&0xfffffff;

ff.dat[k-1]=shift_right(f,magic_offset+28-e-28*k,1).l&0xfffffff;

gg.dat[k-1]=shift_right(g,magic_offset+28-e-28*k,1).l&0xfffffff;

gg.dat[k-1]=shift_right(g,magic_offset+28-e-28*k,1).l&0xfffffff;

ff.dat[k]=shift_right(f,magic_offset-e-28*k,1).l&0xfffffff;

ff.dat[k]=shift_right(f,magic_offset-e-28*k,1).l&0xfffffff;

gg.dat[k]=shift_right(g,magic_offset-e-28*k,1).l&0xfffffff;

gg.dat[k]=shift_right(g,magic_offset-e-28*k,1).l&0xfffffff;

ff.dat[k+1]=shift_left(f,e+28*k-(magic_offset-28)).l&0xfffffff;

ff.dat[k+1]=shift_left(f,e+28*k-(magic_offset-28)).l&0xfffffff;

gg.dat[k+1]=shift_left(g,e+28*k-(magic_offset-28)).l&0xfffffff;

gg.dat[k+1]=shift_left(g,e+28*k-(magic_offset-28)).l&0xfffffff;

ff.a=(ff.dat[k-1]? k-1: k);

ff.a=(ff.dat[k-1]? k-1: k);

ff.b=(ff.dat[k+1]? k+1: k);

ff.b=(ff.dat[k+1]? k+1: k);

gg.a=(gg.dat[k-1]? k-1: k);

gg.a=(gg.dat[k-1]? k-1: k);

gg.b=(gg.dat[k+1]? k+1: k);

gg.b=(gg.dat[k+1]? k+1: k);

@ If $e$ is sufficiently small, the fractions $f$ and $g$ will be less than~1,

@ If $e$ is sufficiently small, the fractions $f$ and $g$ will be less than~1,

and we can use the stated algorithm directly. Of course, if $e$ is

and we can use the stated algorithm directly. Of course, if $e$ is

extremely small, a lot of leading zeros need to be lopped off; in the

extremely small, a lot of leading zeros need to be lopped off; in the

worst case, we may have to multiply $f$ and~$g$ by~10 more than 300 times.

worst case, we may have to multiply $f$ and~$g$ by~10 more than 300 times.

But hey, we don't need to do that extremely often, and computers are

But hey, we don't need to do that extremely often, and computers are

pretty fast nowadays.

pretty fast nowadays.

In the small-exponent case, the computation always terminates before

In the small-exponent case, the computation always terminates before

$f$ becomes zero, because the interval endpoints are fractions with

$f$ becomes zero, because the interval endpoints are fractions with

denominator $2^t$ for some $t>50$.

denominator $2^t$ for some $t>50$.

The invariant relations |ff.dat[ff.a]!=0| and |gg.dat[gg.a]!=0| are

The invariant relations |ff.dat[ff.a]!=0| and |gg.dat[gg.a]!=0| are

not maintained by the computation here, when |ff.a=origin| or |gg.a=origin|.

not maintained by the computation here, when |ff.a=origin| or |gg.a=origin|.

But no harm is done, because |bignum_compare| is not used.

But no harm is done, because |bignum_compare| is not used.

@=

@=

if (e>0x401) @@;

if (e>0x401) @@;

else@+{ /* if |e<=0x401| we have |gg.a>=origin| and |gg.dat[origin]<=8| */

else@+{ /* if |e<=0x401| we have |gg.a>=origin| and |gg.dat[origin]<=8| */

  if (ff.a>origin) ff.dat[origin]=0;

  if (ff.a>origin) ff.dat[origin]=0;

  for (e=1, p=s; gg.a>origin || ff.dat[origin]==gg.dat[origin]; ) {

  for (e=1, p=s; gg.a>origin || ff.dat[origin]==gg.dat[origin]; ) {

    if (gg.a>origin) e--;

    if (gg.a>origin) e--;

    else *p++=ff.dat[origin]+'0', ff.dat[origin]=0, gg.dat[origin]=0;

    else *p++=ff.dat[origin]+'0', ff.dat[origin]=0, gg.dat[origin]=0;

    bignum_times_ten(&ff);

    bignum_times_ten(&ff);

    bignum_times_ten(&gg);

    bignum_times_ten(&gg);

  *p++=((ff.dat[origin]+1+gg.dat[origin])>>1)+'0'; /* the middle digit */

  *p++=((ff.dat[origin]+1+gg.dat[origin])>>1)+'0'; /* the middle digit */

*p='\0'; /* terminate the string |s| */

*p='\0'; /* terminate the string |s| */

@ When |e| is large, we use the stated algorithm by considering $f$ and

@ When |e| is large, we use the stated algorithm by considering $f$ and

$g$ to be fractions whose denominator is a power of~10.

$g$ to be fractions whose denominator is a power of~10.

An interesting case arises when the number to be converted is

An interesting case arises when the number to be converted is

\Hex{44ada56a4b0835bf}, since the interval turns out to be

\Hex{44ada56a4b0835bf}, since the interval turns out to be

$$ (69999999999999991611392\ \ \dts\ \ 70000000000000000000000).$$

$$ (69999999999999991611392\ \ \dts\ \ 70000000000000000000000).$$

If this were a closed interval, we could simply give the answer

If this were a closed interval, we could simply give the answer

\.{7e22}; but the number \.{7e22} actually corresponds to

\.{7e22}; but the number \.{7e22} actually corresponds to

\Hex{44ada56a4b0835c0}

\Hex{44ada56a4b0835c0}

because of the round-to-even rule. Therefore the correct answer is, say,

because of the round-to-even rule. Therefore the correct answer is, say,

\.{6.9999999999999995e22}. This example shows that we need a slightly

\.{6.9999999999999995e22}. This example shows that we need a slightly

different strategy in the case of open intervals; we cannot simply

different strategy in the case of open intervals; we cannot simply

look at the first position in which the endpoints have different

look at the first position in which the endpoints have different

decimal digits. Therefore we change the invariant relation to $0\le f

decimal digits. Therefore we change the invariant relation to $0\le f

when open intervals are involved,

when open intervals are involved,

and we do not terminate the process when $f=0$ or $g=1$.

and we do not terminate the process when $f=0$ or $g=1$.

@=

@=

{@+register int open=x.l&1;

{@+register int open=x.l&1;

  tt.dat[origin]=10;

  tt.dat[origin]=10;

  tt.a=tt.b=origin;

  tt.a=tt.b=origin;

  for (e=1;bignum_compare(&gg,&tt)>=open;e++)

  for (e=1;bignum_compare(&gg,&tt)>=open;e++)

    bignum_times_ten(&tt);

    bignum_times_ten(&tt);

  p=s;

  p=s;

  while (1) {

  while (1) {

    bignum_times_ten(&ff);

    bignum_times_ten(&ff);

    bignum_times_ten(&gg);

    bignum_times_ten(&gg);

    for (j='0';bignum_compare(&ff,&tt)>=0;j++)

    for (j='0';bignum_compare(&ff,&tt)>=0;j++)

      bignum_dec(&ff,&tt,0x10000000),bignum_dec(&gg,&tt,0x10000000);

      bignum_dec(&ff,&tt,0x10000000),bignum_dec(&gg,&tt,0x10000000);

    if (bignum_compare(&gg,&tt)>=open) break;

    if (bignum_compare(&gg,&tt)>=open) break;

    *p++=j;

    *p++=j;

    if (ff.a==bignum_prec-1 && !open)

    if (ff.a==bignum_prec-1 && !open)

      goto done; /* $f=0$ in a closed interval */

      goto done; /* $f=0$ in a closed interval */

  for (k=j;bignum_compare(&gg,&tt)>=open;k++) bignum_dec(&gg,&tt,0x10000000);

  for (k=j;bignum_compare(&gg,&tt)>=open;k++) bignum_dec(&gg,&tt,0x10000000);

  *p++=(j+1+k)>>1; /* the middle digit */

  *p++=(j+1+k)>>1; /* the middle digit */

 done:;

 done:;

@ The length of string~|s| will be at most 17. For if $f$ and $g$

@ The length of string~|s| will be at most 17. For if $f$ and $g$

agree to 17 places, we have $g/f<1+10^{-16}$; but the

agree to 17 places, we have $g/f<1+10^{-16}$; but the

ratio $g/f$ is always $\ge(1+2^{-52}+2^{-53})/(1+2^{-52}-2^{-53})

ratio $g/f$ is always $\ge(1+2^{-52}+2^{-53})/(1+2^{-52}-2^{-53})

>1+2\times10^{-16}$.

>1+2\times10^{-16}$.

@=

@=

bignum ff,gg; /* fractions or numerators of fractions */

bignum ff,gg; /* fractions or numerators of fractions */

bignum tt; /* power of ten (used as the denominator) */

bignum tt; /* power of ten (used as the denominator) */

char s[18];

char s[18];

register char *p;

register char *p;

@ At this point the significant digits are in string |s|, and |s[0]!='0'|.

@ At this point the significant digits are in string |s|, and |s[0]!='0'|.

If we put a decimal point at the left of~|s|, the result should

If we put a decimal point at the left of~|s|, the result should

be multiplied by $10^e$.

be multiplied by $10^e$.

We prefer the output `\.{300.}' to the form `\.{3e2}', and we prefer

We prefer the output `\.{300.}' to the form `\.{3e2}', and we prefer

`\.{.03}' to `\.{3e-2}'. In general, the output will use an

`\.{.03}' to `\.{3e-2}'. In general, the output will use an

explicit exponent only if the alternative would take more than

explicit exponent only if the alternative would take more than

18~characters.

18~characters.

@=

@=

if (e>17 || e<(int)strlen(s)-17)

if (e>17 || e<(int)strlen(s)-17)

  printf("%c%s%se%d",s[0],(s[1]? ".": ""),s+1,e-1);

  printf("%c%s%se%d",s[0],(s[1]? ".": ""),s+1,e-1);

else if (e<0) printf(".%0*d%s",-e,0,s);

else if (e<0) printf(".%0*d%s",-e,0,s);

else if (strlen(s)>=e) printf("%.*s.%s",e,s,s+e);

else if (strlen(s)>=e) printf("%.*s.%s",e,s,s+e);

else printf("%s%0*d.",s,e-(int)strlen(s),0);

else printf("%s%0*d.",s,e-(int)strlen(s),0);

@*Floating point input conversion. Going the other way, we want to

@*Floating point input conversion. Going the other way, we want to

be able to convert a given decimal number into its floating binary

be able to convert a given decimal number into its floating binary

@^decimal-to-binary conversion@>

@^decimal-to-binary conversion@>

@^radix conversion@>

@^radix conversion@>

@^multiprecision conversion@>

@^multiprecision conversion@>

equivalent. The following syntax is supported:

equivalent. The following syntax is supported:

$$\vbox{\halign{$#$\hfil\cr

$$\vbox{\halign{$#$\hfil\cr

\\is\.0\mid\.1\mid\.2\mid\.3\mid\.4\mid

\\is\.0\mid\.1\mid\.2\mid\.3\mid\.4\mid

        \.5\mid\.6\mid\.7\mid\.8\mid\.9\cr

        \.5\mid\.6\mid\.7\mid\.8\mid\.9\cr

\\is\\mid\\\cr

\\is\\mid\\\cr

\\is\\..\mid\..\\mid

\\is\\..\mid\..\\mid

                      \\..\\cr

                      \\..\\cr

\\is\\mid\.+\mid\.-\cr

\\is\\mid\.+\mid\.-\cr

\\is\.e\\\cr

\\is\.e\\\cr

\\is\\mid\\cr

\\is\\mid\\cr

\\is\\\mid

\\is\\\mid

                    \\\mid\cr

                    \\\mid\cr

\hskip12em          \.{Inf}\mid\.{NaN}\mid\.{NaN.}\\cr

\hskip12em          \.{Inf}\mid\.{NaN}\mid\.{NaN.}\\cr

\\is\\\cr

\\is\\\cr

\\is\\\cr

\\is\\\cr

}}$$

}}$$

For example, `\.{-3.}' is the floating constant \Hex{c008000000000000}\thinspace;

For example, `\.{-3.}' is the floating constant \Hex{c008000000000000}\thinspace;

`\.{1e3}' and `\.{1000}' are both equivalent to \Hex{408f400000000000}\thinspace;

`\.{1e3}' and `\.{1000}' are both equivalent to \Hex{408f400000000000}\thinspace;

`\.{NaN}' and `\.{+NaN.5}' are both equivalent to \Hex{7ff8000000000000}.

`\.{NaN}' and `\.{+NaN.5}' are both equivalent to \Hex{7ff8000000000000}.

The |scan_const| routine looks at a given string and finds the

The |scan_const| routine looks at a given string and finds the

longest initial substring that matches the syntax of either \

longest initial substring that matches the syntax of either \

constant> or \. It puts the corresponding value

constant> or \. It puts the corresponding value

into the global octabyte variable~|val|; it also puts the position of the first

into the global octabyte variable~|val|; it also puts the position of the first

unscanned character in the global pointer variable |next_char|.

unscanned character in the global pointer variable |next_char|.

It returns 1 if a floating constant was found, 0~if a decimal constant

It returns 1 if a floating constant was found, 0~if a decimal constant

was found, $-1$ if nothing was found. A decimal constant that doesn't

was found, $-1$ if nothing was found. A decimal constant that doesn't

fit in an octabyte is computed modulo~$2^{64}$.

fit in an octabyte is computed modulo~$2^{64}$.

@^syntax of floating point constants@>

@^syntax of floating point constants@>

The value of |exceptions| set by |scan_const| is not necessarily correct.

The value of |exceptions| set by |scan_const| is not necessarily correct.

@=

@=

static void bignum_double @,@,@[ARGS((bignum*))@];

static void bignum_double @,@,@[ARGS((bignum*))@];

int scan_const @,@,@[ARGS((char*))@];@+@t}\6{@>

int scan_const @,@,@[ARGS((char*))@];@+@t}\6{@>

int scan_const(s)

int scan_const(s)

  char *s;

  char *s;

@;

@;

  val.h=val.l=0;

  val.h=val.l=0;

  p=s;

  p=s;

  if (*p=='+' || *p=='-') sign=*p++;@+else sign='+';

  if (*p=='+' || *p=='-') sign=*p++;@+else sign='+';

  if (strncmp(p,"NaN",3)==0) NaN=true, p+=3;

  if (strncmp(p,"NaN",3)==0) NaN=true, p+=3;

  else NaN=false;

  else NaN=false;

  if ((isdigit(*p)&&!NaN) || (*p=='.' && isdigit(*(p+1))))

  if ((isdigit(*p)&&!NaN) || (*p=='.' && isdigit(*(p+1))))

@;

@;

  if (NaN) @;

  if (NaN) @;

  if (strncmp(p,"Inf",3)==0) @;

  if (strncmp(p,"Inf",3)==0) @;

 no_const_found: next_char=s;@+return -1;

 no_const_found: next_char=s;@+return -1;

@ @=

@ @=

octa val; /* value returned by |scan_const| */

octa val; /* value returned by |scan_const| */

char *next_char; /* pointer returned by |scan_const| */

char *next_char; /* pointer returned by |scan_const| */

@ @=

@ @=

register char *p,*q; /* for string manipulations */

register char *p,*q; /* for string manipulations */

register bool NaN; /* are we processing a NaN? */

register bool NaN; /* are we processing a NaN? */

int sign; /* |'+'| or |'-'| */

int sign; /* |'+'| or |'-'| */

@ @=

@ @=

  next_char=p;

  next_char=p;

  val.h=0x600000, exp=0x3fe;

  val.h=0x600000, exp=0x3fe;

  goto packit;

  goto packit;

@ @=

@ @=

  next_char=p+3;

  next_char=p+3;

  goto make_it_infinite;

  goto make_it_infinite;

@ We saw above that a string of at most 17 digits is enough to characterize

@ We saw above that a string of at most 17 digits is enough to characterize

a floating point number, for purposes of output. But a much longer buffer

a floating point number, for purposes of output. But a much longer buffer

for digits is needed when we're doing input. For example, consider the

for digits is needed when we're doing input. For example, consider the

borderline quantity $(1+2^{-53})/2^{1022}$; its decimal expansion, when

borderline quantity $(1+2^{-53})/2^{1022}$; its decimal expansion, when

written out exactly, is a number with more than 750 significant digits:

written out exactly, is a number with more than 750 significant digits:

\.{2.2250738585...8125e-308}.

\.{2.2250738585...8125e-308}.

If {\it any one\/} of those digits is increased, or if

If {\it any one\/} of those digits is increased, or if

additional nonzero digits are added as in

additional nonzero digits are added as in

\.{2.2250738585...81250000001e-308},

\.{2.2250738585...81250000001e-308},

the rounded value is supposed to change from \Hex{0010000000000000}

the rounded value is supposed to change from \Hex{0010000000000000}

to \Hex{0010000000000001}.

to \Hex{0010000000000001}.

We assume here that the user prefers a perfectly correct answer to

We assume here that the user prefers a perfectly correct answer to

a speedy almost-correct one, so we implement the most general case.

a speedy almost-correct one, so we implement the most general case.

@=

@=

  for (q=buf0,dec_pt=(char*)0;isdigit(*p);p++) {

  for (q=buf0,dec_pt=(char*)0;isdigit(*p);p++) {

    val=oplus(val,shift_left(val,2)); /* multiply by 5 */

    val=oplus(val,shift_left(val,2)); /* multiply by 5 */

    val=incr(shift_left(val,1),*p-'0');

    val=incr(shift_left(val,1),*p-'0');

    if (q>buf0 || *p!='0')

    if (q>buf0 || *p!='0')

       if (q

       if (q

       else if (*(q-1)=='0') *(q-1)=*p;

       else if (*(q-1)=='0') *(q-1)=*p;

  if (NaN) *q++='1';

  if (NaN) *q++='1';

  if (*p=='.') @;

  if (*p=='.') @;

  next_char=p;

  next_char=p;

  if (*p=='e' && !NaN) @@;

  if (*p=='e' && !NaN) @@;

  else exp=0;

  else exp=0;

  if (dec_pt) @;

  if (dec_pt) @;

  if (sign=='-') val=ominus(zero_octa,val);

  if (sign=='-') val=ominus(zero_octa,val);

  return 0;

  return 0;

@ @=

@ @=

  dec_pt=q;

  dec_pt=q;

  p++;

  p++;

  for (zeros=0;isdigit(*p);p++)

  for (zeros=0;isdigit(*p);p++)

    if (*p=='0' && q==buf0) zeros++;

    if (*p=='0' && q==buf0) zeros++;

    else if (q

    else if (q

    else if (*(q-1)=='0') *(q-1)=*p;

    else if (*(q-1)=='0') *(q-1)=*p;

@ The buffer needs room for eight digits of padding at the left, followed

@ The buffer needs room for eight digits of padding at the left, followed

by up to $1022+53-307$ significant digits, followed by a ``sticky'' digit

by up to $1022+53-307$ significant digits, followed by a ``sticky'' digit

at position |buf_max-1|, and eight more digits of padding.

at position |buf_max-1|, and eight more digits of padding.

@d buf0 (buf+8)

@d buf0 (buf+8)

@d buf_max (buf+777)

@d buf_max (buf+777)

@=

@=

static char buf[785]="00000000"; /* where we put significant input digits */

static char buf[785]="00000000"; /* where we put significant input digits */

@ @=

@ @=

register char* dec_pt; /* position of decimal point in |buf| */

register char* dec_pt; /* position of decimal point in |buf| */

register int exp; /* scanned exponent; later used for raw binary exponent */

register int exp; /* scanned exponent; later used for raw binary exponent */

register int zeros; /* leading zeros removed after decimal point */

register int zeros; /* leading zeros removed after decimal point */

@ Here we don't advance |next_char| and force a decimal point until we

@ Here we don't advance |next_char| and force a decimal point until we

know that a syntactically correct exponent exists.

know that a syntactically correct exponent exists.

The code here will convert extra-large inputs like

The code here will convert extra-large inputs like

`\.{9e+9999999999999999}' into $\infty$ and extra-small inputs into zero.

`\.{9e+9999999999999999}' into $\infty$ and extra-small inputs into zero.

Strange inputs like `\.{-00.0e9999999}' must also be accommodated.

Strange inputs like `\.{-00.0e9999999}' must also be accommodated.

@=

@=

{@+register char exp_sign;

{@+register char exp_sign;

  p++;

  p++;

  if (*p=='+' || *p=='-') exp_sign=*p++;@+else exp_sign='+';

  if (*p=='+' || *p=='-') exp_sign=*p++;@+else exp_sign='+';

  if (isdigit(*p)) {

  if (isdigit(*p)) {

    for (exp=*p++ -'0';isdigit(*p);p++)

    for (exp=*p++ -'0';isdigit(*p);p++)

      if (exp<1000) exp = 10*exp + *p - '0';

      if (exp<1000) exp = 10*exp + *p - '0';

    if (!dec_pt) dec_pt=q, zeros=0;

    if (!dec_pt) dec_pt=q, zeros=0;

    if (exp_sign=='-') exp=-exp;

    if (exp_sign=='-') exp=-exp;

    next_char=p;

    next_char=p;

@ @=

@ @=

@;

@;

@;

@;

packit:  @;

packit:  @;

  return 1;

  return 1;

@ Now we get ready to compute the binary fraction bits, by putting the

@ Now we get ready to compute the binary fraction bits, by putting the

scanned input digits into a multiprecision fixed-point

scanned input digits into a multiprecision fixed-point

accumulator |ff| that spans the full necessary range.

accumulator |ff| that spans the full necessary range.

After this step, the number that we want to convert to floating binary

After this step, the number that we want to convert to floating binary

will appear in |ff.dat[ff.a]|, |ff.dat[ff.a+1]|, \dots,

will appear in |ff.dat[ff.a]|, |ff.dat[ff.a+1]|, \dots,

|ff.dat[ff.b]|.

|ff.dat[ff.b]|.

The radix-$10^9$ digit in ${\it ff}[36-k]$ is understood to be multiplied

The radix-$10^9$ digit in ${\it ff}[36-k]$ is understood to be multiplied

by $10^{9k}$, for $36\ge k\ge-120$.

by $10^{9k}$, for $36\ge k\ge-120$.

@=

@=

x=buf+341+zeros-dec_pt-exp;

x=buf+341+zeros-dec_pt-exp;

if (q==buf0 || x>=1413) {

if (q==buf0 || x>=1413) {

 make_it_zero: exp=-99999;@+ goto packit;

 make_it_zero: exp=-99999;@+ goto packit;

if (x<0) {

if (x<0) {

 make_it_infinite: exp=99999;@+ goto packit;

 make_it_infinite: exp=99999;@+ goto packit;

ff.a=x/9;

ff.a=x/9;

for (p=q;p

for (p=q;p

q=q-1-(q+341+zeros-dec_pt-exp)%9; /* compute stopping place in |buf| */

q=q-1-(q+341+zeros-dec_pt-exp)%9; /* compute stopping place in |buf| */

for (p=buf0-x%9,k=ff.a;p<=q && k<=156; p+=9, k++)

for (p=buf0-x%9,k=ff.a;p<=q && k<=156; p+=9, k++)

       into |ff.dat[k]|@>;

       into |ff.dat[k]|@>;

ff.b=k-1;

ff.b=k-1;

for (x=0;p<=q;p+=9) if (strncmp(p,"000000000",9)!=0) x=1;

for (x=0;p<=q;p+=9) if (strncmp(p,"000000000",9)!=0) x=1;

ff.dat[156]+=x; /* nonzero digits that fall off the right are sticky */

ff.dat[156]+=x; /* nonzero digits that fall off the right are sticky */

@^sticky bit@>

@^sticky bit@>

while (ff.dat[ff.b]==0) ff.b--;

while (ff.dat[ff.b]==0) ff.b--;

@ @=

@ @=

  for (x=*p-'0',pp=p+1;pp

  for (x=*p-'0',pp=p+1;pp

  ff.dat[k]=x;

  ff.dat[k]=x;

@ @=

@ @=

register int k,x;

register int k,x;

register char *pp;

register char *pp;

bignum ff,tt;

bignum ff,tt;

@ Here's a subroutine that is dual to |bignum_times_ten|. It changes $f$

@ Here's a subroutine that is dual to |bignum_times_ten|. It changes $f$

to~$2f$, assuming that overflow will not occur and that the radix is $10^9$.

to~$2f$, assuming that overflow will not occur and that the radix is $10^9$.

@=

@=

static void bignum_double(f)

static void bignum_double(f)

  bignum *f;

  bignum *f;

  register tetra *p,*q; register int x,carry;

  register tetra *p,*q; register int x,carry;

  for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {

  for (p=&f->dat[f->b],q=&f->dat[f->a],carry=0; p>=q; p--) {

    x = *p + *p + carry;

    x = *p + *p + carry;

    if (x>=1000000000) carry=1, *p=x-1000000000;

    if (x>=1000000000) carry=1, *p=x-1000000000;

    else carry=0, *p=x;

    else carry=0, *p=x;

  *p=carry;

  *p=carry;

  if (carry) f->a--;

  if (carry) f->a--;

  if (f->dat[f->b]==0 && f->b>f->a) f->b--;

  if (f->dat[f->b]==0 && f->b>f->a) f->b--;

@ @=

@ @=

val=zero_octa;

val=zero_octa;

if (ff.a>36) {

if (ff.a>36) {

  for (exp=0x3fe;ff.a>36;exp--) bignum_double(&ff);

  for (exp=0x3fe;ff.a>36;exp--) bignum_double(&ff);

  for (k=54;k;k--) {

  for (k=54;k;k--) {

    if (ff.dat[36]) {

    if (ff.dat[36]) {

      if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<

      if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<

      ff.dat[36]=0;

      ff.dat[36]=0;

      if (ff.b==36) break; /* break if |ff| now zero */

      if (ff.b==36) break; /* break if |ff| now zero */

    bignum_double(&ff);

    bignum_double(&ff);

}@+else {

}@+else {

  tt.a=tt.b=36, tt.dat[36]=2;

  tt.a=tt.b=36, tt.dat[36]=2;

  for (exp=0x3fe;bignum_compare(&ff,&tt)>=0;exp++) bignum_double(&tt);

  for (exp=0x3fe;bignum_compare(&ff,&tt)>=0;exp++) bignum_double(&tt);

  for (k=54;k;k--) {

  for (k=54;k;k--) {

    bignum_double(&ff);

    bignum_double(&ff);

    if (bignum_compare(&ff,&tt)>=0) {

    if (bignum_compare(&ff,&tt)>=0) {

      if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<

      if (k>=32) val.h |= 1<<(k-32);@+else val.l |= 1<

      bignum_dec(&ff,&tt,1000000000);

      bignum_dec(&ff,&tt,1000000000);

      if (ff.a==bignum_prec-1) break; /* break if |ff| now zero */

      if (ff.a==bignum_prec-1) break; /* break if |ff| now zero */

if (k==0) val.l |= 1; /* add sticky bit if |ff| nonzero */

if (k==0) val.l |= 1; /* add sticky bit if |ff| nonzero */

@ We need to be careful that the input `\.{NaN.999999999999999999999}' doesn't

@ We need to be careful that the input `\.{NaN.999999999999999999999}' doesn't

get rounded up; it is supposed to yield \Hex{7fffffffffffffff}.

get rounded up; it is supposed to yield \Hex{7fffffffffffffff}.

Although the input `\.{NaN.0}' is illegal, strictly speaking, we silently

Although the input `\.{NaN.0}' is illegal, strictly speaking, we silently

convert it to \Hex{7ff0000000000001}---a number that would be

convert it to \Hex{7ff0000000000001}---a number that would be

output as `\.{NaN.0000000000000002}'.

output as `\.{NaN.0000000000000002}'.

@=

@=

val=fpack(val,exp,sign,ROUND_NEAR);

val=fpack(val,exp,sign,ROUND_NEAR);

if (NaN) {

if (NaN) {

  if ((val.h&0x7fffffff)==0x40000000) val.h |= 0x7fffffff, val.l=0xffffffff;

  if ((val.h&0x7fffffff)==0x40000000) val.h |= 0x7fffffff, val.l=0xffffffff;

  else if ((val.h&0x7fffffff)==0x3ff00000 && !val.l) val.h|=0x40000000,val.l=1;

  else if ((val.h&0x7fffffff)==0x3ff00000 && !val.l) val.h|=0x40000000,val.l=1;

  else val.h |= 0x40000000;

  else val.h |= 0x40000000;

@*Floating point remainders. In this section we implement the remainder

@*Floating point remainders. In this section we implement the remainder

of the floating point operations---one of which happens to be the

of the floating point operations---one of which happens to be the

operation of taking the remainder.

operation of taking the remainder.

The easiest task remaining is to compare two floating point quantities.

The easiest task remaining is to compare two floating point quantities.

Routine |fcomp| returns $-1$~if~$yz$, and

Routine |fcomp| returns $-1$~if~$yz$, and

$+2$~if $y$ and~$z$ are unordered.

$+2$~if $y$ and~$z$ are unordered.

@=

@=

int fcomp @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

int fcomp @,@,@[ARGS((octa,octa))@];@+@t}\6{@>

int fcomp(y,z)

int fcomp(y,z)

  octa y,z;

  octa y,z;

  ftype yt,zt;

  ftype yt,zt;

  int ye,ze;

  int ye,ze;

  char ys,zs;

  char ys,zs;

  octa yf,zf;

  octa yf,zf;

  register int x;

  register int x;

  yt=funpack(y,&yf,&ye,&ys);

  yt=funpack(y,&yf,&ye,&ys);

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  switch (4*yt+zt) {

  switch (4*yt+zt) {

 case 4*nan+nan: case 4*zro+nan: case 4*num+nan: case 4*inf+nan:

 case 4*nan+nan: case 4*zro+nan: case 4*num+nan: case 4*inf+nan:

 case 4*nan+zro: case 4*nan+num: case 4*nan+inf: return 2;

 case 4*nan+zro: case 4*nan+num: case 4*nan+inf: return 2;

 case 4*zro+zro: return 0;

 case 4*zro+zro: return 0;

 case 4*zro+num: case 4*num+zro: case 4*zro+inf: case 4*inf+zro:

 case 4*zro+num: case 4*num+zro: case 4*zro+inf: case 4*inf+zro:

 case 4*num+num: case 4*num+inf: case 4*inf+num: case 4*inf+inf:

 case 4*num+num: case 4*num+inf: case 4*inf+num: case 4*inf+inf:

  if (ys!=zs) x=1;

  if (ys!=zs) x=1;

  else if (y.h>z.h) x=1;

  else if (y.h>z.h) x=1;

  else if (y.h

  else if (y.h

  else if (y.l>z.l) x=1;

  else if (y.l>z.l) x=1;

  else if (y.l

  else if (y.l

  else return 0;

  else return 0;

  break;

  break;

  return (ys=='-'? -x: x);

  return (ys=='-'? -x: x);

@ Several \MMIX\ operations act on a single floating point number and

@ Several \MMIX\ operations act on a single floating point number and

accept an arbitrary rounding mode. For example, consider the

accept an arbitrary rounding mode. For example, consider the

operation of rounding to the nearest floating point integer:

operation of rounding to the nearest floating point integer:

@=

@=

octa fintegerize @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa fintegerize @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa fintegerize(z,r)

octa fintegerize(z,r)

  octa z; /* the operand */

  octa z; /* the operand */

  int r; /* the rounding mode */

  int r; /* the rounding mode */

  ftype zt;

  ftype zt;

  int ze;

  int ze;

  char zs;

  char zs;

  octa xf,zf;

  octa xf,zf;

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  if (!r) r=cur_round;

  if (!r) r=cur_round;

  switch (zt) {

  switch (zt) {

 case nan:@+if (!(z.h&0x80000)) {@+exceptions|=I_BIT;@+z.h|=0x80000;@+}

 case nan:@+if (!(z.h&0x80000)) {@+exceptions|=I_BIT;@+z.h|=0x80000;@+}

 case inf: case zro: return z;

 case inf: case zro: return z;

 case num: @;

 case num: @;

@ @=

@ @=

if (ze>=1074) return fpack(zf,ze,zs,ROUND_OFF); /* already an integer */

if (ze>=1074) return fpack(zf,ze,zs,ROUND_OFF); /* already an integer */

if (ze<=1020) xf.h=0,xf.l=1;

if (ze<=1020) xf.h=0,xf.l=1;

else {@+octa oo;

else {@+octa oo;

  xf=shift_right(zf,1074-ze,1);

  xf=shift_right(zf,1074-ze,1);

  oo=shift_left(xf,1074-ze);

  oo=shift_left(xf,1074-ze);

  if (oo.l!=zf.l || oo.h!=zf.h) xf.l|=1; /* sticky bit */

  if (oo.l!=zf.l || oo.h!=zf.h) xf.l|=1; /* sticky bit */

@^sticky bit@>

@^sticky bit@>

switch (r) {

switch (r) {

 case ROUND_DOWN:@+ if (zs=='-') xf=incr(xf,3);@+break;

 case ROUND_DOWN:@+ if (zs=='-') xf=incr(xf,3);@+break;

 case ROUND_UP:@+ if (zs!='-') xf=incr(xf,3);

 case ROUND_UP:@+ if (zs!='-') xf=incr(xf,3);

 case ROUND_OFF: break;

 case ROUND_OFF: break;

 case ROUND_NEAR: xf=incr(xf, xf.l&4? 2: 1);@+break;

 case ROUND_NEAR: xf=incr(xf, xf.l&4? 2: 1);@+break;

xf.l&=0xfffffffc;

xf.l&=0xfffffffc;

if (ze>=1022) return fpack(shift_left(xf,1074-ze),ze,zs,ROUND_OFF);

if (ze>=1022) return fpack(shift_left(xf,1074-ze),ze,zs,ROUND_OFF);

if (xf.l) xf.h=0x3ff00000, xf.l=0;

if (xf.l) xf.h=0x3ff00000, xf.l=0;

if (zs=='-') xf.h|=sign_bit;

if (zs=='-') xf.h|=sign_bit;

return xf;

return xf;

@ To convert floating point to fixed point, we use |fixit|.

@ To convert floating point to fixed point, we use |fixit|.

@=

@=

octa fixit @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa fixit @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa fixit(z,r)

octa fixit(z,r)

  octa z; /* the operand */

  octa z; /* the operand */

  int r; /* the rounding mode */

  int r; /* the rounding mode */

  ftype zt;

  ftype zt;

  int ze;

  int ze;

  char zs;

  char zs;

  octa zf,o;

  octa zf,o;

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  if (!r) r=cur_round;

  if (!r) r=cur_round;

  switch (zt) {

  switch (zt) {

 case nan: case inf: exceptions|=I_BIT;@+return z;

 case nan: case inf: exceptions|=I_BIT;@+return z;

 case zro: return zero_octa;

 case zro: return zero_octa;

 case num:@+if (funpack(fintegerize(z,r),&zf,&ze,&zs)==zro) return zero_octa;

 case num:@+if (funpack(fintegerize(z,r),&zf,&ze,&zs)==zro) return zero_octa;

   if (ze<=1076) o=shift_right(zf,1076-ze,1);

   if (ze<=1076) o=shift_right(zf,1076-ze,1);

   else {

   else {

     if (ze>1085 || (ze==1085 && (zf.h>0x400000 || @|

     if (ze>1085 || (ze==1085 && (zf.h>0x400000 || @|

           (zf.h==0x400000 && (zf.l || zs!='-'))))) exceptions|=W_BIT;

           (zf.h==0x400000 && (zf.l || zs!='-'))))) exceptions|=W_BIT;

     if (ze>=1140) return zero_octa;

     if (ze>=1140) return zero_octa;

     o=shift_left(zf,ze-1076);

     o=shift_left(zf,ze-1076);

  return (zs=='-'? ominus(zero_octa,o): o);

  return (zs=='-'? ominus(zero_octa,o): o);

@ Going the other way, we can specify not only a rounding mode but whether

@ Going the other way, we can specify not only a rounding mode but whether

the given fixed point octabyte is signed or unsigned, and whether the

the given fixed point octabyte is signed or unsigned, and whether the

result should be rounded to short precision.

result should be rounded to short precision.

@=

@=

octa floatit @,@,@[ARGS((octa,int,int,int))@];@+@t}\6{@>

octa floatit @,@,@[ARGS((octa,int,int,int))@];@+@t}\6{@>

octa floatit(z,r,u,p)

octa floatit(z,r,u,p)

  octa z; /* octabyte to float */

  octa z; /* octabyte to float */

  int r; /* rounding mode */

  int r; /* rounding mode */

  int u; /* unsigned? */

  int u; /* unsigned? */

  int p; /* short precision? */

  int p; /* short precision? */

  int e;@+char s;

  int e;@+char s;

  register int t;

  register int t;

  exceptions=0;

  exceptions=0;

  if (!z.h && !z.l) return zero_octa;

  if (!z.h && !z.l) return zero_octa;

  if (!r) r=cur_round;

  if (!r) r=cur_round;

  if (!u && (z.h&sign_bit)) s='-', z=ominus(zero_octa,z);@+ else s='+';

  if (!u && (z.h&sign_bit)) s='-', z=ominus(zero_octa,z);@+ else s='+';

  e=1076;

  e=1076;

  while (z.h<0x400000) e--,z=shift_left(z,1);

  while (z.h<0x400000) e--,z=shift_left(z,1);

  while (z.h>=0x800000) {

  while (z.h>=0x800000) {

    e++;

    e++;

    t=z.l&1;

    t=z.l&1;

    z=shift_right(z,1,1);

    z=shift_right(z,1,1);

    z.l|=t;

    z.l|=t;

  if (p) @;

  if (p) @;

  return fpack(z,e,s,r);

  return fpack(z,e,s,r);

@ @=

@ @=

  register int ex;@+register tetra t;

  register int ex;@+register tetra t;

  t=sfpack(z,e,s,r);

  t=sfpack(z,e,s,r);

  ex=exceptions;

  ex=exceptions;

  sfunpack(t,&z,&e,&s);

  sfunpack(t,&z,&e,&s);

  exceptions=ex;

  exceptions=ex;

@ The square root operation is more interesting.

@ The square root operation is more interesting.

@=

@=

octa froot @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa froot @,@,@[ARGS((octa,int))@];@+@t}\6{@>

octa froot(z,r)

octa froot(z,r)

  octa z; /* the operand */

  octa z; /* the operand */

  int r; /* the rounding mode */

  int r; /* the rounding mode */

  ftype zt;

  ftype zt;

  int ze;

  int ze;

  char zs;

  char zs;

  octa x,xf,rf,zf;

  octa x,xf,rf,zf;

  register int xe,k;

  register int xe,k;

  if (!r) r=cur_round;

  if (!r) r=cur_round;

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  if (zs=='-' && zt!=zro) exceptions|=I_BIT, x=standard_NaN;

  if (zs=='-' && zt!=zro) exceptions|=I_BIT, x=standard_NaN;

  else@+switch (zt) {

  else@+switch (zt) {

 case nan:@+ if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;

 case nan:@+ if (!(z.h&0x80000)) exceptions|=I_BIT, z.h|=0x80000;

  return z;

  return z;

 case inf: case zro: x=z;@+break;

 case inf: case zro: x=z;@+break;

 case num: @;

 case num: @;

  if (zs=='-') x.h|=sign_bit;

  if (zs=='-') x.h|=sign_bit;

  return x;

  return x;

@ The square root can be found by an adaptation of the old pencil-and-paper

@ The square root can be found by an adaptation of the old pencil-and-paper

method. If $n=\lfloor\sqrt s\rfloor$, where $s$ is an integer,

method. If $n=\lfloor\sqrt s\rfloor$, where $s$ is an integer,

we have $s=n^2+r$ where $0\le r\le2n$;

we have $s=n^2+r$ where $0\le r\le2n$;

this invariant can be maintained if we replace $s$ by $4s+(0,1,2,3)$

this invariant can be maintained if we replace $s$ by $4s+(0,1,2,3)$

and $n$ by $2n+(0,1)$. The following code implements this idea with

and $n$ by $2n+(0,1)$. The following code implements this idea with

$2n$ in~|xf| and $r$ in~|rf|. (It could easily be made to run about

$2n$ in~|xf| and $r$ in~|rf|. (It could easily be made to run about

twice as fast.)

twice as fast.)

@=

@=

xf.h=0, xf.l=2;

xf.h=0, xf.l=2;

xe=(ze+0x3fe)>>1;

xe=(ze+0x3fe)>>1;

if (ze&1) zf=shift_left(zf,1);

if (ze&1) zf=shift_left(zf,1);

rf.h=0, rf.l=(zf.h>>22)-1;

rf.h=0, rf.l=(zf.h>>22)-1;

for (k=53;k;k--) {

for (k=53;k;k--) {

  rf=shift_left(rf,2);@+ xf=shift_left(xf,1);

  rf=shift_left(rf,2);@+ xf=shift_left(xf,1);

  if (k>=43) rf=incr(rf,(zf.h>>(2*(k-43)))&3);

  if (k>=43) rf=incr(rf,(zf.h>>(2*(k-43)))&3);

  else if (k>=27) rf=incr(rf,(zf.l>>(2*(k-27)))&3);

  else if (k>=27) rf=incr(rf,(zf.l>>(2*(k-27)))&3);

  if ((rf.l>xf.l && rf.h>=xf.h) || rf.h>xf.h) {

  if ((rf.l>xf.l && rf.h>=xf.h) || rf.h>xf.h) {

    xf.l++;@+rf=ominus(rf,xf);@+xf.l++;

    xf.l++;@+rf=ominus(rf,xf);@+xf.l++;

if (rf.h || rf.l) xf.l++; /* sticky bit */

if (rf.h || rf.l) xf.l++; /* sticky bit */

return fpack(xf,xe,'+',r);

return fpack(xf,xe,'+',r);

@ And finally, the genuine floating point remainder. Subroutine |fremstep|

@ And finally, the genuine floating point remainder. Subroutine |fremstep|

either calculates $y\,{\rm rem}\,z$ or reduces $y$ to a smaller number

either calculates $y\,{\rm rem}\,z$ or reduces $y$ to a smaller number

having the same remainder with respect to~$z$. In the latter case

having the same remainder with respect to~$z$. In the latter case

the |E_BIT| is set in |exceptions|. A third parameter, |delta|,

the |E_BIT| is set in |exceptions|. A third parameter, |delta|,

gives a decrease in exponent that is acceptable for incomplete results;

gives a decrease in exponent that is acceptable for incomplete results;

if |delta| is sufficiently large, say 2500, the correct result will

if |delta| is sufficiently large, say 2500, the correct result will

always be obtained in one step of |fremstep|.

always be obtained in one step of |fremstep|.

@=

@=

octa fremstep @,@,@[ARGS((octa,octa,int))@];@+@t}\6{@>

octa fremstep @,@,@[ARGS((octa,octa,int))@];@+@t}\6{@>

octa fremstep(y,z,delta)

octa fremstep(y,z,delta)

  octa y,z;

  octa y,z;

  int delta;

  int delta;

  ftype yt,zt;

  ftype yt,zt;

  int ye,ze;

  int ye,ze;

  char xs,ys,zs;

  char xs,ys,zs;

  octa x,xf,yf,zf;

  octa x,xf,yf,zf;

  register int xe,thresh,odd;

  register int xe,thresh,odd;

  yt=funpack(y,&yf,&ye,&ys);

  yt=funpack(y,&yf,&ye,&ys);

  zt=funpack(z,&zf,&ze,&zs);

  zt=funpack(z,&zf,&ze,&zs);

  switch (4*yt+zt) {

  switch (4*yt+zt) {

 @t\4@>@;

 @t\4@>@;

 case 4*zro+zro: case 4*num+zro: case 4*inf+zro:

 case 4*zro+zro: case 4*num+zro: case 4*inf+zro:

 case 4*inf+num: case 4*inf+inf: x=standard_NaN;

 case 4*inf+num: case 4*inf+inf: x=standard_NaN;

  exceptions|=I_BIT;@+break;

  exceptions|=I_BIT;@+break;

 case 4*zro+num: case 4*zro+inf: case 4*num+inf: return y;

 case 4*zro+num: case 4*zro+inf: case 4*num+inf: return y;

 case 4*num+num: @;

 case 4*num+num: @;

 zero_out: x=zero_octa;

 zero_out: x=zero_octa;

  if (ys=='-') x.h|=sign_bit;

  if (ys=='-') x.h|=sign_bit;

  return x;

  return x;

@ If there's a huge difference in exponents and the remainder is nonzero,

@ If there's a huge difference in exponents and the remainder is nonzero,

this computation will take a long time. One could compute

this computation will take a long time. One could compute

$(2^ny)\,{\rm rem}\,z$ much more quickly for large~$n$ by using $O(\log n)$

$(2^ny)\,{\rm rem}\,z$ much more quickly for large~$n$ by using $O(\log n)$

multiplications modulo~$z$, but the floating remainder operation isn't

multiplications modulo~$z$, but the floating remainder operation isn't

important enough to justify such expensive hardware.

important enough to justify such expensive hardware.

Results of floating remainder are always exact, so the rounding mode

Results of floating remainder are always exact, so the rounding mode

is immaterial.

is immaterial.

@=

@=

odd=0; /* will be 1 if we've subtracted an odd multiple of~$z$ from $y$ */

odd=0; /* will be 1 if we've subtracted an odd multiple of~$z$ from $y$ */

thresh=ye-delta;

thresh=ye-delta;

if (thresh

if (thresh

while (ye>=thresh) @

while (ye>=thresh) @

   |goto zero_out| if the remainder is zero,

   |goto zero_out| if the remainder is zero,

   |goto try_complement| if appropriate@>;

   |goto try_complement| if appropriate@>;

if (ye>=ze) {

if (ye>=ze) {

  exceptions|=E_BIT;@+return fpack(yf,ye,ys,ROUND_OFF);

  exceptions|=E_BIT;@+return fpack(yf,ye,ys,ROUND_OFF);

if (ye

if (ye

yf=shift_right(yf,1,1);

yf=shift_right(yf,1,1);

try_complement: xf=ominus(zf,yf), xe=ze, xs='+' + '-' - ys;

try_complement: xf=ominus(zf,yf), xe=ze, xs='+' + '-' - ys;

if (xf.h>yf.h || (xf.h==yf.h && (xf.l>yf.l || (xf.l==yf.l && !odd))))

if (xf.h>yf.h || (xf.h==yf.h && (xf.l>yf.l || (xf.l==yf.l && !odd))))

  xf=yf, xs=ys;

  xf=yf, xs=ys;

while (xf.h<0x400000) xe--, xf=shift_left(xf,1);

while (xf.h<0x400000) xe--, xf=shift_left(xf,1);

return fpack(xf,xe,xs,ROUND_OFF);

return fpack(xf,xe,xs,ROUND_OFF);

@ Here we are careful not to change the sign of |y|, because a remainder

@ Here we are careful not to change the sign of |y|, because a remainder

of~0 is supposed to inherit the original sign of~|y|.

of~0 is supposed to inherit the original sign of~|y|.

@=

@=

  if (yf.h==zf.h && yf.l==zf.l) goto zero_out;

  if (yf.h==zf.h && yf.l==zf.l) goto zero_out;

  if (yf.h

  if (yf.h

    if (ye==ze) goto try_complement;

    if (ye==ze) goto try_complement;

    ye--, yf=shift_left(yf,1);

    ye--, yf=shift_left(yf,1);

  yf=ominus(yf,zf);

  yf=ominus(yf,zf);

  if (ye==ze) odd=1;

  if (ye==ze) odd=1;

  while (yf.h<0x400000) ye--, yf=shift_left(yf,1);

  while (yf.h<0x400000) ye--, yf=shift_left(yf,1);

@* Index.

@* Index.

Browse

Tools

Subversion Repositories eco32

[/] [eco32/] [tags/] [eco32-0.22/] [fp/] [implementation/] [arith/] [mmix-arith.w] - Diff between revs 15 and 21