URL https://opencores.org/ocsvn/or1k_old/or1k_old/trunk

Subversion Repositories or1k_old

[/] [or1k_old/] [trunk/] [rc203soc/] [sw/] [uClinux/] [arch/] [i386/] [math-emu/] [wm_sqrt.S] - Blame information for rev 1765

Go to most recent revision | Details | Compare with Previous | View Log


        .file   "wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995                                              |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/
 
/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/
 
#include "exception.h"
#include "fpu_emu.h"
 
 
#ifndef NON_REENTRANT_FPU
/*
 * Re-entrant build: the scratch words live on the stack and are addressed
 * relative to %ebp.  The lowest slot is -28(%ebp), so the routine using
 * them must reserve 28 bytes below %ebp.
 */
/*      Local storage on the stack: */
#define FPU_accum_3     -4(%ebp)        /* ms word */
#define FPU_accum_2     -8(%ebp)
#define FPU_accum_1     -12(%ebp)
#define FPU_accum_0     -16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
#define FPU_fsqrt_arg_1 -24(%ebp)
#define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */

#else
/*
 * Non-re-entrant build: the same seven scratch words are kept in
 * statically allocated storage in the data section.
 */
/*      Local storage in a static area: */
.data
        .align 4,0
FPU_accum_3:
        .long   0                /* ms word */
FPU_accum_2:
        .long   0
FPU_accum_1:
        .long   0
FPU_accum_0:
        .long   0

/* The de-normalised argument:
                    sq_2                  sq_1              sq_0
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
             ^ binary point here
 */
FPU_fsqrt_arg_2:
        .long   0                /* ms word */
FPU_fsqrt_arg_1:
        .long   0
FPU_fsqrt_arg_0:
        .long   0                /* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */
 
 
.text
/*
 * Entry: set up the frame, fetch the significand of n and store it in
 * de-normalised form in FPU_fsqrt_arg_2:FPU_fsqrt_arg_1:FPU_fsqrt_arg_0.
 *
 * %esi, %edi and %ebx are pushed here; nothing in this file pops them,
 * every exit leaves via a jmp to the rounding routine (presumably that
 * routine restores them and returns to the caller -- confirm there).
 */
ENTRY(wm_sqrt)
        pushl   %ebp
        movl    %esp,%ebp
#ifndef NON_REENTRANT_FPU
        subl    $28,%esp                /* room for the seven scratch words */
#endif /* NON_REENTRANT_FPU */
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    PARAM1,%esi             /* n */

        movl    SIGH(%esi),%eax         /* ms word of the significand */
        movl    SIGL(%esi),%ecx         /* ls word of the significand */
        xorl    %edx,%edx               /* will receive any bit shifted out */

/*
 * De-normalise the argument.  If the exponent equals EXP_BIAS the
 * argument is in [1.0 .. 2.0) and is shifted right by one bit (the bit
 * shifted out ends up in the ms bit of %edx); otherwise it is taken to
 * be in [2.0 .. 4.0) and is used as it is.
 */

        cmpl    EXP_BIAS,EXP(%esi)
        jnz     sqrt_arg_ge_2

        shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
        rcrl    $1,%ecx
        rcrl    $1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

        movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
        movl    %ecx,FPU_fsqrt_arg_1
        movl    %edx,FPU_fsqrt_arg_0
 
/*
 * Linear first estimate.  On entry %eax holds the ms word of the
 * de-normalised argument; the estimate is left in %esi.
 */
        shrl    $1,%eax
        addl    $0x40000000,%eax
        movl    $0xaaaaaaaa,%ecx
        mull    %ecx
        shll    $1,%edx                 /* max result was 7fff... */
        testl   %edx,%edx               /* but min was 3fff... */
        js      sqrt_prelim_no_adjust   /* ms bit already set */

        movl    $0x80000000,%edx        /* round up */

sqrt_prelim_no_adjust:
        movl    %edx,%esi       /* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

        movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
        shrl    $1,%ecx         /* Halving the arg first will prevent a */
                                /* divide overflow later. */

/* Compute  (g + n/g)/2  three times over (g is the guess, in %esi). */
        .rept   3
        movl    %ecx,%edx       /* msw of the arg / 2 */
        divl    %esi            /* divided by the current estimate */
        shrl    $1,%esi         /* g / 2 */
        addl    %eax,%esi       /* the new estimate */
        .endr
 
/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */
 
/*
 * Stage 2: refine the 32 bit estimate in %esi into a 64 bit estimate
 * in %esi:%edi using  guess + (n - guess^2) / (2 * guess).
 */
/* First, find the square of the guess */
        movl    %esi,%eax
        mull    %esi
/* guess^2 now in %edx:%eax */

        movl    FPU_fsqrt_arg_1,%ecx
        subl    %ecx,%eax
        movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
        sbbl    %ecx,%edx               /* %edx:%eax = guess^2 - n */
        jnc     sqrt_stage_2_positive

/* Subtraction gives a negative result,
   negate the result before division. */
        notl    %edx
        notl    %eax
        addl    $1,%eax
        adcl    $0,%edx

        divl    %esi
        movl    %eax,%ecx               /* ms word of the quotient */

        movl    %edx,%eax               /* carry on with the remainder */
        divl    %esi                    /* ls word of the quotient */
        jmp     sqrt_stage_2_finish

sqrt_stage_2_positive:
        divl    %esi
        movl    %eax,%ecx               /* ms word of the quotient */

        movl    %edx,%eax               /* carry on with the remainder */
        divl    %esi                    /* ls word of the quotient */

/* guess^2 was not below n, so the correction has to be subtracted:
   negate the quotient in %ecx:%eax. */
        notl    %ecx
        notl    %eax
        addl    $1,%eax
        adcl    $0,%ecx

sqrt_stage_2_finish:
        sarl    $1,%ecx         /* divide by 2 */
        rcrl    $1,%eax

        /* Form the new estimate in %esi:%edi */
        movl    %eax,%edi
        addl    %ecx,%esi

        /* The flags tested here are those of the addl just above. */
        jnz     sqrt_stage_2_done       /* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
        /* Explicit 32 bit compare: with an immediate and a memory operand
           the operand size cannot be deduced from the operands. */
        cmpl    $0xffffffff,FPU_fsqrt_arg_1
        jnz     sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
        xorl    %eax,%eax
        decl    %eax                    /* %eax = 0xffffffff */
        movl    %eax,%edi
        movl    %eax,%esi
        movl    $0x7fffffff,%eax        /* the rounding information */
        jmp     sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
        pushl   EX_INTERNAL|0x213
        call    EXCEPTION
#endif /* PARANOID */
 
sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/*
 * Stage 3: one more correction step on the 64 bit estimate in
 * %esi:%edi.  The top three words of guess^2 are accumulated in
 * FPU_accum_3:FPU_accum_2:FPU_accum_1, n is subtracted, and the
 * difference is divided by the ms word of the guess.
 */

/* Find the square of the guess. */
        movl    %edi,%eax               /* ls word of guess */
        mull    %edi
        movl    %edx,FPU_accum_1        /* only the ms word of ls*ls is kept */

        movl    %esi,%eax
        mull    %esi
        movl    %edx,FPU_accum_3
        movl    %eax,FPU_accum_2

        movl    %edi,%eax
        mull    %esi
        addl    %eax,FPU_accum_1
        adcl    %edx,FPU_accum_2
        adcl    $0,FPU_accum_3

/* The cross product is needed twice.  It is still in %edx:%eax (the
   additions above only wrote to memory), so it is not recomputed: */
/*      movl    %esi,%eax */
/*      mull    %edi */
        addl    %eax,FPU_accum_1
        adcl    %edx,FPU_accum_2
        adcl    $0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

        movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
        subl    %eax,FPU_accum_1
        movl    FPU_fsqrt_arg_1,%eax
        sbbl    %eax,FPU_accum_2
        movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
        sbbl    %eax,FPU_accum_3
        jnc     sqrt_stage_3_positive

/* Subtraction gives a negative result,
   negate the result before division */
        notl    FPU_accum_1
        notl    FPU_accum_2
        notl    FPU_accum_3
        addl    $1,FPU_accum_1
        adcl    $0,FPU_accum_2

#ifdef PARANOID
        adcl    $0,FPU_accum_3  /* This must be zero */
        jz      sqrt_stage_3_no_error

sqrt_stage_3_error:
        pushl   EX_INTERNAL|0x207
        call    EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

        movl    FPU_accum_2,%edx
        movl    FPU_accum_1,%eax
        divl    %esi
        movl    %eax,%ecx               /* ms word of the quotient */

        movl    %edx,%eax               /* carry on with the remainder */
        divl    %esi                    /* ls word of the quotient */

        sarl    $1,%ecx         /* divide by 2 */
        rcrl    $1,%eax

        /* prepare to round the result */

        /* guess^2 was below n: add the correction to the guess.
           %eax is left holding the bits below the ls word. */
        addl    %ecx,%edi
        adcl    $0,%esi

        jmp     sqrt_stage_3_finished

sqrt_stage_3_positive:
        movl    FPU_accum_2,%edx
        movl    FPU_accum_1,%eax
        divl    %esi
        movl    %eax,%ecx               /* ms word of the quotient */

        movl    %edx,%eax               /* carry on with the remainder */
        divl    %esi                    /* ls word of the quotient */

        sarl    $1,%ecx         /* divide by 2 */
        rcrl    $1,%eax

        /* prepare to round the result */

        notl    %eax            /* Negate the correction term */
        notl    %ecx
        addl    $1,%eax
        adcl    $0,%ecx         /* carry here ==> correction == 0 */
        adcl    $0xffffffff,%esi        /* adds -1 plus that carry */

        addl    %ecx,%edi
        adcl    $0,%esi
 
sqrt_stage_3_finished:
 
/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 *
 * %eax holds the bits below the 64 bit estimate.  If it is within 0x20
 * of 0 (from either side) or of 0x80000000, the estimate is re-examined
 * by one of the special cases below; otherwise control falls through
 * (or jumps) to sqrt_round_result with %eax used as it stands.
 */
        cmpl    $0xffffffe0,%eax
        ja      sqrt_near_exact_x       /* just below the next value up */
 
        cmpl    $0x00000020,%eax
        jb      sqrt_near_exact         /* just above the estimate */
 
        cmpl    $0x7fffffe0,%eax
        jb      sqrt_round_result       /* safely below the half way point */
 
        cmpl    $0x80000020,%eax
        jb      sqrt_get_more_precision /* too close to half way */
 
sqrt_round_result:
/*
 * Hand over to the rounding code.  On arrival %esi:%edi is the 64 bit
 * result and %eax the bits below it; they are passed on as
 *      %eax:%ebx = result,  %edx = bits below the result,
 *      %edi      = PARAM1 (n),  %ecx = PARAM2 (control word).
 */
        movl    PARAM2,%ecx
        movl    %edi,%ebx               /* ls word, before %edi is reused */
        movl    PARAM1,%edi
        movl    %eax,%edx               /* extra bits, before %eax is reused */
        movl    %esi,%eax               /* ms word */
        movl    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
        jmp     fpu_reg_round_sqrt
 
 
sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
        addl    $1,%edi
        adcl    $0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 *
 * Only the two ls words of the square are formed (%ebx:%ecx); the
 * result is passed to sqrt_round_result as a value in %eax.
 */
        movl    %edi,%eax               /* ls word of guess */
        mull    %edi
        movl    %edx,%ebx               /* 2nd ls word of square */
        movl    %eax,%ecx               /* ls word of square */

        movl    %edi,%eax
        mull    %esi
        addl    %eax,%ebx               /* the cross product counts twice */
        addl    %eax,%ebx

#ifdef PARANOID
/*
 * NOTE(review): as written this check can never fire.  Falling through
 * the first branch needs %ebx >= 0xffffffb0, and then the second branch
 * (%ebx > 0x50) is always taken.  The sense of the two jumps looks
 * inverted -- confirm the intended test before relying on it.
 */
        cmp     $0xffffffb0,%ebx
        jb      sqrt_near_exact_ok

        cmp     $0x00000050,%ebx
        ja      sqrt_near_exact_ok

        pushl   EX_INTERNAL|0x214
        call    EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

        or      %ebx,%ebx
        js      sqrt_near_exact_small

        jnz     sqrt_near_exact_large

/*
 * %ebx is zero, so the ls word of the square decides.  That word was
 * saved in %ecx; %edx holds the ms word of %edi * %esi by now and must
 * not be used here (it is zero for an estimate of 0x80000000:00000001,
 * whose square does have a non-zero ls word).
 */
        or      %ebx,%ecx
        jnz     sqrt_near_exact_large

/* Our estimate is exactly the right answer */
        xorl    %eax,%eax
        jmp     sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
        movl    $0x000000ff,%eax
        jmp     sqrt_round_result

sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
        subl    $1,%edi
        sbbl    $0,%esi
        movl    $0xffffff00,%eax
        jmp     sqrt_round_result
 
 
sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
/*
 * The estimate plus half a unit in its last place is squared (only the
 * two ls words, %ebx:%ecx, are formed) to decide on which side of the
 * half way point the true root lies.  The result is passed to
 * sqrt_round_result as a value in %eax.
 */
        stc                     /* The extra bit. */
        rcll    $1,%edi         /* Shift the estimate left one bit */
        rcll    $1,%esi

        movl    %edi,%eax               /* ls word of guess */
        mull    %edi
        movl    %edx,%ebx               /* 2nd ls word of square */
        movl    %eax,%ecx               /* ls word of square */

        movl    %edi,%eax
        mull    %esi
        addl    %eax,%ebx               /* the cross product counts twice */
        addl    %eax,%ebx

/* Put our estimate back to its original value */
        stc                     /* The ms bit. */
        rcrl    $1,%esi         /* Shift the estimate right one bit */
        rcrl    $1,%edi

#ifdef PARANOID
/*
 * NOTE(review): as written this check can never fire.  Falling through
 * the first branch needs %ebx >= 0xffffff60, and then the second branch
 * (%ebx > 0xa0) is always taken.  The sense of the two jumps looks
 * inverted -- confirm the intended test before relying on it.
 */
        cmp     $0xffffff60,%ebx
        jb      sqrt_more_prec_ok

        cmp     $0x000000a0,%ebx
        ja      sqrt_more_prec_ok

        pushl   EX_INTERNAL|0x215
        call    EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

        or      %ebx,%ebx
        js      sqrt_more_prec_small

        jnz     sqrt_more_prec_large

        or      %ebx,%ecx               /* %ebx is zero: test the ls word */
        jnz     sqrt_more_prec_large

/* Our estimate is exactly the right answer */
        movl    $0x80000000,%eax
        jmp     sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
        movl    $0x800000ff,%eax
        jmp     sqrt_round_result

sqrt_more_prec_large:
/* Our estimate is too large */
        movl    $0x7fffff00,%eax
        jmp     sqrt_round_result

Browse

Tools

Subversion Repositories or1k_old

[/] [or1k_old/] [trunk/] [rc203soc/] [sw/] [uClinux/] [arch/] [i386/] [math-emu/] [wm_sqrt.S] - Blame information for rev 1765

Line No.	Rev	Author	Line
1	1623	jcastillo	`.file "wm_sqrt.S"`
2			`/*---------------------------------------------------------------------------+`
3			`\| wm_sqrt.S \|`
4			`\| \|`
5			`\| Fixed point arithmetic square root evaluation. \|`
6			`\| \|`
7			`\| Copyright (C) 1992,1993,1995 \|`
8			`\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|`
9			`\| Australia. E-mail billm@jacobi.maths.monash.edu.au \|`
10			`\| \|`
11			`\| Call from C as: \|`
12			`\| void wm_sqrt(FPU_REG *n, unsigned int control_word) \|`
13			`\| \|`
14			`+---------------------------------------------------------------------------*/`
15
16			`/*---------------------------------------------------------------------------+`
17			`\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|`
18			`\| returns the square root of n in n. \|`
19			`\| \|`
20			`\| Use Newton's method to compute the square root of a number, which must \|`
21			`\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|`
22			`\| Does not check the sign or tag of the argument. \|`
23			`\| Sets the exponent, but not the sign or tag of the result. \|`
24			`\| \|`
25			`\| The guess is kept in %esi:%edi \|`
26			`+---------------------------------------------------------------------------*/`
27
28			`#include "exception.h"`
29			`#include "fpu_emu.h"`
30
31
32			`#ifndef NON_REENTRANT_FPU`
33			`/* Local storage on the stack: */`
34			`#define FPU_accum_3 -4(%ebp) /* ms word */`
35			`#define FPU_accum_2 -8(%ebp)`
36			`#define FPU_accum_1 -12(%ebp)`
37			`#define FPU_accum_0 -16(%ebp)`
38
39			`/*`
40			`* The de-normalised argument:`
41			`* sq_2 sq_1 sq_0`
42			`* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0`
43			`* ^ binary point here`
44			`*/`
45			`#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */`
46			`#define FPU_fsqrt_arg_1 -24(%ebp)`
47			`#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */`
48
49			`#else`
50			`/* Local storage in a static area: */`
51			`.data`
52			`.align 4,0`
53			`FPU_accum_3:`
54			`.long 0 /* ms word */`
55			`FPU_accum_2:`
56			`.long 0`
57			`FPU_accum_1:`
58			`.long 0`
59			`FPU_accum_0:`
60			`.long 0`
61
62			`/* The de-normalised argument:`
63			`sq_2 sq_1 sq_0`
64			`b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0`
65			`^ binary point here`
66			`*/`
67			`FPU_fsqrt_arg_2:`
68			`.long 0 /* ms word */`
69			`FPU_fsqrt_arg_1:`
70			`.long 0`
71			`FPU_fsqrt_arg_0:`
72			`.long 0 /* ls word, at most the ms bit is set */`
73			`#endif NON_REENTRANT_FPU`
74
75
76			`.text`
77			`ENTRY(wm_sqrt)`
78			`pushl %ebp`
79			`movl %esp,%ebp`
80			`#ifndef NON_REENTRANT_FPU`
81			`subl $28,%esp`
82			`#endif NON_REENTRANT_FPU`
83			`pushl %esi`
84			`pushl %edi`
85			`pushl %ebx`
86
87			`movl PARAM1,%esi`
88
89			`movl SIGH(%esi),%eax`
90			`movl SIGL(%esi),%ecx`
91			`xorl %edx,%edx`
92
93			`/* We use a rough linear estimate for the first guess.. */`
94
95			`cmpl EXP_BIAS,EXP(%esi)`
96			`jnz sqrt_arg_ge_2`
97
98			`shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */`
99			`rcrl $1,%ecx`
100			`rcrl $1,%edx`
101
102			`sqrt_arg_ge_2:`
103			`/* From here on, n is never accessed directly again until it is`
104			`replaced by the answer. */`
105
106			`movl %eax,FPU_fsqrt_arg_2 /* ms word of n */`
107			`movl %ecx,FPU_fsqrt_arg_1`
108			`movl %edx,FPU_fsqrt_arg_0`
109
110			`/* Make a linear first estimate */`
111			`shrl $1,%eax`
112			`addl $0x40000000,%eax`
113			`movl $0xaaaaaaaa,%ecx`
114			`mull %ecx`
115			`shll %edx /* max result was 7fff... */`
116			`testl $0x80000000,%edx /* but min was 3fff... */`
117			`jnz sqrt_prelim_no_adjust`
118
119			`movl $0x80000000,%edx /* round up */`
120
121			`sqrt_prelim_no_adjust:`
122			`movl %edx,%esi /* Our first guess */`
123
124			`/* We have now computed (approx) (2 + x) / 3, which forms the basis`
125			`for a few iterations of Newton's method */`
126
127			`movl FPU_fsqrt_arg_2,%ecx /* ms word */`
128
129			`/*`
130			`* From our initial estimate, three iterations are enough to get us`
131			`* to 30 bits or so. This will then allow two iterations at better`
132			`* precision to complete the process.`
133			`*/`
134
135			`/* Compute (g + n/g)/2 at each iteration (g is the guess). */`
136			`shrl %ecx /* Doing this first will prevent a divide */`
137			`/* overflow later. */`
138
139			`movl %ecx,%edx /* msw of the arg / 2 */`
140			`divl %esi /* current estimate */`
141			`shrl %esi /* divide by 2 */`
142			`addl %eax,%esi /* the new estimate */`
143
144			`movl %ecx,%edx`
145			`divl %esi`
146			`shrl %esi`
147			`addl %eax,%esi`
148
149			`movl %ecx,%edx`
150			`divl %esi`
151			`shrl %esi`
152			`addl %eax,%esi`
153
154			`/*`
155			`* Now that an estimate accurate to about 30 bits has been obtained (in %esi),`
156			`* we improve it to 60 bits or so.`
157			`*`
158			`* The strategy from now on is to compute new estimates from`
159			`* guess := guess + (n - guess^2) / (2 * guess)`
160			`*/`
161
162			`/* First, find the square of the guess */`
163			`movl %esi,%eax`
164			`mull %esi`
165			`/* guess^2 now in %edx:%eax */`
166
167			`movl FPU_fsqrt_arg_1,%ecx`
168			`subl %ecx,%eax`
169			`movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */`
170			`sbbl %ecx,%edx`
171			`jnc sqrt_stage_2_positive`
172
173			`/* Subtraction gives a negative result,`
174			`negate the result before division. */`
175			`notl %edx`
176			`notl %eax`
177			`addl $1,%eax`
178			`adcl $0,%edx`
179
180			`divl %esi`
181			`movl %eax,%ecx`
182
183			`movl %edx,%eax`
184			`divl %esi`
185			`jmp sqrt_stage_2_finish`
186
187			`sqrt_stage_2_positive:`
188			`divl %esi`
189			`movl %eax,%ecx`
190
191			`movl %edx,%eax`
192			`divl %esi`
193
194			`notl %ecx`
195			`notl %eax`
196			`addl $1,%eax`
197			`adcl $0,%ecx`
198
199			`sqrt_stage_2_finish:`
200			`sarl $1,%ecx /* divide by 2 */`
201			`rcrl $1,%eax`
202
203			`/* Form the new estimate in %esi:%edi */`
204			`movl %eax,%edi`
205			`addl %ecx,%esi`
206
207			`jnz sqrt_stage_2_done /* result should be [1..2) */`
208
209			`#ifdef PARANOID`
210			`/* It should be possible to get here only if the arg is ffff....ffff */`
211			`cmp $0xffffffff,FPU_fsqrt_arg_1`
212			`jnz sqrt_stage_2_error`
213			`#endif PARANOID`
214
215			`/* The best rounded result. */`
216			`xorl %eax,%eax`
217			`decl %eax`
218			`movl %eax,%edi`
219			`movl %eax,%esi`
220			`movl $0x7fffffff,%eax`
221			`jmp sqrt_round_result`
222
223			`#ifdef PARANOID`
224			`sqrt_stage_2_error:`
225			`pushl EX_INTERNAL\|0x213`
226			`call EXCEPTION`
227			`#endif PARANOID`
228
229			`sqrt_stage_2_done:`
230
231			`/* Now the square root has been computed to better than 60 bits. */`
232
233			`/* Find the square of the guess. */`
234			`movl %edi,%eax /* ls word of guess */`
235			`mull %edi`
236			`movl %edx,FPU_accum_1`
237
238			`movl %esi,%eax`
239			`mull %esi`
240			`movl %edx,FPU_accum_3`
241			`movl %eax,FPU_accum_2`
242
243			`movl %edi,%eax`
244			`mull %esi`
245			`addl %eax,FPU_accum_1`
246			`adcl %edx,FPU_accum_2`
247			`adcl $0,FPU_accum_3`
248
249			`/* movl %esi,%eax */`
250			`/* mull %edi */`
251			`addl %eax,FPU_accum_1`
252			`adcl %edx,FPU_accum_2`
253			`adcl $0,FPU_accum_3`
254
255			`/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */`
256
257			`movl FPU_fsqrt_arg_0,%eax /* get normalized n */`
258			`subl %eax,FPU_accum_1`
259			`movl FPU_fsqrt_arg_1,%eax`
260			`sbbl %eax,FPU_accum_2`
261			`movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */`
262			`sbbl %eax,FPU_accum_3`
263			`jnc sqrt_stage_3_positive`
264
265			`/* Subtraction gives a negative result,`
266			`negate the result before division */`
267			`notl FPU_accum_1`
268			`notl FPU_accum_2`
269			`notl FPU_accum_3`
270			`addl $1,FPU_accum_1`
271			`adcl $0,FPU_accum_2`
272
273			`#ifdef PARANOID`
274			`adcl $0,FPU_accum_3 /* This must be zero */`
275			`jz sqrt_stage_3_no_error`
276
277			`sqrt_stage_3_error:`
278			`pushl EX_INTERNAL\|0x207`
279			`call EXCEPTION`
280
281			`sqrt_stage_3_no_error:`
282			`#endif PARANOID`
283
284			`movl FPU_accum_2,%edx`
285			`movl FPU_accum_1,%eax`
286			`divl %esi`
287			`movl %eax,%ecx`
288
289			`movl %edx,%eax`
290			`divl %esi`
291
292			`sarl $1,%ecx /* divide by 2 */`
293			`rcrl $1,%eax`
294
295			`/* prepare to round the result */`
296
297			`addl %ecx,%edi`
298			`adcl $0,%esi`
299
300			`jmp sqrt_stage_3_finished`
301
302			`sqrt_stage_3_positive:`
303			`movl FPU_accum_2,%edx`
304			`movl FPU_accum_1,%eax`
305			`divl %esi`
306			`movl %eax,%ecx`
307
308			`movl %edx,%eax`
309			`divl %esi`
310
311			`sarl $1,%ecx /* divide by 2 */`
312			`rcrl $1,%eax`
313
314			`/* prepare to round the result */`
315
316			`notl %eax /* Negate the correction term */`
317			`notl %ecx`
318			`addl $1,%eax`
319			`adcl $0,%ecx /* carry here ==> correction == 0 */`
320			`adcl $0xffffffff,%esi`
321
322			`addl %ecx,%edi`
323			`adcl $0,%esi`
324
325			`sqrt_stage_3_finished:`
326
327			`/*`
328			`* The result in %esi:%edi:%esi should be good to about 90 bits here,`
329			`* and the rounding information here does not have sufficient accuracy`
330			`* in a few rare cases.`
331			`*/`
332			`cmpl $0xffffffe0,%eax`
333			`ja sqrt_near_exact_x`
334
335			`cmpl $0x00000020,%eax`
336			`jb sqrt_near_exact`
337
338			`cmpl $0x7fffffe0,%eax`
339			`jb sqrt_round_result`
340
341			`cmpl $0x80000020,%eax`
342			`jb sqrt_get_more_precision`
343
344			`sqrt_round_result:`
345			`/* Set up for rounding operations */`
346			`movl %eax,%edx`
347			`movl %esi,%eax`
348			`movl %edi,%ebx`
349			`movl PARAM1,%edi`
350			`movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */`
351			`movl PARAM2,%ecx`
352			`jmp fpu_reg_round_sqrt`
353
354
355			`sqrt_near_exact_x:`
356			`/* First, the estimate must be rounded up. */`
357			`addl $1,%edi`
358			`adcl $0,%esi`
359
360			`sqrt_near_exact:`
361			`/*`
362			`* This is an easy case because x^1/2 is monotonic.`
363			`* We need just find the square of our estimate, compare it`
364			`* with the argument, and deduce whether our estimate is`
365			`* above, below, or exact. We use the fact that the estimate`
366			`* is known to be accurate to about 90 bits.`
367			`*/`
368			`movl %edi,%eax /* ls word of guess */`
369			`mull %edi`
370			`movl %edx,%ebx /* 2nd ls word of square */`
371			`movl %eax,%ecx /* ls word of square */`
372
373			`movl %edi,%eax`
374			`mull %esi`
375			`addl %eax,%ebx`
376			`addl %eax,%ebx`
377
378			`#ifdef PARANOID`
379			`cmp $0xffffffb0,%ebx`
380			`jb sqrt_near_exact_ok`
381
382			`cmp $0x00000050,%ebx`
383			`ja sqrt_near_exact_ok`
384
385			`pushl EX_INTERNAL\|0x214`
386			`call EXCEPTION`
387
388			`sqrt_near_exact_ok:`
389			`#endif PARANOID`
390
391			`or %ebx,%ebx`
392			`js sqrt_near_exact_small`
393
394			`jnz sqrt_near_exact_large`
395
396			`or %ebx,%edx`
397			`jnz sqrt_near_exact_large`
398
399			`/* Our estimate is exactly the right answer */`
400			`xorl %eax,%eax`
401			`jmp sqrt_round_result`
402
403			`sqrt_near_exact_small:`
404			`/* Our estimate is too small */`
405			`movl $0x000000ff,%eax`
406			`jmp sqrt_round_result`
407
408			`sqrt_near_exact_large:`
409			`/* Our estimate is too large, we need to decrement it */`
410			`subl $1,%edi`
411			`sbbl $0,%esi`
412			`movl $0xffffff00,%eax`
413			`jmp sqrt_round_result`
414
415
416			`sqrt_get_more_precision:`
417			`/* This case is almost the same as the above, except we start`
418			`with an extra bit of precision in the estimate. */`
419			`stc /* The extra bit. */`
420			`rcll $1,%edi /* Shift the estimate left one bit */`
421			`rcll $1,%esi`
422
423			`movl %edi,%eax /* ls word of guess */`
424			`mull %edi`
425			`movl %edx,%ebx /* 2nd ls word of square */`
426			`movl %eax,%ecx /* ls word of square */`
427
428			`movl %edi,%eax`
429			`mull %esi`
430			`addl %eax,%ebx`
431			`addl %eax,%ebx`
432
433			`/* Put our estimate back to its original value */`
434			`stc /* The ms bit. */`
435			`rcrl $1,%esi /* Shift the estimate left one bit */`
436			`rcrl $1,%edi`
437
438			`#ifdef PARANOID`
439			`cmp $0xffffff60,%ebx`
440			`jb sqrt_more_prec_ok`
441
442			`cmp $0x000000a0,%ebx`
443			`ja sqrt_more_prec_ok`
444
445			`pushl EX_INTERNAL\|0x215`
446			`call EXCEPTION`
447
448			`sqrt_more_prec_ok:`
449			`#endif PARANOID`
450
451			`or %ebx,%ebx`
452			`js sqrt_more_prec_small`
453
454			`jnz sqrt_more_prec_large`
455
456			`or %ebx,%ecx`
457			`jnz sqrt_more_prec_large`
458
459			`/* Our estimate is exactly the right answer */`
460			`movl $0x80000000,%eax`
461			`jmp sqrt_round_result`
462
463			`sqrt_more_prec_small:`
464			`/* Our estimate is too small */`
465			`movl $0x800000ff,%eax`
466			`jmp sqrt_round_result`
467
468			`sqrt_more_prec_large:`
469			`/* Our estimate is too large */`
470			`movl $0x7fffff00,%eax`
471			`jmp sqrt_round_result`