OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [uclinux/] [uClinux-2.0.x/] [arch/] [i386/] [math-emu/] [wm_sqrt.S] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 199 simons
        .file   "wm_sqrt.S"
2
/*---------------------------------------------------------------------------+
3
 |  wm_sqrt.S                                                                |
4
 |                                                                           |
5
 | Fixed point arithmetic square root evaluation.                            |
6
 |                                                                           |
7
 | Copyright (C) 1992,1993,1995                                              |
8
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
9
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
10
 |                                                                           |
11
 | Call from C as:                                                           |
12
 |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
13
 |                                                                           |
14
 +---------------------------------------------------------------------------*/
15
 
16
/*---------------------------------------------------------------------------+
17
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
18
 |    returns the square root of n in n.                                     |
19
 |                                                                           |
20
 |  Use Newton's method to compute the square root of a number, which must   |
21
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
22
 |  Does not check the sign or tag of the argument.                          |
23
 |  Sets the exponent, but not the sign or tag of the result.                |
24
 |                                                                           |
25
 |  The guess is kept in %esi:%edi                                           |
26
 +---------------------------------------------------------------------------*/
27
 
28
#include "exception.h"
29
#include "fpu_emu.h"
30
 
31
 
32
#ifndef NON_REENTRANT_FPU
33
/*      Local storage on the stack: */
34
#define FPU_accum_3     -4(%ebp)        /* ms word */
35
#define FPU_accum_2     -8(%ebp)
36
#define FPU_accum_1     -12(%ebp)
37
#define FPU_accum_0     -16(%ebp)
38
 
39
/*
40
 * The de-normalised argument:
41
 *                  sq_2                  sq_1              sq_0
42
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
43
 *           ^ binary point here
44
 */
45
#define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
46
#define FPU_fsqrt_arg_1 -24(%ebp)
47
#define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */
48
 
49
#else
50
/*      Local storage in a static area: */
51
.data
52
        .align 4,0
53
FPU_accum_3:
54
        .long   0                /* ms word */
55
FPU_accum_2:
56
        .long   0
57
FPU_accum_1:
58
        .long   0
59
FPU_accum_0:
60
        .long   0
61
 
62
/* The de-normalised argument:
63
                    sq_2                  sq_1              sq_0
64
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
65
             ^ binary point here
66
 */
67
FPU_fsqrt_arg_2:
68
        .long   0                /* ms word */
69
FPU_fsqrt_arg_1:
70
        .long   0
71
FPU_fsqrt_arg_0:
72
        .long   0                /* ls word, at most the ms bit is set */
73
#endif NON_REENTRANT_FPU
74
 
75
 
76
.text
77
ENTRY(wm_sqrt)
78
        pushl   %ebp
79
        movl    %esp,%ebp
80
#ifndef NON_REENTRANT_FPU
81
        subl    $28,%esp
82
#endif NON_REENTRANT_FPU
83
        pushl   %esi
84
        pushl   %edi
85
        pushl   %ebx
86
 
87
        movl    PARAM1,%esi
88
 
89
        movl    SIGH(%esi),%eax
90
        movl    SIGL(%esi),%ecx
91
        xorl    %edx,%edx
92
 
93
/* We use a rough linear estimate for the first guess.. */
94
 
95
        cmpl    EXP_BIAS,EXP(%esi)
96
        jnz     sqrt_arg_ge_2
97
 
98
        shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
99
        rcrl    $1,%ecx
100
        rcrl    $1,%edx
101
 
102
sqrt_arg_ge_2:
103
/* From here on, n is never accessed directly again until it is
104
   replaced by the answer. */
105
 
106
        movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
107
        movl    %ecx,FPU_fsqrt_arg_1
108
        movl    %edx,FPU_fsqrt_arg_0
109
 
110
/* Make a linear first estimate */
111
        shrl    $1,%eax
112
        addl    $0x40000000,%eax
113
        movl    $0xaaaaaaaa,%ecx
114
        mull    %ecx
115
        shll    %edx                    /* max result was 7fff... */
116
        testl   $0x80000000,%edx        /* but min was 3fff... */
117
        jnz     sqrt_prelim_no_adjust
118
 
119
        movl    $0x80000000,%edx        /* round up */
120
 
121
sqrt_prelim_no_adjust:
122
        movl    %edx,%esi       /* Our first guess */
123
 
124
/* We have now computed (approx)   (2 + x) / 3, which forms the basis
125
   for a few iterations of Newton's method */
126
 
127
        movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
128
 
129
/*
130
 * From our initial estimate, three iterations are enough to get us
131
 * to 30 bits or so. This will then allow two iterations at better
132
 * precision to complete the process.
133
 */
134
 
135
/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
136
        shrl    %ecx            /* Doing this first will prevent a divide */
137
                                /* overflow later. */
138
 
139
        movl    %ecx,%edx       /* msw of the arg / 2 */
140
        divl    %esi            /* current estimate */
141
        shrl    %esi            /* divide by 2 */
142
        addl    %eax,%esi       /* the new estimate */
143
 
144
        movl    %ecx,%edx
145
        divl    %esi
146
        shrl    %esi
147
        addl    %eax,%esi
148
 
149
        movl    %ecx,%edx
150
        divl    %esi
151
        shrl    %esi
152
        addl    %eax,%esi
153
 
154
/*
155
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156
 * we improve it to 60 bits or so.
157
 *
158
 * The strategy from now on is to compute new estimates from
159
 *      guess := guess + (n - guess^2) / (2 * guess)
160
 */
161
 
162
/* First, find the square of the guess */
163
        movl    %esi,%eax
164
        mull    %esi
165
/* guess^2 now in %edx:%eax */
166
 
167
        movl    FPU_fsqrt_arg_1,%ecx
168
        subl    %ecx,%eax
169
        movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
170
        sbbl    %ecx,%edx
171
        jnc     sqrt_stage_2_positive
172
 
173
/* Subtraction gives a negative result,
174
   negate the result before division. */
175
        notl    %edx
176
        notl    %eax
177
        addl    $1,%eax
178
        adcl    $0,%edx
179
 
180
        divl    %esi
181
        movl    %eax,%ecx
182
 
183
        movl    %edx,%eax
184
        divl    %esi
185
        jmp     sqrt_stage_2_finish
186
 
187
sqrt_stage_2_positive:
188
        divl    %esi
189
        movl    %eax,%ecx
190
 
191
        movl    %edx,%eax
192
        divl    %esi
193
 
194
        notl    %ecx
195
        notl    %eax
196
        addl    $1,%eax
197
        adcl    $0,%ecx
198
 
199
sqrt_stage_2_finish:
200
        sarl    $1,%ecx         /* divide by 2 */
201
        rcrl    $1,%eax
202
 
203
        /* Form the new estimate in %esi:%edi */
204
        movl    %eax,%edi
205
        addl    %ecx,%esi
206
 
207
        jnz     sqrt_stage_2_done       /* result should be [1..2) */
208
 
209
#ifdef PARANOID
210
/* It should be possible to get here only if the arg is ffff....ffff */
211
        cmp     $0xffffffff,FPU_fsqrt_arg_1
212
        jnz     sqrt_stage_2_error
213
#endif PARANOID
214
 
215
/* The best rounded result. */
216
        xorl    %eax,%eax
217
        decl    %eax
218
        movl    %eax,%edi
219
        movl    %eax,%esi
220
        movl    $0x7fffffff,%eax
221
        jmp     sqrt_round_result
222
 
223
#ifdef PARANOID
224
sqrt_stage_2_error:
225
        pushl   EX_INTERNAL|0x213
226
        call    EXCEPTION
227
#endif PARANOID
228
 
229
sqrt_stage_2_done:
230
 
231
/* Now the square root has been computed to better than 60 bits. */
232
 
233
/* Find the square of the guess. */
234
        movl    %edi,%eax               /* ls word of guess */
235
        mull    %edi
236
        movl    %edx,FPU_accum_1
237
 
238
        movl    %esi,%eax
239
        mull    %esi
240
        movl    %edx,FPU_accum_3
241
        movl    %eax,FPU_accum_2
242
 
243
        movl    %edi,%eax
244
        mull    %esi
245
        addl    %eax,FPU_accum_1
246
        adcl    %edx,FPU_accum_2
247
        adcl    $0,FPU_accum_3
248
 
249
/*      movl    %esi,%eax */
250
/*      mull    %edi */
251
        addl    %eax,FPU_accum_1
252
        adcl    %edx,FPU_accum_2
253
        adcl    $0,FPU_accum_3
254
 
255
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
 
257
        movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
258
        subl    %eax,FPU_accum_1
259
        movl    FPU_fsqrt_arg_1,%eax
260
        sbbl    %eax,FPU_accum_2
261
        movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
262
        sbbl    %eax,FPU_accum_3
263
        jnc     sqrt_stage_3_positive
264
 
265
/* Subtraction gives a negative result,
266
   negate the result before division */
267
        notl    FPU_accum_1
268
        notl    FPU_accum_2
269
        notl    FPU_accum_3
270
        addl    $1,FPU_accum_1
271
        adcl    $0,FPU_accum_2
272
 
273
#ifdef PARANOID
274
        adcl    $0,FPU_accum_3  /* This must be zero */
275
        jz      sqrt_stage_3_no_error
276
 
277
sqrt_stage_3_error:
278
        pushl   EX_INTERNAL|0x207
279
        call    EXCEPTION
280
 
281
sqrt_stage_3_no_error:
282
#endif PARANOID
283
 
284
        movl    FPU_accum_2,%edx
285
        movl    FPU_accum_1,%eax
286
        divl    %esi
287
        movl    %eax,%ecx
288
 
289
        movl    %edx,%eax
290
        divl    %esi
291
 
292
        sarl    $1,%ecx         /* divide by 2 */
293
        rcrl    $1,%eax
294
 
295
        /* prepare to round the result */
296
 
297
        addl    %ecx,%edi
298
        adcl    $0,%esi
299
 
300
        jmp     sqrt_stage_3_finished
301
 
302
sqrt_stage_3_positive:
303
        movl    FPU_accum_2,%edx
304
        movl    FPU_accum_1,%eax
305
        divl    %esi
306
        movl    %eax,%ecx
307
 
308
        movl    %edx,%eax
309
        divl    %esi
310
 
311
        sarl    $1,%ecx         /* divide by 2 */
312
        rcrl    $1,%eax
313
 
314
        /* prepare to round the result */
315
 
316
        notl    %eax            /* Negate the correction term */
317
        notl    %ecx
318
        addl    $1,%eax
319
        adcl    $0,%ecx         /* carry here ==> correction == 0 */
320
        adcl    $0xffffffff,%esi
321
 
322
        addl    %ecx,%edi
323
        adcl    $0,%esi
324
 
325
sqrt_stage_3_finished:
326
 
327
/*
328
 * The result in %esi:%edi:%esi should be good to about 90 bits here,
329
 * and the rounding information here does not have sufficient accuracy
330
 * in a few rare cases.
331
 */
332
        cmpl    $0xffffffe0,%eax
333
        ja      sqrt_near_exact_x
334
 
335
        cmpl    $0x00000020,%eax
336
        jb      sqrt_near_exact
337
 
338
        cmpl    $0x7fffffe0,%eax
339
        jb      sqrt_round_result
340
 
341
        cmpl    $0x80000020,%eax
342
        jb      sqrt_get_more_precision
343
 
344
sqrt_round_result:
345
/* Set up for rounding operations */
346
        movl    %eax,%edx
347
        movl    %esi,%eax
348
        movl    %edi,%ebx
349
        movl    PARAM1,%edi
350
        movl    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
351
        movl    PARAM2,%ecx
352
        jmp     fpu_reg_round_sqrt
353
 
354
 
355
sqrt_near_exact_x:
356
/* First, the estimate must be rounded up. */
357
        addl    $1,%edi
358
        adcl    $0,%esi
359
 
360
sqrt_near_exact:
361
/*
362
 * This is an easy case because x^1/2 is monotonic.
363
 * We need just find the square of our estimate, compare it
364
 * with the argument, and deduce whether our estimate is
365
 * above, below, or exact. We use the fact that the estimate
366
 * is known to be accurate to about 90 bits.
367
 */
368
        movl    %edi,%eax               /* ls word of guess */
369
        mull    %edi
370
        movl    %edx,%ebx               /* 2nd ls word of square */
371
        movl    %eax,%ecx               /* ls word of square */
372
 
373
        movl    %edi,%eax
374
        mull    %esi
375
        addl    %eax,%ebx
376
        addl    %eax,%ebx
377
 
378
#ifdef PARANOID
379
        cmp     $0xffffffb0,%ebx
380
        jb      sqrt_near_exact_ok
381
 
382
        cmp     $0x00000050,%ebx
383
        ja      sqrt_near_exact_ok
384
 
385
        pushl   EX_INTERNAL|0x214
386
        call    EXCEPTION
387
 
388
sqrt_near_exact_ok:
389
#endif PARANOID
390
 
391
        or      %ebx,%ebx
392
        js      sqrt_near_exact_small
393
 
394
        jnz     sqrt_near_exact_large
395
 
396
        or      %ebx,%edx
397
        jnz     sqrt_near_exact_large
398
 
399
/* Our estimate is exactly the right answer */
400
        xorl    %eax,%eax
401
        jmp     sqrt_round_result
402
 
403
sqrt_near_exact_small:
404
/* Our estimate is too small */
405
        movl    $0x000000ff,%eax
406
        jmp     sqrt_round_result
407
 
408
sqrt_near_exact_large:
409
/* Our estimate is too large, we need to decrement it */
410
        subl    $1,%edi
411
        sbbl    $0,%esi
412
        movl    $0xffffff00,%eax
413
        jmp     sqrt_round_result
414
 
415
 
416
sqrt_get_more_precision:
417
/* This case is almost the same as the above, except we start
418
   with an extra bit of precision in the estimate. */
419
        stc                     /* The extra bit. */
420
        rcll    $1,%edi         /* Shift the estimate left one bit */
421
        rcll    $1,%esi
422
 
423
        movl    %edi,%eax               /* ls word of guess */
424
        mull    %edi
425
        movl    %edx,%ebx               /* 2nd ls word of square */
426
        movl    %eax,%ecx               /* ls word of square */
427
 
428
        movl    %edi,%eax
429
        mull    %esi
430
        addl    %eax,%ebx
431
        addl    %eax,%ebx
432
 
433
/* Put our estimate back to its original value */
434
        stc                     /* The ms bit. */
435
        rcrl    $1,%esi         /* Shift the estimate left one bit */
436
        rcrl    $1,%edi
437
 
438
#ifdef PARANOID
439
        cmp     $0xffffff60,%ebx
440
        jb      sqrt_more_prec_ok
441
 
442
        cmp     $0x000000a0,%ebx
443
        ja      sqrt_more_prec_ok
444
 
445
        pushl   EX_INTERNAL|0x215
446
        call    EXCEPTION
447
 
448
sqrt_more_prec_ok:
449
#endif PARANOID
450
 
451
        or      %ebx,%ebx
452
        js      sqrt_more_prec_small
453
 
454
        jnz     sqrt_more_prec_large
455
 
456
        or      %ebx,%ecx
457
        jnz     sqrt_more_prec_large
458
 
459
/* Our estimate is exactly the right answer */
460
        movl    $0x80000000,%eax
461
        jmp     sqrt_round_result
462
 
463
sqrt_more_prec_small:
464
/* Our estimate is too small */
465
        movl    $0x800000ff,%eax
466
        jmp     sqrt_round_result
467
 
468
sqrt_more_prec_large:
469
/* Our estimate is too large */
470
        movl    $0x7fffff00,%eax
471
        jmp     sqrt_round_result

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.