URL https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k

[/] [or1k/] [trunk/] [uclinux/] [uClinux-2.0.x/] [arch/] [i386/] [math-emu/] [wm_sqrt.S] - Rev 1765

Compare with Previous | Blame | View Log
        .file   "wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995                                              |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   void wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"


#ifndef NON_REENTRANT_FPU
/*      Local storage on the stack: */
#define FPU_accum_3     -4(%ebp)        /* ms word */
#define FPU_accum_2     -8(%ebp)
#define FPU_accum_1     -12(%ebp)
#define FPU_accum_0     -16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
#define FPU_fsqrt_arg_1 -24(%ebp)
#define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */

#else
/*      Local storage in a static area: */
.data
        .align 4,0
FPU_accum_3:
        .long   0                /* ms word */
FPU_accum_2:
        .long   0
FPU_accum_1:
        .long   0
FPU_accum_0:
        .long   0

/* The de-normalised argument:
                    sq_2                  sq_1              sq_0
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
             ^ binary point here
 */
FPU_fsqrt_arg_2:
        .long   0                /* ms word */
FPU_fsqrt_arg_1:
        .long   0
FPU_fsqrt_arg_0:
        .long   0                /* ls word, at most the ms bit is set */
#endif NON_REENTRANT_FPU


.text
ENTRY(wm_sqrt)
        pushl   %ebp
        movl    %esp,%ebp
#ifndef NON_REENTRANT_FPU
        subl    $28,%esp
#endif NON_REENTRANT_FPU
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    PARAM1,%esi

        movl    SIGH(%esi),%eax
        movl    SIGL(%esi),%ecx
        xorl    %edx,%edx

/* We use a rough linear estimate for the first guess.. */

        cmpl    EXP_BIAS,EXP(%esi)
        jnz     sqrt_arg_ge_2

        shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
        rcrl    $1,%ecx
        rcrl    $1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

        movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
        movl    %ecx,FPU_fsqrt_arg_1
        movl    %edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
        shrl    $1,%eax
        addl    $0x40000000,%eax
        movl    $0xaaaaaaaa,%ecx
        mull    %ecx
        shll    %edx                    /* max result was 7fff... */
        testl   $0x80000000,%edx        /* but min was 3fff... */
        jnz     sqrt_prelim_no_adjust

        movl    $0x80000000,%edx        /* round up */

sqrt_prelim_no_adjust:
        movl    %edx,%esi       /* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

        movl    FPU_fsqrt_arg_2,%ecx    /* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
        shrl    %ecx            /* Doing this first will prevent a divide */
                                /* overflow later. */

        movl    %ecx,%edx       /* msw of the arg / 2 */
        divl    %esi            /* current estimate */
        shrl    %esi            /* divide by 2 */
        addl    %eax,%esi       /* the new estimate */

        movl    %ecx,%edx
        divl    %esi
        shrl    %esi
        addl    %eax,%esi

        movl    %ecx,%edx
        divl    %esi
        shrl    %esi
        addl    %eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

/* First, find the square of the guess */
        movl    %esi,%eax
        mull    %esi
/* guess^2 now in %edx:%eax */

        movl    FPU_fsqrt_arg_1,%ecx
        subl    %ecx,%eax
        movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
        sbbl    %ecx,%edx
        jnc     sqrt_stage_2_positive

/* Subtraction gives a negative result,
   negate the result before division. */
        notl    %edx
        notl    %eax
        addl    $1,%eax
        adcl    $0,%edx

        divl    %esi
        movl    %eax,%ecx

        movl    %edx,%eax
        divl    %esi
        jmp     sqrt_stage_2_finish

sqrt_stage_2_positive:
        divl    %esi
        movl    %eax,%ecx

        movl    %edx,%eax
        divl    %esi

        notl    %ecx
        notl    %eax
        addl    $1,%eax
        adcl    $0,%ecx

sqrt_stage_2_finish:
        sarl    $1,%ecx         /* divide by 2 */
        rcrl    $1,%eax

        /* Form the new estimate in %esi:%edi */
        movl    %eax,%edi
        addl    %ecx,%esi

        jnz     sqrt_stage_2_done       /* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
        cmp     $0xffffffff,FPU_fsqrt_arg_1
        jnz     sqrt_stage_2_error
#endif PARANOID

/* The best rounded result. */
        xorl    %eax,%eax
        decl    %eax
        movl    %eax,%edi
        movl    %eax,%esi
        movl    $0x7fffffff,%eax
        jmp     sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
        pushl   EX_INTERNAL|0x213
        call    EXCEPTION
#endif PARANOID

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/* Find the square of the guess. */
        movl    %edi,%eax               /* ls word of guess */
        mull    %edi
        movl    %edx,FPU_accum_1

        movl    %esi,%eax
        mull    %esi
        movl    %edx,FPU_accum_3
        movl    %eax,FPU_accum_2

        movl    %edi,%eax
        mull    %esi
        addl    %eax,FPU_accum_1
        adcl    %edx,FPU_accum_2
        adcl    $0,FPU_accum_3

/*      movl    %esi,%eax */
/*      mull    %edi */
        addl    %eax,FPU_accum_1
        adcl    %edx,FPU_accum_2
        adcl    $0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

        movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
        subl    %eax,FPU_accum_1
        movl    FPU_fsqrt_arg_1,%eax
        sbbl    %eax,FPU_accum_2
        movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
        sbbl    %eax,FPU_accum_3
        jnc     sqrt_stage_3_positive

/* Subtraction gives a negative result,
   negate the result before division */
        notl    FPU_accum_1
        notl    FPU_accum_2
        notl    FPU_accum_3
        addl    $1,FPU_accum_1
        adcl    $0,FPU_accum_2

#ifdef PARANOID
        adcl    $0,FPU_accum_3  /* This must be zero */
        jz      sqrt_stage_3_no_error

sqrt_stage_3_error:
        pushl   EX_INTERNAL|0x207
        call    EXCEPTION

sqrt_stage_3_no_error:
#endif PARANOID

        movl    FPU_accum_2,%edx
        movl    FPU_accum_1,%eax
        divl    %esi
        movl    %eax,%ecx

        movl    %edx,%eax
        divl    %esi

        sarl    $1,%ecx         /* divide by 2 */
        rcrl    $1,%eax

        /* prepare to round the result */

        addl    %ecx,%edi
        adcl    $0,%esi

        jmp     sqrt_stage_3_finished

sqrt_stage_3_positive:
        movl    FPU_accum_2,%edx
        movl    FPU_accum_1,%eax
        divl    %esi
        movl    %eax,%ecx

        movl    %edx,%eax
        divl    %esi

        sarl    $1,%ecx         /* divide by 2 */
        rcrl    $1,%eax

        /* prepare to round the result */

        notl    %eax            /* Negate the correction term */
        notl    %ecx
        addl    $1,%eax
        adcl    $0,%ecx         /* carry here ==> correction == 0 */
        adcl    $0xffffffff,%esi

        addl    %ecx,%edi
        adcl    $0,%esi

sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%esi should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 */
        cmpl    $0xffffffe0,%eax
        ja      sqrt_near_exact_x

        cmpl    $0x00000020,%eax
        jb      sqrt_near_exact

        cmpl    $0x7fffffe0,%eax
        jb      sqrt_round_result

        cmpl    $0x80000020,%eax
        jb      sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
        movl    %eax,%edx
        movl    %esi,%eax
        movl    %edi,%ebx
        movl    PARAM1,%edi
        movl    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
        movl    PARAM2,%ecx
        jmp     fpu_reg_round_sqrt


sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
        addl    $1,%edi
        adcl    $0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 */
        movl    %edi,%eax               /* ls word of guess */
        mull    %edi
        movl    %edx,%ebx               /* 2nd ls word of square */
        movl    %eax,%ecx               /* ls word of square */

        movl    %edi,%eax
        mull    %esi
        addl    %eax,%ebx
        addl    %eax,%ebx

#ifdef PARANOID
        cmp     $0xffffffb0,%ebx
        jb      sqrt_near_exact_ok

        cmp     $0x00000050,%ebx
        ja      sqrt_near_exact_ok

        pushl   EX_INTERNAL|0x214
        call    EXCEPTION

sqrt_near_exact_ok:
#endif PARANOID

        or      %ebx,%ebx
        js      sqrt_near_exact_small

        jnz     sqrt_near_exact_large

        or      %ebx,%edx
        jnz     sqrt_near_exact_large

/* Our estimate is exactly the right answer */
        xorl    %eax,%eax
        jmp     sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
        movl    $0x000000ff,%eax
        jmp     sqrt_round_result
        
sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
        subl    $1,%edi
        sbbl    $0,%esi
        movl    $0xffffff00,%eax
        jmp     sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
        stc                     /* The extra bit. */
        rcll    $1,%edi         /* Shift the estimate left one bit */
        rcll    $1,%esi

        movl    %edi,%eax               /* ls word of guess */
        mull    %edi
        movl    %edx,%ebx               /* 2nd ls word of square */
        movl    %eax,%ecx               /* ls word of square */

        movl    %edi,%eax
        mull    %esi
        addl    %eax,%ebx
        addl    %eax,%ebx

/* Put our estimate back to its original value */
        stc                     /* The ms bit. */
        rcrl    $1,%esi         /* Shift the estimate left one bit */
        rcrl    $1,%edi

#ifdef PARANOID
        cmp     $0xffffff60,%ebx
        jb      sqrt_more_prec_ok

        cmp     $0x000000a0,%ebx
        ja      sqrt_more_prec_ok

        pushl   EX_INTERNAL|0x215
        call    EXCEPTION

sqrt_more_prec_ok:
#endif PARANOID

        or      %ebx,%ebx
        js      sqrt_more_prec_small

        jnz     sqrt_more_prec_large

        or      %ebx,%ecx
        jnz     sqrt_more_prec_large

/* Our estimate is exactly the right answer */
        movl    $0x80000000,%eax
        jmp     sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
        movl    $0x800000ff,%eax
        jmp     sqrt_round_result
        
sqrt_more_prec_large:
/* Our estimate is too large */
        movl    $0x7fffff00,%eax
        jmp     sqrt_round_result
Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories or1k

[/] [or1k/] [trunk/] [uclinux/] [uClinux-2.0.x/] [arch/] [i386/] [math-emu/] [wm_sqrt.S] - Rev 1765