URL https://opencores.org/ocsvn/ao486/ao486/trunk

Subversion Repositories ao486

[/] [ao486/] [trunk/] [syn/] [components/] [sd_card/] [firmware/] [bsp/] [HAL/] [src/] [alt_exception_muldiv.S] - Blame information for rev 8

Details | Compare with Previous | View Log


/******************************************************************************
*                                                                             *
* License Agreement                                                           *
*                                                                             *
* Copyright (c) 2003-2005 Altera Corporation, San Jose, California, USA.      *
* All rights reserved.                                                        *
*                                                                             *
* Permission is hereby granted, free of charge, to any person obtaining a     *
* copy of this software and associated documentation files (the "Software"),  *
* to deal in the Software without restriction, including without limitation   *
* the rights to use, copy, modify, merge, publish, distribute, sublicense,    *
* and/or sell copies of the Software, and to permit persons to whom the       *
* Software is furnished to do so, subject to the following conditions:        *
*                                                                             *
* The above copyright notice and this permission notice shall be included in  *
* all copies or substantial portions of the Software.                         *
*                                                                             *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,    *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER      *
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING     *
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER         *
* DEALINGS IN THE SOFTWARE.                                                   *
*                                                                             *
* This agreement shall be governed in all respects by the laws of the State   *
* of California and by the laws of the United States of America.              *
*                                                                             *
******************************************************************************/
 
        /*
         * This is the software multiply/divide handler for Nios2.
         */
 
        /*
         * Provide a label which can be used to pull this file in.
         *
         * alt_exception_muldiv is an empty global label in .exceptions.start;
         * no instruction in this file follows it in that section.  Presumably
         * a reference to it from elsewhere is what makes the linker include
         * this object file - confirm against the code that references it.
         */
 
        .section .exceptions.start
        .globl alt_exception_muldiv
alt_exception_muldiv:
 
        /*
         * Pull in the entry/exit code.
         *
         * alt_exception is only named here, not defined; it has to be provided
         * by another file.
         */
        .globl alt_exception
 
 
        /*
         * Everything below is emitted into .exceptions.soft
         * ("x" = executable, "a" = allocatable).
         */
        .section .exceptions.soft, "xa"
 
 
        /* INSTRUCTION EMULATION
        *  ---------------------
        *
        * Nios II processors generate exceptions for unimplemented instructions.
        * The routines below emulate these instructions.  Depending on the
        * processor core, the only instructions that might need to be emulated
        * are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
        *
        * The emulations match the instructions, except for the following
        * limitations:
        *
        * 1) The emulation routines do not emulate the use of the exception
        *    temporary register (et) as a source operand because the exception
        *    handler already has modified it.
        *
        * 2) The routines do not emulate the use of the stack pointer (sp) or the
        *    exception return address register (ea) as a destination because
        *    modifying these registers crashes the exception handler or the
        *    interrupted routine.
        *
        * 3) To save code size, the routines do not emulate the use of the
        *    breakpoint registers (ba and bt) as operands.
        *
        * Detailed Design
        * ---------------
        *
        * The emulation routines expect the contents of integer registers r0-r31
        * to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp).  The
        * routines retrieve source operands from the stack and modify the
        * destination register's value on the stack prior to the end of the
        * exception handler.  Then all registers except the destination register
        * are restored to their previous values.
        *
        * The instruction that causes the exception is found at address -4(ea).
        * The instruction's OP and OPX fields identify the operation to be
        * performed.
        *
        * One instruction, muli, is an I-type instruction that is identified by
        * an OP field of 0x24.
        *
        * muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
        *           27    22                6      0    <-- LSB of field
        *
        * The remaining emulated instructions are R-type and have an OP field
        * of 0x3a.  Their OPX fields identify them.
        *
        * R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
        *           27    22    17     11     6      0  <-- LSB of field
        *
        *
        */
 
 
        /*
         * Split the instruction into its fields.  We need 4*A, 4*B, and 4*C as
         * offsets to the stack pointer for access to the stored register values.
         *
         * r2 is expected to hold the faulting instruction word on entry; the
         * code that loads it is not in this file.
         *
         * In the bit pictures below a lower-case letter (a, b, c) marks a
         * register-number MSB that has been inverted, i.e. the field holds
         * (reg ^ 16).  That matches the frame layout used further down, where
         * r16 is stored at offset 0 and r0 at offset 64.
         */
                             /* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP    */
        roli  r3, r2, 7      /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB   */
        roli  r4, r3, 3      /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB    */
        roli  r6, r4, 2      /* r6 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II   */
        srai  r4, r4, 16     /* r4 = (sign-extended) IMM16                  */
                             /* 0x42 flips bit 6 (MSB of B) and bit 1 (MSB of C) */
        xori  r6, r6, 0x42   /* r6 = CCC,XXXXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cC */
        roli  r7, r6, 5      /* r7 = XXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cCCCC,XX */
        andi  r5, r2, 0x3f   /* r5 = 00000000000000000000000000,PPPPPP      */
        xori  r3, r3, 0x40   /* flip bit 6 (MSB of A): r3 = ...,aAAAA,BB    */
        andi  r3, r3, 0x7c   /* r3 = 0000000000000000000000000,aAAAA,00     */
        andi  r6, r6, 0x7c   /* r6 = 0000000000000000000000000,bBBBB,00     */
        andi  r7, r7, 0x7c   /* r7 = 0000000000000000000000000,cCCCC,00     */
 
        /* Now
         *  r5 = OP
         *  r3 = 4*(A^16)
         *  r4 = instruction bits 21..6, sign extended
         *       (IMM16 for an I-type instruction; C,OPX,N for an R-type one)
         *  r6 = 4*(B^16)
         *  r7 = 4*(C^16)   (meaningful for R-type instructions only; for an
         *                   I-type instruction these bits are part of IMM16)
         */
 
 
        /*
         * Save everything on the stack to make it easy for the emulation routines
         * to retrieve the source register operands.  The exception entry code has
         * already saved some of this so we don't need to do it all again.
         *
         * After the adjustment below, register rN lives at offset 4*(N^16) from
         * sp: r16-r31 occupy 0..60 and r0-r15 occupy 64..124.  Only the slots
         * written below are filled by this file; the slots for at, r2-r15 and
         * ra are expected to have been filled by the entry code, which is not
         * visible here.
         */
 
        addi  sp, sp, -60
        stw   zero, 64(sp)   /* Save zero on stack to avoid special case for r0. */
                             /* Register at and r2-r15 have already been saved.  */
 
        stw   r16,  0(sp)
        stw   r17,  4(sp)
        stw   r18,  8(sp)
        stw   r19, 12(sp)
        stw   r20, 16(sp)
        stw   r21, 20(sp)
        stw   r22, 24(sp)
        stw   r23, 28(sp)
                            /* et @ 32 - Has already been changed.*/
                            /* bt @ 36 - Usually isn't an operand.   */
        stw   gp,  40(sp)
                            /* NOTE: the sp stored next is the value after the  */
                            /* -60 adjustment above (and after any adjustment   */
                            /* made by the entry code), not the value sp had in */
                            /* the interrupted code.                            */
        stw   sp,  44(sp)
        stw   fp,  48(sp)
                            /* ea @ 52 - Don't bother to save - it's already been changed */
                            /* ba @ 56 - Breakpoint register usually isn't an operand */
                            /* ra @ 60 - Has already been saved */
 
 
        /*
         *  Prepare for either multiplication or division loop.
         *  They both loop 32 times.
         */
        movi   r14, 32
 
 
        /*
         * Get the operands.
         *
         * It is necessary to check for muli because it uses an I-type instruction
         * format, while the other instructions have an R-type format.
         *
         * r15 is left holding 0x24 on the fall-through path; the dispatch code
         * that follows reuses that value as the OPX of divu.
         */
        add    r3, r3, sp     /* r3 = address of A-operand. */
        ldw    r3, 0(r3)      /* r3 = A-operand. */
        movi   r15, 0x24      /* muli opcode (I-type instruction format) */
        beq    r5, r15, .Lmul_immed /* muli doesn't use the B register as a source */
 
        add    r6, r6, sp     /* r6 = address of B-operand.               */
        ldw    r6, 0(r6)      /* r6 = B-operand.                          */
                              /* r4 = SSSSSSSSSSSSSSSS,-----IMM16------   */
                              /* IMM16 not needed, align OPX portion      */
                              /* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,NNNNN */
        srli   r4, r4, 5      /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
        andi   r4, r4, 0x3f   /* r4 = 00000000000000000000000000,-OPX--   */
 
        /* Now
         * r5 = OP
         * r3 = src1
         * r6 = src2
         * r4 = OPX (no longer can be muli)
         * r7 = 4*(C^16)
         * r14 = loop counter
         * r15 = 0x24
         */
 
        /* ILLEGAL-INSTRUCTION EXCEPTION
         *  -----------------------------
         *
         *  This code is for Nios II cores that generate exceptions when attempting
         *  to execute illegal instructions.  Nios II cores that support an
         *  illegal-instruction exception are identified by the presence of the
         *  macro definition NIOS2_HAS_ILLEGAL_INSTRUCTION_EXCEPTION in system.h .
         *
         *  Remember that illegal instructions are different than unimplemented
         *  instructions.  Illegal instructions are instruction encodings that
         *  have not been defined by the Nios II ISA.  Unimplemented instructions
         *  are legal instructions that must be emulated by some Nios II cores.
         *
         *  If we get here, all instructions except multiplies and divides
         *  are illegal.
         *
         *  This code assumes that OP is not muli (because muli was tested above).
         *  All other multiplies and divides are legal.  Anything else is illegal.
         *
         *  NOTE(review): no conditional-assembly guard on the macro named above
         *  is visible in this file; the checks below are assembled
         *  unconditionally.
         *
         *  Dispatch summary (r4 = OPX):
         *    0x24 divu                     -> .Ldivide
         *    0x27 mul, 0x07 mulxuu,
         *    0x17 mulxsu, 0x1f mulxss      -> .Lmultiply
         *    0x25 div                      -> falls through into .Ldivide
         *    anything else, or OP != 0x3a  -> .Lnot_muldiv
         */
 
        movi  r8, 0x3a                        /* OP for R-type mul* and div* */
        bne   r5, r8, .Lnot_muldiv
 
        /* r15 already is 0x24 */            /* OPX of divu */
        beq   r4, r15, .Ldivide
 
        movi  r15,0x27                        /* OPX of mul */
        beq   r4, r15, .Lmultiply
 
        movi  r15,0x07                        /* OPX of mulxuu */
        beq   r4, r15, .Lmultiply
 
        movi  r15,0x17                        /* OPX of mulxsu */
        beq   r4, r15, .Lmultiply
 
        movi  r15,0x1f                        /* OPX of mulxss */
        beq   r4, r15, .Lmultiply
 
        movi  r15,0x25                        /* OPX of div */
        bne   r4, r15, .Lnot_muldiv
                                              /* OPX is div: fall through to .Ldivide */
 
 
        /* DIVISION
         *
         * Divide an unsigned dividend by an unsigned divisor using
         * a shift-and-subtract algorithm.  The example below shows
         * 43 div 7 = 6 for 8-bit integers.  This classic algorithm uses a
         * single register to store both the dividend and the quotient,
         * allowing both values to be shifted with a single instruction.
         *
         *                               remainder dividend:quotient
         *                               --------- -----------------
         *   initialize                   00000000     00101011:
         *   shift                        00000000     0101011:_
         *   remainder >= divisor? no     00000000     0101011:0
         *   shift                        00000000     101011:0_
         *   remainder >= divisor? no     00000000     101011:00
         *   shift                        00000001     01011:00_
         *   remainder >= divisor? no     00000001     01011:000
         *   shift                        00000010     1011:000_
         *   remainder >= divisor? no     00000010     1011:0000
         *   shift                        00000101     011:0000_
         *   remainder >= divisor? no     00000101     011:00000
         *   shift                        00001010     11:00000_
         *   remainder >= divisor? yes    00001010     11:000001
         *       remainder -= divisor   - 00000111
         *                              ----------
         *                                00000011     11:000001
         *   shift                        00000111     1:000001_
         *   remainder >= divisor? yes    00000111     1:0000011
         *       remainder -= divisor   - 00000111
         *                              ----------
         *                                00000000     1:0000011
         *   shift                        00000001     :0000011_
         *   remainder >= divisor? no     00000001     :00000110
         *
         * The quotient is 00000110.
         */
 
.Ldivide:
        /*
         *  Emulate div / divu.
         *
         *  On entry:
         *    r3  = dividend (rA)
         *    r4  = OPX (0x25 for div, 0x24 for divu)
         *    r6  = divisor (rB)
         *    r7  = 4*(C^16), destination slot offset (not touched here)
         *    r14 = 32 (loop counter)
         *  On exit (to .Lstore_result):
         *    r9  = quotient
         *  Scratch: r13, r15, r17.
         *
         *  The divisor is not checked for zero.  With r6 == 0 the compare in
         *  the loop never skips, so every quotient bit is set (0xFFFFFFFF
         *  before the sign fix-up at the end).
         */

        /*
         *  Prepare for division by assuming the result
         *  is unsigned, and storing its "sign" as 0.
         */
        movi   r17, 0
 
 
        /* Which division opcode? */
        xori   r15, r4, 0x25         /* OPX of div */
        bne    r15, zero, .Lunsigned_division
 
 
        /*
         *  OPX is div.  Determine and store the sign of the quotient.
         *  Then take the absolute value of both operands.
         *
         *  Negating 0x80000000 leaves it unchanged; the unsigned loop below
         *  then treats it as 2^31.
         */
        xor   r17, r3, r6      /* MSB contains sign of quotient */
        bge   r3, zero, 0f
        sub   r3, zero, r3     /* -r3 */
0:
        bge   r6, zero, 0f
        sub   r6, zero, r6     /* -r6 */
0:
 
 
.Lunsigned_division:
        /* Initialize the unsigned-division loop. */
        movi  r13, 0          /* remainder = 0 */
 
        /* Now
        * r3 = dividend : quotient
        * r4 = 0x25 for div, 0x24 for divu
        * r6 = divisor
        * r13 = remainder
        * r14 = loop counter (already initialized to 32)
        * r17 = MSB contains sign of quotient
        */
 
 
        /*
        *   for (count = 32; count > 0; --count)
        *   {
        */
.Ldivide_loop:
 
        /*
        *       Division:
        *
        *       (remainder:dividend:quotient) <<= 1;
        *
        *       The bit shifted out of the top of r3 becomes the new LSB of
        *       the remainder.
        */
        slli  r13, r13, 1
        cmplt r15, r3, zero        /* r15 = MSB of r3 */
        or    r13, r13, r15
        slli  r3, r3, 1
 
 
        /*
        *       if (remainder >= divisor)
        *       {
        *           set LSB of quotient
        *           remainder -= divisor;
        *       }
        */
        bltu  r13, r6, .Ldiv_skip  /* unsigned compare */
        ori   r3, r3, 1
        sub   r13, r13, r6
.Ldiv_skip:
 
        /*
        *   }
        */
        subi  r14, r14, 1
        bne   r14, zero, .Ldivide_loop
 
        mov   r9, r3               /* all 32 dividend bits consumed: r3 = quotient */
 
 
        /* Now
        * r9 = quotient
        * r4 = 0x25 for div, 0x24 for divu
        * r7 = 4*(C^16)
        * r17 = MSB contains sign of quotient
        */
 
 
        /*
        *  Conditionally negate signed quotient.  If quotient is unsigned,
        *  the sign already is initialized to 0.
        */
        bge   r17, zero, .Lstore_result
        sub   r9, zero, r9     /* -r9 */
 
        br    .Lstore_result
 
 
 
 
        /* MULTIPLICATION
        *
        * A "product" is the number that one gets by summing a "multiplicand"
        * several times.  The "multiplier" specifies the number of copies of the
        * multiplicand that are summed.
        *
        * Actual multiplication algorithms don't use repeated addition, however.
        * Shift-and-add algorithms get the same answer as repeated addition, and
        * they are faster.  To compute the lower half of a product (pppp below)
        * one shifts the product left before adding in each of the partial products
        * (a * mmmm) through (d * mmmm).
        *
        * To compute the upper half of a product (PPPP below), one adds in the
        * partial products (d * mmmm) through (a * mmmm), each time following the
        * add by a right shift of the product.
        *
        *     mmmm
        *   * abcd
        *   ------
        *     ####  = d * mmmm
        *    ####   = c * mmmm
        *   ####    = b * mmmm
        *  ####     = a * mmmm
        * --------
        * PPPPpppp
        *
        * The example above shows 4 partial products.  Computing actual Nios II
        * products requires 32 partials.
        *
        * It is possible to compute the result of mulxsu from the result of mulxuu
        * because the only difference between the results of these two opcodes is
        * the value of the partial product associated with the sign bit of rA.
        *
        *   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
        *
        * It is possible to compute the result of mulxss from the result of mulxsu
        * because the only difference between the results of these two opcodes is
        * the value of the partial product associated with the sign bit of rB.
        *
        *   mulxss = mulxsu - ((rB < 0) ? rA : 0);
        *
        */
 
.Lmul_immed:
        /*
         * Opcode is muli.  Change it into mul for remainder of algorithm.
         *
         * On entry r3 already holds the A operand, r4 the sign-extended IMM16,
         * r6 = 4*(B^16) and r14 = 32.  The B register has not been loaded on
         * this path.  Execution falls through into .Lmultiply.
         */
        mov   r7, r6         /* Field B is dest register, not field C. */
        mov   r6, r4         /* Field IMM16 is src2, not field B. */
        movi  r4, 0x27       /* OPX of mul is 0x27 */
 
.Lmultiply:
        /*
         * Compute, in one 32-iteration loop, both halves of the unsigned
         * 64-bit product of r3 and r6:
         *   r9  accumulates the low 32 bits  (used for mul / muli),
         *   r10 accumulates the high 32 bits (mulxuu; basis for mulxsu/mulxss).
         * Which one is stored is decided after the loop from OPX in r4.
         */

        /* Initialize the multiplication loop. */
        movi  r9, 0          /* mul_product    = 0 */
        movi  r10, 0         /* mulxuu_product = 0 */
        mov   r11, r6        /* save original multiplier for mulxsu and mulxss */
        mov   r12, r6        /* mulxuu_multiplier (will be shifted) */
        movi  r16, 1         /* used to create "rori B,A,1" from "ror B,A,r16" */
 
        /* Now
        * r3 = multiplicand
        * r6 = mul_multiplier
        * r7 = 4 * (dest_register ^ 16) (used later as offset to sp)
        * r9 = mul_product
        * r10 = mulxuu_product
        * r11 = original multiplier
        * r12 = mulxuu_multiplier
        * r14 = loop counter (already initialized)
        * r15 = temp
        * r16 = 1
        */
 
 
        /*
        *   for (count = 32; count > 0; --count)
        *   {
        */
.Lmultiply_loop:
 
        /*
        *       mul_product <<= 1;
        *       lsb = mulxuu_multiplier & 1;
        */
        slli   r9, r9, 1
        andi   r15, r12, 1
 
        /*
        *       if (lsb == 1)
        *       {
        *           mulxuu_product += multiplicand;
        *       }
        *
        *       When the branch is taken r15 is 0, so the "or" further down
        *       adds no carry bit.
        */
        beq   r15, zero, .Lmulx_skip
        add   r10, r10, r3
        cmpltu r15, r10, r3  /* Save the carry from the MSB of mulxuu_product. */
        ror   r15, r15, r16  /* r15 = 0x80000000 on carry, or else 0x00000000 */
.Lmulx_skip:
 
        /*
        *       if (MSB of mul_multiplier == 1)
        *       {
        *           mul_product += multiplicand;
        *       }
        */
        bge   r6, zero, .Lmul_skip
        add   r9, r9, r3
.Lmul_skip:
 
        /*
        *       mulxuu_product >>= 1;           logical shift
        *       mul_multiplier <<= 1;           done with MSB
        *       mulxuu_multiplier >>= 1;        done with LSB
        */
        srli   r10, r10, 1
        or     r10, r10, r15           /* OR in the saved carry bit. */
        slli   r6, r6, 1
        srli   r12, r12, 1
 
 
        /*
        *   }
        */
        subi   r14, r14, 1
        bne    r14, zero, .Lmultiply_loop
 
 
        /*
        *  Multiply emulation loop done.
        */
 
        /* Now
        * r3 = multiplicand (rA)
        * r4 = OPX
        * r7 = 4 * (dest_register ^ 16) (used later as offset to sp)
        * r9 = mul_product
        * r10 = mulxuu_product
        * r11 = original multiplier (rB, or IMM16 for muli)
        * r15 = temp
        */
 
 
        /*
        *  Select/compute the result based on OPX.
        *
        *  The checks are ordered so that each signed variant is derived from
        *  the previous one: mulxuu -> mulxsu -> mulxss.  The final result is
        *  left in r9 for .Lstore_result.
        */
 
 
        /* OPX == mul?  Then store. */
        xori  r15, r4, 0x27
        beq   r15, zero, .Lstore_result
 
        /* It's one of the mulx.. opcodes.  Move over the result. */
        mov   r9, r10
 
        /* OPX == mulxuu?  Then store. */
        xori  r15, r4, 0x07
        beq   r15, zero, .Lstore_result
 
        /* Compute mulxsu
         *
         * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
         */
        bge   r3, zero, .Lmulxsu_skip
        sub   r9, r9, r11
.Lmulxsu_skip:
 
        /* OPX == mulxsu?  Then store. */
        xori  r15, r4, 0x17
        beq   r15, zero, .Lstore_result
 
        /* Compute mulxss
         *
         * mulxss = mulxsu - ((rB < 0) ? rA : 0);
         */
        bge   r11, zero, .Lmulxss_skip
        sub   r9, r9, r3
.Lmulxss_skip:
        /* At this point, assume that OPX is mulxss, so store */
        /* (no OPX test is made here; execution falls through to .Lstore_result) */
        /* At this point, assume that OPX is mulxss, so store */
 
 
.Lstore_result:
        /*
         * Common exit for every emulated instruction.
         *
         * On entry: r9 = result, r7 = 4*(dest ^ 16), i.e. the offset of the
         * destination register's slot in the frame.
         *
         * The result is written into the frame, then r16-r23 and fp are
         * reloaded from it, so a destination in that set receives the result
         * here.  (r16 and r17 are also used as scratch by the multiply and
         * divide code, so they need reloading in any case.)  Slots of other
         * registers are not reloaded in this file; presumably the exception
         * exit code restores the ones the entry code saved - confirm there.
         */
        add   r7, r7, sp
        stw   r9, 0(r7)
 
        ldw   r16,  0(sp)
        ldw   r17,  4(sp)
        ldw   r18,  8(sp)
        ldw   r19, 12(sp)
        ldw   r20, 16(sp)
        ldw   r21, 20(sp)
        ldw   r22, 24(sp)
        ldw   r23, 28(sp)
 
                            /* et @ 32 - Don't corrupt et. */
                            /* bt @ 36 - Breakpoint register usually isn't an operand. */
                            /* gp @ 40 - Don't corrupt gp. */
                            /* sp @ 44 - Don't corrupt sp. */
        ldw   fp,  48(sp)
                            /* ea @ 52 - Don't corrupt ea. */
                            /* ba @ 56 - Breakpoint register usually isn't an operand. */
                            /* ra @ 60 - Not reloaded here. */
 
        addi  sp, sp, 60    /* release the 60-byte frame allocated before the saves */
 
        br    .Lexception_exit
 
 
.Lnot_muldiv:
        /*
         * The instruction is not one this file emulates.  Only r3-r8, r14 and
         * r15 have been modified on the paths that reach this label, so none
         * of the registers saved in the 60-byte frame needs reloading; just
         * release the frame.
         *
         * No branch follows: execution continues with whatever the linker
         * places after this file's .exceptions.soft contribution (not visible
         * here).
         */
 
        addi  sp, sp, 60
 
 
        /*
         * Define the branch target used by .Lstore_result.  The label carries
         * no code in this file; the exit sequence itself is expected to come
         * from another file.
         */
        .section .exceptions.exit.label
.Lexception_exit:
 

Browse

Tools

Subversion Repositories ao486

[/] [ao486/] [trunk/] [syn/] [components/] [sd_card/] [firmware/] [bsp/] [HAL/] [src/] [alt_exception_muldiv.S] - Blame information for rev 8

Line No.	Rev	Author	Line
1	8	alfik	`/******************************************************************************`
2			`* *`
3			`* License Agreement *`
4			`* *`
5			`* Copyright (c) 2003-2005 Altera Corporation, San Jose, California, USA. *`
6			`* All rights reserved. *`
7			`* *`
8			`* Permission is hereby granted, free of charge, to any person obtaining a *`
9			`* copy of this software and associated documentation files (the "Software"), *`
10			`* to deal in the Software without restriction, including without limitation *`
11			`* the rights to use, copy, modify, merge, publish, distribute, sublicense, *`
12			`* and/or sell copies of the Software, and to permit persons to whom the *`
13			`* Software is furnished to do so, subject to the following conditions: *`
14			`* *`
15			`* The above copyright notice and this permission notice shall be included in *`
16			`* all copies or substantial portions of the Software. *`
17			`* *`
18			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *`
19			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *`
20			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *`
21			`* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *`
22			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *`
23			`* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *`
24			`* DEALINGS IN THE SOFTWARE. *`
25			`* *`
26			`* This agreement shall be governed in all respects by the laws of the State *`
27			`* of California and by the laws of the United States of America. *`
28			`* *`
29			`******************************************************************************/`
30
31			`/*`
32			`* This is the software multiply/divide handler for Nios2.`
33			`*/`
34
35			`/*`
36			`* Provide a label which can be used to pull this file in.`
37			`*/`
38
39			`.section .exceptions.start`
40			`.globl alt_exception_muldiv`
41			`alt_exception_muldiv:`
42
43			`/*`
44			`* Pull in the entry/exit code.`
45			`*/`
46			`.globl alt_exception`
47
48
49			`.section .exceptions.soft, "xa"`
50
51
52			`/* INSTRUCTION EMULATION`
53			`* ---------------------`
54			`*`
55			`* Nios II processors generate exceptions for unimplemented instructions.`
56			`* The routines below emulate these instructions. Depending on the`
57			`* processor core, the only instructions that might need to be emulated`
58			`* are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.`
59			`*`
60			`* The emulations match the instructions, except for the following`
61			`* limitations:`
62			`*`
63			`* 1) The emulation routines do not emulate the use of the exception`
64			`* temporary register (et) as a source operand because the exception`
65			`* handler already has modified it.`
66			`*`
67			`* 2) The routines do not emulate the use of the stack pointer (sp) or the`
68			`* exception return address register (ea) as a destination because`
69			`* modifying these registers crashes the exception handler or the`
70			`* interrupted routine.`
71			`*`
72			`* 3) To save code size, the routines do not emulate the use of the`
73			`* breakpoint registers (ba and bt) as operands.`
74			`*`
75			`* Detailed Design`
76			`* ---------------`
77			`*`
78			`* The emulation routines expect the contents of integer registers r0-r31`
79			`* to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp). The`
80			`* routines retrieve source operands from the stack and modify the`
81			`* destination register's value on the stack prior to the end of the`
82			`* exception handler. Then all registers except the destination register`
83			`* are restored to their previous values.`
84			`*`
85			`* The instruction that causes the exception is found at address -4(ea).`
86			`* The instruction's OP and OPX fields identify the operation to be`
87			`* performed.`
88			`*`
89			`* One instruction, muli, is an I-type instruction that is identified by`
90			`* an OP field of 0x24.`
91			`*`
92			`* muli AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-`
93			`* 27 22 6 0 <-- LSB of field`
94			`*`
95			`* The remaining emulated instructions are R-type and have an OP field`
96			`* of 0x3a. Their OPX fields identify them.`
97			`*`
98			`* R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-`
99			`* 27 22 17 11 6 0 <-- LSB of field`
100			`*`
101			`*`
102			`*/`
103
104
105			`/*`
106			`* Split the instruction into its fields. We need 4*A, 4*B, and 4*C as`
107			`* offsets to the stack pointer for access to the stored register values.`
108			`*/`
109			`/* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */`
110			`roli r3, r2, 7 /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */`
111			`roli r4, r3, 3 /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */`
112			`roli r6, r4, 2 /* r6 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */`
113			`srai r4, r4, 16 /* r4 = (sign-extended) IMM16 */`
114			`xori r6, r6, 0x42 /* r6 = CCC,XXXXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cC */`
115			`roli r7, r6, 5 /* r7 = XXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cCCCC,XX */`
116			`andi r5, r2, 0x3f /* r5 = 00000000000000000000000000,PPPPPP */`
117			`xori r3, r3, 0x40`
118			`andi r3, r3, 0x7c /* r3 = 0000000000000000000000000,aAAAA,00 */`
119			`andi r6, r6, 0x7c /* r6 = 0000000000000000000000000,bBBBB,00 */`
120			`andi r7, r7, 0x7c /* r7 = 0000000000000000000000000,cCCCC,00 */`
121
122			`/* Now either`
123			`* r5 = OP`
124			`* r3 = 4*(A^16)`
125			`* r4 = IMM16 (sign extended)`
126			`* r6 = 4*(B^16)`
127			`* r7 = 4*(C^16)`
128			`* or`
129			`* r5 = OP`
130			`*/`
131
132
133			`/*`
134			`* Save everything on the stack to make it easy for the emulation routines`
135			`* to retrieve the source register operands. The exception entry code has`
136			`* already saved some of this so we don't need to do it all again.`
137			`*/`
138
139			`addi sp, sp, -60`
140			`stw zero, 64(sp) /* Save zero on stack to avoid special case for r0. */`
141			`/* Register at and r2-r15 have already been saved. */`
142
143			`stw r16, 0(sp)`
144			`stw r17, 4(sp)`
145			`stw r18, 8(sp)`
146			`stw r19, 12(sp)`
147			`stw r20, 16(sp)`
148			`stw r21, 20(sp)`
149			`stw r22, 24(sp)`
150			`stw r23, 28(sp)`
151			`/* et @ 32 - Has already been changed.*/`
152			`/* bt @ 36 - Usually isn't an operand. */`
153			`stw gp, 40(sp)`
154			`stw sp, 44(sp)`
155			`stw fp, 48(sp)`
156			`/* ea @ 52 - Don't bother to save - it's already been changed */`
157			`/* ba @ 56 - Breakpoint register usually isn't an operand */`
158			`/* ra @ 60 - Has already been saved */`
159
160
161			`/*`
162			`* Prepare for either multiplication or division loop.`
163			`* They both loop 32 times.`
164			`*/`
165			`movi r14, 32`
166
167
168			`/*`
169			`* Get the operands.`
170			`*`
171			`* It is necessary to check for muli because it uses an I-type instruction`
172			`* format, while the other instructions are have an R-type format.`
173			`*/`
174			`add r3, r3, sp /* r3 = address of A-operand. */`
175			`ldw r3, 0(r3) /* r3 = A-operand. */`
176			`movi r15, 0x24 /* muli opcode (I-type instruction format) */`
177			`beq r5, r15, .Lmul_immed /* muli doesn't use the B register as a source */`
178
179			`add r6, r6, sp /* r6 = address of B-operand. */`
180			`ldw r6, 0(r6) /* r6 = B-operand. */`
181			`/* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */`
182			`/* IMM16 not needed, align OPX portion */`
183			`/* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */`
184			`srli r4, r4, 5 /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */`
185			`andi r4, r4, 0x3f /* r4 = 00000000000000000000000000,-OPX-- */`
186
187			`/* Now`
188			`* r5 = OP`
189			`* r3 = src1`
190			`* r6 = src2`
191			`* r4 = OPX (no longer can be muli)`
192			`* r7 = 4*(C^16)`
193			`* r14 = loop counter`
194			`*/`
195
196			`/* ILLEGAL-INSTRUCTION EXCEPTION`
197			`* -----------------------------`
198			`*`
199			`* This code is for Nios II cores that generate exceptions when attempting`
200			`* to execute illegal instructions. Nios II cores that support an`
201			`* illegal-instruction exception are identified by the presence of the`
202			`* macro definition NIOS2_HAS_ILLEGAL_INSTRUCTION_EXCEPTION in system.h .`
203			`*`
204			`* Remember that illegal instructions are different than unimplemented`
205			`* instructions. Illegal instructions are instruction encodings that`
206			`* have not been defined by the Nios II ISA. Unimplemented instructions`
207			`* are legal instructions that must be emulated by some Nios II cores.`
208			`*`
209			`* If we get here, all instructions except multiplies and divides`
210			`* are illegal.`
211			`*`
212			`* This code assumes that OP is not muli (because muli was tested above).`
213			`* All other multiplies and divides are legal. Anything else is illegal.`
214			`*/`
215
216			`movi r8, 0x3a /* OP for R-type mul* and div* */`
217			`bne r5, r8, .Lnot_muldiv`
218
219			`/* r15 already is 0x24 */ /* OPX of divu */`
220			`beq r4, r15, .Ldivide`
221
222			`movi r15,0x27 /* OPX of mul */`
223			`beq r4, r15, .Lmultiply`
224
225			`movi r15,0x07 /* OPX of mulxuu */`
226			`beq r4, r15, .Lmultiply`
227
228			`movi r15,0x17 /* OPX of mulxsu */`
229			`beq r4, r15, .Lmultiply`
230
231			`movi r15,0x1f /* OPX of mulxss */`
232			`beq r4, r15, .Lmultiply`
233
234			`movi r15,0x25 /* OPX of div */`
235			`bne r4, r15, .Lnot_muldiv`
236
237
238			`/* DIVISION`
239			`*`
240			`* Divide an unsigned dividend by an unsigned divisor using`
241			`* a shift-and-subtract algorithm. The example below shows`
242			`* 43 div 7 = 6 for 8-bit integers. This classic algorithm uses a`
243			`* single register to store both the dividend and the quotient,`
244			`* allowing both values to be shifted with a single instruction.`
245			`*`
246			`*                              remainder dividend:quotient`
247			`*                              --------- -----------------`
248			`*   initialize                  00000000     00101011:`
249			`*   shift                       00000000     0101011:_`
250			`*   remainder >= divisor? no    00000000     0101011:0`
251			`*   shift                       00000000     101011:0_`
252			`*   remainder >= divisor? no    00000000     101011:00`
253			`*   shift                       00000001     01011:00_`
254			`*   remainder >= divisor? no    00000001     01011:000`
255			`*   shift                       00000010     1011:000_`
256			`*   remainder >= divisor? no    00000010     1011:0000`
257			`*   shift                       00000101     011:0000_`
258			`*   remainder >= divisor? no    00000101     011:00000`
259			`*   shift                       00001010     11:00000_`
260			`*   remainder >= divisor? yes   00001010     11:000001`
261			`*       remainder -= divisor  - 00000111`
262			`*                             ----------`
263			`*                               00000011     11:000001`
264			`*   shift                       00000111     1:000001_`
265			`*   remainder >= divisor? yes   00000111     1:0000011`
266			`*       remainder -= divisor  - 00000111`
267			`*                             ----------`
268			`*                               00000000     1:0000011`
269			`*   shift                       00000001     :0000011_`
270			`*   remainder >= divisor? no    00000001     :00000110`
271			`*`
272			`* The quotient is 00000110.`
273			`*/`
274
275			`.Ldivide:`
276			`/*`
277			`* Prepare for division by assuming the result`
278			`* is unsigned, and storing its "sign" as 0.`
279			`*/`
280			`movi r17, 0`
281
282
283			`/* Which division opcode? */`
284			`xori r15, r4, 0x25 /* OPX of div; r15 == 0 only when OPX is div */`
285			`bne r15, zero, .Lunsigned_division /* not div (i.e. divu): skip the sign handling */`
286
287
288			`/*`
289			`* OPX is div. Determine and store the sign of the quotient.`
290			`* Then take the absolute value of both operands.`
291			`*/`
292			`xor r17, r3, r6 /* MSB contains sign of quotient */`
293			`bge r3, zero, 0f /* dividend already non-negative */`
294			`sub r3, zero, r3 /* -r3 */`
295			`0:`
296			`bge r6, zero, 0f /* divisor already non-negative */`
297			`sub r6, zero, r6 /* -r6 */`
298			`0:`
299
300
301			`.Lunsigned_division:`
302			`/* Initialize the unsigned-division loop. */`
303			`movi r13, 0 /* remainder = 0 */`
304
305			`/* Now`
306			`* r3 = dividend : quotient`
307			`* r4 = 0x25 for div, 0x24 for divu`
308			`* r6 = divisor`
309			`* r13 = remainder`
310			`* r14 = loop counter (already initialized to 32)`
311			`* r17 = MSB contains sign of quotient`
312			`*/`
313
314
315			`/*`
316			`* for (count = 32; count > 0; --count)`
317			`* {`
318			`*/`
319			`.Ldivide_loop:`
320
321			`/*`
322			`* Division:`
323			`*`
324			`* (remainder:dividend:quotient) <<= 1;`
325			`*/`
326			`slli r13, r13, 1`
327			`cmplt r15, r3, zero /* r15 = MSB of r3 */`
328			`or r13, r13, r15 /* shift that MSB into the LSB of the remainder */`
329			`slli r3, r3, 1 /* frees the LSB of r3 for the next quotient bit */`
330
331
332			`/*`
333			`* if (remainder >= divisor)`
334			`* {`
335			`* set LSB of quotient`
336			`* remainder -= divisor;`
337			`* }`
338			`*/`
339			`bltu r13, r6, .Ldiv_skip /* unsigned compare: remainder < divisor, quotient bit stays 0 */`
340			`ori r3, r3, 1`
341			`sub r13, r13, r6`
342			`.Ldiv_skip:`
343
344			`/*`
345			`* }`
346			`*/`
347			`subi r14, r14, 1`
348			`bne r14, zero, .Ldivide_loop`
349
350			`mov r9, r3 /* r9 is the register that .Lstore_result writes out */`
351
352
353			`/* Now`
354			`* r9 = quotient`
355			`* r4 = 0x25 for div, 0x24 for divu`
356			`* r7 = 4*(C^16) (added to sp in .Lstore_result to locate the result slot)`
357			`* r17 = MSB contains sign of quotient`
358			`*/`
359
360
361			`/*`
362			`* Conditionally negate signed quotient. If quotient is unsigned,`
363			`* the sign already is initialized to 0.`
364			`*/`
365			`bge r17, zero, .Lstore_result /* MSB of r17 clear: store the quotient unchanged */`
366			`sub r9, zero, r9 /* -r9 */`
367
368			`br .Lstore_result`
369
370
371
372
373			`/* MULTIPLICATION`
374			`*`
375			`* A "product" is the number that one gets by summing a "multiplicand"`
376			`* several times. The "multiplier" specifies the number of copies of the`
377			`* multiplicand that are summed.`
378			`*`
379			`* Actual multiplication algorithms don't use repeated addition, however.`
380			`* Shift-and-add algorithms get the same answer as repeated addition, and`
381			`* they are faster. To compute the lower half of a product (pppp below)`
382			`* one shifts the product left before adding in each of the partial products`
383			`* (a * mmmm) through (d * mmmm).`
384			`*`
385			`* To compute the upper half of a product (PPPP below), one adds in the`
386			`* partial products (d * mmmm) through (a * mmmm), each time following the`
387			`* add by a right shift of the product.`
388			`*`
389			`*          mmmm`
390			`*        * abcd`
391			`*        ------`
392			`*          ####  = d * mmmm`
393			`*         ####   = c * mmmm`
394			`*        ####    = b * mmmm`
395			`*       ####     = a * mmmm`
396			`*      --------`
397			`*      PPPPpppp`
398			`*`
399			`* The example above shows 4 partial products. Computing actual Nios II`
400			`* products requires 32 partials.`
401			`*`
402			`* It is possible to compute the result of mulxsu from the result of mulxuu`
403			`* because the only difference between the results of these two opcodes is`
404			`* the value of the partial product associated with the sign bit of rA.`
405			`*`
406			`* mulxsu = mulxuu - ((rA < 0) ? rB : 0);`
407			`*`
408			`* It is possible to compute the result of mulxss from the result of mulxsu`
409			`* because the only difference between the results of these two opcodes is`
410			`* the value of the partial product associated with the sign bit of rB.`
411			`*`
412			`* mulxss = mulxsu - ((rB < 0) ? rA : 0);`
413			`*`
414			`*/`
415
416			`.Lmul_immed:`
417			`/* Opcode is muli. Change it into mul for the remainder of the algorithm. */`
418			`mov r7, r6 /* Field B is dest register, not field C. */`
419			`mov r6, r4 /* Field IMM16 is src2, not field B. */`
420			`movi r4, 0x27 /* OPX of mul is 0x27 */`
421
422			`.Lmultiply:`
423			`/* Initialize the multiplication loop. */`
424			`movi r9, 0 /* mul_product = 0 */`
425			`movi r10, 0 /* mulxuu_product = 0 */`
426			`mov r11, r6 /* save original multiplier for mulxsu and mulxss */`
427			`mov r12, r6 /* mulxuu_multiplier (will be shifted) */`
428			`movi r16, 1 /* used to create "rori B,A,1" from "ror B,A,r16" */`
429
430			`/* Now`
431			`* r3 = multiplicand`
432			`* r6 = mul_multiplier`
433			`* r7 = 4 * dest_register (used later as offset to sp)`
434			`* r9 = mul_product`
435			`* r10 = mulxuu_product`
436			`* r11 = original multiplier`
437			`* r12 = mulxuu_multiplier`
438			`* r14 = loop counter (already initialized)`
439			`* r15 = temp`
440			`* r16 = 1`
441			`*/`
442
443
444			`/*`
445			`* for (count = 32; count > 0; --count)`
446			`* {`
447			`*/`
448			`.Lmultiply_loop:`
449
450			`/*`
451			`* mul_product <<= 1;`
452			`* lsb = mulxuu_multiplier & 1;`
453			`*/`
454			`slli r9, r9, 1`
455			`andi r15, r12, 1`
456
457			`/*`
458			`* if (lsb == 1)`
459			`* {`
460			`* mulxuu_product += multiplicand;`
461			`* }`
462			`*/`
463			`beq r15, zero, .Lmulx_skip /* lsb == 0: r15 stays 0, so the OR below adds no carry */`
464			`add r10, r10, r3`
465			`cmpltu r15, r10, r3 /* Save the carry from the MSB of mulxuu_product. */`
466			`ror r15, r15, r16 /* r15 = 0x80000000 on carry, or else 0x00000000 */`
467			`.Lmulx_skip:`
468
469			`/*`
470			`* if (MSB of mul_multiplier == 1)`
471			`* {`
472			`* mul_product += multiplicand;`
473			`* }`
474			`*/`
475			`bge r6, zero, .Lmul_skip /* signed test: MSB of mul_multiplier is 0 */`
476			`add r9, r9, r3`
477			`.Lmul_skip:`
478
479			`/*`
480			`* mulxuu_product >>= 1; logical shift`
481			`* mul_multiplier <<= 1; done with MSB`
482			`* mulxuu_multiplier >>= 1; done with LSB`
483			`*/`
484			`srli r10, r10, 1`
485			`or r10, r10, r15 /* OR in the saved carry bit. */`
486			`slli r6, r6, 1`
487			`srli r12, r12, 1`
488
489
490			`/*`
491			`* }`
492			`*/`
493			`subi r14, r14, 1`
494			`bne r14, zero, .Lmultiply_loop`
495
496
497			`/*`
498			`* Multiply emulation loop done.`
499			`*/`
500
501			`/* Now`
502			`* r3 = multiplicand`
503			`* r4 = OPX`
504			`* r7 = 4 * dest_register (used later as offset to sp)`
505			`* r9 = mul_product`
506			`* r10 = mulxuu_product`
507			`* r11 = original multiplier`
508			`* r15 = temp`
509			`*/`
510
511
512			`/*`
513			`* Select/compute the result based on OPX.`
514			`*/`
515
516
517			`/* OPX == mul? Then store. */`
518			`xori r15, r4, 0x27`
519			`beq r15, zero, .Lstore_result`
520
521			`/* It's one of the mulx.. opcodes. Move over the result. */`
522			`mov r9, r10`
523
524			`/* OPX == mulxuu? Then store. */`
525			`xori r15, r4, 0x07`
526			`beq r15, zero, .Lstore_result`
527
528			`/* Compute mulxsu`
529			`*`
530			`* mulxsu = mulxuu - ((rA < 0) ? rB : 0);`
531			`*/`
532			`bge r3, zero, .Lmulxsu_skip /* r3 (multiplicand) plays the role of rA here */`
533			`sub r9, r9, r11 /* r11 (original multiplier) plays the role of rB */`
534			`.Lmulxsu_skip:`
535
536			`/* OPX == mulxsu? Then store. */`
537			`xori r15, r4, 0x17`
538			`beq r15, zero, .Lstore_result`
539
540			`/* Compute mulxss`
541			`*`
542			`* mulxss = mulxsu - ((rB < 0) ? rA : 0);`
543			`*/`
544			`bge r11, zero, .Lmulxss_skip`
545			`sub r9, r9, r3`
546			`.Lmulxss_skip:`
547			`/* At this point, assume that OPX is mulxss, so store (falls through to .Lstore_result) */`
548
549
550			`.Lstore_result:`
551			`add r7, r7, sp /* r7 = address of the result slot (sp + offset computed earlier) */`
552			`stw r9, 0(r7) /* write the emulated result (r9) into that slot */`
553
554			`ldw r16, 0(sp) /* reload r16-r23 from offsets 0..28 of the frame */`
555			`ldw r17, 4(sp)`
556			`ldw r18, 8(sp)`
557			`ldw r19, 12(sp)`
558			`ldw r20, 16(sp)`
559			`ldw r21, 20(sp)`
560			`ldw r22, 24(sp)`
561			`ldw r23, 28(sp)`
562
563			`/* et @ 32 - Don't corrupt et. */`
564			`/* bt @ 36 - Breakpoint register usually isn't an operand. */`
565			`/* gp @ 40 - Don't corrupt gp. */`
566			`/* sp @ 44 - Don't corrupt sp. */`
567			`ldw fp, 48(sp)`
568			`/* ea @ 52 - Don't corrupt ea. */`
569			`/* ba @ 56 - Breakpoint register usually isn't an operand. */`
570
571			`addi sp, sp, 60 /* release the 60-byte frame */`
572
573			`br .Lexception_exit`
574
575
576			`.Lnot_muldiv:` 
577
578			`addi sp, sp, 60 /* release the 60-byte frame; this path stores no result and does not reload r16-r23 or fp */`
579
580
581			`.section .exceptions.exit.label`
582			`.Lexception_exit:`
583