URL
https://opencores.org/ocsvn/ao486/ao486/trunk
Subversion Repositories ao486
[/] [ao486/] [trunk/] [syn/] [components/] [ps2/] [software/] [exe_bsp/] [HAL/] [src/] [alt_exception_muldiv.S] - Rev 2
Compare with Previous | Blame | View Log
/******************************************************************************
 *
 * License Agreement
 *
 * Copyright (c) 2003-2005 Altera Corporation, San Jose, California, USA.
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * This agreement shall be governed in all respects by the laws of the State
 * of California and by the laws of the United States of America.
 *
 ******************************************************************************/

/*
 * This is the software multiply/divide handler for Nios2.
 */

    /*
     * Provide a label which can be used to pull this file in.
     */

    .section .exceptions.start
    .globl alt_exception_muldiv
alt_exception_muldiv:

    /*
     * Pull in the entry/exit code.
     */
    .globl alt_exception

    .section .exceptions.soft, "xa"

    /* INSTRUCTION EMULATION
     * ---------------------
     *
     * Nios II processors generate exceptions for unimplemented instructions.
     * The routines below emulate these instructions. Depending on the
     * processor core, the only instructions that might need to be emulated
     * are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
     *
     * The emulations match the instructions, except for the following
     * limitations:
     *
     * 1) The emulation routines do not emulate the use of the exception
     *    temporary register (et) as a source operand because the exception
     *    handler already has modified it.
     *
     * 2) The routines do not emulate the use of the stack pointer (sp) or the
     *    exception return address register (ea) as a destination because
     *    modifying these registers crashes the exception handler or the
     *    interrupted routine.
     *
     * 3) To save code size, the routines do not emulate the use of the
     *    breakpoint registers (ba and bt) as operands.
     *
     * Detailed Design
     * ---------------
     *
     * The emulation routines expect the contents of integer registers r0-r31
     * to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp). The
     * routines retrieve source operands from the stack and modify the
     * destination register's value on the stack prior to the end of the
     * exception handler. Then all registers except the destination register
     * are restored to their previous values.
     *
     * NOTE: the 32 slots are not in r0..r31 order. The code below indexes
     * the frame with 4 * (register number XOR 16): it stores r16 at 0(sp),
     * treats 60(sp) as the ra (r31) slot and stores r0 at 64(sp), so
     * r16-r31 map to 0(sp)..60(sp) and r0-r15 map to 64(sp)..124(sp).
     *
     * The instruction that causes the exception is found at address -4(ea).
     * The instruction's OP and OPX fields identify the operation to be
     * performed.
     *
     * One instruction, muli, is an I-type instruction that is identified by
     * an OP field of 0x24.
     *
     *     muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
     *               27    22                6      0    <-- LSB of field
     *
     * The remaining emulated instructions are R-type and have an OP field
     * of 0x3a. Their OPX fields identify them.
     *
     *     R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
     *               27    22    17     11     6      0  <-- LSB of field
     *
     */

    /*
     * Entry state, as implied by the comments and code below. The code that
     * establishes it is outside this file -- TODO confirm against the
     * exception entry code:
     *   r2         = the instruction word to emulate
     *   at, r2-r15 = already saved on the stack, so r3-r15 are used freely
     *                as scratch registers here
     */

    /*
     * Split the instruction into its fields. We need 4*A, 4*B, and 4*C as
     * offsets to the stack pointer for access to the stored register values.
     *
     * The two xori instructions flip the top bit of each 5-bit register
     * field (shown in lower case below), which turns register number N into
     * N^16 to match the frame layout.
     */
                                /* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
    roli    r3, r2, 7           /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
    roli    r4, r3, 3           /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
    roli    r6, r4, 2           /* r6 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
    srai    r4, r4, 16          /* r4 = (sign-extended) IMM16 */
    xori    r6, r6, 0x42        /* r6 = CCC,XXXXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cC */
    roli    r7, r6, 5           /* r7 = XXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cCCCC,XX */
    andi    r5, r2, 0x3f        /* r5 = 00000000000000000000000000,PPPPPP */
    xori    r3, r3, 0x40
    andi    r3, r3, 0x7c        /* r3 = 0000000000000000000000000,aAAAA,00 */
    andi    r6, r6, 0x7c        /* r6 = 0000000000000000000000000,bBBBB,00 */
    andi    r7, r7, 0x7c        /* r7 = 0000000000000000000000000,cCCCC,00 */

    /* Now either
     *  r5 = OP
     *  r3 = 4*(A^16)
     *  r4 = IMM16 (sign extended)
     *  r6 = 4*(B^16)
     *  r7 = 4*(C^16)
     * or
     *  r5 = OP
     */

    /*
     * Save everything on the stack to make it easy for the emulation routines
     * to retrieve the source register operands. The exception entry code has
     * already saved some of this so we don't need to do it all again.
     */

    addi    sp, sp, -60
    stw     zero, 64(sp)    /* Save zero on stack to avoid special case for r0. */
                            /* Register at and r2-r15 have already been saved. */

    stw     r16,  0(sp)
    stw     r17,  4(sp)
    stw     r18,  8(sp)
    stw     r19, 12(sp)
    stw     r20, 16(sp)
    stw     r21, 20(sp)
    stw     r22, 24(sp)
    stw     r23, 28(sp)
                            /* et @ 32 - Has already been changed.*/
                            /* bt @ 36 - Usually isn't an operand. */
    stw     gp,  40(sp)
    /*
     * NOTE(review): sp has already been lowered by the addi above, so the
     * value stored here is the handler's current sp, not the sp of the
     * interrupted code. An emulated instruction that names sp as a source
     * operand therefore reads the handler's sp. Correcting it would need the
     * size of the entry code's frame, which is not visible in this file.
     */
    stw     sp,  44(sp)
    stw     fp,  48(sp)
                            /* ea @ 52 - Don't bother to save - it's already been changed */
                            /* ba @ 56 - Breakpoint register usually isn't an operand */
                            /* ra @ 60 - Has already been saved */

    /*
     * Prepare for either multiplication or division loop.
     * They both loop 32 times.
     */
    movi    r14, 32

    /*
     * Get the operands.
     *
     * It is necessary to check for muli because it uses an I-type instruction
     * format, while the other instructions have an R-type format.
     */
    add     r3, r3, sp      /* r3 = address of A-operand. */
    ldw     r3, 0(r3)       /* r3 = A-operand. */
    movi    r15, 0x24       /* muli opcode (I-type instruction format) */
    beq     r5, r15, .Lmul_immed /* muli doesn't use the B register as a source */

    add     r6, r6, sp      /* r6 = address of B-operand. */
    ldw     r6, 0(r6)       /* r6 = B-operand. */
                            /* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
                            /* IMM16 not needed, align OPX portion */
                            /* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
    srli    r4, r4, 5       /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
    andi    r4, r4, 0x3f    /* r4 = 00000000000000000000000000,-OPX-- */

    /* Now
     * r5 = OP
     * r3 = src1
     * r6 = src2
     * r4 = OPX (no longer can be muli)
     * r7 = 4*(C^16)
     * r14 = loop counter
     */

    /* ILLEGAL-INSTRUCTION EXCEPTION
     * -----------------------------
     *
     * This code is for Nios II cores that generate exceptions when attempting
     * to execute illegal instructions. Nios II cores that support an
     * illegal-instruction exception are identified by the presence of the
     * macro definition NIOS2_HAS_ILLEGAL_INSTRUCTION_EXCEPTION in system.h .
     *
     * Remember that illegal instructions are different than unimplemented
     * instructions. Illegal instructions are instruction encodings that
     * have not been defined by the Nios II ISA. Unimplemented instructions
     * are legal instructions that must be emulated by some Nios II cores.
     *
     * If we get here, all instructions except multiplies and divides
     * are illegal.
     *
     * This code assumes that OP is not muli (because muli was tested above).
     * All other multiplies and divides are legal. Anything else is illegal.
     */

    movi    r8, 0x3a                    /* OP for R-type mul* and div* */
    bne     r5, r8, .Lnot_muldiv

    /* r15 already is 0x24 */           /* OPX of divu */
    beq     r4, r15, .Ldivide

    movi    r15, 0x27                   /* OPX of mul */
    beq     r4, r15, .Lmultiply

    movi    r15, 0x07                   /* OPX of mulxuu */
    beq     r4, r15, .Lmultiply

    movi    r15, 0x17                   /* OPX of mulxsu */
    beq     r4, r15, .Lmultiply

    movi    r15, 0x1f                   /* OPX of mulxss */
    beq     r4, r15, .Lmultiply

    movi    r15, 0x25                   /* OPX of div */
    bne     r4, r15, .Lnot_muldiv
    /* OPX is div: fall through into .Ldivide. */

    /* DIVISION
     *
     * Divide an unsigned dividend by an unsigned divisor using
     * a shift-and-subtract algorithm. The example below shows
     * 43 div 7 = 6 for 8-bit integers. This classic algorithm uses a
     * single register to store both the dividend and the quotient,
     * allowing both values to be shifted with a single instruction.
     *
     *                               remainder dividend:quotient
     *                               --------- -----------------
     *   initialize                   00000000     00101011:
     *   shift                        00000000     0101011:_
     *   remainder >= divisor? no     00000000     0101011:0
     *   shift                        00000000     101011:0_
     *   remainder >= divisor? no     00000000     101011:00
     *   shift                        00000001     01011:00_
     *   remainder >= divisor? no     00000001     01011:000
     *   shift                        00000010     1011:000_
     *   remainder >= divisor? no     00000010     1011:0000
     *   shift                        00000101     011:0000_
     *   remainder >= divisor? no     00000101     011:00000
     *   shift                        00001010     11:00000_
     *   remainder >= divisor? yes    00001010     11:000001
     *       remainder -= divisor   - 00000111
     *                              ----------
     *                                00000011     11:000001
     *   shift                        00000111     1:000001_
     *   remainder >= divisor? yes    00000111     1:0000011
     *       remainder -= divisor   - 00000111
     *                              ----------
     *                                00000000     1:0000011
     *   shift                        00000001     :0000011_
     *   remainder >= divisor? no     00000001     :00000110
     *
     *   The quotient is 00000110.
     */

.Ldivide:

    /*
     * Prepare for division by assuming the result
     * is unsigned, and storing its "sign" as 0.
     */
    movi    r17, 0

    /* Which division opcode? */
    xori    r15, r4, 0x25               /* OPX of div */
    bne     r15, zero, .Lunsigned_division

    /*
     * OPX is div. Determine and store the sign of the quotient.
     * Then take the absolute value of both operands.
     */
    xor     r17, r3, r6     /* MSB contains sign of quotient */
    bge     r3, zero, 0f
    sub     r3, zero, r3    /* -r3 */
0:
    bge     r6, zero, 0f
    sub     r6, zero, r6    /* -r6 */
0:

.Lunsigned_division:

    /* Initialize the unsigned-division loop. */
    movi    r13, 0          /* remainder = 0 */

    /* Now
     * r3 = dividend : quotient
     * r4 = 0x25 for div, 0x24 for divu
     * r6 = divisor
     * r13 = remainder
     * r14 = loop counter (already initialized to 32)
     * r17 = MSB contains sign of quotient
     */

    /*
     * for (count = 32; count > 0; --count)
     * {
     */
.Ldivide_loop:

    /*
     * Division:
     *
     * (remainder:dividend:quotient) <<= 1;
     */
    slli    r13, r13, 1
    cmplt   r15, r3, zero   /* r15 = MSB of r3 */
    or      r13, r13, r15
    slli    r3, r3, 1

    /*
     * if (remainder >= divisor)
     * {
     *     set LSB of quotient
     *     remainder -= divisor;
     * }
     */
    bltu    r13, r6, .Ldiv_skip
    ori     r3, r3, 1
    sub     r13, r13, r6
.Ldiv_skip:

    /*
     * }
     */
    subi    r14, r14, 1
    bne     r14, zero, .Ldivide_loop

    mov     r9, r3

    /* Now
     * r9 = quotient
     * r4 = 0x25 for div, 0x24 for divu
     * r7 = 4*(C^16)
     * r17 = MSB contains sign of quotient
     */

    /*
     * Conditionally negate signed quotient. If quotient is unsigned,
     * the sign already is initialized to 0.
     */
    bge     r17, zero, .Lstore_result
    sub     r9, zero, r9    /* -r9 */

    br      .Lstore_result

    /* MULTIPLICATION
     *
     * A "product" is the number that one gets by summing a "multiplicand"
     * several times. The "multiplier" specifies the number of copies of the
     * multiplicand that are summed.
     *
     * Actual multiplication algorithms don't use repeated addition, however.
     * Shift-and-add algorithms get the same answer as repeated addition, and
     * they are faster. To compute the lower half of a product (pppp below)
     * one shifts the product left before adding in each of the partial products
     * (a * mmmm) through (d * mmmm).
     *
     * To compute the upper half of a product (PPPP below), one adds in the
     * partial products (d * mmmm) through (a * mmmm), each time following the
     * add by a right shift of the product.
     *
     *             mmmm
     *           * abcd
     *           ------
     *             ####  = d * mmmm
     *            ####   = c * mmmm
     *           ####    = b * mmmm
     *          ####     = a * mmmm
     *         --------
     *         PPPPpppp
     *
     * The example above shows 4 partial products. Computing actual Nios II
     * products requires 32 partials.
     *
     * It is possible to compute the result of mulxsu from the result of mulxuu
     * because the only difference between the results of these two opcodes is
     * the value of the partial product associated with the sign bit of rA.
     *
     *   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
     *
     * It is possible to compute the result of mulxss from the result of mulxsu
     * because the only difference between the results of these two opcodes is
     * the value of the partial product associated with the sign bit of rB.
     *
     *   mulxss = mulxsu - ((rB < 0) ? rA : 0);
     *
     */

.Lmul_immed:
    /*
     * Opcode is muli. Change it into mul for remainder of algorithm.
     * r6 still holds the raw frame offset 4*(B^16) here because the branch
     * to this label is taken before the B operand is loaded.
     */
    mov     r7, r6          /* Field B is dest register, not field C. */
    mov     r6, r4          /* Field IMM16 is src2, not field B. */
    movi    r4, 0x27        /* OPX of mul is 0x27 */

.Lmultiply:
    /* Initialize the multiplication loop. */
    movi    r9, 0           /* mul_product = 0 */
    movi    r10, 0          /* mulxuu_product = 0 */
    mov     r11, r6         /* save original multiplier for mulxsu and mulxss */
    mov     r12, r6         /* mulxuu_multiplier (will be shifted) */
    movi    r16, 1          /* used to create "rori B,A,1" from "ror B,A,r16" */

    /* Now
     * r3 = multiplicand
     * r6 = mul_multiplier
     * r7 = 4 * dest_register (used later as offset to sp)
     * r9 = mul_product
     * r10 = mulxuu_product
     * r11 = original multiplier
     * r12 = mulxuu_multiplier
     * r14 = loop counter (already initialized)
     * r15 = temp
     * r16 = 1
     */

    /*
     * for (count = 32; count > 0; --count)
     * {
     */
.Lmultiply_loop:

    /*
     * mul_product <<= 1;
     * lsb = multiplier & 1;
     */
    slli    r9, r9, 1
    andi    r15, r12, 1

    /*
     * if (lsb == 1)
     * {
     *     mulxuu_product += multiplicand;
     * }
     *
     * When the add is skipped r15 is 0 (it is the lsb that was just tested),
     * so the "OR in the saved carry" below is then a no-op.
     */
    beq     r15, zero, .Lmulx_skip
    add     r10, r10, r3
    cmpltu  r15, r10, r3    /* Save the carry from the MSB of mulxuu_product. */
    ror     r15, r15, r16   /* r15 = 0x80000000 on carry, or else 0x00000000 */
.Lmulx_skip:

    /*
     * if (MSB of mul_multiplier == 1)
     * {
     *     mul_product += multiplicand;
     * }
     */
    bge     r6, zero, .Lmul_skip
    add     r9, r9, r3
.Lmul_skip:

    /*
     * mulxuu_product >>= 1;   logical shift
     * mul_multiplier <<= 1;   done with MSB
     * mulx_multiplier >>= 1;  done with LSB
     */
    srli    r10, r10, 1
    or      r10, r10, r15   /* OR in the saved carry bit. */
    slli    r6, r6, 1
    srli    r12, r12, 1

    /*
     * }
     */
    subi    r14, r14, 1
    bne     r14, zero, .Lmultiply_loop

    /*
     * Multiply emulation loop done.
     */

    /* Now
     * r3 = multiplicand
     * r4 = OPX
     * r7 = 4 * dest_register (used later as offset to sp)
     * r9 = mul_product
     * r10 = mulxuu_product
     * r11 = original multiplier
     * r15 = temp
     */

    /*
     * Select/compute the result based on OPX.
     */

    /* OPX == mul? Then store. */
    xori    r15, r4, 0x27
    beq     r15, zero, .Lstore_result

    /* It's one of the mulx.. opcodes. Move over the result. */
    mov     r9, r10

    /* OPX == mulxuu? Then store. */
    xori    r15, r4, 0x07
    beq     r15, zero, .Lstore_result

    /* Compute mulxsu
     *
     * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
     */
    bge     r3, zero, .Lmulxsu_skip
    sub     r9, r9, r11
.Lmulxsu_skip:

    /* OPX == mulxsu? Then store. */
    xori    r15, r4, 0x17
    beq     r15, zero, .Lstore_result

    /* Compute mulxss
     *
     * mulxss = mulxsu - ((rB < 0) ? rA : 0);
     */
    bge     r11, zero, .Lmulxss_skip
    sub     r9, r9, r3
.Lmulxss_skip:
    /* At this point, assume that OPX is mulxss, so store */

.Lstore_result:
    /*
     * Write the result into the destination register's stack slot, then
     * reload the registers this file saved. Reloading after the store is
     * what delivers the result when the destination is one of r16-r23 or fp.
     * The gp and sp slots are deliberately not reloaded, so a result aimed
     * at either of them is discarded.
     */
    add     r7, r7, sp
    stw     r9, 0(r7)

    ldw     r16,  0(sp)
    ldw     r17,  4(sp)
    ldw     r18,  8(sp)
    ldw     r19, 12(sp)
    ldw     r20, 16(sp)
    ldw     r21, 20(sp)
    ldw     r22, 24(sp)
    ldw     r23, 28(sp)
                            /* et @ 32 - Don't corrupt et. */
                            /* bt @ 36 - Breakpoint register usually isn't an operand. */
                            /* gp @ 40 - Don't corrupt gp. */
                            /* sp @ 44 - Don't corrupt sp. */
    ldw     fp,  48(sp)
                            /* ea @ 52 - Don't corrupt ea. */
                            /* ba @ 56 - Breakpoint register usually isn't an operand. */

    addi    sp, sp, 60
    br      .Lexception_exit

.Lnot_muldiv:
    /*
     * Not a multiply or divide. Only the stack adjustment is undone here:
     * every path that reaches this label has changed r3-r8, r14 and r15 but
     * none of r16-r23 or fp, so nothing saved by this file needs reloading.
     * Execution continues with whatever is placed after this code in
     * .exceptions.soft (determined outside this file).
     */
    addi    sp, sp, 60

    .section .exceptions.exit.label
.Lexception_exit:
