OpenCores
URL https://opencores.org/ocsvn/ao486/ao486/trunk

Subversion Repositories ao486

[/] [ao486/] [trunk/] [syn/] [components/] [sd_card/] [firmware/] [bsp/] [HAL/] [src/] [alt_exception_muldiv.S] - Blame information for rev 8

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 8 alfik
/******************************************************************************
2
*                                                                             *
3
* License Agreement                                                           *
4
*                                                                             *
5
* Copyright (c) 2003-2005 Altera Corporation, San Jose, California, USA.      *
6
* All rights reserved.                                                        *
7
*                                                                             *
8
* Permission is hereby granted, free of charge, to any person obtaining a     *
9
* copy of this software and associated documentation files (the "Software"),  *
10
* to deal in the Software without restriction, including without limitation   *
11
* the rights to use, copy, modify, merge, publish, distribute, sublicense,    *
12
* and/or sell copies of the Software, and to permit persons to whom the       *
13
* Software is furnished to do so, subject to the following conditions:        *
14
*                                                                             *
15
* The above copyright notice and this permission notice shall be included in  *
16
* all copies or substantial portions of the Software.                         *
17
*                                                                             *
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  *
19
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,    *
20
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
21
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER      *
22
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING     *
23
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER         *
24
* DEALINGS IN THE SOFTWARE.                                                   *
25
*                                                                             *
26
* This agreement shall be governed in all respects by the laws of the State   *
27
* of California and by the laws of the United States of America.              *
28
*                                                                             *
29
******************************************************************************/
30
 
31
        /*
32
         * This is the software multiply/divide handler for Nios2.
33
         */
34
 
35
        /*
36
         * Provide a label which can be used to pull this file in.
37
         *
         * The label is exported with .globl and is defined in .exceptions.start;
         * no instructions follow it before the next section switch in this file.
         */
38
 
39
        .section .exceptions.start
40
        .globl alt_exception_muldiv
41
alt_exception_muldiv:
42
 
43
        /*
44
         * Pull in the entry/exit code.
45
         */
46
        .globl alt_exception
47
 
48
 
49
        /*
         * The emulation code that follows is assembled into .exceptions.soft
         * (section flags "xa": allocatable, executable).
         */
        .section .exceptions.soft, "xa"
50
 
51
 
52
        /* INSTRUCTION EMULATION
53
        *  ---------------------
54
        *
55
        * Nios II processors generate exceptions for unimplemented instructions.
56
        * The routines below emulate these instructions.  Depending on the
57
        * processor core, the only instructions that might need to be emulated
58
        * are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
59
        *
60
        * The emulations match the instructions, except for the following
61
        * limitations:
62
        *
63
        * 1) The emulation routines do not emulate the use of the exception
64
        *    temporary register (et) as a source operand because the exception
65
        *    handler already has modified it.
66
        *
67
        * 2) The routines do not emulate the use of the stack pointer (sp) or the
68
        *    exception return address register (ea) as a destination because
69
        *    modifying these registers crashes the exception handler or the
70
        *    interrupted routine.
71
        *
72
        * 3) To save code size, the routines do not emulate the use of the
73
        *    breakpoint registers (ba and bt) as operands.
74
        *
75
        * Detailed Design
76
        * ---------------
77
        *
78
        * The emulation routines expect the contents of integer registers r0-r31
79
        * to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp), with
        * register n stored at offset 4*(n^16) (so r16 is at 0(sp), r0 at 64(sp)).  The
80
        * routines retrieve source operands from the stack and modify the
81
        * destination register's value on the stack prior to the end of the
82
        * exception handler.  Then all registers except the destination register
83
        * are restored to their previous values.
84
        *
85
        * The instruction that causes the exception is found at address -4(ea).
86
        * The instruction's OP and OPX fields identify the operation to be
87
        * performed.
88
        *
89
        * One instruction, muli, is an I-type instruction that is identified by
90
        * an OP field of 0x24.
91
        *
92
        * muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
93
        *           27    22                6      0    <-- LSB of field
94
        *
95
        * The remaining emulated instructions are R-type and have an OP field
96
        * of 0x3a.  Their OPX fields identify them.
97
        *
98
        * R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
99
        *           27    22    17     11     6      0  <-- LSB of field
100
        *
101
        *
102
        */
103
 
104
 
105
        /*
106
         * Split the instruction into its fields.  We need 4*A, 4*B, and 4*C as
107
         * offsets to the stack pointer for access to the stored register values.
108
         *
         * r2 must already hold the faulting instruction word when this code is
         * reached; nothing in this file loads it (presumably the exception
         * entry code does - TODO confirm).
         *
         * The XORs flip the most significant bit of each 5-bit register field
         * in place, so the resulting offsets are 4*(reg^16) rather than 4*reg.
         * That matches the frame built further down, where r16 is stored at
         * 0(sp) and r0 at 64(sp).  Lower-case letters in the bit diagrams mark
         * the flipped bits.
         */
109
                             /* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP    */
110
        roli  r3, r2, 7      /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB   */
111
        roli  r4, r3, 3      /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB    */
112
        roli  r6, r4, 2      /* r6 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II   */
113
        srai  r4, r4, 16     /* r4 = (sign-extended) IMM16                  */
114
                             /* 0x42 = bit 6 (MSB of field B) | bit 1 (MSB of field C) */
        xori  r6, r6, 0x42   /* r6 = CCC,XXXXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cC */
115
        roli  r7, r6, 5      /* r7 = XXXX,NNNNN,PPPPPP,AAAAA,bBBBB,cCCCC,XX */
116
        andi  r5, r2, 0x3f   /* r5 = 00000000000000000000000000,PPPPPP      */
117
        xori  r3, r3, 0x40   /* flip bit 6 = MSB of field A                 */
118
        andi  r3, r3, 0x7c   /* r3 = 0000000000000000000000000,aAAAA,00     */
119
        andi  r6, r6, 0x7c   /* r6 = 0000000000000000000000000,bBBBB,00     */
120
        andi  r7, r7, 0x7c   /* r7 = 0000000000000000000000000,cCCCC,00     */
121
 
122
        /* Now either
123
         *  r5 = OP
124
         *  r3 = 4*(A^16)
125
         *  r4 = IMM16 (sign extended)
126
         *  r6 = 4*(B^16)
127
         *  r7 = 4*(C^16)
128
         * or
129
         *  r5 = OP (R-type: r3, r6, r7 as above; r4 = sign-extended C,OPX,N fields)
130
         */
131
 
132
 
133
        /*
134
         * Save everything on the stack to make it easy for the emulation routines
135
         * to retrieve the source register operands.  The exception entry code has
136
         * already saved some of this so we don't need to do it all again.
137
         *
         * After the 60-byte adjustment below, the slot for register n is at
         * offset 4*(n^16) from sp: r16-r23 occupy 0..28 and zero (r0) is at 64.
         * Slots marked only with a comment are not written by this block.
         */
138
 
139
        addi  sp, sp, -60
140
        stw   zero, 64(sp)   /* Save zero on stack to avoid special case for r0. */
141
                             /* Register at and r2-r15 have already been saved.  */
142
 
143
        stw   r16,  0(sp)
144
        stw   r17,  4(sp)
145
        stw   r18,  8(sp)
146
        stw   r19, 12(sp)
147
        stw   r20, 16(sp)
148
        stw   r21, 20(sp)
149
        stw   r22, 24(sp)
150
        stw   r23, 28(sp)
151
                            /* et @ 32 - Has already been changed.*/
152
                            /* bt @ 36 - Usually isn't an operand.   */
153
        stw   gp,  40(sp)
154
        /*
         * NOTE(review): this stores sp after the -60 adjustment above, so an
         * emulated instruction that reads sp as a source operand sees the
         * adjusted value, not the interrupted code's sp (any adjustment made
         * earlier by the entry code is not visible here) - confirm acceptable.
         */
        stw   sp,  44(sp)
155
        stw   fp,  48(sp)
156
                            /* ea @ 52 - Don't bother to save - it's already been changed */
157
                            /* ba @ 56 - Breakpoint register usually isn't an operand */
158
                            /* ra @ 60 - Has already been saved */
159
 
160
 
161
        /*
162
         *  Prepare for either multiplication or division loop.
163
         *  They both loop 32 times.
164
         */
165
        movi   r14, 32
166
 
167
 
168
        /*
169
         * Get the operands.
170
         *
171
         * It is necessary to check for muli because it uses an I-type instruction
172
         * format, while the other instructions have an R-type format.
173
         *
         * On entry r3 and r6 hold the stack offsets 4*(A^16) and 4*(B^16); they
         * are replaced by the operand values loaded from those slots.  For muli
         * the branch is taken before r6 is loaded, so r6 still holds the offset.
         */
174
        add    r3, r3, sp     /* r3 = address of A-operand. */
175
        ldw    r3, 0(r3)      /* r3 = A-operand. */
176
                              /* r15 keeps 0x24 and is reused below as the OPX of divu. */
        movi   r15, 0x24      /* muli opcode (I-type instruction format) */
177
        beq    r5, r15, .Lmul_immed /* muli doesn't use the B register as a source */
178
 
179
        add    r6, r6, sp     /* r6 = address of B-operand.               */
180
        ldw    r6, 0(r6)      /* r6 = B-operand.                          */
181
                              /* r4 = SSSSSSSSSSSSSSSS,-----IMM16------   */
182
                              /* IMM16 not needed, align OPX portion      */
183
                              /* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
184
        srli   r4, r4, 5      /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
185
        andi   r4, r4, 0x3f   /* r4 = 00000000000000000000000000,-OPX--   */
186
 
187
        /* Now
188
         * r5 = OP
189
         * r3 = src1
190
         * r6 = src2
191
         * r4 = OPX (no longer can be muli)
192
         * r7 = 4*(C^16)
193
         * r14 = loop counter
194
         */
195
 
196
        /* ILLEGAL-INSTRUCTION EXCEPTION
197
         *  -----------------------------
198
         *
199
         *  This code is for Nios II cores that generate exceptions when attempting
200
         *  to execute illegal instructions.  Nios II cores that support an
201
         *  illegal-instruction exception are identified by the presence of the
202
         *  macro definition NIOS2_HAS_ILLEGAL_INSTRUCTION_EXCEPTION in system.h .
203
         *
204
         *  Remember that illegal instructions are different than unimplemented
205
         *  instructions.  Illegal instructions are instruction encodings that
206
         *  have not been defined by the Nios II ISA.  Unimplemented instructions
207
         *  are legal instructions that must be emulated by some Nios II cores.
208
         *
209
         *  If we get here, all instructions except multiplies and divides
210
         *  are illegal.
211
         *
212
         *  This code assumes that OP is not muli (because muli was tested above).
213
         *  All other multiplies and divides are legal.  Anything else is illegal.
214
         */
215
 
216
        /*
         * Dispatch on OP (r5) and OPX (r4).  Anything that is not an R-type
         * instruction, or whose OPX is not one of the six values tested here,
         * goes to .Lnot_muldiv.  r8 and r15 are used as compare temporaries.
         */
        movi  r8, 0x3a                        /* OP for R-type mul* and div* */
217
        bne   r5, r8, .Lnot_muldiv
218
 
219
        /* r15 already is 0x24 */            /* OPX of divu */
220
        beq   r4, r15, .Ldivide
221
 
222
        movi  r15,0x27                        /* OPX of mul */
223
        beq   r4, r15, .Lmultiply
224
 
225
        movi  r15,0x07                        /* OPX of mulxuu */
226
        beq   r4, r15, .Lmultiply
227
 
228
        movi  r15,0x17                        /* OPX of mulxsu */
229
        beq   r4, r15, .Lmultiply
230
 
231
        movi  r15,0x1f                        /* OPX of mulxss */
232
        beq   r4, r15, .Lmultiply
233
 
234
        movi  r15,0x25                        /* OPX of div */
235
        bne   r4, r15, .Lnot_muldiv
        /* OPX is div: execution falls through to .Ldivide. */
236
 
237
 
238
        /* DIVISION
239
         *
240
         * Divide an unsigned dividend by an unsigned divisor using
241
         * a shift-and-subtract algorithm.  The example below shows
242
         * 43 div 7 = 6 for 8-bit integers.  This classic algorithm uses a
243
         * single register to store both the dividend and the quotient,
244
         * allowing both values to be shifted with a single instruction.
245
         *
246
         *                               remainder dividend:quotient
247
         *                               --------- -----------------
248
         *   initialize                   00000000     00101011:
249
         *   shift                        00000000     0101011:_
250
         *   remainder >= divisor? no     00000000     0101011:0
251
         *   shift                        00000000     101011:0_
252
         *   remainder >= divisor? no     00000000     101011:00
253
         *   shift                        00000001     01011:00_
254
         *   remainder >= divisor? no     00000001     01011:000
255
         *   shift                        00000010     1011:000_
256
         *   remainder >= divisor? no     00000010     1011:0000
257
         *   shift                        00000101     011:0000_
258
         *   remainder >= divisor? no     00000101     011:00000
259
         *   shift                        00001010     11:00000_
260
         *   remainder >= divisor? yes    00001010     11:000001
261
         *       remainder -= divisor   - 00000111
262
         *                              ----------
263
         *                                00000011     11:000001
264
         *   shift                        00000111     1:000001_
265
         *   remainder >= divisor? yes    00000111     1:0000011
266
         *       remainder -= divisor   - 00000111
267
         *                              ----------
268
         *                                00000000     1:0000011
269
         *   shift                        00000001     :0000011_
270
         *   remainder >= divisor? no     00000001     :00000110
271
         *
272
         * The quotient is 00000110.
273
         */
274
 
275
.Ldivide:
276
        /*
         * Entered by branch for divu and by fall-through for div.
         * In:  r3 = dividend, r6 = divisor, r4 = OPX (0x24 divu / 0x25 div).
         * Out: r3, r6 = magnitudes to divide, r17 MSB = sign of the quotient,
         *      r13 = 0 (remainder).  r15 is used as a temporary.
         */
        /*
277
         *  Prepare for division by assuming the result
278
         *  is unsigned, and storing its "sign" as 0.
279
         */
280
        movi   r17, 0
281
 
282
 
283
        /* Which division opcode? */
284
        xori   r15, r4, 0x25         /* OPX of div */
285
        bne    r15, zero, .Lunsigned_division
286
 
287
 
288
        /*
289
         *  OPX is div.  Determine and store the sign of the quotient.
290
         *  Then take the absolute value of both operands.
291
         *
         *  "0:" is a numeric local label; each "0f" targets the next "0:"
         *  further down.  The negation wraps: 0x80000000 negates to itself,
         *  and the unsigned compare in the divide loop then treats it as 2^31.
         */
292
        xor   r17, r3, r6      /* MSB contains sign of quotient */
293
        bge   r3, zero, 0f
294
        sub   r3, zero, r3     /* -r3 */
295
0:
296
        bge   r6, zero, 0f
297
        sub   r6, zero, r6     /* -r6 */
298
0:
299
 
300
 
301
.Lunsigned_division:
302
        /* Initialize the unsigned-division loop. */
303
        movi  r13, 0          /* remainder = 0 */
304
 
305
        /* Now
306
        * r3 = dividend : quotient
307
        * r4 = 0x25 for div, 0x24 for divu
308
        * r6 = divisor
309
        * r13 = remainder
310
        * r14 = loop counter (already initialized to 32)
311
        * r17 = MSB contains sign of quotient
312
        */
313
 
314
 
315
        /*
316
        *   for (count = 32; count > 0; --count)
317
        *   {
318
        */
319
.Ldivide_loop:
320
        /*
        * One iteration per quotient bit; r14 counts down from 32.
        * r13:r3 is shifted left as a 64-bit pair: the bit leaving the top of
        * r3 enters the bottom of r13, and the freed LSB of r3 receives the
        * next quotient bit.
        *
        * There is no divide-by-zero check.  With r6 == 0 the unsigned
        * compare below never skips, so all 32 quotient bits end up set.
        */
 
321
        /*
322
        *       Division:
323
        *
324
        *       (remainder:dividend:quotient) <<= 1;
325
        */
326
        slli  r13, r13, 1
327
        cmplt r15, r3, zero        /* r15 = MSB of r3 */
328
        or    r13, r13, r15
329
        slli  r3, r3, 1
330
 
331
 
332
        /*
333
        *       if (remainder >= divisor)
334
        *       {
335
        *           set LSB of quotient
336
        *           remainder -= divisor;
337
        *       }
338
        */
339
        bltu  r13, r6, .Ldiv_skip
340
        ori   r3, r3, 1
341
        sub   r13, r13, r6
342
.Ldiv_skip:
343
 
344
        /*
345
        *   }
346
        */
347
        subi  r14, r14, 1
348
        bne   r14, zero, .Ldivide_loop
349
 
350
        mov   r9, r3           /* .Lstore_result expects the result in r9 */
351
 
352
 
353
        /* Now
354
        * r9 = quotient
355
        * r4 = 0x25 for div, 0x24 for divu
356
        * r7 = 4*(C^16)
357
        * r17 = MSB contains sign of quotient
358
        */
359
 
360
 
361
        /*
362
        *  Conditionally negate signed quotient.  If quotient is unsigned,
363
        *  the sign already is initialized to 0.
364
        */
365
        bge   r17, zero, .Lstore_result
366
        sub   r9, zero, r9     /* -r9 */
367
 
368
        br    .Lstore_result
369
 
370
 
371
 
372
 
373
        /* MULTIPLICATION
374
        *
375
        * A "product" is the number that one gets by summing a "multiplicand"
376
        * several times.  The "multiplier" specifies the number of copies of the
377
        * multiplicand that are summed.
378
        *
379
        * Actual multiplication algorithms don't use repeated addition, however.
380
        * Shift-and-add algorithms get the same answer as repeated addition, and
381
        * they are faster.  To compute the lower half of a product (pppp below)
382
        * one shifts the product left before adding in each of the partial products
383
        * (a * mmmm) through (d * mmmm).
384
        *
385
        * To compute the upper half of a product (PPPP below), one adds in the
386
        * partial products (d * mmmm) through (a * mmmm), each time following the
387
        * add by a right shift of the product.
388
        *
389
        *     mmmm
390
        *   * abcd
391
        *   ------
392
        *     ####  = d * mmmm
393
        *    ####   = c * mmmm
394
        *   ####    = b * mmmm
395
        *  ####     = a * mmmm
396
        * --------
397
        * PPPPpppp
398
        *
399
        * The example above shows 4 partial products.  Computing actual Nios II
400
        * products requires 32 partials.
401
        *
402
        * It is possible to compute the result of mulxsu from the result of mulxuu
403
        * because the only difference between the results of these two opcodes is
404
        * the value of the partial product associated with the sign bit of rA.
405
        *
406
        *   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
407
        *
408
        * It is possible to compute the result of mulxss from the result of mulxsu
409
        * because the only difference between the results of these two opcodes is
410
        * the value of the partial product associated with the sign bit of rB.
411
        *
412
        *   mulxss = mulxsu - ((rB < 0) ? rA : 0);
413
        *
414
        */
415
 
416
.Lmul_immed:
417
        /*
         * Reached only through the muli branch, which is taken before the B
         * operand is loaded: r6 still holds the stack offset 4*(B^16) and r4
         * the sign-extended IMM16.  Falls through into .Lmultiply.
         */
        /* Opcode is muli.  Change it into mul for remainder of algorithm. */
418
        mov   r7, r6         /* Field B is dest register, not field C. */
419
        mov   r6, r4         /* Field IMM16 is src2, not field B. */
420
        movi  r4, 0x27       /* OPX of mul is 0x27 */
421
 
422
.Lmultiply:
423
        /*
         * Common entry for mul, mulxuu, mulxsu, mulxss (and muli via the
         * fall-through above).  r3 = multiplicand, r6 = multiplier.
         */
        /* Initialize the multiplication loop. */
424
        movi  r9, 0          /* mul_product    = 0 */
425
        movi  r10, 0         /* mulxuu_product = 0 */
426
        mov   r11, r6        /* save original multiplier for mulxsu and mulxss */
427
        mov   r12, r6        /* mulxuu_multiplier (will be shifted) */
428
        movi  r16, 1         /* used to create "rori B,A,1" from "ror B,A,r16" */
429
 
430
        /* Now
431
        * r3 = multiplicand
432
        * r6 = mul_multiplier
433
        * r7 = 4 * dest_register (used later as offset to sp)
434
        * r9 = mul_product
435
        * r10 = mulxuu_product
436
        * r11 = original multiplier
437
        * r12 = mulxuu_multiplier
438
        * r14 = loop counter (already initialized)
439
        * r15 = temp
440
        * r16 = 1
441
        */
442
 
443
 
444
        /*
445
        *   for (count = 32; count > 0; --count)
446
        *   {
447
        */
448
.Lmultiply_loop:
449
        /*
        * Each of the 32 iterations advances two products at once, whatever
        * the opcode: the low word (r9) consumes the multiplier MSB-first
        * from r6, the high word (r10) consumes it LSB-first from r12.
        *
        * r15 serves first as the lsb flag and then as the carry word.  When
        * lsb is 0 the add is skipped and r15 is still 0, so the later OR
        * into r10 contributes nothing.
        */
 
450
        /*
451
        *       mul_product <<= 1;
452
        *       lsb = mulxuu_multiplier & 1;
453
        */
454
        slli   r9, r9, 1
455
        andi   r15, r12, 1
456
 
457
        /*
458
        *       if (lsb == 1)
459
        *       {
460
        *           mulxuu_product += multiplicand;
461
        *       }
462
        */
463
        beq   r15, zero, .Lmulx_skip
464
        add   r10, r10, r3
465
        cmpltu r15, r10, r3  /* Save the carry from the MSB of mulxuu_product. */
466
        ror   r15, r15, r16  /* r15 = 0x80000000 on carry, or else 0x00000000 */
467
.Lmulx_skip:
468
 
469
        /*
470
        *       if (MSB of mul_multiplier == 1)
471
        *       {
472
        *           mul_product += multiplicand;
473
        *       }
474
        */
475
        bge   r6, zero, .Lmul_skip
476
        add   r9, r9, r3
477
.Lmul_skip:
478
 
479
        /*
480
        *       mulxuu_product >>= 1;           logical shift
481
        *       mul_multiplier <<= 1;           done with MSB
482
        *       mulxuu_multiplier >>= 1;        done with LSB
483
        */
484
        srli   r10, r10, 1
485
        or     r10, r10, r15           /* OR in the saved carry bit. */
486
        slli   r6, r6, 1
487
        srli   r12, r12, 1
488
 
489
 
490
        /*
491
        *   }
492
        */
493
        subi   r14, r14, 1
494
        bne    r14, zero, .Lmultiply_loop
495
 
496
 
497
        /*
498
        *  Multiply emulation loop done.
499
        */
500
 
501
        /* Now
502
        * r3 = multiplicand
503
        * r4 = OPX
504
        * r7 = 4 * dest_register (used later as offset to sp)
505
        * r9 = mul_product
506
        * r10 = mulxuu_product
507
        * r11 = original multiplier
508
        * r15 = temp
509
        */
510
 
511
 
512
        /*
513
        *  Select/compute the result based on OPX.
514
        */
515
 
516
 
517
        /*
         * The checks below run in the order mul, mulxuu, mulxsu, mulxss and
         * the corrections accumulate in r9: a mulxss result has had both the
         * mulxsu and the mulxss subtraction applied.  r15 is a temporary.
         */
        /* OPX == mul?  Then store. */
518
        xori  r15, r4, 0x27
519
        beq   r15, zero, .Lstore_result
520
 
521
        /* It's one of the mulx.. opcodes.  Move over the result. */
522
        mov   r9, r10
523
 
524
        /* OPX == mulxuu?  Then store. */
525
        xori  r15, r4, 0x07
526
        beq   r15, zero, .Lstore_result
527
 
528
        /* Compute mulxsu
529
         *
530
         * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
531
         */
532
        bge   r3, zero, .Lmulxsu_skip
533
        sub   r9, r9, r11      /* r11 = original (unshifted) rB */
534
.Lmulxsu_skip:
535
 
536
        /* OPX == mulxsu?  Then store. */
537
        xori  r15, r4, 0x17
538
        beq   r15, zero, .Lstore_result
539
 
540
        /* Compute mulxss
541
         *
542
         * mulxss = mulxsu - ((rB < 0) ? rA : 0);
543
         */
544
        bge   r11, zero, .Lmulxss_skip
545
        sub   r9, r9, r3
546
.Lmulxss_skip:
547
        /* At this point, assume that OPX is mulxss, so store */
548
 
549
 
550
.Lstore_result:
551
        /*
         * In: r9 = result, r7 = stack offset of the destination register's slot.
         *
         * The result is written into the saved-register frame, then r16-r23
         * and fp are reloaded from it, so a destination among those registers
         * picks up the new value here.  gp and sp are not reloaded by this
         * block.  Slots below the 60-byte frame boundary are presumably
         * restored by the exit code this branches to - TODO confirm.
         */
        add   r7, r7, sp
552
        stw   r9, 0(r7)
553
 
554
        ldw   r16,  0(sp)
555
        ldw   r17,  4(sp)
556
        ldw   r18,  8(sp)
557
        ldw   r19, 12(sp)
558
        ldw   r20, 16(sp)
559
        ldw   r21, 20(sp)
560
        ldw   r22, 24(sp)
561
        ldw   r23, 28(sp)
562
 
563
                            /* et @ 32 - Don't corrupt et. */
564
                            /* bt @ 36 - Breakpoint register usually isn't an operand. */
565
                            /* gp @ 40 - Don't corrupt gp. */
566
                            /* sp @ 44 - Don't corrupt sp. */
567
        ldw   fp,  48(sp)
568
                            /* ea @ 52 - Don't corrupt ea. */
569
                            /* ba @ 56 - Breakpoint register usually isn't an operand. */
570
 
571
        addi  sp, sp, 60     /* undo the 60-byte frame adjustment */
572
 
573
        br    .Lexception_exit
574
 
575
 
576
.Lnot_muldiv:
577
        /*
         * Reached when OP/OPX is not one of the emulated multiplies/divides.
         * Only the 60-byte frame is popped: on the paths that branch here,
         * none of r16-r23, gp or fp has been written, so nothing is reloaded.
         * Execution continues with whatever follows this point in
         * .exceptions.soft; that code is not visible in this file.
         */
 
578
        addi  sp, sp, 60
579
 
580
 
581
        /*
         * The section switch below only defines .Lexception_exit inside
         * .exceptions.exit.label; where it lands relative to the code above
         * is decided at link time.
         */
        .section .exceptions.exit.label
582
.Lexception_exit:
583
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.