OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgcc/] [config/] [microblaze/] [muldi3_hard.S] - Rev 784

Go to most recent revision | Compare with Previous | Blame | View Log

###################################- 
# 
#  Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
#
#  Contributed by Michael Eager <eager@eagercon.com>.
#
#  This file is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by the
#  Free Software Foundation; either version 3, or (at your option) any
#  later version.
#
#  GCC is distributed in the hope that it will be useful, but WITHOUT
#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
#  License for more details.
#
#  Under Section 7 of GPL version 3, you are granted additional
#  permissions described in the GCC Runtime Library Exception, version
#  3.1, as published by the Free Software Foundation.
#
#  You should have received a copy of the GNU General Public License and
#  a copy of the GCC Runtime Library Exception along with this program;
#  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
#  <http://www.gnu.org/licenses/>. 
# 
#  muldi3_hard.S
# 
#  Multiply operation for 64 bit integers, for devices with hard multiply
#       Input : Operand1[H] in Reg r5
#               Operand1[L] in Reg r6           
#               Operand2[H] in Reg r7
#               Operand2[L] in Reg r8   
#       Output: Result[H] in Reg r3
#               Result[L] in Reg r4     
# 
#  Explanation:
#
#       Each 64-bit input is split into four 16-bit pieces as follows
#               op1 = A B C D
#               op2 = E F G H
#       result =    D * H 
#                + (C * H + D * G) << 16
#                + (B * H + C * G + D * F) << 32
#                + (A * H + B * G + C * F + D * E) << 48 
#
#       Only 64 bits of the output are considered
#
#######################################

        .globl  muldi3_hardproc
        .ent    muldi3_hardproc
muldi3_hardproc:
#
#  muldi3_hardproc: low 64 bits of a 64 x 64 bit multiply.
#
#       In  : r5 = Operand1[H], r6 = Operand1[L]
#             r7 = Operand2[H], r8 = Operand2[L]
#       Out : r3 = Result[H],   r4 = Result[L]
#       Overwrites r7-r12; r20-r27 are saved and restored here.
#
#  Frame (40 bytes, addressed from the adjusted r1):
#       r1+0  .. r1+28 : saved r20-r27
#       r1+32 .. r1+39 : result scratch, halfwords Pos0 Pos1 Pos2 Pos3
#       r1+44 .. r1+59 : operands, written above this frame
#
#  NOTE: the halfword offsets used with lhui/shi below treat the lower
#  address of each stored word as its most significant half, i.e. they
#  rely on big-endian storage order.
#
        addi    r1,r1,-40

#  Save the input operands on the caller's stack so that each 16 bit
#  piece can be fetched with a halfword load
        swi     r5,r1,44
        swi     r6,r1,48
        swi     r7,r1,52
        swi     r8,r1,56

# Store all the callee saved registers used as A thru H
        swi     r20,r1,0
        swi     r21,r1,4
        swi     r22,r1,8
        swi     r23,r1,12
        swi     r24,r1,16
        swi     r25,r1,20
        swi     r26,r1,24
        swi     r27,r1,28

# Load all the 16 bit values for A thru H (zero extended)
        lhui    r20,r1,44   # A
        lhui    r21,r1,46   # B
        lhui    r22,r1,48   # C
        lhui    r23,r1,50   # D
        lhui    r24,r1,52   # E
        lhui    r25,r1,54   # F
        lhui    r26,r1,56   # G
        lhui    r27,r1,58   # H

# D * H ==> least significant 32 bits of the result ==> Store1
        mul     r9,r23,r27
        swi     r9,r1,36    # Pos2 and Pos3

# Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2
# The carries out of bit 31 are collected in r12; they belong to Pos0
        lhui    r11,r1,36   # Pos2 = Hi (Store1)
        mul     r9,r22,r27  # C * H
        mul     r10,r23,r26 # D * G
        add     r9,r9,r10
        addc    r12,r0,r0   # r12 = carry of C * H + D * G
        add     r9,r9,r11
        addc    r12,r12,r0  # r12 += carry of adding Hi (Store1)
        shi     r9,r1,36    # Store Pos2 (low half of Store2)
        swi     r9,r1,32    # Park Store2 so that its high half can be read
        lhui    r11,r1,32   # r11 = Hi (Store2)

# Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1
        mul     r9,r21,r27  # B * H
        mul     r10,r22,r26 # C * G
        mul     r7,r23,r25  # D * F
        add     r9,r9,r11
        add     r9,r9,r10
        add     r9,r9,r7
        swi     r9,r1,32    # Pos0 and Pos1

# Hi (Store3) + A * H + B * G + C * F + D * E + carries ==> Store4 ==> Pos0
# Only the low 16 bits of this sum are part of the 64 bit result
        lhui    r11,r1,32   # Pos0 = Hi (Store3)
        mul     r9,r20,r27  # A * H
        mul     r10,r21,r26 # B * G
        mul     r7,r22,r25  # C * F
        mul     r8,r23,r24  # D * E
        add     r9,r9,r11
        add     r9,r9,r10
        add     r9,r9,r7
        add     r9,r9,r8
        add     r9,r9,r12   # Fold in the carries saved from Store2
        shi     r9,r1,32    # Store Pos0 (halfword store keeps the low 16 bits)

# Move results to r3 and r4
        lwi     r3,r1,32    # Hi Part = Pos0 and Pos1
        lwi     r4,r1,36    # Lo Part = Pos2 and Pos3

# Restore Callee saved registers
        lwi     r20,r1,0
        lwi     r21,r1,4
        lwi     r22,r1,8
        lwi     r23,r1,12
        lwi     r24,r1,16
        lwi     r25,r1,20
        lwi     r26,r1,24
        lwi     r27,r1,28

# Restore Frame and return (the addi sits in the delay slot of rtsd)
        rtsd    r15,8
        addi    r1,r1,40

.end muldi3_hardproc 
        

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.