;; libgcc routines for the Renesas H8/300 CPU.
;; Contributed by Steve Chamberlain
;; Optimizations by Toshiyasu Morita

/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* Assembler register definitions.

   A0..A3 name r0..r3, which carry the operands and results of the
   routines in this file.  S0..S2 name r4..r6, which the routines save
   with PUSHP and restore with POPP around their use.

   Suffixes:
     L, H  low and high byte of the 16-bit register (rNl, rNh)
     P     the widest form of the register for the selected CPU:
           rN on the H8/300, erN on the H8/300H, H8S and H8SX
     E     the eN register, used as the upper word of erN
           (only defined for the H8/300H, H8S and H8SX)

   PUSHP and POPP push and pop one P-sized register.  */

#define A0 r0
#define A0L r0l
#define A0H r0h

#define A1 r1
#define A1L r1l
#define A1H r1h

#define A2 r2
#define A2L r2l
#define A2H r2h

#define A3 r3
#define A3L r3l
#define A3H r3h

#define S0 r4
#define S0L r4l
#define S0H r4h

#define S1 r5
#define S1L r5l
#define S1H r5h

#define S2 r6
#define S2L r6l
#define S2H r6h

#ifdef __H8300__
#define PUSHP	push
#define POPP	pop

#define A0P	r0
#define A1P	r1
#define A2P	r2
#define A3P	r3
#define S0P	r4
#define S1P	r5
#define S2P	r6
#endif

#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
#define PUSHP	push.l
#define POPP	pop.l

#define A0P	er0
#define A1P	er1
#define A2P	er2
#define A3P	er3
#define S0P	er4
#define S1P	er5
#define S2P	er6

#define A0E	e0
#define A1E	e1
#define A2E	e2
#define A3E	e3
#endif

/* Select the assembler target that matches the CPU macro the compiler
   defined.  The directive ending in "n" is used when __NORMAL_MODE__
   is defined.  Nothing is emitted for the plain H8/300.  */

#ifdef __H8300H__
#ifdef __NORMAL_MODE__
	.h8300hn
#else
	.h8300h
#endif
#endif

#ifdef __H8300S__
#ifdef __NORMAL_MODE__
	.h8300sn
#else
	.h8300s
#endif
#endif
#ifdef __H8300SX__
#ifdef __NORMAL_MODE__
	.h8300sxn
#else
	.h8300sx
#endif
#endif

#ifdef L_cmpsi2
#ifdef __H8300__
;; ___cmpsi2: three-way signed compare of two 4 byte integers.
;;
;; In:	A0/A1 = a (A0 is the high word)
;;	A2/A3 = b (A2 is the high word)
;; Out:	A0 = 0 if a < b, 1 if a == b, 2 if a > b
;;
;; The high words are compared as signed values; when they are equal
;; the low words decide and are compared as unsigned values.
	.section .text
	.align 2
	.global ___cmpsi2
___cmpsi2:
	cmp.w	A0,A2		; flags from b.high - a.high
	bne	.L2
	cmp.w	A1,A3		; high words equal: flags from b.low - a.low
	bne	.L4
	mov.w	#1,A0		; a == b
	rts
.L2:
	bgt	.L5		; b.high > a.high (signed), so a < b
.L3:
	mov.w	#2,A0		; a > b
	rts
.L4:
	bls	.L3		; b.low < a.low (unsigned), so a > b
.L5:
	sub.w	A0,A0		; a < b
	rts
	.end
#endif
#endif /* L_cmpsi2 */

#ifdef L_ucmpsi2
#ifdef __H8300__
;; ___ucmpsi2: three-way unsigned compare of two 4 byte integers.
;;
;; In:	A0/A1 = a (A0 is the high word)
;;	A2/A3 = b (A2 is the high word)
;; Out:	A0 = 0 if a < b, 1 if a == b, 2 if a > b
;;
;; Both the high and the low words are compared as unsigned values.
	.section .text
	.align 2
	.global ___ucmpsi2
___ucmpsi2:
	cmp.w	A0,A2		; flags from b.high - a.high
	bne	.L2
	cmp.w	A1,A3		; high words equal: flags from b.low - a.low
	bne	.L4
	mov.w	#1,A0		; a == b
	rts
.L2:
	bhi	.L5		; b.high > a.high (unsigned), so a < b
.L3:
	mov.w	#2,A0		; a > b
	rts
.L4:
	bls	.L3		; b.low < a.low (unsigned), so a > b
.L5:
	sub.w	A0,A0		; a < b
	rts
	.end
#endif
#endif /* L_ucmpsi2 */

#ifdef L_divhi3

;; HImode divides for the H8/300.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

#ifdef __H8300__
	.section .text
	.align 2

; divnorm: general purpose normalize routine for the signed divide.
;
; In:	A0 = dividend, A1 = divisor
; Out:	A0, A1 = both turned into +ve numbers
;	A2L = copy of the CCR; bit 3 (the N flag) is set when the
;	      quotient should be negative, i.e. when the signs differ

divnorm:
	or	A0H,A0H		; set N from the sign of the dividend
	stc	ccr,A2L		; keep it; the bge below tests the same flags
	bge	_lab1
	not	A0H		; dividend is -ve, make it +ve
	not	A0L
	adds	#1,A0
_lab1:
	or	A1H,A1H		; look at the divisor
	bge	_lab2
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
	xor	#0x8,A2L	; and toggle sign of result
_lab2:
	rts

;; modnorm: basically the same as divnorm, except that the sign of the
;; dividend (A0) alone determines the sign of the result, so a negative
;; divisor does not toggle bit 3 of A2L.
modnorm:
	or	A0H,A0H		; set N from the sign of the dividend
	stc	ccr,A2L		; keep it as the sign of the remainder
	bge	_lab7
	not	A0H		; dividend is -ve, make it +ve
	not	A0L
	adds	#1,A0
_lab7:
	or	A1H,A1H		; look at the divisor
	bge	_lab8
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
_lab8:
	rts

; A0=A0/A1 signed
;
; Divides the absolute values with ___udivhi3 and then falls into
; negans, which negates A0 when bit 3 of A2L is set.

	.global	___divhi3
___divhi3:
	bsr	divnorm
	bsr	___udivhi3
negans:
	btst	#3,A2L		; should answer be negative ?
	beq	_lab4
	not	A0H		; yes, so make it so
	not	A0L
	adds	#1,A0
_lab4:
	rts

; A0=A0%A1 signed
;
; The remainder left in A3 by ___udivhi3 is moved to A0 and then given
; the sign recorded by modnorm.

	.global	___modhi3
___modhi3:
	bsr	modnorm
	bsr	___udivhi3
	mov	A3,A0
	bra	negans

; A0=A0%A1 unsigned

	.global	___umodhi3
___umodhi3:
	bsr	___udivhi3
	mov	A3,A0		; return the remainder instead of the quotient
	rts

; A0=A0/A1 unsigned
; A3=A0%A1 unsigned
; A2H trashed
;
; Key to the register pictures in the comments below:
; D high 8 bits of denom
; d low 8 bits of denom
; N high 8 bits of num
; n low 8 bits of num
; M high 8 bits of mod
; m low 8 bits of mod
; Q high 8 bits of quot
; q low 8 bits of quot
; P preserve

; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
; see how to partition up the expression.

	.global	___udivhi3
___udivhi3:
				; A0 A1 A2 A3
				; Nn Dd       P
	sub.w	A3,A3		; Nn Dd xP 00
	or	A1H,A1H
	bne	divlongway	; denominator needs more than 8 bits
	or	A0H,A0H
	beq	_lab6		; N == 0, only the low byte to divide

; we know that D == 0 and N is != 0
	mov.b	A0H,A3L		; Nn Dd xP 0N
	divxu	A1L,A3		;          MQ
	mov.b	A3L,A0H		; Q
; dealt with N, do n
_lab6:
	mov.b	A0L,A3L		;           n
	divxu	A1L,A3		;          mq
	mov.b	A3L,A0L		; Qq
	mov.b	A3H,A3L		;           m
	mov.b	#0x0,A3H	; Qq       0m
	rts

; D != 0 - which means the denominator does not fit in 8 bits and
;          divxu cannot be used, so loop around with shift and
;          subtract to get the result.  A3 holds the running
;          remainder and A0L collects the quotient bits.

divlongway:
	mov.b	A0H,A3L		; Nn Dd xP 0N
	mov.b	#0x0,A0H	; high byte of answer has to be zero
	mov.b	#0x8,A2H	; 8 quotient bits to produce
div8:
	add.b	A0L,A0L		; n*=2, top bit goes to carry
	rotxl	A3L		; Make remainder bigger, taking that bit in
	rotxl	A3H
	sub.w	A1,A3		; try to take the denominator off
	bhs	setbit		; it fitted : set a bit
	add.w	A1,A3		; no : too far , put it back

	dec	A2H
	bne	div8		; next bit
	rts

setbit:
	inc	A0L		; do insert bit
	dec	A2H
	bne	div8		; next bit
	rts

#endif /* __H8300__ */
#endif /* L_divhi3 */

#ifdef L_divsi3

;; 4 byte integer divides for the H8/300.
;;
;; We have one routine which does all the work and lots of
;; little ones which prepare the args and massage the sign.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

	.section .text
	.align 2

; divnorm: put the absolute values of both operands back into their
; registers and leave the sign of the result in bit 3 of S2L (r6l).
; Bit 3 is the N flag of the CCR copied by stc; it ends up set when
; the operand signs differ.
; This function is here to keep branch displacements small.

#ifdef __H8300__

; H8/300: numerator in A0/A1, denominator in A2/A3.
divnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	; negate arg: complement all four bytes, then add one
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
postive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	postive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor.b	#0x08,S2L	; toggle the result sign
postive2:
	rts

;; modnorm: basically the same, except that the sign of the numerator
;; alone determines the sign of the result; a negative denominator is
;; made positive without toggling bit 3 of S2L.
modnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
mpostive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	mpostive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
mpostive2:
	rts

#else /* __H8300H__ */

; H8/300H and later: numerator in A0P, denominator in A1P.
divnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	neg.l	A0P		; negate arg

postive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	postive2

	neg.l	A1P		; negate arg
	xor.b	#0x08,S2L	; toggle the result sign

postive2:
	rts

;; modnorm: basically the same, except that the sign of the numerator
;; alone determines the sign of the result.
modnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	neg.l	A0P		; negate arg

mpostive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	mpostive2

	neg.l	A1P		; negate arg

mpostive2:
	rts

#endif

; ___modsi3: signed remainder.
; numerator in A0/A1
; denominator in A2/A3
; Saves the S registers it needs, divides the absolute values and
; leaves through exitdiv, which applies the sign and restores them.
	.global	___modsi3
___modsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	modnorm
	bsr	divmodsi4
	mov	S0,A0		; the remainder comes back in S0/S1
	mov	S1,A1
	bra	exitdiv
#else
	PUSHP	S2P
	bsr	modnorm
	bsr	___udivsi3
	mov.l	er3,er0		; the remainder comes back in er3
	bra	exitdiv
#endif

	;; H8/300H and H8S version of ___udivsi3 is defined later in
	;; the file.
#ifdef __H8300__
; ___udivsi3: unsigned quotient, returned in A0/A1.
	.global	___udivsi3
___udivsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	bra	reti
#endif

; ___umodsi3: unsigned remainder.
	.global	___umodsi3
___umodsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	mov	S0,A0		; the remainder comes back in S0/S1
	mov	S1,A1
	bra	reti
#else
	bsr	___udivsi3
	mov.l	er3,er0		; the remainder comes back in er3
	rts
#endif

; ___divsi3: signed quotient.  Falls through into exitdiv.
	.global	___divsi3
___divsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	jsr	divnorm
	jsr	divmodsi4
#else
	PUSHP	S2P
	jsr	divnorm
	bsr	___udivsi3
#endif

	; examine what the sign should be
exitdiv:
	btst	#3,S2L
	beq	reti

	; should be -ve
#ifdef __H8300__
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
#else /* __H8300H__ */
	neg.l	A0P
#endif

	; common exit: restore the saved registers in reverse push order
reti:
#ifdef __H8300__
	POPP	S1P
	POPP	S0P
#endif
	POPP	S2P
	rts

	; takes A0/A1 numerator (A0P for H8/300H)
	; A2/A3 denominator (A1P for H8/300H)
	; returns A0/A1 quotient (A0P for H8/300H)
	; S0/S1 remainder (S0P for H8/300H)
	; trashes S2H

#ifdef __H8300__

divmodsi4:
	sub.w	S0,S0		; zero play area
	mov.w	S0,S1
	mov.b	A2H,S2H		; are the top three denominator bytes all 0?
	or	A2L,S2H
	or	A3H,S2H
	bne	DenHighNonZero

	; The denominator fits in A3L, so divide a byte at a time with
	; divxu, starting at the first numerator byte that is not zero.
	; Despite their names the NumByteNZero labels are reached when
	; numerator byte N is the first nonzero one.
	mov.b	A0H,A0H
	bne	NumByte0Zero
	mov.b	A0L,A0L
	bne	NumByte1Zero
	mov.b	A1H,A1H
	bne	NumByte2Zero
	bra	NumByte3Zero
NumByte0Zero:
	mov.b	A0H,S1L
	divxu	A3L,S1		; S1L = quotient byte, S1H = remainder
	mov.b	S1L,A0H
NumByte1Zero:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
NumByte2Zero:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
NumByte3Zero:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L		; final remainder into the low byte of S1
	mov.b	#0x0,S1H
	rts

; have to do the divide by shift and test
; The top numerator byte goes straight into the remainder (S0/S1) and
; the other three move up one byte, leaving 24 bits to shift through.
DenHighNonZero:
	mov.b	A0H,S1L
	mov.b	A0L,A0H
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H		; only do 24 iterations

nextbit:
	add.w	A1,A1		; double the answer guess
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L		; double remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1		; does it all fit
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	setone

	add.w	A3,S1		; no, restore mistake
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	nextbit
	rts

setone:
	inc	A1L		; it fitted, so this quotient bit is 1
	dec	S2H
	bne	nextbit
	rts

#else /* __H8300H__ */

	;; ___udivsi3: er0 = er0 / er1 unsigned.
	;; This function also computes the remainder and stores it in er3.
	.global	___udivsi3
___udivsi3:
	mov.w	A1E,A1E		; denominator top word 0?
	bne	DenHighNonZero

	; do it the easy way, see page 107 in manual
	mov.w	A0E,A2		; divide the high word of the numerator first
	extu.l	A2P
	divxu.w	A1,A2P		; A2 = high quotient word, A2E = remainder
	mov.w	A2E,A0E		; carry that remainder into the low divide
	divxu.w	A1,A0P		; A0 = low quotient word, A0E = remainder
	mov.w	A0E,A3
	mov.w	A2,A0E		; put the high quotient word in place
	extu.l	A3P		; er3 = remainder
	rts

	; er0 = er0 / er1
	; er3 = er0 % er1
	; trashes er1 er2
	; expects er1 >= 2^16
DenHighNonZero:
	mov.l	er0,er3
	mov.l	er1,er2
#ifdef __H8300H__
	; shift dividend and divisor right together, one bit at a time
divmod_L21:
	shlr.l	er0
	shlr.l	er2		; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
#else
	; Same normalization, two bits at a time.  The divisor is shifted
	; one step ahead of the dividend; the code at divmod_L22A then
	; brings the dividend level by one or two bits.
	; NOTE(review): the rotxl/rotxr pair looks like it takes back one
	; bit of divisor shift when the top bit of r2 is clear - confirm.
	shlr.l	#2,er2		; make divisor < 2^16
	mov.w	e2,e2
	beq	divmod_L22A
divmod_L21:
	shlr.l	#2,er0
divmod_L22:
	shlr.l	#2,er2		; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
divmod_L22A:
	rotxl.w	r2
	bcs	divmod_L23
	shlr.l	er0
	bra	divmod_L24
divmod_L23:
	rotxr.w	r2
	shlr.l	#2,er0
divmod_L24:
#endif
	;; At this point,
	;;  er0 contains shifted dividend
	;;  er1 contains divisor
	;;  er2 contains shifted divisor
	;;  er3 contains dividend, later remainder
	divxu.w	r2,er0		; r0 now contains the approximate quotient (AQ)
	extu.l	er0
	beq	divmod_L25
	subs	#1,er0		; er0 = AQ - 1
	mov.w	e1,r2
	mulxu.w	r0,er2		; er2 = upper (AQ - 1) * divisor
	sub.w	r2,e3		; dividend - 65536 * er2
	mov.w	r1,r2
	mulxu.w	r0,er2		; compute er3 = remainder (tentative)
	sub.l	er2,er3		; er3 = dividend - (AQ - 1) * divisor
divmod_L25:
	cmp.l	er1,er3		; is divisor < remainder?
	blo	divmod_L26
	adds	#1,er0
	sub.l	er1,er3		; correct the remainder
divmod_L26:
	rts

#endif
#endif /* L_divsi3 */

#ifdef L_mulhi3

;; HImode multiply.
; The H8/300 only has an 8*8->16 multiply.
; The answer is the same as:
;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore A1.h * A0.h because that will all fall off the top)
; A0 in
; A1 in
; A0 answer
; A2 and A3 are used as scratch.

#ifdef __H8300__
	.section .text
	.align 2
	.global	___mulhi3
___mulhi3:
	mov.b	A1L,A2L		; A2l gets srcb.l
	mulxu	A0L,A2		; A2 gets first sub product

	mov.b	A0H,A3L		; prepare for
	mulxu	A1L,A3		; second sub product

	add.b	A3L,A2H		; sum first two terms; only the low byte of
				; the cross product lands in the result

	mov.b	A1H,A3L		; third sub product
	mulxu	A0L,A3

	add.b	A3L,A2H		; almost there
	mov.w	A2,A0		; that is
	rts

#endif
#endif /* L_mulhi3 */

#ifdef L_mulsi3

;; SImode multiply.
;;
;; I think that shift and add may be sufficient for this.  Using the
;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
;; the inner loop uses maybe 20 cycles + overhead, but terminates
;; quickly on small args.
;;
;; A0/A1 src_a
;; A2/A3 src_b
;;
;;  while (a)
;;    {
;;      if (a & 1)
;;        r += b;
;;      a >>= 1;
;;      b <<= 1;
;;    }

	.section .text
	.align 2

#ifdef __H8300__

;; H8/300 version: r is accumulated in S0/S1, which are saved on entry
;; and restored on exit, and is returned in A0/A1.
	.global	___mulsi3
___mulsi3:
	PUSHP	S0P
	PUSHP	S1P

	sub.w	S0,S0		; r = 0
	sub.w	S1,S1

	; while (a)
_top:
	mov.w	A0,A0
	bne	_more
	mov.w	A1,A1
	beq	_done
_more:
	; if (a & 1)
	bld	#0,A1L		; carry = lowest bit of a
	bcc	_nobit
	; r += b
	add.w	A3,S1
	addx	A2L,S0L
	addx	A2H,S0H
_nobit:
	; a >>= 1
	shlr	A0H
	rotxr	A0L
	rotxr	A1H
	rotxr	A1L

	; b <<= 1
	add.w	A3,A3
	addx	A2L,A2L
	addx	A2H,A2H
	bra	_top

_done:
	mov.w	S0,A0		; return r
	mov.w	S1,A1
	POPP	S1P
	POPP	S0P
	rts

#else /* __H8300H__ */

;
; mulsi3 for H8/300H - based on Renesas SH implementation
;
; by Toshiyasu Morita
;
; Old code:
;
; 16b * 16b = 372 states (worst case)
; 32b * 32b = 724 states (worst case)
;
; New code:
;
; 16b * 16b =  48 states
; 16b * 32b =  72 states
; 32b * 32b =  92 states
;
; In the comments below the operands are split into 16-bit halves:
; er0 = a:b (e0 = a, r0 = b) and er1 = c:d (e1 = c, r1 = d).
; The result in er0 is b*d plus the low words of a*d and c*b added
; into its upper word.  er2 and er3 are used as scratch.
;

	.global	___mulsi3
___mulsi3:
	mov.w	r1,r2		; ( 2 states) b * d
	mulxu	r0,er2		; (22 states)

	mov.w	e0,r3		; ( 2 states) a * d
	beq	L_skip1		; ( 4 states) a == 0, nothing to add
	mulxu	r1,er3		; (22 states)
	add.w	r3,e2		; ( 2 states)

L_skip1:
	mov.w	e1,r3		; ( 2 states) c * b
	beq	L_skip2		; ( 4 states) c == 0, nothing to add
	mulxu	r0,er3		; (22 states)
	add.w	r3,e2		; ( 2 states)

L_skip2:
	mov.l	er2,er0		; ( 2 states)
	rts			; (10 states)

#endif
#endif /* L_mulsi3 */
#ifdef L_fixunssfsi_asm
/* For the h8300 we use asm to save some bytes, to
   allow more programs to fit into the tiny address
   space.  For the H8/300H and H8S, the C version is good enough.  */
#ifdef __H8300__
/* We still treat NANs different than libgcc2.c, but then, the
   behavior is undefined anyways.  */

/* ___fixunssfsi: convert the float in A0/A1 to an unsigned 4 byte
   integer in A0/A1.

   NOTE(review): the byte tests below presumably rely on the IEEE
   single layout, with r0h holding the sign bit and the upper seven
   exponent bits and bit 7 of A0L holding the lowest exponent bit -
   confirm against the float format of the target.

   - r0h below 0x4f as a signed byte (this includes every value with
     the sign bit set): tail-jump to ___fixsfsi.
   - r0h equal to 0x4f with the lowest exponent bit clear: build the
     result here by setting bit 7 of A0L and moving the low three
     bytes up by one byte.
   - anything larger: return 0xffffffff.  */
	.global	___fixunssfsi
___fixunssfsi:
	cmp.b	#0x4f,r0h
	bge	Large_num
	jmp	@___fixsfsi
Large_num:
	bhi	L_huge_num	; still the flags of the cmp.b above
	xor.b	#0x80,A0L	; flip the lowest exponent bit
	bmi	L_shift8	; it was clear and is now the leading 1
L_huge_num:
	mov.w	#65535,A0	; saturate to all ones
	mov.w	A0,A1
	rts
L_shift8:
	mov.b	A0L,A0H		; move the low three bytes up one byte
	mov.b	A1H,A0L
	mov.b	A1L,A1H
	mov.b	#0,A1L
	rts
#endif
#endif /* L_fixunssfsi_asm */
