OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [tags/] [gnu-src/] [gcc-4.5.1/] [gcc-4.5.1-or32-1.0rc1/] [gcc/] [config/] [sh/] [divcost-analysis] - Diff between revs 282 and 338

Only display areas with differences | Details | Blame | View Log

Rev 282 Rev 338
Analysis of cycle costs for SH4:
Analysis of cycle costs for SH4:
-> udiv_le128:            5
-> udiv_le128:            5
-> udiv_ge64k:            6
-> udiv_ge64k:            6
-> udiv udiv_25:         10
-> udiv udiv_25:         10
-> pos_divisor:           3
-> pos_divisor:           3
-> pos_result linear:     5
-> pos_result linear:     5
-> pos_result - -:        5
-> pos_result - -:        5
-> div_le128:             7
-> div_le128:             7
-> div_ge64k:             9
-> div_ge64k:             9
sdivsi3 -> udiv_25:            13
sdivsi3 -> udiv_25:            13
udiv_25 -> div_ge64k_end:      15
udiv_25 -> div_ge64k_end:      15
div_ge64k_end -> rts:          13
div_ge64k_end -> rts:          13
div_le128 -> div_le128_2:       2, r1 latency 3
div_le128 -> div_le128_2:       2, r1 latency 3
udiv_le128 -> div_le128_2:      2, r1 latency 3
udiv_le128 -> div_le128_2:      2, r1 latency 3
(u)div_le128 -> div_by_1:       9
(u)div_le128 -> div_by_1:       9
(u)div_le128 -> rts:           17
(u)div_le128 -> rts:           17
div_by_1(_neg) -> rts:          4
div_by_1(_neg) -> rts:          4
div_ge64k -> div_r8:            2
div_ge64k -> div_r8:            2
div_ge64k -> div_ge64k_2:       3
div_ge64k -> div_ge64k_2:       3
udiv_ge64k -> udiv_r8:          3
udiv_ge64k -> udiv_r8:          3
udiv_ge64k -> div_ge64k_2:      3 + LS
udiv_ge64k -> div_ge64k_2:      3 + LS
(u)div_ge64k -> div_ge64k_end: 13
(u)div_ge64k -> div_ge64k_end: 13
div_r8 -> div_r8_2:             2
div_r8 -> div_r8_2:             2
udiv_r8 -> div_r8_2:            2 + LS
udiv_r8 -> div_r8_2:            2 + LS
(u)div_r8 -> rts:              21
(u)div_r8 -> rts:              21
-> - + neg_result:             5
-> - + neg_result:             5
-> + - neg_result:             5
-> + - neg_result:             5
-> div_le128_neg:              7
-> div_le128_neg:              7
-> div_ge64k_neg:              9
-> div_ge64k_neg:              9
-> div_r8_neg:                11
-> div_r8_neg:                11
-> <64k div_ge64k_neg_end:    28
-> <64k div_ge64k_neg_end:    28
-> >=64k div_ge64k_neg_end:   22
-> >=64k div_ge64k_neg_end:   22
div_ge64k_neg_end ft -> rts:  14
div_ge64k_neg_end ft -> rts:  14
div_r8_neg_end -> rts:         4
div_r8_neg_end -> rts:         4
div_r8_neg -> div_r8_neg_end: 18
div_r8_neg -> div_r8_neg_end: 18
div_le128_neg -> div_by_1_neg: 4
div_le128_neg -> div_by_1_neg: 4
div_le128_neg -> rts:         18
div_le128_neg -> rts:         18
         sh4-200    absolute divisor range:
         sh4-200    absolute divisor range:
            1  [2..128]  [129..64K) [64K..|dividend|/256] >=64K,>|dividend|/256
            1  [2..128]  [129..64K) [64K..|dividend|/256] >=64K,>|dividend|/256
udiv       18     22         38            32                   30
udiv       18     22         38            32                   30
sdiv pos:  20     24         41            35                   32
sdiv pos:  20     24         41            35                   32
sdiv neg:  15     25         42            36                   33
sdiv neg:  15     25         42            36                   33
         sh4-300    absolute divisor range:
         sh4-300    absolute divisor range:
                 8 bit      16 bit       24 bit              > 24 bit
                 8 bit      16 bit       24 bit              > 24 bit
udiv              15         35            28                   25
udiv              15         35            28                   25
sdiv              14         36            34                   31
sdiv              14         36            34                   31
fp-based:
fp-based:
unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
call-div1:    divisor range:
call-div1:    divisor range:
              [1..64K)  >= 64K
              [1..64K)  >= 64K
unsigned:       63        58
unsigned:       63        58
signed:         76        76
signed:         76        76
SFUNC_STATIC call overhead:
SFUNC_STATIC call overhead:
mov.l 0f,r1
mov.l 0f,r1
bsrf r1
bsrf r1
SFUNC_GOT call overhead - current:
SFUNC_GOT call overhead - current:
mov.l 0f,r1
mov.l 0f,r1
mova 0f,r0
mova 0f,r0
mov.l 1f,r2
mov.l 1f,r2
add r1,r0
add r1,r0
mov.l @(r0,r2),r0
mov.l @(r0,r2),r0
jmp @r0
jmp @r0
; 3 cycles worse than SFUNC_STATIC
; 3 cycles worse than SFUNC_STATIC
SFUNC_GOT call overhead - improved assembler:
SFUNC_GOT call overhead - improved assembler:
mov.l 0f,r1
mov.l 0f,r1
mova 0f,r0
mova 0f,r0
mov.l @(r0,r1),r0
mov.l @(r0,r1),r0
jmp @r0
jmp @r0
; 2 cycles worse than SFUNC_STATIC
; 2 cycles worse than SFUNC_STATIC


Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Copying and distribution of this file, with or without modification,
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved.
notice and this notice are preserved.
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.