OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [config/] [arm/] [cortex-r4.md] - Blame information for rev 282

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 282 jeremybenn
;; ARM Cortex-R4 scheduling description.
2
;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
3
;; Contributed by CodeSourcery.
4
 
5
;; This file is part of GCC.
6
 
7
;; GCC is free software; you can redistribute it and/or modify it
8
;; under the terms of the GNU General Public License as published
9
;; by the Free Software Foundation; either version 3, or (at your
10
;; option) any later version.
11
 
12
;; GCC is distributed in the hope that it will be useful, but WITHOUT
13
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15
;; License for more details.
16
 
17
;; You should have received a copy of the GNU General Public License
18
;; along with GCC; see the file COPYING3.  If not see
19
;; <http://www.gnu.org/licenses/>.
20
 
21
;; Declare the scheduling automaton named "cortex_r4"; each
;; define_cpu_unit in this file names it as its automaton.
(define_automaton "cortex_r4")
22
 
23
;; We approximate the dual-issue constraints of this core using four
24
;; "issue units" and a reservation matrix as follows.  The numbers indicate
25
;; the instruction groups' preferences in order.  Multiple entries for
26
;; the same numbered preference indicate units that must be reserved
27
;; together.
28
;;
29
;; Issue unit:          A       B       C       ALU
30
;;
31
;; ALU w/o reg shift    1st     2nd             1st and 2nd
32
;; ALU w/ reg shift     1st     2nd     2nd     1st and 2nd
33
;; Moves                1st     2nd             2nd
34
;; Multiplication       1st                     1st
35
;; Division             1st                     1st
36
;; Load/store single    1st             1st
37
;; Other load/store     1st     1st
38
;; Branches                     1st
39
 
40
;; The four issue units (A, B, C and ALU) of the table above, all
;; attached to the "cortex_r4" automaton.
(define_cpu_unit "cortex_r4_issue_a" "cortex_r4")
41
(define_cpu_unit "cortex_r4_issue_b" "cortex_r4")
42
(define_cpu_unit "cortex_r4_issue_c" "cortex_r4")
43
(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4")
44
 
45
;; Reservation notation: "+" reserves units in the same cycle, "|"
;; separates alternatives (the first listed is the first preference),
;; and "*N" repeats the reservation for N consecutive cycles.

;; ALU w/o reg shift: A together with ALU, otherwise B together with ALU.
(define_reservation "cortex_r4_alu"
46
                    "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
47
                     (cortex_r4_issue_b+cortex_r4_issue_alu)")
48
;; ALU w/ reg shift: A together with ALU, otherwise B, C and ALU together.
(define_reservation "cortex_r4_alu_shift_reg"
49
                    "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
50
                     (cortex_r4_issue_b+cortex_r4_issue_c+\
51
                      cortex_r4_issue_alu)")
52
;; Moves: A alone (no ALU unit), otherwise B together with ALU.
(define_reservation "cortex_r4_mov"
53
                    "cortex_r4_issue_a|(cortex_r4_issue_b+\
54
                     cortex_r4_issue_alu)")
55
;; Multiplication: A together with ALU for one cycle; there is no
;; alternative on B.
(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu")
56
;; As cortex_r4_mul, but A and ALU are held for two consecutive cycles.
(define_reservation "cortex_r4_mul_2"
57
                    "(cortex_r4_issue_a+cortex_r4_issue_alu)*2")
58
;; Division instructions execute out-of-order with respect to the
59
;; rest of the pipeline and only require reservations on their first and
60
;; final cycles.
61
;; In the strings below "," advances to the next cycle and "nothing"
;; reserves no unit, so cortex_r4_div_9 spans 1 + 7 + 1 = 9 cycles and
;; cortex_r4_div_10 spans 1 + 8 + 1 = 10 cycles, with A and ALU held
;; only in the first and last cycle of each.
(define_reservation "cortex_r4_div_9"
62
                    "cortex_r4_issue_a+cortex_r4_issue_alu,\
63
                     nothing*7,\
64
                     cortex_r4_issue_a+cortex_r4_issue_alu")
65
(define_reservation "cortex_r4_div_10"
66
                    "cortex_r4_issue_a+cortex_r4_issue_alu,\
67
                     nothing*8,\
68
                     cortex_r4_issue_a+cortex_r4_issue_alu")
69
;; Load/store single: A and C together for one cycle.
(define_reservation "cortex_r4_load_store"
70
                    "cortex_r4_issue_a+cortex_r4_issue_c")
71
;; Other load/store: A and B together, held for two consecutive cycles.
(define_reservation "cortex_r4_load_store_2"
72
                    "(cortex_r4_issue_a+cortex_r4_issue_b)*2")
73
;; Branches: B only.
(define_reservation "cortex_r4_branch" "cortex_r4_issue_b")
74
 
75
;; We assume that all instructions are unconditional.
76
 
77
;; Data processing instructions.  Moves without shifts are kept separate
78
;; for the purposes of the dual-issue constraints above.
79
;; Default latency 2.  Matches insns whose "type" is "alu" and whose
;; "insn" attribute is not "mov", when "tune_cortexr4" is "yes".
(define_insn_reservation "cortex_r4_alu" 2
80
  (and (eq_attr "tune_cortexr4" "yes")
81
       (and (eq_attr "type" "alu")
82
            (not (eq_attr "insn" "mov"))))
83
  "cortex_r4_alu")
84
 
85
;; Default latency 2.  Matches the complementary case: "type" is "alu"
;; and "insn" is "mov".  Uses the move reservation, which can issue on
;; A without occupying the ALU unit.
(define_insn_reservation "cortex_r4_mov" 2
86
  (and (eq_attr "tune_cortexr4" "yes")
87
       (and (eq_attr "type" "alu")
88
            (eq_attr "insn" "mov")))
89
  "cortex_r4_mov")
90
 
91
;; Default latency 2.  "alu_shift" insns use the same unit reservation
;; as plain ALU insns (presumably the "ALU w/o reg shift" row of the
;; issue table -- confirm against the "type" attribute definition).
(define_insn_reservation "cortex_r4_alu_shift" 2
92
  (and (eq_attr "tune_cortexr4" "yes")
93
       (eq_attr "type" "alu_shift"))
94
  "cortex_r4_alu")
95
 
96
;; Default latency 2.  "alu_shift_reg" insns use the reservation whose
;; second alternative additionally needs issue unit C.
(define_insn_reservation "cortex_r4_alu_shift_reg" 2
97
  (and (eq_attr "tune_cortexr4" "yes")
98
       (eq_attr "type" "alu_shift_reg"))
99
  "cortex_r4_alu_shift_reg")
100
 
101
;; An ALU instruction followed by an ALU instruction with no early dep.
102
;; Each bypass below replaces the producers' default latency of 2 with
;; 1 for the named consumer.  Where a fourth operand is given it names
;; a guard function (defined outside this file); the bypass applies
;; only when that guard accepts the producer/consumer pair.  The plain
;; "cortex_r4_alu" consumer has no guard.
(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
103
                  cortex_r4_mov"
104
               "cortex_r4_alu")
105
(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
106
                  cortex_r4_mov"
107
               "cortex_r4_alu_shift"
108
               "arm_no_early_alu_shift_dep")
109
(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
110
                  cortex_r4_mov"
111
               "cortex_r4_alu_shift_reg"
112
               "arm_no_early_alu_shift_value_dep")
113
 
114
;; In terms of availabilities, a consumer mov could theoretically be
115
;; issued together with a producer ALU instruction, without stalls.
116
;; In practice this cannot happen because mov;add (in that order) is not
117
;; eligible for dual issue and furthermore dual issue is not permitted
118
;; when a dependency is involved.  We therefore note it as latency one.
119
;; A mov followed by another of the same is also latency one.
120
;; The producer list includes cortex_r4_mov itself, which covers the
;; mov-to-mov case; no guard function is given.
(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
121
                  cortex_r4_mov"
122
               "cortex_r4_mov")
123
 
124
;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are
125
;; media data processing instructions nor sad instructions.
126
 
127
;; Multiplication instructions.
128
 
129
;; mul and smmul: default latency 4, with A and ALU held for two cycles.
(define_insn_reservation "cortex_r4_mul_4" 4
130
  (and (eq_attr "tune_cortexr4" "yes")
131
       (eq_attr "insn" "mul,smmul"))
132
  "cortex_r4_mul_2")
133
 
134
;; smulxy, smulwy, smuad and smusd: default latency 3, with A and ALU
;; held for one cycle.
(define_insn_reservation "cortex_r4_mul_3" 3
135
  (and (eq_attr "tune_cortexr4" "yes")
136
       (eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
137
  "cortex_r4_mul")
138
 
139
;; mla and smmla: default latency 4, with A and ALU held for two cycles.
(define_insn_reservation "cortex_r4_mla_4" 4
140
  (and (eq_attr "tune_cortexr4" "yes")
141
       (eq_attr "insn" "mla,smmla"))
142
  "cortex_r4_mul_2")
143
 
144
;; smlaxy, smlawy, smlad and smlsd: default latency 3, with A and ALU
;; held for one cycle.
(define_insn_reservation "cortex_r4_mla_3" 3
145
  (and (eq_attr "tune_cortexr4" "yes")
146
       (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
147
  "cortex_r4_mul")
148
 
149
;; smlald and smlsld: default latency 3, with A and ALU held for one
;; cycle.  Kept as its own class so that the accumulator-forwarding
;; bypasses, which list only the mul_* and mla_* classes, do not cover it.
(define_insn_reservation "cortex_r4_smlald" 3
150
  (and (eq_attr "tune_cortexr4" "yes")
151
       (eq_attr "insn" "smlald,smlsld"))
152
  "cortex_r4_mul")
153
 
154
;; smull, umull, umlal and umaal: default latency 4, with A and ALU
;; held for two cycles.
(define_insn_reservation "cortex_r4_mull" 4
155
  (and (eq_attr "tune_cortexr4" "yes")
156
       (eq_attr "insn" "smull,umull,umlal,umaal"))
157
  "cortex_r4_mul_2")
158
 
159
;; A multiply or an MLA with a single-register result, followed by an
160
;; MLA with an accumulator dependency, has its result forwarded.
161
;; Latency-3 producers: 2 cycles instead of 3, applied only when the
;; guard arm_mac_accumulator_is_mul_result (defined outside this file)
;; accepts the pair.
(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3"
162
               "cortex_r4_mla_3,cortex_r4_mla_4"
163
               "arm_mac_accumulator_is_mul_result")
164
 
165
;; Accumulator forwarding for the latency-4 producers: 3 cycles instead
;; of 4 into an MLA consumer, under the same guard function as the
;; latency-3 case.
(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4"
166
               "cortex_r4_mla_3,cortex_r4_mla_4"
167
               "arm_mac_accumulator_is_mul_result")
168
 
169
;; A multiply followed by an ALU instruction needing the multiply
170
;; result only at ALU has lower latency than one needing it at Shift.
171
;; The first three bypasses take the latency-3 multiply classes down to
;; 2; the last three take the latency-4 classes down to 3.  The shift
;; consumers are covered only when the named guard function (defined
;; outside this file) accepts the pair.
(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
172
               "cortex_r4_alu")
173
(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
174
               "cortex_r4_alu_shift"
175
               "arm_no_early_alu_shift_dep")
176
(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
177
               "cortex_r4_alu_shift_reg"
178
               "arm_no_early_alu_shift_value_dep")
179
(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
180
               "cortex_r4_alu")
181
(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
182
               "cortex_r4_alu_shift"
183
               "arm_no_early_alu_shift_dep")
184
(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
185
               "cortex_r4_alu_shift_reg"
186
               "arm_no_early_alu_shift_value_dep")
187
 
188
;; A multiply followed by a mov has one cycle lower latency again.
189
;; That is 1 for the latency-3 multiply classes and 2 for the latency-4
;; classes; no guard function is given.
(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
190
               "cortex_r4_mov")
191
(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
192
               "cortex_r4_mov")
193
 
194
;; We guess that division of A/B using sdiv or udiv, on average,
195
;; is performed with B having ten more leading zeros than A.
196
;; This gives a latency of nine for udiv and ten for sdiv.
197
;; udiv: the default latency of 9 matches the nine-cycle span of the
;; cortex_r4_div_9 reservation.
(define_insn_reservation "cortex_r4_udiv" 9
198
  (and (eq_attr "tune_cortexr4" "yes")
199
       (eq_attr "insn" "udiv"))
200
  "cortex_r4_div_9")
201
 
202
;; sdiv: the default latency of 10 matches the ten-cycle span of the
;; cortex_r4_div_10 reservation.
(define_insn_reservation "cortex_r4_sdiv" 10
203
  (and (eq_attr "tune_cortexr4" "yes")
204
       (eq_attr "insn" "sdiv"))
205
  "cortex_r4_div_10")
206
 
207
;; Branches.  We assume correct prediction.
208
 
209
;; Insns of type "branch": default latency 0, occupying issue unit B
;; only (via the cortex_r4_branch reservation).
(define_insn_reservation "cortex_r4_branch" 0
210
  (and (eq_attr "tune_cortexr4" "yes")
211
       (eq_attr "type" "branch"))
212
  "cortex_r4_branch")
213
 
214
;; Call latencies are not predictable.  A semi-arbitrary very large
215
;; number is used as "positive infinity" so that everything should be
216
;; finished by the time of return.
217
;; The value used is 32, and the "nothing" reservation means a call
;; occupies none of the modelled issue units.
(define_insn_reservation "cortex_r4_call" 32
218
  (and (eq_attr "tune_cortexr4" "yes")
219
       (eq_attr "type" "call"))
220
  "nothing")
221
 
222
;; Status register access instructions are not currently emitted.
223
 
224
;; Load instructions.
225
;; We do not model the "addr_md_3cycle" cases and assume that
226
;; accesses following are correctly aligned.
227
 
228
;; Insns of type "load1" or "load2": default latency 3, reserving A and
;; C for one cycle.
(define_insn_reservation "cortex_r4_load_1_2" 3
229
  (and (eq_attr "tune_cortexr4" "yes")
230
       (eq_attr "type" "load1,load2"))
231
  "cortex_r4_load_store")
232
 
233
;; Insns of type "load3" or "load4": default latency 4, reserving A and
;; B for two consecutive cycles.
(define_insn_reservation "cortex_r4_load_3_4" 4
234
  (and (eq_attr "tune_cortexr4" "yes")
235
       (eq_attr "type" "load3,load4"))
236
  "cortex_r4_load_store_2")
237
 
238
;; If a producing load is followed by an instruction consuming only
239
;; as a Normal Reg, there is one fewer cycle of latency.
240
 
241
;; load1/load2 feeding an ALU consumer: 2 cycles instead of the default
;; 3.  The shift consumers are covered only when the named guard
;; function (defined outside this file) accepts the pair.
(define_bypass 2 "cortex_r4_load_1_2"
242
               "cortex_r4_alu")
243
(define_bypass 2 "cortex_r4_load_1_2"
244
               "cortex_r4_alu_shift"
245
               "arm_no_early_alu_shift_dep")
246
(define_bypass 2 "cortex_r4_load_1_2"
247
               "cortex_r4_alu_shift_reg"
248
               "arm_no_early_alu_shift_value_dep")
249
 
250
;; load3/load4 feeding an ALU consumer: 3 cycles instead of the default
;; 4.  The shift consumers are covered only when the named guard
;; function (defined outside this file) accepts the pair.
(define_bypass 3 "cortex_r4_load_3_4"
251
               "cortex_r4_alu")
252
(define_bypass 3 "cortex_r4_load_3_4"
253
               "cortex_r4_alu_shift"
254
               "arm_no_early_alu_shift_dep")
255
(define_bypass 3 "cortex_r4_load_3_4"
256
               "cortex_r4_alu_shift_reg"
257
               "arm_no_early_alu_shift_value_dep")
258
 
259
;; If a producing load is followed by an instruction consuming only
260
;; as a Late Reg, there are two fewer cycles of latency.  Such consumer
261
;; instructions are moves and stores.
262
 
263
;; Load feeding a mov or a store: 1 cycle for load1/load2 (default 3)
;; and 2 cycles for load3/load4 (default 4).  No guard function is
;; given, so these apply to any dependence between the listed classes.
(define_bypass 1 "cortex_r4_load_1_2"
264
               "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
265
(define_bypass 2 "cortex_r4_load_3_4"
266
               "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
267
 
268
;; If a producer's result is required as the base or offset of a load,
269
;; there is an extra cycle latency.
270
 
271
;; ALU-class producer (default latency 2) feeding a load: 3 cycles.
;; No guard function is given, so this covers every dependence from
;; these producers to a load, not only base/offset operands.
(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\
272
                  cortex_r4_alu_shift_reg"
273
               "cortex_r4_load_1_2,cortex_r4_load_3_4")
274
 
275
;; Latency-3 multiply classes feeding a load: 4 cycles (default plus
;; one).  No guard function is given.
(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
276
               "cortex_r4_load_1_2,cortex_r4_load_3_4")
277
 
278
;; Latency-4 multiply classes feeding a load: 5 cycles (default plus
;; one).  No guard function is given.
(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
279
               "cortex_r4_load_1_2,cortex_r4_load_3_4")
280
 
281
;; Store instructions.
282
 
283
;; Insns of type "store1" or "store2": default latency 0, reserving A
;; and C for one cycle (the same reservation as load1/load2).
(define_insn_reservation "cortex_r4_store_1_2" 0
284
  (and (eq_attr "tune_cortexr4" "yes")
285
       (eq_attr "type" "store1,store2"))
286
  "cortex_r4_load_store")
287
 
288
;; Insns of type "store3" or "store4": default latency 0, reserving A
;; and B for two consecutive cycles (the same reservation as
;; load3/load4).
(define_insn_reservation "cortex_r4_store_3_4" 0
289
  (and (eq_attr "tune_cortexr4" "yes")
290
       (eq_attr "type" "store3,store4"))
291
  "cortex_r4_load_store_2")
292
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.