OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [arm/] [cortex-a8.md] - Blame information for rev 737

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 709 jeremybenn
;; ARM Cortex-A8 scheduling description.
2
;; Copyright (C) 2007, 2010 Free Software Foundation, Inc.
3
;; Contributed by CodeSourcery.
4
 
5
;; This file is part of GCC.
6
 
7
;; GCC is free software; you can redistribute it and/or modify it
8
;; under the terms of the GNU General Public License as published
9
;; by the Free Software Foundation; either version 3, or (at your
10
;; option) any later version.
11
 
12
;; GCC is distributed in the hope that it will be useful, but WITHOUT
13
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15
;; License for more details.
16
 
17
;; You should have received a copy of the GNU General Public License
18
;; along with GCC; see the file COPYING3.  If not see
19
;; <http://www.gnu.org/licenses/>.
20
 
21
;; Declare the automaton "cortex_a8".  Each define_cpu_unit form in this
;; file names it as the automaton the unit belongs to.
(define_automaton "cortex_a8")
22
 
23
;; Only one load/store instruction can be issued per cycle
24
;; (although reservation of this unit is only required for single
25
;; loads and stores -- see below).
26
;; Unit of automaton "cortex_a8".  It appears in both the
;; cortex_a8_load_store_1 and cortex_a8_load_store_2 reservations.
(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8")
27
 
28
;; Only one branch instruction can be issued per cycle.
29
;; Unit of automaton "cortex_a8".  It is reserved by the cortex_a8_branch
;; reservation and, on its own, by the cortex_a8_call insn reservation.
(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8")
30
 
31
;; The two ALU pipelines.
32
;; The pipes are separate units so that a reservation can name one of
;; them: the cortex_a8_multiply* reservations and cortex_a8_load_store_2
;; always occupy cortex_a8_alu0, and add cortex_a8_alu1 only on cycles
;; where dual issue has to be blocked.
(define_cpu_unit "cortex_a8_alu0" "cortex_a8")
33
(define_cpu_unit "cortex_a8_alu1" "cortex_a8")
34
 
35
;; The usual flow of an instruction through the pipelines.
36
;; One cycle on either ALU pipeline ("|" separates the alternatives).
;; Used by the ALU, shift and move insn reservations in this file.
(define_reservation "cortex_a8_default"
37
                    "cortex_a8_alu0|cortex_a8_alu1")
38
 
39
;; The flow of a branch instruction through the pipelines.
40
;; One cycle: either ALU pipeline, held simultaneously ("+") with the
;; branch issue unit.  The trailing backslash continues the string.
(define_reservation "cortex_a8_branch"
41
                    "(cortex_a8_alu0+cortex_a8_issue_branch)|\
42
                     (cortex_a8_alu1+cortex_a8_issue_branch)")
43
 
44
;; The flow of a load or store instruction through the pipeline in
45
;; the case where that instruction consists of only one micro-op...
46
;; One cycle: either ALU pipeline, held simultaneously ("+") with the
;; load/store issue unit.
(define_reservation "cortex_a8_load_store_1"
47
                    "(cortex_a8_alu0+cortex_a8_issue_ls)|\
48
                     (cortex_a8_alu1+cortex_a8_issue_ls)")
49
 
50
;; ...and in the case of two micro-ops.  Dual issue is altogether forbidden
51
;; during the issue cycle of the first micro-op.  (Instead of modelling
52
;; a separate issue unit, we reserve alu0 and alu1 to
53
;; prevent any other instructions from being issued upon that first cycle.)
54
;; Even though the load/store pipeline is usually available in either
55
;; ALU pipe, multi-cycle instructions always issue in pipeline 0.
56
;; Two cycles ("," advances to the next cycle).  Cycle 1 holds both ALU
;; pipes plus the load/store issue unit; cycle 2 holds alu0 plus the
;; load/store issue unit, leaving alu1 free.
(define_reservation "cortex_a8_load_store_2"
57
                    "cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\
58
                     cortex_a8_alu0+cortex_a8_issue_ls")
59
 
60
;; The flow of a single-cycle multiplication.
61
;; One cycle on cortex_a8_alu0 only; alu1 is not offered as an
;; alternative.  Used by the cortex_a8_smulwy insn reservation.
(define_reservation "cortex_a8_multiply"
62
                    "cortex_a8_alu0")
63
 
64
;; The flow of a multiplication instruction that gets decomposed into
65
;; two micro-ops.  The two micro-ops will be issued to pipeline 0 on
66
;; successive cycles.  Dual issue cannot happen at the same time as the
67
;; first of the micro-ops.
68
;; Two cycles ("," advances to the next cycle).  Cycle 1 holds both ALU
;; pipes; cycle 2 holds alu0 only, leaving alu1 free.
(define_reservation "cortex_a8_multiply_2"
69
                    "cortex_a8_alu0+cortex_a8_alu1,\
70
                     cortex_a8_alu0")
71
 
72
;; Similarly, the flow of a multiplication instruction that gets
73
;; decomposed into three micro-ops.  Dual issue cannot occur except on
74
;; the cycle upon which the third micro-op is issued.
75
;; Three cycles.  Cycles 1 and 2 each hold both ALU pipes; cycle 3 holds
;; alu0 only, leaving alu1 free.
(define_reservation "cortex_a8_multiply_3"
76
                    "cortex_a8_alu0+cortex_a8_alu1,\
77
                     cortex_a8_alu0+cortex_a8_alu1,\
78
                     cortex_a8_alu0")
79
 
80
;; The model given here assumes that all instructions are unconditional.
81
 
82
;; Data processing instructions, but not move instructions.
83
 
84
;; We include CLZ with these since it has the same execution pattern
85
;; (source read in E2 and destination available at the end of that cycle).
86
;; Default latency 2.  Selected when tuning for cortexa8 and the insn is
;; either (type "alu", neon_type "none", and not a mov/mvn) or a clz.
;; Occupies either ALU pipeline for one cycle (cortex_a8_default).
(define_insn_reservation "cortex_a8_alu" 2
87
  (and (eq_attr "tune" "cortexa8")
88
       (ior (and (and (eq_attr "type" "alu")
89
                      (eq_attr "neon_type" "none"))
90
                 (not (eq_attr "insn" "mov,mvn")))
91
            (eq_attr "insn" "clz")))
92
  "cortex_a8_default")
93
 
94
;; Insns of type "alu_shift" other than mov/mvn: default latency 2,
;; either ALU pipeline for one cycle (cortex_a8_default).
(define_insn_reservation "cortex_a8_alu_shift" 2
95
  (and (eq_attr "tune" "cortexa8")
96
       (and (eq_attr "type" "alu_shift")
97
            (not (eq_attr "insn" "mov,mvn"))))
98
  "cortex_a8_default")
99
 
100
;; Insns of type "alu_shift_reg" other than mov/mvn: default latency 2,
;; either ALU pipeline for one cycle (cortex_a8_default).
(define_insn_reservation "cortex_a8_alu_shift_reg" 2
101
  (and (eq_attr "tune" "cortexa8")
102
       (and (eq_attr "type" "alu_shift_reg")
103
            (not (eq_attr "insn" "mov,mvn"))))
104
  "cortex_a8_default")
105
 
106
;; Move instructions.
107
 
108
;; mov and mvn insns of any of the three ALU types ("alu", "alu_shift",
;; "alu_shift_reg"): default latency 1, either ALU pipeline for one
;; cycle.  These are exactly the insns the three reservations above
;; exclude with (not (eq_attr "insn" "mov,mvn")).
(define_insn_reservation "cortex_a8_mov" 1
109
  (and (eq_attr "tune" "cortexa8")
110
       (and (eq_attr "type" "alu,alu_shift,alu_shift_reg")
111
            (eq_attr "insn" "mov,mvn")))
112
  "cortex_a8_default")
113
 
114
;; Exceptions to the default latencies for data processing instructions.
115
 
116
;; A move followed by an ALU instruction with no early dep.
117
;; (Such a pair can be issued in parallel, hence latency zero.)
118
;; Latency 0 replaces cortex_a8_mov's default latency of 1.  The bypass
;; into cortex_a8_alu is unconditional; the two shift forms apply only
;; when the named guard function accepts the producer/consumer pair.
;; NOTE(review): the guard functions are defined outside this file.
(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu")
119
(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift"
120
               "arm_no_early_alu_shift_dep")
121
(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg"
122
               "arm_no_early_alu_shift_value_dep")
123
 
124
;; An ALU instruction followed by an ALU instruction with no early dep.
125
;; Latency 1 replaces the default latency of 2 for the three non-move
;; ALU producers.  As with the move bypasses, the cortex_a8_alu consumer
;; is unguarded and the two shift consumers are guarded.
(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
126
               "cortex_a8_alu")
127
(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
128
               "cortex_a8_alu_shift"
129
               "arm_no_early_alu_shift_dep")
130
(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
131
               "cortex_a8_alu_shift_reg"
132
               "arm_no_early_alu_shift_value_dep")
133
 
134
;; Multiplication instructions.  These are categorized according to their
135
;; reservation behavior and the need below to distinguish certain
136
;; varieties for bypasses.  Results are available at the E5 stage
137
;; (but some of these are multi-cycle instructions which explains the
138
;; latencies below).
139
 
140
;; mul, smulxy, smmul: default latency 6, two-cycle flow
;; (cortex_a8_multiply_2).
(define_insn_reservation "cortex_a8_mul" 6
141
  (and (eq_attr "tune" "cortexa8")
142
       (eq_attr "insn" "mul,smulxy,smmul"))
143
  "cortex_a8_multiply_2")
144
 
145
;; mla, smlaxy, smlawy, smmla, smlad, smlsd: default latency 6, two-cycle
;; flow (cortex_a8_multiply_2).  This is the only reservation named as a
;; consumer by the accumulator-forwarding bypass in this file.
(define_insn_reservation "cortex_a8_mla" 6
146
  (and (eq_attr "tune" "cortexa8")
147
       (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
148
  "cortex_a8_multiply_2")
149
 
150
;; smull, umull, smlal, umlal, umaal, smlalxy: default latency 7,
;; three-cycle flow (cortex_a8_multiply_3).
(define_insn_reservation "cortex_a8_mull" 7
151
  (and (eq_attr "tune" "cortexa8")
152
       (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
153
  "cortex_a8_multiply_3")
154
 
155
;; smulwy, smuad, smusd: default latency 5, single-cycle flow on alu0
;; (cortex_a8_multiply).
(define_insn_reservation "cortex_a8_smulwy" 5
156
  (and (eq_attr "tune" "cortexa8")
157
       (eq_attr "insn" "smulwy,smuad,smusd"))
158
  "cortex_a8_multiply")
159
 
160
;; smlald and smlsld are multiply-accumulate instructions but do not
161
;; receive bypassed data from other multiplication results; thus, they
162
;; cannot go in cortex_a8_mla above.  (See below for bypass details.)
163
;; smlald, smlsld: default latency 6, two-cycle flow
;; (cortex_a8_multiply_2) -- the same latency and flow as cortex_a8_mla,
;; kept as a separate reservation name.
(define_insn_reservation "cortex_a8_smlald" 6
164
  (and (eq_attr "tune" "cortexa8")
165
       (eq_attr "insn" "smlald,smlsld"))
166
  "cortex_a8_multiply_2")
167
 
168
;; A multiply with a single-register result or an MLA, followed by an
169
;; MLA with an accumulator dependency, has its result forwarded so two
170
;; such instructions can issue back-to-back.
171
;; Latency 1.  Producers are cortex_a8_mul, cortex_a8_mla and
;; cortex_a8_smulwy (cortex_a8_mull and cortex_a8_smlald are not listed);
;; the only consumer is cortex_a8_mla, and only when the guard
;; arm_mac_accumulator_is_mul_result accepts the pair.
;; NOTE(review): the guard function is defined outside this file.
(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy"
172
               "cortex_a8_mla"
173
               "arm_mac_accumulator_is_mul_result")
174
 
175
;; A multiply followed by an ALU instruction needing the multiply
176
;; result only at E2 has lower latency than one needing it at E1.
177
;; Latency 4 for all five multiply reservations, replacing their default
;; latencies of 5, 6 or 7.  The cortex_a8_alu consumer is unguarded; the
;; two shift consumers apply only when the named guard accepts the pair.
(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
178
                  cortex_a8_smulwy,cortex_a8_smlald"
179
               "cortex_a8_alu")
180
(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
181
                  cortex_a8_smulwy,cortex_a8_smlald"
182
               "cortex_a8_alu_shift"
183
               "arm_no_early_alu_shift_dep")
184
(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
185
                  cortex_a8_smulwy,cortex_a8_smlald"
186
               "cortex_a8_alu_shift_reg"
187
               "arm_no_early_alu_shift_value_dep")
188
 
189
;; Load instructions.
190
;; The presence of any register writeback is ignored here.
191
 
192
;; A load result has latency 3 unless the dependent instruction has
193
;; no early dep, in which case it is only latency two.
194
;; We assume 64-bit alignment for doubleword loads.
195
;; Insns of type load1, load2 or load_byte: default latency 3,
;; single-cycle load/store flow (cortex_a8_load_store_1).
(define_insn_reservation "cortex_a8_load1_2" 3
196
  (and (eq_attr "tune" "cortexa8")
197
       (eq_attr "type" "load1,load2,load_byte"))
198
  "cortex_a8_load_store_1")
199
 
200
;; Latency 2 replaces cortex_a8_load1_2's default latency of 3.  The
;; cortex_a8_alu consumer is unguarded; the two shift consumers apply
;; only when the named guard accepts the pair.
(define_bypass 2 "cortex_a8_load1_2"
201
               "cortex_a8_alu")
202
(define_bypass 2 "cortex_a8_load1_2"
203
               "cortex_a8_alu_shift"
204
               "arm_no_early_alu_shift_dep")
205
(define_bypass 2 "cortex_a8_load1_2"
206
               "cortex_a8_alu_shift_reg"
207
               "arm_no_early_alu_shift_value_dep")
208
 
209
;; We do not currently model the fact that loads with scaled register
210
;; offsets that are not LSL #2 have an extra cycle latency (they issue
211
;; as two micro-ops).
212
 
213
;; A load multiple of three registers is usually issued as two micro-ops.
214
;; The first register will be available at E3 of the first iteration,
215
;; the second at E3 of the second iteration, and the third at E4 of
216
;; the second iteration.  A load multiple of four registers is usually
217
;; issued as two micro-ops.
218
;; Insns of type load3 or load4: default latency 5, two-cycle load/store
;; flow (cortex_a8_load_store_2).  A single latency covers every
;; register loaded; per-register availability is not modelled here.
(define_insn_reservation "cortex_a8_load3_4" 5
219
  (and (eq_attr "tune" "cortexa8")
220
       (eq_attr "type" "load3,load4"))
221
  "cortex_a8_load_store_2")
222
 
223
;; Latency 4 replaces cortex_a8_load3_4's default latency of 5, mirroring
;; the one-cycle reduction given to cortex_a8_load1_2.  The cortex_a8_alu
;; consumer is unguarded; the two shift consumers are guarded.
(define_bypass 4 "cortex_a8_load3_4"
224
               "cortex_a8_alu")
225
(define_bypass 4 "cortex_a8_load3_4"
226
               "cortex_a8_alu_shift"
227
               "arm_no_early_alu_shift_dep")
228
(define_bypass 4 "cortex_a8_load3_4"
229
               "cortex_a8_alu_shift_reg"
230
               "arm_no_early_alu_shift_value_dep")
231
 
232
;; Store instructions.
233
;; Writeback is again ignored.
234
 
235
;; Insns of type store1 or store2: default latency 0, single-cycle
;; load/store flow (cortex_a8_load_store_1).
(define_insn_reservation "cortex_a8_store1_2" 0
236
  (and (eq_attr "tune" "cortexa8")
237
       (eq_attr "type" "store1,store2"))
238
  "cortex_a8_load_store_1")
239
 
240
;; Insns of type store3 or store4: default latency 0, two-cycle
;; load/store flow (cortex_a8_load_store_2).
(define_insn_reservation "cortex_a8_store3_4" 0
241
  (and (eq_attr "tune" "cortexa8")
242
       (eq_attr "type" "store3,store4"))
243
  "cortex_a8_load_store_2")
244
 
245
;; An ALU instruction acting as a producer for a store instruction
246
;; that only uses the result as the value to be stored (as opposed to
247
;; using it to calculate the address) has latency zero; the store
248
;; reads the value to be stored at the start of E3 and the ALU insn
249
;; writes it at the end of E2.  Move instructions actually produce the
250
;; result at the end of E1, but since we don't have delay slots, the
251
;; scheduling behavior will be the same.
252
;; Latency 0 from any ALU or move producer to either store reservation,
;; applied only when the guard arm_no_early_store_addr_dep accepts the
;; pair.  NOTE(review): the guard function is defined outside this file.
(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\
253
                  cortex_a8_alu_shift_reg,cortex_a8_mov"
254
               "cortex_a8_store1_2,cortex_a8_store3_4"
255
               "arm_no_early_store_addr_dep")
256
 
257
;; Branch instructions
258
 
259
;; Insns of type branch: default latency 0.  The final string refers to
;; the define_reservation of the same name, i.e. either ALU pipeline
;; together with the branch issue unit for one cycle.
(define_insn_reservation "cortex_a8_branch" 0
260
  (and (eq_attr "tune" "cortexa8")
261
       (eq_attr "type" "branch"))
262
  "cortex_a8_branch")
263
 
264
;; Call latencies are not predictable.  A semi-arbitrary very large
265
;; number is used as "positive infinity" so that everything should be
266
;; finished by the time of return.
267
;; Insns of type call: default latency 32.  Unlike cortex_a8_branch, this
;; reserves the cortex_a8_issue_branch unit directly, so no ALU pipeline
;; is reserved for a call.
(define_insn_reservation "cortex_a8_call" 32
268
  (and (eq_attr "tune" "cortexa8")
269
       (eq_attr "type" "call"))
270
  "cortex_a8_issue_branch")
271
 
272
;; NEON (including VFP) instructions.
273
 
274
;; Pull in the companion description file cortex-a8-neon.md.
;; NOTE(review): its contents are not visible from this file.
(include "cortex-a8-neon.md")
275
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.