URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [config/] [arm/] [cortex-a8.md] - Blame information for rev 332

Go to most recent revision | Details | Compare with Previous | View Log


;; ARM Cortex-A8 scheduling description.
;; Copyright (C) 2007 Free Software Foundation, Inc.
;; Contributed by CodeSourcery.
 
;; This file is part of GCC.
 
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
 
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
 
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
 
;; The automaton to which every CPU unit declared below is assigned.
(define_automaton "cortex_a8")
 
;; Only one load/store instruction can be issued per cycle
;; (although reservation of this unit is only required for single
;; loads and stores -- see below).
;; NOTE(review): cortex_a8_load_store_2 below reserves this unit on both
;; of its cycles as well, so the parenthetical above looks stale -- confirm.
(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8")
 
;; Only one branch instruction can be issued per cycle.
(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8")
 
;; The two ALU pipelines.
(define_cpu_unit "cortex_a8_alu0" "cortex_a8")
(define_cpu_unit "cortex_a8_alu1" "cortex_a8")
 
;; In the reservation strings that follow, "|" separates alternatives,
;; "+" joins units that are reserved in the same cycle, and "," moves on
;; to the next cycle.

;; The usual flow of an instruction through the pipelines:
;; one cycle in either ALU pipeline.
(define_reservation "cortex_a8_default"
                    "cortex_a8_alu0|cortex_a8_alu1")
 
;; The flow of a branch instruction through the pipelines.
;; Whichever ALU pipeline is chosen, the branch issue unit is reserved
;; alongside it, so two branches can never share a cycle.
(define_reservation "cortex_a8_branch"
                    "(cortex_a8_alu0+cortex_a8_issue_branch)|\
                     (cortex_a8_alu1+cortex_a8_issue_branch)")
 
;; The flow of a load or store instruction through the pipeline in
;; the case where that instruction consists of only one micro-op...
;; Either ALU pipeline may be used; the load/store issue unit is
;; reserved alongside it for that one cycle.
(define_reservation "cortex_a8_load_store_1"
                    "(cortex_a8_alu0+cortex_a8_issue_ls)|\
                     (cortex_a8_alu1+cortex_a8_issue_ls)")
 
;; ...and in the case of two micro-ops.  Dual issue is altogether forbidden
;; during the issue cycle of the first micro-op.  (Instead of modelling
;; a separate issue unit, we instead reserve alu0 and alu1 to
;; prevent any other instructions from being issued upon that first cycle.)
;; Even though the load/store pipeline is usually available in either
;; ALU pipe, multi-cycle instructions always issue in pipeline 0.
;; Cycle 1: both ALU pipelines plus the load/store issue unit.
;; Cycle 2: pipeline 0 plus the load/store issue unit (alu1 is free).
(define_reservation "cortex_a8_load_store_2"
                    "cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\
                     cortex_a8_alu0+cortex_a8_issue_ls")
 
;; The flow of a single-cycle multiplication.
;; Only pipeline 0 is reserved, for one cycle; alu1 is left free.
(define_reservation "cortex_a8_multiply"
                    "cortex_a8_alu0")
 
;; The flow of a multiplication instruction that gets decomposed into
;; two micro-ops.  The two micro-ops will be issued to pipeline 0 on
;; successive cycles.  Dual issue cannot happen at the same time as the
;; first of the micro-ops.
;; Cycle 1: both ALU pipelines.  Cycle 2: pipeline 0 only.
(define_reservation "cortex_a8_multiply_2"
                    "cortex_a8_alu0+cortex_a8_alu1,\
                     cortex_a8_alu0")
 
;; Similarly, the flow of a multiplication instruction that gets
;; decomposed into three micro-ops.  Dual issue cannot occur except on
;; the cycle upon which the third micro-op is issued.
;; Cycles 1 and 2: both ALU pipelines.  Cycle 3: pipeline 0 only.
(define_reservation "cortex_a8_multiply_3"
                    "cortex_a8_alu0+cortex_a8_alu1,\
                     cortex_a8_alu0+cortex_a8_alu1,\
                     cortex_a8_alu0")
 
;; The model given here assumes that all instructions are unconditional.
 
;; Data processing instructions, but not move instructions.
;; All three reservations below have a default latency of 2 and use
;; the cortex_a8_default reservation (either ALU pipeline).
 
;; We include CLZ with these since it has the same execution pattern
;; (source read in E2 and destination available at the end of that cycle).
;; Matches either (a) type "alu" with neon_type "none" and an insn other
;; than mov/mvn, or (b) insn "clz"; the clz arm is not qualified by the
;; type or neon_type tests.
(define_insn_reservation "cortex_a8_alu" 2
  (and (eq_attr "tune" "cortexa8")
       (ior (and (and (eq_attr "type" "alu")
                      (eq_attr "neon_type" "none"))
                 (not (eq_attr "insn" "mov,mvn")))
            (eq_attr "insn" "clz")))
  "cortex_a8_default")
 
;; Type "alu_shift" instructions that are not moves.
(define_insn_reservation "cortex_a8_alu_shift" 2
  (and (eq_attr "tune" "cortexa8")
       (and (eq_attr "type" "alu_shift")
            (not (eq_attr "insn" "mov,mvn"))))
  "cortex_a8_default")
 
;; Type "alu_shift_reg" instructions that are not moves.
(define_insn_reservation "cortex_a8_alu_shift_reg" 2
  (and (eq_attr "tune" "cortexa8")
       (and (eq_attr "type" "alu_shift_reg")
            (not (eq_attr "insn" "mov,mvn"))))
  "cortex_a8_default")
 
;; Move instructions.
;; Any mov/mvn whose type is alu, alu_shift or alu_shift_reg: default
;; latency 1, either ALU pipeline.
;; NOTE(review): unlike cortex_a8_alu, this condition has no neon_type
;; test -- confirm that is intentional.
 
(define_insn_reservation "cortex_a8_mov" 1
  (and (eq_attr "tune" "cortexa8")
       (and (eq_attr "type" "alu,alu_shift,alu_shift_reg")
            (eq_attr "insn" "mov,mvn")))
  "cortex_a8_default")
 
;; Exceptions to the default latencies for data processing instructions.
;; In each group of three bypasses below, the one targeting
;; cortex_a8_alu has no guard and so always applies; the ones targeting
;; cortex_a8_alu_shift and cortex_a8_alu_shift_reg apply only when the
;; named guard function (defined outside this file) accepts the pair.
 
;; A move followed by an ALU instruction with no early dep.
;; (Such a pair can be issued in parallel, hence latency zero.)
(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu")
(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift"
               "arm_no_early_alu_shift_dep")
(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg"
               "arm_no_early_alu_shift_value_dep")
 
;; An ALU instruction followed by an ALU instruction with no early dep.
;; Latency drops from the default 2 to 1.
(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
               "cortex_a8_alu")
(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
               "cortex_a8_alu_shift"
               "arm_no_early_alu_shift_dep")
(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
               "cortex_a8_alu_shift_reg"
               "arm_no_early_alu_shift_value_dep")
 
;; Multiplication instructions.  These are categorized according to their
;; reservation behavior and the need below to distinguish certain
;; varieties for bypasses.  Results are available at the E5 stage
;; (but some of these are multi-cycle instructions which explains the
;; latencies below).
 
;; Two micro-op multiplies without an accumulator: latency 6.
(define_insn_reservation "cortex_a8_mul" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "insn" "mul,smulxy,smmul"))
  "cortex_a8_multiply_2")
 
;; Two micro-op multiply-accumulates: latency 6.  This is the only
;; class used as the consumer of the multiply-to-accumulator bypass.
(define_insn_reservation "cortex_a8_mla" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
  "cortex_a8_multiply_2")
 
;; Three micro-op long multiplies: latency 7.
(define_insn_reservation "cortex_a8_mull" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
  "cortex_a8_multiply_3")
 
;; Single-cycle multiplies: latency 5.
(define_insn_reservation "cortex_a8_smulwy" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "insn" "smulwy,smuad,smusd"))
  "cortex_a8_multiply")
 
;; smlald and smlsld are multiply-accumulate instructions but do not
;; receive bypassed data from other multiplication results; thus, they
;; cannot go in cortex_a8_mla above.  (See below for bypass details.)
(define_insn_reservation "cortex_a8_smlald" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "insn" "smlald,smlsld"))
  "cortex_a8_multiply_2")
 
;; A multiply with a single-register result or an MLA, followed by an
;; MLA with an accumulator dependency, has its result forwarded so two
;; such instructions can issue back-to-back.
;; Producers are cortex_a8_mul, cortex_a8_mla and cortex_a8_smulwy only;
;; cortex_a8_mull and cortex_a8_smlald are not listed.  The bypass
;; applies only when the guard arm_mac_accumulator_is_mul_result
;; (defined outside this file) accepts the pair.
(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy"
               "cortex_a8_mla"
               "arm_mac_accumulator_is_mul_result")
 
;; A multiply followed by an ALU instruction needing the multiply
;; result only at E2 has lower latency than one needing it at E1.
;; All five multiply classes are producers here.  The bypass to
;; cortex_a8_alu is unguarded; the two shift variants are guarded.
(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
                  cortex_a8_smulwy,cortex_a8_smlald"
               "cortex_a8_alu")
(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
                  cortex_a8_smulwy,cortex_a8_smlald"
               "cortex_a8_alu_shift"
               "arm_no_early_alu_shift_dep")
(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
                  cortex_a8_smulwy,cortex_a8_smlald"
               "cortex_a8_alu_shift_reg"
               "arm_no_early_alu_shift_value_dep")
 
;; Load instructions.
;; The presence of any register writeback is ignored here.
 
;; A load result has latency 3 unless the dependent instruction has
;; no early dep, in which case it is only latency two.
;; We assume 64-bit alignment for doubleword loads.
;; Covers types load1, load2 and load_byte, all modelled as a single
;; micro-op (cortex_a8_load_store_1).
(define_insn_reservation "cortex_a8_load1_2" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "load1,load2,load_byte"))
  "cortex_a8_load_store_1")
 
;; The latency-2 cases: unguarded for a cortex_a8_alu consumer, guarded
;; for the two shift variants.
(define_bypass 2 "cortex_a8_load1_2"
               "cortex_a8_alu")
(define_bypass 2 "cortex_a8_load1_2"
               "cortex_a8_alu_shift"
               "arm_no_early_alu_shift_dep")
(define_bypass 2 "cortex_a8_load1_2"
               "cortex_a8_alu_shift_reg"
               "arm_no_early_alu_shift_value_dep")
 
;; We do not currently model the fact that loads with scaled register
;; offsets that are not LSL #2 have an extra cycle latency (they issue
;; as two micro-ops).
 
;; A load multiple of three registers is usually issued as two micro-ops.
;; The first register will be available at E3 of the first iteration,
;; the second at E3 of the second iteration, and the third at E4 of
;; the second iteration.  A load multiple of four registers is usually
;; issued as two micro-ops.
;; Both cases share one reservation: default latency 5, two micro-ops
;; (cortex_a8_load_store_2).
(define_insn_reservation "cortex_a8_load3_4" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "load3,load4"))
  "cortex_a8_load_store_2")
 
;; Latency 4 instead of 5: unguarded for a cortex_a8_alu consumer,
;; guarded for the two shift variants.
(define_bypass 4 "cortex_a8_load3_4"
               "cortex_a8_alu")
(define_bypass 4 "cortex_a8_load3_4"
               "cortex_a8_alu_shift"
               "arm_no_early_alu_shift_dep")
(define_bypass 4 "cortex_a8_load3_4"
               "cortex_a8_alu_shift_reg"
               "arm_no_early_alu_shift_value_dep")
 
;; Store instructions.
;; Writeback is again ignored.
;; Both store reservations have a default latency of 0.
 
;; Stores of one or two registers: a single micro-op.
(define_insn_reservation "cortex_a8_store1_2" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "store1,store2"))
  "cortex_a8_load_store_1")
 
;; Stores of three or four registers: two micro-ops.
(define_insn_reservation "cortex_a8_store3_4" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "store3,store4"))
  "cortex_a8_load_store_2")
 
;; An ALU instruction acting as a producer for a store instruction
;; that only uses the result as the value to be stored (as opposed to
;; using it to calculate the address) has latency zero; the store
;; reads the value to be stored at the start of E3 and the ALU insn
;; writes it at the end of E2.  Move instructions actually produce the
;; result at the end of E1, but since we don't have delay slots, the
;; scheduling behavior will be the same.
;; The guard arm_no_early_store_addr_dep is defined outside this file.
(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\
                  cortex_a8_alu_shift_reg,cortex_a8_mov"
               "cortex_a8_store1_2,cortex_a8_store3_4"
               "arm_no_early_store_addr_dep")
 
;; Branch instructions
;; Default latency 0.  This insn reservation has the same name as the
;; unit reservation "cortex_a8_branch" that it uses.
 
(define_insn_reservation "cortex_a8_branch" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "branch"))
  "cortex_a8_branch")
 
;; Call latencies are not predictable.  A semi-arbitrary very large
;; number is used as "positive infinity" so that everything should be
;; finished by the time of return.
;; Unlike a branch, a call reserves only the branch issue unit and
;; neither ALU pipeline.
(define_insn_reservation "cortex_a8_call" 32
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "call"))
  "cortex_a8_issue_branch")
 
;; NEON (including VFP) instructions.
;; Their scheduling description is kept in a separate file and pulled
;; in here.
 
(include "cortex-a8-neon.md")
 

Browse

Tools

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [config/] [arm/] [cortex-a8.md] - Blame information for rev 332

Line No.	Rev	Author	Line
1	282	jeremybenn	`;; ARM Cortex-A8 scheduling description.`
2			`;; Copyright (C) 2007 Free Software Foundation, Inc.`
3			`;; Contributed by CodeSourcery.`
4
5			`;; This file is part of GCC.`
6
7			`;; GCC is free software; you can redistribute it and/or modify it`
8			`;; under the terms of the GNU General Public License as published`
9			`;; by the Free Software Foundation; either version 3, or (at your`
10			`;; option) any later version.`
11
12			`;; GCC is distributed in the hope that it will be useful, but WITHOUT`
13			`;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
14			`;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public`
15			`;; License for more details.`
16
17			`;; You should have received a copy of the GNU General Public License`
18			`;; along with GCC; see the file COPYING3. If not see`
19			`;; <http://www.gnu.org/licenses/>.`
20
21			`(define_automaton "cortex_a8")`
22
23			`;; Only one load/store instruction can be issued per cycle`
24			`;; (although reservation of this unit is only required for single`
25			`;; loads and stores -- see below).`
26			`(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8")`
27
28			`;; Only one branch instruction can be issued per cycle.`
29			`(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8")`
30
31			`;; The two ALU pipelines.`
32			`(define_cpu_unit "cortex_a8_alu0" "cortex_a8")`
33			`(define_cpu_unit "cortex_a8_alu1" "cortex_a8")`
34
35			`;; The usual flow of an instruction through the pipelines.`
36			`(define_reservation "cortex_a8_default"`
37			`"cortex_a8_alu0\|cortex_a8_alu1")`
38
39			`;; The flow of a branch instruction through the pipelines.`
40			`(define_reservation "cortex_a8_branch"`
41			`"(cortex_a8_alu0+cortex_a8_issue_branch)\|\`
42			`(cortex_a8_alu1+cortex_a8_issue_branch)")`
43
44			`;; The flow of a load or store instruction through the pipeline in`
45			`;; the case where that instruction consists of only one micro-op...`
46			`(define_reservation "cortex_a8_load_store_1"`
47			`"(cortex_a8_alu0+cortex_a8_issue_ls)\|\`
48			`(cortex_a8_alu1+cortex_a8_issue_ls)")`
49
50			`;; ...and in the case of two micro-ops. Dual issue is altogether forbidden`
51			`;; during the issue cycle of the first micro-op. (Instead of modelling`
52			`;; a separate issue unit, we instead reserve alu0 and alu1 to`
53			`;; prevent any other instructions from being issued upon that first cycle.)`
54			`;; Even though the load/store pipeline is usually available in either`
55			`;; ALU pipe, multi-cycle instructions always issue in pipeline 0.`
56			`(define_reservation "cortex_a8_load_store_2"`
57			`"cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\`
58			`cortex_a8_alu0+cortex_a8_issue_ls")`
59
60			`;; The flow of a single-cycle multiplication.`
61			`(define_reservation "cortex_a8_multiply"`
62			`"cortex_a8_alu0")`
63
64			`;; The flow of a multiplication instruction that gets decomposed into`
65			`;; two micro-ops. The two micro-ops will be issued to pipeline 0 on`
66			`;; successive cycles. Dual issue cannot happen at the same time as the`
67			`;; first of the micro-ops.`
68			`(define_reservation "cortex_a8_multiply_2"`
69			`"cortex_a8_alu0+cortex_a8_alu1,\`
70			`cortex_a8_alu0")`
71
72			`;; Similarly, the flow of a multiplication instruction that gets`
73			`;; decomposed into three micro-ops. Dual issue cannot occur except on`
74			`;; the cycle upon which the third micro-op is issued.`
75			`(define_reservation "cortex_a8_multiply_3"`
76			`"cortex_a8_alu0+cortex_a8_alu1,\`
77			`cortex_a8_alu0+cortex_a8_alu1,\`
78			`cortex_a8_alu0")`
79
80			`;; The model given here assumes that all instructions are unconditional.`
81
82			`;; Data processing instructions, but not move instructions.`
83
84			`;; We include CLZ with these since it has the same execution pattern`
85			`;; (source read in E2 and destination available at the end of that cycle).`
86			`(define_insn_reservation "cortex_a8_alu" 2`
87			`(and (eq_attr "tune" "cortexa8")`
88			`(ior (and (and (eq_attr "type" "alu")`
89			`(eq_attr "neon_type" "none"))`
90			`(not (eq_attr "insn" "mov,mvn")))`
91			`(eq_attr "insn" "clz")))`
92			`"cortex_a8_default")`
93
94			`(define_insn_reservation "cortex_a8_alu_shift" 2`
95			`(and (eq_attr "tune" "cortexa8")`
96			`(and (eq_attr "type" "alu_shift")`
97			`(not (eq_attr "insn" "mov,mvn"))))`
98			`"cortex_a8_default")`
99
100			`(define_insn_reservation "cortex_a8_alu_shift_reg" 2`
101			`(and (eq_attr "tune" "cortexa8")`
102			`(and (eq_attr "type" "alu_shift_reg")`
103			`(not (eq_attr "insn" "mov,mvn"))))`
104			`"cortex_a8_default")`
105
106			`;; Move instructions.`
107
108			`(define_insn_reservation "cortex_a8_mov" 1`
109			`(and (eq_attr "tune" "cortexa8")`
110			`(and (eq_attr "type" "alu,alu_shift,alu_shift_reg")`
111			`(eq_attr "insn" "mov,mvn")))`
112			`"cortex_a8_default")`
113
114			`;; Exceptions to the default latencies for data processing instructions.`
115
116			`;; A move followed by an ALU instruction with no early dep.`
117			`;; (Such a pair can be issued in parallel, hence latency zero.)`
118			`(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu")`
119			`(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift"`
120			`"arm_no_early_alu_shift_dep")`
121			`(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg"`
122			`"arm_no_early_alu_shift_value_dep")`
123
124			`;; An ALU instruction followed by an ALU instruction with no early dep.`
125			`(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"`
126			`"cortex_a8_alu")`
127			`(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"`
128			`"cortex_a8_alu_shift"`
129			`"arm_no_early_alu_shift_dep")`
130			`(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"`
131			`"cortex_a8_alu_shift_reg"`
132			`"arm_no_early_alu_shift_value_dep")`
133
134			`;; Multiplication instructions. These are categorized according to their`
135			`;; reservation behavior and the need below to distinguish certain`
136			`;; varieties for bypasses. Results are available at the E5 stage`
137			`;; (but some of these are multi-cycle instructions which explains the`
138			`;; latencies below).`
139
140			`(define_insn_reservation "cortex_a8_mul" 6`
141			`(and (eq_attr "tune" "cortexa8")`
142			`(eq_attr "insn" "mul,smulxy,smmul"))`
143			`"cortex_a8_multiply_2")`
144
145			`(define_insn_reservation "cortex_a8_mla" 6`
146			`(and (eq_attr "tune" "cortexa8")`
147			`(eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))`
148			`"cortex_a8_multiply_2")`
149
150			`(define_insn_reservation "cortex_a8_mull" 7`
151			`(and (eq_attr "tune" "cortexa8")`
152			`(eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))`
153			`"cortex_a8_multiply_3")`
154
155			`(define_insn_reservation "cortex_a8_smulwy" 5`
156			`(and (eq_attr "tune" "cortexa8")`
157			`(eq_attr "insn" "smulwy,smuad,smusd"))`
158			`"cortex_a8_multiply")`
159
160			`;; smlald and smlsld are multiply-accumulate instructions but do not`
161			`;; receive bypassed data from other multiplication results; thus, they`
162			`;; cannot go in cortex_a8_mla above. (See below for bypass details.)`
163			`(define_insn_reservation "cortex_a8_smlald" 6`
164			`(and (eq_attr "tune" "cortexa8")`
165			`(eq_attr "insn" "smlald,smlsld"))`
166			`"cortex_a8_multiply_2")`
167
168			`;; A multiply with a single-register result or an MLA, followed by an`
169			`;; MLA with an accumulator dependency, has its result forwarded so two`
170			`;; such instructions can issue back-to-back.`
171			`(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy"`
172			`"cortex_a8_mla"`
173			`"arm_mac_accumulator_is_mul_result")`
174
175			`;; A multiply followed by an ALU instruction needing the multiply`
176			`;; result only at E2 has lower latency than one needing it at E1.`
177			`(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\`
178			`cortex_a8_smulwy,cortex_a8_smlald"`
179			`"cortex_a8_alu")`
180			`(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\`
181			`cortex_a8_smulwy,cortex_a8_smlald"`
182			`"cortex_a8_alu_shift"`
183			`"arm_no_early_alu_shift_dep")`
184			`(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\`
185			`cortex_a8_smulwy,cortex_a8_smlald"`
186			`"cortex_a8_alu_shift_reg"`
187			`"arm_no_early_alu_shift_value_dep")`
188
189			`;; Load instructions.`
190			`;; The presence of any register writeback is ignored here.`
191
192			`;; A load result has latency 3 unless the dependent instruction has`
193			`;; no early dep, in which case it is only latency two.`
194			`;; We assume 64-bit alignment for doubleword loads.`
195			`(define_insn_reservation "cortex_a8_load1_2" 3`
196			`(and (eq_attr "tune" "cortexa8")`
197			`(eq_attr "type" "load1,load2,load_byte"))`
198			`"cortex_a8_load_store_1")`
199
200			`(define_bypass 2 "cortex_a8_load1_2"`
201			`"cortex_a8_alu")`
202			`(define_bypass 2 "cortex_a8_load1_2"`
203			`"cortex_a8_alu_shift"`
204			`"arm_no_early_alu_shift_dep")`
205			`(define_bypass 2 "cortex_a8_load1_2"`
206			`"cortex_a8_alu_shift_reg"`
207			`"arm_no_early_alu_shift_value_dep")`
208
209			`;; We do not currently model the fact that loads with scaled register`
210			`;; offsets that are not LSL #2 have an extra cycle latency (they issue`
211			`;; as two micro-ops).`
212
213			`;; A load multiple of three registers is usually issued as two micro-ops.`
214			`;; The first register will be available at E3 of the first iteration,`
215			`;; the second at E3 of the second iteration, and the third at E4 of`
216			`;; the second iteration. A load multiple of four registers is usually`
217			`;; issued as two micro-ops.`
218			`(define_insn_reservation "cortex_a8_load3_4" 5`
219			`(and (eq_attr "tune" "cortexa8")`
220			`(eq_attr "type" "load3,load4"))`
221			`"cortex_a8_load_store_2")`
222
223			`(define_bypass 4 "cortex_a8_load3_4"`
224			`"cortex_a8_alu")`
225			`(define_bypass 4 "cortex_a8_load3_4"`
226			`"cortex_a8_alu_shift"`
227			`"arm_no_early_alu_shift_dep")`
228			`(define_bypass 4 "cortex_a8_load3_4"`
229			`"cortex_a8_alu_shift_reg"`
230			`"arm_no_early_alu_shift_value_dep")`
231
232			`;; Store instructions.`
233			`;; Writeback is again ignored.`
234
235			`(define_insn_reservation "cortex_a8_store1_2" 0`
236			`(and (eq_attr "tune" "cortexa8")`
237			`(eq_attr "type" "store1,store2"))`
238			`"cortex_a8_load_store_1")`
239
240			`(define_insn_reservation "cortex_a8_store3_4" 0`
241			`(and (eq_attr "tune" "cortexa8")`
242			`(eq_attr "type" "store3,store4"))`
243			`"cortex_a8_load_store_2")`
244
245			`;; An ALU instruction acting as a producer for a store instruction`
246			`;; that only uses the result as the value to be stored (as opposed to`
247			`;; using it to calculate the address) has latency zero; the store`
248			`;; reads the value to be stored at the start of E3 and the ALU insn`
249			`;; writes it at the end of E2. Move instructions actually produce the`
250			`;; result at the end of E1, but since we don't have delay slots, the`
251			`;; scheduling behavior will be the same.`
252			`(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\`
253			`cortex_a8_alu_shift_reg,cortex_a8_mov"`
254			`"cortex_a8_store1_2,cortex_a8_store3_4"`
255			`"arm_no_early_store_addr_dep")`
256
257			`;; Branch instructions`
258
259			`(define_insn_reservation "cortex_a8_branch" 0`
260			`(and (eq_attr "tune" "cortexa8")`
261			`(eq_attr "type" "branch"))`
262			`"cortex_a8_branch")`
263
264			`;; Call latencies are not predictable. A semi-arbitrary very large`
265			`;; number is used as "positive infinity" so that everything should be`
266			`;; finished by the time of return.`
267			`(define_insn_reservation "cortex_a8_call" 32`
268			`(and (eq_attr "tune" "cortexa8")`
269			`(eq_attr "type" "call"))`
270			`"cortex_a8_issue_branch")`
271
272			`;; NEON (including VFP) instructions.`
273
274			`(include "cortex-a8-neon.md")`
275