1 |
282 |
jeremybenn |
;; DFA-based pipeline description for the VR1x000.
|
2 |
|
|
;; Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
|
3 |
|
|
;;
|
4 |
|
|
;; This file is part of GCC.
|
5 |
|
|
|
6 |
|
|
;; GCC is free software; you can redistribute it and/or modify it
|
7 |
|
|
;; under the terms of the GNU General Public License as published
|
8 |
|
|
;; by the Free Software Foundation; either version 3, or (at your
|
9 |
|
|
;; option) any later version.
|
10 |
|
|
|
11 |
|
|
;; GCC is distributed in the hope that it will be useful, but WITHOUT
|
12 |
|
|
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
13 |
|
|
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
14 |
|
|
;; License for more details.
|
15 |
|
|
|
16 |
|
|
;; You should have received a copy of the GNU General Public License
|
17 |
|
|
;; along with GCC; see the file COPYING3. If not see
|
18 |
|
|
;; <http://www.gnu.org/licenses/>.
|
19 |
|
|
|
20 |
|
|
|
21 |
|
|
;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
|
22 |
|
|
;; until specific tuning for each is added.
|
23 |
|
|
|
24 |
|
|
;; R10000 has an int queue, fp queue, address queue.
|
25 |
|
|
;; The int queue feeds ALU1 and ALU2.
|
26 |
|
|
;; The fp queue feeds the fp-adder and fp-multiplier.
|
27 |
|
|
;; The addr queue feeds the Load/Store unit.
|
28 |
|
|
;;
|
29 |
|
|
;; However, we define the fp-adder and fp-multiplier as
|
30 |
|
|
;; separate automatons, because the fp-multiplier is
|
31 |
|
|
;; divided into fp-multiplier, fp-division, and
|
32 |
|
|
;; fp-squareroot units, all of which share the same
|
33 |
|
|
;; issue and completion logic, yet can operate in
|
34 |
|
|
;; parallel.
|
35 |
|
|
;;
|
36 |
|
|
;; This is based on the model described in the R10K Manual
|
37 |
|
|
;; and it helps to reduce the size of the automata.
|
38 |
|
|
;; Declare the six automata used by this description: one for the
;; integer units, one for the address (load/store) unit, and four
;; separate ones for the fp adder, fp multiplier, fp divider and
;; fp square-root unit.
(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,
                   r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")
|
40 |
|
|
|
41 |
|
|
;; Functional units.  Each unit is bound to the automaton named in
;; its second operand.

;; Both integer ALUs live in the integer automaton.
(define_cpu_unit "r10k_alu1" "r10k_a_int")
(define_cpu_unit "r10k_alu2" "r10k_a_int")

;; Each floating-point unit gets an automaton of its own.
(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")
(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")
(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")
(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")

;; The load/store unit is the only unit in the address automaton.
(define_cpu_unit "r10k_loadstore" "r10k_a_addr")
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
;; R10k Loads and Stores.
|
51 |
|
|
;; Integer loads and both prefetch types: default latency 2,
;; reserving the load/store unit for one cycle.
(define_insn_reservation "r10k_load" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "load,prefetch,prefetchx"))
  "r10k_loadstore")
|
55 |
|
|
|
56 |
|
|
;; Integer, fp and indexed-fp stores: default latency 0,
;; reserving the load/store unit for one cycle.
(define_insn_reservation "r10k_store" 0
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "store,fpstore,fpidxstore"))
  "r10k_loadstore")
|
60 |
|
|
|
61 |
|
|
;; Floating-point loads (plain and indexed): default latency 3, one
;; cycle longer than the integer load reservation, on the same
;; load/store unit.
(define_insn_reservation "r10k_fpload" 3
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fpload,fpidxload"))
  "r10k_loadstore")
|
65 |
|
|
|
66 |
|
|
|
67 |
|
|
;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
|
68 |
|
|
;; Miscellaneous arith goes here too (this is a guess).
|
69 |
|
|
;; Single-cycle integer operations.  The reservation is an
;; alternative, so either ALU1 or ALU2 may be used.
(define_insn_reservation "r10k_arith" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical"))
  "r10k_alu1 | r10k_alu2")
|
73 |
|
|
|
74 |
|
|
;; We treat mfhilo differently, because we need to know when
|
75 |
|
|
;; it's HI and when it's LO.
|
76 |
|
|
;; Move from HI: an mfhilo insn whose operand 1 does NOT satisfy
;; lo_operand.  Latency 1 on either ALU.  This reservation name is
;; also the consumer named by the integer-divide bypasses.
(define_insn_reservation "r10k_mfhi" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "mfhilo")
            (not (match_operand 1 "lo_operand"))))
  "r10k_alu1 | r10k_alu2")
|
81 |
|
|
|
82 |
|
|
;; Move from LO: an mfhilo insn whose operand 1 satisfies
;; lo_operand.  Latency 1 on either ALU.
(define_insn_reservation "r10k_mflo" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "mfhilo")
            (match_operand 1 "lo_operand")))
  "r10k_alu1 | r10k_alu2")
|
87 |
|
|
|
88 |
|
|
|
89 |
|
|
;; ALU1 handles shifts, branch eval, and condmove.
|
90 |
|
|
;;
|
91 |
|
|
;; Brancher is separate, but part of ALU1, but can only
|
92 |
|
|
;; do one branch per cycle (is this even implementable?).
|
93 |
|
|
;;
|
94 |
|
|
;; Unsure if the brancher handles jumps and calls as well, but since
|
95 |
|
|
;; they're related, we'll add them here for now.
|
96 |
|
|
;; Shifts, branches, jumps and calls: latency 1, and only ALU1 is
;; acceptable (no alternative with ALU2).
(define_insn_reservation "r10k_brancher" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "shift,branch,jump,call"))
  "r10k_alu1")
|
100 |
|
|
|
101 |
|
|
;; Conditional moves in the integer modes SI and DI: latency 1 on
;; ALU1 only.  The SF/DF conditional moves are matched by a
;; different reservation.
(define_insn_reservation "r10k_int_cmove" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SI,DI")))
  "r10k_alu1")
|
106 |
|
|
|
107 |
|
|
|
108 |
|
|
;; Coprocessor Moves.
|
109 |
|
|
;; mtc1/dmtc1 are handled by ALU1.
|
110 |
|
|
;; mfc1/dmfc1 are handled by the fp-multiplier.
|
111 |
|
|
;; Moves to a coprocessor register (type mtc): default latency 3,
;; reserving ALU1 for one cycle.
(define_insn_reservation "r10k_mt_xfer" 3
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mtc"))
  "r10k_alu1")
|
115 |
|
|
|
116 |
|
|
;; Moves from a coprocessor register (type mfc): default latency 2,
;; reserving the fp multiplier for one cycle.
(define_insn_reservation "r10k_mf_xfer" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mfc"))
  "r10k_fpmpy")
|
120 |
|
|
|
121 |
|
|
|
122 |
|
|
;; Only ALU2 does int multiplications and divisions.
|
123 |
|
|
;;
|
124 |
|
|
;; According to the Vr10000 series user manual,
|
125 |
|
|
;; integer mult and div insns can be issued one
|
126 |
|
|
;; cycle earlier if using register Lo. We model
|
127 |
|
|
;; this by using the Lo value by default, as it
|
128 |
|
|
;; is the more common value, and use a bypass
|
129 |
|
|
;; for the Hi value when needed.
|
130 |
|
|
;;
|
131 |
|
|
;; Also of note, there are different latencies
|
132 |
|
|
;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
|
133 |
|
|
;; However, gcc does not have separate types
|
134 |
|
|
;; for these insns. Thus to strike a balance,
|
135 |
|
|
;; we use the Hi latency value for imul
|
136 |
|
|
;; operations until the imul type can be split.
|
137 |
|
|
;; 32-bit (SImode) integer multiply: default latency 6, with ALU2
;; reserved for all 6 cycles.
(define_insn_reservation "r10k_imul_single" 6
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "SI")))
  "r10k_alu2 * 6")
|
142 |
|
|
|
143 |
|
|
;; 64-bit (DImode) integer multiply: default latency 10, with ALU2
;; reserved for all 10 cycles.
(define_insn_reservation "r10k_imul_double" 10
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "DI")))
  "r10k_alu2 * 10")
|
148 |
|
|
|
149 |
|
|
;; Divides keep ALU2 busy.
|
150 |
|
|
;; 32-bit (SImode) integer divide.  The default latency is 34, but
;; ALU2 is reserved for 35 cycles, i.e. one cycle longer than the
;; default latency.
(define_insn_reservation "r10k_idiv_single" 34
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "SI")))
  "r10k_alu2 * 35")
|
155 |
|
|
|
156 |
|
|
;; 64-bit (DImode) integer divide.  The default latency is 66, but
;; ALU2 is reserved for 67 cycles, i.e. one cycle longer than the
;; default latency.
(define_insn_reservation "r10k_idiv_double" 66
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "DI")))
  "r10k_alu2 * 67")
|
161 |
|
|
|
162 |
|
|
;; When the consumer of a divide is an r10k_mfhi insn, override the
;; divide's default latency with a value one cycle larger:
;; 35 instead of 34 (single) and 67 instead of 66 (double).
(define_bypass 35 "r10k_idiv_single" "r10k_mfhi")
(define_bypass 67 "r10k_idiv_double" "r10k_mfhi")
|
164 |
|
|
|
165 |
|
|
|
166 |
|
|
;; Floating point add/sub, mul, abs value, neg, comp, & moves.
|
167 |
|
|
;; FP add-class operations (add, absolute value, negate, compare):
;; default latency 2, reserving the fp adder for one cycle.
(define_insn_reservation "r10k_fp_miscadd" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fadd,fabs,fneg,fcmp"))
  "r10k_fpadd")
|
171 |
|
|
|
172 |
|
|
;; FP multiplies and FP register moves: default latency 2,
;; reserving the fp multiplier for one cycle.
(define_insn_reservation "r10k_fp_miscmul" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fmul,fmove"))
  "r10k_fpmpy")
|
176 |
|
|
|
177 |
|
|
;; Conditional moves in the floating-point modes SF and DF: default
;; latency 2 on the fp multiplier.  The SI/DI conditional moves are
;; matched by a different reservation.
(define_insn_reservation "r10k_fp_cmove" 2
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SF,DF")))
  "r10k_fpmpy")
|
182 |
|
|
|
183 |
|
|
|
184 |
|
|
;; The fcvt.s.[wl] insn has latency 4, repeat 2.
|
185 |
|
|
;; All other fcvt insns have latency 2, repeat 1.
|
186 |
|
|
;; Conversions whose cnv_mode is I2S: default latency 4, with the
;; fp adder reserved for 2 consecutive cycles.
(define_insn_reservation "r10k_fcvt_single" 4
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "I2S")))
  "r10k_fpadd * 2")
|
191 |
|
|
|
192 |
|
|
;; Every conversion whose cnv_mode is anything other than I2S:
;; default latency 2, reserving the fp adder for one cycle.
(define_insn_reservation "r10k_fcvt_other" 2
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "!I2S")))
  "r10k_fpadd")
|
197 |
|
|
|
198 |
|
|
|
199 |
|
|
;; Run the fmadd insn through fp-adder first, then fp-multiplier.
|
200 |
|
|
;;
|
201 |
|
|
;; The latency for fmadd is 2 cycles if the result is used
|
202 |
|
|
;; by another fmadd instruction.
|
203 |
|
|
;; Fused multiply-add: default latency 4.  The reservation is a
;; sequence: the fp adder on the first cycle, followed by the fp
;; multiplier on the next cycle.
(define_insn_reservation "r10k_fmadd" 4
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fmadd"))
  "r10k_fpadd, r10k_fpmpy")
|
207 |
|
|
|
208 |
|
|
;; An fmadd feeding another fmadd sees latency 2 instead of the
;; default latency of the r10k_fmadd reservation.
(define_bypass 2 "r10k_fmadd" "r10k_fmadd")
|
209 |
|
|
|
210 |
|
|
|
211 |
|
|
;; Floating point Divisions & square roots.
|
212 |
|
|
;; Single-precision (SFmode) divide and reciprocal: default latency
;; 12, with the fp divider reserved for 14 cycles.
(define_insn_reservation "r10k_fdiv_single" 12
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fdiv,frdiv")
            (eq_attr "mode" "SF")))
  "r10k_fpdiv * 14")
|
217 |
|
|
|
218 |
|
|
;; Double-precision (DFmode) divide and reciprocal: default latency
;; 19, with the fp divider reserved for 21 cycles.
(define_insn_reservation "r10k_fdiv_double" 19
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fdiv,frdiv")
            (eq_attr "mode" "DF")))
  "r10k_fpdiv * 21")
|
223 |
|
|
|
224 |
|
|
;; Single-precision (SFmode) square root: default latency 18, with
;; the fp square-root unit reserved for 20 cycles.
(define_insn_reservation "r10k_fsqrt_single" 18
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "mode" "SF")))
  "r10k_fpsqrt * 20")
|
229 |
|
|
|
230 |
|
|
;; Double-precision (DFmode) square root: default latency 33, with
;; the fp square-root unit reserved for 35 cycles.
(define_insn_reservation "r10k_fsqrt_double" 33
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "mode" "DF")))
  "r10k_fpsqrt * 35")
|
235 |
|
|
|
236 |
|
|
;; Single-precision (SFmode) reciprocal square root: default
;; latency 30, with the fp square-root unit reserved for 20 cycles
;; (the same occupancy as the SFmode fsqrt reservation).
(define_insn_reservation "r10k_frsqrt_single" 30
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "SF")))
  "r10k_fpsqrt * 20")
|
241 |
|
|
|
242 |
|
|
;; Double-precision (DFmode) reciprocal square root: default
;; latency 52, with the fp square-root unit reserved for 35 cycles
;; (the same occupancy as the DFmode fsqrt reservation).
(define_insn_reservation "r10k_frsqrt_double" 52
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "DF")))
  "r10k_fpsqrt * 35")
|
247 |
|
|
|
248 |
|
|
|
249 |
|
|
;; Handle unknown/multi insns here (this is a guess).
|
250 |
|
|
;; Catch-all for insns of type unknown or multi: latency 1.  The
;; "+" reserves ALU1 and ALU2 together in the same cycle, so such
;; an insn occupies both integer ALUs at once.
(define_insn_reservation "r10k_unknown" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "unknown,multi"))
  "r10k_alu1 + r10k_alu2")
|