OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [config/] [mips/] [sr71k.md] - Blame information for rev 38

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
;; .........................
2
;;
3
;; DFA-based pipeline description for Sandcraft SR3 (MIPS64 based)
4
;;
5
;; The SR3 is described as:
6
;;     - nine-stage pipeline, insn buffering with out-of-order issue to
7
;;       multiple function units, with an average dispatch rate of 2
8
;;       insn.s per cycle (max 6 insns: 2 fpu, 4 cpu).
9
;;
10
;;  The details on this are scant except for a diagram in
11
;;  Chap. 6 of Rev. 1.0 SR3 Spec.
12
;;
13
;;  The model employed below is designed to closely approximate the
14
;;  published latencies. Emulation of out-of-order issue and the insn
15
;;  buffering is done via a VLIW dispatch style (with a packing of 6 insns);
16
;;  the function unit reservation restrictions (define_*_set) are
17
;;  contrived to support published timings.
18
;;
19
;; Reference:
20
;;   "SR3 Microprocessor Specification, System development information,"
21
;;   Revision 1.0, 13 December 2000.
22
;;
23
;;
24
;; Reservation model is based on:
25
;;   1) Figure 6-1, from the 1.0 specification.
26
;;   2) Chapter 19, from the 1.0 specification.
27
;;   3) following questions(Red Hat)/answers(Sandcraft):
28
;;     RH> From Section 19.1
29
;;     RH>      1) In terms of figure 6-1, are all the instructions in
30
;;     RH>         table 19-1 restricted
31
;;     RH>         to ALUx? When ALUx is not in use for an instruction in table 19-1, is
32
;;     RH>         it fully compatible with all insns that issue to ALUy?
33
;;
34
;;     Yes, all the instructions in Table 19-1 only go to ALUX, and all the
35
;;     instructions that can be issued to ALUY can also be issued to ALUX.
36
;;
37
;;
38
;;     RH> From Section 19.2
39
;;     RH>      2) Explain conditional moves execution path (in terms of
40
;;     RH>      figure 6-1)
41
;;
42
;;     Conditional move of integer registers (based on floating point condition
43
;;     codes or integer register value) go to ALUX or ALUY.
44
;;
45
;;     RH>      3) Explain floating point store execution path (in terms of
46
;;     RH>      figure 6-1)
47
;;
48
;;     Floating point stores go to Ld/St and go to MOV in the floating point
49
;;     pipeline.
50
;;
51
;;     Floating point loads go to Ld/St and go to LOAD in the floating point
52
;;     pipeline.
53
;;
54
;;     RH>      4) Explain branch on floating condition (in terms of figure 6-1)
;;
55
;;     Branch on floating condition go to BRU.
56
;;
57
;;     RH>      5) Is the column for single RECIP instruction latency correct?
58
;;     RH>      What about for RSQRT single and double?
59
;;
60
;;     The latency/repeat for RECIP and RSQRT are correct.
61
;;
62
 
63
;;
64
;; Use several automata (six are declared below) to isolate long latency
65
;; operations, and to reduce the complexity of cpu+fpu, reducing space.
66
;;
67
(define_automaton "sr71_cpu, sr71_cpu1, sr71_cp1, sr71_cp2, sr71_fextra, sr71_imacc")
68
 
69
;;  feeders for CPU function units and feeders for fpu (CP1 interface)
;;  All six issue units belong to the "sr71_cpu" automaton.  The reservations
;;  in this file use sr_iss0..sr_iss3 for cpu insns (cpu_iss) and
;;  sr_iss4..sr_iss5 for fpu insns (cp1_iss).
70
(define_cpu_unit "sr_iss0,sr_iss1,sr_iss2,sr_iss3,sr_iss4,sr_iss5" "sr71_cpu")
71
 
72
;; CPU function units
;; The branch unit, the two ALUs and the load/store unit share the
;; "sr71_cpu1" automaton.  ipu_macc_iter sits alone in "sr71_imacc"; the
;; integer multiply/divide reservations hold it to emulate repeat stalls.
73
(define_cpu_unit "ipu_bru"       "sr71_cpu1")
74
(define_cpu_unit "ipu_alux"      "sr71_cpu1")
75
(define_cpu_unit "ipu_aluy"      "sr71_cpu1")
76
(define_cpu_unit "ipu_ldst"      "sr71_cpu1")
77
(define_cpu_unit "ipu_macc_iter" "sr71_imacc")
78
 
79
 
80
;; Floating-point unit (Co-processor interface 1).
;; fpu_mov and fpu_load are in automaton "sr71_cp1"; fpu_fpu is kept in
;; its own automaton "sr71_cp2".
81
(define_cpu_unit "fpu_mov"          "sr71_cp1")
82
(define_cpu_unit "fpu_load"         "sr71_cp1")
83
(define_cpu_unit "fpu_fpu"          "sr71_cp2")
84
 
85
;; fictitious unit to track long float insns with separate automaton
;; ("sr71_fextra"); it is reserved through rf_multi1 and by the
;; fdiv/fsqrt/frsqrt insn reservations.
86
(define_cpu_unit "fpu_iter"         "sr71_fextra")
87
 
88
 
89
;;
90
;; Define common execution path (reservation) combinations
91
;;
92
 
93
;;
94
;; cpu_iss: one cycle in any one of the four cpu issue units
;; (sr_iss0..sr_iss3).
(define_reservation "cpu_iss"         "sr_iss0|sr_iss1|sr_iss2|sr_iss3")
95
 
96
;; two cycles are used for instruction using the fpu as it runs
97
;; at half the clock speed of the cpu. By adding an extra cycle
98
;; to the issue units, the default/minimum "repeat" dispatch delay is
99
;; accounted for all insn.s
;; cp1_iss: hold either sr_iss4 or sr_iss5 for two consecutive cycles.
100
(define_reservation "cp1_iss"         "(sr_iss4*2)|(sr_iss5*2)")
101
 
102
;; serial_dispatch: occupy all six issue units in the same cycle ("+"),
;; so no other insn whose reservation needs an issue unit can dispatch
;; in that cycle.
(define_reservation "serial_dispatch" "sr_iss0+sr_iss1+sr_iss2+sr_iss3+sr_iss4+sr_iss5")
103
 
104
;; Simulate a 6 insn VLIW dispatch, 1 cycle in dispatch followed by
105
;; reservation of function unit.
;; Each ri_* path is one cpu_iss cycle, then (",") one cycle in a unit:
;;   ri_insns  - either ALU (ipu_alux or ipu_aluy)
;;   ri_mem    - the load/store unit
;;   ri_alux   - ALUX only
;;   ri_branch - the branch unit
106
(define_reservation "ri_insns"         "cpu_iss,(ipu_alux|ipu_aluy)")
107
(define_reservation "ri_mem"           "cpu_iss,ipu_ldst")
108
(define_reservation "ri_alux"          "cpu_iss,ipu_alux")
109
(define_reservation "ri_branch"        "cpu_iss,ipu_bru")
110
 
111
;; fpu-side paths: the two-cycle cp1_iss issue reservation, followed by
;; one cycle in fpu_fpu (rf_insn) or in fpu_load (rf_ldmem).
(define_reservation "rf_insn"          "cp1_iss,fpu_fpu")
112
(define_reservation "rf_ldmem"         "cp1_iss,fpu_load")
113
 
114
; simultaneous reservation of pseudo-unit keeps cp1 fpu tied
115
; up until long cycle insn is finished...
; rf_multi1 combines rf_insn with the fpu_iter pseudo-unit using "+".
116
(define_reservation "rf_multi1"        "rf_insn+fpu_iter")
117
 
118
;;
119
;; The ordering of the instruction-execution-path/resource-usage
120
;; descriptions (also known as reservation RTL) is roughly ordered
121
;; based on the define attribute RTL for the "type" classification.
122
;; When modifying, remember that the first test that matches is the
123
;; reservation used!
124
;;
125
 
126
 
127
;; Insns of type "unknown" on the sr71000: latency 1.  They take every
;; issue unit for the cycle (serial_dispatch).
(define_insn_reservation "ir_sr70_unknown" 1
128
  (and (eq_attr "cpu" "sr71000")
129
       (eq_attr "type" "unknown"))
130
  "serial_dispatch")
131
 
132
 
133
;; Assume prediction fails.
;; Branches, jumps and calls: latency 6; one issue cycle, then the
;; branch unit (ri_branch).
134
(define_insn_reservation "ir_sr70_branch" 6
135
  (and (eq_attr "cpu" "sr71000")
136
       (eq_attr "type" "branch,jump,call"))
137
  "ri_branch")
138
 
139
;; Insns of type "load": latency 2; one issue cycle, then the
;; load/store unit (ri_mem).
(define_insn_reservation "ir_sr70_load" 2
140
  (and (eq_attr "cpu" "sr71000")
141
       (eq_attr "type" "load"))
142
  "ri_mem")
143
 
144
;; Insns of type "store": latency 1; same path as loads (ri_mem).
(define_insn_reservation "ir_sr70_store" 1
145
  (and (eq_attr "cpu" "sr71000")
146
       (eq_attr "type" "store"))
147
  "ri_mem")
148
 
149
 
150
;;
151
;; float loads/stores flow through both cpu and cp1...
152
;;
;; Float loads (plain and indexed): latency 9.  A cpu and a cp1 issue
;; reservation are taken together, followed by ri_mem and rf_ldmem together.
;; NOTE(review): ri_mem and rf_ldmem themselves begin with cpu_iss /
;; cp1_iss, so the issue units appear twice in the expansion -- confirm
;; this is intended.
153
(define_insn_reservation "ir_sr70_fload" 9
154
  (and (eq_attr "cpu" "sr71000")
155
       (eq_attr "type" "fpload,fpidxload"))
156
  "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)")
157
 
158
;; Float stores (plain and indexed): latency 1.  A cpu and a cp1 issue
;; reservation are taken together, followed by fpu_mov together with ri_mem.
;; NOTE(review): ri_mem itself begins with cpu_iss, so a cpu issue unit
;; appears twice in the expansion -- confirm this is intended.
(define_insn_reservation "ir_sr70_fstore" 1
159
  (and (eq_attr "cpu" "sr71000")
160
       (eq_attr "type" "fpstore,fpidxstore"))
161
  "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)")
162
 
163
 
164
;; This reservation is for conditional move based on integer
165
;; or floating point CC.
;; Latency 4; one issue cycle, then either ALU (ri_insns).
166
(define_insn_reservation "ir_sr70_condmove" 4
167
  (and (eq_attr "cpu" "sr71000")
168
       (eq_attr "type" "condmove"))
169
  "ri_insns")
170
 
171
;; Try to discriminate move-from-cp1 versus move-to-cp1 as latencies
172
;; are different. Like float load/store, these insns use multiple
173
;; resources simultaneously
;; This one covers "xfer" insns whose mode is none of SF, DF, FPSW (the
;; leading "!" negates the whole list): latency 6, same unit usage as the
;; float store reservation.
;; NOTE(review): an "xfer" insn of mode FPSW matches neither this
;; reservation nor ir_sr70_xfer_to -- confirm that is intended.
;; NOTE(review): ri_mem itself begins with cpu_iss, so a cpu issue unit
;; appears twice in the expansion -- confirm this is intended.
174
(define_insn_reservation "ir_sr70_xfer_from" 6
175
  (and (eq_attr "cpu" "sr71000")
176
       (and (eq_attr "type" "xfer")
177
            (eq_attr "mode" "!SF,DF,FPSW")))
178
  "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)")
179
 
180
;; "xfer" insns of mode SF or DF: latency 9, same unit usage as the
;; float load reservation.
;; NOTE(review): ri_mem and rf_ldmem themselves begin with cpu_iss /
;; cp1_iss, so the issue units appear twice in the expansion -- confirm
;; this is intended.
(define_insn_reservation "ir_sr70_xfer_to" 9
181
  (and (eq_attr "cpu" "sr71000")
182
       (and (eq_attr "type" "xfer")
183
            (eq_attr "mode" "SF,DF")))
184
  "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)")
185
 
186
;; Insns of type "mthilo" or "mfhilo": latency 1; one issue cycle,
;; then either ALU (ri_insns).
(define_insn_reservation "ir_sr70_hilo" 1
187
  (and (eq_attr "cpu" "sr71000")
188
       (eq_attr "type" "mthilo,mfhilo"))
189
  "ri_insns")
190
 
191
;; Simple integer insns (arith, shift, slt, clz, const, trap): latency 1;
;; one issue cycle, then either ALU (ri_insns).
(define_insn_reservation "ir_sr70_arith" 1
192
  (and (eq_attr "cpu" "sr71000")
193
       (eq_attr "type" "arith,shift,slt,clz,const,trap"))
194
  "ri_insns")
195
 
196
;; emulate repeat (dispatch stall) by spending extra cycle(s) in
197
;; the iter unit
;; SImode multiply / multiply-add: latency 4.  Path is ri_alux (issue
;; cycle + ALUX), one more ALUX cycle, then one cycle in ipu_macc_iter:
;; 1 + 2 + 1 = 4 cycles in total.
198
(define_insn_reservation "ir_sr70_imul_si" 4
199
  (and (eq_attr "cpu" "sr71000")
200
       (and (eq_attr "type" "imul,imul3,imadd")
201
            (eq_attr "mode" "SI")))
202
  "ri_alux,ipu_alux,ipu_macc_iter")
203
 
204
;; DImode multiply / multiply-add: latency 6.  Same path as the SImode
;; variant but with three cycles in ipu_macc_iter (1 + 2 + 3 = 6 cycles).
(define_insn_reservation "ir_sr70_imul_di" 6
205
  (and (eq_attr "cpu" "sr71000")
206
       (and (eq_attr "type" "imul,imul3,imadd")
207
            (eq_attr "mode" "DI")))
208
  "ri_alux,ipu_alux,(ipu_macc_iter*3)")
209
 
210
;; Divide algorithm is early out with best latency of 7 pcycles.
211
;; Use worst case for scheduling purposes.
;; SImode divide: latency 41.  ri_alux, one more ALUX cycle, then 38
;; cycles in ipu_macc_iter (1 + 2 + 38 = 41 cycles).
212
(define_insn_reservation "ir_sr70_idiv_si" 41
213
  (and (eq_attr "cpu" "sr71000")
214
       (and (eq_attr "type" "idiv")
215
            (eq_attr "mode" "SI")))
216
  "ri_alux,ipu_alux,(ipu_macc_iter*38)")
217
 
218
;; DImode divide: latency 73.  ri_alux, one more ALUX cycle, then 70
;; cycles in ipu_macc_iter (1 + 2 + 70 = 73 cycles).
(define_insn_reservation "ir_sr70_idiv_di" 73
219
  (and (eq_attr "cpu" "sr71000")
220
       (and (eq_attr "type" "idiv")
221
            (eq_attr "mode" "DI")))
222
  "ri_alux,ipu_alux,(ipu_macc_iter*70)")
223
 
224
;; extra reservations of fpu_fpu are for repeat latency
;; SFmode fadd: latency 8; rf_insn followed by one more fpu_fpu cycle.
225
(define_insn_reservation "ir_sr70_fadd_sf" 8
226
  (and (eq_attr "cpu" "sr71000")
227
       (and (eq_attr "type" "fadd")
228
            (eq_attr "mode" "SF")))
229
  "rf_insn,fpu_fpu")
230
 
231
;; DFmode fadd: latency 10; same unit usage as the SFmode variant.
(define_insn_reservation "ir_sr70_fadd_df" 10
232
  (and (eq_attr "cpu" "sr71000")
233
       (and (eq_attr "type" "fadd")
234
            (eq_attr "mode" "DF")))
235
  "rf_insn,fpu_fpu")
236
 
237
;; Latencies for MADD,MSUB, NMADD, NMSUB assume the Multiply is fused
238
;; with the sub or add.
;; SFmode fmul/fmadd: latency 8; rf_insn followed by one more fpu_fpu cycle.
239
(define_insn_reservation "ir_sr70_fmul_sf" 8
240
  (and (eq_attr "cpu" "sr71000")
241
       (and (eq_attr "type" "fmul,fmadd")
242
            (eq_attr "mode" "SF")))
243
  "rf_insn,fpu_fpu")
244
 
245
;; tie up the fpu unit to emulate the balance for the "repeat
246
;; rate" of 8 (2 are spent in the iss unit)
;; DFmode fmul/fmadd: latency 16; rf_insn followed by six more fpu_fpu
;; cycles ("*" binds tighter than ",", so only fpu_fpu is repeated).
247
(define_insn_reservation "ir_sr70_fmul_df" 16
248
  (and (eq_attr "cpu" "sr71000")
249
       (and (eq_attr "type" "fmul,fmadd")
250
            (eq_attr "mode" "DF")))
251
  "rf_insn,fpu_fpu*6")
252
 
253
 
254
;; RECIP insn uses same type attr as div, and for SR3, has same
255
;; timings for double. However, single RECIP has a latency of
256
;; 28 -- only way to fix this is to introduce new insn attrs.
257
;; cycles spent in iter unit are designed to satisfy balance
258
;; of "repeat" latency after insn uses up rf_multi1 reservation
;; NOTE(review): the condition below lists both "fdiv" and "frdiv"; if
;; "frdiv" is the RECIP type, the first remark above may be stale --
;; confirm against the "type" attribute definition.
;; SFmode: latency 60; rf_multi1 combined ("+") with 51 cycles of fpu_iter.
259
(define_insn_reservation "ir_sr70_fdiv_sf" 60
260
  (and (eq_attr "cpu" "sr71000")
261
       (and (eq_attr "type" "fdiv,frdiv")
262
            (eq_attr "mode" "SF")))
263
  "rf_multi1+(fpu_iter*51)")
264
 
265
;; DFmode fdiv/frdiv: latency 120; rf_multi1 combined ("+") with 109
;; cycles of fpu_iter.
(define_insn_reservation "ir_sr70_fdiv_df" 120
266
  (and (eq_attr "cpu" "sr71000")
267
       (and (eq_attr "type" "fdiv,frdiv")
268
            (eq_attr "mode" "DF")))
269
  "rf_multi1+(fpu_iter*109)")
270
 
271
;; fabs, fneg and fmove: latency 4; rf_insn followed by one more
;; fpu_fpu cycle.
(define_insn_reservation "ir_sr70_fabs" 4
272
  (and (eq_attr "cpu" "sr71000")
273
       (eq_attr "type" "fabs,fneg,fmove"))
274
  "rf_insn,fpu_fpu")
275
 
276
;; Float compares: latency 10; rf_insn followed by one more fpu_fpu cycle.
(define_insn_reservation "ir_sr70_fcmp" 10
277
  (and (eq_attr "cpu" "sr71000")
278
       (eq_attr "type" "fcmp"))
279
  "rf_insn,fpu_fpu")
280
 
281
;; "fcvt" type attribute covers a number of diff insns, most have the same
282
;; latency descriptions, a few vary. We use the
283
;; most common timing (which is also worst case).
;; Latency 12; rf_insn followed by four more fpu_fpu cycles.
284
(define_insn_reservation "ir_sr70_fcvt" 12
285
  (and (eq_attr "cpu" "sr71000")
286
       (eq_attr "type" "fcvt"))
287
  "rf_insn,fpu_fpu*4")
288
 
289
;; SFmode fsqrt: latency 62; rf_multi1 combined ("+") with 53 cycles
;; of fpu_iter.
(define_insn_reservation "ir_sr70_fsqrt_sf" 62
290
  (and (eq_attr "cpu" "sr71000")
291
       (and (eq_attr "type" "fsqrt")
292
            (eq_attr "mode" "SF")))
293
  "rf_multi1+(fpu_iter*53)")
294
 
295
;; DFmode fsqrt: latency 122; rf_multi1 combined ("+") with 111 cycles
;; of fpu_iter.
(define_insn_reservation "ir_sr70_fsqrt_df" 122
296
  (and (eq_attr "cpu" "sr71000")
297
       (and (eq_attr "type" "fsqrt")
298
            (eq_attr "mode" "DF")))
299
  "rf_multi1+(fpu_iter*111)")
300
 
301
;; SFmode frsqrt: latency 48; rf_multi1 combined ("+") with 39 cycles
;; of fpu_iter.
(define_insn_reservation "ir_sr70_frsqrt_sf" 48
302
  (and (eq_attr "cpu" "sr71000")
303
       (and (eq_attr "type" "frsqrt")
304
            (eq_attr "mode" "SF")))
305
  "rf_multi1+(fpu_iter*39)")
306
 
307
;; DFmode frsqrt: latency 240; rf_multi1 combined ("+") with 229 cycles
;; of fpu_iter.
(define_insn_reservation "ir_sr70_frsqrt_df" 240
308
  (and (eq_attr "cpu" "sr71000")
309
       (and (eq_attr "type" "frsqrt")
310
            (eq_attr "mode" "DF")))
311
  "rf_multi1+(fpu_iter*229)")
312
 
313
;; Insns of type "multi": latency 1.  Like the "unknown" reservation,
;; they take every issue unit for the cycle (serial_dispatch).
(define_insn_reservation "ir_sr70_multi" 1
314
  (and (eq_attr "cpu" "sr71000")
315
       (eq_attr "type" "multi"))
316
  "serial_dispatch")
317
 
318
;; Nops: latency 1; one issue cycle, then either ALU (ri_insns).
(define_insn_reservation "ir_sr70_nop" 1
319
  (and (eq_attr "cpu" "sr71000")
320
       (eq_attr "type" "nop"))
321
  "ri_insns")

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.