OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [config/] [i386/] [k6.md] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
;; AMD K6/K6-2 Scheduling
2
;; Copyright (C) 2002, 2004
3
;; Free Software Foundation, Inc.
4
;;
5
;; This file is part of GCC.
6
;;
7
;; GCC is free software; you can redistribute it and/or modify
8
;; it under the terms of the GNU General Public License as published by
9
;; the Free Software Foundation; either version 2, or (at your option)
10
;; any later version.
11
;;
12
;; GCC is distributed in the hope that it will be useful,
13
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
;; GNU General Public License for more details.
16
;;
17
;; You should have received a copy of the GNU General Public License
18
;; along with GCC; see the file COPYING.  If not, write to
19
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20
;; Boston, MA 02110-1301, USA.
21
;;
22
;; The K6 architecture is quite similar to PPro.  Important difference is
23
;; that there are only two decoders and they seem to be much slower than
24
;; any of the execution units.  So we have to pay much more attention to
25
;; proper scheduling for the decoders.
26
;; FIXME: We don't do that right now.  A good start would be to sort the
27
;;        instructions based on length.
28
;;
29
;; This description is based on data from the following documents:
30
;;
31
;;    "AMD-K6 Processor Data Sheet (Preliminary information)"
32
;;    Advanced Micro Devices, Inc., 1998.
33
;;
34
;;    "AMD-K6 Processor Code Optimization Application Note"
35
;;    Advanced Micro Devices, Inc., 2000.
36
;;
37
;; CPU execution units of the K6:
38
;;
39
;; store        describes the Store unit.  This unit is not modelled
40
;;              completely and it is only used to model lea operation.
41
;;              Otherwise it lies outside of any critical path.
42
;; load         describes the Load unit
43
;; alux         describes the Integer X unit
44
;; mm           describes the Multimedia unit, which shares a pipe
45
;;              with the Integer X unit.  This unit is used for MMX,
46
;;              which is not implemented for K6.
47
;; aluy         describes the Integer Y unit
48
;; fpu          describes the FPU unit
49
;; branch       describes the Branch unit
50
;;
51
;; The fp unit is not pipelined, and it can only do one operation per two
52
;; cycles, including fxch.
53
;;
54
;; Generally this is a very poor description, but at least no worse than
55
;; the old description, and a lot easier to extend to something more
56
;; reasonable if anyone still cares enough about this architecture in 2004.
57
;;
58
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
59
 
60
;; Declare the automata used by this description.  Each CPU unit defined
;; in this file names one of these automata as its second operand.
(define_automaton "k6_decoder,k6_load_unit,k6_store_unit,k6_integer_units,k6_fpu_unit,k6_branch_unit")
61
 
62
;; The K6 instruction decoding begins before the on-chip instruction cache is
63
;; filled.  Depending on the length of the instruction, two simple instructions
64
;; can be decoded in two parallel short decoders, or one complex instruction can
65
;; be decoded in either the long or the vector decoder.  For all practical
66
;; purposes, the long and vector decoder can be modelled as one decoder.
67
;; Decoder units, all in the k6_decoder automaton: two short decoders
;; and one unit standing in for the long/vector decoder.
(define_cpu_unit "k6_decode_short0" "k6_decoder")
(define_cpu_unit "k6_decode_short1" "k6_decoder")
(define_cpu_unit "k6_decode_long" "k6_decoder")

;; The long decoder may not be reserved together with either short one.
(exclusion_set "k6_decode_long" "k6_decode_short0,k6_decode_short1")

;; "k6_decode_short" takes whichever short decoder is free;
;; "k6_decode_vector" is an alias for the long decoder unit.
(define_reservation "k6_decode_short" "k6_decode_short0|k6_decode_short1")
(define_reservation "k6_decode_vector" "k6_decode_long")
73
 
74
;; Execution units.  Each unit sits in its own automaton, except that the
;; two integer units (X and Y) share the k6_integer_units automaton.
(define_cpu_unit "k6_store" "k6_store_unit")
(define_cpu_unit "k6_load" "k6_load_unit")
(define_cpu_unit "k6_alux,k6_aluy" "k6_integer_units")
(define_cpu_unit "k6_fpu" "k6_fpu_unit")
(define_cpu_unit "k6_branch" "k6_branch_unit")
79
 
80
;; Shift instructions and certain arithmetic are issued only on Integer X.
81
;; X-unit-only integer ops with no memory operand: a short decoder,
;; then the Integer X unit.  Default latency 1.
(define_insn_reservation "k6_alux_only" 1
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
            (eq_attr "memory" "none")))
  "k6_decode_short,k6_alux")
86
 
87
;; X-unit-only integer ops that read memory: a short decoder, the load
;; unit, then the Integer X unit.  Default latency 3.
(define_insn_reservation "k6_alux_only_load" 3
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
            (eq_attr "memory" "load")))
  "k6_decode_short,k6_load,k6_alux")
92
 
93
;; X-unit-only integer ops whose memory attribute is store, both or
;; unknown: the long decoder, then load, Integer X and store in turn.
;; Default latency 3.
(define_insn_reservation "k6_alux_only_store" 3
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
            (eq_attr "memory" "store,both,unknown")))
  "k6_decode_long,k6_load,k6_alux,k6_store")
98
 
99
;; Integer divide and multiply can only be issued on Integer X, too.
100
;; Integer multiply with no memory operand: the vector decoder, then the
;; Integer X unit held for three cycles.  Default latency 2.
;;
;; The "memory" test restricts this reservation to the register-only
;; form.  Without it the condition also matches every imul that touches
;; memory and overlaps k6_alu_imul_load and k6_alu_imul_store, which are
;; defined later in this file.  This mirrors the guard on k6_alu_idiv.
(define_insn_reservation "k6_alu_imul" 2
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none")))
  "k6_decode_vector,k6_alux*3")
104
 
105
;; Integer multiply with a memory source: the vector decoder, the load
;; unit, then the Integer X unit for three cycles.  Default latency 4.
(define_insn_reservation "k6_alu_imul_load" 4
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load")))
  "k6_decode_vector,k6_load,k6_alux*3")
110
 
111
;; Integer multiply whose memory attribute is store, both or unknown:
;; the vector decoder, load, three cycles of Integer X, then the store
;; unit.  Default latency 4.
(define_insn_reservation "k6_alu_imul_store" 4
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "store,both,unknown")))
  "k6_decode_vector,k6_load,k6_alux*3,k6_store")
116
 
117
;; ??? Guessed latencies based on the old pipeline description.
118
;; Integer divide with no memory operand: the vector decoder, then the
;; Integer X unit held for 17 cycles.  Default latency 17.
(define_insn_reservation "k6_alu_idiv" 17
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none")))
  "k6_decode_vector,k6_alux*17")
123
 
124
;; Integer divide with any memory attribute other than "none": the
;; vector decoder, the load unit, then Integer X for 17 cycles.
;; Default latency 19.
(define_insn_reservation "k6_alu_idiv_mem" 19
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "!none")))
  "k6_decode_vector,k6_load,k6_alux*17")
129
 
130
;; Basic word and doubleword ALU ops can be issued on both Integer units.
131
;; Plain ALU ops with no memory operand: a short decoder, then either
;; integer unit.  Default latency 1.
;; NOTE(review): alu1 and negnot are also listed by k6_alux_only earlier
;; in this file; confirm which reservation is meant to claim them.
(define_insn_reservation "k6_alu" 1
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
            (eq_attr "memory" "none")))
  "k6_decode_short,k6_alux|k6_aluy")
136
 
137
;; Plain ALU ops that read memory: a short decoder, the load unit, then
;; either integer unit.  Default latency 3.
;; NOTE(review): alu1 and negnot are also listed by k6_alux_only_load
;; earlier in this file; confirm which reservation should claim them.
(define_insn_reservation "k6_alu_load" 3
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
            (eq_attr "memory" "load")))
  "k6_decode_short,k6_load,k6_alux|k6_aluy")
142
 
143
;; Plain ALU ops whose memory attribute is store, both or unknown: the
;; long decoder, the load unit, either integer unit, then the store
;; unit.  Default latency 3.
;; NOTE(review): alu1 and negnot are also listed by k6_alux_only_store
;; earlier in this file; confirm which reservation should claim them.
(define_insn_reservation "k6_alu_store" 3
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
            (eq_attr "memory" "store,both,unknown")))
  "k6_decode_long,k6_load,k6_alux|k6_aluy,k6_store")
148
 
149
;; A "load immediate" operation does not require execution at all,
150
;; it is available immediately after decoding.  Special-case this.
151
;; Integer move with no memory access whose operand 1 satisfies
;; nonimmediate_operand: a short decoder, then either integer unit.
;; Default latency 1.
(define_insn_reservation "k6_alu_imov" 1
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (and (eq_attr "memory" "none")
                 (match_operand 1 "nonimmediate_operand"))))
  "k6_decode_short,k6_alux|k6_aluy")
157
 
158
;; Integer move with no memory access whose operand 1 satisfies
;; immediate_operand: only a short decoder is reserved, no execution
;; unit.  Default latency 0.
(define_insn_reservation "k6_alu_imov_imm" 0
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (and (eq_attr "memory" "none")
                 (match_operand 1 "immediate_operand"))))
  "k6_decode_short")
164
 
165
;; Integer move from memory: a short decoder, then the load unit.
;; Default latency 2.
(define_insn_reservation "k6_alu_imov_load" 2
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "k6_decode_short,k6_load")
170
 
171
;; Integer move to memory: a short decoder, then the store unit.
;; Default latency 1.
(define_insn_reservation "k6_alu_imov_store" 1
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "k6_decode_short,k6_store")
176
 
177
;; Integer move whose memory attribute is both or unknown: the long
;; decoder, the load unit, then either integer unit.  Default latency 2.
;; NOTE(review): no store unit is reserved here; confirm that this is
;; intentional.
(define_insn_reservation "k6_alu_imov_both" 2
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "both,unknown")))
  "k6_decode_long,k6_load,k6_alux|k6_aluy")
182
 
183
;; The branch unit.
184
;; Calls (types call and callv): the vector decoder, then the branch
;; unit.  Default latency 1.
(define_insn_reservation "k6_branch_call" 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "call,callv"))
  "k6_decode_vector,k6_branch")
188
 
189
;; Branches (type ibr): a short decoder, then the branch unit.
;; Default latency 1.
(define_insn_reservation "k6_branch_branch" 1
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "ibr"))
  "k6_decode_short,k6_branch")
193
 
194
;; The load and store units have two pipeline stages.  The load latency is
195
;; two cycles.
196
;; Pops, plus any K6 insn whose memory attribute is load or both: a
;; short decoder, then the load unit.  Default latency 3.
;; The ior makes the memory test independent of the insn type, so this
;; condition is not limited to pop.
;; NOTE(review): presumably relies on the more specific load
;; reservations elsewhere in this file taking priority; confirm.
(define_insn_reservation "k6_load_pop" 3
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "pop")
            (eq_attr "memory" "load,both")))
  "k6_decode_short,k6_load")
201
 
202
;; leave: the long decoder, the load unit, then two cycles of either
;; integer unit.  Default latency 5.
(define_insn_reservation "k6_load_leave" 5
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "leave"))
  "k6_decode_long,k6_load,(k6_alux|k6_aluy)*2")
206
 
207
;; ??? From the old pipeline description.  Egad!
208
;; ??? Apparently we take care of this reservation in adjust_cost.
209
;; String insns whose memory attribute is load or both: the vector
;; decoder, then the load unit held for 10 cycles.  Default latency 10.
(define_insn_reservation "k6_load_str" 10
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both")))
  "k6_decode_vector,k6_load*10")
214
 
215
;; The store unit handles lea and push.  It is otherwise unmodelled.
216
;; lea: a short decoder, the store unit, then either integer unit.
;; Default latency 2.
(define_insn_reservation "k6_store_lea" 2
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "lea"))
  "k6_decode_short,k6_store,k6_alux|k6_aluy")
220
 
221
;; Pushes, plus any K6 insn whose memory attribute is store or both: a
;; short decoder, then the store unit.  Default latency 2.
;; The ior makes the memory test independent of the insn type, so this
;; condition is not limited to push.
(define_insn_reservation "k6_store_push" 2
  (and (eq_attr "cpu" "k6")
       (ior (eq_attr "type" "push")
            (eq_attr "memory" "store,both")))
  "k6_decode_short,k6_store")
226
 
227
;; String insns, with no test on the memory attribute: the store unit
;; held for 10 cycles.  Default latency 10.
;; NOTE(review): unlike k6_load_str, no decoder is reserved here;
;; confirm that this is intentional.
(define_insn_reservation "k6_store_str" 10
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "str"))
  "k6_store*10")
231
 
232
;; Most FPU instructions have latency 2 and throughput 2.
233
;; FP ops (fop, fmov, fcmp, fistp) with no memory operand: the vector
;; decoder, then the FPU held for two cycles.  Default latency 2.
;; NOTE(review): the other FPU reservations in this file use
;; k6_decode_short; confirm that k6_decode_vector is intended here.
(define_insn_reservation "k6_fpu" 2
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "fop,fmov,fcmp,fistp")
            (eq_attr "memory" "none")))
  "k6_decode_vector,k6_fpu*2")
238
 
239
;; FP ops (fop, fmov, fcmp, fistp) whose memory attribute is load or
;; both: a short decoder, the load unit, then the FPU for two cycles.
;; Default latency 6.
(define_insn_reservation "k6_fpu_load" 6
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "fop,fmov,fcmp,fistp")
            (eq_attr "memory" "load,both")))
  "k6_decode_short,k6_load,k6_fpu*2")
244
 
245
;; FP ops (fop, fmov, fcmp, fistp) that store to memory: a short
;; decoder, the store unit, then the FPU for two cycles.
;; Default latency 6.
(define_insn_reservation "k6_fpu_store" 6
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "fop,fmov,fcmp,fistp")
            (eq_attr "memory" "store")))
  "k6_decode_short,k6_store,k6_fpu*2")
250
 
251
;; FP multiply with no memory operand: a short decoder, then the FPU
;; held for two cycles.  Default latency 2.
(define_insn_reservation "k6_fpu_fmul" 2
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "none")))
  "k6_decode_short,k6_fpu*2")
256
 
257
;; FP multiply whose memory attribute is load or both: a short decoder,
;; the load unit, then the FPU for two cycles.  Default latency 2.
;; NOTE(review): the latency equals that of the register-only
;; k6_fpu_fmul even though a load is reserved first; confirm the value.
(define_insn_reservation "k6_fpu_fmul_load" 2
  (and (eq_attr "cpu" "k6")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load,both")))
  "k6_decode_short,k6_load,k6_fpu*2")
262
 
263
;; ??? Guessed latencies from the old pipeline description.
264
;; FP divide and special FP ops (types fdiv, fpspc), with no test on the
;; memory attribute: a short decoder, then the FPU held for 56 cycles.
;; Default latency 56.
(define_insn_reservation "k6_fpu_expensive" 56
  (and (eq_attr "cpu" "k6")
       (eq_attr "type" "fdiv,fpspc"))
  "k6_decode_short,k6_fpu*56")
268
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.