| 1 | 282 | jeremybenn | ;; DFA scheduling description for SH4.
 | 
      
         | 2 |  |  | ;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
 | 
      
         | 3 |  |  |  
 | 
      
         | 4 |  |  | ;; This file is part of GCC.
 | 
      
         | 5 |  |  |  
 | 
      
         | 6 |  |  | ;; GCC is free software; you can redistribute it and/or modify
 | 
      
         | 7 |  |  | ;; it under the terms of the GNU General Public License as published by
 | 
      
         | 8 |  |  | ;; the Free Software Foundation; either version 3, or (at your option)
 | 
      
         | 9 |  |  | ;; any later version.
 | 
      
         | 10 |  |  |  
 | 
      
         | 11 |  |  | ;; GCC is distributed in the hope that it will be useful,
 | 
      
         | 12 |  |  | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
      
         | 13 |  |  | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
      
         | 14 |  |  | ;; GNU General Public License for more details.
 | 
      
         | 15 |  |  |  
 | 
      
         | 16 |  |  | ;; You should have received a copy of the GNU General Public License
 | 
      
         | 17 |  |  | ;; along with GCC; see the file COPYING3.  If not see
 | 
      
         | 18 |  |  | ;; <http://www.gnu.org/licenses/>.
 | 
      
         | 19 |  |  |  
 | 
      
         | 20 |  |  | ;; Load and store instructions save a cycle if they are aligned on a
 | 
      
         | 21 |  |  | ;; four byte boundary.  Using a function unit for stores encourages
 | 
      
         | 22 |  |  | ;; gcc to separate load and store instructions by one instruction,
 | 
      
         | 23 |  |  | ;; which makes it more likely that the linker will be able to word
 | 
      
         | 24 |  |  | ;; align them when relaxing.
 | 
      
         | 25 |  |  |  
 | 
      
         | 26 |  |  | ;; The following description models the SH4 pipeline using the DFA based
 | 
      
         | 27 |  |  | ;; scheduler.  The DFA based description is a better way to model a
 | 
      
         | 28 |  |  | ;; superscalar pipeline as compared to the function unit reservation model.
 | 
      
         | 29 |  |  | ;; 1. The function unit based model is oriented to describe at most one
 | 
      
         | 30 |  |  | ;;    unit reservation by each insn. It is difficult to model unit reservations
 | 
      
         | 31 |  |  | ;;    in multiple pipeline units by the same insn.  This can be done using DFA
 | 
      
         | 32 |  |  | ;;    based description.
 | 
      
         | 33 |  |  | ;; 2. The execution performance of DFA based scheduler does not depend on
 | 
      
         | 34 |  |  | ;;    processor complexity.
 | 
      
         | 35 |  |  | ;; 3. Writing all unit reservations for an instruction class is a more natural
 | 
      
         | 36 |  |  | ;;    description of the pipeline and makes the interface to the hazard
 | 
      
         | 37 |  |  | ;;    recognizer simpler than the old function unit based model.
 | 
      
         | 38 |  |  | ;; 4. The DFA model is richer and is a part of greater overall framework
 | 
      
         | 39 |  |  | ;;    of RCSP.
 | 
      
         | 40 |  |  |  
 | 
      
         | 41 |  |  |  
 | 
      
         | 42 |  |  | ;; Two automata are defined to reduce number of states
 | 
      
         | 43 |  |  | ;; which a single large automaton will have. (Factoring)
 | 
      
         | 44 |  |  |  
 | 
      
         | 45 |  |  | (define_automaton "inst_pipeline,fpu_pipe")
 | 
      
         | 46 |  |  |  
 | 
      
         | 47 |  |  | ;; This unit is basically the decode unit of the processor.
 | 
      
         | 48 |  |  | ;; Since SH4 is a dual issue machine, it is as if there are two
 | 
      
         | 49 |  |  | ;; units so that any insn can be processed by either one
 | 
      
         | 50 |  |  | ;; of the decoding units.
 | 
      
         | 51 |  |  |  
 | 
      
         | 52 |  |  | (define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")
 | 
      
         | 53 |  |  |  
 | 
      
         | 54 |  |  |  
 | 
      
         | 55 |  |  | ;; The fixed point arithmetic calculator(?? EX Unit).
 | 
      
         | 56 |  |  |  
 | 
      
         | 57 |  |  | (define_cpu_unit  "int" "inst_pipeline")
 | 
      
         | 58 |  |  |  
 | 
      
         | 59 |  |  | ;; f1_1 and f1_2 are floating point units.  Actually there is
 | 
      
         | 60 |  |  | ;; an f1 unit which can overlap with another f1 unit but
 | 
      
         | 61 |  |  | ;; not another F1 unit.  It is as though there were two
 | 
      
         | 62 |  |  | ;; f1 units.
 | 
      
         | 63 |  |  |  
 | 
      
         | 64 |  |  | (define_cpu_unit "f1_1,f1_2" "fpu_pipe")
 | 
      
         | 65 |  |  |  
 | 
      
         | 66 |  |  | ;; The floating point units (except FS - F2 always precedes it.)
 | 
      
         | 67 |  |  |  
 | 
      
         | 68 |  |  | (define_cpu_unit "F0,F1,F2,F3" "fpu_pipe")
 | 
      
         | 69 |  |  |  
 | 
      
         | 70 |  |  | ;; This is basically the MA unit of SH4
 | 
      
         | 71 |  |  | ;; used in LOAD/STORE pipeline.
 | 
      
         | 72 |  |  |  
 | 
      
         | 73 |  |  | (define_cpu_unit "memory" "inst_pipeline")
 | 
      
         | 74 |  |  |  
 | 
      
         | 75 |  |  | ;; However, there are LS group insns that don't use it, even ones that
 | 
      
         | 76 |  |  | ;; complete in 0 cycles.  So we use an extra unit for the issue of LS insns.
 | 
      
         | 77 |  |  | (define_cpu_unit "load_store" "inst_pipeline")
 | 
      
         | 78 |  |  |  
 | 
      
         | 79 |  |  | ;; The address calculator used for branch instructions.
 | 
      
         | 80 |  |  | ;; This will be reserved after "issue" of branch instructions
 | 
      
         | 81 |  |  | ;; and this is to make sure that no two branch instructions
 | 
      
         | 82 |  |  | ;; can be issued in parallel.
 | 
      
         | 83 |  |  |  
 | 
      
         | 84 |  |  | (define_cpu_unit "pcr_addrcalc" "inst_pipeline")
 | 
      
         | 85 |  |  |  
 | 
      
         | 86 |  |  | ;; ----------------------------------------------------
 | 
      
         | 87 |  |  | ;; This reservation is to simplify the dual issue description.
 | 
      
         | 88 |  |  |  
 | 
      
         | 89 |  |  | (define_reservation  "issue"  "pipe_01|pipe_02")
 | 
      
         | 90 |  |  |  
 | 
      
         | 91 |  |  | ;; This is to express the locking of D stage.
 | 
      
         | 92 |  |  | ;; Note that the issue of a CO group insn also effectively locks the D stage.
 | 
      
         | 93 |  |  |  
 | 
      
         | 94 |  |  | (define_reservation  "d_lock" "pipe_01+pipe_02")
 | 
      
         | 95 |  |  |  
 | 
      
         | 96 |  |  | ;; Every FE instruction but fipr / ftrv starts with issue and this.
 | 
      
         | 97 |  |  | (define_reservation "F01" "F0+F1")
 | 
      
         | 98 |  |  |  
 | 
      
         | 99 |  |  | ;; This is to simplify description where F1,F2,FS
 | 
      
         | 100 |  |  | ;; are used simultaneously.
 | 
      
         | 101 |  |  |  
 | 
      
         | 102 |  |  | (define_reservation "fpu" "F1+F2")
 | 
      
         | 103 |  |  |  
 | 
      
         | 104 |  |  | ;; This is to highlight the fact that f1
 | 
      
         | 105 |  |  | ;; cannot overlap with F1.
 | 
      
         | 106 |  |  |  
 | 
      
         | 107 |  |  | (exclusion_set  "f1_1,f1_2" "F1")
 | 
      
         | 108 |  |  |  
 | 
      
         | 109 |  |  | (define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
 | 
      
         | 110 |  |  |  
 | 
      
         | 111 |  |  | ;; Although reg moves have a latency of zero
 | 
      
         | 112 |  |  | ;; we need to highlight that they use D stage
 | 
      
         | 113 |  |  | ;; for one cycle.
 | 
      
         | 114 |  |  |  
 | 
      
         | 115 |  |  | ;; Group:       MT
 | 
      
         | 116 |  |  |  
 | 
      
         | 117 |  |  | (define_insn_reservation "reg_mov" 0
 | 
      
         | 118 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 119 |  |  |        (eq_attr "type" "move"))
 | 
      
         | 120 |  |  |   "issue")
 | 
      
         | 121 |  |  |  
 | 
      
         | 122 |  |  | ;; Group:       LS
 | 
      
         | 123 |  |  |  
 | 
      
         | 124 |  |  | (define_insn_reservation "freg_mov" 0
 | 
      
         | 125 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 126 |  |  |        (eq_attr "type" "fmove"))
 | 
      
         | 127 |  |  |   "issue+load_store")
 | 
      
         | 128 |  |  |  
 | 
      
         | 129 |  |  | ;; We don't model all pipeline stages; we model the issue ('D') stage
 | 
      
         | 130 |  |  | ;; inasmuch as we allow only two instructions to issue simultaneously,
 | 
      
         | 131 |  |  | ;; and CO instructions prevent any simultaneous issue of another instruction.
 | 
      
         | 132 |  |  | ;; (This uses pipe_01 and pipe_02).
 | 
      
         | 133 |  |  | ;; Double issue of EX insns is prevented by using the int unit in the EX stage.
 | 
      
         | 134 |  |  | ;; Double issue of EX / BR insns is prevented by using the int unit /
 | 
      
         | 135 |  |  | ;; pcr_addrcalc unit in the EX stage.
 | 
      
         | 136 |  |  | ;; Double issue of BR / LS instructions is prevented by using the
 | 
      
         | 137 |  |  | ;; pcr_addrcalc / load_store unit in the issue cycle.
 | 
      
         | 138 |  |  | ;; Double issue of FE instructions is prevented by using F0 in the first
 | 
      
         | 139 |  |  | ;; pipeline stage after the first D stage.
 | 
      
         | 140 |  |  | ;; There is no need to describe the [ES]X / [MN]A / S stages after a D stage
 | 
      
         | 141 |  |  | ;; (except in the cases outlined above), nor to describe the FS stage after
 | 
      
         | 142 |  |  | ;; the F2 stage.
 | 
      
         | 143 |  |  |  
 | 
      
         | 144 |  |  | ;; Other MT  group instructions(1 step operations)
 | 
      
         | 145 |  |  | ;; Group:       MT
 | 
      
         | 146 |  |  | ;; Latency:     1
 | 
      
         | 147 |  |  | ;; Issue Rate:  1
 | 
      
         | 148 |  |  |  
 | 
      
         | 149 |  |  | (define_insn_reservation "mt" 1
 | 
      
         | 150 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 151 |  |  |        (eq_attr "type" "mt_group"))
 | 
      
         | 152 |  |  |   "issue")
 | 
      
         | 153 |  |  |  
 | 
      
         | 154 |  |  | ;; Fixed Point Arithmetic Instructions(1 step operations)
 | 
      
         | 155 |  |  | ;; Group:       EX
 | 
      
         | 156 |  |  | ;; Latency:     1
 | 
      
         | 157 |  |  | ;; Issue Rate:  1
 | 
      
         | 158 |  |  |  
 | 
      
         | 159 |  |  | (define_insn_reservation "sh4_simple_arith" 1
 | 
      
         | 160 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 161 |  |  |        (eq_attr "insn_class" "ex_group"))
 | 
      
         | 162 |  |  |   "issue,int")
 | 
      
         | 163 |  |  |  
 | 
      
         | 164 |  |  | ;; Load and store instructions have no alignment peculiarities for the SH4,
 | 
      
         | 165 |  |  | ;; but they use the load-store unit, which they share with the fmove type
 | 
      
         | 166 |  |  | ;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) .
 | 
      
         | 167 |  |  | ;; Loads have a latency of two.
 | 
      
         | 168 |  |  | ;; However, call insns can only be paired with a preceding insn, and have
 | 
      
         | 169 |  |  | ;; a delay slot, so that we want two more insns to be scheduled between the
 | 
      
         | 170 |  |  | ;; load of the function address and the call.  This is equivalent to a
 | 
      
         | 171 |  |  | ;; latency of three.
 | 
      
         | 172 |  |  | ;; ADJUST_COST can only properly handle reductions of the cost, so we
 | 
      
         | 173 |  |  | ;; use a latency of three here, which gets multiplied by 10 to yield 30.
 | 
      
         | 174 |  |  | ;; We only do this for SImode loads of general registers, to make the work
 | 
      
         | 175 |  |  | ;; for ADJUST_COST easier.
 | 
      
         | 176 |  |  |  
 | 
      
         | 177 |  |  | ;; Load Store instructions. (MOV.[BWL]@(d,GBR))
 | 
      
         | 178 |  |  | ;; Group:       LS
 | 
      
         | 179 |  |  | ;; Latency:     2
 | 
      
         | 180 |  |  | ;; Issue Rate:  1
 | 
      
         | 181 |  |  |  
 | 
      
         | 182 |  |  | (define_insn_reservation "sh4_load" 2
 | 
      
         | 183 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 184 |  |  |        (eq_attr "type" "load,pcload"))
 | 
      
         | 185 |  |  |   "issue+load_store,nothing,memory")
 | 
      
         | 186 |  |  |  
 | 
      
         | 187 |  |  | ;; calls / sfuncs need an extra instruction for their delay slot.
 | 
      
         | 188 |  |  | ;; Moreover, estimating the latency for SImode loads as 3 will also allow
 | 
      
         | 189 |  |  | ;; adjust_cost to meaningfully bump it back up to 3 if they load the shift
 | 
      
         | 190 |  |  | ;; count of a dynamic shift.
 | 
      
         | 191 |  |  | (define_insn_reservation "sh4_load_si" 3
 | 
      
         | 192 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 193 |  |  |        (eq_attr "type" "load_si,pcload_si"))
 | 
      
         | 194 |  |  |   "issue+load_store,nothing,memory")
 | 
      
         | 195 |  |  |  
 | 
      
         | 196 |  |  | ;; (define_bypass 2 "sh4_load_si" "!sh4_call")
 | 
      
         | 197 |  |  |  
 | 
      
         | 198 |  |  | ;; The load latency is upped to three higher if the dependent insn does
 | 
      
         | 199 |  |  | ;; double precision computation.  We want the 'default' latency to reflect
 | 
      
         | 200 |  |  | ;; that increased latency because otherwise the insn priorities won't
 | 
      
         | 201 |  |  | ;; allow proper scheduling.
 | 
      
         | 202 |  |  | (define_insn_reservation "sh4_fload" 3
 | 
      
         | 203 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 204 |  |  |        (eq_attr "type" "fload,pcfload"))
 | 
      
         | 205 |  |  |   "issue+load_store,nothing,memory")
 | 
      
         | 206 |  |  |  
 | 
      
         | 207 |  |  | ;; (define_bypass 2 "sh4_fload" "!")
 | 
      
         | 208 |  |  |  
 | 
      
         | 209 |  |  | (define_insn_reservation "sh4_store" 1
 | 
      
         | 210 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 211 |  |  |        (eq_attr "type" "store,fstore"))
 | 
      
         | 212 |  |  |   "issue+load_store,nothing,memory")
 | 
      
         | 213 |  |  |  
 | 
      
         | 214 |  |  | (define_insn_reservation "mac_mem" 1
 | 
      
         | 215 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 216 |  |  |        (eq_attr "type" "mac_mem"))
 | 
      
         | 217 |  |  |   "d_lock,nothing,memory")
 | 
      
         | 218 |  |  |  
 | 
      
         | 219 |  |  | ;; Load Store instructions.
 | 
      
         | 220 |  |  | ;; Group:       LS
 | 
      
         | 221 |  |  | ;; Latency:     1
 | 
      
         | 222 |  |  | ;; Issue Rate:  1
 | 
      
         | 223 |  |  |  
 | 
      
         | 224 |  |  | (define_insn_reservation "sh4_gp_fpul" 1
 | 
      
         | 225 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 226 |  |  |        (eq_attr "type" "gp_fpul"))
 | 
      
         | 227 |  |  |   "issue+load_store")
 | 
      
         | 228 |  |  |  
 | 
      
         | 229 |  |  | ;; Load Store instructions.
 | 
      
         | 230 |  |  | ;; Group:       LS
 | 
      
         | 231 |  |  | ;; Latency:     3
 | 
      
         | 232 |  |  | ;; Issue Rate:  1
 | 
      
         | 233 |  |  |  
 | 
      
         | 234 |  |  | (define_insn_reservation "sh4_fpul_gp" 3
 | 
      
         | 235 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 236 |  |  |        (eq_attr "type" "fpul_gp"))
 | 
      
         | 237 |  |  |   "issue+load_store")
 | 
      
         | 238 |  |  |  
 | 
      
         | 239 |  |  | ;; Branch (BF,BF/S,BT,BT/S,BRA)
 | 
      
         | 240 |  |  | ;; Group:       BR
 | 
      
         | 241 |  |  | ;; Latency when taken:  2 (or 1)
 | 
      
         | 242 |  |  | ;; Issue Rate:  1
 | 
      
         | 243 |  |  | ;; The latency is 1 when displacement is 0.
 | 
      
         | 244 |  |  | ;; We can't really do much with the latency, even if we could express it,
 | 
      
         | 245 |  |  | ;; but the pairing restrictions are useful to take into account.
 | 
      
         | 246 |  |  | ;; ??? If the branch is likely, we might want to fill the delay slot;
 | 
      
         | 247 |  |  | ;; if the branch is likely, but not very likely, should we pretend to use
 | 
      
         | 248 |  |  | ;; a resource that CO instructions use, to get a pairable delay slot insn?
 | 
      
         | 249 |  |  |  
 | 
      
         | 250 |  |  | (define_insn_reservation "sh4_branch"  1
 | 
      
         | 251 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 252 |  |  |        (eq_attr "type" "cbranch,jump"))
 | 
      
         | 253 |  |  |   "issue+pcr_addrcalc")
 | 
      
         | 254 |  |  |  
 | 
      
         | 255 |  |  | ;; Branch Far (JMP,RTS,BRAF)
 | 
      
         | 256 |  |  | ;; Group:       CO
 | 
      
         | 257 |  |  | ;; Latency:     3
 | 
      
         | 258 |  |  | ;; Issue Rate:  2
 | 
      
         | 259 |  |  | ;; ??? Scheduling happens before branch shortening, and hence jmp and braf
 | 
      
         | 260 |  |  | ;; can't be distinguished from bra for the "jump" pattern.
 | 
      
         | 261 |  |  |  
 | 
      
         | 262 |  |  | (define_insn_reservation "sh4_return" 3
 | 
      
         | 263 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 264 |  |  |        (eq_attr "type" "return,jump_ind"))
 | 
      
         | 265 |  |  |          "d_lock*2")
 | 
      
         | 266 |  |  |  
 | 
      
         | 267 |  |  | ;; RTE
 | 
      
         | 268 |  |  | ;; Group:       CO
 | 
      
         | 269 |  |  | ;; Latency:     5
 | 
      
         | 270 |  |  | ;; Issue Rate:  5
 | 
      
         | 271 |  |  | ;; this instruction can be executed in any of the pipelines
 | 
      
         | 272 |  |  | ;; and blocks the pipeline for next 4 stages.
 | 
      
         | 273 |  |  |  
 | 
      
         | 274 |  |  | (define_insn_reservation "sh4_return_from_exp" 5
 | 
      
         | 275 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 276 |  |  |        (eq_attr "type" "rte"))
 | 
      
         | 277 |  |  |   "d_lock*5")
 | 
      
         | 278 |  |  |  
 | 
      
         | 279 |  |  | ;; OCBP, OCBWB
 | 
      
         | 280 |  |  | ;; Group:       CO
 | 
      
         | 281 |  |  | ;; Latency:     1-5
 | 
      
         | 282 |  |  | ;; Issue Rate:  1
 | 
      
         | 283 |  |  |  
 | 
      
         | 284 |  |  | ;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2
 | 
      
         | 285 |  |  | ;; ocbwb on its own would be "d_lock,nothing,memory*5"
 | 
      
         | 286 |  |  | (define_insn_reservation "ocbwb"  6
 | 
      
         | 287 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 288 |  |  |        (eq_attr "type" "cwb"))
 | 
      
         | 289 |  |  |   "d_lock*2,(d_lock+memory)*3,issue+load_store+memory,memory*2")
 | 
      
         | 290 |  |  |  
 | 
      
         | 291 |  |  | ;; LDS to PR,JSR
 | 
      
         | 292 |  |  | ;; Group:       CO
 | 
      
         | 293 |  |  | ;; Latency:     3
 | 
      
         | 294 |  |  | ;; Issue Rate:  2
 | 
      
         | 295 |  |  | ;; The SX stage is blocked for last 2 cycles.
 | 
      
         | 296 |  |  | ;; OTOH, the only time that has an effect for insns generated by the compiler
 | 
      
         | 297 |  |  | ;; is when lds to PR is followed by sts from PR - and that is highly unlikely -
 | 
      
         | 298 |  |  | ;; or when we are doing a function call - and we don't do inter-function
 | 
      
         | 299 |  |  | ;; scheduling.  For the function call case, it's really best that we end with
 | 
      
         | 300 |  |  | ;; something that models an rts.
 | 
      
         | 301 |  |  |  
 | 
      
         | 302 |  |  | (define_insn_reservation "sh4_lds_to_pr" 3
 | 
      
         | 303 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 304 |  |  |        (eq_attr "type" "prset") )
 | 
      
         | 305 |  |  |   "d_lock*2")
 | 
      
         | 306 |  |  |  
 | 
      
         | 307 |  |  | ;; calls introduce a longish delay that is likely to flush the pipelines
 | 
      
         | 308 |  |  | ;; of the caller's instructions.  Ordinary functions tend to end with a
 | 
      
         | 309 |  |  | ;; load to restore a register (in the delay slot of rts), while sfuncs
 | 
      
         | 310 |  |  | ;; tend to end with an EX or MT insn.  But that is not actually relevant,
 | 
      
         | 311 |  |  | ;; since there are no instructions that contend for memory access early.
 | 
      
         | 312 |  |  | ;; We could, of course, provide exact scheduling information for specific
 | 
      
         | 313 |  |  | ;; sfuncs, if that should prove useful.
 | 
      
         | 314 |  |  |  
 | 
      
         | 315 |  |  | (define_insn_reservation "sh4_call" 16
 | 
      
         | 316 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 317 |  |  |        (eq_attr "type" "call,sfunc"))
 | 
      
         | 318 |  |  |   "d_lock*16")
 | 
      
         | 319 |  |  |  
 | 
      
         | 320 |  |  | ;; LDS.L to PR
 | 
      
         | 321 |  |  | ;; Group:       CO
 | 
      
         | 322 |  |  | ;; Latency:     3
 | 
      
         | 323 |  |  | ;; Issue Rate:  2
 | 
      
         | 324 |  |  | ;; The SX unit is blocked for last 2 cycles.
 | 
      
         | 325 |  |  |  
 | 
      
         | 326 |  |  | (define_insn_reservation "ldsmem_to_pr"  3
 | 
      
         | 327 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 328 |  |  |        (eq_attr "type" "pload"))
 | 
      
         | 329 |  |  |   "d_lock*2")
 | 
      
         | 330 |  |  |  
 | 
      
         | 331 |  |  | ;; STS from PR
 | 
      
         | 332 |  |  | ;; Group:       CO
 | 
      
         | 333 |  |  | ;; Latency:     2
 | 
      
         | 334 |  |  | ;; Issue Rate:  2
 | 
      
         | 335 |  |  | ;; The SX unit in second and third cycles.
 | 
      
         | 336 |  |  |  
 | 
      
         | 337 |  |  | (define_insn_reservation "sts_from_pr" 2
 | 
      
         | 338 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 339 |  |  |        (eq_attr "type" "prget"))
 | 
      
         | 340 |  |  |   "d_lock*2")
 | 
      
         | 341 |  |  |  
 | 
      
         | 342 |  |  | ;; STS.L from PR
 | 
      
         | 343 |  |  | ;; Group:       CO
 | 
      
         | 344 |  |  | ;; Latency:     2
 | 
      
         | 345 |  |  | ;; Issue Rate:  2
 | 
      
         | 346 |  |  |  
 | 
      
         | 347 |  |  | (define_insn_reservation "sh4_prstore_mem" 2
 | 
      
         | 348 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 349 |  |  |        (eq_attr "type" "pstore"))
 | 
      
         | 350 |  |  |   "d_lock*2,nothing,memory")
 | 
      
         | 351 |  |  |  
 | 
      
         | 352 |  |  | ;; LDS to FPSCR
 | 
      
         | 353 |  |  | ;; Group:       CO
 | 
      
         | 354 |  |  | ;; Latency:     4
 | 
      
         | 355 |  |  | ;; Issue Rate:  1
 | 
      
         | 356 |  |  | ;; F1 is blocked for last three cycles.
 | 
      
         | 357 |  |  |  
 | 
      
         | 358 |  |  | (define_insn_reservation "fpscr_load" 4
 | 
      
         | 359 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 360 |  |  |        (eq_attr "type" "gp_fpscr"))
 | 
      
         | 361 |  |  |   "d_lock,nothing,F1*3")
 | 
      
         | 362 |  |  |  
 | 
      
         | 363 |  |  | ;; LDS.L to FPSCR
 | 
      
         | 364 |  |  | ;; Group:       CO
 | 
      
         | 365 |  |  | ;; Latency:     1 / 4
 | 
      
         | 366 |  |  | ;; Latency to update Rn is 1 and latency to update FPSCR is 4
 | 
      
         | 367 |  |  | ;; Issue Rate:  1
 | 
      
         | 368 |  |  | ;; F1 is blocked for last three cycles.
 | 
      
         | 369 |  |  |  
 | 
      
         | 370 |  |  | (define_insn_reservation "fpscr_load_mem" 4
 | 
      
         | 371 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 372 |  |  |        (eq_attr "type"  "mem_fpscr"))
 | 
      
         | 373 |  |  |   "d_lock,nothing,(F1+memory),F1*2")
 | 
      
         | 374 |  |  |  
 | 
      
         | 375 |  |  |  
 | 
      
         | 376 |  |  | ;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
 | 
      
         | 377 |  |  | ;; Group:       CO
 | 
      
         | 378 |  |  | ;; Latency:     4 / 4
 | 
      
         | 379 |  |  | ;; Issue Rate:  2
 | 
      
         | 380 |  |  |  
 | 
      
         | 381 |  |  | (define_insn_reservation "multi" 4
 | 
      
         | 382 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 383 |  |  |        (eq_attr "type" "smpy,dmpy"))
 | 
      
         | 384 |  |  |   "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2")
 | 
      
         | 385 |  |  |  
 | 
      
         | 386 |  |  | ;; Fixed STS from, and LDS to MACL / MACH
 | 
      
         | 387 |  |  | ;; Group:       CO
 | 
      
         | 388 |  |  | ;; Latency:     3
 | 
      
         | 389 |  |  | ;; Issue Rate:  1
 | 
      
         | 390 |  |  |  
 | 
      
         | 391 |  |  | (define_insn_reservation "sh4_mac_gp" 3
 | 
      
         | 392 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 393 |  |  |        (eq_attr "type" "mac_gp,gp_mac,mem_mac"))
 | 
      
         | 394 |  |  |   "d_lock")
 | 
      
         | 395 |  |  |  
 | 
      
         | 396 |  |  |  
 | 
      
         | 397 |  |  | ;; Single precision floating point computation FCMP/EQ,
 | 
      
         | 398 |  |  | ;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
 | 
      
         | 399 |  |  | ;; Group:       FE
 | 
      
         | 400 |  |  | ;; Latency:     3/4
 | 
      
         | 401 |  |  | ;; Issue Rate:  1
 | 
      
         | 402 |  |  |  
 | 
      
         | 403 |  |  | (define_insn_reservation "fp_arith"  3
 | 
      
         | 404 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 405 |  |  |        (eq_attr "type" "fp,fp_cmp"))
 | 
      
         | 406 |  |  |   "issue,F01,F2")
 | 
      
         | 407 |  |  |  
 | 
      
         | 408 |  |  | ;; We don't model the resource usage of this exactly because that would
 | 
      
         | 409 |  |  | ;; introduce a bogus latency.
 | 
      
         | 410 |  |  | (define_insn_reservation "sh4_fpscr_toggle"  1
 | 
      
         | 411 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 412 |  |  |        (eq_attr "type" "fpscr_toggle"))
 | 
      
         | 413 |  |  |   "issue")
 | 
      
         | 414 |  |  |  
 | 
      
         | 415 |  |  | (define_insn_reservation "fp_arith_ftrc"  3
 | 
      
         | 416 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 417 |  |  |        (eq_attr "type" "ftrc_s"))
 | 
      
         | 418 |  |  |   "issue,F01,F2")
 | 
      
         | 419 |  |  |  
 | 
      
         | 420 |  |  | (define_bypass 1 "fp_arith_ftrc" "sh4_fpul_gp")
 | 
      
         | 421 |  |  |  
 | 
      
         | 422 |  |  | ;; Single Precision FDIV/SQRT
 | 
      
         | 423 |  |  | ;; Group:       FE
 | 
      
         | 424 |  |  | ;; Latency:     12/13 (FDIV); 11/12 (FSQRT)
 | 
      
         | 425 |  |  | ;; Issue Rate:  1
 | 
      
         | 426 |  |  | ;; We describe fdiv here; fsqrt is actually one cycle faster.
 | 
      
         | 427 |  |  |  
 | 
      
         | 428 |  |  | (define_insn_reservation "fp_div" 12
 | 
      
         | 429 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 430 |  |  |        (eq_attr "type" "fdiv"))
 | 
      
         | 431 |  |  |   "issue,F01+F3,F2+F3,F3*7,F1+F3,F2")
 | 
      
         | 432 |  |  |  
 | 
      
         | 433 |  |  | ;; Double Precision floating point computation
 | 
      
         | 434 |  |  | ;; (FCNVDS, FCNVSD, FLOAT, FTRC)
 | 
      
         | 435 |  |  | ;; Group:       FE
 | 
      
         | 436 |  |  | ;; Latency:     (3,4)/5
 | 
      
         | 437 |  |  | ;; Issue Rate:  1
 | 
      
         | 438 |  |  |  
 | 
      
         | 439 |  |  | (define_insn_reservation "dp_float" 4
 | 
      
         | 440 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 441 |  |  |        (eq_attr "type" "dfp_conv"))
 | 
      
         | 442 |  |  |   "issue,F01,F1+F2,F2")
 | 
      
         | 443 |  |  |  
 | 
      
         | 444 |  |  | ;; Double-precision floating-point (FADD,FMUL,FSUB)
 | 
      
         | 445 |  |  | ;; Group:       FE
 | 
      
         | 446 |  |  | ;; Latency:     (7,8)/9
 | 
      
         | 447 |  |  | ;; Issue Rate:  1
 | 
      
         | 448 |  |  |  
 | 
      
         | 449 |  |  | (define_insn_reservation "fp_double_arith" 8
 | 
      
         | 450 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 451 |  |  |        (eq_attr "type" "dfp_arith,dfp_mul"))
 | 
      
         | 452 |  |  |   "issue,F01,F1+F2,fpu*4,F2")
 | 
      
         | 453 |  |  |  
 | 
      
         | 454 |  |  | ;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
 | 
      
         | 455 |  |  | ;; Group:       CO
 | 
      
         | 456 |  |  | ;; Latency:     3/5
 | 
      
         | 457 |  |  | ;; Issue Rate:  2
 | 
      
         | 458 |  |  |  
 | 
      
         | 459 |  |  | (define_insn_reservation "fp_double_cmp" 3
 | 
      
         | 460 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 461 |  |  |        (eq_attr "type" "dfp_cmp"))
 | 
      
         | 462 |  |  |   "d_lock,(d_lock+F01),F1+F2,F2")
 | 
      
         | 463 |  |  |  
 | 
      
         | 464 |  |  | ;; Double precision FDIV/SQRT
 | 
      
         | 465 |  |  | ;; Group:       FE
 | 
      
         | 466 |  |  | ;; Latency:     (24,25)/26
 | 
      
         | 467 |  |  | ;; Issue Rate:  1
 | 
      
         | 468 |  |  |  
 | 
      
         | 469 |  |  | (define_insn_reservation "dp_div" 25
 | 
      
         | 470 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 471 |  |  |        (eq_attr "type" "dfdiv"))
 | 
      
         | 472 |  |  |   "issue,F01+F3,F1+F2+F3,F2+F3,F3*16,F1+F3,(fpu+F3)*2,F2")
 | 
      
         | 473 |  |  |  
 | 
      
         | 474 |  |  |  
 | 
      
         | 475 |  |  | ;; Use the branch-not-taken case to model arith3 insns.  For the branch taken
 | 
      
         | 476 |  |  | ;; case, we'd get a d_lock instead of issue at the end.
 | 
      
         | 477 |  |  | (define_insn_reservation "arith3" 3
 | 
      
         | 478 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 479 |  |  |        (eq_attr "type" "arith3"))
 | 
      
         | 480 |  |  |   "issue,d_lock+pcr_addrcalc,issue")
 | 
      
         | 481 |  |  |  
 | 
      
         | 482 |  |  | ;; arith3b insns schedule the same whether or not the branch is taken.
 | 
      
         | 483 |  |  | (define_insn_reservation "arith3b" 2
 | 
      
         | 484 |  |  |   (and (eq_attr "pipe_model" "sh4")
 | 
      
         | 485 |  |  |        (eq_attr "type" "arith3b"))
 | 
      
         | 486 |  |  |   "issue,d_lock+pcr_addrcalc")
 |