URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [arm/] [cortex-a9.md] - Blame information for rev 801

Go to most recent revision | Details | Compare with Previous | View Log


;; ARM Cortex-A9 pipeline description
;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
;; Originally written by CodeSourcery for VFP.
;;
;; Rewritten by Ramana Radhakrishnan 
;; Integer Pipeline description contributed by ARM Ltd.
;; VFP Pipeline description rewritten and contributed by ARM Ltd.
 
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
 
;; The single DFA automaton for this description; every define_cpu_unit
;; visible in this file is assigned to it.
(define_automaton "cortex_a9")
 
;; The Cortex-A9 core is modelled as a dual issue pipeline that has
;; the following components.
;; 1. 1 Load Store Pipeline.
;; 2. P0 / main pipeline for data processing instructions.
;; 3. P1 / Dual pipeline for Data processing instructions.
;; 4. MAC pipeline for multiply as well as multiply
;;    and accumulate instructions.
;; 5. 1 VFP and an optional Neon unit.
;; The Load/Store, VFP and Neon issue pipelines are multiplexed.
;; The P0 / main pipeline and M1 stage of the MAC pipeline are
;;   multiplexed.
;; The P1 / dual pipeline and M2 stage of the MAC pipeline are
;;   multiplexed.
;; There are only 4 integer register read ports, so at any point in
;; time we cannot issue down both the E1 and the E2 ports unless
;; bypass paths are exercised.
;; Both P0 and P1 have 2 stages E1 and E2.
;; Data processing instructions issue to E1 or E2 depending on
;; whether they have an early shift or not.
 
;; Issue slot shared by VFP and Neon instructions, and the load/store pipeline.
(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9")
;; E1 and E2 stages of the P0 (main) pipeline.
(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")
;; E1 and E2 stages of the P1 (dual) pipeline.
(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")
;; Final (writeback) units of P0 and P1.
(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")
;; M1 and M2 stages of the MAC pipeline.
(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9")
;; Unit reserved by branches, and an issue unit reserved by calls.
(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9")
 
;; Paths taken by data-processing instructions through P0 and P1.
;;   default: one cycle in E2, then one cycle in WB.
;;   shift:   one cycle in E1, then the default path of the same pipe.
(define_reservation
  "cortex_a9_p0_default"
  "cortex_a9_p0_e2, cortex_a9_p0_wb")
(define_reservation
  "cortex_a9_p1_default"
  "cortex_a9_p1_e2, cortex_a9_p1_wb")
(define_reservation
  "cortex_a9_p0_shift"
  "cortex_a9_p0_e1, cortex_a9_p0_default")
(define_reservation
  "cortex_a9_p1_shift"
  "cortex_a9_p1_e1, cortex_a9_p1_default")
 
;; One cycle that simultaneously occupies E1 and E2 of both P0 and P1
;; together with both MAC stages (M1 and M2).
(define_reservation
  "cortex_a9_multcycle1"
  "cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \
cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
 
;; Unit sequences for the integer multiply family.  Each one finishes
;; with a cycle in M2 followed by a cycle in the P0 writeback unit.

;; 16x16 multiply: one cycle in M1 first.
(define_reservation
  "cortex_a9_mult16"
  "cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb")

;; 16x16 multiply-accumulate: one cortex_a9_multcycle1 cycle first.
(define_reservation
  "cortex_a9_mac16"
  "cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb")

;; Multiply: two cycles in M1 first.
(define_reservation
  "cortex_a9_mult"
  "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")

;; Multiply-accumulate: two cortex_a9_multcycle1 cycles first.
(define_reservation
  "cortex_a9_mac"
  "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")

;; Long multiply: three cycles in M1 first.
(define_reservation
  "cortex_a9_mult_long"
  "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb")
 
;; The load/store pipeline and the VFP / Neon issue slot cannot both
;; be reserved in the same cycle, so an instruction cannot issue down
;; one while the other is in use.
(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon")
 
;; Data-processing instructions that do not need the shifter: plain
;; ALU operations, plus shifted-operand forms whose "insn" attribute
;; is "mov" (a mov can go straight down E2).  Only instructions with
;; neon_type "none" match.  Either P0 or P1 may be used; latency is 2.
(define_insn_reservation
  "cortex_a9_dp" 2
  (and (eq_attr "tune" "cortexa9")
       (and (eq_attr "neon_type" "none")
            (ior (eq_attr "type" "alu")
                 (and (eq_attr "type" "alu_shift_reg, alu_shift")
                      (eq_attr "insn" "mov")))))
  "cortex_a9_p0_default|cortex_a9_p1_default")
 
;; Shifted-operand instructions other than mov use the shifter, so
;; they start in E1 of either P0 or P1 and have a latency of 3.
(define_insn_reservation
  "cortex_a9_dp_shift" 3
  (and (and (eq_attr "tune" "cortexa9")
            (eq_attr "type" "alu_shift_reg, alu_shift"))
       (not (eq_attr "insn" "mov")))
  "cortex_a9_p0_shift | cortex_a9_p1_shift")
 
;; Single and double register loads, byte loads and VFP loads: one
;; cycle in the load/store pipeline, result available after 4 cycles.
;; Autoincrement forms are not modelled; they additionally use one of
;; the E2 units to write back the incremented address.
(define_insn_reservation
  "cortex_a9_load1_2" 4
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))
  "cortex_a9_ls")
 
;; Load multiples and store multiples cannot be issued in two
;; consecutive cycles, so the load/store pipeline is held for two
;; cycles.  Addresses are assumed to be 64-bit aligned; the extra
;; cycle of latency for unaligned addresses is not modelled.
(define_insn_reservation
  "cortex_a9_load3_4" 5
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "load3, load4"))
  "cortex_a9_ls, cortex_a9_ls")
 
;; Single and double register stores and VFP stores: one cycle in the
;; load/store pipeline, latency 0.
(define_insn_reservation
  "cortex_a9_store1_2" 0
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "store1, store2, f_stores, f_stored"))
  "cortex_a9_ls")
 
;; Store multiples nearly always use an auto-increment form, so the
;; load/store pipeline is reserved together with the default path of
;; P0 or P1, and then the load/store pipeline is reserved again.
;; Back-to-back load and store multiples must be avoided because the
;; load/store unit would stall.
(define_insn_reservation
  "cortex_a9_store3_4" 0
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "store3, store4"))
  "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls")
 
;; 16x16 multiplies (insn "smulxy") deliver their result in 3 cycles.
(define_insn_reservation
  "cortex_a9_mult16" 3
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "insn" "smulxy"))
  "cortex_a9_mult16")
 
;; 16x16 multiply-accumulate (insn "smlaxy") also has a latency of 3,
;; but differs from the plain 16x16 multiply in that it reserves M1
;; and M2 in the same cycle.
(define_insn_reservation
  "cortex_a9_mac16" 3
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "insn" "smlaxy"))
  "cortex_a9_mac16")
 
;; Multiplies (insn "mul", "smmul", "smmulr"): latency 4, using the
;; cortex_a9_mult unit sequence.
(define_insn_reservation
  "cortex_a9_multiply" 4
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "insn" "mul,smmul,smmulr"))
  "cortex_a9_mult")
 
;; Multiply-accumulates (insn "mla", "smmla"): latency 4, using the
;; cortex_a9_mac unit sequence.
(define_insn_reservation
  "cortex_a9_mac" 4
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "insn" "mla,smmla"))
  "cortex_a9_mac")
 
;; Long multiplies and long multiply-accumulates: latency 5, using the
;; cortex_a9_mult_long unit sequence.
(define_insn_reservation
  "cortex_a9_multiply_long" 5
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
  "cortex_a9_mult_long")
 
;; A result produced in E2 can be forwarded to E2, E1, M1 or the
;; load/store unit in the following cycle, so the consumers listed
;; here see a latency of 1 from cortex_a9_dp.
(define_bypass
  1 "cortex_a9_dp"
  "cortex_a9_dp_shift, cortex_a9_multiply,
 cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
 cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
 cortex_a9_multiply_long")
 
;; Forwarding from shifted data-processing instructions: the consumers
;; listed here see a latency of 2 from cortex_a9_dp_shift.
(define_bypass
  2 "cortex_a9_dp_shift"
  "cortex_a9_dp_shift, cortex_a9_multiply,
 cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
 cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
 cortex_a9_multiply_long")
 
;; The load/store pipeline can hand a loaded value to a DP instruction
;; in the P0 default pipeline before the writeback stage.  The
;; consumers listed here see a latency of 3 from cortex_a9_load1_2.
(define_bypass
  3 "cortex_a9_load1_2"
  "cortex_a9_dp, cortex_a9_load1_2,
cortex_a9_store3_4, cortex_a9_store1_2")
 
;; Forwarding from load multiples: the consumers listed here see a
;; latency of 4 from cortex_a9_load3_4.
(define_bypass
  4 "cortex_a9_load3_4"
  "cortex_a9_dp, cortex_a9_load1_2,
cortex_a9_store3_4, cortex_a9_store1_2,  cortex_a9_load3_4")
 
;; Calls and branches.
 
;; Branches reserve only the branch unit, for one cycle, with a
;; latency of 0.
(define_insn_reservation
  "cortex_a9_branch" 0
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "branch"))
  "cortex_a9_branch")
 
;; Calls have a latency of essentially 0, but nothing may dual-issue
;; with them: the following instruction must start in the next cycle.
;; To enforce that, a call reserves the branch issue unit, every unit
;; in cortex_a9_multcycle1, the load/store pipeline and the VFP / Neon
;; issue slot, all in one cycle.
(define_insn_reservation
  "cortex_a9_call" 0
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "call"))
  "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon")
 
 
;; Pipelining for VFP instructions.
;; Issue happens either along the load/store unit or the VFP / Neon unit.
;; Pipeline   Instruction classification (values of the "type" attribute).
;; FPS      - fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag
;; FP_ADD   - fadds, faddd, f_cvt; fcmps, fcmpd (latency 1)
;; FPMUL    - fmul{s,d}, fmac{s,d}
;; FPDIV    - fdiv{s,d}
;; Units: one FPS unit, four FP_ADD stages, four FPMUL stages and one
;; divide unit.
(define_cpu_unit "ca9fps" "cortex_a9")
(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9")
(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9")
(define_cpu_unit "ca9fp_ds1" "cortex_a9")
 
 
;; FPS pipeline: copies, constants, simple arithmetic, core<->VFP
;; register transfers and flag transfers (fmrs, fmrrd, fmstat, fmrx).
;; One cycle holding the VFP / Neon issue slot and the FPS unit;
;; default latency is 2.
(define_insn_reservation
  "cortex_a9_fps" 2
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag"))
  "ca9_issue_vfp_neon + ca9fps")
 
;; FPS results are available after 1 cycle to the consumers listed
;; here, instead of the default latency of 2.
(define_bypass
  1 "cortex_a9_fps"
  "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long")
 
;; FP_ADD pipeline: the VFP / Neon issue slot is held together with
;; the first add stage, then stages 2, 3 and 4 follow one per cycle.
(define_reservation
  "ca9fp_add"
  "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
 
;; FP additions and conversions go down the full FP_ADD pipeline and
;; have a latency of 4.
(define_insn_reservation
  "cortex_a9_fadd" 4
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fadds, faddd, f_cvt"))
  "ca9fp_add")
 
;; FP compares hold the VFP / Neon issue slot and only the first
;; FP_ADD stage, for one cycle; latency is 1.
(define_insn_reservation
  "cortex_a9_fcmp" 1
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fcmps, fcmpd"))
  "ca9_issue_vfp_neon + ca9fp_add1")
 
;; FPMUL pipeline, used by the multiply and multiply-accumulate
;; instructions.

;; Single precision: issue slot plus stage 1, then stages 2, 3 and 4.
(define_reservation
  "ca9fmuls"
  "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")

;; Double precision: as above, with an extra cycle after issue that
;; holds stages 1 and 2 together before stage 2 is held on its own.
(define_reservation
  "ca9fmuld"
  "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
 
;; Single-precision FP multiply: latency 4.
(define_insn_reservation
  "cortex_a9_fmuls" 4
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fmuls"))
  "ca9fmuls")
 
;; Double-precision FP multiply: latency 5.
(define_insn_reservation
  "cortex_a9_fmuld" 5
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fmuld"))
  "ca9fmuld")
 
;; Single-precision FP multiply-accumulate: the single-precision
;; multiply sequence followed by the FP_ADD sequence; latency 8.
(define_insn_reservation
  "cortex_a9_fmacs" 8
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fmacs"))
  "ca9fmuls, ca9fp_add")
 
;; Double-precision FP multiply-accumulate: the double-precision
;; multiply sequence followed by the FP_ADD sequence; latency 9.
(define_insn_reservation
  "cortex_a9_fmacd" 9
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fmacd"))
  "ca9fmuld, ca9fp_add")
 
;; FP division.
;; Single precision: latency 15.  The divide unit and the VFP / Neon
;; issue slot are held in the first cycle only; the remaining 14
;; cycles reserve no unit.
(define_insn_reservation
  "cortex_a9_fdivs" 15
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fdivs"))
  "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14")
 
;; Double-precision FP division: latency 25.  The divide unit and the
;; VFP / Neon issue slot are held in the first cycle only; the
;; remaining 24 cycles reserve no unit.
(define_insn_reservation
  "cortex_a9_fdivd" 25
  (and (eq_attr "tune" "cortexa9")
       (eq_attr "type" "fdivd"))
  "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24")
 
;; The Neon pipeline description is kept in a separate file,
;; cortex-a9-neon.md, and is pulled in here.
(include "cortex-a9-neon.md")

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [arm/] [cortex-a9.md] - Blame information for rev 801

Line No.	Rev	Author	Line
1	709	jeremybenn	`;; ARM Cortex-A9 pipeline description`
2			`;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.`
3			`;; Originally written by CodeSourcery for VFP.`
4			`;;`
5			`;; Rewritten by Ramana Radhakrishnan`
6			`;; Integer Pipeline description contributed by ARM Ltd.`
7			`;; VFP Pipeline description rewritten and contributed by ARM Ltd.`
8
9			`;; This file is part of GCC.`
10			`;;`
11			`;; GCC is free software; you can redistribute it and/or modify it`
12			`;; under the terms of the GNU General Public License as published by`
13			`;; the Free Software Foundation; either version 3, or (at your option)`
14			`;; any later version.`
15			`;;`
16			`;; GCC is distributed in the hope that it will be useful, but`
17			`;; WITHOUT ANY WARRANTY; without even the implied warranty of`
18			`;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
19			`;; General Public License for more details.`
20			`;;`
21			`;; You should have received a copy of the GNU General Public License`
22			`;; along with GCC; see the file COPYING3. If not see`
23			`;; <http://www.gnu.org/licenses/>.`
24
25			`(define_automaton "cortex_a9")`
26
27			`;; The Cortex-A9 core is modelled as a dual issue pipeline that has`
28			`;; the following components.`
29			`;; 1. 1 Load Store Pipeline.`
30			`;; 2. P0 / main pipeline for data processing instructions.`
31			`;; 3. P1 / Dual pipeline for Data processing instructions.`
32			`;; 4. MAC pipeline for multiply as well as multiply`
33			`;; and accumulate instructions.`
34			`;; 5. 1 VFP and an optional Neon unit.`
35			`;; The Load/Store, VFP and Neon issue pipeline are multiplexed.`
36			`;; The P0 / main pipeline and M1 stage of the MAC pipeline are`
37			`;; multiplexed.`
38			`;; The P1 / dual pipeline and M2 stage of the MAC pipeline are`
39			`;; multiplexed.`
40			`;; There are only 4 integer register read ports and hence at any point of`
41			`;; time we can't have issue down the E1 and the E2 ports unless`
42			`;; of course there are bypass paths that get exercised.`
43			`;; Both P0 and P1 have 2 stages E1 and E2.`
44			`;; Data processing instructions issue to E1 or E2 depending on`
45			`;; whether they have an early shift or not.`
46
47			`(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9")`
48			`(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")`
49			`(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")`
50			`(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")`
51			`(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9")`
52			`(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9")`
53
54			`(define_reservation "cortex_a9_p0_default" "cortex_a9_p0_e2, cortex_a9_p0_wb")`
55			`(define_reservation "cortex_a9_p1_default" "cortex_a9_p1_e2, cortex_a9_p1_wb")`
56			`(define_reservation "cortex_a9_p0_shift" "cortex_a9_p0_e1, cortex_a9_p0_default")`
57			`(define_reservation "cortex_a9_p1_shift" "cortex_a9_p1_e1, cortex_a9_p1_default")`
58
59			`(define_reservation "cortex_a9_multcycle1"`
60			`"cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \`
61			`cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")`
62
63			`(define_reservation "cortex_a9_mult16"`
64			`"cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb")`
65			`(define_reservation "cortex_a9_mac16"`
66			`"cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb")`
67			`(define_reservation "cortex_a9_mult"`
68			`"cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")`
69			`(define_reservation "cortex_a9_mac"`
70			`"cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")`
71			`(define_reservation "cortex_a9_mult_long"`
72			`"cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb")`
73
74			`;; Issue at the same time along the load store pipeline and`
75			`;; the VFP / Neon pipeline is not possible.`
76			`(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon")`
77
78			`;; Default data processing instruction without any shift`
79			`;; The only exception to this is the mov instruction`
80			`;; which can go down E2 without any problem.`
81			`(define_insn_reservation "cortex_a9_dp" 2`
82			`(and (eq_attr "tune" "cortexa9")`
83			`(ior (and (eq_attr "type" "alu")`
84			`(eq_attr "neon_type" "none"))`
85			`(and (and (eq_attr "type" "alu_shift_reg, alu_shift")`
86			`(eq_attr "insn" "mov"))`
87			`(eq_attr "neon_type" "none"))))`
88			`"cortex_a9_p0_default\|cortex_a9_p1_default")`
89
90			`;; An instruction using the shifter will go down E1.`
91			`(define_insn_reservation "cortex_a9_dp_shift" 3`
92			`(and (eq_attr "tune" "cortexa9")`
93			`(and (eq_attr "type" "alu_shift_reg, alu_shift")`
94			`(not (eq_attr "insn" "mov"))))`
95			`"cortex_a9_p0_shift \| cortex_a9_p1_shift")`
96
97			`;; Loads have a latency of 4 cycles.`
98			`;; We don't model autoincrement instructions. These`
99			`;; instructions use the load store pipeline and 1 of`
100			`;; the E2 units to write back the result of the increment.`
101
102			`(define_insn_reservation "cortex_a9_load1_2" 4`
103			`(and (eq_attr "tune" "cortexa9")`
104			`(eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))`
105			`"cortex_a9_ls")`
106
107			`;; Loads multiples and store multiples can't be issued for 2 cycles in a`
108			`;; row. The description below assumes that addresses are 64 bit aligned.`
109			`;; If not, there is an extra cycle latency which is not modelled.`
110
111			`(define_insn_reservation "cortex_a9_load3_4" 5`
112			`(and (eq_attr "tune" "cortexa9")`
113			`(eq_attr "type" "load3, load4"))`
114			`"cortex_a9_ls, cortex_a9_ls")`
115
116			`(define_insn_reservation "cortex_a9_store1_2" 0`
117			`(and (eq_attr "tune" "cortexa9")`
118			`(eq_attr "type" "store1, store2, f_stores, f_stored"))`
119			`"cortex_a9_ls")`
120
121			`;; Almost all our store multiples use an auto-increment`
122			`;; form. Don't issue back to back load and store multiples`
123			`;; because the load store unit will stall.`
124
125			`(define_insn_reservation "cortex_a9_store3_4" 0`
126			`(and (eq_attr "tune" "cortexa9")`
127			`(eq_attr "type" "store3, store4"))`
128			`"cortex_a9_ls+(cortex_a9_p0_default \| cortex_a9_p1_default), cortex_a9_ls")`
129
130			`;; We get 16*16 multiply / mac results in 3 cycles.`
131			`(define_insn_reservation "cortex_a9_mult16" 3`
132			`(and (eq_attr "tune" "cortexa9")`
133			`(eq_attr "insn" "smulxy"))`
134			`"cortex_a9_mult16")`
135
136			`;; The 16*16 mac is slightly different that it`
137			`;; reserves M1 and M2 in the same cycle.`
138			`(define_insn_reservation "cortex_a9_mac16" 3`
139			`(and (eq_attr "tune" "cortexa9")`
140			`(eq_attr "insn" "smlaxy"))`
141			`"cortex_a9_mac16")`
142
143			`(define_insn_reservation "cortex_a9_multiply" 4`
144			`(and (eq_attr "tune" "cortexa9")`
145			`(eq_attr "insn" "mul,smmul,smmulr"))`
146			`"cortex_a9_mult")`
147
148			`(define_insn_reservation "cortex_a9_mac" 4`
149			`(and (eq_attr "tune" "cortexa9")`
150			`(eq_attr "insn" "mla,smmla"))`
151			`"cortex_a9_mac")`
152
153			`(define_insn_reservation "cortex_a9_multiply_long" 5`
154			`(and (eq_attr "tune" "cortexa9")`
155			`(eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))`
156			`"cortex_a9_mult_long")`
157
158			`;; An instruction with a result in E2 can be forwarded`
159			`;; to E2 or E1 or M1 or the load store unit in the next cycle.`
160
161			`(define_bypass 1 "cortex_a9_dp"`
162			`"cortex_a9_dp_shift, cortex_a9_multiply,`
163			`cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,`
164			`cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,`
165			`cortex_a9_multiply_long")`
166
167			`(define_bypass 2 "cortex_a9_dp_shift"`
168			`"cortex_a9_dp_shift, cortex_a9_multiply,`
169			`cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,`
170			`cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,`
171			`cortex_a9_multiply_long")`
172
173			`;; An instruction in the load store pipeline can provide`
174			`;; read access to a DP instruction in the P0 default pipeline`
175			`;; before the writeback stage.`
176
177			`(define_bypass 3 "cortex_a9_load1_2" "cortex_a9_dp, cortex_a9_load1_2,`
178			`cortex_a9_store3_4, cortex_a9_store1_2")`
179
180			`(define_bypass 4 "cortex_a9_load3_4" "cortex_a9_dp, cortex_a9_load1_2,`
181			`cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4")`
182
183			`;; Calls and branches.`
184
185			`;; Branch instructions`
186
187			`(define_insn_reservation "cortex_a9_branch" 0`
188			`(and (eq_attr "tune" "cortexa9")`
189			`(eq_attr "type" "branch"))`
190			`"cortex_a9_branch")`
191
192			`;; Call latencies are essentially 0 but make sure`
193			`;; dual issue doesn't happen i.e the next instruction`
194			`;; starts at the next cycle.`
195			`(define_insn_reservation "cortex_a9_call" 0`
196			`(and (eq_attr "tune" "cortexa9")`
197			`(eq_attr "type" "call"))`
198			`"cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon")`
199
200
201			`;; Pipelining for VFP instructions.`
202			`;; Issue happens either along load store unit or the VFP / Neon unit.`
203			`;; Pipeline Instruction Classification.`
204			`;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r`
205			`;; FP_ADD - fadds, faddd, fcmps (1)`
206			`;; FPMUL - fmul{s,d}, fmac{s,d}`
207			`;; FPDIV - fdiv{s,d}`
208			`(define_cpu_unit "ca9fps" "cortex_a9")`
209			`(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9")`
210			`(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9")`
211			`(define_cpu_unit "ca9fp_ds1" "cortex_a9")`
212
213
214			`;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle.`
215			`(define_insn_reservation "cortex_a9_fps" 2`
216			`(and (eq_attr "tune" "cortexa9")`
217			`(eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag"))`
218			`"ca9_issue_vfp_neon + ca9fps")`
219
220			`(define_bypass 1`
221			`"cortex_a9_fps"`
222			`"cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long")`
223
224			`;; Scheduling on the FP_ADD pipeline.`
225			`(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")`
226
227			`(define_insn_reservation "cortex_a9_fadd" 4`
228			`(and (eq_attr "tune" "cortexa9")`
229			`(eq_attr "type" "fadds, faddd, f_cvt"))`
230			`"ca9fp_add")`
231
232			`(define_insn_reservation "cortex_a9_fcmp" 1`
233			`(and (eq_attr "tune" "cortexa9")`
234			`(eq_attr "type" "fcmps, fcmpd"))`
235			`"ca9_issue_vfp_neon + ca9fp_add1")`
236
237			`;; Scheduling for the Multiply and MAC instructions.`
238			`(define_reservation "ca9fmuls"`
239			`"ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")`
240
241			`(define_reservation "ca9fmuld"`
242			`"ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")`
243
244			`(define_insn_reservation "cortex_a9_fmuls" 4`
245			`(and (eq_attr "tune" "cortexa9")`
246			`(eq_attr "type" "fmuls"))`
247			`"ca9fmuls")`
248
249			`(define_insn_reservation "cortex_a9_fmuld" 5`
250			`(and (eq_attr "tune" "cortexa9")`
251			`(eq_attr "type" "fmuld"))`
252			`"ca9fmuld")`
253
254			`(define_insn_reservation "cortex_a9_fmacs" 8`
255			`(and (eq_attr "tune" "cortexa9")`
256			`(eq_attr "type" "fmacs"))`
257			`"ca9fmuls, ca9fp_add")`
258
259			`(define_insn_reservation "cortex_a9_fmacd" 9`
260			`(and (eq_attr "tune" "cortexa9")`
261			`(eq_attr "type" "fmacd"))`
262			`"ca9fmuld, ca9fp_add")`
263
264			`;; Division pipeline description.`
265			`(define_insn_reservation "cortex_a9_fdivs" 15`
266			`(and (eq_attr "tune" "cortexa9")`
267			`(eq_attr "type" "fdivs"))`
268			`"ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14")`
269
270			`(define_insn_reservation "cortex_a9_fdivd" 25`
271			`(and (eq_attr "tune" "cortexa9")`
272			`(eq_attr "type" "fdivd"))`
273			`"ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24")`
274
275			`;; Include Neon pipeline description`
276			`(include "cortex-a9-neon.md")`