URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [arm/] [fa726te.md] - Blame information for rev 757

Go to most recent revision | Details | Compare with Previous | View Log


;; Faraday FA726TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.  */
 
;; These descriptions are based on the information contained in the
;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
 
;; This automaton provides a pipeline description for the Faraday
;; FA726TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
 
;; Every CPU unit declared below names this automaton as its owner.
(define_automaton "fa726te")
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
;;   The ALU pipeline has fetch, decode, execute, memory, and
;;   write stages.  We only need to model the execute, memory and write
;;   stages.
 
;;      E1      E2      E3      E4      E5      WB
;;______________________________________________________
;;
;;      <-------------- LD/ST ----------->
;;    shifter + LU      <-- AU -->
;;      <-- AU -->     shifter + LU    CPSR     (Pipe 0)
;;______________________________________________________
;;
;;      <---------- MUL --------->
;;    shifter + LU      <-- AU -->
;;      <-- AU -->     shifter + LU    CPSR     (Pipe 1)
 
 
;; Execution resources: two ALU pipes, one multiply (MAC) pipe, and two
;; load/store units named "_e" and "_w" (presumably the E and W stages of
;; the LSU pipe -- confirm against the core design note).
(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te")
(define_cpu_unit "fa726te_mac_pipe" "fa726te")
(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te")
 
;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly
;; improve code quality.  Unlike the units above, the second LSU is
;; declared with define_query_cpu_unit.
(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te")
;; The two issue slots.
(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te")
 
;; Reservation for an instruction that needs either one of the issue slots.
(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)")
;; Reservation to restrict issue to 1: it takes both issue slots at once,
;; so nothing else that needs an issue slot can start in the same cycle.
(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)")
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
;; ALU instructions require three cycles to execute, and use the ALU
;; pipeline in each of the three stages.  The results are available
;; after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles.  That case is not modeled here.
 
;; Move instructions (MOV/MVN).
;; Selection is by the "insn" attribute alone, with no test on "type";
;; the other ALU reservations in this file explicitly exclude mov/mvn.
;; Latency is 1, and the instruction reserves one issue slot plus either
;; ALU pipe.  Despite the "shift_op" name, no shift-specific test is made.
(define_insn_reservation "726te_shift_op" 1
  (and (eq_attr "tune" "fa726te")
       (eq_attr "insn" "mov,mvn"))
  "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
 
;; ALU operations with no shifted operand (type "alu"), other than
;; MOV/MVN, finish in 1 cycle.  ALU instructions with a shifted operand
;; take longer and have their own reservations.  One issue slot and
;; either ALU pipe are reserved.
(define_insn_reservation "726te_alu_op" 1
  (and (and (eq_attr "tune" "fa726te")
            (eq_attr "type" "alu"))
       (not (eq_attr "insn" "mov,mvn")))
  "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
 
;; ALU operations with a shifted operand.
;; Operations with a shift-by-register operand really stall in the
;; decoder, in order to read the shift value in the first cycle.  If the
;; instruction uses both shifter and AU, it takes 3 cycles.
;;
;; This reservation covers type "alu_shift" (MOV/MVN excluded); type
;; "alu_shift_reg" has its own reservation with the same latency and units.
(define_insn_reservation "726te_alu_shift_op" 3
  (and (and (eq_attr "tune" "fa726te")
            (eq_attr "type" "alu_shift"))
       (not (eq_attr "insn" "mov,mvn")))
  "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
 
 
;; ALU operations of type "alu_shift_reg", MOV/MVN excluded: latency 3,
;; one issue slot plus either ALU pipe.
(define_insn_reservation "726te_alu_shift_reg_op" 3
  (and (and (eq_attr "tune" "fa726te")
            (eq_attr "type" "alu_shift_reg"))
       (not (eq_attr "insn" "mov,mvn")))
  "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
;; Multiplication instructions loop in the execute stage until the
;; instruction has been passed through the multiplier array enough
;; times.  Multiply operations occur in both the execute and memory
;; stages of the pipeline.
;;
;; Multiplies are selected by the "insn" attribute rather than by "type".
;; Every listed variant gets the same latency of 3 and reserves one issue
;; slot plus the single MAC pipe for one cycle.
 
(define_insn_reservation "726te_mult_op" 3
 (and (eq_attr "tune" "fa726te")
      (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
                       umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
 "fa726te_issue+fa726te_mac_pipe")
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!").  These models assume that all memory references
;; hit in dcache.
 
;; Loads with a shifted offset take 3 cycles.  That latency is used for
;; every scalar load, because (a) such loads are probably the most common
;; and (b) the pessimistic assumption will lead to fewer stalls.
 
;; Scalar loads are pipelined in the FA726TE LSU pipe.
;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
;; The 2nd LSU (lsu1) models the fact that if 2 loads are scheduled in the
;; same "bundle", the 2nd load will introduce another ISSUE stall but is
;; still ok to execute (and may be beneficial sometimes).
;;
;; The reservation therefore has two alternatives:
;;   1. one issue slot plus both units of the real LSU, or
;;   2. one issue slot plus both units of the pretend LSU (lsu1), followed
;;      by one cycle in which both issue slots are taken (fa726te_blockage).
 
(define_insn_reservation "726te_load1_op" 3
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "load1,load_byte"))
 "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
  | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
 
;; Single-register store: latency 1.  It takes both issue slots for two
;; consecutive cycles, so no other instruction can issue alongside it.
;; NOTE(review): no LSU unit is reserved here, although the comments in
;; this section mention a Load@E3 / Store@W conflict -- confirm that
;; blocking issue is the intended way to model it.
(define_insn_reservation "726te_store1_op" 1
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "store1"))
 "fa726te_blockage*2")
 
;; Load/Store Multiple blocks all pipelines in EX stages until WB.
;; No other instructions can be issued together.  Since they essentially
;; prevent all scheduling opportunities, we model them together here.
 
;; The LDM is broken into multiple load instructions; a later instruction
;; in pipe 1 is stalled.
;;
;; Loads of type "load2" or "load3": latency 4, with both issue slots taken
;; for 4 consecutive cycles.
(define_insn_reservation "726te_ldm2_op" 4
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "load2,load3"))
 "fa726te_blockage*4")
 
;; Loads of type "load4" (note the reservation is named "ldm3"): latency 5,
;; with both issue slots taken for 5 consecutive cycles.
(define_insn_reservation "726te_ldm3_op" 5
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "load4"))
 "fa726te_blockage*5")
 
;; Stores of type "store2" or "store3": latency 2, but both issue slots
;; stay taken for 3 consecutive cycles (one cycle longer than the latency).
(define_insn_reservation "726te_stm2_op" 2
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "store2,store3"))
 "fa726te_blockage*3")
 
;; Stores of type "store4" (note the reservation is named "stm3"): latency
;; 3, with both issue slots taken for 4 consecutive cycles.
(define_insn_reservation "726te_stm3_op" 3
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "store4"))
 "fa726te_blockage*4")
 
;; Bypasses.  Each define_bypass replaces the producer's default latency
;; with the given number when the consumer belongs to the listed
;; reservations.  Where a guard function is named, the bypass applies only
;; to insn pairs the guard accepts.  The guards are defined outside this
;; file; the dependency each one checks for is presumably what its name
;; says -- confirm against the ARM backend sources.

;; Any load -> any store: latency 1 (guarded).
(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\
                  726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep")
;; Any ALU or multiply result -> single store: latency 0 (guarded).
(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\
                 726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op"
                 "arm_no_early_store_addr_dep")
;; Move / plain ALU -> move / plain ALU: latency 0.
(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op")
;; Shifted ALU -> move / plain ALU: latency 1 instead of 3.
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op"
                 "726te_shift_op,726te_alu_op")
;; Shifted ALU or multiply -> shifted ALU: latency 1 instead of 3 (guarded).
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
                 "726te_alu_shift_op" "arm_no_early_alu_shift_dep")
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
                 "726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep")
;; Multiply -> move / plain ALU: latency 1 instead of 3.
(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op")
 
;; Load -> multiply: one cycle more than the load's default latency
;; (3 -> 4, 4 -> 5, 5 -> 6).
(define_bypass 4 "726te_load1_op" "726te_mult_op")
(define_bypass 5 "726te_ldm2_op" "726te_mult_op")
(define_bypass 6 "726te_ldm3_op" "726te_mult_op")
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
;; Branch instructions are difficult to model accurately.  The FA726TE
;; core can predict most branches.  If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream.  Some branches can
;; therefore appear to require zero cycles to execute.  We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
;;
;; Hence the latency of 0.  A branch still takes both issue slots for one
;; cycle, so nothing else that needs an issue slot starts in that cycle.
 
(define_insn_reservation "726te_branch_op" 0
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "branch"))
 "fa726te_blockage")
 
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 is ready for int return value.
;; A call is given latency 1 and takes both issue slots for one cycle.
(define_insn_reservation "726te_call_op" 1
 (and (eq_attr "tune" "fa726te")
      (eq_attr "type" "call"))
 "fa726te_blockage")
 

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [arm/] [fa726te.md] - Blame information for rev 757

Line No.	Rev	Author	Line
1	709	jeremybenn	`;; Faraday FA726TE Pipeline Description`
2			`;; Copyright (C) 2010 Free Software Foundation, Inc.`
3			`;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.`
4			`;;`
5			`;; This file is part of GCC.`
6			`;;`
7			`;; GCC is free software; you can redistribute it and/or modify it under`
8			`;; the terms of the GNU General Public License as published by the Free`
9			`;; Software Foundation; either version 3, or (at your option) any later`
10			`;; version.`
11			`;;`
12			`;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY`
13			`;; WARRANTY; without even the implied warranty of MERCHANTABILITY or`
14			`;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
15			`;; for more details.`
16			`;;`
17			`;; You should have received a copy of the GNU General Public License`
18			`;; along with GCC; see the file COPYING3. If not see`
19			`;; . */`
20
21			`;; These descriptions are based on the information contained in the`
22			`;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.`
23
24			`;; This automaton provides a pipeline description for the Faraday`
25			`;; FA726TE core.`
26			`;;`
27			`;; The model given here assumes that the condition for all conditional`
28			`;; instructions is "true", i.e., that all of the instructions are`
29			`;; actually executed.`
30
31			`(define_automaton "fa726te")`
32
33			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
34			`;; Pipelines`
35			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
36
37			`;; The ALU pipeline has fetch, decode, execute, memory, and`
38			`;; write stages. We only need to model the execute, memory and write`
39			`;; stages.`
40
41			`;; E1 E2 E3 E4 E5 WB`
42			`;;______________________________________________________`
43			`;;`
44			`;; <-------------- LD/ST ----------->`
45			`;; shifter + LU <-- AU -->`
46			`;; <-- AU --> shifter + LU CPSR (Pipe 0)`
47			`;;______________________________________________________`
48			`;;`
49			`;; <---------- MUL --------->`
50			`;; shifter + LU <-- AU -->`
51			`;; <-- AU --> shifter + LU CPSR (Pipe 1)`
52
53
54			`(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te")`
55			`(define_cpu_unit "fa726te_mac_pipe" "fa726te")`
56			`(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te")`
57
58			`;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly`
59			`;; improve code quality.`
60			`(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te")`
61			`(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te")`
62
63			`(define_reservation "fa726te_issue" "(fa726te_is0\|fa726te_is1)")`
64			`;; Reservation to restrict issue to 1.`
65			`(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)")`
66
67			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
68			`;; ALU Instructions`
69			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
70
71			`;; ALU instructions require three cycles to execute, and use the ALU`
72			`;; pipeline in each of the three stages. The results are available`
73			`;; after the execute stage stage has finished.`
74			`;;`
75			`;; If the destination register is the PC, the pipelines are stalled`
76			`;; for several cycles. That case is not modeled here.`
77
78			`;; Move instructions.`
79			`(define_insn_reservation "726te_shift_op" 1`
80			`(and (eq_attr "tune" "fa726te")`
81			`(eq_attr "insn" "mov,mvn"))`
82			`"fa726te_issue+(fa726te_alu0_pipe\|fa726te_alu1_pipe)")`
83
84			`;; ALU operations with no shifted operand will finished in 1 cycle`
85			`;; Other ALU instructions 2 cycles.`
86			`(define_insn_reservation "726te_alu_op" 1`
87			`(and (eq_attr "tune" "fa726te")`
88			`(and (eq_attr "type" "alu")`
89			`(not (eq_attr "insn" "mov,mvn"))))`
90			`"fa726te_issue+(fa726te_alu0_pipe\|fa726te_alu1_pipe)")`
91
92			`;; ALU operations with a shift-by-register operand.`
93			`;; These really stall in the decoder, in order to read the shift value`
94			`;; in the first cycle. If the instruction uses both shifter and AU,`
95			`;; it takes 3 cycles.`
96			`(define_insn_reservation "726te_alu_shift_op" 3`
97			`(and (eq_attr "tune" "fa726te")`
98			`(and (eq_attr "type" "alu_shift")`
99			`(not (eq_attr "insn" "mov,mvn"))))`
100			`"fa726te_issue+(fa726te_alu0_pipe\|fa726te_alu1_pipe)")`
101
102			`(define_insn_reservation "726te_alu_shift_reg_op" 3`
103			`(and (eq_attr "tune" "fa726te")`
104			`(and (eq_attr "type" "alu_shift_reg")`
105			`(not (eq_attr "insn" "mov,mvn"))))`
106			`"fa726te_issue+(fa726te_alu0_pipe\|fa726te_alu1_pipe)")`
107			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
108			`;; Multiplication Instructions`
109			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
110
111			`;; Multiplication instructions loop in the execute stage until the`
112			`;; instruction has been passed through the multiplier array enough`
113			`;; times. Multiply operations occur in both the execute and memory`
114			`;; stages of the pipeline`
115
116			`(define_insn_reservation "726te_mult_op" 3`
117			`(and (eq_attr "tune" "fa726te")`
118			`(eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\`
119			`umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))`
120			`"fa726te_issue+fa726te_mac_pipe")`
121
122			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
123			`;; Load/Store Instructions`
124			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
125
126			`;; The models for load/store instructions do not accurately describe`
127			`;; the difference between operations with a base register writeback`
128			`;; (such as "ldm!"). These models assume that all memory references`
129			`;; hit in dcache.`
130
131			`;; Loads with a shifted offset take 3 cycles, and are (a) probably the`
132			`;; most common and (b) the pessimistic assumption will lead to fewer stalls.`
133
134			`;; Scalar loads are pipelined in FA726TE LSU pipe.`
135			`;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.`
136			`;; The 2nd LSU (lsu1) is to model the fact that if 2 loads are scheduled in the`
137			`;; same "bundle", and the 2nd load will introudce another ISSUE stall but is`
138			`;; still ok to execute (and may be benefical sometimes).`
139
140			`(define_insn_reservation "726te_load1_op" 3`
141			`(and (eq_attr "tune" "fa726te")`
142			`(eq_attr "type" "load1,load_byte"))`
143			`"(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\`
144			`\| (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")`
145
146			`(define_insn_reservation "726te_store1_op" 1`
147			`(and (eq_attr "tune" "fa726te")`
148			`(eq_attr "type" "store1"))`
149			`"fa726te_blockage*2")`
150
151			`;; Load/Store Multiple blocks all pipelines in EX stages until WB.`
152			`;; No other instructions can be issued together. Since they essentially`
153			`;; prevent all scheduling opportunities, we model them together here.`
154
155			`;; The LDM is breaking into multiple load instructions, later instruction in`
156			`;; the pipe 1 is stalled.`
157			`(define_insn_reservation "726te_ldm2_op" 4`
158			`(and (eq_attr "tune" "fa726te")`
159			`(eq_attr "type" "load2,load3"))`
160			`"fa726te_blockage*4")`
161
162			`(define_insn_reservation "726te_ldm3_op" 5`
163			`(and (eq_attr "tune" "fa726te")`
164			`(eq_attr "type" "load4"))`
165			`"fa726te_blockage*5")`
166
167			`(define_insn_reservation "726te_stm2_op" 2`
168			`(and (eq_attr "tune" "fa726te")`
169			`(eq_attr "type" "store2,store3"))`
170			`"fa726te_blockage*3")`
171
172			`(define_insn_reservation "726te_stm3_op" 3`
173			`(and (eq_attr "tune" "fa726te")`
174			`(eq_attr "type" "store4"))`
175			`"fa726te_blockage*4")`
176
177			`(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\`
178			`726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep")`
179			`(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\`
180			`726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op"`
181			`"arm_no_early_store_addr_dep")`
182			`(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op")`
183			`(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op"`
184			`"726te_shift_op,726te_alu_op")`
185			`(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"`
186			`"726te_alu_shift_op" "arm_no_early_alu_shift_dep")`
187			`(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"`
188			`"726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep")`
189			`(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op")`
190
191			`(define_bypass 4 "726te_load1_op" "726te_mult_op")`
192			`(define_bypass 5 "726te_ldm2_op" "726te_mult_op")`
193			`(define_bypass 6 "726te_ldm3_op" "726te_mult_op")`
194
195			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
196			`;; Branch and Call Instructions`
197			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
198
199			`;; Branch instructions are difficult to model accurately. The FA726TE`
200			`;; core can predict most branches. If the branch is predicted`
201			`;; correctly, and predicted early enough, the branch can be completely`
202			`;; eliminated from the instruction stream. Some branches can`
203			`;; therefore appear to require zero cycle to execute. We assume that`
204			`;; all branches are predicted correctly, and that the latency is`
205			`;; therefore the minimum value.`
206
207			`(define_insn_reservation "726te_branch_op" 0`
208			`(and (eq_attr "tune" "fa726te")`
209			`(eq_attr "type" "branch"))`
210			`"fa726te_blockage")`
211
212			`;; The latency for a call is actually the latency when the result is available.`
213			`;; i.e. R0 is ready for int return value.`
214			`(define_insn_reservation "726te_call_op" 1`
215			`(and (eq_attr "tune" "fa726te")`
216			`(eq_attr "type" "call"))`
217			`"fa726te_blockage")`
218