1 |
26 |
jlechner |
/* Profiling definitions for the FRV simulator
   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2007, 2008
   Free Software Foundation, Inc.
   Contributed by Red Hat.

This file is part of the GNU Simulators.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
|
20 |
|
|
|
21 |
|
|
#ifndef PROFILE_H
|
22 |
|
|
#define PROFILE_H
|
23 |
|
|
|
24 |
|
|
#include "frv-desc.h"
|
25 |
|
|
|
26 |
|
|
/* This struct defines the state of profiling. All fields are of general
|
27 |
|
|
use to all machines. */
|
28 |
|
|
typedef struct
|
29 |
|
|
{
|
30 |
|
|
long vliw_insns; /* total number of VLIW insns. */
|
31 |
|
|
long vliw_wait; /* number of cycles that the current VLIW insn must wait. */
|
32 |
|
|
long post_wait; /* number of cycles that post processing in the current
|
33 |
|
|
VLIW insn must wait. */
|
34 |
|
|
long vliw_cycles;/* number of cycles used by current VLIW insn. */
|
35 |
|
|
|
36 |
|
|
int past_first_p; /* Not the first insns in the VLIW */
|
37 |
|
|
|
38 |
|
|
/* Register latencies. Must be signed since they can be temporarily
|
39 |
|
|
negative. */
|
40 |
|
|
int gr_busy[64]; /* Cycles until GR is available. */
|
41 |
|
|
int fr_busy[64]; /* Cycles until FR is available. */
|
42 |
|
|
int acc_busy[64]; /* Cycles until FR is available. */
|
43 |
|
|
int ccr_busy[8]; /* Cycles until ICC/FCC is available. */
|
44 |
|
|
int spr_busy[4096]; /* Cycles until spr is available. */
|
45 |
|
|
int idiv_busy[2]; /* Cycles until integer division unit is available. */
|
46 |
|
|
int fdiv_busy[2]; /* Cycles until float division unit is available. */
|
47 |
|
|
int fsqrt_busy[2]; /* Cycles until square root unit is available. */
|
48 |
|
|
int float_busy[4]; /* Cycles until floating point unit is available. */
|
49 |
|
|
int media_busy[4]; /* Cycles until media unit is available. */
|
50 |
|
|
int branch_penalty; /* Cycles until branch is complete. */
|
51 |
|
|
|
52 |
|
|
int gr_latency[64]; /* Cycles until target GR is available. */
|
53 |
|
|
int fr_latency[64]; /* Cycles until target FR is available. */
|
54 |
|
|
int acc_latency[64]; /* Cycles until target FR is available. */
|
55 |
|
|
int ccr_latency[8]; /* Cycles until target ICC/FCC is available. */
|
56 |
|
|
int spr_latency[4096]; /* Cycles until target spr is available. */
|
57 |
|
|
|
58 |
|
|
/* Some registers are busy for a shorter number of cycles than normal
|
59 |
|
|
depending on how they are used next. the xxx_busy_adjust arrays keep track
|
60 |
|
|
of how many cycles to adjust down.
|
61 |
|
|
*/
|
62 |
|
|
int fr_busy_adjust[64];
|
63 |
|
|
int acc_busy_adjust[64];
|
64 |
|
|
|
65 |
|
|
/* Register flags. Each bit represents one register. */
|
66 |
|
|
DI cur_gr_complex;
|
67 |
|
|
DI prev_gr_complex;
|
68 |
|
|
|
69 |
|
|
/* Keep track of the total queued post-processing time required before a
|
70 |
|
|
resource is available. This is applied to the resource's latency once all
|
71 |
|
|
pending loads for the resource are completed. */
|
72 |
|
|
int fr_ptime[64];
|
73 |
|
|
|
74 |
|
|
int branch_hint; /* hint field from branch insn. */
|
75 |
|
|
USI branch_address; /* Address of predicted branch. */
|
76 |
|
|
USI insn_fetch_address;/* Address of sequential insns fetched. */
|
77 |
|
|
int mclracc_acc; /* ACC number of register cleared by mclracc. */
|
78 |
|
|
int mclracc_A; /* A field of mclracc. */
|
79 |
|
|
|
80 |
|
|
/* We need to know when the first branch of a vliw insn is taken, so that
|
81 |
|
|
we don't consider the remaining branches in the vliw insn. */
|
82 |
|
|
int vliw_branch_taken;
|
83 |
|
|
|
84 |
|
|
/* Keep track of the maximum load stall for each VLIW insn. */
|
85 |
|
|
int vliw_load_stall;
|
86 |
|
|
|
87 |
|
|
/* Need to know if all cache entries are affected by various cache
|
88 |
|
|
operations. */
|
89 |
|
|
int all_cache_entries;
|
90 |
|
|
} FRV_PROFILE_STATE;
|
91 |
|
|
|
92 |
|
|
/* Number of the register following REG, or -1 when REG lies outside
   0..62.  REG is evaluated more than once.  */
#define DUAL_REG(reg) (((reg) < 0 || (reg) >= 63) ? -1 : (reg) + 1)
|
93 |
|
|
/* REG + 2, or -1 when REG lies outside 0..60.  REG is evaluated more than
   once.  */
#define DUAL_DOUBLE(reg) (((reg) < 0 || (reg) >= 61) ? -1 : (reg) + 2)
|
94 |
|
|
|
95 |
|
|
/* Return the GNER register associated with the given GR register.
   There is no GNER associated with gr0.
   gr1..gr31 map to H_SPR_GNER1 and gr32..gr63 map to H_SPR_GNER0; any other
   register number (including gr0 and negative values) yields -1.
   GR is evaluated more than once.  */
#define GNER_FOR_GR(gr) ((gr) > 63 ? -1 : \
                         (gr) > 31 ? H_SPR_GNER0 : \
                         (gr) > 0  ? H_SPR_GNER1 : \
                                     -1)
|
101 |
|
|
/* Return the FNER register associated with the given FR register.
   fr1..fr31 map to H_SPR_FNER1 and fr32..fr63 map to H_SPR_FNER0; any other
   register number (including fr0 and negative values) yields -1.
   NOTE(review): the exclusion of fr0 is what this macro does; whether fr0
   really has no FNER bit should be confirmed against the architecture.
   FR is evaluated more than once.  */
#define FNER_FOR_FR(fr) ((fr) > 63 ? -1 : \
                         (fr) > 31 ? H_SPR_FNER0 : \
                         (fr) > 0  ? H_SPR_FNER1 : \
                                     -1)
|
107 |
|
|
|
108 |
|
|
/* Top up the latency of the given GR by the given number of cycles. */
|
109 |
|
|
void update_GR_latency (SIM_CPU *, INT, int);
|
110 |
|
|
void update_GRdouble_latency (SIM_CPU *, INT, int);
|
111 |
|
|
void update_GR_latency_for_load (SIM_CPU *, INT, int);
|
112 |
|
|
void update_GRdouble_latency_for_load (SIM_CPU *, INT, int);
|
113 |
|
|
void update_GR_latency_for_swap (SIM_CPU *, INT, int);
|
114 |
|
|
void update_FR_latency (SIM_CPU *, INT, int);
|
115 |
|
|
void update_FRdouble_latency (SIM_CPU *, INT, int);
|
116 |
|
|
void update_FR_latency_for_load (SIM_CPU *, INT, int);
|
117 |
|
|
void update_FRdouble_latency_for_load (SIM_CPU *, INT, int);
|
118 |
|
|
void update_FR_ptime (SIM_CPU *, INT, int);
|
119 |
|
|
void update_FRdouble_ptime (SIM_CPU *, INT, int);
|
120 |
|
|
void decrease_ACC_busy (SIM_CPU *, INT, int);
|
121 |
|
|
void decrease_FR_busy (SIM_CPU *, INT, int);
|
122 |
|
|
void decrease_GR_busy (SIM_CPU *, INT, int);
|
123 |
|
|
void increase_FR_busy (SIM_CPU *, INT, int);
|
124 |
|
|
void increase_ACC_busy (SIM_CPU *, INT, int);
|
125 |
|
|
void update_ACC_latency (SIM_CPU *, INT, int);
|
126 |
|
|
void update_CCR_latency (SIM_CPU *, INT, int);
|
127 |
|
|
void update_SPR_latency (SIM_CPU *, INT, int);
|
128 |
|
|
void update_idiv_resource_latency (SIM_CPU *, INT, int);
|
129 |
|
|
void update_fdiv_resource_latency (SIM_CPU *, INT, int);
|
130 |
|
|
void update_fsqrt_resource_latency (SIM_CPU *, INT, int);
|
131 |
|
|
void update_float_resource_latency (SIM_CPU *, INT, int);
|
132 |
|
|
void update_media_resource_latency (SIM_CPU *, INT, int);
|
133 |
|
|
void update_branch_penalty (SIM_CPU *, int);
|
134 |
|
|
void update_ACC_ptime (SIM_CPU *, INT, int);
|
135 |
|
|
void update_SPR_ptime (SIM_CPU *, INT, int);
|
136 |
|
|
void vliw_wait_for_GR (SIM_CPU *, INT);
|
137 |
|
|
void vliw_wait_for_GRdouble (SIM_CPU *, INT);
|
138 |
|
|
void vliw_wait_for_FR (SIM_CPU *, INT);
|
139 |
|
|
void vliw_wait_for_FRdouble (SIM_CPU *, INT);
|
140 |
|
|
void vliw_wait_for_CCR (SIM_CPU *, INT);
|
141 |
|
|
void vliw_wait_for_ACC (SIM_CPU *, INT);
|
142 |
|
|
void vliw_wait_for_SPR (SIM_CPU *, INT);
|
143 |
|
|
void vliw_wait_for_idiv_resource (SIM_CPU *, INT);
|
144 |
|
|
void vliw_wait_for_fdiv_resource (SIM_CPU *, INT);
|
145 |
|
|
void vliw_wait_for_fsqrt_resource (SIM_CPU *, INT);
|
146 |
|
|
void vliw_wait_for_float_resource (SIM_CPU *, INT);
|
147 |
|
|
void vliw_wait_for_media_resource (SIM_CPU *, INT);
|
148 |
|
|
void load_wait_for_GR (SIM_CPU *, INT);
|
149 |
|
|
void load_wait_for_FR (SIM_CPU *, INT);
|
150 |
|
|
void load_wait_for_GRdouble (SIM_CPU *, INT);
|
151 |
|
|
void load_wait_for_FRdouble (SIM_CPU *, INT);
|
152 |
|
|
void enforce_full_fr_latency (SIM_CPU *, INT);
|
153 |
|
|
void enforce_full_acc_latency (SIM_CPU *, INT);
|
154 |
|
|
int post_wait_for_FR (SIM_CPU *, INT);
|
155 |
|
|
int post_wait_for_FRdouble (SIM_CPU *, INT);
|
156 |
|
|
int post_wait_for_ACC (SIM_CPU *, INT);
|
157 |
|
|
int post_wait_for_CCR (SIM_CPU *, INT);
|
158 |
|
|
int post_wait_for_SPR (SIM_CPU *, INT);
|
159 |
|
|
int post_wait_for_fdiv (SIM_CPU *, INT);
|
160 |
|
|
int post_wait_for_fsqrt (SIM_CPU *, INT);
|
161 |
|
|
int post_wait_for_float (SIM_CPU *, INT);
|
162 |
|
|
int post_wait_for_media (SIM_CPU *, INT);
|
163 |
|
|
|
164 |
|
|
/* NOTE(review): definitions are not visible from this header; see them for
   the exact behaviour of these two routines.  */
void trace_vliw_wait_cycles (SIM_CPU *);
void handle_resource_wait (SIM_CPU *);
|
166 |
|
|
|
167 |
|
|
void request_cache_load (SIM_CPU *, INT, int, int);
|
168 |
|
|
void request_cache_flush (SIM_CPU *, FRV_CACHE *, int);
|
169 |
|
|
void request_cache_invalidate (SIM_CPU *, FRV_CACHE *, int);
|
170 |
|
|
void request_cache_preload (SIM_CPU *, FRV_CACHE *, int);
|
171 |
|
|
void request_cache_unlock (SIM_CPU *, FRV_CACHE *, int);
|
172 |
|
|
int load_pending_for_register (SIM_CPU *, int, int, int);
|
173 |
|
|
|
174 |
|
|
/* NOTE(review): presumably these maintain and query the per-register
   gr_complex flag bits held in FRV_PROFILE_STATE -- confirm against the
   definitions.  */
void set_use_is_gr_complex (SIM_CPU *, INT);
void set_use_not_gr_complex (SIM_CPU *, INT);
int use_is_gr_complex (SIM_CPU *, INT);
|
177 |
|
|
|
178 |
|
|
typedef struct
|
179 |
|
|
{
|
180 |
|
|
SI address;
|
181 |
|
|
unsigned reqno;
|
182 |
|
|
} FRV_INSN_FETCH_BUFFER;
|
183 |
|
|
|
184 |
|
|
/* Defined elsewhere; the element count is not visible from this
   declaration.  */
extern FRV_INSN_FETCH_BUFFER frv_insn_fetch_buffer[];
|
185 |
|
|
|
186 |
|
|
/* NOTE(review): presumably PROFILE_INFO_CPU_CALLBACK_FN is a function type,
   which would make this a prototype for frv_profile_info -- confirm.  */
PROFILE_INFO_CPU_CALLBACK_FN frv_profile_info;
|
187 |
|
|
|
188 |
|
|
enum {
|
189 |
|
|
/* Simulator specific profile bits begin here. */
|
190 |
|
|
/* Profile caches. */
|
191 |
|
|
PROFILE_CACHE_IDX = PROFILE_NEXT_IDX,
|
192 |
|
|
/* Profile parallelization. */
|
193 |
|
|
PROFILE_PARALLEL_IDX
|
194 |
|
|
};
|
195 |
|
|
|
196 |
|
|
/* Masks so WITH_PROFILE can have symbolic values.
   The case choice here is on purpose.  The lowercase parts are args to
   --with-profile.
   Each mask is built from its own index (PROFILE_CACHE_IDX,
   PROFILE_PARALLEL_IDX) so that the two occupy distinct bits instead of
   both reusing the PROFILE_INSN_IDX bit.  */
#define PROFILE_cache    (1 << PROFILE_CACHE_IDX)
#define PROFILE_parallel (1 << PROFILE_PARALLEL_IDX)

/* Preprocessor macros to simplify tests of WITH_PROFILE.  Each one tests
   the mask defined above for the same feature.  */
#define WITH_PROFILE_CACHE_P    (WITH_PROFILE & PROFILE_cache)
#define WITH_PROFILE_PARALLEL_P (WITH_PROFILE & PROFILE_parallel)
|
205 |
|
|
|
206 |
|
|
/* True when PROFILE_MODEL_P (CPU) and CONDITION both hold, or when
   frv_interrupt_state.timer.enabled is nonzero regardless of the other
   two.  */
#define FRV_COUNT_CYCLES(cpu, condition) \
  ((PROFILE_MODEL_P (cpu) && (condition)) || frv_interrupt_state.timer.enabled)
|
208 |
|
|
|
209 |
|
|
/* Modelling support.  */
extern int frv_save_profile_model_p;

/* Values taken by model_insn; FRV_INSN_NO_MODELING is zero and the rest
   follow consecutively.  NOTE(review): the names suggest successive
   modeling stages -- confirm where model_insn is assigned.  */
extern enum FRV_INSN_MODELING {
  FRV_INSN_NO_MODELING = 0,
  FRV_INSN_MODEL_PASS_1,
  FRV_INSN_MODEL_PASS_2,
  FRV_INSN_MODEL_WRITEBACK
} model_insn;
|
218 |
|
|
|
219 |
|
|
void
|
220 |
|
|
frv_model_advance_cycles (SIM_CPU *, int);
|
221 |
|
|
void
|
222 |
|
|
frv_model_trace_wait_cycles (SIM_CPU *, int, const char *);
|
223 |
|
|
|
224 |
|
|
/* Register types for queued load requests.  */
#define REGTYPE_NONE 0
#define REGTYPE_FR   1
#define REGTYPE_ACC  2
|
228 |
|
|
|
229 |
|
|
#endif /* PROFILE_H */
|