/* Inlining decision heuristics.
   Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows one to check that a particular inlining is
     allowed by the limits specified by the user (allowed function growth
     and so on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inlining

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can do only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result, the
       callees of the function seen by the early inliner were already optimized
       and the results of early inlining add a lot of optimization opportunities
       for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate abstraction
       penalty in C++ code (via a combination of inlining and early
       optimization) and thus improve the quality of analysis done by real IPA
       optimizers.

       Because of the lack of whole unit knowledge, the pass cannot really make
       good code size/performance tradeoffs.  It however does very simple
       speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing so.

       This heuristic is not very good at inlining recursive calls.  Recursive
       calls can be inlined with results similar to loop unrolling.  To do so,
       a special purpose recursive inliner is executed on the function when a
       recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining leads
       to devirtualization and other modifications of the callgraph, so
       functions may become unreachable during the process.  Also functions
       declared as extern inline or virtual functions are removed, since after
       inlining we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to a reduction of code size by
       eliminating the need for an offline copy of the function.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "gimple-pretty-print.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "coverage.h"
#include "ggc.h"
#include "rtl.h"
#include "tree-flow.h"
#include "ipa-prop.h"
#include "except.h"
#include "target.h"
#include "ipa-inline.h"
#include "ipa-utils.h"

/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static gcov_type max_count;

/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the callers.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  struct inline_summary *info, *what_info, *outer_info = inline_summary (to);

  /* Look for the function e->caller is inlined into.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of the inline code
     we immediately inline into.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent the compiler from exploding".  */
  while (true)
    {
      info = inline_summary (to);
      if (limit < info->self_size)
        limit = info->self_size;
      if (stack_size_limit < info->estimated_self_stack_size)
        stack_size_limit = info->estimated_self_stack_size;
      if (to->global.inlined_to)
        to = to->callers->caller;
      else
        break;
    }

  what_info = inline_summary (what);

  if (limit < what_info->self_size)
    limit = what_info->self_size;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= info->size
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large i/o datastructures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     the exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
                       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);

  inlined_stack = (outer_info->stack_frame_offset
                   + outer_info->estimated_self_stack_size
                   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     the stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If the function already has large stack usage from a sibling
         inline call, we can inline, too.
         This bit overoptimistically assumes that we are good at stack
         packing.  */
      && inlined_stack > info->estimated_stack_size
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}

/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_file)
    {
      fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
               cgraph_node_name (e->caller), e->caller->uid,
               cgraph_node_name (e->callee), e->callee->uid,
               cgraph_inline_failed_string (e->inline_failed));
    }
}

/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report)
{
  bool inlinable = true;
  enum availability avail;
  struct cgraph_node *callee
    = cgraph_function_or_thunk_node (e->callee, &avail);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
  struct function *callee_cfun
    = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;

  if (!caller_cfun && e->caller->clone_of)
    caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);

  if (!callee_cfun && callee && callee->clone_of)
    callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);

  gcc_assert (e->inline_failed);

  if (!callee || !callee->analyzed)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  else if (!inline_summary (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  else if (avail <= AVAIL_OVERWRITABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      return false;
    }
  else if (e->call_stmt_cannot_inline_p)
    {
      e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
      inlinable = false;
    }
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
           && DECL_FUNCTION_PERSONALITY (callee->decl)
           && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
               != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl)
           && !is_tm_pure (e->caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Don't inline if the callee can throw non-call exceptions but the
     caller cannot.
     FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
     Move the flag into the cgraph node or mirror it in the inline summary.  */
  else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
           && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
    {
      e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (e->caller->decl,
                                                callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  /* Check if caller growth allows the inlining.  */
  else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
           && !lookup_attribute ("flatten",
                                 DECL_ATTRIBUTES
                                   (e->caller->global.inlined_to
                                    ? e->caller->global.inlined_to->decl
                                    : e->caller->decl))
           && !caller_growth_limits (e))
    inlinable = false;
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      struct cl_optimization *caller_opt
        = TREE_OPTIMIZATION ((caller_tree)
                             ? caller_tree
                             : optimization_default_node);

      struct cl_optimization *callee_opt
        = TREE_OPTIMIZATION ((callee_tree)
                             ? callee_tree
                             : optimization_default_node);

      if (((caller_opt->x_optimize > callee_opt->x_optimize)
           || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
          /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
          && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}

/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
                                                              NULL);
  /* The early inliner might get called at WPA stage when an IPA pass adds a
     new function.  In this case we cannot really do any early inlining
     because function bodies are missing.  */
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner, yet).  We don't have a CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_file)
        fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true))
    return false;
  return true;
}


/* Return true when N is a leaf function.  Accept cheap builtins
   in leaf functions.  */

static bool
leaf_node_p (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      return false;
  return true;
}

/* Return true if we are interested in inlining the small function
   E->callee.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !flag_inline_small_functions)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      if (growth <= 0)
        ;
      else if (!cgraph_maybe_hot_edge_p (e)
               && growth > 0)
        {
          if (dump_file)
            fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
                     "call is cold and code would grow by %i\n",
                     cgraph_node_name (e->caller), e->caller->uid,
                     cgraph_node_name (callee), callee->uid,
                     growth);
          want_inline = false;
        }
      else if (!leaf_node_p (callee)
               && growth > 0)
        {
          if (dump_file)
            fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
                     "callee is not leaf and code would grow by %i\n",
                     cgraph_node_name (e->caller), e->caller->uid,
                     cgraph_node_name (callee), callee->uid,
                     growth);
          want_inline = false;
        }
      else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
        {
          if (dump_file)
            fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
                     "growth %i exceeds --param early-inlining-insns\n",
                     cgraph_node_name (e->caller), e->caller->uid,
                     cgraph_node_name (callee), callee->uid,
                     growth);
          want_inline = false;
        }
    }
  return want_inline;
}

/* Return true if we are interested in inlining the small function E->callee.
   When REPORT is true, report the reason to the dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !flag_inline_small_functions)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);

      if (growth <= 0)
        ;
      else if (DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= MAX_INLINE_INSNS_SINGLE)
        {
          e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
        }
      /* Before giving up based on the fact that caller size will grow, allow
         functions that are called few times and for which eliminating the
         offline copy will lead to an overall code size reduction.
         Not all of these will be handled by subsequent inlining of functions
         called once: in particular weak functions are not handled, nor
         functions that inline to multiple calls but where a lot of the body
         is optimized out.
         Finally we want to inline earlier to allow inlining of callbacks.

         This is slightly wrong on the aggressive side: it is entirely possible
         that a function is called many times with a context where inlining
         reduces code size and few times with a context where inlining
         increases code size.  The resulting growth estimate will be negative
         even if it would make more sense to keep the offline copy and not
         inline into the call sites that make the code size grow.

         When badness orders the calls in a way that code reducing calls come
         first, this situation is not a problem at all: after inlining all
         "good" calls, we will realize that keeping the function around is
         better.  */
      else if (growth <= MAX_INLINE_INSNS_SINGLE
               /* Unlike for functions called once, we play unsafe with
                  COMDATs.  We can allow that since we know the functions
                  in consideration are small (and thus the risk is small) and
                  moreover the growth estimate already accounts for the fact
                  that COMDAT functions may or may not disappear when
                  eliminated from the current unit.  With good probability
                  making the aggressive choice in all units is going to make
                  the overall program smaller.

                  Consequently we ask cgraph_can_remove_if_no_direct_calls_p
                  instead of
                  cgraph_will_be_removed_from_program_if_no_direct_calls  */
               && !DECL_EXTERNAL (callee->decl)
               && cgraph_can_remove_if_no_direct_calls_p (callee)
               && estimate_growth (callee) <= 0)
        ;
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !flag_inline_functions)
        {
          e->inline_failed = CIF_NOT_DECLARED_INLINED;
          want_inline = false;
        }
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= MAX_INLINE_INSNS_AUTO)
        {
          e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
          want_inline = false;
        }
      /* If the call is cold, do not inline when the function body would
         grow.  */
      else if (!cgraph_maybe_hot_edge_p (e))
        {
          e->inline_failed = CIF_UNLIKELY_CALL;
          want_inline = false;
        }
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}

/* EDGE is a self-recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
                                   struct cgraph_node *outer_node,
                                   bool peeling,
                                   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  int caller_freq = CGRAPH_FREQ_BASE;
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);

  if (!cgraph_maybe_hot_edge_p (edge))
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (max_count && !outer_node->count)
    {
      reason = "not executed in profile";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }

  if (outer_node->global.inlined_to)
    caller_freq = outer_node->callers->frequency;

  if (!want_inline)
    ;
  /* Inlining of a self-recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self-recursive function very small.
     Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
  else if (peeling)
    {
      int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
                                         / max_depth);
      int i;
      for (i = 1; i < depth; i++)
        max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
      if (max_count
          && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
              >= max_prob))
        {
          reason = "profile of recursive call is too large";
          want_inline = false;
        }
      if (!max_count
          && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
              >= max_prob))
        {
          reason = "frequency of recursive call is too large";
          want_inline = false;
        }
    }
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
     the recursion depth is large.  We reduce function call overhead and
     increase chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup,
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self-recursive call within a loop also results
     in large loop depths that generally optimize badly.  We may want to
     throttle down inlining in those cases.  In particular this seems to
     happen in one of the libstdc++ rb tree methods.  */
  else
    {
      if (max_count
          && (edge->count * 100 / outer_node->count
              <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
        {
          reason = "profile of recursive call is too small";
          want_inline = false;
        }
      else if (!max_count
               && (edge->frequency * 100 / caller_freq
                   <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
        {
          reason = "frequency of recursive call is too small";
          want_inline = false;
        }
    }
  if (!want_inline && dump_file)
    fprintf (dump_file, "   not inlining recursively: %s\n", reason);
  return want_inline;
}

/* Return true when NODE has a caller other than EDGE.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_caller_edge (struct cgraph_node *node, void *edge)
{
  return (node->callers
          && node->callers != edge);
}


/* Decide if NODE is called once and inlining it would eliminate the need
   for the offline copy of the function.  */

static bool
want_inline_function_called_once_p (struct cgraph_node *node)
{
  struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
  /* Already inlined?  */
  if (function->global.inlined_to)
    return false;
  /* Zero or more than one caller?  */
  if (!node->callers
      || node->callers->next_caller)
    return false;
  /* Maybe other aliases have more direct calls.  */
  if (cgraph_for_node_and_aliases (node, check_caller_edge, node->callers, true))
    return false;
  /* A recursive call makes no sense to inline.  */
  if (cgraph_edge_recursive_p (node->callers))
    return false;
  /* External functions are not really in the unit, so inlining
     them when called once would just increase the program size.  */
  if (DECL_EXTERNAL (function->decl))
    return false;
  /* Offline body must be optimized out.  */
  if (!cgraph_will_be_removed_from_program_if_no_direct_calls (function))
    return false;
  if (!can_inline_edge_p (node->callers, true))
    return false;
  return true;
}

/* Return relative time improvement for inlining EDGE in range
   1...2^9.  */

static inline int
relative_time_benefit (struct inline_summary *callee_info,
                       struct cgraph_edge *edge,
                       int time_growth)
{
  int relbenefit;
  gcov_type uninlined_call_time;

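  /* Estimate the time spent on this call when the callee is not inlined:
     the callee run time plus the cost of the call statement itself, scaled
     by the frequency of the edge.  */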
  uninlined_call_time =
    ((gcov_type)
     (callee_info->time
      + inline_edge_summary (edge)->call_stmt_time) * edge->frequency
     + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
  /* Compute relative time benefit, i.e. how much the call becomes faster.
     ??? perhaps computing how much the caller+callee together become faster
     would lead to more realistic results.  */
  if (!uninlined_call_time)
    uninlined_call_time = 1;
  relbenefit =
    (uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
  relbenefit = MIN (relbenefit, 512);
  relbenefit = MAX (relbenefit, 1);
  return relbenefit;
}

/* A cost model driving the inlining heuristics in a way so the edges with
   smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed so the
   metrics may accurately depend on values such as number of inlinable callers
   of the function or function body size.  */

static int
edge_badness (struct cgraph_edge *edge, bool dump)
{
  gcov_type badness;
  int growth, time_growth;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
                                                              NULL);
  struct inline_summary *callee_info = inline_summary (callee);

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    return INT_MIN;

  growth = estimate_edge_growth (edge);
  time_growth = estimate_edge_time (edge);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
               cgraph_node_name (edge->caller),
               cgraph_node_name (callee));
      fprintf (dump_file, "      size growth %i, time growth %i\n",
               growth,
               time_growth);
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = INT_MIN / 2 + growth;
      if (dump)
        fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
                 growth);
    }

  /* When profiling is available, compute badness as:

                relative_edge_count * relative_time_benefit
     goodness = -------------------------------------------
                                edge_growth
     badness = -goodness

     The fraction is upside down, because on edge counts and time benefits
     the bounds are known.  Edge growth is essentially unlimited.  */

  else if (max_count)
    {
      int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
      badness =
        ((int)
         ((double) edge->count * INT_MIN / 2 / max_count / 512) *
         relative_time_benefit (callee_info, edge, time_growth)) / growth;

      /* Be sure that insanity of the profile won't lead to increasing counts
         in the scaling and thus to overflow in the computation above.  */
      gcc_assert (max_count >= edge->count);
      if (dump)
        {
          fprintf (dump_file,
                   "      %i (relative %f): profile info. Relative count %f"
                   " * Relative benefit %f\n",
                   (int) badness, (double) badness / INT_MIN,
                   (double) edge->count / max_count,
                   relbenefit * 100 / 256.0);
        }
    }

  /* When a function local profile is available, compute badness as:

                        growth_of_callee
     badness = -------------------------------------- + growth_for-all
               relative_time_benefit * edge_frequency

  */
  else if (flag_guess_branch_prob)
    {
      int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;

      div = MAX (div, 1);
      gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
      div *= relative_time_benefit (callee_info, edge, time_growth);

      /* frequency is normalized in range 1...2^10.
         relbenefit in range 1...2^9
         DIV should be in range 1....2^19.  */
      gcc_checking_assert (div >= 1 && div <= (1<<19));

      /* Result must be integer in range 0...INT_MAX.
         Set the base of the fixed point calculation so we don't lose much of
         the precision for small badnesses (those are interesting) yet we don't
         overflow for growths that are still in an interesting range.

         Fixed point arithmetic with point at the 8th bit.  */
      badness = ((gcov_type)growth) * (1<<(19+8));
      badness = (badness + div / 2) / div;

      /* Overall growth of inlining all calls of a function matters: we want to
         inline so the offline copy of the function is no longer needed.

         Additionally functions that can be fully inlined without much
         effort are better inline candidates than functions that can be fully
         inlined only after noticeable overall unit growth.  The latter
         are better in the sense of compressing code size by factoring out
         common code into a separate function shared by multiple code paths.

         We might mix the value into the fraction by taking into account
         relative growth of the unit, but for now just add the number
         into the resulting fraction.  */
      if (badness > INT_MAX / 2)
        {
          badness = INT_MAX / 2;
          if (dump)
            fprintf (dump_file, "Badness overflow\n");
        }
      if (dump)
        {
          fprintf (dump_file,
                   "      %i: guessed profile. frequency %f,"
                   " benefit %f%%, divisor %i\n",
                   (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
                   relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
        }
    }
  /* When a function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall number
     of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
      badness = growth * 256;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
        badness >>= nest;
      else
        {
          badness <<= nest;
        }
      if (dump)
        fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
                 nest);
    }

  /* Ensure that we did not overflow in all the fixed point math above.  */
  gcc_assert (badness >= INT_MIN);
  gcc_assert (badness <= INT_MAX - 1);
  /* Make recursive inlining happen always after other inlining is done.  */
  if (cgraph_edge_recursive_p (edge))
    return badness + 1;
  else
    return badness;
}

/* Recompute badness of EDGE and update its key in HEAP if needed.  */
static inline void
update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
{
  int badness = edge_badness (edge, false);
  if (edge->aux)
    {
      fibnode_t n = (fibnode_t) edge->aux;
      gcc_checking_assert (n->data == edge);

      /* fibheap_replace_key only decreases the keys.
         When we increase the key we do not update the heap
         and instead re-insert the element once it becomes
         a minimum of the heap.  */
      if (badness < n->key)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file,
                       "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
                       cgraph_node_name (edge->caller), edge->caller->uid,
                       cgraph_node_name (edge->callee), edge->callee->uid,
                       (int)n->key,
                       badness);
            }
          fibheap_replace_key (heap, n, badness);
          gcc_checking_assert (n->key == badness);
        }
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "  enqueuing call %s/%i -> %s/%i, badness %i\n",
                   cgraph_node_name (edge->caller), edge->caller->uid,
                   cgraph_node_name (edge->callee), edge->callee->uid,
                   badness);
        }
      edge->aux = fibheap_insert (heap, badness, edge);
    }
}

/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need to be reset
   because a better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  int i;
  struct ipa_ref *ref;

  if (where->global.inlined_to)
    where = where->global.inlined_to;

  /* WHERE body size has changed, the cached growth is invalid.  */
  reset_node_growth_cache (where);

  for (edge = where->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      reset_edge_growth_cache (edge);
  for (i = 0; ipa_ref_list_refering_iterate (&where->ref_list, i, ref); i++)
    if (ref->use == IPA_REF_ALIAS)
      reset_edge_caches (ipa_ref_refering_node (ref));

  if (!e)
    return;

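  /* Walk NODE and all functions inlined into it (edges whose inline_failed
     is cleared are already-inlined calls), resetting the edge growth cache
     of every call that has not been inlined yet.  */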
  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        if (e->inline_failed)
          reset_edge_growth_cache (e);
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}

/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes,
                    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  int i;
  struct ipa_ref *ref;

  if ((!node->alias && !inline_summary (node)->inlinable)
      || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
      || node->global.inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->uid))
    return;

  for (i = 0; ipa_ref_list_refering_iterate (&node->ref_list, i, ref); i++)
    if (ref->use == IPA_REF_ALIAS)
      {
        struct cgraph_node *alias = ipa_ref_refering_node (ref);
        update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
      }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        if (!check_inlinablity_for
            || check_inlinablity_for == edge)
          {
            if (can_inline_edge_p (edge, false)
                && want_inline_small_function_p (edge, false))
              update_edge_key (heap, edge);
            else if (edge->aux)
              {
                report_inline_failed_reason (edge);
                fibheap_delete_node (heap, (fibnode_t) edge->aux);
                edge->aux = NULL;
              }
          }
        else if (edge->aux)
          update_edge_key (heap, edge);
      }
}

/* Recompute HEAP nodes for each uninlined call in NODE.
   This is used when we know that edge badnesses are going only to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;

  if (!e)
    return;
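  /* Walk NODE and all functions inlined into it; for every call that has not
     been inlined yet, recompute its badness key in the heap or drop it when
     it no longer qualifies for inlining.  */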
  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        enum availability avail;
        struct cgraph_node *callee;
        /* We do not reset the callee growth cache here.  Since we added a new
           call, growth could have only increased and consequently the badness
           metric does not need updating.  */
        if (e->inline_failed
            && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
            && inline_summary (callee)->inlinable
            && cgraph_function_body_availability (callee) >= AVAIL_AVAILABLE
            && !bitmap_bit_p (updated_nodes, callee->uid))
          {
            if (can_inline_edge_p (e, false)
                && want_inline_small_function_p (e, false))
              update_edge_key (heap, e);
            else if (e->aux)
              {
                report_inline_failed_reason (e);
                fibheap_delete_node (heap, (fibnode_t) e->aux);
                e->aux = NULL;
              }
          }
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}

/* Recompute heap nodes for the caller edges of each of the callees.
   Walk recursively into all inline clones.  */

static void
update_all_callee_keys (fibheap_t heap, struct cgraph_node *node,
                        bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;
  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
                                                                    NULL);

        /* We inlined and thus callees might have a different number of calls.
           Reset their caches.  */
        reset_node_growth_cache (callee);
        if (e->inline_failed)
          update_caller_keys (heap, callee, updated_nodes, e);
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}

/* Enqueue all recursive calls from NODE into the priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        fibheap_t heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
        || (cgraph_function_or_thunk_node (e->callee, &avail) == node
            && avail > AVAIL_OVERWRITABLE))
      {
        /* When profile feedback is available, prioritize by expected number
           of calls.  */
        fibheap_insert (heap,
                        !max_count ? -e->frequency
                        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
                        e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}

/* Decide on recursive inlining: in the case the function has recursive calls,
   inline until body size reaches the given argument.  If any new indirect
   edges are discovered in the process, add them to *NEW_EDGES, unless
   NEW_EDGES is NULL.  */

static bool
recursive_inlining (struct cgraph_edge *edge,
                    VEC (cgraph_edge_p, heap) **new_edges)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  fibheap_t heap;
  struct cgraph_node *node;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone = NULL, *next;
  int depth = 0;
  int n = 0;

  node = edge->caller;
  if (node->global.inlined_to)
    node = node->global.inlined_to;

  if (DECL_DECLARED_INLINE_P (node->decl))
    limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);

  /* Make sure that the function is small enough to be considered for
     inlining.  */
  if (estimate_size_after_inlining (node, edge) >= limit)
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             cgraph_node_name (node));

  /* Do the inlining and update the list of recursive calls during the
     process.  */
  while (!fibheap_empty (heap))
    {
      struct cgraph_edge *curr
        = (struct cgraph_edge *) fibheap_extract_min (heap);
      struct cgraph_node *cnode;

      if (estimate_size_after_inlining (node, curr) > limit)
        break;

      if (!can_inline_edge_p (curr, true))
        continue;

      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl
            == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
          depth++;

      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
        continue;

      if (dump_file)
        {
          fprintf (dump_file,
                   "   Inlining call of depth %i", depth);
          if (node->count)
            {
              fprintf (dump_file, " called approx. %.2f times per call",
                       (double)curr->count / node->count);
            }
          fprintf (dump_file, "\n");
        }
      if (!master_clone)
        {
          /* We need the original clone to copy around.  */
          master_clone = cgraph_clone_node (node, node->decl,
                                            node->count, CGRAPH_FREQ_BASE,
                                            false, NULL, true);
          for (e = master_clone->callees; e; e = e->next_callee)
            if (!e->inline_failed)
              clone_inlined_nodes (e, true, false, NULL);
        }

      cgraph_redirect_edge_callee (curr, master_clone);
      inline_call (curr, false, new_edges, &overall_size);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }

  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");
  fibheap_delete (heap);

  if (!master_clone)
    return false;

  if (dump_file)
    fprintf (dump_file,
             "\n   Inlined %i times, "
             "body grown from size %i to %i, time %i to %i\n", n,
             inline_summary (master_clone)->size, inline_summary (node)->size,
             inline_summary (master_clone)->time, inline_summary (node)->time);

  /* Remove the master clone we used for inlining.  We rely on the fact that
     clones inlined into the master clone get queued just before the master
     clone, so we don't need recursion.  */
  for (node = cgraph_nodes; node != master_clone;
       node = next)
    {
      next = node->next;
      if (node->global.inlined_to == master_clone)
        cgraph_remove_node (node);
    }
  cgraph_remove_node (master_clone);
  return true;
}

/* Given whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */

static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

  return ((HOST_WIDEST_INT) max_insns
          * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}

/* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */

static void
add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
{
  while (VEC_length (cgraph_edge_p, new_edges) > 0)
    {
      struct cgraph_edge *edge = VEC_pop (cgraph_edge_p, new_edges);

      gcc_assert (!edge->aux);
      if (edge->inline_failed
          && can_inline_edge_p (edge, true)
          && want_inline_small_function_p (edge, true))
        edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
    }
}

/* We use a greedy algorithm for inlining of small functions:
   All inline candidates are put into a prioritized heap ordered in
   increasing badness.

   The inlining of small functions is bounded by unit growth parameters.  */

static void
inline_small_functions (void)
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  fibheap_t heap = fibheap_new ();
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
  int min_size, max_size;
  VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;
  int initial_size = 0;

  if (flag_indirect_inlining)
    new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);

  if (dump_file)
    fprintf (dump_file,
             "\nDeciding on inlining of small functions.  Starting with size %i.\n",
             initial_size);

  /* Compute overall unit size and other global parameters used by badness
     metrics.  */

  max_count = 0;
  initialize_growth_caches ();

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      {
        if (cgraph_function_with_gimple_body_p (node)
            || node->thunk.thunk_p)
          {
            struct inline_summary *info = inline_summary (node);

            if (!DECL_EXTERNAL (node->decl))
              initial_size += info->size;
          }

        for (edge = node->callers; edge; edge = edge->next_caller)
          if (max_count < edge->count)
            max_count = edge->count;
      }

  overall_size = initial_size;
  max_size = compute_max_insns (overall_size);
  min_size = overall_size;

  /* Populate the heap with all edges we might inline.  */

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      {
        if (dump_file)
          fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
                   cgraph_node_name (node), node->uid);

        for (edge = node->callers; edge; edge = edge->next_caller)
          if (edge->inline_failed
              && can_inline_edge_p (edge, true)
              && want_inline_small_function_p (edge, true)
              && edge->inline_failed)
            {
              gcc_assert (!edge->aux);
              update_edge_key (heap, edge);
            }
      }

  gcc_assert (in_lto_p
              || !max_count
              || (profile_info && flag_branch_probabilities));

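  /* Main loop: repeatedly take the edge with the smallest badness from the
     heap, re-validate it against the limits, and, when all checks pass,
     perform the inlining.  */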
  while (!fibheap_empty (heap))
    {
      int old_size = overall_size;
      struct cgraph_node *where, *callee;
      int badness = fibheap_min_key (heap);
      int current_badness;
      int cached_badness;
      int growth;

      edge = (struct cgraph_edge *) fibheap_extract_min (heap);
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
        continue;

      /* Be sure that caches are kept consistent.
         We can not make this ENABLE_CHECKING only because it causes different
         updates of the fibheap queue.  */
      cached_badness = edge_badness (edge, false);
      reset_edge_growth_cache (edge);
      reset_node_growth_cache (edge->callee);

      /* When updating the edge costs, we only decrease badness in the keys.
         Increases of badness are handled lazily; when we see a key with an
         out-of-date value on it, we re-insert it now.  */
      current_badness = edge_badness (edge, false);
      gcc_assert (cached_badness == current_badness);
      gcc_assert (current_badness >= badness);
      if (current_badness != badness)
        {
          edge->aux = fibheap_insert (heap, current_badness, edge);
          continue;
        }

      if (!can_inline_edge_p (edge, true))
        continue;

      callee = cgraph_function_or_thunk_node (edge->callee, NULL);
      growth = estimate_edge_growth (edge);
      if (dump_file)
        {
          fprintf (dump_file,
                   "\nConsidering %s with %i size\n",
                   cgraph_node_name (callee),
                   inline_summary (callee)->size);
          fprintf (dump_file,
                   " to be inlined into %s in %s:%i\n"
                   " Estimated growth after inlined into all is %+i insns.\n"
                   " Estimated badness is %i, frequency %.2f.\n",
                   cgraph_node_name (edge->caller),
                   flag_wpa ? "unknown"
                   : gimple_filename ((const_gimple) edge->call_stmt),
                   flag_wpa ? -1
                   : gimple_lineno ((const_gimple) edge->call_stmt),
                   estimate_growth (callee),
                   badness,
                   edge->frequency / (double)CGRAPH_FREQ_BASE);
          if (edge->count)
            fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
                     edge->count);
          if (dump_flags & TDF_DETAILS)
            edge_badness (edge, true);
        }

      if (overall_size + growth > max_size
          && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        {
          edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
          report_inline_failed_reason (edge);
          continue;
        }

      if (!want_inline_small_function_p (edge, true))
        continue;

      /* The heuristics for inlining small functions work poorly for
         recursive calls, where we achieve an effect similar to loop
         unrolling.  When inlining such an edge seems profitable, leave
         the decision to the special-purpose recursive inliner.  */
      if (cgraph_edge_recursive_p (edge))
        {
          where = edge->caller;
          if (where->global.inlined_to)
            where = where->global.inlined_to;
          if (!recursive_inlining (edge,
                                   flag_indirect_inlining
                                   ? &new_indirect_edges : NULL))
            {
              edge->inline_failed = CIF_RECURSIVE_INLINING;
|
1478 |
|
|
continue;
|
1479 |
|
|
}
|
1480 |
|
|
reset_edge_caches (where);
|
1481 |
|
|
/* Recursive inliner inlines all recursive calls of the function
|
1482 |
|
|
at once. Consequently we need to update all callee keys. */
|
1483 |
|
|
if (flag_indirect_inlining)
|
1484 |
|
|
add_new_edges_to_heap (heap, new_indirect_edges);
|
1485 |
|
|
update_all_callee_keys (heap, where, updated_nodes);
|
1486 |
|
|
}
|
1487 |
|
|
else
|
1488 |
|
|
{
|
1489 |
|
|
struct cgraph_node *outer_node = NULL;
|
1490 |
|
|
int depth = 0;
|
1491 |
|
|
|
1492 |
|
|
/* Consider the case where self recursive function A is inlined into B.
|
1493 |
|
|
This is desired optimization in some cases, since it leads to effect
|
1494 |
|
|
similar of loop peeling and we might completely optimize out the
|
1495 |
|
|
recursive call. However we must be extra selective. */
|
1496 |
|
|
|
1497 |
|
|
where = edge->caller;
|
1498 |
|
|
while (where->global.inlined_to)
|
1499 |
|
|
{
|
1500 |
|
|
if (where->decl == callee->decl)
|
1501 |
|
|
outer_node = where, depth++;
|
1502 |
|
|
where = where->callers->caller;
|
1503 |
|
|
}
|
1504 |
|
|
if (outer_node
|
1505 |
|
|
&& !want_inline_self_recursive_call_p (edge, outer_node,
|
1506 |
|
|
true, depth))
|
1507 |
|
|
{
|
1508 |
|
|
edge->inline_failed
|
1509 |
|
|
= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
|
1510 |
|
|
? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
|
1511 |
|
|
continue;
|
1512 |
|
|
}
|
1513 |
|
|
else if (depth && dump_file)
|
1514 |
|
|
fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
|
1515 |
|
|
|
1516 |
|
|
gcc_checking_assert (!callee->global.inlined_to);
|
1517 |
|
|
inline_call (edge, true, &new_indirect_edges, &overall_size);
|
1518 |
|
|
if (flag_indirect_inlining)
|
1519 |
|
|
add_new_edges_to_heap (heap, new_indirect_edges);
|
1520 |
|
|
|
1521 |
|
|
reset_edge_caches (edge->callee);
|
1522 |
|
|
reset_node_growth_cache (callee);
|
1523 |
|
|
|
1524 |
|
|
/* We inlined last offline copy to the body. This might lead
|
1525 |
|
|
to callees of function having fewer call sites and thus they
|
1526 |
|
|
may need updating.
|
1527 |
|
|
|
1528 |
|
|
FIXME: the callee size could also shrink because more information
|
1529 |
|
|
is propagated from caller. We don't track when this happen and
|
1530 |
|
|
thus we need to recompute everything all the time. Once this is
|
1531 |
|
|
solved, "|| 1" should go away. */
|
1532 |
|
|
if (callee->global.inlined_to || 1)
|
1533 |
|
|
update_all_callee_keys (heap, callee, updated_nodes);
|
1534 |
|
|
else
|
1535 |
|
|
update_callee_keys (heap, edge->callee, updated_nodes);
|
1536 |
|
|
}
|
1537 |
|
|
where = edge->caller;
|
1538 |
|
|
if (where->global.inlined_to)
|
1539 |
|
|
where = where->global.inlined_to;
|
1540 |
|
|
|
1541 |
|
|
/* Our profitability metric can depend on local properties
|
1542 |
|
|
such as number of inlinable calls and size of the function body.
|
1543 |
|
|
After inlining these properties might change for the function we
|
1544 |
|
|
inlined into (since it's body size changed) and for the functions
|
1545 |
|
|
called by function we inlined (since number of it inlinable callers
|
1546 |
|
|
might change). */
|
1547 |
|
|
update_caller_keys (heap, where, updated_nodes, NULL);
|
1548 |
|
|
|
1549 |
|
|
/* We removed one call of the function we just inlined. If offline
|
1550 |
|
|
copy is still needed, be sure to update the keys. */
|
1551 |
|
|
if (callee != where && !callee->global.inlined_to)
|
1552 |
|
|
update_caller_keys (heap, callee, updated_nodes, NULL);
|
1553 |
|
|
bitmap_clear (updated_nodes);
|
1554 |
|
|
|
1555 |
|
|
if (dump_file)
|
1556 |
|
|
{
|
1557 |
|
|
fprintf (dump_file,
|
1558 |
|
|
" Inlined into %s which now has time %i and size %i,"
|
1559 |
|
|
"net change of %+i.\n",
|
1560 |
|
|
cgraph_node_name (edge->caller),
|
1561 |
|
|
inline_summary (edge->caller)->time,
|
1562 |
|
|
inline_summary (edge->caller)->size,
|
1563 |
|
|
overall_size - old_size);
|
1564 |
|
|
}
|
1565 |
|
|
if (min_size > overall_size)
|
1566 |
|
|
{
|
1567 |
|
|
min_size = overall_size;
|
1568 |
|
|
max_size = compute_max_insns (min_size);
|
1569 |
|
|
|
1570 |
|
|
if (dump_file)
|
1571 |
|
|
fprintf (dump_file, "New minimal size reached: %i\n", min_size);
|
1572 |
|
|
}
|
1573 |
|
|
}
|
1574 |
|
|
|
1575 |
|
|
free_growth_caches ();
|
1576 |
|
|
if (new_indirect_edges)
|
1577 |
|
|
VEC_free (cgraph_edge_p, heap, new_indirect_edges);
|
1578 |
|
|
fibheap_delete (heap);
|
1579 |
|
|
if (dump_file)
|
1580 |
|
|
fprintf (dump_file,
|
1581 |
|
|
"Unit growth for small function inlining: %i->%i (%i%%)\n",
|
1582 |
|
|
initial_size, overall_size,
|
1583 |
|
|
initial_size ? overall_size * 100 / (initial_size) - 100: 0);
|
1584 |
|
|
BITMAP_FREE (updated_nodes);
|
1585 |
|
|
}
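
/* Illustrative sketch (not part of GCC): the loop above is a greedy
   strategy driven by a priority queue whose keys ("badness") can go
   stale, because only decreases are applied eagerly; increases are
   validated lazily on extraction and stale entries are re-inserted
   with the fresh key.  The standalone model below shows that
   lazy-update pattern with a hypothetical array-based queue and made-up
   badness values.  It is guarded by #if 0 so it does not affect this
   translation unit; compile it separately to run it.  */
#if 0
#include <stdio.h>

#define NITEMS 4

/* The stored key may be smaller (better) than the current true badness.  */
struct item { const char *name; int stored_key; int true_badness; };

static struct item queue[NITEMS] = {
  { "a", 3, 3 }, { "b", 1, 5 }, { "c", 2, 2 }, { "d", 4, 4 }
};
static int in_queue[NITEMS] = { 1, 1, 1, 1 };

/* Extract the element with the smallest stored key; -1 when empty.  */
static int
extract_min (void)
{
  int i, best = -1;
  for (i = 0; i < NITEMS; i++)
    if (in_queue[i]
        && (best < 0 || queue[i].stored_key < queue[best].stored_key))
      best = i;
  if (best >= 0)
    in_queue[best] = 0;
  return best;
}

int
main (void)
{
  int i;
  while ((i = extract_min ()) >= 0)
    {
      /* Lazy validation: if the key is out of date, re-insert the item
         with the fresh value and look at the next candidate instead.  */
      if (queue[i].true_badness > queue[i].stored_key)
        {
          queue[i].stored_key = queue[i].true_badness;
          in_queue[i] = 1;
          continue;
        }
      printf ("processing %s with badness %d\n", queue[i].name,
              queue[i].true_badness);
    }
  return 0;
}
#endif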

/* Flatten NODE.  Performed both during early inlining and
   at IPA inlining time.  */

static void
flatten_function (struct cgraph_node *node, bool early)
{
  struct cgraph_edge *e;

  /* We shouldn't be called recursively when we are being processed.  */
  gcc_assert (node->aux == NULL);

  node->aux = (void *) node;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *orig_callee;
      struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);

      /* Have we hit a cycle?  If so, it is time to give up.  */
      if (callee->aux)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Not inlining %s into %s to avoid cycle.\n",
                     cgraph_node_name (callee),
                     cgraph_node_name (e->caller));
          e->inline_failed = CIF_RECURSIVE_INLINING;
          continue;
        }

      /* When the edge is already inlined, we just need to recurse into
         it in order to fully flatten the leaves.  */
      if (!e->inline_failed)
        {
          flatten_function (callee, early);
          continue;
        }

      /* The flatten attribute needs to be processed during late inlining.
         For extra code quality we however do flattening during early
         optimization, too.  */
      if (!early
          ? !can_inline_edge_p (e, true)
          : !can_early_inline_edge_p (e))
        continue;

      if (cgraph_edge_recursive_p (e))
        {
          if (dump_file)
            fprintf (dump_file, "Not inlining: recursive call.\n");
          continue;
        }

      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
          != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
        {
          if (dump_file)
            fprintf (dump_file, "Not inlining: SSA form does not match.\n");
          continue;
        }

      /* Inline the edge and flatten the inline clone.  Avoid
         recursing through the original node if the node was cloned.  */
      if (dump_file)
        fprintf (dump_file, " Inlining %s into %s.\n",
                 cgraph_node_name (callee),
                 cgraph_node_name (e->caller));
      orig_callee = callee;
      inline_call (e, true, NULL, NULL);
      if (e->callee != orig_callee)
        orig_callee->aux = (void *) node;
      flatten_function (e->callee, early);
      if (e->callee != orig_callee)
        orig_callee->aux = NULL;
    }

  node->aux = NULL;
}
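
/* Illustrative sketch (not part of GCC): flatten_function marks the node
   currently being flattened through its AUX pointer and refuses to recurse
   into a callee that is already marked, which is how call-graph cycles are
   broken.  The standalone model below shows the same
   DFS-with-in-progress-marking idea on a hypothetical tiny call graph
   (struct fn and its "visiting" flag are invented stand-ins, not cgraph
   types).  Guarded by #if 0; compile separately to run.  */
#if 0
#include <stdio.h>

#define MAXCALLEES 2

struct fn
{
  const char *name;
  struct fn *callees[MAXCALLEES];
  int visiting;                 /* plays the role of node->aux */
};

static void
flatten (struct fn *f)
{
  int i;
  f->visiting = 1;
  for (i = 0; i < MAXCALLEES && f->callees[i]; i++)
    {
      struct fn *callee = f->callees[i];
      if (callee->visiting)
        {
          printf ("not inlining %s into %s to avoid cycle\n",
                  callee->name, f->name);
          continue;
        }
      printf ("inlining %s into %s\n", callee->name, f->name);
      flatten (callee);
    }
  f->visiting = 0;
}

int
main (void)
{
  /* a -> b -> c -> a forms a cycle; flattening a must stop at the
     back edge c -> a.  */
  struct fn a = { "a", { 0, 0 }, 0 };
  struct fn b = { "b", { 0, 0 }, 0 };
  struct fn c = { "c", { 0, 0 }, 0 };
  a.callees[0] = &b;
  b.callees[0] = &c;
  c.callees[0] = &a;
  flatten (&a);
  return 0;
}
#endif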

/* Decide on the inlining.  We do so in the topological order to avoid
   expenses on updating data structures.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order =
    XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int i;

  if (in_lto_p && optimize)
    ipa_update_after_lto_read ();

  if (dump_file)
    dump_inline_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);

  for (node = cgraph_nodes; node; node = node->next)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      node = order[i];

      /* Handle nodes to be flattened.
         Ideally, when processing callees, we would stop inlining at the
         entry of cycles, possibly cloning that entry point and trying to
         flatten it, turning it into a self-recursive function.  */
      if (lookup_attribute ("flatten",
                            DECL_ATTRIBUTES (node->decl)) != NULL)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Flattening %s\n", cgraph_node_name (node));
          flatten_function (node, false);
        }
    }

  inline_small_functions ();
  cgraph_remove_unreachable_nodes (true, dump_file);
  free (order);

  /* We already perform some inlining of functions called once while
     inlining small functions above.  After unreachable nodes are removed,
     we still might do a quick check that nothing new is found.  */
  if (flag_inline_functions_called_once)
    {
      int cold;
      if (dump_file)
        fprintf (dump_file, "\nDeciding on functions called once:\n");

      /* Inlining one function called once has a good chance of preventing
         inlining of other functions into the same callee.  Ideally we should
         work in priority order, but probably inlining hot functions first
         is a good cut without the extra pain of maintaining the queue.

         ??? This is not really fitting the bill perfectly: inlining a
         function into its callee often leads to better optimization of the
         callee due to increased context for optimization.
         For example, if main() calls a function that outputs help and then a
         function that does the main optimization, we should inline the
         second with priority even if both calls are cold by themselves.

         We probably want to implement a new predicate replacing our use of
         maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
         to be hot.  */
      for (cold = 0; cold <= 1; cold++)
        {
          for (node = cgraph_nodes; node; node = node->next)
            {
              if (want_inline_function_called_once_p (node)
                  && (cold
                      || cgraph_maybe_hot_edge_p (node->callers)))
                {
                  struct cgraph_node *caller = node->callers->caller;

                  if (dump_file)
                    {
                      fprintf (dump_file,
                               "\nInlining %s size %i.\n",
                               cgraph_node_name (node),
                               inline_summary (node)->size);
                      fprintf (dump_file,
                               " Called once from %s %i insns.\n",
                               cgraph_node_name (node->callers->caller),
                               inline_summary (node->callers->caller)->size);
                    }

                  inline_call (node->callers, true, NULL, NULL);
                  if (dump_file)
                    fprintf (dump_file,
                             " Inlined into %s which now has %i size\n",
                             cgraph_node_name (caller),
                             inline_summary (caller)->size);
                }
            }
        }
    }

  /* Free ipa-prop structures if they are no longer needed.  */
  if (optimize)
    ipa_free_all_structures_after_iinln ();

  if (dump_file)
    fprintf (dump_file,
             "\nInlined %i calls, eliminated %i functions\n\n",
             ncalls_inlined, nfunctions_inlined);

  if (dump_file)
    dump_inline_summaries (dump_file);
  /* In WPA we use inline summaries for the partitioning process.  */
  if (!flag_wpa)
    inline_free_summary ();
  return 0;
}
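
/* Illustrative sketch (not part of GCC): ipa_inline walks the ORDER array
   produced by ipa_reverse_postorder so that the flattening and inlining
   decisions follow a topological-style order over the call graph.  The
   standalone model below only shows the mechanics of computing a postorder
   of a directed graph by depth-first search and reading it backwards to get
   the reverse postorder; the adjacency matrix is an invented example, not
   GCC's callgraph representation.  Guarded by #if 0; compile separately to
   run.  */
#if 0
#include <stdio.h>

#define NNODES 4

/* adj[i][j] nonzero means a directed edge i -> j.  */
static const int adj[NNODES][NNODES] = {
  { 0, 1, 1, 0 },       /* 0 -> 1, 0 -> 2 */
  { 0, 0, 0, 1 },       /* 1 -> 3 */
  { 0, 0, 0, 1 },       /* 2 -> 3 */
  { 0, 0, 0, 0 }        /* 3 has no successors */
};

static int visited[NNODES];
static int postorder[NNODES];
static int n_post;

/* Record U in postorder once all of its successors were visited.  */
static void
dfs (int u)
{
  int v;
  visited[u] = 1;
  for (v = 0; v < NNODES; v++)
    if (adj[u][v] && !visited[v])
      dfs (v);
  postorder[n_post++] = u;
}

int
main (void)
{
  int i;
  for (i = 0; i < NNODES; i++)
    if (!visited[i])
      dfs (i);
  /* The reverse postorder is simply the postorder array read backwards,
     which is how the loops over ORDER above consume it.  */
  printf ("reverse postorder:");
  for (i = n_post - 1; i >= 0; i--)
    printf (" %d", postorder[i]);
  printf ("\n");
  return 0;
}
#endif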

/* Inline always-inline function calls in NODE.  */

static bool
inline_always_inline_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
      if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        continue;

      if (cgraph_edge_recursive_p (e))
        {
          if (dump_file)
            fprintf (dump_file, "  Not inlining recursive call to %s.\n",
                     cgraph_node_name (e->callee));
          e->inline_failed = CIF_RECURSIVE_INLINING;
          continue;
        }

      if (!can_early_inline_edge_p (e))
        continue;

      if (dump_file)
        fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
                 cgraph_node_name (e->callee),
                 cgraph_node_name (e->caller));
      inline_call (e, true, NULL, NULL);
      inlined = true;
    }

  return inlined;
}

/* Decide on early inlining of the small functions called by NODE.  */

static bool
early_inline_small_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
      if (!inline_summary (callee)->inlinable
          || !e->inline_failed)
        continue;

      /* Do not consider functions not declared inline.  */
      if (!DECL_DECLARED_INLINE_P (callee->decl)
          && !flag_inline_small_functions
          && !flag_inline_functions)
        continue;

      if (dump_file)
        fprintf (dump_file, "Considering inline candidate %s.\n",
                 cgraph_node_name (callee));

      if (!can_early_inline_edge_p (e))
        continue;

      if (cgraph_edge_recursive_p (e))
        {
          if (dump_file)
            fprintf (dump_file, "  Not inlining: recursive call.\n");
          continue;
        }

      if (!want_early_inline_function_p (e))
        continue;

      if (dump_file)
        fprintf (dump_file, " Inlining %s into %s.\n",
                 cgraph_node_name (callee),
                 cgraph_node_name (e->caller));
      inline_call (e, true, NULL, NULL);
      inlined = true;
    }

  return inlined;
}

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   the later real inlining pass for testcases with very many function
   calls.  */

static unsigned int
early_inliner (void)
{
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  int iterations = 0;
  bool inlined = false;

  if (seen_error ())
    return 0;

  /* Do nothing if the data structures for the ipa-inliner are already
     computed.  This happens when some pass decides to construct a new
     function and cgraph_add_new_function calls lowering passes and early
     optimization on it.  This may confuse us when the early inliner decides
     to inline a call to a function clone, because function clones don't have
     a parameter list in ipa-prop matching their signature.  */
  if (ipa_node_params_vector)
    return 0;

#ifdef ENABLE_CHECKING
  verify_cgraph_node (node);
#endif

  /* Even when not optimizing or not inlining, inline always-inline
     functions.  */
  inlined = inline_always_inline_functions (node);

  if (!optimize
      || flag_no_inline
      || !flag_early_inlining
      /* Never inline regular functions into always-inline functions
         during incremental inlining.  This sucks as functions calling
         always-inline functions will get less optimized, but at the
         same time inlining of functions calling always-inline
         functions into an always-inline function might introduce
         cycles of edges to be always inlined in the callgraph.

         We might want to be smarter and just avoid this type of inlining.  */
      || DECL_DISREGARD_INLINE_LIMITS (node->decl))
    ;
  else if (lookup_attribute ("flatten",
                             DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      /* When the function is marked to be flattened, recursively inline
         all calls in it.  */
      if (dump_file)
        fprintf (dump_file,
                 "Flattening %s\n", cgraph_node_name (node));
      flatten_function (node, true);
      inlined = true;
    }
  else
    {
      /* We iterate incremental inlining to get trivial cases of indirect
         inlining.  */
      while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
             && early_inline_small_functions (node))
        {
          timevar_push (TV_INTEGRATION);
          todo |= optimize_inline_calls (current_function_decl);

          /* Technically we ought to recompute inline parameters so the new
             iteration of the early inliner works as expected.  We however
             have values approximately right and thus we only need to update
             edge info that might be cleared out for newly discovered
             edges.  */
          for (edge = node->callees; edge; edge = edge->next_callee)
            {
              struct inline_edge_summary *es = inline_edge_summary (edge);
              es->call_stmt_size
                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
              es->call_stmt_time
                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
              if (edge->callee->decl
                  && !gimple_check_call_matching_types (edge->call_stmt,
                                                        edge->callee->decl))
                edge->call_stmt_cannot_inline_p = true;
            }
          timevar_pop (TV_INTEGRATION);
          iterations++;
          inlined = false;
        }
      if (dump_file)
        fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  cfun->always_inline_functions_inlined = true;

  return todo;
}
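
/* Illustrative sketch (not part of GCC): the else-branch above repeats
   early inlining while the previous round still inlined something, capped
   by PARAM_EARLY_INLINER_MAX_ITERATIONS, in order to pick up trivial cases
   of indirect inlining exposed by earlier rounds.  The standalone model
   below shows only that bounded repeat-while-progress shape; one_round and
   its work counter are invented stand-ins for a round of inlining plus
   cleanup.  Guarded by #if 0; compile separately to run.  */
#if 0
#include <stdio.h>

#define MAX_ITERATIONS 10       /* stands in for the --param value */

/* Pretend one round of early inlining and report whether anything was
   inlined; progress stops when the work counter runs out.  */
static int
one_round (int *work_left)
{
  if (*work_left <= 0)
    return 0;
  (*work_left)--;
  return 1;
}

int
main (void)
{
  int work_left = 3;            /* three rounds of profitable inlining */
  int iterations = 0;

  while (iterations < MAX_ITERATIONS && one_round (&work_left))
    {
      /* In the real pass this is where the cleanup after inlining runs
         and edge summaries are refreshed before the next round.  */
      iterations++;
    }
  printf ("iterations: %d\n", iterations);
  return 0;
}
#endif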

struct gimple_opt_pass pass_early_inline =
{
 {
  GIMPLE_PASS,
  "einline",                            /* name */
  NULL,                                 /* gate */
  early_inliner,                        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  PROP_ssa,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0                                     /* todo_flags_finish */
 }
};

/* When to run IPA inlining.  Inlining of always-inline functions
   happens during early inlining.

   Enable inlining unconditionally at -flto.  We need size estimates to
   drive partitioning.  */

static bool
gate_ipa_inline (void)
{
  return optimize || flag_lto || flag_wpa;
}

struct ipa_opt_pass_d pass_ipa_inline =
{
 {
  IPA_PASS,
  "inline",                             /* name */
  gate_ipa_inline,                      /* gate */
  ipa_inline,                           /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_INLINE_HEURISTICS,                 /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_remove_functions,                /* todo_flags_start */
  TODO_dump_cgraph
  | TODO_remove_functions | TODO_ggc_collect    /* todo_flags_finish */
 },
 inline_generate_summary,               /* generate_summary */
 inline_write_summary,                  /* write_summary */
 inline_read_summary,                   /* read_summary */
 NULL,                                  /* write_optimization_summary */
 NULL,                                  /* read_optimization_summary */
 NULL,                                  /* stmt_fixup */
 0,                                     /* TODOs */
 inline_transform,                      /* function_transform */
 NULL,                                  /* variable_transform */
};