OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [tree-vectorizer.c] - Blame information for rev 867

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Loop Vectorization
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
/* Loop Vectorization Pass.
22
 
23
   This pass tries to vectorize loops. This first implementation focuses on
24
   simple inner-most loops, with no conditional control flow, and a set of
25
   simple operations which vector form can be expressed using existing
26
   tree codes (PLUS, MULT etc).
27
 
28
   For example, the vectorizer transforms the following simple loop:
29
 
30
        short a[N]; short b[N]; short c[N]; int i;
31
 
32
        for (i=0; i<N; i++){
33
          a[i] = b[i] + c[i];
34
        }
35
 
36
   as if it was manually vectorized by rewriting the source code into:
37
 
38
        typedef int __attribute__((mode(V8HI))) v8hi;
39
        short a[N];  short b[N]; short c[N];   int i;
40
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
41
        v8hi va, vb, vc;
42
 
43
        for (i=0; i<N/8; i++){
44
          vb = pb[i];
45
          vc = pc[i];
46
          va = vb + vc;
47
          pa[i] = va;
48
        }
49
 
50
        The main entry to this pass is vectorize_loops(), in which
51
   the vectorizer applies a set of analyses on a given set of loops,
52
   followed by the actual vectorization transformation for the loops that
53
   had successfully passed the analysis phase.
54
 
55
        Throughout this pass we make a distinction between two types of
56
   data: scalars (which are represented by SSA_NAMES), and memory references
57
   ("data-refs"). These two types of data require different handling both
58
   during analysis and transformation. The types of data-refs that the
59
   vectorizer currently supports are ARRAY_REFS which base is an array DECL
60
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
61
   accesses are required to have a  simple (consecutive) access pattern.
62
 
63
   Analysis phase:
64
   ===============
65
        The driver for the analysis phase is vect_analyze_loop_nest().
66
   It applies a set of analyses, some of which rely on the scalar evolution
67
   analyzer (scev) developed by Sebastian Pop.
68
 
69
        During the analysis phase the vectorizer records some information
70
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
71
   loop, as well as general information about the loop as a whole, which is
72
   recorded in a "loop_vec_info" struct attached to each loop.
73
 
74
   Transformation phase:
75
   =====================
76
        The loop transformation phase scans all the stmts in the loop, and
77
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
78
   the loop that needs to be vectorized. It insert the vector code sequence
79
   just before the scalar stmt S, and records a pointer to the vector code
80
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
81
   attached to S). This pointer will be used for the vectorization of following
82
   stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
83
   otherwise, we rely on dead code elimination for removing it.
84
 
85
        For example, say stmt S1 was vectorized into stmt VS1:
86
 
87
   VS1: vb = px[i];
88
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
89
   S2:  a = b;
90
 
91
   To vectorize stmt S2, the vectorizer first finds the stmt that defines
92
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
93
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
94
   resulting sequence would be:
95
 
96
   VS1: vb = px[i];
97
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
98
   VS2: va = vb;
99
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
100
 
101
        Operands that are not SSA_NAMEs, are data-refs that appear in
102
   load/store operations (like 'x[i]' in S1), and are handled differently.
103
 
104
   Target modeling:
105
   =================
106
        Currently the only target specific information that is used is the
107
   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
108
   support different sizes of vectors, for now will need to specify one value
109
   for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
110
 
111
        Since we only vectorize operations which vector form can be
112
   expressed using existing tree codes, to verify that an operation is
113
   supported, the vectorizer checks the relevant optab at the relevant
114
   machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
115
   the value found is CODE_FOR_nothing, then there's no target support, and
116
   we can't vectorize the stmt.
117
 
118
   For additional information on this project see:
119
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
120
*/
121
 
122
#include "config.h"
123
#include "system.h"
124
#include "coretypes.h"
125
#include "tm.h"
126
#include "ggc.h"
127
#include "tree.h"
128
#include "target.h"
129
#include "rtl.h"
130
#include "basic-block.h"
131
#include "diagnostic.h"
132
#include "tree-flow.h"
133
#include "tree-dump.h"
134
#include "timevar.h"
135
#include "cfgloop.h"
136
#include "cfglayout.h"
137
#include "expr.h"
138
#include "optabs.h"
139
#include "params.h"
140
#include "toplev.h"
141
#include "tree-chrec.h"
142
#include "tree-data-ref.h"
143
#include "tree-scalar-evolution.h"
144
#include "input.h"
145
#include "tree-vectorizer.h"
146
#include "tree-pass.h"
147
 
148
/*************************************************************************
149
  Simple Loop Peeling Utilities
150
 *************************************************************************/
151
static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
152
  (struct loop *, struct loops *, edge);
153
static void slpeel_update_phis_for_duplicate_loop
154
  (struct loop *, struct loop *, bool after);
155
static void slpeel_update_phi_nodes_for_guard1
156
  (edge, struct loop *, bool, basic_block *, bitmap *);
157
static void slpeel_update_phi_nodes_for_guard2
158
  (edge, struct loop *, bool, basic_block *);
159
static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
160
 
161
static void rename_use_op (use_operand_p);
162
static void rename_variables_in_bb (basic_block);
163
static void rename_variables_in_loop (struct loop *);
164
 
165
/*************************************************************************
166
  General Vectorization Utilities
167
 *************************************************************************/
168
static void vect_set_dump_settings (void);
169
 
170
/* vect_dump will be set to stderr or dump_file if exist.  */
171
FILE *vect_dump;
172
 
173
/* vect_verbosity_level set to an invalid value
174
   to mark that it's uninitialized.  */
175
enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
176
 
177
/* Number of loops, at the beginning of vectorization.  */
178
unsigned int vect_loops_num;
179
 
180
/* Loop location.  */
181
static LOC vect_loop_location;
182
 
183
/* Bitmap of virtual variables to be renamed.  */
184
bitmap vect_vnames_to_rename;
185
 
186
/*************************************************************************
187
  Simple Loop Peeling Utilities
188
 
189
  Utilities to support loop peeling for vectorization purposes.
190
 *************************************************************************/
191
 
192
 
193
/* Renames the use *OP_P.  */
194
 
195
static void
196
rename_use_op (use_operand_p op_p)
197
{
198
  tree new_name;
199
 
200
  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
201
    return;
202
 
203
  new_name = get_current_def (USE_FROM_PTR (op_p));
204
 
205
  /* Something defined outside of the loop.  */
206
  if (!new_name)
207
    return;
208
 
209
  /* An ordinary ssa name defined in the loop.  */
210
 
211
  SET_USE (op_p, new_name);
212
}
213
 
214
 
215
/* Renames the variables in basic block BB.  */
216
 
217
static void
218
rename_variables_in_bb (basic_block bb)
219
{
220
  tree phi;
221
  block_stmt_iterator bsi;
222
  tree stmt;
223
  use_operand_p use_p;
224
  ssa_op_iter iter;
225
  edge e;
226
  edge_iterator ei;
227
  struct loop *loop = bb->loop_father;
228
 
229
  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
230
    {
231
      stmt = bsi_stmt (bsi);
232
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
233
                                 (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
234
        rename_use_op (use_p);
235
    }
236
 
237
  FOR_EACH_EDGE (e, ei, bb->succs)
238
    {
239
      if (!flow_bb_inside_loop_p (loop, e->dest))
240
        continue;
241
      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
242
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
243
    }
244
}
245
 
246
 
247
/* Renames variables in new generated LOOP.  */
248
 
249
static void
250
rename_variables_in_loop (struct loop *loop)
251
{
252
  unsigned i;
253
  basic_block *bbs;
254
 
255
  bbs = get_loop_body (loop);
256
 
257
  for (i = 0; i < loop->num_nodes; i++)
258
    rename_variables_in_bb (bbs[i]);
259
 
260
  free (bbs);
261
}
262
 
263
 
264
/* Update the PHI nodes of NEW_LOOP.
265
 
266
   NEW_LOOP is a duplicate of ORIG_LOOP.
267
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
268
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
269
   executes before it.  */
270
 
271
static void
272
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
273
                                       struct loop *new_loop, bool after)
274
{
275
  tree new_ssa_name;
276
  tree phi_new, phi_orig;
277
  tree def;
278
  edge orig_loop_latch = loop_latch_edge (orig_loop);
279
  edge orig_entry_e = loop_preheader_edge (orig_loop);
280
  edge new_loop_exit_e = new_loop->single_exit;
281
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
282
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
283
 
284
  /*
285
     step 1. For each loop-header-phi:
286
             Add the first phi argument for the phi in NEW_LOOP
287
            (the one associated with the entry of NEW_LOOP)
288
 
289
     step 2. For each loop-header-phi:
290
             Add the second phi argument for the phi in NEW_LOOP
291
            (the one associated with the latch of NEW_LOOP)
292
 
293
     step 3. Update the phis in the successor block of NEW_LOOP.
294
 
295
        case 1: NEW_LOOP was placed before ORIG_LOOP:
296
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
297
                Updating the phis in the successor block can therefore be done
298
                along with the scanning of the loop header phis, because the
299
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
300
                phi nodes, organized in the same order.
301
 
302
        case 2: NEW_LOOP was placed after ORIG_LOOP:
303
                The successor block of NEW_LOOP is the original exit block of
304
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
305
                We postpone updating these phis to a later stage (when
306
                loop guards are added).
307
   */
308
 
309
 
310
  /* Scan the phis in the headers of the old and new loops
311
     (they are organized in exactly the same order).  */
312
 
313
  for (phi_new = phi_nodes (new_loop->header),
314
       phi_orig = phi_nodes (orig_loop->header);
315
       phi_new && phi_orig;
316
       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
317
    {
318
      /* step 1.  */
319
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
320
      add_phi_arg (phi_new, def, new_loop_entry_e);
321
 
322
      /* step 2.  */
323
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
324
      if (TREE_CODE (def) != SSA_NAME)
325
        continue;
326
 
327
      new_ssa_name = get_current_def (def);
328
      if (!new_ssa_name)
329
        {
330
          /* This only happens if there are no definitions
331
             inside the loop. use the phi_result in this case.  */
332
          new_ssa_name = PHI_RESULT (phi_new);
333
        }
334
 
335
      /* An ordinary ssa name defined in the loop.  */
336
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
337
 
338
      /* step 3 (case 1).  */
339
      if (!after)
340
        {
341
          gcc_assert (new_loop_exit_e == orig_entry_e);
342
          SET_PHI_ARG_DEF (phi_orig,
343
                           new_loop_exit_e->dest_idx,
344
                           new_ssa_name);
345
        }
346
    }
347
}
348
 
349
 
350
/* Update PHI nodes for a guard of the LOOP.
351
 
352
   Input:
353
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
354
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
355
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
356
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
357
        We denote this bb NEW_MERGE_BB because before the guard code was added
358
        it had a single predecessor (the LOOP header), and now it became a merge
359
        point of two paths - the path that ends with the LOOP exit-edge, and
360
        the path that ends with GUARD_EDGE.
361
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
362
        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
363
 
364
   ===> The CFG before the guard-code was added:
365
        LOOP_header_bb:
366
          loop_body
367
          if (exit_loop) goto update_bb
368
          else           goto LOOP_header_bb
369
        update_bb:
370
 
371
   ==> The CFG after the guard-code was added:
372
        guard_bb:
373
          if (LOOP_guard_condition) goto new_merge_bb
374
          else                      goto LOOP_header_bb
375
        LOOP_header_bb:
376
          loop_body
377
          if (exit_loop_condition) goto new_merge_bb
378
          else                     goto LOOP_header_bb
379
        new_merge_bb:
380
          goto update_bb
381
        update_bb:
382
 
383
   ==> The CFG after this function:
384
        guard_bb:
385
          if (LOOP_guard_condition) goto new_merge_bb
386
          else                      goto LOOP_header_bb
387
        LOOP_header_bb:
388
          loop_body
389
          if (exit_loop_condition) goto new_exit_bb
390
          else                     goto LOOP_header_bb
391
        new_exit_bb:
392
        new_merge_bb:
393
          goto update_bb
394
        update_bb:
395
 
396
   This function:
397
   1. creates and updates the relevant phi nodes to account for the new
398
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
399
      1.1. Create phi nodes at NEW_MERGE_BB.
400
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
401
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
402
   2. preserves loop-closed-ssa-form by creating the required phi nodes
403
      at the exit of LOOP (i.e, in NEW_EXIT_BB).
404
 
405
   There are two flavors to this function:
406
 
407
   slpeel_update_phi_nodes_for_guard1:
408
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
409
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
410
     for variables that have phis in the loop header.
411
 
412
   slpeel_update_phi_nodes_for_guard2:
413
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
414
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
415
     for variables that have phis in the loop exit.
416
 
417
   I.E., the overall structure is:
418
 
419
        loop1_preheader_bb:
420
                guard1 (goto loop1/merg1_bb)
421
        loop1
422
        loop1_exit_bb:
423
                guard2 (goto merge1_bb/merge2_bb)
424
        merge1_bb
425
        loop2
426
        loop2_exit_bb
427
        merge2_bb
428
        next_bb
429
 
430
   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
431
   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
432
   that have phis in loop1->header).
433
 
434
   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
435
   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
436
   that have phis in next_bb). It also adds some of these phis to
437
   loop1_exit_bb.
438
 
439
   slpeel_update_phi_nodes_for_guard1 is always called before
440
   slpeel_update_phi_nodes_for_guard2. They are both needed in order
441
   to create correct data-flow and loop-closed-ssa-form.
442
 
443
   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
444
   that change between iterations of a loop (and therefore have a phi-node
445
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
446
   phis for variables that are used out of the loop (and therefore have
447
   loop-closed exit phis). Some variables may be both updated between
448
   iterations and used after the loop. This is why in loop1_exit_bb we
449
   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
450
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
451
 
452
   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
453
     an original loop. i.e., we have:
454
 
455
           orig_loop
456
           guard_bb (goto LOOP/new_merge)
457
           new_loop <-- LOOP
458
           new_exit
459
           new_merge
460
           next_bb
461
 
462
     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
463
     have:
464
 
465
           new_loop
466
           guard_bb (goto LOOP/new_merge)
467
           orig_loop <-- LOOP
468
           new_exit
469
           new_merge
470
           next_bb
471
 
472
     The SSA names defined in the original loop have a current
473
     reaching definition that that records the corresponding new
474
     ssa-name used in the new duplicated loop copy.
475
  */
476
 
477
/* Function slpeel_update_phi_nodes_for_guard1
478
 
479
   Input:
480
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
481
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
482
            information.
483
 
484
   In the context of the overall structure, we have:
485
 
486
        loop1_preheader_bb:
487
                guard1 (goto loop1/merg1_bb)
488
LOOP->  loop1
489
        loop1_exit_bb:
490
                guard2 (goto merge1_bb/merge2_bb)
491
        merge1_bb
492
        loop2
493
        loop2_exit_bb
494
        merge2_bb
495
        next_bb
496
 
497
   For each name updated between loop iterations (i.e - for each name that has
498
   an entry (loop-header) phi in LOOP) we create a new phi in:
499
   1. merge1_bb (to account for the edge from guard1)
500
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
501
*/
502
 
503
static void
504
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
505
                                    bool is_new_loop, basic_block *new_exit_bb,
506
                                    bitmap *defs)
507
{
508
  tree orig_phi, new_phi;
509
  tree update_phi, update_phi2;
510
  tree guard_arg, loop_arg;
511
  basic_block new_merge_bb = guard_edge->dest;
512
  edge e = EDGE_SUCC (new_merge_bb, 0);
513
  basic_block update_bb = e->dest;
514
  basic_block orig_bb = loop->header;
515
  edge new_exit_e;
516
  tree current_new_name;
517
  tree name;
518
 
519
  /* Create new bb between loop and new_merge_bb.  */
520
  *new_exit_bb = split_edge (loop->single_exit);
521
  add_bb_to_loop (*new_exit_bb, loop->outer);
522
 
523
  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
524
 
525
  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
526
       orig_phi && update_phi;
527
       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
528
    {
529
      /* Virtual phi; Mark it for renaming. We actually want to call
530
         mar_sym_for_renaming, but since all ssa renaming datastructures
531
         are going to be freed before we get to call ssa_upate, we just
532
         record this name for now in a bitmap, and will mark it for
533
         renaming later.  */
534
      name = PHI_RESULT (orig_phi);
535
      if (!is_gimple_reg (SSA_NAME_VAR (name)))
536
        bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
537
 
538
      /** 1. Handle new-merge-point phis  **/
539
 
540
      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
541
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
542
                                 new_merge_bb);
543
 
544
      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
545
            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
546
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
547
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
548
 
549
      add_phi_arg (new_phi, loop_arg, new_exit_e);
550
      add_phi_arg (new_phi, guard_arg, guard_edge);
551
 
552
      /* 1.3. Update phi in successor block.  */
553
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
554
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
555
      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
556
      update_phi2 = new_phi;
557
 
558
 
559
      /** 2. Handle loop-closed-ssa-form phis  **/
560
 
561
      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
562
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
563
                                 *new_exit_bb);
564
 
565
      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
566
      add_phi_arg (new_phi, loop_arg, loop->single_exit);
567
 
568
      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
569
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
570
      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
571
 
572
      /* 2.4. Record the newly created name with set_current_def.
573
         We want to find a name such that
574
                name = get_current_def (orig_loop_name)
575
         and to set its current definition as follows:
576
                set_current_def (name, new_phi_name)
577
 
578
         If LOOP is a new loop then loop_arg is already the name we're
579
         looking for. If LOOP is the original loop, then loop_arg is
580
         the orig_loop_name and the relevant name is recorded in its
581
         current reaching definition.  */
582
      if (is_new_loop)
583
        current_new_name = loop_arg;
584
      else
585
        {
586
          current_new_name = get_current_def (loop_arg);
587
          /* current_def is not available only if the variable does not
588
             change inside the loop, in which case we also don't care
589
             about recording a current_def for it because we won't be
590
             trying to create loop-exit-phis for it.  */
591
          if (!current_new_name)
592
            continue;
593
        }
594
      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
595
 
596
      set_current_def (current_new_name, PHI_RESULT (new_phi));
597
      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
598
    }
599
 
600
  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
601
}
602
 
603
 
604
/* Function slpeel_update_phi_nodes_for_guard2
605
 
606
   Input:
607
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
608
 
609
   In the context of the overall structure, we have:
610
 
611
        loop1_preheader_bb:
612
                guard1 (goto loop1/merg1_bb)
613
        loop1
614
        loop1_exit_bb:
615
                guard2 (goto merge1_bb/merge2_bb)
616
        merge1_bb
617
LOOP->  loop2
618
        loop2_exit_bb
619
        merge2_bb
620
        next_bb
621
 
622
   For each name used out side the loop (i.e - for each name that has an exit
623
   phi in next_bb) we create a new phi in:
624
   1. merge2_bb (to account for the edge from guard_bb)
625
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
626
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
627
      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
628
*/
629
 
630
static void
631
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
632
                                    bool is_new_loop, basic_block *new_exit_bb)
633
{
634
  tree orig_phi, new_phi;
635
  tree update_phi, update_phi2;
636
  tree guard_arg, loop_arg;
637
  basic_block new_merge_bb = guard_edge->dest;
638
  edge e = EDGE_SUCC (new_merge_bb, 0);
639
  basic_block update_bb = e->dest;
640
  edge new_exit_e;
641
  tree orig_def, orig_def_new_name;
642
  tree new_name, new_name2;
643
  tree arg;
644
 
645
  /* Create new bb between loop and new_merge_bb.  */
646
  *new_exit_bb = split_edge (loop->single_exit);
647
  add_bb_to_loop (*new_exit_bb, loop->outer);
648
 
649
  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
650
 
651
  for (update_phi = phi_nodes (update_bb); update_phi;
652
       update_phi = PHI_CHAIN (update_phi))
653
    {
654
      orig_phi = update_phi;
655
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
656
      /* This loop-closed-phi actually doesn't represent a use
657
         out of the loop - the phi arg is a constant.  */
658
      if (TREE_CODE (orig_def) != SSA_NAME)
659
        continue;
660
      orig_def_new_name = get_current_def (orig_def);
661
      arg = NULL_TREE;
662
 
663
      /** 1. Handle new-merge-point phis  **/
664
 
665
      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
666
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
667
                                 new_merge_bb);
668
 
669
      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
670
            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
671
      new_name = orig_def;
672
      new_name2 = NULL_TREE;
673
      if (orig_def_new_name)
674
        {
675
          new_name = orig_def_new_name;
676
          /* Some variables have both loop-entry-phis and loop-exit-phis.
677
             Such variables were given yet newer names by phis placed in
678
             guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
679
             new_name2 = get_current_def (get_current_def (orig_name)).  */
680
          new_name2 = get_current_def (new_name);
681
        }
682
 
683
      if (is_new_loop)
684
        {
685
          guard_arg = orig_def;
686
          loop_arg = new_name;
687
        }
688
      else
689
        {
690
          guard_arg = new_name;
691
          loop_arg = orig_def;
692
        }
693
      if (new_name2)
694
        guard_arg = new_name2;
695
 
696
      add_phi_arg (new_phi, loop_arg, new_exit_e);
697
      add_phi_arg (new_phi, guard_arg, guard_edge);
698
 
699
      /* 1.3. Update phi in successor block.  */
700
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
701
      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
702
      update_phi2 = new_phi;
703
 
704
 
705
      /** 2. Handle loop-closed-ssa-form phis  **/
706
 
707
      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
708
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
709
                                 *new_exit_bb);
710
 
711
      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
712
      add_phi_arg (new_phi, loop_arg, loop->single_exit);
713
 
714
      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
715
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
716
      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
717
 
718
 
719
      /** 3. Handle loop-closed-ssa-form phis for first loop  **/
720
 
721
      /* 3.1. Find the relevant names that need an exit-phi in
722
         GUARD_BB, i.e. names for which
723
         slpeel_update_phi_nodes_for_guard1 had not already created a
724
         phi node. This is the case for names that are used outside
725
         the loop (and therefore need an exit phi) but are not updated
726
         across loop iterations (and therefore don't have a
727
         loop-header-phi).
728
 
729
         slpeel_update_phi_nodes_for_guard1 is responsible for
730
         creating loop-exit phis in GUARD_BB for names that have a
731
         loop-header-phi.  When such a phi is created we also record
732
         the new name in its current definition.  If this new name
733
         exists, then guard_arg was set to this new name (see 1.2
734
         above).  Therefore, if guard_arg is not this new name, this
735
         is an indication that an exit-phi in GUARD_BB was not yet
736
         created, so we take care of it here.  */
737
      if (guard_arg == new_name2)
738
        continue;
739
      arg = guard_arg;
740
 
741
      /* 3.2. Generate new phi node in GUARD_BB:  */
742
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
743
                                 guard_edge->src);
744
 
745
      /* 3.3. GUARD_BB has one incoming edge:  */
746
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
747
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
748
 
749
      /* 3.4. Update phi in successor of GUARD_BB:  */
750
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
751
                                                                == guard_arg);
752
      SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
753
    }
754
 
755
  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
756
}
757
 
758
 
759
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
760
   that starts at zero, increases by one and its limit is NITERS.
761
 
762
   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
763
 
764
void
765
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
766
{
767
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
768
  tree orig_cond;
769
  edge exit_edge = loop->single_exit;
770
  block_stmt_iterator loop_cond_bsi;
771
  block_stmt_iterator incr_bsi;
772
  bool insert_after;
773
  tree begin_label = tree_block_label (loop->latch);
774
  tree exit_label = tree_block_label (loop->single_exit->dest);
775
  tree init = build_int_cst (TREE_TYPE (niters), 0);
776
  tree step = build_int_cst (TREE_TYPE (niters), 1);
777
  tree then_label;
778
  tree else_label;
779
  LOC loop_loc;
780
 
781
  orig_cond = get_loop_exit_condition (loop);
782
  gcc_assert (orig_cond);
783
  loop_cond_bsi = bsi_for_stmt (orig_cond);
784
 
785
  standard_iv_increment_position (loop, &incr_bsi, &insert_after);
786
  create_iv (init, step, NULL_TREE, loop,
787
             &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
788
 
789
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
790
    {
791
      cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
792
      then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
793
      else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
794
    }
795
  else /* 'then' edge loops back.  */
796
    {
797
      cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
798
      then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
799
      else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
800
    }
801
 
802
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
803
                     then_label, else_label);
804
  bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
805
 
806
  /* Remove old loop exit test:  */
807
  bsi_remove (&loop_cond_bsi, true);
808
 
809
  loop_loc = find_loop_location (loop);
810
  if (dump_file && (dump_flags & TDF_DETAILS))
811
    {
812
      if (loop_loc != UNKNOWN_LOC)
813
        fprintf (dump_file, "\nloop at %s:%d: ",
814
                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
815
      print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
816
    }
817
 
818
  loop->nb_iterations = niters;
819
}
820
 
821
 
822
/* Given LOOP this function generates a new copy of it and puts it
823
   on E which is either the entry or exit of LOOP.  */
824
 
825
static struct loop *
826
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
827
                                        edge e)
828
{
829
  struct loop *new_loop;
830
  basic_block *new_bbs, *bbs;
831
  bool at_exit;
832
  bool was_imm_dom;
833
  basic_block exit_dest;
834
  tree phi, phi_arg;
835
 
836
  at_exit = (e == loop->single_exit);
837
  if (!at_exit && e != loop_preheader_edge (loop))
838
    return NULL;
839
 
840
  bbs = get_loop_body (loop);
841
 
842
  /* Check whether duplication is possible.  */
843
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
844
    {
845
      free (bbs);
846
      return NULL;
847
    }
848
 
849
  /* Generate new loop structure.  */
850
  new_loop = duplicate_loop (loops, loop, loop->outer);
851
  if (!new_loop)
852
    {
853
      free (bbs);
854
      return NULL;
855
    }
856
 
857
  exit_dest = loop->single_exit->dest;
858
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
859
                                          exit_dest) == loop->header ?
860
                 true : false);
861
 
862
  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
863
 
864
  copy_bbs (bbs, loop->num_nodes, new_bbs,
865
            &loop->single_exit, 1, &new_loop->single_exit, NULL,
866
            e->src);
867
 
868
  /* Duplicating phi args at exit bbs as coming
869
     also from exit of duplicated loop.  */
870
  for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
871
    {
872
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
873
      if (phi_arg)
874
        {
875
          edge new_loop_exit_edge;
876
 
877
          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
878
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
879
          else
880
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
881
 
882
          add_phi_arg (phi, phi_arg, new_loop_exit_edge);
883
        }
884
    }
885
 
886
  if (at_exit) /* Add the loop copy at exit.  */
887
    {
888
      redirect_edge_and_branch_force (e, new_loop->header);
889
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
890
      if (was_imm_dom)
891
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
892
    }
893
  else /* Add the copy at entry.  */
894
    {
895
      edge new_exit_e;
896
      edge entry_e = loop_preheader_edge (loop);
897
      basic_block preheader = entry_e->src;
898
 
899
      if (!flow_bb_inside_loop_p (new_loop,
900
                                  EDGE_SUCC (new_loop->header, 0)->dest))
901
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
902
      else
903
        new_exit_e = EDGE_SUCC (new_loop->header, 1);
904
 
905
      redirect_edge_and_branch_force (new_exit_e, loop->header);
906
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
907
                               new_exit_e->src);
908
 
909
      /* We have to add phi args to the loop->header here as coming
910
         from new_exit_e edge.  */
911
      for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
912
        {
913
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
914
          if (phi_arg)
915
            add_phi_arg (phi, phi_arg, new_exit_e);
916
        }
917
 
918
      redirect_edge_and_branch_force (entry_e, new_loop->header);
919
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
920
    }
921
 
922
  free (new_bbs);
923
  free (bbs);
924
 
925
  return new_loop;
926
}
927
 
928
 
929
/* Given the condition statement COND, put it as the last statement
930
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
931
   Assumes that this is the single exit of the guarded loop.
932
   Returns the skip edge.  */
933
 
934
static edge
935
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
936
                        basic_block dom_bb)
937
{
938
  block_stmt_iterator bsi;
939
  edge new_e, enter_e;
940
  tree cond_stmt, then_label, else_label;
941
 
942
  enter_e = EDGE_SUCC (guard_bb, 0);
943
  enter_e->flags &= ~EDGE_FALLTHRU;
944
  enter_e->flags |= EDGE_FALSE_VALUE;
945
  bsi = bsi_last (guard_bb);
946
 
947
  then_label = build1 (GOTO_EXPR, void_type_node,
948
                       tree_block_label (exit_bb));
949
  else_label = build1 (GOTO_EXPR, void_type_node,
950
                       tree_block_label (enter_e->dest));
951
  cond_stmt = build3 (COND_EXPR, void_type_node, cond,
952
                     then_label, else_label);
953
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
954
  /* Add new edge to connect guard block to the merge/loop-exit block.  */
955
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
956
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
957
  return new_e;
958
}
959
 
960
 
961
/* This function verifies that the following restrictions apply to LOOP:
962
   (1) it is innermost
963
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
964
   (3) it is single entry, single exit
965
   (4) its exit condition is the last stmt in the header
966
   (5) E is the entry/exit edge of LOOP.
967
 */
968
 
969
bool
970
slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
971
{
972
  edge exit_e = loop->single_exit;
973
  edge entry_e = loop_preheader_edge (loop);
974
  tree orig_cond = get_loop_exit_condition (loop);
975
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
976
 
977
  if (need_ssa_update_p ())
978
    return false;
979
 
980
  if (loop->inner
981
      /* All loops have an outer scope; the only case loop->outer is NULL is for
982
         the function itself.  */
983
      || !loop->outer
984
      || loop->num_nodes != 2
985
      || !empty_block_p (loop->latch)
986
      || !loop->single_exit
987
      /* Verify that new loop exit condition can be trivially modified.  */
988
      || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
989
      || (e != exit_e && e != entry_e))
990
    return false;
991
 
992
  return true;
993
}
994
 
995
#ifdef ENABLE_CHECKING
996
void
997
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
998
                                 struct loop *second_loop)
999
{
1000
  basic_block loop1_exit_bb = first_loop->single_exit->dest;
1001
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1002
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1003
 
1004
  /* A guard that controls whether the second_loop is to be executed or skipped
1005
     is placed in first_loop->exit.  first_loopt->exit therefore has two
1006
     successors - one is the preheader of second_loop, and the other is a bb
1007
     after second_loop.
1008
   */
1009
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1010
 
1011
  /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1012
        of second_loop.  */
1013
 
1014
  /* The preheader of new_loop is expected to have two predecessors:
1015
     first_loop->exit and the block that precedes first_loop.  */
1016
 
1017
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1018
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1019
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1020
               || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1021
                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1022
 
1023
  /* Verify that the other successor of first_loopt->exit is after the
1024
     second_loop.  */
1025
  /* TODO */
1026
}
1027
#endif
1028
 
1029
/* Function slpeel_tree_peel_loop_to_edge.
1030
 
1031
   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1032
   that is placed on the entry (exit) edge E of LOOP. After this transformation
1033
   we have two loops one after the other - first-loop iterates FIRST_NITERS
1034
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1035
 
1036
   Input:
1037
   - LOOP: the loop to be peeled.
1038
   - E: the exit or entry edge of LOOP.
1039
        If it is the entry edge, we peel the first iterations of LOOP. In this
1040
        case first-loop is LOOP, and second-loop is the newly created loop.
1041
        If it is the exit edge, we peel the last iterations of LOOP. In this
1042
        case, first-loop is the newly created loop, and second-loop is LOOP.
1043
   - NITERS: the number of iterations that LOOP iterates.
1044
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1045
   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1046
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1047
        is false, the caller of this function may want to take care of this
1048
        (this can be useful if we don't want new stmts added to first-loop).
1049
 
1050
   Output:
1051
   The function returns a pointer to the new loop-copy, or NULL if it failed
1052
   to perform the transformation.
1053
 
1054
   The function generates two if-then-else guards: one before the first loop,
1055
   and the other before the second loop:
1056
   The first guard is:
1057
     if (FIRST_NITERS == 0) then skip the first loop,
1058
     and go directly to the second loop.
1059
   The second guard is:
1060
     if (FIRST_NITERS == NITERS) then skip the second loop.
1061
 
1062
   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1063
   FORNOW the resulting code will not be in loop-closed-ssa form.
1064
*/
1065
 
1066
struct loop*
1067
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1068
                               edge e, tree first_niters,
1069
                               tree niters, bool update_first_loop_count)
1070
{
1071
  struct loop *new_loop = NULL, *first_loop, *second_loop;
1072
  edge skip_e;
1073
  tree pre_condition;
1074
  bitmap definitions;
1075
  basic_block bb_before_second_loop, bb_after_second_loop;
1076
  basic_block bb_before_first_loop;
1077
  basic_block bb_between_loops;
1078
  basic_block new_exit_bb;
1079
  edge exit_e = loop->single_exit;
1080
  LOC loop_loc;
1081
 
1082
  if (!slpeel_can_duplicate_loop_p (loop, e))
1083
    return NULL;
1084
 
1085
  /* We have to initialize cfg_hooks. Then, when calling
1086
   cfg_hooks->split_edge, the function tree_split_edge
1087
   is actually called and, when calling cfg_hooks->duplicate_block,
1088
   the function tree_duplicate_bb is called.  */
1089
  tree_register_cfg_hooks ();
1090
 
1091
 
1092
  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1093
        Resulting CFG would be:
1094
 
1095
        first_loop:
1096
        do {
1097
        } while ...
1098
 
1099
        second_loop:
1100
        do {
1101
        } while ...
1102
 
1103
        orig_exit_bb:
1104
   */
1105
 
1106
  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1107
    {
1108
      loop_loc = find_loop_location (loop);
1109
      if (dump_file && (dump_flags & TDF_DETAILS))
1110
        {
1111
          if (loop_loc != UNKNOWN_LOC)
1112
            fprintf (dump_file, "\n%s:%d: note: ",
1113
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1114
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1115
        }
1116
      return NULL;
1117
    }
1118
 
1119
  if (e == exit_e)
1120
    {
1121
      /* NEW_LOOP was placed after LOOP.  */
1122
      first_loop = loop;
1123
      second_loop = new_loop;
1124
    }
1125
  else
1126
    {
1127
      /* NEW_LOOP was placed before LOOP.  */
1128
      first_loop = new_loop;
1129
      second_loop = loop;
1130
    }
1131
 
1132
  definitions = ssa_names_to_replace ();
1133
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1134
  rename_variables_in_loop (new_loop);
1135
 
1136
 
1137
  /* 2. Add the guard that controls whether the first loop is executed.
1138
        Resulting CFG would be:
1139
 
1140
        bb_before_first_loop:
1141
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1142
                               GOTO first-loop
1143
 
1144
        first_loop:
1145
        do {
1146
        } while ...
1147
 
1148
        bb_before_second_loop:
1149
 
1150
        second_loop:
1151
        do {
1152
        } while ...
1153
 
1154
        orig_exit_bb:
1155
   */
1156
 
1157
  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1158
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1159
  bb_before_second_loop = split_edge (first_loop->single_exit);
1160
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1161
 
1162
  pre_condition =
1163
    fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1164
                 build_int_cst (TREE_TYPE (first_niters), 0));
1165
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1166
                                  bb_before_second_loop, bb_before_first_loop);
1167
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1168
                                      first_loop == new_loop,
1169
                                      &new_exit_bb, &definitions);
1170
 
1171
 
1172
  /* 3. Add the guard that controls whether the second loop is executed.
1173
        Resulting CFG would be:
1174
 
1175
        bb_before_first_loop:
1176
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1177
                               GOTO first-loop
1178
 
1179
        first_loop:
1180
        do {
1181
        } while ...
1182
 
1183
        bb_between_loops:
1184
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1185
                                    GOTO bb_before_second_loop
1186
 
1187
        bb_before_second_loop:
1188
 
1189
        second_loop:
1190
        do {
1191
        } while ...
1192
 
1193
        bb_after_second_loop:
1194
 
1195
        orig_exit_bb:
1196
   */
1197
 
1198
  bb_between_loops = new_exit_bb;
1199
  bb_after_second_loop = split_edge (second_loop->single_exit);
1200
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1201
 
1202
  pre_condition =
1203
        fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1204
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1205
                                  bb_after_second_loop, bb_before_first_loop);
1206
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1207
                                     second_loop == new_loop, &new_exit_bb);
1208
 
1209
  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1210
   */
1211
  if (update_first_loop_count)
1212
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1213
 
1214
  BITMAP_FREE (definitions);
1215
  delete_update_ssa ();
1216
 
1217
  return new_loop;
1218
}
1219
 
1220
/* Function vect_get_loop_location.
1221
 
1222
   Extract the location of the loop in the source code.
1223
   If the loop is not well formed for vectorization, an estimated
1224
   location is calculated.
1225
   Return the loop location if succeed and NULL if not.  */
1226
 
1227
LOC
1228
find_loop_location (struct loop *loop)
1229
{
1230
  tree node = NULL_TREE;
1231
  basic_block bb;
1232
  block_stmt_iterator si;
1233
 
1234
  if (!loop)
1235
    return UNKNOWN_LOC;
1236
 
1237
  node = get_loop_exit_condition (loop);
1238
 
1239
  if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1240
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
1241
    return EXPR_LOC (node);
1242
 
1243
  /* If we got here the loop is probably not "well formed",
1244
     try to estimate the loop location */
1245
 
1246
  if (!loop->header)
1247
    return UNKNOWN_LOC;
1248
 
1249
  bb = loop->header;
1250
 
1251
  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1252
    {
1253
      node = bsi_stmt (si);
1254
      if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1255
        return EXPR_LOC (node);
1256
    }
1257
 
1258
  return UNKNOWN_LOC;
1259
}
1260
 
1261
 
1262
/*************************************************************************
1263
  Vectorization Debug Information.
1264
 *************************************************************************/
1265
 
1266
/* Function vect_set_verbosity_level.
1267
 
1268
   Called from toplev.c upon detection of the
1269
   -ftree-vectorizer-verbose=N option.  */
1270
 
1271
void
1272
vect_set_verbosity_level (const char *val)
1273
{
1274
   unsigned int vl;
1275
 
1276
   vl = atoi (val);
1277
   if (vl < MAX_VERBOSITY_LEVEL)
1278
     vect_verbosity_level = vl;
1279
   else
1280
     vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1281
}
1282
 
1283
 
1284
/* Function vect_set_dump_settings.
1285
 
1286
   Fix the verbosity level of the vectorizer if the
1287
   requested level was not set explicitly using the flag
1288
   -ftree-vectorizer-verbose=N.
1289
   Decide where to print the debugging information (dump_file/stderr).
1290
   If the user defined the verbosity level, but there is no dump file,
1291
   print to stderr, otherwise print to the dump file.  */
1292
 
1293
static void
1294
vect_set_dump_settings (void)
1295
{
1296
  vect_dump = dump_file;
1297
 
1298
  /* Check if the verbosity level was defined by the user:  */
1299
  if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1300
    {
1301
      /* If there is no dump file, print to stderr.  */
1302
      if (!dump_file)
1303
        vect_dump = stderr;
1304
      return;
1305
    }
1306
 
1307
  /* User didn't specify verbosity level:  */
1308
  if (dump_file && (dump_flags & TDF_DETAILS))
1309
    vect_verbosity_level = REPORT_DETAILS;
1310
  else if (dump_file && (dump_flags & TDF_STATS))
1311
    vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1312
  else
1313
    vect_verbosity_level = REPORT_NONE;
1314
 
1315
  gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1316
}
1317
 
1318
 
1319
/* Function debug_loop_details.
1320
 
1321
   For vectorization debug dumps.  */
1322
 
1323
bool
1324
vect_print_dump_info (enum verbosity_levels vl)
1325
{
1326
  if (vl > vect_verbosity_level)
1327
    return false;
1328
 
1329
  if (!current_function_decl || !vect_dump)
1330
    return false;
1331
 
1332
  if (vect_loop_location == UNKNOWN_LOC)
1333
    fprintf (vect_dump, "\n%s:%d: note: ",
1334
                 DECL_SOURCE_FILE (current_function_decl),
1335
                 DECL_SOURCE_LINE (current_function_decl));
1336
  else
1337
    fprintf (vect_dump, "\n%s:%d: note: ",
1338
             LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1339
 
1340
  return true;
1341
}
1342
 
1343
 
1344
/*************************************************************************
1345
  Vectorization Utilities.
1346
 *************************************************************************/
1347
 
1348
/* Function new_stmt_vec_info.
1349
 
1350
   Create and initialize a new stmt_vec_info struct for STMT.  */
1351
 
1352
stmt_vec_info
1353
new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1354
{
1355
  stmt_vec_info res;
1356
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1357
 
1358
  STMT_VINFO_TYPE (res) = undef_vec_info_type;
1359
  STMT_VINFO_STMT (res) = stmt;
1360
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1361
  STMT_VINFO_RELEVANT_P (res) = 0;
1362
  STMT_VINFO_LIVE_P (res) = 0;
1363
  STMT_VINFO_VECTYPE (res) = NULL;
1364
  STMT_VINFO_VEC_STMT (res) = NULL;
1365
  STMT_VINFO_IN_PATTERN_P (res) = false;
1366
  STMT_VINFO_RELATED_STMT (res) = NULL;
1367
  STMT_VINFO_DATA_REF (res) = NULL;
1368
  if (TREE_CODE (stmt) == PHI_NODE)
1369
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1370
  else
1371
    STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1372
  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1373
 
1374
  return res;
1375
}
1376
 
1377
 
1378
/* Function new_loop_vec_info.
1379
 
1380
   Create and initialize a new loop_vec_info struct for LOOP, as well as
1381
   stmt_vec_info structs for all the stmts in LOOP.  */
1382
 
1383
loop_vec_info
1384
new_loop_vec_info (struct loop *loop)
1385
{
1386
  loop_vec_info res;
1387
  basic_block *bbs;
1388
  block_stmt_iterator si;
1389
  unsigned int i;
1390
 
1391
  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1392
 
1393
  bbs = get_loop_body (loop);
1394
 
1395
  /* Create stmt_info for all stmts in the loop.  */
1396
  for (i = 0; i < loop->num_nodes; i++)
1397
    {
1398
      basic_block bb = bbs[i];
1399
      tree phi;
1400
 
1401
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1402
        {
1403
          stmt_ann_t ann = get_stmt_ann (phi);
1404
          set_stmt_info (ann, new_stmt_vec_info (phi, res));
1405
        }
1406
 
1407
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1408
        {
1409
          tree stmt = bsi_stmt (si);
1410
          stmt_ann_t ann;
1411
 
1412
          ann = stmt_ann (stmt);
1413
          set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1414
        }
1415
    }
1416
 
1417
  LOOP_VINFO_LOOP (res) = loop;
1418
  LOOP_VINFO_BBS (res) = bbs;
1419
  LOOP_VINFO_EXIT_COND (res) = NULL;
1420
  LOOP_VINFO_NITERS (res) = NULL;
1421
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1422
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1423
  LOOP_VINFO_VECT_FACTOR (res) = 0;
1424
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1425
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1426
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1427
  LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1428
    = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1429
 
1430
  return res;
1431
}
1432
 
1433
 
1434
/* Function destroy_loop_vec_info.
1435
 
1436
   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1437
   stmts in the loop.  */
1438
 
1439
void
1440
destroy_loop_vec_info (loop_vec_info loop_vinfo)
1441
{
1442
  struct loop *loop;
1443
  basic_block *bbs;
1444
  int nbbs;
1445
  block_stmt_iterator si;
1446
  int j;
1447
 
1448
  if (!loop_vinfo)
1449
    return;
1450
 
1451
  loop = LOOP_VINFO_LOOP (loop_vinfo);
1452
 
1453
  bbs = LOOP_VINFO_BBS (loop_vinfo);
1454
  nbbs = loop->num_nodes;
1455
 
1456
  for (j = 0; j < nbbs; j++)
1457
    {
1458
      basic_block bb = bbs[j];
1459
      tree phi;
1460
      stmt_vec_info stmt_info;
1461
 
1462
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1463
        {
1464
          stmt_ann_t ann = stmt_ann (phi);
1465
 
1466
          stmt_info = vinfo_for_stmt (phi);
1467
          free (stmt_info);
1468
          set_stmt_info (ann, NULL);
1469
        }
1470
 
1471
      for (si = bsi_start (bb); !bsi_end_p (si); )
1472
        {
1473
          tree stmt = bsi_stmt (si);
1474
          stmt_ann_t ann = stmt_ann (stmt);
1475
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1476
 
1477
          if (stmt_info)
1478
            {
1479
              /* Check if this is a "pattern stmt" (introduced by the
1480
                 vectorizer during the pattern recognition pass).  */
1481
              bool remove_stmt_p = false;
1482
              tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1483
              if (orig_stmt)
1484
                {
1485
                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1486
                  if (orig_stmt_info
1487
                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1488
                    remove_stmt_p = true;
1489
                }
1490
 
1491
              /* Free stmt_vec_info.  */
1492
              VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1493
              free (stmt_info);
1494
              set_stmt_info (ann, NULL);
1495
 
1496
              /* Remove dead "pattern stmts".  */
1497
              if (remove_stmt_p)
1498
                bsi_remove (&si, true);
1499
            }
1500
          bsi_next (&si);
1501
        }
1502
    }
1503
 
1504
  free (LOOP_VINFO_BBS (loop_vinfo));
1505
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1506
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1507
  VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1508
 
1509
  free (loop_vinfo);
1510
}
1511
 
1512
 
1513
/* Function vect_force_dr_alignment_p.
1514
 
1515
   Returns whether the alignment of a DECL can be forced to be aligned
1516
   on ALIGNMENT bit boundary.  */
1517
 
1518
bool
1519
vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1520
{
1521
  if (TREE_CODE (decl) != VAR_DECL)
1522
    return false;
1523
 
1524
  if (DECL_EXTERNAL (decl))
1525
    return false;
1526
 
1527
  if (TREE_ASM_WRITTEN (decl))
1528
    return false;
1529
 
1530
  if (TREE_STATIC (decl))
1531
    return (alignment <= MAX_OFILE_ALIGNMENT);
1532
  else
1533
    /* This is not 100% correct.  The absolute correct stack alignment
1534
       is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1535
       PREFERRED_STACK_BOUNDARY is honored by all translation units.
1536
       However, until someone implements forced stack alignment, SSE
1537
       isn't really usable without this.  */
1538
    return (alignment <= PREFERRED_STACK_BOUNDARY);
1539
}
1540
 
1541
 
1542
/* Function get_vectype_for_scalar_type.
1543
 
1544
   Returns the vector type corresponding to SCALAR_TYPE as supported
1545
   by the target.  */
1546
 
1547
tree
1548
get_vectype_for_scalar_type (tree scalar_type)
1549
{
1550
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1551
  int nbytes = GET_MODE_SIZE (inner_mode);
1552
  int nunits;
1553
  tree vectype;
1554
 
1555
  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1556
    return NULL_TREE;
1557
 
1558
  /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1559
     is expected.  */
1560
  nunits = UNITS_PER_SIMD_WORD / nbytes;
1561
 
1562
  vectype = build_vector_type (scalar_type, nunits);
1563
  if (vect_print_dump_info (REPORT_DETAILS))
1564
    {
1565
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1566
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1567
    }
1568
 
1569
  if (!vectype)
1570
    return NULL_TREE;
1571
 
1572
  if (vect_print_dump_info (REPORT_DETAILS))
1573
    {
1574
      fprintf (vect_dump, "vectype: ");
1575
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
1576
    }
1577
 
1578
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1579
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1580
    {
1581
      if (vect_print_dump_info (REPORT_DETAILS))
1582
        fprintf (vect_dump, "mode not supported by target.");
1583
      return NULL_TREE;
1584
    }
1585
 
1586
  return vectype;
1587
}
1588
 
1589
 
1590
/* Function vect_supportable_dr_alignment
1591
 
1592
   Return whether the data reference DR is supported with respect to its
1593
   alignment.  */
1594
 
1595
enum dr_alignment_support
1596
vect_supportable_dr_alignment (struct data_reference *dr)
1597
{
1598
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1599
  enum machine_mode mode = (int) TYPE_MODE (vectype);
1600
 
1601
  if (aligned_access_p (dr))
1602
    return dr_aligned;
1603
 
1604
  /* Possibly unaligned access.  */
1605
 
1606
  if (DR_IS_READ (dr))
1607
    {
1608
      if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1609
          && (!targetm.vectorize.builtin_mask_for_load
1610
              || targetm.vectorize.builtin_mask_for_load ()))
1611
        return dr_unaligned_software_pipeline;
1612
 
1613
      if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1614
        /* Can't software pipeline the loads, but can at least do them.  */
1615
        return dr_unaligned_supported;
1616
    }
1617
 
1618
  /* Unsupported.  */
1619
  return dr_unaligned_unsupported;
1620
}
1621
 
1622
 
1623
/* Function vect_is_simple_use.
1624
 
1625
   Input:
1626
   LOOP - the loop that is being vectorized.
1627
   OPERAND - operand of a stmt in LOOP.
1628
   DEF - the defining stmt in case OPERAND is an SSA_NAME.
1629
 
1630
   Returns whether a stmt with OPERAND can be vectorized.
1631
   Supportable operands are constants, loop invariants, and operands that are
1632
   defined by the current iteration of the loop. Unsupportable operands are
1633
   those that are defined by a previous iteration of the loop (as is the case
1634
   in reduction/induction computations).  */
1635
 
1636
bool
1637
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1638
                    tree *def, enum vect_def_type *dt)
1639
{
1640
  basic_block bb;
1641
  stmt_vec_info stmt_vinfo;
1642
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1643
 
1644
  *def_stmt = NULL_TREE;
1645
  *def = NULL_TREE;
1646
 
1647
  if (vect_print_dump_info (REPORT_DETAILS))
1648
    {
1649
      fprintf (vect_dump, "vect_is_simple_use: operand ");
1650
      print_generic_expr (vect_dump, operand, TDF_SLIM);
1651
    }
1652
 
1653
  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1654
    {
1655
      *dt = vect_constant_def;
1656
      return true;
1657
    }
1658
 
1659
  if (TREE_CODE (operand) != SSA_NAME)
1660
    {
1661
      if (vect_print_dump_info (REPORT_DETAILS))
1662
        fprintf (vect_dump, "not ssa-name.");
1663
      return false;
1664
    }
1665
 
1666
  *def_stmt = SSA_NAME_DEF_STMT (operand);
1667
  if (*def_stmt == NULL_TREE )
1668
    {
1669
      if (vect_print_dump_info (REPORT_DETAILS))
1670
        fprintf (vect_dump, "no def_stmt.");
1671
      return false;
1672
    }
1673
 
1674
  if (vect_print_dump_info (REPORT_DETAILS))
1675
    {
1676
      fprintf (vect_dump, "def_stmt: ");
1677
      print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1678
    }
1679
 
1680
  /* empty stmt is expected only in case of a function argument.
1681
     (Otherwise - we expect a phi_node or a modify_expr).  */
1682
  if (IS_EMPTY_STMT (*def_stmt))
1683
    {
1684
      tree arg = TREE_OPERAND (*def_stmt, 0);
1685
      if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1686
        {
1687
          *def = operand;
1688
          *dt = vect_invariant_def;
1689
          return true;
1690
        }
1691
 
1692
      if (vect_print_dump_info (REPORT_DETAILS))
1693
        fprintf (vect_dump, "Unexpected empty stmt.");
1694
      return false;
1695
    }
1696
 
1697
  bb = bb_for_stmt (*def_stmt);
1698
  if (!flow_bb_inside_loop_p (loop, bb))
1699
    *dt = vect_invariant_def;
1700
  else
1701
    {
1702
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
1703
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1704
    }
1705
 
1706
  if (*dt == vect_unknown_def_type)
1707
    {
1708
      if (vect_print_dump_info (REPORT_DETAILS))
1709
        fprintf (vect_dump, "Unsupported pattern.");
1710
      return false;
1711
    }
1712
 
1713
  /* stmts inside the loop that have been identified as performing
1714
     a reduction operation cannot have uses in the loop.  */
1715
  if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1716
    {
1717
      if (vect_print_dump_info (REPORT_DETAILS))
1718
        fprintf (vect_dump, "reduction used in loop.");
1719
      return false;
1720
    }
1721
 
1722
  if (vect_print_dump_info (REPORT_DETAILS))
1723
    fprintf (vect_dump, "type of def: %d.",*dt);
1724
 
1725
  switch (TREE_CODE (*def_stmt))
1726
    {
1727
    case PHI_NODE:
1728
      *def = PHI_RESULT (*def_stmt);
1729
      gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1730
                  || *dt == vect_invariant_def);
1731
      break;
1732
 
1733
    case MODIFY_EXPR:
1734
      *def = TREE_OPERAND (*def_stmt, 0);
1735
      gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1736
      break;
1737
 
1738
    default:
1739
      if (vect_print_dump_info (REPORT_DETAILS))
1740
        fprintf (vect_dump, "unsupported defining stmt: ");
1741
      return false;
1742
    }
1743
 
1744
  if (*dt == vect_induction_def)
1745
    {
1746
      if (vect_print_dump_info (REPORT_DETAILS))
1747
        fprintf (vect_dump, "induction not supported.");
1748
      return false;
1749
    }
1750
 
1751
  return true;
1752
}
1753
 
1754
 
1755
/* Function reduction_code_for_scalar_code
1756
 
1757
   Input:
1758
   CODE - tree_code of a reduction operations.
1759
 
1760
   Output:
1761
   REDUC_CODE - the corresponding tree-code to be used to reduce the
1762
      vector of partial results into a single scalar result (which
1763
      will also reside in a vector).
1764
 
1765
   Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1766
 
1767
bool
1768
reduction_code_for_scalar_code (enum tree_code code,
1769
                                enum tree_code *reduc_code)
1770
{
1771
  switch (code)
1772
  {
1773
  case MAX_EXPR:
1774
    *reduc_code = REDUC_MAX_EXPR;
1775
    return true;
1776
 
1777
  case MIN_EXPR:
1778
    *reduc_code = REDUC_MIN_EXPR;
1779
    return true;
1780
 
1781
  case PLUS_EXPR:
1782
    *reduc_code = REDUC_PLUS_EXPR;
1783
    return true;
1784
 
1785
  default:
1786
    return false;
1787
  }
1788
}
1789
 
1790
 
1791
/* Function vect_is_simple_reduction
1792
 
1793
   Detect a cross-iteration def-use cucle that represents a simple
1794
   reduction computation. We look for the following pattern:
1795
 
1796
   loop_header:
1797
     a1 = phi < a0, a2 >
1798
     a3 = ...
1799
     a2 = operation (a3, a1)
1800
 
1801
   such that:
1802
   1. operation is commutative and associative and it is safe to
1803
      change the order of the computation.
1804
   2. no uses for a2 in the loop (a2 is used out of the loop)
1805
   3. no uses of a1 in the loop besides the reduction operation.
1806
 
1807
   Condition 1 is tested here.
1808
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1809
 
1810
tree
1811
vect_is_simple_reduction (struct loop *loop, tree phi)
1812
{
1813
  edge latch_e = loop_latch_edge (loop);
1814
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1815
  tree def_stmt, def1, def2;
1816
  enum tree_code code;
1817
  int op_type;
1818
  tree operation, op1, op2;
1819
  tree type;
1820
 
1821
  if (TREE_CODE (loop_arg) != SSA_NAME)
1822
    {
1823
      if (vect_print_dump_info (REPORT_DETAILS))
1824
        {
1825
          fprintf (vect_dump, "reduction: not ssa_name: ");
1826
          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1827
        }
1828
      return NULL_TREE;
1829
    }
1830
 
1831
  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1832
  if (!def_stmt)
1833
    {
1834
      if (vect_print_dump_info (REPORT_DETAILS))
1835
        fprintf (vect_dump, "reduction: no def_stmt.");
1836
      return NULL_TREE;
1837
    }
1838
 
1839
  if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1840
    {
1841
      if (vect_print_dump_info (REPORT_DETAILS))
1842
        {
1843
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1844
        }
1845
      return NULL_TREE;
1846
    }
1847
 
1848
  operation = TREE_OPERAND (def_stmt, 1);
1849
  code = TREE_CODE (operation);
1850
  if (!commutative_tree_code (code) || !associative_tree_code (code))
1851
    {
1852
      if (vect_print_dump_info (REPORT_DETAILS))
1853
        {
1854
          fprintf (vect_dump, "reduction: not commutative/associative: ");
1855
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1856
        }
1857
      return NULL_TREE;
1858
    }
1859
 
1860
  op_type = TREE_CODE_LENGTH (code);
1861
  if (op_type != binary_op)
1862
    {
1863
      if (vect_print_dump_info (REPORT_DETAILS))
1864
        {
1865
          fprintf (vect_dump, "reduction: not binary operation: ");
1866
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1867
        }
1868
      return NULL_TREE;
1869
    }
1870
 
1871
  op1 = TREE_OPERAND (operation, 0);
1872
  op2 = TREE_OPERAND (operation, 1);
1873
  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1874
    {
1875
      if (vect_print_dump_info (REPORT_DETAILS))
1876
        {
1877
          fprintf (vect_dump, "reduction: uses not ssa_names: ");
1878
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1879
        }
1880
      return NULL_TREE;
1881
    }
1882
 
1883
  /* Check that it's ok to change the order of the computation.  */
1884
  type = TREE_TYPE (operation);
1885
  if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1886
      || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1887
    {
1888
      if (vect_print_dump_info (REPORT_DETAILS))
1889
        {
1890
          fprintf (vect_dump, "reduction: multiple types: operation type: ");
1891
          print_generic_expr (vect_dump, type, TDF_SLIM);
1892
          fprintf (vect_dump, ", operands types: ");
1893
          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1894
          fprintf (vect_dump, ",");
1895
          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1896
        }
1897
      return NULL_TREE;
1898
    }
1899
 
1900
  /* CHECKME: check for !flag_finite_math_only too?  */
1901
  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1902
    {
1903
      /* Changing the order of operations changes the semantics.  */
1904
      if (vect_print_dump_info (REPORT_DETAILS))
1905
        {
1906
          fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1907
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1908
        }
1909
      return NULL_TREE;
1910
    }
1911
  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
1912
    {
1913
      /* Changing the order of operations changes the semantics.  */
1914
      if (vect_print_dump_info (REPORT_DETAILS))
1915
        {
1916
          fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1917
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1918
        }
1919
      return NULL_TREE;
1920
    }
1921
 
1922
  /* reduction is safe. we're dealing with one of the following:
1923
     1) integer arithmetic and no trapv
1924
     2) floating point arithmetic, and special flags permit this optimization.
1925
   */
1926
  def1 = SSA_NAME_DEF_STMT (op1);
1927
  def2 = SSA_NAME_DEF_STMT (op2);
1928
  if (!def1 || !def2)
1929
    {
1930
      if (vect_print_dump_info (REPORT_DETAILS))
1931
        {
1932
          fprintf (vect_dump, "reduction: no defs for operands: ");
1933
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1934
        }
1935
      return NULL_TREE;
1936
    }
1937
 
1938
  if (TREE_CODE (def1) == MODIFY_EXPR
1939
      && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1940
      && def2 == phi)
1941
    {
1942
      if (vect_print_dump_info (REPORT_DETAILS))
1943
        {
1944
          fprintf (vect_dump, "detected reduction:");
1945
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1946
        }
1947
      return def_stmt;
1948
    }
1949
  else if (TREE_CODE (def2) == MODIFY_EXPR
1950
      && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1951
      && def1 == phi)
1952
    {
1953
      /* Swap operands (just for simplicity - so that the rest of the code
1954
         can assume that the reduction variable is always the last (second)
1955
         argument).  */
1956
      if (vect_print_dump_info (REPORT_DETAILS))
1957
        {
1958
          fprintf (vect_dump, "detected reduction: need to swap operands:");
1959
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1960
        }
1961
      swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
1962
                                    &TREE_OPERAND (operation, 1));
1963
      return def_stmt;
1964
    }
1965
  else
1966
    {
1967
      if (vect_print_dump_info (REPORT_DETAILS))
1968
        {
1969
          fprintf (vect_dump, "reduction: unknown pattern.");
1970
          print_generic_expr (vect_dump, operation, TDF_SLIM);
1971
        }
1972
      return NULL_TREE;
1973
    }
1974
}
1975
 
1976
 
1977
/* Function vect_is_simple_iv_evolution.
1978
 
1979
   FORNOW: A simple evolution of an induction variables in the loop is
1980
   considered a polynomial evolution with constant step.  */
1981
 
1982
bool
1983
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1984
                             tree * step)
1985
{
1986
  tree init_expr;
1987
  tree step_expr;
1988
 
1989
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1990
 
1991
  /* When there is no evolution in this loop, the evolution function
1992
     is not "simple".  */
1993
  if (evolution_part == NULL_TREE)
1994
    return false;
1995
 
1996
  /* When the evolution is a polynomial of degree >= 2
1997
     the evolution function is not "simple".  */
1998
  if (tree_is_chrec (evolution_part))
1999
    return false;
2000
 
2001
  step_expr = evolution_part;
2002
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2003
                                                           loop_nb));
2004
 
2005
  if (vect_print_dump_info (REPORT_DETAILS))
2006
    {
2007
      fprintf (vect_dump, "step: ");
2008
      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2009
      fprintf (vect_dump, ",  init: ");
2010
      print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2011
    }
2012
 
2013
  *init = init_expr;
2014
  *step = step_expr;
2015
 
2016
  if (TREE_CODE (step_expr) != INTEGER_CST)
2017
    {
2018
      if (vect_print_dump_info (REPORT_DETAILS))
2019
        fprintf (vect_dump, "step unknown.");
2020
      return false;
2021
    }
2022
 
2023
  return true;
2024
}
2025
 
2026
 
2027
/* Function vectorize_loops.
2028
 
2029
   Entry Point to loop vectorization phase.  */
2030
 
2031
void
2032
vectorize_loops (struct loops *loops)
2033
{
2034
  unsigned int i;
2035
  unsigned int num_vectorized_loops = 0;
2036
 
2037
  /* Fix the verbosity level if not defined explicitly by the user.  */
2038
  vect_set_dump_settings ();
2039
 
2040
  /* Allocate the bitmap that records which virtual variables that
2041
     need to be renamed.  */
2042
  vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2043
 
2044
  /*  ----------- Analyze loops. -----------  */
2045
 
2046
  /* If some loop was duplicated, it gets bigger number
2047
     than all previously defined loops. This fact allows us to run
2048
     only over initial loops skipping newly generated ones.  */
2049
  vect_loops_num = loops->num;
2050
  for (i = 1; i < vect_loops_num; i++)
2051
    {
2052
      loop_vec_info loop_vinfo;
2053
      struct loop *loop = loops->parray[i];
2054
 
2055
      if (!loop)
2056
        continue;
2057
 
2058
      vect_loop_location = find_loop_location (loop);
2059
      loop_vinfo = vect_analyze_loop (loop);
2060
      loop->aux = loop_vinfo;
2061
 
2062
      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2063
        continue;
2064
 
2065
      vect_transform_loop (loop_vinfo, loops);
2066
      num_vectorized_loops++;
2067
    }
2068
  vect_loop_location = UNKNOWN_LOC;
2069
 
2070
  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2071
    fprintf (vect_dump, "vectorized %u loops in function.\n",
2072
             num_vectorized_loops);
2073
 
2074
  /*  ----------- Finalize. -----------  */
2075
 
2076
  BITMAP_FREE (vect_vnames_to_rename);
2077
 
2078
  for (i = 1; i < vect_loops_num; i++)
2079
    {
2080
      struct loop *loop = loops->parray[i];
2081
      loop_vec_info loop_vinfo;
2082
 
2083
      if (!loop)
2084
        continue;
2085
      loop_vinfo = loop->aux;
2086
      destroy_loop_vec_info (loop_vinfo);
2087
      loop->aux = NULL;
2088
    }
2089
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.