OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [tree-vect-loop-manip.c] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Vectorizer Specific Loop Manipulations
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
   and Ira Rosen <irar@il.ibm.com>
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "basic-block.h"
30
#include "diagnostic.h"
31
#include "tree-flow.h"
32
#include "tree-dump.h"
33
#include "cfgloop.h"
34
#include "cfglayout.h"
35
#include "expr.h"
36
#include "toplev.h"
37
#include "tree-scalar-evolution.h"
38
#include "tree-vectorizer.h"
39
#include "langhooks.h"
40
 
41
/*************************************************************************
42
  Simple Loop Peeling Utilities
43
 
44
  Utilities to support loop peeling for vectorization purposes.
45
 *************************************************************************/
46
 
47
 
48
/* Renames the use *OP_P.  */
49
 
50
static void
51
rename_use_op (use_operand_p op_p)
52
{
53
  tree new_name;
54
 
55
  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
56
    return;
57
 
58
  new_name = get_current_def (USE_FROM_PTR (op_p));
59
 
60
  /* Something defined outside of the loop.  */
61
  if (!new_name)
62
    return;
63
 
64
  /* An ordinary ssa name defined in the loop.  */
65
 
66
  SET_USE (op_p, new_name);
67
}
68
 
69
 
70
/* Renames the variables in basic block BB.  */
71
 
72
void
73
rename_variables_in_bb (basic_block bb)
74
{
75
  gimple_stmt_iterator gsi;
76
  gimple stmt;
77
  use_operand_p use_p;
78
  ssa_op_iter iter;
79
  edge e;
80
  edge_iterator ei;
81
  struct loop *loop = bb->loop_father;
82
 
83
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
84
    {
85
      stmt = gsi_stmt (gsi);
86
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
87
        rename_use_op (use_p);
88
    }
89
 
90
  FOR_EACH_EDGE (e, ei, bb->succs)
91
    {
92
      if (!flow_bb_inside_loop_p (loop, e->dest))
93
        continue;
94
      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
95
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
96
    }
97
}
98
 
99
 
100
/* Renames variables in new generated LOOP.  */
101
 
102
void
103
rename_variables_in_loop (struct loop *loop)
104
{
105
  unsigned i;
106
  basic_block *bbs;
107
 
108
  bbs = get_loop_body (loop);
109
 
110
  for (i = 0; i < loop->num_nodes; i++)
111
    rename_variables_in_bb (bbs[i]);
112
 
113
  free (bbs);
114
}
115
 
116
typedef struct
117
{
118
  tree from, to;
119
  basic_block bb;
120
} adjust_info;
121
 
122
DEF_VEC_O(adjust_info);
123
DEF_VEC_ALLOC_O_STACK(adjust_info);
124
#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
125
 
126
/* A stack of values to be adjusted in debug stmts.  We have to
127
   process them LIFO, so that the closest substitution applies.  If we
128
   processed them FIFO, without the stack, we might substitute uses
129
   with a PHI DEF that would soon become non-dominant, and when we got
130
   to the suitable one, it wouldn't have anything to substitute any
131
   more.  */
132
static VEC(adjust_info, stack) *adjust_vec;
133
 
134
/* Adjust any debug stmts that referenced AI->from values to use the
135
   loop-closed AI->to, if the references are dominated by AI->bb and
136
   not by the definition of AI->from.  */
137
 
138
static void
139
adjust_debug_stmts_now (adjust_info *ai)
140
{
141
  basic_block bbphi = ai->bb;
142
  tree orig_def = ai->from;
143
  tree new_def = ai->to;
144
  imm_use_iterator imm_iter;
145
  gimple stmt;
146
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
147
 
148
  gcc_assert (dom_info_available_p (CDI_DOMINATORS));
149
 
150
  /* Adjust any debug stmts that held onto non-loop-closed
151
     references.  */
152
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
153
    {
154
      use_operand_p use_p;
155
      basic_block bbuse;
156
 
157
      if (!is_gimple_debug (stmt))
158
        continue;
159
 
160
      gcc_assert (gimple_debug_bind_p (stmt));
161
 
162
      bbuse = gimple_bb (stmt);
163
 
164
      if ((bbuse == bbphi
165
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
166
          && !(bbuse == bbdef
167
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
168
        {
169
          if (new_def)
170
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
171
              SET_USE (use_p, new_def);
172
          else
173
            {
174
              gimple_debug_bind_reset_value (stmt);
175
              update_stmt (stmt);
176
            }
177
        }
178
    }
179
}
180
 
181
/* Adjust debug stmts as scheduled before.  */
182
 
183
static void
184
adjust_vec_debug_stmts (void)
185
{
186
  if (!MAY_HAVE_DEBUG_STMTS)
187
    return;
188
 
189
  gcc_assert (adjust_vec);
190
 
191
  while (!VEC_empty (adjust_info, adjust_vec))
192
    {
193
      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
194
      VEC_pop (adjust_info, adjust_vec);
195
    }
196
 
197
  VEC_free (adjust_info, stack, adjust_vec);
198
}
199
 
200
/* Adjust any debug stmts that referenced FROM values to use the
201
   loop-closed TO, if the references are dominated by BB and not by
202
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
203
   will be postponed until adjust_vec_debug_stmts is called.  */
204
 
205
static void
206
adjust_debug_stmts (tree from, tree to, basic_block bb)
207
{
208
  adjust_info ai;
209
 
210
  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
211
      && SSA_NAME_VAR (from) != gimple_vop (cfun))
212
    {
213
      ai.from = from;
214
      ai.to = to;
215
      ai.bb = bb;
216
 
217
      if (adjust_vec)
218
        VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
219
      else
220
        adjust_debug_stmts_now (&ai);
221
    }
222
}
223
 
224
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
225
   to adjust any debug stmts that referenced the old phi arg,
226
   presumably non-loop-closed references left over from other
227
   transformations.  */
228
 
229
static void
230
adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
231
{
232
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
233
 
234
  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
235
 
236
  if (MAY_HAVE_DEBUG_STMTS)
237
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
238
                        gimple_bb (update_phi));
239
}
240
 
241
 
242
/* Update the PHI nodes of NEW_LOOP.
243
 
244
   NEW_LOOP is a duplicate of ORIG_LOOP.
245
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
246
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
247
   executes before it.  */
248
 
249
static void
250
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
251
                                       struct loop *new_loop, bool after)
252
{
253
  tree new_ssa_name;
254
  gimple phi_new, phi_orig;
255
  tree def;
256
  edge orig_loop_latch = loop_latch_edge (orig_loop);
257
  edge orig_entry_e = loop_preheader_edge (orig_loop);
258
  edge new_loop_exit_e = single_exit (new_loop);
259
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
260
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
261
  gimple_stmt_iterator gsi_new, gsi_orig;
262
 
263
  /*
264
     step 1. For each loop-header-phi:
265
             Add the first phi argument for the phi in NEW_LOOP
266
            (the one associated with the entry of NEW_LOOP)
267
 
268
     step 2. For each loop-header-phi:
269
             Add the second phi argument for the phi in NEW_LOOP
270
            (the one associated with the latch of NEW_LOOP)
271
 
272
     step 3. Update the phis in the successor block of NEW_LOOP.
273
 
274
        case 1: NEW_LOOP was placed before ORIG_LOOP:
275
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
276
                Updating the phis in the successor block can therefore be done
277
                along with the scanning of the loop header phis, because the
278
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
279
                phi nodes, organized in the same order.
280
 
281
        case 2: NEW_LOOP was placed after ORIG_LOOP:
282
                The successor block of NEW_LOOP is the original exit block of
283
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
284
                We postpone updating these phis to a later stage (when
285
                loop guards are added).
286
   */
287
 
288
 
289
  /* Scan the phis in the headers of the old and new loops
290
     (they are organized in exactly the same order).  */
291
 
292
  for (gsi_new = gsi_start_phis (new_loop->header),
293
       gsi_orig = gsi_start_phis (orig_loop->header);
294
       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
295
       gsi_next (&gsi_new), gsi_next (&gsi_orig))
296
    {
297
      source_location locus;
298
      phi_new = gsi_stmt (gsi_new);
299
      phi_orig = gsi_stmt (gsi_orig);
300
 
301
      /* step 1.  */
302
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
303
      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
304
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);
305
 
306
      /* step 2.  */
307
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
308
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
309
      if (TREE_CODE (def) != SSA_NAME)
310
        continue;
311
 
312
      new_ssa_name = get_current_def (def);
313
      if (!new_ssa_name)
314
        {
315
          /* This only happens if there are no definitions
316
             inside the loop. use the phi_result in this case.  */
317
          new_ssa_name = PHI_RESULT (phi_new);
318
        }
319
 
320
      /* An ordinary ssa name defined in the loop.  */
321
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
322
 
323
      /* Drop any debug references outside the loop, if they would
324
         become ill-formed SSA.  */
325
      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
326
 
327
      /* step 3 (case 1).  */
328
      if (!after)
329
        {
330
          gcc_assert (new_loop_exit_e == orig_entry_e);
331
          adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332
        }
333
    }
334
}
335
 
336
 
337
/* Update PHI nodes for a guard of the LOOP.
338
 
339
   Input:
340
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
341
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
342
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
343
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
344
        We denote this bb NEW_MERGE_BB because before the guard code was added
345
        it had a single predecessor (the LOOP header), and now it became a merge
346
        point of two paths - the path that ends with the LOOP exit-edge, and
347
        the path that ends with GUARD_EDGE.
348
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
349
        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
350
 
351
   ===> The CFG before the guard-code was added:
352
        LOOP_header_bb:
353
          loop_body
354
          if (exit_loop) goto update_bb
355
          else           goto LOOP_header_bb
356
        update_bb:
357
 
358
   ==> The CFG after the guard-code was added:
359
        guard_bb:
360
          if (LOOP_guard_condition) goto new_merge_bb
361
          else                      goto LOOP_header_bb
362
        LOOP_header_bb:
363
          loop_body
364
          if (exit_loop_condition) goto new_merge_bb
365
          else                     goto LOOP_header_bb
366
        new_merge_bb:
367
          goto update_bb
368
        update_bb:
369
 
370
   ==> The CFG after this function:
371
        guard_bb:
372
          if (LOOP_guard_condition) goto new_merge_bb
373
          else                      goto LOOP_header_bb
374
        LOOP_header_bb:
375
          loop_body
376
          if (exit_loop_condition) goto new_exit_bb
377
          else                     goto LOOP_header_bb
378
        new_exit_bb:
379
        new_merge_bb:
380
          goto update_bb
381
        update_bb:
382
 
383
   This function:
384
   1. creates and updates the relevant phi nodes to account for the new
385
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
386
      1.1. Create phi nodes at NEW_MERGE_BB.
387
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
388
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
389
   2. preserves loop-closed-ssa-form by creating the required phi nodes
390
      at the exit of LOOP (i.e, in NEW_EXIT_BB).
391
 
392
   There are two flavors to this function:
393
 
394
   slpeel_update_phi_nodes_for_guard1:
395
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
396
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
397
     for variables that have phis in the loop header.
398
 
399
   slpeel_update_phi_nodes_for_guard2:
400
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
401
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
402
     for variables that have phis in the loop exit.
403
 
404
   I.E., the overall structure is:
405
 
406
        loop1_preheader_bb:
407
                guard1 (goto loop1/merge1_bb)
408
        loop1
409
        loop1_exit_bb:
410
                guard2 (goto merge1_bb/merge2_bb)
411
        merge1_bb
412
        loop2
413
        loop2_exit_bb
414
        merge2_bb
415
        next_bb
416
 
417
   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
418
   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
419
   that have phis in loop1->header).
420
 
421
   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
422
   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
423
   that have phis in next_bb). It also adds some of these phis to
424
   loop1_exit_bb.
425
 
426
   slpeel_update_phi_nodes_for_guard1 is always called before
427
   slpeel_update_phi_nodes_for_guard2. They are both needed in order
428
   to create correct data-flow and loop-closed-ssa-form.
429
 
430
   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
431
   that change between iterations of a loop (and therefore have a phi-node
432
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
433
   phis for variables that are used out of the loop (and therefore have
434
   loop-closed exit phis). Some variables may be both updated between
435
   iterations and used after the loop. This is why in loop1_exit_bb we
436
   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
437
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
438
 
439
   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
440
     an original loop. i.e., we have:
441
 
442
           orig_loop
443
           guard_bb (goto LOOP/new_merge)
444
           new_loop <-- LOOP
445
           new_exit
446
           new_merge
447
           next_bb
448
 
449
     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
450
     have:
451
 
452
           new_loop
453
           guard_bb (goto LOOP/new_merge)
454
           orig_loop <-- LOOP
455
           new_exit
456
           new_merge
457
           next_bb
458
 
459
     The SSA names defined in the original loop have a current
460
     reaching definition that that records the corresponding new
461
     ssa-name used in the new duplicated loop copy.
462
  */
463
 
464
/* Function slpeel_update_phi_nodes_for_guard1
465
 
466
   Input:
467
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
468
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
469
            information.
470
 
471
   In the context of the overall structure, we have:
472
 
473
        loop1_preheader_bb:
474
                guard1 (goto loop1/merge1_bb)
475
LOOP->  loop1
476
        loop1_exit_bb:
477
                guard2 (goto merge1_bb/merge2_bb)
478
        merge1_bb
479
        loop2
480
        loop2_exit_bb
481
        merge2_bb
482
        next_bb
483
 
484
   For each name updated between loop iterations (i.e - for each name that has
485
   an entry (loop-header) phi in LOOP) we create a new phi in:
486
   1. merge1_bb (to account for the edge from guard1)
487
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
488
*/
489
 
490
static void
491
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
492
                                    bool is_new_loop, basic_block *new_exit_bb,
493
                                    bitmap *defs)
494
{
495
  gimple orig_phi, new_phi;
496
  gimple update_phi, update_phi2;
497
  tree guard_arg, loop_arg;
498
  basic_block new_merge_bb = guard_edge->dest;
499
  edge e = EDGE_SUCC (new_merge_bb, 0);
500
  basic_block update_bb = e->dest;
501
  basic_block orig_bb = loop->header;
502
  edge new_exit_e;
503
  tree current_new_name;
504
  gimple_stmt_iterator gsi_orig, gsi_update;
505
 
506
  /* Create new bb between loop and new_merge_bb.  */
507
  *new_exit_bb = split_edge (single_exit (loop));
508
 
509
  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
510
 
511
  for (gsi_orig = gsi_start_phis (orig_bb),
512
       gsi_update = gsi_start_phis (update_bb);
513
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
514
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
515
    {
516
      source_location loop_locus, guard_locus;;
517
      orig_phi = gsi_stmt (gsi_orig);
518
      update_phi = gsi_stmt (gsi_update);
519
 
520
      /** 1. Handle new-merge-point phis  **/
521
 
522
      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
523
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
524
                                 new_merge_bb);
525
 
526
      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
527
            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
528
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
529
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
530
                                                      EDGE_SUCC (loop->latch,
531
                                                                 0));
532
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
533
      guard_locus
534
        = gimple_phi_arg_location_from_edge (orig_phi,
535
                                             loop_preheader_edge (loop));
536
 
537
      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
538
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
539
 
540
      /* 1.3. Update phi in successor block.  */
541
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
542
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
543
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
544
      update_phi2 = new_phi;
545
 
546
 
547
      /** 2. Handle loop-closed-ssa-form phis  **/
548
 
549
      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
550
        continue;
551
 
552
      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
553
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
554
                                 *new_exit_bb);
555
 
556
      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
557
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
558
 
559
      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
560
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
561
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
562
                                  PHI_RESULT (new_phi));
563
 
564
      /* 2.4. Record the newly created name with set_current_def.
565
         We want to find a name such that
566
                name = get_current_def (orig_loop_name)
567
         and to set its current definition as follows:
568
                set_current_def (name, new_phi_name)
569
 
570
         If LOOP is a new loop then loop_arg is already the name we're
571
         looking for. If LOOP is the original loop, then loop_arg is
572
         the orig_loop_name and the relevant name is recorded in its
573
         current reaching definition.  */
574
      if (is_new_loop)
575
        current_new_name = loop_arg;
576
      else
577
        {
578
          current_new_name = get_current_def (loop_arg);
579
          /* current_def is not available only if the variable does not
580
             change inside the loop, in which case we also don't care
581
             about recording a current_def for it because we won't be
582
             trying to create loop-exit-phis for it.  */
583
          if (!current_new_name)
584
            continue;
585
        }
586
      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
587
 
588
      set_current_def (current_new_name, PHI_RESULT (new_phi));
589
      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
590
    }
591
}
592
 
593
 
594
/* Function slpeel_update_phi_nodes_for_guard2
595
 
596
   Input:
597
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
598
 
599
   In the context of the overall structure, we have:
600
 
601
        loop1_preheader_bb:
602
                guard1 (goto loop1/merge1_bb)
603
        loop1
604
        loop1_exit_bb:
605
                guard2 (goto merge1_bb/merge2_bb)
606
        merge1_bb
607
LOOP->  loop2
608
        loop2_exit_bb
609
        merge2_bb
610
        next_bb
611
 
612
   For each name used out side the loop (i.e - for each name that has an exit
613
   phi in next_bb) we create a new phi in:
614
   1. merge2_bb (to account for the edge from guard_bb)
615
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
616
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
617
      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
618
*/
619
 
620
static void
621
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
622
                                    bool is_new_loop, basic_block *new_exit_bb)
623
{
624
  gimple orig_phi, new_phi;
625
  gimple update_phi, update_phi2;
626
  tree guard_arg, loop_arg;
627
  basic_block new_merge_bb = guard_edge->dest;
628
  edge e = EDGE_SUCC (new_merge_bb, 0);
629
  basic_block update_bb = e->dest;
630
  edge new_exit_e;
631
  tree orig_def, orig_def_new_name;
632
  tree new_name, new_name2;
633
  tree arg;
634
  gimple_stmt_iterator gsi;
635
 
636
  /* Create new bb between loop and new_merge_bb.  */
637
  *new_exit_bb = split_edge (single_exit (loop));
638
 
639
  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
640
 
641
  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
642
    {
643
      update_phi = gsi_stmt (gsi);
644
      orig_phi = update_phi;
645
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
646
      /* This loop-closed-phi actually doesn't represent a use
647
         out of the loop - the phi arg is a constant.  */
648
      if (TREE_CODE (orig_def) != SSA_NAME)
649
        continue;
650
      orig_def_new_name = get_current_def (orig_def);
651
      arg = NULL_TREE;
652
 
653
      /** 1. Handle new-merge-point phis  **/
654
 
655
      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
656
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
657
                                 new_merge_bb);
658
 
659
      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
660
            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
661
      new_name = orig_def;
662
      new_name2 = NULL_TREE;
663
      if (orig_def_new_name)
664
        {
665
          new_name = orig_def_new_name;
666
          /* Some variables have both loop-entry-phis and loop-exit-phis.
667
             Such variables were given yet newer names by phis placed in
668
             guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
669
             new_name2 = get_current_def (get_current_def (orig_name)).  */
670
          new_name2 = get_current_def (new_name);
671
        }
672
 
673
      if (is_new_loop)
674
        {
675
          guard_arg = orig_def;
676
          loop_arg = new_name;
677
        }
678
      else
679
        {
680
          guard_arg = new_name;
681
          loop_arg = orig_def;
682
        }
683
      if (new_name2)
684
        guard_arg = new_name2;
685
 
686
      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
687
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
688
 
689
      /* 1.3. Update phi in successor block.  */
690
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
691
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
692
      update_phi2 = new_phi;
693
 
694
 
695
      /** 2. Handle loop-closed-ssa-form phis  **/
696
 
697
      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
698
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
699
                                 *new_exit_bb);
700
 
701
      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
702
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
703
 
704
      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
705
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
706
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
707
                                  PHI_RESULT (new_phi));
708
 
709
 
710
      /** 3. Handle loop-closed-ssa-form phis for first loop  **/
711
 
712
      /* 3.1. Find the relevant names that need an exit-phi in
713
         GUARD_BB, i.e. names for which
714
         slpeel_update_phi_nodes_for_guard1 had not already created a
715
         phi node. This is the case for names that are used outside
716
         the loop (and therefore need an exit phi) but are not updated
717
         across loop iterations (and therefore don't have a
718
         loop-header-phi).
719
 
720
         slpeel_update_phi_nodes_for_guard1 is responsible for
721
         creating loop-exit phis in GUARD_BB for names that have a
722
         loop-header-phi.  When such a phi is created we also record
723
         the new name in its current definition.  If this new name
724
         exists, then guard_arg was set to this new name (see 1.2
725
         above).  Therefore, if guard_arg is not this new name, this
726
         is an indication that an exit-phi in GUARD_BB was not yet
727
         created, so we take care of it here.  */
728
      if (guard_arg == new_name2)
729
        continue;
730
      arg = guard_arg;
731
 
732
      /* 3.2. Generate new phi node in GUARD_BB:  */
733
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
734
                                 guard_edge->src);
735
 
736
      /* 3.3. GUARD_BB has one incoming edge:  */
737
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
738
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
739
                   UNKNOWN_LOCATION);
740
 
741
      /* 3.4. Update phi in successor of GUARD_BB:  */
742
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
743
                                                                == guard_arg);
744
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
745
                                  PHI_RESULT (new_phi));
746
    }
747
}
748
 
749
 
750
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
751
   that starts at zero, increases by one and its limit is NITERS.
752
 
753
   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
754
 
755
void
756
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
757
{
758
  tree indx_before_incr, indx_after_incr;
759
  gimple cond_stmt;
760
  gimple orig_cond;
761
  edge exit_edge = single_exit (loop);
762
  gimple_stmt_iterator loop_cond_gsi;
763
  gimple_stmt_iterator incr_gsi;
764
  bool insert_after;
765
  tree init = build_int_cst (TREE_TYPE (niters), 0);
766
  tree step = build_int_cst (TREE_TYPE (niters), 1);
767
  LOC loop_loc;
768
  enum tree_code code;
769
 
770
  orig_cond = get_loop_exit_condition (loop);
771
  gcc_assert (orig_cond);
772
  loop_cond_gsi = gsi_for_stmt (orig_cond);
773
 
774
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
775
  create_iv (init, step, NULL_TREE, loop,
776
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
777
 
778
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
779
                                              true, NULL_TREE, true,
780
                                              GSI_SAME_STMT);
781
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
782
                                     true, GSI_SAME_STMT);
783
 
784
  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
785
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
786
                                 NULL_TREE);
787
 
788
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
789
 
790
  /* Remove old loop exit test:  */
791
  gsi_remove (&loop_cond_gsi, true);
792
 
793
  loop_loc = find_loop_location (loop);
794
  if (dump_file && (dump_flags & TDF_DETAILS))
795
    {
796
      if (loop_loc != UNKNOWN_LOC)
797
        fprintf (dump_file, "\nloop at %s:%d: ",
798
                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
799
      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
800
    }
801
 
802
  loop->nb_iterations = niters;
803
}
804
 
805
 
806
/* Given LOOP this function generates a new copy of it and puts it
807
   on E which is either the entry or exit of LOOP.  */
808
 
809
struct loop *
810
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
811
{
812
  struct loop *new_loop;
813
  basic_block *new_bbs, *bbs;
814
  bool at_exit;
815
  bool was_imm_dom;
816
  basic_block exit_dest;
817
  gimple phi;
818
  tree phi_arg;
819
  edge exit, new_exit;
820
  gimple_stmt_iterator gsi;
821
 
822
  at_exit = (e == single_exit (loop));
823
  if (!at_exit && e != loop_preheader_edge (loop))
824
    return NULL;
825
 
826
  bbs = get_loop_body (loop);
827
 
828
  /* Check whether duplication is possible.  */
829
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
830
    {
831
      free (bbs);
832
      return NULL;
833
    }
834
 
835
  /* Generate new loop structure.  */
836
  new_loop = duplicate_loop (loop, loop_outer (loop));
837
  if (!new_loop)
838
    {
839
      free (bbs);
840
      return NULL;
841
    }
842
 
843
  exit_dest = single_exit (loop)->dest;
844
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
845
                                          exit_dest) == loop->header ?
846
                 true : false);
847
 
848
  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
849
 
850
  exit = single_exit (loop);
851
  copy_bbs (bbs, loop->num_nodes, new_bbs,
852
            &exit, 1, &new_exit, NULL,
853
            e->src);
854
 
855
  /* Duplicating phi args at exit bbs as coming
856
     also from exit of duplicated loop.  */
857
  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
858
    {
859
      phi = gsi_stmt (gsi);
860
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
861
      if (phi_arg)
862
        {
863
          edge new_loop_exit_edge;
864
          source_location locus;
865
 
866
          locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
867
          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
868
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
869
          else
870
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
871
 
872
          add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
873
        }
874
    }
875
 
876
  if (at_exit) /* Add the loop copy at exit.  */
877
    {
878
      redirect_edge_and_branch_force (e, new_loop->header);
879
      PENDING_STMT (e) = NULL;
880
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
881
      if (was_imm_dom)
882
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
883
    }
884
  else /* Add the copy at entry.  */
885
    {
886
      edge new_exit_e;
887
      edge entry_e = loop_preheader_edge (loop);
888
      basic_block preheader = entry_e->src;
889
 
890
      if (!flow_bb_inside_loop_p (new_loop,
891
                                  EDGE_SUCC (new_loop->header, 0)->dest))
892
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
893
      else
894
        new_exit_e = EDGE_SUCC (new_loop->header, 1);
895
 
896
      redirect_edge_and_branch_force (new_exit_e, loop->header);
897
      PENDING_STMT (new_exit_e) = NULL;
898
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
899
                               new_exit_e->src);
900
 
901
      /* We have to add phi args to the loop->header here as coming
902
         from new_exit_e edge.  */
903
      for (gsi = gsi_start_phis (loop->header);
904
           !gsi_end_p (gsi);
905
           gsi_next (&gsi))
906
        {
907
          phi = gsi_stmt (gsi);
908
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
909
          if (phi_arg)
910
            add_phi_arg (phi, phi_arg, new_exit_e,
911
                         gimple_phi_arg_location_from_edge (phi, entry_e));
912
        }
913
 
914
      redirect_edge_and_branch_force (entry_e, new_loop->header);
915
      PENDING_STMT (entry_e) = NULL;
916
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
917
    }
918
 
919
  free (new_bbs);
920
  free (bbs);
921
 
922
  return new_loop;
923
}
924
 
925
 
926
/* Given the condition statement COND, put it as the last statement
927
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
928
   Assumes that this is the single exit of the guarded loop.
929
   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
930
 
931
static edge
932
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
933
                       gimple_seq cond_expr_stmt_list,
934
                       basic_block exit_bb, basic_block dom_bb)
935
{
936
  gimple_stmt_iterator gsi;
937
  edge new_e, enter_e;
938
  gimple cond_stmt;
939
  gimple_seq gimplify_stmt_list = NULL;
940
 
941
  enter_e = EDGE_SUCC (guard_bb, 0);
942
  enter_e->flags &= ~EDGE_FALLTHRU;
943
  enter_e->flags |= EDGE_FALSE_VALUE;
944
  gsi = gsi_last_bb (guard_bb);
945
 
946
  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
947
  if (gimplify_stmt_list)
948
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
949
  cond_stmt = gimple_build_cond (NE_EXPR,
950
                                 cond, build_int_cst (TREE_TYPE (cond), 0),
951
                                 NULL_TREE, NULL_TREE);
952
  if (cond_expr_stmt_list)
953
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
954
 
955
  gsi = gsi_last_bb (guard_bb);
956
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
957
 
958
  /* Add new edge to connect guard block to the merge/loop-exit block.  */
959
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
960
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
961
  return new_e;
962
}
963
 
964
 
965
/* This function verifies that the following restrictions apply to LOOP:
966
   (1) it is innermost
967
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
968
   (3) it is single entry, single exit
969
   (4) its exit condition is the last stmt in the header
970
   (5) E is the entry/exit edge of LOOP.
971
 */
972
 
973
bool
974
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
975
{
976
  edge exit_e = single_exit (loop);
977
  edge entry_e = loop_preheader_edge (loop);
978
  gimple orig_cond = get_loop_exit_condition (loop);
979
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
980
 
981
  if (need_ssa_update_p (cfun))
982
    return false;
983
 
984
  if (loop->inner
985
      /* All loops have an outer scope; the only case loop->outer is NULL is for
986
         the function itself.  */
987
      || !loop_outer (loop)
988
      || loop->num_nodes != 2
989
      || !empty_block_p (loop->latch)
990
      || !single_exit (loop)
991
      /* Verify that new loop exit condition can be trivially modified.  */
992
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
993
      || (e != exit_e && e != entry_e))
994
    return false;
995
 
996
  return true;
997
}
998
 
999
#ifdef ENABLE_CHECKING
1000
static void
1001
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1002
                                 struct loop *second_loop)
1003
{
1004
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1005
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1006
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1007
 
1008
  /* A guard that controls whether the second_loop is to be executed or skipped
1009
     is placed in first_loop->exit.  first_loop->exit therefore has two
1010
     successors - one is the preheader of second_loop, and the other is a bb
1011
     after second_loop.
1012
   */
1013
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1014
 
1015
  /* 1. Verify that one of the successors of first_loop->exit is the preheader
1016
        of second_loop.  */
1017
 
1018
  /* The preheader of new_loop is expected to have two predecessors:
1019
     first_loop->exit and the block that precedes first_loop.  */
1020
 
1021
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1022
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1023
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1024
               || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1025
                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1026
 
1027
  /* Verify that the other successor of first_loop->exit is after the
1028
     second_loop.  */
1029
  /* TODO */
1030
}
1031
#endif
1032
 
1033
/* If the run time cost model check determines that vectorization is
1034
   not profitable and hence scalar loop should be generated then set
1035
   FIRST_NITERS to prologue peeled iterations. This will allow all the
1036
   iterations to be executed in the prologue peeled scalar loop.  */
1037
 
1038
static void
1039
set_prologue_iterations (basic_block bb_before_first_loop,
1040
                         tree first_niters,
1041
                         struct loop *loop,
1042
                         unsigned int th)
1043
{
1044
  edge e;
1045
  basic_block cond_bb, then_bb;
1046
  tree var, prologue_after_cost_adjust_name;
1047
  gimple_stmt_iterator gsi;
1048
  gimple newphi;
1049
  edge e_true, e_false, e_fallthru;
1050
  gimple cond_stmt;
1051
  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1052
  tree cost_pre_condition = NULL_TREE;
1053
  tree scalar_loop_iters =
1054
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1055
 
1056
  e = single_pred_edge (bb_before_first_loop);
1057
  cond_bb = split_edge(e);
1058
 
1059
  e = single_pred_edge (bb_before_first_loop);
1060
  then_bb = split_edge(e);
1061
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1062
 
1063
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1064
                                   EDGE_FALSE_VALUE);
1065
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1066
 
1067
  e_true = EDGE_PRED (then_bb, 0);
1068
  e_true->flags &= ~EDGE_FALLTHRU;
1069
  e_true->flags |= EDGE_TRUE_VALUE;
1070
 
1071
  e_fallthru = EDGE_SUCC (then_bb, 0);
1072
 
1073
  cost_pre_condition =
1074
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1075
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1076
  cost_pre_condition =
1077
    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1078
                          true, NULL_TREE);
1079
  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1080
                                 build_int_cst (TREE_TYPE (cost_pre_condition),
1081
                                                0), NULL_TREE, NULL_TREE);
1082
 
1083
  gsi = gsi_last_bb (cond_bb);
1084
  if (gimplify_stmt_list)
1085
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1086
 
1087
  gsi = gsi_last_bb (cond_bb);
1088
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1089
 
1090
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1091
                        "prologue_after_cost_adjust");
1092
  add_referenced_var (var);
1093
  prologue_after_cost_adjust_name =
1094
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1095
 
1096
  gsi = gsi_last_bb (then_bb);
1097
  if (stmts)
1098
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1099
 
1100
  newphi = create_phi_node (var, bb_before_first_loop);
1101
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1102
               UNKNOWN_LOCATION);
1103
  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
1104
 
1105
  first_niters = PHI_RESULT (newphi);
1106
}
1107
 
1108
 
1109
/* Function slpeel_tree_peel_loop_to_edge.
1110
 
1111
   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1112
   that is placed on the entry (exit) edge E of LOOP. After this transformation
1113
   we have two loops one after the other - first-loop iterates FIRST_NITERS
1114
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1115
   If the cost model indicates that it is profitable to emit a scalar
1116
   loop instead of the vector one, then the prolog (epilog) loop will iterate
1117
   for the entire unchanged scalar iterations of the loop.
1118
 
1119
   Input:
1120
   - LOOP: the loop to be peeled.
1121
   - E: the exit or entry edge of LOOP.
1122
        If it is the entry edge, we peel the first iterations of LOOP. In this
1123
        case first-loop is LOOP, and second-loop is the newly created loop.
1124
        If it is the exit edge, we peel the last iterations of LOOP. In this
1125
        case, first-loop is the newly created loop, and second-loop is LOOP.
1126
   - NITERS: the number of iterations that LOOP iterates.
1127
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1128
   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1129
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1130
        is false, the caller of this function may want to take care of this
1131
        (this can be useful if we don't want new stmts added to first-loop).
1132
   - TH: cost model profitability threshold of iterations for vectorization.
1133
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1134
                          during versioning and hence needs to occur during
1135
                          prologue generation or whether cost model check
1136
                          has not occurred during prologue generation and hence
1137
                          needs to occur during epilogue generation.
1138
 
1139
 
1140
   Output:
1141
   The function returns a pointer to the new loop-copy, or NULL if it failed
1142
   to perform the transformation.
1143
 
1144
   The function generates two if-then-else guards: one before the first loop,
1145
   and the other before the second loop:
1146
   The first guard is:
1147
     if (FIRST_NITERS == 0) then skip the first loop,
1148
     and go directly to the second loop.
1149
   The second guard is:
1150
     if (FIRST_NITERS == NITERS) then skip the second loop.
1151
 
1152
   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1153
   then the generated condition is combined with COND_EXPR and the
1154
   statements in COND_EXPR_STMT_LIST are emitted together with it.
1155
 
1156
   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1157
   FORNOW the resulting code will not be in loop-closed-ssa form.
1158
*/
1159
 
1160
static struct loop*
1161
slpeel_tree_peel_loop_to_edge (struct loop *loop,
1162
                               edge e, tree first_niters,
1163
                               tree niters, bool update_first_loop_count,
1164
                               unsigned int th, bool check_profitability,
1165
                               tree cond_expr, gimple_seq cond_expr_stmt_list)
1166
{
1167
  struct loop *new_loop = NULL, *first_loop, *second_loop;
1168
  edge skip_e;
1169
  tree pre_condition = NULL_TREE;
1170
  bitmap definitions;
1171
  basic_block bb_before_second_loop, bb_after_second_loop;
1172
  basic_block bb_before_first_loop;
1173
  basic_block bb_between_loops;
1174
  basic_block new_exit_bb;
1175
  edge exit_e = single_exit (loop);
1176
  LOC loop_loc;
1177
  tree cost_pre_condition = NULL_TREE;
1178
 
1179
  if (!slpeel_can_duplicate_loop_p (loop, e))
1180
    return NULL;
1181
 
1182
  /* We have to initialize cfg_hooks. Then, when calling
1183
   cfg_hooks->split_edge, the function tree_split_edge
1184
   is actually called and, when calling cfg_hooks->duplicate_block,
1185
   the function tree_duplicate_bb is called.  */
1186
  gimple_register_cfg_hooks ();
1187
 
1188
 
1189
  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1190
        Resulting CFG would be:
1191
 
1192
        first_loop:
1193
        do {
1194
        } while ...
1195
 
1196
        second_loop:
1197
        do {
1198
        } while ...
1199
 
1200
        orig_exit_bb:
1201
   */
1202
 
1203
  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1204
    {
1205
      loop_loc = find_loop_location (loop);
1206
      if (dump_file && (dump_flags & TDF_DETAILS))
1207
        {
1208
          if (loop_loc != UNKNOWN_LOC)
1209
            fprintf (dump_file, "\n%s:%d: note: ",
1210
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1211
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1212
        }
1213
      return NULL;
1214
    }
1215
 
1216
  if (MAY_HAVE_DEBUG_STMTS)
1217
    {
1218
      gcc_assert (!adjust_vec);
1219
      adjust_vec = VEC_alloc (adjust_info, stack, 32);
1220
    }
1221
 
1222
  if (e == exit_e)
1223
    {
1224
      /* NEW_LOOP was placed after LOOP.  */
1225
      first_loop = loop;
1226
      second_loop = new_loop;
1227
    }
1228
  else
1229
    {
1230
      /* NEW_LOOP was placed before LOOP.  */
1231
      first_loop = new_loop;
1232
      second_loop = loop;
1233
    }
1234
 
1235
  definitions = ssa_names_to_replace ();
1236
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1237
  rename_variables_in_loop (new_loop);
1238
 
1239
 
1240
  /* 2.  Add the guard code in one of the following ways:
1241
 
1242
     2.a Add the guard that controls whether the first loop is executed.
1243
         This occurs when this function is invoked for prologue or epilogue
1244
         generation and when the cost model check can be done at compile time.
1245
 
1246
         Resulting CFG would be:
1247
 
1248
         bb_before_first_loop:
1249
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1250
                                GOTO first-loop
1251
 
1252
         first_loop:
1253
         do {
1254
         } while ...
1255
 
1256
         bb_before_second_loop:
1257
 
1258
         second_loop:
1259
         do {
1260
         } while ...
1261
 
1262
         orig_exit_bb:
1263
 
1264
     2.b Add the cost model check that allows the prologue
1265
         to iterate for the entire unchanged scalar
1266
         iterations of the loop in the event that the cost
1267
         model indicates that the scalar loop is more
1268
         profitable than the vector one. This occurs when
1269
         this function is invoked for prologue generation
1270
         and the cost model check needs to be done at run
1271
         time.
1272
 
1273
         Resulting CFG after prologue peeling would be:
1274
 
1275
         if (scalar_loop_iterations <= th)
1276
           FIRST_NITERS = scalar_loop_iterations
1277
 
1278
         bb_before_first_loop:
1279
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1280
                                GOTO first-loop
1281
 
1282
         first_loop:
1283
         do {
1284
         } while ...
1285
 
1286
         bb_before_second_loop:
1287
 
1288
         second_loop:
1289
         do {
1290
         } while ...
1291
 
1292
         orig_exit_bb:
1293
 
1294
     2.c Add the cost model check that allows the epilogue
1295
         to iterate for the entire unchanged scalar
1296
         iterations of the loop in the event that the cost
1297
         model indicates that the scalar loop is more
1298
         profitable than the vector one. This occurs when
1299
         this function is invoked for epilogue generation
1300
         and the cost model check needs to be done at run
1301
         time.  This check is combined with any pre-existing
1302
         check in COND_EXPR to avoid versioning.
1303
 
1304
         Resulting CFG after prologue peeling would be:
1305
 
1306
         bb_before_first_loop:
1307
         if ((scalar_loop_iterations <= th)
1308
             ||
1309
             FIRST_NITERS == 0) GOTO bb_before_second_loop
1310
                                GOTO first-loop
1311
 
1312
         first_loop:
1313
         do {
1314
         } while ...
1315
 
1316
         bb_before_second_loop:
1317
 
1318
         second_loop:
1319
         do {
1320
         } while ...
1321
 
1322
         orig_exit_bb:
1323
  */
1324
 
1325
  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1326
  bb_before_second_loop = split_edge (single_exit (first_loop));
1327
 
1328
  /* Epilogue peeling.  */
1329
  if (!update_first_loop_count)
1330
    {
1331
      pre_condition =
1332
        fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1333
                     build_int_cst (TREE_TYPE (first_niters), 0));
1334
      if (check_profitability)
1335
        {
1336
          tree scalar_loop_iters
1337
            = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1338
                                        (loop_vec_info_for_loop (loop)));
1339
          cost_pre_condition =
1340
            fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1341
                         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1342
 
1343
          pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1344
                                       cost_pre_condition, pre_condition);
1345
        }
1346
      if (cond_expr)
1347
        {
1348
          pre_condition =
1349
            fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1350
                         pre_condition,
1351
                         fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1352
                                      cond_expr));
1353
        }
1354
    }
1355
 
1356
  /* Prologue peeling.  */
1357
  else
1358
    {
1359
      if (check_profitability)
1360
        set_prologue_iterations (bb_before_first_loop, first_niters,
1361
                                 loop, th);
1362
 
1363
      pre_condition =
1364
        fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1365
                     build_int_cst (TREE_TYPE (first_niters), 0));
1366
    }
1367
 
1368
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1369
                                  cond_expr_stmt_list,
1370
                                  bb_before_second_loop, bb_before_first_loop);
1371
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1372
                                      first_loop == new_loop,
1373
                                      &new_exit_bb, &definitions);
1374
 
1375
 
1376
  /* 3. Add the guard that controls whether the second loop is executed.
1377
        Resulting CFG would be:
1378
 
1379
        bb_before_first_loop:
1380
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1381
                               GOTO first-loop
1382
 
1383
        first_loop:
1384
        do {
1385
        } while ...
1386
 
1387
        bb_between_loops:
1388
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1389
                                    GOTO bb_before_second_loop
1390
 
1391
        bb_before_second_loop:
1392
 
1393
        second_loop:
1394
        do {
1395
        } while ...
1396
 
1397
        bb_after_second_loop:
1398
 
1399
        orig_exit_bb:
1400
   */
1401
 
1402
  bb_between_loops = new_exit_bb;
1403
  bb_after_second_loop = split_edge (single_exit (second_loop));
1404
 
1405
  pre_condition =
1406
        fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1407
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1408
                                  bb_after_second_loop, bb_before_first_loop);
1409
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1410
                                     second_loop == new_loop, &new_exit_bb);
1411
 
1412
  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1413
   */
1414
  if (update_first_loop_count)
1415
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1416
 
1417
  adjust_vec_debug_stmts ();
1418
 
1419
  BITMAP_FREE (definitions);
1420
  delete_update_ssa ();
1421
 
1422
  return new_loop;
1423
}
1424
 
1425
/* Function vect_get_loop_location.
1426
 
1427
   Extract the location of the loop in the source code.
1428
   If the loop is not well formed for vectorization, an estimated
1429
   location is calculated.
1430
   Return the loop location if succeed and NULL if not.  */
1431
 
1432
LOC
1433
find_loop_location (struct loop *loop)
1434
{
1435
  gimple stmt = NULL;
1436
  basic_block bb;
1437
  gimple_stmt_iterator si;
1438
 
1439
  if (!loop)
1440
    return UNKNOWN_LOC;
1441
 
1442
  stmt = get_loop_exit_condition (loop);
1443
 
1444
  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1445
    return gimple_location (stmt);
1446
 
1447
  /* If we got here the loop is probably not "well formed",
1448
     try to estimate the loop location */
1449
 
1450
  if (!loop->header)
1451
    return UNKNOWN_LOC;
1452
 
1453
  bb = loop->header;
1454
 
1455
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1456
    {
1457
      stmt = gsi_stmt (si);
1458
      if (gimple_location (stmt) != UNKNOWN_LOC)
1459
        return gimple_location (stmt);
1460
    }
1461
 
1462
  return UNKNOWN_LOC;
1463
}
1464
 
1465
 
1466
/* This function builds ni_name = number of iterations loop executes
1467
   on the loop preheader.  If SEQ is given the stmt is instead emitted
1468
   there.  */
1469
 
1470
static tree
1471
vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1472
{
1473
  tree ni_name, var;
1474
  gimple_seq stmts = NULL;
1475
  edge pe;
1476
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1477
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1478
 
1479
  var = create_tmp_var (TREE_TYPE (ni), "niters");
1480
  add_referenced_var (var);
1481
  ni_name = force_gimple_operand (ni, &stmts, false, var);
1482
 
1483
  pe = loop_preheader_edge (loop);
1484
  if (stmts)
1485
    {
1486
      if (seq)
1487
        gimple_seq_add_seq (&seq, stmts);
1488
      else
1489
        {
1490
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1491
          gcc_assert (!new_bb);
1492
        }
1493
    }
1494
 
1495
  return ni_name;
1496
}
1497
 
1498
 
1499
/* This function generates the following statements:
1500
 
1501
 ni_name = number of iterations loop executes
1502
 ratio = ni_name / vf
1503
 ratio_mult_vf_name = ratio * vf
1504
 
1505
 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1506
 if that is non-NULL.  */
1507
 
1508
static void
1509
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1510
                                 tree *ni_name_ptr,
1511
                                 tree *ratio_mult_vf_name_ptr,
1512
                                 tree *ratio_name_ptr,
1513
                                 gimple_seq cond_expr_stmt_list)
1514
{
1515
 
1516
  edge pe;
1517
  basic_block new_bb;
1518
  gimple_seq stmts;
1519
  tree ni_name;
1520
  tree var;
1521
  tree ratio_name;
1522
  tree ratio_mult_vf_name;
1523
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1524
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1525
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1526
  tree log_vf;
1527
 
1528
  pe = loop_preheader_edge (loop);
1529
 
1530
  /* Generate temporary variable that contains
1531
     number of iterations loop executes.  */
1532
 
1533
  ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1534
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1535
 
1536
  /* Create: ratio = ni >> log2(vf) */
1537
 
1538
  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1539
  if (!is_gimple_val (ratio_name))
1540
    {
1541
      var = create_tmp_var (TREE_TYPE (ni), "bnd");
1542
      add_referenced_var (var);
1543
 
1544
      stmts = NULL;
1545
      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1546
      if (cond_expr_stmt_list)
1547
        gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1548
      else
1549
        {
1550
          pe = loop_preheader_edge (loop);
1551
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1552
          gcc_assert (!new_bb);
1553
        }
1554
    }
1555
 
1556
  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1557
 
1558
  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1559
                                    ratio_name, log_vf);
1560
  if (!is_gimple_val (ratio_mult_vf_name))
1561
    {
1562
      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1563
      add_referenced_var (var);
1564
 
1565
      stmts = NULL;
1566
      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1567
                                                 true, var);
1568
      if (cond_expr_stmt_list)
1569
        gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1570
      else
1571
        {
1572
          pe = loop_preheader_edge (loop);
1573
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1574
          gcc_assert (!new_bb);
1575
        }
1576
    }
1577
 
1578
  *ni_name_ptr = ni_name;
1579
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1580
  *ratio_name_ptr = ratio_name;
1581
 
1582
  return;
1583
}
1584
 
1585
/* Function vect_can_advance_ivs_p
1586
 
1587
   In case the number of iterations that LOOP iterates is unknown at compile
1588
   time, an epilog loop will be generated, and the loop induction variables
1589
   (IVs) will be "advanced" to the value they are supposed to take just before
1590
   the epilog loop.  Here we check that the access function of the loop IVs
1591
   and the expression that represents the loop bound are simple enough.
1592
   These restrictions will be relaxed in the future.  */
1593
 
1594
bool
1595
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1596
{
1597
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598
  basic_block bb = loop->header;
1599
  gimple phi;
1600
  gimple_stmt_iterator gsi;
1601
 
1602
  /* Analyze phi functions of the loop header.  */
1603
 
1604
  if (vect_print_dump_info (REPORT_DETAILS))
1605
    fprintf (vect_dump, "vect_can_advance_ivs_p:");
1606
 
1607
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1608
    {
1609
      tree access_fn = NULL;
1610
      tree evolution_part;
1611
 
1612
      phi = gsi_stmt (gsi);
1613
      if (vect_print_dump_info (REPORT_DETAILS))
1614
        {
1615
          fprintf (vect_dump, "Analyze phi: ");
1616
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1617
        }
1618
 
1619
      /* Skip virtual phi's. The data dependences that are associated with
1620
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1621
 
1622
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1623
        {
1624
          if (vect_print_dump_info (REPORT_DETAILS))
1625
            fprintf (vect_dump, "virtual phi. skip.");
1626
          continue;
1627
        }
1628
 
1629
      /* Skip reduction phis.  */
1630
 
1631
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1632
        {
1633
          if (vect_print_dump_info (REPORT_DETAILS))
1634
            fprintf (vect_dump, "reduc phi. skip.");
1635
          continue;
1636
        }
1637
 
1638
      /* Analyze the evolution function.  */
1639
 
1640
      access_fn = instantiate_parameters
1641
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1642
 
1643
      if (!access_fn)
1644
        {
1645
          if (vect_print_dump_info (REPORT_DETAILS))
1646
            fprintf (vect_dump, "No Access function.");
1647
          return false;
1648
        }
1649
 
1650
      if (vect_print_dump_info (REPORT_DETAILS))
1651
        {
1652
          fprintf (vect_dump, "Access function of PHI: ");
1653
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1654
        }
1655
 
1656
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1657
 
1658
      if (evolution_part == NULL_TREE)
1659
        {
1660
          if (vect_print_dump_info (REPORT_DETAILS))
1661
            fprintf (vect_dump, "No evolution.");
1662
          return false;
1663
        }
1664
 
1665
      /* FORNOW: We do not transform initial conditions of IVs
1666
         which evolution functions are a polynomial of degree >= 2.  */
1667
 
1668
      if (tree_is_chrec (evolution_part))
1669
        return false;
1670
    }
1671
 
1672
  return true;
1673
}
1674
 
1675
 
1676
/*   Function vect_update_ivs_after_vectorizer.
1677
 
1678
     "Advance" the induction variables of LOOP to the value they should take
1679
     after the execution of LOOP.  This is currently necessary because the
1680
     vectorizer does not handle induction variables that are used after the
1681
     loop.  Such a situation occurs when the last iterations of LOOP are
1682
     peeled, because:
1683
     1. We introduced new uses after LOOP for IVs that were not originally used
1684
        after LOOP: the IVs of LOOP are now used by an epilog loop.
1685
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
1686
        times, whereas the loop IVs should be bumped N times.
1687
 
1688
     Input:
1689
     - LOOP - a loop that is going to be vectorized. The last few iterations
1690
              of LOOP were peeled.
1691
     - NITERS - the number of iterations that LOOP executes (before it is
1692
                vectorized). i.e, the number of times the ivs should be bumped.
1693
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1694
                  coming out from LOOP on which there are uses of the LOOP ivs
1695
                  (this is the path from LOOP->exit to epilog_loop->preheader).
1696
 
1697
                  The new definitions of the ivs are placed in LOOP->exit.
1698
                  The phi args associated with the edge UPDATE_E in the bb
1699
                  UPDATE_E->dest are updated accordingly.
1700
 
1701
     Assumption 1: Like the rest of the vectorizer, this function assumes
1702
     a single loop exit that has a single predecessor.
1703
 
1704
     Assumption 2: The phi nodes in the LOOP header and in update_bb are
1705
     organized in the same order.
1706
 
1707
     Assumption 3: The access function of the ivs is simple enough (see
1708
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1709
 
1710
     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1711
     coming out of LOOP on which the ivs of LOOP are used (this is the path
1712
     that leads to the epilog loop; other paths skip the epilog loop).  This
1713
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1714
     needs to have its phis updated.
1715
 */
1716
 
1717
static void
1718
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1719
                                  edge update_e)
1720
{
1721
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1722
  basic_block exit_bb = single_exit (loop)->dest;
1723
  gimple phi, phi1;
1724
  gimple_stmt_iterator gsi, gsi1;
1725
  basic_block update_bb = update_e->dest;
1726
 
1727
  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1728
 
1729
  /* Make sure there exists a single-predecessor exit bb:  */
1730
  gcc_assert (single_pred_p (exit_bb));
1731
 
1732
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1733
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1734
       gsi_next (&gsi), gsi_next (&gsi1))
1735
    {
1736
      tree access_fn = NULL;
1737
      tree evolution_part;
1738
      tree init_expr;
1739
      tree step_expr, off;
1740
      tree type;
1741
      tree var, ni, ni_name;
1742
      gimple_stmt_iterator last_gsi;
1743
 
1744
      phi = gsi_stmt (gsi);
1745
      phi1 = gsi_stmt (gsi1);
1746
      if (vect_print_dump_info (REPORT_DETAILS))
1747
        {
1748
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1749
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1750
        }
1751
 
1752
      /* Skip virtual phi's.  */
1753
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1754
        {
1755
          if (vect_print_dump_info (REPORT_DETAILS))
1756
            fprintf (vect_dump, "virtual phi. skip.");
1757
          continue;
1758
        }
1759
 
1760
      /* Skip reduction phis.  */
1761
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1762
        {
1763
          if (vect_print_dump_info (REPORT_DETAILS))
1764
            fprintf (vect_dump, "reduc phi. skip.");
1765
          continue;
1766
        }
1767
 
1768
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1769
      gcc_assert (access_fn);
1770
      /* We can end up with an access_fn like
1771
           (short int) {(short unsigned int) i_49, +, 1}_1
1772
         for further analysis we need to strip the outer cast but we
1773
         need to preserve the original type.  */
1774
      type = TREE_TYPE (access_fn);
1775
      STRIP_NOPS (access_fn);
1776
      evolution_part =
1777
         unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1778
      gcc_assert (evolution_part != NULL_TREE);
1779
 
1780
      /* FORNOW: We do not support IVs whose evolution function is a polynomial
1781
         of degree >= 2 or exponential.  */
1782
      gcc_assert (!tree_is_chrec (evolution_part));
1783
 
1784
      step_expr = evolution_part;
1785
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1786
                                                               loop->num));
1787
      init_expr = fold_convert (type, init_expr);
1788
 
1789
      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1790
                         fold_convert (TREE_TYPE (step_expr), niters),
1791
                         step_expr);
1792
      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1793
        ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1794
                          init_expr,
1795
                          fold_convert (sizetype, off));
1796
      else
1797
        ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1798
                          init_expr,
1799
                          fold_convert (TREE_TYPE (init_expr), off));
1800
 
1801
      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1802
      add_referenced_var (var);
1803
 
1804
      last_gsi = gsi_last_bb (exit_bb);
1805
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1806
                                          true, GSI_SAME_STMT);
1807
 
1808
      /* Fix phi expressions in the successor bb.  */
1809
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1810
    }
1811
}
1812
 
1813
/* Return the more conservative threshold between the
1814
   min_profitable_iters returned by the cost model and the user
1815
   specified threshold, if provided.  */
1816
 
1817
static unsigned int
1818
conservative_cost_threshold (loop_vec_info loop_vinfo,
1819
                             int min_profitable_iters)
1820
{
1821
  unsigned int th;
1822
  int min_scalar_loop_bound;
1823
 
1824
  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1825
                            * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1826
 
1827
  /* Use the cost model only if it is more conservative than user specified
1828
     threshold.  */
1829
  th = (unsigned) min_scalar_loop_bound;
1830
  if (min_profitable_iters
1831
      && (!min_scalar_loop_bound
1832
          || min_profitable_iters > min_scalar_loop_bound))
1833
    th = (unsigned) min_profitable_iters;
1834
 
1835
  if (th && vect_print_dump_info (REPORT_COST))
1836
    fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1837
 
1838
  return th;
1839
}
1840
 
1841
/* Function vect_do_peeling_for_loop_bound
1842
 
1843
   Peel the last iterations of the loop represented by LOOP_VINFO.
1844
   The peeled iterations form a new epilog loop.  Given that the loop now
1845
   iterates NITERS times, the new epilog loop iterates
1846
   NITERS % VECTORIZATION_FACTOR times.
1847
 
1848
   The original loop will later be made to iterate
1849
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1850
 
1851
   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1852
   test.  */
1853
 
1854
void
1855
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1856
                                tree cond_expr, gimple_seq cond_expr_stmt_list)
1857
{
1858
  tree ni_name, ratio_mult_vf_name;
1859
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1860
  struct loop *new_loop;
1861
  edge update_e;
1862
  basic_block preheader;
1863
  int loop_num;
1864
  bool check_profitability = false;
1865
  unsigned int th = 0;
1866
  int min_profitable_iters;
1867
 
1868
  if (vect_print_dump_info (REPORT_DETAILS))
1869
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1870
 
1871
  initialize_original_copy_tables ();
1872
 
1873
  /* Generate the following variables on the preheader of original loop:
1874
 
1875
     ni_name = number of iteration the original loop executes
1876
     ratio = ni_name / vf
1877
     ratio_mult_vf_name = ratio * vf  */
1878
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1879
                                   &ratio_mult_vf_name, ratio,
1880
                                   cond_expr_stmt_list);
1881
 
1882
  loop_num  = loop->num;
1883
 
1884
  /* If cost model check not done during versioning and
1885
     peeling for alignment.  */
1886
  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1887
      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1888
      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1889
      && !cond_expr)
1890
    {
1891
      check_profitability = true;
1892
 
1893
      /* Get profitability threshold for vectorized loop.  */
1894
      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1895
 
1896
      th = conservative_cost_threshold (loop_vinfo,
1897
                                        min_profitable_iters);
1898
    }
1899
 
1900
  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1901
                                            ratio_mult_vf_name, ni_name, false,
1902
                                            th, check_profitability,
1903
                                            cond_expr, cond_expr_stmt_list);
1904
  gcc_assert (new_loop);
1905
  gcc_assert (loop_num == loop->num);
1906
#ifdef ENABLE_CHECKING
1907
  slpeel_verify_cfg_after_peeling (loop, new_loop);
1908
#endif
1909
 
1910
  /* A guard that controls whether the new_loop is to be executed or skipped
1911
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1912
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1913
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1914
     is on the path where the LOOP IVs are used and need to be updated.  */
1915
 
1916
  preheader = loop_preheader_edge (new_loop)->src;
1917
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1918
    update_e = EDGE_PRED (preheader, 0);
1919
  else
1920
    update_e = EDGE_PRED (preheader, 1);
1921
 
1922
  /* Update IVs of original loop as if they were advanced
1923
     by ratio_mult_vf_name steps.  */
1924
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1925
 
1926
  /* After peeling we have to reset scalar evolution analyzer.  */
1927
  scev_reset ();
1928
 
1929
  free_original_copy_tables ();
1930
}
1931
 
1932
 
1933
/* Function vect_gen_niters_for_prolog_loop
1934
 
1935
   Set the number of iterations for the loop represented by LOOP_VINFO
1936
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
1937
   and the misalignment of DR - the data reference recorded in
1938
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1939
   this loop, the data reference DR will refer to an aligned location.
1940
 
1941
   The following computation is generated:
1942
 
1943
   If the misalignment of DR is known at compile time:
1944
     addr_mis = int mis = DR_MISALIGNMENT (dr);
1945
   Else, compute address misalignment in bytes:
1946
     addr_mis = addr & (vectype_size - 1)
1947
 
1948
   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1949
 
1950
   (elem_size = element type size; an element is the scalar element whose type
1951
   is the inner type of the vectype)
1952
 
1953
   When the step of the data-ref in the loop is not 1 (as in interleaved data
1954
   and SLP), the number of iterations of the prolog must be divided by the step
1955
   (which is equal to the size of interleaved group).
1956
 
1957
   The above formulas assume that VF == number of elements in the vector. This
1958
   may not hold when there are multiple-types in the loop.
1959
   In this case, for some data-references in the loop the VF does not represent
1960
   the number of elements that fit in the vector.  Therefore, instead of VF we
1961
   use TYPE_VECTOR_SUBPARTS.  */
1962
 
1963
static tree
1964
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
1965
                                 tree *wide_prolog_niters)
1966
{
1967
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1968
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1969
  tree var;
1970
  gimple_seq stmts;
1971
  tree iters, iters_name;
1972
  edge pe;
1973
  basic_block new_bb;
1974
  gimple dr_stmt = DR_STMT (dr);
1975
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1976
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1977
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1978
  tree niters_type = TREE_TYPE (loop_niters);
1979
  int step = 1;
1980
  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1981
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1982
 
1983
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1984
    step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1985
 
1986
  pe = loop_preheader_edge (loop);
1987
 
1988
  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1989
    {
1990
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1991
      int elem_misalign = byte_misalign / element_size;
1992
 
1993
      if (vect_print_dump_info (REPORT_DETAILS))
1994
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1995
 
1996
      iters = build_int_cst (niters_type,
1997
                     (((nelements - elem_misalign) & (nelements - 1)) / step));
1998
    }
1999
  else
2000
    {
2001
      gimple_seq new_stmts = NULL;
2002
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2003
                                                &new_stmts, NULL_TREE, loop);
2004
      tree ptr_type = TREE_TYPE (start_addr);
2005
      tree size = TYPE_SIZE (ptr_type);
2006
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2007
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2008
      tree elem_size_log =
2009
        build_int_cst (type, exact_log2 (vectype_align/nelements));
2010
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2011
      tree nelements_tree = build_int_cst (type, nelements);
2012
      tree byte_misalign;
2013
      tree elem_misalign;
2014
 
2015
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2016
      gcc_assert (!new_bb);
2017
 
2018
      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2019
      byte_misalign =
2020
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
2021
 
2022
      /* Create:  elem_misalign = byte_misalign / element_size  */
2023
      elem_misalign =
2024
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2025
 
2026
      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
2027
      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2028
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2029
      iters = fold_convert (niters_type, iters);
2030
    }
2031
 
2032
  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2033
  /* If the loop bound is known at compile time we already verified that it is
2034
     greater than vf; since the misalignment ('iters') is at most vf, there's
2035
     no need to generate the MIN_EXPR in this case.  */
2036
  if (TREE_CODE (loop_niters) != INTEGER_CST)
2037
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2038
 
2039
  if (vect_print_dump_info (REPORT_DETAILS))
2040
    {
2041
      fprintf (vect_dump, "niters for prolog loop: ");
2042
      print_generic_expr (vect_dump, iters, TDF_SLIM);
2043
    }
2044
 
2045
  var = create_tmp_var (niters_type, "prolog_loop_niters");
2046
  add_referenced_var (var);
2047
  stmts = NULL;
2048
  iters_name = force_gimple_operand (iters, &stmts, false, var);
2049
  if (types_compatible_p (sizetype, niters_type))
2050
    *wide_prolog_niters = iters_name;
2051
  else
2052
    {
2053
      gimple_seq seq = NULL;
2054
      tree wide_iters = fold_convert (sizetype, iters);
2055
      var = create_tmp_var (sizetype, "prolog_loop_niters");
2056
      add_referenced_var (var);
2057
      *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2058
                                                  var);
2059
      if (seq)
2060
        gimple_seq_add_seq (&stmts, seq);
2061
    }
2062
 
2063
  /* Insert stmt on loop preheader edge.  */
2064
  if (stmts)
2065
    {
2066
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2067
      gcc_assert (!new_bb);
2068
    }
2069
 
2070
  return iters_name;
2071
}
2072
 
2073
 
2074
/* Function vect_update_init_of_dr
2075
 
2076
   NITERS iterations were peeled from LOOP.  DR represents a data reference
2077
   in LOOP.  This function updates the information recorded in DR to
2078
   account for the fact that the first NITERS iterations had already been
2079
   executed.  Specifically, it updates the OFFSET field of DR.  */
2080
 
2081
static void
2082
vect_update_init_of_dr (struct data_reference *dr, tree niters)
2083
{
2084
  tree offset = DR_OFFSET (dr);
2085
 
2086
  niters = fold_build2 (MULT_EXPR, sizetype,
2087
                        fold_convert (sizetype, niters),
2088
                        fold_convert (sizetype, DR_STEP (dr)));
2089
  offset = fold_build2 (PLUS_EXPR, sizetype,
2090
                        fold_convert (sizetype, offset), niters);
2091
  DR_OFFSET (dr) = offset;
2092
}
2093
 
2094
 
2095
/* Function vect_update_inits_of_drs
2096
 
2097
   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2098
   This function updates the information recorded for the data references in
2099
   the loop to account for the fact that the first NITERS iterations had
2100
   already been executed.  Specifically, it updates the initial_condition of
2101
   the access_function of all the data_references in the loop.  */
2102
 
2103
static void
2104
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2105
{
2106
  unsigned int i;
2107
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2108
  struct data_reference *dr;
2109
 
2110
  if (vect_print_dump_info (REPORT_DETAILS))
2111
    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2112
 
2113
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2114
    vect_update_init_of_dr (dr, niters);
2115
}
2116
 
2117
 
2118
/* Function vect_do_peeling_for_alignment
2119
 
2120
   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2121
   'niters' is set to the misalignment of one of the data references in the
2122
   loop, thereby forcing it to refer to an aligned location at the beginning
2123
   of the execution of this loop.  The data reference for which we are
2124
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2125
 
2126
void
2127
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2128
{
2129
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2130
  tree niters_of_prolog_loop, ni_name;
2131
  tree n_iters;
2132
  tree wide_prolog_niters;
2133
  struct loop *new_loop;
2134
  unsigned int th = 0;
2135
  int min_profitable_iters;
2136
 
2137
  if (vect_print_dump_info (REPORT_DETAILS))
2138
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2139
 
2140
  initialize_original_copy_tables ();
2141
 
2142
  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2143
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
2144
                                                           &wide_prolog_niters);
2145
 
2146
 
2147
  /* Get profitability threshold for vectorized loop.  */
2148
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2149
  th = conservative_cost_threshold (loop_vinfo,
2150
                                    min_profitable_iters);
2151
 
2152
  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2153
  new_loop =
2154
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2155
                                   niters_of_prolog_loop, ni_name, true,
2156
                                   th, true, NULL_TREE, NULL);
2157
 
2158
  gcc_assert (new_loop);
2159
#ifdef ENABLE_CHECKING
2160
  slpeel_verify_cfg_after_peeling (new_loop, loop);
2161
#endif
2162
 
2163
  /* Update number of times loop executes.  */
2164
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2165
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2166
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2167
 
2168
  /* Update the init conditions of the access functions of all data refs.  */
2169
  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2170
 
2171
  /* After peeling we have to reset scalar evolution analyzer.  */
2172
  scev_reset ();
2173
 
2174
  free_original_copy_tables ();
2175
}
2176
 
2177
 
2178
/* Function vect_create_cond_for_align_checks.
2179
 
2180
   Create a conditional expression that represents the alignment checks for
2181
   all of data references (array element references) whose alignment must be
2182
   checked at runtime.
2183
 
2184
   Input:
2185
   COND_EXPR  - input conditional expression.  New conditions will be chained
2186
                with logical AND operation.
2187
   LOOP_VINFO - two fields of the loop information are used.
2188
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2189
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2190
 
2191
   Output:
2192
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2193
                         expression.
2194
   The returned value is the conditional expression to be used in the if
2195
   statement that controls which version of the loop gets executed at runtime.
2196
 
2197
   The algorithm makes two assumptions:
2198
     1) The number of bytes "n" in a vector is a power of 2.
2199
     2) An address "a" is aligned if a%n is zero and that this
2200
        test can be done as a&(n-1) == 0.  For example, for 16
2201
        byte vectors the test is a&0xf == 0.  */
2202
 
2203
static void
2204
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2205
                                   tree *cond_expr,
2206
                                   gimple_seq *cond_expr_stmt_list)
2207
{
2208
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2209
  VEC(gimple,heap) *may_misalign_stmts
2210
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2211
  gimple ref_stmt;
2212
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2213
  tree mask_cst;
2214
  unsigned int i;
2215
  tree psize;
2216
  tree int_ptrsize_type;
2217
  char tmp_name[20];
2218
  tree or_tmp_name = NULL_TREE;
2219
  tree and_tmp, and_tmp_name;
2220
  gimple and_stmt;
2221
  tree ptrsize_zero;
2222
  tree part_cond_expr;
2223
 
2224
  /* Check that mask is one less than a power of 2, i.e., mask is
2225
     all zeros followed by all ones.  */
2226
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2227
 
2228
  /* CHECKME: what is the best integer or unsigned type to use to hold a
2229
     cast from a pointer value?  */
2230
  psize = TYPE_SIZE (ptr_type_node);
2231
  int_ptrsize_type
2232
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2233
 
2234
  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2235
     of the first vector of the i'th data reference. */
2236
 
2237
  for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2238
    {
2239
      gimple_seq new_stmt_list = NULL;
2240
      tree addr_base;
2241
      tree addr_tmp, addr_tmp_name;
2242
      tree or_tmp, new_or_tmp_name;
2243
      gimple addr_stmt, or_stmt;
2244
 
2245
      /* create: addr_tmp = (int)(address_of_first_vector) */
2246
      addr_base =
2247
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2248
                                              NULL_TREE, loop);
2249
      if (new_stmt_list != NULL)
2250
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2251
 
2252
      sprintf (tmp_name, "%s%d", "addr2int", i);
2253
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2254
      add_referenced_var (addr_tmp);
2255
      addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2256
      addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2257
                                                addr_base, NULL_TREE);
2258
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2259
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2260
 
2261
      /* The addresses are OR together.  */
2262
 
2263
      if (or_tmp_name != NULL_TREE)
2264
        {
2265
          /* create: or_tmp = or_tmp | addr_tmp */
2266
          sprintf (tmp_name, "%s%d", "orptrs", i);
2267
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2268
          add_referenced_var (or_tmp);
2269
          new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2270
          or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2271
                                                  new_or_tmp_name,
2272
                                                  or_tmp_name, addr_tmp_name);
2273
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2274
          gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2275
          or_tmp_name = new_or_tmp_name;
2276
        }
2277
      else
2278
        or_tmp_name = addr_tmp_name;
2279
 
2280
    } /* end for i */
2281
 
2282
  mask_cst = build_int_cst (int_ptrsize_type, mask);
2283
 
2284
  /* create: and_tmp = or_tmp & mask  */
2285
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2286
  add_referenced_var (and_tmp);
2287
  and_tmp_name = make_ssa_name (and_tmp, NULL);
2288
 
2289
  and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2290
                                           or_tmp_name, mask_cst);
2291
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2292
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2293
 
2294
  /* Make and_tmp the left operand of the conditional test against zero.
2295
     if and_tmp has a nonzero bit then some address is unaligned.  */
2296
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2297
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2298
                                and_tmp_name, ptrsize_zero);
2299
  if (*cond_expr)
2300
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2301
                              *cond_expr, part_cond_expr);
2302
  else
2303
    *cond_expr = part_cond_expr;
2304
}
2305
 
2306
 
2307
/* Function vect_vfa_segment_size.
2308
 
2309
   Create an expression that computes the size of segment
2310
   that will be accessed for a data reference.  The functions takes into
2311
   account that realignment loads may access one more vector.
2312
 
2313
   Input:
2314
     DR: The data reference.
2315
     VECT_FACTOR: vectorization factor.
2316
 
2317
   Return an expression whose value is the size of segment which will be
2318
   accessed by DR.  */
2319
 
2320
static tree
2321
vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2322
{
2323
  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2324
                                     DR_STEP (dr), vect_factor);
2325
 
2326
  if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2327
    {
2328
      tree vector_size = TYPE_SIZE_UNIT
2329
                          (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2330
 
2331
      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2332
                                    segment_length, vector_size);
2333
    }
2334
  return fold_convert (sizetype, segment_length);
2335
}
2336
 
2337
 
2338
/* Function vect_create_cond_for_alias_checks.
2339
 
2340
   Create a conditional expression that represents the run-time checks for
2341
   overlapping of address ranges represented by a list of data references
2342
   relations passed as input.
2343
 
2344
   Input:
2345
   COND_EXPR  - input conditional expression.  New conditions will be chained
2346
                with logical AND operation.
2347
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2348
                to be checked.
2349
 
2350
   Output:
2351
   COND_EXPR - conditional expression.
2352
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2353
                         expression.
2354
 
2355
 
2356
   The returned value is the conditional expression to be used in the if
2357
   statement that controls which version of the loop gets executed at runtime.
2358
*/
2359
 
2360
static void
2361
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2362
                                   tree * cond_expr,
2363
                                   gimple_seq * cond_expr_stmt_list)
2364
{
2365
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2366
  VEC (ddr_p, heap) * may_alias_ddrs =
2367
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2368
  tree vect_factor =
2369
    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2370
 
2371
  ddr_p ddr;
2372
  unsigned int i;
2373
  tree part_cond_expr;
2374
 
2375
  /* Create expression
2376
     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2377
     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2378
     &&
2379
     ...
2380
     &&
2381
     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2382
     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2383
 
2384
  if (VEC_empty (ddr_p, may_alias_ddrs))
2385
    return;
2386
 
2387
  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2388
    {
2389
      struct data_reference *dr_a, *dr_b;
2390
      gimple dr_group_first_a, dr_group_first_b;
2391
      tree addr_base_a, addr_base_b;
2392
      tree segment_length_a, segment_length_b;
2393
      gimple stmt_a, stmt_b;
2394
 
2395
      dr_a = DDR_A (ddr);
2396
      stmt_a = DR_STMT (DDR_A (ddr));
2397
      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2398
      if (dr_group_first_a)
2399
        {
2400
          stmt_a = dr_group_first_a;
2401
          dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2402
        }
2403
 
2404
      dr_b = DDR_B (ddr);
2405
      stmt_b = DR_STMT (DDR_B (ddr));
2406
      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2407
      if (dr_group_first_b)
2408
        {
2409
          stmt_b = dr_group_first_b;
2410
          dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2411
        }
2412
 
2413
      addr_base_a =
2414
        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2415
                                              NULL_TREE, loop);
2416
      addr_base_b =
2417
        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2418
                                              NULL_TREE, loop);
2419
 
2420
      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2421
      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2422
 
2423
      if (vect_print_dump_info (REPORT_DR_DETAILS))
2424
        {
2425
          fprintf (vect_dump,
2426
                   "create runtime check for data references ");
2427
          print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2428
          fprintf (vect_dump, " and ");
2429
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2430
        }
2431
 
2432
 
2433
      part_cond_expr =
2434
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2435
          fold_build2 (LT_EXPR, boolean_type_node,
2436
            fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2437
              addr_base_a,
2438
              segment_length_a),
2439
            addr_base_b),
2440
          fold_build2 (LT_EXPR, boolean_type_node,
2441
            fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2442
              addr_base_b,
2443
              segment_length_b),
2444
            addr_base_a));
2445
 
2446
      if (*cond_expr)
2447
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2448
                                  *cond_expr, part_cond_expr);
2449
      else
2450
        *cond_expr = part_cond_expr;
2451
    }
2452
 
2453
  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2454
    fprintf (vect_dump, "created %u versioning for alias checks.\n",
2455
             VEC_length (ddr_p, may_alias_ddrs));
2456
}
2457
 
2458
 
2459
/* Function vect_loop_versioning.
2460
 
2461
   If the loop has data references that may or may not be aligned or/and
2462
   has data reference relations whose independence was not proven then
2463
   two versions of the loop need to be generated, one which is vectorized
2464
   and one which isn't.  A test is then generated to control which of the
2465
   loops is executed.  The test checks for the alignment of all of the
2466
   data references that may or may not be aligned.  An additional
2467
   sequence of runtime tests is generated for each pairs of DDRs whose
2468
   independence was not proven.  The vectorized version of loop is
2469
   executed only if both alias and alignment tests are passed.
2470
 
2471
   The test generated to check which version of loop is executed
2472
   is modified to also check for profitability as indicated by the
2473
   cost model initially.
2474
 
2475
   The versioning precondition(s) are placed in *COND_EXPR and
2476
   *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2477
   also performed, otherwise only the conditions are generated.  */
2478
 
2479
void
2480
vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2481
                      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2482
{
2483
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2484
  basic_block condition_bb;
2485
  gimple_stmt_iterator gsi, cond_exp_gsi;
2486
  basic_block merge_bb;
2487
  basic_block new_exit_bb;
2488
  edge new_exit_e, e;
2489
  gimple orig_phi, new_phi;
2490
  tree arg;
2491
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2492
  gimple_seq gimplify_stmt_list = NULL;
2493
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2494
  int min_profitable_iters = 0;
2495
  unsigned int th;
2496
 
2497
  /* Get profitability threshold for vectorized loop.  */
2498
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2499
 
2500
  th = conservative_cost_threshold (loop_vinfo,
2501
                                    min_profitable_iters);
2502
 
2503
  *cond_expr =
2504
    fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2505
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2506
 
2507
  *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2508
                                     false, NULL_TREE);
2509
 
2510
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2511
      vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2512
                                         cond_expr_stmt_list);
2513
 
2514
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2515
    vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2516
                                       cond_expr_stmt_list);
2517
 
2518
  *cond_expr =
2519
    fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2520
  *cond_expr =
2521
    force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2522
  gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2523
 
2524
  /* If we only needed the extra conditions and a new loop copy
2525
     bail out here.  */
2526
  if (!do_versioning)
2527
    return;
2528
 
2529
  initialize_original_copy_tables ();
2530
  loop_version (loop, *cond_expr, &condition_bb,
2531
                prob, prob, REG_BR_PROB_BASE - prob, true);
2532
  free_original_copy_tables();
2533
 
2534
  /* Loop versioning violates an assumption we try to maintain during
2535
     vectorization - that the loop exit block has a single predecessor.
2536
     After versioning, the exit block of both loop versions is the same
2537
     basic block (i.e. it has two predecessors). Just in order to simplify
2538
     following transformations in the vectorizer, we fix this situation
2539
     here by adding a new (empty) block on the exit-edge of the loop,
2540
     with the proper loop-exit phis to maintain loop-closed-form.  */
2541
 
2542
  merge_bb = single_exit (loop)->dest;
2543
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2544
  new_exit_bb = split_edge (single_exit (loop));
2545
  new_exit_e = single_exit (loop);
2546
  e = EDGE_SUCC (new_exit_bb, 0);
2547
 
2548
  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2549
    {
2550
      orig_phi = gsi_stmt (gsi);
2551
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2552
                                  new_exit_bb);
2553
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2554
      add_phi_arg (new_phi, arg, new_exit_e,
2555
                   gimple_phi_arg_location_from_edge (orig_phi, e));
2556
      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2557
    }
2558
 
2559
  /* End loop-exit-fixes after versioning.  */
2560
 
2561
  update_ssa (TODO_update_ssa);
2562
  if (*cond_expr_stmt_list)
2563
    {
2564
      cond_exp_gsi = gsi_last_bb (condition_bb);
2565
      gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2566
                             GSI_SAME_STMT);
2567
      *cond_expr_stmt_list = NULL;
2568
    }
2569
  *cond_expr = NULL_TREE;
2570
}
2571
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.