OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-vect-loop-manip.c] - Blame information for rev 764

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Vectorizer Specific Loop Manipulations
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
   and Ira Rosen <irar@il.ibm.com>
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "ggc.h"
28
#include "tree.h"
29
#include "basic-block.h"
30
#include "tree-pretty-print.h"
31
#include "gimple-pretty-print.h"
32
#include "tree-flow.h"
33
#include "tree-dump.h"
34
#include "cfgloop.h"
35
#include "cfglayout.h"
36
#include "diagnostic-core.h"
37
#include "tree-scalar-evolution.h"
38
#include "tree-vectorizer.h"
39
#include "langhooks.h"
40
 
41
/*************************************************************************
42
  Simple Loop Peeling Utilities
43
 
44
  Utilities to support loop peeling for vectorization purposes.
45
 *************************************************************************/
46
 
47
 
48
/* Renames the use *OP_P.  */
49
 
50
static void
51
rename_use_op (use_operand_p op_p)
52
{
53
  tree new_name;
54
 
55
  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
56
    return;
57
 
58
  new_name = get_current_def (USE_FROM_PTR (op_p));
59
 
60
  /* Something defined outside of the loop.  */
61
  if (!new_name)
62
    return;
63
 
64
  /* An ordinary ssa name defined in the loop.  */
65
 
66
  SET_USE (op_p, new_name);
67
}
68
 
69
 
70
/* Renames the variables in basic block BB.  */
71
 
72
void
73
rename_variables_in_bb (basic_block bb)
74
{
75
  gimple_stmt_iterator gsi;
76
  gimple stmt;
77
  use_operand_p use_p;
78
  ssa_op_iter iter;
79
  edge e;
80
  edge_iterator ei;
81
  struct loop *loop = bb->loop_father;
82
 
83
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
84
    {
85
      stmt = gsi_stmt (gsi);
86
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
87
        rename_use_op (use_p);
88
    }
89
 
90
  FOR_EACH_EDGE (e, ei, bb->succs)
91
    {
92
      if (!flow_bb_inside_loop_p (loop, e->dest))
93
        continue;
94
      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
95
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
96
    }
97
}
98
 
99
 
100
/* Renames variables in new generated LOOP.  */
101
 
102
void
103
rename_variables_in_loop (struct loop *loop)
104
{
105
  unsigned i;
106
  basic_block *bbs;
107
 
108
  bbs = get_loop_body (loop);
109
 
110
  for (i = 0; i < loop->num_nodes; i++)
111
    rename_variables_in_bb (bbs[i]);
112
 
113
  free (bbs);
114
}
115
 
116
typedef struct
117
{
118
  tree from, to;
119
  basic_block bb;
120
} adjust_info;
121
 
122
DEF_VEC_O(adjust_info);
123
DEF_VEC_ALLOC_O_STACK(adjust_info);
124
#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
125
 
126
/* A stack of values to be adjusted in debug stmts.  We have to
127
   process them LIFO, so that the closest substitution applies.  If we
128
   processed them FIFO, without the stack, we might substitute uses
129
   with a PHI DEF that would soon become non-dominant, and when we got
130
   to the suitable one, it wouldn't have anything to substitute any
131
   more.  */
132
static VEC(adjust_info, stack) *adjust_vec;
133
 
134
/* Adjust any debug stmts that referenced AI->from values to use the
135
   loop-closed AI->to, if the references are dominated by AI->bb and
136
   not by the definition of AI->from.  */
137
 
138
static void
139
adjust_debug_stmts_now (adjust_info *ai)
140
{
141
  basic_block bbphi = ai->bb;
142
  tree orig_def = ai->from;
143
  tree new_def = ai->to;
144
  imm_use_iterator imm_iter;
145
  gimple stmt;
146
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
147
 
148
  gcc_assert (dom_info_available_p (CDI_DOMINATORS));
149
 
150
  /* Adjust any debug stmts that held onto non-loop-closed
151
     references.  */
152
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
153
    {
154
      use_operand_p use_p;
155
      basic_block bbuse;
156
 
157
      if (!is_gimple_debug (stmt))
158
        continue;
159
 
160
      gcc_assert (gimple_debug_bind_p (stmt));
161
 
162
      bbuse = gimple_bb (stmt);
163
 
164
      if ((bbuse == bbphi
165
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
166
          && !(bbuse == bbdef
167
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
168
        {
169
          if (new_def)
170
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
171
              SET_USE (use_p, new_def);
172
          else
173
            {
174
              gimple_debug_bind_reset_value (stmt);
175
              update_stmt (stmt);
176
            }
177
        }
178
    }
179
}
180
 
181
/* Adjust debug stmts as scheduled before.  */
182
 
183
static void
184
adjust_vec_debug_stmts (void)
185
{
186
  if (!MAY_HAVE_DEBUG_STMTS)
187
    return;
188
 
189
  gcc_assert (adjust_vec);
190
 
191
  while (!VEC_empty (adjust_info, adjust_vec))
192
    {
193
      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
194
      VEC_pop (adjust_info, adjust_vec);
195
    }
196
 
197
  VEC_free (adjust_info, stack, adjust_vec);
198
}
199
 
200
/* Adjust any debug stmts that referenced FROM values to use the
201
   loop-closed TO, if the references are dominated by BB and not by
202
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
203
   will be postponed until adjust_vec_debug_stmts is called.  */
204
 
205
static void
206
adjust_debug_stmts (tree from, tree to, basic_block bb)
207
{
208
  adjust_info ai;
209
 
210
  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
211
      && SSA_NAME_VAR (from) != gimple_vop (cfun))
212
    {
213
      ai.from = from;
214
      ai.to = to;
215
      ai.bb = bb;
216
 
217
      if (adjust_vec)
218
        VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
219
      else
220
        adjust_debug_stmts_now (&ai);
221
    }
222
}
223
 
224
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
225
   to adjust any debug stmts that referenced the old phi arg,
226
   presumably non-loop-closed references left over from other
227
   transformations.  */
228
 
229
static void
230
adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
231
{
232
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
233
 
234
  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
235
 
236
  if (MAY_HAVE_DEBUG_STMTS)
237
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
238
                        gimple_bb (update_phi));
239
}
240
 
241
 
242
/* Update the PHI nodes of NEW_LOOP.
243
 
244
   NEW_LOOP is a duplicate of ORIG_LOOP.
245
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
246
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
247
   executes before it.  */
248
 
249
static void
250
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
251
                                       struct loop *new_loop, bool after)
252
{
253
  tree new_ssa_name;
254
  gimple phi_new, phi_orig;
255
  tree def;
256
  edge orig_loop_latch = loop_latch_edge (orig_loop);
257
  edge orig_entry_e = loop_preheader_edge (orig_loop);
258
  edge new_loop_exit_e = single_exit (new_loop);
259
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
260
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
261
  gimple_stmt_iterator gsi_new, gsi_orig;
262
 
263
  /*
264
     step 1. For each loop-header-phi:
265
             Add the first phi argument for the phi in NEW_LOOP
266
            (the one associated with the entry of NEW_LOOP)
267
 
268
     step 2. For each loop-header-phi:
269
             Add the second phi argument for the phi in NEW_LOOP
270
            (the one associated with the latch of NEW_LOOP)
271
 
272
     step 3. Update the phis in the successor block of NEW_LOOP.
273
 
274
        case 1: NEW_LOOP was placed before ORIG_LOOP:
275
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
276
                Updating the phis in the successor block can therefore be done
277
                along with the scanning of the loop header phis, because the
278
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
279
                phi nodes, organized in the same order.
280
 
281
        case 2: NEW_LOOP was placed after ORIG_LOOP:
282
                The successor block of NEW_LOOP is the original exit block of
283
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
284
                We postpone updating these phis to a later stage (when
285
                loop guards are added).
286
   */
287
 
288
 
289
  /* Scan the phis in the headers of the old and new loops
290
     (they are organized in exactly the same order).  */
291
 
292
  for (gsi_new = gsi_start_phis (new_loop->header),
293
       gsi_orig = gsi_start_phis (orig_loop->header);
294
       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
295
       gsi_next (&gsi_new), gsi_next (&gsi_orig))
296
    {
297
      source_location locus;
298
      phi_new = gsi_stmt (gsi_new);
299
      phi_orig = gsi_stmt (gsi_orig);
300
 
301
      /* step 1.  */
302
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
303
      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
304
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);
305
 
306
      /* step 2.  */
307
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
308
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
309
      if (TREE_CODE (def) != SSA_NAME)
310
        continue;
311
 
312
      new_ssa_name = get_current_def (def);
313
      if (!new_ssa_name)
314
        {
315
          /* This only happens if there are no definitions
316
             inside the loop. use the phi_result in this case.  */
317
          new_ssa_name = PHI_RESULT (phi_new);
318
        }
319
 
320
      /* An ordinary ssa name defined in the loop.  */
321
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
322
 
323
      /* Drop any debug references outside the loop, if they would
324
         become ill-formed SSA.  */
325
      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
326
 
327
      /* step 3 (case 1).  */
328
      if (!after)
329
        {
330
          gcc_assert (new_loop_exit_e == orig_entry_e);
331
          adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332
        }
333
    }
334
}
335
 
336
 
337
/* Update PHI nodes for a guard of the LOOP.
338
 
339
   Input:
340
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
341
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
342
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
343
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
344
        We denote this bb NEW_MERGE_BB because before the guard code was added
345
        it had a single predecessor (the LOOP header), and now it became a merge
346
        point of two paths - the path that ends with the LOOP exit-edge, and
347
        the path that ends with GUARD_EDGE.
348
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
349
        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
350
 
351
   ===> The CFG before the guard-code was added:
352
        LOOP_header_bb:
353
          loop_body
354
          if (exit_loop) goto update_bb
355
          else           goto LOOP_header_bb
356
        update_bb:
357
 
358
   ==> The CFG after the guard-code was added:
359
        guard_bb:
360
          if (LOOP_guard_condition) goto new_merge_bb
361
          else                      goto LOOP_header_bb
362
        LOOP_header_bb:
363
          loop_body
364
          if (exit_loop_condition) goto new_merge_bb
365
          else                     goto LOOP_header_bb
366
        new_merge_bb:
367
          goto update_bb
368
        update_bb:
369
 
370
   ==> The CFG after this function:
371
        guard_bb:
372
          if (LOOP_guard_condition) goto new_merge_bb
373
          else                      goto LOOP_header_bb
374
        LOOP_header_bb:
375
          loop_body
376
          if (exit_loop_condition) goto new_exit_bb
377
          else                     goto LOOP_header_bb
378
        new_exit_bb:
379
        new_merge_bb:
380
          goto update_bb
381
        update_bb:
382
 
383
   This function:
384
   1. creates and updates the relevant phi nodes to account for the new
385
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
386
      1.1. Create phi nodes at NEW_MERGE_BB.
387
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
388
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
389
   2. preserves loop-closed-ssa-form by creating the required phi nodes
390
      at the exit of LOOP (i.e, in NEW_EXIT_BB).
391
 
392
   There are two flavors to this function:
393
 
394
   slpeel_update_phi_nodes_for_guard1:
395
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
396
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
397
     for variables that have phis in the loop header.
398
 
399
   slpeel_update_phi_nodes_for_guard2:
400
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
401
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
402
     for variables that have phis in the loop exit.
403
 
404
   I.E., the overall structure is:
405
 
406
        loop1_preheader_bb:
407
                guard1 (goto loop1/merge1_bb)
408
        loop1
409
        loop1_exit_bb:
410
                guard2 (goto merge1_bb/merge2_bb)
411
        merge1_bb
412
        loop2
413
        loop2_exit_bb
414
        merge2_bb
415
        next_bb
416
 
417
   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
418
   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
419
   that have phis in loop1->header).
420
 
421
   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
422
   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
423
   that have phis in next_bb). It also adds some of these phis to
424
   loop1_exit_bb.
425
 
426
   slpeel_update_phi_nodes_for_guard1 is always called before
427
   slpeel_update_phi_nodes_for_guard2. They are both needed in order
428
   to create correct data-flow and loop-closed-ssa-form.
429
 
430
   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
431
   that change between iterations of a loop (and therefore have a phi-node
432
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
433
   phis for variables that are used out of the loop (and therefore have
434
   loop-closed exit phis). Some variables may be both updated between
435
   iterations and used after the loop. This is why in loop1_exit_bb we
436
   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
437
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
438
 
439
   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
440
     an original loop. i.e., we have:
441
 
442
           orig_loop
443
           guard_bb (goto LOOP/new_merge)
444
           new_loop <-- LOOP
445
           new_exit
446
           new_merge
447
           next_bb
448
 
449
     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
450
     have:
451
 
452
           new_loop
453
           guard_bb (goto LOOP/new_merge)
454
           orig_loop <-- LOOP
455
           new_exit
456
           new_merge
457
           next_bb
458
 
459
     The SSA names defined in the original loop have a current
460
     reaching definition that that records the corresponding new
461
     ssa-name used in the new duplicated loop copy.
462
  */
463
 
464
/* Function slpeel_update_phi_nodes_for_guard1
465
 
466
   Input:
467
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
468
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
469
            information.
470
 
471
   In the context of the overall structure, we have:
472
 
473
        loop1_preheader_bb:
474
                guard1 (goto loop1/merge1_bb)
475
LOOP->  loop1
476
        loop1_exit_bb:
477
                guard2 (goto merge1_bb/merge2_bb)
478
        merge1_bb
479
        loop2
480
        loop2_exit_bb
481
        merge2_bb
482
        next_bb
483
 
484
   For each name updated between loop iterations (i.e - for each name that has
485
   an entry (loop-header) phi in LOOP) we create a new phi in:
486
   1. merge1_bb (to account for the edge from guard1)
487
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
488
*/
489
 
490
static void
491
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
492
                                    bool is_new_loop, basic_block *new_exit_bb,
493
                                    bitmap *defs)
494
{
495
  gimple orig_phi, new_phi;
496
  gimple update_phi, update_phi2;
497
  tree guard_arg, loop_arg;
498
  basic_block new_merge_bb = guard_edge->dest;
499
  edge e = EDGE_SUCC (new_merge_bb, 0);
500
  basic_block update_bb = e->dest;
501
  basic_block orig_bb = loop->header;
502
  edge new_exit_e;
503
  tree current_new_name;
504
  gimple_stmt_iterator gsi_orig, gsi_update;
505
 
506
  /* Create new bb between loop and new_merge_bb.  */
507
  *new_exit_bb = split_edge (single_exit (loop));
508
 
509
  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
510
 
511
  for (gsi_orig = gsi_start_phis (orig_bb),
512
       gsi_update = gsi_start_phis (update_bb);
513
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
514
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
515
    {
516
      source_location loop_locus, guard_locus;
517
      orig_phi = gsi_stmt (gsi_orig);
518
      update_phi = gsi_stmt (gsi_update);
519
 
520
      /** 1. Handle new-merge-point phis  **/
521
 
522
      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
523
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
524
                                 new_merge_bb);
525
 
526
      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
527
            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
528
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
529
      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
530
                                                      EDGE_SUCC (loop->latch,
531
                                                                 0));
532
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
533
      guard_locus
534
        = gimple_phi_arg_location_from_edge (orig_phi,
535
                                             loop_preheader_edge (loop));
536
 
537
      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
538
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
539
 
540
      /* 1.3. Update phi in successor block.  */
541
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
542
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
543
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
544
      update_phi2 = new_phi;
545
 
546
 
547
      /** 2. Handle loop-closed-ssa-form phis  **/
548
 
549
      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
550
        continue;
551
 
552
      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
553
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
554
                                 *new_exit_bb);
555
 
556
      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
557
      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
558
 
559
      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
560
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
561
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
562
                                  PHI_RESULT (new_phi));
563
 
564
      /* 2.4. Record the newly created name with set_current_def.
565
         We want to find a name such that
566
                name = get_current_def (orig_loop_name)
567
         and to set its current definition as follows:
568
                set_current_def (name, new_phi_name)
569
 
570
         If LOOP is a new loop then loop_arg is already the name we're
571
         looking for. If LOOP is the original loop, then loop_arg is
572
         the orig_loop_name and the relevant name is recorded in its
573
         current reaching definition.  */
574
      if (is_new_loop)
575
        current_new_name = loop_arg;
576
      else
577
        {
578
          current_new_name = get_current_def (loop_arg);
579
          /* current_def is not available only if the variable does not
580
             change inside the loop, in which case we also don't care
581
             about recording a current_def for it because we won't be
582
             trying to create loop-exit-phis for it.  */
583
          if (!current_new_name)
584
            continue;
585
        }
586
      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
587
 
588
      set_current_def (current_new_name, PHI_RESULT (new_phi));
589
      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
590
    }
591
}
592
 
593
 
594
/* Function slpeel_update_phi_nodes_for_guard2
595
 
596
   Input:
597
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
598
 
599
   In the context of the overall structure, we have:
600
 
601
        loop1_preheader_bb:
602
                guard1 (goto loop1/merge1_bb)
603
        loop1
604
        loop1_exit_bb:
605
                guard2 (goto merge1_bb/merge2_bb)
606
        merge1_bb
607
LOOP->  loop2
608
        loop2_exit_bb
609
        merge2_bb
610
        next_bb
611
 
612
   For each name used out side the loop (i.e - for each name that has an exit
613
   phi in next_bb) we create a new phi in:
614
   1. merge2_bb (to account for the edge from guard_bb)
615
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
616
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
617
      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
618
*/
619
 
620
static void
621
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
622
                                    bool is_new_loop, basic_block *new_exit_bb)
623
{
624
  gimple orig_phi, new_phi;
625
  gimple update_phi, update_phi2;
626
  tree guard_arg, loop_arg;
627
  basic_block new_merge_bb = guard_edge->dest;
628
  edge e = EDGE_SUCC (new_merge_bb, 0);
629
  basic_block update_bb = e->dest;
630
  edge new_exit_e;
631
  tree orig_def, orig_def_new_name;
632
  tree new_name, new_name2;
633
  tree arg;
634
  gimple_stmt_iterator gsi;
635
 
636
  /* Create new bb between loop and new_merge_bb.  */
637
  *new_exit_bb = split_edge (single_exit (loop));
638
 
639
  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
640
 
641
  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
642
    {
643
      update_phi = gsi_stmt (gsi);
644
      orig_phi = update_phi;
645
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
646
      /* This loop-closed-phi actually doesn't represent a use
647
         out of the loop - the phi arg is a constant.  */
648
      if (TREE_CODE (orig_def) != SSA_NAME)
649
        continue;
650
      orig_def_new_name = get_current_def (orig_def);
651
      arg = NULL_TREE;
652
 
653
      /** 1. Handle new-merge-point phis  **/
654
 
655
      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
656
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
657
                                 new_merge_bb);
658
 
659
      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
660
            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
661
      new_name = orig_def;
662
      new_name2 = NULL_TREE;
663
      if (orig_def_new_name)
664
        {
665
          new_name = orig_def_new_name;
666
          /* Some variables have both loop-entry-phis and loop-exit-phis.
667
             Such variables were given yet newer names by phis placed in
668
             guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
669
             new_name2 = get_current_def (get_current_def (orig_name)).  */
670
          new_name2 = get_current_def (new_name);
671
        }
672
 
673
      if (is_new_loop)
674
        {
675
          guard_arg = orig_def;
676
          loop_arg = new_name;
677
        }
678
      else
679
        {
680
          guard_arg = new_name;
681
          loop_arg = orig_def;
682
        }
683
      if (new_name2)
684
        guard_arg = new_name2;
685
 
686
      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
687
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
688
 
689
      /* 1.3. Update phi in successor block.  */
690
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
691
      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
692
      update_phi2 = new_phi;
693
 
694
 
695
      /** 2. Handle loop-closed-ssa-form phis  **/
696
 
697
      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
698
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
699
                                 *new_exit_bb);
700
 
701
      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
702
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
703
 
704
      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
705
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
706
      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
707
                                  PHI_RESULT (new_phi));
708
 
709
 
710
      /** 3. Handle loop-closed-ssa-form phis for first loop  **/
711
 
712
      /* 3.1. Find the relevant names that need an exit-phi in
713
         GUARD_BB, i.e. names for which
714
         slpeel_update_phi_nodes_for_guard1 had not already created a
715
         phi node. This is the case for names that are used outside
716
         the loop (and therefore need an exit phi) but are not updated
717
         across loop iterations (and therefore don't have a
718
         loop-header-phi).
719
 
720
         slpeel_update_phi_nodes_for_guard1 is responsible for
721
         creating loop-exit phis in GUARD_BB for names that have a
722
         loop-header-phi.  When such a phi is created we also record
723
         the new name in its current definition.  If this new name
724
         exists, then guard_arg was set to this new name (see 1.2
725
         above).  Therefore, if guard_arg is not this new name, this
726
         is an indication that an exit-phi in GUARD_BB was not yet
727
         created, so we take care of it here.  */
728
      if (guard_arg == new_name2)
729
        continue;
730
      arg = guard_arg;
731
 
732
      /* 3.2. Generate new phi node in GUARD_BB:  */
733
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
734
                                 guard_edge->src);
735
 
736
      /* 3.3. GUARD_BB has one incoming edge:  */
737
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
738
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
739
                   UNKNOWN_LOCATION);
740
 
741
      /* 3.4. Update phi in successor of GUARD_BB:  */
742
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
743
                                                                == guard_arg);
744
      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
745
                                  PHI_RESULT (new_phi));
746
    }
747
}
748
 
749
 
750
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
751
   that starts at zero, increases by one and its limit is NITERS.
752
 
753
   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
754
 
755
void
756
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
757
{
758
  tree indx_before_incr, indx_after_incr;
759
  gimple cond_stmt;
760
  gimple orig_cond;
761
  edge exit_edge = single_exit (loop);
762
  gimple_stmt_iterator loop_cond_gsi;
763
  gimple_stmt_iterator incr_gsi;
764
  bool insert_after;
765
  tree init = build_int_cst (TREE_TYPE (niters), 0);
766
  tree step = build_int_cst (TREE_TYPE (niters), 1);
767
  LOC loop_loc;
768
  enum tree_code code;
769
 
770
  orig_cond = get_loop_exit_condition (loop);
771
  gcc_assert (orig_cond);
772
  loop_cond_gsi = gsi_for_stmt (orig_cond);
773
 
774
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
775
  create_iv (init, step, NULL_TREE, loop,
776
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
777
 
778
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
779
                                              true, NULL_TREE, true,
780
                                              GSI_SAME_STMT);
781
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
782
                                     true, GSI_SAME_STMT);
783
 
784
  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
785
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
786
                                 NULL_TREE);
787
 
788
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
789
 
790
  /* Remove old loop exit test:  */
791
  gsi_remove (&loop_cond_gsi, true);
792
 
793
  loop_loc = find_loop_location (loop);
794
  if (dump_file && (dump_flags & TDF_DETAILS))
795
    {
796
      if (loop_loc != UNKNOWN_LOC)
797
        fprintf (dump_file, "\nloop at %s:%d: ",
798
                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
799
      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
800
    }
801
 
802
  loop->nb_iterations = niters;
803
}
804
 
805
 
806
/* Given LOOP this function generates a new copy of it and puts it
807
   on E which is either the entry or exit of LOOP.  */
808
 
809
struct loop *
810
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
811
{
812
  struct loop *new_loop;
813
  basic_block *new_bbs, *bbs;
814
  bool at_exit;
815
  bool was_imm_dom;
816
  basic_block exit_dest;
817
  gimple phi;
818
  tree phi_arg;
819
  edge exit, new_exit;
820
  gimple_stmt_iterator gsi;
821
 
822
  at_exit = (e == single_exit (loop));
823
  if (!at_exit && e != loop_preheader_edge (loop))
824
    return NULL;
825
 
826
  bbs = get_loop_body (loop);
827
 
828
  /* Check whether duplication is possible.  */
829
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
830
    {
831
      free (bbs);
832
      return NULL;
833
    }
834
 
835
  /* Generate new loop structure.  */
836
  new_loop = duplicate_loop (loop, loop_outer (loop));
837
  if (!new_loop)
838
    {
839
      free (bbs);
840
      return NULL;
841
    }
842
 
843
  exit_dest = single_exit (loop)->dest;
844
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
845
                                          exit_dest) == loop->header ?
846
                 true : false);
847
 
848
  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
849
 
850
  exit = single_exit (loop);
851
  copy_bbs (bbs, loop->num_nodes, new_bbs,
852
            &exit, 1, &new_exit, NULL,
853
            e->src);
854
 
855
  /* Duplicating phi args at exit bbs as coming
856
     also from exit of duplicated loop.  */
857
  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
858
    {
859
      phi = gsi_stmt (gsi);
860
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
861
      if (phi_arg)
862
        {
863
          edge new_loop_exit_edge;
864
          source_location locus;
865
 
866
          locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
867
          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
868
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
869
          else
870
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
871
 
872
          add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
873
        }
874
    }
875
 
876
  if (at_exit) /* Add the loop copy at exit.  */
877
    {
878
      redirect_edge_and_branch_force (e, new_loop->header);
879
      PENDING_STMT (e) = NULL;
880
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
881
      if (was_imm_dom)
882
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
883
    }
884
  else /* Add the copy at entry.  */
885
    {
886
      edge new_exit_e;
887
      edge entry_e = loop_preheader_edge (loop);
888
      basic_block preheader = entry_e->src;
889
 
890
      if (!flow_bb_inside_loop_p (new_loop,
891
                                  EDGE_SUCC (new_loop->header, 0)->dest))
892
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
893
      else
894
        new_exit_e = EDGE_SUCC (new_loop->header, 1);
895
 
896
      redirect_edge_and_branch_force (new_exit_e, loop->header);
897
      PENDING_STMT (new_exit_e) = NULL;
898
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
899
                               new_exit_e->src);
900
 
901
      /* We have to add phi args to the loop->header here as coming
902
         from new_exit_e edge.  */
903
      for (gsi = gsi_start_phis (loop->header);
904
           !gsi_end_p (gsi);
905
           gsi_next (&gsi))
906
        {
907
          phi = gsi_stmt (gsi);
908
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
909
          if (phi_arg)
910
            add_phi_arg (phi, phi_arg, new_exit_e,
911
                         gimple_phi_arg_location_from_edge (phi, entry_e));
912
        }
913
 
914
      redirect_edge_and_branch_force (entry_e, new_loop->header);
915
      PENDING_STMT (entry_e) = NULL;
916
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
917
    }
918
 
919
  free (new_bbs);
920
  free (bbs);
921
 
922
  return new_loop;
923
}
924
 
925
 
926
/* Given the condition statement COND, put it as the last statement
927
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
928
   Assumes that this is the single exit of the guarded loop.
929
   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
930
 
931
static edge
932
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
933
                       gimple_seq cond_expr_stmt_list,
934
                       basic_block exit_bb, basic_block dom_bb)
935
{
936
  gimple_stmt_iterator gsi;
937
  edge new_e, enter_e;
938
  gimple cond_stmt;
939
  gimple_seq gimplify_stmt_list = NULL;
940
 
941
  enter_e = EDGE_SUCC (guard_bb, 0);
942
  enter_e->flags &= ~EDGE_FALLTHRU;
943
  enter_e->flags |= EDGE_FALSE_VALUE;
944
  gsi = gsi_last_bb (guard_bb);
945
 
946
  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
947
  if (gimplify_stmt_list)
948
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
949
  cond_stmt = gimple_build_cond (NE_EXPR,
950
                                 cond, build_int_cst (TREE_TYPE (cond), 0),
951
                                 NULL_TREE, NULL_TREE);
952
  if (cond_expr_stmt_list)
953
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
954
 
955
  gsi = gsi_last_bb (guard_bb);
956
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
957
 
958
  /* Add new edge to connect guard block to the merge/loop-exit block.  */
959
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
960
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
961
  return new_e;
962
}
963
 
964
 
965
/* This function verifies that the following restrictions apply to LOOP:
966
   (1) it is innermost
967
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
968
   (3) it is single entry, single exit
969
   (4) its exit condition is the last stmt in the header
970
   (5) E is the entry/exit edge of LOOP.
971
 */
972
 
973
bool
974
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
975
{
976
  edge exit_e = single_exit (loop);
977
  edge entry_e = loop_preheader_edge (loop);
978
  gimple orig_cond = get_loop_exit_condition (loop);
979
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
980
 
981
  if (need_ssa_update_p (cfun))
982
    return false;
983
 
984
  if (loop->inner
985
      /* All loops have an outer scope; the only case loop->outer is NULL is for
986
         the function itself.  */
987
      || !loop_outer (loop)
988
      || loop->num_nodes != 2
989
      || !empty_block_p (loop->latch)
990
      || !single_exit (loop)
991
      /* Verify that new loop exit condition can be trivially modified.  */
992
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
993
      || (e != exit_e && e != entry_e))
994
    return false;
995
 
996
  return true;
997
}
998
 
999
#ifdef ENABLE_CHECKING
1000
static void
1001
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1002
                                 struct loop *second_loop)
1003
{
1004
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1005
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1006
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1007
 
1008
  /* A guard that controls whether the second_loop is to be executed or skipped
1009
     is placed in first_loop->exit.  first_loop->exit therefore has two
1010
     successors - one is the preheader of second_loop, and the other is a bb
1011
     after second_loop.
1012
   */
1013
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1014
 
1015
  /* 1. Verify that one of the successors of first_loop->exit is the preheader
1016
        of second_loop.  */
1017
 
1018
  /* The preheader of new_loop is expected to have two predecessors:
1019
     first_loop->exit and the block that precedes first_loop.  */
1020
 
1021
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1022
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1023
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1024
               || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1025
                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1026
 
1027
  /* Verify that the other successor of first_loop->exit is after the
1028
     second_loop.  */
1029
  /* TODO */
1030
}
1031
#endif
1032
 
1033
/* If the run time cost model check determines that vectorization is
1034
   not profitable and hence scalar loop should be generated then set
1035
   FIRST_NITERS to prologue peeled iterations. This will allow all the
1036
   iterations to be executed in the prologue peeled scalar loop.  */
1037
 
1038
static void
1039
set_prologue_iterations (basic_block bb_before_first_loop,
1040
                         tree *first_niters,
1041
                         struct loop *loop,
1042
                         unsigned int th)
1043
{
1044
  edge e;
1045
  basic_block cond_bb, then_bb;
1046
  tree var, prologue_after_cost_adjust_name;
1047
  gimple_stmt_iterator gsi;
1048
  gimple newphi;
1049
  edge e_true, e_false, e_fallthru;
1050
  gimple cond_stmt;
1051
  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1052
  tree cost_pre_condition = NULL_TREE;
1053
  tree scalar_loop_iters =
1054
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1055
 
1056
  e = single_pred_edge (bb_before_first_loop);
1057
  cond_bb = split_edge(e);
1058
 
1059
  e = single_pred_edge (bb_before_first_loop);
1060
  then_bb = split_edge(e);
1061
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1062
 
1063
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1064
                                   EDGE_FALSE_VALUE);
1065
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1066
 
1067
  e_true = EDGE_PRED (then_bb, 0);
1068
  e_true->flags &= ~EDGE_FALLTHRU;
1069
  e_true->flags |= EDGE_TRUE_VALUE;
1070
 
1071
  e_fallthru = EDGE_SUCC (then_bb, 0);
1072
 
1073
  cost_pre_condition =
1074
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1075
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1076
  cost_pre_condition =
1077
    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1078
                          true, NULL_TREE);
1079
  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1080
                                 build_int_cst (TREE_TYPE (cost_pre_condition),
1081
                                                0), NULL_TREE, NULL_TREE);
1082
 
1083
  gsi = gsi_last_bb (cond_bb);
1084
  if (gimplify_stmt_list)
1085
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1086
 
1087
  gsi = gsi_last_bb (cond_bb);
1088
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1089
 
1090
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1091
                        "prologue_after_cost_adjust");
1092
  add_referenced_var (var);
1093
  prologue_after_cost_adjust_name =
1094
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1095
 
1096
  gsi = gsi_last_bb (then_bb);
1097
  if (stmts)
1098
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1099
 
1100
  newphi = create_phi_node (var, bb_before_first_loop);
1101
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1102
               UNKNOWN_LOCATION);
1103
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1104
 
1105
  *first_niters = PHI_RESULT (newphi);
1106
}
1107
 
1108
/* Function slpeel_tree_peel_loop_to_edge.
1109
 
1110
   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1111
   that is placed on the entry (exit) edge E of LOOP. After this transformation
1112
   we have two loops one after the other - first-loop iterates FIRST_NITERS
1113
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1114
   If the cost model indicates that it is profitable to emit a scalar
1115
   loop instead of the vector one, then the prolog (epilog) loop will iterate
1116
   for the entire unchanged scalar iterations of the loop.
1117
 
1118
   Input:
1119
   - LOOP: the loop to be peeled.
1120
   - E: the exit or entry edge of LOOP.
1121
        If it is the entry edge, we peel the first iterations of LOOP. In this
1122
        case first-loop is LOOP, and second-loop is the newly created loop.
1123
        If it is the exit edge, we peel the last iterations of LOOP. In this
1124
        case, first-loop is the newly created loop, and second-loop is LOOP.
1125
   - NITERS: the number of iterations that LOOP iterates.
1126
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1127
   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1128
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1129
        is false, the caller of this function may want to take care of this
1130
        (this can be useful if we don't want new stmts added to first-loop).
1131
   - TH: cost model profitability threshold of iterations for vectorization.
1132
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1133
                          during versioning and hence needs to occur during
1134
                          prologue generation or whether cost model check
1135
                          has not occurred during prologue generation and hence
1136
                          needs to occur during epilogue generation.
1137
 
1138
 
1139
   Output:
1140
   The function returns a pointer to the new loop-copy, or NULL if it failed
1141
   to perform the transformation.
1142
 
1143
   The function generates two if-then-else guards: one before the first loop,
1144
   and the other before the second loop:
1145
   The first guard is:
1146
     if (FIRST_NITERS == 0) then skip the first loop,
1147
     and go directly to the second loop.
1148
   The second guard is:
1149
     if (FIRST_NITERS == NITERS) then skip the second loop.
1150
 
1151
   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1152
   then the generated condition is combined with COND_EXPR and the
1153
   statements in COND_EXPR_STMT_LIST are emitted together with it.
1154
 
1155
   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1156
   FORNOW the resulting code will not be in loop-closed-ssa form.
1157
*/
1158
 
1159
static struct loop*
1160
slpeel_tree_peel_loop_to_edge (struct loop *loop,
1161
                               edge e, tree *first_niters,
1162
                               tree niters, bool update_first_loop_count,
1163
                               unsigned int th, bool check_profitability,
1164
                               tree cond_expr, gimple_seq cond_expr_stmt_list)
1165
{
1166
  struct loop *new_loop = NULL, *first_loop, *second_loop;
1167
  edge skip_e;
1168
  tree pre_condition = NULL_TREE;
1169
  bitmap definitions;
1170
  basic_block bb_before_second_loop, bb_after_second_loop;
1171
  basic_block bb_before_first_loop;
1172
  basic_block bb_between_loops;
1173
  basic_block new_exit_bb;
1174
  gimple_stmt_iterator gsi;
1175
  edge exit_e = single_exit (loop);
1176
  LOC loop_loc;
1177
  tree cost_pre_condition = NULL_TREE;
1178
 
1179
  if (!slpeel_can_duplicate_loop_p (loop, e))
1180
    return NULL;
1181
 
1182
  /* We have to initialize cfg_hooks. Then, when calling
1183
   cfg_hooks->split_edge, the function tree_split_edge
1184
   is actually called and, when calling cfg_hooks->duplicate_block,
1185
   the function tree_duplicate_bb is called.  */
1186
  gimple_register_cfg_hooks ();
1187
 
1188
  /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1189
     in the exit bb and rename all the uses after the loop.  This simplifies
1190
     the *guard[12] routines, which assume loop closed SSA form for all PHIs
1191
     (but normally loop closed SSA form doesn't require virtual PHIs to be
1192
     in the same form).  Doing this early simplifies the checking what
1193
     uses should be renamed.  */
1194
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1195
    if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
1196
      {
1197
        gimple phi = gsi_stmt (gsi);
1198
        for (gsi = gsi_start_phis (exit_e->dest);
1199
             !gsi_end_p (gsi); gsi_next (&gsi))
1200
          if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
1201
            break;
1202
        if (gsi_end_p (gsi))
1203
          {
1204
            gimple new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
1205
                                              exit_e->dest);
1206
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1207
            imm_use_iterator imm_iter;
1208
            gimple stmt;
1209
            tree new_vop = make_ssa_name (SSA_NAME_VAR (PHI_RESULT (phi)),
1210
                                          new_phi);
1211
            use_operand_p use_p;
1212
 
1213
            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1214
            gimple_phi_set_result (new_phi, new_vop);
1215
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1216
              if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1217
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1218
                  SET_USE (use_p, new_vop);
1219
          }
1220
        break;
1221
      }
1222
 
1223
  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1224
        Resulting CFG would be:
1225
 
1226
        first_loop:
1227
        do {
1228
        } while ...
1229
 
1230
        second_loop:
1231
        do {
1232
        } while ...
1233
 
1234
        orig_exit_bb:
1235
   */
1236
 
1237
  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1238
    {
1239
      loop_loc = find_loop_location (loop);
1240
      if (dump_file && (dump_flags & TDF_DETAILS))
1241
        {
1242
          if (loop_loc != UNKNOWN_LOC)
1243
            fprintf (dump_file, "\n%s:%d: note: ",
1244
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1245
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1246
        }
1247
      return NULL;
1248
    }
1249
 
1250
  if (MAY_HAVE_DEBUG_STMTS)
1251
    {
1252
      gcc_assert (!adjust_vec);
1253
      adjust_vec = VEC_alloc (adjust_info, stack, 32);
1254
    }
1255
 
1256
  if (e == exit_e)
1257
    {
1258
      /* NEW_LOOP was placed after LOOP.  */
1259
      first_loop = loop;
1260
      second_loop = new_loop;
1261
    }
1262
  else
1263
    {
1264
      /* NEW_LOOP was placed before LOOP.  */
1265
      first_loop = new_loop;
1266
      second_loop = loop;
1267
    }
1268
 
1269
  definitions = ssa_names_to_replace ();
1270
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1271
  rename_variables_in_loop (new_loop);
1272
 
1273
 
1274
  /* 2.  Add the guard code in one of the following ways:
1275
 
1276
     2.a Add the guard that controls whether the first loop is executed.
1277
         This occurs when this function is invoked for prologue or epilogue
1278
         generation and when the cost model check can be done at compile time.
1279
 
1280
         Resulting CFG would be:
1281
 
1282
         bb_before_first_loop:
1283
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1284
                                GOTO first-loop
1285
 
1286
         first_loop:
1287
         do {
1288
         } while ...
1289
 
1290
         bb_before_second_loop:
1291
 
1292
         second_loop:
1293
         do {
1294
         } while ...
1295
 
1296
         orig_exit_bb:
1297
 
1298
     2.b Add the cost model check that allows the prologue
1299
         to iterate for the entire unchanged scalar
1300
         iterations of the loop in the event that the cost
1301
         model indicates that the scalar loop is more
1302
         profitable than the vector one. This occurs when
1303
         this function is invoked for prologue generation
1304
         and the cost model check needs to be done at run
1305
         time.
1306
 
1307
         Resulting CFG after prologue peeling would be:
1308
 
1309
         if (scalar_loop_iterations <= th)
1310
           FIRST_NITERS = scalar_loop_iterations
1311
 
1312
         bb_before_first_loop:
1313
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1314
                                GOTO first-loop
1315
 
1316
         first_loop:
1317
         do {
1318
         } while ...
1319
 
1320
         bb_before_second_loop:
1321
 
1322
         second_loop:
1323
         do {
1324
         } while ...
1325
 
1326
         orig_exit_bb:
1327
 
1328
     2.c Add the cost model check that allows the epilogue
1329
         to iterate for the entire unchanged scalar
1330
         iterations of the loop in the event that the cost
1331
         model indicates that the scalar loop is more
1332
         profitable than the vector one. This occurs when
1333
         this function is invoked for epilogue generation
1334
         and the cost model check needs to be done at run
1335
         time.  This check is combined with any pre-existing
1336
         check in COND_EXPR to avoid versioning.
1337
 
1338
         Resulting CFG after prologue peeling would be:
1339
 
1340
         bb_before_first_loop:
1341
         if ((scalar_loop_iterations <= th)
1342
             ||
1343
             FIRST_NITERS == 0) GOTO bb_before_second_loop
1344
                                GOTO first-loop
1345
 
1346
         first_loop:
1347
         do {
1348
         } while ...
1349
 
1350
         bb_before_second_loop:
1351
 
1352
         second_loop:
1353
         do {
1354
         } while ...
1355
 
1356
         orig_exit_bb:
1357
  */
1358
 
1359
  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1360
  bb_before_second_loop = split_edge (single_exit (first_loop));
1361
 
1362
  /* Epilogue peeling.  */
1363
  if (!update_first_loop_count)
1364
    {
1365
      pre_condition =
1366
        fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1367
                     build_int_cst (TREE_TYPE (*first_niters), 0));
1368
      if (check_profitability)
1369
        {
1370
          tree scalar_loop_iters
1371
            = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1372
                                        (loop_vec_info_for_loop (loop)));
1373
          cost_pre_condition =
1374
            fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1375
                         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1376
 
1377
          pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1378
                                       cost_pre_condition, pre_condition);
1379
        }
1380
      if (cond_expr)
1381
        {
1382
          pre_condition =
1383
            fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1384
                         pre_condition,
1385
                         fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1386
                                      cond_expr));
1387
        }
1388
    }
1389
 
1390
  /* Prologue peeling.  */
1391
  else
1392
    {
1393
      if (check_profitability)
1394
        set_prologue_iterations (bb_before_first_loop, first_niters,
1395
                                 loop, th);
1396
 
1397
      pre_condition =
1398
        fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1399
                     build_int_cst (TREE_TYPE (*first_niters), 0));
1400
    }
1401
 
1402
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1403
                                  cond_expr_stmt_list,
1404
                                  bb_before_second_loop, bb_before_first_loop);
1405
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1406
                                      first_loop == new_loop,
1407
                                      &new_exit_bb, &definitions);
1408
 
1409
 
1410
  /* 3. Add the guard that controls whether the second loop is executed.
1411
        Resulting CFG would be:
1412
 
1413
        bb_before_first_loop:
1414
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1415
                               GOTO first-loop
1416
 
1417
        first_loop:
1418
        do {
1419
        } while ...
1420
 
1421
        bb_between_loops:
1422
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1423
                                    GOTO bb_before_second_loop
1424
 
1425
        bb_before_second_loop:
1426
 
1427
        second_loop:
1428
        do {
1429
        } while ...
1430
 
1431
        bb_after_second_loop:
1432
 
1433
        orig_exit_bb:
1434
   */
1435
 
1436
  bb_between_loops = new_exit_bb;
1437
  bb_after_second_loop = split_edge (single_exit (second_loop));
1438
 
1439
  pre_condition =
1440
        fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1441
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1442
                                  bb_after_second_loop, bb_before_first_loop);
1443
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1444
                                     second_loop == new_loop, &new_exit_bb);
1445
 
1446
  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1447
   */
1448
  if (update_first_loop_count)
1449
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1450
 
1451
  BITMAP_FREE (definitions);
1452
  delete_update_ssa ();
1453
 
1454
  adjust_vec_debug_stmts ();
1455
 
1456
  return new_loop;
1457
}
1458
 
1459
/* Function vect_get_loop_location.
1460
 
1461
   Extract the location of the loop in the source code.
1462
   If the loop is not well formed for vectorization, an estimated
1463
   location is calculated.
1464
   Return the loop location if succeed and NULL if not.  */
1465
 
1466
LOC
1467
find_loop_location (struct loop *loop)
1468
{
1469
  gimple stmt = NULL;
1470
  basic_block bb;
1471
  gimple_stmt_iterator si;
1472
 
1473
  if (!loop)
1474
    return UNKNOWN_LOC;
1475
 
1476
  stmt = get_loop_exit_condition (loop);
1477
 
1478
  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1479
    return gimple_location (stmt);
1480
 
1481
  /* If we got here the loop is probably not "well formed",
1482
     try to estimate the loop location */
1483
 
1484
  if (!loop->header)
1485
    return UNKNOWN_LOC;
1486
 
1487
  bb = loop->header;
1488
 
1489
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1490
    {
1491
      stmt = gsi_stmt (si);
1492
      if (gimple_location (stmt) != UNKNOWN_LOC)
1493
        return gimple_location (stmt);
1494
    }
1495
 
1496
  return UNKNOWN_LOC;
1497
}
1498
 
1499
 
1500
/* This function builds ni_name = number of iterations loop executes
1501
   on the loop preheader.  If SEQ is given the stmt is instead emitted
1502
   there.  */
1503
 
1504
static tree
1505
vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1506
{
1507
  tree ni_name, var;
1508
  gimple_seq stmts = NULL;
1509
  edge pe;
1510
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1511
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1512
 
1513
  var = create_tmp_var (TREE_TYPE (ni), "niters");
1514
  add_referenced_var (var);
1515
  ni_name = force_gimple_operand (ni, &stmts, false, var);
1516
 
1517
  pe = loop_preheader_edge (loop);
1518
  if (stmts)
1519
    {
1520
      if (seq)
1521
        gimple_seq_add_seq (&seq, stmts);
1522
      else
1523
        {
1524
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1525
          gcc_assert (!new_bb);
1526
        }
1527
    }
1528
 
1529
  return ni_name;
1530
}
1531
 
1532
 
1533
/* This function generates the following statements:
1534
 
1535
 ni_name = number of iterations loop executes
1536
 ratio = ni_name / vf
1537
 ratio_mult_vf_name = ratio * vf
1538
 
1539
 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1540
 if that is non-NULL.  */
1541
 
1542
static void
1543
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1544
                                 tree *ni_name_ptr,
1545
                                 tree *ratio_mult_vf_name_ptr,
1546
                                 tree *ratio_name_ptr,
1547
                                 gimple_seq cond_expr_stmt_list)
1548
{
1549
 
1550
  edge pe;
1551
  basic_block new_bb;
1552
  gimple_seq stmts;
1553
  tree ni_name, ni_minus_gap_name;
1554
  tree var;
1555
  tree ratio_name;
1556
  tree ratio_mult_vf_name;
1557
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1558
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1559
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1560
  tree log_vf;
1561
 
1562
  pe = loop_preheader_edge (loop);
1563
 
1564
  /* Generate temporary variable that contains
1565
     number of iterations loop executes.  */
1566
 
1567
  ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1568
  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1569
 
1570
  /* If epilogue loop is required because of data accesses with gaps, we
1571
     subtract one iteration from the total number of iterations here for
1572
     correct calculation of RATIO.  */
1573
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1574
    {
1575
      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1576
                                       ni_name,
1577
                                       build_one_cst (TREE_TYPE (ni_name)));
1578
      if (!is_gimple_val (ni_minus_gap_name))
1579
        {
1580
          var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1581
          add_referenced_var (var);
1582
 
1583
          stmts = NULL;
1584
          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1585
                                                    true, var);
1586
          if (cond_expr_stmt_list)
1587
            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1588
          else
1589
            {
1590
              pe = loop_preheader_edge (loop);
1591
              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1592
              gcc_assert (!new_bb);
1593
            }
1594
        }
1595
    }
1596
  else
1597
    ni_minus_gap_name = ni_name;
1598
 
1599
  /* Create: ratio = ni >> log2(vf) */
1600
 
1601
  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1602
                            ni_minus_gap_name, log_vf);
1603
  if (!is_gimple_val (ratio_name))
1604
    {
1605
      var = create_tmp_var (TREE_TYPE (ni), "bnd");
1606
      add_referenced_var (var);
1607
 
1608
      stmts = NULL;
1609
      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1610
      if (cond_expr_stmt_list)
1611
        gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1612
      else
1613
        {
1614
          pe = loop_preheader_edge (loop);
1615
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1616
          gcc_assert (!new_bb);
1617
        }
1618
    }
1619
 
1620
  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1621
 
1622
  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1623
                                    ratio_name, log_vf);
1624
  if (!is_gimple_val (ratio_mult_vf_name))
1625
    {
1626
      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1627
      add_referenced_var (var);
1628
 
1629
      stmts = NULL;
1630
      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1631
                                                 true, var);
1632
      if (cond_expr_stmt_list)
1633
        gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1634
      else
1635
        {
1636
          pe = loop_preheader_edge (loop);
1637
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1638
          gcc_assert (!new_bb);
1639
        }
1640
    }
1641
 
1642
  *ni_name_ptr = ni_name;
1643
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1644
  *ratio_name_ptr = ratio_name;
1645
 
1646
  return;
1647
}
1648
 
1649
/* Function vect_can_advance_ivs_p
1650
 
1651
   In case the number of iterations that LOOP iterates is unknown at compile
1652
   time, an epilog loop will be generated, and the loop induction variables
1653
   (IVs) will be "advanced" to the value they are supposed to take just before
1654
   the epilog loop.  Here we check that the access function of the loop IVs
1655
   and the expression that represents the loop bound are simple enough.
1656
   These restrictions will be relaxed in the future.  */
1657
 
1658
bool
1659
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1660
{
1661
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1662
  basic_block bb = loop->header;
1663
  gimple phi;
1664
  gimple_stmt_iterator gsi;
1665
 
1666
  /* Analyze phi functions of the loop header.  */
1667
 
1668
  if (vect_print_dump_info (REPORT_DETAILS))
1669
    fprintf (vect_dump, "vect_can_advance_ivs_p:");
1670
 
1671
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1672
    {
1673
      tree access_fn = NULL;
1674
      tree evolution_part;
1675
 
1676
      phi = gsi_stmt (gsi);
1677
      if (vect_print_dump_info (REPORT_DETAILS))
1678
        {
1679
          fprintf (vect_dump, "Analyze phi: ");
1680
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1681
        }
1682
 
1683
      /* Skip virtual phi's. The data dependences that are associated with
1684
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1685
 
1686
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1687
        {
1688
          if (vect_print_dump_info (REPORT_DETAILS))
1689
            fprintf (vect_dump, "virtual phi. skip.");
1690
          continue;
1691
        }
1692
 
1693
      /* Skip reduction phis.  */
1694
 
1695
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1696
        {
1697
          if (vect_print_dump_info (REPORT_DETAILS))
1698
            fprintf (vect_dump, "reduc phi. skip.");
1699
          continue;
1700
        }
1701
 
1702
      /* Analyze the evolution function.  */
1703
 
1704
      access_fn = instantiate_parameters
1705
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1706
 
1707
      if (!access_fn)
1708
        {
1709
          if (vect_print_dump_info (REPORT_DETAILS))
1710
            fprintf (vect_dump, "No Access function.");
1711
          return false;
1712
        }
1713
 
1714
      if (vect_print_dump_info (REPORT_DETAILS))
1715
        {
1716
          fprintf (vect_dump, "Access function of PHI: ");
1717
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1718
        }
1719
 
1720
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1721
 
1722
      if (evolution_part == NULL_TREE)
1723
        {
1724
          if (vect_print_dump_info (REPORT_DETAILS))
1725
            fprintf (vect_dump, "No evolution.");
1726
          return false;
1727
        }
1728
 
1729
      /* FORNOW: We do not transform initial conditions of IVs
1730
         which evolution functions are a polynomial of degree >= 2.  */
1731
 
1732
      if (tree_is_chrec (evolution_part))
1733
        return false;
1734
    }
1735
 
1736
  return true;
1737
}
1738
 
1739
 
1740
/*   Function vect_update_ivs_after_vectorizer.
1741
 
1742
     "Advance" the induction variables of LOOP to the value they should take
1743
     after the execution of LOOP.  This is currently necessary because the
1744
     vectorizer does not handle induction variables that are used after the
1745
     loop.  Such a situation occurs when the last iterations of LOOP are
1746
     peeled, because:
1747
     1. We introduced new uses after LOOP for IVs that were not originally used
1748
        after LOOP: the IVs of LOOP are now used by an epilog loop.
1749
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
1750
        times, whereas the loop IVs should be bumped N times.
1751
 
1752
     Input:
1753
     - LOOP - a loop that is going to be vectorized. The last few iterations
1754
              of LOOP were peeled.
1755
     - NITERS - the number of iterations that LOOP executes (before it is
1756
                vectorized). i.e, the number of times the ivs should be bumped.
1757
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1758
                  coming out from LOOP on which there are uses of the LOOP ivs
1759
                  (this is the path from LOOP->exit to epilog_loop->preheader).
1760
 
1761
                  The new definitions of the ivs are placed in LOOP->exit.
1762
                  The phi args associated with the edge UPDATE_E in the bb
1763
                  UPDATE_E->dest are updated accordingly.
1764
 
1765
     Assumption 1: Like the rest of the vectorizer, this function assumes
1766
     a single loop exit that has a single predecessor.
1767
 
1768
     Assumption 2: The phi nodes in the LOOP header and in update_bb are
1769
     organized in the same order.
1770
 
1771
     Assumption 3: The access function of the ivs is simple enough (see
1772
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1773
 
1774
     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1775
     coming out of LOOP on which the ivs of LOOP are used (this is the path
1776
     that leads to the epilog loop; other paths skip the epilog loop).  This
1777
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1778
     needs to have its phis updated.
1779
 */
1780
 
1781
static void
1782
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1783
                                  edge update_e)
1784
{
1785
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1786
  basic_block exit_bb = single_exit (loop)->dest;
1787
  gimple phi, phi1;
1788
  gimple_stmt_iterator gsi, gsi1;
1789
  basic_block update_bb = update_e->dest;
1790
 
1791
  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1792
 
1793
  /* Make sure there exists a single-predecessor exit bb:  */
1794
  gcc_assert (single_pred_p (exit_bb));
1795
 
1796
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1797
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1798
       gsi_next (&gsi), gsi_next (&gsi1))
1799
    {
1800
      tree access_fn = NULL;
1801
      tree evolution_part;
1802
      tree init_expr;
1803
      tree step_expr, off;
1804
      tree type;
1805
      tree var, ni, ni_name;
1806
      gimple_stmt_iterator last_gsi;
1807
 
1808
      phi = gsi_stmt (gsi);
1809
      phi1 = gsi_stmt (gsi1);
1810
      if (vect_print_dump_info (REPORT_DETAILS))
1811
        {
1812
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1813
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1814
        }
1815
 
1816
      /* Skip virtual phi's.  */
1817
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1818
        {
1819
          if (vect_print_dump_info (REPORT_DETAILS))
1820
            fprintf (vect_dump, "virtual phi. skip.");
1821
          continue;
1822
        }
1823
 
1824
      /* Skip reduction phis.  */
1825
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1826
        {
1827
          if (vect_print_dump_info (REPORT_DETAILS))
1828
            fprintf (vect_dump, "reduc phi. skip.");
1829
          continue;
1830
        }
1831
 
1832
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1833
      gcc_assert (access_fn);
1834
      /* We can end up with an access_fn like
1835
           (short int) {(short unsigned int) i_49, +, 1}_1
1836
         for further analysis we need to strip the outer cast but we
1837
         need to preserve the original type.  */
1838
      type = TREE_TYPE (access_fn);
1839
      STRIP_NOPS (access_fn);
1840
      evolution_part =
1841
         unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1842
      gcc_assert (evolution_part != NULL_TREE);
1843
 
1844
      /* FORNOW: We do not support IVs whose evolution function is a polynomial
1845
         of degree >= 2 or exponential.  */
1846
      gcc_assert (!tree_is_chrec (evolution_part));
1847
 
1848
      step_expr = evolution_part;
1849
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1850
                                                               loop->num));
1851
      init_expr = fold_convert (type, init_expr);
1852
 
1853
      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1854
                         fold_convert (TREE_TYPE (step_expr), niters),
1855
                         step_expr);
1856
      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1857
        ni = fold_build_pointer_plus (init_expr, off);
1858
      else
1859
        ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1860
                          init_expr,
1861
                          fold_convert (TREE_TYPE (init_expr), off));
1862
 
1863
      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1864
      add_referenced_var (var);
1865
 
1866
      last_gsi = gsi_last_bb (exit_bb);
1867
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1868
                                          true, GSI_SAME_STMT);
1869
 
1870
      /* Fix phi expressions in the successor bb.  */
1871
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1872
    }
1873
}
1874
 
1875
/* Return the more conservative threshold between the
1876
   min_profitable_iters returned by the cost model and the user
1877
   specified threshold, if provided.  */
1878
 
1879
static unsigned int
1880
conservative_cost_threshold (loop_vec_info loop_vinfo,
1881
                             int min_profitable_iters)
1882
{
1883
  unsigned int th;
1884
  int min_scalar_loop_bound;
1885
 
1886
  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1887
                            * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1888
 
1889
  /* Use the cost model only if it is more conservative than user specified
1890
     threshold.  */
1891
  th = (unsigned) min_scalar_loop_bound;
1892
  if (min_profitable_iters
1893
      && (!min_scalar_loop_bound
1894
          || min_profitable_iters > min_scalar_loop_bound))
1895
    th = (unsigned) min_profitable_iters;
1896
 
1897
  if (th && vect_print_dump_info (REPORT_COST))
1898
    fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1899
 
1900
  return th;
1901
}
1902
 
1903
/* Function vect_do_peeling_for_loop_bound
1904
 
1905
   Peel the last iterations of the loop represented by LOOP_VINFO.
1906
   The peeled iterations form a new epilog loop.  Given that the loop now
1907
   iterates NITERS times, the new epilog loop iterates
1908
   NITERS % VECTORIZATION_FACTOR times.
1909
 
1910
   The original loop will later be made to iterate
1911
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1912
 
1913
   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1914
   test.  */
1915
 
1916
void
1917
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1918
                                tree cond_expr, gimple_seq cond_expr_stmt_list)
1919
{
1920
  tree ni_name, ratio_mult_vf_name;
1921
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1922
  struct loop *new_loop;
1923
  edge update_e;
1924
  basic_block preheader;
1925
  int loop_num;
1926
  bool check_profitability = false;
1927
  unsigned int th = 0;
1928
  int min_profitable_iters;
1929
 
1930
  if (vect_print_dump_info (REPORT_DETAILS))
1931
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1932
 
1933
  initialize_original_copy_tables ();
1934
 
1935
  /* Generate the following variables on the preheader of original loop:
1936
 
1937
     ni_name = number of iteration the original loop executes
1938
     ratio = ni_name / vf
1939
     ratio_mult_vf_name = ratio * vf  */
1940
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1941
                                   &ratio_mult_vf_name, ratio,
1942
                                   cond_expr_stmt_list);
1943
 
1944
  loop_num  = loop->num;
1945
 
1946
  /* If cost model check not done during versioning and
1947
     peeling for alignment.  */
1948
  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1949
      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1950
      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1951
      && !cond_expr)
1952
    {
1953
      check_profitability = true;
1954
 
1955
      /* Get profitability threshold for vectorized loop.  */
1956
      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1957
 
1958
      th = conservative_cost_threshold (loop_vinfo,
1959
                                        min_profitable_iters);
1960
    }
1961
 
1962
  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1963
                                            &ratio_mult_vf_name, ni_name, false,
1964
                                            th, check_profitability,
1965
                                            cond_expr, cond_expr_stmt_list);
1966
  gcc_assert (new_loop);
1967
  gcc_assert (loop_num == loop->num);
1968
#ifdef ENABLE_CHECKING
1969
  slpeel_verify_cfg_after_peeling (loop, new_loop);
1970
#endif
1971
 
1972
  /* A guard that controls whether the new_loop is to be executed or skipped
1973
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1974
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1975
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1976
     is on the path where the LOOP IVs are used and need to be updated.  */
1977
 
1978
  preheader = loop_preheader_edge (new_loop)->src;
1979
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1980
    update_e = EDGE_PRED (preheader, 0);
1981
  else
1982
    update_e = EDGE_PRED (preheader, 1);
1983
 
1984
  /* Update IVs of original loop as if they were advanced
1985
     by ratio_mult_vf_name steps.  */
1986
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1987
 
1988
  /* After peeling we have to reset scalar evolution analyzer.  */
1989
  scev_reset ();
1990
 
1991
  free_original_copy_tables ();
1992
}
1993
 
1994
 
1995
/* Function vect_gen_niters_for_prolog_loop
1996
 
1997
   Set the number of iterations for the loop represented by LOOP_VINFO
1998
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
1999
   and the misalignment of DR - the data reference recorded in
2000
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
2001
   this loop, the data reference DR will refer to an aligned location.
2002
 
2003
   The following computation is generated:
2004
 
2005
   If the misalignment of DR is known at compile time:
2006
     addr_mis = int mis = DR_MISALIGNMENT (dr);
2007
   Else, compute address misalignment in bytes:
2008
     addr_mis = addr & (vectype_size - 1)
2009
 
2010
   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
2011
 
2012
   (elem_size = element type size; an element is the scalar element whose type
2013
   is the inner type of the vectype)
2014
 
2015
   When the step of the data-ref in the loop is not 1 (as in interleaved data
2016
   and SLP), the number of iterations of the prolog must be divided by the step
2017
   (which is equal to the size of interleaved group).
2018
 
2019
   The above formulas assume that VF == number of elements in the vector. This
2020
   may not hold when there are multiple-types in the loop.
2021
   In this case, for some data-references in the loop the VF does not represent
2022
   the number of elements that fit in the vector.  Therefore, instead of VF we
2023
   use TYPE_VECTOR_SUBPARTS.  */
2024
 
2025
static tree
2026
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2027
{
2028
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2029
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2030
  tree var;
2031
  gimple_seq stmts;
2032
  tree iters, iters_name;
2033
  edge pe;
2034
  basic_block new_bb;
2035
  gimple dr_stmt = DR_STMT (dr);
2036
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2037
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2038
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2039
  tree niters_type = TREE_TYPE (loop_niters);
2040
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);
2041
 
2042
  pe = loop_preheader_edge (loop);
2043
 
2044
  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2045
    {
2046
      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2047
 
2048
      if (vect_print_dump_info (REPORT_DETAILS))
2049
        fprintf (vect_dump, "known peeling = %d.", npeel);
2050
 
2051
      iters = build_int_cst (niters_type, npeel);
2052
    }
2053
  else
2054
    {
2055
      gimple_seq new_stmts = NULL;
2056
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
2057
      tree offset = negative
2058
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2059
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2060
                                                &new_stmts, offset, loop);
2061
      tree ptr_type = TREE_TYPE (start_addr);
2062
      tree size = TYPE_SIZE (ptr_type);
2063
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2064
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2065
      tree elem_size_log =
2066
        build_int_cst (type, exact_log2 (vectype_align/nelements));
2067
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2068
      tree nelements_tree = build_int_cst (type, nelements);
2069
      tree byte_misalign;
2070
      tree elem_misalign;
2071
 
2072
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2073
      gcc_assert (!new_bb);
2074
 
2075
      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2076
      byte_misalign =
2077
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
2078
                     vectype_size_minus_1);
2079
 
2080
      /* Create:  elem_misalign = byte_misalign / element_size  */
2081
      elem_misalign =
2082
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2083
 
2084
      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
2085
      if (negative)
2086
        iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
2087
      else
2088
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2089
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2090
      iters = fold_convert (niters_type, iters);
2091
    }
2092
 
2093
  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2094
  /* If the loop bound is known at compile time we already verified that it is
2095
     greater than vf; since the misalignment ('iters') is at most vf, there's
2096
     no need to generate the MIN_EXPR in this case.  */
2097
  if (TREE_CODE (loop_niters) != INTEGER_CST)
2098
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2099
 
2100
  if (vect_print_dump_info (REPORT_DETAILS))
2101
    {
2102
      fprintf (vect_dump, "niters for prolog loop: ");
2103
      print_generic_expr (vect_dump, iters, TDF_SLIM);
2104
    }
2105
 
2106
  var = create_tmp_var (niters_type, "prolog_loop_niters");
2107
  add_referenced_var (var);
2108
  stmts = NULL;
2109
  iters_name = force_gimple_operand (iters, &stmts, false, var);
2110
 
2111
  /* Insert stmt on loop preheader edge.  */
2112
  if (stmts)
2113
    {
2114
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2115
      gcc_assert (!new_bb);
2116
    }
2117
 
2118
  return iters_name;
2119
}
2120
 
2121
 
2122
/* Function vect_update_init_of_dr
2123
 
2124
   NITERS iterations were peeled from LOOP.  DR represents a data reference
2125
   in LOOP.  This function updates the information recorded in DR to
2126
   account for the fact that the first NITERS iterations had already been
2127
   executed.  Specifically, it updates the OFFSET field of DR.  */
2128
 
2129
static void
2130
vect_update_init_of_dr (struct data_reference *dr, tree niters)
2131
{
2132
  tree offset = DR_OFFSET (dr);
2133
 
2134
  niters = fold_build2 (MULT_EXPR, sizetype,
2135
                        fold_convert (sizetype, niters),
2136
                        fold_convert (sizetype, DR_STEP (dr)));
2137
  offset = fold_build2 (PLUS_EXPR, sizetype,
2138
                        fold_convert (sizetype, offset), niters);
2139
  DR_OFFSET (dr) = offset;
2140
}
2141
 
2142
 
2143
/* Function vect_update_inits_of_drs
2144
 
2145
   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2146
   This function updates the information recorded for the data references in
2147
   the loop to account for the fact that the first NITERS iterations had
2148
   already been executed.  Specifically, it updates the initial_condition of
2149
   the access_function of all the data_references in the loop.  */
2150
 
2151
static void
2152
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2153
{
2154
  unsigned int i;
2155
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2156
  struct data_reference *dr;
2157
 
2158
  if (vect_print_dump_info (REPORT_DETAILS))
2159
    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2160
 
2161
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2162
    vect_update_init_of_dr (dr, niters);
2163
}
2164
 
2165
 
2166
/* Function vect_do_peeling_for_alignment
2167
 
2168
   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2169
   'niters' is set to the misalignment of one of the data references in the
2170
   loop, thereby forcing it to refer to an aligned location at the beginning
2171
   of the execution of this loop.  The data reference for which we are
2172
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2173
 
2174
void
2175
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2176
{
2177
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2178
  tree niters_of_prolog_loop, ni_name;
2179
  tree n_iters;
2180
  tree wide_prolog_niters;
2181
  struct loop *new_loop;
2182
  unsigned int th = 0;
2183
  int min_profitable_iters;
2184
 
2185
  if (vect_print_dump_info (REPORT_DETAILS))
2186
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2187
 
2188
  initialize_original_copy_tables ();
2189
 
2190
  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2191
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2192
                                                           ni_name);
2193
 
2194
  /* Get profitability threshold for vectorized loop.  */
2195
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2196
  th = conservative_cost_threshold (loop_vinfo,
2197
                                    min_profitable_iters);
2198
 
2199
  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2200
  new_loop =
2201
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2202
                                   &niters_of_prolog_loop, ni_name, true,
2203
                                   th, true, NULL_TREE, NULL);
2204
 
2205
  gcc_assert (new_loop);
2206
#ifdef ENABLE_CHECKING
2207
  slpeel_verify_cfg_after_peeling (new_loop, loop);
2208
#endif
2209
 
2210
  /* Update number of times loop executes.  */
2211
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2212
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2213
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2214
 
2215
  if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2216
    wide_prolog_niters = niters_of_prolog_loop;
2217
  else
2218
    {
2219
      gimple_seq seq = NULL;
2220
      edge pe = loop_preheader_edge (loop);
2221
      tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2222
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2223
      add_referenced_var (var);
2224
      wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2225
                                                 var);
2226
      if (seq)
2227
        {
2228
          /* Insert stmt on loop preheader edge.  */
2229
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2230
          gcc_assert (!new_bb);
2231
        }
2232
    }
2233
 
2234
  /* Update the init conditions of the access functions of all data refs.  */
2235
  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2236
 
2237
  /* After peeling we have to reset scalar evolution analyzer.  */
2238
  scev_reset ();
2239
 
2240
  free_original_copy_tables ();
2241
}
2242
 
2243
 
2244
/* Function vect_create_cond_for_align_checks.
2245
 
2246
   Create a conditional expression that represents the alignment checks for
2247
   all of data references (array element references) whose alignment must be
2248
   checked at runtime.
2249
 
2250
   Input:
2251
   COND_EXPR  - input conditional expression.  New conditions will be chained
2252
                with logical AND operation.
2253
   LOOP_VINFO - two fields of the loop information are used.
2254
                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2255
                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2256
 
2257
   Output:
2258
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2259
                         expression.
2260
   The returned value is the conditional expression to be used in the if
2261
   statement that controls which version of the loop gets executed at runtime.
2262
 
2263
   The algorithm makes two assumptions:
2264
     1) The number of bytes "n" in a vector is a power of 2.
2265
     2) An address "a" is aligned if a%n is zero and that this
2266
        test can be done as a&(n-1) == 0.  For example, for 16
2267
        byte vectors the test is a&0xf == 0.  */
2268
 
2269
static void
2270
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2271
                                   tree *cond_expr,
2272
                                   gimple_seq *cond_expr_stmt_list)
2273
{
2274
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2275
  VEC(gimple,heap) *may_misalign_stmts
2276
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2277
  gimple ref_stmt;
2278
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2279
  tree mask_cst;
2280
  unsigned int i;
2281
  tree psize;
2282
  tree int_ptrsize_type;
2283
  char tmp_name[20];
2284
  tree or_tmp_name = NULL_TREE;
2285
  tree and_tmp, and_tmp_name;
2286
  gimple and_stmt;
2287
  tree ptrsize_zero;
2288
  tree part_cond_expr;
2289
 
2290
  /* Check that mask is one less than a power of 2, i.e., mask is
2291
     all zeros followed by all ones.  */
2292
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2293
 
2294
  /* CHECKME: what is the best integer or unsigned type to use to hold a
2295
     cast from a pointer value?  */
2296
  psize = TYPE_SIZE (ptr_type_node);
2297
  int_ptrsize_type
2298
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2299
 
2300
  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2301
     of the first vector of the i'th data reference. */
2302
 
2303
  FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
2304
    {
2305
      gimple_seq new_stmt_list = NULL;
2306
      tree addr_base;
2307
      tree addr_tmp, addr_tmp_name;
2308
      tree or_tmp, new_or_tmp_name;
2309
      gimple addr_stmt, or_stmt;
2310
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2311
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2312
      bool negative = tree_int_cst_compare
2313
        (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2314
      tree offset = negative
2315
        ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2316
 
2317
      /* create: addr_tmp = (int)(address_of_first_vector) */
2318
      addr_base =
2319
        vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2320
                                              offset, loop);
2321
      if (new_stmt_list != NULL)
2322
        gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2323
 
2324
      sprintf (tmp_name, "%s%d", "addr2int", i);
2325
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2326
      add_referenced_var (addr_tmp);
2327
      addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2328
      addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2329
                                                addr_base, NULL_TREE);
2330
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2331
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2332
 
2333
      /* The addresses are OR together.  */
2334
 
2335
      if (or_tmp_name != NULL_TREE)
2336
        {
2337
          /* create: or_tmp = or_tmp | addr_tmp */
2338
          sprintf (tmp_name, "%s%d", "orptrs", i);
2339
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2340
          add_referenced_var (or_tmp);
2341
          new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2342
          or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2343
                                                  new_or_tmp_name,
2344
                                                  or_tmp_name, addr_tmp_name);
2345
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2346
          gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2347
          or_tmp_name = new_or_tmp_name;
2348
        }
2349
      else
2350
        or_tmp_name = addr_tmp_name;
2351
 
2352
    } /* end for i */
2353
 
2354
  mask_cst = build_int_cst (int_ptrsize_type, mask);
2355
 
2356
  /* create: and_tmp = or_tmp & mask  */
2357
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2358
  add_referenced_var (and_tmp);
2359
  and_tmp_name = make_ssa_name (and_tmp, NULL);
2360
 
2361
  and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2362
                                           or_tmp_name, mask_cst);
2363
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2364
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2365
 
2366
  /* Make and_tmp the left operand of the conditional test against zero.
2367
     if and_tmp has a nonzero bit then some address is unaligned.  */
2368
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2369
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2370
                                and_tmp_name, ptrsize_zero);
2371
  if (*cond_expr)
2372
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2373
                              *cond_expr, part_cond_expr);
2374
  else
2375
    *cond_expr = part_cond_expr;
2376
}
2377
 
2378
 
2379
/* Function vect_vfa_segment_size.
2380
 
2381
   Create an expression that computes the size of segment
2382
   that will be accessed for a data reference.  The functions takes into
2383
   account that realignment loads may access one more vector.
2384
 
2385
   Input:
2386
     DR: The data reference.
2387
     LENGTH_FACTOR: segment length to consider.
2388
 
2389
   Return an expression whose value is the size of segment which will be
2390
   accessed by DR.  */
2391
 
2392
static tree
2393
vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2394
{
2395
  tree segment_length;
2396
 
2397
  if (!compare_tree_int (DR_STEP (dr), 0))
2398
    segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2399
  else
2400
    segment_length = size_binop (MULT_EXPR,
2401
                                 fold_convert (sizetype, DR_STEP (dr)),
2402
                                 fold_convert (sizetype, length_factor));
2403
 
2404
  if (vect_supportable_dr_alignment (dr, false)
2405
        == dr_explicit_realign_optimized)
2406
    {
2407
      tree vector_size = TYPE_SIZE_UNIT
2408
                          (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2409
 
2410
      segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2411
    }
2412
  return segment_length;
2413
}
2414
 
2415
 
2416
/* Function vect_create_cond_for_alias_checks.
2417
 
2418
   Create a conditional expression that represents the run-time checks for
2419
   overlapping of address ranges represented by a list of data references
2420
   relations passed as input.
2421
 
2422
   Input:
2423
   COND_EXPR  - input conditional expression.  New conditions will be chained
2424
                with logical AND operation.
2425
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2426
                to be checked.
2427
 
2428
   Output:
2429
   COND_EXPR - conditional expression.
2430
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2431
                         expression.
2432
 
2433
 
2434
   The returned value is the conditional expression to be used in the if
2435
   statement that controls which version of the loop gets executed at runtime.
2436
*/
2437
 
2438
static void
2439
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2440
                                   tree * cond_expr,
2441
                                   gimple_seq * cond_expr_stmt_list)
2442
{
2443
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2444
  VEC (ddr_p, heap) * may_alias_ddrs =
2445
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2446
  int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2447
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2448
 
2449
  ddr_p ddr;
2450
  unsigned int i;
2451
  tree part_cond_expr, length_factor;
2452
 
2453
  /* Create expression
2454
     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2455
     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2456
     &&
2457
     ...
2458
     &&
2459
     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2460
     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2461
 
2462
  if (VEC_empty (ddr_p, may_alias_ddrs))
2463
    return;
2464
 
2465
  FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
2466
    {
2467
      struct data_reference *dr_a, *dr_b;
2468
      gimple dr_group_first_a, dr_group_first_b;
2469
      tree addr_base_a, addr_base_b;
2470
      tree segment_length_a, segment_length_b;
2471
      gimple stmt_a, stmt_b;
2472
      tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2473
 
2474
      dr_a = DDR_A (ddr);
2475
      stmt_a = DR_STMT (DDR_A (ddr));
2476
      dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2477
      if (dr_group_first_a)
2478
        {
2479
          stmt_a = dr_group_first_a;
2480
          dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2481
        }
2482
 
2483
      dr_b = DDR_B (ddr);
2484
      stmt_b = DR_STMT (DDR_B (ddr));
2485
      dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2486
      if (dr_group_first_b)
2487
        {
2488
          stmt_b = dr_group_first_b;
2489
          dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2490
        }
2491
 
2492
      addr_base_a =
2493
        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2494
                                              NULL_TREE, loop);
2495
      addr_base_b =
2496
        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2497
                                              NULL_TREE, loop);
2498
 
2499
      if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2500
        length_factor = scalar_loop_iters;
2501
      else
2502
        length_factor = size_int (vect_factor);
2503
      segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2504
      segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2505
 
2506
      if (vect_print_dump_info (REPORT_DR_DETAILS))
2507
        {
2508
          fprintf (vect_dump,
2509
                   "create runtime check for data references ");
2510
          print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2511
          fprintf (vect_dump, " and ");
2512
          print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2513
        }
2514
 
2515
      seg_a_min = addr_base_a;
2516
      seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2517
      if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2518
        seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2519
 
2520
      seg_b_min = addr_base_b;
2521
      seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2522
      if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2523
        seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2524
 
2525
      part_cond_expr =
2526
        fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2527
          fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2528
          fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2529
 
2530
      if (*cond_expr)
2531
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2532
                                  *cond_expr, part_cond_expr);
2533
      else
2534
        *cond_expr = part_cond_expr;
2535
    }
2536
 
2537
  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2538
    fprintf (vect_dump, "created %u versioning for alias checks.\n",
2539
             VEC_length (ddr_p, may_alias_ddrs));
2540
}
2541
 
2542
 
2543
/* Function vect_loop_versioning.
2544
 
2545
   If the loop has data references that may or may not be aligned or/and
2546
   has data reference relations whose independence was not proven then
2547
   two versions of the loop need to be generated, one which is vectorized
2548
   and one which isn't.  A test is then generated to control which of the
2549
   loops is executed.  The test checks for the alignment of all of the
2550
   data references that may or may not be aligned.  An additional
2551
   sequence of runtime tests is generated for each pairs of DDRs whose
2552
   independence was not proven.  The vectorized version of loop is
2553
   executed only if both alias and alignment tests are passed.
2554
 
2555
   The test generated to check which version of loop is executed
2556
   is modified to also check for profitability as indicated by the
2557
   cost model initially.
2558
 
2559
   The versioning precondition(s) are placed in *COND_EXPR and
2560
   *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2561
   also performed, otherwise only the conditions are generated.  */
2562
 
2563
void
2564
vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2565
                      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2566
{
2567
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2568
  basic_block condition_bb;
2569
  gimple_stmt_iterator gsi, cond_exp_gsi;
2570
  basic_block merge_bb;
2571
  basic_block new_exit_bb;
2572
  edge new_exit_e, e;
2573
  gimple orig_phi, new_phi;
2574
  tree arg;
2575
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2576
  gimple_seq gimplify_stmt_list = NULL;
2577
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2578
  int min_profitable_iters = 0;
2579
  unsigned int th;
2580
 
2581
  /* Get profitability threshold for vectorized loop.  */
2582
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2583
 
2584
  th = conservative_cost_threshold (loop_vinfo,
2585
                                    min_profitable_iters);
2586
 
2587
  *cond_expr =
2588
    fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2589
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2590
 
2591
  *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2592
                                     false, NULL_TREE);
2593
 
2594
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2595
      vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2596
                                         cond_expr_stmt_list);
2597
 
2598
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2599
    vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2600
                                       cond_expr_stmt_list);
2601
 
2602
  *cond_expr =
2603
    fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2604
  *cond_expr =
2605
    force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2606
  gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2607
 
2608
  /* If we only needed the extra conditions and a new loop copy
2609
     bail out here.  */
2610
  if (!do_versioning)
2611
    return;
2612
 
2613
  initialize_original_copy_tables ();
2614
  loop_version (loop, *cond_expr, &condition_bb,
2615
                prob, prob, REG_BR_PROB_BASE - prob, true);
2616
  free_original_copy_tables();
2617
 
2618
  /* Loop versioning violates an assumption we try to maintain during
2619
     vectorization - that the loop exit block has a single predecessor.
2620
     After versioning, the exit block of both loop versions is the same
2621
     basic block (i.e. it has two predecessors). Just in order to simplify
2622
     following transformations in the vectorizer, we fix this situation
2623
     here by adding a new (empty) block on the exit-edge of the loop,
2624
     with the proper loop-exit phis to maintain loop-closed-form.  */
2625
 
2626
  merge_bb = single_exit (loop)->dest;
2627
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2628
  new_exit_bb = split_edge (single_exit (loop));
2629
  new_exit_e = single_exit (loop);
2630
  e = EDGE_SUCC (new_exit_bb, 0);
2631
 
2632
  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2633
    {
2634
      orig_phi = gsi_stmt (gsi);
2635
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2636
                                  new_exit_bb);
2637
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2638
      add_phi_arg (new_phi, arg, new_exit_e,
2639
                   gimple_phi_arg_location_from_edge (orig_phi, e));
2640
      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2641
    }
2642
 
2643
  /* End loop-exit-fixes after versioning.  */
2644
 
2645
  update_ssa (TODO_update_ssa);
2646
  if (*cond_expr_stmt_list)
2647
    {
2648
      cond_exp_gsi = gsi_last_bb (condition_bb);
2649
      gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2650
                             GSI_SAME_STMT);
2651
      *cond_expr_stmt_list = NULL;
2652
    }
2653
  *cond_expr = NULL_TREE;
2654
}
2655
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.