OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [tree-parloops.c] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Loop autoparallelization.
2
   Copyright (C) 2006, 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
   Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5
   Zdenek Dvorak <dvorakz@suse.cz>.
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "tree.h"
28
#include "rtl.h"
29
#include "tree-flow.h"
30
#include "cfgloop.h"
31
#include "ggc.h"
32
#include "tree-data-ref.h"
33
#include "diagnostic.h"
34
#include "tree-pass.h"
35
#include "tree-scalar-evolution.h"
36
#include "hashtab.h"
37
#include "langhooks.h"
38
#include "tree-vectorizer.h"
39
 
40
/* This pass tries to distribute iterations of loops into several threads.
41
   The implementation is straightforward -- for each loop we test whether its
42
   iterations are independent, and if it is the case (and some additional
43
   conditions regarding profitability and correctness are satisfied), we
44
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
45
   machinery do its job.
46
 
47
   The most of the complexity is in bringing the code into shape expected
48
   by the omp expanders:
49
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
50
      variable and that the exit test is at the start of the loop body
51
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
52
      variables by accesses through pointers, and breaking up ssa chains
53
      by storing the values incoming to the parallelized loop to a structure
54
      passed to the new function as an argument (something similar is done
55
      in omp gimplification, unfortunately only a small part of the code
56
      can be shared).
57
 
58
   TODO:
59
   -- if there are several parallelizable loops in a function, it may be
60
      possible to generate the threads just once (using synchronization to
61
      ensure that cross-loop dependences are obeyed).
62
   -- handling of common scalar dependence patterns (accumulation, ...)
63
   -- handling of non-innermost loops  */
64
 
65
/*
66
  Reduction handling:
67
  currently we use vect_is_simple_reduction() to detect reduction patterns.
68
  The code transformation will be introduced by an example.
69
 
70
 
71
parloop
72
{
73
  int sum=1;
74
 
75
  for (i = 0; i < N; i++)
76
   {
77
    x[i] = i + 3;
78
    sum+=x[i];
79
   }
80
}
81
 
82
gimple-like code:
83
header_bb:
84
 
85
  # sum_29 = PHI <sum_11(5), 1(3)>
86
  # i_28 = PHI <i_12(5), 0(3)>
87
  D.1795_8 = i_28 + 3;
88
  x[i_28] = D.1795_8;
89
  sum_11 = D.1795_8 + sum_29;
90
  i_12 = i_28 + 1;
91
  if (N_6(D) > i_12)
92
    goto header_bb;
93
 
94
 
95
exit_bb:
96
 
97
  # sum_21 = PHI <sum_11(4)>
98
  printf (&"%d"[0], sum_21);
99
 
100
 
101
after reduction transformation (only relevant parts):
102
 
103
parloop
104
{
105
 
106
....
107
 
108
 
109
  # Storing the initial value given by the user.  #
110
 
111
  .paral_data_store.32.sum.27 = 1;
112
 
113
  #pragma omp parallel num_threads(4)
114
 
115
  #pragma omp for schedule(static)
116
 
117
  # The neutral element corresponding to the particular
118
  reduction's operation, e.g. 0 for PLUS_EXPR,
119
  1 for MULT_EXPR, etc. replaces the user's initial value.  #
120
 
121
  # sum.27_29 = PHI <sum.27_11, 0>
122
 
123
  sum.27_11 = D.1827_8 + sum.27_29;
124
 
125
  GIMPLE_OMP_CONTINUE
126
 
127
  # Adding this reduction phi is done at create_phi_for_local_result() #
128
  # sum.27_56 = PHI <sum.27_11, 0>
129
  GIMPLE_OMP_RETURN
130
 
131
  # Creating the atomic operation is done at
132
  create_call_for_reduction_1()  #
133
 
134
  #pragma omp atomic_load
135
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136
  D.1840_60 = sum.27_56 + D.1839_59;
137
  #pragma omp atomic_store (D.1840_60);
138
 
139
  GIMPLE_OMP_RETURN
140
 
141
 # collecting the result after the join of the threads is done at
142
  create_loads_for_reductions().
143
  The value computed by the threads is loaded from the
144
  shared struct.  #
145
 
146
 
147
  .paral_data_load.33_52 = &.paral_data_store.32;
148
  sum_37 =  .paral_data_load.33_52->sum.27;
149
  sum_43 = D.1795_41 + sum_37;
150
 
151
  exit bb:
152
  # sum_21 = PHI <sum_43, sum_26>
153
  printf (&"%d"[0], sum_21);
154
 
155
...
156
 
157
}
158
 
159
*/
160
 
161
/* Minimal number of iterations of a loop that should be executed in each
162
   thread.  */
163
#define MIN_PER_THREAD 100
164
 
165
/* Element of the hashtable, representing a
166
   reduction in the current loop.  */
167
struct reduction_info
168
{
169
  gimple reduc_stmt;            /* reduction statement.  */
170
  gimple reduc_phi;             /* The phi node defining the reduction.  */
171
  enum tree_code reduction_code;/* code for the reduction operation.  */
172
  gimple keep_res;              /* The PHI_RESULT of this phi is the resulting value
173
                                   of the reduction variable when existing the loop. */
174
  tree initial_value;           /* The initial value of the reduction var before entering the loop.  */
175
  tree field;                   /*  the name of the field in the parloop data structure intended for reduction.  */
176
  tree init;                    /* reduction initialization value.  */
177
  gimple new_phi;               /* (helper field) Newly created phi node whose result
178
                                   will be passed to the atomic operation.  Represents
179
                                   the local result each thread computed for the reduction
180
                                   operation.  */
181
};
182
 
183
/* Equality and hash functions for hashtab code.  */
184
 
185
static int
186
reduction_info_eq (const void *aa, const void *bb)
187
{
188
  const struct reduction_info *a = (const struct reduction_info *) aa;
189
  const struct reduction_info *b = (const struct reduction_info *) bb;
190
 
191
  return (a->reduc_phi == b->reduc_phi);
192
}
193
 
194
static hashval_t
195
reduction_info_hash (const void *aa)
196
{
197
  const struct reduction_info *a = (const struct reduction_info *) aa;
198
 
199
  return htab_hash_pointer (a->reduc_phi);
200
}
201
 
202
static struct reduction_info *
203
reduction_phi (htab_t reduction_list, gimple phi)
204
{
205
  struct reduction_info tmpred, *red;
206
 
207
  if (htab_elements (reduction_list) == 0)
208
    return NULL;
209
 
210
  tmpred.reduc_phi = phi;
211
  red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
212
 
213
  return red;
214
}
215
 
216
/* Element of hashtable of names to copy.  */
217
 
218
struct name_to_copy_elt
219
{
220
  unsigned version;     /* The version of the name to copy.  */
221
  tree new_name;        /* The new name used in the copy.  */
222
  tree field;           /* The field of the structure used to pass the
223
                           value.  */
224
};
225
 
226
/* Equality and hash functions for hashtab code.  */
227
 
228
static int
229
name_to_copy_elt_eq (const void *aa, const void *bb)
230
{
231
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
232
  const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
233
 
234
  return a->version == b->version;
235
}
236
 
237
static hashval_t
238
name_to_copy_elt_hash (const void *aa)
239
{
240
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
241
 
242
  return (hashval_t) a->version;
243
}
244
 
245
 
246
/* Data dependency analysis. Returns true if the iterations of LOOP
247
   are independent on each other (that is, if we can execute them
248
   in parallel).  */
249
 
250
static bool
251
loop_parallel_p (struct loop *loop)
252
{
253
  VEC (ddr_p, heap) * dependence_relations;
254
  VEC (data_reference_p, heap) *datarefs;
255
  lambda_trans_matrix trans;
256
  bool ret = false;
257
 
258
  if (dump_file && (dump_flags & TDF_DETAILS))
259
  {
260
    fprintf (dump_file, "Considering loop %d\n", loop->num);
261
    if (!loop->inner)
262
      fprintf (dump_file, "loop is innermost\n");
263
    else
264
      fprintf (dump_file, "loop NOT innermost\n");
265
   }
266
 
267
  /* Check for problems with dependences.  If the loop can be reversed,
268
     the iterations are independent.  */
269
  datarefs = VEC_alloc (data_reference_p, heap, 10);
270
  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
271
  compute_data_dependences_for_loop (loop, true, &datarefs,
272
                                     &dependence_relations);
273
  if (dump_file && (dump_flags & TDF_DETAILS))
274
    dump_data_dependence_relations (dump_file, dependence_relations);
275
 
276
  trans = lambda_trans_matrix_new (1, 1);
277
  LTM_MATRIX (trans)[0][0] = -1;
278
 
279
  if (lambda_transform_legal_p (trans, 1, dependence_relations))
280
    {
281
      ret = true;
282
      if (dump_file && (dump_flags & TDF_DETAILS))
283
        fprintf (dump_file, "  SUCCESS: may be parallelized\n");
284
    }
285
  else if (dump_file && (dump_flags & TDF_DETAILS))
286
    fprintf (dump_file,
287
             "  FAILED: data dependencies exist across iterations\n");
288
 
289
  free_dependence_relations (dependence_relations);
290
  free_data_refs (datarefs);
291
 
292
  return ret;
293
}
294
 
295
/* Return true when LOOP contains basic blocks marked with the
296
   BB_IRREDUCIBLE_LOOP flag.  */
297
 
298
static inline bool
299
loop_has_blocks_with_irreducible_flag (struct loop *loop)
300
{
301
  unsigned i;
302
  basic_block *bbs = get_loop_body_in_dom_order (loop);
303
  bool res = true;
304
 
305
  for (i = 0; i < loop->num_nodes; i++)
306
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
307
      goto end;
308
 
309
  res = false;
310
 end:
311
  free (bbs);
312
  return res;
313
}
314
 
315
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
316
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
317
   to their addresses that can be reused.  The address of OBJ is known to
318
   be invariant in the whole function.  */
319
 
320
static tree
321
take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
322
{
323
  int uid;
324
  void **dslot;
325
  struct int_tree_map ielt, *nielt;
326
  tree *var_p, name, bvar, addr;
327
  gimple stmt;
328
  gimple_seq stmts;
329
 
330
  /* Since the address of OBJ is invariant, the trees may be shared.
331
     Avoid rewriting unrelated parts of the code.  */
332
  obj = unshare_expr (obj);
333
  for (var_p = &obj;
334
       handled_component_p (*var_p);
335
       var_p = &TREE_OPERAND (*var_p, 0))
336
    continue;
337
  uid = DECL_UID (*var_p);
338
 
339
  ielt.uid = uid;
340
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
341
  if (!*dslot)
342
    {
343
      addr = build_addr (*var_p, current_function_decl);
344
      bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
345
      add_referenced_var (bvar);
346
      stmt = gimple_build_assign (bvar, addr);
347
      name = make_ssa_name (bvar, stmt);
348
      gimple_assign_set_lhs (stmt, name);
349
      gsi_insert_on_edge_immediate (entry, stmt);
350
 
351
      nielt = XNEW (struct int_tree_map);
352
      nielt->uid = uid;
353
      nielt->to = name;
354
      *dslot = nielt;
355
    }
356
  else
357
    name = ((struct int_tree_map *) *dslot)->to;
358
 
359
  if (var_p != &obj)
360
    {
361
      *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
362
      name = force_gimple_operand (build_addr (obj, current_function_decl),
363
                                   &stmts, true, NULL_TREE);
364
      if (!gimple_seq_empty_p (stmts))
365
        gsi_insert_seq_on_edge_immediate (entry, stmts);
366
    }
367
 
368
  if (TREE_TYPE (name) != type)
369
    {
370
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
371
                                   NULL_TREE);
372
      if (!gimple_seq_empty_p (stmts))
373
        gsi_insert_seq_on_edge_immediate (entry, stmts);
374
    }
375
 
376
  return name;
377
}
378
 
379
/* Callback for htab_traverse.  Create the initialization statement
380
   for reduction described in SLOT, and place it at the preheader of
381
   the loop described in DATA.  */
382
 
383
static int
384
initialize_reductions (void **slot, void *data)
385
{
386
  tree init, c;
387
  tree bvar, type, arg;
388
  edge e;
389
 
390
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
391
  struct loop *loop = (struct loop *) data;
392
 
393
  /* Create initialization in preheader:
394
     reduction_variable = initialization value of reduction.  */
395
 
396
  /* In the phi node at the header, replace the argument coming
397
     from the preheader with the reduction initialization value.  */
398
 
399
  /* Create a new variable to initialize the reduction.  */
400
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
401
  bvar = create_tmp_var (type, "reduction");
402
  add_referenced_var (bvar);
403
 
404
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
405
                        OMP_CLAUSE_REDUCTION);
406
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
407
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
408
 
409
  init = omp_reduction_init (c, TREE_TYPE (bvar));
410
  reduc->init = init;
411
 
412
  /* Replace the argument representing the initialization value
413
     with the initialization value for the reduction (neutral
414
     element for the particular operation, e.g. 0 for PLUS_EXPR,
415
     1 for MULT_EXPR, etc).
416
     Keep the old value in a new variable "reduction_initial",
417
     that will be taken in consideration after the parallel
418
     computing is done.  */
419
 
420
  e = loop_preheader_edge (loop);
421
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
422
  /* Create new variable to hold the initial value.  */
423
 
424
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
425
           (reduc->reduc_phi, loop_preheader_edge (loop)), init);
426
  reduc->initial_value = arg;
427
  return 1;
428
}
429
 
430
struct elv_data
431
{
432
  struct walk_stmt_info info;
433
  edge entry;
434
  htab_t decl_address;
435
  bool changed;
436
};
437
 
438
/* Eliminates references to local variables in *TP out of the single
439
   entry single exit region starting at DTA->ENTRY.
440
   DECL_ADDRESS contains addresses of the references that had their
441
   address taken already.  If the expression is changed, CHANGED is
442
   set to true.  Callback for walk_tree.  */
443
 
444
static tree
445
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
446
{
447
  struct elv_data *const dta = (struct elv_data *) data;
448
  tree t = *tp, var, addr, addr_type, type, obj;
449
 
450
  if (DECL_P (t))
451
    {
452
      *walk_subtrees = 0;
453
 
454
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
455
        return NULL_TREE;
456
 
457
      type = TREE_TYPE (t);
458
      addr_type = build_pointer_type (type);
459
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
460
      *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
461
 
462
      dta->changed = true;
463
      return NULL_TREE;
464
    }
465
 
466
  if (TREE_CODE (t) == ADDR_EXPR)
467
    {
468
      /* ADDR_EXPR may appear in two contexts:
469
         -- as a gimple operand, when the address taken is a function invariant
470
         -- as gimple rhs, when the resulting address in not a function
471
            invariant
472
         We do not need to do anything special in the latter case (the base of
473
         the memory reference whose address is taken may be replaced in the
474
         DECL_P case).  The former case is more complicated, as we need to
475
         ensure that the new address is still a gimple operand.  Thus, it
476
         is not sufficient to replace just the base of the memory reference --
477
         we need to move the whole computation of the address out of the
478
         loop.  */
479
      if (!is_gimple_val (t))
480
        return NULL_TREE;
481
 
482
      *walk_subtrees = 0;
483
      obj = TREE_OPERAND (t, 0);
484
      var = get_base_address (obj);
485
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
486
        return NULL_TREE;
487
 
488
      addr_type = TREE_TYPE (t);
489
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
490
      *tp = addr;
491
 
492
      dta->changed = true;
493
      return NULL_TREE;
494
    }
495
 
496
  if (!EXPR_P (t))
497
    *walk_subtrees = 0;
498
 
499
  return NULL_TREE;
500
}
501
 
502
/* Moves the references to local variables in STMT out of the single
503
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
504
   addresses of the references that had their address taken
505
   already.  */
506
 
507
static void
508
eliminate_local_variables_stmt (edge entry, gimple stmt,
509
                                htab_t decl_address)
510
{
511
  struct elv_data dta;
512
 
513
  memset (&dta.info, '\0', sizeof (dta.info));
514
  dta.entry = entry;
515
  dta.decl_address = decl_address;
516
  dta.changed = false;
517
 
518
  if (gimple_debug_bind_p (stmt))
519
    walk_tree (gimple_debug_bind_get_value_ptr (stmt),
520
               eliminate_local_variables_1, &dta.info, NULL);
521
  else
522
    walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
523
 
524
  if (dta.changed)
525
    update_stmt (stmt);
526
}
527
 
528
/* Eliminates the references to local variables from the single entry
529
   single exit region between the ENTRY and EXIT edges.
530
 
531
   This includes:
532
   1) Taking address of a local variable -- these are moved out of the
533
   region (and temporary variable is created to hold the address if
534
   necessary).
535
 
536
   2) Dereferencing a local variable -- these are replaced with indirect
537
   references.  */
538
 
539
static void
540
eliminate_local_variables (edge entry, edge exit)
541
{
542
  basic_block bb;
543
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
544
  unsigned i;
545
  gimple_stmt_iterator gsi;
546
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
547
                                     free);
548
  basic_block entry_bb = entry->src;
549
  basic_block exit_bb = exit->dest;
550
 
551
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
552
 
553
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
554
    if (bb != entry_bb && bb != exit_bb)
555
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
556
        eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
557
                                        decl_address);
558
 
559
  htab_delete (decl_address);
560
  VEC_free (basic_block, heap, body);
561
}
562
 
563
/* Returns true if expression EXPR is not defined between ENTRY and
564
   EXIT, i.e. if all its operands are defined outside of the region.  */
565
 
566
static bool
567
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
568
{
569
  basic_block entry_bb = entry->src;
570
  basic_block exit_bb = exit->dest;
571
  basic_block def_bb;
572
 
573
  if (is_gimple_min_invariant (expr))
574
    return true;
575
 
576
  if (TREE_CODE (expr) == SSA_NAME)
577
    {
578
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
579
      if (def_bb
580
          && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
581
          && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
582
        return false;
583
 
584
      return true;
585
    }
586
 
587
  return false;
588
}
589
 
590
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
591
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
592
   its duplicate stored in NAME_COPIES is returned.
593
 
594
   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
595
   duplicated, storing the copies in DECL_COPIES.  */
596
 
597
static tree
598
separate_decls_in_region_name (tree name,
599
                               htab_t name_copies, htab_t decl_copies,
600
                               bool copy_name_p)
601
{
602
  tree copy, var, var_copy;
603
  unsigned idx, uid, nuid;
604
  struct int_tree_map ielt, *nielt;
605
  struct name_to_copy_elt elt, *nelt;
606
  void **slot, **dslot;
607
 
608
  if (TREE_CODE (name) != SSA_NAME)
609
    return name;
610
 
611
  idx = SSA_NAME_VERSION (name);
612
  elt.version = idx;
613
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
614
                                   copy_name_p ? INSERT : NO_INSERT);
615
  if (slot && *slot)
616
    return ((struct name_to_copy_elt *) *slot)->new_name;
617
 
618
  var = SSA_NAME_VAR (name);
619
  uid = DECL_UID (var);
620
  ielt.uid = uid;
621
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
622
  if (!*dslot)
623
    {
624
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
625
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
626
      add_referenced_var (var_copy);
627
      nielt = XNEW (struct int_tree_map);
628
      nielt->uid = uid;
629
      nielt->to = var_copy;
630
      *dslot = nielt;
631
 
632
      /* Ensure that when we meet this decl next time, we won't duplicate
633
         it again.  */
634
      nuid = DECL_UID (var_copy);
635
      ielt.uid = nuid;
636
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
637
      gcc_assert (!*dslot);
638
      nielt = XNEW (struct int_tree_map);
639
      nielt->uid = nuid;
640
      nielt->to = var_copy;
641
      *dslot = nielt;
642
    }
643
  else
644
    var_copy = ((struct int_tree_map *) *dslot)->to;
645
 
646
  if (copy_name_p)
647
    {
648
      copy = duplicate_ssa_name (name, NULL);
649
      nelt = XNEW (struct name_to_copy_elt);
650
      nelt->version = idx;
651
      nelt->new_name = copy;
652
      nelt->field = NULL_TREE;
653
      *slot = nelt;
654
    }
655
  else
656
    {
657
      gcc_assert (!slot);
658
      copy = name;
659
    }
660
 
661
  SSA_NAME_VAR (copy) = var_copy;
662
  return copy;
663
}
664
 
665
/* Finds the ssa names used in STMT that are defined outside the
666
   region between ENTRY and EXIT and replaces such ssa names with
667
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
668
   decls of all ssa names used in STMT (including those defined in
669
   LOOP) are replaced with the new temporary variables; the
670
   replacement decls are stored in DECL_COPIES.  */
671
 
672
static void
673
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
674
                               htab_t name_copies, htab_t decl_copies)
675
{
676
  use_operand_p use;
677
  def_operand_p def;
678
  ssa_op_iter oi;
679
  tree name, copy;
680
  bool copy_name_p;
681
 
682
  mark_virtual_ops_for_renaming (stmt);
683
 
684
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
685
  {
686
    name = DEF_FROM_PTR (def);
687
    gcc_assert (TREE_CODE (name) == SSA_NAME);
688
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
689
                                          false);
690
    gcc_assert (copy == name);
691
  }
692
 
693
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
694
  {
695
    name = USE_FROM_PTR (use);
696
    if (TREE_CODE (name) != SSA_NAME)
697
      continue;
698
 
699
    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
700
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
701
                                          copy_name_p);
702
    SET_USE (use, copy);
703
  }
704
}
705
 
706
/* Finds the ssa names used in STMT that are defined outside the
707
   region between ENTRY and EXIT and replaces such ssa names with
708
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
709
   decls of all ssa names used in STMT (including those defined in
710
   LOOP) are replaced with the new temporary variables; the
711
   replacement decls are stored in DECL_COPIES.  */
712
 
713
static bool
714
separate_decls_in_region_debug_bind (gimple stmt,
715
                                     htab_t name_copies, htab_t decl_copies)
716
{
717
  use_operand_p use;
718
  ssa_op_iter oi;
719
  tree var, name;
720
  struct int_tree_map ielt;
721
  struct name_to_copy_elt elt;
722
  void **slot, **dslot;
723
 
724
  var = gimple_debug_bind_get_var (stmt);
725
  if (TREE_CODE (var) == DEBUG_EXPR_DECL)
726
    return true;
727
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
728
  ielt.uid = DECL_UID (var);
729
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
730
  if (!dslot)
731
    return true;
732
  gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
733
 
734
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
735
  {
736
    name = USE_FROM_PTR (use);
737
    if (TREE_CODE (name) != SSA_NAME)
738
      continue;
739
 
740
    elt.version = SSA_NAME_VERSION (name);
741
    slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
742
    if (!slot)
743
      {
744
        gimple_debug_bind_reset_value (stmt);
745
        update_stmt (stmt);
746
        break;
747
      }
748
 
749
    SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
750
  }
751
 
752
  return false;
753
}
754
 
755
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
756
   specified in SLOT. The type is passed in DATA.  */
757
 
758
static int
759
add_field_for_reduction (void **slot, void *data)
760
{
761
 
762
  struct reduction_info *const red = (struct reduction_info *) *slot;
763
  tree const type = (tree) data;
764
  tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
765
  tree field = build_decl (gimple_location (red->reduc_stmt),
766
                           FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
767
 
768
  insert_field_into_struct (type, field);
769
 
770
  red->field = field;
771
 
772
  return 1;
773
}
774
 
775
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
776
   described in SLOT. The type is passed in DATA.  */
777
 
778
static int
779
add_field_for_name (void **slot, void *data)
780
{
781
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
782
  tree type = (tree) data;
783
  tree name = ssa_name (elt->version);
784
  tree var = SSA_NAME_VAR (name);
785
  tree field = build_decl (DECL_SOURCE_LOCATION (var),
786
                           FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
787
 
788
  insert_field_into_struct (type, field);
789
  elt->field = field;
790
 
791
  return 1;
792
}
793
 
794
/* Callback for htab_traverse.  A local result is the intermediate result
795
   computed by a single
796
   thread, or the initial value in case no iteration was executed.
797
   This function creates a phi node reflecting these values.
798
   The phi's result will be stored in NEW_PHI field of the
799
   reduction's data structure.  */
800
 
801
static int
802
create_phi_for_local_result (void **slot, void *data)
803
{
804
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
805
  const struct loop *const loop = (const struct loop *) data;
806
  edge e;
807
  gimple new_phi;
808
  basic_block store_bb;
809
  tree local_res;
810
  source_location locus;
811
 
812
  /* STORE_BB is the block where the phi
813
     should be stored.  It is the destination of the loop exit.
814
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
815
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;
816
 
817
  /* STORE_BB has two predecessors.  One coming from  the loop
818
     (the reduction's result is computed at the loop),
819
     and another coming from a block preceding the loop,
820
     when no iterations
821
     are executed (the initial value should be taken).  */
822
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
823
    e = EDGE_PRED (store_bb, 1);
824
  else
825
    e = EDGE_PRED (store_bb, 0);
826
  local_res
827
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
828
                     NULL);
829
  locus = gimple_location (reduc->reduc_stmt);
830
  new_phi = create_phi_node (local_res, store_bb);
831
  SSA_NAME_DEF_STMT (local_res) = new_phi;
832
  add_phi_arg (new_phi, reduc->init, e, locus);
833
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
834
               FALLTHRU_EDGE (loop->latch), locus);
835
  reduc->new_phi = new_phi;
836
 
837
  return 1;
838
}
839
 
840
struct clsn_data
841
{
842
  tree store;
843
  tree load;
844
 
845
  basic_block store_bb;
846
  basic_block load_bb;
847
};
848
 
849
/* Callback for htab_traverse.  Create an atomic instruction for the
850
   reduction described in SLOT.
851
   DATA annotates the place in memory the atomic operation relates to,
852
   and the basic block it needs to be generated in.  */
853
 
854
static int
855
create_call_for_reduction_1 (void **slot, void *data)
856
{
857
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
858
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
859
  gimple_stmt_iterator gsi;
860
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
861
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
862
  tree load_struct;
863
  basic_block bb;
864
  basic_block new_bb;
865
  edge e;
866
  tree t, addr, ref, x;
867
  tree tmp_load, name;
868
  gimple load;
869
 
870
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
871
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
872
 
873
  addr = build_addr (t, current_function_decl);
874
 
875
  /* Create phi node.  */
876
  bb = clsn_data->load_bb;
877
 
878
  e = split_block (bb, t);
879
  new_bb = e->dest;
880
 
881
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
882
  add_referenced_var (tmp_load);
883
  tmp_load = make_ssa_name (tmp_load, NULL);
884
  load = gimple_build_omp_atomic_load (tmp_load, addr);
885
  SSA_NAME_DEF_STMT (tmp_load) = load;
886
  gsi = gsi_start_bb (new_bb);
887
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);
888
 
889
  e = split_block (new_bb, load);
890
  new_bb = e->dest;
891
  gsi = gsi_start_bb (new_bb);
892
  ref = tmp_load;
893
  x = fold_build2 (reduc->reduction_code,
894
                   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
895
                   PHI_RESULT (reduc->new_phi));
896
 
897
  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
898
                                   GSI_CONTINUE_LINKING);
899
 
900
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
901
  return 1;
902
}
903
 
904
/* Create the atomic operation at the join point of the threads.
905
   REDUCTION_LIST describes the reductions in the LOOP.
906
   LD_ST_DATA describes the shared data structure where
907
   shared data is stored in and loaded from.  */
908
static void
909
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
910
                           struct clsn_data *ld_st_data)
911
{
912
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
913
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
914
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
915
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
916
}
917
 
918
/* Callback for htab_traverse.  Loads the final reduction value at the
919
   join point of all threads, and inserts it in the right place.  */
920
 
921
static int
922
create_loads_for_reductions (void **slot, void *data)
923
{
924
  struct reduction_info *const red = (struct reduction_info *) *slot;
925
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
926
  gimple stmt;
927
  gimple_stmt_iterator gsi;
928
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
929
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
930
  tree load_struct;
931
  tree name;
932
  tree x;
933
 
934
  gsi = gsi_after_labels (clsn_data->load_bb);
935
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
936
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
937
                        NULL_TREE);
938
 
939
  x = load_struct;
940
  name = PHI_RESULT (red->keep_res);
941
  stmt = gimple_build_assign (name, x);
942
  SSA_NAME_DEF_STMT (name) = stmt;
943
 
944
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
945
 
946
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
947
       !gsi_end_p (gsi); gsi_next (&gsi))
948
    if (gsi_stmt (gsi) == red->keep_res)
949
      {
950
        remove_phi_node (&gsi, false);
951
        return 1;
952
      }
953
  gcc_unreachable ();
954
}
955
 
956
/* Load the reduction result that was stored in LD_ST_DATA.
957
   REDUCTION_LIST describes the list of reductions that the
958
   loads should be generated for.  */
959
static void
960
create_final_loads_for_reduction (htab_t reduction_list,
961
                                  struct clsn_data *ld_st_data)
962
{
963
  gimple_stmt_iterator gsi;
964
  tree t;
965
  gimple stmt;
966
 
967
  gsi = gsi_after_labels (ld_st_data->load_bb);
968
  t = build_fold_addr_expr (ld_st_data->store);
969
  stmt = gimple_build_assign (ld_st_data->load, t);
970
 
971
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
972
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
973
 
974
  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
975
 
976
}
977
 
978
/* Callback for htab_traverse.  Store the neutral value for the
979
  particular reduction's operation, e.g. 0 for PLUS_EXPR,
980
  1 for MULT_EXPR, etc. into the reduction field.
981
  The reduction is specified in SLOT. The store information is
982
  passed in DATA.  */
983
 
984
static int
985
create_stores_for_reduction (void **slot, void *data)
986
{
987
  struct reduction_info *const red = (struct reduction_info *) *slot;
988
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
989
  tree t;
990
  gimple stmt;
991
  gimple_stmt_iterator gsi;
992
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
993
 
994
  gsi = gsi_last_bb (clsn_data->store_bb);
995
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
996
  stmt = gimple_build_assign (t, red->initial_value);
997
  mark_virtual_ops_for_renaming (stmt);
998
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
999
 
1000
  return 1;
1001
}
1002
 
1003
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1004
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
1005
   specified in SLOT.  */
1006
 
1007
static int
1008
create_loads_and_stores_for_name (void **slot, void *data)
1009
{
1010
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1011
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1012
  tree t;
1013
  gimple stmt;
1014
  gimple_stmt_iterator gsi;
1015
  tree type = TREE_TYPE (elt->new_name);
1016
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1017
  tree load_struct;
1018
 
1019
  gsi = gsi_last_bb (clsn_data->store_bb);
1020
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1021
  stmt = gimple_build_assign (t, ssa_name (elt->version));
1022
  mark_virtual_ops_for_renaming (stmt);
1023
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1024
 
1025
  gsi = gsi_last_bb (clsn_data->load_bb);
1026
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
1027
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1028
  stmt = gimple_build_assign (elt->new_name, t);
1029
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1030
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1031
 
1032
  return 1;
1033
}
1034
 
1035
/* Moves all the variables used in LOOP and defined outside of it (including
1036
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1037
   name) to a structure created for this purpose.  The code
1038
 
1039
   while (1)
1040
     {
1041
       use (a);
1042
       use (b);
1043
     }
1044
 
1045
   is transformed this way:
1046
 
1047
   bb0:
1048
   old.a = a;
1049
   old.b = b;
1050
 
1051
   bb1:
1052
   a' = new->a;
1053
   b' = new->b;
1054
   while (1)
1055
     {
1056
       use (a');
1057
       use (b');
1058
     }
1059
 
1060
   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
1061
   pointer `new' is intentionally not initialized (the loop will be split to a
1062
   separate function later, and `new' will be initialized from its arguments).
1063
   LD_ST_DATA holds information about the shared data structure used to pass
1064
   information among the threads.  It is initialized here, and
1065
   gen_parallel_loop will pass it to create_call_for_reduction that
1066
   needs this information.  REDUCTION_LIST describes the reductions
1067
   in LOOP.  */
1068
 
1069
static void
1070
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1071
                          tree *arg_struct, tree *new_arg_struct,
1072
                          struct clsn_data *ld_st_data)
1073
 
1074
{
1075
  basic_block bb1 = split_edge (entry);
1076
  basic_block bb0 = single_pred (bb1);
1077
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1078
                                    name_to_copy_elt_eq, free);
1079
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1080
                                    free);
1081
  unsigned i;
1082
  tree type, type_name, nvar;
1083
  gimple_stmt_iterator gsi;
1084
  struct clsn_data clsn_data;
1085
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1086
  basic_block bb;
1087
  basic_block entry_bb = bb1;
1088
  basic_block exit_bb = exit->dest;
1089
  bool has_debug_stmt = false;
1090
 
1091
  entry = single_succ_edge (entry_bb);
1092
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1093
 
1094
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1095
    {
1096
      if (bb != entry_bb && bb != exit_bb)
1097
        {
1098
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1099
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1100
                                           name_copies, decl_copies);
1101
 
1102
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1103
            {
1104
              gimple stmt = gsi_stmt (gsi);
1105
 
1106
              if (is_gimple_debug (stmt))
1107
                has_debug_stmt = true;
1108
              else
1109
                separate_decls_in_region_stmt (entry, exit, stmt,
1110
                                               name_copies, decl_copies);
1111
            }
1112
        }
1113
    }
1114
 
1115
  /* Now process debug bind stmts.  We must not create decls while
1116
     processing debug stmts, so we defer their processing so as to
1117
     make sure we will have debug info for as many variables as
1118
     possible (all of those that were dealt with in the loop above),
1119
     and discard those for which we know there's nothing we can
1120
     do.  */
1121
  if (has_debug_stmt)
1122
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1123
      if (bb != entry_bb && bb != exit_bb)
1124
        {
1125
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1126
            {
1127
              gimple stmt = gsi_stmt (gsi);
1128
 
1129
              if (gimple_debug_bind_p (stmt))
1130
                {
1131
                  if (separate_decls_in_region_debug_bind (stmt,
1132
                                                           name_copies,
1133
                                                           decl_copies))
1134
                    {
1135
                      gsi_remove (&gsi, true);
1136
                      continue;
1137
                    }
1138
                }
1139
 
1140
              gsi_next (&gsi);
1141
            }
1142
        }
1143
 
1144
  VEC_free (basic_block, heap, body);
1145
 
1146
  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1147
    {
1148
      /* It may happen that there is nothing to copy (if there are only
1149
         loop carried and external variables in the loop).  */
1150
      *arg_struct = NULL;
1151
      *new_arg_struct = NULL;
1152
    }
1153
  else
1154
    {
1155
      /* Create the type for the structure to store the ssa names to.  */
1156
      type = lang_hooks.types.make_type (RECORD_TYPE);
1157
      type_name = build_decl (BUILTINS_LOCATION,
1158
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
1159
                              type);
1160
      TYPE_NAME (type) = type_name;
1161
 
1162
      htab_traverse (name_copies, add_field_for_name, type);
1163
      if (reduction_list && htab_elements (reduction_list) > 0)
1164
        {
1165
          /* Create the fields for reductions.  */
1166
          htab_traverse (reduction_list, add_field_for_reduction,
1167
                         type);
1168
        }
1169
      layout_type (type);
1170
 
1171
      /* Create the loads and stores.  */
1172
      *arg_struct = create_tmp_var (type, ".paral_data_store");
1173
      add_referenced_var (*arg_struct);
1174
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1175
      add_referenced_var (nvar);
1176
      *new_arg_struct = make_ssa_name (nvar, NULL);
1177
 
1178
      ld_st_data->store = *arg_struct;
1179
      ld_st_data->load = *new_arg_struct;
1180
      ld_st_data->store_bb = bb0;
1181
      ld_st_data->load_bb = bb1;
1182
 
1183
      htab_traverse (name_copies, create_loads_and_stores_for_name,
1184
                     ld_st_data);
1185
 
1186
      /* Load the calculation from memory (after the join of the threads).  */
1187
 
1188
      if (reduction_list && htab_elements (reduction_list) > 0)
1189
        {
1190
          htab_traverse (reduction_list, create_stores_for_reduction,
1191
                        ld_st_data);
1192
          clsn_data.load = make_ssa_name (nvar, NULL);
1193
          clsn_data.load_bb = exit->dest;
1194
          clsn_data.store = ld_st_data->store;
1195
          create_final_loads_for_reduction (reduction_list, &clsn_data);
1196
        }
1197
    }
1198
 
1199
  htab_delete (decl_copies);
1200
  htab_delete (name_copies);
1201
}
1202
 
1203
/* Bitmap containing uids of functions created by parallelization.  We cannot
1204
   allocate it from the default obstack, as it must live across compilation
1205
   of several functions; we make it gc allocated instead.  */
1206
 
1207
static GTY(()) bitmap parallelized_functions;
1208
 
1209
/* Returns true if FN was created by create_loop_fn.  */
1210
 
1211
static bool
1212
parallelized_function_p (tree fn)
1213
{
1214
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1215
    return false;
1216
 
1217
  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1218
}
1219
 
1220
/* Creates and returns an empty function that will receive the body of
1221
   a parallelized loop.  */
1222
 
1223
static tree
1224
create_loop_fn (void)
1225
{
1226
  char buf[100];
1227
  char *tname;
1228
  tree decl, type, name, t;
1229
  struct function *act_cfun = cfun;
1230
  static unsigned loopfn_num;
1231
 
1232
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1233
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1234
  clean_symbol_name (tname);
1235
  name = get_identifier (tname);
1236
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1237
 
1238
  decl = build_decl (BUILTINS_LOCATION,
1239
                     FUNCTION_DECL, name, type);
1240
  if (!parallelized_functions)
1241
    parallelized_functions = BITMAP_GGC_ALLOC ();
1242
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1243
 
1244
  TREE_STATIC (decl) = 1;
1245
  TREE_USED (decl) = 1;
1246
  DECL_ARTIFICIAL (decl) = 1;
1247
  DECL_IGNORED_P (decl) = 0;
1248
  TREE_PUBLIC (decl) = 0;
1249
  DECL_UNINLINABLE (decl) = 1;
1250
  DECL_EXTERNAL (decl) = 0;
1251
  DECL_CONTEXT (decl) = NULL_TREE;
1252
  DECL_INITIAL (decl) = make_node (BLOCK);
1253
 
1254
  t = build_decl (BUILTINS_LOCATION,
1255
                  RESULT_DECL, NULL_TREE, void_type_node);
1256
  DECL_ARTIFICIAL (t) = 1;
1257
  DECL_IGNORED_P (t) = 1;
1258
  DECL_RESULT (decl) = t;
1259
 
1260
  t = build_decl (BUILTINS_LOCATION,
1261
                  PARM_DECL, get_identifier (".paral_data_param"),
1262
                  ptr_type_node);
1263
  DECL_ARTIFICIAL (t) = 1;
1264
  DECL_ARG_TYPE (t) = ptr_type_node;
1265
  DECL_CONTEXT (t) = decl;
1266
  TREE_USED (t) = 1;
1267
  DECL_ARGUMENTS (decl) = t;
1268
 
1269
  allocate_struct_function (decl, false);
1270
 
1271
  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1272
     it.  */
1273
  set_cfun (act_cfun);
1274
 
1275
  return decl;
1276
}
1277
 
1278
/* Moves the exit condition of LOOP to the beginning of its header, and
1279
   duplicates the part of the last iteration that gets disabled to the
1280
   exit of the loop.  NIT is the number of iterations of the loop
1281
   (used to initialize the variables in the duplicated part).
1282
 
1283
   TODO: the common case is that latch of the loop is empty and immediately
1284
   follows the loop exit.  In this case, it would be better not to copy the
1285
   body of the loop, but only move the entry of the loop directly before the
1286
   exit check and increase the number of iterations of the loop by one.
1287
   This may need some additional preconditioning in case NIT = ~0.
1288
   REDUCTION_LIST describes the reductions in LOOP.  */
1289
 
1290
static void
1291
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1292
{
1293
  basic_block *bbs, *nbbs, ex_bb, orig_header;
1294
  unsigned n;
1295
  bool ok;
1296
  edge exit = single_dom_exit (loop), hpred;
1297
  tree control, control_name, res, t;
1298
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
1299
  gimple_stmt_iterator gsi;
1300
  tree nit_1;
1301
 
1302
  split_block_after_labels (loop->header);
1303
  orig_header = single_succ (loop->header);
1304
  hpred = single_succ_edge (loop->header);
1305
 
1306
  cond_stmt = last_stmt (exit->src);
1307
  control = gimple_cond_lhs (cond_stmt);
1308
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1309
 
1310
  /* Make sure that we have phi nodes on exit for all loop header phis
1311
     (create_parallel_loop requires that).  */
1312
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1313
    {
1314
      phi = gsi_stmt (gsi);
1315
      res = PHI_RESULT (phi);
1316
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
1317
      SET_PHI_RESULT (phi, t);
1318
      nphi = create_phi_node (res, orig_header);
1319
      SSA_NAME_DEF_STMT (res) = nphi;
1320
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1321
 
1322
      if (res == control)
1323
        {
1324
          gimple_cond_set_lhs (cond_stmt, t);
1325
          update_stmt (cond_stmt);
1326
          control = t;
1327
        }
1328
    }
1329
  bbs = get_loop_body_in_dom_order (loop);
1330
 
1331
  for (n = 0; bbs[n] != loop->latch; n++)
1332
    continue;
1333
  nbbs = XNEWVEC (basic_block, n);
1334
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1335
                                   bbs + 1, n, nbbs);
1336
  gcc_assert (ok);
1337
  free (bbs);
1338
  ex_bb = nbbs[0];
1339
  free (nbbs);
1340
 
1341
  /* Other than reductions, the only gimple reg that should be copied
1342
     out of the loop is the control variable.  */
1343
 
1344
  control_name = NULL_TREE;
1345
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1346
    {
1347
      phi = gsi_stmt (gsi);
1348
      res = PHI_RESULT (phi);
1349
      if (!is_gimple_reg (res))
1350
        {
1351
          gsi_next (&gsi);
1352
          continue;
1353
        }
1354
 
1355
      /* Check if it is a part of reduction.  If it is,
1356
         keep the phi at the reduction's keep_res field.  The
1357
         PHI_RESULT of this phi is the resulting value of the reduction
1358
         variable when exiting the loop.  */
1359
 
1360
      exit = single_dom_exit (loop);
1361
 
1362
      if (htab_elements (reduction_list) > 0)
1363
        {
1364
          struct reduction_info *red;
1365
 
1366
          tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1367
          red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1368
          if (red)
1369
            {
1370
              red->keep_res = phi;
1371
              gsi_next (&gsi);
1372
              continue;
1373
            }
1374
        }
1375
      gcc_assert (control_name == NULL_TREE
1376
                  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1377
      control_name = res;
1378
      remove_phi_node (&gsi, false);
1379
    }
1380
  gcc_assert (control_name != NULL_TREE);
1381
 
1382
  /* Initialize the control variable to number of iterations
1383
     according to the rhs of the exit condition.  */
1384
  gsi = gsi_after_labels (ex_bb);
1385
  cond_nit = last_stmt (exit->src);
1386
  nit_1 =  gimple_cond_rhs (cond_nit);
1387
  nit_1 = force_gimple_operand_gsi (&gsi,
1388
                                  fold_convert (TREE_TYPE (control_name), nit_1),
1389
                                  false, NULL_TREE, false, GSI_SAME_STMT);
1390
  stmt = gimple_build_assign (control_name, nit_1);
1391
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1392
  SSA_NAME_DEF_STMT (control_name) = stmt;
1393
}
1394
 
1395
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1396
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1397
   NEW_DATA is the variable that should be initialized from the argument
1398
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
1399
   basic block containing GIMPLE_OMP_PARALLEL tree.  */
1400
 
1401
static basic_block
1402
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1403
                      tree new_data, unsigned n_threads)
1404
{
1405
  gimple_stmt_iterator gsi;
1406
  basic_block bb, paral_bb, for_bb, ex_bb;
1407
  tree t, param;
1408
  gimple stmt, for_stmt, phi, cond_stmt;
1409
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1410
  edge exit, nexit, guard, end, e;
1411
 
1412
  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
1413
  bb = loop_preheader_edge (loop)->src;
1414
  paral_bb = single_pred (bb);
1415
  gsi = gsi_last_bb (paral_bb);
1416
 
1417
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
1418
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
1419
    = build_int_cst (integer_type_node, n_threads);
1420
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1421
 
1422
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1423
 
1424
  /* Initialize NEW_DATA.  */
1425
  if (data)
1426
    {
1427
      gsi = gsi_after_labels (bb);
1428
 
1429
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1430
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1431
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1432
      SSA_NAME_DEF_STMT (param) = stmt;
1433
 
1434
      stmt = gimple_build_assign (new_data,
1435
                                  fold_convert (TREE_TYPE (new_data), param));
1436
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1437
      SSA_NAME_DEF_STMT (new_data) = stmt;
1438
    }
1439
 
1440
  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
1441
  bb = split_loop_exit_edge (single_dom_exit (loop));
1442
  gsi = gsi_last_bb (bb);
1443
  gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);
1444
 
1445
  /* Extract data for GIMPLE_OMP_FOR.  */
1446
  gcc_assert (loop->header == single_dom_exit (loop)->src);
1447
  cond_stmt = last_stmt (loop->header);
1448
 
1449
  cvar = gimple_cond_lhs (cond_stmt);
1450
  cvar_base = SSA_NAME_VAR (cvar);
1451
  phi = SSA_NAME_DEF_STMT (cvar);
1452
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1453
  initvar = make_ssa_name (cvar_base, NULL);
1454
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1455
           initvar);
1456
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1457
 
1458
  gsi = gsi_last_bb (loop->latch);
1459
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1460
  gsi_remove (&gsi, true);
1461
 
1462
  /* Prepare cfg.  */
1463
  for_bb = split_edge (loop_preheader_edge (loop));
1464
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1465
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1466
  gcc_assert (exit == single_dom_exit (loop));
1467
 
1468
  guard = make_edge (for_bb, ex_bb, 0);
1469
  single_succ_edge (loop->latch)->flags = 0;
1470
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1471
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1472
    {
1473
      source_location locus;
1474
      tree def;
1475
      phi = gsi_stmt (gsi);
1476
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1477
 
1478
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1479
      locus = gimple_phi_arg_location_from_edge (stmt,
1480
                                                 loop_preheader_edge (loop));
1481
      add_phi_arg (phi, def, guard, locus);
1482
 
1483
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1484
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1485
      add_phi_arg (phi, def, end, locus);
1486
    }
1487
  e = redirect_edge_and_branch (exit, nexit->dest);
1488
  PENDING_STMT (e) = NULL;
1489
 
1490
  /* Emit GIMPLE_OMP_FOR.  */
1491
  gimple_cond_set_lhs (cond_stmt, cvar_base);
1492
  type = TREE_TYPE (cvar);
1493
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
1494
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1495
 
1496
  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1497
  gimple_omp_for_set_index (for_stmt, 0, initvar);
1498
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1499
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1500
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1501
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1502
                                                cvar_base,
1503
                                                build_int_cst (type, 1)));
1504
 
1505
  gsi = gsi_last_bb (for_bb);
1506
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1507
  SSA_NAME_DEF_STMT (initvar) = for_stmt;
1508
 
1509
  /* Emit GIMPLE_OMP_CONTINUE.  */
1510
  gsi = gsi_last_bb (loop->latch);
1511
  stmt = gimple_build_omp_continue (cvar_next, cvar);
1512
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1513
  SSA_NAME_DEF_STMT (cvar_next) = stmt;
1514
 
1515
  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
1516
  gsi = gsi_last_bb (ex_bb);
1517
  gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);
1518
 
1519
  return paral_bb;
1520
}
1521
 
1522
/* Generates code to execute the iterations of LOOP in N_THREADS
1523
   threads in parallel.
1524
 
1525
   NITER describes number of iterations of LOOP.
1526
   REDUCTION_LIST describes the reductions existent in the LOOP.  */
1527
 
1528
static void
1529
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1530
                   unsigned n_threads, struct tree_niter_desc *niter)
1531
{
1532
  loop_iterator li;
1533
  tree many_iterations_cond, type, nit;
1534
  tree arg_struct, new_arg_struct;
1535
  gimple_seq stmts;
1536
  basic_block parallel_head;
1537
  edge entry, exit;
1538
  struct clsn_data clsn_data;
1539
  unsigned prob;
1540
 
1541
  /* From
1542
 
1543
     ---------------------------------------------------------------------
1544
     loop
1545
       {
1546
         IV = phi (INIT, IV + STEP)
1547
         BODY1;
1548
         if (COND)
1549
           break;
1550
         BODY2;
1551
       }
1552
     ---------------------------------------------------------------------
1553
 
1554
     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1555
     we generate the following code:
1556
 
1557
     ---------------------------------------------------------------------
1558
 
1559
     if (MAY_BE_ZERO
1560
     || NITER < MIN_PER_THREAD * N_THREADS)
1561
     goto original;
1562
 
1563
     BODY1;
1564
     store all local loop-invariant variables used in body of the loop to DATA.
1565
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1566
     load the variables from DATA.
1567
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1568
     BODY2;
1569
     BODY1;
1570
     GIMPLE_OMP_CONTINUE;
1571
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
1572
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
1573
     goto end;
1574
 
1575
     original:
1576
     loop
1577
       {
1578
         IV = phi (INIT, IV + STEP)
1579
         BODY1;
1580
         if (COND)
1581
           break;
1582
         BODY2;
1583
       }
1584
 
1585
     end:
1586
 
1587
   */
1588
 
1589
  /* Create two versions of the loop -- in the old one, we know that the
1590
     number of iterations is large enough, and we will transform it into the
1591
     loop that will be split to loop_fn, the new one will be used for the
1592
     remaining iterations.  */
1593
 
1594
  type = TREE_TYPE (niter->niter);
1595
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1596
                              NULL_TREE);
1597
  if (stmts)
1598
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1599
 
1600
  many_iterations_cond =
1601
    fold_build2 (GE_EXPR, boolean_type_node,
1602
                 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1603
  many_iterations_cond
1604
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1605
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
1606
                   many_iterations_cond);
1607
  many_iterations_cond
1608
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1609
  if (stmts)
1610
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1611
  if (!is_gimple_condexpr (many_iterations_cond))
1612
    {
1613
      many_iterations_cond
1614
        = force_gimple_operand (many_iterations_cond, &stmts,
1615
                                true, NULL_TREE);
1616
      if (stmts)
1617
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1618
    }
1619
 
1620
  initialize_original_copy_tables ();
1621
 
1622
  /* We assume that the loop usually iterates a lot.  */
1623
  prob = 4 * REG_BR_PROB_BASE / 5;
1624
  loop_version (loop, many_iterations_cond, NULL,
1625
                prob, prob, REG_BR_PROB_BASE - prob, true);
1626
  update_ssa (TODO_update_ssa);
1627
  free_original_copy_tables ();
1628
 
1629
  /* Base all the induction variables in LOOP on a single control one.  */
1630
  canonicalize_loop_ivs (loop, &nit, true);
1631
 
1632
  /* Ensure that the exit condition is the first statement in the loop.  */
1633
  transform_to_exit_first_loop (loop, reduction_list, nit);
1634
 
1635
  /* Generate initializations for reductions.  */
1636
  if (htab_elements (reduction_list) > 0)
1637
    htab_traverse (reduction_list, initialize_reductions, loop);
1638
 
1639
  /* Eliminate the references to local variables from the loop.  */
1640
  gcc_assert (single_exit (loop));
1641
  entry = loop_preheader_edge (loop);
1642
  exit = single_dom_exit (loop);
1643
 
1644
  eliminate_local_variables (entry, exit);
1645
  /* In the old loop, move all variables non-local to the loop to a structure
1646
     and back, and create separate decls for the variables used in loop.  */
1647
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1648
                            &new_arg_struct, &clsn_data);
1649
 
1650
  /* Create the parallel constructs.  */
1651
  parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1652
                                        new_arg_struct, n_threads);
1653
  if (htab_elements (reduction_list) > 0)
1654
    create_call_for_reduction (loop, reduction_list, &clsn_data);
1655
 
1656
  scev_reset ();
1657
 
1658
  /* Cancel the loop (it is simpler to do it here rather than to teach the
1659
     expander to do it).  */
1660
  cancel_loop_tree (loop);
1661
 
1662
  /* Free loop bound estimations that could contain references to
1663
     removed statements.  */
1664
  FOR_EACH_LOOP (li, loop, 0)
1665
    free_numbers_of_iterations_estimates_loop (loop);
1666
 
1667
  /* Expand the parallel constructs.  We do it directly here instead of running
1668
     a separate expand_omp pass, since it is more efficient, and less likely to
1669
     cause troubles with further analyses not being able to deal with the
1670
     OMP trees.  */
1671
 
1672
  omp_expand_local (parallel_head);
1673
}
1674
 
1675
/* Returns true when LOOP contains vector phi nodes.  */
1676
 
1677
static bool
1678
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1679
{
1680
  unsigned i;
1681
  basic_block *bbs = get_loop_body_in_dom_order (loop);
1682
  gimple_stmt_iterator gsi;
1683
  bool res = true;
1684
 
1685
  for (i = 0; i < loop->num_nodes; i++)
1686
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1687
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1688
        goto end;
1689
 
1690
  res = false;
1691
 end:
1692
  free (bbs);
1693
  return res;
1694
}
1695
 
1696
/* Create a reduction_info struct, initialize it with REDUC_STMT
1697
   and PHI, insert it to the REDUCTION_LIST.  */
1698
 
1699
static void
1700
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1701
{
1702
  PTR *slot;
1703
  struct reduction_info *new_reduction;
1704
 
1705
  gcc_assert (reduc_stmt);
1706
 
1707
  if (dump_file && (dump_flags & TDF_DETAILS))
1708
    {
1709
      fprintf (dump_file,
1710
               "Detected reduction. reduction stmt is: \n");
1711
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1712
      fprintf (dump_file, "\n");
1713
    }
1714
 
1715
  new_reduction = XCNEW (struct reduction_info);
1716
 
1717
  new_reduction->reduc_stmt = reduc_stmt;
1718
  new_reduction->reduc_phi = phi;
1719
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1720
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1721
  *slot = new_reduction;
1722
}
1723
 
1724
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
1725
 
1726
static void
1727
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1728
{
1729
  gimple_stmt_iterator gsi;
1730
  loop_vec_info simple_loop_info;
1731
 
1732
  vect_dump = NULL;
1733
  simple_loop_info = vect_analyze_loop_form (loop);
1734
 
1735
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1736
    {
1737
      gimple phi = gsi_stmt (gsi);
1738
      affine_iv iv;
1739
      tree res = PHI_RESULT (phi);
1740
      bool double_reduc;
1741
 
1742
      if (!is_gimple_reg (res))
1743
        continue;
1744
 
1745
      if (!simple_iv (loop, loop, res, &iv, true)
1746
        && simple_loop_info)
1747
        {
1748
           gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
1749
           if (reduc_stmt && !double_reduc)
1750
              build_new_reduction (reduction_list, reduc_stmt, phi);
1751
        }
1752
    }
1753
    destroy_loop_vec_info (simple_loop_info, true);
1754
}
1755
 
1756
/* Try to initialize NITER for code generation part.  */
1757
 
1758
static bool
1759
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1760
{
1761
  edge exit = single_dom_exit (loop);
1762
 
1763
  gcc_assert (exit);
1764
 
1765
  /* We need to know # of iterations, and there should be no uses of values
1766
     defined inside loop outside of it, unless the values are invariants of
1767
     the loop.  */
1768
  if (!number_of_iterations_exit (loop, exit, niter, false))
1769
    {
1770
      if (dump_file && (dump_flags & TDF_DETAILS))
1771
        fprintf (dump_file, "  FAILED: number of iterations not known\n");
1772
      return false;
1773
    }
1774
 
1775
  return true;
1776
}
1777
 
1778
/* Try to initialize REDUCTION_LIST for code generation part.
1779
   REDUCTION_LIST describes the reductions.  */
1780
 
1781
static bool
1782
try_create_reduction_list (loop_p loop, htab_t reduction_list)
1783
{
1784
  edge exit = single_dom_exit (loop);
1785
  gimple_stmt_iterator gsi;
1786
 
1787
  gcc_assert (exit);
1788
 
1789
  gather_scalar_reductions (loop, reduction_list);
1790
 
1791
 
1792
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1793
    {
1794
      gimple phi = gsi_stmt (gsi);
1795
      struct reduction_info *red;
1796
      imm_use_iterator imm_iter;
1797
      use_operand_p use_p;
1798
      gimple reduc_phi;
1799
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1800
 
1801
      if (is_gimple_reg (val))
1802
        {
1803
          if (dump_file && (dump_flags & TDF_DETAILS))
1804
            {
1805
              fprintf (dump_file, "phi is ");
1806
              print_gimple_stmt (dump_file, phi, 0, 0);
1807
              fprintf (dump_file, "arg of phi to exit:   value ");
1808
              print_generic_expr (dump_file, val, 0);
1809
              fprintf (dump_file, " used outside loop\n");
1810
              fprintf (dump_file,
1811
                       "  checking if it a part of reduction pattern:  \n");
1812
            }
1813
          if (htab_elements (reduction_list) == 0)
1814
            {
1815
              if (dump_file && (dump_flags & TDF_DETAILS))
1816
                fprintf (dump_file,
1817
                         "  FAILED: it is not a part of reduction.\n");
1818
              return false;
1819
            }
1820
          reduc_phi = NULL;
1821
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
1822
            {
1823
              if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
1824
                {
1825
                  reduc_phi = USE_STMT (use_p);
1826
                  break;
1827
                }
1828
            }
1829
          red = reduction_phi (reduction_list, reduc_phi);
1830
          if (red == NULL)
1831
            {
1832
              if (dump_file && (dump_flags & TDF_DETAILS))
1833
                fprintf (dump_file,
1834
                         "  FAILED: it is not a part of reduction.\n");
1835
              return false;
1836
            }
1837
          if (dump_file && (dump_flags & TDF_DETAILS))
1838
            {
1839
              fprintf (dump_file, "reduction phi is  ");
1840
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
1841
              fprintf (dump_file, "reduction stmt is  ");
1842
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
1843
            }
1844
        }
1845
    }
1846
 
1847
  /* The iterations of the loop may communicate only through bivs whose
1848
     iteration space can be distributed efficiently.  */
1849
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1850
    {
1851
      gimple phi = gsi_stmt (gsi);
1852
      tree def = PHI_RESULT (phi);
1853
      affine_iv iv;
1854
 
1855
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
1856
        {
1857
          struct reduction_info *red;
1858
 
1859
          red = reduction_phi (reduction_list, phi);
1860
          if (red == NULL)
1861
            {
1862
              if (dump_file && (dump_flags & TDF_DETAILS))
1863
                fprintf (dump_file,
1864
                         "  FAILED: scalar dependency between iterations\n");
1865
              return false;
1866
            }
1867
        }
1868
    }
1869
 
1870
 
1871
  return true;
1872
}
1873
 
1874
/* Detect parallel loops and generate parallel code using libgomp
1875
   primitives.  Returns true if some loop was parallelized, false
1876
   otherwise.  */
1877
 
1878
bool
1879
parallelize_loops (void)
1880
{
1881
  unsigned n_threads = flag_tree_parallelize_loops;
1882
  bool changed = false;
1883
  struct loop *loop;
1884
  struct tree_niter_desc niter_desc;
1885
  loop_iterator li;
1886
  htab_t reduction_list;
1887
  HOST_WIDE_INT estimated;
1888
  LOC loop_loc;
1889
 
1890
  /* Do not parallelize loops in the functions created by parallelization.  */
1891
  if (parallelized_function_p (cfun->decl))
1892
    return false;
1893
  if (cfun->has_nonlocal_label)
1894
    return false;
1895
 
1896
  reduction_list = htab_create (10, reduction_info_hash,
1897
                                     reduction_info_eq, free);
1898
  init_stmt_vec_info_vec ();
1899
 
1900
  FOR_EACH_LOOP (li, loop, 0)
1901
    {
1902
      htab_empty (reduction_list);
1903
      if (dump_file && (dump_flags & TDF_DETAILS))
1904
      {
1905
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
1906
        if (loop->inner)
1907
          fprintf (dump_file, "loop %d is not innermost\n",loop->num);
1908
        else
1909
          fprintf (dump_file, "loop %d is innermost\n",loop->num);
1910
      }
1911
 
1912
      /* If we use autopar in graphite pass, we use its marked dependency
1913
      checking results.  */
1914
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
1915
      {
1916
        if (dump_file && (dump_flags & TDF_DETAILS))
1917
           fprintf (dump_file, "loop is not parallel according to graphite\n");
1918
        continue;
1919
      }
1920
 
1921
      if (!single_dom_exit (loop))
1922
      {
1923
 
1924
        if (dump_file && (dump_flags & TDF_DETAILS))
1925
          fprintf (dump_file, "loop is !single_dom_exit\n");
1926
 
1927
        continue;
1928
      }
1929
 
1930
      if (/* And of course, the loop must be parallelizable.  */
1931
          !can_duplicate_loop_p (loop)
1932
          || loop_has_blocks_with_irreducible_flag (loop)
1933
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
1934
          /* FIXME: the check for vector phi nodes could be removed.  */
1935
          || loop_has_vector_phi_nodes (loop))
1936
        continue;
1937
      estimated = estimated_loop_iterations_int (loop, false);
1938
      /* FIXME: Bypass this check as graphite doesn't update the
1939
      count and frequency correctly now.  */
1940
      if (!flag_loop_parallelize_all
1941
          && ((estimated !=-1
1942
             && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
1943
              /* Do not bother with loops in cold areas.  */
1944
              || optimize_loop_nest_for_size_p (loop)))
1945
        continue;
1946
 
1947
      if (!try_get_loop_niter (loop, &niter_desc))
1948
        continue;
1949
 
1950
      if (!try_create_reduction_list (loop, reduction_list))
1951
        continue;
1952
 
1953
      if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
1954
        continue;
1955
 
1956
      changed = true;
1957
      if (dump_file && (dump_flags & TDF_DETAILS))
1958
      {
1959
        if (loop->inner)
1960
          fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
1961
        else
1962
          fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
1963
        loop_loc = find_loop_location (loop);
1964
        if (loop_loc != UNKNOWN_LOC)
1965
          fprintf (dump_file, "\nloop at %s:%d: ",
1966
                   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1967
      }
1968
      gen_parallel_loop (loop, reduction_list,
1969
                         n_threads, &niter_desc);
1970
      verify_flow_info ();
1971
      verify_dominators (CDI_DOMINATORS);
1972
      verify_loop_structure ();
1973
      verify_loop_closed_ssa ();
1974
    }
1975
 
1976
  free_stmt_vec_info_vec ();
1977
  htab_delete (reduction_list);
1978
 
1979
  /* Parallelization will cause new function calls to be inserted through
1980
     which local variables will escape.  Reset the points-to solutions
1981
     for ESCAPED and CALLUSED.  */
1982
  if (changed)
1983
    {
1984
      pt_solution_reset (&cfun->gimple_df->escaped);
1985
      pt_solution_reset (&cfun->gimple_df->callused);
1986
    }
1987
 
1988
  return changed;
1989
}
1990
 
1991
#include "gt-tree-parloops.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.