OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-parloops.c] - Blame information for rev 749

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Loop autoparallelization.
2
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5
   Zdenek Dvorak <dvorakz@suse.cz>.
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tree-flow.h"
27
#include "cfgloop.h"
28
#include "tree-data-ref.h"
29
#include "tree-scalar-evolution.h"
30
#include "gimple-pretty-print.h"
31
#include "tree-pass.h"
32
#include "langhooks.h"
33
#include "tree-vectorizer.h"
34
 
35
/* This pass tries to distribute iterations of loops into several threads.
36
   The implementation is straightforward -- for each loop we test whether its
37
   iterations are independent, and if it is the case (and some additional
38
   conditions regarding profitability and correctness are satisfied), we
39
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40
   machinery do its job.
41
 
42
   The most of the complexity is in bringing the code into shape expected
43
   by the omp expanders:
44
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45
      variable and that the exit test is at the start of the loop body
46
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47
      variables by accesses through pointers, and breaking up ssa chains
48
      by storing the values incoming to the parallelized loop to a structure
49
      passed to the new function as an argument (something similar is done
50
      in omp gimplification, unfortunately only a small part of the code
51
      can be shared).
52
 
53
   TODO:
54
   -- if there are several parallelizable loops in a function, it may be
55
      possible to generate the threads just once (using synchronization to
56
      ensure that cross-loop dependences are obeyed).
57
   -- handling of common scalar dependence patterns (accumulation, ...)
58
   -- handling of non-innermost loops  */
59
 
60
/*
61
  Reduction handling:
62
  currently we use vect_force_simple_reduction() to detect reduction patterns.
63
  The code transformation will be introduced by an example.
64
 
65
 
66
parloop
67
{
68
  int sum=1;
69
 
70
  for (i = 0; i < N; i++)
71
   {
72
    x[i] = i + 3;
73
    sum+=x[i];
74
   }
75
}
76
 
77
gimple-like code:
78
header_bb:
79
 
80
  # sum_29 = PHI <sum_11(5), 1(3)>
81
  # i_28 = PHI <i_12(5), 0(3)>
82
  D.1795_8 = i_28 + 3;
83
  x[i_28] = D.1795_8;
84
  sum_11 = D.1795_8 + sum_29;
85
  i_12 = i_28 + 1;
86
  if (N_6(D) > i_12)
87
    goto header_bb;
88
 
89
 
90
exit_bb:
91
 
92
  # sum_21 = PHI <sum_11(4)>
93
  printf (&"%d"[0], sum_21);
94
 
95
 
96
after reduction transformation (only relevant parts):
97
 
98
parloop
99
{
100
 
101
....
102
 
103
 
104
  # Storing the initial value given by the user.  #
105
 
106
  .paral_data_store.32.sum.27 = 1;
107
 
108
  #pragma omp parallel num_threads(4)
109
 
110
  #pragma omp for schedule(static)
111
 
112
  # The neutral element corresponding to the particular
113
  reduction's operation, e.g. 0 for PLUS_EXPR,
114
  1 for MULT_EXPR, etc. replaces the user's initial value.  #
115
 
116
  # sum.27_29 = PHI <sum.27_11, 0>
117
 
118
  sum.27_11 = D.1827_8 + sum.27_29;
119
 
120
  GIMPLE_OMP_CONTINUE
121
 
122
  # Adding this reduction phi is done at create_phi_for_local_result() #
123
  # sum.27_56 = PHI <sum.27_11, 0>
124
  GIMPLE_OMP_RETURN
125
 
126
  # Creating the atomic operation is done at
127
  create_call_for_reduction_1()  #
128
 
129
  #pragma omp atomic_load
130
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131
  D.1840_60 = sum.27_56 + D.1839_59;
132
  #pragma omp atomic_store (D.1840_60);
133
 
134
  GIMPLE_OMP_RETURN
135
 
136
 # collecting the result after the join of the threads is done at
137
  create_loads_for_reductions().
138
  The value computed by the threads is loaded from the
139
  shared struct.  #
140
 
141
 
142
  .paral_data_load.33_52 = &.paral_data_store.32;
143
  sum_37 =  .paral_data_load.33_52->sum.27;
144
  sum_43 = D.1795_41 + sum_37;
145
 
146
  exit bb:
147
  # sum_21 = PHI <sum_43, sum_26>
148
  printf (&"%d"[0], sum_21);
149
 
150
...
151
 
152
}
153
 
154
*/
155
 
156
/* Minimal number of iterations of a loop that should be executed in each
157
   thread.  */
158
#define MIN_PER_THREAD 100
159
 
160
/* Element of the hashtable, representing a
161
   reduction in the current loop.  */
162
struct reduction_info
163
{
164
  gimple reduc_stmt;            /* reduction statement.  */
165
  gimple reduc_phi;             /* The phi node defining the reduction.  */
166
  enum tree_code reduction_code;/* code for the reduction operation.  */
167
  unsigned reduc_version;       /* SSA_NAME_VERSION of original reduc_phi
168
                                   result.  */
169
  gimple keep_res;              /* The PHI_RESULT of this phi is the resulting value
170
                                   of the reduction variable when existing the loop. */
171
  tree initial_value;           /* The initial value of the reduction var before entering the loop.  */
172
  tree field;                   /*  the name of the field in the parloop data structure intended for reduction.  */
173
  tree init;                    /* reduction initialization value.  */
174
  gimple new_phi;               /* (helper field) Newly created phi node whose result
175
                                   will be passed to the atomic operation.  Represents
176
                                   the local result each thread computed for the reduction
177
                                   operation.  */
178
};
179
 
180
/* Equality and hash functions for hashtab code.  */
181
 
182
static int
183
reduction_info_eq (const void *aa, const void *bb)
184
{
185
  const struct reduction_info *a = (const struct reduction_info *) aa;
186
  const struct reduction_info *b = (const struct reduction_info *) bb;
187
 
188
  return (a->reduc_phi == b->reduc_phi);
189
}
190
 
191
static hashval_t
192
reduction_info_hash (const void *aa)
193
{
194
  const struct reduction_info *a = (const struct reduction_info *) aa;
195
 
196
  return a->reduc_version;
197
}
198
 
199
static struct reduction_info *
200
reduction_phi (htab_t reduction_list, gimple phi)
201
{
202
  struct reduction_info tmpred, *red;
203
 
204
  if (htab_elements (reduction_list) == 0 || phi == NULL)
205
    return NULL;
206
 
207
  tmpred.reduc_phi = phi;
208
  tmpred.reduc_version = gimple_uid (phi);
209
  red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210
 
211
  return red;
212
}
213
 
214
/* Element of hashtable of names to copy.  */
215
 
216
struct name_to_copy_elt
217
{
218
  unsigned version;     /* The version of the name to copy.  */
219
  tree new_name;        /* The new name used in the copy.  */
220
  tree field;           /* The field of the structure used to pass the
221
                           value.  */
222
};
223
 
224
/* Equality and hash functions for hashtab code.  */
225
 
226
static int
227
name_to_copy_elt_eq (const void *aa, const void *bb)
228
{
229
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230
  const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231
 
232
  return a->version == b->version;
233
}
234
 
235
static hashval_t
236
name_to_copy_elt_hash (const void *aa)
237
{
238
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239
 
240
  return (hashval_t) a->version;
241
}
242
 
243
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
245
   represents the denominator for every element in the matrix.  */
246
typedef struct lambda_trans_matrix_s
247
{
248
  lambda_matrix matrix;
249
  int rowsize;
250
  int colsize;
251
  int denominator;
252
} *lambda_trans_matrix;
253
#define LTM_MATRIX(T) ((T)->matrix)
254
#define LTM_ROWSIZE(T) ((T)->rowsize)
255
#define LTM_COLSIZE(T) ((T)->colsize)
256
#define LTM_DENOMINATOR(T) ((T)->denominator)
257
 
258
/* Allocate a new transformation matrix.  */
259
 
260
static lambda_trans_matrix
261
lambda_trans_matrix_new (int colsize, int rowsize,
262
                         struct obstack * lambda_obstack)
263
{
264
  lambda_trans_matrix ret;
265
 
266
  ret = (lambda_trans_matrix)
267
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269
  LTM_ROWSIZE (ret) = rowsize;
270
  LTM_COLSIZE (ret) = colsize;
271
  LTM_DENOMINATOR (ret) = 1;
272
  return ret;
273
}
274
 
275
/* Multiply a vector VEC by a matrix MAT.
276
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
277
   is stored in DEST which must be a vector of length M.  */
278
 
279
static void
280
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281
                           lambda_vector vec, lambda_vector dest)
282
{
283
  int i, j;
284
 
285
  lambda_vector_clear (dest, m);
286
  for (i = 0; i < m; i++)
287
    for (j = 0; j < n; j++)
288
      dest[i] += matrix[i][j] * vec[j];
289
}
290
 
291
/* Return true if TRANS is a legal transformation matrix that respects
292
   the dependence vectors in DISTS and DIRS.  The conservative answer
293
   is false.
294
 
295
   "Wolfe proves that a unimodular transformation represented by the
296
   matrix T is legal when applied to a loop nest with a set of
297
   lexicographically non-negative distance vectors RDG if and only if
298
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299
   i.e.: if and only if it transforms the lexicographically positive
300
   distance vectors to lexicographically positive vectors.  Note that
301
   a unimodular matrix must transform the zero vector (and only it) to
302
   the zero vector." S.Muchnick.  */
303
 
304
static bool
305
lambda_transform_legal_p (lambda_trans_matrix trans,
306
                          int nb_loops,
307
                          VEC (ddr_p, heap) *dependence_relations)
308
{
309
  unsigned int i, j;
310
  lambda_vector distres;
311
  struct data_dependence_relation *ddr;
312
 
313
  gcc_assert (LTM_COLSIZE (trans) == nb_loops
314
              && LTM_ROWSIZE (trans) == nb_loops);
315
 
316
  /* When there are no dependences, the transformation is correct.  */
317
  if (VEC_length (ddr_p, dependence_relations) == 0)
318
    return true;
319
 
320
  ddr = VEC_index (ddr_p, dependence_relations, 0);
321
  if (ddr == NULL)
322
    return true;
323
 
324
  /* When there is an unknown relation in the dependence_relations, we
325
     know that it is no worth looking at this loop nest: give up.  */
326
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327
    return false;
328
 
329
  distres = lambda_vector_new (nb_loops);
330
 
331
  /* For each distance vector in the dependence graph.  */
332
  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333
    {
334
      /* Don't care about relations for which we know that there is no
335
         dependence, nor about read-read (aka. output-dependences):
336
         these data accesses can happen in any order.  */
337
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339
        continue;
340
 
341
      /* Conservatively answer: "this transformation is not valid".  */
342
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343
        return false;
344
 
345
      /* If the dependence could not be captured by a distance vector,
346
         conservatively answer that the transform is not valid.  */
347
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
348
        return false;
349
 
350
      /* Compute trans.dist_vect */
351
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352
        {
353
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354
                                     DDR_DIST_VECT (ddr, j), distres);
355
 
356
          if (!lambda_vector_lexico_pos (distres, nb_loops))
357
            return false;
358
        }
359
    }
360
  return true;
361
}
362
 
363
/* Data dependency analysis. Returns true if the iterations of LOOP
364
   are independent on each other (that is, if we can execute them
365
   in parallel).  */
366
 
367
static bool
368
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
369
{
370
  VEC (loop_p, heap) *loop_nest;
371
  VEC (ddr_p, heap) *dependence_relations;
372
  VEC (data_reference_p, heap) *datarefs;
373
  lambda_trans_matrix trans;
374
  bool ret = false;
375
 
376
  if (dump_file && (dump_flags & TDF_DETAILS))
377
  {
378
    fprintf (dump_file, "Considering loop %d\n", loop->num);
379
    if (!loop->inner)
380
      fprintf (dump_file, "loop is innermost\n");
381
    else
382
      fprintf (dump_file, "loop NOT innermost\n");
383
   }
384
 
385
  /* Check for problems with dependences.  If the loop can be reversed,
386
     the iterations are independent.  */
387
  datarefs = VEC_alloc (data_reference_p, heap, 10);
388
  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
389
  loop_nest = VEC_alloc (loop_p, heap, 3);
390
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
391
                                           &dependence_relations))
392
    {
393
      if (dump_file && (dump_flags & TDF_DETAILS))
394
        fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
395
      ret = false;
396
      goto end;
397
    }
398
  if (dump_file && (dump_flags & TDF_DETAILS))
399
    dump_data_dependence_relations (dump_file, dependence_relations);
400
 
401
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
402
  LTM_MATRIX (trans)[0][0] = -1;
403
 
404
  if (lambda_transform_legal_p (trans, 1, dependence_relations))
405
    {
406
      ret = true;
407
      if (dump_file && (dump_flags & TDF_DETAILS))
408
        fprintf (dump_file, "  SUCCESS: may be parallelized\n");
409
    }
410
  else if (dump_file && (dump_flags & TDF_DETAILS))
411
    fprintf (dump_file,
412
             "  FAILED: data dependencies exist across iterations\n");
413
 
414
 end:
415
  VEC_free (loop_p, heap, loop_nest);
416
  free_dependence_relations (dependence_relations);
417
  free_data_refs (datarefs);
418
 
419
  return ret;
420
}
421
 
422
/* Return true when LOOP contains basic blocks marked with the
423
   BB_IRREDUCIBLE_LOOP flag.  */
424
 
425
static inline bool
426
loop_has_blocks_with_irreducible_flag (struct loop *loop)
427
{
428
  unsigned i;
429
  basic_block *bbs = get_loop_body_in_dom_order (loop);
430
  bool res = true;
431
 
432
  for (i = 0; i < loop->num_nodes; i++)
433
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434
      goto end;
435
 
436
  res = false;
437
 end:
438
  free (bbs);
439
  return res;
440
}
441
 
442
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
443
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
444
   to their addresses that can be reused.  The address of OBJ is known to
445
   be invariant in the whole function.  Other needed statements are placed
446
   right before GSI.  */
447
 
448
static tree
449
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
450
                 gimple_stmt_iterator *gsi)
451
{
452
  int uid;
453
  void **dslot;
454
  struct int_tree_map ielt, *nielt;
455
  tree *var_p, name, bvar, addr;
456
  gimple stmt;
457
  gimple_seq stmts;
458
 
459
  /* Since the address of OBJ is invariant, the trees may be shared.
460
     Avoid rewriting unrelated parts of the code.  */
461
  obj = unshare_expr (obj);
462
  for (var_p = &obj;
463
       handled_component_p (*var_p);
464
       var_p = &TREE_OPERAND (*var_p, 0))
465
    continue;
466
 
467
  /* Canonicalize the access to base on a MEM_REF.  */
468
  if (DECL_P (*var_p))
469
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470
 
471
  /* Assign a canonical SSA name to the address of the base decl used
472
     in the address and share it for all accesses and addresses based
473
     on it.  */
474
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
475
  ielt.uid = uid;
476
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
477
  if (!*dslot)
478
    {
479
      if (gsi == NULL)
480
        return NULL;
481
      addr = TREE_OPERAND (*var_p, 0);
482
      bvar = create_tmp_var (TREE_TYPE (addr),
483
                             get_name (TREE_OPERAND
484
                                         (TREE_OPERAND (*var_p, 0), 0)));
485
      add_referenced_var (bvar);
486
      stmt = gimple_build_assign (bvar, addr);
487
      name = make_ssa_name (bvar, stmt);
488
      gimple_assign_set_lhs (stmt, name);
489
      gsi_insert_on_edge_immediate (entry, stmt);
490
 
491
      nielt = XNEW (struct int_tree_map);
492
      nielt->uid = uid;
493
      nielt->to = name;
494
      *dslot = nielt;
495
    }
496
  else
497
    name = ((struct int_tree_map *) *dslot)->to;
498
 
499
  /* Express the address in terms of the canonical SSA name.  */
500
  TREE_OPERAND (*var_p, 0) = name;
501
  if (gsi == NULL)
502
    return build_fold_addr_expr_with_type (obj, type);
503
 
504
  name = force_gimple_operand (build_addr (obj, current_function_decl),
505
                               &stmts, true, NULL_TREE);
506
  if (!gimple_seq_empty_p (stmts))
507
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
508
 
509
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
510
    {
511
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
512
                                   NULL_TREE);
513
      if (!gimple_seq_empty_p (stmts))
514
        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
515
    }
516
 
517
  return name;
518
}
519
 
520
/* Callback for htab_traverse.  Create the initialization statement
521
   for reduction described in SLOT, and place it at the preheader of
522
   the loop described in DATA.  */
523
 
524
static int
525
initialize_reductions (void **slot, void *data)
526
{
527
  tree init, c;
528
  tree bvar, type, arg;
529
  edge e;
530
 
531
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
532
  struct loop *loop = (struct loop *) data;
533
 
534
  /* Create initialization in preheader:
535
     reduction_variable = initialization value of reduction.  */
536
 
537
  /* In the phi node at the header, replace the argument coming
538
     from the preheader with the reduction initialization value.  */
539
 
540
  /* Create a new variable to initialize the reduction.  */
541
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
542
  bvar = create_tmp_var (type, "reduction");
543
  add_referenced_var (bvar);
544
 
545
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
546
                        OMP_CLAUSE_REDUCTION);
547
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
548
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
549
 
550
  init = omp_reduction_init (c, TREE_TYPE (bvar));
551
  reduc->init = init;
552
 
553
  /* Replace the argument representing the initialization value
554
     with the initialization value for the reduction (neutral
555
     element for the particular operation, e.g. 0 for PLUS_EXPR,
556
     1 for MULT_EXPR, etc).
557
     Keep the old value in a new variable "reduction_initial",
558
     that will be taken in consideration after the parallel
559
     computing is done.  */
560
 
561
  e = loop_preheader_edge (loop);
562
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
563
  /* Create new variable to hold the initial value.  */
564
 
565
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
566
           (reduc->reduc_phi, loop_preheader_edge (loop)), init);
567
  reduc->initial_value = arg;
568
  return 1;
569
}
570
 
571
struct elv_data
572
{
573
  struct walk_stmt_info info;
574
  edge entry;
575
  htab_t decl_address;
576
  gimple_stmt_iterator *gsi;
577
  bool changed;
578
  bool reset;
579
};
580
 
581
/* Eliminates references to local variables in *TP out of the single
582
   entry single exit region starting at DTA->ENTRY.
583
   DECL_ADDRESS contains addresses of the references that had their
584
   address taken already.  If the expression is changed, CHANGED is
585
   set to true.  Callback for walk_tree.  */
586
 
587
static tree
588
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
589
{
590
  struct elv_data *const dta = (struct elv_data *) data;
591
  tree t = *tp, var, addr, addr_type, type, obj;
592
 
593
  if (DECL_P (t))
594
    {
595
      *walk_subtrees = 0;
596
 
597
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
598
        return NULL_TREE;
599
 
600
      type = TREE_TYPE (t);
601
      addr_type = build_pointer_type (type);
602
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
603
                              dta->gsi);
604
      if (dta->gsi == NULL && addr == NULL_TREE)
605
        {
606
          dta->reset = true;
607
          return NULL_TREE;
608
        }
609
 
610
      *tp = build_simple_mem_ref (addr);
611
 
612
      dta->changed = true;
613
      return NULL_TREE;
614
    }
615
 
616
  if (TREE_CODE (t) == ADDR_EXPR)
617
    {
618
      /* ADDR_EXPR may appear in two contexts:
619
         -- as a gimple operand, when the address taken is a function invariant
620
         -- as gimple rhs, when the resulting address in not a function
621
            invariant
622
         We do not need to do anything special in the latter case (the base of
623
         the memory reference whose address is taken may be replaced in the
624
         DECL_P case).  The former case is more complicated, as we need to
625
         ensure that the new address is still a gimple operand.  Thus, it
626
         is not sufficient to replace just the base of the memory reference --
627
         we need to move the whole computation of the address out of the
628
         loop.  */
629
      if (!is_gimple_val (t))
630
        return NULL_TREE;
631
 
632
      *walk_subtrees = 0;
633
      obj = TREE_OPERAND (t, 0);
634
      var = get_base_address (obj);
635
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
636
        return NULL_TREE;
637
 
638
      addr_type = TREE_TYPE (t);
639
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
640
                              dta->gsi);
641
      if (dta->gsi == NULL && addr == NULL_TREE)
642
        {
643
          dta->reset = true;
644
          return NULL_TREE;
645
        }
646
      *tp = addr;
647
 
648
      dta->changed = true;
649
      return NULL_TREE;
650
    }
651
 
652
  if (!EXPR_P (t))
653
    *walk_subtrees = 0;
654
 
655
  return NULL_TREE;
656
}
657
 
658
/* Moves the references to local variables in STMT at *GSI out of the single
659
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
660
   addresses of the references that had their address taken
661
   already.  */
662
 
663
static void
664
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
665
                                htab_t decl_address)
666
{
667
  struct elv_data dta;
668
  gimple stmt = gsi_stmt (*gsi);
669
 
670
  memset (&dta.info, '\0', sizeof (dta.info));
671
  dta.entry = entry;
672
  dta.decl_address = decl_address;
673
  dta.changed = false;
674
  dta.reset = false;
675
 
676
  if (gimple_debug_bind_p (stmt))
677
    {
678
      dta.gsi = NULL;
679
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
680
                 eliminate_local_variables_1, &dta.info, NULL);
681
      if (dta.reset)
682
        {
683
          gimple_debug_bind_reset_value (stmt);
684
          dta.changed = true;
685
        }
686
    }
687
  else
688
    {
689
      dta.gsi = gsi;
690
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
691
    }
692
 
693
  if (dta.changed)
694
    update_stmt (stmt);
695
}
696
 
697
/* Eliminates the references to local variables from the single entry
698
   single exit region between the ENTRY and EXIT edges.
699
 
700
   This includes:
701
   1) Taking address of a local variable -- these are moved out of the
702
   region (and temporary variable is created to hold the address if
703
   necessary).
704
 
705
   2) Dereferencing a local variable -- these are replaced with indirect
706
   references.  */
707
 
708
static void
709
eliminate_local_variables (edge entry, edge exit)
710
{
711
  basic_block bb;
712
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
713
  unsigned i;
714
  gimple_stmt_iterator gsi;
715
  bool has_debug_stmt = false;
716
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
717
                                     free);
718
  basic_block entry_bb = entry->src;
719
  basic_block exit_bb = exit->dest;
720
 
721
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
722
 
723
  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
724
    if (bb != entry_bb && bb != exit_bb)
725
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
726
        if (is_gimple_debug (gsi_stmt (gsi)))
727
          {
728
            if (gimple_debug_bind_p (gsi_stmt (gsi)))
729
              has_debug_stmt = true;
730
          }
731
        else
732
          eliminate_local_variables_stmt (entry, &gsi, decl_address);
733
 
734
  if (has_debug_stmt)
735
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
736
      if (bb != entry_bb && bb != exit_bb)
737
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
738
          if (gimple_debug_bind_p (gsi_stmt (gsi)))
739
            eliminate_local_variables_stmt (entry, &gsi, decl_address);
740
 
741
  htab_delete (decl_address);
742
  VEC_free (basic_block, heap, body);
743
}
744
 
745
/* Returns true if expression EXPR is not defined between ENTRY and
746
   EXIT, i.e. if all its operands are defined outside of the region.  */
747
 
748
static bool
749
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
750
{
751
  basic_block entry_bb = entry->src;
752
  basic_block exit_bb = exit->dest;
753
  basic_block def_bb;
754
 
755
  if (is_gimple_min_invariant (expr))
756
    return true;
757
 
758
  if (TREE_CODE (expr) == SSA_NAME)
759
    {
760
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
761
      if (def_bb
762
          && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
763
          && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
764
        return false;
765
 
766
      return true;
767
    }
768
 
769
  return false;
770
}
771
 
772
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
773
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
774
   its duplicate stored in NAME_COPIES is returned.
775
 
776
   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
777
   duplicated, storing the copies in DECL_COPIES.  */
778
 
779
static tree
780
separate_decls_in_region_name (tree name,
781
                               htab_t name_copies, htab_t decl_copies,
782
                               bool copy_name_p)
783
{
784
  tree copy, var, var_copy;
785
  unsigned idx, uid, nuid;
786
  struct int_tree_map ielt, *nielt;
787
  struct name_to_copy_elt elt, *nelt;
788
  void **slot, **dslot;
789
 
790
  if (TREE_CODE (name) != SSA_NAME)
791
    return name;
792
 
793
  idx = SSA_NAME_VERSION (name);
794
  elt.version = idx;
795
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
796
                                   copy_name_p ? INSERT : NO_INSERT);
797
  if (slot && *slot)
798
    return ((struct name_to_copy_elt *) *slot)->new_name;
799
 
800
  var = SSA_NAME_VAR (name);
801
  uid = DECL_UID (var);
802
  ielt.uid = uid;
803
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
804
  if (!*dslot)
805
    {
806
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
807
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
808
      add_referenced_var (var_copy);
809
      nielt = XNEW (struct int_tree_map);
810
      nielt->uid = uid;
811
      nielt->to = var_copy;
812
      *dslot = nielt;
813
 
814
      /* Ensure that when we meet this decl next time, we won't duplicate
815
         it again.  */
816
      nuid = DECL_UID (var_copy);
817
      ielt.uid = nuid;
818
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
819
      gcc_assert (!*dslot);
820
      nielt = XNEW (struct int_tree_map);
821
      nielt->uid = nuid;
822
      nielt->to = var_copy;
823
      *dslot = nielt;
824
    }
825
  else
826
    var_copy = ((struct int_tree_map *) *dslot)->to;
827
 
828
  if (copy_name_p)
829
    {
830
      copy = duplicate_ssa_name (name, NULL);
831
      nelt = XNEW (struct name_to_copy_elt);
832
      nelt->version = idx;
833
      nelt->new_name = copy;
834
      nelt->field = NULL_TREE;
835
      *slot = nelt;
836
    }
837
  else
838
    {
839
      gcc_assert (!slot);
840
      copy = name;
841
    }
842
 
843
  SSA_NAME_VAR (copy) = var_copy;
844
  return copy;
845
}
846
 
847
/* Finds the ssa names used in STMT that are defined outside the
848
   region between ENTRY and EXIT and replaces such ssa names with
849
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
850
   decls of all ssa names used in STMT (including those defined in
851
   LOOP) are replaced with the new temporary variables; the
852
   replacement decls are stored in DECL_COPIES.  */
853
 
854
static void
855
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
856
                               htab_t name_copies, htab_t decl_copies)
857
{
858
  use_operand_p use;
859
  def_operand_p def;
860
  ssa_op_iter oi;
861
  tree name, copy;
862
  bool copy_name_p;
863
 
864
  mark_virtual_ops_for_renaming (stmt);
865
 
866
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
867
  {
868
    name = DEF_FROM_PTR (def);
869
    gcc_assert (TREE_CODE (name) == SSA_NAME);
870
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
871
                                          false);
872
    gcc_assert (copy == name);
873
  }
874
 
875
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
876
  {
877
    name = USE_FROM_PTR (use);
878
    if (TREE_CODE (name) != SSA_NAME)
879
      continue;
880
 
881
    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
882
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
883
                                          copy_name_p);
884
    SET_USE (use, copy);
885
  }
886
}
887
 
888
/* Finds the ssa names used in STMT that are defined outside the
889
   region between ENTRY and EXIT and replaces such ssa names with
890
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
891
   decls of all ssa names used in STMT (including those defined in
892
   LOOP) are replaced with the new temporary variables; the
893
   replacement decls are stored in DECL_COPIES.  */
894
 
895
static bool
896
separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
897
                                htab_t decl_copies)
898
{
899
  use_operand_p use;
900
  ssa_op_iter oi;
901
  tree var, name;
902
  struct int_tree_map ielt;
903
  struct name_to_copy_elt elt;
904
  void **slot, **dslot;
905
 
906
  if (gimple_debug_bind_p (stmt))
907
    var = gimple_debug_bind_get_var (stmt);
908
  else if (gimple_debug_source_bind_p (stmt))
909
    var = gimple_debug_source_bind_get_var (stmt);
910
  else
911
    return true;
912
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
913
    return true;
914
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
915
  ielt.uid = DECL_UID (var);
916
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
917
  if (!dslot)
918
    return true;
919
  if (gimple_debug_bind_p (stmt))
920
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
921
  else if (gimple_debug_source_bind_p (stmt))
922
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
923
 
924
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
925
  {
926
    name = USE_FROM_PTR (use);
927
    if (TREE_CODE (name) != SSA_NAME)
928
      continue;
929
 
930
    elt.version = SSA_NAME_VERSION (name);
931
    slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
932
    if (!slot)
933
      {
934
        gimple_debug_bind_reset_value (stmt);
935
        update_stmt (stmt);
936
        break;
937
      }
938
 
939
    SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
940
  }
941
 
942
  return false;
943
}
944
 
945
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
946
   specified in SLOT. The type is passed in DATA.  */
947
 
948
static int
949
add_field_for_reduction (void **slot, void *data)
950
{
951
 
952
  struct reduction_info *const red = (struct reduction_info *) *slot;
953
  tree const type = (tree) data;
954
  tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
955
  tree field = build_decl (gimple_location (red->reduc_stmt),
956
                           FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
957
 
958
  insert_field_into_struct (type, field);
959
 
960
  red->field = field;
961
 
962
  return 1;
963
}
964
 
965
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
966
   described in SLOT. The type is passed in DATA.  */
967
 
968
static int
969
add_field_for_name (void **slot, void *data)
970
{
971
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
972
  tree type = (tree) data;
973
  tree name = ssa_name (elt->version);
974
  tree var = SSA_NAME_VAR (name);
975
  tree field = build_decl (DECL_SOURCE_LOCATION (var),
976
                           FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
977
 
978
  insert_field_into_struct (type, field);
979
  elt->field = field;
980
 
981
  return 1;
982
}
983
 
984
/* Callback for htab_traverse.  A local result is the intermediate result
985
   computed by a single
986
   thread, or the initial value in case no iteration was executed.
987
   This function creates a phi node reflecting these values.
988
   The phi's result will be stored in NEW_PHI field of the
989
   reduction's data structure.  */
990
 
991
static int
992
create_phi_for_local_result (void **slot, void *data)
993
{
994
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
995
  const struct loop *const loop = (const struct loop *) data;
996
  edge e;
997
  gimple new_phi;
998
  basic_block store_bb;
999
  tree local_res;
1000
  source_location locus;
1001
 
1002
  /* STORE_BB is the block where the phi
1003
     should be stored.  It is the destination of the loop exit.
1004
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
1005
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1006
 
1007
  /* STORE_BB has two predecessors.  One coming from  the loop
1008
     (the reduction's result is computed at the loop),
1009
     and another coming from a block preceding the loop,
1010
     when no iterations
1011
     are executed (the initial value should be taken).  */
1012
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1013
    e = EDGE_PRED (store_bb, 1);
1014
  else
1015
    e = EDGE_PRED (store_bb, 0);
1016
  local_res
1017
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
1018
                     NULL);
1019
  locus = gimple_location (reduc->reduc_stmt);
1020
  new_phi = create_phi_node (local_res, store_bb);
1021
  SSA_NAME_DEF_STMT (local_res) = new_phi;
1022
  add_phi_arg (new_phi, reduc->init, e, locus);
1023
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1024
               FALLTHRU_EDGE (loop->latch), locus);
1025
  reduc->new_phi = new_phi;
1026
 
1027
  return 1;
1028
}
1029
 
1030
struct clsn_data
1031
{
1032
  tree store;
1033
  tree load;
1034
 
1035
  basic_block store_bb;
1036
  basic_block load_bb;
1037
};
1038
 
1039
/* Callback for htab_traverse.  Create an atomic instruction for the
1040
   reduction described in SLOT.
1041
   DATA annotates the place in memory the atomic operation relates to,
1042
   and the basic block it needs to be generated in.  */
1043
 
1044
static int
1045
create_call_for_reduction_1 (void **slot, void *data)
1046
{
1047
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
1048
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1049
  gimple_stmt_iterator gsi;
1050
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1051
  tree load_struct;
1052
  basic_block bb;
1053
  basic_block new_bb;
1054
  edge e;
1055
  tree t, addr, ref, x;
1056
  tree tmp_load, name;
1057
  gimple load;
1058
 
1059
  load_struct = build_simple_mem_ref (clsn_data->load);
1060
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1061
 
1062
  addr = build_addr (t, current_function_decl);
1063
 
1064
  /* Create phi node.  */
1065
  bb = clsn_data->load_bb;
1066
 
1067
  e = split_block (bb, t);
1068
  new_bb = e->dest;
1069
 
1070
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
1071
  add_referenced_var (tmp_load);
1072
  tmp_load = make_ssa_name (tmp_load, NULL);
1073
  load = gimple_build_omp_atomic_load (tmp_load, addr);
1074
  SSA_NAME_DEF_STMT (tmp_load) = load;
1075
  gsi = gsi_start_bb (new_bb);
1076
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1077
 
1078
  e = split_block (new_bb, load);
1079
  new_bb = e->dest;
1080
  gsi = gsi_start_bb (new_bb);
1081
  ref = tmp_load;
1082
  x = fold_build2 (reduc->reduction_code,
1083
                   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1084
                   PHI_RESULT (reduc->new_phi));
1085
 
1086
  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1087
                                   GSI_CONTINUE_LINKING);
1088
 
1089
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1090
  return 1;
1091
}
1092
 
1093
/* Create the atomic operation at the join point of the threads.
1094
   REDUCTION_LIST describes the reductions in the LOOP.
1095
   LD_ST_DATA describes the shared data structure where
1096
   shared data is stored in and loaded from.  */
1097
static void
1098
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1099
                           struct clsn_data *ld_st_data)
1100
{
1101
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
1102
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
1103
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1104
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1105
}
1106
 
1107
/* Callback for htab_traverse.  Loads the final reduction value at the
1108
   join point of all threads, and inserts it in the right place.  */
1109
 
1110
static int
1111
create_loads_for_reductions (void **slot, void *data)
1112
{
1113
  struct reduction_info *const red = (struct reduction_info *) *slot;
1114
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1115
  gimple stmt;
1116
  gimple_stmt_iterator gsi;
1117
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1118
  tree load_struct;
1119
  tree name;
1120
  tree x;
1121
 
1122
  gsi = gsi_after_labels (clsn_data->load_bb);
1123
  load_struct = build_simple_mem_ref (clsn_data->load);
1124
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1125
                        NULL_TREE);
1126
 
1127
  x = load_struct;
1128
  name = PHI_RESULT (red->keep_res);
1129
  stmt = gimple_build_assign (name, x);
1130
  SSA_NAME_DEF_STMT (name) = stmt;
1131
 
1132
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1133
 
1134
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1135
       !gsi_end_p (gsi); gsi_next (&gsi))
1136
    if (gsi_stmt (gsi) == red->keep_res)
1137
      {
1138
        remove_phi_node (&gsi, false);
1139
        return 1;
1140
      }
1141
  gcc_unreachable ();
1142
}
1143
 
1144
/* Load the reduction result that was stored in LD_ST_DATA.
1145
   REDUCTION_LIST describes the list of reductions that the
1146
   loads should be generated for.  */
1147
static void
1148
create_final_loads_for_reduction (htab_t reduction_list,
1149
                                  struct clsn_data *ld_st_data)
1150
{
1151
  gimple_stmt_iterator gsi;
1152
  tree t;
1153
  gimple stmt;
1154
 
1155
  gsi = gsi_after_labels (ld_st_data->load_bb);
1156
  t = build_fold_addr_expr (ld_st_data->store);
1157
  stmt = gimple_build_assign (ld_st_data->load, t);
1158
 
1159
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1160
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1161
 
1162
  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1163
 
1164
}
1165
 
1166
/* Callback for htab_traverse.  Store the neutral value for the
1167
  particular reduction's operation, e.g. 0 for PLUS_EXPR,
1168
  1 for MULT_EXPR, etc. into the reduction field.
1169
  The reduction is specified in SLOT. The store information is
1170
  passed in DATA.  */
1171
 
1172
static int
1173
create_stores_for_reduction (void **slot, void *data)
1174
{
1175
  struct reduction_info *const red = (struct reduction_info *) *slot;
1176
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1177
  tree t;
1178
  gimple stmt;
1179
  gimple_stmt_iterator gsi;
1180
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1181
 
1182
  gsi = gsi_last_bb (clsn_data->store_bb);
1183
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1184
  stmt = gimple_build_assign (t, red->initial_value);
1185
  mark_virtual_ops_for_renaming (stmt);
1186
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1187
 
1188
  return 1;
1189
}
1190
 
1191
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1192
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
1193
   specified in SLOT.  */
1194
 
1195
static int
1196
create_loads_and_stores_for_name (void **slot, void *data)
1197
{
1198
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1199
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1200
  tree t;
1201
  gimple stmt;
1202
  gimple_stmt_iterator gsi;
1203
  tree type = TREE_TYPE (elt->new_name);
1204
  tree load_struct;
1205
 
1206
  gsi = gsi_last_bb (clsn_data->store_bb);
1207
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1208
  stmt = gimple_build_assign (t, ssa_name (elt->version));
1209
  mark_virtual_ops_for_renaming (stmt);
1210
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1211
 
1212
  gsi = gsi_last_bb (clsn_data->load_bb);
1213
  load_struct = build_simple_mem_ref (clsn_data->load);
1214
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1215
  stmt = gimple_build_assign (elt->new_name, t);
1216
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1217
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1218
 
1219
  return 1;
1220
}
1221
 
1222
/* Moves all the variables used in LOOP and defined outside of it (including
1223
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1224
   name) to a structure created for this purpose.  The code
1225
 
1226
   while (1)
1227
     {
1228
       use (a);
1229
       use (b);
1230
     }
1231
 
1232
   is transformed this way:
1233
 
1234
   bb0:
1235
   old.a = a;
1236
   old.b = b;
1237
 
1238
   bb1:
1239
   a' = new->a;
1240
   b' = new->b;
1241
   while (1)
1242
     {
1243
       use (a');
1244
       use (b');
1245
     }
1246
 
1247
   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
1248
   pointer `new' is intentionally not initialized (the loop will be split to a
1249
   separate function later, and `new' will be initialized from its arguments).
1250
   LD_ST_DATA holds information about the shared data structure used to pass
1251
   information among the threads.  It is initialized here, and
1252
   gen_parallel_loop will pass it to create_call_for_reduction that
1253
   needs this information.  REDUCTION_LIST describes the reductions
1254
   in LOOP.  */
1255
 
1256
static void
1257
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1258
                          tree *arg_struct, tree *new_arg_struct,
1259
                          struct clsn_data *ld_st_data)
1260
 
1261
{
1262
  basic_block bb1 = split_edge (entry);
1263
  basic_block bb0 = single_pred (bb1);
1264
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1265
                                    name_to_copy_elt_eq, free);
1266
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1267
                                    free);
1268
  unsigned i;
1269
  tree type, type_name, nvar;
1270
  gimple_stmt_iterator gsi;
1271
  struct clsn_data clsn_data;
1272
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1273
  basic_block bb;
1274
  basic_block entry_bb = bb1;
1275
  basic_block exit_bb = exit->dest;
1276
  bool has_debug_stmt = false;
1277
 
1278
  entry = single_succ_edge (entry_bb);
1279
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1280
 
1281
  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1282
    {
1283
      if (bb != entry_bb && bb != exit_bb)
1284
        {
1285
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1286
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1287
                                           name_copies, decl_copies);
1288
 
1289
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1290
            {
1291
              gimple stmt = gsi_stmt (gsi);
1292
 
1293
              if (is_gimple_debug (stmt))
1294
                has_debug_stmt = true;
1295
              else
1296
                separate_decls_in_region_stmt (entry, exit, stmt,
1297
                                               name_copies, decl_copies);
1298
            }
1299
        }
1300
    }
1301
 
1302
  /* Now process debug bind stmts.  We must not create decls while
1303
     processing debug stmts, so we defer their processing so as to
1304
     make sure we will have debug info for as many variables as
1305
     possible (all of those that were dealt with in the loop above),
1306
     and discard those for which we know there's nothing we can
1307
     do.  */
1308
  if (has_debug_stmt)
1309
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
1310
      if (bb != entry_bb && bb != exit_bb)
1311
        {
1312
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1313
            {
1314
              gimple stmt = gsi_stmt (gsi);
1315
 
1316
              if (is_gimple_debug (stmt))
1317
                {
1318
                  if (separate_decls_in_region_debug (stmt, name_copies,
1319
                                                      decl_copies))
1320
                    {
1321
                      gsi_remove (&gsi, true);
1322
                      continue;
1323
                    }
1324
                }
1325
 
1326
              gsi_next (&gsi);
1327
            }
1328
        }
1329
 
1330
  VEC_free (basic_block, heap, body);
1331
 
1332
  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
1333
    {
1334
      /* It may happen that there is nothing to copy (if there are only
1335
         loop carried and external variables in the loop).  */
1336
      *arg_struct = NULL;
1337
      *new_arg_struct = NULL;
1338
    }
1339
  else
1340
    {
1341
      /* Create the type for the structure to store the ssa names to.  */
1342
      type = lang_hooks.types.make_type (RECORD_TYPE);
1343
      type_name = build_decl (UNKNOWN_LOCATION,
1344
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
1345
                              type);
1346
      TYPE_NAME (type) = type_name;
1347
 
1348
      htab_traverse (name_copies, add_field_for_name, type);
1349
      if (reduction_list && htab_elements (reduction_list) > 0)
1350
        {
1351
          /* Create the fields for reductions.  */
1352
          htab_traverse (reduction_list, add_field_for_reduction,
1353
                         type);
1354
        }
1355
      layout_type (type);
1356
 
1357
      /* Create the loads and stores.  */
1358
      *arg_struct = create_tmp_var (type, ".paral_data_store");
1359
      add_referenced_var (*arg_struct);
1360
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1361
      add_referenced_var (nvar);
1362
      *new_arg_struct = make_ssa_name (nvar, NULL);
1363
 
1364
      ld_st_data->store = *arg_struct;
1365
      ld_st_data->load = *new_arg_struct;
1366
      ld_st_data->store_bb = bb0;
1367
      ld_st_data->load_bb = bb1;
1368
 
1369
      htab_traverse (name_copies, create_loads_and_stores_for_name,
1370
                     ld_st_data);
1371
 
1372
      /* Load the calculation from memory (after the join of the threads).  */
1373
 
1374
      if (reduction_list && htab_elements (reduction_list) > 0)
1375
        {
1376
          htab_traverse (reduction_list, create_stores_for_reduction,
1377
                        ld_st_data);
1378
          clsn_data.load = make_ssa_name (nvar, NULL);
1379
          clsn_data.load_bb = exit->dest;
1380
          clsn_data.store = ld_st_data->store;
1381
          create_final_loads_for_reduction (reduction_list, &clsn_data);
1382
        }
1383
    }
1384
 
1385
  htab_delete (decl_copies);
1386
  htab_delete (name_copies);
1387
}
1388
 
1389
/* Bitmap containing uids of functions created by parallelization.  We cannot
1390
   allocate it from the default obstack, as it must live across compilation
1391
   of several functions; we make it gc allocated instead.  */
1392
 
1393
static GTY(()) bitmap parallelized_functions;
1394
 
1395
/* Returns true if FN was created by create_loop_fn.  */
1396
 
1397
static bool
1398
parallelized_function_p (tree fn)
1399
{
1400
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1401
    return false;
1402
 
1403
  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1404
}
1405
 
1406
/* Creates and returns an empty function that will receive the body of
1407
   a parallelized loop.  */
1408
 
1409
static tree
1410
create_loop_fn (location_t loc)
1411
{
1412
  char buf[100];
1413
  char *tname;
1414
  tree decl, type, name, t;
1415
  struct function *act_cfun = cfun;
1416
  static unsigned loopfn_num;
1417
 
1418
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1419
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1420
  clean_symbol_name (tname);
1421
  name = get_identifier (tname);
1422
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1423
 
1424
  decl = build_decl (loc, FUNCTION_DECL, name, type);
1425
  if (!parallelized_functions)
1426
    parallelized_functions = BITMAP_GGC_ALLOC ();
1427
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1428
 
1429
  TREE_STATIC (decl) = 1;
1430
  TREE_USED (decl) = 1;
1431
  DECL_ARTIFICIAL (decl) = 1;
1432
  DECL_IGNORED_P (decl) = 0;
1433
  TREE_PUBLIC (decl) = 0;
1434
  DECL_UNINLINABLE (decl) = 1;
1435
  DECL_EXTERNAL (decl) = 0;
1436
  DECL_CONTEXT (decl) = NULL_TREE;
1437
  DECL_INITIAL (decl) = make_node (BLOCK);
1438
 
1439
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1440
  DECL_ARTIFICIAL (t) = 1;
1441
  DECL_IGNORED_P (t) = 1;
1442
  DECL_RESULT (decl) = t;
1443
 
1444
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1445
                  ptr_type_node);
1446
  DECL_ARTIFICIAL (t) = 1;
1447
  DECL_ARG_TYPE (t) = ptr_type_node;
1448
  DECL_CONTEXT (t) = decl;
1449
  TREE_USED (t) = 1;
1450
  DECL_ARGUMENTS (decl) = t;
1451
 
1452
  allocate_struct_function (decl, false);
1453
 
1454
  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1455
     it.  */
1456
  set_cfun (act_cfun);
1457
 
1458
  return decl;
1459
}
1460
 
1461
/* Moves the exit condition of LOOP to the beginning of its header, and
1462
   duplicates the part of the last iteration that gets disabled to the
1463
   exit of the loop.  NIT is the number of iterations of the loop
1464
   (used to initialize the variables in the duplicated part).
1465
 
1466
   TODO: the common case is that latch of the loop is empty and immediately
1467
   follows the loop exit.  In this case, it would be better not to copy the
1468
   body of the loop, but only move the entry of the loop directly before the
1469
   exit check and increase the number of iterations of the loop by one.
1470
   This may need some additional preconditioning in case NIT = ~0.
1471
   REDUCTION_LIST describes the reductions in LOOP.  */
1472
 
1473
static void
1474
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
1475
{
1476
  basic_block *bbs, *nbbs, ex_bb, orig_header;
1477
  unsigned n;
1478
  bool ok;
1479
  edge exit = single_dom_exit (loop), hpred;
1480
  tree control, control_name, res, t;
1481
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
1482
  gimple_stmt_iterator gsi;
1483
  tree nit_1;
1484
  edge exit_1;
1485
  tree new_rhs;
1486
 
1487
  split_block_after_labels (loop->header);
1488
  orig_header = single_succ (loop->header);
1489
  hpred = single_succ_edge (loop->header);
1490
 
1491
  cond_stmt = last_stmt (exit->src);
1492
  control = gimple_cond_lhs (cond_stmt);
1493
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1494
 
1495
  /* Make sure that we have phi nodes on exit for all loop header phis
1496
     (create_parallel_loop requires that).  */
1497
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1498
    {
1499
      phi = gsi_stmt (gsi);
1500
      res = PHI_RESULT (phi);
1501
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
1502
      SET_PHI_RESULT (phi, t);
1503
      nphi = create_phi_node (res, orig_header);
1504
      SSA_NAME_DEF_STMT (res) = nphi;
1505
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1506
 
1507
      if (res == control)
1508
        {
1509
          gimple_cond_set_lhs (cond_stmt, t);
1510
          update_stmt (cond_stmt);
1511
          control = t;
1512
        }
1513
    }
1514
 
1515
 /* Setting the condition towards peeling the last iteration:
1516
    If the block consisting of the exit condition has the latch as
1517
    successor, then the body of the loop is executed before
1518
    the exit condition is tested.  In such case, moving the
1519
    condition to the entry, causes that the loop will iterate
1520
    one less iteration (which is the wanted outcome, since we
1521
    peel out the last iteration).  If the body is executed after
1522
    the condition, moving the condition to the entry requires
1523
    decrementing one iteration.  */
1524
  exit_1 = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
1525
  if (exit_1->dest == loop->latch)
1526
    new_rhs = gimple_cond_rhs (cond_stmt);
1527
  else
1528
  {
1529
    new_rhs = fold_build2 (MINUS_EXPR, TREE_TYPE (gimple_cond_rhs (cond_stmt)),
1530
                           gimple_cond_rhs (cond_stmt),
1531
                           build_int_cst (TREE_TYPE (gimple_cond_rhs (cond_stmt)), 1));
1532
    if (TREE_CODE (gimple_cond_rhs (cond_stmt)) == SSA_NAME)
1533
      {
1534
        basic_block preheader;
1535
        gimple_stmt_iterator gsi1;
1536
 
1537
        preheader = loop_preheader_edge(loop)->src;
1538
        gsi1 = gsi_after_labels (preheader);
1539
        new_rhs = force_gimple_operand_gsi (&gsi1, new_rhs, true,
1540
                                            NULL_TREE,false,GSI_CONTINUE_LINKING);
1541
      }
1542
  }
1543
  gimple_cond_set_rhs (cond_stmt, unshare_expr (new_rhs));
1544
  gimple_cond_set_lhs (cond_stmt, unshare_expr (gimple_cond_lhs (cond_stmt)));
1545
 
1546
  bbs = get_loop_body_in_dom_order (loop);
1547
 
1548
  for (n = 0; bbs[n] != loop->latch; n++)
1549
    continue;
1550
  nbbs = XNEWVEC (basic_block, n);
1551
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1552
                                   bbs + 1, n, nbbs);
1553
  gcc_assert (ok);
1554
  free (bbs);
1555
  ex_bb = nbbs[0];
1556
  free (nbbs);
1557
 
1558
  /* Other than reductions, the only gimple reg that should be copied
1559
     out of the loop is the control variable.  */
1560
 
1561
  control_name = NULL_TREE;
1562
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
1563
    {
1564
      phi = gsi_stmt (gsi);
1565
      res = PHI_RESULT (phi);
1566
      if (!is_gimple_reg (res))
1567
        {
1568
          gsi_next (&gsi);
1569
          continue;
1570
        }
1571
 
1572
      /* Check if it is a part of reduction.  If it is,
1573
         keep the phi at the reduction's keep_res field.  The
1574
         PHI_RESULT of this phi is the resulting value of the reduction
1575
         variable when exiting the loop.  */
1576
 
1577
      exit = single_dom_exit (loop);
1578
 
1579
      if (htab_elements (reduction_list) > 0)
1580
        {
1581
          struct reduction_info *red;
1582
 
1583
          tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1584
          red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1585
          if (red)
1586
            {
1587
              red->keep_res = phi;
1588
              gsi_next (&gsi);
1589
              continue;
1590
            }
1591
        }
1592
      gcc_assert (control_name == NULL_TREE
1593
                  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1594
      control_name = res;
1595
      remove_phi_node (&gsi, false);
1596
    }
1597
  gcc_assert (control_name != NULL_TREE);
1598
 
1599
  /* Initialize the control variable to number of iterations
1600
     according to the rhs of the exit condition.  */
1601
  gsi = gsi_after_labels (ex_bb);
1602
  cond_nit = last_stmt (exit->src);
1603
  nit_1 =  gimple_cond_rhs (cond_nit);
1604
  nit_1 = force_gimple_operand_gsi (&gsi,
1605
                                  fold_convert (TREE_TYPE (control_name), nit_1),
1606
                                  false, NULL_TREE, false, GSI_SAME_STMT);
1607
  stmt = gimple_build_assign (control_name, nit_1);
1608
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1609
  SSA_NAME_DEF_STMT (control_name) = stmt;
1610
}
1611
 
1612
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1613
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1614
   NEW_DATA is the variable that should be initialized from the argument
1615
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
1616
   basic block containing GIMPLE_OMP_PARALLEL tree.  */
1617
 
1618
static basic_block
1619
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1620
                      tree new_data, unsigned n_threads, location_t loc)
1621
{
1622
  gimple_stmt_iterator gsi;
1623
  basic_block bb, paral_bb, for_bb, ex_bb;
1624
  tree t, param;
1625
  gimple stmt, for_stmt, phi, cond_stmt;
1626
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1627
  edge exit, nexit, guard, end, e;
1628
 
1629
  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
1630
  bb = loop_preheader_edge (loop)->src;
1631
  paral_bb = single_pred (bb);
1632
  gsi = gsi_last_bb (paral_bb);
1633
 
1634
  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1635
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
1636
    = build_int_cst (integer_type_node, n_threads);
1637
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1638
  gimple_set_location (stmt, loc);
1639
 
1640
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1641
 
1642
  /* Initialize NEW_DATA.  */
1643
  if (data)
1644
    {
1645
      gsi = gsi_after_labels (bb);
1646
 
1647
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1648
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1649
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1650
      SSA_NAME_DEF_STMT (param) = stmt;
1651
 
1652
      stmt = gimple_build_assign (new_data,
1653
                                  fold_convert (TREE_TYPE (new_data), param));
1654
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1655
      SSA_NAME_DEF_STMT (new_data) = stmt;
1656
    }
1657
 
1658
  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
1659
  bb = split_loop_exit_edge (single_dom_exit (loop));
1660
  gsi = gsi_last_bb (bb);
1661
  stmt = gimple_build_omp_return (false);
1662
  gimple_set_location (stmt, loc);
1663
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1664
 
1665
  /* Extract data for GIMPLE_OMP_FOR.  */
1666
  gcc_assert (loop->header == single_dom_exit (loop)->src);
1667
  cond_stmt = last_stmt (loop->header);
1668
 
1669
  cvar = gimple_cond_lhs (cond_stmt);
1670
  cvar_base = SSA_NAME_VAR (cvar);
1671
  phi = SSA_NAME_DEF_STMT (cvar);
1672
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1673
  initvar = make_ssa_name (cvar_base, NULL);
1674
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1675
           initvar);
1676
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1677
 
1678
  gsi = gsi_last_nondebug_bb (loop->latch);
1679
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1680
  gsi_remove (&gsi, true);
1681
 
1682
  /* Prepare cfg.  */
1683
  for_bb = split_edge (loop_preheader_edge (loop));
1684
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1685
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1686
  gcc_assert (exit == single_dom_exit (loop));
1687
 
1688
  guard = make_edge (for_bb, ex_bb, 0);
1689
  single_succ_edge (loop->latch)->flags = 0;
1690
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1691
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1692
    {
1693
      source_location locus;
1694
      tree def;
1695
      phi = gsi_stmt (gsi);
1696
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1697
 
1698
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1699
      locus = gimple_phi_arg_location_from_edge (stmt,
1700
                                                 loop_preheader_edge (loop));
1701
      add_phi_arg (phi, def, guard, locus);
1702
 
1703
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1704
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1705
      add_phi_arg (phi, def, end, locus);
1706
    }
1707
  e = redirect_edge_and_branch (exit, nexit->dest);
1708
  PENDING_STMT (e) = NULL;
1709
 
1710
  /* Emit GIMPLE_OMP_FOR.  */
1711
  gimple_cond_set_lhs (cond_stmt, cvar_base);
1712
  type = TREE_TYPE (cvar);
1713
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1714
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1715
 
1716
  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
1717
  gimple_set_location (for_stmt, loc);
1718
  gimple_omp_for_set_index (for_stmt, 0, initvar);
1719
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1720
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1721
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1722
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1723
                                                cvar_base,
1724
                                                build_int_cst (type, 1)));
1725
 
1726
  gsi = gsi_last_bb (for_bb);
1727
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1728
  SSA_NAME_DEF_STMT (initvar) = for_stmt;
1729
 
1730
  /* Emit GIMPLE_OMP_CONTINUE.  */
1731
  gsi = gsi_last_bb (loop->latch);
1732
  stmt = gimple_build_omp_continue (cvar_next, cvar);
1733
  gimple_set_location (stmt, loc);
1734
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1735
  SSA_NAME_DEF_STMT (cvar_next) = stmt;
1736
 
1737
  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
1738
  gsi = gsi_last_bb (ex_bb);
1739
  stmt = gimple_build_omp_return (true);
1740
  gimple_set_location (stmt, loc);
1741
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1742
 
1743
  return paral_bb;
1744
}
1745
 
1746
/* Generates code to execute the iterations of LOOP in N_THREADS
1747
   threads in parallel.
1748
 
1749
   NITER describes number of iterations of LOOP.
1750
   REDUCTION_LIST describes the reductions existent in the LOOP.  */
1751
 
1752
static void
1753
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1754
                   unsigned n_threads, struct tree_niter_desc *niter)
1755
{
1756
  loop_iterator li;
1757
  tree many_iterations_cond, type, nit;
1758
  tree arg_struct, new_arg_struct;
1759
  gimple_seq stmts;
1760
  basic_block parallel_head;
1761
  edge entry, exit;
1762
  struct clsn_data clsn_data;
1763
  unsigned prob;
1764
  location_t loc;
1765
  gimple cond_stmt;
1766
 
1767
  /* From
1768
 
1769
     ---------------------------------------------------------------------
1770
     loop
1771
       {
1772
         IV = phi (INIT, IV + STEP)
1773
         BODY1;
1774
         if (COND)
1775
           break;
1776
         BODY2;
1777
       }
1778
     ---------------------------------------------------------------------
1779
 
1780
     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1781
     we generate the following code:
1782
 
1783
     ---------------------------------------------------------------------
1784
 
1785
     if (MAY_BE_ZERO
1786
     || NITER < MIN_PER_THREAD * N_THREADS)
1787
     goto original;
1788
 
1789
     BODY1;
1790
     store all local loop-invariant variables used in body of the loop to DATA.
1791
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1792
     load the variables from DATA.
1793
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1794
     BODY2;
1795
     BODY1;
1796
     GIMPLE_OMP_CONTINUE;
1797
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
1798
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
1799
     goto end;
1800
 
1801
     original:
1802
     loop
1803
       {
1804
         IV = phi (INIT, IV + STEP)
1805
         BODY1;
1806
         if (COND)
1807
           break;
1808
         BODY2;
1809
       }
1810
 
1811
     end:
1812
 
1813
   */
1814
 
1815
  /* Create two versions of the loop -- in the old one, we know that the
1816
     number of iterations is large enough, and we will transform it into the
1817
     loop that will be split to loop_fn, the new one will be used for the
1818
     remaining iterations.  */
1819
 
1820
  type = TREE_TYPE (niter->niter);
1821
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1822
                              NULL_TREE);
1823
  if (stmts)
1824
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1825
 
1826
  many_iterations_cond =
1827
    fold_build2 (GE_EXPR, boolean_type_node,
1828
                 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
1829
  many_iterations_cond
1830
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1831
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
1832
                   many_iterations_cond);
1833
  many_iterations_cond
1834
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1835
  if (stmts)
1836
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1837
  if (!is_gimple_condexpr (many_iterations_cond))
1838
    {
1839
      many_iterations_cond
1840
        = force_gimple_operand (many_iterations_cond, &stmts,
1841
                                true, NULL_TREE);
1842
      if (stmts)
1843
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1844
    }
1845
 
1846
  initialize_original_copy_tables ();
1847
 
1848
  /* We assume that the loop usually iterates a lot.  */
1849
  prob = 4 * REG_BR_PROB_BASE / 5;
1850
  loop_version (loop, many_iterations_cond, NULL,
1851
                prob, prob, REG_BR_PROB_BASE - prob, true);
1852
  update_ssa (TODO_update_ssa);
1853
  free_original_copy_tables ();
1854
 
1855
  /* Base all the induction variables in LOOP on a single control one.  */
1856
  canonicalize_loop_ivs (loop, &nit, true);
1857
 
1858
  /* Ensure that the exit condition is the first statement in the loop.  */
1859
  transform_to_exit_first_loop (loop, reduction_list, nit);
1860
 
1861
  /* Generate initializations for reductions.  */
1862
  if (htab_elements (reduction_list) > 0)
1863
    htab_traverse (reduction_list, initialize_reductions, loop);
1864
 
1865
  /* Eliminate the references to local variables from the loop.  */
1866
  gcc_assert (single_exit (loop));
1867
  entry = loop_preheader_edge (loop);
1868
  exit = single_dom_exit (loop);
1869
 
1870
  eliminate_local_variables (entry, exit);
1871
  /* In the old loop, move all variables non-local to the loop to a structure
1872
     and back, and create separate decls for the variables used in loop.  */
1873
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1874
                            &new_arg_struct, &clsn_data);
1875
 
1876
  /* Create the parallel constructs.  */
1877
  loc = UNKNOWN_LOCATION;
1878
  cond_stmt = last_stmt (loop->header);
1879
  if (cond_stmt)
1880
    loc = gimple_location (cond_stmt);
1881
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1882
                                        new_arg_struct, n_threads, loc);
1883
  if (htab_elements (reduction_list) > 0)
1884
    create_call_for_reduction (loop, reduction_list, &clsn_data);
1885
 
1886
  scev_reset ();
1887
 
1888
  /* Cancel the loop (it is simpler to do it here rather than to teach the
1889
     expander to do it).  */
1890
  cancel_loop_tree (loop);
1891
 
1892
  /* Free loop bound estimations that could contain references to
1893
     removed statements.  */
1894
  FOR_EACH_LOOP (li, loop, 0)
1895
    free_numbers_of_iterations_estimates_loop (loop);
1896
 
1897
  /* Expand the parallel constructs.  We do it directly here instead of running
1898
     a separate expand_omp pass, since it is more efficient, and less likely to
1899
     cause troubles with further analyses not being able to deal with the
1900
     OMP trees.  */
1901
 
1902
  omp_expand_local (parallel_head);
1903
}
1904
 
1905
/* Returns true when LOOP contains vector phi nodes.  */
1906
 
1907
static bool
1908
loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1909
{
1910
  unsigned i;
1911
  basic_block *bbs = get_loop_body_in_dom_order (loop);
1912
  gimple_stmt_iterator gsi;
1913
  bool res = true;
1914
 
1915
  for (i = 0; i < loop->num_nodes; i++)
1916
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1917
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1918
        goto end;
1919
 
1920
  res = false;
1921
 end:
1922
  free (bbs);
1923
  return res;
1924
}
1925
 
1926
/* Create a reduction_info struct, initialize it with REDUC_STMT
1927
   and PHI, insert it to the REDUCTION_LIST.  */
1928
 
1929
static void
1930
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1931
{
1932
  PTR *slot;
1933
  struct reduction_info *new_reduction;
1934
 
1935
  gcc_assert (reduc_stmt);
1936
 
1937
  if (dump_file && (dump_flags & TDF_DETAILS))
1938
    {
1939
      fprintf (dump_file,
1940
               "Detected reduction. reduction stmt is: \n");
1941
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1942
      fprintf (dump_file, "\n");
1943
    }
1944
 
1945
  new_reduction = XCNEW (struct reduction_info);
1946
 
1947
  new_reduction->reduc_stmt = reduc_stmt;
1948
  new_reduction->reduc_phi = phi;
1949
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1950
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1951
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1952
  *slot = new_reduction;
1953
}
1954
 
1955
/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */
1956
 
1957
static int
1958
set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1959
{
1960
  struct reduction_info *const red = (struct reduction_info *) *slot;
1961
  gimple_set_uid (red->reduc_phi, red->reduc_version);
1962
  return 1;
1963
}
1964
 
1965
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
1966
 
1967
static void
1968
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1969
{
1970
  gimple_stmt_iterator gsi;
1971
  loop_vec_info simple_loop_info;
1972
 
1973
  vect_dump = NULL;
1974
  simple_loop_info = vect_analyze_loop_form (loop);
1975
 
1976
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1977
    {
1978
      gimple phi = gsi_stmt (gsi);
1979
      affine_iv iv;
1980
      tree res = PHI_RESULT (phi);
1981
      bool double_reduc;
1982
 
1983
      if (!is_gimple_reg (res))
1984
        continue;
1985
 
1986
      if (!simple_iv (loop, loop, res, &iv, true)
1987
        && simple_loop_info)
1988
        {
1989
           gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1990
                                                            phi, true,
1991
                                                            &double_reduc);
1992
           if (reduc_stmt && !double_reduc)
1993
              build_new_reduction (reduction_list, reduc_stmt, phi);
1994
        }
1995
    }
1996
  destroy_loop_vec_info (simple_loop_info, true);
1997
 
1998
  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1999
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2000
     only now.  */
2001
  htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
2002
}
2003
 
2004
/* Try to initialize NITER for code generation part.  */
2005
 
2006
static bool
2007
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2008
{
2009
  edge exit = single_dom_exit (loop);
2010
 
2011
  gcc_assert (exit);
2012
 
2013
  /* We need to know # of iterations, and there should be no uses of values
2014
     defined inside loop outside of it, unless the values are invariants of
2015
     the loop.  */
2016
  if (!number_of_iterations_exit (loop, exit, niter, false))
2017
    {
2018
      if (dump_file && (dump_flags & TDF_DETAILS))
2019
        fprintf (dump_file, "  FAILED: number of iterations not known\n");
2020
      return false;
2021
    }
2022
 
2023
  return true;
2024
}
2025
 
2026
/* Try to initialize REDUCTION_LIST for code generation part.
2027
   REDUCTION_LIST describes the reductions.  */
2028
 
2029
static bool
2030
try_create_reduction_list (loop_p loop, htab_t reduction_list)
2031
{
2032
  edge exit = single_dom_exit (loop);
2033
  gimple_stmt_iterator gsi;
2034
 
2035
  gcc_assert (exit);
2036
 
2037
  gather_scalar_reductions (loop, reduction_list);
2038
 
2039
 
2040
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2041
    {
2042
      gimple phi = gsi_stmt (gsi);
2043
      struct reduction_info *red;
2044
      imm_use_iterator imm_iter;
2045
      use_operand_p use_p;
2046
      gimple reduc_phi;
2047
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2048
 
2049
      if (is_gimple_reg (val))
2050
        {
2051
          if (dump_file && (dump_flags & TDF_DETAILS))
2052
            {
2053
              fprintf (dump_file, "phi is ");
2054
              print_gimple_stmt (dump_file, phi, 0, 0);
2055
              fprintf (dump_file, "arg of phi to exit:   value ");
2056
              print_generic_expr (dump_file, val, 0);
2057
              fprintf (dump_file, " used outside loop\n");
2058
              fprintf (dump_file,
2059
                       "  checking if it a part of reduction pattern:  \n");
2060
            }
2061
          if (htab_elements (reduction_list) == 0)
2062
            {
2063
              if (dump_file && (dump_flags & TDF_DETAILS))
2064
                fprintf (dump_file,
2065
                         "  FAILED: it is not a part of reduction.\n");
2066
              return false;
2067
            }
2068
          reduc_phi = NULL;
2069
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2070
            {
2071
              if (!gimple_debug_bind_p (USE_STMT (use_p))
2072
                  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2073
                {
2074
                  reduc_phi = USE_STMT (use_p);
2075
                  break;
2076
                }
2077
            }
2078
          red = reduction_phi (reduction_list, reduc_phi);
2079
          if (red == NULL)
2080
            {
2081
              if (dump_file && (dump_flags & TDF_DETAILS))
2082
                fprintf (dump_file,
2083
                         "  FAILED: it is not a part of reduction.\n");
2084
              return false;
2085
            }
2086
          if (dump_file && (dump_flags & TDF_DETAILS))
2087
            {
2088
              fprintf (dump_file, "reduction phi is  ");
2089
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2090
              fprintf (dump_file, "reduction stmt is  ");
2091
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2092
            }
2093
        }
2094
    }
2095
 
2096
  /* The iterations of the loop may communicate only through bivs whose
2097
     iteration space can be distributed efficiently.  */
2098
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2099
    {
2100
      gimple phi = gsi_stmt (gsi);
2101
      tree def = PHI_RESULT (phi);
2102
      affine_iv iv;
2103
 
2104
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
2105
        {
2106
          struct reduction_info *red;
2107
 
2108
          red = reduction_phi (reduction_list, phi);
2109
          if (red == NULL)
2110
            {
2111
              if (dump_file && (dump_flags & TDF_DETAILS))
2112
                fprintf (dump_file,
2113
                         "  FAILED: scalar dependency between iterations\n");
2114
              return false;
2115
            }
2116
        }
2117
    }
2118
 
2119
 
2120
  return true;
2121
}
2122
 
2123
/* Detect parallel loops and generate parallel code using libgomp
2124
   primitives.  Returns true if some loop was parallelized, false
2125
   otherwise.  */
2126
 
2127
bool
2128
parallelize_loops (void)
2129
{
2130
  unsigned n_threads = flag_tree_parallelize_loops;
2131
  bool changed = false;
2132
  struct loop *loop;
2133
  struct tree_niter_desc niter_desc;
2134
  loop_iterator li;
2135
  htab_t reduction_list;
2136
  struct obstack parloop_obstack;
2137
  HOST_WIDE_INT estimated;
2138
  LOC loop_loc;
2139
 
2140
  /* Do not parallelize loops in the functions created by parallelization.  */
2141
  if (parallelized_function_p (cfun->decl))
2142
    return false;
2143
  if (cfun->has_nonlocal_label)
2144
    return false;
2145
 
2146
  gcc_obstack_init (&parloop_obstack);
2147
  reduction_list = htab_create (10, reduction_info_hash,
2148
                                     reduction_info_eq, free);
2149
  init_stmt_vec_info_vec ();
2150
 
2151
  FOR_EACH_LOOP (li, loop, 0)
2152
    {
2153
      htab_empty (reduction_list);
2154
      if (dump_file && (dump_flags & TDF_DETAILS))
2155
      {
2156
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2157
        if (loop->inner)
2158
          fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2159
        else
2160
          fprintf (dump_file, "loop %d is innermost\n",loop->num);
2161
      }
2162
 
2163
      /* If we use autopar in graphite pass, we use its marked dependency
2164
      checking results.  */
2165
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
2166
      {
2167
        if (dump_file && (dump_flags & TDF_DETAILS))
2168
           fprintf (dump_file, "loop is not parallel according to graphite\n");
2169
        continue;
2170
      }
2171
 
2172
      if (!single_dom_exit (loop))
2173
      {
2174
 
2175
        if (dump_file && (dump_flags & TDF_DETAILS))
2176
          fprintf (dump_file, "loop is !single_dom_exit\n");
2177
 
2178
        continue;
2179
      }
2180
 
2181
      if (/* And of course, the loop must be parallelizable.  */
2182
          !can_duplicate_loop_p (loop)
2183
          || loop_has_blocks_with_irreducible_flag (loop)
2184
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2185
          /* FIXME: the check for vector phi nodes could be removed.  */
2186
          || loop_has_vector_phi_nodes (loop)
2187
          /* FIXME: transform_to_exit_first_loop does not handle not
2188
             header-copied loops correctly - see PR46886.  */
2189
          || !do_while_loop_p (loop))
2190
        continue;
2191
      estimated = max_stmt_executions_int (loop, false);
2192
      /* FIXME: Bypass this check as graphite doesn't update the
2193
      count and frequency correctly now.  */
2194
      if (!flag_loop_parallelize_all
2195
          && ((estimated !=-1
2196
             && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2197
              /* Do not bother with loops in cold areas.  */
2198
              || optimize_loop_nest_for_size_p (loop)))
2199
        continue;
2200
 
2201
      if (!try_get_loop_niter (loop, &niter_desc))
2202
        continue;
2203
 
2204
      if (!try_create_reduction_list (loop, reduction_list))
2205
        continue;
2206
 
2207
      if (!flag_loop_parallelize_all
2208
          && !loop_parallel_p (loop, &parloop_obstack))
2209
        continue;
2210
 
2211
      changed = true;
2212
      if (dump_file && (dump_flags & TDF_DETAILS))
2213
      {
2214
        if (loop->inner)
2215
          fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2216
        else
2217
          fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2218
        loop_loc = find_loop_location (loop);
2219
        if (loop_loc != UNKNOWN_LOC)
2220
          fprintf (dump_file, "\nloop at %s:%d: ",
2221
                   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2222
      }
2223
      gen_parallel_loop (loop, reduction_list,
2224
                         n_threads, &niter_desc);
2225
      verify_flow_info ();
2226
      verify_dominators (CDI_DOMINATORS);
2227
      verify_loop_structure ();
2228
      verify_loop_closed_ssa (true);
2229
    }
2230
 
2231
  free_stmt_vec_info_vec ();
2232
  htab_delete (reduction_list);
2233
  obstack_free (&parloop_obstack, NULL);
2234
 
2235
  /* Parallelization will cause new function calls to be inserted through
2236
     which local variables will escape.  Reset the points-to solution
2237
     for ESCAPED.  */
2238
  if (changed)
2239
    pt_solution_reset (&cfun->gimple_df->escaped);
2240
 
2241
  return changed;
2242
}
2243
 
2244
#include "gt-tree-parloops.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.