/* Loop distribution.
   Copyright (C) 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
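
/* An equivalent sketch in C (illustrative only; the arrays a, b, d
   and the scalars c, e are hypothetical):

   | for (i = 2; i <= n; i++)
   |   {
   |     a[i] = b[i] + c;
   |     d[i] = a[i - 1] * e;
   |   }

   is distributed into

   | for (i = 2; i <= n; i++)
   |   a[i] = b[i] + c;
   |
   | for (i = 2; i <= n; i++)
   |   d[i] = a[i - 1] * e;

   where each resulting loop carries no dependence cycle of its own
   and can thus be executed as a parallel (DOALL) loop.  */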

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"

#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "lambda.h"
#include "langhooks.h"
#include "tree-vectorizer.h"

/* If bit I is not set, it means that this node represents an
   operation that has already been performed, and that should not be
   performed again.  This is the subgraph of remaining important
   computations that is passed to the DFS algorithm to avoid including
   the same stores several times in different loops.  */
static bitmap remaining_stmts;

/* A node of the RDG is marked in this bitmap when it has as a
   predecessor a node that writes to memory.  */
static bitmap upstream_mem_writes;

/* Update the PHI nodes of NEW_LOOP.  NEW_LOOP is a duplicate of
   ORIG_LOOP.  */

static void
update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
{
  tree new_ssa_name;
  gimple_stmt_iterator si_new, si_orig;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */
  for (si_new = gsi_start_phis (new_loop->header),
       si_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (si_new) && !gsi_end_p (si_orig);
       gsi_next (&si_new), gsi_next (&si_orig))
    {
      tree def;
      source_location locus;
      gimple phi_new = gsi_stmt (si_new);
      gimple phi_orig = gsi_stmt (si_orig);

      /* Add the first phi argument for the phi in NEW_LOOP (the one
         associated with the entry of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* Add the second phi argument for the phi in NEW_LOOP (the one
         associated with the latch of NEW_LOOP).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);

      if (TREE_CODE (def) == SSA_NAME)
        {
          new_ssa_name = get_current_def (def);

          if (!new_ssa_name)
            /* This only happens if there are no definitions inside the
               loop.  Use the invariant in the new loop as is.  */
            new_ssa_name = def;
        }
      else
        /* Could be an integer.  */
        new_ssa_name = def;

      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
    }
}

/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  if (!single_exit (loop))
    return NULL;

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
  free_original_copy_tables ();

  if (!res)
    return NULL;

  update_phis_for_loop_copy (loop, res);
  rename_variables_in_loop (res);

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  Returns true when
   the code generation succeeded.  */

static bool
generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
  unsigned i, x;
  gimple_stmt_iterator bsi;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);

      /* Copying fails for loops without a single exit; bail out
         before dereferencing the NULL result.  */
      if (loop == NULL)
        return false;

      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  if (loop == NULL)
    return false;

  /* Remove stmts not in the PARTITION bitmap.  The order in which we
     visit the phi nodes and the statements is exactly as in
     stmts_from_loop.  */
  bbs = get_loop_body_in_dom_order (loop);

  for (x = 0, i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
        if (!bitmap_bit_p (partition, x++))
          {
            gimple phi = gsi_stmt (bsi);
            if (!is_gimple_reg (gimple_phi_result (phi)))
              mark_virtual_phi_result_for_renaming (phi);
            remove_phi_node (&bsi, true);
          }
        else
          gsi_next (&bsi);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
        {
          gimple stmt = gsi_stmt (bsi);
          if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL
              && !bitmap_bit_p (partition, x++))
            {
              unlink_stmt_vdef (stmt);
              gsi_remove (&bsi, true);
              release_defs (stmt);
            }
          else
            gsi_next (&bsi);
        }
    }

  free (bbs);
  return true;
}
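
/* As an illustration of the indexing scheme used above (not from the
   original sources): in a loop body with one phi node and two
   statements, the phi gets index 0 and the statements get indices 1
   and 2, so a PARTITION bitmap with bits {0, 1} set keeps the phi and
   the first statement and removes the second.  This numbering mirrors
   the one used by stmts_from_loop.  */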

/* Build the size argument for a memset call.  */

static inline tree
build_size_arg_loc (location_t loc, tree nb_iter, tree op,
                    gimple_seq *stmt_list)
{
  gimple_seq stmts;
  tree x;

  x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
                       fold_convert_loc (loc, size_type_node, nb_iter),
                       fold_convert_loc (loc, size_type_node,
                                         TYPE_SIZE_UNIT (TREE_TYPE (op))));
  x = force_gimple_operand (x, &stmts, true, NULL);
  gimple_seq_add_seq (stmt_list, stmts);

  return x;
}
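
/* For example (illustrative, assuming a target with 4-byte ints):
   with NB_ITER = n and OP an element of an int array, the size
   returned above is

   | x = (size_t) n * (size_t) 4;

   i.e. the iteration count times TYPE_SIZE_UNIT of the element
   type.  */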

/* Generate a call to memset.  Return true when the operation succeeded.  */

static bool
generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
                      gimple_stmt_iterator bsi)
{
  tree addr_base, nb_bytes;
  bool res = false;
  gimple_seq stmt_list = NULL, stmts;
  gimple fn_call;
  tree mem, fn;
  struct data_reference *dr = XCNEW (struct data_reference);
  location_t loc = gimple_location (stmt);

  DR_STMT (dr) = stmt;
  DR_REF (dr) = op0;
  if (!dr_analyze_innermost (dr))
    goto end;

  /* Test for a positive stride, iterating over every element.  */
  if (integer_zerop (size_binop (MINUS_EXPR,
                                 fold_convert (sizetype, DR_STEP (dr)),
                                 TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
    {
      addr_base = fold_convert_loc (loc, sizetype,
                                    size_binop_loc (loc, PLUS_EXPR,
                                                    DR_OFFSET (dr),
                                                    DR_INIT (dr)));
      addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
                                   TREE_TYPE (DR_BASE_ADDRESS (dr)),
                                   DR_BASE_ADDRESS (dr), addr_base);

      nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
    }

  /* Test for a negative stride, iterating over every element.  */
  else if (integer_zerop (size_binop (PLUS_EXPR,
                                      TYPE_SIZE_UNIT (TREE_TYPE (op0)),
                                      fold_convert (sizetype, DR_STEP (dr)))))
    {
      nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);

      addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
      addr_base = fold_convert_loc (loc, sizetype, addr_base);
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
                                  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
                                  TYPE_SIZE_UNIT (TREE_TYPE (op0)));
      addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
                                   TREE_TYPE (DR_BASE_ADDRESS (dr)),
                                   DR_BASE_ADDRESS (dr), addr_base);
    }
  else
    goto end;

  mem = force_gimple_operand (addr_base, &stmts, true, NULL);
  gimple_seq_add_seq (&stmt_list, stmts);

  fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]);
  fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
  gimple_seq_add_stmt (&stmt_list, fn_call);
  gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
  res = true;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "generated memset zero\n");

 end:
  free_data_ref (dr);
  return res;
}
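
/* As a sketch of the transformation performed here (illustrative
   only), a loop like

   | for (i = 0; i < n; i++)
   |   a[i] = 0;

   whose store has a positive stride equal to the element size is
   replaced by a call inserted before the loop, roughly

   | __builtin_memset (&a[0], 0, n * sizeof (a[0]));

   The negative stride case handles loops running downwards through
   the array, adjusting the base address to the lowest accessed
   byte.  */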

/* Propagate the phi nodes of basic block B to their uses and remove
   them.  */

static void
prop_phis (basic_block b)
{
  gimple_stmt_iterator psi;
  gimple_seq phis = phi_nodes (b);

  for (psi = gsi_start (phis); !gsi_end_p (psi); )
    {
      gimple phi = gsi_stmt (psi);
      tree def = gimple_phi_result (phi), use = gimple_phi_arg_def (phi, 0);

      gcc_assert (gimple_phi_num_args (phi) == 1);

      if (!is_gimple_reg (def))
        {
          imm_use_iterator iter;
          use_operand_p use_p;
          gimple stmt;

          FOR_EACH_IMM_USE_STMT (stmt, iter, def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
              SET_USE (use_p, use);
        }
      else
        replace_uses_by (def, use);

      remove_phi_node (&psi, true);
    }
}

/* Tries to generate a builtin function for the instructions of LOOP
   pointed to by the bits set in PARTITION.  Returns true when the
   operation succeeded.  */

static bool
generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
{
  bool res = false;
  unsigned i, x = 0;
  basic_block *bbs;
  gimple write = NULL;
  tree op0, op1;
  gimple_stmt_iterator bsi;
  tree nb_iter = number_of_exit_cond_executions (loop);

  if (!nb_iter || nb_iter == chrec_dont_know)
    return false;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        x++;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        {
          gimple stmt = gsi_stmt (bsi);

          if (bitmap_bit_p (partition, x++)
              && is_gimple_assign (stmt)
              && !is_gimple_reg (gimple_assign_lhs (stmt)))
            {
              /* Don't generate the builtins when there is more than
                 one memory write.  */
              if (write != NULL)
                goto end;

              write = stmt;
              if (bb == loop->latch)
                nb_iter = number_of_latch_executions (loop);
            }
        }
    }

  if (!write)
    goto end;

  op0 = gimple_assign_lhs (write);
  op1 = gimple_assign_rhs1 (write);

  if (!(TREE_CODE (op0) == ARRAY_REF
        || TREE_CODE (op0) == INDIRECT_REF))
    goto end;

  /* The new statements will be placed before LOOP.  */
  bsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  if (gimple_assign_rhs_code (write) == INTEGER_CST
      && (integer_zerop (op1) || real_zerop (op1)))
    res = generate_memset_zero (write, op0, nb_iter, bsi);

  /* If this is the last partition for which we generate code, we have
     to destroy the loop.  */
  if (res && !copy_p)
    {
      unsigned nbbs = loop->num_nodes;
      basic_block src = loop_preheader_edge (loop)->src;
      basic_block dest = single_exit (loop)->dest;
      prop_phis (dest);
      make_edge (src, dest, EDGE_FALLTHRU);
      cancel_loop_tree (loop);

      for (i = 0; i < nbbs; i++)
        delete_basic_block (bbs[i]);

      set_immediate_dominator (CDI_DOMINATORS, dest,
                               recompute_dominator (CDI_DOMINATORS, dest));
    }

 end:
  free (bbs);
  return res;
}
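
/* Note, as an illustration, that a loop such as

   | for (i = 0; i < n; i++)
   |   {
   |     a[i] = 0;
   |     b[i] = 0;
   |   }

   contains two memory writes, so generate_builtin gives up on it as
   is; it can only become two memset calls if loop distribution first
   splits it into two single-store loops, one per partition.  */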

/* Generates code for PARTITION.  For simple loops, this function can
   generate a built-in.  */

static bool
generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
{
  if (generate_builtin (loop, partition, copy_p))
    return true;

  return generate_loops_for_partition (loop, partition, copy_p);
}

/* Returns true if the node V of RDG cannot be recomputed.  */

static bool
rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
{
  if (RDG_MEM_WRITE_STMT (rdg, v))
    return true;

  return false;
}

/* Returns true when the vertex V has already been generated in the
   current partition (V is in PROCESSED), or when V belongs to another
   partition and cannot be recomputed (V is not in REMAINING_STMTS).  */

static inline bool
already_processed_vertex_p (bitmap processed, int v)
{
  return (bitmap_bit_p (processed, v)
          || !bitmap_bit_p (remaining_stmts, v));
}

/* Returns NULL when there is no anti-dependence among the successors
   of vertex V, otherwise returns the edge with the anti-dep.  */

static struct graph_edge *
has_anti_dependence (struct vertex *v)
{
  struct graph_edge *e;

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      if (RDGE_TYPE (e) == anti_dd)
        return e;

  return NULL;
}
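
/* For instance, in

   | a[i] = a[i + 1] + 1;

   the read of a[i + 1] must happen before the write of a[i + 1] in a
   later iteration: an anti-dependence.  Placing the reads and the
   writes in different loops could reverse that order, so both ends
   of such an edge are kept in the same partition.  */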

/* Returns true when one of the predecessors of V is marked as having
   memory writes upstream, flow channels excluded.  */

static bool
predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
{
  struct graph_edge *e;

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      if (bitmap_bit_p (upstream_mem_writes, e->src)
          /* Don't consider flow channels: a write to memory followed
             by a read from memory.  These channels allow splitting
             the RDG into different partitions.  */
          && !RDG_MEM_WRITE_STMT (rdg, e->src))
        return true;

  return false;
}
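
/* A flow channel, by contrast, is safe to cut: in

   | a[i] = ...;
   | ... = a[i];

   the write happens before the read, so placing the producer loop
   entirely before the consumer loop preserves the dependence.  This
   is what makes the distribution shown in the header comment of this
   file legal.  */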

/* Initializes the upstream_mem_writes bitmap following the
   information from RDG.  */

static void
mark_nodes_having_upstream_mem_writes (struct graph *rdg)
{
  int v, x;
  bitmap seen = BITMAP_ALLOC (NULL);

  for (v = rdg->n_vertices - 1; v >= 0; v--)
    if (!bitmap_bit_p (seen, v))
      {
        unsigned i;
        VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);

        graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

        for (i = 0; VEC_iterate (int, nodes, i, x); i++)
          {
            if (bitmap_bit_p (seen, x))
              continue;

            bitmap_set_bit (seen, x);

            if (RDG_MEM_WRITE_STMT (rdg, x)
                || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
                /* In anti-dependences the read should occur before
                   the write; this is why both the read and the write
                   should be placed in the same partition.  */
                || has_anti_dependence (&(rdg->vertices[x])))
              {
                bitmap_set_bit (upstream_mem_writes, x);
              }
          }

        VEC_free (int, heap, nodes);
      }

  BITMAP_FREE (seen);
}

/* Returns true when vertex U has a memory write node as a predecessor
   in RDG.  */

static bool
has_upstream_mem_writes (int u)
{
  return bitmap_bit_p (upstream_mem_writes, u);
}

static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
                                           bitmap, bool *);

/* Flag all the uses of U.  */

static void
rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
                   bitmap processed, bool *part_has_writes)
{
  struct graph_edge *e;

  for (e = rdg->vertices[u].succ; e; e = e->succ_next)
    if (!bitmap_bit_p (processed, e->dest))
      {
        rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops,
                                       processed, part_has_writes);
        rdg_flag_all_uses (rdg, e->dest, partition, loops, processed,
                           part_has_writes);
      }
}

/* Flag the uses of U, stopping the propagation according to the
   information from upstream_mem_writes.  */

static void
rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
               bitmap processed, bool *part_has_writes)
{
  use_operand_p use_p;
  struct vertex *x = &(rdg->vertices[u]);
  gimple stmt = RDGV_STMT (x);
  struct graph_edge *anti_dep = has_anti_dependence (x);

  /* Keep in the same partition the destination of an anti-dependence,
     because this is a store to the exact same location.  Putting this
     in another partition is bad for cache locality.  */
  if (anti_dep)
    {
      int v = anti_dep->dest;

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                       processed, part_has_writes);
    }

  if (gimple_code (stmt) != GIMPLE_PHI)
    {
      if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
        {
          tree use = USE_FROM_PTR (use_p);

          if (TREE_CODE (use) == SSA_NAME)
            {
              gimple def_stmt = SSA_NAME_DEF_STMT (use);
              int v = rdg_vertex_for_stmt (rdg, def_stmt);

              if (v >= 0
                  && !already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed, part_has_writes);
            }
        }
    }

  if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
    {
      tree op0 = gimple_assign_lhs (stmt);

      /* Scalar channels don't have enough space for transmitting data
         between tasks, unless we add more storage by privatizing.  */
      if (is_gimple_reg (op0))
        {
          use_operand_p use_p;
          imm_use_iterator iter;

          FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
            {
              int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));

              if (!already_processed_vertex_p (processed, v))
                rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
                                               processed, part_has_writes);
            }
        }
    }
}

/* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */

static void
rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
                 bool *part_has_writes)
{
  struct loop *loop;

  if (bitmap_bit_p (partition, v))
    return;

  loop = loop_containing_stmt (RDG_STMT (rdg, v));
  bitmap_set_bit (loops, loop->num);
  bitmap_set_bit (partition, v);

  if (rdg_cannot_recompute_vertex_p (rdg, v))
    {
      *part_has_writes = true;
      bitmap_clear_bit (remaining_stmts, v);
    }
}

/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
   Also flag their loop number in LOOPS.  */

static void
rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
                               bitmap loops, bitmap processed,
                               bool *part_has_writes)
{
  unsigned i;
  VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
  int x;

  bitmap_set_bit (processed, v);
  rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
  graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
  rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);

  for (i = 0; VEC_iterate (int, nodes, i, x); i++)
    if (!already_processed_vertex_p (processed, x))
      rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
                                     part_has_writes);

  VEC_free (int, heap, nodes);
}

/* Initialize CONDS with all the condition statements from the basic
   blocks of LOOP.  */

static void
collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
{
  unsigned i;
  edge e;
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);

  for (i = 0; VEC_iterate (edge, exits, i, e); i++)
    {
      gimple cond = last_stmt (e->src);

      if (cond)
        VEC_safe_push (gimple, heap, *conds, cond);
    }

  VEC_free (edge, heap, exits);
}

/* Add to PARTITION all the exit condition statements for LOOPS
   together with all their dependent statements determined from
   RDG.  */

static void
rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
                     bitmap processed, bool *part_has_writes)
{
  unsigned i;
  bitmap_iterator bi;
  VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);

  EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
    collect_condition_stmts (get_loop (i), &conds);

  while (!VEC_empty (gimple, conds))
    {
      gimple cond = VEC_pop (gimple, conds);
      int v = rdg_vertex_for_stmt (rdg, cond);
      bitmap new_loops = BITMAP_ALLOC (NULL);

      if (!already_processed_vertex_p (processed, v))
        rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
                                       part_has_writes);

      EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
        if (!bitmap_bit_p (loops, i))
          {
            bitmap_set_bit (loops, i);
            collect_condition_stmts (get_loop (i), &conds);
          }

      BITMAP_FREE (new_loops);
    }

  VEC_free (gimple, heap, conds);
}

/* Flag all the nodes of RDG containing memory accesses that could
   potentially belong to arrays already accessed in the current
   PARTITION.  */

static void
rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition,
                                  bitmap loops, bitmap processed,
                                  VEC (int, heap) **other_stores)
{
  bool foo;
  unsigned i, n;
  int j, k, kk;
  bitmap_iterator ii;
  struct graph_edge *e;

  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
    if (RDG_MEM_WRITE_STMT (rdg, i)
        || RDG_MEM_READS_STMT (rdg, i))
      {
        for (j = 0; j < rdg->n_vertices; j++)
          if (!bitmap_bit_p (processed, j)
              && (RDG_MEM_WRITE_STMT (rdg, j)
                  || RDG_MEM_READS_STMT (rdg, j))
              && rdg_has_similar_memory_accesses (rdg, i, j))
            {
              /* Flag first the node J itself, and all the nodes that
                 are needed to compute J.  */
              rdg_flag_vertex_and_dependent (rdg, j, partition, loops,
                                             processed, &foo);

              /* When J is a read, we want to coalesce in the same
                 PARTITION all the nodes that are using J: this is
                 needed for better cache locality.  */
              rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo);

              /* Remove from OTHER_STORES the vertex that we flagged.  */
              if (RDG_MEM_WRITE_STMT (rdg, j))
                for (k = 0; VEC_iterate (int, *other_stores, k, kk); k++)
                  if (kk == j)
                    {
                      VEC_unordered_remove (int, *other_stores, k);
                      break;
                    }
            }

        /* If the node I has more than one use, keep these uses
           together in the same PARTITION.  */
        for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++);

        if (n > 1)
          rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo);
      }
}

/* Returns a bitmap in which all the statements needed for computing
   the strongly connected component C of the RDG are flagged, also
   including the loop exit conditions.  */

static bitmap
build_rdg_partition_for_component (struct graph *rdg, rdgc c,
                                   bool *part_has_writes,
                                   VEC (int, heap) **other_stores)
{
  int i, v;
  bitmap partition = BITMAP_ALLOC (NULL);
  bitmap loops = BITMAP_ALLOC (NULL);
  bitmap processed = BITMAP_ALLOC (NULL);

  for (i = 0; VEC_iterate (int, c->vertices, i, v); i++)
    if (!already_processed_vertex_p (processed, v))
      rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
                                     part_has_writes);

  /* Also iterate on the array of stores not in the starting vertices,
     and determine those vertices that have some memory affinity with
     the current nodes in the component: these are stores to the same
     arrays, i.e. we're taking care of cache locality.  */
  rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
                                    other_stores);

  rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);

  BITMAP_FREE (processed);
  BITMAP_FREE (loops);
  return partition;
}

/* Free memory for COMPONENTS.  */

static void
free_rdg_components (VEC (rdgc, heap) *components)
{
  int i;
  rdgc x;

  for (i = 0; VEC_iterate (rdgc, components, i, x); i++)
    {
      VEC_free (int, heap, x->vertices);
      free (x);
    }

  VEC_free (rdgc, heap, components);
}

/* Build the COMPONENTS vector with the strongly connected components
   of RDG in which the STARTING_VERTICES occur.  */

static void
rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
                      VEC (rdgc, heap) **components)
{
  int i, v;
  bitmap saved_components = BITMAP_ALLOC (NULL);
  int n_components = graphds_scc (rdg, NULL);
  VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);

  for (i = 0; i < n_components; i++)
    all_components[i] = VEC_alloc (int, heap, 3);

  for (i = 0; i < rdg->n_vertices; i++)
    VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);

  for (i = 0; VEC_iterate (int, starting_vertices, i, v); i++)
    {
      int c = rdg->vertices[v].component;

      if (!bitmap_bit_p (saved_components, c))
        {
          rdgc x = XCNEW (struct rdg_component);
          x->num = c;
          x->vertices = all_components[c];

          VEC_safe_push (rdgc, heap, *components, x);
          bitmap_set_bit (saved_components, c);
        }
    }

  for (i = 0; i < n_components; i++)
    if (!bitmap_bit_p (saved_components, i))
      VEC_free (int, heap, all_components[i]);

  free (all_components);
  BITMAP_FREE (saved_components);
}

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
                      VEC (int, heap) **other_stores,
                      VEC (bitmap, heap) **partitions, bitmap processed)
{
  int i;
  rdgc x;
  bitmap partition = BITMAP_ALLOC (NULL);

  for (i = 0; VEC_iterate (rdgc, components, i, x); i++)
    {
      bitmap np;
      bool part_has_writes = false;
      int v = VEC_index (int, x->vertices, 0);

      if (bitmap_bit_p (processed, v))
        continue;

      np = build_rdg_partition_for_component (rdg, x, &part_has_writes,
                                              other_stores);
      bitmap_ior_into (partition, np);
      bitmap_ior_into (processed, np);
      BITMAP_FREE (np);

      if (part_has_writes)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "ldist useful partition:\n");
              dump_bitmap (dump_file, partition);
            }

          VEC_safe_push (bitmap, heap, *partitions, partition);
          partition = BITMAP_ALLOC (NULL);
        }
    }

  /* Add the nodes from the RDG that were not marked as processed, and
     that are used outside the current loop.  These are scalar
     computations that are not yet part of previous partitions.  */
  for (i = 0; i < rdg->n_vertices; i++)
    if (!bitmap_bit_p (processed, i)
        && rdg_defs_used_in_other_loops_p (rdg, i))
      VEC_safe_push (int, heap, *other_stores, i);

  /* If there are still statements left in the OTHER_STORES array,
     create other components and partitions with these stores and
     their dependences.  */
  if (VEC_length (int, *other_stores) > 0)
    {
      VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
      VEC (int, heap) *foo = VEC_alloc (int, heap, 3);

      rdg_build_components (rdg, *other_stores, &comps);
      rdg_build_partitions (rdg, comps, &foo, partitions, processed);

      VEC_free (int, heap, foo);
      free_rdg_components (comps);
    }

  /* If there is something left in the last partition, save it.  */
  if (bitmap_count_bits (partition) > 0)
    VEC_safe_push (bitmap, heap, *partitions, partition);
  else
    BITMAP_FREE (partition);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
{
  int i;
  bitmap partition;

  for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++)
    debug_bitmap_file (file, partition);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (VEC (bitmap, heap) *);

void
debug_rdg_partitions (VEC (bitmap, heap) *partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, bitmap partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
        ++res;

      if (RDG_MEM_READS_STMT (rdg, i))
        ++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
{
  int i;
  bitmap partition;
  int nrw = number_of_rw_in_rdg (rdg);

  for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Generate code from STARTING_VERTICES in RDG.  Returns the number of
   distributed loops.  */

static int
ldist_gen (struct loop *loop, struct graph *rdg,
           VEC (int, heap) *starting_vertices)
{
  int i, nbp;
  VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
  VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
  VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
  bitmap partition, processed = BITMAP_ALLOC (NULL);

  remaining_stmts = BITMAP_ALLOC (NULL);
  upstream_mem_writes = BITMAP_ALLOC (NULL);

  for (i = 0; i < rdg->n_vertices; i++)
    {
      bitmap_set_bit (remaining_stmts, i);

      /* Save in OTHER_STORES all the memory writes that are not in
         STARTING_VERTICES.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
        {
          int v;
          unsigned j;
          bool found = false;

          for (j = 0; VEC_iterate (int, starting_vertices, j, v); j++)
            if (i == v)
              {
                found = true;
                break;
              }

          if (!found)
            VEC_safe_push (int, heap, other_stores, i);
        }
    }

  mark_nodes_having_upstream_mem_writes (rdg);
  rdg_build_components (rdg, starting_vertices, &components);
  rdg_build_partitions (rdg, components, &other_stores, &partitions,
                        processed);
  BITMAP_FREE (processed);
  nbp = VEC_length (bitmap, partitions);

  if (nbp <= 1
      || partition_contains_all_rw (rdg, partitions))
    goto ldist_done;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++)
    if (!generate_code_for_partition (loop, partition, i < nbp - 1))
      goto ldist_done;

  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);

 ldist_done:

  BITMAP_FREE (remaining_stmts);
  BITMAP_FREE (upstream_mem_writes);

  for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++)
    BITMAP_FREE (partition);

  VEC_free (int, heap, other_stores);
  VEC_free (bitmap, heap, partitions);
  free_rdg_components (components);
  return nbp;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  When STMTS is
   NULL, performs the maximal distribution; if STMTS is not NULL,
   tries to separate only these statements from the LOOP's body.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
{
  int res = 0;
  struct graph *rdg;
  gimple s;
  unsigned i;
  VEC (int, heap) *vertices;

  if (loop->num_nodes > 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
                 loop->num);

      return res;
    }

  rdg = build_rdg (loop);

  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
                 loop->num);

      return res;
    }

  vertices = VEC_alloc (int, heap, 3);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  for (i = 0; VEC_iterate (gimple, stmts, i, s); i++)
    {
      int v = rdg_vertex_for_stmt (rdg, s);

      if (v >= 0)
        {
          VEC_safe_push (int, heap, vertices, v);

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "ldist asked to generate code for vertex %d\n", v);
        }
    }

  res = ldist_gen (loop, rdg, vertices);
  VEC_free (int, heap, vertices);
  free_rdg (rdg);

  return res;
}

/* Distribute all loops in the current function.  */

static unsigned int
tree_loop_distribution (void)
{
  struct loop *loop;
  loop_iterator li;
  int nb_generated_loops = 0;

  FOR_EACH_LOOP (li, loop, 0)
    {
      VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3);

      /* With the following working list, we're asking distribute_loop
         to separate the stores of the loop: when dependences allow,
         it will end up with one store per loop.  */
      stores_from_loop (loop, &work_list);

      /* A simple heuristic for cache locality is to not split stores
         to the same array.  Without this call, an unrolled loop would
         be split into as many loops as the unroll factor, each loop
         storing in the same array.  */
      remove_similar_memory_refs (&work_list);

      nb_generated_loops = distribute_loop (loop, work_list);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (nb_generated_loops > 1)
            fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
                     loop->num, nb_generated_loops);
          else
            fprintf (dump_file, "Loop %d is the same.\n", loop->num);
        }

      verify_loop_structure ();

      VEC_free (gimple, heap, work_list);
    }

  return 0;
}

static bool
gate_tree_loop_distribution (void)
{
  return flag_tree_loop_distribution != 0;
}

struct gimple_opt_pass pass_loop_distribution =
{
 {
  GIMPLE_PASS,
  "ldist",                      /* name */
  gate_tree_loop_distribution,  /* gate */
  tree_loop_distribution,       /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_TREE_LOOP_DISTRIBUTION,    /* tv_id */
  PROP_cfg | PROP_ssa,          /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_dump_func | TODO_verify_loops    /* todo_flags_finish */
 }
};
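
/* Usage note: this pass is gated on flag_tree_loop_distribution,
   i.e. it runs when GCC is invoked with -ftree-loop-distribution,
   and its work can be inspected with -fdump-tree-ldist-details,
   which prints the RDG, the computed partitions, and the per-loop
   distribution decisions.  */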
