/* Matrix layout transformations.
   Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with an equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M==1, we actually flatten the whole matrix.
   For instance consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **)  malloc(dim1 * sizeof(int *));
     for (i=0; i<dim1; i++)
        a[i] = (int *) malloc (dim2 * sizeof(int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof(int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().

   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0,1,..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================
   The idea of Matrix Transposing is to organize the matrix in a different
   layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

   for (i=0; i<N; i++)
    for (j=0; j<M; j++)
     access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

   for (i=0; i<N; i++)
    for (j=0; j<M; j++)
      access to a[j][i]

   In this loop we iterate over the columns and not the rows.
   Therefore, exchanging the rows and the columns
   would give a layout with better (cache) locality.
   Exchanging the dimensions of the matrix is called matrix transposing.

   This example, of course, extends to matrices of more dimensions
   as well.

   Since a program could include all kinds of accesses, there is a decision
   mechanism, implemented in analyze_transpose(), which implements a
   heuristic that tries to determine whether to transpose the matrix or not,
   according to the form of the most dominant accesses.
   This decision is passed on to the flattening mechanism, and whether
   or not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision-making
   mechanism is used; otherwise the matrix is only flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC", which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
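
/* For illustration, consider the column-major loop just above: a
   minimal sketch (not taken from the transformation itself), assuming
   "a" has dim0 of size M (indexed by j) and dim1 of size N (indexed
   by i).  Flattening alone would rewrite the access as

     a[j * N + i];    the inner loop (over j) strides by N elements

   while transposing first and then flattening yields

     a[i * M + j];    the inner loop (over j) strides by 1 element

   so the innermost loop walks memory sequentially.  */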

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-sccvn.h"

/* We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  gimple call_stmt;             /* Tree for "T4 = malloc (T3);"                     */
  tree size_var;                /* Var decl for T3.                                 */
  tree malloc_size;             /* Tree for "<constant>", the rhs assigned to T3.   */
};

static tree can_calculate_expr_before_stmt (tree, sbitmap);
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree arg1;

  gcc_assert (is_gimple_call (stmt));

  malloc_fn_decl = gimple_call_fndecl (stmt);
  if (malloc_fn_decl == NULL
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
    return;

  arg1 = gimple_call_arg (stmt, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}

/* Information about matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  gimple stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Calls to free when flattening a matrix.  */

struct free_info
{
  gimple stmt;
  tree func;
};

/* Information about matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
  int min_indirect_level_escape;

  gimple min_indirect_level_escape_stmt;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need to use realloc
     and MAX_MALLOCED_LEVEL.  */
  gimple *malloc_for_level;

  int max_malloced_level;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed at that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     The elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) * access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when we
   get to the phi node.  Usually we will have phi nodes with more than two
   arguments; we must then ensure that all of them reach the phi node with
   the same indirection level, otherwise it's not safe to do the flattening.
   So we record the information regarding the indirection level each time we
   get to the phi node in this hash table.  */

struct matrix_access_phi_node
{
  gimple phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree (and its code) in which the ssa_var is accessed; currently
     it can be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *const ma_phi =
    (const struct matrix_access_phi_node *) p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *const phi1 =
    (const struct matrix_access_phi_node *) p1;
  const struct matrix_access_phi_node *const phi2 =
    (const struct matrix_access_phi_node *) p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return false if STMT may contain a vector expression.
   In this situation, no matrix should be flattened.  */
static bool
may_flatten_matrices_1 (gimple stmt)
{
  tree t;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      if (!gimple_assign_cast_p (stmt))
        return true;

      t = gimple_assign_rhs1 (stmt);
      while (CONVERT_EXPR_P (t))
        {
          if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
            {
              tree pointee;

              pointee = TREE_TYPE (t);
              while (POINTER_TYPE_P (pointee))
                pointee = TREE_TYPE (pointee);
              if (TREE_CODE (pointee) == VECTOR_TYPE)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Found vector type, don't flatten matrix\n");
                  return false;
                }
            }
          t = TREE_OPERAND (t, 0);
        }
      break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  gimple_stmt_iterator gsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
            return false;
    }
  return true;
}
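
/* As an illustration of what may_flatten_matrices_1 rejects, a
   hypothetical cast chain whose pointee type is a vector, written
   with the GCC vector extension:

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si **vm = (v4si **) some_ptr;    pointee is a VECTOR_TYPE

   disables the flattening for all matrices, and any GIMPLE_ASM is
   rejected for the same reason: the asm text could hide vector
   operations.  */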

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
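
/* Illustrative examples of what analyze_matrix_decl accepts; the
   declarations below are hypothetical, not part of this file:

     int **m2;         dim_num == 2: candidate
     double ***m3;     dim_num == 3: candidate
     int *v;           dim_num == 1: rejected (nothing to flatten)
     struct s **p;     rejected if struct s is incomplete here

   Pointer levels are counted by stripping POINTER_TYPE nodes until a
   non-pointer type is reached, and the element type must be complete
   with a constant size.  */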

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
        continue;

      if (matrices_to_reorg)
        if ((tmp = analyze_matrix_decl (var_decl)))
          {
            if (!TREE_ADDRESSABLE (var_decl))
              {
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
                *slot = tmp;
              }
          }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
    case SSA_NAME:
      if (t == a->ssa_var)
        a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
          && TREE_OPERAND (t, 0) == a->ssa_var)
        a->var_found = true;
      break;
    default:
      break;
    }
}

/* Find if the SSA variable is accessed on the right hand side of
   gimple call STMT.  */

static void
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  tree decl;
  tree arg;
  size_t i;

  a->t_code = CALL_EXPR;
  for (i = 0; i < gimple_call_num_args (stmt); i++)
    {
      arg = gimple_call_arg (stmt, i);
      if (arg == a->ssa_var)
        {
          a->var_found = true;
          decl = gimple_call_fndecl (stmt);
          a->t_tree = decl;
          break;
        }
    }
}

/* Find if the SSA variable is accessed on the right hand side of
   gimple assign STMT.  */

static void
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
  a->t_code = gimple_assign_rhs_code (stmt);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
    case INDIRECT_REF:
    CASE_CONVERT:
    case VIEW_CONVERT_EXPR:
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = gimple_assign_rhs1 (stmt);
      op2 = gimple_assign_rhs2 (stmt);

      if (op1 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op2;
        }
      else if (op2 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op1;
        }
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
                               tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first we make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen;
         we know its allocation function must be
         MI->allocation_function_decl since it's different from
         CURRENT_FUNCTION_DECL.  The escaping level should then be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
         must be set accordingly.  */
      for (min_malloc_level = 0;
           min_malloc_level < mi->max_malloced_level
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
        {
          mi->allocation_function_decl = current_function_decl;
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
        }
      else
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          /* It cannot be that (level == min_malloc_level);
             we would have returned earlier.  */
          return;
        }
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to the malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
        = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);

      /* Zero the newly allocated items (elements MAX_MALLOCED_LEVEL
         through LEVEL).  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
              0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}
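
/* A sketch of how MALLOC_FOR_LEVEL fills in for a hypothetical
   three-level matrix "int ***a" allocated in one function:

     a = malloc (n0 * sizeof (int **));         recorded at level 0
     a[i] = malloc (n1 * sizeof (int *));       recorded at level 1
     a[i][j] = malloc (n2 * sizeof (int));      recorded at level 2

   Each call statement lands in mi->malloc_for_level[LEVEL]; a second,
   different malloc statement seen for the same level makes the matrix
   escape at that level (see analyze_matrix_allocation_site below).  */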

/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, we traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   Return if STMT is related to an allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
                                int level, sbitmap visited)
{
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (rhs) == SSA_NAME)
        {
          gimple def = SSA_NAME_DEF_STMT (rhs);

          analyze_matrix_allocation_site (mi, def, level, visited);
          return;
        }
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
        return;
    }
  /* A result of a call to malloc.  */
  else if (is_gimple_call (stmt))
    {
      int call_flags = gimple_call_flags (stmt);

      if (!(call_flags & ECF_MALLOC))
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        {
          tree malloc_fn_decl;

          malloc_fn_decl = gimple_call_fndecl (stmt);
          if (malloc_fn_decl == NULL_TREE)
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Matrix %s is an argument to function %s\n",
                         get_name (mi->decl), get_name (malloc_fn_decl));
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
        }
      /* This is a call to malloc of level 'level'.
         mi->max_malloced_level-1 == level  means that we've
         seen a malloc statement of level 'level' before.
         If the statement is not the same one that we've
         seen before, then there's another malloc statement
         for the same level, which means that we need to mark
         it escaping.  */
      if (mi->malloc_for_level
          && mi->max_malloced_level-1 == level
          && mi->malloc_for_level[level] != stmt)
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          return;
        }
      else
        add_allocation_site (mi, stmt, level);
      return;
    }
  /* It looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
   is how often the matrix is accessed by this access site (count of the
   access site).
   2. which dimension in the access site is iterated by the innermost
   loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   This is computed by the following equation, where n is the number of
   dimensions and m is the number of matrix access sites:

     for j = 1..m, i = 1..n:
       dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j] iterates over
   dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of the dimension implies
   the locality of the elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
        {
          for (i = 0;
               VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
               i++)
            free (acc_info);
          VEC_free (access_site_info_p, heap, mi->access_l);
        }
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
          && acc_info->level < min_escape_l)
        {
          loop = loop_containing_stmt (acc_info->stmt);
          if (!loop || loop->inner)
            {
              free (acc_info);
              continue;
            }
          if (simple_iv (loop, loop, acc_info->offset, &iv, true))
            {
              if (iv.step != NULL)
                {
                  HOST_WIDE_INT istep;

                  istep = int_cst_value (iv.step);
                  if (istep != 0)
                    {
                      acc_info->iterated_by_inner_most_loop_p = 1;
                      mi->dim_hot_level[acc_info->level] +=
                        gimple_bb (acc_info->stmt)->count;
                    }
                }
            }
        }
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}
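
/* A worked example of the hotness computation above, with made-up
   numbers: suppose a two-dimensional matrix has two access sites,
   acc[0] iterating dimension 1 in its innermost loop with basic-block
   count 900, and acc[1] iterating dimension 0 with count 100.  Then

     dim_hot_level[1] += 900;    from acc[0]
     dim_hot_level[0] += 100;    from acc[1]

   so dimension 1 is hotter, and the transposed layout should roughly
   order the dimensions by this weight so that the hottest dimension
   gives the best locality.  */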

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, gimple def_stmt)
{
  tree op1, op2, index;

  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL_TREE;
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    return get_index_from_offset (offset,
                                  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
  else if (is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
        return NULL_TREE;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}
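
/* For illustration, the use-def walk above would recover the index
   from a GIMPLE sequence like the following (names are hypothetical):

     D.11 = D.10 * 8;    MULT_EXPR with one INTEGER_CST operand
     p = base + D.11;    POINTER_PLUS_EXPR whose offset is D.11

   Starting from D.11's definition, copies and casts are skipped, and
   the MULT_EXPR's non-constant operand (D.10) is returned as the
   index; a phi definition or a mult of two non-constants gives up.  */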

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
                  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (is_gimple_assign (stmt)
      && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (POINTER_TYPE_P
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
        int_size_in_bytes (TREE_TYPE
                           (TREE_TYPE
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      else if (mi->dimension_type_size_len < l + 1)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
                                          (l + 1) * sizeof (HOST_WIDE_INT));
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
                  0, (l + 1 - mi->dimension_type_size_len)
                  * sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
        mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
        mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static int
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
                                gimple use_stmt, int current_indirect_level)
{
  tree fndecl = gimple_call_fndecl (use_stmt);

  if (gimple_call_lhs (use_stmt))
    {
      tree lhs = gimple_call_lhs (use_stmt);
      struct ssa_acc_in_tree lhs_acc, rhs_acc;

      memset (&lhs_acc, 0, sizeof (lhs_acc));
      memset (&rhs_acc, 0, sizeof (rhs_acc));

      lhs_acc.ssa_var = ssa_var;
      lhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_tree (lhs, &lhs_acc);
      rhs_acc.ssa_var = ssa_var;
      rhs_acc.t_code = ERROR_MARK;
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);

      /* The SSA must be either in the left side or in the right side,
         to understand what is happening.
         In case the SSA_NAME is found in both sides we should be escaping
         at this level because in this case we cannot calculate the
         address correctly.  */
      if ((lhs_acc.var_found && rhs_acc.var_found
           && lhs_acc.t_code == INDIRECT_REF)
          || (!rhs_acc.var_found && !lhs_acc.var_found))
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

      /* If we are storing to the matrix at some level, then mark it as
         escaping at that level.  */
      if (lhs_acc.var_found)
        {
          int l = current_indirect_level + 1;

          gcc_assert (lhs_acc.t_code == INDIRECT_REF);
          mark_min_matrix_escape_level (mi, l, use_stmt);
          return current_indirect_level;
        }
    }

  if (fndecl)
    {
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Function call %s, level %d escapes.\n",
                     get_name (mi->decl), get_name (fndecl),
                     current_indirect_level);
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
        }
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
        {
          /* Record the free statements so we can delete them
             later.  */
          int l = current_indirect_level;

          mi->free_stmts[l].stmt = use_stmt;
          mi->free_stmts[l].func = current_function_decl;
        }
    }
  return current_indirect_level;
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escaping levels that get to the PHI node
   and make sure they all have the same escaping level;
   if not (which is rare) we let the escaping level be the
   minimum level that gets into that PHI because starting from
   that level we cannot expect the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
                               int current_indirect_level, sbitmap visited,
                               bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = (struct matrix_access_phi_node *)
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
        return;
      else
        {
          int level = MIN (maphi->indirection_level,
                           current_indirect_level);
          size_t j;
          gimple stmt = NULL;

          maphi->indirection_level = level;
          for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
            {
              tree def = PHI_ARG_DEF (use_stmt, j);

              if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
                stmt = SSA_NAME_DEF_STMT (def);
            }
          mark_min_matrix_escape_level (mi, level, stmt);
        }
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
                               current_indirect_level, false, visited,
                               record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents an assign statement (the rhs or lhs includes
   the ssa var that we want to check because it came from some use of
   matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection level we
   reached so far.  */

static int
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
                                  gimple use_stmt, int current_indirect_level,
                                  bool last_op, sbitmap visited,
                                  bool record_accesses)
{
  tree lhs = gimple_get_lhs (use_stmt);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);

  /* The SSA must be either in the left side or in the right side,
     to understand what is happening.
     In case the SSA_NAME is found in both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);

      if (!(gimple_assign_copy_p (use_stmt)
            || gimple_assign_cast_p (use_stmt))
          || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
        mark_min_matrix_escape_level (mi, l, use_stmt);
      else
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
          analyze_matrix_allocation_site (mi, def_stmt, l, visited);
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE, l, true);
          update_type_size (mi, use_stmt, NULL, l);
        }
      return current_indirect_level;
    }
  /* Now, check the right-hand-side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      if (rhs_acc.t_code != INDIRECT_REF
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      /* If the access in the RHS has an indirection, increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
        {
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE,
                                           current_indirect_level, true);
          current_indirect_level += 1;
        }
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
        {
          gcc_assert (rhs_acc.second_op);
          if (last_op)
            /* Currently we support only one PLUS expression on the
               SSA_NAME that holds the base address of the current
               indirection level; to support the more general case there
               is a need to hold a stack of expressions and regenerate
               the calculation later.  */
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              tree index;
              tree op1, op2;

              op1 = gimple_assign_rhs1 (use_stmt);
              op2 = gimple_assign_rhs2 (use_stmt);

              op2 = (op1 == ssa_var) ? op2 : op1;
              if (TREE_CODE (op2) == INTEGER_CST)
                index =
                  build_int_cst (TREE_TYPE (op1),
                                 TREE_INT_CST_LOW (op2) /
                                 int_size_in_bytes (TREE_TYPE (op1)));
              else
                {
                  index =
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
                  if (index == NULL_TREE)
                    {
                      mark_min_matrix_escape_level (mi,
                                                    current_indirect_level,
                                                    use_stmt);
                      return current_indirect_level;
                    }
                }
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, op2,
                                               index,
                                               current_indirect_level, false);
            }
        }
      /* If we are storing this level of indirection, mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
        {
          int l = current_indirect_level;

          /* One exception is when we are storing to the matrix
             variable itself; this is the case of malloc.  We must make
             sure that it's the one and only call to malloc, so
             we call analyze_matrix_allocation_site to check
             this out.  */
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              /* Also update the escaping level.  */
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                               NULL_TREE, l, true);
            }
        }
      else
        {
          /* We are placing it in an SSA, follow that SSA.  */
          analyze_matrix_accesses (mi, lhs,
                                   current_indirect_level,
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
                                   visited, record_accesses);
        }
    }
  return current_indirect_level;
}

/* Given a SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and level of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the maximum
   level it is accessed in (this will be the actual dimension of the
   matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, and if not
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
                         int current_indirect_level, bool last_op,
                         sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
                    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern INDIRECT_REF,
     then a POINTER_PLUS_EXPR, then INDIRECT_REF etc., while in between
     there could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
  {
    gimple use_stmt = USE_STMT (use_p);
    if (gimple_code (use_stmt) == GIMPLE_PHI)
      /* We check all the escaping levels that get to the PHI node
         and make sure they all have the same escaping level;
         if not (which is rare) we let the escaping level be the
         minimum level that gets into that PHI because starting from
         that level we cannot expect the behavior of the indirections.  */
      analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
                                     visited, record_accesses);
    else if (is_gimple_call (use_stmt))
      analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
                                      current_indirect_level);
    else if (is_gimple_assign (use_stmt))
      current_indirect_level =
        analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
                                          current_indirect_level, last_op,
                                          visited, record_accesses);
  }
}
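
/* The INDIRECT_REF / POINTER_PLUS_EXPR pattern that this traversal
   follows, sketched for a hypothetical access a[i][j] of an
   "int **a" (the temporaries and offsets are made up):

     T.1 = a;               copy, indirection level 0
     T.2 = T.1 + D.i_off;   POINTER_PLUS_EXPR, still level 0
     T.3 = *T.2;            INDIRECT_REF: level becomes 1
     T.4 = T.3 + D.j_off;   POINTER_PLUS_EXPR at level 1
     T.5 = *T.4;            INDIRECT_REF: the element itself

   Each POINTER_PLUS_EXPR contributes an offset/index pair via
   record_access_alloc_site_info, and each INDIRECT_REF bumps
   CURRENT_INDIRECT_LEVEL by one.  */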

typedef struct
{
  tree fn;
  gimple stmt;
} check_var_data;

/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  check_var_data *callback_data = (check_var_data*) data;
  tree fn = callback_data->fn;
  gimple_stmt_iterator gsi;
  gimple stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
  {
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        stmt = gsi_stmt (gsi);
        if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
          continue;
        if (gimple_get_lhs (stmt) == t)
          {
            callback_data->stmt = stmt;
            return t;
          }
      }
  }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find out the expression
   represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */

static tree
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
{
  tree op1, op2, res;
  enum tree_code code;

  switch (gimple_code (stmt))
    {
    case GIMPLE_ASSIGN:
      code = gimple_assign_rhs_code (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      switch (code)
        {
        case POINTER_PLUS_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case MULT_EXPR:

          op2 = gimple_assign_rhs2 (stmt);
          op1 = can_calculate_expr_before_stmt (op1, visited);
          if (!op1)
            return NULL_TREE;
          op2 = can_calculate_expr_before_stmt (op2, visited);
          if (op2)
            return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
          return NULL_TREE;

        CASE_CONVERT:
          res = can_calculate_expr_before_stmt (op1, visited);
          if (res != NULL_TREE)
            return build1 (code, gimple_expr_type (stmt), res);
          else
            return NULL_TREE;

        default:
          if (gimple_assign_single_p (stmt))
            return can_calculate_expr_before_stmt (op1, visited);
          else
            return NULL_TREE;
        }

    case GIMPLE_PHI:
      {
        size_t j;

        res = NULL_TREE;
        /* Make sure all the arguments represent the same value.  */
        for (j = 0; j < gimple_phi_num_args (stmt); j++)
          {
            tree new_res;
            tree def = PHI_ARG_DEF (stmt, j);

            new_res = can_calculate_expr_before_stmt (def, visited);
            if (res == NULL_TREE)
              res = new_res;
            else if (!new_res || !expressions_equal_p (res, new_res))
              return NULL_TREE;
          }
        return res;
      }

    default:
      return NULL_TREE;
    }
}
1513
 
1514
/* Go backwards in the use-def chains and find out the expression
1515
   represented by the possible SSA name in EXPR, until it is composed
1516
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
1517
   we make sure that all the arguments represent the same subexpression,
1518
   otherwise we fail.  */
1519
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  gimple def_stmt;
  tree res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* In the case of a loop we don't know how to represent this
         expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
        return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;

    default:
      return NULL_TREE;
    }
}
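
/* As a hypothetical example of the walk above, given the GIMPLE
   sequence

     t_1 = n * 4;
     sz_2 = t_1 + 8;

   where n is a PARM_DECL, can_calculate_expr_before_stmt on sz_2
   folds the use-def chain back into n * 4 + 8, which consists only
   of a PARM_DECL and INTEGER_CSTs and can therefore be recomputed
   before the level-0 malloc.  A PHI whose arguments all fold to the
   same such expression is accepted as well; anything else (e.g. a
   loop-carried SSA name) makes the walk fail with NULL_TREE.  */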

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression can be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check whether it
   is appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
      (The above two are taken care of during the analysis when
      we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of
      dimension 0 (the top level) -- we must make sure we can
      represent the size of the allocation as an expression of global
      parameters or constants, and that they don't change over the
      function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL);

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      gimple call_stmt;
      tree size;
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
        {
          if (!mi->dimension_size)
            {
              mi->dimension_size =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
              mi->dimension_size_orig =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
            }
          mi->dimension_size[level] = mcd.size_var;
          mi->dimension_size_orig[level] = mcd.size_var;
          continue;
        }
      /* ??? Here we should also record how to calculate the size
         expression, not only know that it can be anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, level, call_stmt);
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
                     get_name (mi->decl), level);
          break;
        }
      if (!mi->dimension_size)
        {
          mi->dimension_size =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
          mi->dimension_size_orig =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
        }
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* Release the scratch bitmap.  */
  sbitmap_free (visited);

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;
  return 1;
}
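
/* As a hypothetical example of a failure, if the size argument of a
   level-1 malloc is an SSA name defined by a call,

     cols_3 = get_cols ();
     ...
     p_4 = malloc (cols_3);

   then can_calculate_expr_before_stmt fails on cols_3, the matrix is
   marked as escaping at level 1, and (since flattening requires at
   least two non-escaping levels) it is not transformed.  */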

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  gimple_stmt_iterator gsi;
  gimple stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
  {
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        tree lhs;

        stmt = gsi_stmt (gsi);
        lhs = gimple_get_lhs (stmt);
        if (lhs != NULL_TREE
            && TREE_CODE (lhs) == VAR_DECL)
          {
            tmpmi.decl = lhs;
            if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                        &tmpmi)))
              {
                sbitmap_zero (visited_stmts_1);
                analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
              }
          }
        if (is_gimple_assign (stmt)
            && gimple_assign_single_p (stmt)
            && TREE_CODE (lhs) == SSA_NAME
            && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
          {
            tmpmi.decl = gimple_assign_rhs1 (stmt);
            if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
                                                        &tmpmi)))
              {
                sbitmap_zero (visited_stmts_1);
                analyze_matrix_accesses (mi, lhs, 0,
                                         false, visited_stmts_1, record);
              }
          }
      }
  }
  sbitmap_free (visited_stmts_1);
}
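
/* The two statement shapes matched above are, schematically,

     m = ...;     <-- the LHS is the matrix VAR_DECL: analyzed as a
                      potential allocation site
     p_1 = m;     <-- the RHS is the matrix VAR_DECL: the SSA copy
                      p_1 is followed as an access site  */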

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and for which we cannot know how they
   are accessed.  For each SSA name defined from a global variable of
   our interest, we traverse its use-def chains, follow the
   indirections, and record at what level of indirection the use of
   the variable escapes.  A use of a pointer escapes when it is passed
   to a function, stored into memory or assigned (except in malloc and
   free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
          || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
        continue;
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
        continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
        {
          /* This bitmap tracks the visited PHI nodes, so we can limit
             its size to the maximum number of SSA names.  */
          sbitmap_zero (visited_stmts_1);
          analyze_matrix_accesses (mi, ssa_var,
                                   0, false, visited_stmts_1, true);
        }
    }
  sbitmap_free (visited_stmts_1);
}
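
/* As a hypothetical example, for a global matrix m the sequence

     p_1 = m;
     foo (p_1);

   passes the level-0 pointer to a function, so m escapes at level 0
   and cannot be flattened, whereas

     p_1 = m;
     q_2 = *p_1;
     t_3 = *q_2;

   only follows the indirections and keeps m a candidate.  */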

/* Used when we want to convert the expression
   RESULT = something * ORIG into RESULT = something * NEW_VAL.
   If ORIG and NEW_VAL are powers of 2, shift operations can be done,
   else division and multiplication.  */

static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new_val);

  if (x != -1 && y != -1)
    {
      if (x == y)
        return result;
      else if (x > y)
        {
          log = build_int_cst (TREE_TYPE (result), x - y);
          result1 =
            fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
          return result1;
        }
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
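
/* For instance, compute_offset (8, 8, res) returns RES unchanged;
   when both arguments are powers of two a single shift is emitted,
   e.g. compute_offset (16, 4, res) builds res << 2; otherwise the
   generic form (RES / ORIG) * NEW_VAL is built, e.g. (res / 12) * 6
   for compute_offset (12, 6, res).  */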

/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA name we calculate the offset from the base address
   according to the following equation:

     For an access a[I1][I2]...[Ik], where D1..Dk are the lengths of the
     dimensions, the escaping level is m <= k, and a' is the newly
     allocated matrix, the access is translated to

       b[I(m+1)]...[Ik]

     where

       b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im  */
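
/* As a concrete instance of the formula above, for a fully flattened
   three-dimensional matrix (m = k = 3) an access a[i][j][l] is
   rewritten to use the single offset

     i*D2*D3 + j*D3 + l

   from the base a', i.e. the usual row-major linearization.  */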
static int
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  gimple_stmt_iterator gsi;
  struct matrix_info *mi = (struct matrix_info *) *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct access_site_info *acc_info;
  enum tree_code code;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    return 1;
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      /* This is possible because we collect the access sites before
         we determine the final minimum indirection level.  */
      if (acc_info->level >= min_escape_l)
        {
          free (acc_info);
          continue;
        }
      if (acc_info->is_alloc)
        {
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
            {
              ssa_op_iter iter;
              tree def;
              gimple stmt = acc_info->stmt;
              tree lhs;

              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
                mark_sym_for_renaming (SSA_NAME_VAR (def));
              gsi = gsi_for_stmt (stmt);
              gcc_assert (is_gimple_assign (acc_info->stmt));
              lhs = gimple_assign_lhs (acc_info->stmt);
              if (TREE_CODE (lhs) == SSA_NAME
                  && acc_info->level < min_escape_l - 1)
                {
                  imm_use_iterator imm_iter;
                  use_operand_p use_p;
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      {
                        tree rhs, tmp;
                        gimple new_stmt;

                        gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
                                    == INDIRECT_REF);
                        /* Emit a convert statement to convert to the
                           type of the use.  */
                        tmp = create_tmp_var (TREE_TYPE (lhs), "new");
                        add_referenced_var (tmp);
                        rhs = gimple_assign_rhs1 (acc_info->stmt);
                        rhs = fold_convert (TREE_TYPE (tmp),
                                            TREE_OPERAND (rhs, 0));
                        new_stmt = gimple_build_assign (tmp, rhs);
                        tmp = make_ssa_name (tmp, new_stmt);
                        gimple_assign_set_lhs (new_stmt, tmp);
                        gsi = gsi_for_stmt (acc_info->stmt);
                        gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
                        SET_USE (use_p, tmp);
                      }
                }
              if (acc_info->level < min_escape_l - 1)
                gsi_remove (&gsi, true);
            }
          free (acc_info);
          continue;
        }
      code = gimple_assign_rhs_code (acc_info->stmt);
      if (code == INDIRECT_REF
          && acc_info->level < min_escape_l - 1)
        {
          /* Replace the INDIRECT_REF with a NOP (cast); usually we are
             casting from "pointer to type" to "type".  */
          tree t =
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
          gimple_assign_set_rhs1 (acc_info->stmt, t);
        }
      else if (code == POINTER_PLUS_EXPR
               && acc_info->level < min_escape_l)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;

          tree offset;
          int k = acc_info->level;
          tree num_elements, total_elements;
          tree tmp1;
          tree d_size = mi->dimension_size[k];

          /* We already made sure in the analysis that the first operand
             is the base and the second is the offset.  */
          offset = acc_info->offset;
          if (mi->dim_map[k] == min_escape_l - 1)
            {
              if (!check_transpose_p || mi->is_transposed_p == false)
                tmp1 = offset;
              else
                {
                  tree new_offset;

                  new_offset =
                    compute_offset (mi->dimension_type_size[min_escape_l],
                                    mi->dimension_type_size[k + 1], offset);

                  total_elements = new_offset;
                  if (new_offset != offset)
                    {
                      gsi = gsi_for_stmt (acc_info->stmt);
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
                                                       true, NULL,
                                                       true, GSI_SAME_STMT);
                    }
                  else
                    tmp1 = offset;
                }
            }
          else
            {
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
              num_elements =
                fold_build2 (MULT_EXPR, sizetype,
                             fold_convert (sizetype, acc_info->index),
                             fold_convert (sizetype, d_size));
              add_referenced_var (d_size);
              gsi = gsi_for_stmt (acc_info->stmt);
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
                                               NULL, true, GSI_SAME_STMT);
            }
          /* Replace the offset if needed.  */
          if (tmp1 != offset)
            {
              if (TREE_CODE (offset) == SSA_NAME)
                {
                  gimple use_stmt;

                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                      if (use_stmt == acc_info->stmt)
                        SET_USE (use_p, tmp1);
                }
              else
                {
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
                  update_stmt (acc_info->stmt);
                }
            }
        }
      /* ??? Meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
         else
           gcc_unreachable ();  */
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  return 1;
}

/* Bubble-sort the array A of counts into ascending order and
   rearrange DIM_MAP to reflect the new order.  */

static void
sort_dim_hot_level (gcov_type *a, int *dim_map, int n)
{
  int i, j, tmp1;
  gcov_type tmp;

  for (i = 0; i < n - 1; i++)
    {
      for (j = 0; j < n - 1 - i; j++)
        {
          if (a[j + 1] < a[j])
            {
              tmp = a[j];       /* Swap a[j] and a[j+1].  */
              a[j] = a[j + 1];
              a[j + 1] = tmp;
              tmp1 = dim_map[j];
              dim_map[j] = dim_map[j + 1];
              dim_map[j + 1] = tmp1;
            }
        }
    }
}
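
/* For example, with A = {30, 5, 20} and DIM_MAP = {0, 1, 2} the
   routine leaves A = {5, 20, 30} and DIM_MAP = {1, 2, 0}, so the
   most frequently accessed dimension ends up last.  */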

/* Replace the multiple mallocs (one for each dimension) with one malloc
   of size DIM1*DIM2*...*DIMN*size_of_element.
   Make sure that we hold the size of each dimension in a new global
   variable assigned at the malloc site; this way we ensure that the
   size doesn't change and that it is accessible from all the other
   functions that use the matrix.  Also, the original calls to free are
   deleted and replaced by a new call that frees the flattened
   matrix.  */
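
/* As a hypothetical sketch of the rewrite, a three-level matrix
   double ***g whose dimension sizes were computed as S2, S1 and S0
   (held in new static variables) ends up as

     S2 = <size arg of the level-2 malloc>;
     S1 = (<size arg of the level-1 malloc> / sizeof (double *)) * S2;
     S0 = (<size arg of the level-0 malloc> / sizeof (double **)) * S1;
     g = malloc (S0);

   while the mallocs of levels 1 and 2, their cast and assignment
   statements, and the matching calls to free are removed.  */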
static int
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int i;
  struct matrix_info *mi;
  tree type, oldfn, prev_dim_size;
  gimple call_stmt_0, use_stmt;
  struct cgraph_node *c_node;
  struct cgraph_edge *e;
  gimple_stmt_iterator gsi;
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
  HOST_WIDE_INT element_size;

  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree old_size_0, tmp;
  int min_escape_l;
  int id;

  mi = (struct matrix_info *) *slot;

  min_escape_l = mi->min_indirect_level_escape;

  if (!mi->malloc_for_level)
    mi->min_indirect_level_escape = 0;

  if (mi->min_indirect_level_escape < 2)
    return 1;

  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
  for (i = 0; i < mi->min_indirect_level_escape; i++)
    mi->dim_map[i] = i;
  if (check_transpose_p)
    {
      int i;

      if (dump_file)
        {
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
          for (i = 0; i < min_escape_l; i++)
            {
              fprintf (dump_file, "dim %d before sort ", i);
              if (mi->dim_hot_level)
                fprintf (dump_file,
                         "count is  " HOST_WIDEST_INT_PRINT_DEC "  \n",
                         mi->dim_hot_level[i]);
            }
        }
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
                          mi->min_indirect_level_escape);
      if (dump_file)
        for (i = 0; i < min_escape_l; i++)
          {
            fprintf (dump_file, "dim %d after sort\n", i);
            if (mi->dim_hot_level)
              fprintf (dump_file, "count is  " HOST_WIDE_INT_PRINT_DEC
                       "  \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
          }
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        {
          if (dump_file)
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
                     mi->dim_map[i]);
          if (mi->dim_map[i] != i)
            {
              if (dump_file)
                fprintf (dump_file,
                         "Transposed dimensions: dim %d is now dim %d\n",
                         mi->dim_map[i], i);
              mi->is_transposed_p = true;
            }
        }
    }
  else
    {
      for (i = 0; i < mi->min_indirect_level_escape; i++)
        mi->dim_map[i] = i;
    }
  /* Call statement of the allocation site of level 0.  */
  call_stmt_0 = mi->malloc_for_level[0];

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (call_stmt_0, &mcd);

  mi->dimension_size[0] = mcd.size_var;
  mi->dimension_size_orig[0] = mcd.size_var;
  /* Make sure that the variables in the size expressions for
     all the dimensions (above level 0) aren't modified in
     the allocation function.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      tree t;
      check_var_data data;

      /* mi->dimension_size must contain the expression of the size calculated
         in check_allocation_function.  */
      gcc_assert (mi->dimension_size[i]);

      data.fn = mi->allocation_function_decl;
      data.stmt = NULL;
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
                                        check_var_notmodified_p,
                                        &data);
      if (t != NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, i, data.stmt);
          break;
        }
    }

  if (mi->min_indirect_level_escape < 2)
    return 1;

  /* Make sure that the size expression is available before the call
     to the malloc of level 0.  */
  gsi = gsi_for_stmt (call_stmt_0);

  /* Find out the size of each dimension by looking at the malloc
     sites and create a global variable to hold it.
     We add the assignment to the global before the malloc of level 0.  */

  /* To be able to produce gimple temporaries.  */
  oldfn = current_function_decl;
  current_function_decl = mi->allocation_function_decl;
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));

  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i],
     where n is the maximum non-escaping level.  */
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
  prev_dim_size = NULL_TREE;

  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
    {
      tree dim_size, dim_var;
      gimple stmt;
      tree d_type_size;

      /* Now put the size expression in a global variable and initialize it to
         the size expression before the malloc of level 0.  */
      dim_var =
        add_new_static_var (TREE_TYPE
                            (mi->dimension_size_orig[mi->dim_map[i]]));
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);

      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
      /* Find which dim ID becomes dim I.  */
      for (id = 0; id < mi->min_indirect_level_escape; id++)
        if (mi->dim_map[id] == i)
          break;
      d_type_size =
        build_int_cst (type, mi->dimension_type_size[id + 1]);
      if (!prev_dim_size)
        prev_dim_size = build_int_cst (type, element_size);
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
        {
          dim_size = mi->dimension_size_orig[id];
        }
      else
        {
          dim_size =
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
                         d_type_size);

          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
        }
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
                                           true, GSI_SAME_STMT);
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
      stmt = gimple_build_assign (dim_var, dim_size);
      mark_symbols_for_renaming (stmt);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

      prev_dim_size = mi->dimension_size[i] = dim_var;
    }
  update_ssa (TODO_update_ssa);
  /* Replace the malloc size argument in the malloc of level 0 with
     the size of all the dimensions.  */
  c_node = cgraph_node (mi->allocation_function_decl);
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
                                  NULL, true, GSI_SAME_STMT);
  if (TREE_CODE (old_size_0) == SSA_NAME)
    {
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          if (use_stmt == call_stmt_0)
            SET_USE (use_p, tmp);
    }
  /* When deleting the calls to malloc we need also to remove the edge from
     the call graph to keep it consistent.  Notice that cgraph_edge may
     create a new node in the call graph if there is no node for the given
     declaration; this shouldn't be the case but currently there is no way to
     check this outside of "cgraph.c".  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;
      gimple use_stmt1 = NULL;

      gimple call_stmt = mi->malloc_for_level[i];
      gcc_assert (is_gimple_call (call_stmt));
      e = cgraph_edge (c_node, call_stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      gsi = gsi_for_stmt (call_stmt);
      /* Remove the call stmt.  */
      gsi_remove (&gsi, true);
      /* Remove the type cast stmt.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_call_lhs (call_stmt))
      {
        use_stmt1 = use_stmt;
        gsi = gsi_for_stmt (use_stmt);
        gsi_remove (&gsi, true);
      }
      /* Remove the assignment of the allocated area.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
                             gimple_get_lhs (use_stmt1))
      {
        gsi = gsi_for_stmt (use_stmt);
        gsi_remove (&gsi, true);
      }
    }
  update_ssa (TODO_update_ssa);
#ifdef ENABLE_CHECKING
  verify_ssa (true);
#endif
  /* Delete the calls to free.  */
  for (i = 1; i < mi->min_indirect_level_escape; i++)
    {
      gimple_stmt_iterator gsi;

      /* ??? Not clear why this case is possible, but we failed on it
         once.  */
      if (!mi->free_stmts[i].stmt)
        continue;

      c_node = cgraph_node (mi->free_stmts[i].func);
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
      gcc_assert (e);
      cgraph_remove_edge (e);
      current_function_decl = mi->free_stmts[i].func;
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
      gsi_remove (&gsi, true);
    }
  /* Return to the previous situation.  */
  current_function_decl = oldfn;
  pop_cfun ();
  return 1;
}

/* Print out the results of the escape analysis.  */
static int
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = (struct matrix_info *) *slot;

  if (!dump_file)
    return 1;
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
  fprintf (dump_file, "\n");
  if (mi->min_indirect_level_escape >= 2)
    fprintf (dump_file, "Flattened %d dimensions\n",
             mi->min_indirect_level_escape);
  return 1;
}

/* Perform matrix flattening.  */

static unsigned int
matrix_reorg (void)
{
  struct cgraph_node *node;

  if (profile_info)
    check_transpose_p = true;
  else
    check_transpose_p = false;
  /* If there are hand-written vectors, we skip this optimization.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (!may_flatten_matrices (node))
      return 0;
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
  /* Find and record all potential matrices in the program.  */
  find_matrices_decl ();
  /* Analyze the accesses of the matrices (escape analysis).  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();

        if (!gimple_in_ssa_p (cfun))
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

#ifdef ENABLE_CHECKING
        verify_flow_info ();
#endif

        if (!matrices_to_reorg)
          {
            free_dominance_info (CDI_DOMINATORS);
            free_dominance_info (CDI_POST_DOMINATORS);
            pop_cfun ();
            current_function_decl = temp_fn;
            bitmap_obstack_release (NULL);

            return 0;
          }

        /* Create the hash table for the PHI nodes.  */
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
                                              mat_acc_phi_eq, free);
        if (!check_transpose_p)
          find_sites_in_func (false);
        else
          {
            find_sites_in_func (true);
            loop_optimizer_init (LOOPS_NORMAL);
            if (current_loops)
              scev_initialize ();
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
            if (current_loops)
              {
                scev_finalize ();
                loop_optimizer_finalize ();
                current_loops = NULL;
              }
          }
        /* If the current function is the allocation function for any of
           the matrices we check its allocation and the escaping level.  */
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
  /* Now transform the accesses.  */
  for (node = cgraph_nodes; node; node = node->next)
    if (node->analyzed)
      {
        /* Remember that allocation sites have been handled.  */
        tree temp_fn;

        temp_fn = current_function_decl;
        current_function_decl = node->decl;
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
        bitmap_obstack_initialize (NULL);
        gimple_register_cfg_hooks ();
        record_all_accesses_in_func ();
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
        free_dominance_info (CDI_DOMINATORS);
        free_dominance_info (CDI_POST_DOMINATORS);
        pop_cfun ();
        current_function_decl = temp_fn;
        bitmap_obstack_release (NULL);
      }
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);

  current_function_decl = NULL;
  set_cfun (NULL);
  matrices_to_reorg = NULL;
  return 0;
}


/* The condition for matrix flattening to be performed.  */
static bool
gate_matrix_reorg (void)
{
  return flag_ipa_matrix_reorg && flag_whole_program;
}
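
/* The gate above means the pass runs only when both
   -fipa-matrix-reorg and -fwhole-program are given, e.g.

     gcc -O2 -fwhole-program -fipa-matrix-reorg prog.c

   (assuming the whole program really is a single translation unit);
   the pass dump can then typically be requested with
   -fdump-ipa-matrix-reorg.  */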

struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
{
 {
  SIMPLE_IPA_PASS,
  "matrix-reorg",               /* name */
  gate_matrix_reorg,            /* gate */
  matrix_reorg,                 /* execute */
  NULL,                         /* sub */
  NULL,                         /* next */
  0,                            /* static_pass_number */
  TV_NONE,                      /* tv_id */
  0,                            /* properties_required */
  0,                            /* properties_provided */
  0,                            /* properties_destroyed */
  0,                            /* todo_flags_start */
  TODO_dump_cgraph | TODO_dump_func     /* todo_flags_finish */
 }
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.