OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [matrix-reorg.c] - Blame information for rev 867

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Matrix layout transformations.
2
   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3
   Contributed by Razya Ladelsky <razya@il.ibm.com>
4
   Originally written by Revital Eres and Mustafa Hagog.
5
 
6
This file is part of GCC.
7
 
8
GCC is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 3, or (at your option) any later
11
version.
12
 
13
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with GCC; see the file COPYING3.  If not see
20
<http://www.gnu.org/licenses/>.  */
21
 
22
/*
23
   Matrix flattening optimization tries to replace a N-dimensional
24
   matrix with its equivalent M-dimensional matrix, where M < N.
25
   This first implementation focuses on global matrices defined dynamically.
26
 
27
   When N==1, we actually flatten the whole matrix.
28
   For instance consider a two-dimensional array a [dim1] [dim2].
29
   The code for allocating space for it usually looks like:
30
 
31
     a = (int **)  malloc(dim1 * sizeof(int *));
32
     for (i=0; i<dim1; i++)
33
        a[i] = (int *) malloc (dim2 * sizeof(int));
34
 
35
   If the array "a" is found suitable for this optimization,
36
   its allocation is replaced by:
37
 
38
     a = (int *) malloc (dim1 * dim2 *sizeof(int));
39
 
40
   and all the references to a[i][j] are replaced by a[i * dim2 + j].
41
 
42
   The two main phases of the optimization are the analysis
43
   and transformation.
44
   The driver of the optimization is matrix_reorg ().
45
 
46
 
47
 
48
   Analysis phase:
49
   ===============
50
 
51
   We'll number the dimensions outside-in, meaning the most external
52
   is 0, then 1, and so on.
53
   The analysis part of the optimization determines K, the escape
54
   level of a N-dimensional matrix (K <= N), that allows flattening of
55
   the external dimensions 0,1,..., K-1. Escape level 0 means that the
56
   whole matrix escapes and no flattening is possible.
57
 
58
   The analysis part is implemented in analyze_matrix_allocation_site()
59
   and analyze_matrix_accesses().
60
 
61
   Transformation phase:
62
   =====================
63
   In this phase we define the new flattened matrices that replace the
64
   original matrices in the code.
65
   Implemented in transform_allocation_sites(),
66
   transform_access_sites().
67
 
68
   Matrix Transposing
69
   ==================
70
   The idea of Matrix Transposing is organizing the matrix in a different
71
   layout such that the dimensions are reordered.
72
   This could produce better cache behavior in some cases.
73
 
74
   For example, lets look at the matrix accesses in the following loop:
75
 
76
   for (i=0; i<N; i++)
77
    for (j=0; j<M; j++)
78
     access to a[i][j]
79
 
80
   This loop can produce good cache behavior because the elements of
81
   the inner dimension are accessed sequentially.
82
 
83
  However, if the accesses of the matrix were of the following form:
84
 
85
  for (i=0; i<N; i++)
86
   for (j=0; j<M; j++)
87
     access to a[j][i]
88
 
89
  In this loop we iterate the columns and not the rows.
90
  Therefore, replacing the rows and columns
91
  would have had an organization with better (cache) locality.
92
  Replacing the dimensions of the matrix is called matrix transposing.
93
 
94
  This  example, of course, could be enhanced to multiple dimensions matrices
95
  as well.
96
 
97
  Since a program could include all kind of accesses, there is a decision
98
  mechanism, implemented in analyze_transpose(), which implements a
99
  heuristic that tries to determine whether to transpose the matrix or not,
100
  according to the form of the more dominant accesses.
101
  This decision is transferred to the flattening mechanism, and whether
102
  the matrix was transposed or not, the matrix is flattened (if possible).
103
 
104
  This decision making is based on profiling information and loop information.
105
  If profiling information is available, decision making mechanism will be
106
  operated, otherwise the matrix will only be flattened (if possible).
107
 
108
  Both optimizations are described in the paper "Matrix flattening and
109
  transposing in GCC" which was presented in GCC summit 2006.
110
  http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
111
 
112
#include "config.h"
113
#include "system.h"
114
#include "coretypes.h"
115
#include "tm.h"
116
#include "tree.h"
117
#include "rtl.h"
118
#include "tree-inline.h"
119
#include "tree-flow.h"
120
#include "tree-flow-inline.h"
121
#include "langhooks.h"
122
#include "hashtab.h"
123
#include "flags.h"
124
#include "ggc.h"
125
#include "debug.h"
126
#include "target.h"
127
#include "cgraph.h"
128
#include "diagnostic-core.h"
129
#include "timevar.h"
130
#include "params.h"
131
#include "fibheap.h"
132
#include "intl.h"
133
#include "function.h"
134
#include "basic-block.h"
135
#include "cfgloop.h"
136
#include "tree-iterator.h"
137
#include "tree-pass.h"
138
#include "opts.h"
139
#include "tree-data-ref.h"
140
#include "tree-chrec.h"
141
#include "tree-scalar-evolution.h"
142
#include "tree-ssa-sccvn.h"
143
 
144
/* We need to collect a lot of data from the original malloc,
145
   particularly as the gimplifier has converted:
146
 
147
   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
148
 
149
   into
150
 
151
   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
152
   T4 = malloc (T3);
153
   T5 = (struct_type *) T4;
154
   orig_var = T5;
155
 
156
   The following struct fields allow us to collect all the necessary data from
157
   the gimplified program.  The comments in the struct below are all based
158
   on the gimple example above.  */
159
 
160
struct malloc_call_data
161
{
162
  gimple call_stmt;             /* Tree for "T4 = malloc (T3);"                     */
163
  tree size_var;                /* Var decl for T3.                                 */
164
  tree malloc_size;             /* Tree for "<constant>", the rhs assigned to T3.   */
165
};
166
 
167
static tree can_calculate_expr_before_stmt (tree, sbitmap);
168
static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
169
 
170
/* The front end of the compiler, when parsing statements of the form:
171
 
172
   var = (type_cast) malloc (sizeof (type));
173
 
174
   always converts this single statement into the following statements
175
   (GIMPLE form):
176
 
177
   T.1 = sizeof (type);
178
   T.2 = malloc (T.1);
179
   T.3 = (type_cast) T.2;
180
   var = T.3;
181
 
182
   Since we need to create new malloc statements and modify the original
183
   statements somewhat, we need to find all four of the above statements.
184
   Currently record_call_1 (called for building cgraph edges) finds and
185
   records the statements containing the actual call to malloc, but we
186
   need to find the rest of the variables/statements on our own.  That
187
   is what the following function does.  */
188
static void
189
collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
190
{
191
  tree size_var = NULL;
192
  tree malloc_fn_decl;
193
  tree arg1;
194
 
195
  gcc_assert (is_gimple_call (stmt));
196
 
197
  malloc_fn_decl = gimple_call_fndecl (stmt);
198
  if (malloc_fn_decl == NULL
199
      || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
200
    return;
201
 
202
  arg1 = gimple_call_arg (stmt, 0);
203
  size_var = arg1;
204
 
205
  m_data->call_stmt = stmt;
206
  m_data->size_var = size_var;
207
  if (TREE_CODE (size_var) != VAR_DECL)
208
    m_data->malloc_size = size_var;
209
  else
210
    m_data->malloc_size = NULL_TREE;
211
}
212
 
213
/* Information about matrix access site.
214
   For example: if an access site of matrix arr is arr[i][j]
215
   the ACCESS_SITE_INFO structure will have the address
216
   of arr as its stmt.  The INDEX_INFO will hold information about the
217
   initial address and index of each dimension.  */
218
struct access_site_info
219
{
220
  /* The statement (MEM_REF or POINTER_PLUS_EXPR).  */
221
  gimple stmt;
222
 
223
  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
224
  tree offset;
225
 
226
  /* The index which created the offset.  */
227
  tree index;
228
 
229
  /* The indirection level of this statement.  */
230
  int level;
231
 
232
  /* TRUE for allocation site FALSE for access site.  */
233
  bool is_alloc;
234
 
235
  /* The function containing the access site.  */
236
  tree function_decl;
237
 
238
  /* This access is iterated in the inner most loop */
239
  bool iterated_by_inner_most_loop_p;
240
};
241
 
242
typedef struct access_site_info *access_site_info_p;
243
DEF_VEC_P (access_site_info_p);
244
DEF_VEC_ALLOC_P (access_site_info_p, heap);
245
 
246
/* Calls to free when flattening a matrix.  */
247
 
248
struct free_info
249
{
250
  gimple stmt;
251
  tree func;
252
};
253
 
254
/* Information about matrix to flatten.  */
255
struct matrix_info
256
{
257
  /* Decl tree of this matrix.  */
258
  tree decl;
259
  /* Number of dimensions; number
260
     of "*" in the type declaration.  */
261
  int num_dims;
262
 
263
  /* Minimum indirection level that escapes, 0 means that
264
     the whole matrix escapes, k means that dimensions
265
 
266
  int min_indirect_level_escape;
267
 
268
  gimple min_indirect_level_escape_stmt;
269
 
270
  /* Hold the allocation site for each level (dimension).
271
     We can use NUM_DIMS as the upper bound and allocate the array
272
     once with this number of elements and no need to use realloc and
273
     MAX_MALLOCED_LEVEL.  */
274
  gimple *malloc_for_level;
275
 
276
  int max_malloced_level;
277
 
278
  /* Is the matrix transposed.  */
279
  bool is_transposed_p;
280
 
281
  /* The location of the allocation sites (they must be in one
282
     function).  */
283
  tree allocation_function_decl;
284
 
285
  /* The calls to free for each level of indirection.  */
286
  struct free_info *free_stmts;
287
 
288
  /* An array which holds for each dimension its size. where
289
     dimension 0 is the outer most (one that contains all the others).
290
   */
291
  tree *dimension_size;
292
 
293
  /* An array which holds for each dimension it's original size
294
     (before transposing and flattening take place).  */
295
  tree *dimension_size_orig;
296
 
297
  /* An array which holds for each dimension the size of the type of
298
     of elements accessed in that level (in bytes).  */
299
  HOST_WIDE_INT *dimension_type_size;
300
 
301
  int dimension_type_size_len;
302
 
303
  /* An array collecting the count of accesses for each dimension.  */
304
  gcov_type *dim_hot_level;
305
 
306
  /* An array of the accesses to be flattened.
307
     elements are of type "struct access_site_info *".  */
308
  VEC (access_site_info_p, heap) * access_l;
309
 
310
  /* A map of how the dimensions will be organized at the end of
311
     the analyses.  */
312
  int *dim_map;
313
};
314
 
315
/* In each phi node we want to record the indirection level we have when we
316
   get to the phi node.  Usually we will have phi nodes with more than two
317
   arguments, then we must assure that all of them get to the phi node with
318
   the same indirection level, otherwise it's not safe to do the flattening.
319
   So we record the information regarding the indirection level each time we
320
   get to the phi node in this hash table.  */
321
 
322
struct matrix_access_phi_node
323
{
324
  gimple phi;
325
  int indirection_level;
326
};
327
 
328
/* We use this structure to find if the SSA variable is accessed inside the
329
   tree and record the tree containing it.  */
330
 
331
struct ssa_acc_in_tree
332
{
333
  /* The variable whose accesses in the tree we are looking for.  */
334
  tree ssa_var;
335
  /* The tree and code inside it the ssa_var is accessed, currently
336
     it could be an MEM_REF or CALL_EXPR.  */
337
  enum tree_code t_code;
338
  tree t_tree;
339
  /* The place in the containing tree.  */
340
  tree *tp;
341
  tree second_op;
342
  bool var_found;
343
};
344
 
345
static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
346
                                     sbitmap, bool);
347
static int transform_allocation_sites (void **, void *);
348
static int transform_access_sites (void **, void *);
349
static int analyze_transpose (void **, void *);
350
static int dump_matrix_reorg_analysis (void **, void *);
351
 
352
static bool check_transpose_p;
353
 
354
/* Hash function used for the phi nodes.  */
355
 
356
static hashval_t
357
mat_acc_phi_hash (const void *p)
358
{
359
  const struct matrix_access_phi_node *const ma_phi =
360
    (const struct matrix_access_phi_node *) p;
361
 
362
  return htab_hash_pointer (ma_phi->phi);
363
}
364
 
365
/* Equality means phi node pointers are the same.  */
366
 
367
static int
368
mat_acc_phi_eq (const void *p1, const void *p2)
369
{
370
  const struct matrix_access_phi_node *const phi1 =
371
    (const struct matrix_access_phi_node *) p1;
372
  const struct matrix_access_phi_node *const phi2 =
373
    (const struct matrix_access_phi_node *) p2;
374
 
375
  if (phi1->phi == phi2->phi)
376
    return 1;
377
 
378
  return 0;
379
}
380
 
381
/* Hold the PHI nodes we visit during the traversal for escaping
382
   analysis.  */
383
static htab_t htab_mat_acc_phi_nodes = NULL;
384
 
385
/* This hash-table holds the information about the matrices we are
386
   going to handle.  */
387
static htab_t matrices_to_reorg = NULL;
388
 
389
/* Return a hash for MTT, which is really a "matrix_info *".  */
390
static hashval_t
391
mtt_info_hash (const void *mtt)
392
{
393
  return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
394
}
395
 
396
/* Return true if MTT1 and MTT2 (which are really both of type
397
   "matrix_info *") refer to the same decl.  */
398
static int
399
mtt_info_eq (const void *mtt1, const void *mtt2)
400
{
401
  const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
402
  const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
403
 
404
  if (i1->decl == i2->decl)
405
    return true;
406
 
407
  return false;
408
}
409
 
410
/* Return false if STMT may contain a vector expression.
411
   In this situation, all matrices should not be flattened.  */
412
static bool
413
may_flatten_matrices_1 (gimple stmt)
414
{
415
  switch (gimple_code (stmt))
416
    {
417
    case GIMPLE_ASSIGN:
418
    case GIMPLE_CALL:
419
      if (!gimple_has_lhs (stmt))
420
        return true;
421
      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
422
        {
423
          if (dump_file)
424
            fprintf (dump_file,
425
                     "Found vector type, don't flatten matrix\n");
426
          return false;
427
        }
428
      break;
429
    case GIMPLE_ASM:
430
      /* Asm code could contain vector operations.  */
431
      return false;
432
      break;
433
    default:
434
      break;
435
    }
436
  return true;
437
}
438
 
439
/* Return false if there are hand-written vectors in the program.
440
   We disable the flattening in such a case.  */
441
static bool
442
may_flatten_matrices (struct cgraph_node *node)
443
{
444
  tree decl;
445
  struct function *func;
446
  basic_block bb;
447
  gimple_stmt_iterator gsi;
448
 
449
  decl = node->decl;
450
  if (node->analyzed)
451
    {
452
      func = DECL_STRUCT_FUNCTION (decl);
453
      FOR_EACH_BB_FN (bb, func)
454
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
455
        if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
456
          return false;
457
    }
458
  return true;
459
}
460
 
461
/* Given a VAR_DECL, check its type to determine whether it is
462
   a definition of a dynamic allocated matrix and therefore is
463
   a suitable candidate for the matrix flattening optimization.
464
   Return NULL if VAR_DECL is not such decl.  Otherwise, allocate
465
   a MATRIX_INFO structure, fill it with the relevant information
466
   and return a pointer to it.
467
   TODO: handle also statically defined arrays.  */
468
static struct matrix_info *
469
analyze_matrix_decl (tree var_decl)
470
{
471
  struct matrix_info *m_node, tmpmi, *mi;
472
  tree var_type;
473
  int dim_num = 0;
474
 
475
  gcc_assert (matrices_to_reorg);
476
 
477
  if (TREE_CODE (var_decl) == PARM_DECL)
478
    var_type = DECL_ARG_TYPE (var_decl);
479
  else if (TREE_CODE (var_decl) == VAR_DECL)
480
    var_type = TREE_TYPE (var_decl);
481
  else
482
    return NULL;
483
 
484
  if (!POINTER_TYPE_P (var_type))
485
    return NULL;
486
 
487
  while (POINTER_TYPE_P (var_type))
488
    {
489
      var_type = TREE_TYPE (var_type);
490
      dim_num++;
491
    }
492
 
493
  if (dim_num <= 1)
494
    return NULL;
495
 
496
  if (!COMPLETE_TYPE_P (var_type)
497
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
498
    return NULL;
499
 
500
  /* Check to see if this pointer is already in there.  */
501
  tmpmi.decl = var_decl;
502
  mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
503
 
504
  if (mi)
505
    return NULL;
506
 
507
  /* Record the matrix.  */
508
 
509
  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
510
  m_node->decl = var_decl;
511
  m_node->num_dims = dim_num;
512
  m_node->free_stmts
513
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
514
 
515
  /* Init min_indirect_level_escape to -1 to indicate that no escape
516
     analysis has been done yet.  */
517
  m_node->min_indirect_level_escape = -1;
518
  m_node->is_transposed_p = false;
519
 
520
  return m_node;
521
}
522
 
523
/* Free matrix E.  */
524
static void
525
mat_free (void *e)
526
{
527
  struct matrix_info *mat = (struct matrix_info *) e;
528
 
529
  if (!mat)
530
    return;
531
 
532
  free (mat->free_stmts);
533
  free (mat->dim_hot_level);
534
  free (mat->malloc_for_level);
535
}
536
 
537
/* Find all potential matrices.
538
   TODO: currently we handle only multidimensional
539
   dynamically allocated arrays.  */
540
static void
541
find_matrices_decl (void)
542
{
543
  struct matrix_info *tmp;
544
  PTR *slot;
545
  struct varpool_node *vnode;
546
 
547
  gcc_assert (matrices_to_reorg);
548
 
549
  /* For every global variable in the program:
550
     Check to see if it's of a candidate type and record it.  */
551
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
552
    {
553
      tree var_decl = vnode->decl;
554
 
555
      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
556
        continue;
557
 
558
      if (matrices_to_reorg)
559
        if ((tmp = analyze_matrix_decl (var_decl)))
560
          {
561
            if (!TREE_ADDRESSABLE (var_decl))
562
              {
563
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
564
                *slot = tmp;
565
              }
566
          }
567
    }
568
  return;
569
}
570
 
571
/* Mark that the matrix MI escapes at level L.  */
572
static void
573
mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
574
{
575
  if (mi->min_indirect_level_escape == -1
576
      || (mi->min_indirect_level_escape > l))
577
    {
578
      mi->min_indirect_level_escape = l;
579
      mi->min_indirect_level_escape_stmt = s;
580
    }
581
}
582
 
583
/* Find if the SSA variable is accessed inside the
584
   tree and record the tree containing it.
585
   The only relevant uses are the case of SSA_NAME, or SSA inside
586
   MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
587
static void
588
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
589
{
590
  a->t_code = TREE_CODE (t);
591
  switch (a->t_code)
592
    {
593
    case SSA_NAME:
594
      if (t == a->ssa_var)
595
        a->var_found = true;
596
      break;
597
    case MEM_REF:
598
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
599
          && TREE_OPERAND (t, 0) == a->ssa_var)
600
        a->var_found = true;
601
      break;
602
    default:
603
      break;
604
    }
605
}
606
 
607
/* Find if the SSA variable is accessed on the right hand side of
608
   gimple call STMT. */
609
 
610
static void
611
ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
612
{
613
  tree decl;
614
  tree arg;
615
  size_t i;
616
 
617
  a->t_code = CALL_EXPR;
618
  for (i = 0; i < gimple_call_num_args (stmt); i++)
619
    {
620
      arg = gimple_call_arg (stmt, i);
621
      if (arg == a->ssa_var)
622
        {
623
          a->var_found = true;
624
          decl = gimple_call_fndecl (stmt);
625
          a->t_tree = decl;
626
          break;
627
        }
628
    }
629
}
630
 
631
/* Find if the SSA variable is accessed on the right hand side of
632
   gimple assign STMT. */
633
 
634
static void
635
ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
636
{
637
 
638
  a->t_code = gimple_assign_rhs_code (stmt);
639
  switch (a->t_code)
640
    {
641
      tree op1, op2;
642
 
643
    case SSA_NAME:
644
    case MEM_REF:
645
    CASE_CONVERT:
646
    case VIEW_CONVERT_EXPR:
647
      ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
648
      break;
649
    case POINTER_PLUS_EXPR:
650
    case PLUS_EXPR:
651
    case MULT_EXPR:
652
      op1 = gimple_assign_rhs1 (stmt);
653
      op2 = gimple_assign_rhs2 (stmt);
654
 
655
      if (op1 == a->ssa_var)
656
        {
657
          a->var_found = true;
658
          a->second_op = op2;
659
        }
660
      else if (op2 == a->ssa_var)
661
        {
662
          a->var_found = true;
663
          a->second_op = op1;
664
        }
665
      break;
666
    default:
667
      break;
668
    }
669
}
670
 
671
/* Record the access/allocation site information for matrix MI so we can
672
   handle it later in transformation.  */
673
static void
674
record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
675
                               tree index, int level, bool is_alloc)
676
{
677
  struct access_site_info *acc_info;
678
 
679
  if (!mi->access_l)
680
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
681
 
682
  acc_info
683
    = (struct access_site_info *)
684
    xcalloc (1, sizeof (struct access_site_info));
685
  acc_info->stmt = stmt;
686
  acc_info->offset = offset;
687
  acc_info->index = index;
688
  acc_info->function_decl = current_function_decl;
689
  acc_info->level = level;
690
  acc_info->is_alloc = is_alloc;
691
 
692
  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
693
 
694
}
695
 
696
/* Record the malloc as the allocation site of the given LEVEL.  But
697
   first we Make sure that all the size parameters passed to malloc in
698
   all the allocation sites could be pre-calculated before the call to
699
   the malloc of level 0 (the main malloc call).  */
700
static void
701
add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
702
{
703
  struct malloc_call_data mcd;
704
 
705
  /* Make sure that the allocation sites are in the same function.  */
706
  if (!mi->allocation_function_decl)
707
    mi->allocation_function_decl = current_function_decl;
708
  else if (mi->allocation_function_decl != current_function_decl)
709
    {
710
      int min_malloc_level;
711
 
712
      gcc_assert (mi->malloc_for_level);
713
 
714
      /* Find the minimum malloc level that already has been seen;
715
         we known its allocation function must be
716
         MI->allocation_function_decl since it's different than
717
         CURRENT_FUNCTION_DECL then the escaping level should be
718
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1 , and the allocation function
719
         must be set accordingly.  */
720
      for (min_malloc_level = 0;
721
           min_malloc_level < mi->max_malloced_level
722
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++)
723
        ;
724
      if (level < min_malloc_level)
725
        {
726
          mi->allocation_function_decl = current_function_decl;
727
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
728
        }
729
      else
730
        {
731
          mark_min_matrix_escape_level (mi, level, stmt);
732
          /* cannot be that (level == min_malloc_level)
733
             we would have returned earlier.  */
734
          return;
735
        }
736
    }
737
 
738
  /* Find the correct malloc information.  */
739
  collect_data_for_malloc_call (stmt, &mcd);
740
 
741
  /* We accept only calls to malloc function; we do not accept
742
     calls like calloc and realloc.  */
743
  if (!mi->malloc_for_level)
744
    {
745
      mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
746
      mi->max_malloced_level = level + 1;
747
    }
748
  else if (mi->max_malloced_level <= level)
749
    {
750
      mi->malloc_for_level
751
        = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
752
 
753
      /* Zero the newly allocated items.  */
754
      memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
755
              0, (level - mi->max_malloced_level) * sizeof (tree));
756
 
757
      mi->max_malloced_level = level + 1;
758
    }
759
  mi->malloc_for_level[level] = stmt;
760
}
761
 
762
/* Given an assignment statement STMT that we know that its
763
   left-hand-side is the matrix MI variable, we traverse the immediate
764
   uses backwards until we get to a malloc site.  We make sure that
765
   there is one and only one malloc site that sets this variable.  When
766
   we are performing the flattening we generate a new variable that
767
   will hold the size for each dimension; each malloc that allocates a
768
   dimension has the size parameter; we use that parameter to
769
   initialize the dimension size variable so we can use it later in
770
   the address calculations.  LEVEL is the dimension we're inspecting.
771
   Return if STMT is related to an allocation site.  */
772
 
773
static void
774
analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
775
                                int level, sbitmap visited)
776
{
777
  if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
778
    {
779
      tree rhs = gimple_assign_rhs1 (stmt);
780
 
781
      if (TREE_CODE (rhs) == SSA_NAME)
782
        {
783
          gimple def = SSA_NAME_DEF_STMT (rhs);
784
 
785
          analyze_matrix_allocation_site (mi, def, level, visited);
786
          return;
787
        }
788
      /* If we are back to the original matrix variable then we
789
         are sure that this is analyzed as an access site.  */
790
      else if (rhs == mi->decl)
791
        return;
792
    }
793
  /* A result of call to malloc.  */
794
  else if (is_gimple_call (stmt))
795
    {
796
      int call_flags = gimple_call_flags (stmt);
797
 
798
      if (!(call_flags & ECF_MALLOC))
799
        {
800
          mark_min_matrix_escape_level (mi, level, stmt);
801
          return;
802
        }
803
      else
804
        {
805
          tree malloc_fn_decl;
806
 
807
          malloc_fn_decl = gimple_call_fndecl (stmt);
808
          if (malloc_fn_decl == NULL_TREE)
809
            {
810
              mark_min_matrix_escape_level (mi, level, stmt);
811
              return;
812
            }
813
          if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
814
            {
815
              if (dump_file)
816
                fprintf (dump_file,
817
                         "Matrix %s is an argument to function %s\n",
818
                         get_name (mi->decl), get_name (malloc_fn_decl));
819
              mark_min_matrix_escape_level (mi, level, stmt);
820
              return;
821
            }
822
        }
823
      /* This is a call to malloc of level 'level'.
824
         mi->max_malloced_level-1 == level  means that we've
825
         seen a malloc statement of level 'level' before.
826
         If the statement is not the same one that we've
827
         seen before, then there's another malloc statement
828
         for the same level, which means that we need to mark
829
         it escaping.  */
830
      if (mi->malloc_for_level
831
          && mi->max_malloced_level-1 == level
832
          && mi->malloc_for_level[level] != stmt)
833
        {
834
          mark_min_matrix_escape_level (mi, level, stmt);
835
          return;
836
        }
837
      else
838
        add_allocation_site (mi, stmt, level);
839
      return;
840
    }
841
  /* Looks like we don't know what is happening in this
842
     statement so be in the safe side and mark it as escaping.  */
843
  mark_min_matrix_escape_level (mi, level, stmt);
844
}
845
 
846
/* The transposing decision making.
847
   In order to calculate the profitability of transposing, we collect two
848
   types of information regarding the accesses:
849
   1. profiling information used to express the hotness of an access, that
850
   is how often the matrix is accessed by this access site (count of the
851
   access site).
852
   2. which dimension in the access site is iterated by the inner
853
   most loop containing this access.
854
 
855
   The matrix will have a calculated value of weighted hotness for each
856
   dimension.
857
   Intuitively the hotness level of a dimension is a function of how
858
   many times it was the most frequently accessed dimension in the
859
   highly executed access sites of this matrix.
860
 
861
   As computed by following equation:
862
   m      n
863
   __   __
864
   \    \  dim_hot_level[i] +=
865
   /_   /_
866
   j     i
867
                 acc[j]->dim[i]->iter_by_inner_loop * count(j)
868
 
869
  Where n is the number of dims and m is the number of the matrix
870
  access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
871
  iterates over dim[i] in innermost loop, and is 0 otherwise.
872
 
873
  The organization of the new matrix should be according to the
874
  hotness of each dimension. The hotness of the dimension implies
875
  the locality of the elements.*/
876
static int
877
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
878
{
879
  struct matrix_info *mi = (struct matrix_info *) *slot;
880
  int min_escape_l = mi->min_indirect_level_escape;
881
  struct loop *loop;
882
  affine_iv iv;
883
  struct access_site_info *acc_info;
884
  int i;
885
 
886
  if (min_escape_l < 2 || !mi->access_l)
887
    {
888
      if (mi->access_l)
889
        {
890
          FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
891
            free (acc_info);
892
          VEC_free (access_site_info_p, heap, mi->access_l);
893
 
894
        }
895
      return 1;
896
    }
897
  if (!mi->dim_hot_level)
898
    mi->dim_hot_level =
899
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
900
 
901
 
902
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
903
       i++)
904
    {
905
      if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
906
          && acc_info->level < min_escape_l)
907
        {
908
          loop = loop_containing_stmt (acc_info->stmt);
909
          if (!loop || loop->inner)
910
            {
911
              free (acc_info);
912
              continue;
913
            }
914
          if (simple_iv (loop, loop, acc_info->offset, &iv, true))
915
            {
916
              if (iv.step != NULL)
917
                {
918
                  HOST_WIDE_INT istep;
919
 
920
                  istep = int_cst_value (iv.step);
921
                  if (istep != 0)
922
                    {
923
                      acc_info->iterated_by_inner_most_loop_p = 1;
924
                      mi->dim_hot_level[acc_info->level] +=
925
                        gimple_bb (acc_info->stmt)->count;
926
                    }
927
 
928
                }
929
            }
930
        }
931
      free (acc_info);
932
    }
933
  VEC_free (access_site_info_p, heap, mi->access_l);
934
 
935
  return 1;
936
}
937
 
938
/* Find the index which defines the OFFSET from base.
939
   We walk from use to def until we find how the offset was defined.  */
940
static tree
941
get_index_from_offset (tree offset, gimple def_stmt)
942
{
943
  tree op1, op2, index;
944
 
945
  if (gimple_code (def_stmt) == GIMPLE_PHI)
946
    return NULL;
947
  if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
948
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
949
    return get_index_from_offset (offset,
950
                                  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
951
  else if (is_gimple_assign (def_stmt)
952
           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
953
    {
954
      op1 = gimple_assign_rhs1 (def_stmt);
955
      op2 = gimple_assign_rhs2 (def_stmt);
956
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
957
        return NULL;
958
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
959
      return index;
960
    }
961
  else
962
    return NULL_TREE;
963
}
964
 
965
/* update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
966
   of the type related to the SSA_VAR, or the type related to the
967
   lhs of STMT, in the case that it is an MEM_REF.  */
968
static void
969
update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
970
                  int current_indirect_level)
971
{
972
  tree lhs;
973
  HOST_WIDE_INT type_size;
974
 
975
  /* Update type according to the type of the MEM_REF expr.   */
976
  if (is_gimple_assign (stmt)
977
      && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
978
    {
979
      lhs = gimple_assign_lhs (stmt);
980
      gcc_assert (POINTER_TYPE_P
981
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
982
      type_size =
983
        int_size_in_bytes (TREE_TYPE
984
                           (TREE_TYPE
985
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
986
    }
987
  else
988
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
989
 
990
  /* Record the size of elements accessed (as a whole)
991
     in the current indirection level (dimension).  If the size of
992
     elements is not known at compile time, mark it as escaping.  */
993
  if (type_size <= 0)
994
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
995
  else
996
    {
997
      int l = current_indirect_level;
998
 
999
      if (!mi->dimension_type_size)
1000
        {
1001
          mi->dimension_type_size
1002
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1003
          mi->dimension_type_size_len = l + 1;
1004
        }
1005
      else if (mi->dimension_type_size_len < l + 1)
1006
        {
1007
          mi->dimension_type_size
1008
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1009
                                          (l + 1) * sizeof (HOST_WIDE_INT));
1010
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1011
                  0, (l + 1 - mi->dimension_type_size_len)
1012
                  * sizeof (HOST_WIDE_INT));
1013
          mi->dimension_type_size_len = l + 1;
1014
        }
1015
      /* Make sure all the accesses in the same level have the same size
1016
         of the type.  */
1017
      if (!mi->dimension_type_size[l])
1018
        mi->dimension_type_size[l] = type_size;
1019
      else if (mi->dimension_type_size[l] != type_size)
1020
        mark_min_matrix_escape_level (mi, l, stmt);
1021
    }
1022
}
1023
 
1024
/* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1025
   ssa var that we want to check because it came from some use of matrix
1026
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1027
   far.  */
1028
 
1029
static int
1030
analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1031
                                gimple use_stmt, int current_indirect_level)
1032
{
1033
  tree fndecl = gimple_call_fndecl (use_stmt);
1034
 
1035
  if (gimple_call_lhs (use_stmt))
1036
    {
1037
      tree lhs = gimple_call_lhs (use_stmt);
1038
      struct ssa_acc_in_tree lhs_acc, rhs_acc;
1039
 
1040
      memset (&lhs_acc, 0, sizeof (lhs_acc));
1041
      memset (&rhs_acc, 0, sizeof (rhs_acc));
1042
 
1043
      lhs_acc.ssa_var = ssa_var;
1044
      lhs_acc.t_code = ERROR_MARK;
1045
      ssa_accessed_in_tree (lhs, &lhs_acc);
1046
      rhs_acc.ssa_var = ssa_var;
1047
      rhs_acc.t_code = ERROR_MARK;
1048
      ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1049
 
1050
      /* The SSA must be either in the left side or in the right side,
1051
         to understand what is happening.
1052
         In case the SSA_NAME is found in both sides we should be escaping
1053
         at this level because in this case we cannot calculate the
1054
         address correctly.  */
1055
      if ((lhs_acc.var_found && rhs_acc.var_found
1056
           && lhs_acc.t_code == MEM_REF)
1057
          || (!rhs_acc.var_found && !lhs_acc.var_found))
1058
        {
1059
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1060
          return current_indirect_level;
1061
        }
1062
      gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1063
 
1064
      /* If we are storing to the matrix at some level, then mark it as
1065
         escaping at that level.  */
1066
      if (lhs_acc.var_found)
1067
        {
1068
          int l = current_indirect_level + 1;
1069
 
1070
          gcc_assert (lhs_acc.t_code == MEM_REF);
1071
          mark_min_matrix_escape_level (mi, l, use_stmt);
1072
          return current_indirect_level;
1073
        }
1074
    }
1075
 
1076
  if (fndecl)
1077
    {
1078
      if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1079
        {
1080
          if (dump_file)
1081
            fprintf (dump_file,
1082
                     "Matrix %s: Function call %s, level %d escapes.\n",
1083
                     get_name (mi->decl), get_name (fndecl),
1084
                     current_indirect_level);
1085
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1086
        }
1087
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
1088
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1089
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1090
      else
1091
        {
1092
          /*Record the free statements so we can delete them
1093
             later. */
1094
          int l = current_indirect_level;
1095
 
1096
          mi->free_stmts[l].stmt = use_stmt;
1097
          mi->free_stmts[l].func = current_function_decl;
1098
        }
1099
    }
1100
  return current_indirect_level;
1101
}
1102
 
1103
/* USE_STMT represents a phi node of the ssa var that we want to
1104
   check  because it came from some use of matrix
1105
   MI.
1106
   We check all the escaping levels that get to the PHI node
1107
   and make sure they are all the same escaping;
1108
   if not (which is rare) we let the escaping level be the
1109
   minimum level that gets into that PHI because starting from
1110
   that level we cannot expect the behavior of the indirections.
1111
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
1112
 
1113
static void
1114
analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1115
                               int current_indirect_level, sbitmap visited,
1116
                               bool record_accesses)
1117
{
1118
 
1119
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1120
 
1121
  tmp_maphi.phi = use_stmt;
1122
  if ((maphi = (struct matrix_access_phi_node *)
1123
       htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1124
    {
1125
      if (maphi->indirection_level == current_indirect_level)
1126
        return;
1127
      else
1128
        {
1129
          int level = MIN (maphi->indirection_level,
1130
                           current_indirect_level);
1131
          size_t j;
1132
          gimple stmt = NULL;
1133
 
1134
          maphi->indirection_level = level;
1135
          for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1136
            {
1137
              tree def = PHI_ARG_DEF (use_stmt, j);
1138
 
1139
              if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1140
                stmt = SSA_NAME_DEF_STMT (def);
1141
            }
1142
          mark_min_matrix_escape_level (mi, level, stmt);
1143
        }
1144
      return;
1145
    }
1146
  maphi = (struct matrix_access_phi_node *)
1147
    xcalloc (1, sizeof (struct matrix_access_phi_node));
1148
  maphi->phi = use_stmt;
1149
  maphi->indirection_level = current_indirect_level;
1150
 
1151
  /* Insert to hash table.  */
1152
  pmaphi = (struct matrix_access_phi_node **)
1153
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1154
  gcc_assert (pmaphi);
1155
  *pmaphi = maphi;
1156
 
1157
  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1158
    {
1159
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1160
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1161
                               current_indirect_level, false, visited,
1162
                               record_accesses);
1163
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1164
    }
1165
}
1166
 
1167
/* USE_STMT represents an assign statement (the rhs or lhs include
1168
   the ssa var that we want to check  because it came from some use of matrix
1169
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
1170
 
1171
static int
1172
analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1173
                                  gimple use_stmt, int current_indirect_level,
1174
                                  bool last_op, sbitmap visited,
1175
                                  bool record_accesses)
1176
{
1177
  tree lhs = gimple_get_lhs (use_stmt);
1178
  struct ssa_acc_in_tree lhs_acc, rhs_acc;
1179
 
1180
  memset (&lhs_acc, 0, sizeof (lhs_acc));
1181
  memset (&rhs_acc, 0, sizeof (rhs_acc));
1182
 
1183
  lhs_acc.ssa_var = ssa_var;
1184
  lhs_acc.t_code = ERROR_MARK;
1185
  ssa_accessed_in_tree (lhs, &lhs_acc);
1186
  rhs_acc.ssa_var = ssa_var;
1187
  rhs_acc.t_code = ERROR_MARK;
1188
  ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1189
 
1190
  /* The SSA must be either in the left side or in the right side,
1191
     to understand what is happening.
1192
     In case the SSA_NAME is found in both sides we should be escaping
1193
     at this level because in this case we cannot calculate the
1194
     address correctly.  */
1195
  if ((lhs_acc.var_found && rhs_acc.var_found
1196
       && lhs_acc.t_code == MEM_REF)
1197
      || (!rhs_acc.var_found && !lhs_acc.var_found))
1198
    {
1199
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1200
      return current_indirect_level;
1201
    }
1202
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1203
 
1204
  /* If we are storing to the matrix at some level, then mark it as
1205
     escaping at that level.  */
1206
  if (lhs_acc.var_found)
1207
    {
1208
      int l = current_indirect_level + 1;
1209
 
1210
      gcc_assert (lhs_acc.t_code == MEM_REF);
1211
 
1212
      if (!(gimple_assign_copy_p (use_stmt)
1213
            || gimple_assign_cast_p (use_stmt))
1214
          || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1215
        mark_min_matrix_escape_level (mi, l, use_stmt);
1216
      else
1217
        {
1218
          gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1219
          analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1220
          if (record_accesses)
1221
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1222
                                           NULL_TREE, l, true);
1223
          update_type_size (mi, use_stmt, NULL, l);
1224
        }
1225
      return current_indirect_level;
1226
    }
1227
  /* Now, check the right-hand-side, to see how the SSA variable
1228
     is used.  */
1229
  if (rhs_acc.var_found)
1230
    {
1231
      if (rhs_acc.t_code != MEM_REF
1232
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1233
        {
1234
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1235
          return current_indirect_level;
1236
        }
1237
      /* If the access in the RHS has an indirection increase the
1238
         indirection level.  */
1239
      if (rhs_acc.t_code == MEM_REF)
1240
        {
1241
          if (record_accesses)
1242
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1243
                                           NULL_TREE,
1244
                                           current_indirect_level, true);
1245
          current_indirect_level += 1;
1246
        }
1247
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1248
        {
1249
          gcc_assert (rhs_acc.second_op);
1250
          if (last_op)
1251
            /* Currently we support only one PLUS expression on the
1252
               SSA_NAME that holds the base address of the current
1253
               indirection level; to support more general case there
1254
               is a need to hold a stack of expressions and regenerate
1255
               the calculation later.  */
1256
            mark_min_matrix_escape_level (mi, current_indirect_level,
1257
                                          use_stmt);
1258
          else
1259
            {
1260
              tree index;
1261
              tree op1, op2;
1262
 
1263
              op1 = gimple_assign_rhs1 (use_stmt);
1264
              op2 = gimple_assign_rhs2 (use_stmt);
1265
 
1266
              op2 = (op1 == ssa_var) ? op2 : op1;
1267
              if (TREE_CODE (op2) == INTEGER_CST)
1268
                index =
1269
                  build_int_cst (TREE_TYPE (op1),
1270
                                 TREE_INT_CST_LOW (op2) /
1271
                                 int_size_in_bytes (TREE_TYPE (op1)));
1272
              else
1273
                {
1274
                  index =
1275
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1276
                  if (index == NULL_TREE)
1277
                    {
1278
                      mark_min_matrix_escape_level (mi,
1279
                                                    current_indirect_level,
1280
                                                    use_stmt);
1281
                      return current_indirect_level;
1282
                    }
1283
                }
1284
              if (record_accesses)
1285
                record_access_alloc_site_info (mi, use_stmt, op2,
1286
                                               index,
1287
                                               current_indirect_level, false);
1288
            }
1289
        }
1290
      /* If we are storing this level of indirection mark it as
1291
         escaping.  */
1292
      if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
1293
        {
1294
          int l = current_indirect_level;
1295
 
1296
          /* One exception is when we are storing to the matrix
1297
             variable itself; this is the case of malloc, we must make
1298
             sure that it's the one and only one call to malloc so
1299
             we call analyze_matrix_allocation_site to check
1300
             this out.  */
1301
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1302
            mark_min_matrix_escape_level (mi, current_indirect_level,
1303
                                          use_stmt);
1304
          else
1305
            {
1306
              /* Also update the escaping level.  */
1307
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1308
              if (record_accesses)
1309
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1310
                                               NULL_TREE, l, true);
1311
            }
1312
        }
1313
      else
1314
        {
1315
          /* We are placing it in an SSA, follow that SSA.  */
1316
          analyze_matrix_accesses (mi, lhs,
1317
                                   current_indirect_level,
1318
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
1319
                                   visited, record_accesses);
1320
        }
1321
    }
1322
  return current_indirect_level;
1323
}
1324
 
1325
/* Given a SSA_VAR (coming from a use statement of the matrix MI),
1326
   follow its uses and level of indirection and find out the minimum
1327
   indirection level it escapes in (the highest dimension) and the maximum
1328
   level it is accessed in (this will be the actual dimension of the
1329
   matrix).  The information is accumulated in MI.
1330
   We look at the immediate uses, if one escapes we finish; if not,
1331
   we make a recursive call for each one of the immediate uses of the
1332
   resulting SSA name.  */
1333
static void
1334
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1335
                         int current_indirect_level, bool last_op,
1336
                         sbitmap visited, bool record_accesses)
1337
{
1338
  imm_use_iterator imm_iter;
1339
  use_operand_p use_p;
1340
 
1341
  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1342
                    current_indirect_level);
1343
 
1344
  /* We don't go beyond the escaping level when we are performing the
1345
     flattening.  NOTE: we keep the last indirection level that doesn't
1346
     escape.  */
1347
  if (mi->min_indirect_level_escape > -1
1348
      && mi->min_indirect_level_escape <= current_indirect_level)
1349
    return;
1350
 
1351
/* Now go over the uses of the SSA_NAME and check how it is used in
1352
   each one of them.  We are mainly looking for the pattern MEM_REF,
1353
   then a POINTER_PLUS_EXPR, then MEM_REF etc.  while in between there could
1354
   be any number of copies and casts.  */
1355
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1356
 
1357
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1358
  {
1359
    gimple use_stmt = USE_STMT (use_p);
1360
    if (gimple_code (use_stmt) == GIMPLE_PHI)
1361
      /* We check all the escaping levels that get to the PHI node
1362
         and make sure they are all the same escaping;
1363
         if not (which is rare) we let the escaping level be the
1364
         minimum level that gets into that PHI because starting from
1365
         that level we cannot expect the behavior of the indirections.  */
1366
 
1367
      analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1368
                                     visited, record_accesses);
1369
 
1370
    else if (is_gimple_call (use_stmt))
1371
      analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1372
                                      current_indirect_level);
1373
    else if (is_gimple_assign (use_stmt))
1374
      current_indirect_level =
1375
        analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1376
                                          current_indirect_level, last_op,
1377
                                          visited, record_accesses);
1378
  }
1379
}
1380
 
1381
typedef struct
1382
{
1383
  tree fn;
1384
  gimple stmt;
1385
} check_var_data;
1386
 
1387
/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1388
   the malloc size expression and check that those aren't changed
1389
   over the function.  */
1390
static tree
1391
check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1392
{
1393
  basic_block bb;
1394
  tree t = *tp;
1395
  check_var_data *callback_data = (check_var_data*) data;
1396
  tree fn = callback_data->fn;
1397
  gimple_stmt_iterator gsi;
1398
  gimple stmt;
1399
 
1400
  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1401
    return NULL_TREE;
1402
 
1403
  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1404
  {
1405
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1406
      {
1407
        stmt = gsi_stmt (gsi);
1408
        if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1409
          continue;
1410
        if (gimple_get_lhs (stmt) == t)
1411
          {
1412
            callback_data->stmt = stmt;
1413
            return t;
1414
          }
1415
      }
1416
  }
1417
  *walk_subtrees = 1;
1418
  return NULL_TREE;
1419
}
1420
 
1421
/* Go backwards in the use-def chains and find out the expression
1422
   represented by the possible SSA name in STMT, until it is composed
1423
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
1424
   we make sure that all the arguments represent the same subexpression,
1425
   otherwise we fail.  */
1426
 
1427
static tree
1428
can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1429
{
1430
  tree op1, op2, res;
1431
  enum tree_code code;
1432
 
1433
  switch (gimple_code (stmt))
1434
    {
1435
    case GIMPLE_ASSIGN:
1436
      code = gimple_assign_rhs_code (stmt);
1437
      op1 = gimple_assign_rhs1 (stmt);
1438
 
1439
      switch (code)
1440
        {
1441
        case POINTER_PLUS_EXPR:
1442
        case PLUS_EXPR:
1443
        case MINUS_EXPR:
1444
        case MULT_EXPR:
1445
 
1446
          op2 = gimple_assign_rhs2 (stmt);
1447
          op1 = can_calculate_expr_before_stmt (op1, visited);
1448
          if (!op1)
1449
            return NULL_TREE;
1450
          op2 = can_calculate_expr_before_stmt (op2, visited);
1451
          if (op2)
1452
            return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1453
          return NULL_TREE;
1454
 
1455
        CASE_CONVERT:
1456
          res = can_calculate_expr_before_stmt (op1, visited);
1457
          if (res != NULL_TREE)
1458
            return build1 (code, gimple_expr_type (stmt), res);
1459
          else
1460
            return NULL_TREE;
1461
 
1462
        default:
1463
          if (gimple_assign_single_p (stmt))
1464
            return can_calculate_expr_before_stmt (op1, visited);
1465
          else
1466
            return NULL_TREE;
1467
        }
1468
 
1469
    case GIMPLE_PHI:
1470
      {
1471
        size_t j;
1472
 
1473
        res = NULL_TREE;
1474
        /* Make sure all the arguments represent the same value.  */
1475
        for (j = 0; j < gimple_phi_num_args (stmt); j++)
1476
          {
1477
            tree new_res;
1478
            tree def = PHI_ARG_DEF (stmt, j);
1479
 
1480
            new_res = can_calculate_expr_before_stmt (def, visited);
1481
            if (res == NULL_TREE)
1482
              res = new_res;
1483
            else if (!new_res || !expressions_equal_p (res, new_res))
1484
              return NULL_TREE;
1485
          }
1486
        return res;
1487
      }
1488
 
1489
    default:
1490
      return NULL_TREE;
1491
    }
1492
}
1493
 
1494
/* Go backwards in the use-def chains and find out the expression
1495
   represented by the possible SSA name in EXPR, until it is composed
1496
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
1497
   we make sure that all the arguments represent the same subexpression,
1498
   otherwise we fail.  */
1499
static tree
1500
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1501
{
1502
  gimple def_stmt;
1503
  tree res;
1504
 
1505
  switch (TREE_CODE (expr))
1506
    {
1507
    case SSA_NAME:
1508
      /* Case of loop, we don't know to represent this expression.  */
1509
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1510
        return NULL_TREE;
1511
 
1512
      SET_BIT (visited, SSA_NAME_VERSION (expr));
1513
      def_stmt = SSA_NAME_DEF_STMT (expr);
1514
      res = can_calculate_stmt_before_stmt (def_stmt, visited);
1515
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
1516
      return res;
1517
    case VAR_DECL:
1518
    case PARM_DECL:
1519
    case INTEGER_CST:
1520
      return expr;
1521
 
1522
    default:
1523
      return NULL_TREE;
1524
    }
1525
}
1526
 
1527
/* There should be only one allocation function for the dimensions
1528
   that don't escape. Here we check the allocation sites in this
1529
   function. We must make sure that all the dimensions are allocated
1530
   using malloc and that the malloc size parameter expression could be
1531
   pre-calculated before the call to the malloc of dimension 0.
1532
 
1533
   Given a candidate matrix for flattening -- MI -- check if it's
1534
   appropriate for flattening -- we analyze the allocation
1535
   sites that we recorded in the previous analysis.  The result of the
1536
   analysis is a level of indirection (matrix dimension) in which the
1537
   flattening is safe.  We check the following conditions:
1538
   1. There is only one allocation site for each dimension.
1539
   2. The allocation sites of all the dimensions are in the same
1540
      function.
1541
      (The above two are being taken care of during the analysis when
1542
      we check the allocation site).
1543
   3. All the dimensions that we flatten are allocated at once; thus
1544
      the total size must be known before the allocation of the
1545
      dimension 0 (top level) -- we must make sure we represent the
1546
      size of the allocation as an expression of global parameters or
1547
      constants and that those doesn't change over the function.  */
1548
 
1549
static int
1550
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1551
{
1552
  int level;
1553
  struct matrix_info *mi = (struct matrix_info *) *slot;
1554
  sbitmap visited;
1555
 
1556
  if (!mi->malloc_for_level)
1557
    return 1;
1558
 
1559
  visited = sbitmap_alloc (num_ssa_names);
1560
 
1561
  /* Do nothing if the current function is not the allocation
1562
     function of MI.  */
1563
  if (mi->allocation_function_decl != current_function_decl
1564
      /* We aren't in the main allocation function yet.  */
1565
      || !mi->malloc_for_level[0])
1566
    return 1;
1567
 
1568
  for (level = 1; level < mi->max_malloced_level; level++)
1569
    if (!mi->malloc_for_level[level])
1570
      break;
1571
 
1572
  mark_min_matrix_escape_level (mi, level, NULL);
1573
 
1574
  /* Check if the expression of the size passed to malloc could be
1575
     pre-calculated before the malloc of level 0.  */
1576
  for (level = 1; level < mi->min_indirect_level_escape; level++)
1577
    {
1578
      gimple call_stmt;
1579
      tree size;
1580
      struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1581
 
1582
      call_stmt = mi->malloc_for_level[level];
1583
 
1584
      /* Find the correct malloc information.  */
1585
      collect_data_for_malloc_call (call_stmt, &mcd);
1586
 
1587
      /* No need to check anticipation for constants.  */
1588
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1589
        {
1590
          if (!mi->dimension_size)
1591
            {
1592
              mi->dimension_size =
1593
                (tree *) xcalloc (mi->min_indirect_level_escape,
1594
                                  sizeof (tree));
1595
              mi->dimension_size_orig =
1596
                (tree *) xcalloc (mi->min_indirect_level_escape,
1597
                                  sizeof (tree));
1598
            }
1599
          mi->dimension_size[level] = mcd.size_var;
1600
          mi->dimension_size_orig[level] = mcd.size_var;
1601
          continue;
1602
        }
1603
      /* ??? Here we should also add the way to calculate the size
1604
         expression not only know that it is anticipated.  */
1605
      sbitmap_zero (visited);
1606
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1607
      if (size == NULL_TREE)
1608
        {
1609
          mark_min_matrix_escape_level (mi, level, call_stmt);
1610
          if (dump_file)
1611
            fprintf (dump_file,
1612
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1613
                     get_name (mi->decl), level);
1614
          break;
1615
        }
1616
      if (!mi->dimension_size)
1617
        {
1618
          mi->dimension_size =
1619
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1620
          mi->dimension_size_orig =
1621
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1622
        }
1623
      mi->dimension_size[level] = size;
1624
      mi->dimension_size_orig[level] = size;
1625
    }
1626
 
1627
  /* We don't need those anymore.  */
1628
  for (level = mi->min_indirect_level_escape;
1629
       level < mi->max_malloced_level; level++)
1630
    mi->malloc_for_level[level] = NULL;
1631
  return 1;
1632
}
1633
 
1634
/* Track all access and allocation sites.  */
1635
static void
1636
find_sites_in_func (bool record)
1637
{
1638
  sbitmap visited_stmts_1;
1639
 
1640
  gimple_stmt_iterator gsi;
1641
  gimple stmt;
1642
  basic_block bb;
1643
  struct matrix_info tmpmi, *mi;
1644
 
1645
  visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1646
 
1647
  FOR_EACH_BB (bb)
1648
  {
1649
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1650
      {
1651
        tree lhs;
1652
 
1653
        stmt = gsi_stmt (gsi);
1654
        lhs = gimple_get_lhs (stmt);
1655
        if (lhs != NULL_TREE
1656
            && TREE_CODE (lhs) == VAR_DECL)
1657
          {
1658
            tmpmi.decl = lhs;
1659
            if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1660
                                                        &tmpmi)))
1661
              {
1662
                sbitmap_zero (visited_stmts_1);
1663
                analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1664
              }
1665
          }
1666
        if (is_gimple_assign (stmt)
1667
            && gimple_assign_single_p (stmt)
1668
            && TREE_CODE (lhs) == SSA_NAME
1669
            && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1670
          {
1671
            tmpmi.decl = gimple_assign_rhs1 (stmt);
1672
            if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1673
                                                        &tmpmi)))
1674
              {
1675
                sbitmap_zero (visited_stmts_1);
1676
                analyze_matrix_accesses (mi, lhs, 0,
1677
                                         false, visited_stmts_1, record);
1678
              }
1679
          }
1680
      }
1681
  }
1682
  sbitmap_free (visited_stmts_1);
1683
}
1684
 
1685
/* Traverse the use-def chains to see if there are matrices that
1686
   are passed through pointers and we cannot know how they are accessed.
1687
   For each SSA-name defined by a global variable of our interest,
1688
   we traverse the use-def chains of the SSA and follow the indirections,
1689
   and record in what level of indirection the use of the variable
1690
   escapes.  A use of a pointer escapes when it is passed to a function,
1691
   stored into memory or assigned (except in malloc and free calls).  */
1692
 
1693
static void
1694
record_all_accesses_in_func (void)
1695
{
1696
  unsigned i;
1697
  sbitmap visited_stmts_1;
1698
 
1699
  visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1700
 
1701
  for (i = 0; i < num_ssa_names; i++)
1702
    {
1703
      struct matrix_info tmpmi, *mi;
1704
      tree ssa_var = ssa_name (i);
1705
      tree rhs, lhs;
1706
 
1707
      if (!ssa_var
1708
          || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1709
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1710
        continue;
1711
      rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1712
      lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1713
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1714
        continue;
1715
 
1716
      /* If the RHS is a matrix that we want to analyze, follow the def-use
1717
         chain for this SSA_VAR and check for escapes or apply the
1718
         flattening.  */
1719
      tmpmi.decl = rhs;
1720
      if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1721
        {
1722
          /* This variable will track the visited PHI nodes, so we can limit
1723
             its size to the maximum number of SSA names.  */
1724
          sbitmap_zero (visited_stmts_1);
1725
          analyze_matrix_accesses (mi, ssa_var,
1726
                                   0, false, visited_stmts_1, true);
1727
 
1728
        }
1729
    }
1730
  sbitmap_free (visited_stmts_1);
1731
}
1732
 
1733
/* Used when we want to convert the expression: RESULT = something *
1734
   ORIG to RESULT = something * NEW_VAL. If ORIG and NEW_VAL are power
1735
   of 2, shift operations can be done, else division and
1736
   multiplication.  */
1737
 
1738
static tree
1739
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1740
{
1741
 
1742
  int x, y;
1743
  tree result1, ratio, log, orig_tree, new_tree;
1744
 
1745
  x = exact_log2 (orig);
1746
  y = exact_log2 (new_val);
1747
 
1748
  if (x != -1 && y != -1)
1749
    {
1750
      if (x == y)
1751
        return result;
1752
      else if (x > y)
1753
        {
1754
          log = build_int_cst (TREE_TYPE (result), x - y);
1755
          result1 =
1756
            fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1757
          return result1;
1758
        }
1759
      log = build_int_cst (TREE_TYPE (result), y - x);
1760
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1761
 
1762
      return result1;
1763
    }
1764
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
1765
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
1766
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1767
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1768
 
1769
  return result1;
1770
}
1771
 
1772
 
1773
/* We know that we are allowed to perform matrix flattening (according to the
1774
   escape analysis), so we traverse the use-def chains of the SSA vars
1775
   defined by the global variables pointing to the matrices of our interest.
1776
   in each use of the SSA we calculate the offset from the base address
1777
   according to the following equation:
1778
 
1779
     a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
1780
     escaping level is m <= k, and a' is the new allocated matrix,
1781
     will be translated to :
1782
 
1783
       b[I(m+1)]...[Ik]
1784
 
1785
       where
1786
       b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1787
                                                      */
1788
 
1789
static int
1790
transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1791
{
1792
  gimple_stmt_iterator gsi;
1793
  struct matrix_info *mi = (struct matrix_info *) *slot;
1794
  int min_escape_l = mi->min_indirect_level_escape;
1795
  struct access_site_info *acc_info;
1796
  enum tree_code code;
1797
  int i;
1798
 
1799
  if (min_escape_l < 2 || !mi->access_l)
1800
    return 1;
1801
  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1802
       i++)
1803
    {
1804
      /* This is possible because we collect the access sites before
1805
         we determine the final minimum indirection level.  */
1806
      if (acc_info->level >= min_escape_l)
1807
        {
1808
          free (acc_info);
1809
          continue;
1810
        }
1811
      if (acc_info->is_alloc)
1812
        {
1813
          if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1814
            {
1815
              ssa_op_iter iter;
1816
              tree def;
1817
              gimple stmt = acc_info->stmt;
1818
              tree lhs;
1819
 
1820
              FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1821
                mark_sym_for_renaming (SSA_NAME_VAR (def));
1822
              gsi = gsi_for_stmt (stmt);
1823
              gcc_assert (is_gimple_assign (acc_info->stmt));
1824
              lhs = gimple_assign_lhs (acc_info->stmt);
1825
              if (TREE_CODE (lhs) == SSA_NAME
1826
                  && acc_info->level < min_escape_l - 1)
1827
                {
1828
                  imm_use_iterator imm_iter;
1829
                  use_operand_p use_p;
1830
                  gimple use_stmt;
1831
 
1832
                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1833
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1834
                  {
1835
                    tree rhs, tmp;
1836
                    gimple new_stmt;
1837
 
1838
                    gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1839
                                == MEM_REF);
1840
                    /* Emit convert statement to convert to type of use.  */
1841
                    tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1842
                    add_referenced_var (tmp);
1843
                    rhs = gimple_assign_rhs1 (acc_info->stmt);
1844
                    rhs = fold_convert (TREE_TYPE (tmp),
1845
                                        TREE_OPERAND (rhs, 0));
1846
                    new_stmt = gimple_build_assign (tmp, rhs);
1847
                    tmp = make_ssa_name (tmp, new_stmt);
1848
                    gimple_assign_set_lhs (new_stmt, tmp);
1849
                    gsi = gsi_for_stmt (acc_info->stmt);
1850
                    gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1851
                    SET_USE (use_p, tmp);
1852
                  }
1853
                }
1854
              if (acc_info->level < min_escape_l - 1)
1855
                gsi_remove (&gsi, true);
1856
            }
1857
          free (acc_info);
1858
          continue;
1859
        }
1860
      code = gimple_assign_rhs_code (acc_info->stmt);
1861
      if (code == MEM_REF
1862
          && acc_info->level < min_escape_l - 1)
1863
        {
1864
          /* Replace the MEM_REF with NOP (cast) usually we are casting
1865
             from "pointer to type" to "type".  */
1866
          tree t =
1867
            build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1868
                    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1869
          gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1870
          gimple_assign_set_rhs1 (acc_info->stmt, t);
1871
        }
1872
      else if (code == POINTER_PLUS_EXPR
1873
               && acc_info->level < (min_escape_l))
1874
        {
1875
          imm_use_iterator imm_iter;
1876
          use_operand_p use_p;
1877
 
1878
          tree offset;
1879
          int k = acc_info->level;
1880
          tree num_elements, total_elements;
1881
          tree tmp1;
1882
          tree d_size = mi->dimension_size[k];
1883
 
1884
          /* We already make sure in the analysis that the first operand
1885
             is the base and the second is the offset.  */
1886
          offset = acc_info->offset;
1887
          if (mi->dim_map[k] == min_escape_l - 1)
1888
            {
1889
              if (!check_transpose_p || mi->is_transposed_p == false)
1890
                tmp1 = offset;
1891
              else
1892
                {
1893
                  tree new_offset;
1894
 
1895
                  new_offset =
1896
                    compute_offset (mi->dimension_type_size[min_escape_l],
1897
                                    mi->dimension_type_size[k + 1], offset);
1898
 
1899
                  total_elements = new_offset;
1900
                  if (new_offset != offset)
1901
                    {
1902
                      gsi = gsi_for_stmt (acc_info->stmt);
1903
                      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1904
                                                       true, NULL,
1905
                                                       true, GSI_SAME_STMT);
1906
                    }
1907
                  else
1908
                    tmp1 = offset;
1909
                }
1910
            }
1911
          else
1912
            {
1913
              d_size = mi->dimension_size[mi->dim_map[k] + 1];
1914
              num_elements =
1915
                fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1916
                            fold_convert (sizetype, d_size));
1917
              add_referenced_var (d_size);
1918
              gsi = gsi_for_stmt (acc_info->stmt);
1919
              tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1920
                                               NULL, true, GSI_SAME_STMT);
1921
            }
1922
          /* Replace the offset if needed.  */
1923
          if (tmp1 != offset)
1924
            {
1925
              if (TREE_CODE (offset) == SSA_NAME)
1926
                {
1927
                  gimple use_stmt;
1928
 
1929
                  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1930
                    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1931
                      if (use_stmt == acc_info->stmt)
1932
                        SET_USE (use_p, tmp1);
1933
                }
1934
              else
1935
                {
1936
                  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1937
                  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1938
                  update_stmt (acc_info->stmt);
1939
                }
1940
            }
1941
        }
1942
      /* ??? meanwhile this happens because we record the same access
1943
         site more than once; we should be using a hash table to
1944
         avoid this and insert the STMT of the access site only
1945
         once.
1946
         else
1947
         gcc_unreachable (); */
1948
      free (acc_info);
1949
    }
1950
  VEC_free (access_site_info_p, heap, mi->access_l);
1951
 
1952
  update_ssa (TODO_update_ssa);
1953
#ifdef ENABLE_CHECKING
1954
  verify_ssa (true);
1955
#endif
1956
  return 1;
1957
}
1958
 
1959
/* Sort A array of counts. Arrange DIM_MAP to reflect the new order.  */
1960
 
1961
static void
1962
sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1963
{
1964
  int i, j, tmp1;
1965
  gcov_type tmp;
1966
 
1967
  for (i = 0; i < n - 1; i++)
1968
    {
1969
      for (j = 0; j < n - 1 - i; j++)
1970
        {
1971
          if (a[j + 1] < a[j])
1972
            {
1973
              tmp = a[j];       /* swap a[j] and a[j+1]      */
1974
              a[j] = a[j + 1];
1975
              a[j + 1] = tmp;
1976
              tmp1 = dim_map[j];
1977
              dim_map[j] = dim_map[j + 1];
1978
              dim_map[j + 1] = tmp1;
1979
            }
1980
        }
1981
    }
1982
}
1983
 
1984
/* Replace multiple mallocs (one for each dimension) to one malloc
1985
   with the size of DIM1*DIM2*...*DIMN*size_of_element
1986
   Make sure that we hold the size in the malloc site inside a
1987
   new global variable; this way we ensure that the size doesn't
1988
   change and it is accessible from all the other functions that
1989
   uses the matrix.  Also, the original calls to free are deleted,
1990
   and replaced by a new call to free the flattened matrix.  */
1991
 
1992
static int
1993
transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1994
{
1995
  int i;
1996
  struct matrix_info *mi;
1997
  tree type, oldfn, prev_dim_size;
1998
  gimple call_stmt_0, use_stmt;
1999
  struct cgraph_node *c_node;
2000
  struct cgraph_edge *e;
2001
  gimple_stmt_iterator gsi;
2002
  struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
2003
  HOST_WIDE_INT element_size;
2004
 
2005
  imm_use_iterator imm_iter;
2006
  use_operand_p use_p;
2007
  tree old_size_0, tmp;
2008
  int min_escape_l;
2009
  int id;
2010
 
2011
  mi = (struct matrix_info *) *slot;
2012
 
2013
  min_escape_l = mi->min_indirect_level_escape;
2014
 
2015
  if (!mi->malloc_for_level)
2016
    mi->min_indirect_level_escape = 0;
2017
 
2018
  if (mi->min_indirect_level_escape < 2)
2019
    return 1;
2020
 
2021
  mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2022
  for (i = 0; i < mi->min_indirect_level_escape; i++)
2023
    mi->dim_map[i] = i;
2024
  if (check_transpose_p)
2025
    {
2026
      int i;
2027
 
2028
      if (dump_file)
2029
        {
2030
          fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2031
          for (i = 0; i < min_escape_l; i++)
2032
            {
2033
              fprintf (dump_file, "dim %d before sort ", i);
2034
              if (mi->dim_hot_level)
2035
                fprintf (dump_file,
2036
                         "count is  " HOST_WIDEST_INT_PRINT_DEC "  \n",
2037
                         mi->dim_hot_level[i]);
2038
            }
2039
        }
2040
      sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2041
                          mi->min_indirect_level_escape);
2042
      if (dump_file)
2043
        for (i = 0; i < min_escape_l; i++)
2044
          {
2045
            fprintf (dump_file, "dim %d after sort\n", i);
2046
            if (mi->dim_hot_level)
2047
              fprintf (dump_file, "count is  " HOST_WIDE_INT_PRINT_DEC
2048
                       "  \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2049
          }
2050
      for (i = 0; i < mi->min_indirect_level_escape; i++)
2051
        {
2052
          if (dump_file)
2053
            fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2054
                     mi->dim_map[i]);
2055
          if (mi->dim_map[i] != i)
2056
            {
2057
              if (dump_file)
2058
                fprintf (dump_file,
2059
                         "Transposed dimensions: dim %d is now dim %d\n",
2060
                         mi->dim_map[i], i);
2061
              mi->is_transposed_p = true;
2062
            }
2063
        }
2064
    }
2065
  else
2066
    {
2067
      for (i = 0; i < mi->min_indirect_level_escape; i++)
2068
        mi->dim_map[i] = i;
2069
    }
2070
  /* Call statement of allocation site of level 0.  */
2071
  call_stmt_0 = mi->malloc_for_level[0];
2072
 
2073
  /* Finds the correct malloc information.  */
2074
  collect_data_for_malloc_call (call_stmt_0, &mcd);
2075
 
2076
  mi->dimension_size[0] = mcd.size_var;
2077
  mi->dimension_size_orig[0] = mcd.size_var;
2078
  /* Make sure that the variables in the size expression for
2079
     all the dimensions (above level 0) aren't modified in
2080
     the allocation function.  */
2081
  for (i = 1; i < mi->min_indirect_level_escape; i++)
2082
    {
2083
      tree t;
2084
      check_var_data data;
2085
 
2086
      /* mi->dimension_size must contain the expression of the size calculated
2087
         in check_allocation_function.  */
2088
      gcc_assert (mi->dimension_size[i]);
2089
 
2090
      data.fn = mi->allocation_function_decl;
2091
      data.stmt = NULL;
2092
      t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2093
                                        check_var_notmodified_p,
2094
                                        &data);
2095
      if (t != NULL_TREE)
2096
        {
2097
          mark_min_matrix_escape_level (mi, i, data.stmt);
2098
          break;
2099
        }
2100
    }
2101
 
2102
  if (mi->min_indirect_level_escape < 2)
2103
    return 1;
2104
 
2105
  /* Since we should make sure that the size expression is available
2106
     before the call to malloc of level 0.  */
2107
  gsi = gsi_for_stmt (call_stmt_0);
2108
 
2109
  /* Find out the size of each dimension by looking at the malloc
2110
     sites and create a global variable to hold it.
2111
     We add the assignment to the global before the malloc of level 0.  */
2112
 
2113
  /* To be able to produce gimple temporaries.  */
2114
  oldfn = current_function_decl;
2115
  current_function_decl = mi->allocation_function_decl;
2116
  push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2117
 
2118
  /* Set the dimension sizes as follows:
2119
     DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2120
     where n is the maximum non escaping level.  */
2121
  element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2122
  prev_dim_size = NULL_TREE;
2123
 
2124
  for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2125
    {
2126
      tree dim_size, dim_var;
2127
      gimple stmt;
2128
      tree d_type_size;
2129
 
2130
      /* Now put the size expression in a global variable and initialize it to
2131
         the size expression before the malloc of level 0.  */
2132
      dim_var =
2133
        add_new_static_var (TREE_TYPE
2134
                            (mi->dimension_size_orig[mi->dim_map[i]]));
2135
      type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2136
 
2137
      /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
2138
      /* Find which dim ID becomes dim I.  */
2139
      for (id = 0; id < mi->min_indirect_level_escape; id++)
2140
        if (mi->dim_map[id] == i)
2141
          break;
2142
       d_type_size =
2143
        build_int_cst (type, mi->dimension_type_size[id + 1]);
2144
      if (!prev_dim_size)
2145
        prev_dim_size = build_int_cst (type, element_size);
2146
      if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2147
        {
2148
          dim_size = mi->dimension_size_orig[id];
2149
        }
2150
      else
2151
        {
2152
          dim_size =
2153
            fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2154
                         d_type_size);
2155
 
2156
          dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2157
        }
2158
      dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2159
                                           true, GSI_SAME_STMT);
2160
      /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
2161
      stmt = gimple_build_assign (dim_var, dim_size);
2162
      mark_symbols_for_renaming (stmt);
2163
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2164
 
2165
      prev_dim_size = mi->dimension_size[i] = dim_var;
2166
    }
2167
  update_ssa (TODO_update_ssa);
2168
  /* Replace the malloc size argument in the malloc of level 0 to be
2169
     the size of all the dimensions.  */
2170
  c_node = cgraph_get_node (mi->allocation_function_decl);
2171
  gcc_checking_assert (c_node);
2172
  old_size_0 = gimple_call_arg (call_stmt_0, 0);
2173
  tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2174
                                  NULL, true, GSI_SAME_STMT);
2175
  if (TREE_CODE (old_size_0) == SSA_NAME)
2176
    {
2177
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2178
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2179
        if (use_stmt == call_stmt_0)
2180
        SET_USE (use_p, tmp);
2181
    }
2182
  /* When deleting the calls to malloc we need also to remove the edge from
2183
     the call graph to keep it consistent.  Notice that cgraph_edge may
2184
     create a new node in the call graph if there is no node for the given
2185
     declaration; this shouldn't be the case but currently there is no way to
2186
     check this outside of "cgraph.c".  */
2187
  for (i = 1; i < mi->min_indirect_level_escape; i++)
2188
    {
2189
      gimple_stmt_iterator gsi;
2190
 
2191
      gimple call_stmt = mi->malloc_for_level[i];
2192
      gcc_assert (is_gimple_call (call_stmt));
2193
      e = cgraph_edge (c_node, call_stmt);
2194
      gcc_assert (e);
2195
      cgraph_remove_edge (e);
2196
      gsi = gsi_for_stmt (call_stmt);
2197
      /* Remove the call stmt.  */
2198
      gsi_remove (&gsi, true);
2199
      /* Remove the assignment of the allocated area.  */
2200
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2201
                             gimple_call_lhs (call_stmt))
2202
      {
2203
        gsi = gsi_for_stmt (use_stmt);
2204
        gsi_remove (&gsi, true);
2205
      }
2206
    }
2207
  update_ssa (TODO_update_ssa);
2208
#ifdef ENABLE_CHECKING
2209
  verify_ssa (true);
2210
#endif
2211
  /* Delete the calls to free.  */
2212
  for (i = 1; i < mi->min_indirect_level_escape; i++)
2213
    {
2214
      gimple_stmt_iterator gsi;
2215
 
2216
      /* ??? wonder why this case is possible but we failed on it once.  */
2217
      if (!mi->free_stmts[i].stmt)
2218
        continue;
2219
 
2220
      c_node = cgraph_get_node (mi->free_stmts[i].func);
2221
      gcc_checking_assert (c_node);
2222
      gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2223
      e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2224
      gcc_assert (e);
2225
      cgraph_remove_edge (e);
2226
      current_function_decl = mi->free_stmts[i].func;
2227
      set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2228
      gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2229
      gsi_remove (&gsi, true);
2230
    }
2231
  /* Return to the previous situation.  */
2232
  current_function_decl = oldfn;
2233
  pop_cfun ();
2234
  return 1;
2235
 
2236
}
2237
 
2238
 
2239
/* Print out the results of the escape analysis.  */
2240
static int
2241
dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2242
{
2243
  struct matrix_info *mi = (struct matrix_info *) *slot;
2244
 
2245
  if (!dump_file)
2246
    return 1;
2247
  fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2248
           get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2249
  fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2250
  fprintf (dump_file, "\n");
2251
  if (mi->min_indirect_level_escape >= 2)
2252
    fprintf (dump_file, "Flattened %d dimensions \n",
2253
             mi->min_indirect_level_escape);
2254
  return 1;
2255
}
2256
 
2257
/* Perform matrix flattening.  */
2258
 
2259
static unsigned int
2260
matrix_reorg (void)
2261
{
2262
  struct cgraph_node *node;
2263
 
2264
  if (profile_info)
2265
    check_transpose_p = true;
2266
  else
2267
    check_transpose_p = false;
2268
  /* If there are hand written vectors, we skip this optimization.  */
2269
  for (node = cgraph_nodes; node; node = node->next)
2270
    if (!may_flatten_matrices (node))
2271
      return 0;
2272
  matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2273
  /* Find and record all potential matrices in the program.  */
2274
  find_matrices_decl ();
2275
  /* Analyze the accesses of the matrices (escaping analysis).  */
2276
  for (node = cgraph_nodes; node; node = node->next)
2277
    if (node->analyzed)
2278
      {
2279
        tree temp_fn;
2280
 
2281
        temp_fn = current_function_decl;
2282
        current_function_decl = node->decl;
2283
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2284
        bitmap_obstack_initialize (NULL);
2285
        gimple_register_cfg_hooks ();
2286
 
2287
        if (!gimple_in_ssa_p (cfun))
2288
          {
2289
            free_dominance_info (CDI_DOMINATORS);
2290
            free_dominance_info (CDI_POST_DOMINATORS);
2291
            pop_cfun ();
2292
            current_function_decl = temp_fn;
2293
            bitmap_obstack_release (NULL);
2294
 
2295
            return 0;
2296
          }
2297
 
2298
#ifdef ENABLE_CHECKING
2299
        verify_flow_info ();
2300
#endif
2301
 
2302
        if (!matrices_to_reorg)
2303
          {
2304
            free_dominance_info (CDI_DOMINATORS);
2305
            free_dominance_info (CDI_POST_DOMINATORS);
2306
            pop_cfun ();
2307
            current_function_decl = temp_fn;
2308
            bitmap_obstack_release (NULL);
2309
 
2310
            return 0;
2311
          }
2312
 
2313
        /* Create htap for phi nodes.  */
2314
        htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2315
                                              mat_acc_phi_eq, free);
2316
        if (!check_transpose_p)
2317
          find_sites_in_func (false);
2318
        else
2319
          {
2320
            find_sites_in_func (true);
2321
            loop_optimizer_init (LOOPS_NORMAL);
2322
            if (current_loops)
2323
              scev_initialize ();
2324
            htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2325
            if (current_loops)
2326
              {
2327
                scev_finalize ();
2328
                loop_optimizer_finalize ();
2329
                current_loops = NULL;
2330
              }
2331
          }
2332
        /* If the current function is the allocation function for any of
2333
           the matrices we check its allocation and the escaping level.  */
2334
        htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2335
        free_dominance_info (CDI_DOMINATORS);
2336
        free_dominance_info (CDI_POST_DOMINATORS);
2337
        pop_cfun ();
2338
        current_function_decl = temp_fn;
2339
        bitmap_obstack_release (NULL);
2340
      }
2341
  htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2342
  /* Now transform the accesses.  */
2343
  for (node = cgraph_nodes; node; node = node->next)
2344
    if (node->analyzed)
2345
      {
2346
        /* Remember that allocation sites have been handled.  */
2347
        tree temp_fn;
2348
 
2349
        temp_fn = current_function_decl;
2350
        current_function_decl = node->decl;
2351
        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2352
        bitmap_obstack_initialize (NULL);
2353
        gimple_register_cfg_hooks ();
2354
        record_all_accesses_in_func ();
2355
        htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2356
        cgraph_rebuild_references ();
2357
        free_dominance_info (CDI_DOMINATORS);
2358
        free_dominance_info (CDI_POST_DOMINATORS);
2359
        pop_cfun ();
2360
        current_function_decl = temp_fn;
2361
        bitmap_obstack_release (NULL);
2362
      }
2363
  htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2364
 
2365
  current_function_decl = NULL;
2366
  set_cfun (NULL);
2367
  matrices_to_reorg = NULL;
2368
  return 0;
2369
}
2370
 
2371
 
2372
/* The condition for matrix flattening to be performed.  */
2373
static bool
2374
gate_matrix_reorg (void)
2375
{
2376
  return flag_ipa_matrix_reorg && flag_whole_program;
2377
}
2378
 
2379
struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2380
{
2381
 {
2382
  SIMPLE_IPA_PASS,
2383
  "matrix-reorg",               /* name */
2384
  gate_matrix_reorg,            /* gate */
2385
  matrix_reorg,                 /* execute */
2386
  NULL,                         /* sub */
2387
  NULL,                         /* next */
2388
  0,                             /* static_pass_number */
2389
  TV_NONE,                      /* tv_id */
2390
  0,                             /* properties_required */
2391
  0,                             /* properties_provided */
2392
  0,                             /* properties_destroyed */
2393
  0,                             /* todo_flags_start */
2394
  TODO_dump_cgraph              /* todo_flags_finish */
2395
 }
2396
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.