OpenCores
URL https://opencores.org/ocsvn/open8_urisc/open8_urisc/trunk

Subversion Repositories open8_urisc

[/] [open8_urisc/] [trunk/] [gnu/] [binutils/] [gprof/] [cg_print.c] - Blame information for rev 25

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 25 khays
/* cg_print.c -  Print routines for displaying call graphs.
2
 
3
   Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
4
   Free Software Foundation, Inc.
5
 
6
   This file is part of GNU Binutils.
7
 
8
   This program is free software; you can redistribute it and/or modify
9
   it under the terms of the GNU General Public License as published by
10
   the Free Software Foundation; either version 3 of the License, or
11
   (at your option) any later version.
12
 
13
   This program is distributed in the hope that it will be useful,
14
   but WITHOUT ANY WARRANTY; without even the implied warranty of
15
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
   GNU General Public License for more details.
17
 
18
   You should have received a copy of the GNU General Public License
19
   along with this program; if not, write to the Free Software
20
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21
   02110-1301, USA.  */
22
 
23
#include "gprof.h"
24
#include "libiberty.h"
25
#include "filenames.h"
26
#include "search_list.h"
27
#include "source.h"
28
#include "symtab.h"
29
#include "cg_arcs.h"
30
#include "cg_print.h"
31
#include "hist.h"
32
#include "utils.h"
33
#include "corefile.h"
34
 
35
/* Return value of comparison functions used to sort tables.  */
36
#define LESSTHAN        -1
37
#define EQUALTO         0
38
#define GREATERTHAN     1
39
 
40
static void print_header (void);
41
static void print_cycle (Sym *);
42
static int cmp_member (Sym *, Sym *);
43
static void sort_members (Sym *);
44
static void print_members (Sym *);
45
static int cmp_arc (Arc *, Arc *);
46
static void sort_parents (Sym *);
47
static void print_parents (Sym *);
48
static void sort_children (Sym *);
49
static void print_children (Sym *);
50
static void print_line (Sym *);
51
static int cmp_name (const PTR, const PTR);
52
static int cmp_arc_count (const PTR, const PTR);
53
static int cmp_fun_nuses (const PTR, const PTR);
54
static void order_and_dump_functions_by_arcs
55
  (Arc **, unsigned long, int, Arc **, unsigned long *);
56
 
57
/* Declarations of automatically generated functions to output blurbs.  */
58
extern void bsd_callg_blurb (FILE * fp);
59
extern void fsf_callg_blurb (FILE * fp);
60
 
61
double print_time = 0.0;
62
 
63
 
64
static void
65
print_header ()
66
{
67
  if (first_output)
68
    first_output = FALSE;
69
  else
70
    printf ("\f\n");
71
 
72
  if (!bsd_style_output)
73
    {
74
      if (print_descriptions)
75
        printf (_("\t\t     Call graph (explanation follows)\n\n"));
76
      else
77
        printf (_("\t\t\tCall graph\n\n"));
78
    }
79
 
80
  printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81
          (long) hist_scale * (long) sizeof (UNIT));
82
 
83
  if (print_time > 0.0)
84
    printf (_(" for %.2f%% of %.2f seconds\n\n"),
85
            100.0 / print_time, print_time / hz);
86
  else
87
    {
88
      printf (_(" no time propagated\n\n"));
89
 
90
      /* This doesn't hurt, since all the numerators will be 0.0.  */
91
      print_time = 1.0;
92
    }
93
 
94
  if (bsd_style_output)
95
    {
96
      printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
97
              "", "", "", "", _("called"), _("total"), _("parents"));
98
      printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99
              _("index"),
100
              /* xgettext:no-c-format */
101
              _("%time"),
102
              _("self"), _("descendants"), _("called"), _("self"),
103
              _("name"), _("index"));
104
      printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
105
              "", "", "", "", _("called"), _("total"), _("children"));
106
      printf ("\n");
107
    }
108
  else
109
    {
110
      printf (_("index %% time    self  children    called     name\n"));
111
    }
112
}
113
 
114
/* Print a cycle header.  */
115
 
116
static void
117
print_cycle (Sym *cyc)
118
{
119
  char buf[BUFSIZ];
120
 
121
  sprintf (buf, "[%d]", cyc->cg.index);
122
  printf (bsd_style_output
123
          ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
124
          : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
125
          100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
126
          cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
127
 
128
  if (cyc->cg.self_calls != 0)
129
    printf ("+%-7lu", cyc->cg.self_calls);
130
  else
131
    printf (" %7.7s", "");
132
 
133
  printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
134
}
135
 
136
/* Compare LEFT and RIGHT membmer.  Major comparison key is
137
   CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
138
 
139
static int
140
cmp_member (Sym *left, Sym *right)
141
{
142
  double left_time = left->cg.prop.self + left->cg.prop.child;
143
  double right_time = right->cg.prop.self + right->cg.prop.child;
144
  unsigned long left_calls = left->ncalls + left->cg.self_calls;
145
  unsigned long right_calls = right->ncalls + right->cg.self_calls;
146
 
147
  if (left_time > right_time)
148
    return GREATERTHAN;
149
 
150
  if (left_time < right_time)
151
    return LESSTHAN;
152
 
153
  if (left_calls > right_calls)
154
    return GREATERTHAN;
155
 
156
  if (left_calls < right_calls)
157
    return LESSTHAN;
158
 
159
  return EQUALTO;
160
}
161
 
162
/* Sort members of a cycle.  */
163
 
164
static void
165
sort_members (Sym *cyc)
166
{
167
  Sym *todo, *doing, *prev;
168
 
169
  /* Detach cycle members from cyclehead,
170
     and insertion sort them back on.  */
171
  todo = cyc->cg.cyc.next;
172
  cyc->cg.cyc.next = 0;
173
 
174
  for (doing = todo; doing != NULL; doing = todo)
175
    {
176
      todo = doing->cg.cyc.next;
177
 
178
      for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
179
        {
180
          if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
181
            break;
182
        }
183
 
184
      doing->cg.cyc.next = prev->cg.cyc.next;
185
      prev->cg.cyc.next = doing;
186
    }
187
}
188
 
189
/* Print the members of a cycle.  */
190
 
191
static void
192
print_members (Sym *cyc)
193
{
194
  Sym *member;
195
 
196
  sort_members (cyc);
197
 
198
  for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199
    {
200
      printf (bsd_style_output
201
              ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202
              : "%6.6s %5.5s %7.2f %7.2f %7lu",
203
              "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204
              member->ncalls);
205
 
206
      if (member->cg.self_calls != 0)
207
        printf ("+%-7lu", member->cg.self_calls);
208
      else
209
        printf (" %7.7s", "");
210
 
211
      printf ("     ");
212
      print_name (member);
213
      printf ("\n");
214
    }
215
}
216
 
217
/* Compare two arcs to/from the same child/parent.
218
        - if one arc is a self arc, it's least.
219
        - if one arc is within a cycle, it's less than.
220
        - if both arcs are within a cycle, compare arc counts.
221
        - if neither arc is within a cycle, compare with
222
                time + child_time as major key
223
                arc count as minor key.  */
224
 
225
static int
226
cmp_arc (Arc *left, Arc *right)
227
{
228
  Sym *left_parent = left->parent;
229
  Sym *left_child = left->child;
230
  Sym *right_parent = right->parent;
231
  Sym *right_child = right->child;
232
  double left_time, right_time;
233
 
234
  DBG (TIMEDEBUG,
235
       printf ("[cmp_arc] ");
236
       print_name (left_parent);
237
       printf (" calls ");
238
       print_name (left_child);
239
       printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
240
               left->count, left_child->ncalls);
241
       printf ("[cmp_arc] ");
242
       print_name (right_parent);
243
       printf (" calls ");
244
       print_name (right_child);
245
       printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
246
               right->count, right_child->ncalls);
247
       printf ("\n");
248
    );
249
 
250
  if (left_parent == left_child)
251
    return LESSTHAN;            /* Left is a self call.  */
252
 
253
  if (right_parent == right_child)
254
    return GREATERTHAN;         /* Right is a self call.  */
255
 
256
  if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
257
      && left_parent->cg.cyc.num == left_child->cg.cyc.num)
258
    {
259
      /* Left is a call within a cycle.  */
260
      if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
261
          && right_parent->cg.cyc.num == right_child->cg.cyc.num)
262
        {
263
          /* Right is a call within the cycle, too.  */
264
          if (left->count < right->count)
265
            return LESSTHAN;
266
 
267
          if (left->count > right->count)
268
            return GREATERTHAN;
269
 
270
          return EQUALTO;
271
        }
272
      else
273
        {
274
          /* Right isn't a call within the cycle.  */
275
          return LESSTHAN;
276
        }
277
    }
278
  else
279
    {
280
      /* Left isn't a call within a cycle.  */
281
      if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
282
          && right_parent->cg.cyc.num == right_child->cg.cyc.num)
283
        {
284
          /* Right is a call within a cycle.  */
285
          return GREATERTHAN;
286
        }
287
      else
288
        {
289
          /* Neither is a call within a cycle.  */
290
          left_time = left->time + left->child_time;
291
          right_time = right->time + right->child_time;
292
 
293
          if (left_time < right_time)
294
            return LESSTHAN;
295
 
296
          if (left_time > right_time)
297
            return GREATERTHAN;
298
 
299
          if (left->count < right->count)
300
            return LESSTHAN;
301
 
302
          if (left->count > right->count)
303
            return GREATERTHAN;
304
 
305
          return EQUALTO;
306
        }
307
    }
308
}
309
 
310
 
311
static void
312
sort_parents (Sym * child)
313
{
314
  Arc *arc, *detached, sorted, *prev;
315
 
316
  /* Unlink parents from child, then insertion sort back on to
317
     sorted's parents.
318
          *arc        the arc you have detached and are inserting.
319
          *detached   the rest of the arcs to be sorted.
320
          sorted      arc list onto which you insertion sort.
321
          *prev       arc before the arc you are comparing.  */
322
  sorted.next_parent = 0;
323
 
324
  for (arc = child->cg.parents; arc; arc = detached)
325
    {
326
      detached = arc->next_parent;
327
 
328
      /* Consider *arc as disconnected; insert it into sorted.  */
329
      for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
330
        {
331
          if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
332
            break;
333
        }
334
 
335
      arc->next_parent = prev->next_parent;
336
      prev->next_parent = arc;
337
    }
338
 
339
  /* Reattach sorted arcs to child.  */
340
  child->cg.parents = sorted.next_parent;
341
}
342
 
343
 
344
static void
345
print_parents (Sym *child)
346
{
347
  Sym *parent;
348
  Arc *arc;
349
  Sym *cycle_head;
350
 
351
  if (child->cg.cyc.head != 0)
352
    cycle_head = child->cg.cyc.head;
353
  else
354
    cycle_head = child;
355
 
356
  if (!child->cg.parents)
357
    {
358
      printf (bsd_style_output
359
              ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
360
              : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
361
              "", "", "", "", "", "");
362
      return;
363
    }
364
 
365
  sort_parents (child);
366
 
367
  for (arc = child->cg.parents; arc; arc = arc->next_parent)
368
    {
369
      parent = arc->parent;
370
      if (child == parent || (child->cg.cyc.num != 0
371
                              && parent->cg.cyc.num == child->cg.cyc.num))
372
        {
373
          /* Selfcall or call among siblings.  */
374
          printf (bsd_style_output
375
                  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
376
                  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
377
                  "", "", "", "",
378
                  arc->count, "");
379
          print_name (parent);
380
          printf ("\n");
381
        }
382
      else
383
        {
384
          /* Regular parent of child.  */
385
          printf (bsd_style_output
386
                  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
387
                  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
388
                  "", "",
389
                  arc->time / hz, arc->child_time / hz,
390
                  arc->count, cycle_head->ncalls);
391
          print_name (parent);
392
          printf ("\n");
393
        }
394
    }
395
}
396
 
397
 
398
static void
399
sort_children (Sym *parent)
400
{
401
  Arc *arc, *detached, sorted, *prev;
402
 
403
  /* Unlink children from parent, then insertion sort back on to
404
     sorted's children.
405
          *arc        the arc you have detached and are inserting.
406
          *detached   the rest of the arcs to be sorted.
407
          sorted      arc list onto which you insertion sort.
408
          *prev       arc before the arc you are comparing.  */
409
  sorted.next_child = 0;
410
 
411
  for (arc = parent->cg.children; arc; arc = detached)
412
    {
413
      detached = arc->next_child;
414
 
415
      /* Consider *arc as disconnected; insert it into sorted.  */
416
      for (prev = &sorted; prev->next_child; prev = prev->next_child)
417
        {
418
          if (cmp_arc (arc, prev->next_child) != LESSTHAN)
419
            break;
420
        }
421
 
422
      arc->next_child = prev->next_child;
423
      prev->next_child = arc;
424
    }
425
 
426
  /* Reattach sorted children to parent.  */
427
  parent->cg.children = sorted.next_child;
428
}
429
 
430
 
431
static void
432
print_children (Sym *parent)
433
{
434
  Sym *child;
435
  Arc *arc;
436
 
437
  sort_children (parent);
438
  arc = parent->cg.children;
439
 
440
  for (arc = parent->cg.children; arc; arc = arc->next_child)
441
    {
442
      child = arc->child;
443
      if (child == parent || (child->cg.cyc.num != 0
444
                              && child->cg.cyc.num == parent->cg.cyc.num))
445
        {
446
          /* Self call or call to sibling.  */
447
          printf (bsd_style_output
448
                  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
449
                  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
450
                  "", "", "", "", arc->count, "");
451
          print_name (child);
452
          printf ("\n");
453
        }
454
      else
455
        {
456
          /* Regular child of parent.  */
457
          printf (bsd_style_output
458
                  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
459
                  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
460
                  "", "",
461
                  arc->time / hz, arc->child_time / hz,
462
                  arc->count, child->cg.cyc.head->ncalls);
463
          print_name (child);
464
          printf ("\n");
465
        }
466
    }
467
}
468
 
469
 
470
static void
471
print_line (Sym *np)
472
{
473
  char buf[BUFSIZ];
474
 
475
  sprintf (buf, "[%d]", np->cg.index);
476
  printf (bsd_style_output
477
          ? "%-6.6s %5.1f %7.2f %11.2f"
478
          : "%-6.6s %5.1f %7.2f %7.2f", buf,
479
          100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
480
          np->cg.prop.self / hz, np->cg.prop.child / hz);
481
 
482
  if ((np->ncalls + np->cg.self_calls) != 0)
483
    {
484
      printf (" %7lu", np->ncalls);
485
 
486
      if (np->cg.self_calls != 0)
487
          printf ("+%-7lu ", np->cg.self_calls);
488
      else
489
          printf (" %7.7s ", "");
490
    }
491
  else
492
    {
493
      printf (" %7.7s %7.7s ", "", "");
494
    }
495
 
496
  print_name (np);
497
  printf ("\n");
498
}
499
 
500
 
501
/* Print dynamic call graph.  */
502
 
503
void
504
cg_print (Sym ** timesortsym)
505
{
506
  unsigned int sym_index;
507
  Sym *parent;
508
 
509
  if (print_descriptions && bsd_style_output)
510
    bsd_callg_blurb (stdout);
511
 
512
  print_header ();
513
 
514
  for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
515
    {
516
      parent = timesortsym[sym_index];
517
 
518
      if ((ignore_zeros && parent->ncalls == 0
519
           && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
520
           && parent->cg.prop.child == 0)
521
          || !parent->cg.print_flag
522
          || (line_granularity && ! parent->is_func))
523
        continue;
524
 
525
      if (!parent->name && parent->cg.cyc.num != 0)
526
        {
527
          /* Cycle header.  */
528
          print_cycle (parent);
529
          print_members (parent);
530
        }
531
      else
532
        {
533
          print_parents (parent);
534
          print_line (parent);
535
          print_children (parent);
536
        }
537
 
538
      if (bsd_style_output)
539
        printf ("\n");
540
 
541
      printf ("-----------------------------------------------\n");
542
 
543
      if (bsd_style_output)
544
        printf ("\n");
545
    }
546
 
547
  free (timesortsym);
548
 
549
  if (print_descriptions && !bsd_style_output)
550
    fsf_callg_blurb (stdout);
551
}
552
 
553
 
554
static int
555
cmp_name (const PTR left, const PTR right)
556
{
557
  const Sym **npp1 = (const Sym **) left;
558
  const Sym **npp2 = (const Sym **) right;
559
 
560
  return strcmp ((*npp1)->name, (*npp2)->name);
561
}
562
 
563
 
564
void
565
cg_print_index ()
566
{
567
  unsigned int sym_index;
568
  unsigned int nnames, todo, i, j;
569
  int col, starting_col;
570
  Sym **name_sorted_syms, *sym;
571
  const char *filename;
572
  char buf[20];
573
  int column_width = (output_width - 1) / 3;    /* Don't write in last col!  */
574
 
575
  /* Now, sort regular function name
576
     alphabetically to create an index.  */
577
  name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
578
 
579
  for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
580
    {
581
      if (ignore_zeros && symtab.base[sym_index].ncalls == 0
582
          && symtab.base[sym_index].hist.time == 0)
583
        continue;
584
 
585
      name_sorted_syms[nnames++] = &symtab.base[sym_index];
586
    }
587
 
588
  qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
589
 
590
  for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
591
    name_sorted_syms[todo++] = &cycle_header[sym_index];
592
 
593
  printf ("\f\n");
594
  printf (_("Index by function name\n\n"));
595
  sym_index = (todo + 2) / 3;
596
 
597
  for (i = 0; i < sym_index; i++)
598
    {
599
      col = 0;
600
      starting_col = 0;
601
 
602
      for (j = i; j < todo; j += sym_index)
603
        {
604
          sym = name_sorted_syms[j];
605
 
606
          if (sym->cg.print_flag)
607
            sprintf (buf, "[%d]", sym->cg.index);
608
          else
609
            sprintf (buf, "(%d)", sym->cg.index);
610
 
611
          if (j < nnames)
612
            {
613
              if (bsd_style_output)
614
                {
615
                  printf ("%6.6s %-19.19s", buf, sym->name);
616
                }
617
              else
618
                {
619
                  col += strlen (buf);
620
 
621
                  for (; col < starting_col + 5; ++col)
622
                    putchar (' ');
623
 
624
                  printf (" %s ", buf);
625
                  col += print_name_only (sym);
626
 
627
                  if (!line_granularity && sym->is_static && sym->file)
628
                    {
629
                      filename = sym->file->name;
630
 
631
                      if (!print_path)
632
                        {
633
                          filename = strrchr (filename, '/');
634
 
635
                          if (filename)
636
                            ++filename;
637
                          else
638
                            filename = sym->file->name;
639
                        }
640
 
641
                      printf (" (%s)", filename);
642
                      col += strlen (filename) + 3;
643
                    }
644
                }
645
            }
646
          else
647
            {
648
              if (bsd_style_output)
649
                {
650
                  printf ("%6.6s ", buf);
651
                  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
652
                  printf ("%-19.19s", buf);
653
                }
654
              else
655
                {
656
                  col += strlen (buf);
657
                  for (; col < starting_col + 5; ++col)
658
                    putchar (' ');
659
                  printf (" %s ", buf);
660
                  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
661
                  printf ("%s", buf);
662
                  col += strlen (buf);
663
                }
664
            }
665
 
666
          starting_col += column_width;
667
        }
668
 
669
      printf ("\n");
670
    }
671
 
672
  free (name_sorted_syms);
673
}
674
 
675
/* Compare two arcs based on their usage counts.
676
   We want to sort in descending order.  */
677
 
678
static int
679
cmp_arc_count (const PTR left, const PTR right)
680
{
681
  const Arc **npp1 = (const Arc **) left;
682
  const Arc **npp2 = (const Arc **) right;
683
 
684
  if ((*npp1)->count > (*npp2)->count)
685
    return -1;
686
  else if ((*npp1)->count < (*npp2)->count)
687
    return 1;
688
  else
689
    return 0;
690
}
691
 
692
/* Compare two funtions based on their usage counts.
693
   We want to sort in descending order.  */
694
 
695
static int
696
cmp_fun_nuses (const PTR left, const PTR right)
697
{
698
  const Sym **npp1 = (const Sym **) left;
699
  const Sym **npp2 = (const Sym **) right;
700
 
701
  if ((*npp1)->nuses > (*npp2)->nuses)
702
    return -1;
703
  else if ((*npp1)->nuses < (*npp2)->nuses)
704
    return 1;
705
  else
706
    return 0;
707
}
708
 
709
/* Print a suggested function ordering based on the profiling data.
710
 
711
   We perform 4 major steps when ordering functions:
712
 
713
        * Group unused functions together and place them at the
714
        end of the function order.
715
 
716
        * Search the highest use arcs (those which account for 90% of
717
        the total arc count) for functions which have several parents.
718
 
719
        Group those with the most call sites together (currently the
720
        top 1.25% which have at least five different call sites).
721
 
722
        These are emitted at the start of the function order.
723
 
724
        * Use a greedy placement algorithm to place functions which
725
        occur in the top 99% of the arcs in the profile.  Some provisions
726
        are made to handle high usage arcs where the parent and/or
727
        child has already been placed.
728
 
729
        * Run the same greedy placement algorithm on the remaining
730
        arcs to place the leftover functions.
731
 
732
 
733
   The various "magic numbers" should (one day) be tuneable by command
734
   line options.  They were arrived at by benchmarking a few applications
735
   with various values to see which values produced better overall function
736
   orderings.
737
 
738
   Of course, profiling errors, machine limitations (PA long calls), and
739
   poor cutoff values for the placement algorithm may limit the usefullness
740
   of the resulting function order.  Improvements would be greatly appreciated.
741
 
742
   Suggestions:
743
 
744
        * Place the functions with many callers near the middle of the
745
        list to reduce long calls.
746
 
747
        * Propagate arc usage changes as functions are placed.  Ie if
748
        func1 and func2 are placed together, arcs to/from those arcs
749
        to the same parent/child should be combined, then resort the
750
        arcs to choose the next one.
751
 
752
        * Implement some global positioning algorithm to place the
753
        chains made by the greedy local positioning algorithm.  Probably
754
        by examining arcs which haven't been placed yet to tie two
755
        chains together.
756
 
757
        * Take a function's size and time into account in the algorithm;
758
        size in particular is important on the PA (long calls).  Placing
759
        many small functions onto their own page may be wise.
760
 
761
        * Use better profiling information; many published algorithms
762
        are based on call sequences through time, rather than just
763
        arc counts.
764
 
765
        * Prodecure cloning could improve performance when a small number
766
        of arcs account for most of the calls to a particular function.
767
 
768
        * Use relocation information to avoid moving unused functions
769
        completely out of the code stream; this would avoid severe lossage
770
        when the profile data bears little resemblance to actual runs.
771
 
772
        * Propagation of arc usages should also improve .o link line
773
        ordering which shares the same arc placement algorithm with
774
        the function ordering code (in fact it is a degenerate case
775
        of function ordering).  */
776
 
777
void
778
cg_print_function_ordering (void)
779
{
780
  unsigned long sym_index;
781
  unsigned long arc_index;
782
  unsigned long used, unused, scratch_index;
783
  unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
784
#ifdef __GNUC__
785
  unsigned long long total_arcs, tmp_arcs_count;
786
#else
787
  unsigned long total_arcs, tmp_arcs_count;
788
#endif
789
  Sym **unused_syms, **used_syms, **scratch_syms;
790
  Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
791
 
792
  sym_index = 0;
793
  used = 0;
794
  unused = 0;
795
  scratch_index = 0;
796
  unplaced_arc_count = 0;
797
  high_arc_count = 0;
798
  scratch_arc_count = 0;
799
 
800
  /* First group all the unused functions together.  */
801
  unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802
  used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803
  scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804
  high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805
  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806
  unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
807
 
808
  /* Walk through all the functions; mark those which are never
809
     called as placed (we'll emit them as a group later).  */
810
  for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
811
    {
812
      if (symtab.base[sym_index].ncalls == 0)
813
        {
814
          unused_syms[unused++] = &symtab.base[sym_index];
815
          symtab.base[sym_index].has_been_placed = 1;
816
        }
817
      else
818
        {
819
          used_syms[used++] = &symtab.base[sym_index];
820
          symtab.base[sym_index].has_been_placed = 0;
821
          symtab.base[sym_index].next = 0;
822
          symtab.base[sym_index].prev = 0;
823
          symtab.base[sym_index].nuses = 0;
824
        }
825
    }
826
 
827
  /* Sort the arcs from most used to least used.  */
828
  qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
829
 
830
  /* Compute the total arc count.  Also mark arcs as unplaced.
831
 
832
     Note we don't compensate for overflow if that happens!
833
     Overflow is much less likely when this file is compiled
834
     with GCC as it can double-wide integers via long long.  */
835
  total_arcs = 0;
836
  for (arc_index = 0; arc_index < numarcs; arc_index++)
837
    {
838
      total_arcs += arcs[arc_index]->count;
839
      arcs[arc_index]->has_been_placed = 0;
840
    }
841
 
842
  /* We want to pull out those functions which are referenced
843
     by many highly used arcs and emit them as a group.  This
844
     could probably use some tuning.  */
845
  tmp_arcs_count = 0;
846
  for (arc_index = 0; arc_index < numarcs; arc_index++)
847
    {
848
      tmp_arcs_count += arcs[arc_index]->count;
849
 
850
      /* Count how many times each parent and child are used up
851
         to our threshhold of arcs (90%).  */
852
      if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
853
        break;
854
 
855
      arcs[arc_index]->child->nuses++;
856
    }
857
 
858
  /* Now sort a temporary symbol table based on the number of
859
     times each function was used in the highest used arcs.  */
860
  memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
861
  qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
862
 
863
  /* Now pick out those symbols we're going to emit as
864
     a group.  We take up to 1.25% of the used symbols.  */
865
  for (sym_index = 0; sym_index < used / 80; sym_index++)
866
    {
867
      Sym *sym = scratch_syms[sym_index];
868
      Arc *arc;
869
 
870
      /* If we hit symbols that aren't used from many call sites,
871
         then we can quit.  We choose five as the low limit for
872
         no particular reason.  */
873
      if (sym->nuses == 5)
874
        break;
875
 
876
      /* We're going to need the arcs between these functions.
877
         Unfortunately, we don't know all these functions
878
         until we're done.  So we keep track of all the arcs
879
         to the functions we care about, then prune out those
880
         which are uninteresting.
881
 
882
         An interesting variation would be to quit when we found
883
         multi-call site functions which account for some percentage
884
         of the arcs.  */
885
      arc = sym->cg.children;
886
 
887
      while (arc)
888
        {
889
          if (arc->parent != arc->child)
890
            scratch_arcs[scratch_arc_count++] = arc;
891
          arc->has_been_placed = 1;
892
          arc = arc->next_child;
893
        }
894
 
895
      arc = sym->cg.parents;
896
 
897
      while (arc)
898
        {
899
          if (arc->parent != arc->child)
900
            scratch_arcs[scratch_arc_count++] = arc;
901
          arc->has_been_placed = 1;
902
          arc = arc->next_parent;
903
        }
904
 
905
      /* Keep track of how many symbols we're going to place.  */
906
      scratch_index = sym_index;
907
 
908
      /* A lie, but it makes identifying
909
         these functions easier later.  */
910
      sym->has_been_placed = 1;
911
    }
912
 
913
  /* Now walk through the temporary arcs and copy
914
     those we care about into the high arcs array.  */
915
  for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
916
    {
917
      Arc *arc = scratch_arcs[arc_index];
918
 
919
      /* If this arc refers to highly used functions, then
920
         then we want to keep it.  */
921
      if (arc->child->has_been_placed
922
          && arc->parent->has_been_placed)
923
        {
924
          high_arcs[high_arc_count++] = scratch_arcs[arc_index];
925
 
926
          /* We need to turn of has_been_placed since we're going to
927
             use the main arc placement algorithm on these arcs.  */
928
          arc->child->has_been_placed = 0;
929
          arc->parent->has_been_placed = 0;
930
        }
931
    }
932
 
933
  /* Dump the multi-site high usage functions which are not
934
     going to be ordered by the main ordering algorithm.  */
935
  for (sym_index = 0; sym_index < scratch_index; sym_index++)
936
    {
937
      if (scratch_syms[sym_index]->has_been_placed)
938
        printf ("%s\n", scratch_syms[sym_index]->name);
939
    }
940
 
941
  /* Now we can order the multi-site high use
942
     functions based on the arcs between them.  */
943
  qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
944
  order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
945
                                    unplaced_arcs, &unplaced_arc_count);
946
 
947
  /* Order and dump the high use functions left,
948
     these typically have only a few call sites.  */
949
  order_and_dump_functions_by_arcs (arcs, numarcs, 0,
950
                                    unplaced_arcs, &unplaced_arc_count);
951
 
952
  /* Now place the rarely used functions.  */
953
  order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
954
                                    scratch_arcs, &scratch_arc_count);
955
 
956
  /* Output any functions not emitted by the order_and_dump calls.  */
957
  for (sym_index = 0; sym_index < used; sym_index++)
958
    if (used_syms[sym_index]->has_been_placed == 0)
959
      printf("%s\n", used_syms[sym_index]->name);
960
 
961
  /* Output the unused functions.  */
962
  for (sym_index = 0; sym_index < unused; sym_index++)
963
    printf("%s\n", unused_syms[sym_index]->name);
964
 
965
  unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966
  used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967
  scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968
  high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969
  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970
  unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
971
 
972
  free (unused_syms);
973
  free (used_syms);
974
  free (scratch_syms);
975
  free (high_arcs);
976
  free (scratch_arcs);
977
  free (unplaced_arcs);
978
}
979
 
980
/* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
981
   place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
982
 
983
   If ALL is nonzero, then place all functions referenced by THE_ARCS,
984
   else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
985
 
986
#define MOST 0.99
987
static void
988
order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
989
                                  unplaced_arcs, unplaced_arc_count)
990
     Arc **the_arcs;
991
     unsigned long arc_count;
992
     int all;
993
     Arc **unplaced_arcs;
994
     unsigned long *unplaced_arc_count;
995
{
996
#ifdef __GNUC__
997
  unsigned long long tmp_arcs, total_arcs;
998
#else
999
  unsigned long tmp_arcs, total_arcs;
1000
#endif
1001
  unsigned int arc_index;
1002
 
1003
  /* If needed, compute the total arc count.
1004
 
1005
     Note we don't compensate for overflow if that happens!  */
1006
  if (! all)
1007
    {
1008
      total_arcs = 0;
1009
      for (arc_index = 0; arc_index < arc_count; arc_index++)
1010
        total_arcs += the_arcs[arc_index]->count;
1011
    }
1012
  else
1013
    total_arcs = 0;
1014
 
1015
  tmp_arcs = 0;
1016
 
1017
  for (arc_index = 0; arc_index < arc_count; arc_index++)
1018
    {
1019
      Sym *sym1, *sym2;
1020
      Sym *child, *parent;
1021
 
1022
      tmp_arcs += the_arcs[arc_index]->count;
1023
 
1024
      /* Ignore this arc if it's already been placed.  */
1025
      if (the_arcs[arc_index]->has_been_placed)
1026
        continue;
1027
 
1028
      child = the_arcs[arc_index]->child;
1029
      parent = the_arcs[arc_index]->parent;
1030
 
1031
      /* If we're not using all arcs, and this is a rarely used
1032
         arc, then put it on the unplaced_arc list.  Similarly
1033
         if both the parent and child of this arc have been placed.  */
1034
      if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1035
          || child->has_been_placed || parent->has_been_placed)
1036
        {
1037
          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1038
          continue;
1039
        }
1040
 
1041
      /* If all slots in the parent and child are full, then there isn't
1042
         anything we can do right now.  We'll place this arc on the
1043
         unplaced arc list in the hope that a global positioning
1044
         algorithm can use it to place function chains.  */
1045
      if (parent->next && parent->prev && child->next && child->prev)
1046
        {
1047
          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1048
          continue;
1049
        }
1050
 
1051
      /* If the parent is unattached, then find the closest
1052
         place to attach it onto child's chain.   Similarly
1053
         for the opposite case.  */
1054
      if (!parent->next && !parent->prev)
1055
        {
1056
          int next_count = 0;
1057
          int prev_count = 0;
1058
          Sym *prev = child;
1059
          Sym *next = child;
1060
 
1061
          /* Walk to the beginning and end of the child's chain.  */
1062
          while (next->next)
1063
            {
1064
              next = next->next;
1065
              next_count++;
1066
            }
1067
 
1068
          while (prev->prev)
1069
            {
1070
              prev = prev->prev;
1071
              prev_count++;
1072
            }
1073
 
1074
          /* Choose the closest.  */
1075
          child = next_count < prev_count ? next : prev;
1076
        }
1077
      else if (! child->next && !child->prev)
1078
        {
1079
          int next_count = 0;
1080
          int prev_count = 0;
1081
          Sym *prev = parent;
1082
          Sym *next = parent;
1083
 
1084
          while (next->next)
1085
            {
1086
              next = next->next;
1087
              next_count++;
1088
            }
1089
 
1090
          while (prev->prev)
1091
            {
1092
              prev = prev->prev;
1093
              prev_count++;
1094
            }
1095
 
1096
          parent = prev_count < next_count ? prev : next;
1097
        }
1098
      else
1099
        {
1100
          /* Couldn't find anywhere to attach the functions,
1101
             put the arc on the unplaced arc list.  */
1102
          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1103
          continue;
1104
        }
1105
 
1106
      /* Make sure we don't tie two ends together.  */
1107
      sym1 = parent;
1108
      if (sym1->next)
1109
        while (sym1->next)
1110
          sym1 = sym1->next;
1111
      else
1112
        while (sym1->prev)
1113
          sym1 = sym1->prev;
1114
 
1115
      sym2 = child;
1116
      if (sym2->next)
1117
        while (sym2->next)
1118
          sym2 = sym2->next;
1119
      else
1120
        while (sym2->prev)
1121
          sym2 = sym2->prev;
1122
 
1123
      if (sym1 == child
1124
          && sym2 == parent)
1125
        {
1126
          /* This would tie two ends together.  */
1127
          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1128
          continue;
1129
        }
1130
 
1131
      if (parent->next)
1132
        {
1133
          /* Must attach to the parent's prev field.  */
1134
          if (! child->next)
1135
            {
1136
              /* parent-prev and child-next */
1137
              parent->prev = child;
1138
              child->next = parent;
1139
              the_arcs[arc_index]->has_been_placed = 1;
1140
            }
1141
        }
1142
      else if (parent->prev)
1143
        {
1144
          /* Must attach to the parent's next field.  */
1145
          if (! child->prev)
1146
            {
1147
              /* parent-next and child-prev */
1148
              parent->next = child;
1149
              child->prev = parent;
1150
              the_arcs[arc_index]->has_been_placed = 1;
1151
            }
1152
        }
1153
      else
1154
        {
1155
          /* Can attach to either field in the parent, depends
1156
             on where we've got space in the child.  */
1157
          if (child->prev)
1158
            {
1159
              /* parent-prev and child-next.  */
1160
              parent->prev = child;
1161
              child->next = parent;
1162
              the_arcs[arc_index]->has_been_placed = 1;
1163
            }
1164
          else
1165
            {
1166
              /* parent-next and child-prev.  */
1167
              parent->next = child;
1168
              child->prev = parent;
1169
              the_arcs[arc_index]->has_been_placed = 1;
1170
            }
1171
        }
1172
    }
1173
 
1174
  /* Dump the chains of functions we've made.  */
1175
  for (arc_index = 0; arc_index < arc_count; arc_index++)
1176
    {
1177
      Sym *sym;
1178
      if (the_arcs[arc_index]->parent->has_been_placed
1179
          || the_arcs[arc_index]->child->has_been_placed)
1180
        continue;
1181
 
1182
      sym = the_arcs[arc_index]->parent;
1183
 
1184
      /* If this symbol isn't attached to any other
1185
         symbols, then we've got a rarely used arc.
1186
 
1187
         Skip it for now, we'll deal with them later.  */
1188
      if (sym->next == NULL
1189
          && sym->prev == NULL)
1190
        continue;
1191
 
1192
      /* Get to the start of this chain.  */
1193
      while (sym->prev)
1194
        sym = sym->prev;
1195
 
1196
      while (sym)
1197
        {
1198
          /* Mark it as placed.  */
1199
          sym->has_been_placed = 1;
1200
          printf ("%s\n", sym->name);
1201
          sym = sym->next;
1202
        }
1203
    }
1204
 
1205
  /* If we want to place all the arcs, then output
1206
     those which weren't placed by the main algorithm.  */
1207
  if (all)
1208
    for (arc_index = 0; arc_index < arc_count; arc_index++)
1209
      {
1210
        Sym *sym;
1211
        if (the_arcs[arc_index]->parent->has_been_placed
1212
            || the_arcs[arc_index]->child->has_been_placed)
1213
          continue;
1214
 
1215
        sym = the_arcs[arc_index]->parent;
1216
 
1217
        sym->has_been_placed = 1;
1218
        printf ("%s\n", sym->name);
1219
      }
1220
}
1221
 
1222
/* Compare two function_map structs based on file name.
1223
   We want to sort in ascending order.  */
1224
 
1225
static int
1226
cmp_symbol_map (const void * l, const void * r)
1227
{
1228
  return filename_cmp (((struct function_map *) l)->file_name,
1229
                       ((struct function_map *) r)->file_name);
1230
}
1231
 
1232
/* Print a suggested .o ordering for files on a link line based
1233
   on profiling information.  This uses the function placement
1234
   code for the bulk of its work.  */
1235
 
1236
void
1237
cg_print_file_ordering (void)
1238
{
1239
  unsigned long scratch_arc_count;
1240
  unsigned long arc_index;
1241
  unsigned long sym_index;
1242
  Arc **scratch_arcs;
1243
  char *last;
1244
 
1245
  scratch_arc_count = 0;
1246
 
1247
  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1248
  for (arc_index = 0; arc_index < numarcs; arc_index++)
1249
    {
1250
      if (! arcs[arc_index]->parent->mapped
1251
          || ! arcs[arc_index]->child->mapped)
1252
        arcs[arc_index]->has_been_placed = 1;
1253
    }
1254
 
1255
  order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1256
                                    scratch_arcs, &scratch_arc_count);
1257
 
1258
  /* Output .o's not handled by the main placement algorithm.  */
1259
  for (sym_index = 0; sym_index < symtab.len; sym_index++)
1260
    {
1261
      if (symtab.base[sym_index].mapped
1262
          && ! symtab.base[sym_index].has_been_placed)
1263
        printf ("%s\n", symtab.base[sym_index].name);
1264
    }
1265
 
1266
  qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1267
 
1268
  /* Now output any .o's that didn't have any text symbols.  */
1269
  last = NULL;
1270
  for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1271
    {
1272
      unsigned int index2;
1273
 
1274
      /* Don't bother searching if this symbol
1275
         is the same as the previous one.  */
1276
      if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1277
        continue;
1278
 
1279
      for (index2 = 0; index2 < symtab.len; index2++)
1280
        {
1281
          if (! symtab.base[index2].mapped)
1282
            continue;
1283
 
1284
          if (!filename_cmp (symtab.base[index2].name,
1285
                             symbol_map[sym_index].file_name))
1286
            break;
1287
        }
1288
 
1289
      /* If we didn't find it in the symbol table, then it must
1290
         be a .o with no text symbols.  Output it last.  */
1291
      if (index2 == symtab.len)
1292
        printf ("%s\n", symbol_map[sym_index].file_name);
1293
      last = symbol_map[sym_index].file_name;
1294
    }
1295
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.