OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-newlib/] [newlib-1.17.0/] [newlib/] [libc/] [sys/] [linux/] [dl/] [dl-profile.c] - Blame information for rev 9

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 9 jlechner
/* Profiling of shared libraries.
2
   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3
   This file is part of the GNU C Library.
4
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
   Based on the BSD mcount implementation.
6
 
7
   The GNU C Library is free software; you can redistribute it and/or
8
   modify it under the terms of the GNU Lesser General Public
9
   License as published by the Free Software Foundation; either
10
   version 2.1 of the License, or (at your option) any later version.
11
 
12
   The GNU C Library is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
   Lesser General Public License for more details.
16
 
17
   You should have received a copy of the GNU Lesser General Public
18
   License along with the GNU C Library; if not, write to the Free
19
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20
   02111-1307 USA.  */
21
 
22
#include <errno.h>
23
#include <fcntl.h>
24
#include <limits.h>
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
#include <unistd.h>
29
#include <ldsodefs.h>
30
#include <sys/gmon.h>
31
#include <sys/gmon_out.h>
32
#include <sys/mman.h>
33
#include <sys/param.h>
34
#include <sys/stat.h>
35
#include <atomicity.h>
36
#include <config.h>
37
 
38
/* The LD_PROFILE feature has to be implemented different to the
39
   normal profiling using the gmon/ functions.  The problem is that an
40
   arbitrary number of processes can simultaneously be run using
41
   profiling and all write the results in the same file.  To provide
42
   this mechanism one could implement a complicated mechanism to merge
43
   the content of two profiling runs or one could extend the file
44
   format to allow more than one data set.  For the second solution we
45
   would have the problem that the file can grow in size beyond any
46
   limit and both solutions have the problem that the concurrency of
47
   writing the results is a big problem.
48
 
49
   Another much simpler method is to use mmap to map the same file in
50
   all using programs and modify the data in the mmap'ed area and so
51
   also automatically on the disk.  Using the MAP_SHARED option of
52
   mmap(2) this can be done without big problems in more than one
53
   file.
54
 
55
   This approach is very different from the normal profiling.  We have
56
   to use the profiling data in exactly the way they are expected to
57
   be written to disk.  But the normal format used by gprof is not usable
58
   to do this.  It is optimized for size.  It writes the tags as single
59
   bytes but this means that the following 32/64 bit values are
60
   unaligned.
61
 
62
   Therefore we use a new format.  This will look like this
63
 
64
 
65
        0000                            g  m  o  n
66
        0004                            *version*       <- GMON_SHOBJ_VERSION
67
        0008                            00 00 00 00
68
        000c                            00 00 00 00
69
        0010                            00 00 00 00
70
 
71
        0014                            *tag*           <- GMON_TAG_TIME_HIST
72
        0018                            ?? ?? ?? ??
73
                                        ?? ?? ?? ??     <- 32/64 bit LowPC
74
        0018+A                          ?? ?? ?? ??
75
                                        ?? ?? ?? ??     <- 32/64 bit HighPC
76
        0018+2*A                        *histsize*
77
        001c+2*A                        *profrate*
78
        0020+2*A                        s  e  c  o
79
        0024+2*A                        n  d  s  \0
80
        0028+2*A                        \0 \0 \0 \0
81
        002c+2*A                        \0 \0 \0
82
        002f+2*A                        s
83
 
84
        0030+2*A                        ?? ?? ?? ??     <- Count data
85
        ...                             ...
86
        0030+2*A+K                      ?? ?? ?? ??
87
 
88
        0030+2*A+K                      *tag*           <- GMON_TAG_CG_ARC
89
        0034+2*A+K                      *lastused*
90
        0038+2*A+K                      ?? ?? ?? ??
91
                                        ?? ?? ?? ??     <- FromPC#1
92
        0038+3*A+K                      ?? ?? ?? ??
93
                                        ?? ?? ?? ??     <- ToPC#1
94
        0038+4*A+K                      ?? ?? ?? ??     <- Count#1
95
        ...                             ...                ...
96
        0038+(2*(CN-1)+2)*A+(CN-1)*4+K  ?? ?? ?? ??
97
                                        ?? ?? ?? ??     <- FromPC#CGN
98
        0038+(2*(CN-1)+3)*A+(CN-1)*4+K  ?? ?? ?? ??
99
                                        ?? ?? ?? ??     <- ToPC#CGN
100
        0038+(2*CN+2)*A+(CN-1)*4+K      ?? ?? ?? ??     <- Count#CGN
101
 
102
   We put (for now?) no basic block information in the file since this would
103
   introduce race conditions among all the processes who want to write them.
104
 
105
   `K' is the number of count entries which is computed as
106
 
107
                textsize / HISTFRACTION
108
 
109
   `CN' in the above table is the number of call graph arcs.  Normally,
110
   the table is sparse and the profiling code writes out only those
111
   entries which are really used in the program run.  But since we must
112
   not extend this table (the profiling file) we'll keep them all here.
113
   So CN can be computed in advance as
114
 
115
                MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS
116
 
117
   Now the remaining question is: how to build the data structures we can
118
   work with from this data.  We need the from set and must associate the
119
   froms with all the associated tos.  We will do this by constructing these
120
   data structures at the program start.  To do this we'll simply visit all
121
   entries in the call graph table and add it to the appropriate list.  */
122
 
123
extern int __profile_frequency (void);
124
 
125
/* Element type used to address the arc table.  It follows the
   `gmon_cg_arc_record' format (the original note says it includes the
   room for the tag) but is expressed with real integer types.  The
   record is packed, so the members are laid out back to back without
   padding.  */
struct here_cg_arc_record {
  uintptr_t from_pc;    /* Address the call came from.  */
  uintptr_t self_pc;    /* Address of the called code.  */
  uint32_t count;       /* Number of times this arc was taken.  */
} __attribute__ ((packed));
134
 
135
/* Start of the arc records inside the mapped profile file.  */
static struct here_cg_arc_record *data;

/* Nonzero while profiling is under way.  */
static int running;

/* Number of arc entries which have already been incorporated in the
   toset.  */
static uint32_t narcs;

/* Points to the object holding the number of entries currently in the
   mmaped file.  That value is not required to equal NARCS at any point
   in time; when the two are equal, every entry from the file is in our
   lists.  */
static volatile uint32_t *narcsp;

/* Histogram counters handed to profil, and the size of that area in
   bytes.  */
static volatile uint16_t *kcount;
static size_t kcountsize;
150
 
151
/* One node in the chain of arcs that share a TOS bucket.  HERE points
   at the arc record for this node; LINK is the index in FROMS of the
   next node of the chain, with 0 terminating the chain.  */
struct here_fromstruct {
  struct here_cg_arc_record volatile *here;
  uint16_t link;
};
156
 
157
/* Bucket table indexed by a hash of the callee address.  Each entry is
   an index into FROMS; 0 marks an empty bucket.  */
static volatile uint16_t *tos;

/* Chain nodes, the maximum number of arcs we track, and the index of
   the most recently handed-out node.  */
static struct here_fromstruct *froms;
static uint32_t fromlimit;
static volatile uint32_t fromidx;

/* Start and size of the profiled address range, plus the hash
   parameters used to turn an address into a TOS index.  */
static uintptr_t lowpc;
static size_t textsize;
static unsigned int hashfraction;
static unsigned int log_hashfraction;
167
 
168
 
169
 
170
/* Set up profiling data to profile object desribed by MAP.  The output
171
   file is found (or created) in OUTPUT_DIR.  */
172
void
173
internal_function
174
_dl_start_profile (struct link_map *map, const char *output_dir)
175
{
176
  char *filename;
177
  int fd;
178
  struct stat64 st;
179
  const ElfW(Phdr) *ph;
180
  ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
181
  ElfW(Addr) mapend = 0;
182
  struct gmon_hdr gmon_hdr;
183
  struct gmon_hist_hdr hist_hdr;
184
  char *hist, *cp, *tmp;
185
  size_t idx;
186
  size_t tossize;
187
  size_t fromssize;
188
  uintptr_t highpc;
189
  struct gmon_hdr *addr = NULL;
190
  off_t expected_size;
191
  /* See profil(2) where this is described.  */
192
  int s_scale;
193
#define SCALE_1_TO_1    0x10000L
194
 
195
  /* Compute the size of the sections which contain program code.  */
196
  for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
197
    if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
198
      {
199
        ElfW(Addr) start = (ph->p_vaddr & ~(_dl_pagesize - 1));
200
        ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + _dl_pagesize - 1)
201
                          & ~(_dl_pagesize - 1));
202
 
203
        if (start < mapstart)
204
          mapstart = start;
205
        if (end > mapend)
206
          mapend = end;
207
      }
208
 
209
  /* Now we can compute the size of the profiling data.  This is done
210
     with the same formulars as in `monstartup' (see gmon.c).  */
211
  running = 0;
212
  lowpc = ROUNDDOWN (mapstart + map->l_addr,
213
                     HISTFRACTION * sizeof (HISTCOUNTER));
214
  highpc = ROUNDUP (mapend + map->l_addr,
215
                    HISTFRACTION * sizeof (HISTCOUNTER));
216
  textsize = highpc - lowpc;
217
  kcountsize = textsize / HISTFRACTION;
218
  hashfraction = HASHFRACTION;
219
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
220
    /* If HASHFRACTION is a power of two, mcount can use shifting
221
       instead of integer division.  Precompute shift amount.  */
222
    log_hashfraction = ffs (hashfraction * sizeof (*froms)) - 1;
223
  else
224
    log_hashfraction = -1;
225
  tossize = textsize / HASHFRACTION;
226
  fromlimit = textsize * ARCDENSITY / 100;
227
  if (fromlimit < MINARCS)
228
    fromlimit = MINARCS;
229
  if (fromlimit > MAXARCS)
230
    fromlimit = MAXARCS;
231
  fromssize = fromlimit * sizeof (struct here_fromstruct);
232
 
233
  expected_size = (sizeof (struct gmon_hdr)
234
                   + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
235
                   + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record));
236
 
237
  /* Create the gmon_hdr we expect or write.  */
238
  memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
239
  memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
240
  *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
241
 
242
  /* Create the hist_hdr we expect or write.  */
243
  *(char **) hist_hdr.low_pc = (char *) mapstart;
244
  *(char **) hist_hdr.high_pc = (char *) mapend;
245
  *(int32_t *) hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
246
  *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
247
  strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
248
  hist_hdr.dimen_abbrev = 's';
249
 
250
  /* First determine the output name.  We write in the directory
251
     OUTPUT_DIR and the name is composed from the shared objects
252
     soname (or the file name) and the ending ".profile".  */
253
  filename = (char *) alloca (strlen (output_dir) + 1 + strlen (_dl_profile)
254
                              + sizeof ".profile");
255
  cp = strcpy (filename, output_dir);
256
  cp += strlen (output_dir);
257
  *cp++ = '/';
258
  tmp = strcpy (cp, _dl_profile);
259
  tmp += strlen (_dl_profile);
260
  strcpy (tmp, ".profile");
261
 
262
#ifdef O_NOFOLLOW
263
# define EXTRA_FLAGS | O_NOFOLLOW
264
#else
265
# define EXTRA_FLAGS
266
#endif
267
  fd = __open (filename, O_RDWR | O_CREAT EXTRA_FLAGS);
268
  if (fd == -1)
269
    {
270
      /* We cannot write the profiling data so don't do anything.  */
271
      char buf[400];
272
      _dl_error_printf ("%s: cannot open file: %s\n", filename,
273
                        __strerror_r (errno, buf, sizeof buf));
274
      return;
275
    }
276
 
277
  if (fstat64 (fd, &st) < 0 || !S_ISREG (st.st_mode))
278
    {
279
      /* Not stat'able or not a regular file => don't use it.  */
280
      char buf[400];
281
      int errnum = errno;
282
      __close (fd);
283
      _dl_error_printf ("%s: cannot stat file: %s\n", filename,
284
                        __strerror_r (errnum, buf, sizeof buf));
285
      return;
286
    }
287
 
288
  /* Test the size.  If it does not match what we expect from the size
289
     values in the map MAP we don't use it and warn the user.  */
290
  if (st.st_size == 0)
291
    {
292
      /* We have to create the file.  */
293
      char buf[_dl_pagesize];
294
 
295
      memset (buf, '\0', _dl_pagesize);
296
 
297
      if (__lseek (fd, expected_size & ~(_dl_pagesize - 1), SEEK_SET) == -1)
298
        {
299
          char buf[400];
300
          int errnum;
301
        cannot_create:
302
          errnum = errno;
303
          __close (fd);
304
          _dl_error_printf ("%s: cannot create file: %s\n", filename,
305
                            __strerror_r (errnum, buf, sizeof buf));
306
          return;
307
        }
308
 
309
      if (TEMP_FAILURE_RETRY (__libc_write (fd, buf, (expected_size
310
                                                      & (_dl_pagesize - 1))))
311
          < 0)
312
        goto cannot_create;
313
    }
314
  else if (st.st_size != expected_size)
315
    {
316
      __close (fd);
317
    wrong_format:
318
 
319
      if (addr != NULL)
320
        __munmap ((void *) addr, expected_size);
321
 
322
      _dl_error_printf ("%s: file is no correct profile data file for `%s'\n",
323
                        filename, _dl_profile);
324
      return;
325
    }
326
 
327
  addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE,
328
                                     MAP_SHARED|MAP_FILE, fd, 0);
329
  if (addr == (struct gmon_hdr *) MAP_FAILED)
330
    {
331
      char buf[400];
332
      int errnum = errno;
333
      __close (fd);
334
      _dl_error_printf ("%s: cannot map file: %s\n", filename,
335
                        __strerror_r (errnum, buf, sizeof buf));
336
      return;
337
    }
338
 
339
  /* We don't need the file desriptor anymore.  */
340
  __close (fd);
341
 
342
  /* Pointer to data after the header.  */
343
  hist = (char *) (addr + 1);
344
  kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
345
                         + sizeof (struct gmon_hist_hdr));
346
 
347
  /* Compute pointer to array of the arc information.  */
348
  narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t));
349
  data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t));
350
 
351
  if (st.st_size == 0)
352
    {
353
      /* Create the signature.  */
354
      memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr));
355
 
356
      *(uint32_t *) hist = GMON_TAG_TIME_HIST;
357
      memcpy (hist + sizeof (uint32_t), &hist_hdr,
358
              sizeof (struct gmon_hist_hdr));
359
 
360
      narcsp[-1] = GMON_TAG_CG_ARC;
361
    }
362
  else
363
    {
364
      /* Test the signature in the file.  */
365
      if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
366
          || *(uint32_t *) hist != GMON_TAG_TIME_HIST
367
          || memcmp (hist + sizeof (uint32_t), &hist_hdr,
368
                     sizeof (struct gmon_hist_hdr)) != 0
369
          || narcsp[-1] != GMON_TAG_CG_ARC)
370
        goto wrong_format;
371
    }
372
 
373
  /* Allocate memory for the froms data and the pointer to the tos records.  */
374
  tos = (uint16_t *) calloc (tossize + fromssize, 1);
375
  if (tos == NULL)
376
    {
377
      __munmap ((void *) addr, expected_size);
378
      _dl_fatal_printf ("Out of memory while initializing profiler\n");
379
      /* NOTREACHED */
380
    }
381
 
382
  froms = (struct here_fromstruct *) ((char *) tos + tossize);
383
  fromidx = 0;
384
 
385
  /* Now we have to process all the arc count entries.  BTW: it is
386
     not critical whether the *NARCSP value changes meanwhile.  Before
387
     we enter a new entry in to toset we will check that everything is
388
     available in TOS.  This happens in _dl_mcount.
389
 
390
     Loading the entries in reverse order should help to get the most
391
     frequently used entries at the front of the list.  */
392
  for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; )
393
    {
394
      size_t to_index;
395
      size_t newfromidx;
396
      --idx;
397
      to_index = (data[idx].self_pc / (hashfraction * sizeof (*tos)));
398
      newfromidx = fromidx++;
399
      froms[newfromidx].here = &data[idx];
400
      froms[newfromidx].link = tos[to_index];
401
      tos[to_index] = newfromidx;
402
    }
403
 
404
  /* Setup counting data.  */
405
  if (kcountsize < highpc - lowpc)
406
    {
407
#if 0
408
      s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1;
409
#else
410
      size_t range = highpc - lowpc;
411
      size_t quot = range / kcountsize;
412
 
413
      if (quot >= SCALE_1_TO_1)
414
        s_scale = 1;
415
      else if (quot >= SCALE_1_TO_1 / 256)
416
        s_scale = SCALE_1_TO_1 / quot;
417
      else if (range > ULONG_MAX / 256)
418
        s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256));
419
      else
420
        s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize);
421
#endif
422
    }
423
  else
424
    s_scale = SCALE_1_TO_1;
425
 
426
  /* Start the profiler.  */
427
  profil ((void *) kcount, kcountsize, lowpc, s_scale);
428
 
429
  /* Turn on profiling.  */
430
  running = 1;
431
}
432
 
433
 
434
void
435
_dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
436
{
437
  volatile uint16_t *topcindex;
438
  size_t i, fromindex;
439
  struct here_fromstruct *fromp;
440
 
441
  if (! running)
442
    return;
443
 
444
  /* Compute relative addresses.  The shared object can be loaded at
445
     any address.  The value of frompc could be anything.  We cannot
446
     restrict it in any way, just set to a fixed value (0) in case it
447
     is outside the allowed range.  These calls show up as calls from
448
     <external> in the gprof output.  */
449
  frompc -= lowpc;
450
  if (frompc >= textsize)
451
    frompc = 0;
452
  selfpc -= lowpc;
453
  if (selfpc >= textsize)
454
    goto done;
455
 
456
  /* Getting here we now have to find out whether the location was
457
     already used.  If yes we are lucky and only have to increment a
458
     counter (this also has to be atomic).  If the entry is new things
459
     are getting complicated...  */
460
 
461
  /* Avoid integer divide if possible.  */
462
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
463
    i = selfpc >> log_hashfraction;
464
  else
465
    i = selfpc / (hashfraction * sizeof (*tos));
466
 
467
  topcindex = &tos[i];
468
  fromindex = *topcindex;
469
 
470
  if (fromindex == 0)
471
    goto check_new_or_add;
472
 
473
  fromp = &froms[fromindex];
474
 
475
  /* We have to look through the chain of arcs whether there is already
476
     an entry for our arc.  */
477
  while (fromp->here->from_pc != frompc)
478
    {
479
      if (fromp->link != 0)
480
        do
481
          fromp = &froms[fromp->link];
482
        while (fromp->link != 0 && fromp->here->from_pc != frompc);
483
 
484
      if (fromp->here->from_pc != frompc)
485
        {
486
          topcindex = &fromp->link;
487
 
488
        check_new_or_add:
489
          /* Our entry is not among the entries we read so far from the
490
             data file.  Now see whether we have to update the list.  */
491
          while (narcs != *narcsp && narcs < fromlimit)
492
            {
493
              size_t to_index;
494
              size_t newfromidx;
495
              to_index = (data[narcs].self_pc
496
                          / (hashfraction * sizeof (*tos)));
497
              newfromidx = exchange_and_add (&fromidx, 1) + 1;
498
              froms[newfromidx].here = &data[narcs];
499
              froms[newfromidx].link = tos[to_index];
500
              tos[to_index] = newfromidx;
501
              atomic_add (&narcs, 1);
502
            }
503
 
504
          /* If we still have no entry stop searching and insert.  */
505
          if (*topcindex == 0)
506
            {
507
              uint32_t newarc = exchange_and_add (narcsp, 1);
508
 
509
              /* In rare cases it could happen that all entries in FROMS are
510
                 occupied.  So we cannot count this anymore.  */
511
              if (newarc >= fromlimit)
512
                goto done;
513
 
514
              *topcindex = exchange_and_add (&fromidx, 1) + 1;
515
              fromp = &froms[*topcindex];
516
 
517
              fromp->here = &data[newarc];
518
              data[newarc].from_pc = frompc;
519
              data[newarc].self_pc = selfpc;
520
              data[newarc].count = 0;
521
              fromp->link = 0;
522
              atomic_add (&narcs, 1);
523
 
524
              break;
525
            }
526
 
527
          fromp = &froms[*topcindex];
528
        }
529
      else
530
        /* Found in.  */
531
        break;
532
    }
533
 
534
  /* Increment the counter.  */
535
  atomic_add (&fromp->here->count, 1);
536
 
537
 done:
538
  ;
539
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.