OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [rtos/] [ecos-3.0/] [packages/] [services/] [profile/] [gprof/] [current/] [src/] [profile.c] - Blame information for rev 786

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 786 skrzyp
//==========================================================================
2
//
3
//      profile.c
4
//
5
//      Application profiling support
6
//
7
//==========================================================================
8
// ####ECOSGPLCOPYRIGHTBEGIN####                                            
9
// -------------------------------------------                              
10
// This file is part of eCos, the Embedded Configurable Operating System.   
11
// Copyright (C) 2002, 2003 Free Software Foundation, Inc.                  
12
//
13
// eCos is free software; you can redistribute it and/or modify it under    
14
// the terms of the GNU General Public License as published by the Free     
15
// Software Foundation; either version 2 or (at your option) any later      
16
// version.                                                                 
17
//
18
// eCos is distributed in the hope that it will be useful, but WITHOUT      
19
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or    
20
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License    
21
// for more details.                                                        
22
//
23
// You should have received a copy of the GNU General Public License        
24
// along with eCos; if not, write to the Free Software Foundation, Inc.,    
25
// 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.            
26
//
27
// As a special exception, if other files instantiate templates or use      
28
// macros or inline functions from this file, or you compile this file      
29
// and link it with other works to produce a work based on this file,       
30
// this file does not by itself cause the resulting work to be covered by   
31
// the GNU General Public License. However the source code for this file    
32
// must still be made available in accordance with section (3) of the GNU   
33
// General Public License v2.                                               
34
//
35
// This exception does not invalidate any other reasons why a work based    
36
// on this file might be covered by the GNU General Public License.         
37
// -------------------------------------------                              
38
// ####ECOSGPLCOPYRIGHTEND####                                              
39
//==========================================================================
40
//#####DESCRIPTIONBEGIN####
41
//
42
// Author(s):    Gary Thomas
43
// Contributors: Bart Veer
44
// Date:         2002-11-14
45
// Purpose:      Application profiling support
46
// Description:  
47
//
48
//####DESCRIPTIONEND####
49
//
50
//===========================================================================
51
 
52
#include <pkgconf/system.h>
53
#include <pkgconf/profile_gprof.h>
54
 
55
#include <stdlib.h>
56
#include <string.h>
57
#include <cyg/infra/cyg_type.h>
58
#include <cyg/infra/diag.h>
59
#include <cyg/profile/profile.h>
60
#include <cyg/profile/gmon_out.h>
61
 
62
#ifdef CYGPKG_PROFILE_TFTP
63
# include <network.h>
64
# include <tftp_support.h>
65
#endif
66
 
67
// ----------------------------------------------------------------------------
68
// A gmon.out file starts with a struct gmon_hdr containing a cookie
69
// "gmon", a format version number, and some spare bytes. The structure
70
// is initialized by the profile_on() entry point so that it does not
71
// get garbage collected by the collector and hence a gdb script can
72
// always access it.
73
static struct gmon_hdr  profile_gmon_hdr;
74
 
75
// The header is followed by data blocks. Each data block consists of a
76
// one-byte tag (HIST, ARC, or BB_COUNT), followed by data in a specific
77
// format.
78
static unsigned char    profile_tags[3];
79
 
80
// The profiling data always contains histogram data. Typically an
81
// extra hardware timer is made to interrupt at the desired rate
82
// and stores the interrupted pc.
83
static struct gmon_hist_hdr    profile_hist_hdr;
84
 
85
// The actual histogram counts. The file format only allows for 16-bit
86
// counts, which means overflow is a real possibility.
87
static cyg_uint16*      profile_hist_data;
88
 
89
// Each slot in the histogram data covers a range of pc addresses,
90
// allowing a trade off between memory requirements and precision.
91
static int              bucket_shift;
92
 
93
// Profiling is disabled on start-up and while a tftp transfer takes place.
94
static int              profile_enabled;
95
 
96
// This is used by the gdb script to reset the profile data.
97
static int              profile_reset_pending;
98
 
99
// The callgraph data. There is no header for this. Instead each non-zero
100
// entry is output separately, prefixed by an ARC tag. The data is accessed
101
// via a hash table/linked list combination. The tag is part of the
102
// structure to reduce the number of I/O operations needed for writing
103
// gmon.out.
104
struct profile_arc {
105
    cyg_uint32                  next;
106
    unsigned char               tags[4];
107
    struct gmon_cg_arc_record   record;
108
};
109
 
110
// The array of arc records, allocated by profile_on().
static struct profile_arc *profile_arc_records;

// Index of the next free slot in profile_arc_records. Slot 0 is never
// handed out because index 0 marks an unused hash slot.
static int profile_arc_next = 1;

#ifdef CYGPKG_PROFILE_CALLGRAPH
// Hash table giving access to the callgraph. The hash function is
// trivial: an address is shifted right by an appropriate number of
// places. Each entry is an index into profile_arc_records.
static int *profile_arc_hashtable;

// Number of entries in the hash table and in the arc record array.
static int profile_arc_hash_count;
static int profile_arc_records_count;

// Set when an arc could not be recorded because the arc table was
// full. Used for diagnostics only.
static int profile_arc_overflow;
#endif
128
 
129
// Reset current profiling data.
130
static void
131
profile_reset(void)
132
{
133
    memset(profile_hist_data, 0, profile_hist_hdr.hist_size * sizeof(cyg_uint16));
134
 
135
#ifdef CYGPKG_PROFILE_CALLGRAPH
136
    // Zeroing the callgraph can be achieved by zeroing the hash
137
    // table and resetting the next field used for indexing into
138
    // the arc data itself. Whenever an arc data slot is allocated
139
    // the count and addresses are reset.
140
    memset(profile_arc_hashtable, 0, profile_arc_hash_count * sizeof(int));
141
    profile_arc_next     = 1;
142
    profile_arc_overflow = 0;
143
#endif
144
}
145
 
146
// ----------------------------------------------------------------------------
147
// Accumulate profiling data.
148
 
149
// __profile_hit() will be called by HAL-specific code, typically in an ISR
150
// associated with a timer.
151
 
152
void
153
__profile_hit(CYG_ADDRWORD pc)
154
{
155
    int bucket;
156
    if (! profile_enabled ) {
157
        if (! profile_reset_pending) {
158
            return;
159
        }
160
        // reset_pending can be set by the gdb script to request resetting
161
        // the data. It avoids having to do lots of memory updates via the
162
        // gdb protocol, which is too slow.
163
        profile_reset_pending   = 0;
164
        profile_reset();
165
        profile_enabled         = 1;
166
    }
167
 
168
    if ((pc >= (CYG_ADDRWORD)profile_hist_hdr.low_pc) && (pc <= (CYG_ADDRWORD)profile_hist_hdr.high_pc)) {
169
        bucket = (pc - (CYG_ADDRWORD)profile_hist_hdr.low_pc) >> bucket_shift;
170
        if (profile_hist_data[bucket] < (unsigned short)0xFFFF) {
171
            profile_hist_data[bucket]++;
172
        }
173
    }
174
}
175
 
176
#ifdef CYGPKG_PROFILE_CALLGRAPH
177
// __profile_mcount() will be called by the HAL-specific mcount() routine.
178
// When code is compiled with -pg the compiler inserts calls to mcount()
179
// at the start of each function. Typically mcount() will not use standard
180
// calling conventions so it has to be provided by the HAL.
181
//
182
// The from_pc/to_pc data should end up in profile_arc_records. A hash table
183
// maps a PC into a list chained through the records array. The hash function
184
// is a simple shift, so a range of PC addresses (usually 256 bytes) map
185
// onto a single linked list of arc records.
186
//
187
// We can hash on either the caller_pc, the callee_pc, or some combination.
188
// The caller PC will typically be in the middle of some function. The
189
// number of arcs that hash into the same list will depend on the number of
190
// function calls within a 256-byte region of code, multiplied by the
191
// number of different functions called at each location. The latter will
192
// be 1 unless the code uses changing function pointers. The callee pc
193
// is near the start of a function, and the number of hash collisions will
194
// depend on the number of places that function is called from. Usually this
195
// will be small, but some utility functions may be called from many different
196
// places.
197
//
198
// Hashing on the caller PC should give more deterministic results.
199
//
200
// On some targets the compiler does additional work. For example on
201
// the 68K in theory there is no need for a hash table because the
202
// compiler provides a word with each callee for the head of the
203
// linked list. It is not easy to cope with that in generic code, so
204
// for now this code ignores such compiler assistance.
205
//
206
// It is assumed that __profile_mcount() will be called with interrupts
207
// disabled. 
208
 
209
void
210
__profile_mcount(CYG_ADDRWORD caller_pc, CYG_ADDRWORD callee_pc)
211
{
212
    int                 hash_index;
213
    struct profile_arc* current;
214
 
215
    // mcount() may be called at any time, even before profile_arc_records
216
    // is enabled. There is an assumption here that .bss has been zeroed
217
    // before the first call into C code, i.e. by the initial assembler
218
    // start-up.
219
    if (!profile_enabled) {
220
        if (! profile_reset_pending) {
221
            return;
222
        }
223
        profile_reset_pending   = 0;
224
        profile_reset();
225
        profile_enabled         = 1;
226
    }
227
 
228
    // Check the caller_pc because that is what is used to index the
229
    // hash table. Checking the callee_pc is optional and depends on
230
    // exactly how you interpret the start and end addresses passed to
231
    // profile_on().
232
    if ((caller_pc < (CYG_ADDRWORD)profile_hist_hdr.low_pc) ||
233
        (caller_pc > (CYG_ADDRWORD)profile_hist_hdr.high_pc)) {
234
        return;
235
    }
236
 
237
    hash_index = (int) ((caller_pc - (CYG_ADDRWORD)profile_hist_hdr.low_pc) >> CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT);
238
    if (0 == profile_arc_hashtable[hash_index]) {
239
        if (profile_arc_next == profile_arc_records_count) {
240
            profile_arc_overflow = 1;
241
        } else {
242
            profile_arc_hashtable[hash_index]   = profile_arc_next;
243
            current = &(profile_arc_records[profile_arc_next]);
244
            profile_arc_next++;
245
            current->next   = 0;
246
            current->record.from_pc = (void*) caller_pc;
247
            current->record.self_pc = (void*) callee_pc;
248
            current->record.count   = 1;
249
        }
250
    } else {
251
        current = &(profile_arc_records[profile_arc_hashtable[hash_index]]);
252
        while (1) {
253
            if ((current->record.from_pc == (void*) caller_pc) && (current->record.self_pc == (void*) callee_pc)) {
254
                current->record.count++;
255
                break;
256
            } else if (0 == current->next) {
257
                if (profile_arc_next == profile_arc_records_count) {
258
                    profile_arc_overflow = 1;
259
                } else {
260
                    current->next   = profile_arc_next;
261
                    current         = &(profile_arc_records[profile_arc_next]);
262
                    profile_arc_next++;
263
                    current->next   = 0;
264
                    current->record.from_pc = (void*) caller_pc;
265
                    current->record.self_pc = (void*) callee_pc;
266
                    current->record.count   = 1;
267
                }
268
                break;
269
            } else {
270
                current = &(profile_arc_records[current->next]);
271
            }
272
        }
273
    }
274
}
275
#endif
276
 
277
#ifdef CYGPKG_PROFILE_TFTP
278
// ----------------------------------------------------------------------------
279
// TFTP support
280
//
281
// To keep things simple this code only supports one open file at a time,
282
// and only gmon.out is supported.
283
 
284
// Which block of gmon.out profile_tftp_read_next() will select next.
static int profile_tftp_next_index = 0;
// Start of the not yet transmitted part of the current block.
static unsigned char *profile_tftp_current_block = (unsigned char *) 0;
// Number of bytes of the current block still to be transmitted.
static int profile_tftp_current_len = 0;
// Non-zero while a transfer of gmon.out is in progress.
static int profile_tftp_is_open = 0;
288
 
289
static int
290
profile_tftp_open(const char *filename, int flags)
291
{
292
    // Only allow one open file for now.
293
    if (profile_tftp_is_open) {
294
        return -1;
295
    }
296
    // Only read-only access is supported.
297
    if ((0 != (flags & ~O_RDONLY)) || (0 == (flags & O_RDONLY))) {
298
        return -1;
299
    }
300
    // Only gmon.out can be retrieved using this tftp daemon
301
    if (0 != strcmp(filename, "gmon.out")) {
302
        return -1;
303
    }
304
    // Everything is in order. Prepare for the first read. Profiling
305
    // is suspended while the tftp transfer is in progress to avoid
306
    // inconsistent results.
307
    profile_enabled             = 0;
308
    profile_tftp_is_open        = 1;
309
    profile_tftp_next_index     = 0;
310
    profile_tftp_current_len    = 0;
311
 
312
    // Report any callgraph overflows. This is best done when retrieving
313
    // the results, either in the gdb script or at tftp open time.
314
#ifdef CYGPKG_PROFILE_CALLGRAPH
315
    if (profile_arc_overflow) {
316
        diag_printf("Profiling: warning, the table of callgraph arcs has overflowed\n");
317
        diag_printf("This can be avoided by increasing CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE\n");
318
    }
319
#endif
320
 
321
    return 1;
322
}
323
 
324
static int
325
profile_tftp_close(int fd)
326
{
327
    if (! profile_tftp_is_open) {
328
        return -1;
329
    }
330
    profile_tftp_is_open = 0;
331
 
332
    // The histogram counters are only 16 bits, so can easily overflow
333
    // during a long run. Resetting the counters here makes it possible
334
    // to examine profile data during different parts of the run with
335
    // a reduced risk of overflow.
336
    profile_reset();
337
 
338
    // Profiling was disabled in the open() call
339
    profile_enabled     = 1;
340
    return 0;
341
}
342
 
343
// tftp write handler. gmon.out is read-only, so every write request is
// refused with -1 and none of the arguments are examined.
static int
profile_tftp_write(int fd, const void *buf, int len)
{
    (void) fd;
    (void) buf;
    (void) len;
    return -1;
}
349
 
350
// The data that should go into gmon.out is spread all over memory.
351
// This utility is used to move from one block to the next.
352
static void
353
profile_tftp_read_next(void)
354
{
355
    switch (profile_tftp_next_index) {
356
      case 0 :      // The current block is the gmon hdr
357
        profile_tftp_current_block  = (unsigned char*) &profile_gmon_hdr;
358
        profile_tftp_current_len    = sizeof(struct gmon_hdr);
359
        break;
360
      case 1 :      // The histogram tag
361
        profile_tftp_current_block  = &(profile_tags[0]);
362
        profile_tftp_current_len    = 1;
363
        break;
364
      case 2 :      // The histogram header
365
        profile_tftp_current_block  = (unsigned char*) &profile_hist_hdr;
366
        profile_tftp_current_len    = sizeof(struct gmon_hist_hdr);
367
        break;
368
      case 3 :      // The histogram data
369
        profile_tftp_current_block  = (unsigned char*) profile_hist_data;
370
        profile_tftp_current_len    = profile_hist_hdr.hist_size * sizeof(cyg_uint16);
371
        break;
372
      default :     // One of the arc records. These start at array offset 1.
373
        {
374
            int arc_index    = profile_tftp_next_index - 3;
375
            if (arc_index >= profile_arc_next) {
376
                profile_tftp_current_block  = (unsigned char*) 0;
377
                profile_tftp_current_len    = 0;
378
            } else {
379
                // gmon.out should contain a 1 byte tag followed by each
380
                // arc record.
381
                profile_tftp_current_block  = (unsigned char*) &(profile_arc_records[arc_index].tags[3]);
382
                profile_tftp_current_len    = sizeof(struct gmon_cg_arc_record) + 1;
383
            }
384
            break;
385
        }
386
    }
387
    profile_tftp_next_index++;
388
}
389
 
390
// Read the next block of data. There is no seek operation so no need
391
// to worry about the current position. State from the previous reads
392
// is held in profile_tftp_current_block and profile_tftp_current_len
393
static int
394
profile_tftp_read(int fd, void *buf_arg, int len)
395
{
396
    unsigned char*  buf     = (unsigned char*) buf_arg;
397
    int             read    = 0;
398
 
399
    if ( ! profile_tftp_is_open ) {
400
        return -1;
401
    }
402
 
403
    while (len > 0) {
404
        if (0 == profile_tftp_current_len) {
405
            profile_tftp_read_next();
406
            if (0 == profile_tftp_current_len) {
407
                break;
408
            }
409
        }
410
        if (profile_tftp_current_len >= len) {
411
            // The request can be satisfied by the current block
412
            memcpy(&(buf[read]), profile_tftp_current_block, len);
413
            profile_tftp_current_block += len;
414
            profile_tftp_current_len   -= len;
415
            read += len;
416
            break;
417
        } else {
418
            memcpy(&(buf[read]), profile_tftp_current_block, profile_tftp_current_len);
419
            len  -= profile_tftp_current_len;
420
            read += profile_tftp_current_len;
421
            profile_tftp_current_len = 0;
422
        }
423
    }
424
    return read;
425
}
426
 
427
// The handlers passed to tftpd_start(): open, close, write and read,
// in that order.
static struct tftpd_fileops profile_tftp_fileops = {
    profile_tftp_open,
    profile_tftp_close,
    profile_tftp_write,
    profile_tftp_read
};
433
#endif
434
 
435
// ----------------------------------------------------------------------------
436
// stop profiling
437
void
438
profile_off(void)
439
{
440
    // suspend currently running profiling
441
    profile_enabled = 0;
442
    // Clear all pre-existing profile data
443
    profile_reset();
444
    if (profile_hist_data) {
445
        free(profile_hist_data);
446
        profile_hist_data = NULL;
447
    }
448
#ifdef CYGPKG_PROFILE_CALLGRAPH
449
    if (profile_arc_hashtable) {
450
        free(profile_arc_hashtable);
451
        profile_arc_hashtable=NULL;
452
    }
453
    if (profile_arc_records) {
454
        free(profile_arc_records);
455
        profile_arc_records=NULL;
456
    }
457
#endif
458
}
459
 
460
 
461
// ----------------------------------------------------------------------------
462
// profile_on() has to be called by application code to start profiling.
463
// Application code will determine the start and end addresses, usually
464
// _stext and _etext, but it is possible to limit profiling to only
465
// some of the code. The bucket size controls how many PC addresses
466
// will be treated as a single hit: a smaller bucket increases precision
467
// but requires more memory. The resolution is used to initialize the
468
// profiling timer: more frequent interrupts means more accurate results
469
// but increases the risk of an overflow.
470
//
471
// profile_on() can be invoked multiple times. If invoked a second time
472
// it will stop the current profiling run and create a new profiling 
473
// range.
474
 
475
 
476
 
477
void
478
profile_on(void *_start, void *_end, int _bucket_size, int resolution)
479
{
480
    int             bucket_size;
481
    cyg_uint32      version     = GMON_VERSION;
482
    CYG_ADDRWORD    text_size   = (CYG_ADDRWORD)_end - (CYG_ADDRWORD)_start;
483
 
484
    if (profile_enabled)
485
    {
486
        // invoking profile_on a second time
487
        profile_off();
488
    }
489
 
490
 
491
    // Initialize statics. This also ensures that they won't be
492
    // garbage collected by the linker so a gdb script can safely
493
    // reference them.
494
    memcpy(profile_gmon_hdr.cookie, GMON_MAGIC, 4);
495
    memcpy(profile_gmon_hdr.version, &version, 4);
496
    profile_tags[0] = GMON_TAG_TIME_HIST;
497
    profile_tags[1] = GMON_TAG_CG_ARC;
498
    profile_tags[2] = GMON_TAG_BB_COUNT;
499
    strcpy(profile_hist_hdr.dimen, "seconds");
500
    profile_hist_hdr.dimen_abbrev   = 's';
501
 
502
    // The actual bucket size. For efficiency this should be a power of 2.
503
    bucket_size             = 1;
504
    bucket_shift            = 0;
505
    while (bucket_size < _bucket_size) {
506
        bucket_size     <<= 1;
507
        bucket_shift    += 1;
508
    }
509
 
510
    // The gprof documentation claims that this should be the size in
511
    // bytes. The implementation treats it as a count.
512
    profile_hist_hdr.hist_size  = (cyg_uint32) ((text_size + bucket_size - 1) / bucket_size);
513
    profile_hist_hdr.low_pc     = _start;
514
    profile_hist_hdr.high_pc    = (void*)((cyg_uint8*)_end - 1);
515
    // The prof_rate is the frequency in hz. The resolution argument is
516
    // an interval in microseconds.
517
    profile_hist_hdr.prof_rate  = 1000000 / resolution;
518
 
519
    // Now allocate a buffer for the histogram data.
520
    profile_hist_data = (cyg_uint16*) malloc(profile_hist_hdr.hist_size * sizeof(cyg_uint16));
521
    if ((cyg_uint16*)0 == profile_hist_data) {
522
        diag_printf("profile_on(): cannot allocate histogram buffer - ignored\n");
523
        return;
524
    }
525
    memset(profile_hist_data, 0, profile_hist_hdr.hist_size * sizeof(cyg_uint16));
526
 
527
#ifdef CYGPKG_PROFILE_CALLGRAPH
528
    // Two arrays are needed for keeping track of the callgraph. The
529
    // first is a hash table. The second holds the arc data. The
530
    // latter array contains an extra 50 slots to cope with degenerate
531
    // programs (including testcases).
532
    {
533
        int i;
534
 
535
        profile_arc_hash_count  = (int) ((text_size + (0x01 << CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT) - 1)
536
                                         >> CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT);
537
        profile_arc_records_count = (int)
538
            (CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE * (text_size / 100)) /
539
            sizeof(struct profile_arc)
540
            + 50;
541
 
542
        profile_arc_hashtable = (int*) malloc(profile_arc_hash_count * sizeof(int));
543
        if ((int*)0 == profile_arc_hashtable) {
544
            diag_printf("profile_on(): cannot allocate call graph hash table\n  call graph profiling disabled\n");
545
        } else {
546
            memset(profile_arc_hashtable, 0, profile_arc_hash_count * sizeof(int));
547
            profile_arc_records = (struct profile_arc*) malloc(profile_arc_records_count * sizeof(struct profile_arc));
548
            if ((struct profile_arc*)0 == profile_arc_records) {
549
                diag_printf("profile_on(): cannot allocate call graph arc table\n  call graph profiling disabled\n");
550
                free(profile_arc_hashtable);
551
                profile_arc_hashtable = (int*) 0;
552
            } else {
553
                memset(profile_arc_records, 0, profile_arc_records_count * sizeof(struct profile_arc));
554
                for (i = 0; i < profile_arc_records_count; i++) {
555
                    profile_arc_records[i].tags[3] = GMON_TAG_CG_ARC;
556
                }
557
                profile_arc_next    = 1;    // slot 0 cannot be used because 0 marks an unused hash slot.
558
            }
559
        }
560
    }
561
#else
562
    profile_arc_records     = (struct profile_arc*) 0;
563
#endif
564
 
565
    diag_printf("Profile from %p..%p in %d buckets of size %d\n",
566
                profile_hist_hdr.low_pc, profile_hist_hdr.high_pc,
567
                profile_hist_hdr.hist_size, bucket_size);
568
 
569
    // Activate the profiling timer, which is usually provided by the
570
    // variant or target HAL. The requested resolution may not be
571
    // possible on the current hardware, so the HAL is allowed to
572
    // tweak it.
573
    resolution = hal_enable_profile_timer(resolution);
574
    profile_hist_hdr.prof_rate = 1000000 / resolution;
575
 
576
    profile_enabled = 1;
577
 
578
#ifdef CYGPKG_PROFILE_TFTP
579
    static int profile_tftp_is_started        = 0;
580
    if (!profile_tftp_is_started)
581
    {
582
        profile_tftp_is_started = 1;
583
        // Create a TFTP server the first time we start profiling to
584
        // provide access to the data via the network.
585
        (void) tftpd_start(CYGNUM_PROFILE_TFTP_PORT, &profile_tftp_fileops);
586
    }
587
#endif
588
}
589
 
590
// EOF profile.c

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.