OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [runtime/] [cpuprof.c] - Blame information for rev 747

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors.  All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
// CPU profiling.
6
// Based on algorithms and data structures used in
7
// http://code.google.com/p/google-perftools/.
8
//
9
// The main difference between this code and the google-perftools
10
// code is that this code is written to allow copying the profile data
11
// to an arbitrary io.Writer, while the google-perftools code always
12
// writes to an operating system file.
13
//
14
// The signal handler for the profiling clock tick adds a new stack trace
15
// to a hash table tracking counts for recent traces.  Most clock ticks
16
// hit in the cache.  In the event of a cache miss, an entry must be 
17
// evicted from the hash table, copied to a log that will eventually be
18
// written as profile data.  The google-perftools code flushed the
19
// log itself during the signal handler.  This code cannot do that, because
20
// the io.Writer might block or need system calls or locks that are not
21
// safe to use from within the signal handler.  Instead, we split the log
22
// into two halves and let the signal handler fill one half while a goroutine
23
// is writing out the other half.  When the signal handler fills its half, it
24
// offers to swap with the goroutine.  If the writer is not done with its half,
25
// we lose the stack trace for this clock tick (and record that loss).
26
// The goroutine interacts with the signal handler by calling getprofile() to
27
// get the next log piece to write, implicitly handing back the last log
28
// piece it obtained.
29
//
30
// The state of this dance between the signal handler and the goroutine
31
// is encoded in the Profile.handoff field.  If handoff == 0, then the goroutine
32
// is not using either log half and is waiting (or will soon be waiting) for
33
// a new piece by calling notesleep(&p->wait).  If the signal handler
34
// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
35
// to wake the goroutine.  The value indicates the number of entries in the
36
// log half being handed off.  The goroutine leaves the non-zero value in
37
// place until it has finished processing the log half and then flips the number
38
// back to zero.  Setting the high bit in handoff means that the profiling is over, 
39
// and the goroutine is now in charge of flushing the data left in the hash table
40
// to the log and returning that data.  
41
//
42
// The handoff field is manipulated using atomic operations.
43
// For the most part, the manipulation of handoff is orderly: if handoff == 0
44
// then the signal handler owns it and can change it to non-zero.  
45
// If handoff != 0 then the goroutine owns it and can change it to zero.
46
// If that were the end of the story then we would not need to manipulate
47
// handoff using atomic operations.  The operations are needed, however,
48
// in order to let the log closer set the high bit to indicate "EOF" safely
49
// in the situation when normally the goroutine "owns" handoff.
50
 
51
#include "runtime.h"
#include "arch.h"
#include "malloc.h"

#include "array.h"
// Alias libgo's open-array (Go slice) type and its fields to the
// short names (array/len/cap) used by the runtime sources below.
typedef struct __go_open_array Slice;
#define array __values
#define len __count
#define cap __capacity
60
 
61
enum
{
	HashSize = 1<<10,	// buckets in the recent-trace hash table (h%HashSize)
	LogSize = 1<<17,	// total log capacity in uintptr words, split into two halves
	Assoc = 4,		// entries per hash bucket (set associativity)
	MaxStack = 64,		// maximum stack frames recorded per trace
};
68
 
69
// Forward declarations for the profile data structures defined below.
typedef struct Profile Profile;
typedef struct Bucket Bucket;
typedef struct Entry Entry;
72
 
73
// Entry is one recorded stack trace and the number of profiling
// ticks attributed to it since it entered the hash table.
struct Entry {
	uintptr count;		// ticks seen for this trace
	uintptr depth;		// number of valid frames in stack
	uintptr stack[MaxStack];	// program counters of the trace
};
78
 
79
// Bucket is a set-associative group of Assoc entries; a trace hashes
// to one bucket and may occupy any slot within it.
struct Bucket {
	Entry entry[Assoc];
};
82
 
83
// Profile holds all CPU-profiling state: the hash table of recent
// traces filled by the signal handler, and the two log halves shared
// with the writing goroutine via the handoff protocol described in
// the comment at the top of this file.
struct Profile {
	bool on;		// profiling is on
	Note wait;		// goroutine waits here
	uintptr count;		// tick count
	uintptr evicts;		// eviction count
	uintptr lost;		// lost ticks that need to be logged
	uintptr totallost;	// total lost ticks

	// Active recent stack traces.
	Bucket hash[HashSize];

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	uintptr log[2][LogSize/2];
	uintptr nlog;
	int32 toggle;
	uint32 handoff;		// entry count being handed off; high bit = profiling over

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	uint32 wtoggle;
	bool wholding;	// holding & need to release a log half
	bool flushing;	// flushing hash table - profile is over
};
109
 
110
static Lock lk;		// serializes SetCPUProfileRate (prof allocation, start/stop)
static Profile *prof;	// the single profile instance, allocated on first use

static void tick(uintptr*, int32);
static void add(Profile*, uintptr*, int32);
static bool evict(Profile*, Entry*);
static bool flushlog(Profile*);
117
 
118
// LostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
// Its address is written into the log as a sentinel PC (see flushlog),
// so the function itself must exist even though it does nothing.
static void LostProfileData(void) {
}
123
 
124
extern void runtime_SetCPUProfileRate(int32)
125
     __asm__("libgo_runtime.runtime.SetCPUProfileRate");
126
 
127
// SetCPUProfileRate sets the CPU profiling rate.
128
// The user documentation is in debug.go.
129
void
130
runtime_SetCPUProfileRate(int32 hz)
131
{
132
        uintptr *p;
133
        uintptr n;
134
 
135
        // Clamp hz to something reasonable.
136
        if(hz < 0)
137
                hz = 0;
138
        if(hz > 1000000)
139
                hz = 1000000;
140
 
141
        runtime_lock(&lk);
142
        if(hz > 0) {
143
                if(prof == nil) {
144
                        prof = runtime_SysAlloc(sizeof *prof);
145
                        if(prof == nil) {
146
                                runtime_printf("runtime: cpu profiling cannot allocate memory\n");
147
                                runtime_unlock(&lk);
148
                                return;
149
                        }
150
                }
151
                if(prof->on || prof->handoff != 0) {
152
                        runtime_printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
153
                        runtime_unlock(&lk);
154
                        return;
155
                }
156
 
157
                prof->on = true;
158
                p = prof->log[0];
159
                // pprof binary header format.
160
                // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
161
                *p++ = 0;  // count for header
162
                *p++ = 3;  // depth for header
163
                *p++ = 0;  // version number
164
                *p++ = 1000000 / hz;  // period (microseconds)
165
                *p++ = 0;
166
                prof->nlog = p - prof->log[0];
167
                prof->toggle = 0;
168
                prof->wholding = false;
169
                prof->wtoggle = 0;
170
                prof->flushing = false;
171
                runtime_noteclear(&prof->wait);
172
 
173
                runtime_setcpuprofilerate(tick, hz);
174
        } else if(prof->on) {
175
                runtime_setcpuprofilerate(nil, 0);
176
                prof->on = false;
177
 
178
                // Now add is not running anymore, and getprofile owns the entire log.
179
                // Set the high bit in prof->handoff to tell getprofile.
180
                for(;;) {
181
                        n = prof->handoff;
182
                        if(n&0x80000000)
183
                                runtime_printf("runtime: setcpuprofile(off) twice");
184
                        if(runtime_cas(&prof->handoff, n, n|0x80000000))
185
                                break;
186
                }
187
                if(n == 0) {
188
                        // we did the transition from 0 -> nonzero so we wake getprofile
189
                        runtime_notewakeup(&prof->wait);
190
                }
191
        }
192
        runtime_unlock(&lk);
193
}
194
 
195
// tick is the handler installed via runtime_setcpuprofilerate;
// it forwards each sampled stack (pc[0..n-1]) to the global profile.
static void
tick(uintptr *pc, int32 n)
{
	add(prof, pc, n);
}
200
 
201
// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.  It is allowed to call evict.
static void
add(Profile *p, uintptr *pc, int32 n)
{
	int32 i, j;
	uintptr h, x;
	Bucket *b;
	Entry *e;

	// Truncate over-deep stacks to the fixed Entry capacity.
	if(n > MaxStack)
		n = MaxStack;

	// Compute hash.
	h = 0;
	for(i=0; i<n; i++) {
		// Rotate h left by 8 bits, then mix in the next PC.
		h = h<<8 | (h>>(8*(sizeof(h)-1)));
		x = pc[i];
		h += x*31 + x*7 + x*3;
	}
	p->count++;

	// Add to entry count if already present in table.
	b = &p->hash[h%HashSize];
	for(i=0; i<Assoc; i++) {
		e = &b->entry[i];
		// Candidate must match both depth and every frame.
		if(e->depth != (uintptr)n)
			continue;
		for(j=0; j<n; j++)
			if(e->stack[j] != pc[j])
				goto ContinueAssoc;
		e->count++;
		return;
	ContinueAssoc:;
	}

	// Evict entry with smallest count.
	e = &b->entry[0];
	for(i=1; i<Assoc; i++)
		if(b->entry[i].count < e->count)
			e = &b->entry[i];
	if(e->count > 0) {
		if(!evict(p, e)) {
			// Could not evict entry.  Record lost stack.
			p->lost++;
			p->totallost++;
			return;
		}
		p->evicts++;
	}

	// Reuse the newly evicted entry.
	e->depth = n;
	e->count = 1;
	for(i=0; i<n; i++)
		e->stack[i] = pc[i];
}
261
 
262
// evict copies the given entry's data into the log, so that
263
// the entry can be reused.  evict is called from add, which
264
// is called from the profiling signal handler, so it must not
265
// allocate memory or block.  It is safe to call flushLog.
266
// evict returns true if the entry was copied to the log,
267
// false if there was no room available.
268
static bool
269
evict(Profile *p, Entry *e)
270
{
271
        int32 i, d, nslot;
272
        uintptr *log, *q;
273
 
274
        d = e->depth;
275
        nslot = d+2;
276
        log = p->log[p->toggle];
277
        if(p->nlog+nslot > nelem(p->log[0])) {
278
                if(!flushlog(p))
279
                        return false;
280
                log = p->log[p->toggle];
281
        }
282
 
283
        q = log+p->nlog;
284
        *q++ = e->count;
285
        *q++ = d;
286
        for(i=0; i<d; i++)
287
                *q++ = e->stack[i];
288
        p->nlog = q - log;
289
        e->count = 0;
290
        return true;
291
}
292
 
293
// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block.  It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
static bool
flushlog(Profile *p)
{
	uintptr *log, *q;

	// Hand off is only possible if the goroutine is not still holding
	// the other half (handoff == 0); otherwise the caller drops the sample.
	if(!runtime_cas(&p->handoff, 0, p->nlog))
		return false;
	runtime_notewakeup(&p->wait);

	// Switch to the other half and start it with a synthetic record
	// for any lost ticks: count = p->lost, depth 1, sentinel PC.
	p->toggle = 1 - p->toggle;
	log = p->log[p->toggle];
	q = log;
	if(p->lost > 0) {
		// NOTE(review): p->lost is not reset here, so the same lost
		// count appears to be re-logged on every subsequent flush —
		// confirm against upstream before changing.
		*q++ = p->lost;
		*q++ = 1;
		*q++ = (uintptr)LostProfileData;
	}
	p->nlog = q - log;
	return true;
}
317
 
318
// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte.  It is called from the writing goroutine.
// Returns a nil slice when profiling is finished (or p is nil).
Slice
getprofile(Profile *p)
{
	uint32 i, j, n;
	Slice ret;
	Bucket *b;
	Entry *e;

	// ret defaults to nil; several exits below rely on this.
	ret.array = nil;
	ret.len = 0;
	ret.cap = 0;

	if(p == nil)
		return ret;

	if(p->wholding) {
		// Release previous log to signal handling side.
		// Loop because we are racing against setprofile(off).
		for(;;) {
			n = p->handoff;
			if(n == 0) {
				runtime_printf("runtime: phase error during cpu profile handoff\n");
				return ret;
			}
			if(n & 0x80000000) {
				// Profiling was turned off while we held the half:
				// switch into flush mode to drain the hash table.
				p->wtoggle = 1 - p->wtoggle;
				p->wholding = false;
				p->flushing = true;
				goto flush;
			}
			if(runtime_cas(&p->handoff, n, 0))
				break;
		}
		p->wtoggle = 1 - p->wtoggle;
		p->wholding = false;
	}

	if(p->flushing)
		goto flush;

	// Profiling is off and nothing is pending: nothing more to return.
	if(!p->on && p->handoff == 0)
		return ret;

	// Wait for new log.
	runtime_entersyscall();
	runtime_notesleep(&p->wait);
	runtime_exitsyscall();
	runtime_noteclear(&p->wait);

	n = p->handoff;
	if(n == 0) {
		runtime_printf("runtime: phase error during cpu profile wait\n");
		return ret;
	}
	if(n == 0x80000000) {
		// Exactly the EOF bit with no entries: profiling is over.
		p->flushing = true;
		goto flush;
	}
	// Strip the EOF bit; any remaining entries are returned first and
	// the flush happens on the next call via the wholding branch above.
	n &= ~0x80000000;

	// Return new log to caller.
	p->wholding = true;

	ret.array = (byte*)p->log[p->wtoggle];
	ret.len = n*sizeof(uintptr);
	ret.cap = ret.len;
	return ret;

flush:
	// In flush mode.
	// Add is no longer being called.  We own the log.
	// Also, p->handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	for(i=0; i<HashSize; i++) {
		b = &p->hash[i];
		for(j=0; j<Assoc; j++) {
			e = &b->entry[j];
			if(e->count > 0 && !evict(p, e)) {
				// Filled the log.  Stop the loop and return what we've got.
				goto breakflush;
			}
		}
	}
breakflush:

	// Return pending log data.
	if(p->nlog > 0) {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		ret.array = (byte*)p->log[p->toggle];
		ret.len = p->nlog*sizeof(uintptr);
		ret.cap = ret.len;
		p->nlog = 0;
		return ret;
	}

	// Made it through the table without finding anything to log.
	// Finally done.  Clean up and return nil.
	p->flushing = false;
	if(!runtime_cas(&p->handoff, p->handoff, 0))
		runtime_printf("runtime: profile flush racing with something\n");
	return ret;  // set to nil at top of function
}
423
 
424
extern Slice runtime_CPUProfile(void)
     __asm__("libgo_runtime.runtime.CPUProfile");

// CPUProfile returns the next cpu profile block as a []byte.
// The user documentation is in debug.go.
// Thin exported wrapper over getprofile on the global profile.
Slice
runtime_CPUProfile(void)
{
	return getprofile(prof);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.