OpenCores Subversion repository: openrisc
https://opencores.org/ocsvn/openrisc/openrisc/trunk

openrisc/trunk/gnu-dev/or1k-gcc/libgo/runtime/mgc0.c (rev 760; last modified in rev 747 by jeremybenn)
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector.

#include "runtime.h"
#include "arch.h"
#include "malloc.h"

#ifdef USING_SPLIT_STACK

extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
                                 void **);

extern void * __splitstack_find_context (void *context[10], size_t *, void **,
                                         void **, void **);

#endif

enum {
        Debug = 0,
        PtrSize = sizeof(void*),
        DebugMark = 0,  // run second pass to check mark

        // Four bits per word (see #defines below).
        wordsPerBitmapWord = sizeof(void*)*8/4,
        bitShift = sizeof(void*)*8/4,
};

// Bits in per-word bitmap.
// #defines because enum might not be able to hold the values.
//
// Each word in the bitmap describes wordsPerBitmapWord words
// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
// so on a 64-bit system there is one bitmap word per 16 heap words.
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
// there.  On a 64-bit system the off'th word in the arena is tracked by
// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
// the only difference is that the divisor is 8.)
//
// To pull out the bits corresponding to a given pointer p, we use:
//
//      off = p - (uintptr*)mheap.arena_start;  // word offset
//      b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
//      shift = off % wordsPerBitmapWord
//      bits = *b >> shift;
//      /* then test bits & bitAllocated, bits & bitMarked, etc. */
//
#define bitAllocated            ((uintptr)1<<(bitShift*0))
#define bitNoPointers           ((uintptr)1<<(bitShift*1))      /* when bitAllocated is set */
#define bitMarked               ((uintptr)1<<(bitShift*2))      /* when bitAllocated is set */
#define bitSpecial              ((uintptr)1<<(bitShift*3))      /* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary        ((uintptr)1<<(bitShift*1))      /* when bitAllocated is NOT set */

#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
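// Illustrative note, not part of the original source: a worked example of the
// arithmetic above, assuming a 64-bit system (wordsPerBitmapWord = bitShift = 16).
// For the heap word at word offset off = 37 from mheap.arena_start:
//
//      b     = (uintptr*)mheap.arena_start - 37/16 - 1;   // 3 words before arena_start
//      shift = 37 % 16;                                    // = 5
//
// so within *b this word's bitAllocated bit is bit 5, its
// bitNoPointers/bitBlockBoundary bit is bit 21, its bitMarked bit is bit 37,
// and its bitSpecial bit is bit 53.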

// TODO: Make these per-M.
static uint64 nhandoff;

static int32 gctrace;

typedef struct Workbuf Workbuf;
struct Workbuf
{
        Workbuf *next;
        uintptr nobj;
        byte *obj[512-2];
};
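// Illustrative note, not part of the original source: with the declarations
// above, a Workbuf is exactly 512 pointer-sized words (next and nobj plus 510
// obj slots), i.e. 4 KB on a 64-bit system and 2 KB on a 32-bit system, so the
// 1 MB work.chunk carved up in getempty below holds a whole number of buffers.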

typedef struct Finalizer Finalizer;
struct Finalizer
{
        void (*fn)(void*);
        void *arg;
        const struct __go_func_type *ft;
};

typedef struct FinBlock FinBlock;
struct FinBlock
{
        FinBlock *alllink;
        FinBlock *next;
        int32 cnt;
        int32 cap;
        Finalizer fin[1];
};


static G *fing;
static FinBlock *finq; // list of finalizers that are to be executed
static FinBlock *finc; // cache of free blocks
static FinBlock *allfin; // list of all blocks
static Lock finlock;
static int32 fingwait;

static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
static void     putempty(Workbuf*);
static Workbuf* handoff(Workbuf*);

static struct {
        Lock fmu;
        Workbuf *full;
        Lock emu;
        Workbuf *empty;
        uint32  nproc;
        volatile uint32 nwait;
        volatile uint32 ndone;
        Note    alldone;
        Lock    markgate;
        Lock    sweepgate;
        MSpan   *spans;

        Lock;
        byte    *chunk;
        uintptr nchunk;
} work;

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left.  Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body.  Keeping an explicit work list is easier on the stack allocator and
// more efficient.
static void
scanblock(byte *b, int64 n)
{
        byte *obj, *arena_start, *arena_used, *p;
        void **vp;
        uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
        MSpan *s;
        PageID k;
        void **wp;
        Workbuf *wbuf;
        bool keepworking;

        if((int64)(uintptr)n != n || n < 0) {
                // runtime_printf("scanblock %p %lld\n", b, (long long)n);
                runtime_throw("scanblock");
        }

        // Memory arena parameters.
        arena_start = runtime_mheap.arena_start;
        arena_used = runtime_mheap.arena_used;
        nproc = work.nproc;

        wbuf = nil;  // current work buffer
        wp = nil;  // storage for next queued pointer (write pointer)
        nobj = 0;  // number of queued objects

        // Scanblock helpers pass b==nil.
        // The main proc needs to return to make more
        // calls to scanblock.  But if work.nproc==1 then
        // might as well process blocks as soon as we
        // have them.
        keepworking = b == nil || work.nproc == 1;

        // Align b to a word boundary.
        off = (uintptr)b & (PtrSize-1);
        if(off != 0) {
                b += PtrSize - off;
                n -= PtrSize - off;
        }

        for(;;) {
                // Each iteration scans the block b of length n, queueing pointers in
                // the work buffer.
                if(Debug > 1)
                        runtime_printf("scanblock %p %lld\n", b, (long long) n);

                vp = (void**)b;
                n >>= (2+PtrSize/8);  /* n /= PtrSize (4 or 8) */
                for(i=0; i<(uintptr)n; i++) {
                        obj = (byte*)vp[i];

                        // Words outside the arena cannot be pointers.
                        if((byte*)obj < arena_start || (byte*)obj >= arena_used)
                                continue;

                        // obj may be a pointer to a live object.
                        // Try to find the beginning of the object.

                        // Round down to word boundary.
                        obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

                        // Find bits for this word.
                        off = (uintptr*)obj - (uintptr*)arena_start;
                        bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
                        shift = off % wordsPerBitmapWord;
                        xbits = *bitp;
                        bits = xbits >> shift;

                        // Pointing at the beginning of a block?
                        if((bits & (bitAllocated|bitBlockBoundary)) != 0)
                                goto found;

                        // Pointing just past the beginning?
                        // Scan backward a little to find a block boundary.
                        for(j=shift; j-->0; ) {
                                if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
                                        obj = (byte*)obj - (shift-j)*PtrSize;
                                        shift = j;
                                        bits = xbits>>shift;
                                        goto found;
                                }
                        }

                        // Otherwise consult span table to find beginning.
                        // (Manually inlined copy of MHeap_LookupMaybe.)
                        k = (uintptr)obj>>PageShift;
                        x = k;
                        if(sizeof(void*) == 8)
                                x -= (uintptr)arena_start>>PageShift;
                        s = runtime_mheap.map[x];
                        if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
                                continue;
                        p =  (byte*)((uintptr)s->start<<PageShift);
                        if(s->sizeclass == 0) {
                                obj = p;
                        } else {
                                if((byte*)obj >= (byte*)s->limit)
                                        continue;
                                size = runtime_class_to_size[s->sizeclass];
                                int32 i = ((byte*)obj - p)/size;
                                obj = p+i*size;
                        }

                        // Now that we know the object header, reload bits.
                        off = (uintptr*)obj - (uintptr*)arena_start;
                        bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
                        shift = off % wordsPerBitmapWord;
                        xbits = *bitp;
                        bits = xbits >> shift;

                found:
                        // Now we have bits, bitp, and shift correct for
                        // obj pointing at the base of the object.
                        // Only care about allocated and not marked.
                        if((bits & (bitAllocated|bitMarked)) != bitAllocated)
                                continue;
                        if(nproc == 1)
                                *bitp |= bitMarked<<shift;
                        else {
                                for(;;) {
                                        x = *bitp;
                                        if(x & (bitMarked<<shift))
                                                goto continue_obj;
                                        if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
                                                break;
                                }
                        }

                        // If object has no pointers, don't need to scan further.
                        if((bits & bitNoPointers) != 0)
                                continue;

                        // If another proc wants a pointer, give it some.
                        if(nobj > 4 && work.nwait > 0 && work.full == nil) {
                                wbuf->nobj = nobj;
                                wbuf = handoff(wbuf);
                                nobj = wbuf->nobj;
                                wp = (void**)(wbuf->obj + nobj);
                        }

                        // If buffer is full, get a new one.
                        if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
                                if(wbuf != nil)
                                        wbuf->nobj = nobj;
                                wbuf = getempty(wbuf);
                                wp = (void**)(wbuf->obj);
                                nobj = 0;
                        }
                        *wp++ = obj;
                        nobj++;
                continue_obj:;
                }

                // Done scanning [b, b+n).  Prepare for the next iteration of
                // the loop by setting b and n to the parameters for the next block.

                // Fetch b from the work buffer.
                if(nobj == 0) {
                        if(!keepworking) {
                                putempty(wbuf);
                                return;
                        }
                        // Emptied our buffer: refill.
                        wbuf = getfull(wbuf);
                        if(wbuf == nil)
                                return;
                        nobj = wbuf->nobj;
                        wp = (void**)(wbuf->obj + wbuf->nobj);
                }
                b = *--wp;
                nobj--;

                // Ask span about size class.
                // (Manually inlined copy of MHeap_Lookup.)
                x = (uintptr)b>>PageShift;
                if(sizeof(void*) == 8)
                        x -= (uintptr)arena_start>>PageShift;
                s = runtime_mheap.map[x];
                if(s->sizeclass == 0)
                        n = s->npages<<PageShift;
                else
                        n = runtime_class_to_size[s->sizeclass];
        }
}

// debug_scanblock is the debug copy of scanblock.
// it is simpler, slower, single-threaded, recursive,
// and uses bitSpecial as the mark bit.
static void
debug_scanblock(byte *b, int64 n)
{
        byte *obj, *p;
        void **vp;
        uintptr size, *bitp, bits, shift, i, xbits, off;
        MSpan *s;

        if(!DebugMark)
                runtime_throw("debug_scanblock without DebugMark");

        if((int64)(uintptr)n != n || n < 0) {
                //runtime_printf("debug_scanblock %p %D\n", b, n);
                runtime_throw("debug_scanblock");
        }

        // Align b to a word boundary.
        off = (uintptr)b & (PtrSize-1);
        if(off != 0) {
                b += PtrSize - off;
                n -= PtrSize - off;
        }

        vp = (void**)b;
        n /= PtrSize;
        for(i=0; i<(uintptr)n; i++) {
                obj = (byte*)vp[i];

                // Words outside the arena cannot be pointers.
                if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
                        continue;

                // Round down to word boundary.
                obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));

                // Consult span table to find beginning.
                s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
                if(s == nil)
                        continue;


                p =  (byte*)((uintptr)s->start<<PageShift);
                if(s->sizeclass == 0) {
                        obj = p;
                        size = (uintptr)s->npages<<PageShift;
                } else {
                        if((byte*)obj >= (byte*)s->limit)
                                continue;
                        size = runtime_class_to_size[s->sizeclass];
                        int32 i = ((byte*)obj - p)/size;
                        obj = p+i*size;
                }

                // Now that we know the object header, reload bits.
                off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
                bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
                shift = off % wordsPerBitmapWord;
                xbits = *bitp;
                bits = xbits >> shift;

                // Now we have bits, bitp, and shift correct for
                // obj pointing at the base of the object.
                // If not allocated or already marked, done.
                if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
                        continue;
                *bitp |= bitSpecial<<shift;
                if(!(bits & bitMarked))
                        runtime_printf("found unmarked block %p in %p\n", obj, vp+i);

                // If object has no pointers, don't need to scan further.
                if((bits & bitNoPointers) != 0)
                        continue;

                debug_scanblock(obj, size);
        }
}

// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
        if(work.nproc == 1) {
                // Put b on full list.
                if(b != nil) {
                        b->next = work.full;
                        work.full = b;
                }
                // Grab from empty list if possible.
                b = work.empty;
                if(b != nil) {
                        work.empty = b->next;
                        goto haveb;
                }
        } else {
                // Put b on full list.
                if(b != nil) {
                        runtime_lock(&work.fmu);
                        b->next = work.full;
                        work.full = b;
                        runtime_unlock(&work.fmu);
                }
                // Grab from empty list if possible.
                runtime_lock(&work.emu);
                b = work.empty;
                if(b != nil)
                        work.empty = b->next;
                runtime_unlock(&work.emu);
                if(b != nil)
                        goto haveb;
        }

        // Need to allocate.
        runtime_lock(&work);
        if(work.nchunk < sizeof *b) {
                work.nchunk = 1<<20;
                work.chunk = runtime_SysAlloc(work.nchunk);
        }
        b = (Workbuf*)work.chunk;
        work.chunk += sizeof *b;
        work.nchunk -= sizeof *b;
        runtime_unlock(&work);

haveb:
        b->nobj = 0;
        return b;
}

static void
putempty(Workbuf *b)
{
        if(b == nil)
                return;

        if(work.nproc == 1) {
                b->next = work.empty;
                work.empty = b;
                return;
        }

        runtime_lock(&work.emu);
        b->next = work.empty;
        work.empty = b;
        runtime_unlock(&work.emu);
}

// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
        int32 i;
        Workbuf *b1;

        if(work.nproc == 1) {
                // Put b on empty list.
                if(b != nil) {
                        b->next = work.empty;
                        work.empty = b;
                }
                // Grab from full list if possible.
                // Since work.nproc==1, no one else is
                // going to give us work.
                b = work.full;
                if(b != nil)
                        work.full = b->next;
                return b;
        }

        putempty(b);

        // Grab buffer from full list if possible.
        for(;;) {
                b1 = work.full;
                if(b1 == nil)
                        break;
                runtime_lock(&work.fmu);
                if(work.full != nil) {
                        b1 = work.full;
                        work.full = b1->next;
                        runtime_unlock(&work.fmu);
                        return b1;
                }
                runtime_unlock(&work.fmu);
        }

        runtime_xadd(&work.nwait, +1);
        for(i=0;; i++) {
                b1 = work.full;
                if(b1 != nil) {
                        runtime_lock(&work.fmu);
                        if(work.full != nil) {
                                runtime_xadd(&work.nwait, -1);
                                b1 = work.full;
                                work.full = b1->next;
                                runtime_unlock(&work.fmu);
                                return b1;
                        }
                        runtime_unlock(&work.fmu);
                        continue;
                }
                if(work.nwait == work.nproc)
                        return nil;
                if(i < 10)
                        runtime_procyield(20);
                else if(i < 20)
                        runtime_osyield();
                else
                        runtime_usleep(100);
        }
}

static Workbuf*
handoff(Workbuf *b)
{
        int32 n;
        Workbuf *b1;

        // Make new buffer with half of b's pointers.
        b1 = getempty(nil);
        n = b->nobj/2;
        b->nobj -= n;
        b1->nobj = n;
        runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
        nhandoff += n;

        // Put b on full list - let first half of b get stolen.
        runtime_lock(&work.fmu);
        b->next = work.full;
        work.full = b;
        runtime_unlock(&work.fmu);

        return b1;
}

// Scanstack calls scanblock on each of gp's stack segments.
static void
scanstack(void (*scanblock)(byte*, int64), G *gp)
{
#ifdef USING_SPLIT_STACK
        M *mp;
        void* sp;
        size_t spsize;
        void* next_segment;
        void* next_sp;
        void* initial_sp;

        if(gp == runtime_g()) {
                // Scanning our own stack.
                sp = __splitstack_find(nil, nil, &spsize, &next_segment,
                                       &next_sp, &initial_sp);
        } else if((mp = gp->m) != nil && mp->helpgc) {
                // gchelper's stack is in active use and has no interesting pointers.
                return;
        } else {
                // Scanning another goroutine's stack.
                // The goroutine is usually asleep (the world is stopped).

                // The exception is that if the goroutine is about to enter or might
                // have just exited a system call, it may be executing code such
                // as schedlock and may have needed to start a new stack segment.
                // Use the stack segment and stack pointer at the time of
                // the system call instead, since that won't change underfoot.
                if(gp->gcstack != nil) {
                        sp = gp->gcstack;
                        spsize = gp->gcstack_size;
                        next_segment = gp->gcnext_segment;
                        next_sp = gp->gcnext_sp;
                        initial_sp = gp->gcinitial_sp;
                } else {
                        sp = __splitstack_find_context(&gp->stack_context[0],
                                                       &spsize, &next_segment,
                                                       &next_sp, &initial_sp);
                }
        }
        if(sp != nil) {
                scanblock(sp, spsize);
                while((sp = __splitstack_find(next_segment, next_sp,
                                              &spsize, &next_segment,
                                              &next_sp, &initial_sp)) != nil)
                        scanblock(sp, spsize);
        }
#else
        M *mp;
        byte* bottom;
        byte* top;

        if(gp == runtime_g()) {
                // Scanning our own stack.
                bottom = (byte*)&gp;
        } else if((mp = gp->m) != nil && mp->helpgc) {
                // gchelper's stack is in active use and has no interesting pointers.
                return;
        } else {
                // Scanning another goroutine's stack.
                // The goroutine is usually asleep (the world is stopped).
                bottom = (byte*)gp->gcnext_sp;
                if(bottom == nil)
                        return;
        }
        top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
        if(top > bottom)
                scanblock(bottom, top - bottom);
        else
                scanblock(top, bottom - top);
#endif
}

// Markfin calls scanblock on the blocks that have finalizers:
// the things pointed at cannot be freed until the finalizers have run.
static void
markfin(void *v)
{
        uintptr size;

        size = 0;
        if(!runtime_mlookup(v, (byte**)&v, &size, nil) || !runtime_blockspecial(v))
                runtime_throw("mark - finalizer inconsistency");

        // do not mark the finalizer block itself.  just mark the things it points at.
        scanblock(v, size);
}

struct root_list {
        struct root_list *next;
        struct root {
                void *decl;
                size_t size;
        } roots[];
};

static struct root_list* roots;

void
__go_register_gc_roots (struct root_list* r)
{
        // FIXME: This needs locking if multiple goroutines can call
        // dlopen simultaneously.
        r->next = roots;
        roots = r;
}

static void
debug_markfin(void *v)
{
        uintptr size;

        if(!runtime_mlookup(v, (byte**)&v, &size, nil))
                runtime_throw("debug_mark - finalizer inconsistency");
        debug_scanblock(v, size);
}

// Mark
static void
mark(void (*scan)(byte*, int64))
{
        struct root_list *pl;
        G *gp;
        FinBlock *fb;

        // mark data+bss.
        for(pl = roots; pl != nil; pl = pl->next) {
                struct root* pr = &pl->roots[0];
                while(1) {
                        void *decl = pr->decl;
                        if(decl == nil)
                                break;
                        scanblock(decl, pr->size);
                        pr++;
                }
        }

        scan((byte*)&runtime_m0, sizeof runtime_m0);
        scan((byte*)&runtime_g0, sizeof runtime_g0);
        scan((byte*)&runtime_allg, sizeof runtime_allg);
        scan((byte*)&runtime_allm, sizeof runtime_allm);
        runtime_MProf_Mark(scan);
        runtime_time_scan(scan);

        // mark stacks
        for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
                switch(gp->status){
                default:
                        runtime_printf("unexpected G.status %d\n", gp->status);
                        runtime_throw("mark - bad status");
                case Gdead:
                        break;
                case Grunning:
                        if(gp != runtime_g())
                                runtime_throw("mark - world not stopped");
                        scanstack(scan, gp);
                        break;
                case Grunnable:
                case Gsyscall:
                case Gwaiting:
                        scanstack(scan, gp);
                        break;
                }
        }

        // mark things pointed at by objects with finalizers
        if(scan == debug_scanblock)
                runtime_walkfintab(debug_markfin, scan);
        else
                runtime_walkfintab(markfin, scan);

        for(fb=allfin; fb; fb=fb->alllink)
                scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));

        // in multiproc mode, join in the queued work.
        scan(nil, 0);
}

static bool
handlespecial(byte *p, uintptr size)
{
        void (*fn)(void*);
        const struct __go_func_type *ft;
        FinBlock *block;
        Finalizer *f;

        if(!runtime_getfinalizer(p, true, &fn, &ft)) {
                runtime_setblockspecial(p, false);
                runtime_MProf_Free(p, size);
                return false;
        }

        runtime_lock(&finlock);
        if(finq == nil || finq->cnt == finq->cap) {
                if(finc == nil) {
                        finc = runtime_SysAlloc(PageSize);
                        finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
                        finc->alllink = allfin;
                        allfin = finc;
                }
                block = finc;
                finc = block->next;
                block->next = finq;
                finq = block;
        }
        f = &finq->fin[finq->cnt];
        finq->cnt++;
        f->fn = fn;
        f->ft = ft;
        f->arg = p;
        runtime_unlock(&finlock);
        return true;
}

// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweep(void)
{
        M *m;
        MSpan *s;
        int32 cl, n, npages;
        uintptr size;
        byte *p;
        MCache *c;
        byte *arena_start;

        m = runtime_m();
        arena_start = runtime_mheap.arena_start;

        for(;;) {
                s = work.spans;
                if(s == nil)
                        break;
                if(!runtime_casp(&work.spans, s, s->allnext))
                        continue;

                if(s->state != MSpanInUse)
                        continue;

                p = (byte*)(s->start << PageShift);
                cl = s->sizeclass;
                if(cl == 0) {
                        size = s->npages<<PageShift;
                        n = 1;
                } else {
                        // Chunk full of small blocks.
                        size = runtime_class_to_size[cl];
                        npages = runtime_class_to_allocnpages[cl];
                        n = (npages << PageShift) / size;
                }

                // Sweep through n objects of given size starting at p.
                // This thread owns the span now, so it can manipulate
                // the block bitmap without atomic operations.
                for(; n > 0; n--, p += size) {
                        uintptr off, *bitp, shift, bits;

                        off = (uintptr*)p - (uintptr*)arena_start;
                        bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
                        shift = off % wordsPerBitmapWord;
                        bits = *bitp>>shift;

                        if((bits & bitAllocated) == 0)
                                continue;

                        if((bits & bitMarked) != 0) {
                                if(DebugMark) {
                                        if(!(bits & bitSpecial))
                                                runtime_printf("found spurious mark on %p\n", p);
                                        *bitp &= ~(bitSpecial<<shift);
                                }
                                *bitp &= ~(bitMarked<<shift);
                                continue;
                        }

                        // Special means it has a finalizer or is being profiled.
                        // In DebugMark mode, the bit has been coopted so
                        // we have to assume all blocks are special.
                        if(DebugMark || (bits & bitSpecial) != 0) {
                                if(handlespecial(p, size))
                                        continue;
                        }

                        // Mark freed; restore block boundary bit.
                        *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

                        c = m->mcache;
                        if(s->sizeclass == 0) {
                                // Free large span.
                                runtime_unmarkspan(p, 1<<PageShift);
                                *(uintptr*)p = 1;       // needs zeroing
                                runtime_MHeap_Free(&runtime_mheap, s, 1);
                        } else {
                                // Free small object.
                                if(size > sizeof(uintptr))
                                        ((uintptr*)p)[1] = 1;   // mark as "needs to be zeroed"
                                c->local_by_size[s->sizeclass].nfree++;
                                runtime_MCache_Free(c, p, s->sizeclass, size);
                        }
                        c->local_alloc -= size;
                        c->local_nfree++;
                }
        }
}

void
runtime_gchelper(void)
{
        // Wait until main proc is ready for mark help.
        runtime_lock(&work.markgate);
        runtime_unlock(&work.markgate);
        scanblock(nil, 0);

        // Wait until main proc is ready for sweep help.
        runtime_lock(&work.sweepgate);
        runtime_unlock(&work.sweepgate);
        sweep();

        if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
                runtime_notewakeup(&work.alldone);
}

// Semaphore, not Lock, so that the goroutine
// reschedules when there is contention rather
// than spinning.
static uint32 gcsema = 1;

// Initialized from $GOGC.  GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
// memory proportional to the amount already in use.
// If gcpercent=100 and we're using 4M, we'll gc again
// when we get to 8M.  This keeps the gc cost in linear
// proportion to the allocation cost.  Adjusting gcpercent
// just changes the linear constant (and also the amount of
// extra memory used).
static int32 gcpercent = -2;
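// Illustrative note, not part of the original source: runtime_gc below
// recomputes the trigger as
//      mstats.next_gc = mstats.heap_alloc + mstats.heap_alloc*gcpercent/100;
// so with GOGC=100 a 4M live heap schedules the next collection at 8M, and
// GOGC=200 would push it out to 12M.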

static void
stealcache(void)
{
        M *m;

        for(m=runtime_allm; m; m=m->alllink)
                runtime_MCache_ReleaseAll(m->mcache);
}

static void
cachestats(void)
{
        M *m;
        MCache *c;
        uint32 i;
        uint64 stacks_inuse;
        uint64 stacks_sys;

        stacks_inuse = 0;
        stacks_sys = 0;
        for(m=runtime_allm; m; m=m->alllink) {
                runtime_purgecachedstats(m);
                // stacks_inuse += m->stackalloc->inuse;
                // stacks_sys += m->stackalloc->sys;
                c = m->mcache;
                for(i=0; i<nelem(c->local_by_size); i++) {
                        mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
                        c->local_by_size[i].nmalloc = 0;
                        mstats.by_size[i].nfree += c->local_by_size[i].nfree;
                        c->local_by_size[i].nfree = 0;
                }
        }
        mstats.stacks_inuse = stacks_inuse;
        mstats.stacks_sys = stacks_sys;
}

void
runtime_gc(int32 force)
{
        M *m;
        int64 t0, t1, t2, t3;
        uint64 heap0, heap1, obj0, obj1;
        const byte *p;
        bool extra;

        // Make sure all registers are saved on stack so that
        // scanstack sees them.
        __builtin_unwind_init();

        // The gc is turned off (via enablegc) until
        // the bootstrap has completed.
        // Also, malloc gets called in the guts
        // of a number of libraries that might be
        // holding locks.  To avoid priority inversion
        // problems, don't bother trying to run gc
        // while holding a lock.  The next mallocgc
        // without a lock will do the gc instead.
        m = runtime_m();
        if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
                return;

        if(gcpercent == -2) {   // first time through
                p = runtime_getenv("GOGC");
                if(p == nil || p[0] == '\0')
                        gcpercent = 100;
                else if(runtime_strcmp((const char*)p, "off") == 0)
                        gcpercent = -1;
                else
                        gcpercent = runtime_atoi(p);

                p = runtime_getenv("GOGCTRACE");
                if(p != nil)
                        gctrace = runtime_atoi(p);
        }
        if(gcpercent < 0)
                return;

        runtime_semacquire(&gcsema);
        if(!force && mstats.heap_alloc < mstats.next_gc) {
                runtime_semrelease(&gcsema);
                return;
        }

        t0 = runtime_nanotime();
        nhandoff = 0;

        m->gcing = 1;
        runtime_stoptheworld();

        cachestats();
        heap0 = mstats.heap_alloc;
        obj0 = mstats.nmalloc - mstats.nfree;

        runtime_lock(&work.markgate);
        runtime_lock(&work.sweepgate);

        extra = false;
        work.nproc = 1;
        if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
                runtime_noteclear(&work.alldone);
                work.nproc += runtime_helpgc(&extra);
        }
        work.nwait = 0;
        work.ndone = 0;

        runtime_unlock(&work.markgate);  // let the helpers in
        mark(scanblock);
        if(DebugMark)
                mark(debug_scanblock);
        t1 = runtime_nanotime();

        work.spans = runtime_mheap.allspans;
        runtime_unlock(&work.sweepgate);  // let the helpers in
        sweep();
        if(work.nproc > 1)
                runtime_notesleep(&work.alldone);
        t2 = runtime_nanotime();

        stealcache();
        cachestats();

        mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
        m->gcing = 0;

        m->locks++;     // disable gc during the mallocs in newproc
        if(finq != nil) {
                // kick off or wake up goroutine to run queued finalizers
                if(fing == nil)
                        fing = __go_go(runfinq, nil);
                else if(fingwait) {
                        fingwait = 0;
                        runtime_ready(fing);
                }
        }
        m->locks--;

        cachestats();
        heap1 = mstats.heap_alloc;
        obj1 = mstats.nmalloc - mstats.nfree;

        t3 = runtime_nanotime();
        mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
        mstats.pause_total_ns += t3 - t0;
        mstats.numgc++;
        if(mstats.debuggc)
                runtime_printf("pause %llu\n", (unsigned long long)t3-t0);

        if(gctrace) {
                runtime_printf("gc%d(%d): %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu handoff\n",
                        mstats.numgc, work.nproc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
                        (unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
                        (unsigned long long) mstats.nmalloc, (unsigned long long)mstats.nfree,
                        (unsigned long long) nhandoff);
        }

        runtime_semrelease(&gcsema);

        // If we could have used another helper proc, start one now,
        // in the hope that it will be available next time.
        // It would have been even better to start it before the collection,
        // but doing so requires allocating memory, so it's tricky to
        // coordinate.  This lazy approach works out in practice:
        // we don't mind if the first couple gc rounds don't have quite
        // the maximum number of procs.
        runtime_starttheworld(extra);

        // give the queued finalizers, if any, a chance to run
        if(finq != nil)
                runtime_gosched();

        if(gctrace > 1 && !force)
                runtime_gc(1);
}

void runtime_ReadMemStats(MStats *)
  __asm__("libgo_runtime.runtime.ReadMemStats");

void
runtime_ReadMemStats(MStats *stats)
{
        M *m;

        // Have to acquire gcsema to stop the world,
        // because stoptheworld can only be used by
        // one goroutine at a time, and there might be
        // a pending garbage collection already calling it.
        runtime_semacquire(&gcsema);
        m = runtime_m();
        m->gcing = 1;
        runtime_stoptheworld();
        cachestats();
        *stats = mstats;
        m->gcing = 0;
        runtime_semrelease(&gcsema);
        runtime_starttheworld(false);
}

static void
runfinq(void* dummy __attribute__ ((unused)))
{
        G* gp;
        Finalizer *f;
        FinBlock *fb, *next;
        uint32 i;

        gp = runtime_g();
        for(;;) {
                // There's no need for a lock in this section
                // because it only conflicts with the garbage
                // collector, and the garbage collector only
                // runs when everyone else is stopped, and
                // runfinq only stops at the gosched() or
                // during the calls in the for loop.
                fb = finq;
                finq = nil;
                if(fb == nil) {
                        fingwait = 1;
                        gp->status = Gwaiting;
                        gp->waitreason = "finalizer wait";
                        runtime_gosched();
                        continue;
                }
                for(; fb; fb=next) {
                        next = fb->next;
                        for(i=0; i<(uint32)fb->cnt; i++) {
                                void *params[1];

                                f = &fb->fin[i];
                                params[0] = &f->arg;
                                runtime_setblockspecial(f->arg, false);
                                reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
                                f->fn = nil;
                                f->arg = nil;
                        }
                        fb->cnt = 0;
                        fb->next = finc;
                        finc = fb;
                }
                runtime_gc(1);  // trigger another gc to clean up the finalized objects, if possible
        }
}

// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime_markallocated(void *v, uintptr n, bool noptr)
{
        uintptr *b, obits, bits, off, shift;

        // if(0)
                // runtime_printf("markallocated %p+%p\n", v, n);

        if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
                runtime_throw("markallocated: bad pointer");

        off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;

        for(;;) {
                obits = *b;
                bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
                if(noptr)
                        bits |= bitNoPointers<<shift;
                if(runtime_singleproc) {
                        *b = bits;
                        break;
                } else {
                        // more than one goroutine is potentially running: use atomic op
                        if(runtime_casp((void**)b, (void*)obits, (void*)bits))
                                break;
                }
        }
}

// mark the block at v of size n as freed.
void
runtime_markfreed(void *v, uintptr n)
{
        uintptr *b, obits, bits, off, shift;

        // if(0)
                // runtime_printf("markallocated %p+%p\n", v, n);

        if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
                runtime_throw("markallocated: bad pointer");

        off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;

        for(;;) {
                obits = *b;
                bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
                if(runtime_singleproc) {
                        *b = bits;
                        break;
                } else {
                        // more than one goroutine is potentially running: use atomic op
                        if(runtime_casp((void**)b, (void*)obits, (void*)bits))
                                break;
                }
        }
}

// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
        uintptr *b, bits, off, shift;

        if(!runtime_checking)
                return;

        if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
                return; // not allocated, so okay

        off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;  // word offset
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;

        bits = *b>>shift;
        if((bits & bitAllocated) != 0) {
                runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
                        v, (void*)n, (void*)off, (void*)(bits & bitMask));
                runtime_throw("checkfreed: not freed");
        }
}

// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
        uintptr *b, off, shift;
        byte *p;

        if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
                runtime_throw("markspan: bad pointer");

        p = v;
        if(leftover)    // mark a boundary just past end of last block too
                n++;
        for(; n-- > 0; p += size) {
                // Okay to use non-atomic ops here, because we control
                // the entire span, and each bitmap word has bits for only
                // one span, so no other goroutines are changing these
                // bitmap words.
                off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;  // word offset
                b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
                shift = off % wordsPerBitmapWord;
                *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
        }
}

// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
        uintptr *p, *b, off;

        if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
                runtime_throw("markspan: bad pointer");

        p = v;
        off = p - (uintptr*)runtime_mheap.arena_start;  // word offset
        if(off % wordsPerBitmapWord != 0)
                runtime_throw("markspan: unaligned pointer");
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        n /= PtrSize;
        if(n%wordsPerBitmapWord != 0)
                runtime_throw("unmarkspan: unaligned length");
        // Okay to use non-atomic ops here, because we control
        // the entire span, and each bitmap word has bits for only
        // one span, so no other goroutines are changing these
        // bitmap words.
        n /= wordsPerBitmapWord;
        while(n-- > 0)
                *b-- = 0;
}

bool
runtime_blockspecial(void *v)
{
        uintptr *b, off, shift;

        if(DebugMark)
                return true;

        off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;

        return (*b & (bitSpecial<<shift)) != 0;
}

void
runtime_setblockspecial(void *v, bool s)
{
        uintptr *b, off, shift, bits, obits;

        if(DebugMark)
                return;

        off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
        b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
        shift = off % wordsPerBitmapWord;

        for(;;) {
                obits = *b;
                if(s)
                        bits = obits | (bitSpecial<<shift);
                else
                        bits = obits & ~(bitSpecial<<shift);
                if(runtime_singleproc) {
                        *b = bits;
                        break;
                } else {
                        // more than one goroutine is potentially running: use atomic op
                        if(runtime_casp((void**)b, (void*)obits, (void*)bits))
                                break;
                }
        }
}

void
runtime_MHeap_MapBits(MHeap *h)
{
        // Caller has added extra mappings to the arena.
        // Add extra mappings of bitmap words as needed.
        // We allocate extra bitmap pieces in chunks of bitmapChunk.
        enum {
                bitmapChunk = 8192
        };
        uintptr n;

        n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
        n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
        if(h->bitmap_mapped >= n)
                return;

        runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
        h->bitmap_mapped = n;
}
