/*
 *  linux/mm/page_alloc.c
 *
 *  Manages the free list; the system allocates free pages here.
 *  Note that kmalloc() lives in slab.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 */
 
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/module.h>
 
int nr_swap_pages;
int nr_active_pages;
int nr_inactive_pages;
LIST_HEAD(inactive_list);
LIST_HEAD(active_list);
pg_data_t *pgdat_list;

/*
 * The zone_table array is used to look up the address of the
 * struct zone corresponding to a given zone number (ZONE_DMA,
 * ZONE_NORMAL, or ZONE_HIGHMEM).
 */
zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
EXPORT_SYMBOL(zone_table);

static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20, 20, 20, };
static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255, 255, 255, };
static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };

int vm_gfp_debug = 0;

/*
 * Temporary debugging check.
 */
#define BAD_RANGE(zone, page)                                           \
(                                                                       \
        (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \
        || (((page) - mem_map) < (zone)->zone_start_mapnr)              \
        || ((zone) != page_zone(page))                                  \
)

/*
 * Freeing function for a buddy system allocator.
 * Contrary to prior comments, this is *NOT* hairy, and there
 * is no reason for anyone not to understand it.
 *
 * The concept of a buddy system is to maintain direct-mapped tables
 * (containing bit values) for memory blocks of various "orders".
 * The bottom level table contains the map for the smallest allocatable
 * units of memory (here, pages), and each level above it describes
 * pairs of units from the levels below, hence, "buddies".
 * At a high level, all that happens here is marking the table entry
 * at the bottom level available, and propagating the changes upward
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep one bit for each pair of blocks, which
 * is set to 1 iff only one of the pair is allocated.  So when we
 * are allocating or freeing one, we can derive the state of the
 * other.  That is, if we allocate a small block, and both were
 * free, the remainder of the region must be split into blocks.
 * If a block is freed, and its buddy is also free, then this
 * triggers coalescing into a block of larger size.
 *
 * -- wli
 */
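/*
 * Illustrative example (the arithmetic follows __free_pages_ok() below):
 * freeing the order-0 page at page_idx 12 toggles pair bit 12 >> 1 = 6;
 * its buddy is page_idx ^ 1 = 13.  If 13 was free, the two coalesce
 * into the order-1 block at 12, whose buddy is 12 ^ 2 = 14; if that
 * block is free as well they coalesce into the order-2 block at 12,
 * and so on up to MAX_ORDER-1.
 */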

static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order));
static void __free_pages_ok (struct page *page, unsigned int order)
{
        unsigned long index, page_idx, mask, flags;
        free_area_t *area;
        struct page *base;
        zone_t *zone;

        /*
         * Yes, think what happens when other parts of the kernel take
         * a reference to a page in order to pin it for io. -ben
         */
        if (PageLRU(page)) {
                if (unlikely(in_interrupt()))
                        BUG();
                lru_cache_del(page);
        }

        if (page->buffers)
                BUG();
        if (page->mapping)
                BUG();
        if (!VALID_PAGE(page))
                BUG();
        if (PageLocked(page))
                BUG();
        if (PageActive(page))
                BUG();
        ClearPageReferenced(page);
        ClearPageDirty(page);

        if (current->flags & PF_FREE_PAGES)
                goto local_freelist;
 back_local_freelist:

        zone = page_zone(page);

        mask = (~0UL) << order;
        base = zone->zone_mem_map;
        page_idx = page - base;
        if (page_idx & ~mask)
                BUG();
        index = page_idx >> (1 + order);

        area = zone->free_area + order;

        spin_lock_irqsave(&zone->lock, flags);

        zone->free_pages -= mask;

        while (mask + (1 << (MAX_ORDER-1))) {
                struct page *buddy1, *buddy2;

                if (area >= zone->free_area + MAX_ORDER)
                        BUG();
                if (!__test_and_change_bit(index, area->map))
                        /*
                         * the buddy page is still allocated.
                         */
                        break;
                /*
                 * Move the buddy up one level.
                 * This code is taking advantage of the identity:
                 *      -mask = 1+~mask
                 */
                buddy1 = base + (page_idx ^ -mask);
                buddy2 = base + page_idx;
                if (BAD_RANGE(zone,buddy1))
                        BUG();
                if (BAD_RANGE(zone,buddy2))
                        BUG();

                list_del(&buddy1->list);
                mask <<= 1;
                area++;
                index >>= 1;
                page_idx &= mask;
        }
        list_add(&(base + page_idx)->list, &area->free_list);

        spin_unlock_irqrestore(&zone->lock, flags);
        return;

 local_freelist:
        if (current->nr_local_pages)
                goto back_local_freelist;
        if (in_interrupt())
                goto back_local_freelist;

        list_add(&page->list, &current->local_pages);
        page->index = order;
        current->nr_local_pages++;
}

#define MARK_USED(index, order, area) \
        __change_bit((index) >> (1+(order)), (area)->map)

static inline struct page * expand (zone_t *zone, struct page *page,
         unsigned long index, int low, int high, free_area_t * area)
{
        unsigned long size = 1 << high;

        while (high > low) {
                if (BAD_RANGE(zone,page))
                        BUG();
                area--;
                high--;
                size >>= 1;
                list_add(&(page)->list, &(area)->free_list);
                MARK_USED(index, high, area);
                index += size;
                page += size;
        }
        if (BAD_RANGE(zone,page))
                BUG();
        return page;
}
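
/*
 * Illustrative example: satisfying an order-0 request (low = 0) from an
 * order-3 block (high = 3, 8 pages), expand() hands the leading 4-page,
 * 2-page and 1-page pieces back to the order-2, order-1 and order-0
 * free lists (toggling each pair bit via MARK_USED) and returns the
 * final page of the block to the caller.
 */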

static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
static struct page * rmqueue(zone_t *zone, unsigned int order)
{
        free_area_t * area = zone->free_area + order;
        unsigned int curr_order = order;
        struct list_head *head, *curr;
        unsigned long flags;
        struct page *page;

        spin_lock_irqsave(&zone->lock, flags);
        do {
                head = &area->free_list;
                curr = head->next;

                if (curr != head) {
                        unsigned int index;

                        page = list_entry(curr, struct page, list);
                        if (BAD_RANGE(zone,page))
                                BUG();
                        list_del(curr);
                        index = page - zone->zone_mem_map;
                        if (curr_order != MAX_ORDER-1)
                                MARK_USED(index, curr_order, area);
                        zone->free_pages -= 1UL << order;

                        page = expand(zone, page, index, order, curr_order, area);
                        spin_unlock_irqrestore(&zone->lock, flags);

                        set_page_count(page, 1);
                        if (BAD_RANGE(zone,page))
                                BUG();
                        if (PageLRU(page))
                                BUG();
                        if (PageActive(page))
                                BUG();
                        return page;
                }
                curr_order++;
                area++;
        } while (curr_order < MAX_ORDER);
        spin_unlock_irqrestore(&zone->lock, flags);

        return NULL;
}
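
/*
 * Illustrative example: for an order-1 request with the order-1 and
 * order-2 free lists empty, rmqueue() walks up to the first non-empty
 * list (say order-3), removes that block, toggles its pair bit, and
 * lets expand() return an order-1 piece while re-queueing the leading
 * pieces.
 */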

#ifndef CONFIG_DISCONTIGMEM
struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
{
        return __alloc_pages(gfp_mask, order,
                contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
}
#endif

static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
{
        struct page * page = NULL;
        int __freed;

        if (in_interrupt())
                BUG();

        current->allocation_order = order;
        current->flags |= PF_MEMALLOC | PF_FREE_PAGES;

        __freed = try_to_free_pages_zone(classzone, gfp_mask);

        current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);

        if (current->nr_local_pages) {
                struct list_head * entry, * local_pages;
                struct page * tmp;
                int nr_pages;

                local_pages = &current->local_pages;

                if (likely(__freed)) {
                        /* pick from the last inserted so we're lifo */
                        entry = local_pages->next;
                        do {
                                tmp = list_entry(entry, struct page, list);
                                if (tmp->index == order && memclass(page_zone(tmp), classzone)) {
                                        list_del(entry);
                                        current->nr_local_pages--;
                                        set_page_count(tmp, 1);
                                        page = tmp;

                                        if (page->buffers)
                                                BUG();
                                        if (page->mapping)
                                                BUG();
                                        if (!VALID_PAGE(page))
                                                BUG();
                                        if (PageLocked(page))
                                                BUG();
                                        if (PageLRU(page))
                                                BUG();
                                        if (PageActive(page))
                                                BUG();
                                        if (PageDirty(page))
                                                BUG();

                                        break;
                                }
                        } while ((entry = entry->next) != local_pages);
                }

                nr_pages = current->nr_local_pages;
                /* free in reverse order so that the global order will be lifo */
                while ((entry = local_pages->prev) != local_pages) {
                        list_del(entry);
                        tmp = list_entry(entry, struct page, list);
                        __free_pages_ok(tmp, tmp->index);
                        if (!nr_pages--)
                                BUG();
                }
                current->nr_local_pages = 0;
        }

        *freed = __freed;
        return page;
}

static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
{
        long free = zone->free_pages - (1UL << order);
        return free >= 0 ? free : 0;
}

/*
 * This is the 'heart' of the zoned buddy allocator:
 */
struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
{
        zone_t **zone, * classzone;
        struct page * page;
        int freed, class_idx;

        zone = zonelist->zones;
        classzone = *zone;
        class_idx = zone_idx(classzone);

        for (;;) {
                zone_t *z = *(zone++);
                if (!z)
                        break;

                if (zone_free_pages(z, order) > z->watermarks[class_idx].low) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        classzone->need_balance = 1;
        mb();
        if (waitqueue_active(&kswapd_wait))
                wake_up_interruptible(&kswapd_wait);

        zone = zonelist->zones;
        for (;;) {
                unsigned long min;
                zone_t *z = *(zone++);
                if (!z)
                        break;

                min = z->watermarks[class_idx].min;
                if (!(gfp_mask & __GFP_WAIT))
                        min >>= 2;
                if (zone_free_pages(z, order) > min) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        /* here we're in the low on memory slow path */

        if ((current->flags & PF_MEMALLOC) &&
                        (!in_interrupt() || (current->flags & PF_MEMDIE))) {
                zone = zonelist->zones;
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
                return NULL;
        }

        /* Atomic allocations - we can't balance anything */
        if (!(gfp_mask & __GFP_WAIT))
                goto out;

 rebalance:
        page = balance_classzone(classzone, gfp_mask, order, &freed);
        if (page)
                return page;

        zone = zonelist->zones;
        if (likely(freed)) {
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        if (zone_free_pages(z, order) > z->watermarks[class_idx].min) {
                                page = rmqueue(z, order);
                                if (page)
                                        return page;
                        }
                }
                goto rebalance;
        } else {
                /*
                 * Check whether another task has been killed in the
                 * meantime; in that case the allocation can succeed.
                 */
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        if (zone_free_pages(z, order) > z->watermarks[class_idx].high) {
                                page = rmqueue(z, order);
                                if (page)
                                        return page;
                        }
                }
        }

 out:
        printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
               order, gfp_mask, !!(current->flags & PF_MEMALLOC));
        if (unlikely(vm_gfp_debug))
                dump_stack();
        return NULL;
}
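
/*
 * Illustrative walk-through: a GFP_KERNEL (__GFP_WAIT) request first
 * tries every zone in the zonelist against the classzone's "low"
 * watermark, then wakes kswapd and retries against "min" (atomic
 * !__GFP_WAIT callers retry against min/4 instead and then fail);
 * if the caller may sleep it calls balance_classzone() to reclaim
 * memory directly and loops back to "rebalance" while progress is
 * being made.  PF_MEMALLOC callers bypass the watermarks entirely.
 */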

/*
 * Common helper functions.
 */
unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
{
        struct page * page;

        page = alloc_pages(gfp_mask, order);
        if (!page)
                return 0;
        return (unsigned long) page_address(page);
}

unsigned long get_zeroed_page(unsigned int gfp_mask)
{
        struct page * page;

        page = alloc_pages(gfp_mask, 0);
        if (page) {
                void *address = page_address(page);
                clear_page(address);
                return (unsigned long) address;
        }
        return 0;
}

void __free_pages(struct page *page, unsigned int order)
{
        if (!PageReserved(page) && put_page_testzero(page))
                __free_pages_ok(page, order);
}

void free_pages(unsigned long addr, unsigned int order)
{
        if (addr != 0)
                __free_pages(virt_to_page(addr), order);
}
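
/*
 * Illustrative sketch of how callers typically pair these helpers.
 * The function below is an example only (compiled out with #if 0);
 * its name and the choice of GFP_KERNEL are assumptions for the
 * illustration, not part of this file's interface.
 */
#if 0
static int example_page_helper_usage(void)
{
        unsigned long addr;
        struct page *page;

        /* One zeroed page; GFP_KERNEL may sleep. */
        addr = get_zeroed_page(GFP_KERNEL);
        if (!addr)
                return -ENOMEM;
        free_pages(addr, 0);

        /* An order-2 (four page) physically contiguous block. */
        page = alloc_pages(GFP_KERNEL, 2);
        if (!page)
                return -ENOMEM;
        __free_pages(page, 2);

        return 0;
}
#endif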

/*
 * Total amount of free (allocatable) RAM:
 */
unsigned int nr_free_pages (void)
{
        unsigned int sum = 0;
        zone_t *zone;

        for_each_zone(zone)
                sum += zone->free_pages;

        return sum;
}

/*
 * Amount of free RAM allocatable as buffer memory:
 */
unsigned int nr_free_buffer_pages (void)
{
        pg_data_t *pgdat;
        unsigned int sum = 0;
        zonelist_t *zonelist;
        zone_t **zonep, *zone;

        for_each_pgdat(pgdat) {
                int class_idx;
                zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
                zonep = zonelist->zones;
                zone = *zonep;
                class_idx = zone_idx(zone);

                sum += zone->nr_cache_pages;
                for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
                        int free = zone->free_pages - zone->watermarks[class_idx].high;
                        if (free <= 0)
                                continue;
                        sum += free;
                }
        }

        return sum;
}

#if CONFIG_HIGHMEM
unsigned int nr_free_highpages (void)
{
        pg_data_t *pgdat;
        unsigned int pages = 0;

        for_each_pgdat(pgdat)
                pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;

        return pages;
}

unsigned int freeable_lowmem(void)
{
        unsigned int pages = 0;
        pg_data_t *pgdat;

        for_each_pgdat(pgdat) {
                pages += pgdat->node_zones[ZONE_DMA].free_pages;
                pages += pgdat->node_zones[ZONE_DMA].nr_active_pages;
                pages += pgdat->node_zones[ZONE_DMA].nr_inactive_pages;
                pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
                pages += pgdat->node_zones[ZONE_NORMAL].nr_active_pages;
                pages += pgdat->node_zones[ZONE_NORMAL].nr_inactive_pages;
        }

        return pages;
}
#endif

#define K(x) ((x) << (PAGE_SHIFT-10))

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas_core(pg_data_t *pgdat)
{
        unsigned int order;
        unsigned type;
        pg_data_t *tmpdat = pgdat;

        printk("Free pages:      %6dkB (%6dkB HighMem)\n",
                K(nr_free_pages()),
                K(nr_free_highpages()));

        while (tmpdat) {
                zone_t *zone;
                for (zone = tmpdat->node_zones;
                                zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
                        printk("Zone:%s freepages:%6lukB\n",
                                        zone->name,
                                        K(zone->free_pages));

                tmpdat = tmpdat->node_next;
        }

        printk("( Active: %d, inactive: %d, free: %d )\n",
               nr_active_pages,
               nr_inactive_pages,
               nr_free_pages());

        for (type = 0; type < MAX_NR_ZONES; type++) {
                struct list_head *head, *curr;
                zone_t *zone = pgdat->node_zones + type;
                unsigned long nr, total, flags;

                total = 0;
                if (zone->size) {
                        spin_lock_irqsave(&zone->lock, flags);
                        for (order = 0; order < MAX_ORDER; order++) {
                                head = &(zone->free_area + order)->free_list;
                                curr = head;
                                nr = 0;
                                for (;;) {
                                        if ((curr = curr->next) == head)
                                                break;
                                        nr++;
                                }
                                total += nr * (1 << order);
                                printk("%lu*%lukB ", nr, K(1UL) << order);
                        }
                        spin_unlock_irqrestore(&zone->lock, flags);
                }
                printk("= %lukB)\n", K(total));
        }

#ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
#endif
}

void show_free_areas(void)
{
        show_free_areas_core(pgdat_list);
}

/*
 * Builds allocation fallback zone lists.
 */
static inline void build_zonelists(pg_data_t *pgdat)
{
        int i, j, k;

        for (i = 0; i <= GFP_ZONEMASK; i++) {
                zonelist_t *zonelist;
                zone_t *zone;

                zonelist = pgdat->node_zonelists + i;
                memset(zonelist, 0, sizeof(*zonelist));

                j = 0;
                k = ZONE_NORMAL;
                if (i & __GFP_HIGHMEM)
                        k = ZONE_HIGHMEM;
                if (i & __GFP_DMA)
                        k = ZONE_DMA;

                switch (k) {
                        default:
                                BUG();
                        /*
                         * fallthrough:
                         */
                        case ZONE_HIGHMEM:
                                zone = pgdat->node_zones + ZONE_HIGHMEM;
                                if (zone->size) {
#ifndef CONFIG_HIGHMEM
                                        BUG();
#endif
                                        zonelist->zones[j++] = zone;
                                }
                        case ZONE_NORMAL:
                                zone = pgdat->node_zones + ZONE_NORMAL;
                                if (zone->size)
                                        zonelist->zones[j++] = zone;
                        case ZONE_DMA:
                                zone = pgdat->node_zones + ZONE_DMA;
                                if (zone->size)
                                        zonelist->zones[j++] = zone;
                }
                zonelist->zones[j++] = NULL;
        }
}
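
/*
 * Illustrative result: on a machine with all three zones populated,
 * the __GFP_HIGHMEM zonelist falls back HighMem -> Normal -> DMA,
 * the default (GFP_KERNEL-style) zonelist is Normal -> DMA, and the
 * __GFP_DMA zonelist contains only the DMA zone, each terminated by
 * a NULL entry.
 */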

/*
 * Helper functions to size the waitqueue hash table.
 * Essentially these want to choose hash table sizes sufficiently
 * large so that collisions trying to wait on pages are rare.
 * But in fact, the number of active page waitqueues on typical
 * systems is ridiculously low, less than 200. So this is even
 * conservative, even though it seems large.
 *
 * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
 * waitqueues, i.e. the size of the waitq table given the number of pages.
 */
#define PAGES_PER_WAITQUEUE     256

static inline unsigned long wait_table_size(unsigned long pages)
{
        unsigned long size = 1;

        pages /= PAGES_PER_WAITQUEUE;

        while (size < pages)
                size <<= 1;

        /*
         * Once we have dozens or even hundreds of threads sleeping
         * on IO we've got bigger problems than wait queue collision.
         * Limit the size of the wait table to a reasonable size.
         */
        size = min(size, 4096UL);

        return size;
}

/*
 * This is an integer logarithm so that shifts can be used later
 * to extract the more random high bits from the multiplicative
 * hash function before the remainder is taken.
 */
static inline unsigned long wait_table_bits(unsigned long size)
{
        return ffz(~size);
}
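
/*
 * Illustrative example: a zone of 131072 pages (512 MiB with 4 KiB
 * pages) gives 131072 / PAGES_PER_WAITQUEUE = 512, so wait_table_size()
 * returns 512 hashed waitqueues and wait_table_bits(512) = 9, i.e.
 * wait_table_shift becomes BITS_PER_LONG - 9.
 */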

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
        unsigned long *zones_size, unsigned long zone_start_paddr,
        unsigned long *zholes_size, struct page *lmem_map)
{
        unsigned long i, j;
        unsigned long map_size;
        unsigned long totalpages, offset, realtotalpages;
        const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);

        if (zone_start_paddr & ~PAGE_MASK)
                BUG();

        totalpages = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                unsigned long size = zones_size[i];
                totalpages += size;
        }
        realtotalpages = totalpages;
        if (zholes_size)
                for (i = 0; i < MAX_NR_ZONES; i++)
                        realtotalpages -= zholes_size[i];

        printk("On node %d totalpages: %lu\n", nid, realtotalpages);

        /*
         * Some architectures (with lots of mem and discontiguous memory
         * maps) have to search for a good mem_map area:
         * For discontigmem, the conceptual mem map array starts from
         * PAGE_OFFSET; we need to align the actual array onto a mem map
         * boundary, so that MAP_NR works.
         */
        map_size = (totalpages + 1)*sizeof(struct page);
        if (lmem_map == (struct page *)0) {
                lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
                lmem_map = (struct page *)(PAGE_OFFSET +
                        MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
        }
        *gmap = pgdat->node_mem_map = lmem_map;
        pgdat->node_size = totalpages;
        pgdat->node_start_paddr = zone_start_paddr;
        pgdat->node_start_mapnr = (lmem_map - mem_map);
        pgdat->nr_zones = 0;

        offset = lmem_map - mem_map;
        for (j = 0; j < MAX_NR_ZONES; j++) {
                zone_t *zone = pgdat->node_zones + j;
                unsigned long mask;
                unsigned long size, realsize;
                int idx;

                zone_table[nid * MAX_NR_ZONES + j] = zone;
                realsize = size = zones_size[j];
                if (zholes_size)
                        realsize -= zholes_size[j];

                printk("zone(%lu): %lu pages.\n", j, size);
                zone->size = size;
                zone->realsize = realsize;
                zone->name = zone_names[j];
                zone->lock = SPIN_LOCK_UNLOCKED;
                zone->zone_pgdat = pgdat;
                zone->free_pages = 0;
                zone->need_balance = 0;
                zone->nr_active_pages = zone->nr_inactive_pages = 0;

                if (!size)
                        continue;

                /*
                 * The per-page waitqueue mechanism uses hashed waitqueues
                 * per zone.
                 */
                zone->wait_table_size = wait_table_size(size);
                zone->wait_table_shift =
                        BITS_PER_LONG - wait_table_bits(zone->wait_table_size);
                zone->wait_table = (wait_queue_head_t *)
                        alloc_bootmem_node(pgdat, zone->wait_table_size
                                                * sizeof(wait_queue_head_t));

                for (i = 0; i < zone->wait_table_size; ++i)
                        init_waitqueue_head(zone->wait_table + i);

                pgdat->nr_zones = j+1;

                mask = (realsize / zone_balance_ratio[j]);
                if (mask < zone_balance_min[j])
                        mask = zone_balance_min[j];
                else if (mask > zone_balance_max[j])
                        mask = zone_balance_max[j];
                zone->watermarks[j].min = mask;
                zone->watermarks[j].low = mask*2;
                zone->watermarks[j].high = mask*3;
                /* now set the watermarks of the lower zones in the "j" classzone */
                for (idx = j-1; idx >= 0; idx--) {
                        zone_t * lower_zone = pgdat->node_zones + idx;
                        unsigned long lower_zone_reserve;
                        if (!lower_zone->size)
                                continue;

                        mask = lower_zone->watermarks[idx].min;
                        lower_zone->watermarks[j].min = mask;
                        lower_zone->watermarks[j].low = mask*2;
                        lower_zone->watermarks[j].high = mask*3;

                        /* the trickier part: reserve part of the lower zone from classzone "j" allocations */
                        lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx];
                        lower_zone->watermarks[j].min += lower_zone_reserve;
                        lower_zone->watermarks[j].low += lower_zone_reserve;
                        lower_zone->watermarks[j].high += lower_zone_reserve;

                        realsize += lower_zone->realsize;
                }
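
                /*
                 * Illustrative numbers: a 64 MiB zone (16384 pages of
                 * 4 KiB) with zone_balance_ratio 128 gets mask = 128,
                 * so its own-classzone watermarks are min = 128,
                 * low = 256, high = 384.  A lower zone additionally
                 * reserves a fraction (1/lower_zone_reserve_ratio) of
                 * the classzone's size against that classzone, so
                 * e.g. ZONE_DMA is not exhausted by ZONE_NORMAL
                 * allocations.
                 */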

                zone->zone_mem_map = mem_map + offset;
                zone->zone_start_mapnr = offset;
                zone->zone_start_paddr = zone_start_paddr;

                if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
                        printk("BUG: wrong zone alignment, it will crash\n");

                /*
                 * Initially all pages are reserved - free ones are freed
                 * up by free_all_bootmem() once the early boot process is
                 * done. Non-atomic initialization, single-pass.
                 */
                for (i = 0; i < size; i++) {
                        struct page *page = mem_map + offset + i;
                        set_page_zone(page, nid * MAX_NR_ZONES + j);
                        set_page_count(page, 0);
                        SetPageReserved(page);
                        INIT_LIST_HEAD(&page->list);
                        if (j != ZONE_HIGHMEM)
                                set_page_address(page, __va(zone_start_paddr));
                        zone_start_paddr += PAGE_SIZE;
                }

                offset += size;
                for (i = 0; ; i++) {
                        unsigned long bitmap_size;

                        INIT_LIST_HEAD(&zone->free_area[i].free_list);
                        if (i == MAX_ORDER-1) {
                                zone->free_area[i].map = NULL;
                                break;
                        }

                        /*
                         * Page buddy system uses "index >> (i+1)",
                         * where "index" is at most "size-1".
                         *
                         * The extra "+3" is to round down to byte
                         * size (8 bits per byte assumption). Thus
                         * we get "(size-1) >> (i+4)" as the last byte
                         * we can access.
                         *
                         * The "+1" is because we want to round the
                         * byte allocation up rather than down. So
                         * we should have had a "+7" before we shifted
                         * down by three. Also, we have to add one as
                         * we actually _use_ the last bit (it's [0,n]
                         * inclusive, not [0,n[).
                         *
                         * So we actually had +7+1 before we shift
                         * down by 3. But (n+8) >> 3 == (n >> 3) + 1
                         * (modulo overflows, which we do not have).
                         *
                         * Finally, we LONG_ALIGN because all bitmap
                         * operations are on longs.
                         */
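                        /*
                         * Worked example: for a zone of size = 16384
                         * pages and order i = 0, bitmap_size becomes
                         * 16383 >> 4 = 1023, rounded up and
                         * LONG_ALIGNed to 1024 bytes, i.e. one bit
                         * for each of the 8192 order-0 buddy pairs.
                         */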
                        bitmap_size = (size-1) >> (i+4);
                        bitmap_size = LONG_ALIGN(bitmap_size+1);
                        zone->free_area[i].map =
                          (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
                }
        }
        build_zonelists(pgdat);
}

void __init free_area_init(unsigned long *zones_size)
{
        free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
}

static int __init setup_mem_frac(char *str)
{
        int j = 0;

        while (get_option(&str, &zone_balance_ratio[j++]) == 2);
        printk("setup_mem_frac: ");
        for (j = 0; j < MAX_NR_ZONES; j++) printk("%d  ", zone_balance_ratio[j]);
        printk("\n");
        return 1;
}

__setup("memfrac=", setup_mem_frac);

static int __init setup_lower_zone_reserve(char *str)
{
        int j = 0;

        while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2);
        printk("setup_lower_zone_reserve: ");
        for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d  ", lower_zone_reserve_ratio[j]);
        printk("\n");
        return 1;
}

__setup("lower_zone_reserve=", setup_lower_zone_reserve);
