/*
 *  linux/mm/page_alloc.c
 *
 *  Manages the free list; the system allocates free pages here.
 *  Note that kmalloc() lives in slab.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 *  Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 */
 
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/module.h>
 
int nr_swap_pages;
int nr_active_pages;
int nr_inactive_pages;
LIST_HEAD(inactive_list);
LIST_HEAD(active_list);
pg_data_t *pgdat_list;

/*
 * The zone_table array is used to look up the address of the
 * struct zone corresponding to a given zone number (ZONE_DMA,
 * ZONE_NORMAL, or ZONE_HIGHMEM).
 */
zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
EXPORT_SYMBOL(zone_table);

static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, };
static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20, 20, 20, };
static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255, 255, 255, };
static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };

int vm_gfp_debug = 0;

/*
 * Temporary debugging check.
 */
#define BAD_RANGE(zone, page)                                           \
(                                                                       \
        (((page) - mem_map) >= ((zone)->zone_start_mapnr+(zone)->size)) \
        || (((page) - mem_map) < (zone)->zone_start_mapnr)              \
        || ((zone) != page_zone(page))                                  \
)

/*
 * Freeing function for a buddy system allocator.
 * Contrary to prior comments, this is *NOT* hairy, and there
 * is no reason for anyone not to understand it.
 *
 * The concept of a buddy system is to maintain direct-mapped tables
 * (containing bit values) for memory blocks of various "orders".
 * The bottom level table contains the map for the smallest allocatable
 * units of memory (here, pages), and each level above it describes
 * pairs of units from the levels below, hence, "buddies".
 * At a high level, all that happens here is marking the table entry
 * at the bottom level available, and propagating the changes upward
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep one bit for each pair of blocks, which
 * is set to 1 iff only one of the pair is allocated.  So when we
 * are allocating or freeing one, we can derive the state of the
 * other.  That is, if we allocate a small block, and both were
 * free, the remainder of the region must be split into blocks.
 * If a block is freed, and its buddy is also free, then this
 * triggers coalescing into a block of larger size.
 *
 * -- wli
 */
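/*
 * Illustrative example (the arithmetic follows __free_pages_ok() below):
 * freeing the order-0 page at page_idx 12 toggles pair bit 12 >> 1 = 6;
 * its buddy is page_idx ^ 1 = 13.  If 13 was free, the two coalesce
 * into the order-1 block at 12, whose buddy is 12 ^ 2 = 14; if that
 * block is free as well they coalesce into the order-2 block at 12,
 * and so on up to MAX_ORDER-1.
 */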

static void FASTCALL(__free_pages_ok (struct page *page, unsigned int order));
static void __free_pages_ok (struct page *page, unsigned int order)
{
        unsigned long index, page_idx, mask, flags;
        free_area_t *area;
        struct page *base;
        zone_t *zone;

        /*
         * Yes, think what happens when other parts of the kernel take
         * a reference to a page in order to pin it for io. -ben
         */
        if (PageLRU(page)) {
                if (unlikely(in_interrupt()))
                        BUG();
                lru_cache_del(page);
        }

        if (page->buffers)
                BUG();
        if (page->mapping)
                BUG();
        if (!VALID_PAGE(page))
                BUG();
        if (PageLocked(page))
                BUG();
        if (PageActive(page))
                BUG();
        ClearPageReferenced(page);
        ClearPageDirty(page);

        if (current->flags & PF_FREE_PAGES)
                goto local_freelist;
 back_local_freelist:

        zone = page_zone(page);

        mask = (~0UL) << order;
        base = zone->zone_mem_map;
        page_idx = page - base;
        if (page_idx & ~mask)
                BUG();
        index = page_idx >> (1 + order);

        area = zone->free_area + order;

        spin_lock_irqsave(&zone->lock, flags);

        zone->free_pages -= mask;

        while (mask + (1 << (MAX_ORDER-1))) {
                struct page *buddy1, *buddy2;

                if (area >= zone->free_area + MAX_ORDER)
                        BUG();
                if (!__test_and_change_bit(index, area->map))
                        /*
                         * the buddy page is still allocated.
                         */
                        break;
                /*
                 * Move the buddy up one level.
                 * This code is taking advantage of the identity:
                 *      -mask = 1+~mask
                 */
                buddy1 = base + (page_idx ^ -mask);
                buddy2 = base + page_idx;
                if (BAD_RANGE(zone,buddy1))
                        BUG();
                if (BAD_RANGE(zone,buddy2))
                        BUG();

                list_del(&buddy1->list);
                mask <<= 1;
                area++;
                index >>= 1;
                page_idx &= mask;
        }
        list_add(&(base + page_idx)->list, &area->free_list);

        spin_unlock_irqrestore(&zone->lock, flags);
        return;

 local_freelist:
        if (current->nr_local_pages)
                goto back_local_freelist;
        if (in_interrupt())
                goto back_local_freelist;

        list_add(&page->list, &current->local_pages);
        page->index = order;
        current->nr_local_pages++;
}

#define MARK_USED(index, order, area) \
        __change_bit((index) >> (1+(order)), (area)->map)

static inline struct page * expand (zone_t *zone, struct page *page,
         unsigned long index, int low, int high, free_area_t * area)
{
        unsigned long size = 1 << high;

        while (high > low) {
                if (BAD_RANGE(zone,page))
                        BUG();
                area--;
                high--;
                size >>= 1;
                list_add(&(page)->list, &(area)->free_list);
                MARK_USED(index, high, area);
                index += size;
                page += size;
        }
        if (BAD_RANGE(zone,page))
                BUG();
        return page;
}
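
/*
 * Illustrative example: satisfying an order-0 request (low = 0) from an
 * order-3 block (high = 3, 8 pages), expand() hands the leading 4-page,
 * 2-page and 1-page pieces back to the order-2, order-1 and order-0
 * free lists (toggling each pair bit via MARK_USED) and returns the
 * final page of the block to the caller.
 */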

static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
static struct page * rmqueue(zone_t *zone, unsigned int order)
{
        free_area_t * area = zone->free_area + order;
        unsigned int curr_order = order;
        struct list_head *head, *curr;
        unsigned long flags;
        struct page *page;

        spin_lock_irqsave(&zone->lock, flags);
        do {
                head = &area->free_list;
                curr = head->next;

                if (curr != head) {
                        unsigned int index;

                        page = list_entry(curr, struct page, list);
                        if (BAD_RANGE(zone,page))
                                BUG();
                        list_del(curr);
                        index = page - zone->zone_mem_map;
                        if (curr_order != MAX_ORDER-1)
                                MARK_USED(index, curr_order, area);
                        zone->free_pages -= 1UL << order;

                        page = expand(zone, page, index, order, curr_order, area);
                        spin_unlock_irqrestore(&zone->lock, flags);

                        set_page_count(page, 1);
                        if (BAD_RANGE(zone,page))
                                BUG();
                        if (PageLRU(page))
                                BUG();
                        if (PageActive(page))
                                BUG();
                        return page;
                }
                curr_order++;
                area++;
        } while (curr_order < MAX_ORDER);
        spin_unlock_irqrestore(&zone->lock, flags);

        return NULL;
}
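
/*
 * Illustrative example: for an order-1 request with the order-1 and
 * order-2 free lists empty, rmqueue() walks up to the first non-empty
 * list (say order-3), removes that block, toggles its pair bit, and
 * lets expand() return an order-1 piece while re-queueing the leading
 * pieces.
 */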

#ifndef CONFIG_DISCONTIGMEM
struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
{
        return __alloc_pages(gfp_mask, order,
                contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
}
#endif

static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
{
        struct page * page = NULL;
        int __freed;

        if (in_interrupt())
                BUG();

        current->allocation_order = order;
        current->flags |= PF_MEMALLOC | PF_FREE_PAGES;

        __freed = try_to_free_pages_zone(classzone, gfp_mask);

        current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);

        if (current->nr_local_pages) {
                struct list_head * entry, * local_pages;
                struct page * tmp;
                int nr_pages;

                local_pages = &current->local_pages;

                if (likely(__freed)) {
                        /* pick from the last inserted so we're lifo */
                        entry = local_pages->next;
                        do {
                                tmp = list_entry(entry, struct page, list);
                                if (tmp->index == order && memclass(page_zone(tmp), classzone)) {
                                        list_del(entry);
                                        current->nr_local_pages--;
                                        set_page_count(tmp, 1);
                                        page = tmp;

                                        if (page->buffers)
                                                BUG();
                                        if (page->mapping)
                                                BUG();
                                        if (!VALID_PAGE(page))
                                                BUG();
                                        if (PageLocked(page))
                                                BUG();
                                        if (PageLRU(page))
                                                BUG();
                                        if (PageActive(page))
                                                BUG();
                                        if (PageDirty(page))
                                                BUG();

                                        break;
                                }
                        } while ((entry = entry->next) != local_pages);
                }

                nr_pages = current->nr_local_pages;
                /* free in reverse order so that the global order will be lifo */
                while ((entry = local_pages->prev) != local_pages) {
                        list_del(entry);
                        tmp = list_entry(entry, struct page, list);
                        __free_pages_ok(tmp, tmp->index);
                        if (!nr_pages--)
                                BUG();
                }
                current->nr_local_pages = 0;
        }

        *freed = __freed;
        return page;
}

static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order)
{
        long free = zone->free_pages - (1UL << order);
        return free >= 0 ? free : 0;
}

/*
 * This is the 'heart' of the zoned buddy allocator:
 */
struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
{
        zone_t **zone, * classzone;
        struct page * page;
        int freed, class_idx;

        zone = zonelist->zones;
        classzone = *zone;
        class_idx = zone_idx(classzone);

        for (;;) {
                zone_t *z = *(zone++);
                if (!z)
                        break;

                if (zone_free_pages(z, order) > z->watermarks[class_idx].low) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        classzone->need_balance = 1;
        mb();
        if (waitqueue_active(&kswapd_wait))
                wake_up_interruptible(&kswapd_wait);

        zone = zonelist->zones;
        for (;;) {
                unsigned long min;
                zone_t *z = *(zone++);
                if (!z)
                        break;

                min = z->watermarks[class_idx].min;
                if (!(gfp_mask & __GFP_WAIT))
                        min >>= 2;
                if (zone_free_pages(z, order) > min) {
                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
        }

        /* here we're in the low on memory slow path */

        if ((current->flags & PF_MEMALLOC) &&
                        (!in_interrupt() || (current->flags & PF_MEMDIE))) {
                zone = zonelist->zones;
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        page = rmqueue(z, order);
                        if (page)
                                return page;
                }
                return NULL;
        }

        /* Atomic allocations - we can't balance anything */
        if (!(gfp_mask & __GFP_WAIT))
                goto out;

 rebalance:
        page = balance_classzone(classzone, gfp_mask, order, &freed);
        if (page)
                return page;

        zone = zonelist->zones;
        if (likely(freed)) {
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        if (zone_free_pages(z, order) > z->watermarks[class_idx].min) {
                                page = rmqueue(z, order);
                                if (page)
                                        return page;
                        }
                }
                goto rebalance;
        } else {
                /*
                 * Check whether another task has been killed in the
                 * meantime; in that case the allocation can succeed.
                 */
                for (;;) {
                        zone_t *z = *(zone++);
                        if (!z)
                                break;

                        if (zone_free_pages(z, order) > z->watermarks[class_idx].high) {
                                page = rmqueue(z, order);
                                if (page)
                                        return page;
                        }
                }
        }

 out:
        printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
               order, gfp_mask, !!(current->flags & PF_MEMALLOC));
        if (unlikely(vm_gfp_debug))
                dump_stack();
        return NULL;
}
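
/*
 * Illustrative walk-through: a GFP_KERNEL (__GFP_WAIT) request first
 * tries every zone in the zonelist against the classzone's "low"
 * watermark, then wakes kswapd and retries against "min" (atomic
 * !__GFP_WAIT callers retry against min/4 instead and then fail);
 * if the caller may sleep it calls balance_classzone() to reclaim
 * memory directly and loops back to "rebalance" while progress is
 * being made.  PF_MEMALLOC callers bypass the watermarks entirely.
 */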

/*
 * Common helper functions.
 */
unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
{
        struct page * page;

        page = alloc_pages(gfp_mask, order);
        if (!page)
                return 0;
        return (unsigned long) page_address(page);
}

unsigned long get_zeroed_page(unsigned int gfp_mask)
{
        struct page * page;

        page = alloc_pages(gfp_mask, 0);
        if (page) {
                void *address = page_address(page);
                clear_page(address);
                return (unsigned long) address;
        }
        return 0;
}

void __free_pages(struct page *page, unsigned int order)
{
        if (!PageReserved(page) && put_page_testzero(page))
                __free_pages_ok(page, order);
}

void free_pages(unsigned long addr, unsigned int order)
{
        if (addr != 0)
                __free_pages(virt_to_page(addr), order);
}
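
/*
 * Illustrative sketch of how callers typically pair these helpers.
 * The function below is an example only (compiled out with #if 0);
 * its name and the choice of GFP_KERNEL are assumptions for the
 * illustration, not part of this file's interface.
 */
#if 0
static int example_page_helper_usage(void)
{
        unsigned long addr;
        struct page *page;

        /* One zeroed page; GFP_KERNEL may sleep. */
        addr = get_zeroed_page(GFP_KERNEL);
        if (!addr)
                return -ENOMEM;
        free_pages(addr, 0);

        /* An order-2 (four page) physically contiguous block. */
        page = alloc_pages(GFP_KERNEL, 2);
        if (!page)
                return -ENOMEM;
        __free_pages(page, 2);

        return 0;
}
#endif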

/*
 * Total amount of free (allocatable) RAM:
 */
unsigned int nr_free_pages (void)
{
        unsigned int sum = 0;
        zone_t *zone;

        for_each_zone(zone)
                sum += zone->free_pages;

        return sum;
}

/*
 * Amount of free RAM allocatable as buffer memory:
 */
unsigned int nr_free_buffer_pages (void)
{
        pg_data_t *pgdat;
        unsigned int sum = 0;
        zonelist_t *zonelist;
        zone_t **zonep, *zone;

        for_each_pgdat(pgdat) {
                int class_idx;
                zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
                zonep = zonelist->zones;
                zone = *zonep;
                class_idx = zone_idx(zone);

                sum += zone->nr_cache_pages;
                for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
                        int free = zone->free_pages - zone->watermarks[class_idx].high;
                        if (free <= 0)
                                continue;
                        sum += free;
                }
        }

        return sum;
}

#if CONFIG_HIGHMEM
unsigned int nr_free_highpages (void)
{
        pg_data_t *pgdat;
        unsigned int pages = 0;

        for_each_pgdat(pgdat)
                pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;

        return pages;
}

unsigned int freeable_lowmem(void)
{
        unsigned int pages = 0;
        pg_data_t *pgdat;

        for_each_pgdat(pgdat) {
                pages += pgdat->node_zones[ZONE_DMA].free_pages;
                pages += pgdat->node_zones[ZONE_DMA].nr_active_pages;
                pages += pgdat->node_zones[ZONE_DMA].nr_inactive_pages;
                pages += pgdat->node_zones[ZONE_NORMAL].free_pages;
                pages += pgdat->node_zones[ZONE_NORMAL].nr_active_pages;
                pages += pgdat->node_zones[ZONE_NORMAL].nr_inactive_pages;
        }

        return pages;
}
#endif

#define K(x) ((x) << (PAGE_SHIFT-10))

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas_core(pg_data_t *pgdat)
{
        unsigned int order;
        unsigned type;
        pg_data_t *tmpdat = pgdat;

        printk("Free pages:      %6dkB (%6dkB HighMem)\n",
                K(nr_free_pages()),
                K(nr_free_highpages()));

        while (tmpdat) {
                zone_t *zone;
                for (zone = tmpdat->node_zones;
                                zone < tmpdat->node_zones + MAX_NR_ZONES; zone++)
                        printk("Zone:%s freepages:%6lukB\n",
                                        zone->name,
                                        K(zone->free_pages));

                tmpdat = tmpdat->node_next;
        }

        printk("( Active: %d, inactive: %d, free: %d )\n",
               nr_active_pages,
               nr_inactive_pages,
               nr_free_pages());

        for (type = 0; type < MAX_NR_ZONES; type++) {
                struct list_head *head, *curr;
                zone_t *zone = pgdat->node_zones + type;
                unsigned long nr, total, flags;

                total = 0;
                if (zone->size) {
                        spin_lock_irqsave(&zone->lock, flags);
                        for (order = 0; order < MAX_ORDER; order++) {
                                head = &(zone->free_area + order)->free_list;
                                curr = head;
                                nr = 0;
                                for (;;) {
                                        if ((curr = curr->next) == head)
                                                break;
                                        nr++;
                                }
                                total += nr * (1 << order);
                                printk("%lu*%lukB ", nr, K(1UL) << order);
                        }
                        spin_unlock_irqrestore(&zone->lock, flags);
                }
                printk("= %lukB)\n", K(total));
        }

#ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
#endif
}

void show_free_areas(void)
{
        show_free_areas_core(pgdat_list);
}

/*
 * Builds allocation fallback zone lists.
 */
static inline void build_zonelists(pg_data_t *pgdat)
{
        int i, j, k;

        for (i = 0; i <= GFP_ZONEMASK; i++) {
                zonelist_t *zonelist;
                zone_t *zone;

                zonelist = pgdat->node_zonelists + i;
                memset(zonelist, 0, sizeof(*zonelist));

                j = 0;
                k = ZONE_NORMAL;
                if (i & __GFP_HIGHMEM)
                        k = ZONE_HIGHMEM;
                if (i & __GFP_DMA)
                        k = ZONE_DMA;

                switch (k) {
                        default:
                                BUG();
                        /*
                         * fallthrough:
                         */
                        case ZONE_HIGHMEM:
                                zone = pgdat->node_zones + ZONE_HIGHMEM;
                                if (zone->size) {
#ifndef CONFIG_HIGHMEM
                                        BUG();
#endif
                                        zonelist->zones[j++] = zone;
                                }
                        case ZONE_NORMAL:
                                zone = pgdat->node_zones + ZONE_NORMAL;
                                if (zone->size)
                                        zonelist->zones[j++] = zone;
                        case ZONE_DMA:
                                zone = pgdat->node_zones + ZONE_DMA;
                                if (zone->size)
                                        zonelist->zones[j++] = zone;
                }
                zonelist->zones[j++] = NULL;
        }
}
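
/*
 * Illustrative result: on a machine with all three zones populated,
 * the __GFP_HIGHMEM zonelist falls back HighMem -> Normal -> DMA,
 * the default (GFP_KERNEL-style) zonelist is Normal -> DMA, and the
 * __GFP_DMA zonelist contains only the DMA zone, each terminated by
 * a NULL entry.
 */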

/*
 * Helper functions to size the waitqueue hash table.
 * Essentially these want to choose hash table sizes sufficiently
 * large so that collisions trying to wait on pages are rare.
 * But in fact, the number of active page waitqueues on typical
 * systems is ridiculously low, less than 200. So this is even
 * conservative, even though it seems large.
 *
 * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
 * waitqueues, i.e. the size of the waitq table given the number of pages.
 */
#define PAGES_PER_WAITQUEUE     256

static inline unsigned long wait_table_size(unsigned long pages)
{
        unsigned long size = 1;

        pages /= PAGES_PER_WAITQUEUE;

        while (size < pages)
                size <<= 1;

        /*
         * Once we have dozens or even hundreds of threads sleeping
         * on IO we've got bigger problems than wait queue collision.
         * Limit the size of the wait table to a reasonable size.
         */
        size = min(size, 4096UL);

        return size;
}

/*
 * This is an integer logarithm so that shifts can be used later
 * to extract the more random high bits from the multiplicative
 * hash function before the remainder is taken.
 */
static inline unsigned long wait_table_bits(unsigned long size)
{
        return ffz(~size);
}
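
/*
 * Illustrative example: a zone of 131072 pages (512 MiB with 4 KiB
 * pages) gives 131072 / PAGES_PER_WAITQUEUE = 512, so wait_table_size()
 * returns 512 hashed waitqueues and wait_table_bits(512) = 9, i.e.
 * wait_table_shift becomes BITS_PER_LONG - 9.
 */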

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
        unsigned long *zones_size, unsigned long zone_start_paddr,
        unsigned long *zholes_size, struct page *lmem_map)
{
        unsigned long i, j;
        unsigned long map_size;
        unsigned long totalpages, offset, realtotalpages;
        const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);

        if (zone_start_paddr & ~PAGE_MASK)
                BUG();

        totalpages = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                unsigned long size = zones_size[i];
                totalpages += size;
        }
        realtotalpages = totalpages;
        if (zholes_size)
                for (i = 0; i < MAX_NR_ZONES; i++)
                        realtotalpages -= zholes_size[i];

        printk("On node %d totalpages: %lu\n", nid, realtotalpages);

        /*
         * Some architectures (with lots of mem and discontiguous memory
         * maps) have to search for a good mem_map area:
         * For discontigmem, the conceptual mem map array starts from
         * PAGE_OFFSET; we need to align the actual array onto a mem map
         * boundary, so that MAP_NR works.
         */
        map_size = (totalpages + 1)*sizeof(struct page);
        if (lmem_map == (struct page *)0) {
                lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
                lmem_map = (struct page *)(PAGE_OFFSET +
                        MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
        }
        *gmap = pgdat->node_mem_map = lmem_map;
        pgdat->node_size = totalpages;
        pgdat->node_start_paddr = zone_start_paddr;
        pgdat->node_start_mapnr = (lmem_map - mem_map);
        pgdat->nr_zones = 0;

        offset = lmem_map - mem_map;
        for (j = 0; j < MAX_NR_ZONES; j++) {
                zone_t *zone = pgdat->node_zones + j;
                unsigned long mask;
                unsigned long size, realsize;
                int idx;

                zone_table[nid * MAX_NR_ZONES + j] = zone;
                realsize = size = zones_size[j];
                if (zholes_size)
                        realsize -= zholes_size[j];

                printk("zone(%lu): %lu pages.\n", j, size);
                zone->size = size;
                zone->realsize = realsize;
                zone->name = zone_names[j];
                zone->lock = SPIN_LOCK_UNLOCKED;
                zone->zone_pgdat = pgdat;
                zone->free_pages = 0;
                zone->need_balance = 0;
                zone->nr_active_pages = zone->nr_inactive_pages = 0;

                if (!size)
                        continue;

                /*
                 * The per-page waitqueue mechanism uses hashed waitqueues
                 * per zone.
                 */
                zone->wait_table_size = wait_table_size(size);
                zone->wait_table_shift =
                        BITS_PER_LONG - wait_table_bits(zone->wait_table_size);
                zone->wait_table = (wait_queue_head_t *)
                        alloc_bootmem_node(pgdat, zone->wait_table_size
                                                * sizeof(wait_queue_head_t));

                for (i = 0; i < zone->wait_table_size; ++i)
                        init_waitqueue_head(zone->wait_table + i);

                pgdat->nr_zones = j+1;

                mask = (realsize / zone_balance_ratio[j]);
                if (mask < zone_balance_min[j])
                        mask = zone_balance_min[j];
                else if (mask > zone_balance_max[j])
                        mask = zone_balance_max[j];
                zone->watermarks[j].min = mask;
                zone->watermarks[j].low = mask*2;
                zone->watermarks[j].high = mask*3;
                /* now set the watermarks of the lower zones in the "j" classzone */
                for (idx = j-1; idx >= 0; idx--) {
                        zone_t * lower_zone = pgdat->node_zones + idx;
                        unsigned long lower_zone_reserve;
                        if (!lower_zone->size)
                                continue;

                        mask = lower_zone->watermarks[idx].min;
                        lower_zone->watermarks[j].min = mask;
                        lower_zone->watermarks[j].low = mask*2;
                        lower_zone->watermarks[j].high = mask*3;

                        /* the trickier part: reserve part of the lower zone from classzone "j" allocations */
                        lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx];
                        lower_zone->watermarks[j].min += lower_zone_reserve;
                        lower_zone->watermarks[j].low += lower_zone_reserve;
                        lower_zone->watermarks[j].high += lower_zone_reserve;

                        realsize += lower_zone->realsize;
                }
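
                /*
                 * Illustrative numbers: a 64 MiB zone (16384 pages of
                 * 4 KiB) with zone_balance_ratio 128 gets mask = 128,
                 * so its own-classzone watermarks are min = 128,
                 * low = 256, high = 384.  A lower zone additionally
                 * reserves a fraction (1/lower_zone_reserve_ratio) of
                 * the classzone's size against that classzone, so
                 * e.g. ZONE_DMA is not exhausted by ZONE_NORMAL
                 * allocations.
                 */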

                zone->zone_mem_map = mem_map + offset;
                zone->zone_start_mapnr = offset;
                zone->zone_start_paddr = zone_start_paddr;

                if ((zone_start_paddr >> PAGE_SHIFT) & (zone_required_alignment-1))
                        printk("BUG: wrong zone alignment, it will crash\n");

                /*
                 * Initially all pages are reserved - free ones are freed
                 * up by free_all_bootmem() once the early boot process is
                 * done. Non-atomic initialization, single-pass.
                 */
                for (i = 0; i < size; i++) {
                        struct page *page = mem_map + offset + i;
                        set_page_zone(page, nid * MAX_NR_ZONES + j);
                        set_page_count(page, 0);
                        SetPageReserved(page);
                        INIT_LIST_HEAD(&page->list);
                        if (j != ZONE_HIGHMEM)
                                set_page_address(page, __va(zone_start_paddr));
                        zone_start_paddr += PAGE_SIZE;
                }

                offset += size;
                for (i = 0; ; i++) {
                        unsigned long bitmap_size;

                        INIT_LIST_HEAD(&zone->free_area[i].free_list);
                        if (i == MAX_ORDER-1) {
                                zone->free_area[i].map = NULL;
                                break;
                        }

                        /*
                         * Page buddy system uses "index >> (i+1)",
                         * where "index" is at most "size-1".
                         *
                         * The extra "+3" is to round down to byte
                         * size (8 bits per byte assumption). Thus
                         * we get "(size-1) >> (i+4)" as the last byte
                         * we can access.
                         *
                         * The "+1" is because we want to round the
                         * byte allocation up rather than down. So
                         * we should have had a "+7" before we shifted
                         * down by three. Also, we have to add one as
                         * we actually _use_ the last bit (it's [0,n]
                         * inclusive, not [0,n[).
                         *
                         * So we actually had +7+1 before we shift
                         * down by 3. But (n+8) >> 3 == (n >> 3) + 1
                         * (modulo overflows, which we do not have).
                         *
                         * Finally, we LONG_ALIGN because all bitmap
                         * operations are on longs.
                         */
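                        /*
                         * Worked example: for a zone of size = 16384
                         * pages and order i = 0, bitmap_size becomes
                         * 16383 >> 4 = 1023, rounded up and
                         * LONG_ALIGNed to 1024 bytes, i.e. one bit
                         * for each of the 8192 order-0 buddy pairs.
                         */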
                        bitmap_size = (size-1) >> (i+4);
                        bitmap_size = LONG_ALIGN(bitmap_size+1);
                        zone->free_area[i].map =
                          (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
                }
        }
        build_zonelists(pgdat);
}

void __init free_area_init(unsigned long *zones_size)
{
        free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
}

static int __init setup_mem_frac(char *str)
{
        int j = 0;

        while (get_option(&str, &zone_balance_ratio[j++]) == 2);
        printk("setup_mem_frac: ");
        for (j = 0; j < MAX_NR_ZONES; j++) printk("%d  ", zone_balance_ratio[j]);
        printk("\n");
        return 1;
}

__setup("memfrac=", setup_mem_frac);

static int __init setup_lower_zone_reserve(char *str)
{
        int j = 0;

        while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2);
        printk("setup_lower_zone_reserve: ");
        for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d  ", lower_zone_reserve_ratio[j]);
        printk("\n");
        return 1;
}

__setup("lower_zone_reserve=", setup_lower_zone_reserve);
