OpenCores Subversion repository: or1k_soc_on_altera_embedded_dev_kit
File: tags/linux-2.6/linux-2.6.24_orig/mm/memory_hotplug.c (rev 8)
URL: https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk
/*
 *  linux/mm/memory_hotplug.c
 *
 *  Copyright (C)
 */

#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/cpuset.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>

#include <asm/tlbflush.h>

/* add this memory to the iomem resource tree */
static struct resource *register_memory_resource(u64 start, u64 size)
{
        struct resource *res;
        res = kzalloc(sizeof(struct resource), GFP_KERNEL);
        BUG_ON(!res);

        res->name = "System RAM";
        res->start = start;
        res->end = start + size - 1;
        res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
        if (request_resource(&iomem_resource, res) < 0) {
                printk(KERN_ERR "System RAM resource %llx - %llx cannot be added\n",
                        (unsigned long long)res->start,
                        (unsigned long long)res->end);
                kfree(res);
                res = NULL;
        }
        return res;
}

static void release_memory_resource(struct resource *res)
{
        if (!res)
                return;
        release_resource(res);
        kfree(res);
}
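
/*
 * Illustrative note: after a successful register_memory_resource(),
 * the hot-added range shows up in /proc/iomem as a busy "System RAM"
 * entry, e.g.
 *
 *   40000000-4fffffff : System RAM
 *
 * The addresses above are hypothetical, chosen only for the example.
 */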

#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
        struct pglist_data *pgdat = zone->zone_pgdat;
        int nr_pages = PAGES_PER_SECTION;
        int nid = pgdat->node_id;
        int zone_type;

        zone_type = zone - pgdat->node_zones;
        if (!zone->wait_table) {
                int ret = 0;
                ret = init_currently_empty_zone(zone, phys_start_pfn,
                                                nr_pages, MEMMAP_HOTPLUG);
                if (ret < 0)
                        return ret;
        }
        memmap_init_zone(nr_pages, nid, zone_type,
                         phys_start_pfn, MEMMAP_HOTPLUG);
        return 0;
}

static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
        int nr_pages = PAGES_PER_SECTION;
        int ret;

        if (pfn_valid(phys_start_pfn))
                return -EEXIST;

        ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
        if (ret < 0)
                return ret;

        ret = __add_zone(zone, phys_start_pfn);
        if (ret < 0)
                return ret;

        return register_new_memory(__pfn_to_section(phys_start_pfn));
}

/*
 * Reasonably generic function for adding memory.  It is
 * expected that archs that support memory hotplug will
 * call this function after deciding the zone to which to
 * add the new pages.
 */
int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
                 unsigned long nr_pages)
{
        unsigned long i;
        int err = 0;
        int start_sec, end_sec;
        /* align the hot-added range to sections while the mem_map is set up */
        start_sec = pfn_to_section_nr(phys_start_pfn);
        end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);

        for (i = start_sec; i <= end_sec; i++) {
                err = __add_section(zone, i << PFN_SECTION_SHIFT);

                /*
                 * -EEXIST is ultimately handled by the iomem-resource
                 * collision check; see add_memory() =>
                 * register_memory_resource().  A warning is printed if
                 * there is a collision.
                 */
                if (err && (err != -EEXIST))
                        break;
                err = 0;
        }

        return err;
}
EXPORT_SYMBOL_GPL(__add_pages);
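
/*
 * Illustrative sketch of the expected caller: an arch_add_memory()
 * implementation picks a zone and hands the range to __add_pages().
 * Loosely modeled on x86_64 of this era; the mapping-setup helper is
 * a hypothetical placeholder, so the block is kept under "#if 0".
 */
#if 0
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        /* the arch decides the zone; ZONE_NORMAL is a common default */
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;

        /* hypothetical stand-in for the arch's page-table setup */
        example_arch_prepare_mapping(start, start + size);

        return __add_pages(zone, start_pfn, nr_pages);
}
#endif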

static void grow_zone_span(struct zone *zone,
                unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long old_zone_end_pfn;

        zone_span_writelock(zone);

        old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
        if (start_pfn < zone->zone_start_pfn)
                zone->zone_start_pfn = start_pfn;

        zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
                                zone->zone_start_pfn;

        zone_span_writeunlock(zone);
}

static void grow_pgdat_span(struct pglist_data *pgdat,
                unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long old_pgdat_end_pfn =
                pgdat->node_start_pfn + pgdat->node_spanned_pages;

        if (start_pfn < pgdat->node_start_pfn)
                pgdat->node_start_pfn = start_pfn;

        pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
                                        pgdat->node_start_pfn;
}

static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
                        void *arg)
{
        unsigned long i;
        unsigned long onlined_pages = *(unsigned long *)arg;
        struct page *page;

        if (PageReserved(pfn_to_page(start_pfn)))
                for (i = 0; i < nr_pages; i++) {
                        page = pfn_to_page(start_pfn + i);
                        online_page(page);
                        onlined_pages++;
                }
        *(unsigned long *)arg = onlined_pages;
        return 0;
}

int online_pages(unsigned long pfn, unsigned long nr_pages)
{
        unsigned long flags;
        unsigned long onlined_pages = 0;
        struct zone *zone;
        int need_zonelists_rebuild = 0;
        int nid;
        int ret;
        struct memory_notify arg;

        arg.start_pfn = pfn;
        arg.nr_pages = nr_pages;
        arg.status_change_nid = -1;

        nid = page_to_nid(pfn_to_page(pfn));
        if (node_present_pages(nid) == 0)
                arg.status_change_nid = nid;

        ret = memory_notify(MEM_GOING_ONLINE, &arg);
        ret = notifier_to_errno(ret);
        if (ret) {
                memory_notify(MEM_CANCEL_ONLINE, &arg);
                return ret;
        }
        /*
         * pfn_to_page() needs no lock here: the section cannot be
         * removed while we hold memory_block->state_sem.
         */
        zone = page_zone(pfn_to_page(pfn));
        pgdat_resize_lock(zone->zone_pgdat, &flags);
        grow_zone_span(zone, pfn, pfn + nr_pages);
        grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
        pgdat_resize_unlock(zone->zone_pgdat, &flags);

        /*
         * If this zone is not populated, it is not in any zonelist and
         * the page allocator ignores it, so the zonelists must be
         * rebuilt after onlining.
         */
        if (!populated_zone(zone))
                need_zonelists_rebuild = 1;

        walk_memory_resource(pfn, nr_pages, &onlined_pages,
                online_pages_range);
        zone->present_pages += onlined_pages;
        zone->zone_pgdat->node_present_pages += onlined_pages;

        setup_per_zone_pages_min();
        if (onlined_pages) {
                kswapd_run(zone_to_nid(zone));
                node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
        }

        if (need_zonelists_rebuild)
                build_all_zonelists();
        vm_total_pages = nr_free_pagecache_pages();
        writeback_set_ratelimit();

        if (onlined_pages)
                memory_notify(MEM_ONLINE, &arg);

        return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
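
/*
 * Illustrative sketch of the notifier side: how a subsystem can hook
 * the MEM_GOING_ONLINE/MEM_ONLINE events that online_pages() raises
 * through memory_notify().  register_memory_notifier() and struct
 * memory_notify come from <linux/memory.h>; the callback body here is
 * a hypothetical example, kept under "#if 0".
 */
#if 0
static int example_mem_callback(struct notifier_block *self,
                                unsigned long action, void *arg)
{
        struct memory_notify *mn = arg;

        switch (action) {
        case MEM_GOING_ONLINE:
                /* prepare for mn->nr_pages pages at mn->start_pfn;
                   returning NOTIFY_BAD here cancels the online */
                break;
        case MEM_ONLINE:
                /* the pages are now available to the allocator */
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block example_mem_nb = {
        .notifier_call = example_mem_callback,
};
/* registered at init time: register_memory_notifier(&example_mem_nb); */
#endif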

static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
        struct pglist_data *pgdat;
        unsigned long zones_size[MAX_NR_ZONES] = {0};
        unsigned long zholes_size[MAX_NR_ZONES] = {0};
        unsigned long start_pfn = start >> PAGE_SHIFT;

        pgdat = arch_alloc_nodedata(nid);
        if (!pgdat)
                return NULL;

        arch_refresh_nodedata(nid, pgdat);

        /* we can use NODE_DATA(nid) from here */

        /* init the node's zones as empty zones; there are no present pages */
        free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);

        return pgdat;
}

static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
        arch_refresh_nodedata(nid, NULL);
        arch_free_nodedata(pgdat);
}

int add_memory(int nid, u64 start, u64 size)
{
        pg_data_t *pgdat = NULL;
        int new_pgdat = 0;
        struct resource *res;
        int ret;

        res = register_memory_resource(start, size);
        if (!res)
                return -EEXIST;

        if (!node_online(nid)) {
                pgdat = hotadd_new_pgdat(nid, start);
                ret = -ENOMEM;
                if (!pgdat)
                        goto error;
                new_pgdat = 1;
        }

        /* call the arch's memory hot-add */
        ret = arch_add_memory(nid, start, size);
        if (ret < 0)
                goto error;

        /* we online the node here; we cannot roll back from this point on */
        node_set_online(nid);

        cpuset_track_online_nodes();

        if (new_pgdat) {
                ret = register_one_node(nid);
                /*
                 * If the sysfs files for the new node cannot be created,
                 * CPUs on that node cannot be hot-added.  There is no
                 * rollback path here, so catch the failure with BUG_ON(),
                 * reluctantly.
                 */
                BUG_ON(ret);
        }

        return ret;
error:
        /* roll back the pgdat allocation and the resource registration */
        if (new_pgdat)
                rollback_node_hotadd(nid, pgdat);
        if (res)
                release_memory_resource(res);

        return ret;
}
EXPORT_SYMBOL_GPL(add_memory);
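
/*
 * Illustrative sketch of a typical add_memory() caller: platform code
 * (e.g. an ACPI memory-hotplug driver) invokes it when firmware
 * reports new RAM.  The function and variable names below are
 * hypothetical simplifications, so the block is kept under "#if 0".
 */
#if 0
static int example_report_new_ram(u64 new_start_addr, u64 new_length)
{
        int nid = 0;    /* node chosen by the platform; 0 if unknown */
        int ret;

        ret = add_memory(nid, new_start_addr, new_length);
        if (ret == -EEXIST)     /* the range was already registered */
                ret = 0;
        return ret;
}
#endif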

#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * Confirm that all pages in the range [start_pfn, end_pfn) belong to
 * the same zone.
 */
static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long pfn;
        struct zone *zone = NULL;
        struct page *page;
        int i;
        for (pfn = start_pfn;
             pfn < end_pfn;
             pfn += MAX_ORDER_NR_PAGES) {
                i = 0;
                /* This is just a CONFIG_HOLES_IN_ZONE check. */
                while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
                        i++;
                if (i == MAX_ORDER_NR_PAGES)
                        continue;
                page = pfn_to_page(pfn + i);
                if (zone && page_zone(page) != zone)
                        return 0;
                zone = page_zone(page);
        }
        return 1;
}

/*
 * Scanning pfns is much easier than scanning the LRU lists: scan pfns
 * from start to end and return the first pfn of a page on the LRU.
 */
static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
{
        unsigned long pfn;
        struct page *page;
        for (pfn = start; pfn < end; pfn++) {
                if (pfn_valid(pfn)) {
                        page = pfn_to_page(pfn);
                        if (PageLRU(page))
                                return pfn;
                }
        }
        return 0;
}

static struct page *
hotremove_migrate_alloc(struct page *page,
                        unsigned long private,
                        int **x)
{
        /* This allocation policy should be improved. */
        return alloc_page(GFP_HIGHUSER_PAGECACHE);
}

#define NR_OFFLINE_AT_ONCE_PAGES        (256)
static int
do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long pfn;
        struct page *page;
        int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
        int not_managed = 0;
        int ret = 0;
        LIST_HEAD(source);

        for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
                if (!pfn_valid(pfn))
                        continue;
                page = pfn_to_page(pfn);
                if (!page_count(page))
                        continue;
                /*
                 * We can skip free pages, and we can only deal with
                 * pages on the LRU.
                 */
                ret = isolate_lru_page(page, &source);
                if (!ret) { /* Success */
                        move_pages--;
                } else {
                        /* Because we don't hold the big zone->lock,
                           check the page count again here. */
                        if (page_count(page))
                                not_managed++;
#ifdef CONFIG_DEBUG_VM
                        printk(KERN_INFO "removing from LRU failed"
                                         " %lx/%d/%lx\n",
                                pfn, page_count(page), page->flags);
#endif
                }
        }
        ret = -EBUSY;
        if (not_managed) {
                if (!list_empty(&source))
                        putback_lru_pages(&source);
                goto out;
        }
        ret = 0;
        if (list_empty(&source))
                goto out;
        /* migrate_pages() returns the number of failed pages */
        ret = migrate_pages(&source, hotremove_migrate_alloc, 0);

out:
        return ret;
}

/*
 * Remove pages from free_area[] and mark them all as Reserved.
 */
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
                        void *data)
{
        __offline_isolated_pages(start, start + nr_pages);
        return 0;
}

static void
offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
        walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
                                offline_isolated_pages_cb);
}

/*
 * Check that all pages in the range, recorded as a memory resource,
 * are isolated.
 */
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
                        void *data)
{
        int ret;
        long offlined = nr_pages;

        ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
        if (!ret)
                *(long *)data += offlined;
        return ret;
}

static long
check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
{
        long offlined = 0;
        int ret;

        ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
                        check_pages_isolated_cb);
        if (ret < 0)
                offlined = (long)ret;
        return offlined;
}

extern void drain_all_local_pages(void);

int offline_pages(unsigned long start_pfn,
                  unsigned long end_pfn, unsigned long timeout)
{
        unsigned long pfn, nr_pages, expire;
        long offlined_pages;
        int ret, drain, retry_max, node;
        struct zone *zone;
        struct memory_notify arg;

        BUG_ON(start_pfn >= end_pfn);
        /* at the very least, alignment to pageblocks is necessary */
        if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
                return -EINVAL;
        if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
                return -EINVAL;
        /* This makes hotplug much easier, and more readable.
           We assume this restriction for now. */
        if (!test_pages_in_a_zone(start_pfn, end_pfn))
                return -EINVAL;

        zone = page_zone(pfn_to_page(start_pfn));
        node = zone_to_nid(zone);
        nr_pages = end_pfn - start_pfn;

        /* set the above range as isolated */
        ret = start_isolate_page_range(start_pfn, end_pfn);
        if (ret)
                return ret;

        arg.start_pfn = start_pfn;
        arg.nr_pages = nr_pages;
        arg.status_change_nid = -1;
        if (nr_pages >= node_present_pages(node))
                arg.status_change_nid = node;

        ret = memory_notify(MEM_GOING_OFFLINE, &arg);
        ret = notifier_to_errno(ret);
        if (ret)
                goto failed_removal;

        pfn = start_pfn;
        expire = jiffies + timeout;
        drain = 0;
        retry_max = 5;
repeat:
        /* start memory hot removal */
        ret = -EAGAIN;
        if (time_after(jiffies, expire))
                goto failed_removal;
        ret = -EINTR;
        if (signal_pending(current))
                goto failed_removal;
        ret = 0;
        if (drain) {
                lru_add_drain_all();
                flush_scheduled_work();
                cond_resched();
                drain_all_local_pages();
        }

        pfn = scan_lru_pages(start_pfn, end_pfn);
        if (pfn) { /* We have pages on the LRU */
                ret = do_migrate_range(pfn, end_pfn);
                if (!ret) {
                        drain = 1;
                        goto repeat;
                } else {
                        if (ret < 0 && --retry_max == 0)
                                goto failed_removal;
                        yield();
                        drain = 1;
                        goto repeat;
                }
        }
        /* drain all zones' LRU pagevecs; this is asynchronous... */
        lru_add_drain_all();
        flush_scheduled_work();
        yield();
        /* drain the pcp pages; this is synchronous */
        drain_all_local_pages();
        /* check again */
        offlined_pages = check_pages_isolated(start_pfn, end_pfn);
        if (offlined_pages < 0) {
                ret = -EBUSY;
                goto failed_removal;
        }
        printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
        /* OK, all of our target range is isolated.
           We cannot roll back from this point. */
        offline_isolated_pages(start_pfn, end_pfn);
        /* reset the pageblock flags: set the migrate type back to MOVABLE */
        undo_isolate_page_range(start_pfn, end_pfn);
        /* removal succeeded */
        zone->present_pages -= offlined_pages;
        zone->zone_pgdat->node_present_pages -= offlined_pages;
        totalram_pages -= offlined_pages;
        num_physpages -= offlined_pages;

        vm_total_pages = nr_free_pagecache_pages();
        writeback_set_ratelimit();

        memory_notify(MEM_OFFLINE, &arg);
        return 0;

failed_removal:
        printk(KERN_INFO "memory offlining %lx to %lx failed\n",
                start_pfn, end_pfn);
        memory_notify(MEM_CANCEL_OFFLINE, &arg);
        /* push the pages back to the free area */
        undo_isolate_page_range(start_pfn, end_pfn);

        return ret;
}
#else
int remove_memory(u64 start, u64 size)
{
        return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif /* CONFIG_MEMORY_HOTREMOVE */
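
/*
 * Illustrative note on how these paths are normally driven: with
 * memory hotplug enabled, userspace toggles one memory section at a
 * time through sysfs, e.g.
 *
 *   echo online  > /sys/devices/system/memory/memory8/state
 *   echo offline > /sys/devices/system/memory/memory8/state
 *
 * "memory8" is a hypothetical section name.  The online transition
 * ends up in online_pages(); the offline transition reaches
 * offline_pages() through the architecture's remove_memory() when
 * CONFIG_MEMORY_HOTREMOVE is available.
 */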
