test_project/trunk/linux_sd_driver/mm/migrate.c (rev 62)

/*
 * Memory Migration functionality - linux/mm/migration.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter <clameter@sgi.com>
 */

#include <linux/migrate.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>

#include "internal.h"

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * Isolate one page from the LRU lists. If successful put it onto
 * the indicated list with elevated page count.
 *
 * Result:
 *  -EBUSY: page not on LRU list
 *  0: page removed from LRU list and added to the specified list.
 */
int isolate_lru_page(struct page *page, struct list_head *pagelist)
{
        int ret = -EBUSY;

        if (PageLRU(page)) {
                struct zone *zone = page_zone(page);

                spin_lock_irq(&zone->lru_lock);
                if (PageLRU(page) && get_page_unless_zero(page)) {
                        ret = 0;
                        ClearPageLRU(page);
                        if (PageActive(page))
                                del_page_from_active_list(zone, page);
                        else
                                del_page_from_inactive_list(zone, page);
                        list_add_tail(&page->lru, pagelist);
                }
                spin_unlock_irq(&zone->lru_lock);
        }
        return ret;
}

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page().
 */
int migrate_prep(void)
{
        /*
         * Clear the LRU lists so pages can be isolated.
         * Note that pages may be moved off the LRU after we have
         * drained them. Those pages will fail to migrate like other
         * pages that may be busy.
         */
        lru_add_drain_all();

        return 0;
}

static inline void move_to_lru(struct page *page)
{
        if (PageActive(page)) {
                /*
                 * lru_cache_add_active checks that
                 * the PG_active bit is off.
                 */
                ClearPageActive(page);
                lru_cache_add_active(page);
        } else {
                lru_cache_add(page);
        }
        put_page(page);
}

/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
int putback_lru_pages(struct list_head *l)
{
        struct page *page;
        struct page *page2;
        int count = 0;

        list_for_each_entry_safe(page, page2, l, lru) {
                list_del(&page->lru);
                move_to_lru(page);
                count++;
        }
        return count;
}
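
/*
 * Illustrative sketch, not part of the original migrate.c: the typical
 * caller pattern for the isolation helpers above, roughly as the NUMA
 * mempolicy and memory hotplug code use them. The names "my_pagelist"
 * and "some_page" are placeholders for this sketch only.
 */
#if 0
        LIST_HEAD(my_pagelist);

        migrate_prep();                         /* drain per-cpu LRU caches */
        if (isolate_lru_page(some_page, &my_pagelist) == 0) {
                /* page is off the LRU, on my_pagelist, with an extra ref */
        }
        /* ... hand the list to migrate_pages() or give the pages back: */
        putback_lru_pages(&my_pagelist);
#endif  /* end of illustrative sketch */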

static inline int is_swap_pte(pte_t pte)
{
        return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
}

/*
 * Restore a potential migration pte to a working pte entry
 */
static void remove_migration_pte(struct vm_area_struct *vma,
                struct page *old, struct page *new)
{
        struct mm_struct *mm = vma->vm_mm;
        swp_entry_t entry;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *ptep, pte;
        spinlock_t *ptl;
        unsigned long addr = page_address_in_vma(new, vma);

        if (addr == -EFAULT)
                return;

        pgd = pgd_offset(mm, addr);
        if (!pgd_present(*pgd))
                return;

        pud = pud_offset(pgd, addr);
        if (!pud_present(*pud))
                return;

        pmd = pmd_offset(pud, addr);
        if (!pmd_present(*pmd))
                return;

        ptep = pte_offset_map(pmd, addr);

        if (!is_swap_pte(*ptep)) {
                pte_unmap(ptep);
                return;
        }

        ptl = pte_lockptr(mm, pmd);
        spin_lock(ptl);
        pte = *ptep;
        if (!is_swap_pte(pte))
                goto out;

        entry = pte_to_swp_entry(pte);

        if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
                goto out;

        get_page(new);
        pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
        if (is_write_migration_entry(entry))
                pte = pte_mkwrite(pte);
        flush_cache_page(vma, addr, pte_pfn(pte));
        set_pte_at(mm, addr, ptep, pte);

        if (PageAnon(new))
                page_add_anon_rmap(new, vma, addr);
        else
                page_add_file_rmap(new);

        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, addr, pte);

out:
        pte_unmap_unlock(ptep, ptl);
}

/*
 * Note that remove_file_migration_ptes will only work on regular mappings;
 * nonlinear mappings do not use migration entries.
 */
static void remove_file_migration_ptes(struct page *old, struct page *new)
{
        struct vm_area_struct *vma;
        struct address_space *mapping = page_mapping(new);
        struct prio_tree_iter iter;
        pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

        if (!mapping)
                return;

        spin_lock(&mapping->i_mmap_lock);

        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
                remove_migration_pte(vma, old, new);

        spin_unlock(&mapping->i_mmap_lock);
}

/*
 * Must hold mmap_sem lock on at least one of the vmas containing
 * the page so that the anon_vma cannot vanish.
 */
static void remove_anon_migration_ptes(struct page *old, struct page *new)
{
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
        unsigned long mapping;

        mapping = (unsigned long)new->mapping;

        if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
                return;

        /*
         * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
         */
        anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
        spin_lock(&anon_vma->lock);

        list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
                remove_migration_pte(vma, old, new);

        spin_unlock(&anon_vma->lock);
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
static void remove_migration_ptes(struct page *old, struct page *new)
{
        if (PageAnon(new))
                remove_anon_migration_ptes(old, new);
        else
                remove_file_migration_ptes(old, new);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 *
 * This function is called from do_swap_page().
 */
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
                                unsigned long address)
{
        pte_t *ptep, pte;
        spinlock_t *ptl;
        swp_entry_t entry;
        struct page *page;

        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
        pte = *ptep;
        if (!is_swap_pte(pte))
                goto out;

        entry = pte_to_swp_entry(pte);
        if (!is_migration_entry(entry))
                goto out;

        page = migration_entry_to_page(entry);

        get_page(page);
        pte_unmap_unlock(ptep, ptl);
        wait_on_page_locked(page);
        put_page(page);
        return;
out:
        pte_unmap_unlock(ptep, ptl);
}
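
/*
 * Illustrative sketch, not part of the original migrate.c: how a fault
 * handler such as do_swap_page() is expected to use the helper above when
 * the "swap" entry it finds is really a migration entry ("orig_pte" is a
 * placeholder for the faulting pte value).
 */
#if 0
        entry = pte_to_swp_entry(orig_pte);
        if (is_migration_entry(entry)) {
                migration_entry_wait(mm, pmd, address);
                /* return; the fault is retried once migration completes */
        }
#endif  /* end of illustrative sketch */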

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate set.
 */
static int migrate_page_move_mapping(struct address_space *mapping,
                struct page *newpage, struct page *page)
{
        void **pslot;

        if (!mapping) {
                /* Anonymous page without mapping */
                if (page_count(page) != 1)
                        return -EAGAIN;
                return 0;
        }

        write_lock_irq(&mapping->tree_lock);

        pslot = radix_tree_lookup_slot(&mapping->page_tree,
                                        page_index(page));

        if (page_count(page) != 2 + !!PagePrivate(page) ||
                        (struct page *)radix_tree_deref_slot(pslot) != page) {
                write_unlock_irq(&mapping->tree_lock);
                return -EAGAIN;
        }

        /*
         * Now we know that no one else is looking at the page.
         */
        get_page(newpage);      /* add cache reference */
#ifdef CONFIG_SWAP
        if (PageSwapCache(page)) {
                SetPageSwapCache(newpage);
                set_page_private(newpage, page_private(page));
        }
#endif

        radix_tree_replace_slot(pslot, newpage);

        /*
         * Drop cache reference from old page.
         * We know this isn't the last reference.
         */
        __put_page(page);

        /*
         * If moved to a different zone then also account
         * the page for that zone. Other VM counters will be
         * taken care of when we establish references to the
         * new page and drop references to the old page.
         *
         * Note that anonymous pages are accounted for
         * via NR_FILE_PAGES and NR_ANON_PAGES if they
         * are mapped to swap space.
         */
        __dec_zone_page_state(page, NR_FILE_PAGES);
        __inc_zone_page_state(newpage, NR_FILE_PAGES);

        write_unlock_irq(&mapping->tree_lock);

        return 0;
}

/*
 * Copy the page to its new location
 */
static void migrate_page_copy(struct page *newpage, struct page *page)
{
        copy_highpage(newpage, page);

        if (PageError(page))
                SetPageError(newpage);
        if (PageReferenced(page))
                SetPageReferenced(newpage);
        if (PageUptodate(page))
                SetPageUptodate(newpage);
        if (PageActive(page))
                SetPageActive(newpage);
        if (PageChecked(page))
                SetPageChecked(newpage);
        if (PageMappedToDisk(page))
                SetPageMappedToDisk(newpage);

        if (PageDirty(page)) {
                clear_page_dirty_for_io(page);
                set_page_dirty(newpage);
        }

#ifdef CONFIG_SWAP
        ClearPageSwapCache(page);
#endif
        ClearPageActive(page);
        ClearPagePrivate(page);
        set_page_private(page, 0);
        page->mapping = NULL;

        /*
         * If any waiters have accumulated on the new page then
         * wake them up.
         */
        if (PageWriteback(newpage))
                end_page_writeback(newpage);
}

/************************************************************
 *                    Migration functions
 ***********************************************************/

/* Always fail migration. Used for mappings that are not movable */
int fail_migrate_page(struct address_space *mapping,
                        struct page *newpage, struct page *page)
{
        return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
                struct page *newpage, struct page *page)
{
        int rc;

        BUG_ON(PageWriteback(page));    /* Writeback must be complete */

        rc = migrate_page_move_mapping(mapping, newpage, page);

        if (rc)
                return rc;

        migrate_page_copy(newpage, page);
        return 0;
}
EXPORT_SYMBOL(migrate_page);
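
/*
 * Illustrative sketch, not part of the original migrate.c: a filesystem
 * whose pages carry no private buffer state can simply point its
 * address_space_operations at the generic helper above ("example_aops"
 * is a placeholder name).
 */
#if 0
static const struct address_space_operations example_aops = {
        /* ... readpage, writepage, etc. ... */
        .migratepage    = migrate_page,
};
#endif  /* end of illustrative sketch */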

#ifdef CONFIG_BLOCK
/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct address_space *mapping,
                struct page *newpage, struct page *page)
{
        struct buffer_head *bh, *head;
        int rc;

        if (!page_has_buffers(page))
                return migrate_page(mapping, newpage, page);

        head = page_buffers(page);

        rc = migrate_page_move_mapping(mapping, newpage, page);

        if (rc)
                return rc;

        bh = head;
        do {
                get_bh(bh);
                lock_buffer(bh);
                bh = bh->b_this_page;

        } while (bh != head);

        ClearPagePrivate(page);
        set_page_private(newpage, page_private(page));
        set_page_private(page, 0);
        put_page(page);
        get_page(newpage);

        bh = head;
        do {
                set_bh_page(bh, newpage, bh_offset(bh));
                bh = bh->b_this_page;

        } while (bh != head);

        SetPagePrivate(newpage);

        migrate_page_copy(newpage, page);

        bh = head;
        do {
                unlock_buffer(bh);
                put_bh(bh);
                bh = bh->b_this_page;

        } while (bh != head);

        return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);
#endif

/*
 * Writeback a page to clean the dirty state
 */
static int writeout(struct address_space *mapping, struct page *page)
{
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
                .nr_to_write = 1,
                .range_start = 0,
                .range_end = LLONG_MAX,
                .nonblocking = 1,
                .for_reclaim = 1
        };
        int rc;

        if (!mapping->a_ops->writepage)
                /* No write method for the address space */
                return -EINVAL;

        if (!clear_page_dirty_for_io(page))
                /* Someone else already triggered a write */
                return -EAGAIN;

        /*
         * A dirty page may imply that the underlying filesystem has
         * the page on some queue. So the page must be clean for
         * migration. Writeout may mean we lose the lock and the
         * page state is no longer what we checked for earlier.
         * At this point we know that the migration attempt cannot
         * be successful.
         */
        remove_migration_ptes(page, page);

        rc = mapping->a_ops->writepage(page, &wbc);
        if (rc < 0)
                /* I/O Error writing */
                return -EIO;

        if (rc != AOP_WRITEPAGE_ACTIVATE)
                /* unlocked. Relock */
                lock_page(page);

        return -EAGAIN;
}

/*
 * Default handling if a filesystem does not provide a migration function.
 */
static int fallback_migrate_page(struct address_space *mapping,
        struct page *newpage, struct page *page)
{
        if (PageDirty(page))
                return writeout(mapping, page);

        /*
         * Buffers may be managed in a filesystem specific way.
         * We must have no buffers or drop them.
         */
        if (PagePrivate(page) &&
            !try_to_release_page(page, GFP_KERNEL))
                return -EAGAIN;

        return migrate_page(mapping, newpage, page);
}

/*
 * Move a page to a newly allocated page.
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 */
static int move_to_new_page(struct page *newpage, struct page *page)
{
        struct address_space *mapping;
        int rc;

        /*
         * Block others from accessing the page when we get around to
         * establishing additional references. We are the only one
         * holding a reference to the new page at this point.
         */
        if (TestSetPageLocked(newpage))
                BUG();

        /* Prepare mapping for the new page. */
        newpage->index = page->index;
        newpage->mapping = page->mapping;

        mapping = page_mapping(page);
        if (!mapping)
                rc = migrate_page(mapping, newpage, page);
        else if (mapping->a_ops->migratepage)
                /*
                 * Most pages have a mapping and most filesystems
                 * should provide a migration function. Anonymous
                 * pages are part of swap space which also has its
                 * own migration function. This is the most common
                 * path for page migration.
                 */
                rc = mapping->a_ops->migratepage(mapping,
                                                newpage, page);
        else
                rc = fallback_migrate_page(mapping, newpage, page);

        if (!rc)
                remove_migration_ptes(page, newpage);
        else
                newpage->mapping = NULL;

        unlock_page(newpage);

        return rc;
}

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                        struct page *page, int force)
{
        int rc = 0;
        int *result = NULL;
        struct page *newpage = get_new_page(page, private, &result);
        int rcu_locked = 0;

        if (!newpage)
                return -ENOMEM;

        if (page_count(page) == 1)
                /* page was freed from under us. So we are done. */
                goto move_newpage;

        rc = -EAGAIN;
        if (TestSetPageLocked(page)) {
                if (!force)
                        goto move_newpage;
                lock_page(page);
        }

        if (PageWriteback(page)) {
                if (!force)
                        goto unlock;
                wait_on_page_writeback(page);
        }
        /*
         * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
         * we cannot notice that anon_vma is freed while we migrate a page.
         * This rcu_read_lock() delays freeing the anon_vma pointer until the
         * end of migration. File cache pages are no problem because of
         * page_lock(): file caches may use writepage() or lock_page() during
         * migration, so only anonymous pages need care here.
         */
        if (PageAnon(page)) {
                rcu_read_lock();
                rcu_locked = 1;
        }
        /*
         * This is a corner case handling.
         * When a new swap-cache page is read into, it is linked to the LRU
         * and treated as swapcache but it has no rmap yet.
         * Calling try_to_unmap() against a page->mapping==NULL page is
         * a BUG. So handle it here.
         */
        if (!page->mapping)
                goto rcu_unlock;
        /* Establish migration ptes or remove ptes */
        try_to_unmap(page, 1);

        if (!page_mapped(page))
                rc = move_to_new_page(newpage, page);

        if (rc)
                remove_migration_ptes(page, page);
rcu_unlock:
        if (rcu_locked)
                rcu_read_unlock();

unlock:

        unlock_page(page);

        if (rc != -EAGAIN) {
                /*
                 * A page that has been migrated has all references
                 * removed and will be freed. A page that has not been
                 * migrated will have kept its references and be
                 * restored.
                 */
                list_del(&page->lru);
                move_to_lru(page);
        }

move_newpage:
        /*
         * Move the new page to the LRU. If migration was not successful
         * then this will free the page.
         */
        move_to_lru(newpage);
        if (result) {
                if (rc)
                        *result = rc;
                else
                        *result = page_to_nid(newpage);
        }
        return rc;
}

/*
 * migrate_pages
 *
 * The function takes one list of pages to migrate and a function
 * that determines from the page to be migrated and the private data
 * the target of the move and allocates the page.
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because the list has become empty
 * or no retryable pages exist anymore. All pages will be
 * returned to the LRU or freed.
 *
 * Return: Number of pages not migrated or error code.
 */
int migrate_pages(struct list_head *from,
                new_page_t get_new_page, unsigned long private)
{
        int retry = 1;
        int nr_failed = 0;
        int pass = 0;
        struct page *page;
        struct page *page2;
        int swapwrite = current->flags & PF_SWAPWRITE;
        int rc;

        if (!swapwrite)
                current->flags |= PF_SWAPWRITE;

        for(pass = 0; pass < 10 && retry; pass++) {
                retry = 0;

                list_for_each_entry_safe(page, page2, from, lru) {
                        cond_resched();

                        rc = unmap_and_move(get_new_page, private,
                                                page, pass > 2);

                        switch(rc) {
                        case -ENOMEM:
                                goto out;
                        case -EAGAIN:
                                retry++;
                                break;
                        case 0:
                                break;
                        default:
                                /* Permanent failure */
                                nr_failed++;
                                break;
                        }
                }
        }
        rc = 0;
out:
        if (!swapwrite)
                current->flags &= ~PF_SWAPWRITE;

        putback_lru_pages(from);

        if (rc)
                return rc;

        return nr_failed + retry;
}
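
/*
 * Illustrative sketch, not part of the original migrate.c: a caller of
 * migrate_pages() supplies a new_page_t allocator that chooses where each
 * page goes, much like new_page_node() below does for sys_move_pages().
 * "alloc_on_target_node", "pagelist" and "target_node" are placeholder
 * names for this sketch only.
 */
#if 0
static struct page *alloc_on_target_node(struct page *page,
                        unsigned long private, int **result)
{
        return alloc_pages_node((int)private, GFP_HIGHUSER_MOVABLE, 0);
}

        /* ... with "pagelist" filled via isolate_lru_page(): */
        nr_failed = migrate_pages(&pagelist, alloc_on_target_node, target_node);
#endif  /* end of illustrative sketch */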

#ifdef CONFIG_NUMA
/*
 * Move a list of individual pages
 */
struct page_to_node {
        unsigned long addr;
        struct page *page;
        int node;
        int status;
};

static struct page *new_page_node(struct page *p, unsigned long private,
                int **result)
{
        struct page_to_node *pm = (struct page_to_node *)private;

        while (pm->node != MAX_NUMNODES && pm->page != p)
                pm++;

        if (pm->node == MAX_NUMNODES)
                return NULL;

        *result = &pm->status;

        return alloc_pages_node(pm->node,
                                GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
}

/*
 * Move a set of pages as indicated in the pm array. The addr
 * field must be set to the virtual address of the page to be moved
 * and the node number must contain a valid target node.
 */
static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
                                int migrate_all)
{
        int err;
        struct page_to_node *pp;
        LIST_HEAD(pagelist);

        down_read(&mm->mmap_sem);

        /*
         * Build a list of pages to migrate
         */
        migrate_prep();
        for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
                struct vm_area_struct *vma;
                struct page *page;

                /*
                 * A valid page pointer that will not match any of the
                 * pages that will be moved.
                 */
                pp->page = ZERO_PAGE(0);

                err = -EFAULT;
                vma = find_vma(mm, pp->addr);
                if (!vma || !vma_migratable(vma))
                        goto set_status;

                page = follow_page(vma, pp->addr, FOLL_GET);
                err = -ENOENT;
                if (!page)
                        goto set_status;

                if (PageReserved(page))         /* Check for zero page */
                        goto put_and_set;

                pp->page = page;
                err = page_to_nid(page);

                if (err == pp->node)
                        /*
                         * Node already in the right place
                         */
                        goto put_and_set;

                err = -EACCES;
                if (page_mapcount(page) > 1 &&
                                !migrate_all)
                        goto put_and_set;

                err = isolate_lru_page(page, &pagelist);
put_and_set:
                /*
                 * Either remove the duplicate refcount from
                 * isolate_lru_page() or drop the page ref if it was
                 * not isolated.
                 */
                put_page(page);
set_status:
                pp->status = err;
        }

        if (!list_empty(&pagelist))
                err = migrate_pages(&pagelist, new_page_node,
                                (unsigned long)pm);
        else
                err = -ENOENT;

        up_read(&mm->mmap_sem);
        return err;
}

/*
 * Determine the nodes of a list of pages. The addr in the pm array
 * must have been set to the virtual address for which we want to determine
 * the node number.
 */
static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
{
        down_read(&mm->mmap_sem);

        for ( ; pm->node != MAX_NUMNODES; pm++) {
                struct vm_area_struct *vma;
                struct page *page;
                int err;

                err = -EFAULT;
                vma = find_vma(mm, pm->addr);
                if (!vma)
                        goto set_status;

                page = follow_page(vma, pm->addr, 0);
                err = -ENOENT;
                /* Use PageReserved to check for zero page */
                if (!page || PageReserved(page))
                        goto set_status;

                err = page_to_nid(page);
set_status:
                pm->status = err;
        }

        up_read(&mm->mmap_sem);
        return 0;
}

/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
                        const void __user * __user *pages,
                        const int __user *nodes,
                        int __user *status, int flags)
{
        int err = 0;
        int i;
        struct task_struct *task;
        nodemask_t task_nodes;
        struct mm_struct *mm;
        struct page_to_node *pm = NULL;

        /* Check flags */
        if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
                return -EINVAL;

        if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
                return -EPERM;

        /* Find the mm_struct */
        read_lock(&tasklist_lock);
        task = pid ? find_task_by_vpid(pid) : current;
        if (!task) {
                read_unlock(&tasklist_lock);
                return -ESRCH;
        }
        mm = get_task_mm(task);
        read_unlock(&tasklist_lock);

        if (!mm)
                return -EINVAL;

        /*
         * Check if this process has the right to modify the specified
         * process. The right exists if the process has administrative
         * capabilities, superuser privileges or the same
         * userid as the target process.
         */
        if ((current->euid != task->suid) && (current->euid != task->uid) &&
            (current->uid != task->suid) && (current->uid != task->uid) &&
            !capable(CAP_SYS_NICE)) {
                err = -EPERM;
                goto out2;
        }

        err = security_task_movememory(task);
        if (err)
                goto out2;


        task_nodes = cpuset_mems_allowed(task);

        /* Limit nr_pages so that the multiplication may not overflow */
        if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
                err = -E2BIG;
                goto out2;
        }

        pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
        if (!pm) {
                err = -ENOMEM;
                goto out2;
        }

        /*
         * Get parameters from user space and initialize the pm
         * array. Return various errors if the user did something wrong.
         */
        for (i = 0; i < nr_pages; i++) {
                const void __user *p;

                err = -EFAULT;
                if (get_user(p, pages + i))
                        goto out;

                pm[i].addr = (unsigned long)p;
                if (nodes) {
                        int node;

                        if (get_user(node, nodes + i))
                                goto out;

                        err = -ENODEV;
                        if (!node_state(node, N_HIGH_MEMORY))
                                goto out;

                        err = -EACCES;
                        if (!node_isset(node, task_nodes))
                                goto out;

                        pm[i].node = node;
                } else
                        pm[i].node = 0;  /* anything to not match MAX_NUMNODES */
        }
        /* End marker */
        pm[nr_pages].node = MAX_NUMNODES;

        if (nodes)
                err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
        else
                err = do_pages_stat(mm, pm);

        if (err >= 0)
                /* Return status information */
                for (i = 0; i < nr_pages; i++)
                        if (put_user(pm[i].status, status + i))
                                err = -EFAULT;

out:
        vfree(pm);
out2:
        mmput(mm);
        return err;
}
#endif
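
/*
 * Illustrative sketch, not part of the original migrate.c: the matching
 * user-space side of sys_move_pages() above, via the move_pages(2)
 * wrapper in libnuma's <numaif.h>. "buf" and the target node are
 * placeholders for this sketch only.
 */
#if 0
        #include <numaif.h>

        void *pages[1] = { buf };
        int nodes[1] = { 1 };
        int status[1];

        if (move_pages(0 /* current process */, 1, pages, nodes,
                        status, MPOL_MF_MOVE))
                perror("move_pages");
#endif  /* end of illustrative sketch */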

/*
 * Call migration functions in the vma_ops that may prepare
 * memory in a vm for migration. Migration functions may perform
 * the migration for vmas that do not have an underlying page struct.
 */
int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
        const nodemask_t *from, unsigned long flags)
{
        struct vm_area_struct *vma;
        int err = 0;

        for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
                if (vma->vm_ops && vma->vm_ops->migrate) {
                        err = vma->vm_ops->migrate(vma, to, from, flags);
                        if (err)
                                break;
                }
        }
        return err;
}
