/*
 *  linux/mm/vmscan.c
 *
 *  The pageout daemon, decides which pages to evict (swap out) and
 *  does the actual work of freeing them.
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95, Stephen Tweedie.
 *  kswapd added: 7.1.96  sct
 *  Removed kswapd_ctl limits, and swap out as many pages as needed
 *  to bring the system back to freepages.high: 2.4.97, Rik van Riel.
 *  Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
 *  Multiqueue VM started 5.8.00, Rik van Riel.
 */

#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/file.h>

#include <asm/pgalloc.h>

/*
 * "vm_passes" is the number of vm passes before failing the
 * memory balancing. Take into account that 3 passes are needed
 * for a flush/wait/free cycle and that we only scan 1/vm_cache_scan_ratio
 * of the inactive list at each pass.
 */
int vm_passes = 60;

/*
 * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan
 * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll
 * scan 1/6 of the inactive lists during a normal aging round.
 */
int vm_cache_scan_ratio = 6;

/*
 * "vm_mapped_ratio" controls the pageout rate: the smaller it is,
 * the earlier we'll start to page out.
 */
int vm_mapped_ratio = 100;

/*
 * "vm_lru_balance_ratio" controls the balance between active and
 * inactive cache. The bigger vm_lru_balance_ratio is, the easier the
 * active cache will grow, because we'll rotate the active list
 * slowly. A value of 2 means we'll go towards a balance of
 * 1/3 of the cache being inactive.
 */
int vm_lru_balance_ratio = 2;

/*
 * "vm_vfs_scan_ratio" is what proportion of the VFS queues we will scan
 * in one go. A value of 6 for vm_vfs_scan_ratio implies that 1/6th of
 * the unused-inode, dentry and dquot caches will be freed during a normal
 * aging round.
 */
int vm_vfs_scan_ratio = 6;
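
/*
 * Worked example (illustrative numbers): with vm_cache_scan_ratio == 6
 * and 6000 pages on the classzone's active+inactive lists, shrink_cache()
 * below will examine at most 1000 inactive pages belonging to the
 * classzone per pass.  With vm_mapped_ratio == 100 it will tolerate up
 * to 100 mapped pages per requested page before falling back to
 * swap_out().
 */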
 
/*
 * The swap-out function returns 1 if it successfully
 * scanned all the pages it was asked to (`count').
 * It returns zero if it couldn't do anything.
 *
 * rss may decrease because pages are shared, but this
 * doesn't count as having freed a page.
 */
 
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
        pte_t pte;
        swp_entry_t entry;

        /* Don't look at this pte if it's been accessed recently. */
        if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
                mark_page_accessed(page);
                return 0;
        }

        /* Don't bother unmapping pages that are active */
        if (PageActive(page))
                return 0;

        /* Don't bother replenishing zones not under pressure.. */
        if (!memclass(page_zone(page), classzone))
                return 0;

        if (TryLockPage(page))
                return 0;

        /* From this point on, the odds are that we're going to
         * nuke this pte, so read and clear the pte.  This hook
         * is needed on CPUs which update the accessed and dirty
         * bits in hardware.
         */
        flush_cache_page(vma, address);
        pte = ptep_get_and_clear(page_table);
        flush_tlb_page(vma, address);

        if (pte_dirty(pte))
                set_page_dirty(page);

        /*
         * Is the page already in the swap cache? If so, then
         * we can just drop our reference to it without doing
         * any IO - it's already up-to-date on disk.
         */
        if (PageSwapCache(page)) {
                entry.val = page->index;
                swap_duplicate(entry);
set_swap_pte:
                set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
                mm->rss--;
                UnlockPage(page);
                {
                        int freeable = page_count(page) - !!page->buffers <= 2;
                        page_cache_release(page);
                        return freeable;
                }
        }

        /*
         * Is it a clean page? Then it must be recoverable
         * by just paging it in again, and we can just drop
         * it..  or if it's dirty but has backing store,
         * just mark the page dirty and drop it.
         *
         * However, this won't actually free any real
         * memory, as the page will just be in the page cache
         * somewhere, and as such we should just continue
         * our scan.
         *
         * Basically, this just makes it possible for us to do
         * some real work in the future in "refill_inactive()".
         */
        if (page->mapping)
                goto drop_pte;
        if (!PageDirty(page))
                goto drop_pte;

        /*
         * Anonymous buffercache pages can be left behind by
         * concurrent truncate and pagefault.
         */
        if (page->buffers)
                goto preserve;

        /*
         * This is a dirty, swappable page.  First of all,
         * get a suitable swap entry for it, and make sure
         * we have the swap cache set up to associate the
         * page with that swap entry.
         */
        for (;;) {
                entry = get_swap_page();
                if (!entry.val)
                        break;
                /* Add it to the swap cache and mark it dirty
                 * (adding to the page cache will clear the dirty
                 * and uptodate bits, so we need to do it again)
                 */
                if (add_to_swap_cache(page, entry) == 0) {
                        SetPageUptodate(page);
                        set_page_dirty(page);
                        goto set_swap_pte;
                }
                /* Raced with "speculative" read_swap_cache_async */
                swap_free(entry);
        }

        /* No swap space left */
preserve:
        set_pte(page_table, pte);
        UnlockPage(page);
        return 0;
}
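
/*
 * The address-space walk is layered: swap_out() picks an mm from the
 * mmlist, swap_out_mm() iterates over its vmas, and swap_out_vma(),
 * swap_out_pgd() and swap_out_pmd() descend the page tables until
 * try_to_swap_out() above handles a single pte.  Every level counts
 * down "count" and bails out as soon as it reaches zero.
 */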
 
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone)
{
        pte_t * pte;
        unsigned long pmd_end;

        if (pmd_none(*dir))
                return count;
        if (pmd_bad(*dir)) {
                pmd_ERROR(*dir);
                pmd_clear(dir);
                return count;
        }

        pte = pte_offset(dir, address);

        pmd_end = (address + PMD_SIZE) & PMD_MASK;
        if (end > pmd_end)
                end = pmd_end;

        do {
                if (pte_present(*pte)) {
                        struct page *page = pte_page(*pte);

                        if (VALID_PAGE(page) && !PageReserved(page)) {
                                count -= try_to_swap_out(mm, vma, address, pte, page, classzone);
                                if (!count) {
                                        address += PAGE_SIZE;
                                        break;
                                }
                        }
                }
                address += PAGE_SIZE;
                pte++;
        } while (address && (address < end));
        mm->swap_address = address;
        return count;
}
 
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone)
{
        pmd_t * pmd;
        unsigned long pgd_end;

        if (pgd_none(*dir))
                return count;
        if (pgd_bad(*dir)) {
                pgd_ERROR(*dir);
                pgd_clear(dir);
                return count;
        }

        pmd = pmd_offset(dir, address);

        pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
        if (pgd_end && (end > pgd_end))
                end = pgd_end;

        do {
                count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone);
                if (!count)
                        break;
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address && (address < end));
        return count;
}
 
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count, zone_t * classzone)
{
        pgd_t *pgdir;
        unsigned long end;

        /* Don't swap out areas which are reserved */
        if (vma->vm_flags & VM_RESERVED)
                return count;

        pgdir = pgd_offset(mm, address);

        end = vma->vm_end;
        BUG_ON(address >= end);
        do {
                count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone);
                if (!count)
                        break;
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                pgdir++;
        } while (address && (address < end));
        return count;
}
 
/* Placeholder for swap_out(): may be updated by fork.c:mmput() */
struct mm_struct *swap_mm = &init_mm;

/*
 * Returns the remaining count of pages to be swapped out by a follow-up call.
 */
static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone)
{
        unsigned long address;
        struct vm_area_struct* vma;

        /*
         * Find the proper vm-area after freezing the vma chain
         * and ptes.
         */
        spin_lock(&mm->page_table_lock);
        address = mm->swap_address;
        if (address == TASK_SIZE || swap_mm != mm) {
                /* We raced: don't count this mm but try again */
                ++*mmcounter;
                goto out_unlock;
        }
        vma = find_vma(mm, address);
        if (vma) {
                if (address < vma->vm_start)
                        address = vma->vm_start;

                for (;;) {
                        count = swap_out_vma(mm, vma, address, count, classzone);
                        vma = vma->vm_next;
                        if (!vma)
                                break;
                        if (!count)
                                goto out_unlock;
                        address = vma->vm_start;
                }
        }
        /* Indicate that we reached the end of address space */
        mm->swap_address = TASK_SIZE;

out_unlock:
        spin_unlock(&mm->page_table_lock);
        return count;
}
 
static int FASTCALL(swap_out(zone_t * classzone));
static int swap_out(zone_t * classzone)
{
        int counter, nr_pages = SWAP_CLUSTER_MAX;
        struct mm_struct *mm;

        counter = mmlist_nr << 1;
        do {
                if (unlikely(current->need_resched)) {
                        __set_current_state(TASK_RUNNING);
                        schedule();
                }

                spin_lock(&mmlist_lock);
                mm = swap_mm;
                while (mm->swap_address == TASK_SIZE || mm == &init_mm) {
                        mm->swap_address = 0;
                        mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
                        if (mm == swap_mm)
                                goto empty;
                        swap_mm = mm;
                }

                /* Make sure the mm doesn't disappear when we drop the lock.. */
                atomic_inc(&mm->mm_users);
                spin_unlock(&mmlist_lock);

                nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone);

                mmput(mm);

                if (!nr_pages)
                        return 1;
        } while (--counter >= 0);

        return 0;

empty:
        spin_unlock(&mmlist_lock);
        return 0;
}
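
/*
 * Note that swap_out() keeps a persistent cursor (swap_mm and each
 * mm->swap_address) across calls, so successive invocations spread the
 * unmapping work round-robin over all address spaces on the mmlist
 * instead of repeatedly penalising the same process.
 */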
 
static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone));
static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout));
static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
{
        struct list_head * entry;
        int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio;
        int max_mapped = vm_mapped_ratio * nr_pages;

        while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
                struct page * page;

                if (unlikely(current->need_resched)) {
                        spin_unlock(&pagemap_lru_lock);
                        __set_current_state(TASK_RUNNING);
                        schedule();
                        spin_lock(&pagemap_lru_lock);
                        continue;
                }

                page = list_entry(entry, struct page, lru);

                BUG_ON(!PageLRU(page));
                BUG_ON(PageActive(page));

                list_del(entry);
                list_add(entry, &inactive_list);

                /*
                 * Zero page counts can happen because we unlink the pages
                 * _after_ decrementing the usage count..
                 */
                if (unlikely(!page_count(page)))
                        continue;

                if (!memclass(page_zone(page), classzone))
                        continue;

                max_scan--;

                /* Racy check to avoid trylocking when not worthwhile */
                if (!page->buffers && (page_count(page) != 1 || !page->mapping))
                        goto page_mapped;

                /*
                 * The page is locked. IO in progress?
                 * Move it to the back of the list.
                 */
                if (unlikely(TryLockPage(page))) {
                        if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
                                page_cache_get(page);
                                spin_unlock(&pagemap_lru_lock);
                                wait_on_page(page);
                                page_cache_release(page);
                                spin_lock(&pagemap_lru_lock);
                        }
                        continue;
                }

                if (PageDirty(page) && is_page_cache_freeable(page) && page->mapping) {
                        /*
                         * It is not critical here to write it only if
                         * the page is unmapped because any direct writer
                         * like O_DIRECT would set the PG_dirty bitflag
                         * on the physical page after having successfully
                         * pinned it and after the I/O to the page is finished,
                         * so direct writes to the page cannot get lost.
                         */
                        int (*writepage)(struct page *);

                        writepage = page->mapping->a_ops->writepage;
                        if ((gfp_mask & __GFP_FS) && writepage) {
                                ClearPageDirty(page);
                                SetPageLaunder(page);
                                page_cache_get(page);
                                spin_unlock(&pagemap_lru_lock);

                                writepage(page);
                                page_cache_release(page);

                                spin_lock(&pagemap_lru_lock);
                                continue;
                        }
                }

                /*
                 * If the page has buffers, try to free the buffer mappings
                 * associated with this page. If we succeed we try to free
                 * the page as well.
                 */
                if (page->buffers) {
                        spin_unlock(&pagemap_lru_lock);

                        /* avoid freeing a locked page */
                        page_cache_get(page);

                        if (try_to_release_page(page, gfp_mask)) {
                                if (!page->mapping) {
                                        /*
                                         * We must not allow an anon page
                                         * with no buffers to be visible on
                                         * the LRU, so we unlock the page after
                                         * taking the lru lock
                                         */
                                        spin_lock(&pagemap_lru_lock);
                                        UnlockPage(page);
                                        __lru_cache_del(page);

                                        /* effectively free the page here */
                                        page_cache_release(page);

                                        if (--nr_pages)
                                                continue;
                                        break;
                                } else {
                                        /*
                                         * The page is still in the pagecache, so undo
                                         * what we did before the try_to_release_page:
                                         * we've not finished with it and can now try
                                         * the next step.
                                         */
                                        page_cache_release(page);

                                        spin_lock(&pagemap_lru_lock);
                                }
                        } else {
                                /* failed to drop the buffers so stop here */
                                UnlockPage(page);
                                page_cache_release(page);

                                spin_lock(&pagemap_lru_lock);
                                continue;
                        }
                }

                spin_lock(&pagecache_lock);

                /*
                 * This is the non-racy check for busy page.
                 * It is critical to check PageDirty _after_ we made sure
                 * the page is freeable so not in use by anybody.
                 * At this point we're guaranteed that page->buffers is NULL,
                 * nobody can refill page->buffers under us because we still
                 * hold the page lock.
                 */
                if (!page->mapping || page_count(page) > 1) {
                        spin_unlock(&pagecache_lock);
                        UnlockPage(page);
page_mapped:
                        if (--max_mapped < 0) {
                                spin_unlock(&pagemap_lru_lock);

                                nr_pages -= kmem_cache_reap(gfp_mask);
                                if (nr_pages <= 0)
                                        goto out;

                                shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
                                shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
#ifdef CONFIG_QUOTA
                                shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
#endif

                                if (!*failed_swapout)
                                        *failed_swapout = !swap_out(classzone);

                                max_mapped = nr_pages * vm_mapped_ratio;

                                spin_lock(&pagemap_lru_lock);
                                refill_inactive(nr_pages, classzone);
                        }
                        continue;

                }
                if (PageDirty(page)) {
                        spin_unlock(&pagecache_lock);
                        UnlockPage(page);
                        continue;
                }

                __lru_cache_del(page);

                /* point of no return */
                if (likely(!PageSwapCache(page))) {
                        __remove_inode_page(page);
                        spin_unlock(&pagecache_lock);
                } else {
                        swp_entry_t swap;
                        swap.val = page->index;
                        __delete_from_swap_cache(page);
                        spin_unlock(&pagecache_lock);
                        swap_free(swap);
                }

                UnlockPage(page);

                /* effectively free the page here */
                page_cache_release(page);

                if (--nr_pages)
                        continue;
                break;
        }
        spin_unlock(&pagemap_lru_lock);

 out:
        return nr_pages;
}
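
/*
 * shrink_cache() returns how many of the requested pages it could not
 * free; 0 means the request was fully satisfied.  Per page it tries, in
 * order: skip or rotate busy pages, start writepage() on dirty pagecache,
 * drop buffer heads via try_to_release_page(), and finally remove the
 * page from the pagecache or swap cache and free it.
 */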
 
/*
 * This moves pages from the active list to
 * the inactive list.
 *
 * We move them the other way when we see the
 * reference bit on the page.
 */
static void refill_inactive(int nr_pages, zone_t * classzone)
{
        struct list_head * entry;
        unsigned long ratio;

        ratio = (unsigned long) nr_pages * classzone->nr_active_pages / (((unsigned long) classzone->nr_inactive_pages * vm_lru_balance_ratio) + 1);

        entry = active_list.prev;
        while (ratio && entry != &active_list) {
                struct page * page;

                page = list_entry(entry, struct page, lru);
                entry = entry->prev;
                if (PageTestandClearReferenced(page)) {
                        list_del(&page->lru);
                        list_add(&page->lru, &active_list);
                        continue;
                }

                ratio--;

                del_page_from_active_list(page);
                add_page_to_inactive_list(page);
                SetPageReferenced(page);
        }

        if (entry != &active_list) {
                list_del(&active_list);
                list_add(&active_list, entry);
        }
}
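
/*
 * The ratio above scales the deactivation work to the current LRU
 * balance: e.g. asking for 32 pages with 9000 active pages, 3000
 * inactive pages and vm_lru_balance_ratio == 2 gives
 * 32 * 9000 / (3000 * 2 + 1) ~= 47 pages moved to the inactive list.
 * Once the inactive list holds about a third of the cache, the ratio
 * falls back towards nr_pages.
 */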
 
static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout));
static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout)
{
        nr_pages -= kmem_cache_reap(gfp_mask);
        if (nr_pages <= 0)
                goto out;

        spin_lock(&pagemap_lru_lock);
        refill_inactive(nr_pages, classzone);

        nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, failed_swapout);

out:
        return nr_pages;
}
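
/*
 * Each balancing pass thus runs the full cascade: reap the slab caches,
 * refill the inactive list from the active one, and only then walk the
 * inactive list in shrink_cache().
 */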
 
static int check_classzone_need_balance(zone_t * classzone);

int try_to_free_pages_zone(zone_t *classzone, unsigned int gfp_mask)
{
        gfp_mask = pf_gfp_mask(gfp_mask);

        for (;;) {
                int tries = vm_passes;
                int failed_swapout = !(gfp_mask & __GFP_IO);
                int nr_pages = SWAP_CLUSTER_MAX;

                do {
                        nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout);
                        if (nr_pages <= 0)
                                return 1;
                        shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask);
                        shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask);
#ifdef CONFIG_QUOTA
                        shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask);
#endif
                        if (!failed_swapout)
                                failed_swapout = !swap_out(classzone);
                } while (--tries);

#ifdef  CONFIG_OOM_KILLER
        out_of_memory();
#else
        if (likely(current->pid != 1))
                break;
        if (!check_classzone_need_balance(classzone))
                break;

        __set_current_state(TASK_RUNNING);
        yield();
#endif
        }

        return 0;
}
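
/*
 * After vm_passes unsuccessful passes, try_to_free_pages_zone() either
 * invokes the OOM killer and starts over (CONFIG_OOM_KILLER), or gives
 * up and returns 0.  Only init (pid 1) keeps yielding and retrying for
 * as long as the classzone still needs balancing.
 */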
 
int try_to_free_pages(unsigned int gfp_mask)
{
        pg_data_t *pgdat;
        zonelist_t *zonelist;
        unsigned long pf_free_pages;
        int error = 0;

        pf_free_pages = current->flags & PF_FREE_PAGES;
        current->flags &= ~PF_FREE_PAGES;

        for_each_pgdat(pgdat) {
                zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK);
                error |= try_to_free_pages_zone(zonelist->zones[0], gfp_mask);
        }

        current->flags |= pf_free_pages;
        return error;
}
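
/*
 * zones[0] is the first, i.e. most preferred, zone of the node's
 * zonelist for this gfp_mask, so each node gets balanced against the
 * classzone an allocation of this type would try first.
 */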
 
DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);

static int check_classzone_need_balance(zone_t * classzone)
{
        zone_t * first_zone;
        int class_idx = zone_idx(classzone);

        first_zone = classzone->zone_pgdat->node_zones;
        while (classzone >= first_zone) {
                if (classzone->free_pages > classzone->watermarks[class_idx].high)
                        return 0;
                classzone--;
        }
        return 1;
}
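
/*
 * The walk goes from the classzone down to the node's lowest zone: the
 * class is considered balanced as soon as any of those zones sits above
 * its "high" watermark for this class index.
 */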
 
static int kswapd_balance_pgdat(pg_data_t * pgdat)
{
        int need_more_balance = 0, i;
        zone_t * zone;

        for (i = pgdat->nr_zones-1; i >= 0; i--) {
                zone = pgdat->node_zones + i;
                if (unlikely(current->need_resched))
                        schedule();
                if (!zone->need_balance || !zone->size)
                        continue;
                if (!try_to_free_pages_zone(zone, GFP_KSWAPD)) {
                        zone->need_balance = 0;
                        __set_current_state(TASK_INTERRUPTIBLE);
                        schedule_timeout(HZ*5);
                        continue;
                }
                if (check_classzone_need_balance(zone))
                        need_more_balance = 1;
                else
                        zone->need_balance = 0;
        }

        return need_more_balance;
}
 
static void kswapd_balance(void)
{
        int need_more_balance;
        pg_data_t * pgdat;

        do {
                need_more_balance = 0;

                for_each_pgdat(pgdat)
                        need_more_balance |= kswapd_balance_pgdat(pgdat);
        } while (need_more_balance);
}
 
static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
{
        zone_t * zone;
        int i;

        for (i = pgdat->nr_zones-1; i >= 0; i--) {
                zone = pgdat->node_zones + i;
                if (!zone->need_balance || !zone->size)
                        continue;
                return 0;
        }

        return 1;
}
 
static int kswapd_can_sleep(void)
{
        pg_data_t * pgdat;

        for_each_pgdat(pgdat) {
                if (!kswapd_can_sleep_pgdat(pgdat))
                        return 0;
        }

        return 1;
}
 
/*
 * The background pageout daemon, started as a kernel thread
 * from the init process.
 *
 * This basically trickles out pages so that we have _some_
 * free memory available even if there is no other activity
 * that frees anything up. This is needed for things like routing
 * etc., where we otherwise might have all activity going on in
 * asynchronous contexts that cannot page things out.
 *
 * If there are applications that are active memory-allocators
 * (most normal use), this basically shouldn't matter.
 */
int kswapd(void *unused)
{
        struct task_struct *tsk = current;
        DECLARE_WAITQUEUE(wait, tsk);

        daemonize();
        strcpy(tsk->comm, "kswapd");
        sigfillset(&tsk->blocked);

        /*
         * Tell the memory management that we're a "memory allocator",
         * and that if we need more memory we should get access to it
         * regardless (see "__alloc_pages()"). "kswapd" should
         * never get caught in the normal page freeing logic.
         *
         * (Kswapd normally doesn't need memory anyway, but sometimes
         * you need a small amount of memory in order to be able to
         * page out something else, and this flag essentially protects
         * us from recursively trying to free more memory as we're
         * trying to free the first piece of memory in the first place).
         */
        tsk->flags |= PF_MEMALLOC;

        /*
         * Kswapd main loop.
         */
        for (;;) {
                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&kswapd_wait, &wait);

                mb();
                if (kswapd_can_sleep())
                        schedule();

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&kswapd_wait, &wait);

                /*
                 * If we actually get into a low-memory situation,
                 * the processes needing more memory will wake us
                 * up on a more timely basis.
                 */
                kswapd_balance();
                run_task_queue(&tq_disk);
        }
}
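
/*
 * The mb() in the loop above orders setting TASK_INTERRUPTIBLE and
 * queueing on kswapd_wait before the need_balance checks done by
 * kswapd_can_sleep(), pairing with the barrier the page allocator is
 * expected to issue after it marks a zone as needing balance, so a
 * wakeup cannot slip in between the check and schedule().
 */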
 
static int __init kswapd_init(void)
{
        printk("Starting kswapd\n");
        swap_setup();
        kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
        return 0;
}

module_init(kswapd_init)
