/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Original design by Rik van Riel <riel@conectiva.com.br> 2001
 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
 * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_mutex       (while writing or truncating, not reading or faulting)
 *   inode->i_alloc_sem (vmtruncate_range)
 *   mm->mmap_sem
 *     page->flags PG_locked (lock_page)
 *       mapping->i_mmap_lock
 *         anon_vma->lock
 *           mm->page_table_lock or pte_lock
 *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
 *             swap_lock (in swap_duplicate, swap_info_get)
 *               mmlist_lock (in mmput, drain_mmlist and others)
 *               mapping->private_lock (in __set_page_dirty_buffers)
 *               inode_lock (in set_page_dirty's __mark_inode_dirty)
 *                 sb_lock (within inode_lock in fs/fs-writeback.c)
 *                 mapping->tree_lock (widely used, in set_page_dirty,
 *                           in arch-dependent flush_dcache_mmap_lock,
 *                           within inode_lock in __sync_single_inode)
 *                   zone->lock (within radix tree node alloc)
 */
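
/*
 * For example, the file-backed rmap walks below follow this nesting:
 * page_referenced_file() is entered with the page already locked
 * (PG_locked), takes mapping->i_mmap_lock, and page_check_address()
 * then takes the pte lock for each vma it visits; the locks are
 * released in the reverse order.
 */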

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
#include <linux/kallsyms.h>

#include <asm/tlbflush.h>

struct kmem_cache *anon_vma_cachep;

/* This must be called under the mmap_sem. */
int anon_vma_prepare(struct vm_area_struct *vma)
{
        struct anon_vma *anon_vma = vma->anon_vma;

        might_sleep();
        if (unlikely(!anon_vma)) {
                struct mm_struct *mm = vma->vm_mm;
                struct anon_vma *allocated, *locked;

                anon_vma = find_mergeable_anon_vma(vma);
                if (anon_vma) {
                        allocated = NULL;
                        locked = anon_vma;
                        spin_lock(&locked->lock);
                } else {
                        anon_vma = anon_vma_alloc();
                        if (unlikely(!anon_vma))
                                return -ENOMEM;
                        allocated = anon_vma;
                        locked = NULL;
                }

                /* page_table_lock to protect against threads */
                spin_lock(&mm->page_table_lock);
                if (likely(!vma->anon_vma)) {
                        vma->anon_vma = anon_vma;
                        list_add_tail(&vma->anon_vma_node, &anon_vma->head);
                        allocated = NULL;
                }
                spin_unlock(&mm->page_table_lock);

                if (locked)
                        spin_unlock(&locked->lock);
                if (unlikely(allocated))
                        anon_vma_free(allocated);
        }
        return 0;
}

void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
{
        BUG_ON(vma->anon_vma != next->anon_vma);
        list_del(&next->anon_vma_node);
}

void __anon_vma_link(struct vm_area_struct *vma)
{
        struct anon_vma *anon_vma = vma->anon_vma;

        if (anon_vma)
                list_add_tail(&vma->anon_vma_node, &anon_vma->head);
}

void anon_vma_link(struct vm_area_struct *vma)
{
        struct anon_vma *anon_vma = vma->anon_vma;

        if (anon_vma) {
                spin_lock(&anon_vma->lock);
                list_add_tail(&vma->anon_vma_node, &anon_vma->head);
                spin_unlock(&anon_vma->lock);
        }
}

void anon_vma_unlink(struct vm_area_struct *vma)
{
        struct anon_vma *anon_vma = vma->anon_vma;
        int empty;

        if (!anon_vma)
                return;

        spin_lock(&anon_vma->lock);
        list_del(&vma->anon_vma_node);

        /* We must garbage collect the anon_vma if it's empty */
        empty = list_empty(&anon_vma->head);
        spin_unlock(&anon_vma->lock);

        if (empty)
                anon_vma_free(anon_vma);
}

static void anon_vma_ctor(struct kmem_cache *cachep, void *data)
{
        struct anon_vma *anon_vma = data;

        spin_lock_init(&anon_vma->lock);
        INIT_LIST_HEAD(&anon_vma->head);
}

void __init anon_vma_init(void)
{
        anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
                        0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
}

/*
 * Getting a lock on a stable anon_vma from a page off the LRU is
 * tricky: page_lock_anon_vma relies on RCU to guard against the races.
 */
static struct anon_vma *page_lock_anon_vma(struct page *page)
{
        struct anon_vma *anon_vma;
        unsigned long anon_mapping;

        rcu_read_lock();
        anon_mapping = (unsigned long) page->mapping;
        if (!(anon_mapping & PAGE_MAPPING_ANON))
                goto out;
        if (!page_mapped(page))
                goto out;

        anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
        spin_lock(&anon_vma->lock);
        return anon_vma;
out:
        rcu_read_unlock();
        return NULL;
}

static void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
        spin_unlock(&anon_vma->lock);
        rcu_read_unlock();
}

/*
 * At what user virtual address is page expected in @vma?
 * Returns virtual address or -EFAULT if page's index/offset is not
 * within the range mapped by the @vma.
 */
static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        unsigned long address;

        address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
        if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
                /* page should be within @vma mapping range */
                return -EFAULT;
        }
        return address;
}
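
/*
 * Worked example (assuming 4K pages, so PAGE_SHIFT == 12 and
 * PAGE_CACHE_SHIFT == PAGE_SHIFT, making pgoff equal to page->index):
 * for page->index == 0x110 in a vma with vm_start == 0x40000000 and
 * vm_pgoff == 0x100, the computed address is
 * 0x40000000 + ((0x110 - 0x100) << 12) == 0x40010000, which is returned
 * as long as it falls inside [vm_start, vm_end).
 */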

/*
 * At what user virtual address is page expected in vma? checking that the
 * page matches the vma: currently only used on anon pages, by unuse_vma;
 */
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
        if (PageAnon(page)) {
                if ((void *)vma->anon_vma !=
                    (void *)page->mapping - PAGE_MAPPING_ANON)
                        return -EFAULT;
        } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
                if (!vma->vm_file ||
                    vma->vm_file->f_mapping != page->mapping)
                        return -EFAULT;
        } else
                return -EFAULT;
        return vma_address(page, vma);
}

/*
 * Check that @page is mapped at @address into @mm.
 *
 * On success returns with pte mapped and locked.
 */
pte_t *page_check_address(struct page *page, struct mm_struct *mm,
                          unsigned long address, spinlock_t **ptlp)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        spinlock_t *ptl;

        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                return NULL;

        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                return NULL;

        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                return NULL;

        pte = pte_offset_map(pmd, address);
        /* Make a quick check before getting the lock */
        if (!pte_present(*pte)) {
                pte_unmap(pte);
                return NULL;
        }

        ptl = pte_lockptr(mm, pmd);
        spin_lock(ptl);
        if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
                *ptlp = ptl;
                return pte;
        }
        pte_unmap_unlock(pte, ptl);
        return NULL;
}
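
/*
 * Typical caller pattern, as used by page_referenced_one(),
 * page_mkclean_one() and try_to_unmap_one() below: on a non-NULL return
 * the pte is mapped and its lock is held, so the caller must finish with
 * pte_unmap_unlock(pte, ptl) once it is done with the pte.
 */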

/*
 * Subfunctions of page_referenced: page_referenced_one called
 * repeatedly from either page_referenced_anon or page_referenced_file.
 */
static int page_referenced_one(struct page *page,
        struct vm_area_struct *vma, unsigned int *mapcount)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t *pte;
        spinlock_t *ptl;
        int referenced = 0;

        address = vma_address(page, vma);
        if (address == -EFAULT)
                goto out;

        pte = page_check_address(page, mm, address, &ptl);
        if (!pte)
                goto out;

        if (ptep_clear_flush_young(vma, address, pte))
                referenced++;

        /* Pretend the page is referenced if the task has the
           swap token and is in the middle of a page fault. */
        if (mm != current->mm && has_swap_token(mm) &&
                        rwsem_is_locked(&mm->mmap_sem))
                referenced++;

        (*mapcount)--;
        pte_unmap_unlock(pte, ptl);
out:
        return referenced;
}

static int page_referenced_anon(struct page *page)
{
        unsigned int mapcount;
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
        int referenced = 0;

        anon_vma = page_lock_anon_vma(page);
        if (!anon_vma)
                return referenced;

        mapcount = page_mapcount(page);
        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
                referenced += page_referenced_one(page, vma, &mapcount);
                if (!mapcount)
                        break;
        }

        page_unlock_anon_vma(anon_vma);
        return referenced;
}

/**
 * page_referenced_file - referenced check for object-based rmap
 * @page: the page we're checking references on.
 *
 * For an object-based mapped page, find all the places it is mapped and
 * check/clear the referenced flag. This is done by following the page->mapping
 * pointer, then walking the chain of vmas it holds. It returns the number
 * of references it found.
 *
 * This function is only called from page_referenced for object-based pages.
 */
static int page_referenced_file(struct page *page)
{
        unsigned int mapcount;
        struct address_space *mapping = page->mapping;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct vm_area_struct *vma;
        struct prio_tree_iter iter;
        int referenced = 0;

        /*
         * The caller's checks on page->mapping and !PageAnon have made
         * sure that this is a file page: the check for page->mapping
         * excludes the case just before it gets set on an anon page.
         */
        BUG_ON(PageAnon(page));

        /*
         * The page lock not only makes sure that page->mapping cannot
         * suddenly be NULLified by truncation, it makes sure that the
         * structure at mapping cannot be freed and reused yet,
         * so we can safely take mapping->i_mmap_lock.
         */
        BUG_ON(!PageLocked(page));

        spin_lock(&mapping->i_mmap_lock);

        /*
         * i_mmap_lock does not stabilize mapcount at all, but mapcount
         * is more likely to be accurate if we note it after spinning.
         */
        mapcount = page_mapcount(page);

        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
                                  == (VM_LOCKED|VM_MAYSHARE)) {
                        referenced++;
                        break;
                }
                referenced += page_referenced_one(page, vma, &mapcount);
                if (!mapcount)
                        break;
        }

        spin_unlock(&mapping->i_mmap_lock);
        return referenced;
}

/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 * @is_locked: caller holds lock on the page
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 */
int page_referenced(struct page *page, int is_locked)
{
        int referenced = 0;

        if (page_test_and_clear_young(page))
                referenced++;

        if (TestClearPageReferenced(page))
                referenced++;

        if (page_mapped(page) && page->mapping) {
                if (PageAnon(page))
                        referenced += page_referenced_anon(page);
                else if (is_locked)
                        referenced += page_referenced_file(page);
                else if (TestSetPageLocked(page))
                        referenced++;
                else {
                        if (page->mapping)
                                referenced += page_referenced_file(page);
                        unlock_page(page);
                }
        }
        return referenced;
}

static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t *pte;
        spinlock_t *ptl;
        int ret = 0;

        address = vma_address(page, vma);
        if (address == -EFAULT)
                goto out;

        pte = page_check_address(page, mm, address, &ptl);
        if (!pte)
                goto out;

        if (pte_dirty(*pte) || pte_write(*pte)) {
                pte_t entry;

                flush_cache_page(vma, address, pte_pfn(*pte));
                entry = ptep_clear_flush(vma, address, pte);
                entry = pte_wrprotect(entry);
                entry = pte_mkclean(entry);
                set_pte_at(mm, address, pte, entry);
                ret = 1;
        }

        pte_unmap_unlock(pte, ptl);
out:
        return ret;
}

static int page_mkclean_file(struct address_space *mapping, struct page *page)
{
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct vm_area_struct *vma;
        struct prio_tree_iter iter;
        int ret = 0;

        BUG_ON(PageAnon(page));

        spin_lock(&mapping->i_mmap_lock);
        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                if (vma->vm_flags & VM_SHARED)
                        ret += page_mkclean_one(page, vma);
        }
        spin_unlock(&mapping->i_mmap_lock);
        return ret;
}

int page_mkclean(struct page *page)
{
        int ret = 0;

        BUG_ON(!PageLocked(page));

        if (page_mapped(page)) {
                struct address_space *mapping = page_mapping(page);
                if (mapping) {
                        ret = page_mkclean_file(mapping, page);
                        if (page_test_dirty(page)) {
                                page_clear_dirty(page);
                                ret = 1;
                        }
                }
        }

        return ret;
}
EXPORT_SYMBOL_GPL(page_mkclean);

/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 */
static void __page_set_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
{
        struct anon_vma *anon_vma = vma->anon_vma;

        BUG_ON(!anon_vma);
        anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
        page->mapping = (struct address_space *) anon_vma;

        page->index = linear_page_index(vma, address);

        /*
         * nr_mapped state can be updated without turning off
         * interrupts because it is not modified via interrupt.
         */
        __inc_zone_page_state(page, NR_ANON_PAGES);
}

/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 */
static void __page_check_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
        /*
         * The page's anon-rmap details (mapping and index) are guaranteed to
         * be set up correctly at this point.
         *
         * We have exclusion against page_add_anon_rmap because the caller
         * always holds the page locked, except if called from page_dup_rmap,
         * in which case the page is already known to be setup.
         *
         * We have exclusion against page_add_new_anon_rmap because those pages
         * are initially only visible via the pagetables, and the pte is locked
         * over the call to page_add_new_anon_rmap.
         */
        struct anon_vma *anon_vma = vma->anon_vma;
        anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
        BUG_ON(page->mapping != (struct address_space *)anon_vma);
        BUG_ON(page->index != linear_page_index(vma, address));
#endif
}

/**
 * page_add_anon_rmap - add pte mapping to an anonymous page
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 *
 * The caller needs to hold the pte lock and the page must be locked.
 */
void page_add_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
{
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
        if (atomic_inc_and_test(&page->_mapcount))
                __page_set_anon_rmap(page, vma, address);
        else
                __page_check_anon_rmap(page, vma, address);
}

/*
 * page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page: the page to add the mapping to
 * @vma: the vm area in which the mapping is added
 * @address: the user virtual address mapped
 *
 * Same as page_add_anon_rmap but must only be called on *new* pages.
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
void page_add_new_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address)
{
        BUG_ON(address < vma->vm_start || address >= vma->vm_end);
        atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
        __page_set_anon_rmap(page, vma, address);
}

/**
 * page_add_file_rmap - add pte mapping to a file page
 * @page: the page to add the mapping to
 *
 * The caller needs to hold the pte lock.
 */
void page_add_file_rmap(struct page *page)
{
        if (atomic_inc_and_test(&page->_mapcount))
                __inc_zone_page_state(page, NR_FILE_MAPPED);
}

#ifdef CONFIG_DEBUG_VM
/**
 * page_dup_rmap - duplicate pte mapping to a page
 * @page: the page to add the mapping to
 *
 * For copy_page_range only: minimal extract from page_add_file_rmap /
 * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
 * quicker.
 *
 * The caller needs to hold the pte lock.
 */
void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
{
        BUG_ON(page_mapcount(page) == 0);
        if (PageAnon(page))
                __page_check_anon_rmap(page, vma, address);
        atomic_inc(&page->_mapcount);
}
#endif

/**
 * page_remove_rmap - take down pte mapping from a page
 * @page: page to remove mapping from
 *
 * The caller needs to hold the pte lock.
 */
void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
{
        if (atomic_add_negative(-1, &page->_mapcount)) {
                if (unlikely(page_mapcount(page) < 0)) {
                        printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
                        printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page));
                        printk (KERN_EMERG " page->flags = %lx\n", page->flags);
                        printk (KERN_EMERG " page->count = %x\n", page_count(page));
                        printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
                        print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
                        if (vma->vm_ops) {
                                print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage);
                                print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
                        }
                        if (vma->vm_file && vma->vm_file->f_op)
                                print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
                        BUG();
                }

                /*
                 * It would be tidy to reset the PageAnon mapping here,
                 * but that might overwrite a racing page_add_anon_rmap
                 * which increments mapcount after us but sets mapping
                 * before us: so leave the reset to free_hot_cold_page,
                 * and remember that it's only reliable while mapped.
                 * Leaving it set also helps swapoff to reinstate ptes
                 * faster for those pages still in swapcache.
                 */
                if (page_test_dirty(page)) {
                        page_clear_dirty(page);
                        set_page_dirty(page);
                }
                __dec_zone_page_state(page,
                                PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
        }
}

/*
 * Subfunctions of try_to_unmap: try_to_unmap_one called
 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
 */
static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                                int migration)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t *pte;
        pte_t pteval;
        spinlock_t *ptl;
        int ret = SWAP_AGAIN;

        address = vma_address(page, vma);
        if (address == -EFAULT)
                goto out;

        pte = page_check_address(page, mm, address, &ptl);
        if (!pte)
                goto out;

        /*
         * If the page is mlock()d, we cannot swap it out.
         * If it's recently referenced (perhaps page_referenced
         * skipped over this mm) then we should reactivate it.
         */
        if (!migration && ((vma->vm_flags & VM_LOCKED) ||
                        (ptep_clear_flush_young(vma, address, pte)))) {
                ret = SWAP_FAIL;
                goto out_unmap;
        }

        /* Nuke the page table entry. */
        flush_cache_page(vma, address, page_to_pfn(page));
        pteval = ptep_clear_flush(vma, address, pte);

        /* Move the dirty bit to the physical page now the pte is gone. */
        if (pte_dirty(pteval))
                set_page_dirty(page);

        /* Update high watermark before we lower rss */
        update_hiwater_rss(mm);

        if (PageAnon(page)) {
                swp_entry_t entry = { .val = page_private(page) };

                if (PageSwapCache(page)) {
                        /*
                         * Store the swap location in the pte.
                         * See handle_pte_fault() ...
                         */
                        swap_duplicate(entry);
                        if (list_empty(&mm->mmlist)) {
                                spin_lock(&mmlist_lock);
                                if (list_empty(&mm->mmlist))
                                        list_add(&mm->mmlist, &init_mm.mmlist);
                                spin_unlock(&mmlist_lock);
                        }
                        dec_mm_counter(mm, anon_rss);
#ifdef CONFIG_MIGRATION
                } else {
                        /*
                         * Store the pfn of the page in a special migration
                         * pte. do_swap_page() will wait until the migration
                         * pte is removed and then restart fault handling.
                         */
                        BUG_ON(!migration);
                        entry = make_migration_entry(page, pte_write(pteval));
#endif
                }
                set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
                BUG_ON(pte_file(*pte));
        } else
#ifdef CONFIG_MIGRATION
        if (migration) {
                /* Establish migration entry for a file page */
                swp_entry_t entry;
                entry = make_migration_entry(page, pte_write(pteval));
                set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
        } else
#endif
                dec_mm_counter(mm, file_rss);

        page_remove_rmap(page, vma);
        page_cache_release(page);

out_unmap:
        pte_unmap_unlock(pte, ptl);
out:
        return ret;
}

/*
 * objrmap doesn't work for nonlinear VMAs because the assumption that
 * offset-into-file correlates with offset-into-virtual-addresses does not hold.
 * Consequently, given a particular page and its ->index, we cannot locate the
 * ptes which are mapping that page without an exhaustive linear search.
 *
 * So what this code does is a mini "virtual scan" of each nonlinear VMA which
 * maps the file to which the target page belongs. The ->vm_private_data field
 * holds the current cursor into that scan. Successive searches will circulate
 * around the vma's virtual address space.
 *
 * So as more replacement pressure is applied to the pages in a nonlinear VMA,
 * more scanning pressure is placed against them as well. Eventually pages
 * will become fully unmapped and are eligible for eviction.
 *
 * For very sparsely populated VMAs this is a little inefficient - chances are
 * there won't be many ptes located within the scan cluster. In this case
 * maybe we could scan further - to the end of the pte page, perhaps.
 */
#define CLUSTER_SIZE    min(32*PAGE_SIZE, PMD_SIZE)
#define CLUSTER_MASK    (~(CLUSTER_SIZE - 1))
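
/*
 * For example, with 4K pages and a PMD covering at least 128K,
 * CLUSTER_SIZE works out to 32 * 4K == 128K (0x20000) and CLUSTER_MASK
 * to ~0x1ffff, so for a 128K-aligned vm_start a cursor of 0x23456 makes
 * try_to_unmap_cluster() start scanning at vm_start + 0x20000.
 */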

static void try_to_unmap_cluster(unsigned long cursor,
        unsigned int *mapcount, struct vm_area_struct *vma)
{
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        pte_t pteval;
        spinlock_t *ptl;
        struct page *page;
        unsigned long address;
        unsigned long end;

        address = (vma->vm_start + cursor) & CLUSTER_MASK;
        end = address + CLUSTER_SIZE;
        if (address < vma->vm_start)
                address = vma->vm_start;
        if (end > vma->vm_end)
                end = vma->vm_end;

        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                return;

        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                return;

        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                return;

        pte = pte_offset_map_lock(mm, pmd, address, &ptl);

        /* Update high watermark before we lower rss */
        update_hiwater_rss(mm);

        for (; address < end; pte++, address += PAGE_SIZE) {
                if (!pte_present(*pte))
                        continue;
                page = vm_normal_page(vma, address, *pte);
                BUG_ON(!page || PageAnon(page));

                if (ptep_clear_flush_young(vma, address, pte))
                        continue;

                /* Nuke the page table entry. */
                flush_cache_page(vma, address, pte_pfn(*pte));
                pteval = ptep_clear_flush(vma, address, pte);

                /* If nonlinear, store the file page offset in the pte. */
                if (page->index != linear_page_index(vma, address))
                        set_pte_at(mm, address, pte, pgoff_to_pte(page->index));

                /* Move the dirty bit to the physical page now the pte is gone. */
                if (pte_dirty(pteval))
                        set_page_dirty(page);

                page_remove_rmap(page, vma);
                page_cache_release(page);
                dec_mm_counter(mm, file_rss);
                (*mapcount)--;
        }
        pte_unmap_unlock(pte - 1, ptl);
}

static int try_to_unmap_anon(struct page *page, int migration)
{
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;

        anon_vma = page_lock_anon_vma(page);
        if (!anon_vma)
                return ret;

        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
                ret = try_to_unmap_one(page, vma, migration);
                if (ret == SWAP_FAIL || !page_mapped(page))
                        break;
        }

        page_unlock_anon_vma(anon_vma);
        return ret;
}

/**
 * try_to_unmap_file - unmap file page using the object-based rmap method
 * @page: the page to unmap
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
 * This function is only called from try_to_unmap for object-based pages.
 */
static int try_to_unmap_file(struct page *page, int migration)
{
        struct address_space *mapping = page->mapping;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct vm_area_struct *vma;
        struct prio_tree_iter iter;
        int ret = SWAP_AGAIN;
        unsigned long cursor;
        unsigned long max_nl_cursor = 0;
        unsigned long max_nl_size = 0;
        unsigned int mapcount;

        spin_lock(&mapping->i_mmap_lock);
        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                ret = try_to_unmap_one(page, vma, migration);
                if (ret == SWAP_FAIL || !page_mapped(page))
                        goto out;
        }

        if (list_empty(&mapping->i_mmap_nonlinear))
                goto out;

        list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
                if ((vma->vm_flags & VM_LOCKED) && !migration)
                        continue;
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
                        max_nl_cursor = cursor;
                cursor = vma->vm_end - vma->vm_start;
                if (cursor > max_nl_size)
                        max_nl_size = cursor;
        }

        if (max_nl_size == 0) { /* any nonlinears locked or reserved */
                ret = SWAP_FAIL;
                goto out;
        }

        /*
         * We don't try to search for this page in the nonlinear vmas,
         * and page_referenced wouldn't have found it anyway. Instead
         * just walk the nonlinear vmas trying to age and unmap some.
         * The mapcount of the page we came in with is irrelevant,
         * but even so use it as a guide to how hard we should try?
         */
        mapcount = page_mapcount(page);
        if (!mapcount)
                goto out;
        cond_resched_lock(&mapping->i_mmap_lock);

        max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
        if (max_nl_cursor == 0)
                max_nl_cursor = CLUSTER_SIZE;

        do {
                list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
                        if ((vma->vm_flags & VM_LOCKED) && !migration)
                                continue;
                        cursor = (unsigned long) vma->vm_private_data;
                        while ( cursor < max_nl_cursor &&
                                cursor < vma->vm_end - vma->vm_start) {
                                try_to_unmap_cluster(cursor, &mapcount, vma);
                                cursor += CLUSTER_SIZE;
                                vma->vm_private_data = (void *) cursor;
                                if ((int)mapcount <= 0)
                                        goto out;
                        }
                        vma->vm_private_data = (void *) max_nl_cursor;
                }
                cond_resched_lock(&mapping->i_mmap_lock);
                max_nl_cursor += CLUSTER_SIZE;
        } while (max_nl_cursor <= max_nl_size);

        /*
         * Don't loop forever (perhaps all the remaining pages are
         * in locked vmas). Reset cursor on all unreserved nonlinear
         * vmas, now forgetting on which ones it had fallen behind.
         */
        list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
                vma->vm_private_data = NULL;
out:
        spin_unlock(&mapping->i_mmap_lock);
        return ret;
}

/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 *
 * Tries to remove all the page table entries which are mapping this
 * page, used in the pageout path. Caller must hold the page lock.
 * Return values are:
 *
 * SWAP_SUCCESS - we succeeded in removing all mappings
 * SWAP_AGAIN   - we missed a mapping, try again later
 * SWAP_FAIL    - the page is unswappable
 */
int try_to_unmap(struct page *page, int migration)
{
        int ret;

        BUG_ON(!PageLocked(page));

        if (PageAnon(page))
                ret = try_to_unmap_anon(page, migration);
        else
                ret = try_to_unmap_file(page, migration);

        if (!page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
}