/*
 * linux/mm/slab.c
 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
 *
 * Major cleanup, different bufctl logic, per-cpu arrays
 *      (c) 2000 Manfred Spraul
 *
 * Cleanup, make the head arrays unconditional, preparation for NUMA
 *      (c) 2002 Manfred Spraul
 *
 * An implementation of the Slab Allocator as described in outline in:
 *      UNIX Internals: The New Frontiers by Uresh Vahalia
 *      Pub: Prentice Hall      ISBN 0-13-101908-2
 * or with a little more detail in:
 *      The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *      Jeff Bonwick (Sun Microsystems).
 *      Presented at: USENIX Summer 1994 Technical Conference
 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
 *
 * This means that your constructor is used only for newly allocated
 * slabs and you must pass objects with the same initializations to
 * kmem_cache_free.
 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
 *
 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
 *   full slabs with 0 free objects
 *   partial slabs
 *   empty slabs with no allocated objects
 *
 * If partial slabs exist, then new allocations come from these slabs;
 * otherwise they come from empty slabs, or new slabs are allocated.
 *
 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
 *
 * Each cache has a short per-cpu head array; most allocs
 * and frees go into that array, and if that array overflows, then 1/2
 * of the entries in the array are given back into the global cache.
 * The head array is strictly LIFO and should improve the cache hit rates.
 * On SMP, it additionally reduces the spinlock operations.
 *
 * The c_cpuarray may not be read with enabled local interrupts -
 * it's changed with a smp_call_function().
 *
 * SMP synchronization:
 *  constructors and destructors are called without any locking.
 *  Several members in struct kmem_cache and struct slab never change, they
 *      are accessed without any locking.
 *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
 *      and local interrupts are disabled so slab code is preempt-safe.
 *  The non-constant members are protected with a per-cache irq spinlock.
 *
 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
 * in 2000 - many ideas in the current implementation are derived from
 * his patch.
 *
 * Further notes from the original documentation:
 *
 * 11 April '97.  Started multi-threading - markhe
 *      The global cache-chain is protected by the mutex 'cache_chain_mutex'.
 *      The mutex is only needed when accessing/extending the cache-chain, which
 *      can never happen inside an interrupt (kmem_cache_create(),
 *      kmem_cache_shrink() and kmem_cache_reap()).
 *
 *      At present, each engine can be growing a cache.  This should be blocked.
 *
 * 15 March 2005. NUMA slab allocator.
 *      Shai Fultheim <shai@scalex86.org>.
 *      Shobhit Dayal <shobhit@calsoftinc.com>
 *      Alok N Kataria <alokk@calsoftinc.com>
 *      Christoph Lameter <christoph@lameter.com>
 *
 *      Modified the slab allocator to be node aware on NUMA systems.
 *      Each node has its own list of partial, free and full slabs.
 *      All object allocations for a node occur from node specific slab lists.
 */
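
/*
 * Illustrative sketch, not part of the allocator itself: a typical user of
 * this API (with a hypothetical struct my_obj) creates and uses a cache
 * roughly like this, using the five-argument kmem_cache_create() declared
 * for this tree, where the last argument is the optional constructor:
 *
 *      static struct kmem_cache *my_cachep;
 *
 *      my_cachep = kmem_cache_create("my_obj", sizeof(struct my_obj),
 *                                    0, SLAB_HWCACHE_ALIGN, NULL);
 *      objp = kmem_cache_alloc(my_cachep, GFP_KERNEL);
 *      ...
 *      kmem_cache_free(my_cachep, objp);
 *      kmem_cache_destroy(my_cachep);
 */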

#include        <linux/slab.h>
#include        <linux/mm.h>
#include        <linux/poison.h>
#include        <linux/swap.h>
#include        <linux/cache.h>
#include        <linux/interrupt.h>
#include        <linux/init.h>
#include        <linux/compiler.h>
#include        <linux/cpuset.h>
#include        <linux/seq_file.h>
#include        <linux/notifier.h>
#include        <linux/kallsyms.h>
#include        <linux/cpu.h>
#include        <linux/sysctl.h>
#include        <linux/module.h>
#include        <linux/rcupdate.h>
#include        <linux/string.h>
#include        <linux/uaccess.h>
#include        <linux/nodemask.h>
#include        <linux/mempolicy.h>
#include        <linux/mutex.h>
#include        <linux/fault-inject.h>
#include        <linux/rtmutex.h>
#include        <linux/reciprocal_div.h>

#include        <asm/cacheflush.h>
#include        <asm/tlbflush.h>
#include        <asm/page.h>

/*
 * DEBUG        - 1 for kmem_cache_create() to honour SLAB_RED_ZONE & SLAB_POISON.
 *                0 for faster, smaller code (especially in the critical paths).
 *
 * STATS        - 1 to collect stats for /proc/slabinfo.
 *                0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */

#ifdef CONFIG_DEBUG_SLAB
#define DEBUG           1
#define STATS           1
#define FORCED_DEBUG    1
#else
#define DEBUG           0
#define STATS           0
#define FORCED_DEBUG    0
#endif

/* Shouldn't this be in a header file somewhere? */
#define BYTES_PER_WORD          sizeof(void *)
#define REDZONE_ALIGN           max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef cache_line_size
#define cache_line_size()       L1_CACHE_BYTES
#endif

#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Enforce a minimum alignment for the kmalloc caches.
 * Usually, the kmalloc caches are cache_line_size() aligned, except when
 * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * ARCH_KMALLOC_MINALIGN allows that.
 * Note that increasing this value may disable some debug features.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
/*
 * Enforce a minimum alignment for all caches.
 * Intended for archs that get misalignment faults even for BYTES_PER_WORD
 * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
 * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
 * some debug features.
 */
#define ARCH_SLAB_MINALIGN 0
#endif

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

/* Legal flag mask for kmem_cache_create(). */
#if DEBUG
# define CREATE_MASK    (SLAB_RED_ZONE | \
                         SLAB_POISON | SLAB_HWCACHE_ALIGN | \
                         SLAB_CACHE_DMA | \
                         SLAB_STORE_USER | \
                         SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                         SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#else
# define CREATE_MASK    (SLAB_HWCACHE_ALIGN | \
                         SLAB_CACHE_DMA | \
                         SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                         SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#endif

/*
 * kmem_bufctl_t:
 *
 * Bufctls are used for linking objs within a slab via linked offsets.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used. The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
 */

typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END      (((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE     (((kmem_bufctl_t)(~0U))-1)
#define BUFCTL_ACTIVE   (((kmem_bufctl_t)(~0U))-2)
#define SLAB_LIMIT      (((kmem_bufctl_t)(~0U))-3)
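
/*
 * Illustrative sketch (assumed from the alloc/free paths of this allocator):
 * each slab is followed by an array of kmem_bufctl_t, one entry per object.
 * Free objects form a singly linked list threaded through that array;
 * slabp->free holds the index of the first free object and BUFCTL_END
 * terminates the chain.  E.g. a four-object slab with objects 1 and 3 free:
 *
 *      slabp->free = 1
 *      bufctl[]    = { -, 3, -, BUFCTL_END }   ("-" = slot in use)
 *
 * BUFCTL_FREE and BUFCTL_ACTIVE appear to be used only by the DEBUG
 * consistency checks.
 */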

/*
 * struct slab
 *
 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 * for a slab, or allocated from a general cache.
 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 */
struct slab {
        struct list_head list;
        unsigned long colouroff;
        void *s_mem;            /* including colour offset */
        unsigned int inuse;     /* num of objs active in slab */
        kmem_bufctl_t free;
        unsigned short nodeid;
};

/*
 * struct slab_rcu
 *
 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
 * arrange for kmem_freepages to be called via RCU.  This is useful if
 * we need to approach a kernel structure obliquely, from its address
 * obtained without the usual locking.  We can lock the structure to
 * stabilize it and check it's still at the given address, only if we
 * can be sure that the memory has not been meanwhile reused for some
 * other kind of object (which our subsystem's lock might corrupt).
 *
 * rcu_read_lock before reading the address, then rcu_read_unlock after
 * taking the spinlock within the structure expected at that address.
 *
 * We assume struct slab_rcu can overlay struct slab when destroying.
 */
struct slab_rcu {
        struct rcu_head head;
        struct kmem_cache *cachep;
        void *addr;
};

/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 *
 */
struct array_cache {
        unsigned int avail;
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
        spinlock_t lock;
        void *entry[];  /*
                         * Must have this definition in here for the proper
                         * alignment of array_cache. Also simplifies accessing
                         * the entries.
                         */
};
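
/*
 * Sketch of the LIFO behaviour described above, as used by the allocation
 * and free fast paths of this allocator: entry[] acts as a stack of object
 * pointers and avail is the stack depth, so a free pushes
 *
 *      ac->entry[ac->avail++] = objp;
 *
 * and an allocation pops
 *
 *      objp = ac->entry[--ac->avail];
 *
 * which hands back the most recently freed (and therefore cache-warm)
 * object first.
 */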

/*
 * bootstrap: The caches do not work without cpuarrays anymore, but the
 * cpuarrays are allocated from the generic caches...
 */
#define BOOT_CPUCACHE_ENTRIES   1
struct arraycache_init {
        struct array_cache cache;
        void *entries[BOOT_CPUCACHE_ENTRIES];
};

/*
 * The slab lists for all objects.
 */
struct kmem_list3 {
        struct list_head slabs_partial; /* partial list first, better asm code */
        struct list_head slabs_full;
        struct list_head slabs_free;
        unsigned long free_objects;
        unsigned int free_limit;
        unsigned int colour_next;       /* Per-node cache coloring */
        spinlock_t list_lock;
        struct array_cache *shared;     /* shared per node */
        struct array_cache **alien;     /* on other nodes */
        unsigned long next_reap;        /* updated without locking */
        int free_touched;               /* updated without locking */
};

/*
 * Need this for bootstrapping a per node allocator.
 */
#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define CACHE_CACHE 0
#define SIZE_AC 1
#define SIZE_L3 (1 + MAX_NUMNODES)

static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
                        int node);
static int enable_cpucache(struct kmem_cache *cachep);
static void cache_reap(struct work_struct *unused);

/*
 * This function must be completely optimized away if a constant is passed to
 * it.  Mostly the same as what is in linux/slab.h except it returns an index.
 */
static __always_inline int index_of(const size_t size)
{
        extern void __bad_size(void);

        if (__builtin_constant_p(size)) {
                int i = 0;

#define CACHE(x) \
        if (size <=x) \
                return i; \
        else \
                i++;
#include "linux/kmalloc_sizes.h"
#undef CACHE
                __bad_size();
        } else
                __bad_size();
        return 0;
}
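
/*
 * Rough illustration of the expansion above, assuming the usual
 * <linux/kmalloc_sizes.h> table starting at 32 bytes: the repeated CACHE(x)
 * entries unroll to
 *
 *      if (size <= 32)  return i;  else i++;
 *      if (size <= 64)  return i;  else i++;
 *      if (size <= 96)  return i;  else i++;
 *      ...
 *
 * so a compile-time constant size folds down to a single integer index,
 * while a non-constant size ends up calling __bad_size(), which has no
 * definition and therefore fails at link time.
 */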

static int slab_early_init = 1;

#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))

static void kmem_list3_init(struct kmem_list3 *parent)
{
        INIT_LIST_HEAD(&parent->slabs_full);
        INIT_LIST_HEAD(&parent->slabs_partial);
        INIT_LIST_HEAD(&parent->slabs_free);
        parent->shared = NULL;
        parent->alien = NULL;
        parent->colour_next = 0;
        spin_lock_init(&parent->list_lock);
        parent->free_objects = 0;
        parent->free_touched = 0;
}

#define MAKE_LIST(cachep, listp, slab, nodeid)                          \
        do {                                                            \
                INIT_LIST_HEAD(listp);                                  \
                list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
        } while (0)

#define MAKE_ALL_LISTS(cachep, ptr, nodeid)                             \
        do {                                                            \
        MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);  \
        MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
        MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);  \
        } while (0)

/*
 * struct kmem_cache
 *
 * manages a cache.
 */

struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
        struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
        unsigned int batchcount;
        unsigned int limit;
        unsigned int shared;

        unsigned int buffer_size;
        u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */

        unsigned int flags;             /* constant flags */
        unsigned int num;               /* # of objs per slab */

/* 4) cache_grow/shrink */
        /* order of pgs per slab (2^n) */
        unsigned int gfporder;

        /* force GFP flags, e.g. GFP_DMA */
        gfp_t gfpflags;

        size_t colour;                  /* cache colouring range */
        unsigned int colour_off;        /* colour offset */
        struct kmem_cache *slabp_cache;
        unsigned int slab_size;
        unsigned int dflags;            /* dynamic flags */

        /* constructor func */
        void (*ctor)(struct kmem_cache *, void *);

/* 5) cache creation/removal */
        const char *name;
        struct list_head next;

/* 6) statistics */
#if STATS
        unsigned long num_active;
        unsigned long num_allocations;
        unsigned long high_mark;
        unsigned long grown;
        unsigned long reaped;
        unsigned long errors;
        unsigned long max_freeable;
        unsigned long node_allocs;
        unsigned long node_frees;
        unsigned long node_overflow;
        atomic_t allochit;
        atomic_t allocmiss;
        atomic_t freehit;
        atomic_t freemiss;
#endif
#if DEBUG
        /*
         * If debugging is enabled, then the allocator can add additional
         * fields and/or padding to every object. buffer_size contains the total
         * object size including these internal fields, the following two
         * variables contain the offset to the user object and its size.
         */
        int obj_offset;
        int obj_size;
#endif
        /*
         * We put nodelists[] at the end of kmem_cache, because we want to size
         * this array to nr_node_ids slots instead of MAX_NUMNODES
         * (see kmem_cache_init())
         * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
         * is statically defined, so we reserve the max number of nodes.
         */
        struct kmem_list3 *nodelists[MAX_NUMNODES];
        /*
         * Do not add fields after nodelists[]
         */
};

#define CFLGS_OFF_SLAB          (0x80000000UL)
#define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT       16
/*
 * Optimization question: fewer reaps means less probability for unnecessary
 * cpucache drain/refill cycles.
 *
 * OTOH the cpuarrays can contain lots of objects,
 * which could lock up otherwise freeable slabs.
 */
#define REAPTIMEOUT_CPUC        (2*HZ)
#define REAPTIMEOUT_LIST3       (4*HZ)

#if STATS
#define STATS_INC_ACTIVE(x)     ((x)->num_active++)
#define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
#define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
#define STATS_INC_GROWN(x)      ((x)->grown++)
#define STATS_ADD_REAPED(x,y)   ((x)->reaped += (y))
#define STATS_SET_HIGH(x)                                               \
        do {                                                            \
                if ((x)->num_active > (x)->high_mark)                   \
                        (x)->high_mark = (x)->num_active;               \
        } while (0)
#define STATS_INC_ERR(x)        ((x)->errors++)
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
#define STATS_INC_NODEFREES(x)  ((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
#define STATS_SET_FREEABLE(x, i)                                        \
        do {                                                            \
                if ((x)->max_freeable < i)                              \
                        (x)->max_freeable = i;                          \
        } while (0)
#define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ACTIVE(x)     do { } while (0)
#define STATS_DEC_ACTIVE(x)     do { } while (0)
#define STATS_INC_ALLOCED(x)    do { } while (0)
#define STATS_INC_GROWN(x)      do { } while (0)
#define STATS_ADD_REAPED(x,y)   do { } while (0)
#define STATS_SET_HIGH(x)       do { } while (0)
#define STATS_INC_ERR(x)        do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
#define STATS_INC_NODEFREES(x)  do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)   do { } while (0)
#define STATS_INC_ALLOCMISS(x)  do { } while (0)
#define STATS_INC_FREEHIT(x)    do { } while (0)
#define STATS_INC_FREEMISS(x)   do { } while (0)
#endif

#if DEBUG

/*
 * memory layout of objects:
 * 0            : objp
 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
 *              the end of an object is aligned with the end of the real
 *              allocation. Catches writes behind the end of the allocation.
 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
 *              redzone word.
 * cachep->obj_offset: The real object.
 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
 *                                      [BYTES_PER_WORD long]
 */
static int obj_offset(struct kmem_cache *cachep)
{
        return cachep->obj_offset;
}

static int obj_size(struct kmem_cache *cachep)
{
        return cachep->obj_size;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        return (unsigned long long*) (objp + obj_offset(cachep) -
                                      sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        if (cachep->flags & SLAB_STORE_USER)
                return (unsigned long long *)(objp + cachep->buffer_size -
                                              sizeof(unsigned long long) -
                                              REDZONE_ALIGN);
        return (unsigned long long *) (objp + cachep->buffer_size -
                                       sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_STORE_USER));
        return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)                   0
#define obj_size(cachep)                (cachep->buffer_size)
#define dbg_redzone1(cachep, objp)      ({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)      ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)      ({BUG(); (void **)NULL;})

#endif

/*
 * Do not go above this order unless 0 objects fit into the slab.
 */
#define BREAK_GFP_ORDER_HI      1
#define BREAK_GFP_ORDER_LO      0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Functions for storing/retrieving the cachep and/or slab from the page
 * allocator.  These are used to find the slab an obj belongs to.  With kfree(),
 * these are used to find the cache which an obj belongs to.
 */
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
        page->lru.next = (struct list_head *)cache;
}

static inline struct kmem_cache *page_get_cache(struct page *page)
{
        page = compound_head(page);
        BUG_ON(!PageSlab(page));
        return (struct kmem_cache *)page->lru.next;
}

static inline void page_set_slab(struct page *page, struct slab *slab)
{
        page->lru.prev = (struct list_head *)slab;
}

static inline struct slab *page_get_slab(struct page *page)
{
        BUG_ON(!PageSlab(page));
        return (struct slab *)page->lru.prev;
}

static inline struct kmem_cache *virt_to_cache(const void *obj)
{
        struct page *page = virt_to_head_page(obj);
        return page_get_cache(page);
}

static inline struct slab *virt_to_slab(const void *obj)
{
        struct page *page = virt_to_head_page(obj);
        return page_get_slab(page);
}

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
                                 unsigned int idx)
{
        return slab->s_mem + cache->buffer_size * idx;
}

/*
 * We want to avoid an expensive divide : (offset / cache->buffer_size)
 *   Using the fact that buffer_size is a constant for a particular cache,
 *   we can replace (offset / cache->buffer_size) by
 *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
 */
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
                                        const struct slab *slab, void *obj)
{
        u32 offset = (obj - slab->s_mem);
        return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
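
/*
 * Worked example of the reciprocal trick (numbers illustrative): for a
 * cache with buffer_size = 256, reciprocal_buffer_size is precomputed by
 * reciprocal_value(256) so that
 *
 *      reciprocal_divide(offset, cache->reciprocal_buffer_size)
 *              == (u32)(((u64)offset * reciprocal) >> 32)
 *              == offset / 256
 *
 * An object at offset 1024 from s_mem therefore yields index 4 without a
 * divide instruction in the fast path.
 */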

/*
 * These are the default caches for kmalloc. Custom caches can have other sizes.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
        CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);

/* Must match cache_sizes above. Out of line to keep cache footprint low. */
struct cache_names {
        char *name;
        char *name_dma;
};

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
        {NULL,}
#undef CACHE
};

static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

/* internal cache of cache description objs */
static struct kmem_cache cache_cache = {
        .batchcount = 1,
        .limit = BOOT_CPUCACHE_ENTRIES,
        .shared = 1,
        .buffer_size = sizeof(struct kmem_cache),
        .name = "kmem_cache",
};

#define BAD_ALIEN_MAGIC 0x01020304ul

#ifdef CONFIG_LOCKDEP

/*
 * Slab sometimes uses the kmalloc slabs to store the slab headers
 * for other slabs "off slab".
 * The locking for this is tricky in that it nests within the locks
 * of all other slabs in a few places; to deal with this special
 * locking we put on-slab caches into a separate lock-class.
 *
 * We set lock class for alien array caches which are up during init.
 * The lock annotation will be lost if all cpus of a node go down and
 * then come back up during hotplug.
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static inline void init_lock_keys(void)
{
        int q;
        struct cache_sizes *s = malloc_sizes;

        while (s->cs_size != ULONG_MAX) {
                for_each_node(q) {
                        struct array_cache **alc;
                        int r;
                        struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
                        if (!l3 || OFF_SLAB(s->cs_cachep))
                                continue;
                        lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
                        alc = l3->alien;
                        /*
                         * FIXME: This check for BAD_ALIEN_MAGIC
                         * should go away when common slab code is taught to
                         * work even without alien caches.
                         * Currently, non NUMA code returns BAD_ALIEN_MAGIC
                         * for alloc_alien_cache.
                         */
                        if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
                                continue;
                        for_each_node(r) {
                                if (alc[r])
                                        lockdep_set_class(&alc[r]->lock,
                                             &on_slab_alc_key);
                        }
                }
                s++;
        }
}
#else
static inline void init_lock_keys(void)
{
}
#endif

/*
 * 1. Guard access to the cache-chain.
 * 2. Protect sanity of cpu_online_map against cpu hotplug events
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

/*
 * chicken and egg problem: delay the per-cpu array allocation
 * until the general caches are up.
 */
static enum {
        NONE,
        PARTIAL_AC,
        PARTIAL_L3,
        FULL
} g_cpucache_up;

/*
 * used by boot code to determine if it can use slab based allocator
 */
int slab_is_available(void)
{
        return g_cpucache_up == FULL;
}

static DEFINE_PER_CPU(struct delayed_work, reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
        return cachep->array[smp_processor_id()];
}

static inline struct kmem_cache *__find_general_cachep(size_t size,
                                                        gfp_t gfpflags)
{
        struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
        /* This happens if someone tries to call
         * kmem_cache_create(), or __kmalloc(), before
         * the generic caches are initialized.
         */
        BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
        if (!size)
                return ZERO_SIZE_PTR;

        while (size > csizep->cs_size)
                csizep++;

        /*
         * Really subtle: The last entry with cs->cs_size==ULONG_MAX
         * has cs_{dma,}cachep==NULL. Thus no special case
         * for large kmalloc calls required.
         */
#ifdef CONFIG_ZONE_DMA
        if (unlikely(gfpflags & GFP_DMA))
                return csizep->cs_dmacachep;
#endif
        return csizep->cs_cachep;
}

static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
        return __find_general_cachep(size, gfpflags);
}

static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
        return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}

/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                           size_t align, int flags, size_t *left_over,
                           unsigned int *num)
{
        int nr_objs;
        size_t mgmt_size;
        size_t slab_size = PAGE_SIZE << gfporder;

        /*
         * The slab management structure can be either off the slab or
         * on it. For the latter case, the memory allocated for a
         * slab is used for:
         *
         * - The struct slab
         * - One kmem_bufctl_t for each object
         * - Padding to respect alignment of @align
         * - @buffer_size bytes for each object
         *
         * If the slab management structure is off the slab, then the
         * alignment will already be calculated into the size. Because
         * the slabs are all page aligned, the objects will be at the
         * correct alignment when allocated.
         */
        if (flags & CFLGS_OFF_SLAB) {
                mgmt_size = 0;
                nr_objs = slab_size / buffer_size;

                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;
        } else {
                /*
                 * Ignore padding for the initial guess. The padding
                 * is at most @align-1 bytes, and @buffer_size is at
                 * least @align. In the worst case, this result will
                 * be one greater than the number of objects that fit
                 * into the memory allocation when taking the padding
                 * into account.
                 */
                nr_objs = (slab_size - sizeof(struct slab)) /
                          (buffer_size + sizeof(kmem_bufctl_t));

                /*
                 * This calculated number will be either the right
                 * amount, or one greater than what we want.
                 */
                if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
                       > slab_size)
                        nr_objs--;

                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;

                mgmt_size = slab_mgmt_size(nr_objs, align);
        }
        *num = nr_objs;
        *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
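
/*
 * Worked example (numbers illustrative): for an off-slab cache with
 * gfporder = 0 and buffer_size = 1536 on a 4096-byte page:
 *
 *      nr_objs   = 4096 / 1536      = 2
 *      mgmt_size = 0
 *      left_over = 4096 - 2 * 1536  = 1024
 *
 * The left-over bytes are what the cache colouring code later spreads the
 * start of s_mem across.  In the on-slab case the struct slab plus one
 * kmem_bufctl_t per object is carved out of the same memory first, so
 * nr_objs comes out correspondingly smaller.
 */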

#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)

static void __slab_error(const char *function, struct kmem_cache *cachep,
                        char *msg)
{
        printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
               function, cachep->name, msg);
        dump_stack();
}

/*
 * By default on NUMA we use alien caches to stage the freeing of
 * objects allocated from other nodes. This causes massive memory
 * inefficiencies when using a fake NUMA setup to split memory into a
 * large number of small nodes, so it can be disabled on the command
 * line.
 */

static int use_alien_caches __read_mostly = 1;
static int numa_platform __read_mostly = 1;
static int __init noaliencache_setup(char *s)
{
        use_alien_caches = 0;
        return 1;
}
__setup("noaliencache", noaliencache_setup);

#ifdef CONFIG_NUMA
/*
 * Special reaping functions for NUMA systems called from cache_reap().
 * These take care of doing round robin flushing of alien caches (containing
 * objects freed on different nodes from which they were allocated) and the
 * flushing of remote pcps by calling drain_node_pages.
 */
static DEFINE_PER_CPU(unsigned long, reap_node);

static void init_reap_node(int cpu)
{
        int node;

        node = next_node(cpu_to_node(cpu), node_online_map);
        if (node == MAX_NUMNODES)
                node = first_node(node_online_map);

        per_cpu(reap_node, cpu) = node;
}

static void next_reap_node(void)
{
        int node = __get_cpu_var(reap_node);

        node = next_node(node, node_online_map);
        if (unlikely(node >= MAX_NUMNODES))
                node = first_node(node_online_map);
        __get_cpu_var(reap_node) = node;
}

#else
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif

/*
 * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
 * via the workqueue/eventd.
 * Add the CPU number into the expiration time to minimize the possibility of
 * the CPUs getting into lockstep and contending for the global cache chain
 * lock.
 */
static void __cpuinit start_cpu_timer(int cpu)
{
        struct delayed_work *reap_work = &per_cpu(reap_work, cpu);

        /*
         * When this gets called from do_initcalls via cpucache_init(),
         * init_workqueues() has already run, so keventd will be setup
         * at that time.
         */
        if (keventd_up() && reap_work->work.func == NULL) {
                init_reap_node(cpu);
                INIT_DELAYED_WORK(reap_work, cache_reap);
                schedule_delayed_work_on(cpu, reap_work,
                                        __round_jiffies_relative(HZ, cpu));
        }
}

static struct array_cache *alloc_arraycache(int node, int entries,
                                            int batchcount)
{
        int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
        struct array_cache *nc = NULL;

        nc = kmalloc_node(memsize, GFP_KERNEL, node);
        if (nc) {
                nc->avail = 0;
                nc->limit = entries;
                nc->batchcount = batchcount;
                nc->touched = 0;
                spin_lock_init(&nc->lock);
        }
        return nc;
}

/*
 * Transfer objects in one arraycache to another.
 * Locking must be handled by the caller.
 *
 * Return the number of entries transferred.
 */
static int transfer_objects(struct array_cache *to,
                struct array_cache *from, unsigned int max)
{
        /* Figure out how many entries to transfer */
        int nr = min(min(from->avail, max), to->limit - to->avail);

        if (!nr)
                return 0;

        memcpy(to->entry + to->avail, from->entry + from->avail -nr,
                        sizeof(void *) *nr);

        from->avail -= nr;
        to->avail += nr;
        to->touched = 1;
        return nr;
}

#ifndef CONFIG_NUMA

#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

static inline struct array_cache **alloc_alien_cache(int node, int limit)
{
        return (struct array_cache **)BAD_ALIEN_MAGIC;
}

static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
        return 0;
}

static inline void *alternate_node_alloc(struct kmem_cache *cachep,
                gfp_t flags)
{
        return NULL;
}

static inline void *____cache_alloc_node(struct kmem_cache *cachep,
                 gfp_t flags, int nodeid)
{
        return NULL;
}

#else   /* CONFIG_NUMA */

static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);

static struct array_cache **alloc_alien_cache(int node, int limit)
{
        struct array_cache **ac_ptr;
        int memsize = sizeof(void *) * nr_node_ids;
        int i;

        if (limit > 1)
                limit = 12;
        ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
        if (ac_ptr) {
                for_each_node(i) {
                        if (i == node || !node_online(i)) {
                                ac_ptr[i] = NULL;
                                continue;
                        }
                        ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
                        if (!ac_ptr[i]) {
                                for (i--; i >= 0; i--)
                                        kfree(ac_ptr[i]);
                                kfree(ac_ptr);
                                return NULL;
                        }
                }
        }
        return ac_ptr;
}

static void free_alien_cache(struct array_cache **ac_ptr)
{
        int i;

        if (!ac_ptr)
                return;
        for_each_node(i)
            kfree(ac_ptr[i]);
        kfree(ac_ptr);
}

static void __drain_alien_cache(struct kmem_cache *cachep,
                                struct array_cache *ac, int node)
{
        struct kmem_list3 *rl3 = cachep->nodelists[node];

        if (ac->avail) {
                spin_lock(&rl3->list_lock);
                /*
                 * Stuff objects into the remote node's shared array first.
                 * That way we could avoid the overhead of putting the objects
                 * into the free lists and getting them back later.
                 */
                if (rl3->shared)
                        transfer_objects(rl3->shared, ac, ac->limit);

                free_block(cachep, ac->entry, ac->avail, node);
                ac->avail = 0;
                spin_unlock(&rl3->list_lock);
        }
}

/*
 * Called from cache_reap() to regularly drain alien caches round robin.
 */
static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
{
        int node = __get_cpu_var(reap_node);

        if (l3->alien) {
                struct array_cache *ac = l3->alien[node];

                if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
                        __drain_alien_cache(cachep, ac, node);
                        spin_unlock_irq(&ac->lock);
                }
        }
}

static void drain_alien_cache(struct kmem_cache *cachep,
                                struct array_cache **alien)
{
        int i = 0;
        struct array_cache *ac;
        unsigned long flags;

        for_each_online_node(i) {
                ac = alien[i];
                if (ac) {
                        spin_lock_irqsave(&ac->lock, flags);
                        __drain_alien_cache(cachep, ac, i);
                        spin_unlock_irqrestore(&ac->lock, flags);
                }
        }
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
        struct slab *slabp = virt_to_slab(objp);
        int nodeid = slabp->nodeid;
        struct kmem_list3 *l3;
        struct array_cache *alien = NULL;
        int node;

        node = numa_node_id();

        /*
         * Make sure we are not freeing an object from another node to the array
         * cache on this cpu.
         */
        if (likely(slabp->nodeid == node))
                return 0;

        l3 = cachep->nodelists[node];
        STATS_INC_NODEFREES(cachep);
        if (l3->alien && l3->alien[nodeid]) {
                alien = l3->alien[nodeid];
                spin_lock(&alien->lock);
                if (unlikely(alien->avail == alien->limit)) {
                        STATS_INC_ACOVERFLOW(cachep);
                        __drain_alien_cache(cachep, alien, nodeid);
                }
                alien->entry[alien->avail++] = objp;
                spin_unlock(&alien->lock);
        } else {
                spin_lock(&(cachep->nodelists[nodeid])->list_lock);
                free_block(cachep, &objp, 1, nodeid);
                spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
        }
        return 1;
}
#endif

static void __cpuinit cpuup_canceled(long cpu)
{
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
        int node = cpu_to_node(cpu);

        list_for_each_entry(cachep, &cache_chain, next) {
                struct array_cache *nc;
                struct array_cache *shared;
                struct array_cache **alien;
                cpumask_t mask;

                mask = node_to_cpumask(node);
                /* cpu is dead; no one can alloc from it. */
                nc = cachep->array[cpu];
                cachep->array[cpu] = NULL;
                l3 = cachep->nodelists[node];

                if (!l3)
                        goto free_array_cache;

                spin_lock_irq(&l3->list_lock);

                /* Free limit for this kmem_list3 */
                l3->free_limit -= cachep->batchcount;
                if (nc)
                        free_block(cachep, nc->entry, nc->avail, node);

                if (!cpus_empty(mask)) {
                        spin_unlock_irq(&l3->list_lock);
                        goto free_array_cache;
                }

                shared = l3->shared;
                if (shared) {
                        free_block(cachep, shared->entry,
                                   shared->avail, node);
                        l3->shared = NULL;
                }

                alien = l3->alien;
                l3->alien = NULL;

                spin_unlock_irq(&l3->list_lock);

                kfree(shared);
                if (alien) {
                        drain_alien_cache(cachep, alien);
                        free_alien_cache(alien);
                }
free_array_cache:
                kfree(nc);
        }
        /*
         * In the previous loop, all the objects were freed to
         * the respective cache's slabs; now we can go ahead and
         * shrink each nodelist to its limit.
         */
        list_for_each_entry(cachep, &cache_chain, next) {
                l3 = cachep->nodelists[node];
                if (!l3)
                        continue;
                drain_freelist(cachep, l3, l3->free_objects);
        }
}

static int __cpuinit cpuup_prepare(long cpu)
{
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
        int node = cpu_to_node(cpu);
        const int memsize = sizeof(struct kmem_list3);

        /*
         * We need to do this right in the beginning since
         * alloc_arraycache's are going to use this list.
         * kmalloc_node allows us to add the slab to the right
         * kmem_list3 and not this cpu's kmem_list3
         */

        list_for_each_entry(cachep, &cache_chain, next) {
                /*
                 * Set up the size64 kmemlist for cpu before we can
                 * begin anything. Make sure some other cpu on this
                 * node has not already allocated this
                 */
                if (!cachep->nodelists[node]) {
                        l3 = kmalloc_node(memsize, GFP_KERNEL, node);
                        if (!l3)
                                goto bad;
                        kmem_list3_init(l3);
                        l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
                            ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

                        /*
                         * The l3s don't come and go as CPUs come and
                         * go.  cache_chain_mutex is sufficient
                         * protection here.
                         */
                        cachep->nodelists[node] = l3;
                }

                spin_lock_irq(&cachep->nodelists[node]->list_lock);
                cachep->nodelists[node]->free_limit =
                        (1 + nr_cpus_node(node)) *
                        cachep->batchcount + cachep->num;
                spin_unlock_irq(&cachep->nodelists[node]->list_lock);
        }

        /*
         * Now we can go ahead with allocating the shared arrays and
         * array caches
         */
        list_for_each_entry(cachep, &cache_chain, next) {
                struct array_cache *nc;
                struct array_cache *shared = NULL;
                struct array_cache **alien = NULL;

                nc = alloc_arraycache(node, cachep->limit,
                                        cachep->batchcount);
                if (!nc)
                        goto bad;
                if (cachep->shared) {
                        shared = alloc_arraycache(node,
                                cachep->shared * cachep->batchcount,
                                0xbaadf00d);
                        if (!shared) {
                                kfree(nc);
                                goto bad;
                        }
                }
                if (use_alien_caches) {
                        alien = alloc_alien_cache(node, cachep->limit);
                        if (!alien) {
                                kfree(shared);
                                kfree(nc);
                                goto bad;
                        }
                }
                cachep->array[cpu] = nc;
                l3 = cachep->nodelists[node];
                BUG_ON(!l3);

                spin_lock_irq(&l3->list_lock);
                if (!l3->shared) {
                        /*
                         * We are serialised from CPU_DEAD or
                         * CPU_UP_CANCELLED by the cpucontrol lock
                         */
                        l3->shared = shared;
                        shared = NULL;
                }
#ifdef CONFIG_NUMA
                if (!l3->alien) {
                        l3->alien = alien;
                        alien = NULL;
                }
#endif
                spin_unlock_irq(&l3->list_lock);
                kfree(shared);
                free_alien_cache(alien);
        }
        return 0;
bad:
        cpuup_canceled(cpu);
        return -ENOMEM;
}

static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                                    unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;
        int err = 0;

        switch (action) {
        case CPU_LOCK_ACQUIRE:
                mutex_lock(&cache_chain_mutex);
                break;
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                err = cpuup_prepare(cpu);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                start_cpu_timer(cpu);
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                /*
                 * Shutdown cache reaper. Note that the cache_chain_mutex is
                 * held so that if cache_reap() is invoked it cannot do
                 * anything expensive but will only modify reap_work
                 * and reschedule the timer.
                 */
                cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
                /* Now the cache_reaper is guaranteed to be not running. */
                per_cpu(reap_work, cpu).work.func = NULL;
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
                start_cpu_timer(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                /*
                 * Even if all the cpus of a node are down, we don't free the
                 * kmem_list3 of any cache. This is to avoid a race between
                 * cpu_down, and a kmalloc allocation from another cpu for
                 * memory from the node of the cpu going down.  The list3
                 * structure is usually allocated from kmem_cache_create() and
                 * gets destroyed at kmem_cache_destroy().
                 */
                /* fall through */
#endif
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                cpuup_canceled(cpu);
                break;
        case CPU_LOCK_RELEASE:
                mutex_unlock(&cache_chain_mutex);
                break;
        }
        return err ? NOTIFY_BAD : NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpucache_notifier = {
        &cpuup_callback, NULL, 0
};

/*
 * swap the static kmem_list3 with kmalloced memory
 */
static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
                        int nodeid)
{
        struct kmem_list3 *ptr;

        ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
        BUG_ON(!ptr);

        local_irq_disable();
        memcpy(ptr, list, sizeof(struct kmem_list3));
        /*
         * Do not assume that spinlocks can be initialized via memcpy:
         */
        spin_lock_init(&ptr->list_lock);

        MAKE_ALL_LISTS(cachep, ptr, nodeid);
        cachep->nodelists[nodeid] = ptr;
        local_irq_enable();
}
1411
 
1412
/*
1413
 * Initialisation.  Called after the page allocator have been initialised and
1414
 * before smp_init().
1415
 */
1416
void __init kmem_cache_init(void)
1417
{
1418
        size_t left_over;
1419
        struct cache_sizes *sizes;
1420
        struct cache_names *names;
1421
        int i;
1422
        int order;
1423
        int node;
1424
 
1425
        if (num_possible_nodes() == 1) {
1426
                use_alien_caches = 0;
1427
                numa_platform = 0;
1428
        }
1429
 
1430
        for (i = 0; i < NUM_INIT_LISTS; i++) {
1431
                kmem_list3_init(&initkmem_list3[i]);
1432
                if (i < MAX_NUMNODES)
1433
                        cache_cache.nodelists[i] = NULL;
1434
        }
1435
 
1436
        /*
1437
         * Fragmentation resistance on low memory - only use bigger
1438
         * page orders on machines with more than 32MB of memory.
1439
         */
1440
        if (num_physpages > (32 << 20) >> PAGE_SHIFT)
1441
                slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1442
 
1443
        /* Bootstrap is tricky, because several objects are allocated
1444
         * from caches that do not exist yet:
1445
         * 1) initialize the cache_cache cache: it contains the struct
1446
         *    kmem_cache structures of all caches, except cache_cache itself:
1447
         *    cache_cache is statically allocated.
1448
         *    Initially an __init data area is used for the head array and the
1449
         *    kmem_list3 structures, it's replaced with a kmalloc allocated
1450
         *    array at the end of the bootstrap.
1451
         * 2) Create the first kmalloc cache.
1452
         *    The struct kmem_cache for the new cache is allocated normally.
1453
         *    An __init data area is used for the head array.
1454
         * 3) Create the remaining kmalloc caches, with minimally sized
1455
         *    head arrays.
1456
         * 4) Replace the __init data head arrays for cache_cache and the first
1457
         *    kmalloc cache with kmalloc allocated arrays.
1458
         * 5) Replace the __init data for kmem_list3 for cache_cache and
1459
         *    the other cache's with kmalloc allocated memory.
1460
         * 6) Resize the head arrays of the kmalloc caches to their final sizes.
1461
         */
1462
 
1463
        node = numa_node_id();
1464
 
1465
        /* 1) create the cache_cache */
1466
        INIT_LIST_HEAD(&cache_chain);
1467
        list_add(&cache_cache.next, &cache_chain);
1468
        cache_cache.colour_off = cache_line_size();
1469
        cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1470
        cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
1471
 
1472
        /*
1473
         * struct kmem_cache size depends on nr_node_ids, which
1474
         * can be less than MAX_NUMNODES.
1475
         */
1476
        cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1477
                                 nr_node_ids * sizeof(struct kmem_list3 *);
1478
#if DEBUG
1479
        cache_cache.obj_size = cache_cache.buffer_size;
1480
#endif
1481
        cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1482
                                        cache_line_size());
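        /*
         * Note: storing the reciprocal lets helpers such as obj_to_index()
         * replace a division by buffer_size with a cheaper multiplication
         * (reciprocal_divide()).
         */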
1483
        cache_cache.reciprocal_buffer_size =
1484
                reciprocal_value(cache_cache.buffer_size);
1485
 
1486
        for (order = 0; order < MAX_ORDER; order++) {
1487
                cache_estimate(order, cache_cache.buffer_size,
1488
                        cache_line_size(), 0, &left_over, &cache_cache.num);
1489
                if (cache_cache.num)
1490
                        break;
1491
        }
1492
        BUG_ON(!cache_cache.num);
1493
        cache_cache.gfporder = order;
1494
        cache_cache.colour = left_over / cache_cache.colour_off;
1495
        cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1496
                                      sizeof(struct slab), cache_line_size());
1497
 
1498
        /* 2+3) create the kmalloc caches */
1499
        sizes = malloc_sizes;
1500
        names = cache_names;
1501
 
1502
        /*
1503
         * Initialize the caches that provide memory for the array cache and the
1504
         * kmem_list3 structures first.  Without this, further allocations will
1505
         * bug.
1506
         */
1507
 
1508
        sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1509
                                        sizes[INDEX_AC].cs_size,
1510
                                        ARCH_KMALLOC_MINALIGN,
1511
                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1512
                                        NULL);
1513
 
1514
        if (INDEX_AC != INDEX_L3) {
1515
                sizes[INDEX_L3].cs_cachep =
1516
                        kmem_cache_create(names[INDEX_L3].name,
1517
                                sizes[INDEX_L3].cs_size,
1518
                                ARCH_KMALLOC_MINALIGN,
1519
                                ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1520
                                NULL);
1521
        }
1522
 
1523
        slab_early_init = 0;
1524
 
1525
        while (sizes->cs_size != ULONG_MAX) {
1526
                /*
1527
                 * For performance, all the general caches are L1 aligned.
1528
                 * This should be particularly beneficial on SMP boxes, as it
1529
                 * eliminates "false sharing".
1530
                 * Note: for systems short on memory, removing the alignment will
1531
                 * allow tighter packing of the smaller caches.
1532
                 */
1533
                if (!sizes->cs_cachep) {
1534
                        sizes->cs_cachep = kmem_cache_create(names->name,
1535
                                        sizes->cs_size,
1536
                                        ARCH_KMALLOC_MINALIGN,
1537
                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1538
                                        NULL);
1539
                }
1540
#ifdef CONFIG_ZONE_DMA
1541
                sizes->cs_dmacachep = kmem_cache_create(
1542
                                        names->name_dma,
1543
                                        sizes->cs_size,
1544
                                        ARCH_KMALLOC_MINALIGN,
1545
                                        ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1546
                                                SLAB_PANIC,
1547
                                        NULL);
1548
#endif
1549
                sizes++;
1550
                names++;
1551
        }
1552
        /* 4) Replace the bootstrap head arrays */
1553
        {
1554
                struct array_cache *ptr;
1555
 
1556
                ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1557
 
1558
                local_irq_disable();
1559
                BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1560
                memcpy(ptr, cpu_cache_get(&cache_cache),
1561
                       sizeof(struct arraycache_init));
1562
                /*
1563
                 * Do not assume that spinlocks can be initialized via memcpy:
1564
                 */
1565
                spin_lock_init(&ptr->lock);
1566
 
1567
                cache_cache.array[smp_processor_id()] = ptr;
1568
                local_irq_enable();
1569
 
1570
                ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1571
 
1572
                local_irq_disable();
1573
                BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1574
                       != &initarray_generic.cache);
1575
                memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1576
                       sizeof(struct arraycache_init));
1577
                /*
1578
                 * Do not assume that spinlocks can be initialized via memcpy:
1579
                 */
1580
                spin_lock_init(&ptr->lock);
1581
 
1582
                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1583
                    ptr;
1584
                local_irq_enable();
1585
        }
1586
        /* 5) Replace the bootstrap kmem_list3's */
1587
        {
1588
                int nid;
1589
 
1590
                /* Replace the static kmem_list3 structures for the boot cpu */
1591
                init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
1592
 
1593
                for_each_online_node(nid) {
1594
                        init_list(malloc_sizes[INDEX_AC].cs_cachep,
1595
                                  &initkmem_list3[SIZE_AC + nid], nid);
1596
 
1597
                        if (INDEX_AC != INDEX_L3) {
1598
                                init_list(malloc_sizes[INDEX_L3].cs_cachep,
1599
                                          &initkmem_list3[SIZE_L3 + nid], nid);
1600
                        }
1601
                }
1602
        }
1603
 
1604
        /* 6) resize the head arrays to their final sizes */
1605
        {
1606
                struct kmem_cache *cachep;
1607
                mutex_lock(&cache_chain_mutex);
1608
                list_for_each_entry(cachep, &cache_chain, next)
1609
                        if (enable_cpucache(cachep))
1610
                                BUG();
1611
                mutex_unlock(&cache_chain_mutex);
1612
        }
1613
 
1614
        /* Annotate slab for lockdep -- annotate the malloc caches */
1615
        init_lock_keys();
1616
 
1617
 
1618
        /* Done! */
1619
        g_cpucache_up = FULL;
1620
 
1621
        /*
1622
         * Register a cpu startup notifier callback that initializes
1623
         * cpu_cache_get for all new cpus
1624
         */
1625
        register_cpu_notifier(&cpucache_notifier);
1626
 
1627
        /*
1628
         * The reap timers are started later, with a module init call: That part
1629
         * of the kernel is not yet operational.
1630
         */
1631
}
1632
 
1633
static int __init cpucache_init(void)
1634
{
1635
        int cpu;
1636
 
1637
        /*
1638
         * Register the timers that return unneeded pages to the page allocator
1639
         */
1640
        for_each_online_cpu(cpu)
1641
                start_cpu_timer(cpu);
1642
        return 0;
1643
}
1644
__initcall(cpucache_init);
1645
 
1646
/*
1647
 * Interface to system's page allocator. No need to hold the cache-lock.
1648
 *
1649
 * If we requested dmaable memory, we will get it. Even if we
1650
 * did not request dmaable memory, we might get it, but that
1651
 * would be relatively rare and ignorable.
1652
 */
1653
static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1654
{
1655
        struct page *page;
1656
        int nr_pages;
1657
        int i;
1658
 
1659
#ifndef CONFIG_MMU
1660
        /*
1661
         * Nommu uses slabs for process anonymous memory allocations, and thus
1662
         * requires __GFP_COMP to properly refcount higher order allocations
1663
         */
1664
        flags |= __GFP_COMP;
1665
#endif
1666
 
1667
        flags |= cachep->gfpflags;
1668
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1669
                flags |= __GFP_RECLAIMABLE;
1670
 
1671
        page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1672
        if (!page)
1673
                return NULL;
1674
 
1675
        nr_pages = (1 << cachep->gfporder);
1676
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1677
                add_zone_page_state(page_zone(page),
1678
                        NR_SLAB_RECLAIMABLE, nr_pages);
1679
        else
1680
                add_zone_page_state(page_zone(page),
1681
                        NR_SLAB_UNRECLAIMABLE, nr_pages);
1682
        for (i = 0; i < nr_pages; i++)
1683
                __SetPageSlab(page + i);
1684
        return page_address(page);
1685
}
1686
 
1687
/*
1688
 * Interface to system's page release.
1689
 */
1690
static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1691
{
1692
        unsigned long i = (1 << cachep->gfporder);
1693
        struct page *page = virt_to_page(addr);
1694
        const unsigned long nr_freed = i;
1695
 
1696
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1697
                sub_zone_page_state(page_zone(page),
1698
                                NR_SLAB_RECLAIMABLE, nr_freed);
1699
        else
1700
                sub_zone_page_state(page_zone(page),
1701
                                NR_SLAB_UNRECLAIMABLE, nr_freed);
1702
        while (i--) {
1703
                BUG_ON(!PageSlab(page));
1704
                __ClearPageSlab(page);
1705
                page++;
1706
        }
1707
        if (current->reclaim_state)
1708
                current->reclaim_state->reclaimed_slab += nr_freed;
1709
        free_pages((unsigned long)addr, cachep->gfporder);
1710
}
1711
 
1712
static void kmem_rcu_free(struct rcu_head *head)
1713
{
1714
        struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1715
        struct kmem_cache *cachep = slab_rcu->cachep;
1716
 
1717
        kmem_freepages(cachep, slab_rcu->addr);
1718
        if (OFF_SLAB(cachep))
1719
                kmem_cache_free(cachep->slabp_cache, slab_rcu);
1720
}
1721
 
1722
#if DEBUG
1723
 
1724
#ifdef CONFIG_DEBUG_PAGEALLOC
1725
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1726
                            unsigned long caller)
1727
{
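        /*
         * Record a small backtrace inside the poisoned object: a 0x12345678
         * marker, the caller, the cpu id, then any kernel text addresses
         * found on the stack, terminated with 0x87654321.
         */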
1728
        int size = obj_size(cachep);
1729
 
1730
        addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1731
 
1732
        if (size < 5 * sizeof(unsigned long))
1733
                return;
1734
 
1735
        *addr++ = 0x12345678;
1736
        *addr++ = caller;
1737
        *addr++ = smp_processor_id();
1738
        size -= 3 * sizeof(unsigned long);
1739
        {
1740
                unsigned long *sptr = &caller;
1741
                unsigned long svalue;
1742
 
1743
                while (!kstack_end(sptr)) {
1744
                        svalue = *sptr++;
1745
                        if (kernel_text_address(svalue)) {
1746
                                *addr++ = svalue;
1747
                                size -= sizeof(unsigned long);
1748
                                if (size <= sizeof(unsigned long))
1749
                                        break;
1750
                        }
1751
                }
1752
 
1753
        }
1754
        *addr++ = 0x87654321;
1755
}
1756
#endif
1757
 
1758
static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1759
{
1760
        int size = obj_size(cachep);
1761
        addr = &((char *)addr)[obj_offset(cachep)];
1762
 
1763
        memset(addr, val, size);
1764
        *(unsigned char *)(addr + size - 1) = POISON_END;
1765
}
1766
 
1767
static void dump_line(char *data, int offset, int limit)
1768
{
1769
        int i;
1770
        unsigned char error = 0;
1771
        int bad_count = 0;
1772
 
1773
        printk(KERN_ERR "%03x:", offset);
1774
        for (i = 0; i < limit; i++) {
1775
                if (data[offset + i] != POISON_FREE) {
1776
                        error = data[offset + i];
1777
                        bad_count++;
1778
                }
1779
                printk(" %02x", (unsigned char)data[offset + i]);
1780
        }
1781
        printk("\n");
1782
 
1783
        if (bad_count == 1) {
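                /*
                 * Exactly one byte differs from POISON_FREE (0x6b).  If the
                 * XOR below leaves a power of two, only a single bit was
                 * flipped, which points at flaky RAM rather than a software
                 * overwrite.
                 */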
1784
                error ^= POISON_FREE;
1785
                if (!(error & (error - 1))) {
1786
                        printk(KERN_ERR "Single bit error detected. Probably "
1787
                                        "bad RAM.\n");
1788
#ifdef CONFIG_X86
1789
                        printk(KERN_ERR "Run memtest86+ or a similar memory "
1790
                                        "test tool.\n");
1791
#else
1792
                        printk(KERN_ERR "Run a memory test tool.\n");
1793
#endif
1794
                }
1795
        }
1796
}
1797
#endif
1798
 
1799
#if DEBUG
1800
 
1801
static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1802
{
1803
        int i, size;
1804
        char *realobj;
1805
 
1806
        if (cachep->flags & SLAB_RED_ZONE) {
1807
                printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1808
                        *dbg_redzone1(cachep, objp),
1809
                        *dbg_redzone2(cachep, objp));
1810
        }
1811
 
1812
        if (cachep->flags & SLAB_STORE_USER) {
1813
                printk(KERN_ERR "Last user: [<%p>]",
1814
                        *dbg_userword(cachep, objp));
1815
                print_symbol("(%s)",
1816
                                (unsigned long)*dbg_userword(cachep, objp));
1817
                printk("\n");
1818
        }
1819
        realobj = (char *)objp + obj_offset(cachep);
1820
        size = obj_size(cachep);
1821
        for (i = 0; i < size && lines; i += 16, lines--) {
1822
                int limit;
1823
                limit = 16;
1824
                if (i + limit > size)
1825
                        limit = size - i;
1826
                dump_line(realobj, i, limit);
1827
        }
1828
}
1829
 
1830
static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1831
{
1832
        char *realobj;
1833
        int size, i;
1834
        int lines = 0;
1835
 
1836
        realobj = (char *)objp + obj_offset(cachep);
1837
        size = obj_size(cachep);
1838
 
1839
        for (i = 0; i < size; i++) {
1840
                char exp = POISON_FREE;
1841
                if (i == size - 1)
1842
                        exp = POISON_END;
1843
                if (realobj[i] != exp) {
1844
                        int limit;
1845
                        /* Mismatch ! */
1846
                        /* Print header */
1847
                        if (lines == 0) {
1848
                                printk(KERN_ERR
1849
                                        "Slab corruption: %s start=%p, len=%d\n",
1850
                                        cachep->name, realobj, size);
1851
                                print_objinfo(cachep, objp, 0);
1852
                        }
1853
                        /* Hexdump the affected line */
1854
                        i = (i / 16) * 16;
1855
                        limit = 16;
1856
                        if (i + limit > size)
1857
                                limit = size - i;
1858
                        dump_line(realobj, i, limit);
1859
                        i += 16;
1860
                        lines++;
1861
                        /* Limit to 5 lines */
1862
                        if (lines > 5)
1863
                                break;
1864
                }
1865
        }
1866
        if (lines != 0) {
1867
                /* Print some data about the neighboring objects, if they
1868
                 * exist:
1869
                 */
1870
                struct slab *slabp = virt_to_slab(objp);
1871
                unsigned int objnr;
1872
 
1873
                objnr = obj_to_index(cachep, slabp, objp);
1874
                if (objnr) {
1875
                        objp = index_to_obj(cachep, slabp, objnr - 1);
1876
                        realobj = (char *)objp + obj_offset(cachep);
1877
                        printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1878
                               realobj, size);
1879
                        print_objinfo(cachep, objp, 2);
1880
                }
1881
                if (objnr + 1 < cachep->num) {
1882
                        objp = index_to_obj(cachep, slabp, objnr + 1);
1883
                        realobj = (char *)objp + obj_offset(cachep);
1884
                        printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1885
                               realobj, size);
1886
                        print_objinfo(cachep, objp, 2);
1887
                }
1888
        }
1889
}
1890
#endif
1891
 
1892
#if DEBUG
1893
/**
1894
 * slab_destroy_objs - destroy a slab and its objects
1895
 * @cachep: cache pointer being destroyed
1896
 * @slabp: slab pointer being destroyed
1897
 *
1898
 * Call the registered destructor for each object in a slab that is being
1899
 * destroyed.
1900
 */
1901
static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1902
{
1903
        int i;
1904
        for (i = 0; i < cachep->num; i++) {
1905
                void *objp = index_to_obj(cachep, slabp, i);
1906
 
1907
                if (cachep->flags & SLAB_POISON) {
1908
#ifdef CONFIG_DEBUG_PAGEALLOC
1909
                        if (cachep->buffer_size % PAGE_SIZE == 0 &&
1910
                                        OFF_SLAB(cachep))
1911
                                kernel_map_pages(virt_to_page(objp),
1912
                                        cachep->buffer_size / PAGE_SIZE, 1);
1913
                        else
1914
                                check_poison_obj(cachep, objp);
1915
#else
1916
                        check_poison_obj(cachep, objp);
1917
#endif
1918
                }
1919
                if (cachep->flags & SLAB_RED_ZONE) {
1920
                        if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1921
                                slab_error(cachep, "start of a freed object "
1922
                                           "was overwritten");
1923
                        if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1924
                                slab_error(cachep, "end of a freed object "
1925
                                           "was overwritten");
1926
                }
1927
        }
1928
}
1929
#else
1930
static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1931
{
1932
}
1933
#endif
1934
 
1935
/**
1936
 * slab_destroy - destroy and release all objects in a slab
1937
 * @cachep: cache pointer being destroyed
1938
 * @slabp: slab pointer being destroyed
1939
 *
1940
 * Destroy all the objs in a slab, and release the mem back to the system.
1941
 * Before calling the slab must have been unlinked from the cache.  The
1942
 * cache-lock is not held/needed.
1943
 */
1944
static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1945
{
1946
        void *addr = slabp->s_mem - slabp->colouroff;
1947
 
1948
        slab_destroy_objs(cachep, slabp);
1949
        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1950
                struct slab_rcu *slab_rcu;
1951
 
1952
                slab_rcu = (struct slab_rcu *)slabp;
1953
                slab_rcu->cachep = cachep;
1954
                slab_rcu->addr = addr;
1955
                call_rcu(&slab_rcu->head, kmem_rcu_free);
1956
        } else {
1957
                kmem_freepages(cachep, addr);
1958
                if (OFF_SLAB(cachep))
1959
                        kmem_cache_free(cachep->slabp_cache, slabp);
1960
        }
1961
}
1962
 
1963
/*
1964
 * For setting up all the kmem_list3s for a cache whose buffer_size is the
1965
 * same as the size of kmem_list3.
1966
 */
1967
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1968
{
1969
        int node;
1970
 
1971
        for_each_online_node(node) {
1972
                cachep->nodelists[node] = &initkmem_list3[index + node];
1973
                cachep->nodelists[node]->next_reap = jiffies +
1974
                    REAPTIMEOUT_LIST3 +
1975
                    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1976
        }
1977
}
1978
 
1979
static void __kmem_cache_destroy(struct kmem_cache *cachep)
1980
{
1981
        int i;
1982
        struct kmem_list3 *l3;
1983
 
1984
        for_each_online_cpu(i)
1985
            kfree(cachep->array[i]);
1986
 
1987
        /* NUMA: free the list3 structures */
1988
        for_each_online_node(i) {
1989
                l3 = cachep->nodelists[i];
1990
                if (l3) {
1991
                        kfree(l3->shared);
1992
                        free_alien_cache(l3->alien);
1993
                        kfree(l3);
1994
                }
1995
        }
1996
        kmem_cache_free(&cache_cache, cachep);
1997
}
1998
 
1999
 
2000
/**
2001
 * calculate_slab_order - calculate size (page order) of slabs
2002
 * @cachep: pointer to the cache that is being created
2003
 * @size: size of objects to be created in this cache.
2004
 * @align: required alignment for the objects.
2005
 * @flags: slab allocation flags
2006
 *
2007
 * Also calculates the number of objects per slab.
2008
 *
2009
 * This could be made much more intelligent.  For now, try to avoid using
2010
 * high order pages for slabs.  When the gfp() functions are more friendly
2011
 * towards high-order requests, this should be changed.
2012
 */
2013
static size_t calculate_slab_order(struct kmem_cache *cachep,
2014
                        size_t size, size_t align, unsigned long flags)
2015
{
2016
        unsigned long offslab_limit;
2017
        size_t left_over = 0;
2018
        int gfporder;
2019
 
2020
        for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
2021
                unsigned int num;
2022
                size_t remainder;
2023
 
2024
                cache_estimate(gfporder, size, align, flags, &remainder, &num);
2025
                if (!num)
2026
                        continue;
2027
 
2028
                if (flags & CFLGS_OFF_SLAB) {
2029
                        /*
2030
                         * Max number of objs-per-slab for caches which
2031
                         * use off-slab slabs. Needed to avoid a possible
2032
                         * looping condition in cache_grow().
2033
                         */
2034
                        offslab_limit = size - sizeof(struct slab);
2035
                        offslab_limit /= sizeof(kmem_bufctl_t);
2036
 
2037
                        if (num > offslab_limit)
2038
                                break;
2039
                }
2040
 
2041
                /* Found something acceptable - save it away */
2042
                cachep->num = num;
2043
                cachep->gfporder = gfporder;
2044
                left_over = remainder;
2045
 
2046
                /*
2047
                 * A VFS-reclaimable slab tends to have most allocations
2048
                 * as GFP_NOFS and we really don't want to have to be allocating
2049
                 * higher-order pages when we are unable to shrink dcache.
2050
                 */
2051
                if (flags & SLAB_RECLAIM_ACCOUNT)
2052
                        break;
2053
 
2054
                /*
2055
                 * Large number of objects is good, but very large slabs are
2056
                 * currently bad for the gfp()s.
2057
                 */
2058
                if (gfporder >= slab_break_gfp_order)
2059
                        break;
2060
 
2061
                /*
2062
                 * Acceptable internal fragmentation?
2063
                 */
2064
                if (left_over * 8 <= (PAGE_SIZE << gfporder))
2065
                        break;
2066
        }
2067
        return left_over;
2068
}
2069
 
2070
static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
2071
{
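        /*
         * Bootstrap state machine: g_cpucache_up moves from NONE through
         * PARTIAL_AC (the arraycache_init cache is usable) and PARTIAL_L3
         * (kmem_list3 can be kmalloced) to FULL once kmem_cache_init() has
         * finished.
         */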
2072
        if (g_cpucache_up == FULL)
2073
                return enable_cpucache(cachep);
2074
 
2075
        if (g_cpucache_up == NONE) {
2076
                /*
2077
                 * Note: the first kmem_cache_create must create the cache
2078
                 * that's used by kmalloc(24), otherwise the creation of
2079
                 * further caches will BUG().
2080
                 */
2081
                cachep->array[smp_processor_id()] = &initarray_generic.cache;
2082
 
2083
                /*
2084
                 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
2085
                 * the first cache, then we need to set up all its list3s,
2086
                 * otherwise the creation of further caches will BUG().
2087
                 */
2088
                set_up_list3s(cachep, SIZE_AC);
2089
                if (INDEX_AC == INDEX_L3)
2090
                        g_cpucache_up = PARTIAL_L3;
2091
                else
2092
                        g_cpucache_up = PARTIAL_AC;
2093
        } else {
2094
                cachep->array[smp_processor_id()] =
2095
                        kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
2096
 
2097
                if (g_cpucache_up == PARTIAL_AC) {
2098
                        set_up_list3s(cachep, SIZE_L3);
2099
                        g_cpucache_up = PARTIAL_L3;
2100
                } else {
2101
                        int node;
2102
                        for_each_node_state(node, N_NORMAL_MEMORY) {
2103
                                cachep->nodelists[node] =
2104
                                    kmalloc_node(sizeof(struct kmem_list3),
2105
                                                GFP_KERNEL, node);
2106
                                BUG_ON(!cachep->nodelists[node]);
2107
                                kmem_list3_init(cachep->nodelists[node]);
2108
                        }
2109
                }
2110
        }
2111
        cachep->nodelists[numa_node_id()]->next_reap =
2112
                        jiffies + REAPTIMEOUT_LIST3 +
2113
                        ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2114
 
2115
        cpu_cache_get(cachep)->avail = 0;
2116
        cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2117
        cpu_cache_get(cachep)->batchcount = 1;
2118
        cpu_cache_get(cachep)->touched = 0;
2119
        cachep->batchcount = 1;
2120
        cachep->limit = BOOT_CPUCACHE_ENTRIES;
2121
        return 0;
2122
}
2123
 
2124
/**
2125
 * kmem_cache_create - Create a cache.
2126
 * @name: A string which is used in /proc/slabinfo to identify this cache.
2127
 * @size: The size of objects to be created in this cache.
2128
 * @align: The required alignment for the objects.
2129
 * @flags: SLAB flags
2130
 * @ctor: A constructor for the objects.
2131
 *
2132
 * Returns a ptr to the cache on success, NULL on failure.
2133
 * Cannot be called within an interrupt, but can be interrupted.
2134
 * The @ctor is run when new pages are allocated by the cache.
2135
 *
2136
 * @name must be valid until the cache is destroyed. This implies that
2137
 * the module calling this has to destroy the cache before getting unloaded.
2138
 *
2139
 * The flags are
2140
 *
2141
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
2142
 * to catch references to uninitialised memory.
2143
 *
2144
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
2145
 * for buffer overruns.
2146
 *
2147
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
2148
 * cacheline.  This can be beneficial if you're counting cycles as closely
2149
 * as davem.
2150
 */
2151
struct kmem_cache *
2152
kmem_cache_create (const char *name, size_t size, size_t align,
2153
        unsigned long flags,
2154
        void (*ctor)(struct kmem_cache *, void *))
2155
{
2156
        size_t left_over, slab_size, ralign;
2157
        struct kmem_cache *cachep = NULL, *pc;
2158
 
2159
        /*
2160
         * Sanity checks... these are all serious usage bugs.
2161
         */
2162
        if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2163
            size > KMALLOC_MAX_SIZE) {
2164
                printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
2165
                                name);
2166
                BUG();
2167
        }
2168
 
2169
        /*
2170
         * We use cache_chain_mutex to ensure a consistent view of
2171
         * cpu_online_map as well.  Please see cpuup_callback
2172
         */
2173
        mutex_lock(&cache_chain_mutex);
2174
 
2175
        list_for_each_entry(pc, &cache_chain, next) {
2176
                char tmp;
2177
                int res;
2178
 
2179
                /*
2180
                 * This happens when the module gets unloaded and doesn't
2181
                 * destroy its slab cache and no one else reuses the vmalloc
2182
                 * area of the module.  Print a warning.
2183
                 */
2184
                res = probe_kernel_address(pc->name, tmp);
2185
                if (res) {
2186
                        printk(KERN_ERR
2187
                               "SLAB: cache with size %d has lost its name\n",
2188
                               pc->buffer_size);
2189
                        continue;
2190
                }
2191
 
2192
                if (!strcmp(pc->name, name)) {
2193
                        printk(KERN_ERR
2194
                               "kmem_cache_create: duplicate cache %s\n", name);
2195
                        dump_stack();
2196
                        goto oops;
2197
                }
2198
        }
2199
 
2200
#if DEBUG
2201
        WARN_ON(strchr(name, ' '));     /* It confuses parsers */
2202
#if FORCED_DEBUG
2203
        /*
2204
         * Enable redzoning and last user accounting, except for caches with
2205
         * large objects, if the increased size would increase the object size
2206
         * above the next power of two: caches with object sizes just above a
2207
         * power of two have a significant amount of internal fragmentation.
2208
         */
2209
        if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2210
                                                2 * sizeof(unsigned long long)))
2211
                flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2212
        if (!(flags & SLAB_DESTROY_BY_RCU))
2213
                flags |= SLAB_POISON;
2214
#endif
2215
        if (flags & SLAB_DESTROY_BY_RCU)
2216
                BUG_ON(flags & SLAB_POISON);
2217
#endif
2218
        /*
2219
         * Always check the flags; a caller might be expecting debug support which
2220
         * isn't available.
2221
         */
2222
        BUG_ON(flags & ~CREATE_MASK);
2223
 
2224
        /*
2225
         * Check that size is in terms of words.  This is needed to avoid
2226
         * unaligned accesses for some archs when redzoning is used, and makes
2227
         * sure any on-slab bufctl's are also correctly aligned.
2228
         */
2229
        if (size & (BYTES_PER_WORD - 1)) {
2230
                size += (BYTES_PER_WORD - 1);
2231
                size &= ~(BYTES_PER_WORD - 1);
2232
        }
2233
 
2234
        /* calculate the final buffer alignment: */
2235
 
2236
        /* 1) arch recommendation: can be overridden for debug */
2237
        if (flags & SLAB_HWCACHE_ALIGN) {
2238
                /*
2239
                 * Default alignment: as specified by the arch code.  Except if
2240
                 * an object is really small, then squeeze multiple objects into
2241
                 * one cacheline.
2242
                 */
2243
                ralign = cache_line_size();
2244
                while (size <= ralign / 2)
2245
                        ralign /= 2;
2246
        } else {
2247
                ralign = BYTES_PER_WORD;
2248
        }
2249
 
2250
        /*
2251
         * Redzoning and user store require word alignment or possibly larger.
2252
         * Note this will be overridden by architecture or caller mandated
2253
         * alignment if either is greater than BYTES_PER_WORD.
2254
         */
2255
        if (flags & SLAB_STORE_USER)
2256
                ralign = BYTES_PER_WORD;
2257
 
2258
        if (flags & SLAB_RED_ZONE) {
2259
                ralign = REDZONE_ALIGN;
2260
                /* If redzoning, ensure that the second redzone is suitably
2261
                 * aligned, by adjusting the object size accordingly. */
2262
                size += REDZONE_ALIGN - 1;
2263
                size &= ~(REDZONE_ALIGN - 1);
2264
        }
2265
 
2266
        /* 2) arch mandated alignment */
2267
        if (ralign < ARCH_SLAB_MINALIGN) {
2268
                ralign = ARCH_SLAB_MINALIGN;
2269
        }
2270
        /* 3) caller mandated alignment */
2271
        if (ralign < align) {
2272
                ralign = align;
2273
        }
2274
        /* disable debug if necessary */
2275
        if (ralign > __alignof__(unsigned long long))
2276
                flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2277
        /*
2278
         * 4) Store it.
2279
         */
2280
        align = ralign;
2281
 
2282
        /* Get cache's description obj. */
2283
        cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
2284
        if (!cachep)
2285
                goto oops;
2286
 
2287
#if DEBUG
2288
        cachep->obj_size = size;
2289
 
2290
        /*
2291
         * Both debugging options require word-alignment which is calculated
2292
         * into align above.
2293
         */
2294
        if (flags & SLAB_RED_ZONE) {
2295
                /* add space for red zone words */
2296
                cachep->obj_offset += sizeof(unsigned long long);
2297
                size += 2 * sizeof(unsigned long long);
2298
        }
2299
        if (flags & SLAB_STORE_USER) {
2300
                /* user store requires one word storage behind the end of
2301
                 * the real object. But if the second red zone needs to be
2302
                 * aligned to 64 bits, we must allow that much space.
2303
                 */
2304
                if (flags & SLAB_RED_ZONE)
2305
                        size += REDZONE_ALIGN;
2306
                else
2307
                        size += BYTES_PER_WORD;
2308
        }
2309
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2310
        if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2311
            && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
2312
                cachep->obj_offset += PAGE_SIZE - size;
2313
                size = PAGE_SIZE;
2314
        }
2315
#endif
2316
#endif
2317
 
2318
        /*
2319
         * Determine if the slab management is 'on' or 'off' slab.
2320
         * (bootstrapping cannot cope with offslab caches so don't do
2321
         * it too early on.)
2322
         */
2323
        if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
2324
                /*
2325
                 * Size is large, assume best to place the slab management obj
2326
                 * off-slab (should allow better packing of objs).
2327
                 */
2328
                flags |= CFLGS_OFF_SLAB;
2329
 
2330
        size = ALIGN(size, align);
2331
 
2332
        left_over = calculate_slab_order(cachep, size, align, flags);
2333
 
2334
        if (!cachep->num) {
2335
                printk(KERN_ERR
2336
                       "kmem_cache_create: couldn't create cache %s.\n", name);
2337
                kmem_cache_free(&cache_cache, cachep);
2338
                cachep = NULL;
2339
                goto oops;
2340
        }
2341
        slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2342
                          + sizeof(struct slab), align);
2343
 
2344
        /*
2345
         * If the slab has been placed off-slab, and we have enough space then
2346
         * move it on-slab. This is at the expense of any extra colouring.
2347
         */
2348
        if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2349
                flags &= ~CFLGS_OFF_SLAB;
2350
                left_over -= slab_size;
2351
        }
2352
 
2353
        if (flags & CFLGS_OFF_SLAB) {
2354
                /* really off slab. No need for manual alignment */
2355
                slab_size =
2356
                    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2357
        }
2358
 
2359
        cachep->colour_off = cache_line_size();
2360
        /* Offset must be a multiple of the alignment. */
2361
        if (cachep->colour_off < align)
2362
                cachep->colour_off = align;
2363
        cachep->colour = left_over / cachep->colour_off;
2364
        cachep->slab_size = slab_size;
2365
        cachep->flags = flags;
2366
        cachep->gfpflags = 0;
2367
        if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2368
                cachep->gfpflags |= GFP_DMA;
2369
        cachep->buffer_size = size;
2370
        cachep->reciprocal_buffer_size = reciprocal_value(size);
2371
 
2372
        if (flags & CFLGS_OFF_SLAB) {
2373
                cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2374
                /*
2375
                 * This is a possibility for one of the malloc_sizes caches.
2376
                 * But since we go off slab only for object size greater than
2377
                 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
2378
                 * this should not happen at all.
2379
                 * But leave a BUG_ON for some lucky dude.
2380
                 */
2381
                BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
2382
        }
2383
        cachep->ctor = ctor;
2384
        cachep->name = name;
2385
 
2386
        if (setup_cpu_cache(cachep)) {
2387
                __kmem_cache_destroy(cachep);
2388
                cachep = NULL;
2389
                goto oops;
2390
        }
2391
 
2392
        /* cache setup completed, link it into the list */
2393
        list_add(&cachep->next, &cache_chain);
2394
oops:
2395
        if (!cachep && (flags & SLAB_PANIC))
2396
                panic("kmem_cache_create(): failed to create slab `%s'\n",
2397
                      name);
2398
        mutex_unlock(&cache_chain_mutex);
2399
        return cachep;
2400
}
2401
EXPORT_SYMBOL(kmem_cache_create);
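
/*
 * A minimal usage sketch (illustrative only; 'struct foo' and 'foo_cache'
 * are made-up names, not part of this file):
 *
 *      static struct kmem_cache *foo_cache;
 *
 *      foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *                                    SLAB_HWCACHE_ALIGN, NULL);
 *      struct foo *p = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *      ...
 *      kmem_cache_free(foo_cache, p);
 */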
2402
 
2403
#if DEBUG
2404
static void check_irq_off(void)
2405
{
2406
        BUG_ON(!irqs_disabled());
2407
}
2408
 
2409
static void check_irq_on(void)
2410
{
2411
        BUG_ON(irqs_disabled());
2412
}
2413
 
2414
static void check_spinlock_acquired(struct kmem_cache *cachep)
2415
{
2416
#ifdef CONFIG_SMP
2417
        check_irq_off();
2418
        assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
2419
#endif
2420
}
2421
 
2422
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2423
{
2424
#ifdef CONFIG_SMP
2425
        check_irq_off();
2426
        assert_spin_locked(&cachep->nodelists[node]->list_lock);
2427
#endif
2428
}
2429
 
2430
#else
2431
#define check_irq_off() do { } while(0)
2432
#define check_irq_on()  do { } while(0)
2433
#define check_spinlock_acquired(x) do { } while(0)
2434
#define check_spinlock_acquired_node(x, y) do { } while(0)
2435
#endif
2436
 
2437
static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2438
                        struct array_cache *ac,
2439
                        int force, int node);
2440
 
2441
static void do_drain(void *arg)
2442
{
2443
        struct kmem_cache *cachep = arg;
2444
        struct array_cache *ac;
2445
        int node = numa_node_id();
2446
 
2447
        check_irq_off();
2448
        ac = cpu_cache_get(cachep);
2449
        spin_lock(&cachep->nodelists[node]->list_lock);
2450
        free_block(cachep, ac->entry, ac->avail, node);
2451
        spin_unlock(&cachep->nodelists[node]->list_lock);
2452
        ac->avail = 0;
2453
}
2454
 
2455
static void drain_cpu_caches(struct kmem_cache *cachep)
2456
{
2457
        struct kmem_list3 *l3;
2458
        int node;
2459
 
2460
        on_each_cpu(do_drain, cachep, 1, 1);
2461
        check_irq_on();
2462
        for_each_online_node(node) {
2463
                l3 = cachep->nodelists[node];
2464
                if (l3 && l3->alien)
2465
                        drain_alien_cache(cachep, l3->alien);
2466
        }
2467
 
2468
        for_each_online_node(node) {
2469
                l3 = cachep->nodelists[node];
2470
                if (l3)
2471
                        drain_array(cachep, l3, l3->shared, 1, node);
2472
        }
2473
}
2474
 
2475
/*
2476
 * Remove slabs from the list of free slabs.
2477
 * Specify the number of slabs to drain in tofree.
2478
 *
2479
 * Returns the actual number of slabs released.
2480
 */
2481
static int drain_freelist(struct kmem_cache *cache,
2482
                        struct kmem_list3 *l3, int tofree)
2483
{
2484
        struct list_head *p;
2485
        int nr_freed;
2486
        struct slab *slabp;
2487
 
2488
        nr_freed = 0;
2489
        while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2490
 
2491
                spin_lock_irq(&l3->list_lock);
2492
                p = l3->slabs_free.prev;
2493
                if (p == &l3->slabs_free) {
2494
                        spin_unlock_irq(&l3->list_lock);
2495
                        goto out;
2496
                }
2497
 
2498
                slabp = list_entry(p, struct slab, list);
2499
#if DEBUG
2500
                BUG_ON(slabp->inuse);
2501
#endif
2502
                list_del(&slabp->list);
2503
                /*
2504
                 * Safe to drop the lock. The slab is no longer linked
2505
                 * to the cache.
2506
                 */
2507
                l3->free_objects -= cache->num;
2508
                spin_unlock_irq(&l3->list_lock);
2509
                slab_destroy(cache, slabp);
2510
                nr_freed++;
2511
        }
2512
out:
2513
        return nr_freed;
2514
}
2515
 
2516
/* Called with cache_chain_mutex held to protect against cpu hotplug */
2517
static int __cache_shrink(struct kmem_cache *cachep)
2518
{
2519
        int ret = 0, i = 0;
2520
        struct kmem_list3 *l3;
2521
 
2522
        drain_cpu_caches(cachep);
2523
 
2524
        check_irq_on();
2525
        for_each_online_node(i) {
2526
                l3 = cachep->nodelists[i];
2527
                if (!l3)
2528
                        continue;
2529
 
2530
                drain_freelist(cachep, l3, l3->free_objects);
2531
 
2532
                ret += !list_empty(&l3->slabs_full) ||
2533
                        !list_empty(&l3->slabs_partial);
2534
        }
2535
        return (ret ? 1 : 0);
2536
}
2537
 
2538
/**
2539
 * kmem_cache_shrink - Shrink a cache.
2540
 * @cachep: The cache to shrink.
2541
 *
2542
 * Releases as many slabs as possible for a cache.
2543
 * To help debugging, a zero exit status indicates all slabs were released.
2544
 */
2545
int kmem_cache_shrink(struct kmem_cache *cachep)
2546
{
2547
        int ret;
2548
        BUG_ON(!cachep || in_interrupt());
2549
 
2550
        mutex_lock(&cache_chain_mutex);
2551
        ret = __cache_shrink(cachep);
2552
        mutex_unlock(&cache_chain_mutex);
2553
        return ret;
2554
}
2555
EXPORT_SYMBOL(kmem_cache_shrink);
2556
 
2557
/**
2558
 * kmem_cache_destroy - delete a cache
2559
 * @cachep: the cache to destroy
2560
 *
2561
 * Remove a &struct kmem_cache object from the slab cache.
2562
 *
2563
 * It is expected this function will be called by a module when it is
2564
 * unloaded.  This will remove the cache completely, and avoid a duplicate
2565
 * cache being allocated each time a module is loaded and unloaded, if the
2566
 * module doesn't have persistent in-kernel storage across loads and unloads.
2567
 *
2568
 * The cache must be empty before calling this function.
2569
 *
2570
 * The caller must guarantee that no one will allocate memory from the cache
2571
 * during the kmem_cache_destroy().
2572
 */
2573
void kmem_cache_destroy(struct kmem_cache *cachep)
2574
{
2575
        BUG_ON(!cachep || in_interrupt());
2576
 
2577
        /* Find the cache in the chain of caches. */
2578
        mutex_lock(&cache_chain_mutex);
2579
        /*
2580
         * the chain is never empty, cache_cache is never destroyed
2581
         */
2582
        list_del(&cachep->next);
2583
        if (__cache_shrink(cachep)) {
2584
                slab_error(cachep, "Can't free all objects");
2585
                list_add(&cachep->next, &cache_chain);
2586
                mutex_unlock(&cache_chain_mutex);
2587
                return;
2588
        }
2589
 
2590
        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2591
                synchronize_rcu();
2592
 
2593
        __kmem_cache_destroy(cachep);
2594
        mutex_unlock(&cache_chain_mutex);
2595
}
2596
EXPORT_SYMBOL(kmem_cache_destroy);
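
/*
 * Typical module usage (sketch only; 'foo_cache' and 'foo_exit' are
 * illustrative names): create the cache at module init time and destroy
 * it on unload:
 *
 *      static void __exit foo_exit(void)
 *      {
 *              kmem_cache_destroy(foo_cache);
 *      }
 */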
2597
 
2598
/*
2599
 * Get the memory for a slab management obj.
2600
 * For a slab cache when the slab descriptor is off-slab, slab descriptors
2601
 * always come from malloc_sizes caches.  The slab descriptor cannot
2602
 * come from the same cache which is getting created because,
2603
 * when we are searching for an appropriate cache for these
2604
 * descriptors in kmem_cache_create, we search through the malloc_sizes array.
2605
 * If we are creating a malloc_sizes cache here it would not be visible to
2606
 * kmem_find_general_cachep till the initialization is complete.
2607
 * Hence we cannot have slabp_cache same as the original cache.
2608
 */
2609
static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2610
                                   int colour_off, gfp_t local_flags,
2611
                                   int nodeid)
2612
{
2613
        struct slab *slabp;
2614
 
2615
        if (OFF_SLAB(cachep)) {
2616
                /* Slab management obj is off-slab. */
2617
                slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2618
                                              local_flags & ~GFP_THISNODE, nodeid);
2619
                if (!slabp)
2620
                        return NULL;
2621
        } else {
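                /*
                 * On-slab layout: the colour padding comes first, then the
                 * struct slab plus its bufctl array (slab_size bytes), and
                 * the objects themselves start at s_mem.
                 */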
2622
                slabp = objp + colour_off;
2623
                colour_off += cachep->slab_size;
2624
        }
2625
        slabp->inuse = 0;
2626
        slabp->colouroff = colour_off;
2627
        slabp->s_mem = objp + colour_off;
2628
        slabp->nodeid = nodeid;
2629
        return slabp;
2630
}
2631
 
2632
static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2633
{
2634
        return (kmem_bufctl_t *) (slabp + 1);
2635
}
2636
 
2637
static void cache_init_objs(struct kmem_cache *cachep,
2638
                            struct slab *slabp)
2639
{
2640
        int i;
2641
 
2642
        for (i = 0; i < cachep->num; i++) {
2643
                void *objp = index_to_obj(cachep, slabp, i);
2644
#if DEBUG
2645
                /* need to poison the objs? */
2646
                if (cachep->flags & SLAB_POISON)
2647
                        poison_obj(cachep, objp, POISON_FREE);
2648
                if (cachep->flags & SLAB_STORE_USER)
2649
                        *dbg_userword(cachep, objp) = NULL;
2650
 
2651
                if (cachep->flags & SLAB_RED_ZONE) {
2652
                        *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2653
                        *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2654
                }
2655
                /*
2656
                 * Constructors are not allowed to allocate memory from the same
2657
                 * cache which they are a constructor for.  Otherwise, deadlock.
2658
                 * They must also be threaded.
2659
                 */
2660
                if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2661
                        cachep->ctor(cachep, objp + obj_offset(cachep));
2662
 
2663
                if (cachep->flags & SLAB_RED_ZONE) {
2664
                        if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2665
                                slab_error(cachep, "constructor overwrote the"
2666
                                           " end of an object");
2667
                        if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2668
                                slab_error(cachep, "constructor overwrote the"
2669
                                           " start of an object");
2670
                }
2671
                if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2672
                            OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2673
                        kernel_map_pages(virt_to_page(objp),
2674
                                         cachep->buffer_size / PAGE_SIZE, 0);
2675
#else
2676
                if (cachep->ctor)
2677
                        cachep->ctor(cachep, objp);
2678
#endif
2679
                slab_bufctl(slabp)[i] = i + 1;
2680
        }
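        /*
         * Terminate the free list built above: for num == 4 the bufctl
         * array ends up as { 1, 2, 3, BUFCTL_END } with slabp->free == 0,
         * i.e. each entry points at the next free object.
         */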
2681
        slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2682
        slabp->free = 0;
2683
}
2684
 
2685
static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2686
{
2687
        if (CONFIG_ZONE_DMA_FLAG) {
2688
                if (flags & GFP_DMA)
2689
                        BUG_ON(!(cachep->gfpflags & GFP_DMA));
2690
                else
2691
                        BUG_ON(cachep->gfpflags & GFP_DMA);
2692
        }
2693
}
2694
 
2695
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2696
                                int nodeid)
2697
{
2698
        void *objp = index_to_obj(cachep, slabp, slabp->free);
2699
        kmem_bufctl_t next;
2700
 
2701
        slabp->inuse++;
2702
        next = slab_bufctl(slabp)[slabp->free];
2703
#if DEBUG
2704
        slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2705
        WARN_ON(slabp->nodeid != nodeid);
2706
#endif
2707
        slabp->free = next;
2708
 
2709
        return objp;
2710
}
2711
 
2712
static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2713
                                void *objp, int nodeid)
2714
{
2715
        unsigned int objnr = obj_to_index(cachep, slabp, objp);
2716
 
2717
#if DEBUG
2718
        /* Verify that the slab belongs to the intended node */
2719
        WARN_ON(slabp->nodeid != nodeid);
2720
 
2721
        if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2722
                printk(KERN_ERR "slab: double free detected in cache "
2723
                                "'%s', objp %p\n", cachep->name, objp);
2724
                BUG();
2725
        }
2726
#endif
2727
        slab_bufctl(slabp)[objnr] = slabp->free;
2728
        slabp->free = objnr;
2729
        slabp->inuse--;
2730
}
2731
 
2732
/*
2733
 * Map pages beginning at addr to the given cache and slab. This is required
2734
 * for the slab allocator to be able to lookup the cache and slab of a
2735
 * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
2736
 */
2737
static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2738
                           void *addr)
2739
{
2740
        int nr_pages;
2741
        struct page *page;
2742
 
2743
        page = virt_to_page(addr);
2744
 
2745
        nr_pages = 1;
2746
        if (likely(!PageCompound(page)))
2747
                nr_pages <<= cache->gfporder;
2748
 
2749
        do {
2750
                page_set_cache(page, cache);
2751
                page_set_slab(page, slab);
2752
                page++;
2753
        } while (--nr_pages);
2754
}
2755
 
2756
/*
2757
 * Grow (by 1) the number of slabs within a cache.  This is called by
2758
 * kmem_cache_alloc() when there are no active objs left in a cache.
2759
 */
2760
static int cache_grow(struct kmem_cache *cachep,
2761
                gfp_t flags, int nodeid, void *objp)
2762
{
2763
        struct slab *slabp;
2764
        size_t offset;
2765
        gfp_t local_flags;
2766
        struct kmem_list3 *l3;
2767
 
2768
        /*
2769
         * Be lazy and only check for valid flags here, keeping it out of the
2770
         * critical path in kmem_cache_alloc().
2771
         */
2772
        BUG_ON(flags & GFP_SLAB_BUG_MASK);
2773
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2774
 
2775
        /* Take the l3 list lock to change the colour_next on this node */
2776
        check_irq_off();
2777
        l3 = cachep->nodelists[nodeid];
2778
        spin_lock(&l3->list_lock);
2779
 
2780
        /* Get the colour for the slab, and calculate the next value. */
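        /*
         * Example: with colour_off == 64 and colour == 3, successive slabs
         * place their objects at offsets 0, 64 and 128 before wrapping,
         * spreading identically-indexed objects across cache sets.
         */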
2781
        offset = l3->colour_next;
2782
        l3->colour_next++;
2783
        if (l3->colour_next >= cachep->colour)
2784
                l3->colour_next = 0;
2785
        spin_unlock(&l3->list_lock);
2786
 
2787
        offset *= cachep->colour_off;
2788
 
2789
        if (local_flags & __GFP_WAIT)
2790
                local_irq_enable();
2791
 
2792
        /*
2793
         * The test for missing atomic flag is performed here, rather than
2794
         * the more obvious place, simply to reduce the critical path length
2795
         * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
2796
         * will eventually be caught here (where it matters).
2797
         */
2798
        kmem_flagcheck(cachep, flags);
2799
 
2800
        /*
2801
         * Get mem for the objs.  Attempt to allocate a physical page from
2802
         * 'nodeid'.
2803
         */
2804
        if (!objp)
2805
                objp = kmem_getpages(cachep, local_flags, nodeid);
2806
        if (!objp)
2807
                goto failed;
2808
 
2809
        /* Get slab management. */
2810
        slabp = alloc_slabmgmt(cachep, objp, offset,
2811
                        local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2812
        if (!slabp)
2813
                goto opps1;
2814
 
2815
        slabp->nodeid = nodeid;
2816
        slab_map_pages(cachep, slabp, objp);
2817
 
2818
        cache_init_objs(cachep, slabp);
2819
 
2820
        if (local_flags & __GFP_WAIT)
2821
                local_irq_disable();
2822
        check_irq_off();
2823
        spin_lock(&l3->list_lock);
2824
 
2825
        /* Make slab active. */
2826
        list_add_tail(&slabp->list, &(l3->slabs_free));
2827
        STATS_INC_GROWN(cachep);
2828
        l3->free_objects += cachep->num;
2829
        spin_unlock(&l3->list_lock);
2830
        return 1;
2831
opps1:
2832
        kmem_freepages(cachep, objp);
2833
failed:
2834
        if (local_flags & __GFP_WAIT)
2835
                local_irq_disable();
2836
        return 0;
2837
}
2838
 
2839
#if DEBUG
2840
 
2841
/*
2842
 * Perform extra freeing checks:
2843
 * - detect bad pointers.
2844
 * - POISON/RED_ZONE checking
2845
 */
2846
static void kfree_debugcheck(const void *objp)
2847
{
2848
        if (!virt_addr_valid(objp)) {
2849
                printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2850
                       (unsigned long)objp);
2851
                BUG();
2852
        }
2853
}
2854
 
2855
static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2856
{
2857
        unsigned long long redzone1, redzone2;
2858
 
2859
        redzone1 = *dbg_redzone1(cache, obj);
2860
        redzone2 = *dbg_redzone2(cache, obj);
2861
 
2862
        /*
2863
         * Redzone is ok.
2864
         */
2865
        if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2866
                return;
2867
 
2868
        if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2869
                slab_error(cache, "double free detected");
2870
        else
2871
                slab_error(cache, "memory outside object was overwritten");
2872
 
2873
        printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2874
                        obj, redzone1, redzone2);
2875
}
2876
 
2877
static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2878
                                   void *caller)
2879
{
2880
        struct page *page;
2881
        unsigned int objnr;
2882
        struct slab *slabp;
2883
 
2884
        BUG_ON(virt_to_cache(objp) != cachep);
2885
 
2886
        objp -= obj_offset(cachep);
2887
        kfree_debugcheck(objp);
2888
        page = virt_to_head_page(objp);
2889
 
2890
        slabp = page_get_slab(page);
2891
 
2892
        if (cachep->flags & SLAB_RED_ZONE) {
2893
                verify_redzone_free(cachep, objp);
2894
                *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2895
                *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2896
        }
2897
        if (cachep->flags & SLAB_STORE_USER)
2898
                *dbg_userword(cachep, objp) = caller;
2899
 
2900
        objnr = obj_to_index(cachep, slabp, objp);
2901
 
2902
        BUG_ON(objnr >= cachep->num);
2903
        BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2904
 
2905
#ifdef CONFIG_DEBUG_SLAB_LEAK
2906
        slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2907
#endif
2908
        if (cachep->flags & SLAB_POISON) {
2909
#ifdef CONFIG_DEBUG_PAGEALLOC
2910
                if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2911
                        store_stackinfo(cachep, objp, (unsigned long)caller);
2912
                        kernel_map_pages(virt_to_page(objp),
2913
                                         cachep->buffer_size / PAGE_SIZE, 0);
2914
                } else {
2915
                        poison_obj(cachep, objp, POISON_FREE);
2916
                }
2917
#else
2918
                poison_obj(cachep, objp, POISON_FREE);
2919
#endif
2920
        }
2921
        return objp;
2922
}
2923
 
2924
static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2925
{
2926
        kmem_bufctl_t i;
2927
        int entries = 0;
2928
 
2929
        /* Check slab's freelist to see if this obj is there. */
2930
        for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2931
                entries++;
2932
                if (entries > cachep->num || i >= cachep->num)
2933
                        goto bad;
2934
        }
2935
        if (entries != cachep->num - slabp->inuse) {
2936
bad:
2937
                printk(KERN_ERR "slab: Internal list corruption detected in "
2938
                                "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2939
                        cachep->name, cachep->num, slabp, slabp->inuse);
2940
                for (i = 0;
2941
                     i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2942
                     i++) {
2943
                        if (i % 16 == 0)
2944
                                printk("\n%03x:", i);
2945
                        printk(" %02x", ((unsigned char *)slabp)[i]);
2946
                }
2947
                printk("\n");
2948
                BUG();
2949
        }
2950
}
2951
#else
2952
#define kfree_debugcheck(x) do { } while(0)
2953
#define cache_free_debugcheck(x,objp,z) (objp)
2954
#define check_slabp(x,y) do { } while(0)
2955
#endif
2956
 
2957
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2958
{
2959
        int batchcount;
2960
        struct kmem_list3 *l3;
2961
        struct array_cache *ac;
2962
        int node;
2963
 
2964
        node = numa_node_id();
2965
 
2966
        check_irq_off();
2967
        ac = cpu_cache_get(cachep);
2968
retry:
2969
        batchcount = ac->batchcount;
2970
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2971
                /*
2972
                 * If there was little recent activity on this cache, then
2973
                 * perform only a partial refill.  Otherwise we could generate
2974
                 * refill bouncing.
2975
                 */
2976
                batchcount = BATCHREFILL_LIMIT;
2977
        }
2978
        l3 = cachep->nodelists[node];
2979
 
2980
        BUG_ON(ac->avail > 0 || !l3);
2981
        spin_lock(&l3->list_lock);
2982
 
2983
        /* See if we can refill from the shared array */
2984
        if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
2985
                goto alloc_done;
2986
 
2987
        while (batchcount > 0) {
2988
                struct list_head *entry;
2989
                struct slab *slabp;
2990
                /* Get the slab the allocation is to come from. */
2991
                entry = l3->slabs_partial.next;
2992
                if (entry == &l3->slabs_partial) {
2993
                        l3->free_touched = 1;
2994
                        entry = l3->slabs_free.next;
2995
                        if (entry == &l3->slabs_free)
2996
                                goto must_grow;
2997
                }
2998
 
2999
                slabp = list_entry(entry, struct slab, list);
3000
                check_slabp(cachep, slabp);
3001
                check_spinlock_acquired(cachep);
3002
 
3003
                /*
3004
                 * The slab was either on partial or free list so
3005
                 * there must be at least one object available for
3006
                 * allocation.
3007
                 */
3008
                BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);
3009
 
3010
                while (slabp->inuse < cachep->num && batchcount--) {
3011
                        STATS_INC_ALLOCED(cachep);
3012
                        STATS_INC_ACTIVE(cachep);
3013
                        STATS_SET_HIGH(cachep);
3014
 
3015
                        ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
3016
                                                            node);
3017
                }
3018
                check_slabp(cachep, slabp);
3019
 
3020
                /* move slabp to correct slabp list: */
3021
                list_del(&slabp->list);
3022
                if (slabp->free == BUFCTL_END)
3023
                        list_add(&slabp->list, &l3->slabs_full);
3024
                else
3025
                        list_add(&slabp->list, &l3->slabs_partial);
3026
        }
3027
 
3028
must_grow:
3029
        l3->free_objects -= ac->avail;
3030
alloc_done:
3031
        spin_unlock(&l3->list_lock);
3032
 
3033
        if (unlikely(!ac->avail)) {
3034
                int x;
3035
                x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3036
 
3037
                /* cache_grow can reenable interrupts, then ac could change. */
3038
                ac = cpu_cache_get(cachep);
3039
                if (!x && ac->avail == 0)        /* no objects in sight? abort */
3040
                        return NULL;
3041
 
3042
                if (!ac->avail)         /* objects refilled by interrupt? */
3043
                        goto retry;
3044
        }
3045
        ac->touched = 1;
3046
        return ac->entry[--ac->avail];
3047
}
3048
 
3049
static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3050
                                                gfp_t flags)
3051
{
3052
        might_sleep_if(flags & __GFP_WAIT);
3053
#if DEBUG
3054
        kmem_flagcheck(cachep, flags);
3055
#endif
3056
}
3057
 
3058
#if DEBUG
3059
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3060
                                gfp_t flags, void *objp, void *caller)
3061
{
3062
        if (!objp)
3063
                return objp;
3064
        if (cachep->flags & SLAB_POISON) {
3065
#ifdef CONFIG_DEBUG_PAGEALLOC
3066
                if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3067
                        kernel_map_pages(virt_to_page(objp),
3068
                                         cachep->buffer_size / PAGE_SIZE, 1);
3069
                else
3070
                        check_poison_obj(cachep, objp);
3071
#else
3072
                check_poison_obj(cachep, objp);
3073
#endif
3074
                poison_obj(cachep, objp, POISON_INUSE);
3075
        }
3076
        if (cachep->flags & SLAB_STORE_USER)
3077
                *dbg_userword(cachep, objp) = caller;
3078
 
3079
        if (cachep->flags & SLAB_RED_ZONE) {
3080
                if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3081
                                *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3082
                        slab_error(cachep, "double free, or memory outside"
3083
                                                " object was overwritten");
3084
                        printk(KERN_ERR
3085
                                "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3086
                                objp, *dbg_redzone1(cachep, objp),
3087
                                *dbg_redzone2(cachep, objp));
3088
                }
3089
                *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3090
                *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3091
        }
3092
#ifdef CONFIG_DEBUG_SLAB_LEAK
3093
        {
3094
                struct slab *slabp;
3095
                unsigned objnr;
3096
 
3097
                slabp = page_get_slab(virt_to_head_page(objp));
3098
                objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3099
                slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3100
        }
3101
#endif
3102
        objp += obj_offset(cachep);
3103
        if (cachep->ctor && cachep->flags & SLAB_POISON)
3104
                cachep->ctor(cachep, objp);
3105
#if ARCH_SLAB_MINALIGN
3106
        if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
3107
                printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3108
                       objp, ARCH_SLAB_MINALIGN);
3109
        }
3110
#endif
3111
        return objp;
3112
}
3113
#else
3114
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3115
#endif
3116
 
3117
#ifdef CONFIG_FAILSLAB
3118
 
3119
static struct failslab_attr {
3120
 
3121
        struct fault_attr attr;
3122
 
3123
        u32 ignore_gfp_wait;
3124
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
3125
        struct dentry *ignore_gfp_wait_file;
3126
#endif
3127
 
3128
} failslab = {
3129
        .attr = FAULT_ATTR_INITIALIZER,
3130
        .ignore_gfp_wait = 1,
3131
};
3132
 
3133
static int __init setup_failslab(char *str)
3134
{
3135
        return setup_fault_attr(&failslab.attr, str);
3136
}
3137
__setup("failslab=", setup_failslab);
3138
 
3139
static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
3140
{
3141
        if (cachep == &cache_cache)
3142
                return 0;
3143
        if (flags & __GFP_NOFAIL)
3144
                return 0;
3145
        if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
3146
                return 0;
3147
 
3148
        return should_fail(&failslab.attr, obj_size(cachep));
3149
}
3150
 
3151
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
3152
 
3153
static int __init failslab_debugfs(void)
3154
{
3155
        mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
3156
        struct dentry *dir;
3157
        int err;
3158
 
3159
        err = init_fault_attr_dentries(&failslab.attr, "failslab");
3160
        if (err)
3161
                return err;
3162
        dir = failslab.attr.dentries.dir;
3163
 
3164
        failslab.ignore_gfp_wait_file =
3165
                debugfs_create_bool("ignore-gfp-wait", mode, dir,
3166
                                      &failslab.ignore_gfp_wait);
3167
 
3168
        if (!failslab.ignore_gfp_wait_file) {
3169
                err = -ENOMEM;
3170
                debugfs_remove(failslab.ignore_gfp_wait_file);
3171
                cleanup_fault_attr_dentries(&failslab.attr);
3172
        }
3173
 
3174
        return err;
3175
}
3176
 
3177
late_initcall(failslab_debugfs);
3178
 
3179
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
3180
 
3181
#else /* CONFIG_FAILSLAB */
3182
 
3183
static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
3184
{
3185
        return 0;
3186
}
3187
 
3188
#endif /* CONFIG_FAILSLAB */
3189
 
3190
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3191
{
3192
        void *objp;
3193
        struct array_cache *ac;
3194
 
3195
        check_irq_off();
3196
 
3197
        ac = cpu_cache_get(cachep);
3198
        if (likely(ac->avail)) {
3199
                STATS_INC_ALLOCHIT(cachep);
3200
                ac->touched = 1;
3201
                objp = ac->entry[--ac->avail];
3202
        } else {
3203
                STATS_INC_ALLOCMISS(cachep);
3204
                objp = cache_alloc_refill(cachep, flags);
3205
        }
3206
        return objp;
3207
}
3208
 
3209
#ifdef CONFIG_NUMA
3210
/*
3211
 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
3212
 *
3213
 * If we are in_interrupt, then process context, including cpusets and
3214
 * mempolicy, may not apply and should not be used for allocation policy.
3215
 */
3216
static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3217
{
3218
        int nid_alloc, nid_here;
3219
 
3220
        if (in_interrupt() || (flags & __GFP_THISNODE))
3221
                return NULL;
3222
        nid_alloc = nid_here = numa_node_id();
3223
        if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3224
                nid_alloc = cpuset_mem_spread_node();
3225
        else if (current->mempolicy)
3226
                nid_alloc = slab_node(current->mempolicy);
3227
        if (nid_alloc != nid_here)
3228
                return ____cache_alloc_node(cachep, flags, nid_alloc);
3229
        return NULL;
3230
}
3231
 
3232
/*
3233
 * Fallback function if there was no memory available and no objects on a
3234
 * certain node and fallback is permitted. First we scan all the
3235
 * available nodelists for available objects. If that fails then we
3236
 * perform an allocation without specifying a node. This allows the page
3237
 * allocator to do its reclaim / fallback magic. We then insert the
3238
 * slab into the proper nodelist and then allocate from it.
3239
 */
3240
static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3241
{
3242
        struct zonelist *zonelist;
3243
        gfp_t local_flags;
3244
        struct zone **z;
3245
        void *obj = NULL;
3246
        int nid;
3247
 
3248
        if (flags & __GFP_THISNODE)
3249
                return NULL;
3250
 
3251
        zonelist = &NODE_DATA(slab_node(current->mempolicy))
3252
                        ->node_zonelists[gfp_zone(flags)];
3253
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3254
 
3255
retry:
3256
        /*
3257
         * Look through allowed nodes for objects available
3258
         * from existing per node queues.
3259
         */
3260
        for (z = zonelist->zones; *z && !obj; z++) {
3261
                nid = zone_to_nid(*z);
3262
 
3263
                if (cpuset_zone_allowed_hardwall(*z, flags) &&
3264
                        cache->nodelists[nid] &&
3265
                        cache->nodelists[nid]->free_objects)
3266
                                obj = ____cache_alloc_node(cache,
3267
                                        flags | GFP_THISNODE, nid);
3268
        }
3269
 
3270
        if (!obj) {
3271
                /*
3272
                 * This allocation will be performed within the constraints
3273
                 * of the current cpuset / memory policy requirements.
3274
                 * We may trigger various forms of reclaim on the allowed
3275
                 * set and go into memory reserves if necessary.
3276
                 */
3277
                if (local_flags & __GFP_WAIT)
3278
                        local_irq_enable();
3279
                kmem_flagcheck(cache, flags);
3280
                obj = kmem_getpages(cache, flags, -1);
3281
                if (local_flags & __GFP_WAIT)
3282
                        local_irq_disable();
3283
                if (obj) {
3284
                        /*
3285
                         * Insert into the appropriate per node queues
3286
                         */
3287
                        nid = page_to_nid(virt_to_page(obj));
3288
                        if (cache_grow(cache, flags, nid, obj)) {
3289
                                obj = ____cache_alloc_node(cache,
3290
                                        flags | GFP_THISNODE, nid);
3291
                                if (!obj)
3292
                                        /*
3293
                                         * Another processor may allocate the
3294
                                         * objects in the slab since we are
3295
                                         * not holding any locks.
3296
                                         */
3297
                                        goto retry;
3298
                        } else {
3299
                                /* cache_grow already freed obj */
3300
                                obj = NULL;
3301
                        }
3302
                }
3303
        }
3304
        return obj;
3305
}
3306
 
3307
/*
3308
 * An interface to enable slab creation on nodeid
3309
 */
3310
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3311
                                int nodeid)
3312
{
3313
        struct list_head *entry;
3314
        struct slab *slabp;
3315
        struct kmem_list3 *l3;
3316
        void *obj;
3317
        int x;
3318
 
3319
        l3 = cachep->nodelists[nodeid];
3320
        BUG_ON(!l3);
3321
 
3322
retry:
3323
        check_irq_off();
3324
        spin_lock(&l3->list_lock);
3325
        entry = l3->slabs_partial.next;
3326
        if (entry == &l3->slabs_partial) {
3327
                l3->free_touched = 1;
3328
                entry = l3->slabs_free.next;
3329
                if (entry == &l3->slabs_free)
3330
                        goto must_grow;
3331
        }
3332
 
3333
        slabp = list_entry(entry, struct slab, list);
3334
        check_spinlock_acquired_node(cachep, nodeid);
3335
        check_slabp(cachep, slabp);
3336
 
3337
        STATS_INC_NODEALLOCS(cachep);
3338
        STATS_INC_ACTIVE(cachep);
3339
        STATS_SET_HIGH(cachep);
3340
 
3341
        BUG_ON(slabp->inuse == cachep->num);
3342
 
3343
        obj = slab_get_obj(cachep, slabp, nodeid);
3344
        check_slabp(cachep, slabp);
3345
        l3->free_objects--;
3346
        /* move slabp to correct slabp list: */
3347
        list_del(&slabp->list);
3348
 
3349
        if (slabp->free == BUFCTL_END)
3350
                list_add(&slabp->list, &l3->slabs_full);
3351
        else
3352
                list_add(&slabp->list, &l3->slabs_partial);
3353
 
3354
        spin_unlock(&l3->list_lock);
3355
        goto done;
3356
 
3357
must_grow:
3358
        spin_unlock(&l3->list_lock);
3359
        x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3360
        if (x)
3361
                goto retry;
3362
 
3363
        return fallback_alloc(cachep, flags);
3364
 
3365
done:
3366
        return obj;
3367
}
3368
 
3369
/**
3370
 * kmem_cache_alloc_node - Allocate an object on the specified node
3371
 * @cachep: The cache to allocate from.
3372
 * @flags: See kmalloc().
3373
 * @nodeid: node number of the target node.
3374
 * @caller: return address of caller, used for debug information
3375
 *
3376
 * Identical to kmem_cache_alloc but it will allocate memory on the given
3377
 * node, which can improve the performance for cpu bound structures.
3378
 *
3379
 * Fallback to other node is possible if __GFP_THISNODE is not set.
3380
 */
3381
static __always_inline void *
3382
__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3383
                   void *caller)
3384
{
3385
        unsigned long save_flags;
3386
        void *ptr;
3387
 
3388
        if (should_failslab(cachep, flags))
3389
                return NULL;
3390
 
3391
        cache_alloc_debugcheck_before(cachep, flags);
3392
        local_irq_save(save_flags);
3393
 
3394
        if (unlikely(nodeid == -1))
3395
                nodeid = numa_node_id();
3396
 
3397
        if (unlikely(!cachep->nodelists[nodeid])) {
3398
                /* Node not bootstrapped yet */
3399
                ptr = fallback_alloc(cachep, flags);
3400
                goto out;
3401
        }
3402
 
3403
        if (nodeid == numa_node_id()) {
3404
                /*
3405
                 * Use the locally cached objects if possible.
3406
                 * However ____cache_alloc does not allow fallback
3407
                 * to other nodes. It may fail while we still have
3408
                 * objects on other nodes available.
3409
                 */
3410
                ptr = ____cache_alloc(cachep, flags);
3411
                if (ptr)
3412
                        goto out;
3413
        }
3414
        /* ___cache_alloc_node can fall back to other nodes */
3415
        ptr = ____cache_alloc_node(cachep, flags, nodeid);
3416
  out:
3417
        local_irq_restore(save_flags);
3418
        ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3419
 
3420
        if (unlikely((flags & __GFP_ZERO) && ptr))
3421
                memset(ptr, 0, obj_size(cachep));
3422
 
3423
        return ptr;
3424
}
3425
 
3426
static __always_inline void *
3427
__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3428
{
3429
        void *objp;
3430
 
3431
        if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3432
                objp = alternate_node_alloc(cache, flags);
3433
                if (objp)
3434
                        goto out;
3435
        }
3436
        objp = ____cache_alloc(cache, flags);
3437
 
3438
        /*
3439
         * We may just have run out of memory on the local node.
3440
         * ____cache_alloc_node() knows how to locate memory on other nodes
3441
         */
3442
        if (!objp)
3443
                objp = ____cache_alloc_node(cache, flags, numa_node_id());
3444
 
3445
  out:
3446
        return objp;
3447
}
3448
#else
3449
 
3450
static __always_inline void *
3451
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3452
{
3453
        return ____cache_alloc(cachep, flags);
3454
}
3455
 
3456
#endif /* CONFIG_NUMA */
3457
 
3458
static __always_inline void *
3459
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3460
{
3461
        unsigned long save_flags;
3462
        void *objp;
3463
 
3464
        if (should_failslab(cachep, flags))
3465
                return NULL;
3466
 
3467
        cache_alloc_debugcheck_before(cachep, flags);
3468
        local_irq_save(save_flags);
3469
        objp = __do_cache_alloc(cachep, flags);
3470
        local_irq_restore(save_flags);
3471
        objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3472
        prefetchw(objp);
3473
 
3474
        if (unlikely((flags & __GFP_ZERO) && objp))
3475
                memset(objp, 0, obj_size(cachep));
3476
 
3477
        return objp;
3478
}
3479
 
3480
/*
3481
 * Caller needs to acquire correct kmem_list's list_lock
3482
 */
3483
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3484
                       int node)
3485
{
3486
        int i;
3487
        struct kmem_list3 *l3;
3488
 
3489
        for (i = 0; i < nr_objects; i++) {
3490
                void *objp = objpp[i];
3491
                struct slab *slabp;
3492
 
3493
                slabp = virt_to_slab(objp);
3494
                l3 = cachep->nodelists[node];
3495
                list_del(&slabp->list);
3496
                check_spinlock_acquired_node(cachep, node);
3497
                check_slabp(cachep, slabp);
3498
                slab_put_obj(cachep, slabp, objp, node);
3499
                STATS_DEC_ACTIVE(cachep);
3500
                l3->free_objects++;
3501
                check_slabp(cachep, slabp);
3502
 
3503
                /* fixup slab chains */
3504
                if (slabp->inuse == 0) {
3505
                        if (l3->free_objects > l3->free_limit) {
3506
                                l3->free_objects -= cachep->num;
3507
                                /* No need to drop any previously held
3508
                                 * lock here, even if we have an off-slab slab
3509
                                 * descriptor it is guaranteed to come from
3510
                                 * a different cache, refer to comments before
3511
                                 * alloc_slabmgmt.
3512
                                 */
3513
                                slab_destroy(cachep, slabp);
3514
                        } else {
3515
                                list_add(&slabp->list, &l3->slabs_free);
3516
                        }
3517
                } else {
3518
                        /* Unconditionally move a slab to the end of the
3519
                         * partial list on free - maximum time for the
3520
                         * other objects to be freed, too.
3521
                         */
3522
                        list_add_tail(&slabp->list, &l3->slabs_partial);
3523
                }
3524
        }
3525
}
3526
 
3527
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3528
{
3529
        int batchcount;
3530
        struct kmem_list3 *l3;
3531
        int node = numa_node_id();
3532
 
3533
        batchcount = ac->batchcount;
3534
#if DEBUG
3535
        BUG_ON(!batchcount || batchcount > ac->avail);
3536
#endif
3537
        check_irq_off();
3538
        l3 = cachep->nodelists[node];
3539
        spin_lock(&l3->list_lock);
3540
        if (l3->shared) {
3541
                struct array_cache *shared_array = l3->shared;
3542
                int max = shared_array->limit - shared_array->avail;
3543
                if (max) {
3544
                        if (batchcount > max)
3545
                                batchcount = max;
3546
                        memcpy(&(shared_array->entry[shared_array->avail]),
3547
                               ac->entry, sizeof(void *) * batchcount);
3548
                        shared_array->avail += batchcount;
3549
                        goto free_done;
3550
                }
3551
        }
3552
 
3553
        free_block(cachep, ac->entry, batchcount, node);
3554
free_done:
3555
#if STATS
3556
        {
3557
                int i = 0;
3558
                struct list_head *p;
3559
 
3560
                p = l3->slabs_free.next;
3561
                while (p != &(l3->slabs_free)) {
3562
                        struct slab *slabp;
3563
 
3564
                        slabp = list_entry(p, struct slab, list);
3565
                        BUG_ON(slabp->inuse);
3566
 
3567
                        i++;
3568
                        p = p->next;
3569
                }
3570
                STATS_SET_FREEABLE(cachep, i);
3571
        }
3572
#endif
3573
        spin_unlock(&l3->list_lock);
3574
        ac->avail -= batchcount;
3575
        memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3576
}
3577
 
3578
/*
3579
 * Release an obj back to its cache. If the obj has a constructed state, it must
3580
 * be in this state _before_ it is released.  Called with disabled ints.
3581
 */
3582
static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3583
{
3584
        struct array_cache *ac = cpu_cache_get(cachep);
3585
 
3586
        check_irq_off();
3587
        objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3588
 
3589
        /*
3590
         * Skip calling cache_free_alien() when the platform is not numa.
3591
         * This will avoid cache misses that happen while accessing slabp (which
3592
         * is a per-page memory reference) to get nodeid. Instead use a global
3593
         * variable to skip the call, which is most likely to be present in
3594
         * the cache.
3595
         */
3596
        if (numa_platform && cache_free_alien(cachep, objp))
3597
                return;
3598
 
3599
        if (likely(ac->avail < ac->limit)) {
3600
                STATS_INC_FREEHIT(cachep);
3601
                ac->entry[ac->avail++] = objp;
3602
                return;
3603
        } else {
3604
                STATS_INC_FREEMISS(cachep);
3605
                cache_flusharray(cachep, ac);
3606
                ac->entry[ac->avail++] = objp;
3607
        }
3608
}
3609
 
3610
/**
3611
 * kmem_cache_alloc - Allocate an object
3612
 * @cachep: The cache to allocate from.
3613
 * @flags: See kmalloc().
3614
 *
3615
 * Allocate an object from this cache.  The flags are only relevant
3616
 * if the cache has no available objects.
3617
 */
3618
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3619
{
3620
        return __cache_alloc(cachep, flags, __builtin_return_address(0));
3621
}
3622
EXPORT_SYMBOL(kmem_cache_alloc);
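/*
 * Editor's note: the block below is an illustrative usage sketch, not part
 * of the original file.  It assumes this kernel generation's
 * kmem_cache_create() signature (name, size, align, flags, ctor) with the
 * two-argument constructor used elsewhere in this file; the cache name,
 * object type and error handling are hypothetical.
 */
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/errno.h>

struct my_record {
        int id;
        struct list_head link;
};

static struct kmem_cache *my_record_cache;

/* Called when a new slab is populated; each object is constructed once,
 * not on every allocation (see the file header comment). */
static void my_record_ctor(struct kmem_cache *cachep, void *obj)
{
        struct my_record *r = obj;

        r->id = 0;
        INIT_LIST_HEAD(&r->link);
}

static int __init my_record_cache_init(void)
{
        my_record_cache = kmem_cache_create("my_record",
                                sizeof(struct my_record), 0,
                                SLAB_HWCACHE_ALIGN, my_record_ctor);
        return my_record_cache ? 0 : -ENOMEM;
}

/* Objects must be returned in their constructed state. */
static void my_record_example(void)
{
        struct my_record *r = kmem_cache_alloc(my_record_cache, GFP_KERNEL);

        if (r)
                kmem_cache_free(my_record_cache, r);
}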
3623
 
3624
/**
3625
 * kmem_ptr_validate - check if an untrusted pointer might
3626
 *      be a slab entry.
3627
 * @cachep: the cache we're checking against
3628
 * @ptr: pointer to validate
3629
 *
3630
 * This verifies that the untrusted pointer looks sane:
3631
 * it is _not_ a guarantee that the pointer is actually
3632
 * part of the slab cache in question, but it at least
3633
 * validates that the pointer can be dereferenced and
3634
 * looks half-way sane.
3635
 *
3636
 * Currently only used for dentry validation.
3637
 */
3638
int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3639
{
3640
        unsigned long addr = (unsigned long)ptr;
3641
        unsigned long min_addr = PAGE_OFFSET;
3642
        unsigned long align_mask = BYTES_PER_WORD - 1;
3643
        unsigned long size = cachep->buffer_size;
3644
        struct page *page;
3645
 
3646
        if (unlikely(addr < min_addr))
3647
                goto out;
3648
        if (unlikely(addr > (unsigned long)high_memory - size))
3649
                goto out;
3650
        if (unlikely(addr & align_mask))
3651
                goto out;
3652
        if (unlikely(!kern_addr_valid(addr)))
3653
                goto out;
3654
        if (unlikely(!kern_addr_valid(addr + size - 1)))
3655
                goto out;
3656
        page = virt_to_page(ptr);
3657
        if (unlikely(!PageSlab(page)))
3658
                goto out;
3659
        if (unlikely(page_get_cache(page) != cachep))
3660
                goto out;
3661
        return 1;
3662
out:
3663
        return 0;
3664
}
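/*
 * Editor's note: illustrative caller sketch for kmem_ptr_validate(), not
 * part of the original file; 'cache' and the wrapper name are hypothetical.
 */
static int pointer_looks_sane(struct kmem_cache *cache, const void *candidate)
{
        /*
         * Non-zero only if 'candidate' is word-aligned, maps to a valid
         * kernel address and lies in a page owned by 'cache'.  As the
         * comment above stresses, this is a sanity check, not proof that
         * the object is currently allocated.
         */
        return kmem_ptr_validate(cache, candidate);
}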
3665
 
3666
#ifdef CONFIG_NUMA
3667
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3668
{
3669
        return __cache_alloc_node(cachep, flags, nodeid,
3670
                        __builtin_return_address(0));
3671
}
3672
EXPORT_SYMBOL(kmem_cache_alloc_node);
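/*
 * Editor's note: node-targeted allocation sketch, not part of the original
 * file.  cpu_to_node() is assumed available via <linux/topology.h>; the
 * wrapper and its caller are hypothetical.
 */
static void *alloc_near_cpu(struct kmem_cache *cachep, int cpu)
{
        /*
         * Place the object on the memory node backing 'cpu'; passing -1
         * (or numa_node_id()) instead would simply use the local node.
         */
        return kmem_cache_alloc_node(cachep, GFP_KERNEL, cpu_to_node(cpu));
}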
3673
 
3674
static __always_inline void *
3675
__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3676
{
3677
        struct kmem_cache *cachep;
3678
 
3679
        cachep = kmem_find_general_cachep(size, flags);
3680
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3681
                return cachep;
3682
        return kmem_cache_alloc_node(cachep, flags, node);
3683
}
3684
 
3685
#ifdef CONFIG_DEBUG_SLAB
3686
void *__kmalloc_node(size_t size, gfp_t flags, int node)
3687
{
3688
        return __do_kmalloc_node(size, flags, node,
3689
                        __builtin_return_address(0));
3690
}
3691
EXPORT_SYMBOL(__kmalloc_node);
3692
 
3693
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3694
                int node, void *caller)
3695
{
3696
        return __do_kmalloc_node(size, flags, node, caller);
3697
}
3698
EXPORT_SYMBOL(__kmalloc_node_track_caller);
3699
#else
3700
void *__kmalloc_node(size_t size, gfp_t flags, int node)
3701
{
3702
        return __do_kmalloc_node(size, flags, node, NULL);
3703
}
3704
EXPORT_SYMBOL(__kmalloc_node);
3705
#endif /* CONFIG_DEBUG_SLAB */
3706
#endif /* CONFIG_NUMA */
3707
 
3708
/**
3709
 * __do_kmalloc - allocate memory
3710
 * @size: how many bytes of memory are required.
3711
 * @flags: the type of memory to allocate (see kmalloc).
3712
 * @caller: function caller for debug tracking of the caller
3713
 */
3714
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3715
                                          void *caller)
3716
{
3717
        struct kmem_cache *cachep;
3718
 
3719
        /* If you want to save a few bytes .text space: replace
3720
         * __ with kmem_.
3721
         * Then kmalloc uses the uninlined functions instead of the inline
3722
         * functions.
3723
         */
3724
        cachep = __find_general_cachep(size, flags);
3725
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3726
                return cachep;
3727
        return __cache_alloc(cachep, flags, caller);
3728
}
3729
 
3730
 
3731
#ifdef CONFIG_DEBUG_SLAB
3732
void *__kmalloc(size_t size, gfp_t flags)
3733
{
3734
        return __do_kmalloc(size, flags, __builtin_return_address(0));
3735
}
3736
EXPORT_SYMBOL(__kmalloc);
3737
 
3738
void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
3739
{
3740
        return __do_kmalloc(size, flags, caller);
3741
}
3742
EXPORT_SYMBOL(__kmalloc_track_caller);
3743
 
3744
#else
3745
void *__kmalloc(size_t size, gfp_t flags)
3746
{
3747
        return __do_kmalloc(size, flags, NULL);
3748
}
3749
EXPORT_SYMBOL(__kmalloc);
3750
#endif
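/*
 * Editor's note: __kmalloc() is normally reached through the kmalloc()
 * wrapper in <linux/slab.h>.  A minimal, hypothetical round-trip (assumes
 * the usual <linux/slab.h> and <linux/string.h> includes):
 */
static int copy_to_heap_example(const void *src, size_t len)
{
        void *buf = kmalloc(len, GFP_KERNEL);

        if (!buf)
                return -ENOMEM;
        memcpy(buf, src, len);
        /* ... hand 'buf' to whoever needs it ... */
        kfree(buf);
        return 0;
}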
3751
 
3752
/**
3753
 * kmem_cache_free - Deallocate an object
3754
 * @cachep: The cache the allocation was from.
3755
 * @objp: The previously allocated object.
3756
 *
3757
 * Free an object which was previously allocated from this
3758
 * cache.
3759
 */
3760
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3761
{
3762
        unsigned long flags;
3763
 
3764
        local_irq_save(flags);
3765
        debug_check_no_locks_freed(objp, obj_size(cachep));
3766
        __cache_free(cachep, objp);
3767
        local_irq_restore(flags);
3768
}
3769
EXPORT_SYMBOL(kmem_cache_free);
3770
 
3771
/**
3772
 * kfree - free previously allocated memory
3773
 * @objp: pointer returned by kmalloc.
3774
 *
3775
 * If @objp is NULL, no operation is performed.
3776
 *
3777
 * Don't free memory not originally allocated by kmalloc()
3778
 * or you will run into trouble.
3779
 */
3780
void kfree(const void *objp)
3781
{
3782
        struct kmem_cache *c;
3783
        unsigned long flags;
3784
 
3785
        if (unlikely(ZERO_OR_NULL_PTR(objp)))
3786
                return;
3787
        local_irq_save(flags);
3788
        kfree_debugcheck(objp);
3789
        c = virt_to_cache(objp);
3790
        debug_check_no_locks_freed(objp, obj_size(c));
3791
        __cache_free(c, (void *)objp);
3792
        local_irq_restore(flags);
3793
}
3794
EXPORT_SYMBOL(kfree);
3795
 
3796
unsigned int kmem_cache_size(struct kmem_cache *cachep)
3797
{
3798
        return obj_size(cachep);
3799
}
3800
EXPORT_SYMBOL(kmem_cache_size);
3801
 
3802
const char *kmem_cache_name(struct kmem_cache *cachep)
3803
{
3804
        return cachep->name;
3805
}
3806
EXPORT_SYMBOL_GPL(kmem_cache_name);
3807
 
3808
/*
3809
 * This initializes kmem_list3 or resizes various caches for all nodes.
3810
 */
3811
static int alloc_kmemlist(struct kmem_cache *cachep)
3812
{
3813
        int node;
3814
        struct kmem_list3 *l3;
3815
        struct array_cache *new_shared;
3816
        struct array_cache **new_alien = NULL;
3817
 
3818
        for_each_online_node(node) {
3819
 
3820
                if (use_alien_caches) {
3821
                        new_alien = alloc_alien_cache(node, cachep->limit);
3822
                        if (!new_alien)
3823
                                goto fail;
3824
                }
3825
 
3826
                new_shared = NULL;
3827
                if (cachep->shared) {
3828
                        new_shared = alloc_arraycache(node,
3829
                                cachep->shared*cachep->batchcount,
3830
                                        0xbaadf00d);
3831
                        if (!new_shared) {
3832
                                free_alien_cache(new_alien);
3833
                                goto fail;
3834
                        }
3835
                }
3836
 
3837
                l3 = cachep->nodelists[node];
3838
                if (l3) {
3839
                        struct array_cache *shared = l3->shared;
3840
 
3841
                        spin_lock_irq(&l3->list_lock);
3842
 
3843
                        if (shared)
3844
                                free_block(cachep, shared->entry,
3845
                                                shared->avail, node);
3846
 
3847
                        l3->shared = new_shared;
3848
                        if (!l3->alien) {
3849
                                l3->alien = new_alien;
3850
                                new_alien = NULL;
3851
                        }
3852
                        l3->free_limit = (1 + nr_cpus_node(node)) *
3853
                                        cachep->batchcount + cachep->num;
3854
                        spin_unlock_irq(&l3->list_lock);
3855
                        kfree(shared);
3856
                        free_alien_cache(new_alien);
3857
                        continue;
3858
                }
3859
                l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
3860
                if (!l3) {
3861
                        free_alien_cache(new_alien);
3862
                        kfree(new_shared);
3863
                        goto fail;
3864
                }
3865
 
3866
                kmem_list3_init(l3);
3867
                l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3868
                                ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3869
                l3->shared = new_shared;
3870
                l3->alien = new_alien;
3871
                l3->free_limit = (1 + nr_cpus_node(node)) *
3872
                                        cachep->batchcount + cachep->num;
3873
                cachep->nodelists[node] = l3;
3874
        }
3875
        return 0;
3876
 
3877
fail:
3878
        if (!cachep->next.next) {
3879
                /* Cache is not active yet. Roll back what we did */
3880
                node--;
3881
                while (node >= 0) {
3882
                        if (cachep->nodelists[node]) {
3883
                                l3 = cachep->nodelists[node];
3884
 
3885
                                kfree(l3->shared);
3886
                                free_alien_cache(l3->alien);
3887
                                kfree(l3);
3888
                                cachep->nodelists[node] = NULL;
3889
                        }
3890
                        node--;
3891
                }
3892
        }
3893
        return -ENOMEM;
3894
}
3895
 
3896
struct ccupdate_struct {
3897
        struct kmem_cache *cachep;
3898
        struct array_cache *new[NR_CPUS];
3899
};
3900
 
3901
static void do_ccupdate_local(void *info)
3902
{
3903
        struct ccupdate_struct *new = info;
3904
        struct array_cache *old;
3905
 
3906
        check_irq_off();
3907
        old = cpu_cache_get(new->cachep);
3908
 
3909
        new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3910
        new->new[smp_processor_id()] = old;
3911
}
3912
 
3913
/* Always called with the cache_chain_mutex held */
3914
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3915
                                int batchcount, int shared)
3916
{
3917
        struct ccupdate_struct *new;
3918
        int i;
3919
 
3920
        new = kzalloc(sizeof(*new), GFP_KERNEL);
3921
        if (!new)
3922
                return -ENOMEM;
3923
 
3924
        for_each_online_cpu(i) {
3925
                new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
3926
                                                batchcount);
3927
                if (!new->new[i]) {
3928
                        for (i--; i >= 0; i--)
3929
                                kfree(new->new[i]);
3930
                        kfree(new);
3931
                        return -ENOMEM;
3932
                }
3933
        }
3934
        new->cachep = cachep;
3935
 
3936
        on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
3937
 
3938
        check_irq_on();
3939
        cachep->batchcount = batchcount;
3940
        cachep->limit = limit;
3941
        cachep->shared = shared;
3942
 
3943
        for_each_online_cpu(i) {
3944
                struct array_cache *ccold = new->new[i];
3945
                if (!ccold)
3946
                        continue;
3947
                spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3948
                free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
3949
                spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3950
                kfree(ccold);
3951
        }
3952
        kfree(new);
3953
        return alloc_kmemlist(cachep);
3954
}
3955
 
3956
/* Called with cache_chain_mutex held always */
3957
static int enable_cpucache(struct kmem_cache *cachep)
3958
{
3959
        int err;
3960
        int limit, shared;
3961
 
3962
        /*
3963
         * The head array serves three purposes:
3964
         * - create a LIFO ordering, i.e. return objects that are cache-warm
3965
         * - reduce the number of spinlock operations.
3966
         * - reduce the number of linked list operations on the slab and
3967
         *   bufctl chains: array operations are cheaper.
3968
         * The numbers are guesses; we should auto-tune as described by
3969
         * Bonwick.
3970
         */
3971
        if (cachep->buffer_size > 131072)
3972
                limit = 1;
3973
        else if (cachep->buffer_size > PAGE_SIZE)
3974
                limit = 8;
3975
        else if (cachep->buffer_size > 1024)
3976
                limit = 24;
3977
        else if (cachep->buffer_size > 256)
3978
                limit = 54;
3979
        else
3980
                limit = 120;
3981
 
3982
        /*
3983
         * CPU bound tasks (e.g. network routing) can exhibit cpu bound
3984
         * allocation behaviour: Most allocs on one cpu, most free operations
3985
         * on another cpu. For these cases, an efficient object passing between
3986
         * cpus is necessary. This is provided by a shared array. The array
3987
         * replaces Bonwick's magazine layer.
3988
         * On uniprocessor, it's functionally equivalent (but less efficient)
3989
         * to a larger limit. Thus disabled by default.
3990
         */
3991
        shared = 0;
3992
        if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
3993
                shared = 8;
3994
 
3995
#if DEBUG
3996
        /*
3997
         * With debugging enabled, a large batchcount leads to excessively long
3998
         * periods with local interrupts disabled.  Limit the batchcount.
3999
         */
4000
        if (limit > 32)
4001
                limit = 32;
4002
#endif
4003
        err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
4004
        if (err)
4005
                printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4006
                       cachep->name, -err);
4007
        return err;
4008
}
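/*
 * Editor's note: worked example of the heuristic above (illustrative).
 * For a cache with buffer_size == 512 bytes on an SMP machine:
 *   limit      = 54                    (256 < 512 <= 1024 branch)
 *   shared     = 8                     (512 <= PAGE_SIZE, more than 1 cpu)
 *   batchcount = (54 + 1) / 2 = 27     (passed to do_tune_cpucache)
 * With DEBUG set, limit is first clamped to 32, giving a batchcount of 16.
 */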
4009
 
4010
/*
4011
 * Drain an array if it contains any elements taking the l3 lock only if
4012
 * necessary. Note that the l3 listlock also protects the array_cache
4013
 * if drain_array() is used on the shared array.
4014
 */
4015
void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4016
                         struct array_cache *ac, int force, int node)
4017
{
4018
        int tofree;
4019
 
4020
        if (!ac || !ac->avail)
4021
                return;
4022
        if (ac->touched && !force) {
4023
                ac->touched = 0;
4024
        } else {
4025
                spin_lock_irq(&l3->list_lock);
4026
                if (ac->avail) {
4027
                        tofree = force ? ac->avail : (ac->limit + 4) / 5;
4028
                        if (tofree > ac->avail)
4029
                                tofree = (ac->avail + 1) / 2;
4030
                        free_block(cachep, ac->entry, tofree, node);
4031
                        ac->avail -= tofree;
4032
                        memmove(ac->entry, &(ac->entry[tofree]),
4033
                                sizeof(void *) * ac->avail);
4034
                }
4035
                spin_unlock_irq(&l3->list_lock);
4036
        }
4037
}
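/*
 * Editor's note: worked example for the partial drain above (illustrative).
 * With force == 0, ac->limit == 120 and ac->avail == 30:
 *   tofree = (120 + 4) / 5 = 24        (not capped, since 24 <= 30)
 * so 24 objects go back via free_block() and the remaining 6 are slid to
 * the front of ac->entry[].  Had ac->avail been 10, tofree would have been
 * capped to (10 + 1) / 2 = 5.
 */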
4038
 
4039
/**
4040
 * cache_reap - Reclaim memory from caches.
4041
 * @w: work descriptor
4042
 *
4043
 * Called from workqueue/eventd every few seconds.
4044
 * Purpose:
4045
 * - clear the per-cpu caches for this CPU.
4046
 * - return freeable pages to the main free memory pool.
4047
 *
4048
 * If we cannot acquire the cache chain mutex then just give up - we'll try
4049
 * again on the next iteration.
4050
 */
4051
static void cache_reap(struct work_struct *w)
4052
{
4053
        struct kmem_cache *searchp;
4054
        struct kmem_list3 *l3;
4055
        int node = numa_node_id();
4056
        struct delayed_work *work =
4057
                container_of(w, struct delayed_work, work);
4058
 
4059
        if (!mutex_trylock(&cache_chain_mutex))
4060
                /* Give up. Set up the next iteration. */
4061
                goto out;
4062
 
4063
        list_for_each_entry(searchp, &cache_chain, next) {
4064
                check_irq_on();
4065
 
4066
                /*
4067
                 * We only take the l3 lock if absolutely necessary and we
4068
                 * have established with reasonable certainty that
4069
                 * we can do some work if the lock was obtained.
4070
                 */
4071
                l3 = searchp->nodelists[node];
4072
 
4073
                reap_alien(searchp, l3);
4074
 
4075
                drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4076
 
4077
                /*
4078
                 * These are racy checks but it does not matter
4079
                 * if we skip one check or scan twice.
4080
                 */
4081
                if (time_after(l3->next_reap, jiffies))
4082
                        goto next;
4083
 
4084
                l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4085
 
4086
                drain_array(searchp, l3, l3->shared, 0, node);
4087
 
4088
                if (l3->free_touched)
4089
                        l3->free_touched = 0;
4090
                else {
4091
                        int freed;
4092
 
4093
                        freed = drain_freelist(searchp, l3, (l3->free_limit +
4094
                                5 * searchp->num - 1) / (5 * searchp->num));
4095
                        STATS_ADD_REAPED(searchp, freed);
4096
                }
4097
next:
4098
                cond_resched();
4099
        }
4100
        check_irq_on();
4101
        mutex_unlock(&cache_chain_mutex);
4102
        next_reap_node();
4103
out:
4104
        /* Set up the next iteration */
4105
        schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4106
}
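/*
 * Editor's note: worked example for the drain_freelist() target above
 * (illustrative).  With l3->free_limit == 120 and searchp->num == 20
 * objects per slab:
 *   (120 + 5*20 - 1) / (5*20) = 219 / 100 = 2
 * i.e. at most two completely free slabs (about free_limit/5 objects,
 * rounded up to whole slabs) are reclaimed per reap pass.
 */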
4107
 
4108
#ifdef CONFIG_SLABINFO
4109
 
4110
static void print_slabinfo_header(struct seq_file *m)
4111
{
4112
        /*
4113
         * Output format version, so at least we can change it
4114
         * without _too_ many complaints.
4115
         */
4116
#if STATS
4117
        seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4118
#else
4119
        seq_puts(m, "slabinfo - version: 2.1\n");
4120
#endif
4121
        seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
4122
                 "<objperslab> <pagesperslab>");
4123
        seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4124
        seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4125
#if STATS
4126
        seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4127
                 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4128
        seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4129
#endif
4130
        seq_putc(m, '\n');
4131
}
4132
 
4133
static void *s_start(struct seq_file *m, loff_t *pos)
4134
{
4135
        loff_t n = *pos;
4136
 
4137
        mutex_lock(&cache_chain_mutex);
4138
        if (!n)
4139
                print_slabinfo_header(m);
4140
 
4141
        return seq_list_start(&cache_chain, *pos);
4142
}
4143
 
4144
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4145
{
4146
        return seq_list_next(p, &cache_chain, pos);
4147
}
4148
 
4149
static void s_stop(struct seq_file *m, void *p)
4150
{
4151
        mutex_unlock(&cache_chain_mutex);
4152
}
4153
 
4154
static int s_show(struct seq_file *m, void *p)
4155
{
4156
        struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4157
        struct slab *slabp;
4158
        unsigned long active_objs;
4159
        unsigned long num_objs;
4160
        unsigned long active_slabs = 0;
4161
        unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4162
        const char *name;
4163
        char *error = NULL;
4164
        int node;
4165
        struct kmem_list3 *l3;
4166
 
4167
        active_objs = 0;
4168
        num_slabs = 0;
4169
        for_each_online_node(node) {
4170
                l3 = cachep->nodelists[node];
4171
                if (!l3)
4172
                        continue;
4173
 
4174
                check_irq_on();
4175
                spin_lock_irq(&l3->list_lock);
4176
 
4177
                list_for_each_entry(slabp, &l3->slabs_full, list) {
4178
                        if (slabp->inuse != cachep->num && !error)
4179
                                error = "slabs_full accounting error";
4180
                        active_objs += cachep->num;
4181
                        active_slabs++;
4182
                }
4183
                list_for_each_entry(slabp, &l3->slabs_partial, list) {
4184
                        if (slabp->inuse == cachep->num && !error)
4185
                                error = "slabs_partial inuse accounting error";
4186
                        if (!slabp->inuse && !error)
4187
                                error = "slabs_partial/inuse accounting error";
4188
                        active_objs += slabp->inuse;
4189
                        active_slabs++;
4190
                }
4191
                list_for_each_entry(slabp, &l3->slabs_free, list) {
4192
                        if (slabp->inuse && !error)
4193
                                error = "slabs_free/inuse accounting error";
4194
                        num_slabs++;
4195
                }
4196
                free_objects += l3->free_objects;
4197
                if (l3->shared)
4198
                        shared_avail += l3->shared->avail;
4199
 
4200
                spin_unlock_irq(&l3->list_lock);
4201
        }
4202
        num_slabs += active_slabs;
4203
        num_objs = num_slabs * cachep->num;
4204
        if (num_objs - active_objs != free_objects && !error)
4205
                error = "free_objects accounting error";
4206
 
4207
        name = cachep->name;
4208
        if (error)
4209
                printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4210
 
4211
        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4212
                   name, active_objs, num_objs, cachep->buffer_size,
4213
                   cachep->num, (1 << cachep->gfporder));
4214
        seq_printf(m, " : tunables %4u %4u %4u",
4215
                   cachep->limit, cachep->batchcount, cachep->shared);
4216
        seq_printf(m, " : slabdata %6lu %6lu %6lu",
4217
                   active_slabs, num_slabs, shared_avail);
4218
#if STATS
4219
        {                       /* list3 stats */
4220
                unsigned long high = cachep->high_mark;
4221
                unsigned long allocs = cachep->num_allocations;
4222
                unsigned long grown = cachep->grown;
4223
                unsigned long reaped = cachep->reaped;
4224
                unsigned long errors = cachep->errors;
4225
                unsigned long max_freeable = cachep->max_freeable;
4226
                unsigned long node_allocs = cachep->node_allocs;
4227
                unsigned long node_frees = cachep->node_frees;
4228
                unsigned long overflows = cachep->node_overflow;
4229
 
4230
                seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
4231
                                %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
4232
                                reaped, errors, max_freeable, node_allocs,
4233
                                node_frees, overflows);
4234
        }
4235
        /* cpu stats */
4236
        {
4237
                unsigned long allochit = atomic_read(&cachep->allochit);
4238
                unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4239
                unsigned long freehit = atomic_read(&cachep->freehit);
4240
                unsigned long freemiss = atomic_read(&cachep->freemiss);
4241
 
4242
                seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4243
                           allochit, allocmiss, freehit, freemiss);
4244
        }
4245
#endif
4246
        seq_putc(m, '\n');
4247
        return 0;
4248
}
4249
 
4250
/*
4251
 * slabinfo_op - iterator that generates /proc/slabinfo
4252
 *
4253
 * Output layout:
4254
 * cache-name
4255
 * num-active-objs
4256
 * total-objs
4257
 * object size
4258
 * num-active-slabs
4259
 * total-slabs
4260
 * num-pages-per-slab
4261
 * + further values on SMP and with statistics enabled
4262
 */
4263
 
4264
const struct seq_operations slabinfo_op = {
4265
        .start = s_start,
4266
        .next = s_next,
4267
        .stop = s_stop,
4268
        .show = s_show,
4269
};
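/*
 * Editor's note: an illustrative (made-up) /proc/slabinfo line in the
 * version 2.1 format emitted by s_show() above, shown without the STATS
 * columns and wrapped here for readability:
 *
 *   dentry_cache  85679  96425    132   29    1 : tunables  120   60    8
 *                                                : slabdata   3325   3325   0
 *
 * i.e. 85679 active of 96425 total objects, 132 bytes each, 29 objects per
 * slab, one page per slab, then the limit/batchcount/shared tunables and
 * the active/total slab counts plus the shared-array availability.
 */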
4270
 
4271
#define MAX_SLABINFO_WRITE 128
4272
/**
4273
 * slabinfo_write - Tuning for the slab allocator
4274
 * @file: unused
4275
 * @buffer: user buffer
4276
 * @count: data length
4277
 * @ppos: unused
4278
 */
4279
ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4280
                       size_t count, loff_t *ppos)
4281
{
4282
        char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4283
        int limit, batchcount, shared, res;
4284
        struct kmem_cache *cachep;
4285
 
4286
        if (count > MAX_SLABINFO_WRITE)
4287
                return -EINVAL;
4288
        if (copy_from_user(&kbuf, buffer, count))
4289
                return -EFAULT;
4290
        kbuf[MAX_SLABINFO_WRITE] = '\0';
4291
 
4292
        tmp = strchr(kbuf, ' ');
4293
        if (!tmp)
4294
                return -EINVAL;
4295
        *tmp = '\0';
4296
        tmp++;
4297
        if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4298
                return -EINVAL;
4299
 
4300
        /* Find the cache in the chain of caches. */
4301
        mutex_lock(&cache_chain_mutex);
4302
        res = -EINVAL;
4303
        list_for_each_entry(cachep, &cache_chain, next) {
4304
                if (!strcmp(cachep->name, kbuf)) {
4305
                        if (limit < 1 || batchcount < 1 ||
4306
                                        batchcount > limit || shared < 0) {
4307
                                res = 0;
4308
                        } else {
4309
                                res = do_tune_cpucache(cachep, limit,
4310
                                                       batchcount, shared);
4311
                        }
4312
                        break;
4313
                }
4314
        }
4315
        mutex_unlock(&cache_chain_mutex);
4316
        if (res >= 0)
4317
                res = count;
4318
        return res;
4319
}
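/*
 * Editor's note: hypothetical user-space counterpart of the parser above,
 * not part of the original file.  It writes "name limit batchcount shared"
 * to /proc/slabinfo (the line must stay under MAX_SLABINFO_WRITE bytes and
 * needs a writable /proc/slabinfo); the cache name and values are examples.
 */
#include <stdio.h>

static int tune_slab_cache(const char *name, int limit, int batch, int shared)
{
        FILE *f = fopen("/proc/slabinfo", "w");
        int ret;

        if (!f)
                return -1;
        ret = fprintf(f, "%s %d %d %d\n", name, limit, batch, shared);
        if (fclose(f) != 0 || ret < 0)
                return -1;
        return 0;
}

/* e.g. tune_slab_cache("dentry_cache", 120, 60, 8); */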
4320
 
4321
#ifdef CONFIG_DEBUG_SLAB_LEAK
4322
 
4323
static void *leaks_start(struct seq_file *m, loff_t *pos)
4324
{
4325
        mutex_lock(&cache_chain_mutex);
4326
        return seq_list_start(&cache_chain, *pos);
4327
}
4328
 
4329
static inline int add_caller(unsigned long *n, unsigned long v)
4330
{
4331
        unsigned long *p;
4332
        int l;
4333
        if (!v)
4334
                return 1;
4335
        l = n[1];
4336
        p = n + 2;
4337
        while (l) {
4338
                int i = l/2;
4339
                unsigned long *q = p + 2 * i;
4340
                if (*q == v) {
4341
                        q[1]++;
4342
                        return 1;
4343
                }
4344
                if (*q > v) {
4345
                        l = i;
4346
                } else {
4347
                        p = q + 2;
4348
                        l -= i + 1;
4349
                }
4350
        }
4351
        if (++n[1] == n[0])
4352
                return 0;
4353
        memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4354
        p[0] = v;
4355
        p[1] = 1;
4356
        return 1;
4357
}
4358
 
4359
static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4360
{
4361
        void *p;
4362
        int i;
4363
        if (n[0] == n[1])
4364
                return;
4365
        for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4366
                if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4367
                        continue;
4368
                if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4369
                        return;
4370
        }
4371
}
4372
 
4373
static void show_symbol(struct seq_file *m, unsigned long address)
4374
{
4375
#ifdef CONFIG_KALLSYMS
4376
        unsigned long offset, size;
4377
        char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
4378
 
4379
        if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4380
                seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4381
                if (modname[0])
4382
                        seq_printf(m, " [%s]", modname);
4383
                return;
4384
        }
4385
#endif
4386
        seq_printf(m, "%p", (void *)address);
4387
}
4388
 
4389
static int leaks_show(struct seq_file *m, void *p)
4390
{
4391
        struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4392
        struct slab *slabp;
4393
        struct kmem_list3 *l3;
4394
        const char *name;
4395
        unsigned long *n = m->private;
4396
        int node;
4397
        int i;
4398
 
4399
        if (!(cachep->flags & SLAB_STORE_USER))
4400
                return 0;
4401
        if (!(cachep->flags & SLAB_RED_ZONE))
4402
                return 0;
4403
 
4404
        /* OK, we can do it */
4405
 
4406
        n[1] = 0;
4407
 
4408
        for_each_online_node(node) {
4409
                l3 = cachep->nodelists[node];
4410
                if (!l3)
4411
                        continue;
4412
 
4413
                check_irq_on();
4414
                spin_lock_irq(&l3->list_lock);
4415
 
4416
                list_for_each_entry(slabp, &l3->slabs_full, list)
4417
                        handle_slab(n, cachep, slabp);
4418
                list_for_each_entry(slabp, &l3->slabs_partial, list)
4419
                        handle_slab(n, cachep, slabp);
4420
                spin_unlock_irq(&l3->list_lock);
4421
        }
4422
        name = cachep->name;
4423
        if (n[0] == n[1]) {
4424
                /* Increase the buffer size */
4425
                mutex_unlock(&cache_chain_mutex);
4426
                m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4427
                if (!m->private) {
4428
                        /* Too bad, we are really out */
4429
                        m->private = n;
4430
                        mutex_lock(&cache_chain_mutex);
4431
                        return -ENOMEM;
4432
                }
4433
                *(unsigned long *)m->private = n[0] * 2;
4434
                kfree(n);
4435
                mutex_lock(&cache_chain_mutex);
4436
                /* Now make sure this entry will be retried */
4437
                m->count = m->size;
4438
                return 0;
4439
        }
4440
        for (i = 0; i < n[1]; i++) {
4441
                seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4442
                show_symbol(m, n[2*i+2]);
4443
                seq_putc(m, '\n');
4444
        }
4445
 
4446
        return 0;
4447
}
4448
 
4449
const struct seq_operations slabstats_op = {
4450
        .start = leaks_start,
4451
        .next = s_next,
4452
        .stop = s_stop,
4453
        .show = leaks_show,
4454
};
4455
#endif
4456
#endif
4457
 
4458
/**
4459
 * ksize - get the actual amount of memory allocated for a given object
4460
 * @objp: Pointer to the object
4461
 *
4462
 * kmalloc may internally round up allocations and return more memory
4463
 * than requested. ksize() can be used to determine the actual amount of
4464
 * memory allocated. The caller may use this additional memory, even though
4465
 * a smaller amount of memory was initially specified with the kmalloc call.
4466
 * The caller must guarantee that objp points to a valid object previously
4467
 * allocated with either kmalloc() or kmem_cache_alloc(). The object
4468
 * must not be freed during the duration of the call.
4469
 */
4470
size_t ksize(const void *objp)
4471
{
4472
        BUG_ON(!objp);
4473
        if (unlikely(objp == ZERO_SIZE_PTR))
4474
                return 0;
4475
 
4476
        return obj_size(virt_to_cache(objp));
4477
}
4478
EXPORT_SYMBOL(ksize);
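/*
 * Editor's note: illustrative ksize() sketch, not part of the original
 * file.  The value returned depends on the configured kmalloc cache sizes;
 * on a typical setup a 100-byte request is served from the 128-byte cache.
 */
static size_t usable_bytes_example(void)
{
        size_t usable = 0;
        void *p = kmalloc(100, GFP_KERNEL);

        if (p) {
                usable = ksize(p);      /* typically 128 here */
                kfree(p);
        }
        return usable;
}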
