1 |
62 |
marcus.erl |
/*
|
2 |
|
|
md_k.h : kernel internal structure of the Linux MD driver
|
3 |
|
|
Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
|
4 |
|
|
|
5 |
|
|
This program is free software; you can redistribute it and/or modify
|
6 |
|
|
it under the terms of the GNU General Public License as published by
|
7 |
|
|
the Free Software Foundation; either version 2, or (at your option)
|
8 |
|
|
any later version.
|
9 |
|
|
|
10 |
|
|
You should have received a copy of the GNU General Public License
|
11 |
|
|
(for example /usr/src/linux/COPYING); if not, write to the Free
|
12 |
|
|
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
13 |
|
|
*/
|
14 |
|
|
|
15 |
|
|
#ifndef _MD_K_H
|
16 |
|
|
#define _MD_K_H
|
17 |
|
|
|
18 |
|
|
/* and dm-bio-list.h is not under include/linux because.... ??? */
|
19 |
|
|
#include "../../../drivers/md/dm-bio-list.h"
|
20 |
|
|
|
21 |
|
|
#ifdef CONFIG_BLOCK
|
22 |
|
|
|
23 |
|
|
/*
 * Level numbers below zero.  They are kept distinct from 0, which
 * means 'raid0'.
 */
#define LEVEL_LINEAR		(-1)
#define LEVEL_MULTIPATH		(-4)
#define LEVEL_FAULTY		(-5)

/*
 * Internal-use-only value meaning 'no level specified'.  0 cannot serve
 * that purpose because 0 means 'raid0'.
 */
#define LEVEL_NONE		(-1000000)

/* A sector_t with every bit set. */
#define MaxSector		(~(sector_t)0)

typedef struct mdk_rdev_s	mdk_rdev_t;
typedef struct mddev_s		mddev_t;

/*
 * options passed in raidrun:
 */

/* Currently this must fit in an 'int' */
#define MAX_CHUNK_SIZE		(1<<30)
|
44 |
|
|
|
45 |
|
|
/*
|
46 |
|
|
* MD's 'extended' device
|
47 |
|
|
*/
|
48 |
|
|
struct mdk_rdev_s
|
49 |
|
|
{
|
50 |
|
|
struct list_head same_set; /* RAID devices within the same set */
|
51 |
|
|
|
52 |
|
|
sector_t size; /* Device size (in blocks) */
|
53 |
|
|
mddev_t *mddev; /* RAID array if running */
|
54 |
|
|
long last_events; /* IO event timestamp */
|
55 |
|
|
|
56 |
|
|
struct block_device *bdev; /* block device handle */
|
57 |
|
|
|
58 |
|
|
struct page *sb_page;
|
59 |
|
|
int sb_loaded;
|
60 |
|
|
__u64 sb_events;
|
61 |
|
|
sector_t data_offset; /* start of data in array */
|
62 |
|
|
sector_t sb_offset;
|
63 |
|
|
int sb_size; /* bytes in the superblock */
|
64 |
|
|
int preferred_minor; /* autorun support */
|
65 |
|
|
|
66 |
|
|
struct kobject kobj;
|
67 |
|
|
|
68 |
|
|
/* A device can be in one of three states based on two flags:
|
69 |
|
|
* Not working: faulty==1 in_sync==0
|
70 |
|
|
* Fully working: faulty==0 in_sync==1
|
71 |
|
|
* Working, but not
|
72 |
|
|
* in sync with array
|
73 |
|
|
* faulty==0 in_sync==0
|
74 |
|
|
*
|
75 |
|
|
* It can never have faulty==1, in_sync==1
|
76 |
|
|
* This reduces the burden of testing multiple flags in many cases
|
77 |
|
|
*/
|
78 |
|
|
|
79 |
|
|
unsigned long flags;
|
80 |
|
|
#define Faulty 1 /* device is known to have a fault */
|
81 |
|
|
#define In_sync 2 /* device is in_sync with rest of array */
|
82 |
|
|
#define WriteMostly 4 /* Avoid reading if at all possible */
|
83 |
|
|
#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */
|
84 |
|
|
|
85 |
|
|
int desc_nr; /* descriptor index in the superblock */
|
86 |
|
|
int raid_disk; /* role of device in array */
|
87 |
|
|
int saved_raid_disk; /* role that device used to have in the
|
88 |
|
|
* array and could again if we did a partial
|
89 |
|
|
* resync from the bitmap
|
90 |
|
|
*/
|
91 |
|
|
sector_t recovery_offset;/* If this device has been partially
|
92 |
|
|
* recovered, this is where we were
|
93 |
|
|
* up to.
|
94 |
|
|
*/
|
95 |
|
|
|
96 |
|
|
atomic_t nr_pending; /* number of pending requests.
|
97 |
|
|
* only maintained for arrays that
|
98 |
|
|
* support hot removal
|
99 |
|
|
*/
|
100 |
|
|
atomic_t read_errors; /* number of consecutive read errors that
|
101 |
|
|
* we have tried to ignore.
|
102 |
|
|
*/
|
103 |
|
|
atomic_t corrected_errors; /* number of corrected read errors,
|
104 |
|
|
* for reporting to userspace and storing
|
105 |
|
|
* in superblock.
|
106 |
|
|
*/
|
107 |
|
|
struct work_struct del_work; /* used for delayed sysfs removal */
|
108 |
|
|
};
|
109 |
|
|
|
110 |
|
|
struct mddev_s
|
111 |
|
|
{
|
112 |
|
|
void *private;
|
113 |
|
|
struct mdk_personality *pers;
|
114 |
|
|
dev_t unit;
|
115 |
|
|
int md_minor;
|
116 |
|
|
struct list_head disks;
|
117 |
|
|
unsigned long flags;
|
118 |
|
|
#define MD_CHANGE_DEVS 0 /* Some device status has changed */
|
119 |
|
|
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
|
120 |
|
|
#define MD_CHANGE_PENDING 2 /* superblock update in progress */
|
121 |
|
|
|
122 |
|
|
int ro;
|
123 |
|
|
|
124 |
|
|
struct gendisk *gendisk;
|
125 |
|
|
|
126 |
|
|
struct kobject kobj;
|
127 |
|
|
|
128 |
|
|
/* Superblock information */
|
129 |
|
|
int major_version,
|
130 |
|
|
minor_version,
|
131 |
|
|
patch_version;
|
132 |
|
|
int persistent;
|
133 |
|
|
int chunk_size;
|
134 |
|
|
time_t ctime, utime;
|
135 |
|
|
int level, layout;
|
136 |
|
|
char clevel[16];
|
137 |
|
|
int raid_disks;
|
138 |
|
|
int max_disks;
|
139 |
|
|
sector_t size; /* used size of component devices */
|
140 |
|
|
sector_t array_size; /* exported array size */
|
141 |
|
|
__u64 events;
|
142 |
|
|
|
143 |
|
|
char uuid[16];
|
144 |
|
|
|
145 |
|
|
/* If the array is being reshaped, we need to record the
|
146 |
|
|
* new shape and an indication of where we are up to.
|
147 |
|
|
* This is written to the superblock.
|
148 |
|
|
* If reshape_position is MaxSector, then no reshape is happening (yet).
|
149 |
|
|
*/
|
150 |
|
|
sector_t reshape_position;
|
151 |
|
|
int delta_disks, new_level, new_layout, new_chunk;
|
152 |
|
|
|
153 |
|
|
struct mdk_thread_s *thread; /* management thread */
|
154 |
|
|
struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
|
155 |
|
|
sector_t curr_resync; /* last block scheduled */
|
156 |
|
|
unsigned long resync_mark; /* a recent timestamp */
|
157 |
|
|
sector_t resync_mark_cnt;/* blocks written at resync_mark */
|
158 |
|
|
sector_t curr_mark_cnt; /* blocks scheduled now */
|
159 |
|
|
|
160 |
|
|
sector_t resync_max_sectors; /* may be set by personality */
|
161 |
|
|
|
162 |
|
|
sector_t resync_mismatches; /* count of sectors where
|
163 |
|
|
* parity/replica mismatch found
|
164 |
|
|
*/
|
165 |
|
|
|
166 |
|
|
/* allow user-space to request suspension of IO to regions of the array */
|
167 |
|
|
sector_t suspend_lo;
|
168 |
|
|
sector_t suspend_hi;
|
169 |
|
|
/* if zero, use the system-wide default */
|
170 |
|
|
int sync_speed_min;
|
171 |
|
|
int sync_speed_max;
|
172 |
|
|
|
173 |
|
|
int ok_start_degraded;
|
174 |
|
|
/* recovery/resync flags
|
175 |
|
|
* NEEDED: we might need to start a resync/recover
|
176 |
|
|
* RUNNING: a thread is running, or about to be started
|
177 |
|
|
* SYNC: actually doing a resync, not a recovery
|
178 |
|
|
* ERR: and IO error was detected - abort the resync/recovery
|
179 |
|
|
* INTR: someone requested a (clean) early abort.
|
180 |
|
|
* DONE: thread is done and is waiting to be reaped
|
181 |
|
|
* REQUEST: user-space has requested a sync (used with SYNC)
|
182 |
|
|
* CHECK: user-space request for for check-only, no repair
|
183 |
|
|
* RESHAPE: A reshape is happening
|
184 |
|
|
*
|
185 |
|
|
* If neither SYNC or RESHAPE are set, then it is a recovery.
|
186 |
|
|
*/
|
187 |
|
|
#define MD_RECOVERY_RUNNING 0
|
188 |
|
|
#define MD_RECOVERY_SYNC 1
|
189 |
|
|
#define MD_RECOVERY_ERR 2
|
190 |
|
|
#define MD_RECOVERY_INTR 3
|
191 |
|
|
#define MD_RECOVERY_DONE 4
|
192 |
|
|
#define MD_RECOVERY_NEEDED 5
|
193 |
|
|
#define MD_RECOVERY_REQUESTED 6
|
194 |
|
|
#define MD_RECOVERY_CHECK 7
|
195 |
|
|
#define MD_RECOVERY_RESHAPE 8
|
196 |
|
|
#define MD_RECOVERY_FROZEN 9
|
197 |
|
|
|
198 |
|
|
unsigned long recovery;
|
199 |
|
|
|
200 |
|
|
int in_sync; /* know to not need resync */
|
201 |
|
|
struct mutex reconfig_mutex;
|
202 |
|
|
atomic_t active;
|
203 |
|
|
|
204 |
|
|
int changed; /* true if we might need to reread partition info */
|
205 |
|
|
int degraded; /* whether md should consider
|
206 |
|
|
* adding a spare
|
207 |
|
|
*/
|
208 |
|
|
int barriers_work; /* initialised to true, cleared as soon
|
209 |
|
|
* as a barrier request to slave
|
210 |
|
|
* fails. Only supported
|
211 |
|
|
*/
|
212 |
|
|
struct bio *biolist; /* bios that need to be retried
|
213 |
|
|
* because BIO_RW_BARRIER is not supported
|
214 |
|
|
*/
|
215 |
|
|
|
216 |
|
|
atomic_t recovery_active; /* blocks scheduled, but not written */
|
217 |
|
|
wait_queue_head_t recovery_wait;
|
218 |
|
|
sector_t recovery_cp;
|
219 |
|
|
|
220 |
|
|
spinlock_t write_lock;
|
221 |
|
|
wait_queue_head_t sb_wait; /* for waiting on superblock updates */
|
222 |
|
|
atomic_t pending_writes; /* number of active superblock writes */
|
223 |
|
|
|
224 |
|
|
unsigned int safemode; /* if set, update "clean" superblock
|
225 |
|
|
* when no writes pending.
|
226 |
|
|
*/
|
227 |
|
|
unsigned int safemode_delay;
|
228 |
|
|
struct timer_list safemode_timer;
|
229 |
|
|
atomic_t writes_pending;
|
230 |
|
|
struct request_queue *queue; /* for plugging ... */
|
231 |
|
|
|
232 |
|
|
atomic_t write_behind; /* outstanding async IO */
|
233 |
|
|
unsigned int max_write_behind; /* 0 = sync */
|
234 |
|
|
|
235 |
|
|
struct bitmap *bitmap; /* the bitmap for the device */
|
236 |
|
|
struct file *bitmap_file; /* the bitmap file */
|
237 |
|
|
long bitmap_offset; /* offset from superblock of
|
238 |
|
|
* start of bitmap. May be
|
239 |
|
|
* negative, but not '0'
|
240 |
|
|
*/
|
241 |
|
|
long default_bitmap_offset; /* this is the offset to use when
|
242 |
|
|
* hot-adding a bitmap. It should
|
243 |
|
|
* eventually be settable by sysfs.
|
244 |
|
|
*/
|
245 |
|
|
|
246 |
|
|
struct list_head all_mddevs;
|
247 |
|
|
};
|
248 |
|
|
|
249 |
|
|
|
250 |
|
|
/*
 * Drop one pending-request count from @rdev.  If that brings nr_pending
 * to zero and the device had its Faulty bit set, set MD_RECOVERY_NEEDED
 * in @mddev->recovery.
 */
static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
{
	/* The Faulty bit is read before the decrement; rdev is not touched afterwards. */
	const int was_faulty = test_bit(Faulty, &rdev->flags);

	if (!atomic_dec_and_test(&rdev->nr_pending))
		return;

	if (was_faulty)
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
|
256 |
|
|
|
257 |
|
|
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
|
258 |
|
|
{
|
259 |
|
|
atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
|
260 |
|
|
}
|
261 |
|
|
|
262 |
|
|
struct mdk_personality
|
263 |
|
|
{
|
264 |
|
|
char *name;
|
265 |
|
|
int level;
|
266 |
|
|
struct list_head list;
|
267 |
|
|
struct module *owner;
|
268 |
|
|
int (*make_request)(struct request_queue *q, struct bio *bio);
|
269 |
|
|
int (*run)(mddev_t *mddev);
|
270 |
|
|
int (*stop)(mddev_t *mddev);
|
271 |
|
|
void (*status)(struct seq_file *seq, mddev_t *mddev);
|
272 |
|
|
/* error_handler must set ->faulty and clear ->in_sync
|
273 |
|
|
* if appropriate, and should abort recovery if needed
|
274 |
|
|
*/
|
275 |
|
|
void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
|
276 |
|
|
int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
|
277 |
|
|
int (*hot_remove_disk) (mddev_t *mddev, int number);
|
278 |
|
|
int (*spare_active) (mddev_t *mddev);
|
279 |
|
|
sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
|
280 |
|
|
int (*resize) (mddev_t *mddev, sector_t sectors);
|
281 |
|
|
int (*check_reshape) (mddev_t *mddev);
|
282 |
|
|
int (*start_reshape) (mddev_t *mddev);
|
283 |
|
|
int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
|
284 |
|
|
/* quiesce moves between quiescence states
|
285 |
|
|
* 0 - fully active
|
286 |
|
|
* 1 - no new requests allowed
|
287 |
|
|
* others - reserved
|
288 |
|
|
*/
|
289 |
|
|
void (*quiesce) (mddev_t *mddev, int state);
|
290 |
|
|
};
|
291 |
|
|
|
292 |
|
|
|
293 |
|
|
struct md_sysfs_entry {
|
294 |
|
|
struct attribute attr;
|
295 |
|
|
ssize_t (*show)(mddev_t *, char *);
|
296 |
|
|
ssize_t (*store)(mddev_t *, const char *, size_t);
|
297 |
|
|
};
|
298 |
|
|
|
299 |
|
|
|
300 |
|
|
/*
 * Return the disk_name of @mddev's gendisk, or the placeholder "mdX"
 * when the array has no gendisk.
 */
static inline char * mdname (mddev_t * mddev)
{
	if (mddev->gendisk)
		return mddev->gendisk->disk_name;

	return "mdX";
}
|
304 |
|
|
|
305 |
|
|
/*
 * Iterates through some rdev ringlist, starting at (head).next.
 *
 * On every pass 'rdev' is derived from 'tmp', and 'tmp' is then advanced
 * to the following element before the loop body runs.  Because 'tmp' is
 * already one element ahead, it is safe to remove the current 'rdev'
 * inside the body.  Don't touch 'tmp' though.
 *
 * The loop ends when the element just stepped over is 'head' itself.
 */
#define ITERATE_RDEV_GENERIC(head,rdev,tmp)				\
	for ((tmp) = (head).next;					\
	     (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),	\
	     (tmp) = (tmp)->next,					\
	     (tmp)->prev != &(head);					\
	     )
|
315 |
|
|
/*
 * Iterates through the 'same array disks' ringlist, i.e. the 'disks'
 * list of the given mddev.
 */
#define ITERATE_RDEV(mddev,rdev,tmp) \
	ITERATE_RDEV_GENERIC((mddev)->disks, rdev, tmp)
|
320 |
|
|
|
321 |
|
|
/*
 * Iterates through 'pending RAID disks'.  The list head
 * 'pending_raid_disks' is not declared in this header and must be in
 * scope where the macro is used.
 */
#define ITERATE_RDEV_PENDING(rdev,tmp) \
	ITERATE_RDEV_GENERIC(pending_raid_disks, rdev, tmp)
|
326 |
|
|
|
327 |
|
|
typedef struct mdk_thread_s {
|
328 |
|
|
void (*run) (mddev_t *mddev);
|
329 |
|
|
mddev_t *mddev;
|
330 |
|
|
wait_queue_head_t wqueue;
|
331 |
|
|
unsigned long flags;
|
332 |
|
|
struct task_struct *tsk;
|
333 |
|
|
unsigned long timeout;
|
334 |
|
|
} mdk_thread_t;
|
335 |
|
|
|
336 |
|
|
#define THREAD_WAKEUP 0
|
337 |
|
|
|
338 |
|
|
/*
 * Sleep uninterruptibly on wait queue 'wq' until 'condition' becomes true.
 *
 *   wq        - wait queue head (an lvalue; its address is taken)
 *   condition - expression re-evaluated on every pass, with 'lock' held
 *   lock      - spinlock (an lvalue) held by the caller on entry; it is
 *               released with spin_unlock_irq() around each sleep and
 *               re-taken with spin_lock_irq() before 'condition' is
 *               tested again, so it is held again when the macro finishes
 *   cmd       - statement run after the lock has been dropped and before
 *               schedule(); it may be empty, which is why it is expanded
 *               without parentheses
 *
 * 'wq' and 'lock' are parenthesised where their address is taken so that
 * any lvalue expression can be passed safely.
 */
#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&(wq), &__wait);					\
	for (;;) {							\
		/* set the state before testing, so a wakeup is not lost */ \
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&(lock));				\
		cmd;							\
		schedule();						\
		spin_lock_irq(&(lock));					\
	}								\
	__set_current_state(TASK_RUNNING);				\
	remove_wait_queue(&(wq), &__wait);				\
} while (0)
|
356 |
|
|
|
357 |
|
|
/*
 * Wait until 'condition' is true.  If it already holds, nothing else is
 * done; otherwise the work is handed to __wait_event_lock_irq() with the
 * same arguments.
 */
#define wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, cmd);	\
} while (0)
|
363 |
|
|
|
364 |
|
|
/*
 * Call put_page() on @p unless @p is NULL, in which case nothing is done.
 */
static inline void safe_put_page(struct page *p)
{
	if (!p)
		return;

	put_page(p);
}
|
368 |
|
|
|
369 |
|
|
#endif /* CONFIG_BLOCK */
|
370 |
|
|
#endif
|
371 |
|
|
|