/*
 * 2.5 block I/O model
 *
 * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 */
#ifndef __LINUX_BIO_H
#define __LINUX_BIO_H

#include <linux/highmem.h>
#include <linux/mempool.h>
#include <linux/ioprio.h>

#ifdef CONFIG_BLOCK

/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
#include <asm/io.h>

#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
#define BIOVEC_VIRT_START_SIZE(x)	(bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
#define BIOVEC_VIRT_OVERSIZE(x)		((x) > BIO_VMERGE_MAX_SIZE)
#else
#define BIOVEC_VIRT_START_SIZE(x)	0
#define BIOVEC_VIRT_OVERSIZE(x)		0
#endif

#ifndef BIO_VMERGE_BOUNDARY
#define BIO_VMERGE_BOUNDARY	0
#endif

#define BIO_DEBUG

#ifdef BIO_DEBUG
#define BIO_BUG_ON	BUG_ON
#else
#define BIO_BUG_ON
#endif

#define BIO_MAX_PAGES		256
#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
#define BIO_MAX_SECTORS		(BIO_MAX_SIZE >> 9)
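
/*
 * Worked example (illustrative, not from the original header), assuming
 * 4KB pages, i.e. PAGE_CACHE_SHIFT == 12:
 *
 *	BIO_MAX_SIZE    = 256 << 12     = 1048576 bytes (1 MiB)
 *	BIO_MAX_SECTORS = 1048576 >> 9  = 2048 512-byte sectors
 *
 * On architectures with larger pages the limits scale accordingly.
 */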

/*
 * was unsigned short, but we might as well be ready for > 64kB I/O pages
 */
struct bio_vec {
	struct page	*bv_page;
	unsigned int	bv_len;
	unsigned int	bv_offset;
};
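
/*
 * Illustrative sketch (not part of the original header): a bio_vec just
 * names a byte range within a single page. A vector covering the second
 * half of a 4KB page might look like:
 *
 *	struct bio_vec bv = {
 *		.bv_page   = page,	-- a struct page the caller already holds
 *		.bv_len    = 2048,	-- bytes in this segment
 *		.bv_offset = 2048,	-- offset of the data within the page
 *	};
 */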

struct bio_set;
struct bio;
typedef void (bio_end_io_t) (struct bio *, int);
typedef void (bio_destructor_t) (struct bio *);

/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	sector_t		bi_sector;	/* device address in 512 byte
						   sectors */
	struct bio		*bi_next;	/* request queue link */
	struct block_device	*bi_bdev;
	unsigned long		bi_flags;	/* status, command, etc */
	unsigned long		bi_rw;		/* bottom bits READ/WRITE,
						 * top bits priority
						 */

	unsigned short		bi_vcnt;	/* how many bio_vec's */
	unsigned short		bi_idx;		/* current index into bvl_vec */

	/* Number of segments in this BIO after
	 * physical address coalescing is performed.
	 */
	unsigned short		bi_phys_segments;

	/* Number of segments after physical and DMA remapping
	 * hardware coalescing is performed.
	 */
	unsigned short		bi_hw_segments;

	unsigned int		bi_size;	/* residual I/O count */

	/*
	 * To keep track of the max hw size, we account for the
	 * sizes of the first and last virtually mergeable segments
	 * in this bio
	 */
	unsigned int		bi_hw_front_size;
	unsigned int		bi_hw_back_size;

	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	bio_end_io_t		*bi_end_io;
	atomic_t		bi_cnt;		/* pin count */

	void			*bi_private;

	bio_destructor_t	*bi_destructor;	/* destructor */
};
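
/*
 * Illustrative sketch, not from the original header: a typical caller
 * builds and submits a bio roughly like this. submit_bio() itself is
 * declared elsewhere in the block layer; "my_end_io", "my_cookie" and
 * "page" are placeholder names:
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, 1);
 *
 *	bio->bi_bdev    = bdev;			-- target block device
 *	bio->bi_sector  = sector;		-- 512-byte device offset
 *	bio->bi_end_io  = my_end_io;		-- completion callback
 *	bio->bi_private = my_cookie;		-- handed back on completion
 *	bio_add_page(bio, page, PAGE_SIZE, 0);	-- attach one full page
 *	submit_bio(READ, bio);
 *
 * Completion runs asynchronously; the callback should inspect bi_flags
 * (see BIO_UPTODATE below) and drop its reference with bio_put().
 */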

/*
 * bio flags
 */
#define BIO_UPTODATE	0	/* ok after I/O completion */
#define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
#define BIO_EOF		2	/* out-of-bounds error */
#define BIO_SEG_VALID	3	/* nr_hw_seg valid */
#define BIO_CLONED	4	/* doesn't own data */
#define BIO_BOUNCED	5	/* bio is a bounce bio */
#define BIO_USER_MAPPED 6	/* contains user pages */
#define BIO_EOPNOTSUPP	7	/* not supported */
#define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
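
/*
 * Illustrative sketch (not part of the original header): a completion
 * callback commonly tests BIO_UPTODATE via bio_flagged(). "my_end_io"
 * is a placeholder name:
 *
 *	static void my_end_io(struct bio *bio, int error)
 *	{
 *		if (!bio_flagged(bio, BIO_UPTODATE))
 *			printk(KERN_ERR "I/O failed: %d\n", error);
 *		complete(bio->bi_private);	-- e.g. wake up the submitter
 *		bio_put(bio);
 *	}
 */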

/*
 * top 4 bits of bio flags indicate the pool this bio came from
 */
#define BIO_POOL_BITS		(4)
#define BIO_POOL_OFFSET		(BITS_PER_LONG - BIO_POOL_BITS)
#define BIO_POOL_MASK		(1UL << BIO_POOL_OFFSET)
#define BIO_POOL_IDX(bio)	((bio)->bi_flags >> BIO_POOL_OFFSET)

/*
 * bio bi_rw flags
 *
 * bit 0 -- read (not set) or write (set)
 * bit 1 -- rw-ahead when set
 * bit 2 -- barrier
 * bit 3 -- fail fast, don't want low level driver retries
 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
 */
#define BIO_RW		0
#define BIO_RW_AHEAD	1
#define BIO_RW_BARRIER	2
#define BIO_RW_FAILFAST	3
#define BIO_RW_SYNC	4
#define BIO_RW_META	5

/*
 * upper 16 bits of bi_rw define the io priority of this bio
 */
#define BIO_PRIO_SHIFT	(8 * sizeof(unsigned long) - IOPRIO_BITS)
#define bio_prio(bio)	((bio)->bi_rw >> BIO_PRIO_SHIFT)
#define bio_prio_valid(bio)	ioprio_valid(bio_prio(bio))

#define bio_set_prio(bio, prio)		do {				\
	WARN_ON(prio >= (1 << IOPRIO_BITS));				\
	(bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1);			\
	(bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT);	\
} while (0)
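
/*
 * Illustrative sketch (not from the original header): a caller that wants
 * this bio charged to the best-effort class at the highest priority level
 * could use the helpers from <linux/ioprio.h>, e.g.:
 *
 *	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0));
 *
 * bio_prio(bio) then recovers the value, and bio_prio_valid() reports
 * whether any priority has been assigned at all.
 */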

/*
 * various member access, note that bio_data should of course not be used
 * on highmem page vectors
 */
#define bio_iovec_idx(bio, idx)	(&((bio)->bi_io_vec[(idx)]))
#define bio_iovec(bio)		bio_iovec_idx((bio), (bio)->bi_idx)
#define bio_page(bio)		bio_iovec((bio))->bv_page
#define bio_offset(bio)		bio_iovec((bio))->bv_offset
#define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio)	((bio)->bi_size >> 9)
#define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
#define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
#define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
#define bio_empty_barrier(bio)	(bio_barrier(bio) && !(bio)->bi_size)

static inline unsigned int bio_cur_sectors(struct bio *bio)
{
	if (bio->bi_vcnt)
		return bio_iovec(bio)->bv_len >> 9;

	return 0;
}

static inline void *bio_data(struct bio *bio)
{
	if (bio->bi_vcnt)
		return page_address(bio_page(bio)) + bio_offset(bio);

	return NULL;
}

/*
 * will die
 */
#define bio_to_phys(bio)	(page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
#define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)

/*
 * queues that have highmem support enabled may still need to revert to
 * PIO transfers occasionally and thus map high pages temporarily. For
 * permanent PIO fall back, user is probably better off disabling highmem
 * I/O completely on that queue (see ide-dma for example)
 */
#define __bio_kmap_atomic(bio, idx, kmtype)				\
	(kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) +	\
		bio_iovec_idx((bio), (idx))->bv_offset)

#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype)
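
/*
 * Illustrative sketch (not from the original header): a PIO-style driver
 * might map one segment, copy it out, and unmap it again, pairing the two
 * helpers above. "scratch" is a placeholder buffer and KM_USER0 is just
 * one plausible kmap slot for process context:
 *
 *	char *buf = __bio_kmap_atomic(bio, bio->bi_idx, KM_USER0);
 *
 *	memcpy(scratch, buf, bio_iovec(bio)->bv_len);
 *	__bio_kunmap_atomic(buf, KM_USER0);
 *
 * The mapping is atomic, so no sleeping is allowed between the two calls.
 */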

/*
 * merge helpers etc
 */

#define __BVEC_END(bio)		bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
#define __BVEC_START(bio)	bio_iovec_idx((bio), (bio)->bi_idx)

/*
 * allow arch override, for eg virtualized architectures (put in asm/io.h)
 */
#ifndef BIOVEC_PHYS_MERGEABLE
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
#endif

#define BIOVEC_VIRT_MERGEABLE(vec1, vec2)	\
	((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask)
#define BIO_SEG_BOUNDARY(q, b1, b2) \
	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))

#define bio_io_error(bio) bio_endio((bio), -EIO)

/*
 * drivers should not use the __ version unless they _really_ want to
 * run through the entire bio and not just pending pieces
 */
#define __bio_for_each_segment(bvl, bio, i, start_idx)			\
	for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx);	\
	     i < (bio)->bi_vcnt;					\
	     bvl++, i++)

#define bio_for_each_segment(bvl, bio, i)				\
	__bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
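
/*
 * Illustrative sketch (not from the original header): walking the pending
 * segments of a bio and totalling their length. "bvec" and "i" are local
 * placeholders:
 *
 *	struct bio_vec *bvec;
 *	unsigned int bytes = 0;
 *	int i;
 *
 *	bio_for_each_segment(bvec, bio, i)
 *		bytes += bvec->bv_len;
 *
 * For a bio that has not been partially completed, this should equal
 * bio->bi_size.
 */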

/*
 * get a reference to a bio, so it won't disappear. the intended use is
 * something like:
 *
 * bio_get(bio);
 * submit_bio(rw, bio);
 * if (bio->bi_flags ...)
 *	do_something
 * bio_put(bio);
 *
 * without the bio_get(), the bio could complete and be freed before
 * submit_bio() returns, so the "if (bio->bi_flags ...)" check would then
 * touch freed memory
 */
#define bio_get(bio)	atomic_inc(&(bio)->bi_cnt)


/*
 * A bio_pair is used when we need to split a bio.
 * This can only happen for a bio that refers to just one
 * page of data, and in the unusual situation when the
 * page crosses a chunk/device boundary
 *
 * The address of the master bio is stored in bio1.bi_private
 * The address of the pool the pair was allocated from is stored
 * in bio2.bi_private
 */
struct bio_pair {
	struct bio	bio1, bio2;
	struct bio_vec	bv1, bv2;
	atomic_t	cnt;
	int		error;
};
extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
				  int first_sectors);
extern mempool_t *bio_split_pool;
extern void bio_pair_release(struct bio_pair *dbio);
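
/*
 * Illustrative sketch (not from the original header): stacking drivers such
 * as the MD/RAID code typically split and resubmit along these lines, where
 * "first_sectors" is however many sectors fit before the boundary:
 *
 *	struct bio_pair *bp = bio_split(bio, bio_split_pool, first_sectors);
 *
 *	generic_make_request(&bp->bio1);
 *	generic_make_request(&bp->bio2);
 *	bio_pair_release(bp);
 *
 * generic_make_request() is declared in the block layer headers, not here.
 */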

extern struct bio_set *bioset_create(int, int);
extern void bioset_free(struct bio_set *);

extern struct bio *bio_alloc(gfp_t, int);
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
extern void bio_put(struct bio *);
extern void bio_free(struct bio *, struct bio_set *);

extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
extern int bio_hw_segments(struct request_queue *, struct bio *);

extern void __bio_clone(struct bio *, struct bio *);
extern struct bio *bio_clone(struct bio *, gfp_t);

extern void bio_init(struct bio *);

extern int bio_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
			   unsigned int, unsigned int);
extern int bio_get_nr_vecs(struct block_device *);
extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
				unsigned long, unsigned int, int);
struct sg_iovec;
extern struct bio *bio_map_user_iov(struct request_queue *,
				    struct block_device *,
				    struct sg_iovec *, int, int);
extern void bio_unmap_user(struct bio *);
extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
				gfp_t);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
extern void bio_release_pages(struct bio *bio);
extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio(struct bio *bio);

#ifdef CONFIG_HIGHMEM
/*
 * remember to add offset! and never ever reenable interrupts between a
 * bvec_kmap_irq and bvec_kunmap_irq!!
 *
 * This function MUST be inlined - it plays with the CPU interrupt flags.
 */
static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
{
	unsigned long addr;

	/*
	 * might not be a highmem page, but the preempt/irq count
	 * balancing is a lot nicer this way
	 */
	local_irq_save(*flags);
	addr = (unsigned long) kmap_atomic(bvec->bv_page, KM_BIO_SRC_IRQ);

	BUG_ON(addr & ~PAGE_MASK);

	return (char *) addr + bvec->bv_offset;
}

static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
{
	unsigned long ptr = (unsigned long) buffer & PAGE_MASK;

	kunmap_atomic((void *) ptr, KM_BIO_SRC_IRQ);
	local_irq_restore(*flags);
}

#else
#define bvec_kmap_irq(bvec, flags)	(page_address((bvec)->bv_page) + (bvec)->bv_offset)
#define bvec_kunmap_irq(buf, flags)	do { *(flags) = 0; } while (0)
#endif
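
/*
 * Illustrative sketch (not from the original header): copying a segment out
 * of a possibly-highmem page from interrupt context. "dst" is a placeholder
 * buffer owned by the caller:
 *
 *	unsigned long flags;
 *	char *src = bvec_kmap_irq(bvec, &flags);
 *
 *	memcpy(dst, src, bvec->bv_len);
 *	bvec_kunmap_irq(src, &flags);
 *
 * Interrupts stay disabled for the whole window, as the comment above warns.
 */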

static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
				   unsigned long *flags)
{
	return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags);
}
#define __bio_kunmap_irq(buf, flags)	bvec_kunmap_irq(buf, flags)

#define bio_kmap_irq(bio, flags) \
	__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
#define bio_kunmap_irq(buf, flags)	__bio_kunmap_irq(buf, flags)

#endif /* CONFIG_BLOCK */
#endif /* __LINUX_BIO_H */