OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

or1k/trunk/linux/linux-2.4/drivers/block/nbd.c (rev 1765)

/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * (part of code stolen from loop.c)
 *
 * 97-3-25 compiled 0-th version, not yet tested it
 *   (it did not work, BTW) (later that day) HEY! it works!
 *   (bit later) hmm, not that much... 2:00am next day:
 *   yes, it works, but it gives something like 50kB/sec
 * 97-4-01 complete rewrite to make it possible for many requests at
 *   once to be processed
 * 97-4-11 Making protocol independent of endianness etc.
 * 97-9-13 Cosmetic changes
 * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
 * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl>
 * 01-2-27 Fix to store proper blockcount for kernel (calculated using
 *   BLOCK_SIZE_BITS, not device blocksize) <aga@permonline.ru>
 * 01-3-11 Make nbd work with new Linux block layer code. It now supports
 *   plugging like all the other block devices. Also added in MSG_MORE to
 *   reduce number of partial TCP segments sent. <steve@chygwyn.com>
 * 01-12-6 Fix deadlock condition by making queue locks independent of
 *   the transmit lock. <steve@chygwyn.com>
 * 02-10-11 Allow hung xmit to be aborted via SIGKILL & various fixes.
 *   <Paul.Clements@SteelEye.com> <James.Bottomley@SteelEye.com>
 *
 * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
 * why not: would need verify_area and friends, would share yet another
 *          structure with userland
 */

#define PARANOIA
#include <linux/major.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <net/sock.h>

#include <linux/devfs_fs_kernel.h>

#include <asm/uaccess.h>
#include <asm/types.h>

#define MAJOR_NR NBD_MAJOR
#include <linux/nbd.h>

#define LO_MAGIC 0x68797548

static int nbd_blksizes[MAX_NBD];
static int nbd_blksize_bits[MAX_NBD];
static int nbd_sizes[MAX_NBD];
static u64 nbd_bytesizes[MAX_NBD];

static struct nbd_device nbd_dev[MAX_NBD];
static devfs_handle_t devfs_handle;

#define DEBUG( s )
/* #define DEBUG( s ) printk( s )
 */

#ifdef PARANOIA
static int requests_in;
static int requests_out;
#endif

static int nbd_open(struct inode *inode, struct file *file)
{
        int dev;

        if (!inode)
                return -EINVAL;
        dev = MINOR(inode->i_rdev);
        if (dev >= MAX_NBD)
                return -ENODEV;

        nbd_dev[dev].refcnt++;
        return 0;
}

/*
 *  Send or receive packet.
 */
static int nbd_xmit(int send, struct socket *sock, char *buf, int size, int msg_flags)
{
        mm_segment_t oldfs;
        int result;
        struct msghdr msg;
        struct iovec iov;
        unsigned long flags;
        sigset_t oldset;

        oldfs = get_fs();
        set_fs(get_ds());

        /* Allow interception of SIGKILL only
         * Don't allow other signals to interrupt the transmission */
        spin_lock_irqsave(&current->sigmask_lock, flags);
        oldset = current->blocked;
        sigfillset(&current->blocked);
        sigdelsetmask(&current->blocked, sigmask(SIGKILL));
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);


        do {
                sock->sk->allocation = GFP_NOIO;
                iov.iov_base = buf;
                iov.iov_len = size;
                msg.msg_name = NULL;
                msg.msg_namelen = 0;
                msg.msg_iov = &iov;
                msg.msg_iovlen = 1;
                msg.msg_control = NULL;
                msg.msg_controllen = 0;
                msg.msg_namelen = 0;
                msg.msg_flags = msg_flags | MSG_NOSIGNAL;

                if (send)
                        result = sock_sendmsg(sock, &msg, size);
                else
                        result = sock_recvmsg(sock, &msg, size, 0);

                if (signal_pending(current)) {
                        siginfo_t info;
                        spin_lock_irqsave(&current->sigmask_lock, flags);
                        printk(KERN_WARNING "NBD (pid %d: %s) got signal %d\n",
                                current->pid, current->comm,
                                dequeue_signal(&current->blocked, &info));
                        spin_unlock_irqrestore(&current->sigmask_lock, flags);
                        result = -EINTR;
                        break;
                }

                if (result <= 0) {
#ifdef PARANOIA
                        printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
                               send ? "send" : "receive", (long) sock, (long) buf, size, result);
#endif
                        break;
                }
                size -= result;
                buf += result;
        } while (size > 0);

        spin_lock_irqsave(&current->sigmask_lock, flags);
        current->blocked = oldset;
        recalc_sigpending(current);
        spin_unlock_irqrestore(&current->sigmask_lock, flags);

        set_fs(oldfs);
        return result;
}

#define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }

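/*
 * Send one request to the server: build the on-the-wire nbd_request
 * header (magic, type, byte offset, length, handle = the request
 * pointer) and, for writes, follow it with the data of every buffer
 * head. The whole transmission is serialised by lo->tx_lock; on
 * failure req->errors is bumped.
 */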
void nbd_send_req(struct nbd_device *lo, struct request *req)
{
        int result = -1;
        struct nbd_request request;
        unsigned long size = req->nr_sectors << 9;
        struct socket *sock = lo->sock;

        DEBUG("NBD: sending control, ");
        request.magic = htonl(NBD_REQUEST_MAGIC);
        request.type = htonl(req->cmd);
        request.from = cpu_to_be64( (u64) req->sector << 9);
        request.len = htonl(size);
        memcpy(request.handle, &req, sizeof(req));

        down(&lo->tx_lock);

        if (!sock || !lo->sock) {
                FAIL("Attempted sendmsg to closed socket\n");
        }

        result = nbd_xmit(1, sock, (char *) &request, sizeof(request), req->cmd == WRITE ? MSG_MORE : 0);
        if (result <= 0)
                FAIL("Sendmsg failed for control.");

        if (req->cmd == WRITE) {
                struct buffer_head *bh = req->bh;
                DEBUG("data, ");
                do {
                        result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE);
                        if (result <= 0)
                                FAIL("Send data failed.");
                        bh = bh->b_reqnext;
                } while(bh);
        }
        up(&lo->tx_lock);
        return;

error_out:
        up(&lo->tx_lock);
        req->errors++;
}

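/*
 * Match a reply handle (which carries back the request pointer we sent
 * in nbd_send_req) against the list of outstanding requests and unlink
 * the match from the per-device queue; NULL if nothing matches.
 */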
static struct request *nbd_find_request(struct nbd_device *lo, char *handle)
{
        struct request *req;
        struct list_head *tmp;
        struct request *xreq;

        memcpy(&xreq, handle, sizeof(xreq));

        spin_lock(&lo->queue_lock);
        list_for_each(tmp, &lo->queue_head) {
                req = list_entry(tmp, struct request, queue);
                if (req != xreq)
                        continue;
                list_del(&req->queue);
                spin_unlock(&lo->queue_lock);
                return req;
        }
        spin_unlock(&lo->queue_lock);
        return NULL;
}

#define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
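/*
 * Receive one reply from the server: read the nbd_reply header, look up
 * the matching request and, for reads, pull the data into its buffer
 * heads. A NULL return records a hard error in lo->harderror and tells
 * the caller to stop.
 */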
struct request *nbd_read_stat(struct nbd_device *lo)
                /* NULL returned = something went wrong, inform userspace       */
{
        int result;
        struct nbd_reply reply;
        struct request *req;

        DEBUG("reading control, ");
        reply.magic = 0;
        result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply), MSG_WAITALL);
        if (result <= 0)
                HARDFAIL("Recv control failed.");
        req = nbd_find_request(lo, reply.handle);
        if (req == NULL)
                HARDFAIL("Unexpected reply");

        DEBUG("ok, ");
        if (ntohl(reply.magic) != NBD_REPLY_MAGIC)
                HARDFAIL("Not enough magic.");
        if (ntohl(reply.error))
                FAIL("Other side returned error.");
        if (req->cmd == READ) {
                struct buffer_head *bh = req->bh;
                DEBUG("data, ");
                do {
                        result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL);
                        if (result <= 0)
                                HARDFAIL("Recv data failed.");
                        bh = bh->b_reqnext;
                } while(bh);
        }
        DEBUG("done.\n");
        return req;

/* Can we get here? Yes, if other side returns error */
      error_out:
        req->errors++;
        return req;
}

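/*
 * Main receive loop, run from the NBD_DO_IT ioctl: keep collecting
 * replies and completing the corresponding requests until nbd_read_stat
 * gives up.
 */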
void nbd_do_it(struct nbd_device *lo)
{
        struct request *req;

        while (1) {
                req = nbd_read_stat(lo);

                if (!req) {
                        printk(KERN_ALERT "req should never be null\n" );
                        goto out;
                }
#ifdef PARANOIA
                if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
                        printk(KERN_ALERT "NBD: request corrupted!\n");
                        continue;
                }
                if (lo->magic != LO_MAGIC) {
                        printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
                        goto out;
                }
#endif

                nbd_end_request(req);

        }
 out:
        return;
}

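/*
 * Drain the device queue, failing and completing every request that is
 * still outstanding (used when the socket is cleared or torn down).
 */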
void nbd_clear_que(struct nbd_device *lo)
{
        struct request *req;

#ifdef PARANOIA
        if (lo->magic != LO_MAGIC) {
                printk(KERN_ERR "NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
                return;
        }
#endif
        do {
                req = NULL;
                spin_lock(&lo->queue_lock);
                if (!list_empty(&lo->queue_head)) {
                        req = list_entry(lo->queue_head.next, struct request, queue);
                        list_del(&req->queue);
                }
                spin_unlock(&lo->queue_lock);
                if (req) {
                        req->errors++;
                        nbd_end_request(req);
                }
        } while(req);

}

/*
 * We always wait for result of write, for now. It would be nice to make it optional
 * in future
 * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
 *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
 */

#undef FAIL
#define FAIL( s ) { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; }

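/*
 * Block layer entry point: pull requests off the block request queue,
 * move them onto lo->queue_head and hand them to nbd_send_req().
 * io_request_lock is dropped while talking to the network and retaken
 * before looking at the queue again.
 */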
static void do_nbd_request(request_queue_t * q)
{
        struct request *req;
        int dev = 0;
        struct nbd_device *lo;

        while (!QUEUE_EMPTY) {
                req = CURRENT;
#ifdef PARANOIA
                if (!req)
                        FAIL("que not empty but no request?");
#endif
                dev = MINOR(req->rq_dev);
#ifdef PARANOIA
                if (dev >= MAX_NBD)
                        FAIL("Minor too big.");         /* Probably can not happen */
#endif
                lo = &nbd_dev[dev];
                if (!lo->file)
                        FAIL("Request when not-ready.");
                if ((req->cmd == WRITE) && (lo->flags & NBD_READ_ONLY))
                        FAIL("Write on read-only");
#ifdef PARANOIA
                if (lo->magic != LO_MAGIC)
                        FAIL("nbd[] is not magical!");
                requests_in++;
#endif
                req->errors = 0;
                blkdev_dequeue_request(req);
                spin_unlock_irq(&io_request_lock);

                spin_lock(&lo->queue_lock);
                if (!lo->file) {
                        spin_unlock(&lo->queue_lock);
                        printk(KERN_ERR "nbd: failed between accept and semaphore, file lost\n");
                        req->errors++;
                        nbd_end_request(req);
                        spin_lock_irq(&io_request_lock);
                        continue;
                }

                list_add_tail(&req->queue, &lo->queue_head);
                spin_unlock(&lo->queue_lock);

                nbd_send_req(lo, req);
                if (req->errors) {
                        printk(KERN_ERR "nbd: nbd_send_req failed\n");
                        spin_lock(&lo->queue_lock);
                        list_del(&req->queue);
                        spin_unlock(&lo->queue_lock);
                        nbd_end_request(req);
                        spin_lock_irq(&io_request_lock);
                        continue;
                }

                spin_lock_irq(&io_request_lock);
                continue;

              error_out:
                req->errors++;
                blkdev_dequeue_request(req);
                spin_unlock(&io_request_lock);
                nbd_end_request(req);
                spin_lock(&io_request_lock);
        }
        return;
}

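/*
 * ioctl interface used by the nbd-client userland tool: attach/detach
 * the socket, set block size and device size, clear the queue, and run
 * the receive loop (NBD_DO_IT).
 */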
static int nbd_ioctl(struct inode *inode, struct file *file,
                     unsigned int cmd, unsigned long arg)
{
        struct nbd_device *lo;
        int dev, error, temp;
        struct request sreq ;

        /* Anyone capable of this syscall can do *real bad* things */

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (!inode)
                return -EINVAL;
        dev = MINOR(inode->i_rdev);
        if (dev >= MAX_NBD)
                return -ENODEV;

        lo = &nbd_dev[dev];
        switch (cmd) {
        case NBD_DISCONNECT:
                printk("NBD_DISCONNECT\n");
                sreq.cmd=2 ; /* shutdown command */
                if (!lo->sock) return -EINVAL;
                nbd_send_req(lo, &sreq);
                return 0 ;

        case NBD_CLEAR_SOCK:
                error = 0;
                down(&lo->tx_lock);
                lo->sock = NULL;
                up(&lo->tx_lock);
                spin_lock(&lo->queue_lock);
                file = lo->file;
                lo->file = NULL;
                spin_unlock(&lo->queue_lock);
                nbd_clear_que(lo);
                spin_lock(&lo->queue_lock);
                if (!list_empty(&lo->queue_head)) {
                        printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n");
                        error = -EBUSY;
                }
                spin_unlock(&lo->queue_lock);
                if (file)
                        fput(file);
                return error;
        case NBD_SET_SOCK:
                if (lo->file)
                        return -EBUSY;
                error = -EINVAL;
                file = fget(arg);
                if (file) {
                        inode = file->f_dentry->d_inode;
                        /* N.B. Should verify that it's a socket */
                        lo->file = file;
                        lo->sock = &inode->u.socket_i;
                        error = 0;
                }
                return error;
        case NBD_SET_BLKSIZE:
                if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE))
                        return -EINVAL;
                nbd_blksizes[dev] = arg;
                temp = arg >> 9;
                nbd_blksize_bits[dev] = 9;
                while (temp > 1) {
                        nbd_blksize_bits[dev]++;
                        temp >>= 1;
                }
                nbd_bytesizes[dev] &= ~(nbd_blksizes[dev]-1);
                nbd_sizes[dev] = nbd_bytesizes[dev] >> BLOCK_SIZE_BITS;
                return 0;
        case NBD_SET_SIZE:
                nbd_bytesizes[dev] = arg & ~(nbd_blksizes[dev]-1);
                nbd_sizes[dev] = nbd_bytesizes[dev] >> BLOCK_SIZE_BITS;
                return 0;
        case NBD_SET_SIZE_BLOCKS:
                nbd_bytesizes[dev] = ((u64) arg) << nbd_blksize_bits[dev];
                nbd_sizes[dev] = nbd_bytesizes[dev] >> BLOCK_SIZE_BITS;
                return 0;
        case NBD_DO_IT:
                if (!lo->file)
                        return -EINVAL;
                nbd_do_it(lo);
                /* on return tidy up in case we have a signal */
                /* Forcibly shutdown the socket causing all listeners
                 * to error
                 *
                 * FIXME: This code is duplicated from sys_shutdown, but
                 * there should be a more generic interface rather than
                 * calling socket ops directly here */
                down(&lo->tx_lock);
                if (lo->sock) {
                        printk(KERN_WARNING "nbd: shutting down socket\n");
                        lo->sock->ops->shutdown(lo->sock,
                                SEND_SHUTDOWN|RCV_SHUTDOWN);
                        lo->sock = NULL;
                }
                up(&lo->tx_lock);
                spin_lock(&lo->queue_lock);
                file = lo->file;
                lo->file = NULL;
                spin_unlock(&lo->queue_lock);
                nbd_clear_que(lo);
                printk(KERN_WARNING "nbd: queue cleared\n");
                if (file)
                        fput(file);
                return lo->harderror;
        case NBD_CLEAR_QUE:
                down(&lo->tx_lock);
                if (lo->sock) {
                        up(&lo->tx_lock);
                        return 0; /* probably should be error, but that would
                                   * break "nbd-client -d", so just return 0 */
                }
                up(&lo->tx_lock);
                nbd_clear_que(lo);
                return 0;
#ifdef PARANOIA
        case NBD_PRINT_DEBUG:
                printk(KERN_INFO "NBD device %d: next = %p, prev = %p. Global: in %d, out %d\n",
                       dev, lo->queue_head.next, lo->queue_head.prev, requests_in, requests_out);
                return 0;
#endif
        case BLKGETSIZE:
                return put_user(nbd_bytesizes[dev] >> 9, (unsigned long *) arg);
        case BLKGETSIZE64:
                return put_user((u64)nbd_bytesizes[dev], (u64 *) arg);
        }
        return -EINVAL;
}

static int nbd_release(struct inode *inode, struct file *file)
{
        struct nbd_device *lo;
        int dev;

        if (!inode)
                return -ENODEV;
        dev = MINOR(inode->i_rdev);
        if (dev >= MAX_NBD)
                return -ENODEV;
        lo = &nbd_dev[dev];
        if (lo->refcnt <= 0)
                printk(KERN_ALERT "nbd_release: refcount(%d) <= 0\n", lo->refcnt);
        lo->refcnt--;
        /* N.B. Doesn't lo->file need an fput?? */
        return 0;
}

static struct block_device_operations nbd_fops =
{
        owner:          THIS_MODULE,
        open:           nbd_open,
        release:        nbd_release,
        ioctl:          nbd_ioctl,
};

/*
 * And here should be modules and kernel interface
 *  (Just smiley confuses emacs :-)
 */

static int __init nbd_init(void)
{
        int i;

        if (sizeof(struct nbd_request) != 28) {
                printk(KERN_CRIT "Sizeof nbd_request needs to be 28 in order to work!\n" );
                return -EIO;
        }

        if (register_blkdev(MAJOR_NR, "nbd", &nbd_fops)) {
                printk("Unable to get major number %d for NBD\n",
                       MAJOR_NR);
                return -EIO;
        }
#ifdef MODULE
        printk("nbd: registered device at major %d\n", MAJOR_NR);
#endif
        blksize_size[MAJOR_NR] = nbd_blksizes;
        blk_size[MAJOR_NR] = nbd_sizes;
        blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request);
        blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
        for (i = 0; i < MAX_NBD; i++) {
                nbd_dev[i].refcnt = 0;
                nbd_dev[i].file = NULL;
                nbd_dev[i].magic = LO_MAGIC;
                nbd_dev[i].flags = 0;
                spin_lock_init(&nbd_dev[i].queue_lock);
                INIT_LIST_HEAD(&nbd_dev[i].queue_head);
                init_MUTEX(&nbd_dev[i].tx_lock);
                nbd_blksizes[i] = 1024;
                nbd_blksize_bits[i] = 10;
                nbd_bytesizes[i] = ((u64)0x7ffffc00) << 10; /* 2TB */
                nbd_sizes[i] = nbd_bytesizes[i] >> BLOCK_SIZE_BITS;
                register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops,
                                nbd_bytesizes[i]>>9);
        }
        devfs_handle = devfs_mk_dir (NULL, "nbd", NULL);
        devfs_register_series (devfs_handle, "%u", MAX_NBD,
                               DEVFS_FL_DEFAULT, MAJOR_NR, 0,
                               S_IFBLK | S_IRUSR | S_IWUSR,
                               &nbd_fops, NULL);

        return 0;
}

static void __exit nbd_cleanup(void)
{
        devfs_unregister (devfs_handle);
        blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));

        if (unregister_blkdev(MAJOR_NR, "nbd") != 0)
                printk("nbd: cleanup_module failed\n");
        else
                printk("nbd: module cleaned up.\n");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");
