OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [net/] [core/] [sock.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1278 phoenix
/*
2
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
3
 *              operating system.  INET is implemented using the  BSD Socket
4
 *              interface as the means of communication with the user level.
5
 *
6
 *              Generic socket support routines. Memory allocators, socket lock/release
7
 *              handler for protocols to use and generic option handler.
8
 *
9
 *
10
 * Version:     $Id: sock.c,v 1.1.1.1 2004-04-17 22:13:17 phoenix Exp $
11
 *
12
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
13
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14
 *              Florian La Roche, <flla@stud.uni-sb.de>
15
 *              Alan Cox, <A.Cox@swansea.ac.uk>
16
 *
17
 * Fixes:
18
 *              Alan Cox        :       Numerous verify_area() problems
19
 *              Alan Cox        :       Connecting on a connecting socket
20
 *                                      now returns an error for tcp.
21
 *              Alan Cox        :       sock->protocol is set correctly.
22
 *                                      and is not sometimes left as 0.
23
 *              Alan Cox        :       connect handles icmp errors on a
24
 *                                      connect properly. Unfortunately there
25
 *                                      is a restart syscall nasty there. I
26
 *                                      can't match BSD without hacking the C
27
 *                                      library. Ideas urgently sought!
28
 *              Alan Cox        :       Disallow bind() to addresses that are
29
 *                                      not ours - especially broadcast ones!!
30
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
31
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
32
 *                                      instead they leave that for the DESTROY timer.
33
 *              Alan Cox        :       Clean up error flag in accept
34
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
35
 *                                      was buggy. Put a remove_sock() in the handler
36
 *                                      for memory when we hit 0. Also altered the timer
37
 *                                      code. The ACK stuff can wait and needs major
38
 *                                      TCP layer surgery.
39
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
40
 *                                      and fixed timer/inet_bh race.
41
 *              Alan Cox        :       Added zapped flag for TCP
42
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
43
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
45
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
46
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
48
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
49
 *      Pauline Middelink       :       identd support
50
 *              Alan Cox        :       Fixed connect() taking signals I think.
51
 *              Alan Cox        :       SO_LINGER supported
52
 *              Alan Cox        :       Error reporting fixes
53
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
54
 *              Alan Cox        :       inet sockets don't set sk->type!
55
 *              Alan Cox        :       Split socket option code
56
 *              Alan Cox        :       Callbacks
57
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
58
 *              Alex            :       Removed restriction on inet fioctl
59
 *              Alan Cox        :       Splitting INET from NET core
60
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
61
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
62
 *              Alan Cox        :       Split IP from generic code
63
 *              Alan Cox        :       New kfree_skbmem()
64
 *              Alan Cox        :       Make SO_DEBUG superuser only.
65
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
66
 *                                      (compatibility fix)
67
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
68
 *              Alan Cox        :       Allocator for a socket is settable.
69
 *              Alan Cox        :       SO_ERROR includes soft errors.
70
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
71
 *              Alan Cox        :       Generic socket allocation to make hooks
72
 *                                      easier (suggested by Craig Metz).
73
 *              Michael Pall    :       SO_ERROR returns positive errno again
74
 *              Steve Whitehouse:       Added default destructor to free
75
 *                                      protocol private data.
76
 *              Steve Whitehouse:       Added various other default routines
77
 *                                      common to several socket families.
78
 *              Chris Evans     :       Call suser() check last on F_SETOWN
79
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
81
 *              Andi Kleen      :       Fix write_space callback
82
 *              Chris Evans     :       Security fixes - signedness again
83
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
84
 *
85
 * To Fix:
86
 *
87
 *
88
 *              This program is free software; you can redistribute it and/or
89
 *              modify it under the terms of the GNU General Public License
90
 *              as published by the Free Software Foundation; either version
91
 *              2 of the License, or (at your option) any later version.
92
 */
93
 
94
#include <linux/config.h>
95
#include <linux/errno.h>
96
#include <linux/types.h>
97
#include <linux/socket.h>
98
#include <linux/in.h>
99
#include <linux/kernel.h>
100
#include <linux/major.h>
101
#include <linux/sched.h>
102
#include <linux/timer.h>
103
#include <linux/string.h>
104
#include <linux/sockios.h>
105
#include <linux/net.h>
106
#include <linux/fcntl.h>
107
#include <linux/mm.h>
108
#include <linux/slab.h>
109
#include <linux/interrupt.h>
110
#include <linux/poll.h>
111
#include <linux/tcp.h>
112
#include <linux/init.h>
113
 
114
#include <asm/uaccess.h>
115
#include <asm/system.h>
116
 
117
#include <linux/netdevice.h>
118
#include <net/protocol.h>
119
#include <linux/skbuff.h>
120
#include <net/sock.h>
121
#include <linux/ipsec.h>
122
 
123
#ifdef CONFIG_FILTER
124
#include <linux/filter.h>
125
#endif
126
 
127
#ifdef CONFIG_INET
128
#include <net/tcp.h>
129
#endif
130
 
131
/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
/* Default maxima: room for _SK_MEM_PACKETS skbs plus per-skb overhead. */
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters; rescaled for small/large machines
 * in sk_init() below. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
149
 
150
static int sock_set_timeout(long *timeo_p, char *optval, int optlen)
151
{
152
        struct timeval tv;
153
 
154
        if (optlen < sizeof(tv))
155
                return -EINVAL;
156
        if (copy_from_user(&tv, optval, sizeof(tv)))
157
                return -EFAULT;
158
 
159
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
160
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
161
                return 0;
162
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
163
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
164
        return 0;
165
}
166
 
167
/*
168
 *      This is meant for all protocols to use and covers goings on
169
 *      at the socket level. Everything here is generic.
170
 */
171
 
172
/*
 *	Set a socket-level (SOL_SOCKET) option.  Generic handler used by
 *	all protocol families; anything protocol-specific is dispatched
 *	elsewhere.  @optval/@optlen come straight from userspace and are
 *	copied in with get_user/copy_from_user.
 *
 *	Returns 0 on success or a negative errno.  Runs under lock_sock()
 *	for everything past the argument-free compatibility options.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    char *optval, int optlen)
{
	struct sock *sk=sock->sk;
#ifdef CONFIG_FILTER
	struct sk_filter *filter;
#endif
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch(optname)
	{
		case SO_DONTLINGER:
			sk->linger=0;
			return 0;
	}
#endif

	/* Every remaining option carries at least an int. */
	if(optlen<sizeof(int))
		return(-EINVAL);

	if (get_user(val, (int *)optval))
		return -EFAULT;

	valbool = val?1:0;

	lock_sock(sk);

	switch(optname)
	{
		case SO_DEBUG:
			/* Turning debugging on requires CAP_NET_ADMIN;
			 * anyone may turn it off (compatibility). */
			if(val && !capable(CAP_NET_ADMIN))
			{
				ret = -EACCES;
			}
			else
				sk->debug=valbool;
			break;
		case SO_REUSEADDR:
			sk->reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			/* Read-only options: settable only via getsockopt. */
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			sk->localroute=valbool;
			break;
		case SO_BROADCAST:
			sk->broadcast=valbool;
			break;
		case SO_SNDBUF:
			/* Don't error on this BSD doesn't and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			/* Remember the user pinned this so autotuning
			 * elsewhere leaves it alone. */
			sk->userlocks |= SOCK_SNDBUF_LOCK;
			/* Doubled to account for sk_buff bookkeeping overhead. */
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sndbuf = (val * 2);

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't error on this BSD doesn't and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->rcvbuf = (val * 2);
			break;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			/* TCP needs to (re)arm its keepalive timers. */
			if (sk->protocol == IPPROTO_TCP)
			{
				tcp_set_keepalive(sk, valbool);
			}
#endif
			sk->keepopen = valbool;
			break;

		case SO_OOBINLINE:
			sk->urginline = valbool;
			break;

		case SO_NO_CHECK:
			sk->no_check = valbool;
			break;

		case SO_PRIORITY:
			/* Priorities 0..6 are unprivileged; higher need
			 * CAP_NET_ADMIN. */
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if(optlen<sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling,optval,sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if(ling.l_onoff==0) {
				sk->linger=0;
			} else {
#if (BITS_PER_LONG == 32)
				/* On 32-bit, l_linger*HZ could overflow a
				 * long; clamp to the schedulable maximum. */
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->lingertime=MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->lingertime=ling.l_linger*HZ;
				sk->linger=1;
			}
			break;

		case SO_BSDCOMPAT:
			sk->bsdism = valbool;
			break;

		case SO_PASSCRED:
			sock->passcred = valbool;
			break;

		case SO_TIMESTAMP:
			sk->rcvtstamp = valbool;
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			/* A zero low-water mark is meaningless; use 1. */
			sk->rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->bound_dev_if = dev->ifindex;
					/* Only the ifindex is kept; drop the
					 * device reference right away. */
					dev_put(dev);
				}
			}
			break;
		}
#endif


#ifdef CONFIG_FILTER
		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			/* Detach under the slock so receive-path users see
			 * either the old filter or none; release the filter
			 * only after dropping the lock. */
			spin_lock_bh(&sk->lock.slock);
			filter = sk->filter;
			if (filter) {
				sk->filter = NULL;
				spin_unlock_bh(&sk->lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->lock.slock);
			ret = -ENONET;
			break;
#endif
		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}
425
 
426
 
427
int sock_getsockopt(struct socket *sock, int level, int optname,
428
                    char *optval, int *optlen)
429
{
430
        struct sock *sk = sock->sk;
431
 
432
        union
433
        {
434
                int val;
435
                struct linger ling;
436
                struct timeval tm;
437
        } v;
438
 
439
        unsigned int lv=sizeof(int),len;
440
 
441
        if(get_user(len,optlen))
442
                return -EFAULT;
443
        if(len < 0)
444
                return -EINVAL;
445
 
446
        switch(optname)
447
        {
448
                case SO_DEBUG:
449
                        v.val = sk->debug;
450
                        break;
451
 
452
                case SO_DONTROUTE:
453
                        v.val = sk->localroute;
454
                        break;
455
 
456
                case SO_BROADCAST:
457
                        v.val= sk->broadcast;
458
                        break;
459
 
460
                case SO_SNDBUF:
461
                        v.val=sk->sndbuf;
462
                        break;
463
 
464
                case SO_RCVBUF:
465
                        v.val =sk->rcvbuf;
466
                        break;
467
 
468
                case SO_REUSEADDR:
469
                        v.val = sk->reuse;
470
                        break;
471
 
472
                case SO_KEEPALIVE:
473
                        v.val = sk->keepopen;
474
                        break;
475
 
476
                case SO_TYPE:
477
                        v.val = sk->type;
478
                        break;
479
 
480
                case SO_ERROR:
481
                        v.val = -sock_error(sk);
482
                        if(v.val==0)
483
                                v.val=xchg(&sk->err_soft,0);
484
                        break;
485
 
486
                case SO_OOBINLINE:
487
                        v.val = sk->urginline;
488
                        break;
489
 
490
                case SO_NO_CHECK:
491
                        v.val = sk->no_check;
492
                        break;
493
 
494
                case SO_PRIORITY:
495
                        v.val = sk->priority;
496
                        break;
497
 
498
                case SO_LINGER:
499
                        lv=sizeof(v.ling);
500
                        v.ling.l_onoff=sk->linger;
501
                        v.ling.l_linger=sk->lingertime/HZ;
502
                        break;
503
 
504
                case SO_BSDCOMPAT:
505
                        v.val = sk->bsdism;
506
                        break;
507
 
508
                case SO_TIMESTAMP:
509
                        v.val = sk->rcvtstamp;
510
                        break;
511
 
512
                case SO_RCVTIMEO:
513
                        lv=sizeof(struct timeval);
514
                        if (sk->rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
515
                                v.tm.tv_sec = 0;
516
                                v.tm.tv_usec = 0;
517
                        } else {
518
                                v.tm.tv_sec = sk->rcvtimeo/HZ;
519
                                v.tm.tv_usec = ((sk->rcvtimeo%HZ)*1000)/HZ;
520
                        }
521
                        break;
522
 
523
                case SO_SNDTIMEO:
524
                        lv=sizeof(struct timeval);
525
                        if (sk->sndtimeo == MAX_SCHEDULE_TIMEOUT) {
526
                                v.tm.tv_sec = 0;
527
                                v.tm.tv_usec = 0;
528
                        } else {
529
                                v.tm.tv_sec = sk->sndtimeo/HZ;
530
                                v.tm.tv_usec = ((sk->sndtimeo%HZ)*1000)/HZ;
531
                        }
532
                        break;
533
 
534
                case SO_RCVLOWAT:
535
                        v.val = sk->rcvlowat;
536
                        break;
537
 
538
                case SO_SNDLOWAT:
539
                        v.val=1;
540
                        break;
541
 
542
                case SO_PASSCRED:
543
                        v.val = sock->passcred;
544
                        break;
545
 
546
                case SO_PEERCRED:
547
                        if (len > sizeof(sk->peercred))
548
                                len = sizeof(sk->peercred);
549
                        if (copy_to_user(optval, &sk->peercred, len))
550
                                return -EFAULT;
551
                        goto lenout;
552
 
553
                case SO_PEERNAME:
554
                {
555
                        char address[128];
556
 
557
                        if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
558
                                return -ENOTCONN;
559
                        if (lv < len)
560
                                return -EINVAL;
561
                        if(copy_to_user((void*)optval, address, len))
562
                                return -EFAULT;
563
                        goto lenout;
564
                }
565
 
566
                /* Dubious BSD thing... Probably nobody even uses it, but
567
                 * the UNIX standard wants it for whatever reason... -DaveM
568
                 */
569
                case SO_ACCEPTCONN:
570
                        v.val = (sk->state == TCP_LISTEN);
571
                        break;
572
 
573
                default:
574
                        return(-ENOPROTOOPT);
575
        }
576
        if (len > lv)
577
                len = lv;
578
        if (copy_to_user(optval, &v, len))
579
                return -EFAULT;
580
lenout:
581
        if (put_user(len, optlen))
582
                return -EFAULT;
583
        return 0;
584
}
585
 
586
static kmem_cache_t *sk_cachep;
587
 
588
/*
589
 *      All socket objects are allocated here. This is for future
590
 *      usage.
591
 */
592
 
593
struct sock *sk_alloc(int family, int priority, int zero_it)
594
{
595
        struct sock *sk = kmem_cache_alloc(sk_cachep, priority);
596
 
597
        if(sk && zero_it) {
598
                memset(sk, 0, sizeof(struct sock));
599
                sk->family = family;
600
                sock_lock_init(sk);
601
        }
602
 
603
        return sk;
604
}
605
 
606
void sk_free(struct sock *sk)
607
{
608
#ifdef CONFIG_FILTER
609
        struct sk_filter *filter;
610
#endif
611
 
612
        if (sk->destruct)
613
                sk->destruct(sk);
614
 
615
#ifdef CONFIG_FILTER
616
        filter = sk->filter;
617
        if (filter) {
618
                sk_filter_release(sk, filter);
619
                sk->filter = NULL;
620
        }
621
#endif
622
 
623
        if (atomic_read(&sk->omem_alloc))
624
                printk(KERN_DEBUG "sk_free: optmem leakage (%d bytes) detected.\n", atomic_read(&sk->omem_alloc));
625
 
626
        kmem_cache_free(sk_cachep, sk);
627
}
628
 
629
/*
 *	Boot-time setup: create the sock slab cache and scale the default
 *	and maximum socket buffer sysctls to the amount of physical memory.
 */
void __init sk_init(void)
{
	sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0,
				      SLAB_HWCACHE_ALIGN, 0, 0);
	if (sk_cachep == NULL)
		printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!");

	/* Small machines (<= 4096 physical pages) get everything clamped
	 * to 32KB; big ones (>= 131072 pages) get a larger ceiling. */
	if (num_physpages <= 4096) {
		sysctl_wmem_max = sysctl_rmem_max = 32767;
		sysctl_wmem_default = sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = sysctl_rmem_max = 131071;
	}
}
646
 
647
/*
648
 *      Simple resource managers for sockets.
649
 */
650
 
651
 
652
/*
653
 * Write buffer destructor automatically called from kfree_skb.
654
 */
655
void sock_wfree(struct sk_buff *skb)
656
{
657
        struct sock *sk = skb->sk;
658
 
659
        /* In case it might be waiting for more memory. */
660
        atomic_sub(skb->truesize, &sk->wmem_alloc);
661
        if (!sk->use_write_queue)
662
                sk->write_space(sk);
663
        sock_put(sk);
664
}
665
 
666
/*
667
 * Read buffer destructor automatically called from kfree_skb.
668
 */
669
void sock_rfree(struct sk_buff *skb)
670
{
671
        struct sock *sk = skb->sk;
672
 
673
        atomic_sub(skb->truesize, &sk->rmem_alloc);
674
}
675
 
676
/*
677
 * Allocate a skb from the socket's send buffer.
678
 */
679
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
680
{
681
        if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
682
                struct sk_buff * skb = alloc_skb(size, priority);
683
                if (skb) {
684
                        skb_set_owner_w(skb, sk);
685
                        return skb;
686
                }
687
        }
688
        return NULL;
689
}
690
 
691
/*
692
 * Allocate a skb from the socket's receive buffer.
693
 */
694
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
695
{
696
        if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) {
697
                struct sk_buff *skb = alloc_skb(size, priority);
698
                if (skb) {
699
                        skb_set_owner_r(skb, sk);
700
                        return skb;
701
                }
702
        }
703
        return NULL;
704
}
705
 
706
/*
707
 * Allocate a memory block from the socket's option memory buffer.
708
 */
709
void *sock_kmalloc(struct sock *sk, int size, int priority)
710
{
711
        if ((unsigned)size <= sysctl_optmem_max &&
712
            atomic_read(&sk->omem_alloc)+size < sysctl_optmem_max) {
713
                void *mem;
714
                /* First do the add, to avoid the race if kmalloc
715
                 * might sleep.
716
                 */
717
                atomic_add(size, &sk->omem_alloc);
718
                mem = kmalloc(size, priority);
719
                if (mem)
720
                        return mem;
721
                atomic_sub(size, &sk->omem_alloc);
722
        }
723
        return NULL;
724
}
725
 
726
/*
727
 * Free an option memory block.
728
 */
729
void sock_kfree_s(struct sock *sk, void *mem, int size)
730
{
731
        kfree(mem);
732
        atomic_sub(size, &sk->omem_alloc);
733
}
734
 
735
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
736
   I think, these locks should be removed for datagram sockets.
737
 */
738
/*
 *	Sleep until send-buffer space becomes available on @sk, the
 *	timeout expires, a signal is pending, the socket is shut down for
 *	sending, or a socket error is raised.  @timeo is in jiffies;
 *	the remaining timeout is returned.  The caller re-checks which
 *	condition ended the wait.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DECLARE_WAITQUEUE(wait, current);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
	add_wait_queue(sk->sleep, &wait);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->socket->flags);
		/* Mark ourselves sleeping BEFORE re-testing the wakeup
		 * conditions: a wakeup between the test and the schedule
		 * would otherwise be lost. */
		set_current_state(TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
			break;
		if (sk->shutdown & SEND_SHUTDOWN)
			break;
		if (sk->err)
			break;
		timeo = schedule_timeout(timeo);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(sk->sleep, &wait);
	return timeo;
}
763
 
764
 
765
/*
 *      Generic send/receive buffer handlers
 */

/*
 * Allocate a send skb with header_len bytes of linear space plus
 * data_len bytes spread across page fragments, waiting (subject to the
 * socket's send timeout, or not at all if noblock) for write memory.
 * On success the skb is charged to sk's write memory; on failure NULL
 * is returned and a negative errno stored in *errcode.
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                                     unsigned long data_len, int noblock, int *errcode)
{
        struct sk_buff *skb;
        long timeo;
        int err;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
                        skb = alloc_skb(header_len, sk->allocation);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                /* Only i frags were filled;
                                                 * trim so kfree_skb releases
                                                 * exactly the pages we got.
                                                 */
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                /* No write space: flag the socket and wait, or bail out
                 * for non-blocking callers / pending signals.
                 */
                set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
                set_bit(SOCK_NOSPACE, &sk->socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}
845
 
846
/*
 * Allocate a purely linear send buffer of the given size: convenience
 * wrapper around sock_alloc_send_pskb() with no paged data.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
851
 
852
/*
 * Slow path of lock_sock(): sleep uninterruptibly until the socket's
 * user lock is released.  Entered with sk->lock.slock held; the
 * spinlock is dropped around each schedule() and reacquired before
 * the loop condition is rechecked, and is held again on return.
 */
void __lock_sock(struct sock *sk)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue_exclusive(&sk->lock.wq, &wait);
        for(;;) {
                current->state = TASK_UNINTERRUPTIBLE;
                spin_unlock_bh(&sk->lock.slock);
                schedule();
                spin_lock_bh(&sk->lock.slock);
                if(!sk->lock.users)
                        break;
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(&sk->lock.wq, &wait);
}
868
 
869
/*
 * Slow path of release_sock(): deliver the backlog of packets queued
 * by bottom halves while user context owned the socket.  The backlog
 * is detached in one go, processed with the socket spinlock dropped
 * (sk->backlog_rcv may sleep or requeue), then the lock is retaken
 * and the queue rechecked until it stays empty.
 */
void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->backlog.head;

        do {
                sk->backlog.head = sk->backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->backlog_rcv(sk, skb);
                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while((skb = sk->backlog.head) != NULL);
}
888
 
889
/*
 *      Generic socket manager library. Most simpler socket families
 *      use this to manage their socket lists. At some point we should
 *      hash these. By making this generic we get the lot hashed for free.
 *
 *      It is broken by design. All the protocols using it must be fixed. --ANK
 */

/* One global lock protecting every sklist_* list. */
rwlock_t net_big_sklist_lock = RW_LOCK_UNLOCKED;
898
 
899
/*
 * Unlink sk from a protocol's linear socket list and drop the
 * reference taken by sklist_insert_socket().  Harmless if the socket
 * is not on the list (nothing is unlinked or dropped).
 */
void sklist_remove_socket(struct sock **list, struct sock *sk)
{
        struct sock *s;

        write_lock_bh(&net_big_sklist_lock);

        while ((s = *list) != NULL) {
                if (s == sk) {
                        *list = s->next;
                        break;
                }
                list = &s->next;
        }

        write_unlock_bh(&net_big_sklist_lock);
        /* Release the list's reference only if we actually found it. */
        if (s)
                sock_put(s);
}
917
 
918
void sklist_insert_socket(struct sock **list, struct sock *sk)
919
{
920
        write_lock_bh(&net_big_sklist_lock);
921
        sk->next= *list;
922
        *list=sk;
923
        sock_hold(sk);
924
        write_unlock_bh(&net_big_sklist_lock);
925
}
926
 
927
/*
 *      This is only called from user mode. Thus it protects itself against
 *      interrupt users but doesn't worry about being called during work.
 *      Once it is removed from the queue no interrupt or bottom half will
 *      touch it and we are (fairly 8-) ) safe.
 */

/* Forward declaration: needed by sklist_destroy_timer() below. */
void sklist_destroy_socket(struct sock **list, struct sock *sk);
935
 
936
/*
937
 *      Handler for deferred kills.
938
 */
939
 
940
static void sklist_destroy_timer(unsigned long data)
941
{
942
        struct sock *sk=(struct sock *)data;
943
        sklist_destroy_socket(NULL,sk);
944
}
945
 
946
/*
 *      Destroy a socket. We pass NULL for a list if we know the
 *      socket is not on a list.
 */

void sklist_destroy_socket(struct sock **list,struct sock *sk)
{
        if(list)
                sklist_remove_socket(list, sk);

        skb_queue_purge(&sk->receive_queue);

        /* Only drop the final reference once no send/receive buffers
         * are outstanding and the socket is marked dead.
         */
        if(atomic_read(&sk->wmem_alloc) == 0 &&
           atomic_read(&sk->rmem_alloc) == 0 &&
           sk->dead)
        {
                sock_put(sk);
        }
        else
        {
                /*
                 *      Someone is using our buffers still.. defer
                 */
                /* Retry via sklist_destroy_timer() after SOCK_DESTROY_TIME. */
                init_timer(&sk->timer);
                sk->timer.expires=jiffies+SOCK_DESTROY_TIME;
                sk->timer.function=sklist_destroy_timer;
                sk->timer.data = (unsigned long)sk;
                add_timer(&sk->timer);
        }
}
976
 
977
/*
978
 * Set of default routines for initialising struct proto_ops when
979
 * the protocol does not support a particular function. In certain
980
 * cases where it makes no sense for a protocol to have a "do nothing"
981
 * function, some default processing is provided.
982
 */
983
 
984
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

/* Default release op for protocols with nothing to tear down. */
int sock_no_release(struct socket *sock)
{
        return 0;
}
988
 
989
/* Default bind op: protocols without bind() report -EOPNOTSUPP. */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
        return -EOPNOTSUPP;
}
993
 
994
/* Default connect op: protocols without connect() report -EOPNOTSUPP. */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
                    int len, int flags)
{
        return -EOPNOTSUPP;
}
999
 
1000
/* Default socketpair op: unsupported for this protocol. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
        return -EOPNOTSUPP;
}
1004
 
1005
/* Default accept op: unsupported for this protocol. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
        return -EOPNOTSUPP;
}
1009
 
1010
/* Default getname op: unsupported for this protocol. */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
                    int *len, int peer)
{
        return -EOPNOTSUPP;
}
1015
 
1016
/* Default poll op: never reports any events ready. */
unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
        return 0;
}
1020
 
1021
/* Default ioctl op: unsupported for this protocol. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        return -EOPNOTSUPP;
}
1025
 
1026
/* Default listen op: unsupported for this protocol. */
int sock_no_listen(struct socket *sock, int backlog)
{
        return -EOPNOTSUPP;
}
1030
 
1031
/* Default shutdown op: unsupported for this protocol. */
int sock_no_shutdown(struct socket *sock, int how)
{
        return -EOPNOTSUPP;
}
1035
 
1036
/* Default setsockopt op: no protocol-level options to set. */
int sock_no_setsockopt(struct socket *sock, int level, int optname,
                    char *optval, int optlen)
{
        return -EOPNOTSUPP;
}
1041
 
1042
/* Default getsockopt op: no protocol-level options to read. */
int sock_no_getsockopt(struct socket *sock, int level, int optname,
                    char *optval, int *optlen)
{
        return -EOPNOTSUPP;
}
1047
 
1048
/*
1049
 * Note: if you add something that sleeps here then change sock_fcntl()
1050
 *       to do proper fd locking.
1051
 */
1052
int sock_no_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
1053
{
1054
        struct sock *sk = sock->sk;
1055
 
1056
        switch(cmd)
1057
        {
1058
                case F_SETOWN:
1059
                        /*
1060
                         * This is a little restrictive, but it's the only
1061
                         * way to make sure that you can't send a sigurg to
1062
                         * another process.
1063
                         */
1064
                        if (current->pgrp != -arg &&
1065
                                current->pid != arg &&
1066
                                !capable(CAP_KILL)) return(-EPERM);
1067
                        sk->proc = arg;
1068
                        return(0);
1069
                case F_GETOWN:
1070
                        return(sk->proc);
1071
                default:
1072
                        return(-EINVAL);
1073
        }
1074
}
1075
 
1076
/* Default sendmsg op: unsupported for this protocol. */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, int flags,
                    struct scm_cookie *scm)
{
        return -EOPNOTSUPP;
}
1081
 
1082
/* Default recvmsg op: unsupported for this protocol. */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, int len, int flags,
                    struct scm_cookie *scm)
{
        return -EOPNOTSUPP;
}
1087
 
1088
/* Default mmap op for sockets that cannot be memory-mapped. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        /* Mirror missing mmap method error code */
        return -ENODEV;
}
1093
 
1094
/*
 * Generic sendpage fallback: map the page into kernel space and push
 * its contents through sock_sendmsg() as a single kernel-space iovec.
 * Used by protocols that lack zero-copy sendpage support.
 */
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
        ssize_t res;
        struct msghdr msg;
        struct iovec iov;
        mm_segment_t old_fs;
        char *kaddr;

        kaddr = kmap(page);

        msg.msg_name = NULL;
        msg.msg_namelen = 0;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = NULL;
        msg.msg_controllen = 0;
        msg.msg_flags = flags;

        iov.iov_base = kaddr + offset;
        iov.iov_len = size;

        /* The iovec points at kernel memory, so lift the user-space
         * address-limit check around the sendmsg call.
         */
        old_fs = get_fs();
        set_fs(KERNEL_DS);
        res = sock_sendmsg(sock, &msg, size);
        set_fs(old_fs);

        kunmap(page);
        return res;
}
1123
 
1124
/*
 *      Default Socket Callbacks
 */

/* Default sk->state_change callback: wake every sleeper on the
 * socket's wait queue.  callback_lock guards sk->sleep.
 */
void sock_def_wakeup(struct sock *sk)
{
        read_lock(&sk->callback_lock);
        if (sk->sleep && waitqueue_active(sk->sleep))
                wake_up_interruptible_all(sk->sleep);
        read_unlock(&sk->callback_lock);
}
1135
 
1136
/* Default sk->error_report callback: wake sleepers and send POLL_ERR
 * to async (SIGIO) watchers.
 */
void sock_def_error_report(struct sock *sk)
{
        read_lock(&sk->callback_lock);
        if (sk->sleep && waitqueue_active(sk->sleep))
                wake_up_interruptible(sk->sleep);
        sk_wake_async(sk,0,POLL_ERR);
        read_unlock(&sk->callback_lock);
}
1144
 
1145
/* Default sk->data_ready callback: wake one sleeper and signal
 * POLL_IN to async watchers.  len is the amount newly queued (unused).
 */
void sock_def_readable(struct sock *sk, int len)
{
        read_lock(&sk->callback_lock);
        if (sk->sleep && waitqueue_active(sk->sleep))
                wake_up_interruptible(sk->sleep);
        sk_wake_async(sk,1,POLL_IN);
        read_unlock(&sk->callback_lock);
}
1153
 
1154
/* Default sk->write_space callback: wake writers once at least half
 * of the send buffer has drained.
 */
void sock_def_write_space(struct sock *sk)
{
        read_lock(&sk->callback_lock);

        /* Do not wake up a writer until he can make "significant"
         * progress.  --DaveM
         */
        if((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf) {
                if (sk->sleep && waitqueue_active(sk->sleep))
                        wake_up_interruptible(sk->sleep);

                /* Should agree with poll, otherwise some programs break */
                if (sock_writeable(sk))
                        sk_wake_async(sk, 2, POLL_OUT);
        }

        read_unlock(&sk->callback_lock);
}
1172
 
1173
void sock_def_destruct(struct sock *sk)
1174
{
1175
        if (sk->protinfo.destruct_hook)
1176
                kfree(sk->protinfo.destruct_hook);
1177
}
1178
 
1179
/*
 * Initialise a freshly allocated struct sock to its generic defaults:
 * empty queues, default buffer sizes from sysctls, default callbacks,
 * infinite timeouts, and an initial refcount of one.  sock may be NULL
 * for kernel-internal sockets with no struct socket attached.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
        skb_queue_head_init(&sk->receive_queue);
        skb_queue_head_init(&sk->write_queue);
        skb_queue_head_init(&sk->error_queue);

        init_timer(&sk->timer);

        sk->allocation  =       GFP_KERNEL;
        sk->rcvbuf      =       sysctl_rmem_default;
        sk->sndbuf      =       sysctl_wmem_default;
        sk->state       =       TCP_CLOSE;
        sk->zapped      =       1;
        sk->socket      =       sock;

        /* Link sock and sk both ways when a struct socket exists. */
        if(sock)
        {
                sk->type        =       sock->type;
                sk->sleep       =       &sock->wait;
                sock->sk        =       sk;
        } else
                sk->sleep       =       NULL;

        sk->dst_lock            =       RW_LOCK_UNLOCKED;
        sk->callback_lock       =       RW_LOCK_UNLOCKED;

        sk->state_change        =       sock_def_wakeup;
        sk->data_ready          =       sock_def_readable;
        sk->write_space         =       sock_def_write_space;
        sk->error_report        =       sock_def_error_report;
        sk->destruct            =       sock_def_destruct;

        /* Peer credentials start out unset (-1 uid/gid, pid 0). */
        sk->peercred.pid        =       0;
        sk->peercred.uid        =       -1;
        sk->peercred.gid        =       -1;
        sk->rcvlowat            =       1;
        sk->rcvtimeo            =       MAX_SCHEDULE_TIMEOUT;
        sk->sndtimeo            =       MAX_SCHEDULE_TIMEOUT;

        atomic_set(&sk->refcnt, 1);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.