OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [net/] [ipv4/] [ipvs/] [ip_vs_ctl.c] - Blame information for rev 1275

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * IPVS         An implementation of the IP virtual server support for the
3
 *              LINUX operating system.  IPVS is now implemented as a module
4
 *              over the NetFilter framework. IPVS can be used to build a
5
 *              high-performance and highly available server based on a
6
 *              cluster of servers.
7
 *
8
 * Version:     $Id: ip_vs_ctl.c,v 1.1.1.1 2004-04-15 01:14:05 phoenix Exp $
9
 *
10
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
11
 *              Peter Kese <peter.kese@ijs.si>
12
 *              Julian Anastasov <ja@ssi.bg>
13
 *
14
 *              This program is free software; you can redistribute it and/or
15
 *              modify it under the terms of the GNU General Public License
16
 *              as published by the Free Software Foundation; either version
17
 *              2 of the License, or (at your option) any later version.
18
 *
19
 * Changes:
20
 *
21
 */
22
 
23
#include <linux/module.h>
24
#include <linux/init.h>
25
#include <linux/types.h>
26
#include <linux/fs.h>
27
#include <linux/sysctl.h>
28
#include <linux/proc_fs.h>
29
#include <linux/timer.h>
30
#include <linux/swap.h>
31
#include <linux/proc_fs.h>
32
 
33
#include <linux/netfilter.h>
34
#include <linux/netfilter_ipv4.h>
35
 
36
#include <net/ip.h>
37
#include <net/sock.h>
38
 
39
#include <asm/uaccess.h>
40
 
41
#include <net/ip_vs.h>
42
 
43
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
44
static DECLARE_MUTEX(__ip_vs_mutex);
45
 
46
/* lock for service table */
47
rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;
48
 
49
/* lock for table with the real services */
50
static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;
51
 
52
/* lock for state and timeout tables */
53
static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;
54
 
55
/* lock for drop entry handling */
56
static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;
57
 
58
/* lock for drop packet handling */
59
static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;
60
 
61
/* 1/rate drop and drop-entry variables */
62
int ip_vs_drop_rate = 0;
63
int ip_vs_drop_counter = 0;
64
atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
65
 
66
/* number of virtual services */
67
static int ip_vs_num_services = 0;
68
 
69
/* sysctl variables */
70
static int sysctl_ip_vs_drop_entry = 0;
71
static int sysctl_ip_vs_drop_packet = 0;
72
static int sysctl_ip_vs_secure_tcp = 0;
73
static int sysctl_ip_vs_amemthresh = 2048;
74
static int sysctl_ip_vs_am_droprate = 10;
75
int sysctl_ip_vs_cache_bypass = 0;
76
int sysctl_ip_vs_expire_nodest_conn = 0;
77
int sysctl_ip_vs_sync_threshold = 3;
78
int sysctl_ip_vs_nat_icmp_send = 0;
79
 
80
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity, only present when CONFIG_IP_VS_DEBUG is set */
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Accessor for the current IPVS debug level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
88
 
89
/*
90
 *      update_defense_level is called from timer bh and from sysctl.
91
 */
92
static void update_defense_level(void)
93
{
94
        struct sysinfo i;
95
        int availmem;
96
        int nomem;
97
 
98
        /* we only count free and buffered memory (in pages) */
99
        si_meminfo(&i);
100
        availmem = i.freeram + i.bufferram;
101
 
102
        nomem = (availmem < sysctl_ip_vs_amemthresh);
103
 
104
        /* drop_entry */
105
        spin_lock(&__ip_vs_dropentry_lock);
106
        switch (sysctl_ip_vs_drop_entry) {
107
        case 0:
108
                atomic_set(&ip_vs_dropentry, 0);
109
                break;
110
        case 1:
111
                if (nomem) {
112
                        atomic_set(&ip_vs_dropentry, 1);
113
                        sysctl_ip_vs_drop_entry = 2;
114
                } else {
115
                        atomic_set(&ip_vs_dropentry, 0);
116
                }
117
                break;
118
        case 2:
119
                if (nomem) {
120
                        atomic_set(&ip_vs_dropentry, 1);
121
                } else {
122
                        atomic_set(&ip_vs_dropentry, 0);
123
                        sysctl_ip_vs_drop_entry = 1;
124
                };
125
                break;
126
        case 3:
127
                atomic_set(&ip_vs_dropentry, 1);
128
                break;
129
        }
130
        spin_unlock(&__ip_vs_dropentry_lock);
131
 
132
        /* drop_packet */
133
        spin_lock(&__ip_vs_droppacket_lock);
134
        switch (sysctl_ip_vs_drop_packet) {
135
        case 0:
136
                ip_vs_drop_rate = 0;
137
                break;
138
        case 1:
139
                if (nomem) {
140
                        ip_vs_drop_rate = ip_vs_drop_counter
141
                                = sysctl_ip_vs_amemthresh /
142
                                (sysctl_ip_vs_amemthresh - availmem);
143
                        sysctl_ip_vs_drop_packet = 2;
144
                } else {
145
                        ip_vs_drop_rate = 0;
146
                }
147
                break;
148
        case 2:
149
                if (nomem) {
150
                        ip_vs_drop_rate = ip_vs_drop_counter
151
                                = sysctl_ip_vs_amemthresh /
152
                                (sysctl_ip_vs_amemthresh - availmem);
153
                } else {
154
                        ip_vs_drop_rate = 0;
155
                        sysctl_ip_vs_drop_packet = 1;
156
                }
157
                break;
158
        case 3:
159
                ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
160
                break;
161
        }
162
        spin_unlock(&__ip_vs_droppacket_lock);
163
 
164
        /* secure_tcp */
165
        write_lock(&__ip_vs_securetcp_lock);
166
        switch (sysctl_ip_vs_secure_tcp) {
167
        case 0:
168
                ip_vs_secure_tcp_set(0);
169
                break;
170
        case 1:
171
                if (nomem) {
172
                        ip_vs_secure_tcp_set(1);
173
                        sysctl_ip_vs_secure_tcp = 2;
174
                } else {
175
                        ip_vs_secure_tcp_set(0);
176
                }
177
                break;
178
        case 2:
179
                if (nomem) {
180
                        ip_vs_secure_tcp_set(1);
181
                } else {
182
                        ip_vs_secure_tcp_set(0);
183
                        sysctl_ip_vs_secure_tcp = 1;
184
                }
185
                break;
186
        case 3:
187
                ip_vs_secure_tcp_set(1);
188
                break;
189
        }
190
        write_unlock(&__ip_vs_securetcp_lock);
191
}
192
 
193
 
194
/*
195
 *      Timer for checking the defense
196
 */
197
static struct timer_list defense_timer;
198
#define DEFENSE_TIMER_PERIOD    1*HZ
199
 
200
static void defense_timer_handler(unsigned long data)
201
{
202
        update_defense_level();
203
        if (atomic_read(&ip_vs_dropentry))
204
                ip_vs_random_dropentry();
205
 
206
        mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
207
}
208
 
209
 
210
/*
211
 *  Hash table: for virtual service lookups
212
 */
213
#define IP_VS_SVC_TAB_BITS 8
214
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
215
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
216
 
217
/* the service table hashed by <protocol, addr, port> */
218
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
219
/* the service table hashed by fwmark */
220
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
221
 
222
/*
223
 *  Hash table: for real service lookups
224
 */
225
#define IP_VS_RTAB_BITS 4
226
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
227
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
228
 
229
static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
230
 
231
/*
232
 * Trash for destinations
233
 */
234
static LIST_HEAD(ip_vs_dest_trash);
235
 
236
/*
237
 * FTP & NULL virtual service counters
238
 */
239
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
240
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
241
 
242
 
243
/*
244
 *  Returns hash value for virtual service
245
 */
246
static __inline__ unsigned
247
ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
248
{
249
        register unsigned porth = ntohs(port);
250
 
251
        return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
252
                & IP_VS_SVC_TAB_MASK;
253
}
254
 
255
/*
256
 *  Returns hash value of fwmark for virtual service lookup
257
 */
258
static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
259
{
260
        return fwmark & IP_VS_SVC_TAB_MASK;
261
}
262
 
263
/*
264
 *  Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
265
 *  or in the ip_vs_svc_fwm_table by fwmark.
266
 *  Should be called with locked tables.
267
 *  Returns bool success.
268
 */
269
static int ip_vs_svc_hash(struct ip_vs_service *svc)
270
{
271
        unsigned hash;
272
 
273
        if (svc->flags & IP_VS_SVC_F_HASHED) {
274
                IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
275
                          "called from %p\n", __builtin_return_address(0));
276
                return 0;
277
        }
278
 
279
        if (svc->fwmark == 0) {
280
                /*
281
                 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
282
                 */
283
                hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
284
                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
285
        } else {
286
                /*
287
                 *  Hash it by fwmark in ip_vs_svc_fwm_table
288
                 */
289
                hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
290
                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
291
        }
292
 
293
        svc->flags |= IP_VS_SVC_F_HASHED;
294
        /* increase its refcnt because it is referenced by the svc table */
295
        atomic_inc(&svc->refcnt);
296
        return 1;
297
}
298
 
299
 
300
/*
301
 *  Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
302
 *  Should be called with locked tables.
303
 *  Returns bool success.
304
 */
305
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
306
{
307
        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
308
                IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
309
                          "called from %p\n", __builtin_return_address(0));
310
                return 0;
311
        }
312
 
313
        if (svc->fwmark == 0) {
314
                /*
315
                 * Remove it from the ip_vs_svc_table table.
316
                 */
317
                list_del(&svc->s_list);
318
        } else {
319
                /*
320
                 * Remove it from the ip_vs_svc_fwm_table table.
321
                 */
322
                list_del(&svc->f_list);
323
        }
324
 
325
        svc->flags &= ~IP_VS_SVC_F_HASHED;
326
        atomic_dec(&svc->refcnt);
327
        return 1;
328
}
329
 
330
 
331
/*
332
 *  Get service by {proto,addr,port} in the service table.
333
 */
334
static __inline__ struct ip_vs_service *
335
__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
336
{
337
        unsigned hash;
338
        struct ip_vs_service *svc;
339
        struct list_head *l,*e;
340
 
341
        /*
342
         *      Check for "full" addressed entries
343
         */
344
        hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
345
 
346
        l = &ip_vs_svc_table[hash];
347
        for (e=l->next; e!=l; e=e->next) {
348
                svc = list_entry(e, struct ip_vs_service, s_list);
349
                if ((svc->addr == vaddr)
350
                    && (svc->port == vport)
351
                    && (svc->protocol == protocol)) {
352
                        /* HIT */
353
                        atomic_inc(&svc->usecnt);
354
                        return svc;
355
                }
356
        }
357
 
358
        return NULL;
359
}
360
 
361
 
362
/*
363
 *  Get service by {fwmark} in the service table.
364
 */
365
static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
366
{
367
        unsigned hash;
368
        struct ip_vs_service *svc;
369
        struct list_head *l,*e;
370
 
371
        /*
372
         *      Check for "full" addressed entries
373
         */
374
        hash = ip_vs_svc_fwm_hashkey(fwmark);
375
 
376
        l = &ip_vs_svc_fwm_table[hash];
377
        for (e=l->next; e!=l; e=e->next) {
378
                svc = list_entry(e, struct ip_vs_service, f_list);
379
                if (svc->fwmark == fwmark) {
380
                        /* HIT */
381
                        atomic_inc(&svc->usecnt);
382
                        return svc;
383
                }
384
        }
385
 
386
        return NULL;
387
}
388
 
389
struct ip_vs_service *
390
ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
391
{
392
        struct ip_vs_service *svc;
393
 
394
        read_lock(&__ip_vs_svc_lock);
395
 
396
        /*
397
         *      Check the table hashed by fwmark first
398
         */
399
        if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
400
                goto out;
401
 
402
        /*
403
         *      Check the table hashed by <protocol,addr,port>
404
         *      for "full" addressed entries
405
         */
406
        svc = __ip_vs_service_get(protocol, vaddr, vport);
407
 
408
        if (svc == NULL
409
            && protocol == IPPROTO_TCP
410
            && atomic_read(&ip_vs_ftpsvc_counter)
411
            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
412
                /*
413
                 * Check if ftp service entry exists, the packet
414
                 * might belong to FTP data connections.
415
                 */
416
                svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
417
        }
418
 
419
        if (svc == NULL
420
            && atomic_read(&ip_vs_nullsvc_counter)) {
421
                /*
422
                 * Check if the catch-all port (port zero) exists
423
                 */
424
                svc = __ip_vs_service_get(protocol, vaddr, 0);
425
        }
426
 
427
  out:
428
        read_unlock(&__ip_vs_svc_lock);
429
 
430
        IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
431
                  fwmark, ip_vs_proto_name(protocol),
432
                  NIPQUAD(vaddr), ntohs(vport),
433
                  svc?"hit":"not hit");
434
 
435
        return svc;
436
}
437
 
438
 
439
static inline void
440
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
441
{
442
        atomic_inc(&svc->refcnt);
443
        dest->svc = svc;
444
}
445
 
446
static inline void
447
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
448
{
449
        struct ip_vs_service *svc = dest->svc;
450
 
451
        dest->svc = NULL;
452
        if (atomic_dec_and_test(&svc->refcnt))
453
                kfree(svc);
454
}
455
 
456
/*
457
 *  Returns hash value for real service
458
 */
459
static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
460
{
461
        register unsigned porth = ntohs(port);
462
 
463
        return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
464
                & IP_VS_RTAB_MASK;
465
}
466
 
467
/*
468
 *  Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
469
 *  should be called with locked tables.
470
 *  returns bool success.
471
 */
472
static int ip_vs_rs_hash(struct ip_vs_dest *dest)
473
{
474
        unsigned hash;
475
 
476
        if (!list_empty(&dest->d_list)) {
477
                return 0;
478
        }
479
 
480
        /*
481
         *      Hash by proto,addr,port,
482
         *      which are the parameters of the real service.
483
         */
484
        hash = ip_vs_rs_hashkey(dest->addr, dest->port);
485
        list_add(&dest->d_list, &ip_vs_rtable[hash]);
486
 
487
        return 1;
488
}
489
 
490
/*
491
 *  UNhashes ip_vs_dest from ip_vs_rtable.
492
 *  should be called with locked tables.
493
 *  returns bool success.
494
 */
495
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
496
{
497
        /*
498
         * Remove it from the ip_vs_rtable table.
499
         */
500
        if (!list_empty(&dest->d_list)) {
501
                list_del(&dest->d_list);
502
                INIT_LIST_HEAD(&dest->d_list);
503
        }
504
 
505
        return 1;
506
}
507
 
508
/*
509
 *  Lookup real service by {proto,addr,port} in the real service table.
510
 */
511
struct ip_vs_dest *
512
ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
513
{
514
        unsigned hash;
515
        struct ip_vs_dest *dest;
516
        struct list_head *l,*e;
517
 
518
        /*
519
         *      Check for "full" addressed entries
520
         *      Return the first found entry
521
         */
522
        hash = ip_vs_rs_hashkey(daddr, dport);
523
 
524
        l = &ip_vs_rtable[hash];
525
 
526
        read_lock(&__ip_vs_rs_lock);
527
        for (e=l->next; e!=l; e=e->next) {
528
                dest = list_entry(e, struct ip_vs_dest, d_list);
529
                if ((dest->addr == daddr)
530
                    && (dest->port == dport)
531
                    && ((dest->protocol == protocol) ||
532
                        dest->vfwmark)) {
533
                        /* HIT */
534
                        read_unlock(&__ip_vs_rs_lock);
535
                        return dest;
536
                }
537
        }
538
        read_unlock(&__ip_vs_rs_lock);
539
 
540
        return NULL;
541
}
542
 
543
/*
544
 *  Lookup destination by {addr,port} in the given service
545
 */
546
static struct ip_vs_dest *
547
ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
548
{
549
        struct ip_vs_dest *dest;
550
        struct list_head *l, *e;
551
 
552
        /*
553
         * Find the destination for the given service
554
         */
555
        l = &svc->destinations;
556
        for (e=l->next; e!=l; e=e->next) {
557
                dest = list_entry(e, struct ip_vs_dest, n_list);
558
                if ((dest->addr == daddr) && (dest->port == dport)) {
559
                        /* HIT */
560
                        return dest;
561
                }
562
        }
563
 
564
        return NULL;
565
}
566
 
567
 
568
/*
569
 *  Lookup dest by {svc,addr,port} in the destination trash.
570
 *  The destination trash is used to hold the destinations that are removed
571
 *  from the service table but are still referenced by some conn entries.
572
 *  The reason to add the destination trash is when the dest is temporary
573
 *  down (either by administrator or by monitor program), the dest can be
574
 *  picked back from the trash, the remaining connections to the dest can
575
 *  continue, and the counting information of the dest is also useful for
576
 *  scheduling.
577
 */
578
static struct ip_vs_dest *
579
ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
580
{
581
        struct ip_vs_dest *dest;
582
        struct list_head *l, *e;
583
 
584
        /*
585
         * Find the destination in trash
586
         */
587
        l = &ip_vs_dest_trash;
588
 
589
        for (e=l->next; e!=l; e=e->next) {
590
                dest = list_entry(e, struct ip_vs_dest, n_list);
591
                IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
592
                          "refcnt=%d\n",
593
                          dest->vfwmark,
594
                          NIPQUAD(dest->addr), ntohs(dest->port),
595
                          atomic_read(&dest->refcnt));
596
                if (dest->addr == daddr &&
597
                    dest->port == dport &&
598
                    dest->vfwmark == svc->fwmark &&
599
                    dest->protocol == svc->protocol &&
600
                    (svc->fwmark ||
601
                     (dest->vaddr == svc->addr &&
602
                      dest->vport == svc->port))) {
603
                        /* HIT */
604
                        return dest;
605
                }
606
 
607
                /*
608
                 * Try to purge the destination from trash if not referenced
609
                 */
610
                if (atomic_read(&dest->refcnt) == 1) {
611
                        IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
612
                                  "from trash\n",
613
                                  dest->vfwmark,
614
                                  NIPQUAD(dest->addr), ntohs(dest->port));
615
                        e = e->prev;
616
                        list_del(&dest->n_list);
617
                        __ip_vs_dst_reset(dest);
618
                        __ip_vs_unbind_svc(dest);
619
                        kfree(dest);
620
                }
621
        }
622
 
623
        return NULL;
624
}
625
 
626
 
627
/*
628
 *  Clean up all the destinations in the trash
629
 *  Called by the ip_vs_control_cleanup()
630
 *
631
 *  When the ip_vs_control_clearup is activated by ipvs module exit,
632
 *  the service tables must have been flushed and all the connections
633
 *  are expired, and the refcnt of each destination in the trash must
634
 *  be 1, so we simply release them here.
635
 */
636
static void ip_vs_trash_cleanup(void)
637
{
638
        struct ip_vs_dest *dest;
639
        struct list_head *l;
640
 
641
        l = &ip_vs_dest_trash;
642
 
643
        while (l->next != l) {
644
                dest = list_entry(l->next, struct ip_vs_dest, n_list);
645
                list_del(&dest->n_list);
646
                __ip_vs_dst_reset(dest);
647
                __ip_vs_unbind_svc(dest);
648
                kfree(dest);
649
        }
650
}
651
 
652
 
653
static inline void
654
__ip_vs_zero_stats(struct ip_vs_stats *stats)
655
{
656
        spin_lock_bh(&stats->lock);
657
        memset(stats, 0, (char *)&stats->lock - (char *)stats);
658
        spin_unlock_bh(&stats->lock);
659
        ip_vs_zero_estimator(stats);
660
}
661
 
662
/*
663
 *  Update a destination in the given service
664
 */
665
static void __ip_vs_update_dest(struct ip_vs_service *svc,
666
                                struct ip_vs_dest *dest,
667
                                struct ip_vs_rule_user *ur)
668
{
669
        int conn_flags;
670
 
671
        /*
672
         *    Set the weight and the flags
673
         */
674
        atomic_set(&dest->weight, ur->weight);
675
 
676
        conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;
677
 
678
        /*
679
         *    Check if local node and update the flags
680
         */
681
        if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
682
                conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
683
                        | IP_VS_CONN_F_LOCALNODE;
684
        }
685
 
686
        /*
687
         *    Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading
688
         */
689
        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
690
                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
691
        } else {
692
                /*
693
                 *    Put the real service in ip_vs_rtable if not present.
694
                 *    For now only for NAT!
695
                 */
696
                write_lock_bh(&__ip_vs_rs_lock);
697
                ip_vs_rs_hash(dest);
698
                write_unlock_bh(&__ip_vs_rs_lock);
699
        }
700
        atomic_set(&dest->conn_flags, conn_flags);
701
 
702
        /* bind the service */
703
        if (!dest->svc) {
704
                __ip_vs_bind_svc(dest, svc);
705
        } else {
706
                if (dest->svc != svc) {
707
                        __ip_vs_unbind_svc(dest);
708
                        __ip_vs_zero_stats(&dest->stats);
709
                        __ip_vs_bind_svc(dest, svc);
710
                }
711
        }
712
 
713
        /* set the dest status flags */
714
        dest->flags |= IP_VS_DEST_F_AVAILABLE;
715
}
716
 
717
 
718
/*
719
 *  Create a destination for the given service
720
 */
721
static int
722
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
723
               struct ip_vs_dest **destp)
724
{
725
        struct ip_vs_dest *dest;
726
        unsigned atype;
727
 
728
        EnterFunction(2);
729
 
730
        atype = inet_addr_type(ur->daddr);
731
        if (atype != RTN_LOCAL && atype != RTN_UNICAST)
732
                return -EINVAL;
733
 
734
        *destp = dest = (struct ip_vs_dest*)
735
                kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
736
        if (dest == NULL) {
737
                IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
738
                return -ENOMEM;
739
        }
740
        memset(dest, 0, sizeof(struct ip_vs_dest));
741
 
742
        dest->protocol = svc->protocol;
743
        dest->vaddr = svc->addr;
744
        dest->vport = svc->port;
745
        dest->vfwmark = svc->fwmark;
746
        dest->addr = ur->daddr;
747
        dest->port = ur->dport;
748
 
749
        atomic_set(&dest->activeconns, 0);
750
        atomic_set(&dest->inactconns, 0);
751
        atomic_set(&dest->refcnt, 0);
752
 
753
        INIT_LIST_HEAD(&dest->d_list);
754
        dest->dst_lock = SPIN_LOCK_UNLOCKED;
755
        dest->stats.lock = SPIN_LOCK_UNLOCKED;
756
        __ip_vs_update_dest(svc, dest, ur);
757
        ip_vs_new_estimator(&dest->stats);
758
 
759
        LeaveFunction(2);
760
        return 0;
761
}
762
 
763
 
764
/*
765
 *  Add a destination into an existing service
766
 */
767
static int ip_vs_add_dest(struct ip_vs_service *svc,
768
                          struct ip_vs_rule_user *ur)
769
{
770
        struct ip_vs_dest *dest;
771
        __u32 daddr = ur->daddr;
772
        __u16 dport = ur->dport;
773
        int ret;
774
 
775
        EnterFunction(2);
776
 
777
        if (ur->weight < 0) {
778
                IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
779
                return -ERANGE;
780
        }
781
 
782
        /*
783
         * Check if the dest already exists in the list
784
         */
785
        dest = ip_vs_lookup_dest(svc, daddr, dport);
786
        if (dest != NULL) {
787
                IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
788
                return -EEXIST;
789
        }
790
 
791
        /*
792
         * Check if the dest already exists in the trash and
793
         * is from the same service
794
         */
795
        dest = ip_vs_trash_get_dest(svc, daddr, dport);
796
        if (dest != NULL) {
797
                IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
798
                          "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
799
                          NIPQUAD(daddr), ntohs(dport),
800
                          atomic_read(&dest->refcnt),
801
                          dest->vfwmark,
802
                          NIPQUAD(dest->vaddr),
803
                          ntohs(dest->vport));
804
                __ip_vs_update_dest(svc, dest, ur);
805
 
806
                /*
807
                 * Get the destination from the trash
808
                 */
809
                list_del(&dest->n_list);
810
 
811
                ip_vs_new_estimator(&dest->stats);
812
 
813
                write_lock_bh(&__ip_vs_svc_lock);
814
 
815
                /*
816
                 * Wait until all other svc users go away.
817
                 */
818
                while (atomic_read(&svc->usecnt) > 1) {};
819
 
820
                list_add(&dest->n_list, &svc->destinations);
821
                svc->num_dests++;
822
 
823
                /* call the update_service function of its scheduler */
824
                svc->scheduler->update_service(svc);
825
 
826
                write_unlock_bh(&__ip_vs_svc_lock);
827
                return 0;
828
        }
829
 
830
        /*
831
         * Allocate and initialize the dest structure
832
         */
833
        ret = ip_vs_new_dest(svc, ur, &dest);
834
        if (ret) {
835
                return ret;
836
        }
837
 
838
        /*
839
         * Add the dest entry into the list
840
         */
841
        atomic_inc(&dest->refcnt);
842
 
843
        write_lock_bh(&__ip_vs_svc_lock);
844
 
845
        /*
846
         * Wait until all other svc users go away.
847
         */
848
        while (atomic_read(&svc->usecnt) > 1) {};
849
 
850
        list_add(&dest->n_list, &svc->destinations);
851
        svc->num_dests++;
852
 
853
        /* call the update_service function of its scheduler */
854
        svc->scheduler->update_service(svc);
855
 
856
        write_unlock_bh(&__ip_vs_svc_lock);
857
 
858
        LeaveFunction(2);
859
 
860
        return 0;
861
}
862
 
863
 
864
/*
865
 *  Edit a destination in the given service
866
 */
867
static int ip_vs_edit_dest(struct ip_vs_service *svc,
868
                           struct ip_vs_rule_user *ur)
869
{
870
        struct ip_vs_dest *dest;
871
        __u32 daddr = ur->daddr;
872
        __u16 dport = ur->dport;
873
 
874
        EnterFunction(2);
875
 
876
        if (ur->weight < 0) {
877
                IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
878
                return -ERANGE;
879
        }
880
 
881
        /*
882
         *  Lookup the destination list
883
         */
884
        dest = ip_vs_lookup_dest(svc, daddr, dport);
885
        if (dest == NULL) {
886
                IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
887
                return -ENOENT;
888
        }
889
 
890
        __ip_vs_update_dest(svc, dest, ur);
891
 
892
        write_lock_bh(&__ip_vs_svc_lock);
893
 
894
        /* Wait until all other svc users go away */
895
        while (atomic_read(&svc->usecnt) > 1) {};
896
 
897
        /* call the update_service, because server weight may be changed */
898
        svc->scheduler->update_service(svc);
899
 
900
        write_unlock_bh(&__ip_vs_svc_lock);
901
 
902
        LeaveFunction(2);
903
 
904
        return 0;
905
}
906
 
907
 
908
/*
909
 *  Delete a destination (must be already unlinked from the service)
910
 */
911
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
912
{
913
        ip_vs_kill_estimator(&dest->stats);
914
 
915
        /*
916
         *  Remove it from the d-linked list with the real services.
917
         */
918
        write_lock_bh(&__ip_vs_rs_lock);
919
        ip_vs_rs_unhash(dest);
920
        write_unlock_bh(&__ip_vs_rs_lock);
921
 
922
        /*
923
         *  Decrease the refcnt of the dest, and free the dest
924
         *  if nobody refers to it (refcnt=0). Otherwise, throw
925
         *  the destination into the trash.
926
         */
927
        if (atomic_dec_and_test(&dest->refcnt)) {
928
                __ip_vs_dst_reset(dest);
929
                /* simply decrease svc->refcnt here, let the caller check
930
                   and release the service if nobody refers to it.
931
                   Only user context can release destination and service,
932
                   and only one user context can update virtual service at a
933
                   time, so the operation here is OK */
934
                atomic_dec(&dest->svc->refcnt);
935
                kfree(dest);
936
        } else {
937
                IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
938
                          NIPQUAD(dest->addr), ntohs(dest->port),
939
                          atomic_read(&dest->refcnt));
940
                list_add(&dest->n_list, &ip_vs_dest_trash);
941
                atomic_inc(&dest->refcnt);
942
        }
943
}
944
 
945
 
946
/*
947
 *  Unlink a destination from the given service
948
 */
949
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
950
                                struct ip_vs_dest *dest,
951
                                int svcupd)
952
{
953
        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
954
 
955
        /*
956
         *  Remove it from the d-linked destination list.
957
         */
958
        list_del(&dest->n_list);
959
        svc->num_dests--;
960
        if (svcupd) {
961
                /*
962
                 *  Call the update_service function of its scheduler
963
                 */
964
                svc->scheduler->update_service(svc);
965
        }
966
}
967
 
968
 
969
/*
970
 *  Delete a destination server in the given service
971
 */
972
static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
973
{
974
        struct ip_vs_dest *dest;
975
        __u32 daddr = ur->daddr;
976
        __u16 dport = ur->dport;
977
 
978
        EnterFunction(2);
979
 
980
        dest = ip_vs_lookup_dest(svc, daddr, dport);
981
        if (dest == NULL) {
982
                IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
983
                return -ENOENT;
984
        }
985
 
986
        write_lock_bh(&__ip_vs_svc_lock);
987
 
988
        /*
989
         *      Wait until all other svc users go away.
990
         */
991
        while (atomic_read(&svc->usecnt) > 1) {};
992
 
993
        /*
994
         *      Unlink dest from the service
995
         */
996
        __ip_vs_unlink_dest(svc, dest, 1);
997
 
998
        write_unlock_bh(&__ip_vs_svc_lock);
999
 
1000
        /*
1001
         *      Delete the destination
1002
         */
1003
        __ip_vs_del_dest(dest);
1004
 
1005
        LeaveFunction(2);
1006
 
1007
        return 0;
1008
}
1009
 
1010
 
1011
/*
1012
 *  Add a service into the service hash table
1013
 */
1014
static int
1015
ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
1016
{
1017
        int ret = 0;
1018
        struct ip_vs_scheduler *sched;
1019
        struct ip_vs_service *svc = NULL;
1020
 
1021
        MOD_INC_USE_COUNT;
1022
 
1023
        /*
1024
         * Lookup the scheduler, by 'ur->sched_name'
1025
         */
1026
        sched = ip_vs_scheduler_get(ur->sched_name);
1027
        if (sched == NULL) {
1028
                IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1029
                           ur->sched_name);
1030
                ret = -ENOENT;
1031
                goto out_mod_dec;
1032
        }
1033
 
1034
        svc = (struct ip_vs_service*)
1035
                kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1036
        if (svc == NULL) {
1037
                IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1038
                ret = -ENOMEM;
1039
                goto out_err;
1040
        }
1041
        memset(svc, 0, sizeof(struct ip_vs_service));
1042
 
1043
        svc->protocol = ur->protocol;
1044
        svc->addr = ur->vaddr;
1045
        svc->port = ur->vport;
1046
        svc->fwmark = ur->vfwmark;
1047
        svc->flags = ur->vs_flags;
1048
        svc->timeout = ur->timeout * HZ;
1049
        svc->netmask = ur->netmask;
1050
 
1051
        INIT_LIST_HEAD(&svc->destinations);
1052
        svc->sched_lock = RW_LOCK_UNLOCKED;
1053
        svc->stats.lock = SPIN_LOCK_UNLOCKED;
1054
 
1055
        /*
1056
         *    Bind the scheduler
1057
         */
1058
        ret = ip_vs_bind_scheduler(svc, sched);
1059
        if (ret) {
1060
                goto out_err;
1061
        }
1062
 
1063
        /*
1064
         *    Update the virtual service counters
1065
         */
1066
        if (svc->port == FTPPORT)
1067
                atomic_inc(&ip_vs_ftpsvc_counter);
1068
        else if (svc->port == 0)
1069
                atomic_inc(&ip_vs_nullsvc_counter);
1070
 
1071
        /*
1072
         *    I'm the first user of the service
1073
         */
1074
        atomic_set(&svc->usecnt, 1);
1075
        atomic_set(&svc->refcnt, 0);
1076
 
1077
        ip_vs_new_estimator(&svc->stats);
1078
        ip_vs_num_services++;
1079
 
1080
        /*
1081
         *    Hash the service into the service table
1082
         */
1083
        write_lock_bh(&__ip_vs_svc_lock);
1084
        ip_vs_svc_hash(svc);
1085
        write_unlock_bh(&__ip_vs_svc_lock);
1086
 
1087
        *svc_p = svc;
1088
        return 0;
1089
 
1090
  out_err:
1091
        if (svc)
1092
                kfree(svc);
1093
        ip_vs_scheduler_put(sched);
1094
  out_mod_dec:
1095
        MOD_DEC_USE_COUNT;
1096
        return ret;
1097
}
1098
 
1099
 
1100
/*
1101
 *      Edit a service and bind it with a new scheduler
1102
 */
1103
static int ip_vs_edit_service(struct ip_vs_service *svc,
1104
                              struct ip_vs_rule_user *ur)
1105
{
1106
        struct ip_vs_scheduler *sched, *old_sched;
1107
        int ret = 0;
1108
 
1109
        /*
1110
         * Lookup the scheduler, by 'ur->sched_name'
1111
         */
1112
        sched = ip_vs_scheduler_get(ur->sched_name);
1113
        if (sched == NULL) {
1114
                IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1115
                           ur->sched_name);
1116
                return -ENOENT;
1117
        }
1118
 
1119
        write_lock_bh(&__ip_vs_svc_lock);
1120
 
1121
        /*
1122
         * Wait until all other svc users go away.
1123
         */
1124
        while (atomic_read(&svc->usecnt) > 1) {};
1125
 
1126
        /*
1127
         * Set the flags and timeout value
1128
         */
1129
        svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
1130
        svc->timeout = ur->timeout * HZ;
1131
        svc->netmask = ur->netmask;
1132
 
1133
        old_sched = svc->scheduler;
1134
        if (sched != old_sched) {
1135
                /*
1136
                 * Unbind the old scheduler
1137
                 */
1138
                if ((ret = ip_vs_unbind_scheduler(svc))) {
1139
                        old_sched = sched;
1140
                        goto out;
1141
                }
1142
 
1143
                /*
1144
                 * Bind the new scheduler
1145
                 */
1146
                if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1147
                        /*
1148
                         * If ip_vs_bind_scheduler fails, restore the old
1149
                         * scheduler.
1150
                         * The main reason of failure is out of memory.
1151
                         *
1152
                         * The question is if the old scheduler can be
1153
                         * restored all the time. TODO: if it cannot be
1154
                         * restored some time, we must delete the service,
1155
                         * otherwise the system may crash.
1156
                         */
1157
                        ip_vs_bind_scheduler(svc, old_sched);
1158
                        old_sched = sched;
1159
                }
1160
        }
1161
 
1162
  out:
1163
        write_unlock_bh(&__ip_vs_svc_lock);
1164
 
1165
        if (old_sched)
1166
                ip_vs_scheduler_put(old_sched);
1167
 
1168
        return ret;
1169
}
1170
 
1171
 
1172
/*
1173
 *  Delete a service from the service list
1174
 *  The service must be unlinked, unlocked and not referenced!
1175
 */
1176
static void __ip_vs_del_service(struct ip_vs_service *svc)
1177
{
1178
        struct list_head *l;
1179
        struct ip_vs_dest *dest;
1180
        struct ip_vs_scheduler *old_sched;
1181
 
1182
        ip_vs_num_services--;
1183
        ip_vs_kill_estimator(&svc->stats);
1184
 
1185
        /*
1186
         *    Unbind scheduler
1187
         */
1188
        old_sched = svc->scheduler;
1189
        ip_vs_unbind_scheduler(svc);
1190
        if (old_sched && old_sched->module)
1191
                __MOD_DEC_USE_COUNT(old_sched->module);
1192
 
1193
        /*
1194
         *    Unlink the whole destination list
1195
         */
1196
        l = &svc->destinations;
1197
        while (l->next != l) {
1198
                dest = list_entry(l->next, struct ip_vs_dest, n_list);
1199
                __ip_vs_unlink_dest(svc, dest, 0);
1200
                __ip_vs_del_dest(dest);
1201
        }
1202
 
1203
        /*
1204
         *    Update the virtual service counters
1205
         */
1206
        if (svc->port == FTPPORT)
1207
                atomic_dec(&ip_vs_ftpsvc_counter);
1208
        else if (svc->port == 0)
1209
                atomic_dec(&ip_vs_nullsvc_counter);
1210
 
1211
        /*
1212
         *    Free the service if nobody refers to it
1213
         */
1214
        if (atomic_read(&svc->refcnt) == 0)
1215
                kfree(svc);
1216
        MOD_DEC_USE_COUNT;
1217
}
1218
 
1219
/*
1220
 *  Delete a service from the service list
1221
 */
1222
static int ip_vs_del_service(struct ip_vs_service *svc)
1223
{
1224
        if (svc == NULL)
1225
                return -EEXIST;
1226
 
1227
        /*
1228
         * Unhash it from the service table
1229
         */
1230
        write_lock_bh(&__ip_vs_svc_lock);
1231
 
1232
        ip_vs_svc_unhash(svc);
1233
 
1234
        /*
1235
         * Wait until all the svc users go away.
1236
         */
1237
        while (atomic_read(&svc->usecnt) > 1) {};
1238
 
1239
        __ip_vs_del_service(svc);
1240
 
1241
        write_unlock_bh(&__ip_vs_svc_lock);
1242
 
1243
        return 0;
1244
}
1245
 
1246
 
1247
/*
1248
 *  Flush all the virtual services
1249
 */
1250
static int ip_vs_flush(void)
1251
{
1252
        int idx;
1253
        struct ip_vs_service *svc;
1254
        struct list_head *l;
1255
 
1256
        /*
1257
         * Flush the service table hashed by <protocol,addr,port>
1258
         */
1259
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1260
                l = &ip_vs_svc_table[idx];
1261
                while (l->next != l) {
1262
                        svc = list_entry(l->next,struct ip_vs_service,s_list);
1263
                        write_lock_bh(&__ip_vs_svc_lock);
1264
                        ip_vs_svc_unhash(svc);
1265
                        /*
1266
                         * Wait until all the svc users go away.
1267
                         */
1268
                        while (atomic_read(&svc->usecnt) > 0) {};
1269
                        __ip_vs_del_service(svc);
1270
                        write_unlock_bh(&__ip_vs_svc_lock);
1271
                }
1272
        }
1273
 
1274
        /*
1275
         * Flush the service table hashed by fwmark
1276
         */
1277
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1278
                l = &ip_vs_svc_fwm_table[idx];
1279
                while (l->next != l) {
1280
                        svc = list_entry(l->next,struct ip_vs_service,f_list);
1281
                        write_lock_bh(&__ip_vs_svc_lock);
1282
                        ip_vs_svc_unhash(svc);
1283
                        /*
1284
                         * Wait until all the svc users go away.
1285
                         */
1286
                        while (atomic_read(&svc->usecnt) > 0) {};
1287
                        __ip_vs_del_service(svc);
1288
                        write_unlock_bh(&__ip_vs_svc_lock);
1289
                }
1290
        }
1291
 
1292
        return 0;
1293
}
1294
 
1295
 
1296
/*
1297
 *  Zero counters in a service or all services
1298
 */
1299
static int ip_vs_zero_service(struct ip_vs_service *svc)
1300
{
1301
        struct list_head *l;
1302
        struct ip_vs_dest *dest;
1303
 
1304
        write_lock_bh(&__ip_vs_svc_lock);
1305
        list_for_each (l, &svc->destinations) {
1306
                dest = list_entry(l, struct ip_vs_dest, n_list);
1307
                __ip_vs_zero_stats(&dest->stats);
1308
        }
1309
        __ip_vs_zero_stats(&svc->stats);
1310
        write_unlock_bh(&__ip_vs_svc_lock);
1311
        return 0;
1312
}
1313
 
1314
static int ip_vs_zero_all(void)
1315
{
1316
        int idx;
1317
        struct list_head *l;
1318
        struct ip_vs_service *svc;
1319
 
1320
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1321
                list_for_each (l, &ip_vs_svc_table[idx]) {
1322
                        svc = list_entry(l, struct ip_vs_service, s_list);
1323
                        ip_vs_zero_service(svc);
1324
                }
1325
        }
1326
 
1327
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1328
                list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1329
                        svc = list_entry(l, struct ip_vs_service, f_list);
1330
                        ip_vs_zero_service(svc);
1331
                }
1332
        }
1333
 
1334
        __ip_vs_zero_stats(&ip_vs_stats);
1335
        return 0;
1336
}
1337
 
1338
 
1339
/*
 *  sysctl handler for the defense-mode variables (drop_entry,
 *  drop_packet, secure_tcp).
 *
 *  Delegates the read/write to proc_dointvec().  After a write that
 *  changed the value, values outside 0..3 are rejected by restoring
 *  the previous value; otherwise update_defense_level() is run with
 *  bottom halves disabled.  Returns proc_dointvec()'s result.
 */
static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
        struct file * filp, void *buffer, size_t *lenp)
{
        int *valp = ctl->data;
        int saved = *valp;
        int ret;

        ret = proc_dointvec(ctl, write, filp, buffer, lenp);

        /* Nothing to do on reads or when the value did not change. */
        if (!write || *valp == saved)
                return ret;

        if (*valp < 0 || *valp > 3) {
                /* Out of range: restore the previous value. */
                *valp = saved;
                return ret;
        }

        local_bh_disable();
        update_defense_level();
        local_bh_enable();

        return ret;
}
1359
 
1360
 
1361
/*
1362
 *      IPVS sysctl table
1363
 */
1364
struct ip_vs_sysctl_table {
1365
        struct ctl_table_header *sysctl_header;
1366
        ctl_table vs_vars[NET_IPV4_VS_LAST];
1367
        ctl_table vs_dir[2];
1368
        ctl_table ipv4_dir[2];
1369
        ctl_table root_dir[2];
1370
};
1371
 
1372
 
1373
static struct ip_vs_sysctl_table ipv4_vs_table = {
1374
        NULL,
1375
        {{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
1376
          &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
1377
          &proc_dointvec},
1378
#ifdef CONFIG_IP_VS_DEBUG
1379
         {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
1380
          &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
1381
          &proc_dointvec},
1382
#endif
1383
         {NET_IPV4_VS_AMDROPRATE, "am_droprate",
1384
          &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
1385
          &proc_dointvec},
1386
         {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
1387
          &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
1388
          &ip_vs_sysctl_defense_mode},
1389
         {NET_IPV4_VS_DROP_PACKET, "drop_packet",
1390
          &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
1391
          &ip_vs_sysctl_defense_mode},
1392
         {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
1393
          &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
1394
          &ip_vs_sysctl_defense_mode},
1395
         {NET_IPV4_VS_TO_ES, "timeout_established",
1396
          &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1397
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1398
         {NET_IPV4_VS_TO_SS, "timeout_synsent",
1399
          &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1400
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1401
         {NET_IPV4_VS_TO_SR, "timeout_synrecv",
1402
          &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1403
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1404
         {NET_IPV4_VS_TO_FW, "timeout_finwait",
1405
          &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1406
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1407
         {NET_IPV4_VS_TO_TW, "timeout_timewait",
1408
          &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1409
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1410
         {NET_IPV4_VS_TO_CL, "timeout_close",
1411
          &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1412
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1413
         {NET_IPV4_VS_TO_CW, "timeout_closewait",
1414
          &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1415
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1416
         {NET_IPV4_VS_TO_LA, "timeout_lastack",
1417
          &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1418
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1419
         {NET_IPV4_VS_TO_LI, "timeout_listen",
1420
          &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1421
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1422
         {NET_IPV4_VS_TO_SA, "timeout_synack",
1423
          &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1424
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1425
         {NET_IPV4_VS_TO_UDP, "timeout_udp",
1426
          &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1427
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1428
         {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
1429
          &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1430
          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1431
         {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
1432
          &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
1433
          &proc_dointvec},
1434
         {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
1435
          &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
1436
          &proc_dointvec},
1437
         {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
1438
          &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
1439
          &proc_dointvec},
1440
         {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
1441
          &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
1442
          &proc_dointvec},
1443
         {0}},
1444
        {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
1445
         {0}},
1446
        {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
1447
         {0}},
1448
        {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
1449
         {0}}
1450
};
1451
 
1452
 
1453
/*
1454
 *      Write the contents of the VS rule table to a PROCfs file.
1455
 *      (It is kept just for backward compatibility)
1456
 */
1457
static inline char *ip_vs_fwd_name(unsigned flags)
1458
{
1459
        char *fwd;
1460
 
1461
        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1462
        case IP_VS_CONN_F_LOCALNODE:
1463
                fwd = "Local";
1464
                break;
1465
        case IP_VS_CONN_F_TUNNEL:
1466
                fwd = "Tunnel";
1467
                break;
1468
        case IP_VS_CONN_F_DROUTE:
1469
                fwd = "Route";
1470
                break;
1471
        default:
1472
                fwd = "Masq";
1473
        }
1474
        return fwd;
1475
}
1476
 
1477
/*
 *      procfs get_info handler that dumps the virtual service table.
 *
 *      The output is a sequence of fixed 64-byte records (63 characters
 *      plus newline): three header records, then for every service one
 *      service record followed by one record per destination.  'pos'
 *      tracks the logical end of the record being considered; records
 *      that end at or before 'offset' are counted but not written.
 *
 *      buf:    output buffer
 *      start:  set to where the data for 'offset' begins inside buf
 *      offset: byte offset into the logical output requested
 *      length: when at least this many bytes have been written, the
 *              walk stops
 *
 *      Returns pos - offset, clamped to the range [0, length].
 *
 *      Each record is formatted into a bounded scratch buffer with
 *      snprintf().  A row that would exceed 63 characters (for example
 *      a destination with a 10-digit weight, or a long scheduler name
 *      followed by the persistent suffix) is truncated instead of
 *      overrunning the stack buffer and breaking the fixed record size.
 */
static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
{
        int len=0;
        off_t pos=0;
        char temp[64], temp2[32];
        int idx;
        struct ip_vs_service *svc;
        struct ip_vs_dest *dest;
        struct list_head *l, *e, *p, *q;

        /*
         * Note: since the length of the buffer is usually the multiple
         * of 512, it is good to use fixed record of the divisor of 512,
         * so that records won't be truncated at buffer boundary.
         */
        pos = 192;      /* three 64-byte header records */
        if (pos > offset) {
                snprintf(temp, sizeof(temp),
                         "IP Virtual Server version %d.%d.%d (size=%d)",
                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
                len += sprintf(buf+len, "%-63s\n", temp);
                len += sprintf(buf+len, "%-63s\n",
                               "Prot LocalAddress:Port Scheduler Flags");
                len += sprintf(buf+len, "%-63s\n",
                               "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
        }

        read_lock_bh(&__ip_vs_svc_lock);

        /* print the service table hashed by <protocol,addr,port> */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                l = &ip_vs_svc_table[idx];
                for (e=l->next; e!=l; e=e->next) {
                        svc = list_entry(e, struct ip_vs_service, s_list);
                        pos += 64;
                        if (pos > offset) {
                                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
                                        snprintf(temp2, sizeof(temp2),
                                                 "persistent %d %08X",
                                                 svc->timeout,
                                                 ntohl(svc->netmask));
                                else
                                        temp2[0] = '\0';

                                snprintf(temp, sizeof(temp),
                                         "%s  %08X:%04X %s %s",
                                         ip_vs_proto_name(svc->protocol),
                                         ntohl(svc->addr),
                                         ntohs(svc->port),
                                         svc->scheduler->name, temp2);
                                len += sprintf(buf+len, "%-63s\n", temp);
                                if (len >= length)
                                        goto done;
                        }

                        p = &svc->destinations;
                        for (q=p->next; q!=p; q=q->next) {
                                dest = list_entry(q, struct ip_vs_dest, n_list);
                                pos += 64;
                                if (pos <= offset)
                                        continue;
                                snprintf(temp, sizeof(temp),
                                         "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
                                         ntohl(dest->addr),
                                         ntohs(dest->port),
                                         ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
                                         atomic_read(&dest->weight),
                                         atomic_read(&dest->activeconns),
                                         atomic_read(&dest->inactconns));
                                len += sprintf(buf+len, "%-63s\n", temp);
                                if (len >= length)
                                        goto done;
                        }
                }
        }

        /* print the service table hashed by fwmark */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                l = &ip_vs_svc_fwm_table[idx];
                for (e=l->next; e!=l; e=e->next) {
                        svc = list_entry(e, struct ip_vs_service, f_list);
                        pos += 64;
                        if (pos > offset) {
                                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
                                        snprintf(temp2, sizeof(temp2),
                                                 "persistent %d %08X",
                                                 svc->timeout,
                                                 ntohl(svc->netmask));
                                else
                                        temp2[0] = '\0';

                                snprintf(temp, sizeof(temp),
                                         "FWM  %08X %s %s",
                                         svc->fwmark,
                                         svc->scheduler->name, temp2);
                                len += sprintf(buf+len, "%-63s\n", temp);
                                if (len >= length)
                                        goto done;
                        }

                        p = &svc->destinations;
                        for (q=p->next; q!=p; q=q->next) {
                                dest = list_entry(q, struct ip_vs_dest, n_list);
                                pos += 64;
                                if (pos <= offset)
                                        continue;
                                snprintf(temp, sizeof(temp),
                                         "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
                                         ntohl(dest->addr),
                                         ntohs(dest->port),
                                         ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
                                         atomic_read(&dest->weight),
                                         atomic_read(&dest->activeconns),
                                         atomic_read(&dest->inactconns));
                                len += sprintf(buf+len, "%-63s\n", temp);
                                if (len >= length)
                                        goto done;
                        }
                }
        }

  done:
        read_unlock_bh(&__ip_vs_svc_lock);

        *start = buf+len-(pos-offset);          /* Start of wanted data */
        len = pos-offset;
        if (len > length)
                len = length;
        if (len < 0)
                len = 0;
        return len;
}
1605
 
1606
 
1607
struct ip_vs_stats ip_vs_stats;
1608
 
1609
static int
1610
ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
1611
{
1612
        int len=0;
1613
        off_t pos=0;
1614
        char temp[64];
1615
 
1616
        pos += 320;
1617
        if (pos > offset) {
1618
                len += sprintf(buf+len, "%-63s\n%-63s\n",
1619
/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1620
                               "   Total Incoming Outgoing         Incoming         Outgoing",
1621
                               "   Conns  Packets  Packets            Bytes            Bytes");
1622
 
1623
                spin_lock_bh(&ip_vs_stats.lock);
1624
                sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
1625
                        ip_vs_stats.conns,
1626
                        ip_vs_stats.inpkts,
1627
                        ip_vs_stats.outpkts,
1628
                        (__u32)(ip_vs_stats.inbytes>>32),
1629
                        (__u32)ip_vs_stats.inbytes,
1630
                        (__u32)(ip_vs_stats.outbytes>>32),
1631
                        (__u32)ip_vs_stats.outbytes);
1632
                len += sprintf(buf+len, "%-62s\n\n", temp);
1633
 
1634
                len += sprintf(buf+len, "%-63s\n",
1635
/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1636
                               " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s");
1637
                sprintf(temp, "%8X %8X %8X %16X %16X",
1638
                        ip_vs_stats.cps,
1639
                        ip_vs_stats.inpps,
1640
                        ip_vs_stats.outpps,
1641
                        ip_vs_stats.inbps,
1642
                        ip_vs_stats.outbps);
1643
                len += sprintf(buf+len, "%-63s\n", temp);
1644
 
1645
                spin_unlock_bh(&ip_vs_stats.lock);
1646
        }
1647
 
1648
        *start = buf+len-(pos-offset);          /* Start of wanted data */
1649
        len = pos-offset;
1650
        if (len > length)
1651
                len = length;
1652
        if (len < 0)
1653
                len = 0;
1654
        return len;
1655
}
1656
 
1657
 
1658
/*
1659
 * Set timeout values for tcp tcpfin udp in the vs_timeout_table.
1660
 */
1661
static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1662
{
1663
        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1664
                  u->tcp_timeout,
1665
                  u->tcp_fin_timeout,
1666
                  u->udp_timeout);
1667
 
1668
        if (u->tcp_timeout) {
1669
                vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1670
                        = u->tcp_timeout * HZ;
1671
        }
1672
 
1673
        if (u->tcp_fin_timeout) {
1674
                vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1675
                        = u->tcp_fin_timeout * HZ;
1676
        }
1677
 
1678
        if (u->udp_timeout) {
1679
                vs_timeout_table.timeout[IP_VS_S_UDP]
1680
                        = u->udp_timeout * HZ;
1681
        }
1682
        return 0;
1683
}
1684
 
1685
 
1686
static int
1687
do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1688
{
1689
        int ret;
1690
        struct ip_vs_rule_user *urule;
1691
        struct ip_vs_service *svc = NULL;
1692
 
1693
        if (!capable(CAP_NET_ADMIN))
1694
                return -EPERM;
1695
 
1696
        /*
1697
         * Check the size of mm, no overflow...
1698
         * len > 128000 is a sanity check.
1699
         */
1700
        if (len < sizeof(struct ip_vs_rule_user)) {
1701
                IP_VS_ERR("set_ctl: len %u < %Zu\n",
1702
                          len, sizeof(struct ip_vs_rule_user));
1703
                return -EINVAL;
1704
        } else if (len > 128000) {
1705
                IP_VS_ERR("set_ctl: len %u > 128000\n", len);
1706
                return -EINVAL;
1707
        } else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
1708
                IP_VS_ERR("set_ctl: no mem for len %u\n", len);
1709
                return -ENOMEM;
1710
        } else if (copy_from_user(urule, user, len) != 0) {
1711
                ret = -EFAULT;
1712
                goto out_free;
1713
        }
1714
 
1715
        MOD_INC_USE_COUNT;
1716
        if (down_interruptible(&__ip_vs_mutex)) {
1717
                ret = -ERESTARTSYS;
1718
                goto out_dec;
1719
        }
1720
 
1721
        if (cmd == IP_VS_SO_SET_FLUSH) {
1722
                /* Flush the virtual service */
1723
                ret = ip_vs_flush();
1724
                goto out_unlock;
1725
        } else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
1726
                /* Set timeout values for (tcp tcpfin udp) */
1727
                ret = ip_vs_set_timeouts(urule);
1728
                goto out_unlock;
1729
        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1730
                ret = start_sync_thread(urule->state, urule->mcast_ifn);
1731
                goto out_unlock;
1732
        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1733
                ret = stop_sync_thread();
1734
                goto out_unlock;
1735
        } else if (cmd == IP_VS_SO_SET_ZERO) {
1736
                /* if no service address is set, zero counters in all */
1737
                if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
1738
                        ret = ip_vs_zero_all();
1739
                        goto out_unlock;
1740
                }
1741
        }
1742
 
1743
        /*
1744
         * Check for valid protocol: TCP or UDP. Even for fwmark!=0
1745
         */
1746
        if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
1747
                IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
1748
                          urule->protocol, NIPQUAD(urule->vaddr),
1749
                          ntohs(urule->vport), urule->sched_name);
1750
                ret = -EFAULT;
1751
                goto out_unlock;
1752
        }
1753
 
1754
        /*
1755
         * Lookup the exact service by <protocol, vaddr, vport> or fwmark
1756
         */
1757
        if (urule->vfwmark == 0)
1758
                svc = __ip_vs_service_get(urule->protocol,
1759
                                          urule->vaddr, urule->vport);
1760
        else
1761
                svc = __ip_vs_svc_fwm_get(urule->vfwmark);
1762
 
1763
        if (cmd != IP_VS_SO_SET_ADD
1764
            && (svc == NULL || svc->protocol != urule->protocol)) {
1765
                ret = -ESRCH;
1766
                goto out_unlock;
1767
        }
1768
 
1769
        switch (cmd) {
1770
        case IP_VS_SO_SET_ADD:
1771
                if (svc != NULL)
1772
                        ret = -EEXIST;
1773
                else
1774
                        ret = ip_vs_add_service(urule, &svc);
1775
                break;
1776
        case IP_VS_SO_SET_EDIT:
1777
                ret = ip_vs_edit_service(svc, urule);
1778
                break;
1779
        case IP_VS_SO_SET_DEL:
1780
                ret = ip_vs_del_service(svc);
1781
                if (!ret)
1782
                        goto out_unlock;
1783
                break;
1784
        case IP_VS_SO_SET_ADDDEST:
1785
                ret = ip_vs_add_dest(svc, urule);
1786
                break;
1787
        case IP_VS_SO_SET_EDITDEST:
1788
                ret = ip_vs_edit_dest(svc, urule);
1789
                break;
1790
        case IP_VS_SO_SET_DELDEST:
1791
                ret = ip_vs_del_dest(svc, urule);
1792
                break;
1793
        case IP_VS_SO_SET_ZERO:
1794
                ret = ip_vs_zero_service(svc);
1795
                break;
1796
        default:
1797
                ret = -EINVAL;
1798
        }
1799
 
1800
        if (svc)
1801
                ip_vs_service_put(svc);
1802
 
1803
  out_unlock:
1804
        up(&__ip_vs_mutex);
1805
  out_dec:
1806
        MOD_DEC_USE_COUNT;
1807
  out_free:
1808
        kfree(urule);
1809
        return ret;
1810
}
1811
 
1812
 
1813
static inline void
1814
__ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1815
{
1816
        spin_lock_bh(&src->lock);
1817
        memcpy(dst, src, (char*)&src->lock - (char*)src);
1818
        spin_unlock_bh(&src->lock);
1819
}
1820
 
1821
static inline int
1822
__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1823
                            struct ip_vs_get_services *uptr)
1824
{
1825
        int idx, count=0;
1826
        struct ip_vs_service *svc;
1827
        struct list_head *l;
1828
        struct ip_vs_service_user entry;
1829
        int ret = 0;
1830
 
1831
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1832
                list_for_each (l, &ip_vs_svc_table[idx]) {
1833
                        if (count >= get->num_services)
1834
                                goto out;
1835
                        svc = list_entry(l, struct ip_vs_service, s_list);
1836
                        entry.protocol = svc->protocol;
1837
                        entry.addr = svc->addr;
1838
                        entry.port = svc->port;
1839
                        entry.fwmark = svc->fwmark;
1840
                        strcpy(entry.sched_name, svc->scheduler->name);
1841
                        entry.flags = svc->flags;
1842
                        entry.timeout = svc->timeout / HZ;
1843
                        entry.netmask = svc->netmask;
1844
                        entry.num_dests = svc->num_dests;
1845
                        __ip_vs_copy_stats(&entry.stats, &svc->stats);
1846
                        if (copy_to_user(&uptr->entrytable[count],
1847
                                         &entry, sizeof(entry))) {
1848
                                ret = -EFAULT;
1849
                                goto out;
1850
                        }
1851
                        count++;
1852
                }
1853
        }
1854
 
1855
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1856
                list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1857
                        if (count >= get->num_services)
1858
                                goto out;
1859
                        svc = list_entry(l, struct ip_vs_service, f_list);
1860
                        entry.protocol = svc->protocol;
1861
                        entry.addr = svc->addr;
1862
                        entry.port = svc->port;
1863
                        entry.fwmark = svc->fwmark;
1864
                        strcpy(entry.sched_name, svc->scheduler->name);
1865
                        entry.flags = svc->flags;
1866
                        entry.timeout = svc->timeout / HZ;
1867
                        entry.netmask = svc->netmask;
1868
                        entry.num_dests = svc->num_dests;
1869
                        __ip_vs_copy_stats(&entry.stats, &svc->stats);
1870
                        if (copy_to_user(&uptr->entrytable[count],
1871
                                         &entry, sizeof(entry))) {
1872
                                ret = -EFAULT;
1873
                                goto out;
1874
                        }
1875
                        count++;
1876
                }
1877
        }
1878
 out:
1879
        return ret;
1880
}
1881
 
1882
static inline int
1883
__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1884
                         struct ip_vs_get_dests *uptr)
1885
{
1886
        struct ip_vs_service *svc;
1887
        int ret = 0;
1888
 
1889
        if (get->fwmark)
1890
                svc = __ip_vs_svc_fwm_get(get->fwmark);
1891
        else
1892
                svc = __ip_vs_service_get(get->protocol,
1893
                                          get->addr, get->port);
1894
        if (svc) {
1895
                int count = 0;
1896
                struct ip_vs_dest *dest;
1897
                struct list_head *l, *e;
1898
                struct ip_vs_dest_user entry;
1899
 
1900
                l = &svc->destinations;
1901
                for (e=l->next; e!=l; e=e->next) {
1902
                        if (count >= get->num_dests)
1903
                                break;
1904
                        dest = list_entry(e, struct ip_vs_dest, n_list);
1905
                        entry.addr = dest->addr;
1906
                        entry.port = dest->port;
1907
                        entry.flags = atomic_read(&dest->conn_flags);
1908
                        entry.weight = atomic_read(&dest->weight);
1909
                        entry.activeconns = atomic_read(&dest->activeconns);
1910
                        entry.inactconns = atomic_read(&dest->inactconns);
1911
                        __ip_vs_copy_stats(&entry.stats, &dest->stats);
1912
                        if (copy_to_user(&uptr->entrytable[count],
1913
                                         &entry, sizeof(entry))) {
1914
                                ret = -EFAULT;
1915
                                break;
1916
                        }
1917
                        count++;
1918
                }
1919
                ip_vs_service_put(svc);
1920
        } else
1921
                ret = -ESRCH;
1922
        return ret;
1923
}
1924
 
1925
static inline void
1926
__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1927
{
1928
        u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1929
        u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1930
        u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
1931
}
1932
 
1933
static int
1934
do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1935
{
1936
        int ret = 0;
1937
 
1938
        if (!capable(CAP_NET_ADMIN))
1939
                return -EPERM;
1940
 
1941
        if (down_interruptible(&__ip_vs_mutex))
1942
                return -ERESTARTSYS;
1943
 
1944
        switch (cmd) {
1945
        case IP_VS_SO_GET_VERSION:
1946
        {
1947
                char buf[64];
1948
 
1949
                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
1950
                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1951
                if (*len < strlen(buf)+1) {
1952
                        ret = -EINVAL;
1953
                        goto out;
1954
                }
1955
                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
1956
                        ret = -EFAULT;
1957
                        goto out;
1958
                }
1959
                *len = strlen(buf)+1;
1960
        }
1961
        break;
1962
 
1963
        case IP_VS_SO_GET_INFO:
1964
        {
1965
                struct ip_vs_getinfo info;
1966
                info.version = IP_VS_VERSION_CODE;
1967
                info.size = IP_VS_CONN_TAB_SIZE;
1968
                info.num_services = ip_vs_num_services;
1969
                if (copy_to_user(user, &info, sizeof(info)) != 0)
1970
                        ret = -EFAULT;
1971
        }
1972
        break;
1973
 
1974
        case IP_VS_SO_GET_SERVICES:
1975
        {
1976
                struct ip_vs_get_services get;
1977
 
1978
                if (*len < sizeof(get)) {
1979
                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
1980
                        ret = -EINVAL;
1981
                        goto out;
1982
                }
1983
                if (copy_from_user(&get, user, sizeof(get))) {
1984
                        ret = -EFAULT;
1985
                        goto out;
1986
                }
1987
                if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
1988
                        IP_VS_ERR("length: %u != %Zu\n", *len,
1989
                                  sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
1990
                        ret = -EINVAL;
1991
                        goto out;
1992
                }
1993
                ret = __ip_vs_get_service_entries(&get, user);
1994
        }
1995
        break;
1996
 
1997
        case IP_VS_SO_GET_SERVICE:
1998
        {
1999
                struct ip_vs_service_user get;
2000
                struct ip_vs_service *svc;
2001
 
2002
                if (*len != sizeof(get)) {
2003
                        IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
2004
                        ret = -EINVAL;
2005
                        goto out;
2006
                }
2007
                if (copy_from_user(&get, user, sizeof(get))) {
2008
                        ret = -EFAULT;
2009
                        goto out;
2010
                }
2011
 
2012
                if (get.fwmark)
2013
                        svc = __ip_vs_svc_fwm_get(get.fwmark);
2014
                else
2015
                        svc = __ip_vs_service_get(get.protocol,
2016
                                                  get.addr, get.port);
2017
                if (svc) {
2018
                        strcpy(get.sched_name, svc->scheduler->name);
2019
                        get.flags = svc->flags;
2020
                        get.timeout = svc->timeout / HZ;
2021
                        get.netmask = svc->netmask;
2022
                        get.num_dests = svc->num_dests;
2023
                        __ip_vs_copy_stats(&get.stats, &svc->stats);
2024
                        if (copy_to_user(user, &get, *len) != 0)
2025
                                ret = -EFAULT;
2026
                        ip_vs_service_put(svc);
2027
                } else
2028
                        ret = -ESRCH;
2029
        }
2030
        break;
2031
 
2032
        case IP_VS_SO_GET_DESTS:
2033
        {
2034
                struct ip_vs_get_dests get;
2035
 
2036
                if (*len < sizeof(get)) {
2037
                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
2038
                        ret = -EINVAL;
2039
                        goto out;
2040
                }
2041
                if (copy_from_user(&get, user, sizeof(get))) {
2042
                        ret = -EFAULT;
2043
                        goto out;
2044
                }
2045
                if (*len != (sizeof(get) +
2046
                             sizeof(struct ip_vs_dest_user)*get.num_dests)) {
2047
                        IP_VS_ERR("length: %u != %Zu\n", *len,
2048
                                  sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
2049
                        ret = -EINVAL;
2050
                        goto out;
2051
                }
2052
                ret = __ip_vs_get_dest_entries(&get, user);
2053
        }
2054
        break;
2055
 
2056
        case IP_VS_SO_GET_TIMEOUTS:
2057
        {
2058
                struct ip_vs_timeout_user u;
2059
 
2060
                if (*len < sizeof(u)) {
2061
                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2062
                        ret = -EINVAL;
2063
                        goto out;
2064
                }
2065
                __ip_vs_get_timeouts(&u);
2066
                if (copy_to_user(user, &u, sizeof(u)) != 0)
2067
                        ret = -EFAULT;
2068
        }
2069
        break;
2070
 
2071
        case IP_VS_SO_GET_DAEMON:
2072
        {
2073
                struct ip_vs_daemon_user u;
2074
 
2075
                if (*len < sizeof(u)) {
2076
                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2077
                        ret = -EINVAL;
2078
                        goto out;
2079
                }
2080
                u.state = ip_vs_sync_state;
2081
                strcpy(u.mcast_ifn, ip_vs_mcast_ifn);
2082
                if (copy_to_user(user, &u, sizeof(u)) != 0)
2083
                        ret = -EFAULT;
2084
        }
2085
        break;
2086
 
2087
        default:
2088
                ret = -EINVAL;
2089
        }
2090
 
2091
  out:
2092
        up(&__ip_vs_mutex);
2093
        return ret;
2094
}
2095
 
2096
 
2097
static struct nf_sockopt_ops ip_vs_sockopts = {
2098
        { NULL, NULL }, PF_INET,
2099
        IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
2100
        IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
2101
};
2102
 
2103
 
2104
int ip_vs_control_init(void)
2105
{
2106
        int ret;
2107
        int idx;
2108
 
2109
        EnterFunction(2);
2110
 
2111
        ret = nf_register_sockopt(&ip_vs_sockopts);
2112
        if (ret) {
2113
                IP_VS_ERR("cannot register sockopt.\n");
2114
                return ret;
2115
        }
2116
 
2117
        proc_net_create("ip_vs", 0, ip_vs_get_info);
2118
        proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
2119
 
2120
        ipv4_vs_table.sysctl_header =
2121
                register_sysctl_table(ipv4_vs_table.root_dir, 0);
2122
        /*
2123
         * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
2124
         * ip_vs_schedulers.
2125
         */
2126
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
2127
                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2128
                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2129
        }
2130
        for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
2131
                INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2132
        }
2133
 
2134
        memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2135
        ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
2136
        ip_vs_new_estimator(&ip_vs_stats);
2137
 
2138
        /* Hook the defense timer */
2139
        init_timer(&defense_timer);
2140
        defense_timer.function = defense_timer_handler;
2141
        defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2142
        add_timer(&defense_timer);
2143
 
2144
        LeaveFunction(2);
2145
        return 0;
2146
}
2147
 
2148
void ip_vs_control_cleanup(void)
2149
{
2150
        EnterFunction(2);
2151
        ip_vs_trash_cleanup();
2152
        del_timer_sync(&defense_timer);
2153
        ip_vs_kill_estimator(&ip_vs_stats);
2154
        unregister_sysctl_table(ipv4_vs_table.sysctl_header);
2155
        proc_net_remove("ip_vs_stats");
2156
        proc_net_remove("ip_vs");
2157
        nf_unregister_sockopt(&ip_vs_sockopts);
2158
        LeaveFunction(2);
2159
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.