OpenCores
URL https://opencores.org/ocsvn/or1k_soc_on_altera_embedded_dev_kit/or1k_soc_on_altera_embedded_dev_kit/trunk

Subversion Repositories or1k_soc_on_altera_embedded_dev_kit

[/] [or1k_soc_on_altera_embedded_dev_kit/] [trunk/] [linux-2.6/] [linux-2.6.24/] [net/] [ipv4/] [ipvs/] [ip_vs_xmit.c] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Version:     $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/tcp.h>                  /* for tcphdr */

#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>                  /* for ip_route_output */
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <net/udp.h>

#include <net/ip_vs.h>

30
 
31
/*
32
 *      Destination cache to speed up outgoing route lookup
33
 */
34
static inline void
35
__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
36
{
37
        struct dst_entry *old_dst;
38
 
39
        old_dst = dest->dst_cache;
40
        dest->dst_cache = dst;
41
        dest->dst_rtos = rtos;
42
        dst_release(old_dst);
43
}
44
 
45
static inline struct dst_entry *
46
__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
47
{
48
        struct dst_entry *dst = dest->dst_cache;
49
 
50
        if (!dst)
51
                return NULL;
52
        if ((dst->obsolete || rtos != dest->dst_rtos) &&
53
            dst->ops->check(dst, cookie) == NULL) {
54
                dest->dst_cache = NULL;
55
                dst_release(dst);
56
                return NULL;
57
        }
58
        dst_hold(dst);
59
        return dst;
60
}
61
 
62
static inline struct rtable *
63
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
64
{
65
        struct rtable *rt;                      /* Route to the other host */
66
        struct ip_vs_dest *dest = cp->dest;
67
 
68
        if (dest) {
69
                spin_lock(&dest->dst_lock);
70
                if (!(rt = (struct rtable *)
71
                      __ip_vs_dst_check(dest, rtos, 0))) {
72
                        struct flowi fl = {
73
                                .oif = 0,
74
                                .nl_u = {
75
                                        .ip4_u = {
76
                                                .daddr = dest->addr,
77
                                                .saddr = 0,
78
                                                .tos = rtos, } },
79
                        };
80
 
81
                        if (ip_route_output_key(&rt, &fl)) {
82
                                spin_unlock(&dest->dst_lock);
83
                                IP_VS_DBG_RL("ip_route_output error, "
84
                                             "dest: %u.%u.%u.%u\n",
85
                                             NIPQUAD(dest->addr));
86
                                return NULL;
87
                        }
88
                        __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
89
                        IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n",
90
                                  NIPQUAD(dest->addr),
91
                                  atomic_read(&rt->u.dst.__refcnt), rtos);
92
                }
93
                spin_unlock(&dest->dst_lock);
94
        } else {
95
                struct flowi fl = {
96
                        .oif = 0,
97
                        .nl_u = {
98
                                .ip4_u = {
99
                                        .daddr = cp->daddr,
100
                                        .saddr = 0,
101
                                        .tos = rtos, } },
102
                };
103
 
104
                if (ip_route_output_key(&rt, &fl)) {
105
                        IP_VS_DBG_RL("ip_route_output error, dest: "
106
                                     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
107
                        return NULL;
108
                }
109
        }
110
 
111
        return rt;
112
}
113
 
114
 
115
/*
116
 *      Release dest->dst_cache before a dest is removed
117
 */
118
void
119
ip_vs_dst_reset(struct ip_vs_dest *dest)
120
{
121
        struct dst_entry *old_dst;
122
 
123
        old_dst = dest->dst_cache;
124
        dest->dst_cache = NULL;
125
        dst_release(old_dst);
126
}
127
 
128
#define IP_VS_XMIT(skb, rt)                             \
129
do {                                                    \
130
        (skb)->ipvs_property = 1;                       \
131
        skb_forward_csum(skb);                          \
132
        NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,  \
133
                (rt)->u.dst.dev, dst_output);           \
134
} while (0)
135
 
136
 
137
/*
138
 *      NULL transmitter (do nothing except return NF_ACCEPT)
139
 */
140
int
141
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
142
                struct ip_vs_protocol *pp)
143
{
144
        /* we do not touch skb and do not need pskb ptr */
145
        return NF_ACCEPT;
146
}
147
 
148
 
149
/*
150
 *      Bypass transmitter
151
 *      Let packets bypass the destination when the destination is not
152
 *      available, it may be only used in transparent cache cluster.
153
 */
154
int
155
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
156
                  struct ip_vs_protocol *pp)
157
{
158
        struct rtable *rt;                      /* Route to the other host */
159
        struct iphdr  *iph = ip_hdr(skb);
160
        u8     tos = iph->tos;
161
        int    mtu;
162
        struct flowi fl = {
163
                .oif = 0,
164
                .nl_u = {
165
                        .ip4_u = {
166
                                .daddr = iph->daddr,
167
                                .saddr = 0,
168
                                .tos = RT_TOS(tos), } },
169
        };
170
 
171
        EnterFunction(10);
172
 
173
        if (ip_route_output_key(&rt, &fl)) {
174
                IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
175
                             "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
176
                goto tx_error_icmp;
177
        }
178
 
179
        /* MTU checking */
180
        mtu = dst_mtu(&rt->u.dst);
181
        if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
182
                ip_rt_put(rt);
183
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
184
                IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
185
                goto tx_error;
186
        }
187
 
188
        /*
189
         * Call ip_send_check because we are not sure it is called
190
         * after ip_defrag. Is copy-on-write needed?
191
         */
192
        if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
193
                ip_rt_put(rt);
194
                return NF_STOLEN;
195
        }
196
        ip_send_check(ip_hdr(skb));
197
 
198
        /* drop old route */
199
        dst_release(skb->dst);
200
        skb->dst = &rt->u.dst;
201
 
202
        /* Another hack: avoid icmp_send in ip_fragment */
203
        skb->local_df = 1;
204
 
205
        IP_VS_XMIT(skb, rt);
206
 
207
        LeaveFunction(10);
208
        return NF_STOLEN;
209
 
210
 tx_error_icmp:
211
        dst_link_failure(skb);
212
 tx_error:
213
        kfree_skb(skb);
214
        LeaveFunction(10);
215
        return NF_STOLEN;
216
}
217
 
218
 
219
/*
220
 *      NAT transmitter (only for outside-to-inside nat forwarding)
221
 *      Not used for related ICMP
222
 */
223
int
224
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
225
               struct ip_vs_protocol *pp)
226
{
227
        struct rtable *rt;              /* Route to the other host */
228
        int mtu;
229
        struct iphdr *iph = ip_hdr(skb);
230
 
231
        EnterFunction(10);
232
 
233
        /* check if it is a connection of no-client-port */
234
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
235
                __be16 _pt, *p;
236
                p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
237
                if (p == NULL)
238
                        goto tx_error;
239
                ip_vs_conn_fill_cport(cp, *p);
240
                IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
241
        }
242
 
243
        if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
244
                goto tx_error_icmp;
245
 
246
        /* MTU checking */
247
        mtu = dst_mtu(&rt->u.dst);
248
        if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
249
                ip_rt_put(rt);
250
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
251
                IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
252
                goto tx_error;
253
        }
254
 
255
        /* copy-on-write the packet before mangling it */
256
        if (!skb_make_writable(skb, sizeof(struct iphdr)))
257
                goto tx_error_put;
258
 
259
        if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
260
                goto tx_error_put;
261
 
262
        /* drop old route */
263
        dst_release(skb->dst);
264
        skb->dst = &rt->u.dst;
265
 
266
        /* mangle the packet */
267
        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
268
                goto tx_error;
269
        ip_hdr(skb)->daddr = cp->daddr;
270
        ip_send_check(ip_hdr(skb));
271
 
272
        IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
273
 
274
        /* FIXME: when application helper enlarges the packet and the length
275
           is larger than the MTU of outgoing device, there will be still
276
           MTU problem. */
277
 
278
        /* Another hack: avoid icmp_send in ip_fragment */
279
        skb->local_df = 1;
280
 
281
        IP_VS_XMIT(skb, rt);
282
 
283
        LeaveFunction(10);
284
        return NF_STOLEN;
285
 
286
  tx_error_icmp:
287
        dst_link_failure(skb);
288
  tx_error:
289
        LeaveFunction(10);
290
        kfree_skb(skb);
291
        return NF_STOLEN;
292
  tx_error_put:
293
        ip_rt_put(rt);
294
        goto tx_error;
295
}
296
 
297
 
298
/*
299
 *   IP Tunneling transmitter
300
 *
301
 *   This function encapsulates the packet in a new IP packet, its
302
 *   destination will be set to cp->daddr. Most code of this function
303
 *   is taken from ipip.c.
304
 *
305
 *   It is used in VS/TUN cluster. The load balancer selects a real
306
 *   server from a cluster based on a scheduling algorithm,
307
 *   encapsulates the request packet and forwards it to the selected
308
 *   server. For example, all real servers are configured with
309
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
310
 *   the encapsulated packet, it will decapsulate the packet, processe
311
 *   the request and return the response packets directly to the client
312
 *   without passing the load balancer. This can greatly increase the
313
 *   scalability of virtual server.
314
 *
315
 *   Used for ANY protocol
316
 */
317
int
318
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
319
                  struct ip_vs_protocol *pp)
320
{
321
        struct rtable *rt;                      /* Route to the other host */
322
        struct net_device *tdev;                /* Device to other host */
323
        struct iphdr  *old_iph = ip_hdr(skb);
324
        u8     tos = old_iph->tos;
325
        __be16 df = old_iph->frag_off;
326
        sk_buff_data_t old_transport_header = skb->transport_header;
327
        struct iphdr  *iph;                     /* Our new IP header */
328
        unsigned int max_headroom;              /* The extra header space needed */
329
        int    mtu;
330
 
331
        EnterFunction(10);
332
 
333
        if (skb->protocol != htons(ETH_P_IP)) {
334
                IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
335
                             "ETH_P_IP: %d, skb protocol: %d\n",
336
                             htons(ETH_P_IP), skb->protocol);
337
                goto tx_error;
338
        }
339
 
340
        if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
341
                goto tx_error_icmp;
342
 
343
        tdev = rt->u.dst.dev;
344
 
345
        mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
346
        if (mtu < 68) {
347
                ip_rt_put(rt);
348
                IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
349
                goto tx_error;
350
        }
351
        if (skb->dst)
352
                skb->dst->ops->update_pmtu(skb->dst, mtu);
353
 
354
        df |= (old_iph->frag_off & htons(IP_DF));
355
 
356
        if ((old_iph->frag_off & htons(IP_DF))
357
            && mtu < ntohs(old_iph->tot_len)) {
358
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
359
                ip_rt_put(rt);
360
                IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n");
361
                goto tx_error;
362
        }
363
 
364
        /*
365
         * Okay, now see if we can stuff it in the buffer as-is.
366
         */
367
        max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
368
 
369
        if (skb_headroom(skb) < max_headroom
370
            || skb_cloned(skb) || skb_shared(skb)) {
371
                struct sk_buff *new_skb =
372
                        skb_realloc_headroom(skb, max_headroom);
373
                if (!new_skb) {
374
                        ip_rt_put(rt);
375
                        kfree_skb(skb);
376
                        IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n");
377
                        return NF_STOLEN;
378
                }
379
                kfree_skb(skb);
380
                skb = new_skb;
381
                old_iph = ip_hdr(skb);
382
        }
383
 
384
        skb->transport_header = old_transport_header;
385
 
386
        /* fix old IP header checksum */
387
        ip_send_check(old_iph);
388
 
389
        skb_push(skb, sizeof(struct iphdr));
390
        skb_reset_network_header(skb);
391
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
392
 
393
        /* drop old route */
394
        dst_release(skb->dst);
395
        skb->dst = &rt->u.dst;
396
 
397
        /*
398
         *      Push down and install the IPIP header.
399
         */
400
        iph                     =       ip_hdr(skb);
401
        iph->version            =       4;
402
        iph->ihl                =       sizeof(struct iphdr)>>2;
403
        iph->frag_off           =       df;
404
        iph->protocol           =       IPPROTO_IPIP;
405
        iph->tos                =       tos;
406
        iph->daddr              =       rt->rt_dst;
407
        iph->saddr              =       rt->rt_src;
408
        iph->ttl                =       old_iph->ttl;
409
        iph->tot_len            =       htons(skb->len);
410
        ip_select_ident(iph, &rt->u.dst, NULL);
411
        ip_send_check(iph);
412
 
413
        /* Another hack: avoid icmp_send in ip_fragment */
414
        skb->local_df = 1;
415
 
416
        IP_VS_XMIT(skb, rt);
417
 
418
        LeaveFunction(10);
419
 
420
        return NF_STOLEN;
421
 
422
  tx_error_icmp:
423
        dst_link_failure(skb);
424
  tx_error:
425
        kfree_skb(skb);
426
        LeaveFunction(10);
427
        return NF_STOLEN;
428
}
429
 
430
 
431
/*
432
 *      Direct Routing transmitter
433
 *      Used for ANY protocol
434
 */
435
int
436
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
437
              struct ip_vs_protocol *pp)
438
{
439
        struct rtable *rt;                      /* Route to the other host */
440
        struct iphdr  *iph = ip_hdr(skb);
441
        int    mtu;
442
 
443
        EnterFunction(10);
444
 
445
        if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
446
                goto tx_error_icmp;
447
 
448
        /* MTU checking */
449
        mtu = dst_mtu(&rt->u.dst);
450
        if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
451
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
452
                ip_rt_put(rt);
453
                IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
454
                goto tx_error;
455
        }
456
 
457
        /*
458
         * Call ip_send_check because we are not sure it is called
459
         * after ip_defrag. Is copy-on-write needed?
460
         */
461
        if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
462
                ip_rt_put(rt);
463
                return NF_STOLEN;
464
        }
465
        ip_send_check(ip_hdr(skb));
466
 
467
        /* drop old route */
468
        dst_release(skb->dst);
469
        skb->dst = &rt->u.dst;
470
 
471
        /* Another hack: avoid icmp_send in ip_fragment */
472
        skb->local_df = 1;
473
 
474
        IP_VS_XMIT(skb, rt);
475
 
476
        LeaveFunction(10);
477
        return NF_STOLEN;
478
 
479
  tx_error_icmp:
480
        dst_link_failure(skb);
481
  tx_error:
482
        kfree_skb(skb);
483
        LeaveFunction(10);
484
        return NF_STOLEN;
485
}
486
 
487
 
488
/*
489
 *      ICMP packet transmitter
490
 *      called by the ip_vs_in_icmp
491
 */
492
int
493
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
494
                struct ip_vs_protocol *pp, int offset)
495
{
496
        struct rtable   *rt;    /* Route to the other host */
497
        int mtu;
498
        int rc;
499
 
500
        EnterFunction(10);
501
 
502
        /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
503
           forwarded directly here, because there is no need to
504
           translate address/port back */
505
        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
506
                if (cp->packet_xmit)
507
                        rc = cp->packet_xmit(skb, cp, pp);
508
                else
509
                        rc = NF_ACCEPT;
510
                /* do not touch skb anymore */
511
                atomic_inc(&cp->in_pkts);
512
                goto out;
513
        }
514
 
515
        /*
516
         * mangle and send the packet here (only for VS/NAT)
517
         */
518
 
519
        if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
520
                goto tx_error_icmp;
521
 
522
        /* MTU checking */
523
        mtu = dst_mtu(&rt->u.dst);
524
        if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
525
                ip_rt_put(rt);
526
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
527
                IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
528
                goto tx_error;
529
        }
530
 
531
        /* copy-on-write the packet before mangling it */
532
        if (!skb_make_writable(skb, offset))
533
                goto tx_error_put;
534
 
535
        if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
536
                goto tx_error_put;
537
 
538
        /* drop the old route when skb is not shared */
539
        dst_release(skb->dst);
540
        skb->dst = &rt->u.dst;
541
 
542
        ip_vs_nat_icmp(skb, pp, cp, 0);
543
 
544
        /* Another hack: avoid icmp_send in ip_fragment */
545
        skb->local_df = 1;
546
 
547
        IP_VS_XMIT(skb, rt);
548
 
549
        rc = NF_STOLEN;
550
        goto out;
551
 
552
  tx_error_icmp:
553
        dst_link_failure(skb);
554
  tx_error:
555
        dev_kfree_skb(skb);
556
        rc = NF_STOLEN;
557
  out:
558
        LeaveFunction(10);
559
        return rc;
560
  tx_error_put:
561
        ip_rt_put(rt);
562
        goto tx_error;
563
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.