OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [net/] [ipv4/] [netfilter/] [ip_nat_core.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/* NAT for netfilter; shared with compatibility layer. */
2
 
3
/* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
4
   Public Licence. */
5
#include <linux/version.h>
6
#include <linux/module.h>
7
#include <linux/types.h>
8
#include <linux/timer.h>
9
#include <linux/skbuff.h>
10
#include <linux/netfilter_ipv4.h>
11
#include <linux/brlock.h>
12
#include <linux/vmalloc.h>
13
#include <net/checksum.h>
14
#include <net/icmp.h>
15
#include <net/ip.h>
16
#include <net/tcp.h>  /* For tcp_prot in getorigdst */
17
 
18
/* Lock-assertion hooks: both ignore their argument and always check
   ip_nat_lock.  Presumably consumed by the list helper macros from
   listhelp.h, which is included further down -- confirm. */
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
19
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
20
 
21
#include <linux/netfilter_ipv4/ip_conntrack.h>
22
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
23
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
24
#include <linux/netfilter_ipv4/ip_nat.h>
25
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
26
#include <linux/netfilter_ipv4/ip_nat_core.h>
27
#include <linux/netfilter_ipv4/ip_nat_helper.h>
28
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
29
#include <linux/netfilter_ipv4/listhelp.h>
30
 
31
/* Debug tracing switch: change the condition to 1 to make DEBUGP an
   alias for printk; as shipped, DEBUGP(...) expands to nothing. */
#if 0
32
#define DEBUGP printk
33
#else
34
#define DEBUGP(format, args...)
35
#endif
36
 
37
/* Lock asserted or taken around the NAT hash tables and the protos
   list by the functions in this file. */
DECLARE_RWLOCK(ip_nat_lock);
38
/* Conntrack's lock, declared elsewhere; asserted in exp_for_packet(). */
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
39
 
40
/* Calculated at init based on memory size */
41
/* Used as the modulus by hash_by_src() and hash_by_ipsproto(). */
static unsigned int ip_nat_htable_size;
42
 
43
/* Bucket array indexed by hash_by_src(). */
static struct list_head *bysource;
44
/* Bucket array indexed by hash_by_ipsproto(). */
static struct list_head *byipsproto;
45
/* List searched by find_nat_proto(). */
LIST_HEAD(protos);
46
/* List searched for a matching helper in ip_nat_setup_info(). */
LIST_HEAD(helpers);
47
 
48
/* Fallback returned by find_nat_proto() when protos has no match. */
extern struct ip_nat_protocol unknown_nat_protocol;
49
 
50
/* We keep extra hashes for each conntrack, for fast searching. */
51
static inline size_t
52
hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
53
{
54
        /* Modified src and dst, to ensure we don't create two
55
           identical streams. */
56
        return (src + dst + proto) % ip_nat_htable_size;
57
}
58
 
59
static inline size_t
60
hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
61
{
62
        /* Original src, to ensure we map it consistently if poss. */
63
        return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
64
}
65
 
66
/* Noone using conntrack by the time this called. */
67
static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
68
{
69
        struct ip_nat_info *info = &conn->nat.info;
70
        unsigned int hs, hp;
71
 
72
        if (!info->initialized)
73
                return;
74
 
75
        IP_NF_ASSERT(info->bysource.conntrack);
76
        IP_NF_ASSERT(info->byipsproto.conntrack);
77
 
78
        hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
79
                         conn->tuplehash[IP_CT_DIR_ORIGINAL]
80
                         .tuple.dst.protonum);
81
 
82
        hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
83
                              conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
84
                              conn->tuplehash[IP_CT_DIR_REPLY]
85
                              .tuple.dst.protonum);
86
 
87
        WRITE_LOCK(&ip_nat_lock);
88
        LIST_DELETE(&bysource[hs], &info->bysource);
89
        LIST_DELETE(&byipsproto[hp], &info->byipsproto);
90
        WRITE_UNLOCK(&ip_nat_lock);
91
}
92
 
93
/* Checksum mangling: the existing checksum is adjusted rather than
 * recomputed, so a checksum that was wrong before is still wrong
 * afterwards.  This also works for incomplete packets (eg. ICMP dest
 * unreachables).
 *
 * oldvalinv is the bitwise complement of the value being replaced
 * (manip_pkt() passes ~iph->saddr or ~iph->daddr), newval is its
 * replacement and oldcheck the checksum to adjust. */
u_int16_t
ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
{
        u_int32_t delta[2];

        delta[0] = oldvalinv;
        delta[1] = newval;

        return csum_fold(csum_partial((char *)delta, sizeof(delta),
                                      oldcheck ^ 0xFFFF));
}
103
 
104
static inline int cmp_proto(const struct ip_nat_protocol *i, int proto)
105
{
106
        return i->protonum == proto;
107
}
108
 
109
struct ip_nat_protocol *
110
find_nat_proto(u_int16_t protonum)
111
{
112
        struct ip_nat_protocol *i;
113
 
114
        MUST_BE_READ_LOCKED(&ip_nat_lock);
115
        i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum);
116
        if (!i)
117
                i = &unknown_nat_protocol;
118
        return i;
119
}
120
 
121
/* Is this tuple already taken? (not by us) */
122
int
123
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
124
                  const struct ip_conntrack *ignored_conntrack)
125
{
126
        /* Conntrack tracking doesn't keep track of outgoing tuples; only
127
           incoming ones.  NAT means they don't have a fixed mapping,
128
           so we invert the tuple and look for the incoming reply.
129
 
130
           We could keep a separate hash if this proves too slow. */
131
        struct ip_conntrack_tuple reply;
132
 
133
        invert_tuplepr(&reply, tuple);
134
        return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
135
}
136
 
137
/* Does tuple + the source manip come within the range mr */
138
static int
139
in_range(const struct ip_conntrack_tuple *tuple,
140
         const struct ip_conntrack_manip *manip,
141
         const struct ip_nat_multi_range *mr)
142
{
143
        struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum);
144
        unsigned int i;
145
        struct ip_conntrack_tuple newtuple = { *manip, tuple->dst };
146
 
147
        for (i = 0; i < mr->rangesize; i++) {
148
                /* If we are allowed to map IPs, then we must be in the
149
                   range specified, otherwise we must be unchanged. */
150
                if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
151
                        if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip)
152
                            || (ntohl(newtuple.src.ip)
153
                                > ntohl(mr->range[i].max_ip)))
154
                                continue;
155
                } else {
156
                        if (newtuple.src.ip != tuple->src.ip)
157
                                continue;
158
                }
159
 
160
                if ((mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED)
161
                    && proto->in_range(&newtuple, IP_NAT_MANIP_SRC,
162
                                       &mr->range[i].min, &mr->range[i].max))
163
                        return 1;
164
        }
165
        return 0;
166
}
167
 
168
static inline int
169
src_cmp(const struct ip_nat_hash *i,
170
        const struct ip_conntrack_tuple *tuple,
171
        const struct ip_nat_multi_range *mr)
172
{
173
        return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
174
                == tuple->dst.protonum
175
                && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
176
                == tuple->src.ip
177
                && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
178
                == tuple->src.u.all
179
                && in_range(tuple,
180
                            &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
181
                            .tuple.src,
182
                            mr));
183
}
184
 
185
/* Only called for SRC manip */
186
static struct ip_conntrack_manip *
187
find_appropriate_src(const struct ip_conntrack_tuple *tuple,
188
                     const struct ip_nat_multi_range *mr)
189
{
190
        unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
191
        struct ip_nat_hash *i;
192
 
193
        MUST_BE_READ_LOCKED(&ip_nat_lock);
194
        i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr);
195
        if (i)
196
                return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
197
        else
198
                return NULL;
199
}
200
 
201
#ifdef CONFIG_IP_NF_NAT_LOCAL
/* A destination manip on a locally generated packet may need a source
   manip as well: route to var_ip and store the route's source address
   in *other_ipp.  Returns 1 on success, 0 when no route is found (in
   which case *other_ipp is left untouched). */
static int
do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
{
        struct rtable *rt;
        int err;

        /* FIXME: IPTOS_TOS(iph->tos) --RR */
        err = ip_route_output(&rt, var_ip, 0, 0, 0);
        if (err != 0) {
                DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
                       NIPQUAD(var_ip));
                return 0;
        }

        *other_ipp = rt->rt_src;
        ip_rt_put(rt);

        return 1;
}
#endif
221
 
222
/* Simple way to iterate through all. */
223
static inline int fake_cmp(const struct ip_nat_hash *i,
224
                           u_int32_t src, u_int32_t dst, u_int16_t protonum,
225
                           unsigned int *score,
226
                           const struct ip_conntrack *conntrack)
227
{
228
        /* Compare backwards: we're dealing with OUTGOING tuples, and
229
           inside the conntrack is the REPLY tuple.  Don't count this
230
           conntrack. */
231
        if (i->conntrack != conntrack
232
            && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst
233
            && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src
234
            && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum
235
                == protonum))
236
                (*score)++;
237
        return 0;
238
}
239
 
240
static inline unsigned int
241
count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum,
242
           const struct ip_conntrack *conntrack)
243
{
244
        unsigned int score = 0;
245
        unsigned int h;
246
 
247
        MUST_BE_READ_LOCKED(&ip_nat_lock);
248
        h = hash_by_ipsproto(src, dst, protonum);
249
        LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *,
250
                  src, dst, protonum, &score, conntrack);
251
 
252
        return score;
253
}
254
 
255
/* For [FUTURE] fragmentation handling, we want the least-used
256
   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
257
   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
258
   1-65535, we don't do pro-rata allocation based on ports; we choose
259
   the ip with the lowest src-ip/dst-ip/proto usage.
260
 
261
   If an allocation then fails (eg. all 6 ports used in the 1.2.3.4
262
   range), we eliminate that and try again.  This is not the most
263
   efficient approach, but if you're worried about that, don't hand us
264
   ranges you don't really have.  */
265
static struct ip_nat_range *
266
find_best_ips_proto(struct ip_conntrack_tuple *tuple,
267
                    const struct ip_nat_multi_range *mr,
268
                    const struct ip_conntrack *conntrack,
269
                    unsigned int hooknum)
270
{
271
        unsigned int i;
272
        struct {
273
                const struct ip_nat_range *range;
274
                unsigned int score;
275
                struct ip_conntrack_tuple tuple;
276
        } best = { NULL,  0xFFFFFFFF };
277
        u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip;
278
        static unsigned int randomness = 0;
279
 
280
        if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
281
                var_ipp = &tuple->src.ip;
282
                saved_ip = tuple->dst.ip;
283
                other_ipp = &tuple->dst.ip;
284
        } else {
285
                var_ipp = &tuple->dst.ip;
286
                saved_ip = tuple->src.ip;
287
                other_ipp = &tuple->src.ip;
288
        }
289
        /* Don't do do_extra_mangle unless neccessary (overrides
290
           explicit socket bindings, for example) */
291
        orig_dstip = tuple->dst.ip;
292
 
293
        IP_NF_ASSERT(mr->rangesize >= 1);
294
        for (i = 0; i < mr->rangesize; i++) {
295
                /* Host order */
296
                u_int32_t minip, maxip, j;
297
 
298
                /* Don't do ranges which are already eliminated. */
299
                if (mr->range[i].flags & IP_NAT_RANGE_FULL) {
300
                        continue;
301
                }
302
 
303
                if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
304
                        minip = ntohl(mr->range[i].min_ip);
305
                        maxip = ntohl(mr->range[i].max_ip);
306
                } else
307
                        minip = maxip = ntohl(*var_ipp);
308
 
309
                randomness++;
310
                for (j = 0; j < maxip - minip + 1; j++) {
311
                        unsigned int score;
312
 
313
                        *var_ipp = htonl(minip + (randomness + j)
314
                                         % (maxip - minip + 1));
315
 
316
                        /* Reset the other ip in case it was mangled by
317
                         * do_extra_mangle last time. */
318
                        *other_ipp = saved_ip;
319
 
320
#ifdef CONFIG_IP_NF_NAT_LOCAL
321
                        if (hooknum == NF_IP_LOCAL_OUT
322
                            && *var_ipp != orig_dstip
323
                            && !do_extra_mangle(*var_ipp, other_ipp)) {
324
                                DEBUGP("Range %u %u.%u.%u.%u rt failed!\n",
325
                                       i, NIPQUAD(*var_ipp));
326
                                /* Can't route?  This whole range part is
327
                                 * probably screwed, but keep trying
328
                                 * anyway. */
329
                                continue;
330
                        }
331
#endif
332
 
333
                        /* Count how many others map onto this. */
334
                        score = count_maps(tuple->src.ip, tuple->dst.ip,
335
                                           tuple->dst.protonum, conntrack);
336
                        if (score < best.score) {
337
                                /* Optimization: doesn't get any better than
338
                                   this. */
339
                                if (score == 0)
340
                                        return (struct ip_nat_range *)
341
                                                &mr->range[i];
342
 
343
                                best.score = score;
344
                                best.tuple = *tuple;
345
                                best.range = &mr->range[i];
346
                        }
347
                }
348
        }
349
        *tuple = best.tuple;
350
 
351
        /* Discard const. */
352
        return (struct ip_nat_range *)best.range;
353
}
354
 
355
/* Fast version doesn't iterate through hash chains, but only handles
356
   common case of single IP address (null NAT, masquerade) */
357
static struct ip_nat_range *
358
find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple,
359
                         const struct ip_nat_multi_range *mr,
360
                         const struct ip_conntrack *conntrack,
361
                         unsigned int hooknum)
362
{
363
        if (mr->rangesize != 1
364
            || (mr->range[0].flags & IP_NAT_RANGE_FULL)
365
            || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
366
                && mr->range[0].min_ip != mr->range[0].max_ip))
367
                return find_best_ips_proto(tuple, mr, conntrack, hooknum);
368
 
369
        if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
370
                if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
371
                        tuple->src.ip = mr->range[0].min_ip;
372
                else {
373
                        /* Only do extra mangle when required (breaks
374
                           socket binding) */
375
#ifdef CONFIG_IP_NF_NAT_LOCAL
376
                        if (tuple->dst.ip != mr->range[0].min_ip
377
                            && hooknum == NF_IP_LOCAL_OUT
378
                            && !do_extra_mangle(mr->range[0].min_ip,
379
                                                &tuple->src.ip))
380
                                return NULL;
381
#endif
382
                        tuple->dst.ip = mr->range[0].min_ip;
383
                }
384
        }
385
 
386
        /* Discard const. */
387
        return (struct ip_nat_range *)&mr->range[0];
388
}
389
 
390
static int
391
get_unique_tuple(struct ip_conntrack_tuple *tuple,
392
                 const struct ip_conntrack_tuple *orig_tuple,
393
                 const struct ip_nat_multi_range *mrr,
394
                 struct ip_conntrack *conntrack,
395
                 unsigned int hooknum)
396
{
397
        struct ip_nat_protocol *proto
398
                = find_nat_proto(orig_tuple->dst.protonum);
399
        struct ip_nat_range *rptr;
400
        unsigned int i;
401
        int ret;
402
 
403
        /* We temporarily use flags for marking full parts, but we
404
           always clean up afterwards */
405
        struct ip_nat_multi_range *mr = (void *)mrr;
406
 
407
        /* 1) If this srcip/proto/src-proto-part is currently mapped,
408
           and that same mapping gives a unique tuple within the given
409
           range, use that.
410
 
411
           This is only required for source (ie. NAT/masq) mappings.
412
           So far, we don't do local source mappings, so multiple
413
           manips not an issue.  */
414
        if (hooknum == NF_IP_POST_ROUTING) {
415
                struct ip_conntrack_manip *manip;
416
 
417
                manip = find_appropriate_src(orig_tuple, mr);
418
                if (manip) {
419
                        /* Apply same source manipulation. */
420
                        *tuple = ((struct ip_conntrack_tuple)
421
                                  { *manip, orig_tuple->dst });
422
                        DEBUGP("get_unique_tuple: Found current src map\n");
423
                        return 1;
424
                }
425
        }
426
 
427
        /* 2) Select the least-used IP/proto combination in the given
428
           range.
429
        */
430
        *tuple = *orig_tuple;
431
        while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum))
432
               != NULL) {
433
                DEBUGP("Found best for "); DUMP_TUPLE(tuple);
434
                /* 3) The per-protocol part of the manip is made to
435
                   map into the range to make a unique tuple. */
436
 
437
                /* Only bother mapping if it's not already in range
438
                   and unique */
439
                if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
440
                     || proto->in_range(tuple, HOOK2MANIP(hooknum),
441
                                        &rptr->min, &rptr->max))
442
                    && !ip_nat_used_tuple(tuple, conntrack)) {
443
                        ret = 1;
444
                        goto clear_fulls;
445
                } else {
446
                        if (proto->unique_tuple(tuple, rptr,
447
                                                HOOK2MANIP(hooknum),
448
                                                conntrack)) {
449
                                /* Must be unique. */
450
                                IP_NF_ASSERT(!ip_nat_used_tuple(tuple,
451
                                                                conntrack));
452
                                ret = 1;
453
                                goto clear_fulls;
454
                        } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
455
                                /* Try implicit source NAT; protocol
456
                                   may be able to play with ports to
457
                                   make it unique. */
458
                                struct ip_nat_range r
459
                                        = { IP_NAT_RANGE_MAP_IPS,
460
                                            tuple->src.ip, tuple->src.ip,
461
                                            { 0 }, { 0 } };
462
                                DEBUGP("Trying implicit mapping\n");
463
                                if (proto->unique_tuple(tuple, &r,
464
                                                        IP_NAT_MANIP_SRC,
465
                                                        conntrack)) {
466
                                        /* Must be unique. */
467
                                        IP_NF_ASSERT(!ip_nat_used_tuple
468
                                                     (tuple, conntrack));
469
                                        ret = 1;
470
                                        goto clear_fulls;
471
                                }
472
                        }
473
                        DEBUGP("Protocol can't get unique tuple %u.\n",
474
                               hooknum);
475
                }
476
 
477
                /* Eliminate that from range, and try again. */
478
                rptr->flags |= IP_NAT_RANGE_FULL;
479
                *tuple = *orig_tuple;
480
        }
481
 
482
        ret = 0;
483
 
484
 clear_fulls:
485
        /* Clear full flags. */
486
        IP_NF_ASSERT(mr->rangesize >= 1);
487
        for (i = 0; i < mr->rangesize; i++)
488
                mr->range[i].flags &= ~IP_NAT_RANGE_FULL;
489
 
490
        return ret;
491
}
492
 
493
static inline int
494
helper_cmp(const struct ip_nat_helper *helper,
495
           const struct ip_conntrack_tuple *tuple)
496
{
497
        return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
498
}
499
 
500
/* Where to manip the reply packets (will be reverse manip). */
501
static unsigned int opposite_hook[NF_IP_NUMHOOKS]
502
= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
503
    [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
504
#ifdef CONFIG_IP_NF_NAT_LOCAL
505
    [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
506
    [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
507
#endif
508
};
509
 
510
unsigned int
511
ip_nat_setup_info(struct ip_conntrack *conntrack,
512
                  const struct ip_nat_multi_range *mr,
513
                  unsigned int hooknum)
514
{
515
        struct ip_conntrack_tuple new_tuple, inv_tuple, reply;
516
        struct ip_conntrack_tuple orig_tp;
517
        struct ip_nat_info *info = &conntrack->nat.info;
518
        int in_hashes = info->initialized;
519
 
520
        MUST_BE_WRITE_LOCKED(&ip_nat_lock);
521
        IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
522
                     || hooknum == NF_IP_POST_ROUTING
523
                     || hooknum == NF_IP_LOCAL_OUT);
524
        IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
525
        IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
526
 
527
        /* What we've got will look like inverse of reply. Normally
528
           this is what is in the conntrack, except for prior
529
           manipulations (future optimization: if num_manips == 0,
530
           orig_tp =
531
           conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
532
        invert_tuplepr(&orig_tp,
533
                       &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
534
 
535
#if 0
536
        {
537
        unsigned int i;
538
 
539
        DEBUGP("Hook %u (%s), ", hooknum,
540
               HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST");
541
        DUMP_TUPLE(&orig_tp);
542
        DEBUGP("Range %p: ", mr);
543
        for (i = 0; i < mr->rangesize; i++) {
544
                DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n",
545
                       i,
546
                       (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS)
547
                       ? " MAP_IPS" : "",
548
                       (mr->range[i].flags
549
                        & IP_NAT_RANGE_PROTO_SPECIFIED)
550
                       ? " PROTO_SPECIFIED" : "",
551
                       (mr->range[i].flags & IP_NAT_RANGE_FULL)
552
                       ? " FULL" : "",
553
                       NIPQUAD(mr->range[i].min_ip),
554
                       NIPQUAD(mr->range[i].max_ip),
555
                       mr->range[i].min.all,
556
                       mr->range[i].max.all);
557
        }
558
        }
559
#endif
560
 
561
        do {
562
                if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack,
563
                                      hooknum)) {
564
                        DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n",
565
                               conntrack);
566
                        return NF_DROP;
567
                }
568
 
569
#if 0
570
                DEBUGP("Hook %u (%s) %p\n", hooknum,
571
                       HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST",
572
                       conntrack);
573
                DEBUGP("Original: ");
574
                DUMP_TUPLE(&orig_tp);
575
                DEBUGP("New: ");
576
                DUMP_TUPLE(&new_tuple);
577
#endif
578
 
579
                /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT):
580
                   the original (A/B/C/D') and the mangled one (E/F/G/H').
581
 
582
                   We're only allowed to work with the SRC per-proto
583
                   part, so we create inverses of both to start, then
584
                   derive the other fields we need.  */
585
 
586
                /* Reply connection: simply invert the new tuple
587
                   (G/H/E/F') */
588
                invert_tuplepr(&reply, &new_tuple);
589
 
590
                /* Alter conntrack table so it recognizes replies.
591
                   If fail this race (reply tuple now used), repeat. */
592
        } while (!ip_conntrack_alter_reply(conntrack, &reply));
593
 
594
        /* FIXME: We can simply used existing conntrack reply tuple
595
           here --RR */
596
        /* Create inverse of original: C/D/A/B' */
597
        invert_tuplepr(&inv_tuple, &orig_tp);
598
 
599
        /* Has source changed?. */
600
        if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
601
                /* In this direction, a source manip. */
602
                info->manips[info->num_manips++] =
603
                        ((struct ip_nat_info_manip)
604
                         { IP_CT_DIR_ORIGINAL, hooknum,
605
                           IP_NAT_MANIP_SRC, new_tuple.src });
606
 
607
                IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
608
 
609
                /* In the reverse direction, a destination manip. */
610
                info->manips[info->num_manips++] =
611
                        ((struct ip_nat_info_manip)
612
                         { IP_CT_DIR_REPLY, opposite_hook[hooknum],
613
                           IP_NAT_MANIP_DST, orig_tp.src });
614
                IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
615
        }
616
 
617
        /* Has destination changed? */
618
        if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
619
                /* In this direction, a destination manip */
620
                info->manips[info->num_manips++] =
621
                        ((struct ip_nat_info_manip)
622
                         { IP_CT_DIR_ORIGINAL, hooknum,
623
                           IP_NAT_MANIP_DST, reply.src });
624
 
625
                IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
626
 
627
                /* In the reverse direction, a source manip. */
628
                info->manips[info->num_manips++] =
629
                        ((struct ip_nat_info_manip)
630
                         { IP_CT_DIR_REPLY, opposite_hook[hooknum],
631
                           IP_NAT_MANIP_SRC, inv_tuple.src });
632
                IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
633
        }
634
 
635
        /* If there's a helper, assign it; based on new tuple. */
636
        if (!conntrack->master)
637
                info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
638
                                         &reply);
639
 
640
        /* It's done. */
641
        info->initialized |= (1 << HOOK2MANIP(hooknum));
642
 
643
        if (in_hashes) {
644
                IP_NF_ASSERT(info->bysource.conntrack);
645
                replace_in_hashes(conntrack, info);
646
        } else {
647
                place_in_hashes(conntrack, info);
648
        }
649
 
650
        return NF_ACCEPT;
651
}
652
 
653
void replace_in_hashes(struct ip_conntrack *conntrack,
654
                       struct ip_nat_info *info)
655
{
656
        /* Source has changed, so replace in hashes. */
657
        unsigned int srchash
658
                = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
659
                              .tuple.src,
660
                              conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
661
                              .tuple.dst.protonum);
662
        /* We place packet as seen OUTGOUNG in byips_proto hash
663
           (ie. reverse dst and src of reply packet. */
664
        unsigned int ipsprotohash
665
                = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY]
666
                                   .tuple.dst.ip,
667
                                   conntrack->tuplehash[IP_CT_DIR_REPLY]
668
                                   .tuple.src.ip,
669
                                   conntrack->tuplehash[IP_CT_DIR_REPLY]
670
                                   .tuple.dst.protonum);
671
 
672
        IP_NF_ASSERT(info->bysource.conntrack == conntrack);
673
        MUST_BE_WRITE_LOCKED(&ip_nat_lock);
674
 
675
        list_del(&info->bysource.list);
676
        list_del(&info->byipsproto.list);
677
 
678
        list_prepend(&bysource[srchash], &info->bysource);
679
        list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
680
}
681
 
682
void place_in_hashes(struct ip_conntrack *conntrack,
683
                     struct ip_nat_info *info)
684
{
685
        unsigned int srchash
686
                = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
687
                              .tuple.src,
688
                              conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
689
                              .tuple.dst.protonum);
690
        /* We place packet as seen OUTGOUNG in byips_proto hash
691
           (ie. reverse dst and src of reply packet. */
692
        unsigned int ipsprotohash
693
                = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY]
694
                                   .tuple.dst.ip,
695
                                   conntrack->tuplehash[IP_CT_DIR_REPLY]
696
                                   .tuple.src.ip,
697
                                   conntrack->tuplehash[IP_CT_DIR_REPLY]
698
                                   .tuple.dst.protonum);
699
 
700
        IP_NF_ASSERT(!info->bysource.conntrack);
701
 
702
        MUST_BE_WRITE_LOCKED(&ip_nat_lock);
703
        info->byipsproto.conntrack = conntrack;
704
        info->bysource.conntrack = conntrack;
705
 
706
        list_prepend(&bysource[srchash], &info->bysource);
707
        list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
708
}
709
 
710
static void
711
manip_pkt(u_int16_t proto, struct iphdr *iph, size_t len,
712
          const struct ip_conntrack_manip *manip,
713
          enum ip_nat_manip_type maniptype,
714
          __u32 *nfcache)
715
{
716
        *nfcache |= NFC_ALTERED;
717
        find_nat_proto(proto)->manip_pkt(iph, len, manip, maniptype);
718
 
719
        if (maniptype == IP_NAT_MANIP_SRC) {
720
                iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
721
                                                iph->check);
722
                iph->saddr = manip->ip;
723
        } else {
724
                iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip,
725
                                                iph->check);
726
                iph->daddr = manip->ip;
727
        }
728
#if 0
729
        if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
730
                DEBUGP("IP: checksum on packet bad.\n");
731
 
732
        if (proto == IPPROTO_TCP) {
733
                void *th = (u_int32_t *)iph + iph->ihl;
734
                if (tcp_v4_check(th, len - 4*iph->ihl, iph->saddr, iph->daddr,
735
                                 csum_partial((char *)th, len-4*iph->ihl, 0)))
736
                        DEBUGP("TCP: checksum on packet bad\n");
737
        }
738
#endif
739
}
740
 
741
static inline int exp_for_packet(struct ip_conntrack_expect *exp,
742
                                 struct sk_buff **pskb)
743
{
744
        struct ip_conntrack_protocol *proto;
745
        int ret = 1;
746
 
747
        MUST_BE_READ_LOCKED(&ip_conntrack_lock);
748
        proto = __ip_ct_find_proto((*pskb)->nh.iph->protocol);
749
        if (proto->exp_matches_pkt)
750
                ret = proto->exp_matches_pkt(exp, pskb);
751
 
752
        return ret;
753
}
754
 
755
/* Do packet manipulations according to binding. */
756
unsigned int
757
do_bindings(struct ip_conntrack *ct,
758
            enum ip_conntrack_info ctinfo,
759
            struct ip_nat_info *info,
760
            unsigned int hooknum,
761
            struct sk_buff **pskb)
762
{
763
        unsigned int i;
764
        struct ip_nat_helper *helper;
765
        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
766
        int is_tcp = (*pskb)->nh.iph->protocol == IPPROTO_TCP;
767
 
768
        /* Need nat lock to protect against modification, but neither
769
           conntrack (referenced) and helper (deleted with
770
           synchronize_bh()) can vanish. */
771
        READ_LOCK(&ip_nat_lock);
772
        for (i = 0; i < info->num_manips; i++) {
773
                /* raw socket (tcpdump) may have clone of incoming
774
                   skb: don't disturb it --RR */
775
                if (skb_cloned(*pskb) && !(*pskb)->sk) {
776
                        struct sk_buff *nskb = skb_copy(*pskb, GFP_ATOMIC);
777
                        if (!nskb) {
778
                                READ_UNLOCK(&ip_nat_lock);
779
                                return NF_DROP;
780
                        }
781
                        kfree_skb(*pskb);
782
                        *pskb = nskb;
783
                }
784
 
785
                if (info->manips[i].direction == dir
786
                    && info->manips[i].hooknum == hooknum) {
787
                        DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
788
                               *pskb,
789
                               info->manips[i].maniptype == IP_NAT_MANIP_SRC
790
                               ? "SRC" : "DST",
791
                               NIPQUAD(info->manips[i].manip.ip),
792
                               htons(info->manips[i].manip.u.all));
793
                        manip_pkt((*pskb)->nh.iph->protocol,
794
                                  (*pskb)->nh.iph,
795
                                  (*pskb)->len,
796
                                  &info->manips[i].manip,
797
                                  info->manips[i].maniptype,
798
                                  &(*pskb)->nfcache);
799
                }
800
        }
801
        helper = info->helper;
802
        READ_UNLOCK(&ip_nat_lock);
803
 
804
        if (helper) {
805
                struct ip_conntrack_expect *exp = NULL;
806
                struct list_head *cur_item;
807
                int ret = NF_ACCEPT;
808
                int helper_called = 0;
809
 
810
                DEBUGP("do_bindings: helper existing for (%p)\n", ct);
811
 
812
                /* Always defragged for helpers */
813
                IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
814
                               & htons(IP_MF|IP_OFFSET)));
815
 
816
                /* Have to grab read lock before sibling_list traversal */
817
                READ_LOCK(&ip_conntrack_lock);
818
                list_for_each_prev(cur_item, &ct->sibling_list) {
819
                        exp = list_entry(cur_item, struct ip_conntrack_expect,
820
                                         expected_list);
821
 
822
                        /* if this expectation is already established, skip */
823
                        if (exp->sibling)
824
                                continue;
825
 
826
                        if (exp_for_packet(exp, pskb)) {
827
                                /* FIXME: May be true multiple times in the
828
                                 * case of UDP!! */
829
                                DEBUGP("calling nat helper (exp=%p) for packet\n", exp);
830
                                ret = helper->help(ct, exp, info, ctinfo,
831
                                                   hooknum, pskb);
832
                                if (ret != NF_ACCEPT) {
833
                                        READ_UNLOCK(&ip_conntrack_lock);
834
                                        return ret;
835
                                }
836
                                helper_called = 1;
837
                        }
838
                }
839
                /* Helper might want to manip the packet even when there is no
840
                 * matching expectation for this packet */
841
                if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) {
842
                        DEBUGP("calling nat helper for packet without expectation\n");
843
                        ret = helper->help(ct, NULL, info, ctinfo,
844
                                           hooknum, pskb);
845
                        if (ret != NF_ACCEPT) {
846
                                READ_UNLOCK(&ip_conntrack_lock);
847
                                return ret;
848
                        }
849
                }
850
                READ_UNLOCK(&ip_conntrack_lock);
851
 
852
                /* Adjust sequence number only once per packet
853
                 * (helper is called at all hooks) */
854
                if (is_tcp && (hooknum == NF_IP_POST_ROUTING
855
                               || hooknum == NF_IP_LOCAL_IN)) {
856
                        DEBUGP("ip_nat_core: adjusting sequence number\n");
857
                        /* future: put this in a l4-proto specific function,
858
                         * and call this function here. */
859
                        ip_nat_seq_adjust(*pskb, ct, ctinfo);
860
                }
861
 
862
                return ret;
863
 
864
        } else
865
                return NF_ACCEPT;
866
 
867
        /* not reached */
868
}
869
 
870
unsigned int
871
icmp_reply_translation(struct sk_buff *skb,
872
                       struct ip_conntrack *conntrack,
873
                       unsigned int hooknum,
874
                       int dir)
875
{
876
        struct iphdr *iph = skb->nh.iph;
877
        struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
878
        struct iphdr *inner = (struct iphdr *)(hdr + 1);
879
        size_t datalen = skb->len - ((void *)inner - (void *)iph);
880
        unsigned int i;
881
        struct ip_nat_info *info = &conntrack->nat.info;
882
 
883
        IP_NF_ASSERT(skb->len >= iph->ihl*4 + sizeof(struct icmphdr));
884
        /* Must be RELATED */
885
        IP_NF_ASSERT(skb->nfct - (struct ip_conntrack *)skb->nfct->master
886
                     == IP_CT_RELATED
887
                     || skb->nfct - (struct ip_conntrack *)skb->nfct->master
888
                     == IP_CT_RELATED+IP_CT_IS_REPLY);
889
 
890
        /* Redirects on non-null nats must be dropped, else they'll
891
           start talking to each other without our translation, and be
892
           confused... --RR */
893
        if (hdr->type == ICMP_REDIRECT) {
894
                /* Don't care about races here. */
895
                if (info->initialized
896
                    != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
897
                    || info->num_manips != 0)
898
                        return NF_DROP;
899
        }
900
 
901
        DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
902
               skb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
903
        /* Note: May not be from a NAT'd host, but probably safest to
904
           do translation always as if it came from the host itself
905
           (even though a "host unreachable" coming from the host
906
           itself is a bit weird).
907
 
908
           More explanation: some people use NAT for anonymizing.
909
           Also, CERT recommends dropping all packets from private IP
910
           addresses (although ICMP errors from internal links with
911
           such addresses are not too uncommon, as Alan Cox points
912
           out) */
913
 
914
        READ_LOCK(&ip_nat_lock);
915
        for (i = 0; i < info->num_manips; i++) {
916
                DEBUGP("icmp_reply: manip %u dir %s hook %u\n",
917
                       i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ?
918
                       "ORIG" : "REPLY", info->manips[i].hooknum);
919
 
920
                if (info->manips[i].direction != dir)
921
                        continue;
922
 
923
                /* Mapping the inner packet is just like a normal
924
                   packet, except it was never src/dst reversed, so
925
                   where we would normally apply a dst manip, we apply
926
                   a src, and vice versa. */
927
                if (info->manips[i].hooknum == hooknum) {
928
                        DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
929
                               info->manips[i].maniptype == IP_NAT_MANIP_SRC
930
                               ? "DST" : "SRC",
931
                               NIPQUAD(info->manips[i].manip.ip),
932
                               ntohs(info->manips[i].manip.u.udp.port));
933
                        manip_pkt(inner->protocol, inner,
934
                                  skb->len - ((void *)inner - (void *)iph),
935
                                  &info->manips[i].manip,
936
                                  !info->manips[i].maniptype,
937
                                  &skb->nfcache);
938
                        /* Outer packet needs to have IP header NATed like
939
                           it's a reply. */
940
 
941
                        /* Use mapping to map outer packet: 0 give no
942
                           per-proto mapping */
943
                        DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
944
                               info->manips[i].maniptype == IP_NAT_MANIP_SRC
945
                               ? "SRC" : "DST",
946
                               NIPQUAD(info->manips[i].manip.ip));
947
                        manip_pkt(0, iph, skb->len,
948
                                  &info->manips[i].manip,
949
                                  info->manips[i].maniptype,
950
                                  &skb->nfcache);
951
                }
952
        }
953
        READ_UNLOCK(&ip_nat_lock);
954
 
955
        /* Since we mangled inside ICMP packet, recalculate its
956
           checksum from scratch.  (Hence the handling of incorrect
957
           checksums in conntrack, so we don't accidentally fix one.)  */
958
        hdr->checksum = 0;
959
        hdr->checksum = ip_compute_csum((unsigned char *)hdr,
960
                                        sizeof(*hdr) + datalen);
961
 
962
        return NF_ACCEPT;
963
}
964
 
965
/* Set up the NAT core.
 *
 * Sizes the NAT hash tables like the conntrack table, allocates and
 * initialises them, registers the built-in TCP/UDP/ICMP NAT protocols
 * and installs ip_nat_cleanup_conntrack as the conntrack destroy hook.
 *
 * Returns 0 on success, -ENOMEM if the hash tables cannot be allocated.
 */
int __init ip_nat_init(void)
{
        size_t bucket;

        /* Leave them the same for the moment. */
        ip_nat_htable_size = ip_conntrack_htable_size;

        /* One vmalloc for both hash tables: bysource occupies the
           first half, byipsproto the second. */
        bysource = vmalloc(sizeof(*bysource) * ip_nat_htable_size * 2);
        if (bysource == NULL)
                return -ENOMEM;
        byipsproto = &bysource[ip_nat_htable_size];

        /* Sew in builtin protocols. */
        WRITE_LOCK(&ip_nat_lock);
        list_append(&protos, &ip_nat_protocol_tcp);
        list_append(&protos, &ip_nat_protocol_udp);
        list_append(&protos, &ip_nat_protocol_icmp);
        WRITE_UNLOCK(&ip_nat_lock);

        /* Every bucket of both tables starts out as an empty list. */
        for (bucket = 0; bucket < ip_nat_htable_size; bucket++) {
                INIT_LIST_HEAD(bysource + bucket);
                INIT_LIST_HEAD(byipsproto + bucket);
        }

        /* FIXME: Man, this is a hack.  <SIGH> */
        IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
        ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;

        return 0;
}
997
 
998
/* Clear NAT section of all conntracks, in case we're loaded again. */
999
static int clean_nat(const struct ip_conntrack *i, void *data)
1000
{
1001
        memset((void *)&i->nat, 0, sizeof(i->nat));
1002
        return 0;
1003
}
1004
 
1005
/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
1006
void ip_nat_cleanup(void)
1007
{
1008
        ip_ct_selective_cleanup(&clean_nat, NULL);
1009
        ip_conntrack_destroyed = NULL;
1010
        vfree(bysource);
1011
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.