OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [umsdos/] [mangle.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 *  linux/fs/umsdos/mangle.c
3
 *
4
 *      Written 1993 by Jacques Gelinas
5
 *
6
 * Control the mangling of file names to fit the MSDOS name space.
7
 * Many optimisations by GLU == dglaude@is1.vub.ac.be (Glaude David)
8
 */
9
 
10
#include <linux/errno.h>
11
#include <linux/string.h>
12
#include <linux/kernel.h>
13
#include <linux/umsdos_fs.h>
14
 
15
/* (This file is used outside of the kernel) */
16
#ifndef __KERNEL__
17
#define KERN_WARNING
18
#endif
19
 
20
/*
21
 * Complete the mangling of the MSDOS fake name
22
 * based on the position of the entry in the EMD file.
23
 *
24
 * Simply complete the job of umsdos_parse; fill the extension.
25
 *
26
 * Beware that info->f_pos must be set.
27
 */
28
void umsdos_manglename (struct umsdos_info *info)
29
{
30
        if (info->msdos_reject) {
31
                /* #Specification: file name / non MSDOS conforming / mangling
32
                 * Each non MSDOS conforming file has a special extension
33
                 * build from the entry position in the EMD file.
34
                 *
35
                 * This number is then transform in a base 32 number, where
36
                 * each digit is expressed like hexadecimal number, using
37
                 * digit and letter, except it uses 22 letters from 'a' to 'v'.
38
                 * The number 32 comes from 2**5. It is faster to split a binary
39
                 * number using a base which is a power of two. And I was 32
40
                 * when I started this project. Pick your answer :-) .
41
                 *
42
                 * If the result is '0', it is replace with '_', simply
43
                 * to make it odd.
44
                 *
45
                 * This is true for the first two character of the extension.
46
                 * The last one is taken from a list of odd character, which
47
                 * are:
48
                 *
49
                 * { } ( ) ! ` ^ & @
50
                 *
51
                 * With this scheme, we can produce 9216 ( 9* 32 * 32)
52
                 * different extensions which should not clash with any useful
53
                 * extension already popular or meaningful. Since most directory
54
                 * have much less than 32 * 32 files in it, the first character
55
                 * of the extension of any mangled name will be {.
56
                 *
57
                 * Here are the reason to do this (this kind of mangling).
58
                 *
59
                 * -The mangling is deterministic. Just by the extension, we
60
                 * are able to locate the entry in the EMD file.
61
                 *
62
                 * -By keeping to beginning of the file name almost unchanged,
63
                 * we are helping the MSDOS user.
64
                 *
65
                 * -The mangling produces names not too ugly, so an msdos user
66
                 * may live with it (remember it, type it, etc...).
67
                 *
68
                 * -The mangling produces names ugly enough so no one will
69
                 * ever think of using such a name in real life. This is not
70
                 * fool proof. I don't think there is a total solution to this.
71
                 */
72
                int entry_num;
73
                char *pt = info->fake.fname + info->fake.len;
74
                /* lookup for encoding the last character of the extension
75
                 * It contains valid character after the ugly one to make sure
76
                 * even if someone overflows the 32 * 32 * 9 limit, it still
77
                 * does something
78
                 */
79
#define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
80
                static char lookup3[] =
81
                {
82
                        SPECIAL_MANGLING,
83
                /* This is the start of lookup12 */
84
                        '_', '1', '2', '3', '4', '5', '6', '7', '8', '9',
85
                        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
86
                        'p', 'q', 'r', 's', 't', 'u', 'v'
87
                };
88
 
89
#define lookup12 (lookup3+9)
90
                entry_num = info->f_pos / UMSDOS_REC_SIZE;
91
                if (entry_num > (9* 32 * 32)){
92
                        printk (KERN_WARNING "UMSDOS: more than 9216 files in a directory.\n"
93
                                "This may break the mangling strategy.\n"
94
                                "Not a killer problem. See doc.\n");
95
                }
96
                *pt++ = '.';
97
                *pt++ = lookup3 [(entry_num >> 10) & 31];
98
                *pt++ = lookup12[(entry_num >> 5) & 31];
99
                *pt++ = lookup12[entry_num & 31];
100
                *pt = '\0';             /* help doing printk */
101
                info->fake.len += 4;
102
                info->msdos_reject = 0;          /* Avoid mangling twice */
103
        }
104
}
105
 
106
/*
107
 * Evaluate the record size needed to store of name of len character.
108
 * The value returned is a multiple of UMSDOS_REC_SIZE.
109
 */
110
int umsdos_evalrecsize (int len)
111
{
112
        struct umsdos_dirent dirent;
113
        int nbrec = 1 + ((len - 1 + (dirent.name - (char *) &dirent))
114
                         / UMSDOS_REC_SIZE);
115
 
116
        return nbrec * UMSDOS_REC_SIZE;
117
        /*
118
         * GLU        This should be inlined or something to speed it up to the max.
119
         * GLU        nbrec is absolutely not needed to return the value.
120
         */
121
}
122
#ifdef TEST
123
int umsdos_evalrecsize_old (int len)
124
{
125
        struct umsdos_dirent dirent;
126
        int size = len + (dirent.name - (char *) &dirent);
127
        int nbrec = size / UMSDOS_REC_SIZE;
128
        int extra = size % UMSDOS_REC_SIZE;
129
 
130
        if (extra > 0)
131
                nbrec++;
132
        return nbrec * UMSDOS_REC_SIZE;
133
}
134
#endif
135
 
136
 
137
/*
138
 * Fill the struct info with the full and msdos name of a file
139
 * Return 0 if all is OK, a negative error code otherwise.
140
 */
141
int umsdos_parse (
142
                         const char *fname,
143
                         int len,
144
                         struct umsdos_info *info)
145
{
146
        int ret = -ENAMETOOLONG;
147
 
148
        /* #Specification: file name / too long
149
         * If a file name exceed UMSDOS maxima, the file name is silently
150
         * truncated. This makes it conformant with the other file system
151
         * of Linux (minix and ext2 at least).
152
         */
153
        if (len > UMSDOS_MAXNAME)
154
                len = UMSDOS_MAXNAME;
155
        {
156
                const char *firstpt = NULL;     /* First place we saw a "." in fname */
157
 
158
                /* #Specification: file name / non MSDOS conforming / base length 0
159
                 * file names beginning with a period '.' are invalid for MS-DOS.
160
                 * It needs absolutely a base name. So the file name is mangled
161
                 */
162
                int ivldchar = fname[0] == '.';          /* At least one invalid character */
163
                int msdos_len = len;
164
                int base_len;
165
 
166
                /*
167
                 * cardinal_per_size tells if there exists at least one
168
                 * DOS pseudo device on length n.  See the test below.
169
                 */
170
                static const char cardinal_per_size[9] =
171
                {
172
                        0, 0, 0, 1, 1, 0, 1, 0, 1
173
                };
174
 
175
                /*
176
                 * lkp translate all character to acceptable character (for DOS).
177
                 * When lkp[n] == n, it means also it is an acceptable one.
178
                 * So it serves both as a flag and as a translator.
179
                 */
180
                static char lkp[256];
181
                static char is_init = 0;
182
 
183
                if (!is_init) {
184
                        /*
185
                         * Initialisation of the array is easier and less error
186
                         * prone like this.
187
                         */
188
                        int i;
189
                        static const char *spc = "\"*+,/:;<=>?[\\]|~";
190
 
191
                        is_init = 1;
192
                        for (i = 0; i <= 32; i++)
193
                                lkp[i] = '#';
194
                        for (i = 33; i < 'A'; i++)
195
                                lkp[i] = (char) i;
196
                        for (i = 'A'; i <= 'Z'; i++)
197
                                lkp[i] = (char) (i + ('a' - 'A'));
198
                        for (i = 'Z' + 1; i < 127; i++)
199
                                lkp[i] = (char) i;
200
                        for (i = 128; i < 256; i++)
201
                                lkp[i] = '#';
202
 
203
                        lkp['.'] = '_';
204
                        while (*spc != '\0')
205
                                lkp[(unsigned char) (*spc++)] = '#';
206
                }
207
                /*  GLU
208
                 * File names longer than 8+'.'+3 are invalid for MS-DOS,
209
                 * so the file name is to be mangled--no further test is needed.
210
                 * This speeds up handling of long names.
211
                 * The position of the last point is no more necessary anyway.
212
                 */
213
                if (len <= (8 + 1 + 3)) {
214
                        const char *pt = fname;
215
                        const char *endpt = fname + len;
216
 
217
                        while (pt < endpt) {
218
                                if (*pt == '.') {
219
                                        if (firstpt != NULL) {
220
                                                /* 2 . in a file name. Reject */
221
                                                ivldchar = 1;
222
                                                break;
223
                                        } else {
224
                                                int extlen = (int) (endpt - pt);
225
 
226
                                                firstpt = pt;
227
                                                if (firstpt - fname > 8) {
228
                                                        /* base name longer than 8: reject */
229
                                                        ivldchar = 1;
230
                                                        break;
231
                                                } else if (extlen > 4) {
232
                                                        /* Extension longer than 4 (including .): reject */
233
                                                        ivldchar = 1;
234
                                                        break;
235
                                                } else if (extlen == 1) {
236
                                                        /* #Specification: file name / non MSDOS conforming / last char == .
237
                                                         * If the last character of a file name is
238
                                                         * a period, mangling is applied. MS-DOS does
239
                                                         * not support those file names.
240
                                                         */
241
                                                        ivldchar = 1;
242
                                                        break;
243
                                                } else if (extlen == 4) {
244
                                                        /* #Specification: file name / non MSDOS conforming / mangling clash
245
                                                         * To avoid clash with    the umsdos mangling, any file
246
                                                         * with a special character as the first character
247
                                                         * of the extension will be mangled. This solves the
248
                                                         * following problem:
249
                                                         *
250
                                                         * #
251
                                                         * touch FILE
252
                                                         * # FILE is invalid for DOS, so mangling is applied
253
                                                         * # file.{_1 is created in the DOS directory
254
                                                         * touch file.{_1
255
                                                         * # To UMSDOS file point to a single DOS entry.
256
                                                         * # So file.{_1 has to be mangled.
257
                                                         * #
258
                                                         */
259
                                                        static char special[] =
260
                                                        {
261
                                                                SPECIAL_MANGLING, '\0'
262
                                                        };
263
 
264
                                                        if (strchr (special, firstpt[1]) != NULL) {
265
                                                                ivldchar = 1;
266
                                                                break;
267
                                                        }
268
                                                }
269
                                        }
270
                                } else if (lkp[(unsigned char) (*pt)] != *pt) {
271
                                        ivldchar = 1;
272
                                        break;
273
                                }
274
                                pt++;
275
                        }
276
                } else {
277
                        ivldchar = 1;
278
                }
279
                if (ivldchar
280
                    || (firstpt == NULL && len > 8)
281
                    || (len == UMSDOS_EMD_NAMELEN
282
                        && memcmp (fname, UMSDOS_EMD_FILE, UMSDOS_EMD_NAMELEN) == 0)) {
283
                        /* #Specification: file name / --linux-.---
284
                         * The name of the EMD file --linux-.--- is map to a mangled
285
                         * name. So UMSDOS does not restrict its use.
286
                         */
287
                        /* #Specification: file name / non MSDOS conforming / mangling
288
                         * Non MSDOS conforming file names must use some alias to fit
289
                         * in the MSDOS name space.
290
                         *
291
                         * The strategy is simple. The name is simply truncated to
292
                         * 8 char. points are replace with underscore and a
293
                         * number is given as an extension. This number correspond
294
                         * to the entry number in the EMD file. The EMD file
295
                         * only need to carry the real name.
296
                         *
297
                         * Upper case is also converted to lower case.
298
                         * Control character are converted to #.
299
                         * Spaces are converted to #.
300
                         * The following characters are also converted to #.
301
                         * #
302
                         * " * + , / : ; < = > ? [ \ ] | ~
303
                         * #
304
                         *
305
                         * Sometimes the problem is not in MS-DOS itself but in
306
                         * command.com.
307
                         */
308
                        int i;
309
                        char *pt = info->fake.fname;
310
 
311
                        base_len = msdos_len = (msdos_len > 8) ? 8 : msdos_len;
312
                        /*
313
                         * There is no '.' any more so we know for a fact that
314
                         * the base length is the length.
315
                         */
316
                        memcpy (info->fake.fname, fname, msdos_len);
317
                        for (i = 0; i < msdos_len; i++, pt++)
318
                                *pt = lkp[(unsigned char) (*pt)];
319
                        *pt = '\0';     /* GLU  We force null termination. */
320
                        info->msdos_reject = 1;
321
                        /*
322
                         * The numeric extension is added only when we know
323
                         * the position in the EMD file, in umsdos_newentry(),
324
                         * umsdos_delentry(), and umsdos_findentry().
325
                         * See umsdos_manglename().
326
                         */
327
                } else {
328
                        /* Conforming MSDOS file name */
329
                        strncpy (info->fake.fname, fname, len);
330
                        info->msdos_reject = 0;
331
                        base_len = firstpt != NULL ? (int) (firstpt - fname) : len;
332
                }
333
                if (cardinal_per_size[base_len]) {
334
                        /* #Specification: file name / MSDOS devices / mangling
335
                         * To avoid unreachable file from MS-DOS, any MS-DOS conforming
336
                         * file with a basename equal to one of the MS-DOS pseudo
337
                         * devices will be mangled.
338
                         *
339
                         * If a file such as "prn" was created, it would be unreachable
340
                         * under MS-DOS because "prn" is assumed to be the printer, even
341
                         * if the file does have an extension.
342
                         *
343
                         * Since the extension is unimportant to MS-DOS, we must patch
344
                         * the basename also. We simply insert a minus '-'. To avoid
345
                         * conflict with valid file with a minus in front (such as
346
                         * "-prn"), we add an mangled extension like any other
347
                         * mangled file name.
348
                         *
349
                         * Here is the list of DOS pseudo devices:
350
                         *
351
                         * #
352
                         * "prn","con","aux","nul",
353
                         * "lpt1","lpt2","lpt3","lpt4",
354
                         * "com1","com2","com3","com4",
355
                         * "clock$"
356
                         * #
357
                         *
358
                         * and some standard ones for common DOS programs
359
                         *
360
                         * "emmxxxx0","xmsxxxx0","setverxx"
361
                         *
362
                         * (Thanks to Chris Hall <cah17@phoenix.cambridge.ac.uk>
363
                         * for pointing these out to me).
364
                         *
365
                         * Is there one missing?
366
                         */
367
                        /* This table must be ordered by length */
368
                        static const char *tbdev[] =
369
                        {
370
                                "prn", "con", "aux", "nul",
371
                                "lpt1", "lpt2", "lpt3", "lpt4",
372
                                "com1", "com2", "com3", "com4",
373
                                "clock$",
374
                                "emmxxxx0", "xmsxxxx0", "setverxx"
375
                        };
376
 
377
                        /* Tell where to find in tbdev[], the first name of */
378
                        /* a certain length */
379
                        static const char start_ind_dev[9] =
380
                        {
381
                                0, 0, 0, 4, 12, 12, 13, 13, 16
382
                        };
383
                        char basen[9];
384
                        int i;
385
 
386
                        for (i = start_ind_dev[base_len - 1]; i < start_ind_dev[base_len]; i++) {
387
                                if (memcmp (info->fake.fname, tbdev[i], base_len) == 0) {
388
                                        memcpy (basen, info->fake.fname, base_len);
389
                                        basen[base_len] = '\0';         /* GLU  We force null termination. */
390
                                        /*
391
                                         * GLU        We do that only if necessary; we try to do the
392
                                         * GLU        simple thing in the usual circumstance.
393
                                         */
394
                                        info->fake.fname[0] = '-';
395
                                        strcpy (info->fake.fname + 1, basen);   /* GLU  We already guaranteed a null would be at the end. */
396
                                        msdos_len = (base_len == 8) ? 8 : base_len + 1;
397
                                        info->msdos_reject = 1;
398
                                        break;
399
                                }
400
                        }
401
                }
402
                info->fake.fname[msdos_len] = '\0';     /* Help doing printk */
403
                /* GLU      This zero should (always?) be there already. */
404
                info->fake.len = msdos_len;
405
                /* Why not use info->fake.len everywhere? Is it longer?
406
                 */
407
                memcpy (info->entry.name, fname, len);
408
                info->entry.name[len] = '\0';   /* for printk */
409
                info->entry.name_len = len;
410
                ret = 0;
411
        }
412
        /*
413
         * Evaluate how many records are needed to store this entry.
414
         */
415
        info->recsize = umsdos_evalrecsize (len);
416
        return ret;
417
}
418
 
419
#ifdef TEST
420
 
421
/*
 * One test vector for umsdos_parse().
 * The strings are only ever read (they are initialised from string
 * literals), hence the const qualifiers.
 */
struct MANG_TEST {
        const char *fname;      /* Name to validate */
        int msdos_reject;       /* Expected msdos_reject flag */
        const char *msname;     /* Expected msdos name */
};
426
 
427
/*
 * Test vectors for umsdos_parse():
 * { name given, expected msdos_reject, expected fake name }.
 */
struct MANG_TEST tb[] =
{
        /* Names that already conform to MSDOS rules */
        { "hello",    0, "hello"    },
        { "hello.1",  0, "hello.1"  },
        { "hello.1_", 0, "hello.1_" },
        { "prm",      0, "prm"      },

#ifdef PROPOSITION
        { "HELLO",   1, "hello"   },
        { "Hello.1", 1, "hello.1" },
        { "Hello.c", 1, "hello.c" },
#else
/*
 * The three results below are unfortunate.  The proposal is to fold
 * the name to lower case in a quick preliminary pass and only then
 * look for other troublesome characters.  The change has not been
 * made because it is not easy, but the principle is worth recording.
 * It would obviously raise the chance of collisions (between "HELLO"
 * and "Hello" for instance), but those can be handled elsewhere,
 * together with the other collisions.
 */

        { "HELLO",   1, "hello"   },
        { "Hello.1", 1, "hello_1" },
        { "Hello.c", 1, "hello_c" },
#endif

        /* Names that must be mangled */
        { "hello.{_1",          1, "hello_{_" },
        { "hello\t",            1, "hello#"   },
        { "hello.1.1",          1, "hello_1_" },
        { "hel,lo",             1, "hel#lo"   },
        { "Salut.Tu.vas.bien?", 1, "salut_tu" },
        { ".profile",           1, "_profile" },
        { ".xv",                1, "_xv"      },
        { "toto.",              1, "toto_"    },

        /* DOS pseudo devices */
        { "clock$.x",      1, "-clock$"  },
        { "emmxxxx0",      1, "-emmxxxx" },
        { "emmxxxx0.abcd", 1, "-emmxxxx" },
        { "aux",           1, "-aux"     },
        { "prn",           1, "-prn"     },
        { "prn.abc",       1, "-prn"     },
        { "PRN",           1, "-prn"     },
  /*
   * GLU        WARNING:  the entries below give different results with
   * GLU        this version of the mangling than with the original one.
   * GLU        CAUSE:  the way the baselen variable is computed.
   * GLU                In the original it is always 3.
   * GLU                Here it is respectively 7, 8, and 8.
   */
        { "PRN.abc",        1, "prn_abc"  },
        { "Prn.abcd",       1, "prn_abcd" },
        { "prn.abcd",       1, "prn_abcd" },
        { "Prn.abcdefghij", 1, "prn_abcd" }
};
482
 
483
int main (int argc, char *argv[])
484
{
485
        int i, rold, rnew;
486
 
487
        printf ("Testing the umsdos_parse.\n");
488
        for (i = 0; i < sizeof (tb) / sizeof (tb[0]); i++) {
489
                struct MANG_TEST *pttb = tb + i;
490
                struct umsdos_info info;
491
                int ok = umsdos_parse (pttb->fname, strlen (pttb->fname), &info);
492
 
493
                if (strcmp (info.fake.fname, pttb->msname) != 0) {
494
                        printf ("**** %s -> ", pttb->fname);
495
                        printf ("%s <> %s\n", info.fake.fname, pttb->msname);
496
                } else if (info.msdos_reject != pttb->msdos_reject) {
497
                        printf ("**** %s -> %s ", pttb->fname, pttb->msname);
498
                        printf ("%d <> %d\n", info.msdos_reject, pttb->msdos_reject);
499
                } else {
500
                        printf ("     %s -> %s %d\n", pttb->fname, pttb->msname
501
                                ,pttb->msdos_reject);
502
                }
503
        }
504
        printf ("Testing the new umsdos_evalrecsize.");
505
        for (i = 0; i < UMSDOS_MAXNAME; i++) {
506
                rnew = umsdos_evalrecsize (i);
507
                rold = umsdos_evalrecsize_old (i);
508
                if (!(i % UMSDOS_REC_SIZE)) {
509
                        printf ("\n%d:\t", i);
510
                }
511
                if (rnew != rold) {
512
                        printf ("**** %d newres: %d != %d \n", i, rnew, rold);
513
                } else {
514
                        printf (".");
515
                }
516
        }
517
        printf ("\nEnd of Testing.\n");
518
 
519
        return 0;
520
}
521
 
522
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.