OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [linux-2.4/] [fs/] [udf/] [unicode.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1275 phoenix
/*
2
 * unicode.c
3
 *
4
 * PURPOSE
5
 *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
6
 *      Also handles filename mangling
7
 *
8
 * DESCRIPTION
9
 *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
10
 *              http://www.osta.org/
11
 *      UTF-8 is explained in IETF RFC 3629.
12
 *              https://www.rfc-editor.org/rfc/rfc3629.txt
13
 *
14
 * CONTACTS
15
 *      E-mail regarding any portion of the Linux UDF file system should be
16
 *      directed to the development team's mailing list (run by majordomo):
17
 *              linux_udf@hpesjro.fc.hp.com
18
 *
19
 * COPYRIGHT
20
 *      This file is distributed under the terms of the GNU General Public
21
 *      License (GPL). Copies of the GPL can be obtained from:
22
 *              ftp://prep.ai.mit.edu/pub/gnu/GPL
23
 *      Each contributing author retains all rights to their own work.
24
 */
25
 
26
#include "udfdecl.h"
27
 
28
#include <linux/kernel.h>
29
#include <linux/string.h>       /* for memset */
30
#include <linux/nls.h>
31
#include <linux/udf_fs.h>
32
 
33
#include "udf_sb.h"
34
 
35
int udf_ustr_to_dchars(uint8_t *dest, const struct ustr *src, int strlen)
36
{
37
        if ( (!dest) || (!src) || (!strlen) || (src->u_len > strlen) )
38
                return 0;
39
        memcpy(dest+1, src->u_name, src->u_len);
40
        dest[0] = src->u_cmpID;
41
        return src->u_len + 1;
42
}
43
 
44
int udf_ustr_to_char(uint8_t *dest, const struct ustr *src, int strlen)
45
{
46
        if ( (!dest) || (!src) || (!strlen) || (src->u_len >= strlen) )
47
                return 0;
48
        memcpy(dest, src->u_name, src->u_len);
49
        return src->u_len;
50
}
51
 
52
/*
 * udf_ustr_to_dstring
 *
 * Store a ustr into a dstring field of dlength bytes: compression ID and
 * name at the front (via udf_ustr_to_dchars), used length in the last byte.
 *
 * Returns dlength on success, 0 when udf_ustr_to_dchars() rejects the input.
 */
int udf_ustr_to_dstring(dstring *dest, const struct ustr *src, int dlength)
{
	if (!udf_ustr_to_dchars(dest, src, dlength - 1))
		return 0;

	/* Final byte of the field records compression ID + name length */
	dest[dlength - 1] = src->u_len + 1;

	return dlength;
}
62
 
63
int udf_dchars_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
64
{
65
        if ( (!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN) )
66
                return 0;
67
        memset(dest, 0, sizeof(struct ustr));
68
        memcpy(dest->u_name, src+1, strlen-1);
69
        dest->u_cmpID = src[0];
70
        dest->u_len = strlen-1;
71
        return strlen-1;
72
}
73
 
74
int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
75
{
76
        if ( (!dest) || (!src) || (!strlen) || (strlen >= UDF_NAME_LEN) )
77
                return 0;
78
        memset(dest, 0, sizeof(struct ustr));
79
        memcpy(dest->u_name, src, strlen);
80
        dest->u_cmpID = 0x08;
81
        dest->u_len = strlen;
82
        return strlen;
83
}
84
 
85
 
86
/*
 * udf_dstring_to_ustr
 *
 * Build a ustr from a dstring field of dlength bytes. The used length is
 * read from the field's last byte and handed to udf_dchars_to_ustr().
 *
 * Returns dlength on success, 0 when dlength is zero or the conversion
 * fails. Note that src is indexed before udf_dchars_to_ustr() gets a chance
 * to check it for NULL.
 */
int udf_dstring_to_ustr(struct ustr *dest, const dstring *src, int dlength)
{
	if (!dlength)
		return 0;

	return udf_dchars_to_ustr(dest, src, src[dlength - 1]) ? dlength : 0;
}
93
 
94
/*
95
 * udf_build_ustr
96
 */
97
int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
98
{
99
        int usesize;
100
 
101
        if ( (!dest) || (!ptr) || (!size) )
102
                return -1;
103
 
104
        memset(dest, 0, sizeof(struct ustr));
105
        usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
106
        dest->u_cmpID=ptr[0];
107
        dest->u_len=ptr[size-1];
108
        memcpy(dest->u_name, ptr+1, usesize-1);
109
        return 0;
110
}
111
 
112
/*
113
 * udf_build_ustr_exact
114
 */
115
int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
116
{
117
        if ( (!dest) || (!ptr) || (!exactsize) )
118
                return -1;
119
 
120
        memset(dest, 0, sizeof(struct ustr));
121
        dest->u_cmpID=ptr[0];
122
        dest->u_len=exactsize-1;
123
        memcpy(dest->u_name, ptr+1, exactsize-1);
124
        return 0;
125
}
126
 
127
/*
128
 * udf_ocu_to_utf8
129
 *
130
 * PURPOSE
131
 *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
132
 *
133
 * DESCRIPTION
134
 *      This routine is only called by udf_filldir().
135
 *
136
 * PRE-CONDITIONS
137
 *      utf                     Pointer to UTF-8 output buffer.
138
 *      ocu                     Pointer to OSTA Compressed Unicode input buffer
139
 *                              of size UDF_NAME_LEN bytes.
140
 *                              both of type "struct ustr *"
141
 *
142
 * POST-CONDITIONS
143
 *      <return>                Zero on success.
144
 *
145
 * HISTORY
146
 *      November 12, 1997 - Andrew E. Mileski
147
 *      Written, tested, and released.
148
 */
149
int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
150
{
151
        uint8_t *ocu;
152
        uint32_t c;
153
        uint8_t cmp_id, ocu_len;
154
        int i;
155
 
156
        ocu = ocu_i->u_name;
157
 
158
        ocu_len = ocu_i->u_len;
159
        cmp_id = ocu_i->u_cmpID;
160
        utf_o->u_len = 0;
161
 
162
        if (ocu_len == 0)
163
        {
164
                memset(utf_o, 0, sizeof(struct ustr));
165
                utf_o->u_cmpID = 0;
166
                utf_o->u_len = 0;
167
                return 0;
168
        }
169
 
170
        if ((cmp_id != 8) && (cmp_id != 16))
171
        {
172
                printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
173
                return 0;
174
        }
175
 
176
        for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
177
        {
178
 
179
                /* Expand OSTA compressed Unicode to Unicode */
180
                c = ocu[i++];
181
                if (cmp_id == 16)
182
                        c = (c << 8) | ocu[i++];
183
 
184
                /* Compress Unicode to UTF-8 */
185
                if (c < 0x80U)
186
                        utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
187
                else if (c < 0x800U)
188
                {
189
                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6));
190
                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
191
                }
192
                else
193
                {
194
                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12));
195
                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f));
196
                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
197
                }
198
        }
199
        utf_o->u_cmpID=8;
200
 
201
        return utf_o->u_len;
202
}
203
 
204
/*
205
 *
206
 * udf_utf8_to_ocu
207
 *
208
 * PURPOSE
209
 *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
210
 *
211
 * DESCRIPTION
212
 *      This routine is only called by udf_lookup().
213
 *
214
 * PRE-CONDITIONS
215
 *      ocu                     Pointer to OSTA Compressed Unicode output
216
 *                              buffer of size UDF_NAME_LEN bytes.
217
 *      utf                     Pointer to UTF-8 input buffer.
218
 *      utf_len                 Length of UTF-8 input buffer in bytes.
219
 *
220
 * POST-CONDITIONS
221
 *      <return>                Zero on success.
222
 *
223
 * HISTORY
224
 *      November 12, 1997 - Andrew E. Mileski
225
 *      Written, tested, and released.
226
 */
227
int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
228
{
229
        unsigned c, i, max_val, utf_char;
230
        int utf_cnt;
231
        int u_len = 0;
232
 
233
        memset(ocu, 0, sizeof(dstring) * length);
234
        ocu[0] = 8;
235
        max_val = 0xffU;
236
 
237
try_again:
238
        utf_char = 0U;
239
        utf_cnt = 0U;
240
        for (i = 0U; i < utf->u_len; i++)
241
        {
242
                c = (uint8_t)utf->u_name[i];
243
 
244
                /* Complete a multi-byte UTF-8 character */
245
                if (utf_cnt)
246
                {
247
                        utf_char = (utf_char << 6) | (c & 0x3fU);
248
                        if (--utf_cnt)
249
                                continue;
250
                }
251
                else
252
                {
253
                        /* Check for a multi-byte UTF-8 character */
254
                        if (c & 0x80U)
255
                        {
256
                                /* Start a multi-byte UTF-8 character */
257
                                if ((c & 0xe0U) == 0xc0U)
258
                                {
259
                                        utf_char = c & 0x1fU;
260
                                        utf_cnt = 1;
261
                                }
262
                                else if ((c & 0xf0U) == 0xe0U)
263
                                {
264
                                        utf_char = c & 0x0fU;
265
                                        utf_cnt = 2;
266
                                }
267
                                else if ((c & 0xf8U) == 0xf0U)
268
                                {
269
                                        utf_char = c & 0x07U;
270
                                        utf_cnt = 3;
271
                                }
272
                                else if ((c & 0xfcU) == 0xf8U)
273
                                {
274
                                        utf_char = c & 0x03U;
275
                                        utf_cnt = 4;
276
                                }
277
                                else if ((c & 0xfeU) == 0xfcU)
278
                                {
279
                                        utf_char = c & 0x01U;
280
                                        utf_cnt = 5;
281
                                }
282
                                else
283
                                        goto error_out;
284
                                continue;
285
                        } else
286
                                /* Single byte UTF-8 character (most common) */
287
                                utf_char = c;
288
                }
289
 
290
                /* Choose no compression if necessary */
291
                if (utf_char > max_val)
292
                {
293
                        if ( 0xffU == max_val )
294
                        {
295
                                max_val = 0xffffU;
296
                                ocu[0] = (uint8_t)0x10U;
297
                                goto try_again;
298
                        }
299
                        goto error_out;
300
                }
301
 
302
                if (max_val == 0xffffU)
303
                {
304
                        ocu[++u_len] = (uint8_t)(utf_char >> 8);
305
                }
306
                ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
307
        }
308
 
309
 
310
        if (utf_cnt)
311
        {
312
error_out:
313
                printk(KERN_ERR "udf: bad UTF-8 character\n");
314
                return 0;
315
        }
316
 
317
        ocu[length - 1] = (uint8_t)u_len + 1;
318
        return u_len + 1;
319
}
320
 
321
int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr *ocu_i)
322
{
323
        uint8_t *ocu;
324
        uint32_t c;
325
        uint8_t cmp_id, ocu_len;
326
        int i;
327
 
328
        ocu = ocu_i->u_name;
329
 
330
        ocu_len = ocu_i->u_len;
331
        cmp_id = ocu_i->u_cmpID;
332
        utf_o->u_len = 0;
333
 
334
        if (ocu_len == 0)
335
        {
336
                memset(utf_o, 0, sizeof(struct ustr));
337
                utf_o->u_cmpID = 0;
338
                utf_o->u_len = 0;
339
                return 0;
340
        }
341
 
342
        if ((cmp_id != 8) && (cmp_id != 16))
343
        {
344
                printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
345
                return 0;
346
        }
347
 
348
        for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
349
        {
350
                /* Expand OSTA compressed Unicode to Unicode */
351
                c = ocu[i++];
352
                if (cmp_id == 16)
353
                        c = (c << 8) | ocu[i++];
354
 
355
                utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
356
                        UDF_NAME_LEN - utf_o->u_len);
357
        }
358
        utf_o->u_cmpID=8;
359
 
360
        return utf_o->u_len;
361
}
362
 
363
/*
 * udf_NLStoCS0
 *
 * PURPOSE
 *	Convert a name in the character set of the given NLS table to OSTA
 *	Compressed Unicode (CS0).
 *
 * DESCRIPTION
 *	Each input character is decoded with nls->char2uni(). The name is
 *	first encoded with compression ID 8 (one byte per character). If a
 *	decoded character is above 0xff, encoding restarts from the beginning
 *	with compression ID 16 (two bytes per character, high byte first).
 *	Input bytes that char2uni() cannot decode are skipped.
 *
 * PRE-CONDITIONS
 *	nls			NLS table supplying char2uni().
 *	ocu			Output dstring field of 'length' bytes. ocu[0]
 *				receives the compression ID, the name follows,
 *				and ocu[length - 1] receives the used length.
 *	uni			Input ustr holding the name to convert.
 *	length			Size of the ocu field in bytes.
 *
 * POST-CONDITIONS
 *	<return>		Number of bytes used in ocu, including the
 *				compression ID. Zero when the name does not
 *				fit in the field or length is below 2.
 */
int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int length)
{
	unsigned i, max_val;
	uint16_t uni_char;
	int len;	/* signed: char2uni() reports failure with a negative value */
	int u_len;
	int u_ch;	/* output bytes per character: 1 or 2 */

	/* Need room for at least the compression ID and the length byte */
	if (length < 2)
		return 0;

	memset(ocu, 0, sizeof(dstring) * length);
	ocu[0] = 8;
	max_val = 0xffU;
	u_ch = 1;

try_again:
	/* Restart the output too, or 16-bit data lands after stale 8-bit data */
	u_len = 0;
	for (i = 0U; i < uni->u_len; i++) {
		len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
		if (len <= 0)
			continue;

		/* Choose no compression if necessary */
		if (uni_char > max_val) {
			max_val = 0xffffU;
			ocu[0] = (uint8_t)0x10U;
			u_ch = 2;
			goto try_again;
		}

		/* Name bytes may use ocu[1] .. ocu[length - 2] only */
		if (u_len + u_ch > length - 2)
			return 0;

		if (u_ch == 2)
			ocu[++u_len] = (uint8_t)(uni_char >> 8);
		ocu[++u_len] = (uint8_t)(uni_char & 0xffU);

		/* Step over the rest of a multi-byte input character */
		i += len - 1;
	}

	ocu[length - 1] = (uint8_t)u_len + 1;
	return u_len + 1;
}
399
 
400
int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, int flen)
401
{
402
        struct ustr filename, unifilename;
403
        int len;
404
 
405
        if (udf_build_ustr_exact(&unifilename, sname, flen))
406
        {
407
                return 0;
408
        }
409
 
410
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
411
        {
412
                if (!udf_CS0toUTF8(&filename, &unifilename) )
413
                {
414
                        udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
415
                        return 0;
416
                }
417
        }
418
        else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
419
        {
420
                if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) )
421
                {
422
                        udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
423
                        return 0;
424
                }
425
        }
426
        else
427
                return 0;
428
 
429
        if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
430
                unifilename.u_name, unifilename.u_len)))
431
        {
432
                return len;
433
        }
434
        return 0;
435
}
436
 
437
#define ILLEGAL_CHAR_MARK	'_'
#define EXT_MARK			'.'
#define CRC_MARK			'#'
#define EXT_SIZE			5
/* Longest name udf_translate_to_linux() produces, in bytes */
#define UDF_TRANSLATED_MAXLEN	255
/* CRC_MARK followed by four hex digits */
#define UDF_CRC_SUFFIX_LEN	5

/*
 * udf_translate_to_linux
 *
 * PURPOSE
 *	Make a converted UDF name usable as a Linux file name.
 *
 * DESCRIPTION
 *	Runs of '/' and NUL bytes are collapsed into one ILLEGAL_CHAR_MARK.
 *	The names "." and ".." are copied unchanged. Whenever the name was
 *	altered, is "." or "..", or is too long, a suffix of CRC_MARK plus
 *	four hex digits of udf_crc(fidName, fidNameLen, 0) is appended. An
 *	extension (an EXT_MARK with 1 to EXT_SIZE bytes after it) is kept
 *	after the CRC suffix. The base name is truncated as needed so the
 *	result, with or without an extension, never exceeds
 *	UDF_TRANSLATED_MAXLEN bytes.
 *
 * PRE-CONDITIONS
 *	newName			Output buffer for the translated name.
 *	udfName, udfLen		Converted name and its length in bytes.
 *	fidName, fidNameLen	Bytes the CRC suffix is computed over.
 *
 * POST-CONDITIONS
 *	<return>		Number of bytes written to newName. No
 *				terminator is written.
 */
int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, uint8_t *fidName, int fidNameLen)
{
	int index, newIndex = 0, needsCRC = 0;
	int extIndex = 0, newExtIndex = 0, hasExt = 0;
	unsigned short valueCRC;
	uint8_t curr;
	const uint8_t hexChar[] = "0123456789ABCDEF";

	if (udfName[0] == '.' && (udfLen == 1 ||
		(udfLen == 2 && udfName[1] == '.'))) {
		needsCRC = 1;
		newIndex = udfLen;
		memcpy(newName, udfName, udfLen);
	} else {
		for (index = 0; index < udfLen; index++) {
			curr = udfName[index];
			if (curr == '/' || curr == 0) {
				needsCRC = 1;
				curr = ILLEGAL_CHAR_MARK;
				/* Swallow the rest of a run of illegal bytes */
				while (index + 1 < udfLen && (udfName[index + 1] == '/' ||
					udfName[index + 1] == 0))
					index++;
			}
			/* Remember the last EXT_MARK close enough to the end */
			if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE) {
				if (udfLen == index + 1) {
					hasExt = 0;
				} else {
					hasExt = 1;
					extIndex = index;
					newExtIndex = newIndex;
				}
			}
			if (newIndex < UDF_TRANSLATED_MAXLEN)
				newName[newIndex++] = curr;
			else
				needsCRC = 1;
		}
	}

	if (needsCRC) {
		uint8_t ext[EXT_SIZE];
		int localExtIndex = 0;

		if (hasExt) {
			int maxFilenameLen;

			/* Collect the cleaned-up extension, without the EXT_MARK */
			for (index = 0; index < EXT_SIZE && extIndex + index + 1 < udfLen;
				index++) {
				curr = udfName[extIndex + index + 1];

				if (curr == '/' || curr == 0) {
					curr = ILLEGAL_CHAR_MARK;
					while (extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE
						&& (udfName[extIndex + index + 2] == '/' ||
							udfName[extIndex + index + 2] == 0)))
						index++;
				}
				ext[localExtIndex++] = curr;
			}

			/* Leave room for the CRC suffix, the EXT_MARK and the extension */
			maxFilenameLen = UDF_TRANSLATED_MAXLEN - UDF_CRC_SUFFIX_LEN
				- 1 - localExtIndex;
			if (newIndex > maxFilenameLen)
				newIndex = maxFilenameLen;
			else
				newIndex = newExtIndex;
		} else if (newIndex > UDF_TRANSLATED_MAXLEN - UDF_CRC_SUFFIX_LEN) {
			newIndex = UDF_TRANSLATED_MAXLEN - UDF_CRC_SUFFIX_LEN;
		}

		newName[newIndex++] = CRC_MARK;
		valueCRC = udf_crc(fidName, fidNameLen, 0);
		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];

		if (hasExt) {
			newName[newIndex++] = EXT_MARK;
			for (index = 0; index < localExtIndex; index++)
				newName[newIndex++] = ext[index];
		}
	}

	return newIndex;
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.