OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [uclinux/] [uClinux-2.0.x/] [fs/] [hpfs/] [hpfs_caps.c] - Blame information for rev 199

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 199 simons
/* Capitalization rules for HPFS */
2
 
3
/* In OS/2, HPFS filenames preserve upper and lower case letter distinctions
4
   but filename matching ignores case.  That is, creating a file "Foo"
5
   actually creates a file named "Foo" which can be looked up as "Foo",
6
   "foo", or "FOO", among other possibilities.
7
 
8
   Also, HPFS is internationalized -- a table giving the uppercase
9
   equivalent of every character is stored in the filesystem, so that
10
   any national character set may be used.  If several different
11
   national character sets are in use, several tables are stored
12
   in the filesystem.
13
 
14
   It would be perfectly reasonable for Linux HPFS to act as a Unix
15
   filesystem and match "Foo" only if asked for "Foo" exactly.  But
16
   the sort order of HPFS directories is case-insensitive, so Linux
17
   still has to know the capitalization rules used by OS/2.  Because
18
   of this, it turns out to be more natural for us to be case-insensitive
19
   than not.
20
 
21
   Currently the standard character set used by Linux is Latin-1.
22
   Work is underway to permit people to use UTF-8 instead, therefore
23
   all code that depends on the character set is segregated here.
24
 
25
   (It would be wonderful if Linux HPFS could be independent of what
26
   character set is in use on the Linux side, but because of the
27
   necessary case folding this is impossible.)
28
 
29
   There is a map from Latin-1 into code page 850 for every printing
30
   character in Latin-1.  The NLS documentation of OS/2 shows that
31
   everybody has 850 available unless they don't have Western Latin
32
   chars available at all (so fitting them to Linux without Unicode
33
   is a doomed exercise).
34
 
35
   It is not clear exactly how HPFS.IFS handles the situation when
36
   multiple code pages are in use.  Experiments show that
37
 
38
   - tables on the disk give uppercasing rules for the installed code pages
39
 
40
   - each directory entry is tagged with what code page was current
41
     when that name was created
42
 
43
   - doing just CHCP, without changing what's on the disk in any way,
44
     can change what DIR reports, and what name a case-folded match
45
     will match.
46
 
47
   This means, I think, that HPFS.IFS operates in the current code
48
   page, without regard to the uppercasing information recorded in
49
   the tables on the disk.  It does record the uppercasing rules
50
   it used, perhaps for CHKDSK, but it does not appear to use them
51
   itself.
52
 
53
   So: Linux, a Latin-1 system, will operate in code page 850.  We
54
   recode between 850 and Latin-1 when dealing with the names actually
55
   on the disk.  We don't use the uppercasing tables either.
56
 
57
   In a hypothetical UTF-8 implementation, one reasonable way to
58
   proceed that matches OS/2 (for least surprise) is: do case
59
   translation in UTF-8, and recode to/from one of the code pages
60
   available on the mounted filesystem.  Reject as invalid any name
61
   containing chars that can't be represented on disk by one of the
62
   code pages OS/2 is using.  Recoding from on-disk names to UTF-8
63
   could use the code page tags, though this is not what OS/2 does. */
64
 
65
 
66
/* Recoding table from code page 850 to Latin-1 for the upper half of
   the character set.  It is indexed by (cp850 character - 128) and
   holds the corresponding Latin-1 character.  The 128 entries are a
   permutation of the values 128..255, so every result is itself in
   the upper half.  Rows below hold 8 entries each, i.e. the first row
   covers cp850 characters 128..135, the second 136..143, and so on.  */
static const unsigned char tb_cp850_to_latin1[128] =
67
{
68
  199, 252, 233, 226, 228, 224, 229, 231,
69
  234, 235, 232, 239, 238, 236, 196, 197,
70
  201, 230, 198, 244, 246, 242, 251, 249,
71
  255, 214, 220, 248, 163, 216, 215, 159,
72
  225, 237, 243, 250, 241, 209, 170, 186,
73
  191, 174, 172, 189, 188, 161, 171, 187,
74
  155, 156, 157, 144, 151, 193, 194, 192,
75
  169, 135, 128, 131, 133, 162, 165, 147,
76
  148, 153, 152, 150, 145, 154, 227, 195,
77
  132, 130, 137, 136, 134, 129, 138, 164,
78
  240, 208, 202, 203, 200, 158, 205, 206,
79
  207, 149, 146, 141, 140, 166, 204, 139,
80
  211, 223, 212, 210, 245, 213, 181, 254,
81
  222, 218, 219, 217, 253, 221, 175, 180,
82
  173, 177, 143, 190, 182, 167, 247, 184,
83
  176, 168, 183, 185, 179, 178, 142, 160,
84
};
85
 
86
/* Recoding table from Latin-1 to code page 850, indexed by
   (Latin-1 character - 128).  The whole table is compiled out by the
   surrounding #if 0; its only user in this file, latin1_to_cp850, is
   compiled out as well.
   NOTE(review): this looks like the exact inverse of
   tb_cp850_to_latin1 (spot checks of a few entries agree) -- confirm
   before re-enabling.  */
#if 0
87
static const unsigned char tb_latin1_to_cp850[128] =
88
{
89
  186, 205, 201, 187, 200, 188, 204, 185,
90
  203, 202, 206, 223, 220, 219, 254, 242,
91
  179, 196, 218, 191, 192, 217, 195, 180,
92
  194, 193, 197, 176, 177, 178, 213, 159,
93
  255, 173, 189, 156, 207, 190, 221, 245,
94
  249, 184, 166, 174, 170, 240, 169, 238,
95
  248, 241, 253, 252, 239, 230, 244, 250,
96
  247, 251, 167, 175, 172, 171, 243, 168,
97
  183, 181, 182, 199, 142, 143, 146, 128,
98
  212, 144, 210, 211, 222, 214, 215, 216,
99
  209, 165, 227, 224, 226, 229, 153, 158,
100
  157, 235, 233, 234, 154, 237, 232, 225,
101
  133, 160, 131, 198, 132, 134, 145, 135,
102
  138, 130, 136, 137, 141, 161, 140, 139,
103
  208, 164, 149, 162, 147, 228, 148, 246,
104
  155, 151, 163, 150, 129, 236, 231, 152,
105
};
106
#endif
107
 
108
/* Latin-1 code points used by the case-folding helpers in this file.
   A_GRAVE..THORN is the range tested by latin1_downcase, with MULTIPLY
   excluded from it; a_grave..thorn is the range tested by
   latin1_upcase, with divide excluded from it.  Each lowercase bound
   is exactly 0x20 above its uppercase counterpart.  */
#define A_GRAVE  0xC0
#define THORN    0xDE
#define MULTIPLY 0xD7
#define a_grave  0xE0
#define thorn    0xFE
#define divide   0xF7
114
 
115
static inline unsigned latin1_upcase (unsigned c)
116
{
117
  if (c - 'a' <= 'z' - 'a'
118
      || (c - a_grave <= thorn - a_grave
119
          && c != divide))
120
    return c - 'a' + 'A';
121
  else
122
    return c;
123
}
124
 
125
static inline unsigned latin1_downcase (unsigned c)
126
{
127
  if (c - 'A' <= 'Z' - 'A'
128
      || (c - A_GRAVE <= THORN - A_GRAVE
129
          && c != MULTIPLY))
130
    return c + 'a' - 'A';
131
  else
132
    return c;
133
}
134
 
135
#if 0
/* Recode one character from Latin-1 to code page 850.  Values in
   128..255 are looked up in tb_latin1_to_cp850; anything else is
   returned unchanged.  Compiled out together with its table.  */
static inline unsigned latin1_to_cp850 (unsigned c)
{
  /* The table has exactly 128 entries, so the upper bound matters:
     without it any C above 255 would index past the end.  */
  if (c >= 128 && c <= 255)
    return tb_latin1_to_cp850[c - 128];

  return c;
}
#endif
144
 
145
static inline unsigned cp850_to_latin1 (unsigned c)
146
{
147
  if ((signed) c - 128 >= 0)
148
    return tb_cp850_to_latin1[c - 128];
149
  else
150
    return c;
151
}
152
 
153
/* Take a code page 850 character, recode it to Latin-1 and return the
   uppercase form of the result.  */
unsigned hpfs_char_to_upper_linux (unsigned c)
{
  unsigned latin1 = cp850_to_latin1 (c);

  return latin1_upcase (latin1);
}
157
 
158
/* Return the uppercase form of a Latin-1 character; no code page
   recoding is performed.  */
unsigned linux_char_to_upper_linux (unsigned c)
{
  unsigned upper = latin1_upcase (c);

  return upper;
}
162
 
163
/* Take a code page 850 character, recode it to Latin-1 and return the
   lowercase form of the result.  */
unsigned hpfs_char_to_lower_linux (unsigned c)
{
  unsigned latin1 = cp850_to_latin1 (c);

  return latin1_downcase (latin1);
}
167
 
168
/* Recode a code page 850 character to Latin-1 without changing its
   case.  */
unsigned hpfs_char_to_linux (unsigned c)
{
  unsigned latin1 = cp850_to_latin1 (c);

  return latin1;
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.