OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [libcpp/] [makeucnid.c] - Blame information for rev 231

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Make ucnid.h from various sources.
2
   Copyright (C) 2005 Free Software Foundation, Inc.
3
 
4
This program is free software; you can redistribute it and/or modify it
5
under the terms of the GNU General Public License as published by the
6
Free Software Foundation; either version 2, or (at your option) any
7
later version.
8
 
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
GNU General Public License for more details.
13
 
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17
 
18
/* Run this program as
19
   ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
20
       > ucnid.h
21
*/
22
 
23
#include <stdio.h>
24
#include <string.h>
25
#include <ctype.h>
26
#include <stdbool.h>
27
#include <stdlib.h>
28
 
29
/* Property bits kept for each code point in flags[].  Every value is a
   distinct power of two, so any combination can be OR-ed together.  */
enum
{
  C99           = 1 << 0,  /* Listed in the [C99] section of ucnid.tab.  */
  CXX           = 1 << 1,  /* Listed in the [CXX] section of ucnid.tab.  */
  digit         = 1 << 2,  /* General category starts with 'N'.  */
  not_NFC       = 1 << 3,  /* Has "NFC_QC; N" in the derived properties.  */
  not_NFKC      = 1 << 4,  /* Has "NFKC_QC; N" in the derived properties.  */
  maybe_not_NFC = 1 << 5   /* Has "NFC_QC; M" in the derived properties.  */
};
37
 
38
/* Tables indexed by code point; only U+0000..U+FFFF are covered.  */

/* Bitwise OR of the property bits (C99, CXX, digit, ...) of each
   code point.  */
static unsigned int flags[0x10000];

/* Up to two code points of decomposition stored by read_table for each
   code point; entries it does not store stay zero.  */
static unsigned short decomp[0x10000][2];

/* Combining class stored by read_table for each code point it
   processes; zero elsewhere.  */
static unsigned char combining_value[0x10000];
41
 
42
/* Report the fatal error S on stderr, followed by a newline, and
   terminate the program with exit status 1.  Does not return.  */

static void
fail (const char *s)
{
  fputs (s, stderr);
  fputc ('\n', stderr);
  exit (1);
}
50
 
51
/* Read ucnid.tab and set the C99 and CXX flags in header[].  */
52
 
53
static void
54
read_ucnid (const char *fname)
55
{
56
  FILE *f = fopen (fname, "r");
57
  unsigned fl = 0;
58
 
59
  if (!f)
60
    fail ("opening ucnid.tab");
61
  for (;;)
62
    {
63
      char line[256];
64
 
65
      if (!fgets (line, sizeof (line), f))
66
        break;
67
      if (strcmp (line, "[C99]\n") == 0)
68
        fl = C99;
69
      else if (strcmp (line, "[CXX]\n") == 0)
70
        fl = CXX;
71
      else if (isxdigit (line[0]))
72
        {
73
          char *l = line;
74
          while (*l)
75
            {
76
              unsigned long start, end;
77
              char *endptr;
78
              start = strtoul (l, &endptr, 16);
79
              if (endptr == l || (*endptr != '-' && ! isspace (*endptr)))
80
                fail ("parsing ucnid.tab [1]");
81
              l = endptr;
82
              if (*l != '-')
83
                end = start;
84
              else
85
                {
86
                  end = strtoul (l + 1, &endptr, 16);
87
                  if (end < start)
88
                    fail ("parsing ucnid.tab, end before start");
89
                  l = endptr;
90
                  if (! isspace (*l))
91
                    fail ("parsing ucnid.tab, junk after range");
92
                }
93
              while (isspace (*l))
94
                l++;
95
              if (end > 0xFFFF)
96
                fail ("parsing ucnid.tab, end too large");
97
              while (start <= end)
98
                flags[start++] |= fl;
99
            }
100
        }
101
    }
102
  if (ferror (f))
103
    fail ("reading ucnid.tab");
104
  fclose (f);
105
}
106
 
107
/* Read UnicodeData.txt and set the 'digit' flag, and
108
   also fill in the 'decomp' table to be the decompositions of
109
   characters for which both the character decomposed and all the code
110
   points in the decomposition are either C99 or CXX.  */
111
 
112
static void
113
read_table (char *fname)
114
{
115
  FILE * f = fopen (fname, "r");
116
 
117
  if (!f)
118
    fail ("opening UnicodeData.txt");
119
  for (;;)
120
    {
121
      char line[256];
122
      unsigned long codepoint, this_decomp[4];
123
      char *l;
124
      int i;
125
      int decomp_useful;
126
 
127
      if (!fgets (line, sizeof (line), f))
128
        break;
129
      codepoint = strtoul (line, &l, 16);
130
      if (l == line || *l != ';')
131
        fail ("parsing UnicodeData.txt, reading code point");
132
      if (codepoint > 0xffff || ! (flags[codepoint] & (C99 | CXX)))
133
        continue;
134
 
135
      do {
136
        l++;
137
      } while (*l != ';');
138
      /* Category value; things starting with 'N' are numbers of some
139
         kind.  */
140
      if (*++l == 'N')
141
        flags[codepoint] |= digit;
142
 
143
      do {
144
        l++;
145
      } while (*l != ';');
146
      /* Canonical combining class; in NFC/NFKC, they must be increasing
147
         (or zero).  */
148
      if (! isdigit (*++l))
149
        fail ("parsing UnicodeData.txt, combining class not number");
150
      combining_value[codepoint] = strtoul (l, &l, 10);
151
      if (*l++ != ';')
152
        fail ("parsing UnicodeData.txt, junk after combining class");
153
 
154
      /* Skip over bidi value.  */
155
      do {
156
        l++;
157
      } while (*l != ';');
158
 
159
      /* Decomposition mapping.  */
160
      decomp_useful = flags[codepoint];
161
      if (*++l == '<')  /* Compatibility mapping. */
162
        continue;
163
      for (i = 0; i < 4; i++)
164
        {
165
          if (*l == ';')
166
            break;
167
          if (!isxdigit (*l))
168
            fail ("parsing UnicodeData.txt, decomposition format");
169
          this_decomp[i] = strtoul (l, &l, 16);
170
          decomp_useful &= flags[this_decomp[i]];
171
          while (isspace (*l))
172
            l++;
173
        }
174
      if (i > 2)  /* Decomposition too long.  */
175
        fail ("parsing UnicodeData.txt, decomposition too long");
176
      if (decomp_useful)
177
        while (--i >= 0)
178
          decomp[codepoint][i] = this_decomp[i];
179
    }
180
  if (ferror (f))
181
    fail ("reading UnicodeData.txt");
182
  fclose (f);
183
}
184
 
185
/* Read DerivedNormalizationProps.txt and set the flags that say whether
186
   a character is in NFC, NFKC, or is context-dependent.  */
187
 
188
static void
189
read_derived (const char *fname)
190
{
191
  FILE * f = fopen (fname, "r");
192
 
193
  if (!f)
194
    fail ("opening DerivedNormalizationProps.txt");
195
  for (;;)
196
    {
197
      char line[256];
198
      unsigned long start, end;
199
      char *l;
200
      bool not_NFC_p, not_NFKC_p, maybe_not_NFC_p;
201
 
202
      if (!fgets (line, sizeof (line), f))
203
        break;
204
      not_NFC_p = (strstr (line, "; NFC_QC; N") != NULL);
205
      not_NFKC_p = (strstr (line, "; NFKC_QC; N") != NULL);
206
      maybe_not_NFC_p = (strstr (line, "; NFC_QC; M") != NULL);
207
      if (! not_NFC_p && ! not_NFKC_p && ! maybe_not_NFC_p)
208
        continue;
209
 
210
      start = strtoul (line, &l, 16);
211
      if (l == line)
212
        fail ("parsing DerivedNormalizationProps.txt, reading start");
213
      if (start > 0xffff)
214
        continue;
215
      if (*l == '.' && l[1] == '.')
216
        end = strtoul (l + 2, &l, 16);
217
      else
218
        end = start;
219
 
220
      while (start <= end)
221
        flags[start++] |= ((not_NFC_p ? not_NFC : 0)
222
                           | (not_NFKC_p ? not_NFKC : 0)
223
                           | (maybe_not_NFC_p ? maybe_not_NFC : 0)
224
                           );
225
    }
226
  if (ferror (f))
227
    fail ("reading DerivedNormalizationProps.txt");
228
  fclose (f);
229
}
230
 
231
/* Write out the table.
232
   The table consists of two words per entry.  The first word is the flags
233
   for the unicode code points up to and including the second word.  */
234
 
235
static void
236
write_table (void)
237
{
238
  unsigned i;
239
  unsigned last_flag = flags[0];
240
  bool really_safe = decomp[0][0] == 0;
241
  unsigned char last_combine = combining_value[0];
242
 
243
  for (i = 1; i <= 65536; i++)
244
    if (i == 65536
245
        || (flags[i] != last_flag && ((flags[i] | last_flag) & (C99 | CXX)))
246
        || really_safe != (decomp[i][0] == 0)
247
        || combining_value[i] != last_combine)
248
      {
249
        printf ("{ %s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
250
                last_flag & C99 ? "C99" : "  0",
251
                last_flag & digit ? "DIG" : "  0",
252
                last_flag & CXX ? "CXX" : "  0",
253
                really_safe ? "CID" : "  0",
254
                last_flag & not_NFC ? "  0" : "NFC",
255
                last_flag & not_NFKC ? "  0" : "NKC",
256
                last_flag & maybe_not_NFC ? "CTX" : "  0",
257
                combining_value[i - 1],
258
                i - 1);
259
        last_flag = flags[i];
260
        last_combine = combining_value[0];
261
        really_safe = decomp[i][0] == 0;
262
      }
263
}
264
 
265
/* Emit the comment that heads the generated file on stdout: the FSF
   notice followed by the Unicode, Inc. notice, then one extra
   newline.  */

static void
write_copyright (void)
{
  static const char notice[] =
    "/* Unicode characters and various properties.\n"
    "   Copyright (C) 2003, 2005 Free Software Foundation, Inc.\n"
    "\n"
    "   This program is free software; you can redistribute it and/or modify it\n"
    "   under the terms of the GNU General Public License as published by the\n"
    "   Free Software Foundation; either version 2, or (at your option) any\n"
    "   later version.\n"
    "\n"
    "   This program is distributed in the hope that it will be useful,\n"
    "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
    "   GNU General Public License for more details.\n"
    "\n"
    "   You should have received a copy of the GNU General Public License\n"
    "   along with this program; if not, write to the Free Software\n"
    "   Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\n"
    "\n"
    "\n"
    "   Copyright (C) 1991-2005 Unicode, Inc.  All rights reserved.\n"
    "   Distributed under the Terms of Use in\n"
    "   http://www.unicode.org/copyright.html.\n"
    "\n"
    "   Permission is hereby granted, free of charge, to any person\n"
    "   obtaining a copy of the Unicode data files and any associated\n"
    "   documentation (the \"Data Files\") or Unicode software and any\n"
    "   associated documentation (the \"Software\") to deal in the Data Files\n"
    "   or Software without restriction, including without limitation the\n"
    "   rights to use, copy, modify, merge, publish, distribute, and/or\n"
    "   sell copies of the Data Files or Software, and to permit persons to\n"
    "   whom the Data Files or Software are furnished to do so, provided\n"
    "   that (a) the above copyright notice(s) and this permission notice\n"
    "   appear with all copies of the Data Files or Software, (b) both the\n"
    "   above copyright notice(s) and this permission notice appear in\n"
    "   associated documentation, and (c) there is clear notice in each\n"
    "   modified Data File or in the Software as well as in the\n"
    "   documentation associated with the Data File(s) or Software that the\n"
    "   data or software has been modified.\n"
    "\n"
    "   THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n"
    "   OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n"
    "   WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
    "   NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n"
    "   COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n"
    "   ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n"
    "   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n"
    "   WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n"
    "   ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n"
    "   OF THE DATA FILES OR SOFTWARE.\n"
    "\n"
    "   Except as contained in this notice, the name of a copyright holder\n"
    "   shall not be used in advertising or otherwise to promote the sale,\n"
    "   use or other dealings in these Data Files or Software without prior\n"
    "   written authorization of the copyright holder.  */\n";

  /* Same bytes as puts: the text, then a trailing newline.  */
  fputs (notice, stdout);
  putchar ('\n');
}
327
 
328
/* Main program.

   Expects exactly three arguments: the paths of ucnid.tab,
   UnicodeData.txt and DerivedNormalizationProps.txt, in that order.
   The generated table is written to stdout.  Returns 0 on success;
   every error terminates the program through fail.  */

int
main(int argc, char ** argv)
{
  if (argc != 4)
    fail ("usage: makeucnid ucnid.tab UnicodeData.txt"
          " DerivedNormalizationProps.txt > ucnid.h");

  /* ucnid.tab must be read first: read_table only looks at code points
     that already carry the C99 or CXX flag.  */
  read_ucnid (argv[1]);
  read_table (argv[2]);
  read_derived (argv[3]);

  write_copyright ();
  write_table ();

  /* stdout is the generated header, so a failed write must not go
     unnoticed.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    fail ("writing output");
  return 0;
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.