OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [java/] [gen-table.pl] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
#! /usr/bin/perl
2
 
3
#    Copyright (C) 2000, 2001, 2003 Free Software Foundation
4
 
5
#    This program is free software; you can redistribute it and/or modify
6
#    it under the terms of the GNU General Public License as published by
7
#    the Free Software Foundation; either version 2, or (at your option)
8
#    any later version.
9
 
10
#    This program is distributed in the hope that it will be useful,
11
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
12
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
#    GNU General Public License for more details.
14
 
15
#    You should have received a copy of the GNU General Public License
16
#    along with this program; if not, write to the Free Software
17
#    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18
#    02110-1301, USA.
19
 
20
# gen-table.pl - Generate tables for gcj from Unicode data.
21
# Usage: perl gen-table.pl DATA-FILE
22
#
23
# You can find the Unicode data file here:
24
#   ftp://www.unicode.org/Public/3.0-Update1/UnicodeData-3.0.1.txt
25
# Please update this URL when this program is used with a more
26
# recent version of the table.  Note that this table cannot be
27
# distributed with gcc.
28
# This program should not be re-run indiscriminately.  Care must be
29
# taken that what it generates is in sync with the Java specification.
30
 
31
# Names of fields in Unicode data table.
32
$CODE = 0;
33
$NAME = 1;
34
$CATEGORY = 2;
35
$COMBINING_CLASSES = 3;
36
$BIDI_CATEGORY = 4;
37
$DECOMPOSITION = 5;
38
$DECIMAL_VALUE = 6;
39
$DIGIT_VALUE = 7;
40
$NUMERIC_VALUE = 8;
41
$MIRRORED = 9;
42
$OLD_NAME = 10;
43
$COMMENT = 11;
44
$UPPER = 12;
45
$LOWER = 13;
46
$TITLE = 14;
47
 
48
# Start of special-cased gaps in Unicode data table.
49
%gaps = (
50
         0x4e00 => "CJK",
51
         0xac00 => "Hangul",
52
         0xd800 => "Unassigned High Surrogate",
53
         0xdb80 => "Private Use High Surrogate",
54
         0xdc00 => "Low Surrogate",
55
         0xe000 => "Private Use"
56
         );
57
 
58
# This lists control characters which are also considered whitespace.
59
# This is a somewhat odd list, taken from the JCL definition of
60
# Character.isIdentifierIgnorable.
61
%whitespace_controls =
62
    (
63
     0x0009 => 1,
64
     0x000a => 1,
65
     0x000b => 1,
66
     0x000c => 1,
67
     0x000d => 1,
68
     0x001c => 1,
69
     0x001d => 1,
70
     0x001e => 1,
71
     0x001f => 1
72
     );
73
 
74
open (INPUT, "< $ARGV[0]") || exit 1;
75
 
76
$last_code = -1;
77
while (<INPUT>)
78
{
79
    chop;
80
    @fields = split (';', $_, 30);
81
    if ($#fields != 14)
82
    {
83
        print STDERR "Entry for $fields[$CODE] has wrong number of fields\n";
84
    }
85
 
86
    $code = hex ($fields[$CODE]);
87
    last if $code > 0xffff;
88
    if ($code > $last_code + 1)
89
    {
90
        # Found a gap.
91
        if (defined $gaps{$code})
92
        {
93
            # Fill the gap with the last character read.
94
            @gfields = @fields;
95
        }
96
        else
97
        {
98
            # The gap represents undefined characters.  Only the type
99
            # matters.
100
            @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
101
                        '', '', '', '');
102
        }
103
        for (++$last_code; $last_code < $code; ++$last_code)
104
        {
105
            $gfields{$CODE} = sprintf ("%04x", $last_code);
106
            &process_one ($last_code, @gfields);
107
        }
108
    }
109
    &process_one ($code, @fields);
110
    $last_code = $code;
111
}
112
 
113
close (INPUT);
114
 
115
@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '',
116
            '', '', '', '');
117
for (++$last_code; $last_code < 0x10000; ++$last_code)
118
{
119
    $gfields{$CODE} = sprintf ("%04x", $last_code);
120
    &process_one ($last_code, @gfields);
121
}
122
--$last_code;                   # Want last to be 0xFFFF.
123
 
124
&print_tables ($last_code);
125
 
126
exit 0;
127
 
128
# Process a single character.
129
sub process_one
130
{
131
    my ($code, @fields) = @_;
132
 
133
    my @value = ();
134
    my $type = $fields[$CATEGORY];
135
 
136
    # See if the character is a valid identifier start.
137
    if ($type =~ /L./           # Letter
138
        || $type eq 'Pc'        # Connecting punctuation
139
        || $type eq 'Sc')       # Currency symbol
140
    {
141
        push (@value, 'LETTER_START');
142
    }
143
 
144
    # See if the character is a valid identifier member.
145
    if ($type =~ /L./           # Letter
146
        || $type eq 'Pc'        # Connecting punctuation
147
        || $type eq 'Sc'        # Currency symbol
148
        || $type =~ /N[dl]/     # Number: decimal or letter
149
        || $type =~ /M[nc]/     # Mark: non-spacing or combining
150
        || ($type eq 'Cc'       # Certain controls
151
            && ! defined $whitespace_controls{$code})
152
        || ($code >= 0x200c     # Join controls
153
            && $code <= 0x200f)
154
        || ($code >= 0x202a     # Bidi controls -- note that there
155
                                # is a typo in the JCL where these are
156
                                # concerned.
157
            && $code <= 0x202e)
158
        || ($code >= 0x206a     # Format controls
159
            && $code <= 0x206f)
160
        || $code == 0xfeff)     # ZWNBSP
161
    {
162
        push (@value, 'LETTER_PART');
163
    }
164
 
165
    if (($type =~ /Z./
166
         # Java treats some values specially as non-spaces.
167
         && $code != 0x00a0
168
         && $code != 0x2007
169
         && $code != 0x202f)
170
        # And for our purposes there are some that should be specially
171
        # treated as spaces.
172
        || $code == 0x000b
173
        || ($code >= 0x001c && $code <= 0x001f))
174
    {
175
        push (@value, 'LETTER_SPACE');
176
    }
177
 
178
    if (! @value)
179
    {
180
        $value = '0';
181
    }
182
    else
183
    {
184
        $value = '(' . join (' | ', @value) . ')';
185
    }
186
 
187
    $map[$code] = $value;
188
}
189
 
190
sub print_tables
191
{
192
    my ($last) = @_;
193
 
194
    local ($bytes_out) = 0;
195
 
196
    open (OUT, "> chartables.h");
197
 
198
    print OUT "/* This file is automatically generated.  DO NOT EDIT!\n";
199
    print OUT "   Instead, edit gen-table.pl and re-run.  */\n\n";
200
 
201
    print OUT "#ifndef GCC_CHARTABLES_H\n";
202
    print OUT "#define GCC_CHARTABLES_H\n\n";
203
 
204
    print OUT "#define LETTER_START 1\n";
205
    print OUT "#define LETTER_PART  2\n";
206
    print OUT "#define LETTER_SPACE 4\n\n";
207
    print OUT "#define LETTER_MASK  7\n\n";
208
 
209
    for ($count = 0; $count <= $last; $count += 256)
210
    {
211
        $row[$count / 256] = &print_row ($count, '(char *) ', 'const char', 1,
212
                                         'page');
213
    }
214
 
215
    print OUT "static const char *const type_table[256] = {\n";
216
    for ($count = 0; $count <= $last; $count += 256)
217
    {
218
        print OUT ",\n" if $count > 0;
219
        print OUT "  ", $row[$count / 256];
220
        $bytes_out += 4;
221
    }
222
    print OUT "\n};\n\n";
223
 
224
    print OUT "#endif /* ! GCC_CHARTABLES_H */\n";
225
 
226
    close (OUT);
227
 
228
    printf "Generated %d bytes\n", $bytes_out;
229
}
230
 
231
# Print a single "row" of a two-level table.
232
sub print_row
233
{
234
    my ($start, $def_pfx, $typname, $typsize, $name) = @_;
235
 
236
    my ($i);
237
    my (@values);
238
    my ($flag) = 1;
239
    my ($off);
240
    for ($off = 0; $off < 256; ++$off)
241
    {
242
        $values[$off] = $map[$off + $start];
243
        if ($values[$off] ne $values[0])
244
        {
245
            $flag = 0;
246
        }
247
    }
248
    if ($flag)
249
    {
250
        return $def_pfx . $values[0];
251
    }
252
 
253
    printf OUT "static %s %s%d[256] = {\n  ", $typname, $name, $start / 256;
254
    my ($column) = 2;
255
    for ($i = $start; $i < $start + 256; ++$i)
256
    {
257
        print OUT ", "
258
            if $i > $start;
259
        my ($text) = $values[$i - $start];
260
        if (length ($text) + $column + 2 > 78)
261
        {
262
            print OUT "\n  ";
263
            $column = 2;
264
        }
265
        print OUT $text;
266
        $column += length ($text) + 2;
267
    }
268
    print OUT "\n};\n\n";
269
 
270
    $bytes_out += 256 * $typsize;
271
 
272
    return sprintf "%s%d", $name, $start / 256;
273
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.