OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [scripts/] [unicode-to-chartables.pl] - Blame information for rev 775

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 762 jeremybenn
#!/usr/bin/perl -w
2
# unicode-to-chartables.pl -- generate Unicode database for java.lang.Character
3
# Copyright (C) 1998, 2002, 2004, 2006  Free Software Foundation, Inc.
4
#
5
# This file is part of GNU Classpath.
6
#
7
# GNU Classpath is free software; you can redistribute it and/or modify
8
# it under the terms of the GNU General Public License as published by
9
# the Free Software Foundation; either version 2, or (at your option)
10
# any later version.
11
#
12
# GNU Classpath is distributed in the hope that it will be useful, but
13
# WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
# General Public License for more details.
16
#
17
# You should have received a copy of the GNU General Public License
18
# along with GNU Classpath; see the file COPYING.  If not, write to the
19
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20
# 02110-1301 USA.
21
#
22
# Linking this library statically or dynamically with other modules is
23
# making a combined work based on this library.  Thus, the terms and
24
# conditions of the GNU General Public License cover the whole
25
# combination.
26
#
27
# As a special exception, the copyright holders of this library give you
28
# permission to link this library with independent modules to produce an
29
# executable, regardless of the license terms of these independent
30
# modules, and to copy and distribute the resulting executable under
31
# terms of your choice, provided that you also meet, for each linked
32
# independent module, the terms and conditions of the license of that
33
# module.  An independent module is a module which is not derived from
34
# or based on this library.  If you modify this library, you may extend
35
# this exception to your version of the library, but you are not
36
# obligated to do so.  If you do not wish to do so, delete this
37
# exception statement from your version.
38
 
39
# Code for reading UnicodeData-4.0.0.txt and SpecialCasing-4.0.0.txt to generate
40
# the code for java-chartables.h. The relevant files can be found here:
41
#
42
#   http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt
43
#   http://www.unicode.org/Public/4.0-Update/SpecialCasing-4.0.0.txt
44
#
45
# Inspired by code from Jochen Hoenicke.
46
# author Eric Blake <ebb9@email.byu.edu>
47
# Unicode 4.0.0 support by Anthony Balkissoon <abalkiss@redhat.com>
48
#
49
# Usage: ./unicode-to-chartables.pl <UnicodeData> <SpecialCasing> <tables>
50
#   where <UnicodeData.txt> is obtained from www.unicode.org (named
51
#   UnicodeData-4.0.0.txt for Unicode version 4.0.0), <SpecialCasing>
52
#   is obtained from www.unicode too (named SpecialCasing-4.0.0.txt for Unicode
53
#   version 4.0.0), and <tables> is the final location for the header file
54
#   java-chartables.h. As of JDK 1.5, use Unicode version 4.0.0
55
#   for best results.
56
 
57
 
58
##
## Return the given variable interpreted as a 16 bit signed number.
##
## Only the low 16 bits of the argument are considered; the result is in
## the range -32768 .. 32767.  The conversion is done arithmetically so the
## result does not depend on the byte order of the host (packing a native
## int and unpacking a native short only looks at the low half on
## little-endian machines).
##
sub cShort($) {
    my ($char) = @_;
    # Negative inputs are treated as their two's complement bit pattern by
    # the bitwise AND, so -1 yields 0xffff here.
    my $low16 = $char & 0xffff;
    return $low16 >= 0x8000 ? $low16 - 0x10000 : $low16;
}
65
 
66
##
67
## Convert the text UnicodeData file from www.unicode.org into a header file
68
## interface with arrays holding the compressed information.
69
##
70
my @TYPECODES = qw(Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf
71
                   SKIPPED Co Cs Pd Ps Pe Pc Po Sm Sc Sk So Pi Pf);
72
my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);
73
 
74
my $NOBREAK_FLAG  = 32;
75
my $MIRRORED_FLAG = 64;
76
 
77
my %special = ();
78
 
79
# infoArray is an array where each element is a list of character information
80
# for characters in a plane.  The index of each list is equal to the plane 
81
# that it corresponds to even though most of these lists will currently be
82
# empty.  This is done so that this script can be easily modified to
83
# accommodate future versions of Unicode.
84
# One (initially empty) anonymous array per plane, planes 0 .. 0x10.
# Taking a reference to a list of empty lists, as in \((), (), ...),
# yields an empty list, so the slots are created explicitly here.
my @infoArray = map { [] } 0 .. 0x10;
86
 
87
# info is a reference to one of the lists in infoArray, depending on which 
88
# plane we're currently parsing.
89
my $info;
90
 
91
# largeNums is an array of numerical values that are too large to fit 
92
# into the 16 bit char where most numerical values are stored.  
93
# What is stored in the char then is a number N such that (-N - 3) is 
94
# the index into largeNums where the numerical value can be found.
95
my @largeNums = ();
96
 
97
my $titlecase = "";
98
my $count = 0;
99
my $range = 0;
100
 
101
die "Usage: $0 <UnicodeData.txt> <SpecialCasing.txt> <java-chartables.h>"
102
    unless @ARGV == 3;
103
$| = 1;
104
print "GNU Classpath Unicode Attribute Database Generator 2.1\n";
105
print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
106
 
107
 
108
################################################################################
109
################################################################################
110
# Stage 0: Parse the special casing file
111
# Read the SpecialCasing file and record, in %special, every character
# whose uppercase mapping consists of more than one character.  The key is
# the numeric code point; the value is a reference to the list of numeric
# code points of the uppercase expansion.
print "Parsing special casing file\n";
# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the open mode.
open(my $special_fh, '<', $ARGV[1])
    or die "Can't open special casing file: $!\n";
while (my $line = <$special_fh>) {
    next if $line =~ /^\#/;
    my ($ch, undef, undef, $upper) = split / *; */, $line;

    # This grabs only the special casing for multi-char uppercase. Note that
    # there are no multi-char lowercase, and that Sun ignores multi-char
    # titlecase rules. This script omits 3 special cases in Unicode 3.0.0,
    # which must be hardcoded in java.lang.String:
    #  \u03a3 (Sun ignores this special case)
    #  \u0049 - lowercases to \u0131, but only in Turkish locale
    #  \u0069 - uppercases to \u0130, but only in Turkish locale
    #
    # Lines with fewer than four fields (e.g. blank lines) leave $upper
    # undefined and are skipped as well.
    next unless defined $upper and $upper =~ / /;
    $special{hex $ch} = [map {hex} split ' ', $upper];
}

close $special_fh;
129
 
130
 
131
################################################################################
132
################################################################################
133
## Stage 1: Parse the attribute file
134
# Read the UnicodeData file one record per line.  Every record is reduced to
# five 16-bit values (type code plus flags, numeric value, uppercase delta,
# lowercase delta, direction) which are packed with "n5" and stored in the
# list for the record's plane at the offset of the code point in that plane.
# Titlecase exceptions are appended to $titlecase and oversized numeric
# values to @largeNums.
print "Parsing attributes file";
# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the open mode.
open(my $unicode_fh, '<', $ARGV[0])
    or die "Can't open Unicode attribute file: $!\n";
while (my $line = <$unicode_fh>) {
    print "." unless $count++ % 1000;
    chomp $line;
    $line =~ s/\r//g;

    # The limit of -1 makes split keep trailing empty fields.  Without it
    # the case-mapping fields at the end of a record come back undefined
    # whenever they are empty, which triggers "uninitialized value"
    # warnings under -w in the comparisons below.
    my ($ch, $name, $category, undef, $bidir, $decomp, undef, undef, $numeric,
        $mirrored, undef, undef, $upcase, $lowcase, $title)
        = split /;/, $line, -1;
    $ch = hex($ch);

    # plane tells us which Unicode code plane we're currently in and is an
    # index into infoArray.
    my $plane = int($ch / 0x10000);
    my $planeBase = $plane * 0x10000;
    $info = \@{$infoArray[$plane]};

    my ($type, $numValue, $upperchar, $lowerchar, $direction);

    # The type code is the index of the general category in @TYPECODES.
    $type = 0;
    while ($category !~ /^$TYPECODES[$type]$/) {
        if (++$type == @TYPECODES) {
            die "$ch: Unknown type: $category";
        }
    }
    $type |= $NOBREAK_FLAG if ($decomp =~ /noBreak/);
    $type |= $MIRRORED_FLAG if ($mirrored =~ /Y/);

    if ($numeric =~ /^[0-9]+$/) {
        $numValue = $numeric;
        # If numeric takes more than 16 bits to store we want to store that
        # number in a separate array and store a number N in numValue such
        # that (-N - 3) is the offset into the separate array containing the
        # large numerical value.
        if ($numValue >= 0x7fff) {
            $numValue = -3 - @largeNums;
            push @largeNums, $numeric;
        }
    } elsif ($numeric eq "") {
        # Letters without a numeric field get radix-digit values 10 .. 35:
        # 'A'-'Z', 'a'-'z' and the ranges U+FF21-U+FF3A / U+FF41-U+FF5A.
        # Everything else without a numeric field gets -1.
        if ($ch >= 0x0041 && $ch <= 0x005a) {
            $numValue = $ch - 0x0037;
        } elsif ($ch >= 0x0061 && $ch <= 0x007a) {
            $numValue = $ch - 0x0057;
        } elsif ($ch >= 0xff21 && $ch <= 0xff3a) {
            $numValue = $ch - 0xff17;
        } elsif ($ch >= 0xff41 && $ch <= 0xff5a) {
            $numValue = $ch - 0xff37;
        } else {
            $numValue = -1;
        }
    } else {
        # A numeric field that is not a plain non-negative integer.
        $numValue = -2;
    }

    # Case mappings are stored as deltas relative to the character itself.
    $upperchar = $upcase ? hex($upcase) - $ch : 0;
    $lowerchar = $lowcase ? hex($lowcase) - $ch : 0;
    if ($title ne $upcase) {
        # Titlecase differs from uppercase: record a (char, titlechar) pair.
        my $titlechar = $title ? hex($title) : $ch;
        $titlecase .= pack("n2", $ch, $titlechar);
    }

    # The direction code is the index of the bidi class in @DIRCODES, or -1
    # when the class is not listed.
    $direction = 0;
    while ($bidir !~ /^$DIRCODES[$direction]$/) {
        if (++$direction == @DIRCODES) {
            $direction = -1;
            last;
        }
    }
    # The two low bits hold the last index of the multi-character uppercase
    # expansion recorded in %special, if there is one.
    $direction <<= 2;
    $direction += $#{$special{$ch}} if defined $special{$ch};

    my $packed = pack("n5", $type, $numValue, $upperchar, $lowerchar,
                      $direction);

    if ($range) {
        # The previous record opened a range ("..., First>"); this record
        # must close it, and every code point strictly in between receives
        # the attributes of this closing record.
        die "Expecting end of range at $ch\n" unless $name =~ /Last>$/;
        for my $member ($range + 1 .. $ch - 1) {
            $info->[$member - $planeBase] = $packed;
        }
        $range = 0;
    } elsif ($name =~ /First>$/) {
        $range = $ch;
    }
    # Store all this parsed information into the element in infoArray that info
    # points to.
    $info->[$ch - $planeBase] = $packed;
}
close $unicode_fh;
221
 
222
 
223
################################################################################
224
################################################################################
225
## Stage 2: Compress the data structures
226
printf "\nCompressing data structures";
227
$count = 0;
228
 
229
# data is an array of packed strings, one per plane, each used to create the DATA String containing
230
# character information and offsets into the attribute tables.
231
my @data = ();
232
 
233
# charhashArray is an array of hashtables used so that we can reuse character
234
# attributes when characters share the same attributes ... this makes our
235
# attribute tables smaller.  charhash is a pointer into this array.
236
my @charhashArray = ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
237
my $charhash = ();
238
 
239
# charinfoArray is an array of arrays, one per plane, for storing character 
240
# information.  charinfo is a pointer into this array.
241
# One (initially empty) anonymous array per plane, planes 0 .. 0x10.
# Taking a reference to a list of empty lists, as in \((), (), ...),
# yields an empty list, so the slots are created explicitly here.
my @charinfoArray = map { [] } 0 .. 0x10;
243
my $charinfo;
244
 
245
# charlen is an array, one element per plane, that tells us how many unique
246
# character attributes there are for that plane.
247
my @charlen = ();
248
 
249
for my $plane (0 .. 0x10) {
250
    $info = \@{$infoArray[$plane]};
251
    my $planeBase = $plane * 0x10000;
252
    $charhash = \%{$charhashArray[$plane]};
253
    $charinfo = \@{$charinfoArray[$plane]};
254
 
255
    for my $ch ($planeBase .. $planeBase + 0xffff) {
256
        my $index = $ch - $planeBase;
257
        print "." unless $count++ % 0x1000;
258
        $info->[$index] = pack("n5", 0, -1, 0, 0, -4) unless defined $info->[$index];
259
 
260
        my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info->[$index]);
261
        if (! exists $charhash->{$info->[$index]}) {
262
            # If we entered this loop that means the character we're looking at 
263
            # now has attributes that are unique from those that we've looked
264
            # at so far for this plane.  So we push its attributes into charinfo
265
            # and store in charhash the offset into charinfo where these
266
            # attributes can later be found.
267
            push @{$charinfo}, [ $numVal, $upper, $lower, $direction ];
268
            $charhash->{$info->[$index]} = @{$charinfo} - 1;
269
            # When the file is generated, the number we just stored in charhash
270
            # will be the upper 9 bits in the DATA String that are an offset
271
            # into the attribute tables.
272
        }
273
        $data[$plane] .= pack("n", ($charhash->{$info->[$index]} << 7) | $type);
274
    }
275
    $charlen[$plane] = scalar(@{$charinfoArray[$plane]});
276
}
277
 
278
# the shift that results in the best compression of the table.  This is an array
279
# because different shifts are better for the different tables for each plane.
280
my @bestshift;
281
 
282
# an initial guess.
283
my $bestest = 1000000;
284
my @bestblkstr;
285
my @blksize = ();
286
 
287
for my $plane (0 .. 0x10) {
288
    print "\n\nplane: $plane\n";
289
    print "Unique character entries: $charlen[$plane]\n";
290
    $bestest = 1000000;
291
    for my $i (3 .. 8) {
292
        my $blksize = 1 << $i;
293
        my %blocks = ();
294
        my @blkarray = ();
295
        my ($j, $k);
296
        print "shift: $i";
297
 
298
        for ($j = 0; $j < 0x10000; $j += $blksize) {
299
            my $blkkey = substr $data[$plane], 2 * $j, 2 * $blksize;
300
            if (! exists $blocks{$blkkey}) {
301
                push @blkarray, $blkkey;
302
                $blocks{$blkkey} = $#blkarray;
303
            }
304
        }
305
 
306
        my $blknum = @blkarray;
307
        my $blocklen = $blknum * $blksize;
308
        printf " before %5d", $blocklen;
309
 
310
        # Now we try to pack the blkarray as tight as possible by finding matching
311
        # heads and tails.
312
        for ($j = $blksize - 1; $j > 0; $j--) {
313
            my %tails = ();
314
            for $k (0 .. $#blkarray) {
315
                next unless defined $blkarray[$k];
316
                my $len = length $blkarray[$k];
317
                my $tail = substr $blkarray[$k], $len - $j * 2;
318
                if (exists $tails{$tail}) {
319
                    push @{$tails{$tail}}, $k;
320
                } else {
321
                    $tails{$tail} = [ $k ];
322
                }
323
            }
324
 
325
            # tails are calculated, now calculate the heads and merge.
326
          BLOCK:
327
            for $k (0 .. $#blkarray) {
328
                next unless defined $blkarray[$k];
329
                my $tomerge = $k;
330
                while (1) {
331
                    my $head = substr($blkarray[$tomerge], 0, $j * 2);
332
                    my $entry = $tails{$head};
333
                    next BLOCK unless defined $entry;
334
 
335
                    my $other = shift @{$entry};
336
                    if ($other == $tomerge) {
337
                        if (@{$entry}) {
338
                            push @{$entry}, $other;
339
                            $other = shift @{$entry};
340
                        } else {
341
                            push @{$entry}, $other;
342
                            next BLOCK;
343
                        }
344
                    }
345
                    if (@{$entry} == 0) {
346
                        delete $tails{$head};
347
                    }
348
 
349
                    # a match was found
350
                    my $merge = $blkarray[$other]
351
                        . substr($blkarray[$tomerge], $j * 2);
352
                    $blocklen -= $j;
353
                    $blknum--;
354
 
355
                    if ($other < $tomerge) {
356
                        $blkarray[$tomerge] = undef;
357
                        $blkarray[$other] = $merge;
358
                        my $len = length $merge;
359
                        my $tail = substr $merge, $len - $j * 2;
360
                        $tails{$tail} = [ map { $_ == $tomerge ? $other : $_ }
361
                                          @{$tails{$tail}} ];
362
                        next BLOCK;
363
                    }
364
                    $blkarray[$tomerge] = $merge;
365
                    $blkarray[$other] = undef;
366
                }
367
            }
368
        }
369
        my $blockstr;
370
        for $k (0 .. $#blkarray) {
371
            $blockstr .= $blkarray[$k] if defined $blkarray[$k];
372
        }
373
 
374
        die "Unexpected $blocklen" if length($blockstr) != 2 * $blocklen;
375
        my $estimate = 2 * $blocklen + (0x20000 >> $i);
376
 
377
        printf " after merge %5d: %6d bytes\n", $blocklen, $estimate;
378
        if ($estimate < $bestest) {
379
            $bestest = $estimate;
380
            $bestshift[$plane] = $i;
381
            $bestblkstr[$plane] = $blockstr;
382
        }
383
    }
384
    $blksize[$plane] = 1 << $bestshift[$plane];
385
    print "best shift: ", $bestshift[$plane];
386
    print "     blksize: ", $blksize[$plane];
387
}
388
# blocksArray holds one list per plane (planes 0 .. 0x10); the loop below
# fills each list with the adjusted offsets of that plane's blocks within
# the plane's best block string.  Taking a reference to a list of empty
# lists, as in \((), (), ...), yields an empty list, so the slots are
# created explicitly here.
my @blocksArray = map { [] } 0 .. 0x10;
390
 
391
for my $plane (0 .. 0x10) {
392
    for (my $j = 0; $j < 0x10000; $j += $blksize[$plane]) {
393
        my $blkkey = substr $data[$plane], 2 * $j, 2 * $blksize[$plane];
394
        my $index = index $bestblkstr[$plane], $blkkey;
395
        while ($index & 1) {
396
            die "not found: $j" if $index == -1;
397
            $index = index $bestblkstr[$plane], $blkkey, $index + 1;
398
        }
399
        push @{$blocksArray[$plane]}, ($index / 2 - $j) & 0xffff;
400
    }
401
}
402
 
403
 
404
################################################################################
405
################################################################################
406
## Stage 3: Generate the file
407
for my $plane (0 .. 0x10) {
408
    die "UTF-8 limit of blocks may be exceeded for plane $plane: " . scalar(@{$blocksArray[$plane]}) . "\n"
409
        if @{$blocksArray[$plane]} > 0xffff / 3;
410
    die "UTF-8 limit of data may be exceeded for plane $plane: " . length($bestblkstr[$plane]) . "\n"
411
        if length($bestblkstr[$plane]) > 0xffff / 3;
412
}
413
 
414
{
415
    print "\nGenerating $ARGV[2].";
416
    my ($i, $j);
417
 
418
    open OUTPUT, "> $ARGV[2]" or die "Failed creating output file: $!\n";
419
    print OUTPUT <<EOF;
420
/* java-chartables.h -- Character tables for java.lang.Character -*- c++ -*-
421
   Copyright (C) 2002, 2006 Free Software Foundation, Inc.
422
   *** This file is generated by scripts/unicode-to-chartables.pl ***
423
 
424
This file is part of GNU Classpath.
425
 
426
GNU Classpath is free software; you can redistribute it and/or modify
427
it under the terms of the GNU General Public License as published by
428
the Free Software Foundation; either version 2, or (at your option)
429
any later version.
430
 
431
GNU Classpath is distributed in the hope that it will be useful, but
432
WITHOUT ANY WARRANTY; without even the implied warranty of
433
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
434
General Public License for more details.
435
 
436
You should have received a copy of the GNU General Public License
437
along with GNU Classpath; see the file COPYING.  If not, write to the
438
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
439
02110-1301 USA.
440
 
441
Linking this library statically or dynamically with other modules is
442
making a combined work based on this library.  Thus, the terms and
443
conditions of the GNU General Public License cover the whole
444
combination.
445
 
446
As a special exception, the copyright holders of this library give you
447
permission to link this library with independent modules to produce an
448
executable, regardless of the license terms of these independent
449
modules, and to copy and distribute the resulting executable under
450
terms of your choice, provided that you also meet, for each linked
451
independent module, the terms and conditions of the license of that
452
module.  An independent module is a module which is not derived from
453
or based on this library.  If you modify this library, you may extend
454
this exception to your version of the library, but you are not
455
obligated to do so.  If you do not wish to do so, delete this
456
exception statement from your version. */
457
 
458
#ifndef __JAVA_CHARTABLES_H__
459
#define __JAVA_CHARTABLES_H__
460
 
461
// These tables are automatically generated by scripts/unicode_to_chartables.pl.
462
// The Unicode data comes from www.unicode.org; this header is based on
463
// UnicodeData-4.0.0.txt. JDK 1.5 uses Unicode version 4.0.0.
464
// DO NOT EDIT the tables.  Instead, fix the upstream scripts and run
465
// them again.
466
 
467
// The data is stored in C style arrays of the appropriate CNI types, to
468
// guarantee that the data is constant and non-relocatable.  The field
469
// <code>blocks</code> stores the offset of a block of 2<sup>SHIFT</sup>
470
// characters within <code>data</code>. The data field, in turn, stores
471
// information about each character in the low order bits, and an offset
472
// into the attribute tables <code>upper</code>, <code>lower</code>,
473
// <code>numValue</code>, and <code>direction</code>.  Notice that the
474
// attribute tables are much smaller than 0xffff entries; as many characters
475
// in Unicode share common attributes.  Finally, there is a listing for
476
// <code>title</code> exceptions (most characters just have the same title
477
// case as upper case).
478
 
479
// This file should only be included by natCharacter.cc
480
 
481
/**
482
 * The array containing the numeric values that are too large to be stored as
483
 * chars in NUM_VALUE.  NUM_VALUE in this case will contain a negative integer
484
 * N such that LARGENUMS[-N - 3] contains the correct numeric value.
485
 */
486
EOF
487
  print OUTPUT "static const jint largenums[] = {\n    ";
488
  for ($i = 0; $i < @largeNums; $i++) {
489
      print OUTPUT $largeNums[$i], ", ";
490
  }
491
  print OUTPUT "}";
492
  print OUTPUT <<EOF;
493
;
494
 
495
/**
496
 * The character shift amount to look up the block offset. In other words,
497
 * <code>(char) (blocks[p][off >> SHIFT[p]] + off)</code> is the index where
498
 * <code>ch</code> is described in <code>data</code>, where <code>off</code>
499
 * is ch & 0xffff and <code>p</code> is the plane the character belongs to.
500
 */
501
EOF
502
  print OUTPUT "static const int shift[] = {\n    ";
503
  for ($i = 0; $i < @bestshift; $i++) {
504
      print OUTPUT $bestshift[$i], ", ";
505
  }
506
  print OUTPUT "}";
507
  print OUTPUT <<EOF;
508
;
509
 
510
/**
511
 * The mapping of character blocks to their location in <code>data</code>.
512
 * Each entry has been adjusted so that a modulo 16 sum with the desired
513
 * character gives the actual index into <code>data</code>.
514
 */
515
EOF
516
  for ($plane = 0; $plane <= 0x10; $plane++) {
517
      # The following if statement handles the cases of unassigned planes
518
      # specially so we don't waste space with unused Strings.  As of 
519
      # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used.  If
520
      # you are updating this script to work with a later version of 
521
      # Unicode you may have to alter this if statement.
522
      next if ($plane > 2 && $plane != 14) ;
523
 
524
      print OUTPUT "static const jchar blocks", $plane, "[] = {\n";
525
      for ($i = 0; $i < @{$blocksArray[$plane]} / 10; $i++) {
526
          print OUTPUT "    ";
527
          for $j (0 .. 9) {
528
              last if @{$blocksArray[$plane]} <= $i * 10 + $j;
529
              my $val = $blocksArray[$plane]->[$i * 10 + $j];
530
              print OUTPUT $val, ", ";
531
          }
532
          print OUTPUT "\n";
533
      }
534
      print OUTPUT "};\n\n";
535
  }
536
  print OUTPUT "static const int blocks_length[] = {\n    ";
537
  for ($plane = 0; $plane <= 0x10; $plane++) {
538
      if ($plane > 2 && $plane != 14){
539
          print OUTPUT "-1, ";
540
      }
541
      else {
542
          print OUTPUT scalar(@{$blocksArray[$plane]}), ", ";
543
      }
544
  }
545
  print OUTPUT "};\n";
546
  print OUTPUT <<EOF;
547
static const jchar* blocks[] = {
548
    blocks0, blocks1, blocks2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
549
    NULL, NULL, NULL, NULL, blocks14, NULL, NULL};
550
 
551
/**
552
 * Information about each character.  The low order 5 bits form the
553
 * character type, the next bit is a flag for non-breaking spaces, and the
554
 * next bit is a flag for mirrored directionality.  The high order 9 bits
555
 * form the offset into the attribute tables.  Note that this limits the
556
 * number of unique character attributes per plane to 512, which is not a
557
 * problem as of Unicode version 4.0.0, but may soon become one.
558
 */
559
EOF
560
  for ($plane = 0; $plane <= 0x10; $plane++) {
561
      # The following if statement handles the cases of unassigned planes
562
      # specially so we don't waste space with unused Strings.  As of 
563
      # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used.  If
564
      # you are updating this script to work with a later version of 
565
      # Unicode you may have to alter this if statement.
566
      next if ($plane > 2 && $plane != 14);
567
 
568
      print OUTPUT "static const jchar data", $plane, "[] = {\n";
569
      my $len = length($bestblkstr[$plane]) / 2;
570
      for ($i = 0; $i < $len / 10; $i++) {
571
          print OUTPUT "    ";
572
          for $j (0 .. 9) {
573
              last if $len <= $i * 10 + $j;
574
              my $val = unpack "n", substr($bestblkstr[$plane], 2 * ($i * 10 + $j), 2);
575
              print OUTPUT $val, ", ";
576
          }
577
          print OUTPUT "\n";
578
      }
579
      print OUTPUT "};\n\n";
580
  }
581
  print OUTPUT "static const int data_length[] = {\n    ";
582
  for ($plane = 0; $plane <= 0x10; $plane++) {
583
      if ($plane > 2 && $plane != 14){
584
          print OUTPUT "-1, ";
585
      }
586
      else {
587
          print OUTPUT length($bestblkstr[$plane]) / 2, ", ";
588
      }
589
  }
590
  print OUTPUT "};\n";
591
  print OUTPUT <<EOF;
592
static const jchar* data[] = {
593
    data0, data1, data2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
594
    NULL, NULL, NULL, NULL, data14, NULL, NULL};
595
 
596
 
597
/**
598
 * This is the attribute table for computing the numeric value of a
599
 * character.  The value is -1 if Unicode does not define a value, -2
600
 * if the value is not a positive integer, otherwise it is the value.
601
 */
602
EOF
603
  for ($plane = 0; $plane <= 0x10; $plane++) {
604
      # The following if statement handles the cases of unassigned planes
605
      # specially so we don't waste space with unused Strings.  As of 
606
      # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used.  If
607
      # you are updating this script to work with a later version of 
608
      # Unicode you may have to alter this if statement.
609
      next if ($plane > 2 && $plane != 14);
610
 
611
      print OUTPUT "static const jshort numValue", $plane, "[] = {\n";
612
      $len = @{$charinfoArray[$plane]};
613
      for ($i = 0; $i < $len / 13; $i++) {
614
          print OUTPUT "    ";
615
          for $j (0 .. 12) {
616
              last if $len <= $i * 13 + $j;
617
              my $val = $charinfoArray[$plane]->[$i * 13 + $j][0];
618
              print OUTPUT cShort($val), ", ";
619
          }
620
          print OUTPUT "\n";
621
      }
622
      print OUTPUT "};\n\n";
623
  }
624
  print OUTPUT "static const int numValue_length[] = {\n    ";
625
  for ($plane = 0; $plane <= 0x10; $plane++) {
626
      if ($plane > 2 && $plane != 14){
627
          print OUTPUT "-1, ";
628
      }
629
      else {
630
          print OUTPUT scalar(@{$charinfoArray[$plane]}), ", ";
631
      }
632
  }
633
  print OUTPUT "};\n";
634
  print OUTPUT <<EOF;
635
static const jshort* numValue[] = {
636
    numValue0, numValue1, numValue2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
637
    NULL, NULL, NULL, NULL, numValue14, NULL, NULL};
638
 
639
 
640
 
641
/**
642
 * This is the attribute table for computing the uppercase representation
643
 * of a character.  The value is the difference between the character and
644
 * its uppercase version.
645
 */
646
EOF
647
  for ($plane = 0; $plane <= 0x10; $plane++) {
648
      # The following if statement handles the cases of unassigned planes
649
      # specially so we don't waste space with unused Strings.  As of 
650
      # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used.  If
651
      # you are updating this script to work with a later version of 
652
      # Unicode you may have to alter this if statement.
653
      next if ($plane > 2 && $plane != 14);
654
 
655
      print OUTPUT "static const jshort upper", $plane, "[] = {\n";
656
      $len = @{$charinfoArray[$plane]};
657
      for ($i = 0; $i < $len / 13; $i++) {
658
          print OUTPUT "    ";
659
          for $j (0 .. 12) {
660
              last if $len <= $i * 13 + $j;
661
              my $val = $charinfoArray[$plane]->[$i * 13 + $j][1];
662
              print OUTPUT cShort($val), ", ";
663
          }
664
          print OUTPUT "\n";
665
      }
666
      print OUTPUT "};\n\n";
667
  }
668
  print OUTPUT "static const int upper_length[] = {\n    ";
669
  for ($plane = 0; $plane <= 0x10; $plane++) {
670
      if ($plane > 2 && $plane != 14){
671
          print OUTPUT "-1, ";
672
      }
673
      else {
674
          print OUTPUT scalar(@{$charinfoArray[$plane]}), ", ";
675
      }
676
  }
677
  print OUTPUT "};\n";
678
  print OUTPUT <<EOF;
679
static const jshort* upper[] = {
680
    upper0, upper1, upper2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
681
    NULL, NULL, NULL, NULL, upper14, NULL, NULL};
682
 
683
 
684
/**
685
 * This is the attribute table for computing the lowercase representation
686
 * of a character.  The value is the difference between the character and
687
 * its lowercase version.
688
 */
689
EOF
690
  for ($plane = 0; $plane <= 0x10; $plane++) {
691
      # The following if statement handles the cases of unassigned planes
692
      # specially so we don't waste space with unused Strings.  As of 
693
      # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used.  If
694
      # you are updating this script to work with a later version of 
695
      # Unicode you may have to alter this if statement.
696
      next if ($plane > 2 && $plane != 14);
697
 
698
      print OUTPUT "static const jshort lower", $plane, "[] = {\n";
699
      $len = @{$charinfoArray[$plane]};
700
      for ($i = 0; $i < $len / 13; $i++) {
701
          print OUTPUT "    ";
702
          for $j (0 .. 12) {
703
              last if $len <= $i * 13 + $j;
704
              my $val = $charinfoArray[$plane]->[$i * 13 + $j][2];
705
              print OUTPUT cShort($val), ", ";
706
          }
707
          print OUTPUT "\n";
708
      }
709
      print OUTPUT "};\n\n";
710
  }
711
  print OUTPUT "static const int lower_length[] = {\n    ";
712
  for ($plane = 0; $plane <= 0x10; $plane++) {
713
      if ($plane > 2 && $plane != 14){
714
          print OUTPUT "-1, ";
715
      }
716
      else {
717
          print OUTPUT scalar(@{$charinfoArray[$plane]}), ", ";
718
      }
719
  }
720
  print OUTPUT "};\n";
721
  print OUTPUT <<EOF;
722
static const jshort* lower[] = {
723
    lower0, lower1, lower2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
724
    NULL, NULL, NULL, NULL, lower14, NULL, NULL};
725
 
726
 
727
/**
728
 * This is the attribute table for computing the directionality class
729
 * of a character.  At present, the value is in the range 0 - 18 if the
730
 * character has a direction, otherwise it is -1.
731
 */
732
EOF
733
  for ($plane = 0; $plane <= 0x10; $plane++) {
734
      # The following if statement handles the cases of unassigned planes
735
      # specially so we don't waste space with unused Strings.  As of 
736
      # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used.  If
737
      # you are updating this script to work with a later version of 
738
      # Unicode you may have to alter this if statement.
739
      next if ($plane > 2 && $plane != 14);
740
 
741
      print OUTPUT "static const jbyte direction", $plane, "[] = {\n";
742
      $len = @{$charinfoArray[$plane]};
743
      for ($i = 0; $i < $len / 19; $i++) {
744
          print OUTPUT "    ";
745
          for $j (0 .. 18) {
746
              last if $len <= $i * 19 + $j;
747
              my $val = $charinfoArray[$plane]->[$i * 19 + $j][3];
748
              $val >>= 2;
749
              if ($val < 0 || $val > 18){
750
                  $val = -1;
751
              }
752
              print OUTPUT cShort($val), ", ";
753
          }
754
          print OUTPUT "\n";
755
      }
756
      print OUTPUT "};\n\n";
757
  }
758
  print OUTPUT "static const int direction_length[] = {\n    ";
759
  for ($plane = 0; $plane <= 0x10; $plane++) {
760
      if ($plane > 2 && $plane != 14){
761
          print OUTPUT "-1, ";
762
      }
763
      else {
764
          print OUTPUT scalar(@{$charinfoArray[$plane]}), ", ";
765
      }
766
  }
767
  print OUTPUT "};\n";
768
  print OUTPUT <<EOF;
769
static const jbyte* direction[] = {
770
    direction0, direction1, direction2, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
771
    NULL, NULL, NULL, NULL, direction14, NULL, NULL};
772
 
773
 
774
/**
775
 * This is the listing of titlecase special cases (all other character
776
 * can use <code>upper</code> to determine their titlecase).  The listing
777
 * is a sequence of character pairs; converting the first character of the
778
 * pair to titlecase produces the second character.
779
 */
780
static const jchar title[] = {
781
EOF
782
 
783
  $len = length($titlecase) / 2;
784
  for ($i = 0; $i < $len / 10; $i++) {
785
      print OUTPUT $i ? "\n    " : "    ";
786
      for $j (0 .. 9) {
787
          last if $len <= $i * 10 + $j;
788
          my $val = unpack "n", substr($titlecase, 2 * ($i * 10 + $j), 2);
789
          print OUTPUT $val, ", ";
790
      }
791
  }
792
 
793
  print OUTPUT "\n  };";
794
  print OUTPUT "\n/** Length of title. */\nstatic const int title_length = ", $len;
795
  print OUTPUT <<EOF;
796
;
797
 
798
#endif /* __JAVA_CHARTABLES_H__ */
799
EOF
800
  close OUTPUT;
801
}
802
print "\nDone.\n";

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.