URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [scripts/] [unicode-decomp.pl] - Blame information for rev 833

Go to most recent revision | Details | Compare with Previous | View Log


#!/usr/bin/perl -w
# unicode-decomp.pl - script to generate database for java.text.Collator
# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
#
# This file is part of libjava.
# 
# This software is copyrighted work licensed under the terms of the
# Libjava License.  Please consult the file "LIBJAVA_LICENSE" for
# details.
 
# Code for reading UnicodeData.txt and generating the character
# decomposition tables in include/java-chardecomp.h, which are used by
# java.text.Collator.  For now, the relevant Unicode definition files
# are found in libjava/gnu/gcj/convert/.
#
# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>
#   where <UnicodeData.txt> is obtained from www.unicode.org (named
#   UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <decomp.h>
#   is the final location of include/java-chardecomp.h.
#   As of JDK 1.4, use Unicode version 3.0.0 for best results.
#
# If this exits with nonzero status, then you must investigate the
# cause of the problem.
# Diagnostics and other information to stderr.
# With -n, the files are not created, but all processing still occurs.
 
# These map characters to their decompositions.  Keys are numeric code
# points; values are the decomposition packed with pack "n*" (one
# 16-bit big-endian unit per character).
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n' and open the input file.  With `-n' the output file name
# (whether given or not) is replaced by /dev/null, so all processing
# still happens but nothing is created.
if (@ARGV && $ARGV[0] eq '-n')
{
    shift @ARGV;
    # Only substitute the output name when an input file was actually
    # given.  Assigning to $ARGV[1] on an empty @ARGV would pad it to
    # (undef, '/dev/null') and slip past the usage check below.
    $ARGV[1] = '/dev/null' if @ARGV;
}
die "Usage: $0 [-n] <UnicodeData.txt> <java-chardecomp.h>\n"
    unless @ARGV == 2;

# Three-argument open: the file name is taken literally, so leading or
# trailing whitespace and characters such as `|' or `>' in the name are
# never interpreted as part of the open mode.
open (UNICODE, '<', $ARGV[0])
    || die "Can't open Unicode attribute file: $!\n";
 
# Process the Unicode file.  Each line is a semicolon-separated record;
# the first field is the code point in hex and the sixth field is the
# decomposition: an optional <tag> followed by hex code points.  A
# decomposition carrying a <tag> goes into %full_decomposition, one
# without a tag goes into %canonical_decomposition.
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
LINE: while (<UNICODE>)
{
    # Progress indicator: one dot per 1000 input lines.
    print STDERR "." unless $count++ % 1000;
    chomp;
    s/\r//g;
    my ($ch, undef, undef, undef, undef, $decomp) = split ';';

    # Blank or truncated lines leave these fields undefined; skip them
    # instead of tripping `uninitialized value' warnings under -w.
    next LINE unless defined $ch && $ch =~ /^[0-9A-Fa-f]+$/;
    next LINE unless defined $decomp && $decomp ne '';

    $ch = hex($ch);

    # The table writer only emits keys in 0 .. 0xffff, so entries for
    # higher code points would never be written; don't collect them.
    next LINE if $ch > 0xffff;

    my $is_full = 0;
    my $too_wide = 0;
    my @decomp = ();
    foreach my $field (split (' ', $decomp))
    {
        # A <tag> such as <compat> marks a non-canonical decomposition.
        if ($field =~ /^\<.*\>$/)
        {
            $is_full = 1;
            next;
        }
        my $unit = hex ($field);
        $too_wide = 1 if $unit > 0xffff;
        push (@decomp, $unit);
    }

    # pack "n" keeps only the low 16 bits of each value, so a code
    # point above U+FFFF would silently turn into a different
    # character.  Report it and leave the character without an entry
    # rather than emit a corrupt one.
    if ($too_wide)
    {
        printf STDERR
            "\nSkipping U+%04X: decomposition contains a code point above U+FFFF\n",
            $ch;
        next LINE;
    }

    my $s = pack "n*", @decomp;
    if ($is_full)
    {
        $full_decomposition{$ch} = $s;
    }
    else
    {
        $canonical_decomposition{$ch} = $s;
    }
}
# The input is fully consumed; release the handle.
close (UNICODE);
 
# Now generate decomposition tables.  The output is a C++ header: a
# fixed preamble, the two tables written by write_decompositions, and
# the closing include guard.
# Three-argument open so the output name is taken literally.
open (DECOMP, '>', $ARGV[1]) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -*- c++ -*-
 
#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__
 
 
// These tables are automatically generated by the $0
// script.  DO NOT EDIT the tables.  Instead, fix the script
// and run it again.
 
// This file should only be included by natCollator.cc
 
struct decomp_entry
{
  jchar key;
  const char *value;
};
 
EOF

# The `&' form is kept on purpose: the sub is defined further down with
# a prototype, and a plain call with parentheses here would draw a
# "called too early to check prototype" warning under -w.
&write_decompositions;

print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";

# Buffered write errors (e.g. a full disk) only surface at close.
# Check it so a truncated header makes the script exit nonzero instead
# of reporting success.
close (DECOMP) or die "Can't close output file: $!\n";
print STDERR "Done\n";
exit;
 
 
# Write a single decomposition table to the DECOMP filehandle as a C
# array named "<name>_decomposition".
#
#   $name     - prefix used for the generated array name.
#   $is_canon - accepted for the callers' benefit; not used here.
#   %table    - maps a numeric code point to its expansion, packed
#               with pack "n*".
#
# Only keys in the range 0 .. 0xffff are written, in ascending order.
sub write_single_decomposition($$%)
{
    my ($name, $is_canon, %table) = @_;

    # Each expansion is written as a string of byte escapes, two per
    # 16-bit unit with the high byte first.  A variable-length string
    # is used because most expansions are short while a few are very
    # long (e.g. \uFDFA), so a fixed-width layout would waste space.
    # NOTE(review): no explicit terminator is emitted here, so the only
    # terminator is the one the C compiler appends to the literal.  The
    # earlier comment spoke of a "double nul" -- confirm against the
    # code in natCollator.cc that reads these strings.
    my @entries = ();
    foreach my $code (grep { defined $table{$_} } 0 .. 0xffff)
    {
        my $escaped = join ('',
                            map { sprintf ("\\x%02x\\x%02x",
                                           ($_ / 256), ($_ % 256)) }
                            unpack ("n*", $table{$code}));
        push (@entries, sprintf ("  { 0x%04x, \"%s\" }", $code, $escaped));
    }

    # Entries are separated by ",\n"; the last one gets no comma.
    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n",
                 join (",\n", @entries),
                 "\n};\n\n";
}
 
# Write both decomposition tables to the output, canonical first and
# then full.  Returns whatever the last table write returned.
sub write_decompositions()
{
    # One row per table: array name prefix, canonical flag, source hash.
    my @tables = (
        [ 'canonical', 1, \%canonical_decomposition ],
        [ 'full',      0, \%full_decomposition ],
    );

    my $status;
    foreach my $spec (@tables)
    {
        my ($name, $is_canon, $table) = @$spec;
        $status = &write_single_decomposition ($name, $is_canon, %$table);
    }
    return $status;
}

Line No.	Rev	Author	Line
1	762	jeremybenn	`#!/usr/bin/perl -w`
2			`# unicode-decomp.pl - script to generate database for java.text.Collator`
3			`# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.`
4			`#`
5			`# This file is part of libjava.`
6			`#`
7			`# This software is copyrighted work licensed under the terms of the`
8			`# Libjava License. Please consult the file "LIBJAVA_LICENSE" for`
9			`# details.`
10
11			`# Code for reading UnicodeData.txt and generating the code for`
12			`# gnu.java.lang.CharData. For now, the relevant Unicode definition files`
13			`# are found in libjava/gnu/gcj/convert/.`
14			`#`
15			`# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>`
16			`# where <UnicodeData.txt> is obtained from www.unicode.org (named`
17			`# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <CharData.java>`
18			`# is the final location of include/java-chardecomp.h.`
19			`# As of JDK 1.4, use Unicode version 3.0.0 for best results.`
20			`#`
21			`# If this exits with nonzero status, then you must investigate the`
22			`# cause of the problem.`
23			`# Diagnostics and other information to stderr.`
24			`# With -n, the files are not created, but all processing still occurs.`
25
26			`# These maps characters to their decompositions.`
27			`my %canonical_decomposition = ();`
28			`my %full_decomposition = ();`
29
30			# Handle `-n' and open output files.
31			`if ($ARGV[0] && $ARGV[0] eq '-n')`
32			`{`
33			`shift @ARGV;`
34			`$ARGV[1] = '/dev/null';`
35			`}`
36			`die "Usage: $0 <UnicodeData.txt> <java-chardecomp.h>" unless @ARGV == 2;`
37			`open (UNICODE, "< $ARGV[0]") \|\| die "Can't open Unicode attribute file: $!\n";`
38
39			`# Process the Unicode file.`
40			`$\| = 1;`
41			`my $count = 0;`
42			`print STDERR "Parsing attributes file";`
43			`while (<UNICODE>)`
44			`{`
45			`print STDERR "." unless $count++ % 1000;`
46			`chomp;`
47			`s/\r//g;`
48			`my ($ch, undef, undef, undef, undef, $decomp) = split ';';`
49			`$ch = hex($ch);`
50
51			`if ($decomp ne '')`
52			`{`
53			`my $is_full = 0;`
54			`my @decomp = ();`
55			`foreach (split (' ', $decomp))`
56			`{`
57			`if (/^\<.*\>$/)`
58			`{`
59			`$is_full = 1;`
60			`next;`
61			`}`
62			`push (@decomp, hex ($_));`
63			`}`
64			`my $s = pack "n*", @decomp;`
65			`if ($is_full)`
66			`{`
67			`$full_decomposition{$ch} = $s;`
68			`}`
69			`else`
70			`{`
71			`$canonical_decomposition{$ch} = $s;`
72			`}`
73			`}`
74			`}`
75
76			`# Now generate decomposition tables.`
77			`open DECOMP, "> $ARGV[1]" or die "Can't open output file: $!\n";`
78			`print STDERR "\nGenerating tables\n";`
79			`print DECOMP <<EOF;`
80			`// java-chardecomp.h - Decomposition character tables -- c++ --`
81
82			`#ifndef __JAVA_CHARDECOMP_H__`
83			`#define __JAVA_CHARDECOMP_H__`
84
85
86			`// These tables are automatically generated by the $0`
87			`// script. DO NOT EDIT the tables. Instead, fix the script`
88			`// and run it again.`
89
90			`// This file should only be included by natCollator.cc`
91
92			`struct decomp_entry`
93			`{`
94			`jchar key;`
95			`const char *value;`
96			`};`
97
98			`EOF`
99
100			`&write_decompositions;`
101
102			`print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";`
103
104			`close(DECOMP);`
105			`print STDERR "Done\n";`
106			`exit;`
107
108
109			`# Write a single decomposition table.`
110			`sub write_single_decomposition($$%)`
111			`{`
112			`my ($name, $is_canon, %table) = @_;`
113			`my $first_line = 1;`
114			`print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";`
115
116			`for my $key (0 .. 0xffff)`
117			`{`
118			`next if ! defined $table{$key};`
119			`print DECOMP ",\n" unless $first_line;`
120			`$first_line = 0;`
121
122			`printf DECOMP " { 0x%04x, \"", $key;`
123
124			`# We represent the expansion as a series of bytes, terminated`
125			`# with a double nul. This is ugly, but relatively`
126			`# space-efficient. Most expansions are short, but there are a`
127			`# few that are very long (e.g. \uFDFA). This means that if we`
128			`# chose a fixed-space representation we would waste a lot of`
129			`# space.`
130			`my @expansion = unpack "n*", $table{$key};`
131			`foreach my $char (@expansion)`
132			`{`
133			`printf DECOMP "\\x%02x\\x%02x", ($char / 256), ($char % 256);`
134			`}`
135
136			`print DECOMP "\" }";`
137			`}`
138
139			`print DECOMP "\n};\n\n";`
140			`}`
141
142			`sub write_decompositions()`
143			`{`
144			`&write_single_decomposition ('canonical', 1, %canonical_decomposition);`
145			`&write_single_decomposition ('full', 0, %full_decomposition);`
146			`}`

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [scripts/] [unicode-decomp.pl] - Blame information for rev 833