OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [scripts/] [unicode-decomp.pl] - Blame information for rev 867

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 762 jeremybenn
#!/usr/bin/perl -w
2
# unicode-decomp.pl - script to generate database for java.text.Collator
3
# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
4
#
5
# This file is part of libjava.
6
# 
7
# This software is copyrighted work licensed under the terms of the
8
# Libjava License.  Please consult the file "LIBJAVA_LICENSE" for
9
# details.
10
 
11
# Code for reading UnicodeData.txt and generating the code for
12
# java.text.Collator.  For now, the relevant Unicode definition files
13
# are found in libjava/gnu/gcj/convert/.
14
#
15
# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>
16
#   where <UnicodeData.txt> is obtained from www.unicode.org (named
17
#   UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <decomp.h>
18
#   is the final location of include/java-chardecomp.h.
19
#   As of JDK 1.4, use Unicode version 3.0.0 for best results.
20
#
21
# If this exits with nonzero status, then you must investigate the
22
# cause of the problem.
23
# Diagnostics and other information to stderr.
24
# With -n, the files are not created, but all processing still occurs.
25
 
26
# These map characters to their decompositions.
27
# Both hashes are keyed by numeric code point; each value is the
# decomposition packed as a string of 16-bit big-endian units.
# Entries whose decomposition field carried a <tag> go into the
# "full" table, untagged ones into the "canonical" table.
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n' and open output files.
if ($ARGV[0] && $ARGV[0] eq '-n')
{
    shift @ARGV;
    # With -n all processing still happens, but the output is discarded.
    $ARGV[1] = '/dev/null';
}
die "Usage: $0 <UnicodeData.txt> <java-chardecomp.h>" unless @ARGV == 2;

# Three-argument open: the input name comes from the command line and
# must be taken literally, never parsed for mode characters or pipes.
open (my $unicode, '<', $ARGV[0])
    or die "Can't open Unicode attribute file: $!\n";

# Process the Unicode file.
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
while (my $line = <$unicode>)
{
    # One progress dot per 1000 input lines.
    print STDERR "." unless $count++ % 1000;
    chomp $line;
    $line =~ s/\r//g;

    # Field 0 is the code point (hex), field 5 the decomposition mapping.
    my ($ch, undef, undef, undef, undef, $decomp) = split (';', $line);

    # Blank or truncated lines have no decomposition field at all; skip
    # them quietly instead of tripping uninitialized-value warnings.
    next unless defined $decomp && $decomp ne '';

    my $code = hex ($ch);
    my $is_full = 0;
    my @units = ();
    foreach my $field (split (' ', $decomp))
    {
        # A <tag> is not part of the expansion; it only marks the
        # mapping as belonging to the "full" table.
        if ($field =~ /^\<.*\>$/)
        {
            $is_full = 1;
            next;
        }
        push (@units, hex ($field));
    }

    # NOTE(review): pack "n" keeps only 16 bits per value, so code points
    # above 0xFFFF would not survive here.  The header recommends Unicode
    # 3.0.0 data -- confirm before feeding newer UnicodeData files.
    my $packed = pack ("n*", @units);
    if ($is_full)
    {
        $full_decomposition{$code} = $packed;
    }
    else
    {
        $canonical_decomposition{$code} = $packed;
    }
}
close ($unicode);
75
 
76
# Now generate decomposition tables.
77
# The output name comes straight from the command line, so use the
# three-argument form of open: the name is then taken literally and is
# never parsed for mode characters or pipes.  DECOMP stays a bareword
# handle because the table-writing subs print to it by name.
open (DECOMP, '>', $ARGV[1]) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -*- c++ -*-

#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__


// These tables are automatically generated by the $0
// script.  DO NOT EDIT the tables.  Instead, fix the script
// and run it again.

// This file should only be included by natCollator.cc

struct decomp_entry
{
  jchar key;
  const char *value;
};

EOF

# Called with `&' and no argument list because the sub (and its
# prototype) is only defined further down in the file.
&write_decompositions;

print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";

# Buffered output is flushed at close, so this is where a failed write
# (for example a full disk) becomes visible.  Die so that the script
# exits with nonzero status instead of leaving a truncated header.
close (DECOMP) or die "Can't write output file: $!\n";
print STDERR "Done\n";
exit;
107
 
108
 
109
# Emit one C++ table, `<name>_decomposition', to the DECOMP filehandle.
#
#   $name      prefix of the generated array name
#   $is_canon  accepted for the callers' benefit; not used here
#   %table     code point => expansion packed with "n*"
#
# Only keys in the range 0 .. 0xffff are written, in ascending order.
sub write_single_decomposition($$%)
{
    my ($name, $is_canon, %table) = @_;

    # Build every initializer first; joining them afterwards puts the
    # ",\n" separator between entries without a first-line flag.
    my @entries = ();
    foreach my $code (0 .. 0xffff)
    {
        my $packed = $table{$code};
        next unless defined $packed;

        # Each 16-bit unit of the expansion becomes two hex-escaped
        # bytes, high byte first.  A variable-length byte string is used
        # because most expansions are short while a few (e.g. \uFDFA)
        # are very long, so fixed-size records would waste space.
        my $bytes = join ('',
                          map { sprintf ("\\x%02x\\x%02x", $_ >> 8, $_ & 0xff) }
                          unpack ("n*", $packed));

        push (@entries, sprintf ("  { 0x%04x, \"%s\" }", $code, $bytes));
    }

    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";
    print DECOMP join (",\n", @entries);
    print DECOMP "\n};\n\n";
}
141
 
142
# Write both generated tables to DECOMP: the canonical mappings first,
# then the "full" ones (those whose decomposition field carried a <tag>).
sub write_decompositions()
{
    write_single_decomposition ('canonical', 1, %canonical_decomposition);
    return write_single_decomposition ('full', 0, %full_decomposition);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.