
Subversion Repositories openrisc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    from Rev 753 to Rev 762
    Reverse comparison

Rev 753 → Rev 762

0,0 → 1,642
#!/usr/bin/perl -w
# -- generate Unicode database for java.lang.Character
# Copyright (C) 1998, 2002, 2004 Free Software Foundation, Inc.
# This file is part of GNU Classpath.
# GNU Classpath is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# GNU Classpath is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with GNU Classpath; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA.
# Linking this library statically or dynamically with other modules is
# making a combined work based on this library. Thus, the terms and
# conditions of the GNU General Public License cover the whole
# combination.
# As a special exception, the copyright holders of this library give you
# permission to link this library with independent modules to produce an
# executable, regardless of the license terms of these independent
# modules, and to copy and distribute the resulting executable under
# terms of your choice, provided that you also meet, for each linked
# independent module, the terms and conditions of the license of that
# module. An independent module is a module which is not derived from
# or based on this library. If you modify this library, you may extend
# this exception to your version of the library, but you are not
# obligated to do so. If you do not wish to do so, delete this
# exception statement from your version.
# Code for reading UnicodeData-3.0.0.txt and SpecialCasing-2.txt to generate
# the code for the Java interface gnu/java/lang/CharData. The relevant
# files can be found at www.unicode.org.
# Inspired by code from Jochen Hoenicke.
# author Eric Blake <>
# Usage: ./unicode-muncher <UnicodeData.txt> <SpecialCasing> <CharData.java>
# where <UnicodeData.txt> is obtained from www.unicode.org (named
# UnicodeData-3.0.0.txt for Unicode version 3.0.0), <SpecialCasing>
# is obtained from www.unicode.org too (named SpecialCasing-2.txt for Unicode
# version 3.0.0), and <CharData.java> is the final location for the Java
# interface gnu/java/lang/CharData. As of JDK 1.4, use Unicode version 3.0.0
# for best results.
## Convert a 16-bit integer to a Java source code String literal character
sub javaChar($) {
    # Takes one integer in -0x8000 .. 0xffff and returns the text that
    # represents that 16-bit value inside a Java string literal. Dies with
    # "Out of range" for anything outside that interval.
    my ($char) = @_;
    die "Out of range: $char\n" if $char < -0x8000 or $char > 0xffff;

    # Negative inputs are signed 16-bit values; map them to 0x8000 .. 0xffff.
    $char += 0x10000 if $char < 0;

    # Special case characters that must be escaped, or are shorter as ASCII
    return sprintf("\\%03o", $char) if $char < 0x20;    # below space: octal escape
    return "\\\"" if $char == 0x22;                     # double quote
    return "\\\\" if $char == 0x5c;                     # backslash
    return pack("C", $char) if $char < 0x7f;            # remaining ASCII: literal
    return sprintf("\\u%04x", $char);                   # 0x7f and above: \uXXXX
}
## Convert the text UnicodeData file from www.unicode.org into a Java
## interface with string constants holding the compressed information.
# General category codes. A code's position in this list is the type value
# that ends up in the low-order bits of every DATA entry, so the order must
# not change.
# NOTE(review): SKIPPED looks like a placeholder that keeps index 17 unused
# so that later codes keep their positions - confirm.
my @TYPECODES = qw(Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf
                   SKIPPED Co Cs Pd Ps Pe Pc Po Sm Sc Sk So Pi Pf);

# Flag OR-ed into the type when the decomposition field mentions noBreak.
my $NOBREAK_FLAG = 32;

my %special   = ();    # code point => [code points of its multi-char uppercase]
my @info      = ();    # per code point: packed "n5" attribute record
my $titlecase = "";    # packed (char, titlecase char) pairs
my $count     = 0;     # progress counter for the dot display
my $range     = 0;     # first code point of an open "First>" range, else 0

# Exactly three arguments are required: the two Unicode input files and the
# Java file to generate.
die "Usage: $0 <UnicodeData.txt> <SpecialCasing.txt> <CharData.java>"
    unless @ARGV == 3;

$| = 1;    # autoflush stdout so the progress dots show up immediately
print "GNU Classpath Unicode Attribute Database Generator 2.1\n";
print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
# Stage 0: Parse the special casing file
#
# Fills %special with  code point => [ uppercase expansion code points ]  for
# every entry whose uppercase mapping consists of more than one character.
print "Parsing special casing file\n";

# Three-argument open with a lexical handle: the file name comes straight
# from the command line and must never be parsed as part of the open mode.
open(my $special_fh, '<', $ARGV[1])
    or die "Can't open special casing file: $!\n";
while (<$special_fh>) {
    next if /^\#/;
    my ($ch, undef, undef, $upper) = split / *; */;

    # This grabs only the special casing for multi-char uppercase. Note that
    # there are no multi-char lowercase, and that Sun ignores multi-char
    # titlecase rules. This script omits 3 special cases in Unicode 3.0.0,
    # which must be hardcoded in java.lang.String:
    # \u03a3 (Sun ignores this special case)
    # \u0049 - lowercases to \u0131, but only in Turkish locale
    # \u0069 - uppercases to \u0130, but only in Turkish locale
    next unless defined $upper and $upper =~ / /;
    $special{hex $ch} = [map {hex} split ' ', $upper];
}
close $special_fh;
# Stage 1: Parse the attribute file
#
# Reads the UnicodeData file line by line and stores, for every code point up
# to 0xffff, a packed "n5" record in @info: type (plus flag bits), numeric
# value, uppercase delta, lowercase delta and the direction/expansion field.
# Characters whose titlecase differs from their uppercase are appended to
# $titlecase as (char, titlecase char) pairs.

# Bidirectional category codes; a code's position in the list is the
# direction value stored for the character.
# NOTE(review): this list is not present in the listing this script was
# recovered from. It is reconstructed from the DIRECTIONALITY_* constant
# values of java.lang.Character - confirm against the upstream script.
my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);

# Flag for mirrored characters: the bit directly above $NOBREAK_FLAG. Entries
# of DATA are built as (attribute index << 7) | type, so bits 0-6 are all
# that is available for the type and its two flags.
my $MIRRORED_FLAG = 64;

# Code => index lookups, so that an unknown code is detected directly rather
# than by scanning the lists.
my %type_index = map { $TYPECODES[$_] => $_ } 0 .. $#TYPECODES;
my %dir_index  = map { $DIRCODES[$_]  => $_ } 0 .. $#DIRCODES;

print "Parsing attributes file";
open(my $unicode_fh, '<', $ARGV[0])
    or die "Can't open Unicode attribute file: $!\n";
while (<$unicode_fh>) {
    print "." unless $count++ % 1000;

    # Strip the line ending (LF or CRLF); otherwise the last field, $title,
    # carries it along and never compares equal to $upcase.
    chomp;
    s/\r//g;

    # A limit of -1 keeps trailing empty fields, so the case-mapping fields
    # are empty strings rather than undef on lines that end in ";;;".
    my ($ch, $name, $category, undef, $bidir, $decomp, undef, undef, $numeric,
        $mirrored, undef, undef, $upcase, $lowcase, $title) = split /;/, $_, -1;
    $ch = hex($ch);
    next if $ch > 0xffff;    # only 16-bit characters are tabulated

    my ($type, $numValue, $upperchar, $lowerchar, $direction);

    $type = $type_index{$category};
    die "$ch: Unknown type: $category" unless defined $type;
    $type |= $NOBREAK_FLAG if ($decomp =~ /noBreak/);
    $type |= $MIRRORED_FLAG if ($mirrored =~ /Y/);

    if ($numeric =~ /^[0-9]+$/) {
        $numValue = $numeric;
        die "numValue too big: $ch, $numValue\n" if $numValue >= 0x7fff;
    } elsif ($numeric eq "") {
        # Special case sequences of 'a'-'z'
        if ($ch >= 0x0041 && $ch <= 0x005a) {
            $numValue = $ch - 0x0037;
        } elsif ($ch >= 0x0061 && $ch <= 0x007a) {
            $numValue = $ch - 0x0057;
        } elsif ($ch >= 0xff21 && $ch <= 0xff3a) {
            $numValue = $ch - 0xff17;
        } elsif ($ch >= 0xff41 && $ch <= 0xff5a) {
            $numValue = $ch - 0xff37;
        } else {
            $numValue = -1;    # no numeric value
        }
    } else {
        $numValue = -2;        # has a value, but not a non-negative integer
    }

    # Case mappings are stored as differences from the character itself.
    $upperchar = $upcase ? hex($upcase) - $ch : 0;
    $lowerchar = $lowcase ? hex($lowcase) - $ch : 0;
    if ($title ne $upcase) {
        my $titlechar = $title ? hex($title) : $ch;
        $titlecase .= pack("n2", $ch, $titlechar);
    }

    # Direction index (-1 when the code is unknown) goes in the upper bits;
    # the low two bits hold the number of extra characters produced by a
    # multi-character uppercase expansion.
    $direction = exists $dir_index{$bidir} ? $dir_index{$bidir} : -1;
    $direction <<= 2;
    $direction += $#{$special{$ch}} if defined $special{$ch};

    if ($range) {
        # The previous line opened a range; this line must close it, and every
        # code point in between receives this line's attributes.
        die "Expecting end of range at $ch\n" unless $name =~ /Last>$/;
        for ($range + 1 .. $ch - 1) {
            $info[$_] = pack("n5", $type, $numValue, $upperchar,
                             $lowerchar, $direction);
        }
        $range = 0;
    } elsif ($name =~ /First>$/) {
        $range = $ch;
    }
    $info[$ch] = pack("n5", $type, $numValue, $upperchar, $lowerchar,
                      $direction);
}
close $unicode_fh;
# Stage 2: Compress the data structures
#
# First pass: reduce the per-character records in @info to a table of unique
# attribute tuples (@charinfo) and build the scalar $info (distinct from the
# array @info), which receives one packed 16-bit value per character:
# (index into @charinfo) << 7 | type.
printf "\nCompressing data structures";
$count = 0;
my $info = ();
my %charhash = ();
my @charinfo = ();
for my $ch (0 .. 0xffff) {
print "." unless $count++ % 0x1000;
# Characters that Stage 1 never assigned get a default record: type 0,
# numeric value -1, no case deltas, direction field -4.
$info[$ch] = pack("n5", 0, -1, 0, 0, -4) unless defined $info[$ch];
my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info[$ch]);
# The complete packed record, type included, is the uniqueness key; only the
# other four fields are stored in @charinfo.
if (! exists $charhash{$info[$ch]}) {
push @charinfo, [ $numVal, $upper, $lower, $direction ];
$charhash{$info[$ch]} = $#charinfo;
$info .= pack("n", ($charhash{$info[$ch]} << 7) | $type);
my $charlen = @charinfo;
my $bestshift;
my $bestest = 1000000;
my $bestblkstr;
# With the index shifted left by 7 inside a 16-bit value, only 9 bits remain
# for it, hence the limit of 512 unique entries.
die "Too many unique character entries: $charlen\n" if $charlen > 512;
print "\nUnique character entries: $charlen\n";
# Second pass: try block sizes of 2**3 .. 2**8 characters and remember the
# shift whose size estimate is smallest.
for my $i (3 .. 8) {
my $blksize = 1 << $i;
my %blocks = ();
my @blkarray = ();
my ($j, $k);
print "shift: $i";
# Cut $info into blocks of $blksize entries (2 bytes each) and keep one copy
# of every distinct block, in order of first appearance.
for ($j = 0; $j < 0x10000; $j += $blksize) {
my $blkkey = substr $info, 2 * $j, 2 * $blksize;
if (! exists $blocks{$blkkey}) {
push @blkarray, $blkkey;
$blocks{$blkkey} = $#blkarray;
my $blknum = @blkarray;
my $blocklen = $blknum * $blksize;
printf " before %5d", $blocklen;
# Now we try to pack the blkarray as tight as possible by finding matching
# heads and tails.
# $j is the overlap length in entries, tried from longest to shortest.
for ($j = $blksize - 1; $j > 0; $j--) {
# %tails maps the last $j entries of each surviving block to the list of
# block indices that end with them.
my %tails = ();
for $k (0 .. $#blkarray) {
next unless defined $blkarray[$k];
my $len = length $blkarray[$k];
my $tail = substr $blkarray[$k], $len - $j * 2;
if (exists $tails{$tail}) {
push @{$tails{$tail}}, $k;
} else {
$tails{$tail} = [ $k ];
# tails are calculated, now calculate the heads and merge.
# NOTE(review): the "next BLOCK" statements below refer to a loop label that
# does not appear anywhere in this listing - confirm where it belongs.
for $k (0 .. $#blkarray) {
next unless defined $blkarray[$k];
my $tomerge = $k;
while (1) {
my $head = substr($blkarray[$tomerge], 0, $j * 2);
my $entry = $tails{$head};
next BLOCK unless defined $entry;
my $other = shift @{$entry};
# A block must not be merged with itself: take the next candidate if there
# is one, otherwise put the index back and give up on this block.
if ($other == $tomerge) {
if (@{$entry}) {
push @{$entry}, $other;
$other = shift @{$entry};
} else {
push @{$entry}, $other;
next BLOCK;
if (@{$entry} == 0) {
delete $tails{$head};
# a match was found
# The merged string is $other followed by $tomerge minus the shared $j
# entries, so the total length drops by $j entries.
my $merge = $blkarray[$other]
. substr($blkarray[$tomerge], $j * 2);
$blocklen -= $j;
# The merged string is kept in the lower-numbered of the two slots and the
# other slot is set to undef.
if ($other < $tomerge) {
$blkarray[$tomerge] = undef;
$blkarray[$other] = $merge;
my $len = length $merge;
my $tail = substr $merge, $len - $j * 2;
# The merged block now ends where $tomerge used to end; re-point that tail
# entry from $tomerge to $other.
$tails{$tail} = [ map { $_ == $tomerge ? $other : $_ }
@{$tails{$tail}} ];
next BLOCK;
$blkarray[$tomerge] = $merge;
$blkarray[$other] = undef;
# Concatenate the surviving blocks into the candidate data string.
my $blockstr;
for $k (0 .. $#blkarray) {
$blockstr .= $blkarray[$k] if defined $blkarray[$k];
# Sanity check: the running entry count must match the string actually built.
die "Unexpected $blocklen" if length($blockstr) != 2 * $blocklen;
# Estimate in bytes: 2 per data entry plus 2 for each of the 0x10000 >> $i
# block offsets.
my $estimate = 2 * $blocklen + (0x20000 >> $i);
printf " after merge %5d: %6d bytes\n", $blocklen, $estimate;
if ($estimate < $bestest) {
$bestest = $estimate;
$bestshift = $i;
$bestblkstr = $blockstr;
# Third pass: for the chosen shift, find each original block inside
# $bestblkstr and record its offset in @blocks.
my @blocks;
my $blksize = 1 << $bestshift;
for (my $j = 0; $j < 0x10000; $j += $blksize) {
my $blkkey = substr $info, 2 * $j, 2 * $blksize;
my $index = index $bestblkstr, $blkkey;
# Entries are two bytes wide, so a match at an odd byte offset is not a real
# one; -1 ("not found") is odd as well and is caught by the die below.
while ($index & 1) {
die "not found: $j" if $index == -1;
$index = index $bestblkstr, $blkkey, $index + 1;
# Stored value: (entry index of the block) - (first character of the block),
# reduced to 16 bits.
push @blocks, ($index / 2 - $j) & 0xffff;
# Phase 3: Generate the file
#
# Refuse to continue when either table has more than 0xffff / 3 elements.
# NOTE(review): presumably because a Java string constant is limited to
# 0xffff bytes of UTF-8 and one char can need three bytes - confirm.
die "UTF-8 limit of blocks may be exceeded: " . scalar(@blocks) . "\n"
    if @blocks > 0xffff / 3;
die "UTF-8 limit of data may be exceeded: " . length($bestblkstr) . "\n"
    if length($bestblkstr) > 0xffff / 3;

print "Generating $ARGV[2] with shift of $bestshift";
my ($i, $j);

# Three-argument open: the output path comes from the command line and must
# not be interpreted as part of the open mode. The OUTPUT handle name is kept
# because every later print in this phase writes to it.
open(OUTPUT, '>', $ARGV[2]) or die "Failed creating output file: $!\n";
print OUTPUT <<EOF;
/* gnu/java/lang/CharData -- Database for java.lang.Character Unicode info
Copyright (C) 2002 Free Software Foundation, Inc.
*** This file is generated by scripts/ ***
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
* This contains the info about the unicode characters, that
* java.lang.Character needs. It is generated automatically from
* <code>$ARGV[0]</code> and
* <code>$ARGV[1]</code>, by some
* perl scripts. These Unicode definition files can be found on the
* <a href=""></a> website.
* JDK 1.4 uses Unicode version 3.0.0.
* The data is stored as string constants, but Character will convert these
* Strings to their respective <code>char[]</code> components. The field
* <code>BLOCKS</code> stores the offset of a block of 2<sup>SHIFT</sup>
* characters within <code>DATA</code>. The DATA field, in turn, stores
* information about each character in the low order bits, and an offset
* into the attribute tables <code>UPPER</code>, <code>LOWER</code>,
* <code>NUM_VALUE</code>, and <code>DIRECTION</code>. Notice that the
* attribute tables are much smaller than 0xffff entries; as many characters
* in Unicode share common attributes. The DIRECTION table also contains
* a field for detecting characters with multi-character uppercase expansions.
* Next, there is a listing for <code>TITLE</code> exceptions (most characters
* just have the same title case as upper case). Finally, there are two
* tables for multi-character capitalization, <code>UPPER_SPECIAL</code>
* which lists the characters which are special cased, and
* <code>UPPER_EXPAND</code>, which lists their expansion.
* \@author scripts/ (written by Jochen Hoenicke,
* Eric Blake)
* \@see Character
* \@see String
public interface CharData
* The Unicode definition file that was parsed to build this database.
String SOURCE = \"$ARGV[0]\";
* The character shift amount to look up the block offset. In other words,
* <code>(char) (BLOCKS.value[ch >> SHIFT] + ch)</code> is the index where
* <code>ch</code> is described in <code>DATA</code>.
int SHIFT = $bestshift;
* The mapping of character blocks to their location in <code>DATA</code>.
* Each entry has been adjusted so that the 16-bit sum with the desired
* character gives the actual index into <code>DATA</code>.
for ($i = 0; $i < @blocks / 11; $i++) {
    # Write the values of @blocks as a Java string expression, 11 escaped
    # characters per quoted fragment: the first fragment is introduced by
    # ' = "', every later one by a newline and ' + "'.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 10) {
        last if @blocks <= $i * 11 + $j;    # the final fragment may be short
        my $val = $blocks[$i * 11 + $j];
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* Information about each character. The low order 5 bits form the
* character type, the next bit is a flag for non-breaking spaces, and the
* next bit is a flag for mirrored directionality. The high order 9 bits
* form the offset into the attribute tables. Note that this limits the
* number of unique character attributes to 512, which is not a problem
* as of Unicode version 3.2.0, but may soon become one.
String DATA
my $len = length($bestblkstr) / 2;    # number of 16-bit entries in the data
for ($i = 0; $i < $len / 11; $i++) {
    # Write the entries of $bestblkstr as a Java string expression, 11
    # escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 10) {
        last if $len <= $i * 11 + $j;       # the final fragment may be short
        my $val = unpack "n", substr($bestblkstr, 2 * ($i * 11 + $j), 2);
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is the attribute table for computing the numeric value of a
* character. The value is -1 if Unicode does not define a value, -2
* if the value is not a positive integer, otherwise it is the value.
* Note that this is a signed value, but stored as an unsigned char
* since this is a String literal.
$len = @charinfo;
for ($i = 0; $i < $len / 11; $i++) {
    # Write field 0 (the numeric value) of every @charinfo tuple as a Java
    # string expression, 11 escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 10) {
        last if $len <= $i * 11 + $j;       # the final fragment may be short
        my $val = $charinfo[$i * 11 + $j][0];
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is the attribute table for computing the single-character uppercase
* representation of a character. The value is the signed difference
* between the character and its uppercase version. Note that this is
* stored as an unsigned char since this is a String literal. When
* capitalizing a String, you must first check if a multi-character uppercase
* sequence exists before using this character.
String UPPER
$len = @charinfo;
for ($i = 0; $i < $len / 11; $i++) {
    # Write field 1 (the uppercase delta) of every @charinfo tuple as a Java
    # string expression, 11 escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 10) {
        last if $len <= $i * 11 + $j;       # the final fragment may be short
        my $val = $charinfo[$i * 11 + $j][1];
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is the attribute table for computing the lowercase representation
* of a character. The value is the signed difference between the
* character and its lowercase version. Note that this is stored as an
* unsigned char since this is a String literal.
String LOWER
$len = @charinfo;
for ($i = 0; $i < $len / 13; $i++) {
    # Write field 2 (the lowercase delta) of every @charinfo tuple as a Java
    # string expression, 13 escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 12) {
        last if $len <= $i * 13 + $j;       # the final fragment may be short
        my $val = $charinfo[$i * 13 + $j][2];
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is the attribute table for computing the directionality class
* of a character, as well as a marker of characters with a multi-character
* capitalization. The direction is taken by performing a signed shift
* right by 2 (where a result of -1 means an unknown direction, such as
* for undefined characters). The lower 2 bits form a count of the
* additional characters that will be added to a String when performing
* multi-character uppercase expansion. This count is also used, along with
* the offset in UPPER_SPECIAL, to determine how much of UPPER_EXPAND to use
* when performing the case conversion. Note that this information is stored
* as an unsigned char since this is a String literal.
$len = @charinfo;
for ($i = 0; $i < $len / 17; $i++) {
    # Write field 3 (direction and expansion count) of every @charinfo tuple
    # as a Java string expression, 17 escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 16) {
        last if $len <= $i * 17 + $j;       # the final fragment may be short
        my $val = $charinfo[$i * 17 + $j][3];
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is the listing of titlecase special cases (all other characters
* can use <code>UPPER</code> to determine their titlecase). The listing
* is a sorted sequence of character pairs; converting the first character
* of the pair to titlecase produces the second character.
String TITLE
$len = length($titlecase) / 2;    # number of 16-bit values in $titlecase
for ($i = 0; $i < $len / 11; $i++) {
    # Write the packed (char, titlecase char) values of $titlecase as a Java
    # string expression, 11 escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 10) {
        last if $len <= $i * 11 + $j;       # the final fragment may be short
        my $val = unpack "n", substr($titlecase, 2 * ($i * 11 + $j), 2);
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is a listing of characters with multi-character uppercase sequences.
* A character appears in this list exactly when it has a non-zero entry
* in the low-order 2-bit field of DIRECTION. The listing is a sorted
* sequence of pairs (hence a binary search on the even elements is an
* efficient way to lookup a character). The first element of a pair is the
* character with the expansion, and the second is the index into
* UPPER_EXPAND where the expansion begins. Use the 2-bit field of
* DIRECTION to determine where the expansion ends.
my @list = sort {$a <=> $b} keys %special;    # special-cased chars, ascending
my $expansion = "";    # concatenated expansions, packed as 16-bit values
my $offset = 0;        # index in $expansion where the next expansion starts
$len = @list;
for ($i = 0; $i < $len / 5; $i++) {
    # Write (character, offset) pairs as a Java string expression, 5 pairs
    # per quoted fragment, while collecting the expansions themselves in
    # $expansion for the table that follows.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 4) {
        last if $len <= $i * 5 + $j;        # the final fragment may be short
        my $ch = $list[$i * 5 + $j];
        print OUTPUT javaChar($ch);
        print OUTPUT javaChar($offset);
        $offset += @{$special{$ch}};        # advance by the expansion length
        $expansion .= pack "n*", @{$special{$ch}};
    }
    print OUTPUT "\"";                      # close this fragment
}
print OUTPUT <<EOF;
* This is the listing of special case multi-character uppercase sequences.
* Characters listed in UPPER_SPECIAL index into this table to find their
* uppercase expansion. Remember that you must also perform special-casing
* on two single-character sequences in the Turkish locale, which are not
* covered here in CharData.
$len = length($expansion) / 2;    # number of 16-bit values in $expansion
for ($i = 0; $i < $len / 11; $i++) {
    # Write the collected uppercase expansions as a Java string expression,
    # 11 escaped characters per quoted fragment.
    print OUTPUT $i ? "\n + \"" : " = \"";
    for $j (0 .. 10) {
        last if $len <= $i * 11 + $j;       # the final fragment may be short
        my $val = unpack "n", substr($expansion, 2 * ($i * 11 + $j), 2);
        print OUTPUT javaChar($val);
    }
    print OUTPUT "\"";                      # close this fragment
}

# Terminate the last constant and the interface body.
print OUTPUT ";\n}\n";

# Buffered write errors (for example a full disk) only surface at close, so
# the result is checked instead of being ignored.
close(OUTPUT) or die "Failed writing output file: $!\n";
print "\nDone.\n"; Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,523 @@ +#! @SHELL@ +# Copyright (C) 2006 Free Software Foundation +# Written by Paolo Bonzini. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 2 of the License, +# or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# POSIX and NLS nuisances, taken from autoconf. +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +if test "${LANG+set}" = set; then LANG=C; export LANG; fi +if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi +if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi +if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi + +# Also make sure CDPATH is empty, and IFS is space, tab, \n in that order. 
+# Be careful to avoid that editors munge IFS +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH +IFS=" "" "" +" + +: ${TMPDIR=/tmp} +: ${ZIP="@ZIP@"} +: ${UNZIP="@UNZIP@"} +progname="$0" + +# Emit a usage message and exit with error status 1 +usage () { + cat >&2 <&2 + (exit 1); exit 1 +} + +# Usage: copy SRC DEST +# Copy file SRC to directory DEST, which is the staging area of the jar file. +# Fail if it is already present or if it is not a regular file. +copy () { + if test -f "$1"; then + # A simple optimization. Optimistically assuming that ln will work + # cuts 60% of the run-time! + if ln "$1" "$2"/"$1" > /dev/null 2>&1; then + return 0 + fi + + if test -f "$2"/"$1"; then + error "$1": Duplicate entry. + fi + dir=`dirname "$1"` + $mkdir_p "$2"/"$dir" + ln "$1" "$2"/"$1" > /dev/null 2>&1 || cp "$1" "$2"/"$1" + elif test -e "$1"; then + error "$1": Invalid file type. + else + error "$1": File not found. + fi +} + +# Make a temporary directory and store its name in the JARTMP variable. +make_tmp () { + test -n "$JARTMP" && return + + { + JARTMP=`(umask 077 && mktemp -d "$TMPDIR/jarXXXXXX") 2>/dev/null` && + test -n "$JARTMP" && test -d "$JARTMP" + } || { + JARTMP=$TMPDIR/jar$$-$RANDOM + (umask 077 && mkdir "$JARTMP") + } || exit $? + + trap 'exit_status=$? + if test -n "$JARTMP"; then rm -rf "$JARTMP"; fi + exit $exit_status' 0 +} + +# Usage: make_manifest destfile kind [source-manifest] +# Create a manifest file and store it in destfile. KIND can be "default", +# or "user", in which case SOURCE-MANIFEST must be specified as well. +make_manifest () { + dir=`dirname "$1"` + $mkdir_p "$dir" + case $2 in + default) + cat > "$1" <<\EOF +Manifest-Version: 1.0 +Created-By: @VERSION@ + +EOF + ;; + user) + cp "$3" "$1" + ;; + esac +} + +# Usage: set_var var [value] +# Exit with an error if set_var was already called for the same VAR. Else +# set the variable VAR to the value VALUE (or the empty value if no parameter +# is given). 
+set_var () { + if eval test x\$set_$1 = xset; then + error Incompatible or repeated options. + else + eval $1=\$2 + eval set_$1=set + fi +} + +# Process the arguments, including -C options, and copy the whole tree +# to $JARTMP/files so that zip can be invoked later from there. +make_files () { + change=false + if $process_response_files; then + if test $# = 0; then + while read arg; do + make_files_1 "$arg" + done + else + for infile + do + exec 5<&0 + exec 0< $infile + while read arg; do + make_files_1 "$arg" + done + exec 0<&5 + exec 5<&- + done + fi + else + for arg + do + make_files_1 "$arg" + done + fi + cd "$old_dir" +} + +# Usage: make_files_1 ARG +# Process one argument, ARG. +make_files_1 () { + if $change; then + change=false + if cd "$1"; then + return + else + (exit 1); exit 1 + fi + fi + case "$1" in + -C) + change=: + ;; + -C*) + cd `expr "$1" : '-C\(.*\)' ` + return + ;; + *) + if test -d "$1"; then + $mkdir_p "$JARTMP"/files/"$1" + find "$1" | while read file; do + if test -d "$file"; then + $mkdir_p "$JARTMP"/files/"$file" + else + copy "$file" "$JARTMP"/files + fi + done + else + copy "$1" "$JARTMP"/files + fi + ;; + esac + cd "$old_dir" +} + +# Same as "jar tf $1". +jar_list () { + $UNZIP -l "$1" | \ + sed '1,/^ ----/d;/^ ----/,$d;s/^ *[0-9]* ..-..-.. ..:.. //' +} + +# Same as "jar tvf $1". +jar_list_verbose () { + $UNZIP -l "$1" | \ + @AWK@ 'BEGIN { yes = 0 } + /^ ----/ { yes = !yes; next } + yes { + size=$1 + split ($2, d, "-") + split ($3, t, ":") + d[3] += (d[3] < 80) ? 2000 : 1900 + timestamp=d[3] " " d[1] " " d[2] " " t[1] " " t[2] " 00" + gsub (/^ *[0-9]* ..-..-.. ..:.. /, "") + printf "%6d %s %s\n", size, strftime ("%a %b %d %H:%M:%S %Z %Y", mktime (timestamp)), $0 + }' +} + +# mkdir -p emulation based on the mkinstalldirs script. 
+func_mkdir_p () { + for file + do + case $file in + /*) pathcomp=/ ;; + *) pathcomp= ;; + esac + oIFS=$IFS + IFS=/ + set fnord $file + shift + IFS=$oIFS + + errstatus=0 + for d + do + test "x$d" = x && continue + pathcomp=$pathcomp$d + case $pathcomp in + -*) pathcomp=./$pathcomp ;; + esac + + if test ! -d "$pathcomp"; then + mkdir "$pathcomp" || lasterr=$? + test -d "$pathcomp" || errstatus=$lasterr + fi + pathcomp=$pathcomp/ + done + done + return "$errstatus" +} + +# Detect mkdir -p +# On NextStep and OpenStep, the `mkdir' command does not +# recognize any option. It will interpret all options as +# directories to create, and then abort because `.' already +# exists. +if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then + mkdir_p='mkdir -p' +else + mkdir_p='func_mkdir_p' + test -d ./-p && rmdir ./-p + test -d ./--version && rmdir ./--version +fi + +# Process the first command line option. +case "$1" in + -*) commands=`echo X"$1" | sed 's/^X-//' ` ;; + *) commands="$1" +esac +shift + +# Operation to perform on the JAR file +mode=unknown + +# First -C option on the command line +cur_dir=. + +# Base directory for -C options +old_dir=`pwd` +# JAR file to operate on +jarfile= + +# default for no {m,M} option, user for "m" option, none for "M" option +manifest_kind=default + +# "-0" if the "0" option was given +store= + +# true if the "v" option was given +verbose=false + +# true if the non-standard "@" option was given +process_response_files=false + +# An exec command if we need to redirect the zip/unzip commands' output +out_redirect=: + +while test -n "$commands"; do + # Process a letter at a time + command=`expr "$commands" : '\(.\)'` + commands=`expr "$commands" : '.\(.*\)'` + case "$command" in + c) + set_var mode create + ;; + t) + set_var mode list + ;; + x) + set_var mode extract + ;; + u) + set_var mode update + ;; + + f) + test $# = 0 && usage + # Multiple "f" options are accepted by Sun's JAR tool. 
+ jarfile="$1" + test -z "$jarfile" && usage + shift + ;; + m) + test $# = 0 && usage + # Multiple "m" options are accepted by Sun's JAR tool, but + # M always overrides m. + test "$manifest_kind" = default && manifest_kind=user + manifest_file="$1" + test -z "$manifest_file" && usage + shift + ;; + 0) + store=-0 + ;; + v) + verbose=: + ;; + i) + # Not yet implemented, and probably never will. + ;; + M) + manifest_kind=none + ;; + C) + test $# = 0 && usage + cur_dir="$1" + shift + ;; + @) + process_response_files=: ;; + *) + usage ;; + esac +done + +set -e + +case "X$jarfile" in + X) + # Work on stdin/stdout. Messages go to stderr, and if we need an input + # JAR file we save it temporarily in the temporary directory. + make_tmp + $mkdir_p "$JARTMP"/out + jarfile="$JARTMP"/out/tmp-stdin.jar + out_redirect='exec >&2' + case $mode in + update|extract|list) + if $process_response_files && test $# = 0; then + error Cannot use stdin for response file. + fi + cat > "$JARTMP"/out/tmp-stdin.jar + ;; + esac + ;; + + X*/*) + # Make an absolute path. + dir=`dirname "$jarfile"` + jarfile=`cd $dir && pwd`/`basename "$jarfile"` + ;; + + X*) + # Make an absolute path from a filename in the current directory. + jarfile=`pwd`/`basename "$jarfile"` + ;; +esac + +# Perform a -C option if given right away. 
+cd "$cur_dir" + +case $mode in + unknown) + usage + ;; + + extract) + make_tmp + + # Extract the list of files in the JAR file + jar_list "$jarfile" > "$JARTMP"/list + + # If there are files on the command line, expand directories and skip -C + # command line arguments + for arg + do + if $skip; then + skip=false + continue + fi + case "$arg" in + -C) skip=: ;; + -C*) ;; + *) + escaped=`echo "X$arg" | sed 's/^X//; s/[].[^$\\*]/\\\\&/g' ` + grep "^$escaped/" "$JARTMP"/list >> "$JARTMP"/chosen || : + grep "^$escaped\$" "$JARTMP"/list >> "$JARTMP"/chosen || : + esac + done + test -f "$JARTMP"/chosen || cp "$JARTMP"/list "$JARTMP"/chosen + + # Really execute unzip + if $verbose; then + sort < "$JARTMP"/chosen | uniq | xargs $UNZIP -o "$jarfile" | \ + sed -ne 's/^ creating/ created/p' -e 's/^ inflating/extracted/p' + else + sort < "$JARTMP"/chosen | uniq | xargs $UNZIP -o "$jarfile" > /dev/null + fi + ;; + + create) + make_tmp + $mkdir_p "$JARTMP"/out + $mkdir_p "$JARTMP"/files + + # Do not overwrite the JAR file if something goes wrong + tmp_jarfile="$JARTMP"/out/`basename "$jarfile"` + + # Prepare the files in the temporary directory. This is necessary to + # support -C and still save relative paths in the JAR file. + make_files ${1+"$@"} + if test $manifest_kind != none; then + make_manifest "$JARTMP"/files/META-INF/MANIFEST.MF $manifest_kind "$manifest_file" + fi + + # Really execute zip + if $verbose; then + (eval $out_redirect; cd "$JARTMP"/files && $ZIP -rv "$tmp_jarfile" $store .) + else + (cd "$JARTMP/files" && $ZIP -r "$tmp_jarfile" $store . 
> /dev/null) + fi + test "$jarfile" = "$tmp_jarfile" || mv "$tmp_jarfile" "$jarfile" + ;; + + update) + make_tmp + $mkdir_p "$JARTMP"/files + make_files ${1+"$@"} + + # Same as above, but zip takes care of not overwriting the file + case $manifest_kind in + none) + $verbose && (eval $out_redirect; echo removing manifest) + $ZIP -d "$jarfile" META-INF/MANIFEST.MF > /dev/null 2>&1 || : + ;; + *) + make_manifest "$JARTMP"/files/META-INF/MANIFEST.MF $manifest_kind "$manifest_file" + ;; + esac + if $verbose; then + (eval $out_redirect; cd "$JARTMP"/files && $ZIP -ruv "$jarfile" $store .) + else + (cd "$JARTMP"/files && $ZIP -ru "$jarfile" $store . > /dev/null) + fi + ;; + + list) + # Everything's done in the functions + if $verbose; then + jar_list_verbose "$jarfile" + else + jar_list "$jarfile" + fi ;; +esac + +if test "$out_redirect" != :; then + # Cat back to stdout if necessary + case $mode in + create|update) cat "$JARTMP"/out/tmp-stdin.jar ;; + esac +fi +exit 0 Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,79 @@ +# - Download IANA text and compute alias list. +# Assumes you are running this program from gnu/gcj/convert/. +# Output suitable for direct inclusion in + +# Map IANA canonical names onto our canonical names. +%map = ( + 'ANSI_X3.4-1968' => 'ASCII', + 'ISO_8859-1:1987' => '8859_1', + 'UTF-8' => 'UTF8', + 'Shift_JIS' => 'SJIS', + 'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS', + 'UTF16-LE' => 'UnicodeLittle', + 'UTF16-BE' => 'UnicodeBig' + ); + +if ($ARGV[0] eq '') +{ + $file = 'character-sets'; + if (! -f $file) + { + # Too painful to figure out how to get Perl to do it. + system 'wget -o .wget-log'; + } +} +else +{ + $file = $ARGV[0]; +} + +# Include canonical names in the output. 
+foreach $key (keys %map) +{ + $output{lc ($key)} = $map{$key}; +} + +open (INPUT, "< $file") || die "couldn't open $file: $!"; + +$body = 0; +$current = ''; +while () +{ + chop; + $body = 1 if /^Name:/; + next unless $body; + + if (/^$/) + { + $current = ''; + next; + } + + ($type, $name) = split (/\s+/); + # Encoding names are case-insensitive. We do all processing on + # the lower-case form. + my $lower = lc ($name); + if ($type eq 'Name:') + { + $current = $map{$name}; + if ($current) + { + $output{$lower} = $current; + } + } + elsif ($type eq 'Alias:') + { + # The IANA list has some ugliness. + if ($name ne '' && $lower ne 'none' && $current) + { + $output{$lower} = $current; + } + } +} + +close (INPUT); + +foreach $key (sort keys %output) +{ + print " hash.put (\"$key\", \"$output{$key}\");\n"; +} Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,41 @@ +// Show a value given class name and constant name. + +/* Copyright (C) 2000 Free Software Foundation + + This file is part of libgcj. + +This software is copyrighted work licensed under the terms of the +Libgcj License. Please consult the file "LIBGCJ_LICENSE" for +details. */ + +/* Written by Tom Tromey . */ + +// Use like this to print a `static final' value (integers only, not +// strings yet): +// java showval java.awt.geom.AffineTransform.TYPE_IDENTITY +// Prints result like: +// TYPE_IDENTITY = 0 +// In conjunction with a keyboard macro you can do a number of +// constants very easily. 
+ +import java.lang.reflect.*; + +public class showval +{ + public static void main (String[] args) + { + int ch = args[0].lastIndexOf ('.'); + String className = args[0].substring (0, ch); + String constName = args[0].substring (ch + 1); + try + { + Class klass = Class.forName (className); + Field field = klass.getField (constName); + System.out.println (constName + " = " + field.getInt (null)); + } + catch (Throwable _) + { + System.out.println (_); + } + } +} Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,146 @@ +#!/usr/bin/perl -w +# - script to generate database for java.text.Collator +# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc. +# +# This file is part of libjava. +# +# This software is copyrighted work licensed under the terms of the +# Libjava License. Please consult the file "LIBJAVA_LICENSE" for +# details. + +# Code for reading UnicodeData.txt and generating the code for +# For now, the relevant Unicode definition files +# are found in libjava/gnu/gcj/convert/. +# +# Usage: ./ [-n] +# where is obtained from (named +# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and +# is the final location of include/java-chardecomp.h. +# As of JDK 1.4, use Unicode version 3.0.0 for best results. +# +# If this exits with nonzero status, then you must investigate the +# cause of the problem. +# Diagnostics and other information to stderr. +# With -n, the files are not created, but all processing still occurs. + +# These maps characters to their decompositions. +my %canonical_decomposition = (); +my %full_decomposition = (); + +# Handle `-n' and open output files. +if ($ARGV[0] && $ARGV[0] eq '-n') +{ + shift @ARGV; + $ARGV[1] = '/dev/null'; +} +die "Usage: $0 " unless @ARGV == 2; +open (UNICODE, "< $ARGV[0]") || die "Can't open Unicode attribute file: $!\n"; + +# Process the Unicode file. 
+$| = 1; +my $count = 0; +print STDERR "Parsing attributes file"; +while () +{ + print STDERR "." unless $count++ % 1000; + chomp; + s/\r//g; + my ($ch, undef, undef, undef, undef, $decomp) = split ';'; + $ch = hex($ch); + + if ($decomp ne '') + { + my $is_full = 0; + my @decomp = (); + foreach (split (' ', $decomp)) + { + if (/^\<.*\>$/) + { + $is_full = 1; + next; + } + push (@decomp, hex ($_)); + } + my $s = pack "n*", @decomp; + if ($is_full) + { + $full_decomposition{$ch} = $s; + } + else + { + $canonical_decomposition{$ch} = $s; + } + } +} + +# Now generate decomposition tables. +open DECOMP, "> $ARGV[1]" or die "Can't open output file: $!\n"; +print STDERR "\nGenerating tables\n"; +print DECOMP < Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,212 @@ +/* - converts into + include/java-chartables.h + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. 
+ +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +import; + +public class MakeCharTables implements CharData +{ + public static void main(String[] args) + { + System.out.println("/* java-chartables.h -- Character tables for java.lang.Character -*- c++ -*-\n" + + " Copyright (C) 2002 Free Software Foundation, Inc.\n" + + " *** This file is generated by scripts/ ***\n" + + "\n" + + "This file is part of GNU Classpath.\n" + + "\n" + + "GNU Classpath is free software; you can redistribute it and/or modify\n" + + "it under the terms of the GNU General Public License as published by\n" + + "the Free Software Foundation; either version 2, or (at your option)\n" + + "any later version.\n" + + "\n" + + "GNU Classpath is distributed in the hope that it will be useful, but\n" + + "WITHOUT ANY WARRANTY; without even the implied warranty of\n" + + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" + + "General Public License for more details.\n" + + "\n" + + "You should have received a copy of the GNU General Public License\n" + + "along with GNU Classpath; see the file COPYING. 
If not, write to the\n" + + "Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA\n" + + "02110-1301 USA.\n" + + "\n" + + "Linking this library statically or dynamically with other modules is\n" + + "making a combined work based on this library. Thus, the terms and\n" + + "conditions of the GNU General Public License cover the whole\n" + + "combination.\n" + + "\n" + + "As a special exception, the copyright holders of this library give you\n" + + "permission to link this library with independent modules to produce an\n" + + "executable, regardless of the license terms of these independent\n" + + "modules, and to copy and distribute the resulting executable under\n" + + "terms of your choice, provided that you also meet, for each linked\n" + + "independent module, the terms and conditions of the license of that\n" + + "module. An independent module is a module which is not derived from\n" + + "or based on this library. If you modify this library, you may extend\n" + + "this exception to your version of the library, but you are not\n" + + "obligated to do so. If you do not wish to do so, delete this\n" + + "exception statement from your version. */\n" + + "\n" + + "#ifndef __JAVA_CHARTABLES_H__\n" + + "#define __JAVA_CHARTABLES_H__\n" + + "\n" + + "// These tables are automatically generated by scripts/\n" + + "// This is in turn parsing, which is generated by\n" + + "// scripts/ The Unicode data comes from\n" + + "//; this header is based on\n" + + "// " + SOURCE + ". JDK 1.4 uses Unicode version 3.0.0.\n" + + "// DO NOT EDIT the tables. Instead, fix the upstream scripts and run\n" + + "// them again.\n" + + "\n" + + "// The data is stored in C style arrays of the appropriate CNI types, to\n" + + "// guarantee that the data is constant and non-relocatable. The field\n" + + "// blocks stores the offset of a block of 2\n" + + "// characters within data. 
The data field, in turn, stores\n" + + "// information about each character in the low order bits, and an offset\n" + + "// into the attribute tables upper, lower,\n" + + "// numValue, and direction. Notice that the\n" + + "// attribute tables are much smaller than 0xffff entries; as many characters\n" + + "// in Unicode share common attributes. Finally, there is a listing for\n" + + "// title exceptions (most characters just have the same title\n" + + "// case as upper case).\n" + + "\n" + + "// This file should only be included by\n" + + "\n" + + "/**\n" + + " * The character shift amount to look up the block offset. In other words,\n" + + " * (char) (blocks[ch >> SHIFT] + ch) is the index where\n" + + " * ch is described in data.\n" + + " */\n" + + "#define SHIFT " + SHIFT); + + convertString("/**\n" + + " * The mapping of character blocks to their location in data.\n" + + " * Each entry has been adjusted so that a modulo 16 sum with the desired\n" + + " * character gives the actual index into data.\n" + + " */", + char.class, "blocks", BLOCKS); + + convertString("/**\n" + + " * Information about each character. The low order 5 bits form the\n" + + " * character type, the next bit is a flag for non-breaking spaces, and the\n" + + " * next bit is a flag for mirrored directionality. The high order 9 bits\n" + + " * form the offset into the attribute tables. Note that this limits the\n" + + " * number of unique character attributes to 512, which is not a problem\n" + + " * as of Unicode version 3.2.0, but may soon become one.\n" + + " */", + char.class, "data", DATA); + + convertString("/**\n" + + " * This is the attribute table for computing the numeric value of a\n" + + " * character. 
The value is -1 if Unicode does not define a value, -2\n" + + " * if the value is not a positive integer, otherwise it is the value.\n" + + " */", + short.class, "numValue", NUM_VALUE); + + convertString("/**\n" + + " * This is the attribute table for computing the uppercase representation\n" + + " * of a character. The value is the difference between the character and\n" + + " * its uppercase version.\n" + + " */", + short.class, "upper", UPPER); + + convertString("/**\n" + + " * This is the attribute table for computing the lowercase representation\n" + + " * of a character. The value is the difference between the character and\n" + + " * its lowercase version.\n" + + " */", + short.class, "lower", LOWER); + + convertString("/**\n" + + " * This is the attribute table for computing the directionality class\n" + + " * of a character. At present, the value is in the range 0 - 18 if the\n" + + " * character has a direction, otherwise it is -1.\n" + + " */", + byte.class, "direction", DIRECTION); + + convertString("/**\n" + + " * This is the listing of titlecase special cases (all other character\n" + + " * can use upper to determine their titlecase). 
The listing\n" + + " * is a sequence of character pairs; converting the first character of the\n" + + " * pair to titlecase produces the second character.\n" + + " */", + char.class, "title", TITLE); + + System.out.println(); + System.out.println("#endif /* __JAVA_CHARTABLES_H__ */"); + } + + private static void convertString(String header, Class type, + String name, String field) + { + System.out.println(); + System.out.println(header); + System.out.println("static const j" + type.getName() + " " + name + + "[] = {"); + char[] data = field.toCharArray(); + int wrap; + if (type == char.class) + wrap = 10; + else if (type == byte.class) + wrap = 21; + else if (type == short.class) + wrap = 13; + else + throw new Error("Unexpeced type"); + for (int i = 0; i < data.length; i += wrap) + { + System.out.print(" "); + for (int j = 0; j < wrap; j++) + { + if (i + j >= data.length) + break; + System.out.print(" "); + if (type == char.class) + System.out.print((int) data[i + j]); + else if (type == byte.class) + System.out.print((byte) data[i + j]); + else if (type == short.class) + System.out.print((short) data[i + j]); + System.out.print(","); + } + System.out.println(); + } + System.out.println(" };\n" + + "/** Length of " + name + ". */\n" + + "static const int " + name + "_length = " + + data.length + ";"); + } +} Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,802 @@ +#!/usr/bin/perl -w +# -- generate Unicode database for java.lang.Character +# Copyright (C) 1998, 2002, 2004, 2006 Free Software Foundation, Inc. +# +# This file is part of GNU Classpath. +# +# GNU Classpath is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# GNU Classpath is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Classpath; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301 USA. +# +# Linking this library statically or dynamically with other modules is +# making a combined work based on this library. Thus, the terms and +# conditions of the GNU General Public License cover the whole +# combination. +# +# As a special exception, the copyright holders of this library give you +# permission to link this library with independent modules to produce an +# executable, regardless of the license terms of these independent +# modules, and to copy and distribute the resulting executable under +# terms of your choice, provided that you also meet, for each linked +# independent module, the terms and conditions of the license of that +# module. An independent module is a module which is not derived from +# or based on this library. If you modify this library, you may extend +# this exception to your version of the library, but you are not +# obligated to do so. If you do not wish to do so, delete this +# exception statement from your version. + +# Code for reading UnicodeData-4.0.0.txt and SpecialCasing-4.0.0.txt to generate +# the code for java-chartables.h. The relevant files can be found here: +# +# +# +# +# Inspired by code from Jochen Hoenicke. 
# author Eric Blake
# Unicode 4.0.0 support by Anthony Balkissoon
#
# Usage: ./<this-script> <UnicodeData> <SpecialCasing> <tables>
# where <UnicodeData> is obtained from www.unicode.org (named
# UnicodeData-4.0.0.txt for Unicode version 4.0.0), <SpecialCasing>
# is obtained from www.unicode.org too (named SpecialCasing-4.0.0.txt for
# Unicode version 4.0.0), and <tables> is the final location for the header
# file java-chartables.h.  As of JDK 1.5, use Unicode version 4.0.0
# for best results.


##
## Return the given variable interpreted as a 16 bit signed number.
##
## Parameter: $char - an integer; only its low 16 bits are significant.
## Returns:   the value of those 16 bits read as a two's-complement
##            signed short, i.e. a number in the range -32768 .. 32767.
##
sub cShort($) {
    my ($char) = @_;
    # Pack and unpack with the same 16-bit width.  Packing as a native
    # 32-bit "I" and unpacking a 16-bit "s" reads the low half only on
    # little-endian hosts; on big-endian hosts it reads the high half and
    # yields 0 for every value below 65536.  Masking keeps the value inside
    # the range that "S" can represent.
    return unpack "s", pack "S", $char & 0xffff;
}

##
## Convert the text UnicodeData file into a header file
## interface with arrays holding the compressed information.
##

# Unicode general categories; the index of a category in this list is the
# numeric type code stored for a character.
my @TYPECODES = qw(Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp Cc Cf
                   SKIPPED Co Cs Pd Ps Pe Pc Po Sm Sc Sk So Pi Pf);
# Bidirectional classes; the index of a class in this list is the numeric
# direction code stored for a character.
my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);

# Flag bits OR-ed into the type code of a character.
my $NOBREAK_FLAG  = 32;
my $MIRRORED_FLAG = 64;

my %special = ();

# infoArray is an array where each element is a list of character information
# for characters in a plane.  The index of each list is equal to the plane
# that it corresponds to even though most of these lists will currently be
# empty.  This is done so that this script can be easily modified to
# accommodate future versions of Unicode.
#
# One empty list is created explicitly for each of the 17 planes.  The former
# initializer, \((), (), ...), flattens to an empty list and only worked
# because the per-plane lists were autovivified on first use.
my @infoArray = map { [] } 0 .. 0x10;

# info is a reference to one of the lists in infoArray, depending on which
# plane we're currently parsing.
my $info;

# largeNums is an array of numerical values that are too large to fit
# into the 16 bit char where most numerical values are stored.
# What is stored in the char then is a number N such that (-N - 3) is
# the index into largeNums where the numerical value can be found.
+my @largeNums = (); + +my $titlecase = ""; +my $count = 0; +my $range = 0; + +die "Usage: $0 " + unless @ARGV == 3; +$| = 1; +print "GNU Classpath Unicode Attribute Database Generator 2.1\n"; +print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n"; + + +################################################################################ +################################################################################ +# Stage 0: Parse the special casing file +print "Parsing special casing file\n"; +open (SPECIAL, "< $ARGV[1]") || die "Can't open special casing file: $!\n"; +while () { + next if /^\#/; + my ($ch, undef, undef, $upper) = split / *; */; + + # This grabs only the special casing for multi-char uppercase. Note that + # there are no multi-char lowercase, and that Sun ignores multi-char + # titlecase rules. This script omits 3 special cases in Unicode 3.0.0, + # which must be hardcoded in java.lang.String: + # \u03a3 (Sun ignores this special case) + # \u0049 - lowercases to \u0131, but only in Turkish locale + # \u0069 - uppercases to \u0130, but only in Turkish locale + next unless defined $upper and $upper =~ / /; + $special{hex $ch} = [map {hex} split ' ', $upper]; +} + +close SPECIAL; + + +################################################################################ +################################################################################ +## Stage 1: Parse the attribute file +print "Parsing attributes file"; +open (UNICODE, "< $ARGV[0]") || die "Can't open Unicode attribute file: $!\n"; +while () { + print "." unless $count++ % 1000; + chomp; + s/\r//g; + my ($ch, $name, $category, undef, $bidir, $decomp, undef, undef, $numeric, + $mirrored, undef, undef, $upcase, $lowcase, $title) = split ';'; + $ch = hex($ch); + + # plane tells us which Unicode code plane we're currently in and is an + # index into infoArray. 
+ my $plane = int($ch / 0x10000); + my $planeBase = $plane * 0x10000; + $info = \@{$infoArray[$plane]}; + + my ($type, $numValue, $upperchar, $lowerchar, $direction); + + $type = 0; + while ($category !~ /^$TYPECODES[$type]$/) { + if (++$type == @TYPECODES) { + die "$ch: Unknown type: $category"; + } + } + $type |= $NOBREAK_FLAG if ($decomp =~ /noBreak/); + $type |= $MIRRORED_FLAG if ($mirrored =~ /Y/); + + if ($numeric =~ /^[0-9]+$/) { + $numValue = $numeric; + # If numeric takes more than 16 bits to store we want to store that + # number in a separate array and store a number N in numValue such + # that (-N - 3) is the offset into the separate array containing the + # large numerical value. + if ($numValue >= 0x7fff) { + $numValue = -3 - @largeNums; + push @largeNums, $numeric; + } + } elsif ($numeric eq "") { + # Special case sequences of 'a'-'z' + if ($ch >= 0x0041 && $ch <= 0x005a) { + $numValue = $ch - 0x0037; + } elsif ($ch >= 0x0061 && $ch <= 0x007a) { + $numValue = $ch - 0x0057; + } elsif ($ch >= 0xff21 && $ch <= 0xff3a) { + $numValue = $ch - 0xff17; + } elsif ($ch >= 0xff41 && $ch <= 0xff5a) { + $numValue = $ch - 0xff37; + } else { + $numValue = -1; + } + } else { + $numValue = -2; + } + + $upperchar = $upcase ? hex($upcase) - $ch : 0; + $lowerchar = $lowcase ? hex($lowcase) - $ch : 0; + if ($title ne $upcase) { + my $titlechar = $title ? hex($title) : $ch; + $titlecase .= pack("n2", $ch, $titlechar); + } + + $direction = 0; + while ($bidir !~ /^$DIRCODES[$direction]$/) { + if (++$direction == @DIRCODES) { + $direction = -1; + last; + } + } + $direction <<= 2; + $direction += $#{$special{$ch}} if defined $special{$ch}; + + if ($range) { + die "Expecting end of range at $ch\n" unless $name =~ /Last>$/; + for ($range + 1 .. 
$ch - 1) { + $info->[$_ - $planeBase] = pack("n5", $type, $numValue, $upperchar, + $lowerchar, $direction); + } + $range = 0; + } elsif ($name =~ /First>$/) { + $range = $ch; + } + # Store all this parsed information into the element in infoArray that info + # points to. + $info->[$ch - $planeBase] = pack("n5", $type, $numValue, $upperchar, $lowerchar, + $direction); +} +close UNICODE; + + +################################################################################ +################################################################################ +## Stage 2: Compress the data structures +printf "\nCompressing data structures"; +$count = 0; + +# data is a String that will be used to create the DATA String containing +# character information and offsets into the attribute tables. +my @data = (); + +# charhashArray is an array of hashtables used so that we can reuse character +# attributes when characters share the same attributes ... this makes our +# attribute tables smaller. charhash is a pointer into this array. +my @charhashArray = ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}); +my $charhash = (); + +# charinfoArray is an array of arrays, one per plane, for storing character +# information. charinfo is a pointer into this array. +my @charinfoArray = \((), (), (), (), (), (), (), (), + (), (), (), (), (), (), (), (), ()); +my $charinfo; + +# charlen is an array, one element per plane, that tells us how many unique +# character attributes there are for that plane. +my @charlen = (); + +for my $plane (0 .. 0x10) { + $info = \@{$infoArray[$plane]}; + my $planeBase = $plane * 0x10000; + $charhash = \%{$charhashArray[$plane]}; + $charinfo = \@{$charinfoArray[$plane]}; + + for my $ch ($planeBase .. $planeBase + 0xffff) { + my $index = $ch - $planeBase; + print "." 
unless $count++ % 0x1000; + $info->[$index] = pack("n5", 0, -1, 0, 0, -4) unless defined $info->[$index]; + + my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info->[$index]); + if (! exists $charhash->{$info->[$index]}) { + # If we entered this loop that means the character we're looking at + # now has attributes that are unique from those that we've looked + # at so far for this plane. So we push its attributes into charinfo + # and store in charhash the offset into charinfo where these + # attributes can later be found. + push @{$charinfo}, [ $numVal, $upper, $lower, $direction ]; + $charhash->{$info->[$index]} = @{$charinfo} - 1; + # When the file is generaged, the number we just stored in charhas + # will be the upper 9 bits in the DATA String that are an offset + # into the attribute tables. + } + $data[$plane] .= pack("n", ($charhash->{$info->[$index]} << 7) | $type); + } + $charlen[$plane] = scalar(@{$charinfoArray[$plane]}); +} + +# the shift that results in the best compression of the table. This is an array +# because different shifts are better for the different tables for each plane. +my @bestshift; + +# an initial guess. +my $bestest = 1000000; +my @bestblkstr; +my @blksize = (); + +for my $plane (0 .. 0x10) { + print "\n\nplane: $plane\n"; + print "Unique character entries: $charlen[$plane]\n"; + $bestest = 1000000; + for my $i (3 .. 8) { + my $blksize = 1 << $i; + my %blocks = (); + my @blkarray = (); + my ($j, $k); + print "shift: $i"; + + for ($j = 0; $j < 0x10000; $j += $blksize) { + my $blkkey = substr $data[$plane], 2 * $j, 2 * $blksize; + if (! exists $blocks{$blkkey}) { + push @blkarray, $blkkey; + $blocks{$blkkey} = $#blkarray; + } + } + + my $blknum = @blkarray; + my $blocklen = $blknum * $blksize; + printf " before %5d", $blocklen; + + # Now we try to pack the blkarray as tight as possible by finding matching + # heads and tails. + for ($j = $blksize - 1; $j > 0; $j--) { + my %tails = (); + for $k (0 .. 
$#blkarray) { + next unless defined $blkarray[$k]; + my $len = length $blkarray[$k]; + my $tail = substr $blkarray[$k], $len - $j * 2; + if (exists $tails{$tail}) { + push @{$tails{$tail}}, $k; + } else { + $tails{$tail} = [ $k ]; + } + } + + # tails are calculated, now calculate the heads and merge. + BLOCK: + for $k (0 .. $#blkarray) { + next unless defined $blkarray[$k]; + my $tomerge = $k; + while (1) { + my $head = substr($blkarray[$tomerge], 0, $j * 2); + my $entry = $tails{$head}; + next BLOCK unless defined $entry; + + my $other = shift @{$entry}; + if ($other == $tomerge) { + if (@{$entry}) { + push @{$entry}, $other; + $other = shift @{$entry}; + } else { + push @{$entry}, $other; + next BLOCK; + } + } + if (@{$entry} == 0) { + delete $tails{$head}; + } + + # a match was found + my $merge = $blkarray[$other] + . substr($blkarray[$tomerge], $j * 2); + $blocklen -= $j; + $blknum--; + + if ($other < $tomerge) { + $blkarray[$tomerge] = undef; + $blkarray[$other] = $merge; + my $len = length $merge; + my $tail = substr $merge, $len - $j * 2; + $tails{$tail} = [ map { $_ == $tomerge ? $other : $_ } + @{$tails{$tail}} ]; + next BLOCK; + } + $blkarray[$tomerge] = $merge; + $blkarray[$other] = undef; + } + } + } + my $blockstr; + for $k (0 .. $#blkarray) { + $blockstr .= $blkarray[$k] if defined $blkarray[$k]; + } + + die "Unexpected $blocklen" if length($blockstr) != 2 * $blocklen; + my $estimate = 2 * $blocklen + (0x20000 >> $i); + + printf " after merge %5d: %6d bytes\n", $blocklen, $estimate; + if ($estimate < $bestest) { + $bestest = $estimate; + $bestshift[$plane] = $i; + $bestblkstr[$plane] = $blockstr; + } + } + $blksize[$plane] = 1 << $bestshift[$plane]; + print "best shift: ", $bestshift[$plane]; + print " blksize: ", $blksize[$plane]; +} +my @blocksArray = \((), (), (), (), (), (), (), (), + (), (), (), (), (), (), (), (), ()); + +for my $plane (0 .. 
0x10) { + for (my $j = 0; $j < 0x10000; $j += $blksize[$plane]) { + my $blkkey = substr $data[$plane], 2 * $j, 2 * $blksize[$plane]; + my $index = index $bestblkstr[$plane], $blkkey; + while ($index & 1) { + die "not found: $j" if $index == -1; + $index = index $bestblkstr[$plane], $blkkey, $index + 1; + } + push @{$blocksArray[$plane]}, ($index / 2 - $j) & 0xffff; + } +} + + +################################################################################ +################################################################################ +## Stage 3: Generate the file +for my $plane (0 .. 0x10) { + die "UTF-8 limit of blocks may be exceeded for plane $plane: " . scalar(@{$blocksArray[$plane]}) . "\n" + if @{$blocksArray[$plane]} > 0xffff / 3; + die "UTF-8 limit of data may be exceeded for plane $plane: " . length($bestblkstr[$plane]) . "\n" + if length($bestblkstr[$plane]) > 0xffff / 3; +} + +{ + print "\nGenerating $ARGV[2]."; + my ($i, $j); + + open OUTPUT, "> $ARGV[2]" or die "Failed creating output file: $!\n"; + print OUTPUT <blocks stores the offset of a block of 2SHIFT +// characters within data. The data field, in turn, stores +// information about each character in the low order bits, and an offset +// into the attribute tables upper, lower, +// numValue, and direction. Notice that the +// attribute tables are much smaller than 0xffff entries; as many characters +// in Unicode share common attributes. Finally, there is a listing for +// title exceptions (most characters just have the same title +// case as upper case). + +// This file should only be included by + +/** + * The array containing the numeric values that are too large to be stored as + * chars in NUM_VALUE. NUM_VALUE in this case will contain a negative integer + * N such that LARGENUMS[-N - 3] contains the correct numeric value. 
+ */ +EOF + print OUTPUT "static const jint largenums[] = {\n "; + for ($i = 0; $i < @largeNums; $i++) { + print OUTPUT $largeNums[$i], ", "; + } + print OUTPUT "}"; + print OUTPUT <(char) (blocks[p][off >> SHIFT[p]] + off) is the index where + * ch is described in data, where off + * is ch & 0xffff and p is the plane the character belongs to. + */ +EOF + print OUTPUT "static const int shift[] = {\n "; + for ($i = 0; $i < @bestshift; $i++) { + print OUTPUT $bestshift[$i], ", "; + } + print OUTPUT "}"; + print OUTPUT <data. + * Each entry has been adjusted so that a modulo 16 sum with the desired + * character gives the actual index into data. + */ +EOF + for ($plane = 0; $plane <= 0x10; $plane++) { + # The following if statement handles the cases of unassigned planes + # specially so we don't waste space with unused Strings. As of + # Unicode version 4.0.0 only planes 0, 1, 2, and 14 are used. If + # you are updating this script to work with a later version of + # Unicode you may have to alter this if statement. + next if ($plane > 2 && $plane != 14) ; + + print OUTPUT "static const jchar blocks", $plane, "[] = {\n"; + for ($i = 0; $i < @{$blocksArray[$plane]} / 10; $i++) { + print OUTPUT " "; + for $j (0 .. 9) { + last if @{$blocksArray[$plane]} <= $i * 10 + $j; + my $val = $blocksArray[$plane]->[$i * 10 + $j]; + print OUTPUT $val, ", "; + } + print OUTPUT "\n"; + } + print OUTPUT "};\n\n"; + } + print OUTPUT "static const int blocks_length[] = {\n "; + for ($plane = 0; $plane <= 0x10; $plane++) { + if ($plane > 2 && $plane != 14){ + print OUTPUT "-1, "; + } + else { + print OUTPUT scalar(@{$blocksArray[$plane]}), ", "; + } + } + print OUTPUT "};\n"; + print OUTPUT < 2 && $plane != 14); + + print OUTPUT "static const jchar data", $plane, "[] = {\n"; + my $len = length($bestblkstr[$plane]) / 2; + for ($i = 0; $i < $len / 10; $i++) { + print OUTPUT " "; + for $j (0 .. 
9) { + last if $len <= $i * 10 + $j; + my $val = unpack "n", substr($bestblkstr[$plane], 2 * ($i * 10 + $j), 2); + print OUTPUT $val, ", "; + } + print OUTPUT "\n"; + } + print OUTPUT "};\n\n"; + } + print OUTPUT "static const int data_length[] = {\n "; + for ($plane = 0; $plane <= 0x10; $plane++) { + if ($plane > 2 && $plane != 14){ + print OUTPUT "-1, "; + } + else { + print OUTPUT length($bestblkstr[$plane]) / 2, ", "; + } + } + print OUTPUT "};\n"; + print OUTPUT < 2 && $plane != 14); + + print OUTPUT "static const jshort numValue", $plane, "[] = {\n"; + $len = @{$charinfoArray[$plane]}; + for ($i = 0; $i < $len / 13; $i++) { + print OUTPUT " "; + for $j (0 .. 12) { + last if $len <= $i * 13 + $j; + my $val = $charinfoArray[$plane]->[$i * 13 + $j][0]; + print OUTPUT cShort($val), ", "; + } + print OUTPUT "\n"; + } + print OUTPUT "};\n\n"; + } + print OUTPUT "static const int numValue_length[] = {\n "; + for ($plane = 0; $plane <= 0x10; $plane++) { + if ($plane > 2 && $plane != 14){ + print OUTPUT "-1, "; + } + else { + print OUTPUT scalar(@{$charinfoArray[$plane]}), ", "; + } + } + print OUTPUT "};\n"; + print OUTPUT < 2 && $plane != 14); + + print OUTPUT "static const jshort upper", $plane, "[] = {\n"; + $len = @{$charinfoArray[$plane]}; + for ($i = 0; $i < $len / 13; $i++) { + print OUTPUT " "; + for $j (0 .. 12) { + last if $len <= $i * 13 + $j; + my $val = $charinfoArray[$plane]->[$i * 13 + $j][1]; + print OUTPUT cShort($val), ", "; + } + print OUTPUT "\n"; + } + print OUTPUT "};\n\n"; + } + print OUTPUT "static const int upper_length[] = {\n "; + for ($plane = 0; $plane <= 0x10; $plane++) { + if ($plane > 2 && $plane != 14){ + print OUTPUT "-1, "; + } + else { + print OUTPUT scalar(@{$charinfoArray[$plane]}), ", "; + } + } + print OUTPUT "};\n"; + print OUTPUT < 2 && $plane != 14); + + print OUTPUT "static const jshort lower", $plane, "[] = {\n"; + $len = @{$charinfoArray[$plane]}; + for ($i = 0; $i < $len / 13; $i++) { + print OUTPUT " "; + for $j (0 .. 
12) { + last if $len <= $i * 13 + $j; + my $val = $charinfoArray[$plane]->[$i * 13 + $j][2]; + print OUTPUT cShort($val), ", "; + } + print OUTPUT "\n"; + } + print OUTPUT "};\n\n"; + } + print OUTPUT "static const int lower_length[] = {\n "; + for ($plane = 0; $plane <= 0x10; $plane++) { + if ($plane > 2 && $plane != 14){ + print OUTPUT "-1, "; + } + else { + print OUTPUT scalar(@{$charinfoArray[$plane]}), ", "; + } + } + print OUTPUT "};\n"; + print OUTPUT < 2 && $plane != 14); + + print OUTPUT "static const jbyte direction", $plane, "[] = {\n"; + $len = @{$charinfoArray[$plane]}; + for ($i = 0; $i < $len / 19; $i++) { + print OUTPUT " "; + for $j (0 .. 18) { + last if $len <= $i * 19 + $j; + my $val = $charinfoArray[$plane]->[$i * 19 + $j][3]; + $val >>= 2; + if ($val < 0 || $val > 18){ + $val = -1; + } + print OUTPUT cShort($val), ", "; + } + print OUTPUT "\n"; + } + print OUTPUT "};\n\n"; + } + print OUTPUT "static const int direction_length[] = {\n "; + for ($plane = 0; $plane <= 0x10; $plane++) { + if ($plane > 2 && $plane != 14){ + print OUTPUT "-1, "; + } + else { + print OUTPUT scalar(@{$charinfoArray[$plane]}), ", "; + } + } + print OUTPUT "};\n"; + print OUTPUT <upper to determine their titlecase). The listing + * is a sequence of character pairs; converting the first character of the + * pair to titlecase produces the second character. + */ +static const jchar title[] = { +EOF + + $len = length($titlecase) / 2; + for ($i = 0; $i < $len / 10; $i++) { + print OUTPUT $i ? "\n " : " "; + for $j (0 .. 9) { + last if $len <= $i * 10 + $j; + my $val = unpack "n", substr($titlecase, 2 * ($i * 10 + $j), 2); + print OUTPUT $val, ", "; + } + } + + print OUTPUT "\n };"; + print OUTPUT "\n/** Length of title. 
*/\nstatic const int title_length = ", $len; + print OUTPUT < Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,93 @@ +# - A perl program to generate most of the contents of +# javaprims.h automatically. + +# Copyright (C) 1998, 1999, 2000, 2002, 2005, 2006, 2007 Free Software +# Foundation +# +# This file is part of libgcj. +# +# This software is copyrighted work licensed under the terms of the +# Libgcj License. Please consult the file "LIBGCJ_LICENSE" for +# details. + +# Usage: cd /classpath/lib ; perl ../../scripts/ + +use DirHandle; + +if (-d 'java') +{ + # Ok here. +} +elsif (-d '../java') +{ + chdir ('..'); +} +else +{ + die "couldn't find java directory\n"; +} + +&scan ('java', 2); + +exit 0; + +sub scan +{ + local ($dir, $indent) = @_; + local (@subdirs) = (); + local (%classes) = (); + + local ($d) = new DirHandle $dir; + local (*JFILE); + local ($name); + if (defined $d) + { + while (defined ($name = $d->read)) + { + next if $name eq 'CVS'; + next if $name eq '.svn'; + next if $name eq '.'; + next if $name eq '..'; + if ($dir eq 'java' + && $name ne 'lang' + && $name ne 'util' + && $name ne 'io') + { + # We only generate decls for java.lang,, and + # java.util. + next; + } + if (-d ($dir . '/' . 
$name)) + { + push (@subdirs, $name); + next; + } + next unless $name =~ s/\.class$//; + $classes{$name} = 1; + } + + undef $d; + } + + local ($spaces) = ' ' x $indent; + local ($classname); + ($classname = $dir) =~ s/^.*\///; + print $spaces, "namespace ", $classname, "\n"; + print $spaces, "{\n"; + + foreach (sort keys %classes) + { + print $spaces, " class ", $_, ";\n"; + } + print "\n" if scalar @classes > 0 && scalar @subdirs > 0; + + local ($first) = 1; + foreach (sort @subdirs) + { + print "\n" unless $first; + $first = 0; + &scan ("$dir/$_", $indent + 2); + } + + print $spaces, "}\n"; +} Index: =================================================================== --- (nonexistent) +++ (revision 762) @@ -0,0 +1,210 @@ +#!/usr/bin/perl -w +# -- Script to generate java.lang.Character.UnicodeBlock +# Copyright (C) 2002 Free Software Foundation, Inc. +# +# This file is part of GNU Classpath. +# +# GNU Classpath is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# GNU Classpath is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Classpath; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301 USA. +# +# Linking this library statically or dynamically with other modules is +# making a combined work based on this library. Thus, the terms and +# conditions of the GNU General Public License cover the whole +# combination. 
+# +# As a special exception, the copyright holders of this library give you +# permission to link this library with independent modules to produce an +# executable, regardless of the license terms of these independent +# modules, and to copy and distribute the resulting executable under +# terms of your choice, provided that you also meet, for each linked +# independent module, the terms and conditions of the license of that +# module. An independent module is a module which is not derived from +# or based on this library. If you modify this library, you may extend +# this exception to your version of the library, but you are not +# obligated to do so. If you do not wish to do so, delete this +# exception statement from your version. + + +# Code for reading Blocks.txt and generating (to standard out) the code for +# java.lang.Character.UnicodeBlock, for pasting into java/lang/ +# You should probably check that the results are accurate to the +# specification, but I made sure it works OOB for Unicode 3.0.0 and JDK 1.4. +# As the grammar for the Blocks.txt file is changing in Unicode 3.2.0, you +# will have to tweak this some for future use. For now, the relevant +# Unicode definition files are found in libjava/gnu/gcj/convert/. +# +# author Eric Blake +# +# usage: +# where is obtained from (named Blocks-3.txt +# for Unicode version 3.0.0). + + +die "Usage: $0 " unless @ARGV == 1; +open (BLOCKS, $ARGV[0]) || die "Can't open Unicode block file: $!\n"; + +# A hash of added fields and the JDK they were added in, to automatically +# print @since tags. Maintaining this is optional (and tedious), but nice. 
+my %additions = ("SYRIAC" => "1.4", + "THAANA" => "1.4", + "SINHALA" => "1.4", + "MYANMAR" => "1.4", + "ETHIOPIC" => "1.4", + "CHEROKEE" => "1.4", + "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS" => "1.4", + "OGHAM" => "1.4", + "RUNIC" => "1.4", + "KHMER" => "1.4", + "MONGOLIAN" => "1.4", + "BRAILLE_PATTERNS" => "1.4", + "CJK_RADICALS_SUPPLEMENT" => "1.4", + "KANGXI_RADICALS" => "1.4", + "IDEOGRAPHIC_DESCRIPTION_CHARACTERS" => "1.4", + "BOPOMOFO_EXTENDED" => "1.4", + "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A" => "1.4", + "YI_SYLLABLES" => "1.4", + "YI_RADICALS" => "1.4", + ); + +print <<'EOF'; + /** + * A family of character subsets in the Unicode specification. A character + * is in at most one of these blocks. + * + * This inner class was generated automatically from + * $ARGV[0], by some perl scripts. + * This Unicode definition file can be found on the + * website. + * JDK 1.4 uses Unicode version 3.0.0. + * + * @author scripts/ (written by Eric Blake) + * @since 1.2 + */ + public static final class UnicodeBlock extends Subset + { + /** The start of the subset. */ + private final char start; + + /** The end of the subset. */ + private final char end; + + /** + * Constructor for strictly defined blocks. + * + * @param start the start character of the range + * @param end the end character of the range + * @param name the block name + */ + private UnicodeBlock(char start, char end, String name) + { + super(name); + this.start = start; + this.end = end; + } + + /** + * Returns the Unicode character block which a character belongs to. + * + * @param ch the character to look up + * @return the set it belongs to, or null if it is not in one + */ + public static UnicodeBlock of(char ch) + { + // Special case, since SPECIALS contains two ranges. + if (ch == '\uFEFF') + return SPECIALS; + // Simple binary search for the correct block. 
+ int low = 0; + int hi = sets.length - 1; + while (low <= hi) + { + int mid = (low + hi) >> 1; + UnicodeBlock b = sets[mid]; + if (ch < b.start) + hi = mid - 1; + else if (ch > b.end) + low = mid + 1; + else + return b; + } + return null; + } +EOF + +my $seenSpecials = 0; +my $seenSurrogates = 0; +my $surrogateStart = 0; +my @names = (); +while () { + next if /^\#/; + my ($start, $end, $block) = split(/; /); + next unless defined $block; + chomp $block; + $block =~ s/ *$//; + if (! $seenSpecials and $block =~ /Specials/) { + # Special case SPECIALS, since it is two disjoint ranges + $seenSpecials = 1; + next; + } + if ($block =~ /Surrogates/) { + # Special case SURROGATES_AREA, since it one range, not three + # consecutive, in Java + $seenSurrogates++; + if ($seenSurrogates == 1) { + $surrogateStart = $start; + next; + } elsif ($seenSurrogates == 2) { + next; + } else { + $start = $surrogateStart; + $block = "Surrogates Area"; + } + } + # Special case the name of PRIVATE_USE_AREA. + $block =~ s/(Private Use)/$1 Area/; + + (my $name = $block) =~ tr/a-z -/A-Z__/; + push @names, $name; + my $since = (defined $additions{$name} + ? "\n * \@since $additions{$name}" : ""); + my $extra = ($block =~ /Specials/ ? "'\\uFEFF', " : ""); + print < Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: makemake.tcl =================================================================== --- makemake.tcl (nonexistent) +++ makemake.tcl (revision 762) @@ -0,0 +1,577 @@ +#!/usr/bin/tclsh + +# Helper to enforce array-ness. +proc makearray {name} { + upvar $name ary + set ary(_) 1 + unset ary(_) +} + +global is_verbose +set is_verbose 0 + +# Verbose printer. +proc verbose {text} { + global is_verbose + if {$is_verbose} { + puts stderr $text + } +} + +# This maps a name to its style: +# * bc objects in this package and all its sub-packages +# are to be compiled with the BC ABI. It is an error +# for sub-packages to also appear in the map. 
+# * bcheaders +# as bc, but generate header files and compile with CNI. +# * package +# objects in this package (and possibly sub-packages, +# if they do not appear in the map) will be compiled en masse +# from source into a single object, using the C++ ABI. +# * ordinary +# objects in this package (and possibly sub-packages +# if they do not appear in the map) will be compiled one at +# a time into separate .o files. +# * ignore +# objects in this package are not used. Note however that +# most ignored files are actually handled by listing them in +# 'standard.omit' +# * interpreter +# objects in this package (and possibly sub-packages, +# if they do not appear in the map) are only compiled if +# the interpreter is enabled. They are compiled as with the +# 'package' specifier. +# +# If a package does not appear in the map, the default is 'package'. +global package_map +set package_map(.) package + +# These are ignored in Classpath. +set package_map(gnu/test) ignore +set package_map(gnu/javax/swing/plaf/gtk) ignore +set package_map(gnu/gcj/tools/gc_analyze) ignore + +set package_map(gnu/java/awt/peer/swing) bc + +set package_map(gnu/xml/aelfred2) bc +set package_map(gnu/xml/dom) bc +set package_map(gnu/xml/libxmlj) bc +set package_map(gnu/xml/pipeline) bc +set package_map(gnu/xml/stream) bc +set package_map(gnu/xml/transform) bc +set package_map(gnu/xml/util) bc +set package_map(gnu/xml/validation) bc +set package_map(gnu/xml/xpath) bc +set package_map(javax/imageio) bc +set package_map(javax/xml) bc +set package_map(gnu/java/beans) bc +set package_map(gnu/java/awt/dnd/peer/gtk) bc +set package_map(gnu/java/util/prefs/gconf) bc +set package_map(gnu/java/awt/peer/gtk) bc +set package_map(gnu/java/awt/dnd/peer/gtk) bc +set package_map(gnu/java/awt/peer/qt) bc +set package_map(gnu/java/awt/peer/x) bc +set package_map(gnu/java/util/prefs/gconf) bc +set package_map(gnu/javax/sound/midi) bc +set package_map(gnu/javax/sound/sampled/gstreamer) ignore +set 
package_map(org/xml) bc +set package_map(org/w3c) bc +set package_map(org/relaxng) bc +set package_map(javax/rmi) bc +set package_map(org/omg/IOP) bc +set package_map(org/omg/PortableServer) bc +set package_map(org/omg/CosNaming) bc +set package_map(org/omg/CORBA_2_3) bc +set package_map(org/omg/Messaging) bc +set package_map(org/omg/stub) bc +set package_map(org/omg/CORBA) bc +set package_map(org/omg/PortableInterceptor) bc +set package_map(org/omg/DynamicAny) bc +set package_map(org/omg/SendingContext) bc +set package_map(org/omg/Dynamic) bc +set package_map(gnu/CORBA) bc +set package_map(gnu/javax/rmi) bc +set package_map(gnu/java/lang/management) bcheaders +set package_map(java/lang/management) bc +set package_map(gnu/classpath/management) bc +set package_map(gnu/javax/management) bc +set package_map(gnu/java/math) bc + +# parser/HTML_401F.class is really big, and there have been complaints +# about this package requiring too much memory to build. So, we +# compile it as separate objects. But, we're careful to compile the +# sub-packages as packages. +set package_map(gnu/javax/swing/text/html/parser) ordinary +set package_map(gnu/javax/swing/text/html/parser/models) package +set package_map(gnu/javax/swing/text/html/parser/support) package + +# More special cases. These end up in their own library. +# Note that if we BC-compile AWT we must update these as well. +set package_map(gnu/gcj/xlib) package +set package_map(gnu/awt/xlib) package + +# These packages should only be included if the interpreter is +# enabled. 
+set package_map(gnu/classpath/jdwp) interpreter +set package_map(gnu/classpath/jdwp/event) interpreter +set package_map(gnu/classpath/jdwp/event/filters) interpreter +set package_map(gnu/classpath/jdwp/exception) interpreter +set package_map(gnu/classpath/jdwp/id) interpreter +set package_map(gnu/classpath/jdwp/processor) interpreter +set package_map(gnu/classpath/jdwp/transport) interpreter +set package_map(gnu/classpath/jdwp/util) interpreter +set package_map(gnu/classpath/jdwp/value) interpreter +set package_map(gnu/gcj/jvmti) interpreter + +# Some BC ABI packages have classes which must not be compiled BC. +# This maps such packages to a grep expression for excluding such +# classes. +global exclusion_map +makearray exclusion_map +# set exclusion_map(java/awt) AWTPermission + +# This maps a package name to a list of corresponding .java file base +# names. The package name will either appear as a key in package_map, +# or it will be '.' for the default. +global name_map +makearray name_map + +# This maps a java file base name, like 'java/lang/', to +# the source directory in which it resides. We keep a layer of +# indirection here so that we can override sources in Classpath with +# our own sources. +global dir_map +makearray dir_map + +# An entry in this map means that all .properties files in the +# corresponding directory should be ignored. +global properties_map +makearray properties_map + +# is installed and is editable. +set properties_map(java/util/logging) _ + +# We want to be able to load xerces if it is on the class path. So, +# we have to avoid compiling in the XML-related service files. 
+set properties_map(META-INF/services/javax.xml.parsers.DocumentBuilderFactory) _ +set properties_map(META-INF/services/javax.xml.parsers.SAXParserFactory) _ +set properties_map(META-INF/services/javax.xml.parsers.TransformerFactory) _ +set properties_map(META-INF/services/org.relaxng.datatype.DatatypeLibraryFactory) _ +set properties_map(META-INF/services/org.w3c.dom.DOMImplementationSourceList) _ +set properties_map(META-INF/services/org.xml.sax.driver) _ +set properties_map(META-INF/services/ ignore +set properties_map(META-INF/services/javax.sound.sampled.spi.MixerProvider) ignore +set properties_map(META-INF/services/ ignore + +# List of all properties files. +set properties_files {} + +# List of all '@' files that we are going to compile. +set package_files {} + +# List of all '@' files that we are going to compile if the +# interpreter is enabled. +set interpreter_package_files {} + +# List of all header file variables. +set header_vars {} + +# List of all header file variables for interpreter packages. +set interpreter_header_vars {} + +# List of all BC object files. +set bc_objects {} + +# List of regexps for matching ignored files. +set ignore_rx_list {} + + +# Return true if a given file should be ignored. +# The argument is the path name including the package part. +proc ignore_file_p {file} { + global ignore_rx_list + foreach rx $ignore_rx_list { + if {[regexp -- $rx $file]} { + verbose "ignoring $file for $rx" + return 1 + } + } + return 0 +} + +# Read a '.omit' file and update the internal data structures. +proc read_omit_file {name} { + global ignore_rx_list + set fd [open $name r] + while {! [eof $fd]} { + set line [gets $fd] + + # Classpath's entries bogusly start with "../". + if {[string match ../* $line]} { + set line [string range $line 3 end] + } + + if {$line != ""} { + lappend ignore_rx_list $line + } + } + close $fd +} + +# Classify a single source file. 
+proc classify_source_file {basedir file} { + global package_map name_map dir_map + + if {[ignore_file_p $file]} { + return + } + + set seen [info exists dir_map($file)] + set dir_map($file) $basedir + set pkg $file + while {1} { + if {[info exists package_map($pkg)]} { + # If the entry is 'package', then set up a new entry for the + # file's package. + if {$package_map($pkg) == "package"} { + set pkg [file dirname $file] + set package_map($pkg) package + } + verbose "classify succeeded: $file -> $pkg" + if {! $seen} { + lappend name_map($pkg) $file + } + return + } + set pkg [file dirname $pkg] + } + error "can't happen" +} + +# Scan a directory and its subdirectories for .java source files or +# .properties files. Note that we keep basedir and subdir separate so +# we can properly update our global data structures. +proc scan_directory {basedir subdir} { + global dir_map properties_map properties_files + + set subdirs {} + set files {} + set here [pwd] + cd $basedir/$subdir + foreach file [lsort [glob -nocomplain *]] { + if {[string match *.java $file]} { + lappend files $subdir/$file + } elseif {[string match *.properties $file]} { + if {! [info exists properties_map($subdir)]} { + # We assume there aren't any overrides. + lappend properties_files $basedir/$subdir/$file + } + } elseif {[string match *.css $file]} { + # Special case for default.css needed by javax.swing.text.html. + lappend properties_files $basedir/$subdir/$file + } elseif {[file isdirectory $file]} { + lappend subdirs $subdir/$file + } elseif {$subdir == "META-INF/services"} { + # Service files are generally included as properties. + if {! [info exists properties_map($subdir/$file)]} { + lappend properties_files $basedir/$subdir/$file + } + } + } + cd $here + + # Recurse first, so that we don't create new packages too eagerly. 
+ foreach dir $subdirs { + scan_directory $basedir $dir + } + + foreach file $files { + classify_source_file $basedir $file + } +} + +# Scan known packages beneath the base directory for .java source +# files. +proc scan_packages {basedir} { + foreach subdir {gnu java javax org sun com META-INF} { + if {[file exists $basedir/$subdir]} { + scan_directory $basedir $subdir + } + } +} + +# Emit a rule for a 'bc' package. +proc emit_bc_rule {package} { + global package_map exclusion_map bc_objects + + if {$package == "."} { + set pkgname ordinary + } else { + set pkgname $package + } + set varname [join [split $pkgname /] _]_source_files + set loname [join [split $pkgname /] -].lo + set tname [join [split $pkgname /] -].list + + puts "$loname: \$($varname)" + # Create a temporary list file and then compile it. This works + # around the libtool problem mentioned in PR 21058. classpath was + # built first, so the class files are to be found there. + set omit "" + if {[info exists exclusion_map($package)]} { + set omit "| grep -v $exclusion_map($package)" + } + puts "\t@find \$(srcdir)/classpath/lib/$package -name '*.class'${omit} > $tname" + puts -nonewline "\t\$(LTGCJCOMPILE) -fsource-filename=\$(here)/classpath/lib/classes " + if {$package_map($package) == "bc"} { + puts -nonewline "-fjni " + } + # Unless bc is disabled with --disable-libgcj-bc, $(LIBGCJ_BC_FLAGS) is: + # -findirect-dispatch -fno-indirect-classes + puts "\$(LIBGCJ_BC_FLAGS) -c -o $loname @$tname" + puts "\t@rm -f $tname" + puts "" + + # We skip these because they are built into their own libraries and + # are handled specially in + if {$loname != "gnu-java-awt-peer-qt.lo" && $loname != "gnu-java-awt-peer-x.lo"} { + lappend bc_objects $loname + } +} + +# Emit a rule for a 'package' package. 
+proc emit_package_rule_to_list {package package_files_list} { + global package_map exclusion_map $package_files_list + + if {$package == "."} { + set pkgname ordinary + } else { + set pkgname $package + } + set varname [join [split $pkgname /] _]_source_files + set base $pkgname + set lname $base.list + set dname $base.deps + + if {$pkgname == "java/lang"} { + # Object and Class are special cases due to an apparent compiler + # bug. Process is a special case because we don't build all + # concrete implementations of Process on all platforms. + set omit "| tr ' ' '\\012' | fgrep -v Object.class | fgrep -v Class.class | egrep -v '\(Ecos\|Posix\|Win32\)Process' " + } else { + set omit "" + } + + # A rule to make the phony file we are going to compile. + puts "$lname: \$($varname)" + puts "\t@\$(mkinstalldirs) \$(dir \$@)" + puts "\techo \$(srcdir)/classpath/lib/$package/*.class $omit> $lname" + puts "" + puts "-include $dname" + puts "" + puts "" + + if {$pkgname != "gnu/gcj/xlib" && $pkgname != "gnu/awt/xlib" + && $pkgname != "gnu/gcj/tools/gcj_dbtool"} { + lappend $package_files_list $lname + } +} + +proc emit_package_rule {package} { + global package_files + emit_package_rule_to_list $package package_files +} + +proc emit_interpreter_rule {package} { + global interpreter_package_files + emit_package_rule_to_list $package interpreter_package_files +} + +# Emit a rule to build a package full of 'ordinary' files, that is, +# one .o for each .java. +proc emit_ordinary_rule {package} { + global name_map package_files + + foreach file $name_map($package) { + # Strip off the '.java'. + set root [file rootname $file] + + # Look for all included .class files. Assumes that we don't have + # multiple top-level classes in a .java file. 
+ set lname $root.list + set dname $root.deps + + puts "$lname: classpath/$file" + puts "\t@\$(mkinstalldirs) \$(dir \$@)" + puts "\techo \$(srcdir)/classpath/lib/${root}*.class> $lname" + puts "" + puts "-include $dname" + puts "" + puts "" + + lappend package_files $lname + } +} + +# Emit a package-like rule for a platform-specific Process +# implementation. +proc emit_process_package_rule {platform} { + set base "java/process-$platform" + set lname $base.list + set dname $base.deps + + puts "$lname: java/lang/${platform}" + puts "\t@\$(mkinstalldirs) \$(dir \$@)" + puts "\techo \$(srcdir)/classpath/lib/java/lang/${platform}Process*.class > $lname" + puts "" + puts "-include $dname" + puts "" + puts "" +} + +# Emit a source file variable for a package, and corresponding header +# file variable, if needed. +proc emit_source_var {package} { + global package_map name_map dir_map header_vars interpreter_header_vars + + if {$package == "."} { + set pkgname ordinary + } else { + set pkgname $package + } + set uname [join [split $pkgname /] _] + set varname ${uname}_source_files + puts -nonewline "$varname =" + + makearray dirs + foreach base [lsort $name_map($package)] { + # Terminate previous line. + puts " \\" + # Having files start with './' is ugly and confuses the automake + # "dirstamp" code; see automake PR 461. + set ndir $dir_map($base)/ + if {$ndir == "./"} { + set ndir "" + } + puts -nonewline "${ndir}${base}" + set dirs($dir_map($base)) 1 + } + puts "" + puts "" + + if {$package_map($package) != "bc"} { + # Ugly code to build up the appropriate patsubst. + set result "\$(patsubst,%.h,\$($varname))" + # We use -decreasing so that classpath/external will be stripped + # before classpath. + foreach dir [lsort -decreasing [array names dirs]] { + if {$dir != "."} { + set result "\$(patsubst $dir/%,%,$result)" + } + } + + if {$package == "." || $package == "java/lang"} { + # Ugly hack. 
+ set result "\$(filter-out java/lang/Object.h java/lang/Class.h,$result)" + } + + puts "${uname}_header_files = $result" + puts "" + if {$pkgname != "gnu/gcj/xlib" && $pkgname != "gnu/awt/xlib"} { + if {$package_map($package) == "interpreter"} { + lappend interpreter_header_vars "${uname}_header_files" + } else { + lappend header_vars "${uname}_header_files" + } + } + } +} + +# Pretty-print a Makefile variable. +proc pp_var {name valueList {pre ""} {post ""}} { + puts "" + puts -nonewline "$name =" + foreach val $valueList { + puts " \\" + puts -nonewline " ${pre}${val}${post}" + } + puts "" +} + +global argv +if {[llength $argv] > 0 && [lindex $argv 0] == "-verbose"} { + set is_verbose 1 +} + +# Read the proper .omit files. +read_omit_file +read_omit_file classpath/lib/ + +# Scan classpath first. +scan_packages classpath +scan_packages classpath/external/sax +scan_packages classpath/external/w3c_dom +scan_packages classpath/external/relaxngDatatype +scan_packages classpath/external/jsr166 +# Resource files. +scan_packages classpath/resource +# Now scan our own files; this will correctly override decisions made +# when scanning classpath. +scan_packages . +# Files created by the build. +classify_source_file classpath gnu/java/locale/ +classify_source_file classpath gnu/java/security/ +classify_source_file classpath gnu/classpath/ + +puts "## This file was automatically generated by scripts/makemake.tcl" +puts "## Do not edit!" +puts "" + +foreach package [lsort [array names package_map]] { + if {$package_map($package) == "ignore"} { + continue + } + if {! 
[info exists name_map($package)]} { + continue + } + + emit_source_var $package + + if {$package_map($package) == "bc"} { + emit_bc_rule $package + } elseif {$package_map($package) == "bcheaders"} { + emit_bc_rule $package + } elseif {$package_map($package) == "ordinary"} { + emit_ordinary_rule $package + } elseif {$package_map($package) == "package"} { + emit_package_rule $package + } elseif {$package_map($package) == "interpreter"} { + emit_interpreter_rule $package + } else { + error "unrecognized type: $package_map($package)" + } +} + +emit_process_package_rule Ecos +emit_process_package_rule Win32 +emit_process_package_rule Posix + +puts "if INTERPRETER" +pp_var interpreter_packages_source_files $interpreter_package_files +pp_var interpreter_header_files $interpreter_header_vars "\$(" ")" +puts "" +puts "else" +puts "" +puts "interpreter_packages_source_files=" +puts "" +puts "interpreter_header_files=" +puts "" +puts "endif" + +lappend package_files {$(interpreter_packages_source_files)} +lappend header_vars interpreter_header_files + +pp_var all_packages_source_files $package_files +pp_var ordinary_header_files $header_vars "\$(" ")" +pp_var bc_objects $bc_objects +pp_var property_files $properties_files
makemake.tcl Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property

powered by: WebSVN 2.1.0

© copyright 1999-2024, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.