| 1 |
14 |
jlechner |
/* RuleBasedCollator.java -- Concrete Collator Class
|
| 2 |
|
|
Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
|
| 3 |
|
|
|
| 4 |
|
|
This file is part of GNU Classpath.
|
| 5 |
|
|
|
| 6 |
|
|
GNU Classpath is free software; you can redistribute it and/or modify
|
| 7 |
|
|
it under the terms of the GNU General Public License as published by
|
| 8 |
|
|
the Free Software Foundation; either version 2, or (at your option)
|
| 9 |
|
|
any later version.
|
| 10 |
|
|
|
| 11 |
|
|
GNU Classpath is distributed in the hope that it will be useful, but
|
| 12 |
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| 14 |
|
|
General Public License for more details.
|
| 15 |
|
|
|
| 16 |
|
|
You should have received a copy of the GNU General Public License
|
| 17 |
|
|
along with GNU Classpath; see the file COPYING. If not, write to the
|
| 18 |
|
|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
| 19 |
|
|
02110-1301 USA.
|
| 20 |
|
|
|
| 21 |
|
|
Linking this library statically or dynamically with other modules is
|
| 22 |
|
|
making a combined work based on this library. Thus, the terms and
|
| 23 |
|
|
conditions of the GNU General Public License cover the whole
|
| 24 |
|
|
combination.
|
| 25 |
|
|
|
| 26 |
|
|
As a special exception, the copyright holders of this library give you
|
| 27 |
|
|
permission to link this library with independent modules to produce an
|
| 28 |
|
|
executable, regardless of the license terms of these independent
|
| 29 |
|
|
modules, and to copy and distribute the resulting executable under
|
| 30 |
|
|
terms of your choice, provided that you also meet, for each linked
|
| 31 |
|
|
independent module, the terms and conditions of the license of that
|
| 32 |
|
|
module. An independent module is a module which is not derived from
|
| 33 |
|
|
or based on this library. If you modify this library, you may extend
|
| 34 |
|
|
this exception to your version of the library, but you are not
|
| 35 |
|
|
obligated to do so. If you do not wish to do so, delete this
|
| 36 |
|
|
exception statement from your version. */
|
| 37 |
|
|
|
| 38 |
|
|
|
| 39 |
|
|
package java.text;
|
| 40 |
|
|
|
| 41 |
|
|
import java.util.ArrayList;
|
| 42 |
|
|
import java.util.HashMap;
|
| 43 |
|
|
|
| 44 |
|
|
/* Written using "Java Class Libraries", 2nd edition, plus online
|
| 45 |
|
|
* API docs for JDK 1.2 from http://www.javasoft.com.
|
| 46 |
|
|
* Status: Believed complete and correct
|
| 47 |
|
|
*/
|
| 48 |
|
|
|
| 49 |
|
|
/**
|
| 50 |
|
|
* This class is a concrete subclass of <code>Collator</code> suitable
|
| 51 |
|
|
* for string collation in a wide variety of languages. An instance of
|
| 52 |
|
|
* this class is normally returned by the <code>getInstance</code> method
|
| 53 |
|
|
* of <code>Collator</code> with rules predefined for the requested
|
| 54 |
|
|
* locale. However, an instance of this class can be created manually
|
| 55 |
|
|
* with any desired rules.
|
| 56 |
|
|
* <p>
|
| 57 |
|
|
* Rules take the form of a <code>String</code> with the following syntax
|
| 58 |
|
|
* <ul>
|
| 59 |
|
|
* <li> Modifier: '@'</li>
|
| 60 |
|
|
* <li> Relation: '<' | ';' | ',' | '=' : <text></li>
|
| 61 |
|
|
* <li> Reset: '&' : <text></li>
|
| 62 |
|
|
* </ul>
|
| 63 |
|
|
* The modifier character indicates that accents sort backward as is the
|
| 64 |
|
|
* case with French. The modifier applies to all rules <b>after</b>
|
| 65 |
|
|
* the modifier but before the next primary sequence. If placed at the end
|
| 66 |
|
|
* of the sequence if applies to all unknown accented character.
|
| 67 |
|
|
* The relational operators specify how the text
|
| 68 |
|
|
* argument relates to the previous term. The relation characters have
|
| 69 |
|
|
* the following meanings:
|
| 70 |
|
|
* <ul>
|
| 71 |
|
|
* <li>'<' - The text argument is greater than the prior term at the primary
|
| 72 |
|
|
* difference level.</li>
|
| 73 |
|
|
* <li>';' - The text argument is greater than the prior term at the secondary
|
| 74 |
|
|
* difference level.</li>
|
| 75 |
|
|
* <li>',' - The text argument is greater than the prior term at the tertiary
|
| 76 |
|
|
* difference level.</li>
|
| 77 |
|
|
* <li>'=' - The text argument is equal to the prior term</li>
|
| 78 |
|
|
* </ul>
|
| 79 |
|
|
* <p>
|
| 80 |
|
|
* As for the text argument itself, this is any sequence of Unicode
|
| 81 |
|
|
* characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
|
| 82 |
|
|
* 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
|
| 83 |
|
|
* desired, they must be enclosed in single quotes. If any whitespace is
|
| 84 |
|
|
* encountered, it is ignored. (For example, "a b" is equal to "ab").
|
| 85 |
|
|
* <p>
|
| 86 |
|
|
* The reset operation inserts the following rule at the point where the
|
| 87 |
|
|
* text argument to it exists in the previously declared rule string. This
|
| 88 |
|
|
* makes it easy to add new rules to an existing string by simply including
|
| 89 |
|
|
* them in a reset sequence at the end. Note that the text argument, or
|
| 90 |
|
|
* at least the first character of it, must be present somewhere in the
|
| 91 |
|
|
* previously declared rules in order to be inserted properly. If this
|
| 92 |
|
|
* is not satisfied, a <code>ParseException</code> will be thrown.
|
| 93 |
|
|
* <p>
|
| 94 |
|
|
* This system of configuring <code>RuleBasedCollator</code> is needlessly
|
| 95 |
|
|
* complex and the people at Taligent who developed it (along with the folks
|
| 96 |
|
|
* at Sun who accepted it into the Java standard library) deserve a slow
|
| 97 |
|
|
* and agonizing death.
|
| 98 |
|
|
* <p>
|
| 99 |
|
|
* Here are a couple of example of rule strings:
|
| 100 |
|
|
* <p>
|
| 101 |
|
|
* "< a < b < c" - This string says that a is greater than b which is
|
| 102 |
|
|
* greater than c, with all differences being primary differences.
|
| 103 |
|
|
* <p>
|
| 104 |
|
|
* "< a,A < b,B < c,C" - This string says that 'A' is greater than 'a' with
|
| 105 |
|
|
* a tertiary strength comparison. Both 'b' and 'B' are greater than 'a' and
|
| 106 |
|
|
* 'A' during a primary strength comparison. But 'B' is greater than 'b'
|
| 107 |
|
|
* under a tertiary strength comparison.
|
| 108 |
|
|
* <p>
|
| 109 |
|
|
* "< a < c & a < b " - This sequence is identical in function to the
|
| 110 |
|
|
* "< a < b < c" rule string above. The '&' reset symbol indicates that
|
| 111 |
|
|
* the rule "< b" is to be inserted after the text argument "a" in the
|
| 112 |
|
|
* previous rule string segment.
|
| 113 |
|
|
* <p>
|
| 114 |
|
|
* "< a < b & y < z" - This is an error. The character 'y' does not appear
|
| 115 |
|
|
* anywhere in the previous rule string segment so the rule following the
|
| 116 |
|
|
* reset rule cannot be inserted.
|
| 117 |
|
|
* <p>
|
| 118 |
|
|
* "< a & A @ < e & E < f& F" - This sequence is equivalent to the following
|
| 119 |
|
|
* "< a & A < E & e < f & F".
|
| 120 |
|
|
* <p>
|
| 121 |
|
|
* For a description of the various comparison strength types, see the
|
| 122 |
|
|
* documentation for the <code>Collator</code> class.
|
| 123 |
|
|
* <p>
|
| 124 |
|
|
* As an additional complication to this already overly complex rule scheme,
|
| 125 |
|
|
* if any characters precede the first rule, these characters are considered
|
| 126 |
|
|
* ignorable. They will be treated as if they did not exist during
|
| 127 |
|
|
* comparisons. For example, "- < a < b ..." would make '-' an ignorable
|
| 128 |
|
|
* character such that the strings "high-tech" and "hightech" would
|
| 129 |
|
|
* be considered identical.
|
| 130 |
|
|
* <p>
|
| 131 |
|
|
* A <code>ParseException</code> will be thrown for any of the following
|
| 132 |
|
|
* conditions:
|
| 133 |
|
|
* <ul>
|
| 134 |
|
|
* <li>Unquoted punctuation characters in a text argument.</li>
|
| 135 |
|
|
* <li>A relational or reset operator not followed by a text argument</li>
|
| 136 |
|
|
* <li>A reset operator where the text argument is not present in
|
| 137 |
|
|
* the previous rule string section.</li>
|
| 138 |
|
|
* </ul>
|
| 139 |
|
|
*
|
| 140 |
|
|
* @author Aaron M. Renn (arenn@urbanophile.com)
|
| 141 |
|
|
* @author Tom Tromey (tromey@cygnus.com)
|
| 142 |
|
|
* @author Guilhem Lavaux (guilhem@kaffe.org)
|
| 143 |
|
|
*/
|
| 144 |
|
|
public class RuleBasedCollator extends Collator
|
| 145 |
|
|
{
|
| 146 |
|
|
/**
|
| 147 |
|
|
* This class describes what rank has a character (or a sequence of characters)
|
| 148 |
|
|
* in the lexicographic order. Each element in a rule has a collation element.
|
| 149 |
|
|
*/
|
| 150 |
|
|
static final class CollationElement
|
| 151 |
|
|
{
|
| 152 |
|
|
String key;
|
| 153 |
|
|
int primary;
|
| 154 |
|
|
short secondary;
|
| 155 |
|
|
short tertiary;
|
| 156 |
|
|
short equality;
|
| 157 |
|
|
boolean ignore;
|
| 158 |
|
|
String expansion;
|
| 159 |
|
|
|
| 160 |
|
|
CollationElement(String key, int primary, short secondary, short tertiary,
|
| 161 |
|
|
short equality, String expansion, boolean ignore)
|
| 162 |
|
|
{
|
| 163 |
|
|
this.key = key;
|
| 164 |
|
|
this.primary = primary;
|
| 165 |
|
|
this.secondary = secondary;
|
| 166 |
|
|
this.tertiary = tertiary;
|
| 167 |
|
|
this.equality = equality;
|
| 168 |
|
|
this.ignore = ignore;
|
| 169 |
|
|
this.expansion = expansion;
|
| 170 |
|
|
}
|
| 171 |
|
|
|
| 172 |
|
|
int getValue()
|
| 173 |
|
|
{
|
| 174 |
|
|
return (primary << 16) + (secondary << 8) + tertiary;
|
| 175 |
|
|
}
|
| 176 |
|
|
}
|
| 177 |
|
|
|
| 178 |
|
|
/**
|
| 179 |
|
|
* Basic collation instruction (internal format) to build the series of
|
| 180 |
|
|
* collation elements. It contains an instruction which specifies the new
|
| 181 |
|
|
* state of the generator. The sequence of instruction should not contain
|
| 182 |
|
|
* RESET (it is used by
|
| 183 |
|
|
* {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
|
| 184 |
|
|
* as a temporary state while merging two sets of instructions.
|
| 185 |
|
|
*/
|
| 186 |
|
|
static final class CollationSorter
|
| 187 |
|
|
{
|
| 188 |
|
|
static final int GREATERP = 0;
|
| 189 |
|
|
static final int GREATERS = 1;
|
| 190 |
|
|
static final int GREATERT = 2;
|
| 191 |
|
|
static final int EQUAL = 3;
|
| 192 |
|
|
static final int RESET = 4;
|
| 193 |
|
|
static final int INVERSE_SECONDARY = 5;
|
| 194 |
|
|
|
| 195 |
|
|
int comparisonType;
|
| 196 |
|
|
String textElement;
|
| 197 |
|
|
int hashText;
|
| 198 |
|
|
int offset;
|
| 199 |
|
|
boolean ignore;
|
| 200 |
|
|
|
| 201 |
|
|
String expansionOrdering;
|
| 202 |
|
|
}
|
| 203 |
|
|
|
| 204 |
|
|
/**
|
| 205 |
|
|
* This the the original rule string.
|
| 206 |
|
|
*/
|
| 207 |
|
|
private String rules;
|
| 208 |
|
|
|
| 209 |
|
|
/**
|
| 210 |
|
|
* This is the table of collation element values
|
| 211 |
|
|
*/
|
| 212 |
|
|
private Object[] ce_table;
|
| 213 |
|
|
|
| 214 |
|
|
/**
|
| 215 |
|
|
* Quick-prefix finder.
|
| 216 |
|
|
*/
|
| 217 |
|
|
HashMap prefix_tree;
|
| 218 |
|
|
|
| 219 |
|
|
/**
|
| 220 |
|
|
* This is the value of the last sequence entered into
|
| 221 |
|
|
* <code>ce_table</code>. It is used to compute the
|
| 222 |
|
|
* ordering value of unspecified character.
|
| 223 |
|
|
*/
|
| 224 |
|
|
private int last_primary_value;
|
| 225 |
|
|
|
| 226 |
|
|
/**
|
| 227 |
|
|
* This is the value of the last secondary sequence of the
|
| 228 |
|
|
* primary 0, entered into
|
| 229 |
|
|
* <code>ce_table</code>. It is used to compute the
|
| 230 |
|
|
* ordering value of an unspecified accented character.
|
| 231 |
|
|
*/
|
| 232 |
|
|
private int last_tertiary_value;
|
| 233 |
|
|
|
| 234 |
|
|
/**
|
| 235 |
|
|
* This variable is true if accents need to be sorted
|
| 236 |
|
|
* in the other direction.
|
| 237 |
|
|
*/
|
| 238 |
|
|
private boolean inverseAccentComparison;
|
| 239 |
|
|
|
| 240 |
|
|
/**
|
| 241 |
|
|
* This collation element is special to unknown sequence.
|
| 242 |
|
|
* The JDK uses it to mark and sort the characters which has
|
| 243 |
|
|
* no collation rules.
|
| 244 |
|
|
*/
|
| 245 |
|
|
static final CollationElement SPECIAL_UNKNOWN_SEQ =
|
| 246 |
|
|
new CollationElement("", (short) 32767, (short) 0, (short) 0,
|
| 247 |
|
|
(short) 0, null, false);
|
| 248 |
|
|
|
| 249 |
|
|
/**
|
| 250 |
|
|
* This method initializes a new instance of <code>RuleBasedCollator</code>
|
| 251 |
|
|
* with the specified collation rules. Note that an application normally
|
| 252 |
|
|
* obtains an instance of <code>RuleBasedCollator</code> by calling the
|
| 253 |
|
|
* <code>getInstance</code> method of <code>Collator</code>. That method
|
| 254 |
|
|
* automatically loads the proper set of rules for the desired locale.
|
| 255 |
|
|
*
|
| 256 |
|
|
* @param rules The collation rule string.
|
| 257 |
|
|
*
|
| 258 |
|
|
* @exception ParseException If the rule string contains syntax errors.
|
| 259 |
|
|
*/
|
| 260 |
|
|
public RuleBasedCollator(String rules) throws ParseException
|
| 261 |
|
|
{
|
| 262 |
|
|
if (rules.equals(""))
|
| 263 |
|
|
throw new ParseException("empty rule set", 0);
|
| 264 |
|
|
|
| 265 |
|
|
this.rules = rules;
|
| 266 |
|
|
|
| 267 |
|
|
buildCollationVector(parseString(rules));
|
| 268 |
|
|
buildPrefixAccess();
|
| 269 |
|
|
}
|
| 270 |
|
|
|
| 271 |
|
|
/**
|
| 272 |
|
|
* This method returns the number of common characters at the beginning
|
| 273 |
|
|
* of the string of the two parameters.
|
| 274 |
|
|
*
|
| 275 |
|
|
* @param prefix A string considered as a prefix to test against
|
| 276 |
|
|
* the other string.
|
| 277 |
|
|
* @param s A string to test the prefix against.
|
| 278 |
|
|
* @return The number of common characters.
|
| 279 |
|
|
*/
|
| 280 |
|
|
static int findPrefixLength(String prefix, String s)
|
| 281 |
|
|
{
|
| 282 |
|
|
int index;
|
| 283 |
|
|
int len = prefix.length();
|
| 284 |
|
|
|
| 285 |
|
|
for (index = 0; index < len && index < s.length(); ++index)
|
| 286 |
|
|
{
|
| 287 |
|
|
if (prefix.charAt(index) != s.charAt(index))
|
| 288 |
|
|
return index;
|
| 289 |
|
|
}
|
| 290 |
|
|
|
| 291 |
|
|
|
| 292 |
|
|
return index;
|
| 293 |
|
|
}
|
| 294 |
|
|
|
| 295 |
|
|
/**
|
| 296 |
|
|
* Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
|
| 297 |
|
|
* checks whether it is possible to find an anchor point for the rules to be merged and
|
| 298 |
|
|
* then insert them at that precise point.
|
| 299 |
|
|
*
|
| 300 |
|
|
* @param offset Offset in the string containing rules of the beginning of the rules
|
| 301 |
|
|
* being merged in.
|
| 302 |
|
|
* @param starter Text of the rules being merged.
|
| 303 |
|
|
* @param main Repository of all already parsed rules.
|
| 304 |
|
|
* @param patch Rules to be merged into the repository.
|
| 305 |
|
|
* @throws ParseException if it is impossible to find an anchor point for the new rules.
|
| 306 |
|
|
*/
|
| 307 |
|
|
private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
|
| 308 |
|
|
throws ParseException
|
| 309 |
|
|
{
|
| 310 |
|
|
int insertion_point = -1;
|
| 311 |
|
|
int max_length = 0;
|
| 312 |
|
|
|
| 313 |
|
|
/* We must check that no rules conflict with another already present. If it
|
| 314 |
|
|
* is the case delete the old rule.
|
| 315 |
|
|
*/
|
| 316 |
|
|
|
| 317 |
|
|
/* For the moment good old O(N^2) algorithm.
|
| 318 |
|
|
*/
|
| 319 |
|
|
for (int i = 0; i < patch.size(); i++)
|
| 320 |
|
|
{
|
| 321 |
|
|
int j = 0;
|
| 322 |
|
|
|
| 323 |
|
|
while (j < main.size())
|
| 324 |
|
|
{
|
| 325 |
|
|
CollationSorter rule1 = (CollationSorter) patch.get(i);
|
| 326 |
|
|
CollationSorter rule2 = (CollationSorter) main.get(j);
|
| 327 |
|
|
|
| 328 |
|
|
if (rule1.textElement.equals(rule2.textElement))
|
| 329 |
|
|
main.remove(j);
|
| 330 |
|
|
else
|
| 331 |
|
|
j++;
|
| 332 |
|
|
}
|
| 333 |
|
|
}
|
| 334 |
|
|
|
| 335 |
|
|
// Find the insertion point... O(N)
|
| 336 |
|
|
for (int i = 0; i < main.size(); i++)
|
| 337 |
|
|
{
|
| 338 |
|
|
CollationSorter sorter = (CollationSorter) main.get(i);
|
| 339 |
|
|
int length = findPrefixLength(starter, sorter.textElement);
|
| 340 |
|
|
|
| 341 |
|
|
if (length > max_length)
|
| 342 |
|
|
{
|
| 343 |
|
|
max_length = length;
|
| 344 |
|
|
insertion_point = i+1;
|
| 345 |
|
|
}
|
| 346 |
|
|
}
|
| 347 |
|
|
|
| 348 |
|
|
if (insertion_point < 0)
|
| 349 |
|
|
throw new ParseException("no insertion point found for " + starter, offset);
|
| 350 |
|
|
|
| 351 |
|
|
if (max_length < starter.length())
|
| 352 |
|
|
{
|
| 353 |
|
|
/*
|
| 354 |
|
|
* We need to expand the first entry. It must be sorted
|
| 355 |
|
|
* like if it was the reference key itself (like the spec
|
| 356 |
|
|
* said. So the first entry is special: the element is
|
| 357 |
|
|
* replaced by the specified text element for the sorting.
|
| 358 |
|
|
* This text replace the old one for comparisons. However
|
| 359 |
|
|
* to preserve the behaviour we replace the first key (corresponding
|
| 360 |
|
|
* to the found prefix) by a new code rightly ordered in the
|
| 361 |
|
|
* sequence. The rest of the subsequence must be appended
|
| 362 |
|
|
* to the end of the sequence.
|
| 363 |
|
|
*/
|
| 364 |
|
|
CollationSorter sorter = (CollationSorter) patch.get(0);
|
| 365 |
|
|
CollationSorter expansionPrefix =
|
| 366 |
|
|
(CollationSorter) main.get(insertion_point-1);
|
| 367 |
|
|
|
| 368 |
|
|
sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
|
| 369 |
|
|
|
| 370 |
|
|
main.add(insertion_point, sorter);
|
| 371 |
|
|
|
| 372 |
|
|
/*
|
| 373 |
|
|
* This is a new set of rules. Append to the list.
|
| 374 |
|
|
*/
|
| 375 |
|
|
patch.remove(0);
|
| 376 |
|
|
insertion_point++;
|
| 377 |
|
|
}
|
| 378 |
|
|
|
| 379 |
|
|
// Now insert all elements of patch at the insertion point.
|
| 380 |
|
|
for (int i = 0; i < patch.size(); i++)
|
| 381 |
|
|
main.add(i+insertion_point, patch.get(i));
|
| 382 |
|
|
}
|
| 383 |
|
|
|
| 384 |
|
|
/**
|
| 385 |
|
|
* This method parses a string and build a set of sorting instructions. The parsing
|
| 386 |
|
|
* may only be partial on the case the rules are to be merged sometime later.
|
| 387 |
|
|
*
|
| 388 |
|
|
* @param stop_on_reset If this parameter is true then the parser stops when it
|
| 389 |
|
|
* encounters a reset instruction. In the other case, it tries to parse the subrules
|
| 390 |
|
|
* and merged it in the same repository.
|
| 391 |
|
|
* @param v Output vector for the set of instructions.
|
| 392 |
|
|
* @param base_offset Offset in the string to begin parsing.
|
| 393 |
|
|
* @param rules Rules to be parsed.
|
| 394 |
|
|
* @return -1 if the parser reached the end of the string, an integer representing the
|
| 395 |
|
|
* offset in the string at which it stopped parsing.
|
| 396 |
|
|
* @throws ParseException if something turned wrong during the parsing. To get details
|
| 397 |
|
|
* decode the message.
|
| 398 |
|
|
*/
|
| 399 |
|
|
private int subParseString(boolean stop_on_reset, ArrayList v,
|
| 400 |
|
|
int base_offset, String rules)
|
| 401 |
|
|
throws ParseException
|
| 402 |
|
|
{
|
| 403 |
|
|
boolean ignoreChars = (base_offset == 0);
|
| 404 |
|
|
int operator = -1;
|
| 405 |
|
|
StringBuffer sb = new StringBuffer();
|
| 406 |
|
|
boolean doubleQuote = false;
|
| 407 |
|
|
boolean eatingChars = false;
|
| 408 |
|
|
boolean nextIsModifier = false;
|
| 409 |
|
|
boolean isModifier = false;
|
| 410 |
|
|
int i;
|
| 411 |
|
|
|
| 412 |
|
|
main_parse_loop:
|
| 413 |
|
|
for (i = 0; i < rules.length(); i++)
|
| 414 |
|
|
{
|
| 415 |
|
|
char c = rules.charAt(i);
|
| 416 |
|
|
int type = -1;
|
| 417 |
|
|
|
| 418 |
|
|
if (!eatingChars &&
|
| 419 |
|
|
((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
|
| 420 |
|
|
continue;
|
| 421 |
|
|
|
| 422 |
|
|
isModifier = nextIsModifier;
|
| 423 |
|
|
nextIsModifier = false;
|
| 424 |
|
|
|
| 425 |
|
|
if (eatingChars && c != '\'')
|
| 426 |
|
|
{
|
| 427 |
|
|
doubleQuote = false;
|
| 428 |
|
|
sb.append(c);
|
| 429 |
|
|
continue;
|
| 430 |
|
|
}
|
| 431 |
|
|
if (doubleQuote && eatingChars)
|
| 432 |
|
|
{
|
| 433 |
|
|
sb.append(c);
|
| 434 |
|
|
doubleQuote = false;
|
| 435 |
|
|
continue;
|
| 436 |
|
|
}
|
| 437 |
|
|
|
| 438 |
|
|
switch (c)
|
| 439 |
|
|
{
|
| 440 |
|
|
case '!':
|
| 441 |
|
|
throw new ParseException
|
| 442 |
|
|
("Modifier '!' is not yet supported by Classpath", i + base_offset);
|
| 443 |
|
|
case '<':
|
| 444 |
|
|
type = CollationSorter.GREATERP;
|
| 445 |
|
|
break;
|
| 446 |
|
|
case ';':
|
| 447 |
|
|
type = CollationSorter.GREATERS;
|
| 448 |
|
|
break;
|
| 449 |
|
|
case ',':
|
| 450 |
|
|
type = CollationSorter.GREATERT;
|
| 451 |
|
|
break;
|
| 452 |
|
|
case '=':
|
| 453 |
|
|
type = CollationSorter.EQUAL;
|
| 454 |
|
|
break;
|
| 455 |
|
|
case '\'':
|
| 456 |
|
|
eatingChars = !eatingChars;
|
| 457 |
|
|
doubleQuote = true;
|
| 458 |
|
|
break;
|
| 459 |
|
|
case '@':
|
| 460 |
|
|
if (ignoreChars)
|
| 461 |
|
|
throw new ParseException
|
| 462 |
|
|
("comparison list has not yet been started. You may only use"
|
| 463 |
|
|
+ "(<,;=&)", i + base_offset);
|
| 464 |
|
|
// Inverse the order of secondaries from now on.
|
| 465 |
|
|
nextIsModifier = true;
|
| 466 |
|
|
type = CollationSorter.INVERSE_SECONDARY;
|
| 467 |
|
|
break;
|
| 468 |
|
|
case '&':
|
| 469 |
|
|
type = CollationSorter.RESET;
|
| 470 |
|
|
if (stop_on_reset)
|
| 471 |
|
|
break main_parse_loop;
|
| 472 |
|
|
break;
|
| 473 |
|
|
default:
|
| 474 |
|
|
if (operator < 0)
|
| 475 |
|
|
throw new ParseException
|
| 476 |
|
|
("operator missing at " + (i + base_offset), i + base_offset);
|
| 477 |
|
|
if (! eatingChars
|
| 478 |
|
|
&& ((c >= 0x21 && c <= 0x2F)
|
| 479 |
|
|
|| (c >= 0x3A && c <= 0x40)
|
| 480 |
|
|
|| (c >= 0x5B && c <= 0x60)
|
| 481 |
|
|
|| (c >= 0x7B && c <= 0x7E)))
|
| 482 |
|
|
throw new ParseException
|
| 483 |
|
|
("unquoted punctuation character '" + c + "'", i + base_offset);
|
| 484 |
|
|
|
| 485 |
|
|
//type = ignoreChars ? CollationSorter.IGNORE : -1;
|
| 486 |
|
|
sb.append(c);
|
| 487 |
|
|
break;
|
| 488 |
|
|
}
|
| 489 |
|
|
|
| 490 |
|
|
if (type < 0)
|
| 491 |
|
|
continue;
|
| 492 |
|
|
|
| 493 |
|
|
if (operator < 0)
|
| 494 |
|
|
{
|
| 495 |
|
|
operator = type;
|
| 496 |
|
|
continue;
|
| 497 |
|
|
}
|
| 498 |
|
|
|
| 499 |
|
|
if (sb.length() == 0 && !isModifier)
|
| 500 |
|
|
throw new ParseException
|
| 501 |
|
|
("text element empty at " + (i+base_offset), i+base_offset);
|
| 502 |
|
|
|
| 503 |
|
|
if (operator == CollationSorter.RESET)
|
| 504 |
|
|
{
|
| 505 |
|
|
/* Reposition in the sorting list at the position
|
| 506 |
|
|
* indicated by the text element.
|
| 507 |
|
|
*/
|
| 508 |
|
|
String subrules = rules.substring(i);
|
| 509 |
|
|
ArrayList sorted_rules = new ArrayList();
|
| 510 |
|
|
int idx;
|
| 511 |
|
|
|
| 512 |
|
|
// Parse the subrules but do not iterate through all
|
| 513 |
|
|
// sublist. This is the privilege of the first call.
|
| 514 |
|
|
idx = subParseString(true, sorted_rules, base_offset+i, subrules);
|
| 515 |
|
|
|
| 516 |
|
|
// Merge new parsed rules into the list.
|
| 517 |
|
|
mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
|
| 518 |
|
|
sb.setLength(0);
|
| 519 |
|
|
|
| 520 |
|
|
// Reset state to none.
|
| 521 |
|
|
operator = -1;
|
| 522 |
|
|
type = -1;
|
| 523 |
|
|
// We have found a new subrule at 'idx' but it has not been parsed.
|
| 524 |
|
|
if (idx >= 0)
|
| 525 |
|
|
{
|
| 526 |
|
|
i += idx-1;
|
| 527 |
|
|
continue main_parse_loop;
|
| 528 |
|
|
}
|
| 529 |
|
|
else
|
| 530 |
|
|
// No more rules.
|
| 531 |
|
|
break main_parse_loop;
|
| 532 |
|
|
}
|
| 533 |
|
|
|
| 534 |
|
|
CollationSorter sorter = new CollationSorter();
|
| 535 |
|
|
|
| 536 |
|
|
if (operator == CollationSorter.GREATERP)
|
| 537 |
|
|
ignoreChars = false;
|
| 538 |
|
|
|
| 539 |
|
|
sorter.comparisonType = operator;
|
| 540 |
|
|
sorter.textElement = sb.toString();
|
| 541 |
|
|
sorter.hashText = sorter.textElement.hashCode();
|
| 542 |
|
|
sorter.offset = base_offset+rules.length();
|
| 543 |
|
|
sorter.ignore = ignoreChars;
|
| 544 |
|
|
sb.setLength(0);
|
| 545 |
|
|
|
| 546 |
|
|
v.add(sorter);
|
| 547 |
|
|
operator = type;
|
| 548 |
|
|
}
|
| 549 |
|
|
|
| 550 |
|
|
if (operator >= 0)
|
| 551 |
|
|
{
|
| 552 |
|
|
CollationSorter sorter = new CollationSorter();
|
| 553 |
|
|
int pos = rules.length() + base_offset;
|
| 554 |
|
|
|
| 555 |
|
|
if ((sb.length() != 0 && nextIsModifier)
|
| 556 |
|
|
|| (sb.length() == 0 && !nextIsModifier && !eatingChars))
|
| 557 |
|
|
throw new ParseException("text element empty at " + pos, pos);
|
| 558 |
|
|
|
| 559 |
|
|
if (operator == CollationSorter.GREATERP)
|
| 560 |
|
|
ignoreChars = false;
|
| 561 |
|
|
|
| 562 |
|
|
sorter.comparisonType = operator;
|
| 563 |
|
|
sorter.textElement = sb.toString();
|
| 564 |
|
|
sorter.hashText = sorter.textElement.hashCode();
|
| 565 |
|
|
sorter.offset = base_offset+pos;
|
| 566 |
|
|
sorter.ignore = ignoreChars;
|
| 567 |
|
|
v.add(sorter);
|
| 568 |
|
|
}
|
| 569 |
|
|
|
| 570 |
|
|
if (i == rules.length())
|
| 571 |
|
|
return -1;
|
| 572 |
|
|
else
|
| 573 |
|
|
return i;
|
| 574 |
|
|
}
|
| 575 |
|
|
|
| 576 |
|
|
/**
|
| 577 |
|
|
* This method creates a copy of this object.
|
| 578 |
|
|
*
|
| 579 |
|
|
* @return A copy of this object.
|
| 580 |
|
|
*/
|
| 581 |
|
|
public Object clone()
|
| 582 |
|
|
{
|
| 583 |
|
|
return super.clone();
|
| 584 |
|
|
}
|
| 585 |
|
|
|
| 586 |
|
|
/**
|
| 587 |
|
|
* This method completely parses a string 'rules' containing sorting rules.
|
| 588 |
|
|
*
|
| 589 |
|
|
* @param rules String containing the rules to be parsed.
|
| 590 |
|
|
* @return A set of sorting instructions stored in a Vector.
|
| 591 |
|
|
* @throws ParseException if something turned wrong during the parsing. To get details
|
| 592 |
|
|
* decode the message.
|
| 593 |
|
|
*/
|
| 594 |
|
|
private ArrayList parseString(String rules)
|
| 595 |
|
|
throws ParseException
|
| 596 |
|
|
{
|
| 597 |
|
|
ArrayList v = new ArrayList();
|
| 598 |
|
|
|
| 599 |
|
|
// result of the first subParseString is not absolute (may be -1 or a
|
| 600 |
|
|
// positive integer). But we do not care.
|
| 601 |
|
|
subParseString(false, v, 0, rules);
|
| 602 |
|
|
|
| 603 |
|
|
return v;
|
| 604 |
|
|
}
|
| 605 |
|
|
|
| 606 |
|
|
/**
|
| 607 |
|
|
* This method uses the sorting instructions built by {@link #parseString}
|
| 608 |
|
|
* to build collation elements which can be directly used to sort strings.
|
| 609 |
|
|
*
|
| 610 |
|
|
* @param parsedElements Parsed instructions stored in a ArrayList.
|
| 611 |
|
|
* @throws ParseException if the order of the instructions are not valid.
|
| 612 |
|
|
*/
|
| 613 |
|
|
private void buildCollationVector(ArrayList parsedElements)
|
| 614 |
|
|
throws ParseException
|
| 615 |
|
|
{
|
| 616 |
|
|
int primary_seq = 0;
|
| 617 |
|
|
int last_tertiary_seq = 0;
|
| 618 |
|
|
short secondary_seq = 0;
|
| 619 |
|
|
short tertiary_seq = 0;
|
| 620 |
|
|
short equality_seq = 0;
|
| 621 |
|
|
boolean inverseComparisons = false;
|
| 622 |
|
|
final boolean DECREASING = false;
|
| 623 |
|
|
final boolean INCREASING = true;
|
| 624 |
|
|
boolean secondaryType = INCREASING;
|
| 625 |
|
|
ArrayList v = new ArrayList();
|
| 626 |
|
|
|
| 627 |
|
|
// elts is completely sorted.
|
| 628 |
|
|
element_loop:
|
| 629 |
|
|
for (int i = 0; i < parsedElements.size(); i++)
|
| 630 |
|
|
{
|
| 631 |
|
|
CollationSorter elt = (CollationSorter) parsedElements.get(i);
|
| 632 |
|
|
boolean ignoreChar = false;
|
| 633 |
|
|
|
| 634 |
|
|
switch (elt.comparisonType)
|
| 635 |
|
|
{
|
| 636 |
|
|
case CollationSorter.GREATERP:
|
| 637 |
|
|
primary_seq++;
|
| 638 |
|
|
if (inverseComparisons)
|
| 639 |
|
|
{
|
| 640 |
|
|
secondary_seq = Short.MAX_VALUE;
|
| 641 |
|
|
secondaryType = DECREASING;
|
| 642 |
|
|
}
|
| 643 |
|
|
else
|
| 644 |
|
|
{
|
| 645 |
|
|
secondary_seq = 0;
|
| 646 |
|
|
secondaryType = INCREASING;
|
| 647 |
|
|
}
|
| 648 |
|
|
tertiary_seq = 0;
|
| 649 |
|
|
equality_seq = 0;
|
| 650 |
|
|
inverseComparisons = false;
|
| 651 |
|
|
break;
|
| 652 |
|
|
case CollationSorter.GREATERS:
|
| 653 |
|
|
if (secondaryType == DECREASING)
|
| 654 |
|
|
secondary_seq--;
|
| 655 |
|
|
else
|
| 656 |
|
|
secondary_seq++;
|
| 657 |
|
|
tertiary_seq = 0;
|
| 658 |
|
|
equality_seq = 0;
|
| 659 |
|
|
break;
|
| 660 |
|
|
case CollationSorter.INVERSE_SECONDARY:
|
| 661 |
|
|
inverseComparisons = true;
|
| 662 |
|
|
continue element_loop;
|
| 663 |
|
|
case CollationSorter.GREATERT:
|
| 664 |
|
|
tertiary_seq++;
|
| 665 |
|
|
if (primary_seq == 0)
|
| 666 |
|
|
last_tertiary_seq = tertiary_seq;
|
| 667 |
|
|
equality_seq = 0;
|
| 668 |
|
|
break;
|
| 669 |
|
|
case CollationSorter.EQUAL:
|
| 670 |
|
|
equality_seq++;
|
| 671 |
|
|
break;
|
| 672 |
|
|
case CollationSorter.RESET:
|
| 673 |
|
|
throw new ParseException
|
| 674 |
|
|
("Invalid reached state 'RESET'. Internal error", elt.offset);
|
| 675 |
|
|
default:
|
| 676 |
|
|
throw new ParseException
|
| 677 |
|
|
("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
|
| 678 |
|
|
}
|
| 679 |
|
|
|
| 680 |
|
|
v.add(new CollationElement(elt.textElement, primary_seq,
|
| 681 |
|
|
secondary_seq, tertiary_seq,
|
| 682 |
|
|
equality_seq, elt.expansionOrdering, elt.ignore));
|
| 683 |
|
|
}
|
| 684 |
|
|
|
| 685 |
|
|
this.inverseAccentComparison = inverseComparisons;
|
| 686 |
|
|
|
| 687 |
|
|
ce_table = v.toArray();
|
| 688 |
|
|
|
| 689 |
|
|
last_primary_value = primary_seq+1;
|
| 690 |
|
|
last_tertiary_value = last_tertiary_seq+1;
|
| 691 |
|
|
}
|
| 692 |
|
|
|
| 693 |
|
|
/**
|
| 694 |
|
|
* Build a tree where all keys are the texts of collation elements and data is
|
| 695 |
|
|
* the collation element itself. The tree is used when extracting all prefix
|
| 696 |
|
|
* for a given text.
|
| 697 |
|
|
*/
|
| 698 |
|
|
private void buildPrefixAccess()
|
| 699 |
|
|
{
|
| 700 |
|
|
prefix_tree = new HashMap();
|
| 701 |
|
|
|
| 702 |
|
|
for (int i = 0; i < ce_table.length; i++)
|
| 703 |
|
|
{
|
| 704 |
|
|
CollationElement e = (CollationElement) ce_table[i];
|
| 705 |
|
|
|
| 706 |
|
|
prefix_tree.put(e.key, e);
|
| 707 |
|
|
}
|
| 708 |
|
|
}
|
| 709 |
|
|
|
| 710 |
|
|
/**
|
| 711 |
|
|
* This method returns an integer which indicates whether the first
|
| 712 |
|
|
* specified <code>String</code> is less than, greater than, or equal to
|
| 713 |
|
|
* the second. The value depends not only on the collation rules in
|
| 714 |
|
|
* effect, but also the strength and decomposition settings of this object.
|
| 715 |
|
|
*
|
| 716 |
|
|
* @param source The first <code>String</code> to compare.
|
| 717 |
|
|
* @param target A second <code>String</code> to compare to the first.
|
| 718 |
|
|
*
|
| 719 |
|
|
* @return A negative integer if source < target, a positive integer
|
| 720 |
|
|
* if source > target, or 0 if source == target.
|
| 721 |
|
|
*/
|
| 722 |
|
|
public int compare(String source, String target)
|
| 723 |
|
|
{
|
| 724 |
|
|
CollationElementIterator cs, ct;
|
| 725 |
|
|
CollationElement ord1block = null;
|
| 726 |
|
|
CollationElement ord2block = null;
|
| 727 |
|
|
boolean advance_block_1 = true;
|
| 728 |
|
|
boolean advance_block_2 = true;
|
| 729 |
|
|
|
| 730 |
|
|
cs = getCollationElementIterator(source);
|
| 731 |
|
|
ct = getCollationElementIterator(target);
|
| 732 |
|
|
|
| 733 |
|
|
for(;;)
|
| 734 |
|
|
{
|
| 735 |
|
|
int ord1;
|
| 736 |
|
|
int ord2;
|
| 737 |
|
|
|
| 738 |
|
|
/*
|
| 739 |
|
|
* We have to check whether the characters are ignorable.
|
| 740 |
|
|
* If it is the case then forget them.
|
| 741 |
|
|
*/
|
| 742 |
|
|
if (advance_block_1)
|
| 743 |
|
|
{
|
| 744 |
|
|
ord1block = cs.nextBlock();
|
| 745 |
|
|
if (ord1block != null && ord1block.ignore)
|
| 746 |
|
|
continue;
|
| 747 |
|
|
}
|
| 748 |
|
|
|
| 749 |
|
|
if (advance_block_2)
|
| 750 |
|
|
{
|
| 751 |
|
|
ord2block = ct.nextBlock();
|
| 752 |
|
|
if (ord2block != null && ord2block.ignore)
|
| 753 |
|
|
{
|
| 754 |
|
|
advance_block_1 = false;
|
| 755 |
|
|
continue;
|
| 756 |
|
|
}
|
| 757 |
|
|
}
|
| 758 |
|
|
else
|
| 759 |
|
|
advance_block_2 = true;
|
| 760 |
|
|
|
| 761 |
|
|
if (!advance_block_1)
|
| 762 |
|
|
advance_block_1 = true;
|
| 763 |
|
|
|
| 764 |
|
|
if (ord1block != null)
|
| 765 |
|
|
ord1 = ord1block.getValue();
|
| 766 |
|
|
else
|
| 767 |
|
|
{
|
| 768 |
|
|
if (ord2block == null)
|
| 769 |
|
|
return 0;
|
| 770 |
|
|
return -1;
|
| 771 |
|
|
}
|
| 772 |
|
|
|
| 773 |
|
|
if (ord2block == null)
|
| 774 |
|
|
return 1;
|
| 775 |
|
|
|
| 776 |
|
|
ord2 = ord2block.getValue();
|
| 777 |
|
|
|
| 778 |
|
|
// We know chars are totally equal, so skip
|
| 779 |
|
|
if (ord1 == ord2)
|
| 780 |
|
|
{
|
| 781 |
|
|
if (getStrength() == IDENTICAL)
|
| 782 |
|
|
if (!ord1block.key.equals(ord2block.key))
|
| 783 |
|
|
return ord1block.key.compareTo(ord2block.key);
|
| 784 |
|
|
continue;
|
| 785 |
|
|
}
|
| 786 |
|
|
|
| 787 |
|
|
// Check for primary strength differences
|
| 788 |
|
|
int prim1 = CollationElementIterator.primaryOrder(ord1);
|
| 789 |
|
|
int prim2 = CollationElementIterator.primaryOrder(ord2);
|
| 790 |
|
|
|
| 791 |
|
|
if (prim1 == 0 && getStrength() < TERTIARY)
|
| 792 |
|
|
{
|
| 793 |
|
|
advance_block_2 = false;
|
| 794 |
|
|
continue;
|
| 795 |
|
|
}
|
| 796 |
|
|
else if (prim2 == 0 && getStrength() < TERTIARY)
|
| 797 |
|
|
{
|
| 798 |
|
|
advance_block_1 = false;
|
| 799 |
|
|
continue;
|
| 800 |
|
|
}
|
| 801 |
|
|
|
| 802 |
|
|
if (prim1 < prim2)
|
| 803 |
|
|
return -1;
|
| 804 |
|
|
else if (prim1 > prim2)
|
| 805 |
|
|
return 1;
|
| 806 |
|
|
else if (getStrength() == PRIMARY)
|
| 807 |
|
|
continue;
|
| 808 |
|
|
|
| 809 |
|
|
// Check for secondary strength differences
|
| 810 |
|
|
int sec1 = CollationElementIterator.secondaryOrder(ord1);
|
| 811 |
|
|
int sec2 = CollationElementIterator.secondaryOrder(ord2);
|
| 812 |
|
|
|
| 813 |
|
|
if (sec1 < sec2)
|
| 814 |
|
|
return -1;
|
| 815 |
|
|
else if (sec1 > sec2)
|
| 816 |
|
|
return 1;
|
| 817 |
|
|
else if (getStrength() == SECONDARY)
|
| 818 |
|
|
continue;
|
| 819 |
|
|
|
| 820 |
|
|
// Check for tertiary differences
|
| 821 |
|
|
int tert1 = CollationElementIterator.tertiaryOrder(ord1);
|
| 822 |
|
|
int tert2 = CollationElementIterator.tertiaryOrder(ord2);
|
| 823 |
|
|
|
| 824 |
|
|
if (tert1 < tert2)
|
| 825 |
|
|
return -1;
|
| 826 |
|
|
else if (tert1 > tert2)
|
| 827 |
|
|
return 1;
|
| 828 |
|
|
else if (getStrength() == TERTIARY)
|
| 829 |
|
|
continue;
|
| 830 |
|
|
|
| 831 |
|
|
// Apparently JDK does this (at least for my test case).
|
| 832 |
|
|
return ord1block.key.compareTo(ord2block.key);
|
| 833 |
|
|
}
|
| 834 |
|
|
}
|
| 835 |
|
|
|
| 836 |
|
|
/**
|
| 837 |
|
|
* This method tests this object for equality against the specified
|
| 838 |
|
|
* object. This will be true if and only if the specified object is
|
| 839 |
|
|
* another reference to this object.
|
| 840 |
|
|
*
|
| 841 |
|
|
* @param obj The <code>Object</code> to compare against this object.
|
| 842 |
|
|
*
|
| 843 |
|
|
* @return <code>true</code> if the specified object is equal to this object,
|
| 844 |
|
|
* <code>false</code> otherwise.
|
| 845 |
|
|
*/
|
| 846 |
|
|
public boolean equals(Object obj)
|
| 847 |
|
|
{
|
| 848 |
|
|
if (obj == this)
|
| 849 |
|
|
return true;
|
| 850 |
|
|
else
|
| 851 |
|
|
return false;
|
| 852 |
|
|
}
|
| 853 |
|
|
|
| 854 |
|
|
/**
|
| 855 |
|
|
* This method builds a default collation element without invoking
|
| 856 |
|
|
* the database created from the rules passed to the constructor.
|
| 857 |
|
|
*
|
| 858 |
|
|
* @param c Character which needs a collation element.
|
| 859 |
|
|
* @return A valid brand new CollationElement instance.
|
| 860 |
|
|
*/
|
| 861 |
|
|
CollationElement getDefaultElement(char c)
|
| 862 |
|
|
{
|
| 863 |
|
|
int v;
|
| 864 |
|
|
|
| 865 |
|
|
// Preliminary support for generic accent sorting inversion (I don't know if all
|
| 866 |
|
|
// characters in the range should be sorted backward). This is the place
|
| 867 |
|
|
// to fix this if needed.
|
| 868 |
|
|
if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
|
| 869 |
|
|
v = 0x0361 - ((int) c - 0x02B9);
|
| 870 |
|
|
else
|
| 871 |
|
|
v = (short) c;
|
| 872 |
|
|
return new CollationElement("" + c, last_primary_value + v,
|
| 873 |
|
|
(short) 0, (short) 0, (short) 0, null, false);
|
| 874 |
|
|
}
|
| 875 |
|
|
|
| 876 |
|
|
/**
|
| 877 |
|
|
* This method builds a default collation element for an accented character
|
| 878 |
|
|
* without invoking the database created from the rules passed to the constructor.
|
| 879 |
|
|
*
|
| 880 |
|
|
* @param c Character which needs a collation element.
|
| 881 |
|
|
* @return A valid brand new CollationElement instance.
|
| 882 |
|
|
*/
|
| 883 |
|
|
CollationElement getDefaultAccentedElement(char c)
|
| 884 |
|
|
{
|
| 885 |
|
|
int v;
|
| 886 |
|
|
|
| 887 |
|
|
// Preliminary support for generic accent sorting inversion (I don't know if all
|
| 888 |
|
|
// characters in the range should be sorted backward). This is the place
|
| 889 |
|
|
// to fix this if needed.
|
| 890 |
|
|
if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
|
| 891 |
|
|
v = 0x0361 - ((int) c - 0x02B9);
|
| 892 |
|
|
else
|
| 893 |
|
|
v = (short) c;
|
| 894 |
|
|
return new CollationElement("" + c, (short) 0,
|
| 895 |
|
|
(short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
|
| 896 |
|
|
}
|
| 897 |
|
|
|
| 898 |
|
|
/**
|
| 899 |
|
|
* This method returns an instance for <code>CollationElementIterator</code>
|
| 900 |
|
|
* for the specified <code>String</code> under the collation rules for this
|
| 901 |
|
|
* object.
|
| 902 |
|
|
*
|
| 903 |
|
|
* @param source The <code>String</code> to return the
|
| 904 |
|
|
* <code>CollationElementIterator</code> instance for.
|
| 905 |
|
|
*
|
| 906 |
|
|
* @return A <code>CollationElementIterator</code> for the specified
|
| 907 |
|
|
* <code>String</code>.
|
| 908 |
|
|
*/
|
| 909 |
|
|
public CollationElementIterator getCollationElementIterator(String source)
|
| 910 |
|
|
{
|
| 911 |
|
|
return new CollationElementIterator(this, source);
|
| 912 |
|
|
}
|
| 913 |
|
|
|
| 914 |
|
|
/**
|
| 915 |
|
|
* This method returns an instance of <code>CollationElementIterator</code>
|
| 916 |
|
|
* for the <code>String</code> represented by the specified
|
| 917 |
|
|
* <code>CharacterIterator</code>.
|
| 918 |
|
|
*
|
| 919 |
|
|
* @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
|
| 920 |
|
|
*
|
| 921 |
|
|
* @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
|
| 922 |
|
|
*/
|
| 923 |
|
|
public CollationElementIterator getCollationElementIterator(CharacterIterator source)
|
| 924 |
|
|
{
|
| 925 |
|
|
StringBuffer expand = new StringBuffer("");
|
| 926 |
|
|
|
| 927 |
|
|
// Right now we assume that we will read from the beginning of the string.
|
| 928 |
|
|
for (char c = source.first();
|
| 929 |
|
|
c != CharacterIterator.DONE;
|
| 930 |
|
|
c = source.next())
|
| 931 |
|
|
decomposeCharacter(c, expand);
|
| 932 |
|
|
|
| 933 |
|
|
return getCollationElementIterator(expand.toString());
|
| 934 |
|
|
}
|
| 935 |
|
|
|
| 936 |
|
|
/**
|
| 937 |
|
|
* This method returns an instance of <code>CollationKey</code> for the
|
| 938 |
|
|
* specified <code>String</code>. The object returned will have a
|
| 939 |
|
|
* more efficient mechanism for its comparison function that could
|
| 940 |
|
|
* provide speed benefits if multiple comparisons are performed, such
|
| 941 |
|
|
* as during a sort.
|
| 942 |
|
|
*
|
| 943 |
|
|
* @param source The <code>String</code> to create a <code>CollationKey</code> for.
|
| 944 |
|
|
*
|
| 945 |
|
|
* @return A <code>CollationKey</code> for the specified <code>String</code>.
|
| 946 |
|
|
*/
|
| 947 |
|
|
public CollationKey getCollationKey(String source)
|
| 948 |
|
|
{
|
| 949 |
|
|
CollationElementIterator cei = getCollationElementIterator(source);
|
| 950 |
|
|
ArrayList vect = new ArrayList();
|
| 951 |
|
|
|
| 952 |
|
|
int ord = cei.next();
|
| 953 |
|
|
cei.reset(); //set to start of string
|
| 954 |
|
|
|
| 955 |
|
|
while (ord != CollationElementIterator.NULLORDER)
|
| 956 |
|
|
{
|
| 957 |
|
|
// If the primary order is null, it means this is an ignorable
|
| 958 |
|
|
// character.
|
| 959 |
|
|
if (CollationElementIterator.primaryOrder(ord) == 0)
|
| 960 |
|
|
{
|
| 961 |
|
|
ord = cei.next();
|
| 962 |
|
|
continue;
|
| 963 |
|
|
}
|
| 964 |
|
|
switch (getStrength())
|
| 965 |
|
|
{
|
| 966 |
|
|
case PRIMARY:
|
| 967 |
|
|
ord = CollationElementIterator.primaryOrder(ord);
|
| 968 |
|
|
break;
|
| 969 |
|
|
|
| 970 |
|
|
case SECONDARY:
|
| 971 |
|
|
ord = CollationElementIterator.primaryOrder(ord) << 8;
|
| 972 |
|
|
ord |= CollationElementIterator.secondaryOrder(ord);
|
| 973 |
|
|
|
| 974 |
|
|
default:
|
| 975 |
|
|
break;
|
| 976 |
|
|
}
|
| 977 |
|
|
|
| 978 |
|
|
vect.add(new Integer(ord));
|
| 979 |
|
|
ord = cei.next(); //increment to next key
|
| 980 |
|
|
}
|
| 981 |
|
|
|
| 982 |
|
|
Object[] objarr = vect.toArray();
|
| 983 |
|
|
byte[] key = new byte[objarr.length * 4];
|
| 984 |
|
|
|
| 985 |
|
|
for (int i = 0; i < objarr.length; i++)
|
| 986 |
|
|
{
|
| 987 |
|
|
int j = ((Integer) objarr[i]).intValue();
|
| 988 |
|
|
key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
|
| 989 |
|
|
key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
|
| 990 |
|
|
key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
|
| 991 |
|
|
key [i * 4 + 3] = (byte) (j & 0x000000FF);
|
| 992 |
|
|
}
|
| 993 |
|
|
|
| 994 |
|
|
return new CollationKey(this, source, key);
|
| 995 |
|
|
}
|
| 996 |
|
|
|
| 997 |
|
|
/**
|
| 998 |
|
|
* This method returns a <code>String</code> containing the collation rules
|
| 999 |
|
|
* for this object.
|
| 1000 |
|
|
*
|
| 1001 |
|
|
* @return The collation rules for this object.
|
| 1002 |
|
|
*/
|
| 1003 |
|
|
public String getRules()
|
| 1004 |
|
|
{
|
| 1005 |
|
|
return rules;
|
| 1006 |
|
|
}
|
| 1007 |
|
|
|
| 1008 |
|
|
/**
|
| 1009 |
|
|
* This method returns a hash value for this object.
|
| 1010 |
|
|
*
|
| 1011 |
|
|
* @return A hash value for this object.
|
| 1012 |
|
|
*/
|
| 1013 |
|
|
public int hashCode()
|
| 1014 |
|
|
{
|
| 1015 |
|
|
return System.identityHashCode(this);
|
| 1016 |
|
|
}
|
| 1017 |
|
|
}
|