OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [java/] [text/] [RuleBasedCollator.java] - Blame information for rev 771

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 771 jeremybenn
/* RuleBasedCollator.java -- Concrete Collator Class
2
   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.text;
40
 
41
import gnu.classpath.NotImplementedException;
42
 
43
import java.util.ArrayList;
44
import java.util.HashMap;
45
 
46
/* Written using "Java Class Libraries", 2nd edition, plus online
47
 * API docs for JDK 1.2 from http://www.javasoft.com.
48
 * Status: Believed complete and correct
49
 */
50
 
51
/**
52
 * This class is a concrete subclass of <code>Collator</code> suitable
53
 * for string collation in a wide variety of languages.  An instance of
54
 * this class is normally returned by the <code>getInstance</code> method
55
 * of <code>Collator</code> with rules predefined for the requested
56
 * locale.  However, an instance of this class can be created manually
57
 * with any desired rules.
58
 * <p>
59
 * Rules take the form of a <code>String</code> with the following syntax
60
 * <ul>
61
 * <li> Modifier: '@'</li>
62
 * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
63
 * <li> Reset: '&amp;' : &lt;text&gt;</li>
64
 * </ul>
65
 * The modifier character indicates that accents sort backward as is the
66
 * case with French.  The modifier applies to all rules <b>after</b>
67
 * the modifier but before the next primary sequence. If placed at the end
68
 * of the sequence it applies to all unknown accented characters.
69
 * The relational operators specify how the text
70
 * argument relates to the previous term.  The relation characters have
71
 * the following meanings:
72
 * <ul>
73
 * <li>'&lt;' - The text argument is greater than the prior term at the primary
74
 * difference level.</li>
75
 * <li>';' - The text argument is greater than the prior term at the secondary
76
 * difference level.</li>
77
 * <li>',' - The text argument is greater than the prior term at the tertiary
78
 * difference level.</li>
79
 * <li>'=' - The text argument is equal to the prior term</li>
80
 * </ul>
81
 * <p>
82
 * As for the text argument itself, this is any sequence of Unicode
83
 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
84
 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
85
 * desired, they must be enclosed in single quotes.  If any whitespace is
86
 * encountered, it is ignored.  (For example, "a b" is equal to "ab").
87
 * <p>
88
 * The reset operation inserts the following rule at the point where the
89
 * text argument to it exists in the previously declared rule string.  This
90
 * makes it easy to add new rules to an existing string by simply including
91
 * them in a reset sequence at the end.  Note that the text argument, or
92
 * at least the first character of it, must be present somewhere in the
93
 * previously declared rules in order to be inserted properly.  If this
94
 * is not satisfied, a <code>ParseException</code> will be thrown.
95
 * <p>
96
 * This system of configuring <code>RuleBasedCollator</code> is needlessly
97
 * complex and the people at Taligent who developed it (along with the folks
98
 * at Sun who accepted it into the Java standard library) deserve a slow
99
 * and agonizing death.
100
 * <p>
101
 * Here are a couple of examples of rule strings:
102
 * <p>
103
 * "&lt; a &lt; b &lt; c" - This string says that a is less than b which is
104
 * less than c, with all differences being primary differences.
105
 * <p>
106
 * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
107
 * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
108
 * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
109
 * under a tertiary strength comparison.
110
 * <p>
111
 * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
112
 * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
113
 * the rule "&lt; b" is to be inserted after the text argument "a" in the
114
 * previous rule string segment.
115
 * <p>
116
 * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
117
 * anywhere in the previous rule string segment so the rule following the
118
 * reset rule cannot be inserted.
119
 * <p>
120
 * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
121
 * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
122
 * <p>
123
 * For a description of the various comparison strength types, see the
124
 * documentation for the <code>Collator</code> class.
125
 * <p>
126
 * As an additional complication to this already overly complex rule scheme,
127
 * if any characters precede the first rule, these characters are considered
128
 * ignorable.  They will be treated as if they did not exist during
129
 * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
130
 * character such that the strings "high-tech" and "hightech" would
131
 * be considered identical.
132
 * <p>
133
 * A <code>ParseException</code> will be thrown for any of the following
134
 * conditions:
135
 * <ul>
136
 * <li>Unquoted punctuation characters in a text argument.</li>
137
 * <li>A relational or reset operator not followed by a text argument</li>
138
 * <li>A reset operator where the text argument is not present in
139
 * the previous rule string section.</li>
140
 * </ul>
141
 *
142
 * @author Aaron M. Renn (arenn@urbanophile.com)
143
 * @author Tom Tromey (tromey@cygnus.com)
144
 * @author Guilhem Lavaux (guilhem@kaffe.org)
145
 */
146
public class RuleBasedCollator extends Collator
147
{
148
  /**
149
   * This class describes what rank has a character (or a sequence of characters)
150
   * in the lexicographic order. Each element in a rule has a collation element.
151
   */
152
  static final class CollationElement
153
  {
154
    final String key;
155
    final int primary;
156
    final short secondary;
157
    final short tertiary;
158
    final short equality;
159
    final boolean ignore;
160
    final String expansion;
161
 
162
    CollationElement(String key, int primary, short secondary, short tertiary,
163
                     short equality, String expansion, boolean ignore)
164
    {
165
      this.key = key;
166
      this.primary = primary;
167
      this.secondary = secondary;
168
      this.tertiary = tertiary;
169
      this.equality = equality;
170
      this.ignore = ignore;
171
      this.expansion = expansion;
172
    }
173
 
174
    int getValue()
175
    {
176
      return (primary << 16) + (secondary << 8) + tertiary;
177
    }
178
  }
179
 
180
  /**
181
   * Basic collation instruction (internal format) to build the series of
182
   * collation elements. It contains an instruction which specifies the new
183
   * state of the generator. The sequence of instruction should not contain
184
   * RESET (it is used by
185
   * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
186
   * as a temporary state while merging two sets of instructions.
187
   */
188
  private static final class CollationSorter
189
  {
190
    static final int GREATERP = 0;
191
    static final int GREATERS = 1;
192
    static final int GREATERT = 2;
193
    static final int EQUAL = 3;
194
    static final int RESET = 4;
195
    static final int INVERSE_SECONDARY = 5;
196
 
197
    final int comparisonType;
198
    final String textElement;
199
    final int hashText;
200
    final int offset;
201
    final boolean ignore;
202
 
203
    String expansionOrdering;
204
 
205
    private CollationSorter(final int comparisonType, final String textElement,
206
                            final int offset, final boolean ignore)
207
    {
208
      this.comparisonType = comparisonType;
209
      this.textElement = textElement;
210
      this.offset = offset;
211
      this.ignore = ignore;
212
      hashText = textElement.hashCode();
213
    }
214
  }
215
 
216
  /**
217
   * This is the original rule string.
218
   */
219
  private String rules;
220
 
221
  /**
222
   * This is the table of collation element values
223
   */
224
  private CollationElement[] ce_table;
225
 
226
  /**
227
   * Quick-prefix finder.
228
   */
229
  HashMap<String,CollationElement> prefix_tree;
230
 
231
  /**
232
   * This is the value of the last sequence entered into
233
   * <code>ce_table</code>. It is used to compute the
234
   * ordering value of unspecified character.
235
   */
236
  private int last_primary_value;
237
 
238
  /**
239
   * This is the value of the last secondary sequence of the
240
   * primary 0, entered into
241
   * <code>ce_table</code>. It is used to compute the
242
   * ordering value of an unspecified accented character.
243
   */
244
  private int last_tertiary_value;
245
 
246
  /**
247
   * This variable is true if accents need to be sorted
248
   * in the other direction.
249
   */
250
  private boolean inverseAccentComparison;
251
 
252
  /**
253
   * This collation element is special to unknown sequence.
254
   * The JDK uses it to mark and sort the characters which have
255
   * no collation rules.
256
   */
257
  static final CollationElement SPECIAL_UNKNOWN_SEQ =
258
    new CollationElement("", (short) 32767, (short) 0, (short) 0,
259
                         (short) 0, null, false);
260
 
261
  /**
262
   * This method initializes a new instance of <code>RuleBasedCollator</code>
263
   * with the specified collation rules.  Note that an application normally
264
   * obtains an instance of <code>RuleBasedCollator</code> by calling the
265
   * <code>getInstance</code> method of <code>Collator</code>.  That method
266
   * automatically loads the proper set of rules for the desired locale.
267
   *
268
   * @param rules The collation rule string.
269
   *
270
   * @exception ParseException If the rule string contains syntax errors.
271
   */
272
  public RuleBasedCollator(String rules) throws ParseException
273
  {
274
    if (rules.equals(""))
275
      throw new ParseException("empty rule set", 0);
276
 
277
    this.rules = rules;
278
 
279
    buildCollationVector(parseString(rules));
280
    buildPrefixAccess();
281
  }
282
 
283
  /**
284
   * This method returns the number of common characters at the beginning
285
   * of the string of the two parameters.
286
   *
287
   * @param prefix A string considered as a prefix to test against
288
   * the other string.
289
   * @param s A string to test the prefix against.
290
   * @return The number of common characters.
291
   */
292
  static int findPrefixLength(String prefix, String s)
293
  {
294
    int index;
295
    int len = prefix.length();
296
 
297
    for (index = 0; index < len && index < s.length(); ++index)
298
      {
299
        if (prefix.charAt(index) != s.charAt(index))
300
          return index;
301
      }
302
 
303
 
304
    return index;
305
  }
306
 
307
  /**
308
   * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
309
   * checks whether it is possible to find an anchor point for the rules to be merged and
310
   * then insert them at that precise point.
311
   *
312
   * @param offset Offset in the string containing rules of the beginning of the rules
313
   * being merged in.
314
   * @param starter Text of the rules being merged.
315
   * @param main Repository of all already parsed rules.
316
   * @param patch Rules to be merged into the repository.
317
   * @throws ParseException if it is impossible to find an anchor point for the new rules.
318
   */
319
  private void mergeRules(int offset, String starter, ArrayList<CollationSorter> main,
320
                          ArrayList<CollationSorter> patch)
321
    throws ParseException
322
  {
323
    int insertion_point = -1;
324
    int max_length = 0;
325
 
326
    /* We must check that no rules conflict with another already present. If it
327
     * is the case delete the old rule.
328
     */
329
 
330
    /* For the moment good old O(N^2) algorithm.
331
     */
332
    for (int i = 0; i < patch.size(); i++)
333
      {
334
        int j = 0;
335
 
336
        while (j < main.size())
337
          {
338
            CollationSorter rule1 = patch.get(i);
339
            CollationSorter rule2 = main.get(j);
340
 
341
            if (rule1.textElement.equals(rule2.textElement))
342
              main.remove(j);
343
            else
344
              j++;
345
          }
346
      }
347
 
348
    // Find the insertion point... O(N)
349
    for (int i = 0; i < main.size(); i++)
350
      {
351
        CollationSorter sorter = main.get(i);
352
        int length = findPrefixLength(starter, sorter.textElement);
353
 
354
        if (length > max_length)
355
          {
356
            max_length = length;
357
            insertion_point = i+1;
358
          }
359
      }
360
 
361
    if (insertion_point < 0)
362
      throw new ParseException("no insertion point found for " + starter, offset);
363
 
364
    if (max_length < starter.length())
365
      {
366
        /*
367
         * We need to expand the first entry. It must be sorted
368
         * like if it was the reference key itself (like the spec
369
         * said. So the first entry is special: the element is
370
         * replaced by the specified text element for the sorting.
371
         * This text replace the old one for comparisons. However
372
         * to preserve the behaviour we replace the first key (corresponding
373
         * to the found prefix) by a new code rightly ordered in the
374
         * sequence. The rest of the subsequence must be appended
375
         * to the end of the sequence.
376
         */
377
        CollationSorter sorter = patch.get(0);
378
 
379
        sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
380
 
381
        main.add(insertion_point, sorter);
382
 
383
        /*
384
         * This is a new set of rules. Append to the list.
385
         */
386
        patch.remove(0);
387
        insertion_point++;
388
      }
389
 
390
    // Now insert all elements of patch at the insertion point.
391
    for (int i = 0; i < patch.size(); i++)
392
      main.add(i+insertion_point, patch.get(i));
393
  }
394
 
395
  /**
396
   * This method parses a string and build a set of sorting instructions. The parsing
397
   * may only be partial on the case the rules are to be merged sometime later.
398
   *
399
   * @param stop_on_reset If this parameter is true then the parser stops when it
400
   * encounters a reset instruction. In the other case, it tries to parse the subrules
401
   * and merged it in the same repository.
402
   * @param v Output vector for the set of instructions.
403
   * @param base_offset Offset in the string to begin parsing.
404
   * @param rules Rules to be parsed.
405
   * @return -1 if the parser reached the end of the string, an integer representing the
406
   * offset in the string at which it stopped parsing.
407
   * @throws ParseException if something turned wrong during the parsing. To get details
408
   * decode the message.
409
   */
410
  private int subParseString(boolean stop_on_reset, ArrayList<CollationSorter> v,
411
                             int base_offset, String rules)
412
    throws ParseException
413
  {
414
    boolean ignoreChars = (base_offset == 0);
415
    int operator = -1;
416
    StringBuilder sb = new StringBuilder();
417
    boolean doubleQuote = false;
418
    boolean eatingChars = false;
419
    boolean nextIsModifier = false;
420
    boolean isModifier = false;
421
    int i;
422
 
423
main_parse_loop:
424
    for (i = 0; i < rules.length(); i++)
425
      {
426
        char c = rules.charAt(i);
427
        int type = -1;
428
 
429
        if (!eatingChars &&
430
            ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
431
              continue;
432
 
433
        isModifier = nextIsModifier;
434
        nextIsModifier = false;
435
 
436
        if (eatingChars && c != '\'')
437
          {
438
            doubleQuote = false;
439
            sb.append(c);
440
            continue;
441
          }
442
        if (doubleQuote && eatingChars)
443
          {
444
            sb.append(c);
445
            doubleQuote = false;
446
            continue;
447
          }
448
 
449
        switch (c)
450
          {
451
          case '!':
452
            throw new ParseException
453
              ("Modifier '!' is not yet supported by Classpath", i + base_offset);
454
          case '<':
455
            type = CollationSorter.GREATERP;
456
            break;
457
          case ';':
458
            type = CollationSorter.GREATERS;
459
            break;
460
          case ',':
461
            type = CollationSorter.GREATERT;
462
            break;
463
          case '=':
464
            type = CollationSorter.EQUAL;
465
            break;
466
          case '\'':
467
            eatingChars = !eatingChars;
468
            doubleQuote = true;
469
            break;
470
          case '@':
471
            if (ignoreChars)
472
              throw new ParseException
473
                ("comparison list has not yet been started. You may only use"
474
                 + "(<,;=&)", i + base_offset);
475
            // Inverse the order of secondaries from now on.
476
            nextIsModifier = true;
477
            type = CollationSorter.INVERSE_SECONDARY;
478
            break;
479
          case '&':
480
            type = CollationSorter.RESET;
481
            if (stop_on_reset)
482
              break main_parse_loop;
483
            break;
484
          default:
485
            if (operator < 0)
486
              throw new ParseException
487
                ("operator missing at " + (i + base_offset), i + base_offset);
488
            if (! eatingChars
489
                && ((c >= 0x21 && c <= 0x2F)
490
                    || (c >= 0x3A && c <= 0x40)
491
                    || (c >= 0x5B && c <= 0x60)
492
                    || (c >= 0x7B && c <= 0x7E)))
493
              throw new ParseException
494
                ("unquoted punctuation character '" + c + "'", i + base_offset);
495
 
496
            //type = ignoreChars ? CollationSorter.IGNORE : -1;
497
            sb.append(c);
498
            break;
499
          }
500
 
501
        if (type  < 0)
502
          continue;
503
 
504
        if (operator < 0)
505
          {
506
            operator = type;
507
            continue;
508
          }
509
 
510
        if (sb.length() == 0 && !isModifier)
511
          throw new ParseException
512
            ("text element empty at " + (i+base_offset), i+base_offset);
513
 
514
        if (operator == CollationSorter.RESET)
515
          {
516
            /* Reposition in the sorting list at the position
517
             * indicated by the text element.
518
             */
519
            String subrules = rules.substring(i);
520
            ArrayList<CollationSorter> sorted_rules = new ArrayList<CollationSorter>();
521
            int idx;
522
 
523
            // Parse the subrules but do not iterate through all
524
            // sublist. This is the privilege of the first call.
525
            idx = subParseString(true, sorted_rules, base_offset+i, subrules);
526
 
527
            // Merge new parsed rules into the list.
528
            mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
529
            sb.setLength(0);
530
 
531
            // Reset state to none.
532
            operator = -1;
533
            type = -1;
534
            // We have found a new subrule at 'idx' but it has not been parsed.
535
            if (idx >= 0)
536
              {
537
                i += idx-1;
538
                continue main_parse_loop;
539
              }
540
            else
541
                // No more rules.
542
                break main_parse_loop;
543
          }
544
 
545
        String textElement = sb.toString();
546
        if (operator == CollationSorter.GREATERP)
547
          ignoreChars = false;
548
        CollationSorter sorter = new CollationSorter(operator, textElement,
549
                                                     base_offset + rules.length(),
550
                                                     ignoreChars);
551
        sb.setLength(0);
552
 
553
        v.add(sorter);
554
        operator = type;
555
      }
556
 
557
    if (operator >= 0)
558
      {
559
        int pos = rules.length() + base_offset;
560
 
561
        if ((sb.length() != 0 && nextIsModifier)
562
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
563
          throw new ParseException("text element empty at " + pos, pos);
564
 
565
        if (operator == CollationSorter.GREATERP)
566
          ignoreChars = false;
567
 
568
        CollationSorter sorter = new CollationSorter(operator, sb.toString(),
569
                                                     base_offset+pos, ignoreChars);
570
        v.add(sorter);
571
      }
572
 
573
    if (i == rules.length())
574
      return -1;
575
    else
576
      return i;
577
  }
578
 
579
  /**
580
   * This method creates a copy of this object.
581
   *
582
   * @return A copy of this object.
583
   */
584
  public Object clone()
585
  {
586
    return super.clone();
587
  }
588
 
589
  /**
590
   * This method completely parses a string 'rules' containing sorting rules.
591
   *
592
   * @param rules String containing the rules to be parsed.
593
   * @return A set of sorting instructions stored in a Vector.
594
   * @throws ParseException if something turned wrong during the parsing. To get details
595
   * decode the message.
596
   */
597
  private ArrayList<CollationSorter> parseString(String rules)
598
    throws ParseException
599
  {
600
    ArrayList<CollationSorter> v = new ArrayList<CollationSorter>();
601
 
602
    // result of the first subParseString is not absolute (may be -1 or a
603
    // positive integer). But we do not care.
604
    subParseString(false, v, 0, rules);
605
 
606
    return v;
607
  }
608
 
609
  /**
610
   * This method uses the sorting instructions built by {@link #parseString}
611
   * to build collation elements which can be directly used to sort strings.
612
   *
613
   * @param parsedElements Parsed instructions stored in a ArrayList.
614
   * @throws ParseException if the order of the instructions are not valid.
615
   */
616
  private void buildCollationVector(ArrayList<CollationSorter> parsedElements)
617
    throws ParseException
618
  {
619
    int primary_seq = 0;
620
    int last_tertiary_seq = 0;
621
    short secondary_seq = 0;
622
    short tertiary_seq = 0;
623
    short equality_seq = 0;
624
    boolean inverseComparisons = false;
625
    final boolean DECREASING = false;
626
    final boolean INCREASING = true;
627
    boolean secondaryType = INCREASING;
628
    ArrayList<CollationElement> v = new ArrayList<CollationElement>();
629
 
630
    // elts is completely sorted.
631
element_loop:
632
    for (int i = 0; i < parsedElements.size(); i++)
633
      {
634
        CollationSorter elt = parsedElements.get(i);
635
 
636
        switch (elt.comparisonType)
637
          {
638
          case CollationSorter.GREATERP:
639
            primary_seq++;
640
            if (inverseComparisons)
641
              {
642
                secondary_seq = Short.MAX_VALUE;
643
                secondaryType = DECREASING;
644
              }
645
            else
646
              {
647
                secondary_seq = 0;
648
                secondaryType = INCREASING;
649
              }
650
            tertiary_seq = 0;
651
            equality_seq = 0;
652
            inverseComparisons = false;
653
            break;
654
          case CollationSorter.GREATERS:
655
            if (secondaryType == DECREASING)
656
              secondary_seq--;
657
            else
658
              secondary_seq++;
659
            tertiary_seq = 0;
660
            equality_seq = 0;
661
            break;
662
          case CollationSorter.INVERSE_SECONDARY:
663
            inverseComparisons = true;
664
            continue element_loop;
665
          case CollationSorter.GREATERT:
666
            tertiary_seq++;
667
            if (primary_seq == 0)
668
              last_tertiary_seq = tertiary_seq;
669
            equality_seq = 0;
670
            break;
671
          case CollationSorter.EQUAL:
672
            equality_seq++;
673
            break;
674
          case CollationSorter.RESET:
675
            throw new ParseException
676
              ("Invalid reached state 'RESET'. Internal error", elt.offset);
677
          default:
678
            throw new ParseException
679
              ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
680
          }
681
 
682
        v.add(new CollationElement(elt.textElement, primary_seq,
683
                                   secondary_seq, tertiary_seq,
684
                                   equality_seq, elt.expansionOrdering, elt.ignore));
685
      }
686
 
687
    this.inverseAccentComparison = inverseComparisons;
688
 
689
    ce_table = v.toArray(new CollationElement[v.size()]);
690
 
691
    last_primary_value = primary_seq+1;
692
    last_tertiary_value = last_tertiary_seq+1;
693
  }
694
 
695
  /**
696
   * Build a tree where all keys are the texts of collation elements and data is
697
   * the collation element itself. The tree is used when extracting all prefix
698
   * for a given text.
699
   */
700
  private void buildPrefixAccess()
701
  {
702
    prefix_tree = new HashMap<String,CollationElement>();
703
 
704
    for (int i = 0; i < ce_table.length; i++)
705
      {
706
        CollationElement e = ce_table[i];
707
 
708
        prefix_tree.put(e.key, e);
709
      }
710
  }
711
 
712
  /**
713
   * This method returns an integer which indicates whether the first
714
   * specified <code>String</code> is less than, greater than, or equal to
715
   * the second.  The value depends not only on the collation rules in
716
   * effect, but also the strength and decomposition settings of this object.
717
   *
718
   * @param source The first <code>String</code> to compare.
719
   * @param target A second <code>String</code> to compare to the first.
720
   *
721
   * @return A negative integer if source &lt; target, a positive integer
722
   * if source &gt; target, or 0 if source == target.
723
   */
724
  public int compare(String source, String target)
725
  {
726
    CollationElementIterator cs, ct;
727
    CollationElement ord1block = null;
728
    CollationElement ord2block = null;
729
    boolean advance_block_1 = true;
730
    boolean advance_block_2 = true;
731
 
732
    cs = getCollationElementIterator(source);
733
    ct = getCollationElementIterator(target);
734
 
735
    for(;;)
736
      {
737
        int ord1;
738
        int ord2;
739
 
740
        /*
741
         * We have to check whether the characters are ignorable.
742
         * If it is the case then forget them.
743
         */
744
        if (advance_block_1)
745
          {
746
            ord1block = cs.nextBlock();
747
            if (ord1block != null && ord1block.ignore)
748
              continue;
749
          }
750
 
751
        if (advance_block_2)
752
          {
753
            ord2block = ct.nextBlock();
754
            if (ord2block != null && ord2block.ignore)
755
              {
756
                advance_block_1 = false;
757
                continue;
758
              }
759
         }
760
        else
761
          advance_block_2 = true;
762
 
763
        if (!advance_block_1)
764
          advance_block_1 = true;
765
 
766
        if (ord1block != null)
767
          ord1 = ord1block.getValue();
768
        else
769
          {
770
            if (ord2block == null)
771
              return 0;
772
            return -1;
773
          }
774
 
775
        if (ord2block == null)
776
          return 1;
777
 
778
        ord2 = ord2block.getValue();
779
 
780
        // We know chars are totally equal, so skip
781
        if (ord1 == ord2)
782
          {
783
            if (getStrength() == IDENTICAL)
784
              if (!ord1block.key.equals(ord2block.key))
785
                return ord1block.key.compareTo(ord2block.key);
786
            continue;
787
          }
788
 
789
        // Check for primary strength differences
790
        int prim1 = CollationElementIterator.primaryOrder(ord1);
791
        int prim2 = CollationElementIterator.primaryOrder(ord2);
792
 
793
        if (prim1 == 0 && getStrength() < TERTIARY)
794
          {
795
            advance_block_2 = false;
796
            continue;
797
          }
798
        else if (prim2 == 0 && getStrength() < TERTIARY)
799
          {
800
            advance_block_1 = false;
801
            continue;
802
          }
803
 
804
        if (prim1 < prim2)
805
          return -1;
806
        else if (prim1 > prim2)
807
          return 1;
808
        else if (getStrength() == PRIMARY)
809
          continue;
810
 
811
        // Check for secondary strength differences
812
        int sec1 = CollationElementIterator.secondaryOrder(ord1);
813
        int sec2 = CollationElementIterator.secondaryOrder(ord2);
814
 
815
        if (sec1 < sec2)
816
          return -1;
817
        else if (sec1 > sec2)
818
          return 1;
819
        else if (getStrength() == SECONDARY)
820
          continue;
821
 
822
        // Check for tertiary differences
823
        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
824
        int tert2 = CollationElementIterator.tertiaryOrder(ord2);
825
 
826
        if (tert1 < tert2)
827
          return -1;
828
        else if (tert1 > tert2)
829
          return 1;
830
        else if (getStrength() == TERTIARY)
831
          continue;
832
 
833
        // Apparently JDK does this (at least for my test case).
834
        return ord1block.key.compareTo(ord2block.key);
835
      }
836
  }
837
 
838
  /**
839
   * This method tests this object for equality against the specified
840
   * object.  This will be true if and only if the specified object is
841
   * another reference to this object.
842
   *
843
   * @param obj The <code>Object</code> to compare against this object.
844
   *
845
   * @return <code>true</code> if the specified object is equal to this object,
846
   * <code>false</code> otherwise.
847
   */
848
  public boolean equals(Object obj)
849
  {
850
    if (obj == this)
851
      return true;
852
    else
853
      return false;
854
  }
855
 
856
  /**
857
   * This method builds a default collation element without invoking
858
   * the database created from the rules passed to the constructor.
859
   *
860
   * @param c Character which needs a collation element.
861
   * @return A valid brand new CollationElement instance.
862
   */
863
  CollationElement getDefaultElement(char c)
864
  {
865
    int v;
866
 
867
    // Preliminary support for generic accent sorting inversion (I don't know if all
868
    // characters in the range should be sorted backward). This is the place
869
    // to fix this if needed.
870
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
871
      v = 0x0361 - ((int) c - 0x02B9);
872
    else
873
      v = (short) c;
874
    return new CollationElement("" + c, last_primary_value + v,
875
                                (short) 0, (short) 0, (short) 0, null, false);
876
  }
877
 
878
  /**
879
   * This method builds a default collation element for an accented character
880
   * without invoking the database created from the rules passed to the constructor.
881
   *
882
   * @param c Character which needs a collation element.
883
   * @return A valid brand new CollationElement instance.
884
   */
885
  CollationElement getDefaultAccentedElement(char c)
886
  {
887
    int v;
888
 
889
    // Preliminary support for generic accent sorting inversion (I don't know if all
890
    // characters in the range should be sorted backward). This is the place
891
    // to fix this if needed.
892
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
893
      v = 0x0361 - ((int) c - 0x02B9);
894
    else
895
      v = (short) c;
896
    return new CollationElement("" + c, (short) 0,
897
                                (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
898
  }
899
 
900
  /**
901
   * This method returns an instance of <code>CollationElementIterator</code>
902
   * for the specified <code>String</code> under the collation rules for this
903
   * object.
904
   *
905
   * @param source The <code>String</code> to return the
906
   * <code>CollationElementIterator</code> instance for.
907
   *
908
   * @return A <code>CollationElementIterator</code> for the specified
909
   * <code>String</code>.
910
   */
911
  public CollationElementIterator getCollationElementIterator(String source)
912
  {
913
    return new CollationElementIterator(this, source);
914
  }
915
 
916
  /**
917
   * This method returns an instance of <code>CollationElementIterator</code>
918
   * for the <code>String</code> represented by the specified
919
   * <code>CharacterIterator</code>.
920
   *
921
   * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
922
   *
923
   * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
924
   */
925
  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
926
  {
927
    return new CollationElementIterator(this, source);
928
  }
929
 
930
  /**
931
   * This method returns an instance of <code>CollationKey</code> for the
932
   * specified <code>String</code>.  The object returned will have a
933
   * more efficient mechanism for its comparison function that could
934
   * provide speed benefits if multiple comparisons are performed, such
935
   * as during a sort.
936
   *
937
   * @param source The <code>String</code> to create a <code>CollationKey</code> for.
938
   *
939
   * @return A <code>CollationKey</code> for the specified <code>String</code>.
940
   */
941
  public CollationKey getCollationKey(String source)
942
  {
943
    CollationElementIterator cei = getCollationElementIterator(source);
944
    ArrayList<Integer> vect = new ArrayList<Integer>();
945
 
946
    int ord = cei.next();
947
    cei.reset(); //set to start of string
948
 
949
    while (ord != CollationElementIterator.NULLORDER)
950
      {
951
        // If the primary order is null, it means this is an ignorable
952
        // character.
953
        if (CollationElementIterator.primaryOrder(ord) == 0)
954
          {
955
            ord = cei.next();
956
            continue;
957
          }
958
        switch (getStrength())
959
          {
960
            case PRIMARY:
961
              ord = CollationElementIterator.primaryOrder(ord);
962
              break;
963
 
964
            case SECONDARY:
965
              ord = CollationElementIterator.primaryOrder(ord) << 8;
966
              ord |= CollationElementIterator.secondaryOrder(ord);
967
 
968
            default:
969
               break;
970
          }
971
 
972
        vect.add(Integer.valueOf(ord));
973
        ord = cei.next(); //increment to next key
974
      }
975
 
976
    Integer[] objarr = vect.toArray(new Integer[vect.size()]);
977
    byte[] key = new byte[objarr.length * 4];
978
 
979
    for (int i = 0; i < objarr.length; i++)
980
      {
981
        int j = objarr[i].intValue();
982
        key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
983
        key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
984
        key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
985
        key [i * 4 + 3] = (byte) (j & 0x000000FF);
986
      }
987
 
988
    return new CollationKey(this, source, key);
989
  }
990
 
991
  /**
992
   * This method returns a <code>String</code> containing the collation rules
993
   * for this object.
994
   *
995
   * @return The collation rules for this object.
996
   */
997
  public String getRules()
998
  {
999
    return rules;
1000
  }
1001
 
1002
  /**
1003
   * This method returns a hash value for this object.
1004
   *
1005
   * @return A hash value for this object.
1006
   */
1007
  public int hashCode()
1008
  {
1009
    return System.identityHashCode(this);
1010
  }
1011
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.