OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libjava/] [classpath/] [java/] [text/] [RuleBasedCollator.java] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
/* RuleBasedCollator.java -- Concrete Collator Class
2
   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.text;
40
 
41
import java.util.ArrayList;
42
import java.util.HashMap;
43
 
44
/* Written using "Java Class Libraries", 2nd edition, plus online
45
 * API docs for JDK 1.2 from http://www.javasoft.com.
46
 * Status: Believed complete and correct
47
 */
48
 
49
/**
50
 * This class is a concrete subclass of <code>Collator</code> suitable
51
 * for string collation in a wide variety of languages.  An instance of
52
 * this class is normally returned by the <code>getInstance</code> method
53
 * of <code>Collator</code> with rules predefined for the requested
54
 * locale.  However, an instance of this class can be created manually
55
 * with any desired rules.
56
 * <p>
57
 * Rules take the form of a <code>String</code> with the following syntax
58
 * <ul>
59
 * <li> Modifier: '@'</li>
60
 * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
61
 * <li> Reset: '&amp;' : &lt;text&gt;</li>
62
 * </ul>
63
 * The modifier character indicates that accents sort backward as is the
64
 * case with French.  The modifier applies to all rules <b>after</b>
65
 * the modifier but before the next primary sequence. If placed at the end
66
 * of the sequence it applies to all unknown accented characters.
67
 * The relational operators specify how the text
68
 * argument relates to the previous term.  The relation characters have
69
 * the following meanings:
70
 * <ul>
71
 * <li>'&lt;' - The text argument is greater than the prior term at the primary
72
 * difference level.</li>
73
 * <li>';' - The text argument is greater than the prior term at the secondary
74
 * difference level.</li>
75
 * <li>',' - The text argument is greater than the prior term at the tertiary
76
 * difference level.</li>
77
 * <li>'=' - The text argument is equal to the prior term</li>
78
 * </ul>
79
 * <p>
80
 * As for the text argument itself, this is any sequence of Unicode
81
 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
82
 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
83
 * desired, they must be enclosed in single quotes.  If any whitespace is
84
 * encountered, it is ignored.  (For example, "a b" is equal to "ab").
85
 * <p>
86
 * The reset operation inserts the following rule at the point where the
87
 * text argument to it exists in the previously declared rule string.  This
88
 * makes it easy to add new rules to an existing string by simply including
89
 * them in a reset sequence at the end.  Note that the text argument, or
90
 * at least the first character of it, must be present somewhere in the
91
 * previously declared rules in order to be inserted properly.  If this
92
 * is not satisfied, a <code>ParseException</code> will be thrown.
93
 * <p>
94
 * This system of configuring <code>RuleBasedCollator</code> is needlessly
95
 * complex and the people at Taligent who developed it (along with the folks
96
 * at Sun who accepted it into the Java standard library) deserve a slow
97
 * and agonizing death.
98
 * <p>
99
 * Here are a couple of examples of rule strings:
100
 * <p>
101
 * "&lt; a &lt; b &lt; c" - This string says that a is less than b which is
102
 * less than c, with all differences being primary differences.
103
 * <p>
104
 * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
105
 * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
106
 * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
107
 * under a tertiary strength comparison.
108
 * <p>
109
 * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
110
 * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
111
 * the rule "&lt; b" is to be inserted after the text argument "a" in the
112
 * previous rule string segment.
113
 * <p>
114
 * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
115
 * anywhere in the previous rule string segment so the rule following the
116
 * reset rule cannot be inserted.
117
 * <p>
118
 * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
119
 * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
120
 * <p>
121
 * For a description of the various comparison strength types, see the
122
 * documentation for the <code>Collator</code> class.
123
 * <p>
124
 * As an additional complication to this already overly complex rule scheme,
125
 * if any characters precede the first rule, these characters are considered
126
 * ignorable.  They will be treated as if they did not exist during
127
 * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
128
 * character such that the strings "high-tech" and "hightech" would
129
 * be considered identical.
130
 * <p>
131
 * A <code>ParseException</code> will be thrown for any of the following
132
 * conditions:
133
 * <ul>
134
 * <li>Unquoted punctuation characters in a text argument.</li>
135
 * <li>A relational or reset operator not followed by a text argument</li>
136
 * <li>A reset operator where the text argument is not present in
137
 * the previous rule string section.</li>
138
 * </ul>
139
 *
140
 * @author Aaron M. Renn (arenn@urbanophile.com)
141
 * @author Tom Tromey (tromey@cygnus.com)
142
 * @author Guilhem Lavaux (guilhem@kaffe.org)
143
 */
144
public class RuleBasedCollator extends Collator
145
{
146
  /**
147
   * This class describes what rank has a character (or a sequence of characters)
148
   * in the lexicographic order. Each element in a rule has a collation element.
149
   */
150
  static final class CollationElement
151
  {
152
    String key;
153
    int primary;
154
    short secondary;
155
    short tertiary;
156
    short equality;
157
    boolean ignore;
158
    String expansion;
159
 
160
    CollationElement(String key, int primary, short secondary, short tertiary,
161
                     short equality, String expansion, boolean ignore)
162
    {
163
      this.key = key;
164
      this.primary = primary;
165
      this.secondary = secondary;
166
      this.tertiary = tertiary;
167
      this.equality = equality;
168
      this.ignore = ignore;
169
      this.expansion = expansion;
170
    }
171
 
172
    int getValue()
173
    {
174
      return (primary << 16) + (secondary << 8) + tertiary;
175
    }
176
  }
177
 
178
  /**
179
   * Basic collation instruction (internal format) to build the series of
180
   * collation elements. It contains an instruction which specifies the new
181
   * state of the generator. The sequence of instruction should not contain
182
   * RESET (it is used by
183
   * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
184
   * as a temporary state while merging two sets of instructions.
185
   */
186
  static final class CollationSorter
187
  {
188
    static final int GREATERP = 0;
189
    static final int GREATERS = 1;
190
    static final int GREATERT = 2;
191
    static final int EQUAL = 3;
192
    static final int RESET = 4;
193
    static final int INVERSE_SECONDARY = 5;
194
 
195
    int comparisonType;
196
    String textElement;
197
    int hashText;
198
    int offset;
199
    boolean ignore;
200
 
201
    String expansionOrdering;
202
  }
203
 
204
  /**
205
   * This is the original rule string.
206
   */
207
  private String rules;
208
 
209
  /**
210
   * This is the table of collation element values
211
   */
212
  private Object[] ce_table;
213
 
214
  /**
215
   * Quick-prefix finder.
216
   */
217
  HashMap prefix_tree;
218
 
219
  /**
220
   * This is the value of the last sequence entered into
221
   * <code>ce_table</code>. It is used to compute the
222
   * ordering value of unspecified character.
223
   */
224
  private int last_primary_value;
225
 
226
  /**
227
   * This is the value of the last tertiary sequence of the
228
   * primary 0, entered into
229
   * <code>ce_table</code>. It is used to compute the
230
   * ordering value of an unspecified accented character.
231
   */
232
  private int last_tertiary_value;
233
 
234
  /**
235
   * This variable is true if accents need to be sorted
236
   * in the other direction.
237
   */
238
  private boolean inverseAccentComparison;
239
 
240
  /**
241
   * This collation element is special to unknown sequence.
242
   * The JDK uses it to mark and sort the characters which have
243
   * no collation rules.
244
   */
245
  static final CollationElement SPECIAL_UNKNOWN_SEQ =
246
    new CollationElement("", (short) 32767, (short) 0, (short) 0,
247
                         (short) 0, null, false);
248
 
249
  /**
250
   * This method initializes a new instance of <code>RuleBasedCollator</code>
251
   * with the specified collation rules.  Note that an application normally
252
   * obtains an instance of <code>RuleBasedCollator</code> by calling the
253
   * <code>getInstance</code> method of <code>Collator</code>.  That method
254
   * automatically loads the proper set of rules for the desired locale.
255
   *
256
   * @param rules The collation rule string.
257
   *
258
   * @exception ParseException If the rule string contains syntax errors.
259
   */
260
  public RuleBasedCollator(String rules) throws ParseException
261
  {
262
    if (rules.equals(""))
263
      throw new ParseException("empty rule set", 0);
264
 
265
    this.rules = rules;
266
 
267
    buildCollationVector(parseString(rules));
268
    buildPrefixAccess();
269
  }
270
 
271
  /**
272
   * This method returns the number of common characters at the beginning
273
   * of the string of the two parameters.
274
   *
275
   * @param prefix A string considered as a prefix to test against
276
   * the other string.
277
   * @param s A string to test the prefix against.
278
   * @return The number of common characters.
279
   */
280
  static int findPrefixLength(String prefix, String s)
281
  {
282
    int index;
283
    int len = prefix.length();
284
 
285
    for (index = 0; index < len && index < s.length(); ++index)
286
      {
287
        if (prefix.charAt(index) != s.charAt(index))
288
          return index;
289
      }
290
 
291
 
292
    return index;
293
  }
294
 
295
  /**
296
   * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
297
   * checks whether it is possible to find an anchor point for the rules to be merged and
298
   * then insert them at that precise point.
299
   *
300
   * @param offset Offset in the string containing rules of the beginning of the rules
301
   * being merged in.
302
   * @param starter Text of the rules being merged.
303
   * @param main Repository of all already parsed rules.
304
   * @param patch Rules to be merged into the repository.
305
   * @throws ParseException if it is impossible to find an anchor point for the new rules.
306
   */
307
  private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
308
    throws ParseException
309
  {
310
    int insertion_point = -1;
311
    int max_length = 0;
312
 
313
    /* We must check that no rules conflict with another already present. If it
314
     * is the case delete the old rule.
315
     */
316
 
317
    /* For the moment good old O(N^2) algorithm.
318
     */
319
    for (int i = 0; i < patch.size(); i++)
320
      {
321
        int j = 0;
322
 
323
        while (j < main.size())
324
          {
325
            CollationSorter rule1 = (CollationSorter) patch.get(i);
326
            CollationSorter rule2 = (CollationSorter) main.get(j);
327
 
328
            if (rule1.textElement.equals(rule2.textElement))
329
              main.remove(j);
330
            else
331
              j++;
332
          }
333
      }
334
 
335
    // Find the insertion point... O(N)
336
    for (int i = 0; i < main.size(); i++)
337
      {
338
        CollationSorter sorter = (CollationSorter) main.get(i);
339
        int length = findPrefixLength(starter, sorter.textElement);
340
 
341
        if (length > max_length)
342
          {
343
            max_length = length;
344
            insertion_point = i+1;
345
          }
346
      }
347
 
348
    if (insertion_point < 0)
349
      throw new ParseException("no insertion point found for " + starter, offset);
350
 
351
    if (max_length < starter.length())
352
      {
353
        /*
354
         * We need to expand the first entry. It must be sorted
355
         * like if it was the reference key itself (like the spec
356
         * said. So the first entry is special: the element is
357
         * replaced by the specified text element for the sorting.
358
         * This text replace the old one for comparisons. However
359
         * to preserve the behaviour we replace the first key (corresponding
360
         * to the found prefix) by a new code rightly ordered in the
361
         * sequence. The rest of the subsequence must be appended
362
         * to the end of the sequence.
363
         */
364
        CollationSorter sorter = (CollationSorter) patch.get(0);
365
        CollationSorter expansionPrefix =
366
          (CollationSorter) main.get(insertion_point-1);
367
 
368
        sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
369
 
370
        main.add(insertion_point, sorter);
371
 
372
        /*
373
         * This is a new set of rules. Append to the list.
374
         */
375
        patch.remove(0);
376
        insertion_point++;
377
      }
378
 
379
    // Now insert all elements of patch at the insertion point.
380
    for (int i = 0; i < patch.size(); i++)
381
      main.add(i+insertion_point, patch.get(i));
382
  }
383
 
384
  /**
385
   * This method parses a string and build a set of sorting instructions. The parsing
386
   * may only be partial on the case the rules are to be merged sometime later.
387
   *
388
   * @param stop_on_reset If this parameter is true then the parser stops when it
389
   * encounters a reset instruction. In the other case, it tries to parse the subrules
390
   * and merged it in the same repository.
391
   * @param v Output vector for the set of instructions.
392
   * @param base_offset Offset in the string to begin parsing.
393
   * @param rules Rules to be parsed.
394
   * @return -1 if the parser reached the end of the string, an integer representing the
395
   * offset in the string at which it stopped parsing.
396
   * @throws ParseException if something turned wrong during the parsing. To get details
397
   * decode the message.
398
   */
399
  private int subParseString(boolean stop_on_reset, ArrayList v,
400
                             int base_offset, String rules)
401
    throws ParseException
402
  {
403
    boolean ignoreChars = (base_offset == 0);
404
    int operator = -1;
405
    StringBuffer sb = new StringBuffer();
406
    boolean doubleQuote = false;
407
    boolean eatingChars = false;
408
    boolean nextIsModifier = false;
409
    boolean isModifier = false;
410
    int i;
411
 
412
main_parse_loop:
413
    for (i = 0; i < rules.length(); i++)
414
      {
415
        char c = rules.charAt(i);
416
        int type = -1;
417
 
418
        if (!eatingChars &&
419
            ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
420
              continue;
421
 
422
        isModifier = nextIsModifier;
423
        nextIsModifier = false;
424
 
425
        if (eatingChars && c != '\'')
426
          {
427
            doubleQuote = false;
428
            sb.append(c);
429
            continue;
430
          }
431
        if (doubleQuote && eatingChars)
432
          {
433
            sb.append(c);
434
            doubleQuote = false;
435
            continue;
436
          }
437
 
438
        switch (c)
439
          {
440
          case '!':
441
            throw new ParseException
442
              ("Modifier '!' is not yet supported by Classpath", i + base_offset);
443
          case '<':
444
            type = CollationSorter.GREATERP;
445
            break;
446
          case ';':
447
            type = CollationSorter.GREATERS;
448
            break;
449
          case ',':
450
            type = CollationSorter.GREATERT;
451
            break;
452
          case '=':
453
            type = CollationSorter.EQUAL;
454
            break;
455
          case '\'':
456
            eatingChars = !eatingChars;
457
            doubleQuote = true;
458
            break;
459
          case '@':
460
            if (ignoreChars)
461
              throw new ParseException
462
                ("comparison list has not yet been started. You may only use"
463
                 + "(<,;=&)", i + base_offset);
464
            // Inverse the order of secondaries from now on.
465
            nextIsModifier = true;
466
            type = CollationSorter.INVERSE_SECONDARY;
467
            break;
468
          case '&':
469
            type = CollationSorter.RESET;
470
            if (stop_on_reset)
471
              break main_parse_loop;
472
            break;
473
          default:
474
            if (operator < 0)
475
              throw new ParseException
476
                ("operator missing at " + (i + base_offset), i + base_offset);
477
            if (! eatingChars
478
                && ((c >= 0x21 && c <= 0x2F)
479
                    || (c >= 0x3A && c <= 0x40)
480
                    || (c >= 0x5B && c <= 0x60)
481
                    || (c >= 0x7B && c <= 0x7E)))
482
              throw new ParseException
483
                ("unquoted punctuation character '" + c + "'", i + base_offset);
484
 
485
            //type = ignoreChars ? CollationSorter.IGNORE : -1;
486
            sb.append(c);
487
            break;
488
          }
489
 
490
        if (type  < 0)
491
          continue;
492
 
493
        if (operator < 0)
494
          {
495
            operator = type;
496
            continue;
497
          }
498
 
499
        if (sb.length() == 0 && !isModifier)
500
          throw new ParseException
501
            ("text element empty at " + (i+base_offset), i+base_offset);
502
 
503
        if (operator == CollationSorter.RESET)
504
          {
505
            /* Reposition in the sorting list at the position
506
             * indicated by the text element.
507
             */
508
            String subrules = rules.substring(i);
509
            ArrayList sorted_rules = new ArrayList();
510
            int idx;
511
 
512
            // Parse the subrules but do not iterate through all
513
            // sublist. This is the privilege of the first call.
514
            idx = subParseString(true, sorted_rules, base_offset+i, subrules);
515
 
516
            // Merge new parsed rules into the list.
517
            mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
518
            sb.setLength(0);
519
 
520
            // Reset state to none.
521
            operator = -1;
522
            type = -1;
523
            // We have found a new subrule at 'idx' but it has not been parsed.
524
            if (idx >= 0)
525
              {
526
                i += idx-1;
527
                continue main_parse_loop;
528
              }
529
            else
530
                // No more rules.
531
                break main_parse_loop;
532
          }
533
 
534
        CollationSorter sorter = new CollationSorter();
535
 
536
        if (operator == CollationSorter.GREATERP)
537
          ignoreChars = false;
538
 
539
        sorter.comparisonType = operator;
540
        sorter.textElement = sb.toString();
541
        sorter.hashText = sorter.textElement.hashCode();
542
        sorter.offset = base_offset+rules.length();
543
        sorter.ignore = ignoreChars;
544
        sb.setLength(0);
545
 
546
        v.add(sorter);
547
        operator = type;
548
      }
549
 
550
    if (operator >= 0)
551
      {
552
        CollationSorter sorter = new CollationSorter();
553
        int pos = rules.length() + base_offset;
554
 
555
        if ((sb.length() != 0 && nextIsModifier)
556
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
557
          throw new ParseException("text element empty at " + pos, pos);
558
 
559
        if (operator == CollationSorter.GREATERP)
560
          ignoreChars = false;
561
 
562
        sorter.comparisonType = operator;
563
        sorter.textElement = sb.toString();
564
        sorter.hashText = sorter.textElement.hashCode();
565
        sorter.offset = base_offset+pos;
566
        sorter.ignore = ignoreChars;
567
        v.add(sorter);
568
      }
569
 
570
    if (i == rules.length())
571
      return -1;
572
    else
573
      return i;
574
  }
575
 
576
  /**
577
   * This method creates a copy of this object.
578
   *
579
   * @return A copy of this object.
580
   */
581
  public Object clone()
582
  {
583
    return super.clone();
584
  }
585
 
586
  /**
587
   * This method completely parses a string 'rules' containing sorting rules.
588
   *
589
   * @param rules String containing the rules to be parsed.
590
   * @return A set of sorting instructions stored in a Vector.
591
   * @throws ParseException if something turned wrong during the parsing. To get details
592
   * decode the message.
593
   */
594
  private ArrayList parseString(String rules)
595
    throws ParseException
596
  {
597
    ArrayList v = new ArrayList();
598
 
599
    // result of the first subParseString is not absolute (may be -1 or a
600
    // positive integer). But we do not care.
601
    subParseString(false, v, 0, rules);
602
 
603
    return v;
604
  }
605
 
606
  /**
607
   * This method uses the sorting instructions built by {@link #parseString}
608
   * to build collation elements which can be directly used to sort strings.
609
   *
610
   * @param parsedElements Parsed instructions stored in a ArrayList.
611
   * @throws ParseException if the order of the instructions are not valid.
612
   */
613
  private void buildCollationVector(ArrayList parsedElements)
614
    throws ParseException
615
  {
616
    int primary_seq = 0;
617
    int last_tertiary_seq = 0;
618
    short secondary_seq = 0;
619
    short tertiary_seq = 0;
620
    short equality_seq = 0;
621
    boolean inverseComparisons = false;
622
    final boolean DECREASING = false;
623
    final boolean INCREASING = true;
624
    boolean secondaryType = INCREASING;
625
    ArrayList v = new ArrayList();
626
 
627
    // elts is completely sorted.
628
element_loop:
629
    for (int i = 0; i < parsedElements.size(); i++)
630
      {
631
        CollationSorter elt = (CollationSorter) parsedElements.get(i);
632
        boolean ignoreChar = false;
633
 
634
        switch (elt.comparisonType)
635
          {
636
          case CollationSorter.GREATERP:
637
            primary_seq++;
638
            if (inverseComparisons)
639
              {
640
                secondary_seq = Short.MAX_VALUE;
641
                secondaryType = DECREASING;
642
              }
643
            else
644
              {
645
                secondary_seq = 0;
646
                secondaryType = INCREASING;
647
              }
648
            tertiary_seq = 0;
649
            equality_seq = 0;
650
            inverseComparisons = false;
651
            break;
652
          case CollationSorter.GREATERS:
653
            if (secondaryType == DECREASING)
654
              secondary_seq--;
655
            else
656
              secondary_seq++;
657
            tertiary_seq = 0;
658
            equality_seq = 0;
659
            break;
660
          case CollationSorter.INVERSE_SECONDARY:
661
            inverseComparisons = true;
662
            continue element_loop;
663
          case CollationSorter.GREATERT:
664
            tertiary_seq++;
665
            if (primary_seq == 0)
666
              last_tertiary_seq = tertiary_seq;
667
            equality_seq = 0;
668
            break;
669
          case CollationSorter.EQUAL:
670
            equality_seq++;
671
            break;
672
          case CollationSorter.RESET:
673
            throw new ParseException
674
              ("Invalid reached state 'RESET'. Internal error", elt.offset);
675
          default:
676
            throw new ParseException
677
              ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
678
          }
679
 
680
        v.add(new CollationElement(elt.textElement, primary_seq,
681
                                   secondary_seq, tertiary_seq,
682
                                   equality_seq, elt.expansionOrdering, elt.ignore));
683
      }
684
 
685
    this.inverseAccentComparison = inverseComparisons;
686
 
687
    ce_table = v.toArray();
688
 
689
    last_primary_value = primary_seq+1;
690
    last_tertiary_value = last_tertiary_seq+1;
691
  }
692
 
693
  /**
694
   * Build a tree where all keys are the texts of collation elements and data is
695
   * the collation element itself. The tree is used when extracting all prefix
696
   * for a given text.
697
   */
698
  private void buildPrefixAccess()
699
  {
700
    prefix_tree = new HashMap();
701
 
702
    for (int i = 0; i < ce_table.length; i++)
703
      {
704
        CollationElement e = (CollationElement) ce_table[i];
705
 
706
        prefix_tree.put(e.key, e);
707
      }
708
  }
709
 
710
  /**
711
   * This method returns an integer which indicates whether the first
712
   * specified <code>String</code> is less than, greater than, or equal to
713
   * the second.  The value depends not only on the collation rules in
714
   * effect, but also the strength and decomposition settings of this object.
715
   *
716
   * @param source The first <code>String</code> to compare.
717
   * @param target A second <code>String</code> to compare to the first.
718
   *
719
   * @return A negative integer if source &lt; target, a positive integer
720
   * if source &gt; target, or 0 if source == target.
721
   */
722
  public int compare(String source, String target)
723
  {
724
    CollationElementIterator cs, ct;
725
    CollationElement ord1block = null;
726
    CollationElement ord2block = null;
727
    boolean advance_block_1 = true;
728
    boolean advance_block_2 = true;
729
 
730
    cs = getCollationElementIterator(source);
731
    ct = getCollationElementIterator(target);
732
 
733
    for(;;)
734
      {
735
        int ord1;
736
        int ord2;
737
 
738
        /*
739
         * We have to check whether the characters are ignorable.
740
         * If it is the case then forget them.
741
         */
742
        if (advance_block_1)
743
          {
744
            ord1block = cs.nextBlock();
745
            if (ord1block != null && ord1block.ignore)
746
              continue;
747
          }
748
 
749
        if (advance_block_2)
750
          {
751
            ord2block = ct.nextBlock();
752
            if (ord2block != null && ord2block.ignore)
753
              {
754
                advance_block_1 = false;
755
                continue;
756
              }
757
         }
758
        else
759
          advance_block_2 = true;
760
 
761
        if (!advance_block_1)
762
          advance_block_1 = true;
763
 
764
        if (ord1block != null)
765
          ord1 = ord1block.getValue();
766
        else
767
          {
768
            if (ord2block == null)
769
              return 0;
770
            return -1;
771
          }
772
 
773
        if (ord2block == null)
774
          return 1;
775
 
776
        ord2 = ord2block.getValue();
777
 
778
        // We know chars are totally equal, so skip
779
        if (ord1 == ord2)
780
          {
781
            if (getStrength() == IDENTICAL)
782
              if (!ord1block.key.equals(ord2block.key))
783
                return ord1block.key.compareTo(ord2block.key);
784
            continue;
785
          }
786
 
787
        // Check for primary strength differences
788
        int prim1 = CollationElementIterator.primaryOrder(ord1);
789
        int prim2 = CollationElementIterator.primaryOrder(ord2);
790
 
791
        if (prim1 == 0 && getStrength() < TERTIARY)
792
          {
793
            advance_block_2 = false;
794
            continue;
795
          }
796
        else if (prim2 == 0 && getStrength() < TERTIARY)
797
          {
798
            advance_block_1 = false;
799
            continue;
800
          }
801
 
802
        if (prim1 < prim2)
803
          return -1;
804
        else if (prim1 > prim2)
805
          return 1;
806
        else if (getStrength() == PRIMARY)
807
          continue;
808
 
809
        // Check for secondary strength differences
810
        int sec1 = CollationElementIterator.secondaryOrder(ord1);
811
        int sec2 = CollationElementIterator.secondaryOrder(ord2);
812
 
813
        if (sec1 < sec2)
814
          return -1;
815
        else if (sec1 > sec2)
816
          return 1;
817
        else if (getStrength() == SECONDARY)
818
          continue;
819
 
820
        // Check for tertiary differences
821
        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
822
        int tert2 = CollationElementIterator.tertiaryOrder(ord2);
823
 
824
        if (tert1 < tert2)
825
          return -1;
826
        else if (tert1 > tert2)
827
          return 1;
828
        else if (getStrength() == TERTIARY)
829
          continue;
830
 
831
        // Apparently JDK does this (at least for my test case).
832
        return ord1block.key.compareTo(ord2block.key);
833
      }
834
  }
835
 
836
  /**
837
   * This method tests this object for equality against the specified
838
   * object.  This will be true if and only if the specified object is
839
   * another reference to this object.
840
   *
841
   * @param obj The <code>Object</code> to compare against this object.
842
   *
843
   * @return <code>true</code> if the specified object is equal to this object,
844
   * <code>false</code> otherwise.
845
   */
846
  public boolean equals(Object obj)
847
  {
848
    if (obj == this)
849
      return true;
850
    else
851
      return false;
852
  }
853
 
854
  /**
855
   * This method builds a default collation element without invoking
856
   * the database created from the rules passed to the constructor.
857
   *
858
   * @param c Character which needs a collation element.
859
   * @return A valid brand new CollationElement instance.
860
   */
861
  CollationElement getDefaultElement(char c)
862
  {
863
    int v;
864
 
865
    // Preliminary support for generic accent sorting inversion (I don't know if all
866
    // characters in the range should be sorted backward). This is the place
867
    // to fix this if needed.
868
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
869
      v = 0x0361 - ((int) c - 0x02B9);
870
    else
871
      v = (short) c;
872
    return new CollationElement("" + c, last_primary_value + v,
873
                                (short) 0, (short) 0, (short) 0, null, false);
874
  }
875
 
876
  /**
877
   * This method builds a default collation element for an accented character
878
   * without invoking the database created from the rules passed to the constructor.
879
   *
880
   * @param c Character which needs a collation element.
881
   * @return A valid brand new CollationElement instance.
882
   */
883
  CollationElement getDefaultAccentedElement(char c)
884
  {
885
    int v;
886
 
887
    // Preliminary support for generic accent sorting inversion (I don't know if all
888
    // characters in the range should be sorted backward). This is the place
889
    // to fix this if needed.
890
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
891
      v = 0x0361 - ((int) c - 0x02B9);
892
    else
893
      v = (short) c;
894
    return new CollationElement("" + c, (short) 0,
895
                                (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
896
  }
897
 
898
  /**
899
   * This method returns an instance for <code>CollationElementIterator</code>
900
   * for the specified <code>String</code> under the collation rules for this
901
   * object.
902
   *
903
   * @param source The <code>String</code> to return the
904
   * <code>CollationElementIterator</code> instance for.
905
   *
906
   * @return A <code>CollationElementIterator</code> for the specified
907
   * <code>String</code>.
908
   */
909
  public CollationElementIterator getCollationElementIterator(String source)
910
  {
911
    return new CollationElementIterator(this, source);
912
  }
913
 
914
  /**
915
   * This method returns an instance of <code>CollationElementIterator</code>
916
   * for the <code>String</code> represented by the specified
917
   * <code>CharacterIterator</code>.
918
   *
919
   * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
920
   *
921
   * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
922
   */
923
  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
924
  {
925
    StringBuffer expand = new StringBuffer("");
926
 
927
    // Right now we assume that we will read from the beginning of the string.
928
    for (char c = source.first();
929
         c != CharacterIterator.DONE;
930
         c = source.next())
931
      decomposeCharacter(c, expand);
932
 
933
    return getCollationElementIterator(expand.toString());
934
  }
935
 
936
  /**
937
   * This method returns an instance of <code>CollationKey</code> for the
938
   * specified <code>String</code>.  The object returned will have a
939
   * more efficient mechanism for its comparison function that could
940
   * provide speed benefits if multiple comparisons are performed, such
941
   * as during a sort.
942
   *
943
   * @param source The <code>String</code> to create a <code>CollationKey</code> for.
944
   *
945
   * @return A <code>CollationKey</code> for the specified <code>String</code>.
946
   */
947
  public CollationKey getCollationKey(String source)
948
  {
949
    CollationElementIterator cei = getCollationElementIterator(source);
950
    ArrayList vect = new ArrayList();
951
 
952
    int ord = cei.next();
953
    cei.reset(); //set to start of string
954
 
955
    while (ord != CollationElementIterator.NULLORDER)
956
      {
957
        // If the primary order is null, it means this is an ignorable
958
        // character.
959
        if (CollationElementIterator.primaryOrder(ord) == 0)
960
          {
961
            ord = cei.next();
962
            continue;
963
          }
964
        switch (getStrength())
965
          {
966
            case PRIMARY:
967
              ord = CollationElementIterator.primaryOrder(ord);
968
              break;
969
 
970
            case SECONDARY:
971
              ord = CollationElementIterator.primaryOrder(ord) << 8;
972
              ord |= CollationElementIterator.secondaryOrder(ord);
973
 
974
            default:
975
               break;
976
          }
977
 
978
        vect.add(new Integer(ord));
979
        ord = cei.next(); //increment to next key
980
      }
981
 
982
    Object[] objarr = vect.toArray();
983
    byte[] key = new byte[objarr.length * 4];
984
 
985
    for (int i = 0; i < objarr.length; i++)
986
      {
987
        int j = ((Integer) objarr[i]).intValue();
988
        key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
989
        key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
990
        key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
991
        key [i * 4 + 3] = (byte) (j & 0x000000FF);
992
      }
993
 
994
    return new CollationKey(this, source, key);
995
  }
996
 
997
  /**
998
   * This method returns a <code>String</code> containing the collation rules
999
   * for this object.
1000
   *
1001
   * @return The collation rules for this object.
1002
   */
1003
  public String getRules()
1004
  {
1005
    return rules;
1006
  }
1007
 
1008
  /**
1009
   * This method returns a hash value for this object.
1010
   *
1011
   * @return A hash value for this object.
1012
   */
1013
  public int hashCode()
1014
  {
1015
    return System.identityHashCode(this);
1016
  }
1017
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.