OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [java/] [text/] [CollationElementIterator.java] - Blame information for rev 771

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 771 jeremybenn
/* CollationElementIterator.java -- Walks through collation elements
2
   Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.text;
40
 
41
import gnu.java.lang.CPStringBuilder;
42
 
43
import java.util.ArrayList;
44
 
45
/* Written using "Java Class Libraries", 2nd edition, plus online
46
 * API docs for JDK 1.2 from http://www.javasoft.com.
47
 * Status: Believed complete and correct to JDK 1.1.
48
 */
49
 
50
/**
51
 * This class walks through the character collation elements of a
52
 * <code>String</code> as defined by the collation rules in an instance of
53
 * <code>RuleBasedCollator</code>.  There is no public constructor for
54
 * this class.  An instance is created by calling the
55
 * <code>getCollationElementIterator</code> method on
56
 * <code>RuleBasedCollator</code>.
57
 *
58
 * @author Aaron M. Renn (arenn@urbanophile.com)
59
 * @author Tom Tromey (tromey@cygnus.com)
60
 * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
61
 */
62
public final class CollationElementIterator
63
{
64
  /**
65
   * This is a constant value that is returned to indicate that the end of
66
   * the string was encountered.
67
   */
68
  public static final int NULLORDER = -1;
69
 
70
  /**
71
   * This is the RuleBasedCollator this object was created from.
72
   */
73
  RuleBasedCollator collator;
74
 
75
  /**
76
   * This is the String that is being iterated over.
77
   */
78
  CharacterIterator text;
79
 
80
  /**
81
   * This is the index into the collation decomposition where we are currently scanning.
82
   */
83
  int index;
84
 
85
  /**
86
   * This is the index into the String where we are currently scanning.
87
   */
88
  int textIndex;
89
 
90
  /**
91
   * Array containing the collation decomposition of the
92
   * text given to the constructor.
93
   */
94
  private RuleBasedCollator.CollationElement[] text_decomposition;
95
 
96
  /**
97
   * Array containing the index of the specified block.
98
   */
99
  private int[] text_indexes;
100
 
101
  /**
102
   * This method initializes a new instance of <code>CollationElementIterator</code>
103
   * to iterate over the specified <code>String</code> using the rules in the
104
   * specified <code>RuleBasedCollator</code>.
105
   *
106
   * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
107
   * @param text The <code>String</code> to iterate over.
108
   */
109
  CollationElementIterator(RuleBasedCollator collator, String text)
110
  {
111
    this.collator = collator;
112
 
113
    setText (text);
114
  }
115
 
116
  /**
117
   * This method initializes a new instance of <code>CollationElementIterator</code>
118
   * to iterate over the specified <code>String</code> using the rules in the
119
   * specified <code>RuleBasedCollator</code>.
120
   *
121
   * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
122
   * @param text The character iterator to iterate over.
123
   */
124
  CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
125
  {
126
    this.collator = collator;
127
 
128
    setText (text);
129
  }
130
 
131
  RuleBasedCollator.CollationElement nextBlock()
132
  {
133
    if (index >= text_decomposition.length)
134
      return null;
135
 
136
    RuleBasedCollator.CollationElement e = text_decomposition[index];
137
 
138
    textIndex = text_indexes[index+1];
139
 
140
    index++;
141
 
142
    return e;
143
  }
144
 
145
  RuleBasedCollator.CollationElement previousBlock()
146
  {
147
    if (index == 0)
148
      return null;
149
 
150
    index--;
151
    RuleBasedCollator.CollationElement e = text_decomposition[index];
152
 
153
    textIndex = text_indexes[index+1];
154
 
155
    return e;
156
  }
157
 
158
  /**
159
   * This method returns the collation ordering value of the next character sequence
160
   * in the string (it may be an extended character following collation rules).
161
   * This method will return <code>NULLORDER</code> if the
162
   * end of the string was reached.
163
   *
164
   * @return The collation ordering value.
165
   */
166
  public int next()
167
  {
168
    RuleBasedCollator.CollationElement e = nextBlock();
169
 
170
    if (e == null)
171
      return NULLORDER;
172
 
173
    return e.getValue();
174
  }
175
 
176
  /**
177
   * This method returns the collation ordering value of the previous character
178
   * in the string.  This method will return <code>NULLORDER</code> if the
179
   * beginning of the string was reached.
180
   *
181
   * @return The collation ordering value.
182
   */
183
  public int previous()
184
  {
185
    RuleBasedCollator.CollationElement e = previousBlock();
186
 
187
    if (e == null)
188
      return NULLORDER;
189
 
190
    return e.getValue();
191
  }
192
 
193
  /**
194
   * This method returns the primary order value for the given collation
195
   * value.
196
   *
197
   * @param order The collation value returned from <code>next()</code> or
198
   *              <code>previous()</code>.
199
   *
200
   * @return The primary order value of the specified collation value.  This is
201
   *         the high 16 bits.
202
   */
203
  public static int primaryOrder(int order)
204
  {
205
    // From the JDK 1.2 spec.
206
    return order >>> 16;
207
  }
208
 
209
  /**
210
   * This method resets the internal position pointer to read from the
211
   * beginning of the <code>String</code> again.
212
   */
213
  public void reset()
214
  {
215
    index = 0;
216
    textIndex = 0;
217
  }
218
 
219
  /**
220
   * This method returns the secondary order value for the given collation
221
   * value.
222
   *
223
   * @param order The collation value returned from <code>next()</code> or
224
   *              <code>previous()</code>.
225
   *
226
   * @return The secondary order value of the specified collation value.  This
227
   *         is the bits 8-15.
228
   */
229
  public static short secondaryOrder(int order)
230
  {
231
    // From the JDK 1.2 spec.
232
    return (short) ((order >>> 8) & 255);
233
  }
234
 
235
  /**
236
   * This method returns the tertiary order value for the given collation
237
   * value.
238
   *
239
   * @param order The collation value returned from <code>next()</code> or
240
   *              <code>previous()</code>.
241
   *
242
   * @return The tertiary order value of the specified collation value.  This
243
   *         is the low eight bits.
244
   */
245
  public static short tertiaryOrder(int order)
246
  {
247
    // From the JDK 1.2 spec.
248
    return (short) (order & 255);
249
  }
250
 
251
  /**
252
   * This method sets the <code>String</code> that it is iterating over
253
   * to the specified <code>String</code>.
254
   *
255
   * @param text The new <code>String</code> to iterate over.
256
   *
257
   * @since 1.2
258
   */
259
  public void setText(String text)
260
  {
261
    int idx = 0;
262
    int idx_idx = 0;
263
    int alreadyExpanded = 0;
264
    int idxToMove = 0;
265
 
266
    this.text = new StringCharacterIterator(text);
267
    this.index = 0;
268
 
269
    String work_text = text.intern();
270
 
271
    ArrayList a_element = new ArrayList();
272
    ArrayList a_idx = new ArrayList();
273
 
274
    // Build element collection ordered as they come in "text".
275
    while (idx < work_text.length())
276
      {
277
        String key, key_old;
278
 
279
        Object object = null;
280
        int p = 1;
281
 
282
        // IMPROVE: use a TreeMap with a prefix-ordering rule.
283
        key_old = key = null;
284
        do
285
          {
286
            if (object != null)
287
              key_old = key;
288
            key = work_text.substring (idx, idx+p);
289
            object = collator.prefix_tree.get (key);
290
            if (object != null && idx < alreadyExpanded)
291
              {
292
                RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
293
                if (prefix.expansion != null &&
294
                    prefix.expansion.startsWith(work_text.substring(0, idx)))
295
                {
296
                  object = null;
297
                  key = key_old;
298
                }
299
              }
300
            p++;
301
          }
302
        while (idx+p <= work_text.length());
303
 
304
        if (object == null)
305
          key = key_old;
306
 
307
        RuleBasedCollator.CollationElement prefix =
308
          (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
309
 
310
        /*
311
         * First case: There is no such sequence in the database.
312
         * We will have to build one from the context.
313
         */
314
        if (prefix == null)
315
          {
316
            /*
317
             * We are dealing with sequences in an expansion. They
318
             * are treated as accented characters (tertiary order).
319
             */
320
            if (alreadyExpanded > 0)
321
              {
322
                RuleBasedCollator.CollationElement e =
323
                  collator.getDefaultAccentedElement (work_text.charAt (idx));
324
 
325
                a_element.add (e);
326
                a_idx.add (new Integer(idx_idx));
327
                idx++;
328
                alreadyExpanded--;
329
                if (alreadyExpanded == 0)
330
                  {
331
                    /* There is not any characters left in the expansion set.
332
                     * We can increase the pointer in the source string.
333
                     */
334
                    idx_idx += idxToMove;
335
                    idxToMove = 0;
336
                  }
337
                else
338
                  idx_idx++;
339
              }
340
            else
341
              {
342
                /* This is a normal character. */
343
                RuleBasedCollator.CollationElement e =
344
                  collator.getDefaultElement (work_text.charAt (idx));
345
                Integer i_ref = new Integer(idx_idx);
346
 
347
                /* Don't forget to mark it as a special sequence so the
348
                 * string can be ordered.
349
                 */
350
                a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
351
                a_idx.add (i_ref);
352
                a_element.add (e);
353
                a_idx.add (i_ref);
354
                idx_idx++;
355
                idx++;
356
              }
357
            continue;
358
          }
359
 
360
        /*
361
         * Second case: Here we have found a matching sequence.
362
         * Here we have an expansion string prepend it to the "work text" and
363
         * add the corresponding sorting element. We must also mark
364
         */
365
        if (prefix.expansion != null)
366
          {
367
            work_text = prefix.expansion
368
              + work_text.substring (idx+prefix.key.length());
369
            idx = 0;
370
            a_element.add (prefix);
371
            a_idx.add (new Integer(idx_idx));
372
            if (alreadyExpanded == 0)
373
              idxToMove = prefix.key.length();
374
            alreadyExpanded += prefix.expansion.length()-prefix.key.length();
375
          }
376
        else
377
          {
378
            /* Third case: the simplest. We have got the prefix and it
379
             * has not to be expanded.
380
             */
381
            a_element.add (prefix);
382
            a_idx.add (new Integer(idx_idx));
383
            idx += prefix.key.length();
384
            /* If the sequence is in an expansion, we must decrease the
385
             * counter.
386
             */
387
            if (alreadyExpanded > 0)
388
              {
389
                alreadyExpanded -= prefix.key.length();
390
                if (alreadyExpanded == 0)
391
                  {
392
                    idx_idx += idxToMove;
393
                    idxToMove = 0;
394
                  }
395
              }
396
            else
397
              idx_idx += prefix.key.length();
398
          }
399
      }
400
 
401
    text_decomposition = (RuleBasedCollator.CollationElement[])
402
           a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
403
    text_indexes = new int[a_idx.size()+1];
404
    for (int i = 0; i < a_idx.size(); i++)
405
      {
406
        text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
407
      }
408
    text_indexes[a_idx.size()] = text.length();
409
  }
410
 
411
  /**
412
   * This method sets the <code>String</code> that it is iterating over
413
   * to the <code>String</code> represented by the specified
414
   * <code>CharacterIterator</code>.
415
   *
416
   * @param source The <code>CharacterIterator</code> containing the new
417
   * <code>String</code> to iterate over.
418
   */
419
  public void setText(CharacterIterator source)
420
  {
421
    CPStringBuilder expand = new CPStringBuilder();
422
 
423
    // For now assume we read from the beginning of the string.
424
    for (char c = source.first();
425
         c != CharacterIterator.DONE;
426
         c = source.next())
427
      expand.append(c);
428
 
429
    setText(expand.toString());
430
  }
431
 
432
  /**
433
   * This method returns the current offset into the <code>String</code>
434
   * that is being iterated over.
435
   *
436
   * @return The iteration index position.
437
   *
438
   * @since 1.2
439
   */
440
  public int getOffset()
441
  {
442
    return textIndex;
443
  }
444
 
445
  /**
446
   * This method sets the iteration index position into the current
447
   * <code>String</code> to the specified value.  This value must not
448
   * be negative and must not be greater than the last index position
449
   * in the <code>String</code>.
450
   *
451
   * @param offset The new iteration index position.
452
   *
453
   * @exception IllegalArgumentException If the new offset is not valid.
454
   */
455
  public void setOffset(int offset)
456
  {
457
    if (offset < 0)
458
      throw new IllegalArgumentException("Negative offset: " + offset);
459
 
460
    if (offset > (text.getEndIndex() - 1))
461
      throw new IllegalArgumentException("Offset too large: " + offset);
462
 
463
    for (index = 0; index < text_decomposition.length; index++)
464
      {
465
        if (offset <= text_indexes[index])
466
          break;
467
      }
468
    /*
469
     * As text_indexes[0] == 0, we should not have to take care whether index is
470
     * greater than 0. It is always.
471
     */
472
    if (text_indexes[index] == offset)
473
      textIndex = offset;
474
    else
475
      textIndex = text_indexes[index-1];
476
  }
477
 
478
  /**
479
   * This method returns the maximum length of any expansion sequence that
480
   * ends with the specified collation order value.  (Whatever that means).
481
   *
482
   * @param value The collation order value
483
   *
484
   * @return The maximum length of an expansion sequence.
485
   */
486
  public int getMaxExpansion(int value)
487
  {
488
    return 1;
489
  }
490
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.