OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libjava/] [classpath/] [java/] [lang/] [Character.java] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
2
   Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.lang;
40
 
41
import gnu.java.lang.CharData;
42
 
43
import java.io.Serializable;
44
 
45
/**
46
 * Wrapper class for the primitive char data type.  In addition, this class
47
 * allows one to retrieve property information and perform transformations
48
 * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
49
 * java.lang.Character is designed to be very dynamic, and as such, it
50
 * retrieves information on the Unicode character set from a separate
51
 * database, gnu.java.lang.CharData, which can be easily upgraded.
52
 *
53
 * <p>For predicates, boundaries are used to describe
54
 * the set of characters for which the method will return true.
55
 * This syntax uses fairly normal regular expression notation.
56
 * See 5.13 of the Unicode Standard, Version 3.0, for the
57
 * boundary specification.
58
 *
59
 * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
60
 * for more information on the Unicode Standard.
61
 *
62
 * @author Tom Tromey (tromey@cygnus.com)
63
 * @author Paul N. Fisher
64
 * @author Jochen Hoenicke
65
 * @author Eric Blake (ebb9@email.byu.edu)
66
 * @see CharData
67
 * @since 1.0
68
 * @status updated to 1.4
69
 */
70
public final class Character implements Serializable, Comparable
71
{
72
  /**
73
   * A subset of Unicode blocks.
74
   *
75
   * @author Paul N. Fisher
76
   * @author Eric Blake (ebb9@email.byu.edu)
77
   * @since 1.2
78
   */
79
  public static class Subset
80
  {
81
    /** The name of the subset. */
82
    private final String name;
83
 
84
    /**
85
     * Construct a new subset of characters.
86
     *
87
     * @param name the name of the subset
88
     * @throws NullPointerException if name is null
89
     */
90
    protected Subset(String name)
91
    {
92
      // Note that name.toString() is name, unless name was null.
93
      this.name = name.toString();
94
    }
95
 
96
    /**
97
     * Compares two Subsets for equality. This is <code>final</code>, and
98
     * restricts the comparison on the <code>==</code> operator, so it returns
99
     * true only for the same object.
100
     *
101
     * @param o the object to compare
102
     * @return true if o is this
103
     */
104
    public final boolean equals(Object o)
105
    {
106
      return o == this;
107
    }
108
 
109
    /**
110
     * Makes the original hashCode of Object final, to be consistent with
111
     * equals.
112
     *
113
     * @return the hash code for this object
114
     */
115
    public final int hashCode()
116
    {
117
      return super.hashCode();
118
    }
119
 
120
    /**
121
     * Returns the name of the subset.
122
     *
123
     * @return the name
124
     */
125
    public final String toString()
126
    {
127
      return name;
128
    }
129
  } // class Subset
130
 
131
  /**
132
   * A family of character subsets in the Unicode specification. A character
133
   * is in at most one of these blocks.
134
   *
135
   * This inner class was generated automatically from
136
   * <code>doc/unicode/Block-3.txt</code>, by some perl scripts.
137
   * This Unicode definition file can be found on the
138
   * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
139
   * JDK 1.4 uses Unicode version 3.0.0.
140
   *
141
   * @author scripts/unicode-blocks.pl (written by Eric Blake)
142
   * @since 1.2
143
   */
144
  public static final class UnicodeBlock extends Subset
145
  {
146
    /** The start of the subset. */
147
    private final char start;
148
 
149
    /** The end of the subset. */
150
    private final char end;
151
 
152
    /**
     * Creates a block that covers a single contiguous range of characters;
     * both bounds are part of the block.
     *
     * @param start the first character of the range
     * @param end the last character of the range
     * @param name the block name, handed on to the Subset constructor
     */
    private UnicodeBlock(char start, char end, String name)
    {
      super(name);
      this.end = end;
      this.start = start;
    }
165
 
166
    /**
167
     * Returns the Unicode character block which a character belongs to.
168
     *
169
     * @param ch the character to look up
170
     * @return the set it belongs to, or null if it is not in one
171
     */
172
    public static UnicodeBlock of(char ch)
173
    {
174
      // Special case, since SPECIALS contains two ranges.
175
      if (ch == '\uFEFF')
176
        return SPECIALS;
177
      // Simple binary search for the correct block.
178
      int low = 0;
179
      int hi = sets.length - 1;
180
      while (low <= hi)
181
        {
182
          int mid = (low + hi) >> 1;
183
          UnicodeBlock b = sets[mid];
184
          if (ch < b.start)
185
            hi = mid - 1;
186
          else if (ch > b.end)
187
            low = mid + 1;
188
          else
189
            return b;
190
        }
191
      return null;
192
    }
193
 
194
    /**
195
     * Basic Latin.
196
     * '\u0000' - '\u007F'.
197
     */
198
    public static final UnicodeBlock BASIC_LATIN
199
      = new UnicodeBlock('\u0000', '\u007F',
200
                         "BASIC_LATIN");
201
 
202
    /**
203
     * Latin-1 Supplement.
204
     * '\u0080' - '\u00FF'.
205
     */
206
    public static final UnicodeBlock LATIN_1_SUPPLEMENT
207
      = new UnicodeBlock('\u0080', '\u00FF',
208
                         "LATIN_1_SUPPLEMENT");
209
 
210
    /**
211
     * Latin Extended-A.
212
     * '\u0100' - '\u017F'.
213
     */
214
    public static final UnicodeBlock LATIN_EXTENDED_A
215
      = new UnicodeBlock('\u0100', '\u017F',
216
                         "LATIN_EXTENDED_A");
217
 
218
    /**
219
     * Latin Extended-B.
220
     * '\u0180' - '\u024F'.
221
     */
222
    public static final UnicodeBlock LATIN_EXTENDED_B
223
      = new UnicodeBlock('\u0180', '\u024F',
224
                         "LATIN_EXTENDED_B");
225
 
226
    /**
227
     * IPA Extensions.
228
     * '\u0250' - '\u02AF'.
229
     */
230
    public static final UnicodeBlock IPA_EXTENSIONS
231
      = new UnicodeBlock('\u0250', '\u02AF',
232
                         "IPA_EXTENSIONS");
233
 
234
    /**
235
     * Spacing Modifier Letters.
236
     * '\u02B0' - '\u02FF'.
237
     */
238
    public static final UnicodeBlock SPACING_MODIFIER_LETTERS
239
      = new UnicodeBlock('\u02B0', '\u02FF',
240
                         "SPACING_MODIFIER_LETTERS");
241
 
242
    /**
243
     * Combining Diacritical Marks.
244
     * '\u0300' - '\u036F'.
245
     */
246
    public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
247
      = new UnicodeBlock('\u0300', '\u036F',
248
                         "COMBINING_DIACRITICAL_MARKS");
249
 
250
    /**
251
     * Greek.
252
     * '\u0370' - '\u03FF'.
253
     */
254
    public static final UnicodeBlock GREEK
255
      = new UnicodeBlock('\u0370', '\u03FF',
256
                         "GREEK");
257
 
258
    /**
259
     * Cyrillic.
260
     * '\u0400' - '\u04FF'.
261
     */
262
    public static final UnicodeBlock CYRILLIC
263
      = new UnicodeBlock('\u0400', '\u04FF',
264
                         "CYRILLIC");
265
 
266
    /**
267
     * Armenian.
268
     * '\u0530' - '\u058F'.
269
     */
270
    public static final UnicodeBlock ARMENIAN
271
      = new UnicodeBlock('\u0530', '\u058F',
272
                         "ARMENIAN");
273
 
274
    /**
275
     * Hebrew.
276
     * '\u0590' - '\u05FF'.
277
     */
278
    public static final UnicodeBlock HEBREW
279
      = new UnicodeBlock('\u0590', '\u05FF',
280
                         "HEBREW");
281
 
282
    /**
283
     * Arabic.
284
     * '\u0600' - '\u06FF'.
285
     */
286
    public static final UnicodeBlock ARABIC
287
      = new UnicodeBlock('\u0600', '\u06FF',
288
                         "ARABIC");
289
 
290
    /**
291
     * Syriac.
292
     * '\u0700' - '\u074F'.
293
     * @since 1.4
294
     */
295
    public static final UnicodeBlock SYRIAC
296
      = new UnicodeBlock('\u0700', '\u074F',
297
                         "SYRIAC");
298
 
299
    /**
300
     * Thaana.
301
     * '\u0780' - '\u07BF'.
302
     * @since 1.4
303
     */
304
    public static final UnicodeBlock THAANA
305
      = new UnicodeBlock('\u0780', '\u07BF',
306
                         "THAANA");
307
 
308
    /**
309
     * Devanagari.
310
     * '\u0900' - '\u097F'.
311
     */
312
    public static final UnicodeBlock DEVANAGARI
313
      = new UnicodeBlock('\u0900', '\u097F',
314
                         "DEVANAGARI");
315
 
316
    /**
317
     * Bengali.
318
     * '\u0980' - '\u09FF'.
319
     */
320
    public static final UnicodeBlock BENGALI
321
      = new UnicodeBlock('\u0980', '\u09FF',
322
                         "BENGALI");
323
 
324
    /**
325
     * Gurmukhi.
326
     * '\u0A00' - '\u0A7F'.
327
     */
328
    public static final UnicodeBlock GURMUKHI
329
      = new UnicodeBlock('\u0A00', '\u0A7F',
330
                         "GURMUKHI");
331
 
332
    /**
333
     * Gujarati.
334
     * '\u0A80' - '\u0AFF'.
335
     */
336
    public static final UnicodeBlock GUJARATI
337
      = new UnicodeBlock('\u0A80', '\u0AFF',
338
                         "GUJARATI");
339
 
340
    /**
341
     * Oriya.
342
     * '\u0B00' - '\u0B7F'.
343
     */
344
    public static final UnicodeBlock ORIYA
345
      = new UnicodeBlock('\u0B00', '\u0B7F',
346
                         "ORIYA");
347
 
348
    /**
349
     * Tamil.
350
     * '\u0B80' - '\u0BFF'.
351
     */
352
    public static final UnicodeBlock TAMIL
353
      = new UnicodeBlock('\u0B80', '\u0BFF',
354
                         "TAMIL");
355
 
356
    /**
357
     * Telugu.
358
     * '\u0C00' - '\u0C7F'.
359
     */
360
    public static final UnicodeBlock TELUGU
361
      = new UnicodeBlock('\u0C00', '\u0C7F',
362
                         "TELUGU");
363
 
364
    /**
365
     * Kannada.
366
     * '\u0C80' - '\u0CFF'.
367
     */
368
    public static final UnicodeBlock KANNADA
369
      = new UnicodeBlock('\u0C80', '\u0CFF',
370
                         "KANNADA");
371
 
372
    /**
373
     * Malayalam.
374
     * '\u0D00' - '\u0D7F'.
375
     */
376
    public static final UnicodeBlock MALAYALAM
377
      = new UnicodeBlock('\u0D00', '\u0D7F',
378
                         "MALAYALAM");
379
 
380
    /**
381
     * Sinhala.
382
     * '\u0D80' - '\u0DFF'.
383
     * @since 1.4
384
     */
385
    public static final UnicodeBlock SINHALA
386
      = new UnicodeBlock('\u0D80', '\u0DFF',
387
                         "SINHALA");
388
 
389
    /**
390
     * Thai.
391
     * '\u0E00' - '\u0E7F'.
392
     */
393
    public static final UnicodeBlock THAI
394
      = new UnicodeBlock('\u0E00', '\u0E7F',
395
                         "THAI");
396
 
397
    /**
398
     * Lao.
399
     * '\u0E80' - '\u0EFF'.
400
     */
401
    public static final UnicodeBlock LAO
402
      = new UnicodeBlock('\u0E80', '\u0EFF',
403
                         "LAO");
404
 
405
    /**
406
     * Tibetan.
407
     * '\u0F00' - '\u0FFF'.
408
     */
409
    public static final UnicodeBlock TIBETAN
410
      = new UnicodeBlock('\u0F00', '\u0FFF',
411
                         "TIBETAN");
412
 
413
    /**
414
     * Myanmar.
415
     * '\u1000' - '\u109F'.
416
     * @since 1.4
417
     */
418
    public static final UnicodeBlock MYANMAR
419
      = new UnicodeBlock('\u1000', '\u109F',
420
                         "MYANMAR");
421
 
422
    /**
423
     * Georgian.
424
     * '\u10A0' - '\u10FF'.
425
     */
426
    public static final UnicodeBlock GEORGIAN
427
      = new UnicodeBlock('\u10A0', '\u10FF',
428
                         "GEORGIAN");
429
 
430
    /**
431
     * Hangul Jamo.
432
     * '\u1100' - '\u11FF'.
433
     */
434
    public static final UnicodeBlock HANGUL_JAMO
435
      = new UnicodeBlock('\u1100', '\u11FF',
436
                         "HANGUL_JAMO");
437
 
438
    /**
439
     * Ethiopic.
440
     * '\u1200' - '\u137F'.
441
     * @since 1.4
442
     */
443
    public static final UnicodeBlock ETHIOPIC
444
      = new UnicodeBlock('\u1200', '\u137F',
445
                         "ETHIOPIC");
446
 
447
    /**
448
     * Cherokee.
449
     * '\u13A0' - '\u13FF'.
450
     * @since 1.4
451
     */
452
    public static final UnicodeBlock CHEROKEE
453
      = new UnicodeBlock('\u13A0', '\u13FF',
454
                         "CHEROKEE");
455
 
456
    /**
457
     * Unified Canadian Aboriginal Syllabics.
458
     * '\u1400' - '\u167F'.
459
     * @since 1.4
460
     */
461
    public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
462
      = new UnicodeBlock('\u1400', '\u167F',
463
                         "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS");
464
 
465
    /**
466
     * Ogham.
467
     * '\u1680' - '\u169F'.
468
     * @since 1.4
469
     */
470
    public static final UnicodeBlock OGHAM
471
      = new UnicodeBlock('\u1680', '\u169F',
472
                         "OGHAM");
473
 
474
    /**
475
     * Runic.
476
     * '\u16A0' - '\u16FF'.
477
     * @since 1.4
478
     */
479
    public static final UnicodeBlock RUNIC
480
      = new UnicodeBlock('\u16A0', '\u16FF',
481
                         "RUNIC");
482
 
483
    /**
484
     * Khmer.
485
     * '\u1780' - '\u17FF'.
486
     * @since 1.4
487
     */
488
    public static final UnicodeBlock KHMER
489
      = new UnicodeBlock('\u1780', '\u17FF',
490
                         "KHMER");
491
 
492
    /**
493
     * Mongolian.
494
     * '\u1800' - '\u18AF'.
495
     * @since 1.4
496
     */
497
    public static final UnicodeBlock MONGOLIAN
498
      = new UnicodeBlock('\u1800', '\u18AF',
499
                         "MONGOLIAN");
500
 
501
    /**
502
     * Latin Extended Additional.
503
     * '\u1E00' - '\u1EFF'.
504
     */
505
    public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
506
      = new UnicodeBlock('\u1E00', '\u1EFF',
507
                         "LATIN_EXTENDED_ADDITIONAL");
508
 
509
    /**
510
     * Greek Extended.
511
     * '\u1F00' - '\u1FFF'.
512
     */
513
    public static final UnicodeBlock GREEK_EXTENDED
514
      = new UnicodeBlock('\u1F00', '\u1FFF',
515
                         "GREEK_EXTENDED");
516
 
517
    /**
518
     * General Punctuation.
519
     * '\u2000' - '\u206F'.
520
     */
521
    public static final UnicodeBlock GENERAL_PUNCTUATION
522
      = new UnicodeBlock('\u2000', '\u206F',
523
                         "GENERAL_PUNCTUATION");
524
 
525
    /**
526
     * Superscripts and Subscripts.
527
     * '\u2070' - '\u209F'.
528
     */
529
    public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
530
      = new UnicodeBlock('\u2070', '\u209F',
531
                         "SUPERSCRIPTS_AND_SUBSCRIPTS");
532
 
533
    /**
534
     * Currency Symbols.
535
     * '\u20A0' - '\u20CF'.
536
     */
537
    public static final UnicodeBlock CURRENCY_SYMBOLS
538
      = new UnicodeBlock('\u20A0', '\u20CF',
539
                         "CURRENCY_SYMBOLS");
540
 
541
    /**
542
     * Combining Marks for Symbols.
543
     * '\u20D0' - '\u20FF'.
544
     */
545
    public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
546
      = new UnicodeBlock('\u20D0', '\u20FF',
547
                         "COMBINING_MARKS_FOR_SYMBOLS");
548
 
549
    /**
550
     * Letterlike Symbols.
551
     * '\u2100' - '\u214F'.
552
     */
553
    public static final UnicodeBlock LETTERLIKE_SYMBOLS
554
      = new UnicodeBlock('\u2100', '\u214F',
555
                         "LETTERLIKE_SYMBOLS");
556
 
557
    /**
558
     * Number Forms.
559
     * '\u2150' - '\u218F'.
560
     */
561
    public static final UnicodeBlock NUMBER_FORMS
562
      = new UnicodeBlock('\u2150', '\u218F',
563
                         "NUMBER_FORMS");
564
 
565
    /**
566
     * Arrows.
567
     * '\u2190' - '\u21FF'.
568
     */
569
    public static final UnicodeBlock ARROWS
570
      = new UnicodeBlock('\u2190', '\u21FF',
571
                         "ARROWS");
572
 
573
    /**
574
     * Mathematical Operators.
575
     * '\u2200' - '\u22FF'.
576
     */
577
    public static final UnicodeBlock MATHEMATICAL_OPERATORS
578
      = new UnicodeBlock('\u2200', '\u22FF',
579
                         "MATHEMATICAL_OPERATORS");
580
 
581
    /**
582
     * Miscellaneous Technical.
583
     * '\u2300' - '\u23FF'.
584
     */
585
    public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
586
      = new UnicodeBlock('\u2300', '\u23FF',
587
                         "MISCELLANEOUS_TECHNICAL");
588
 
589
    /**
590
     * Control Pictures.
591
     * '\u2400' - '\u243F'.
592
     */
593
    public static final UnicodeBlock CONTROL_PICTURES
594
      = new UnicodeBlock('\u2400', '\u243F',
595
                         "CONTROL_PICTURES");
596
 
597
    /**
598
     * Optical Character Recognition.
599
     * '\u2440' - '\u245F'.
600
     */
601
    public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
602
      = new UnicodeBlock('\u2440', '\u245F',
603
                         "OPTICAL_CHARACTER_RECOGNITION");
604
 
605
    /**
606
     * Enclosed Alphanumerics.
607
     * '\u2460' - '\u24FF'.
608
     */
609
    public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
610
      = new UnicodeBlock('\u2460', '\u24FF',
611
                         "ENCLOSED_ALPHANUMERICS");
612
 
613
    /**
614
     * Box Drawing.
615
     * '\u2500' - '\u257F'.
616
     */
617
    public static final UnicodeBlock BOX_DRAWING
618
      = new UnicodeBlock('\u2500', '\u257F',
619
                         "BOX_DRAWING");
620
 
621
    /**
622
     * Block Elements.
623
     * '\u2580' - '\u259F'.
624
     */
625
    public static final UnicodeBlock BLOCK_ELEMENTS
626
      = new UnicodeBlock('\u2580', '\u259F',
627
                         "BLOCK_ELEMENTS");
628
 
629
    /**
630
     * Geometric Shapes.
631
     * '\u25A0' - '\u25FF'.
632
     */
633
    public static final UnicodeBlock GEOMETRIC_SHAPES
634
      = new UnicodeBlock('\u25A0', '\u25FF',
635
                         "GEOMETRIC_SHAPES");
636
 
637
    /**
638
     * Miscellaneous Symbols.
639
     * '\u2600' - '\u26FF'.
640
     */
641
    public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
642
      = new UnicodeBlock('\u2600', '\u26FF',
643
                         "MISCELLANEOUS_SYMBOLS");
644
 
645
    /**
646
     * Dingbats.
647
     * '\u2700' - '\u27BF'.
648
     */
649
    public static final UnicodeBlock DINGBATS
650
      = new UnicodeBlock('\u2700', '\u27BF',
651
                         "DINGBATS");
652
 
653
    /**
654
     * Braille Patterns.
655
     * '\u2800' - '\u28FF'.
656
     * @since 1.4
657
     */
658
    public static final UnicodeBlock BRAILLE_PATTERNS
659
      = new UnicodeBlock('\u2800', '\u28FF',
660
                         "BRAILLE_PATTERNS");
661
 
662
    /**
663
     * CJK Radicals Supplement.
664
     * '\u2E80' - '\u2EFF'.
665
     * @since 1.4
666
     */
667
    public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
668
      = new UnicodeBlock('\u2E80', '\u2EFF',
669
                         "CJK_RADICALS_SUPPLEMENT");
670
 
671
    /**
672
     * Kangxi Radicals.
673
     * '\u2F00' - '\u2FDF'.
674
     * @since 1.4
675
     */
676
    public static final UnicodeBlock KANGXI_RADICALS
677
      = new UnicodeBlock('\u2F00', '\u2FDF',
678
                         "KANGXI_RADICALS");
679
 
680
    /**
681
     * Ideographic Description Characters.
682
     * '\u2FF0' - '\u2FFF'.
683
     * @since 1.4
684
     */
685
    public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
686
      = new UnicodeBlock('\u2FF0', '\u2FFF',
687
                         "IDEOGRAPHIC_DESCRIPTION_CHARACTERS");
688
 
689
    /**
690
     * CJK Symbols and Punctuation.
691
     * '\u3000' - '\u303F'.
692
     */
693
    public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
694
      = new UnicodeBlock('\u3000', '\u303F',
695
                         "CJK_SYMBOLS_AND_PUNCTUATION");
696
 
697
    /**
698
     * Hiragana.
699
     * '\u3040' - '\u309F'.
700
     */
701
    public static final UnicodeBlock HIRAGANA
702
      = new UnicodeBlock('\u3040', '\u309F',
703
                         "HIRAGANA");
704
 
705
    /**
706
     * Katakana.
707
     * '\u30A0' - '\u30FF'.
708
     */
709
    public static final UnicodeBlock KATAKANA
710
      = new UnicodeBlock('\u30A0', '\u30FF',
711
                         "KATAKANA");
712
 
713
    /**
714
     * Bopomofo.
715
     * '\u3100' - '\u312F'.
716
     */
717
    public static final UnicodeBlock BOPOMOFO
718
      = new UnicodeBlock('\u3100', '\u312F',
719
                         "BOPOMOFO");
720
 
721
    /**
722
     * Hangul Compatibility Jamo.
723
     * '\u3130' - '\u318F'.
724
     */
725
    public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
726
      = new UnicodeBlock('\u3130', '\u318F',
727
                         "HANGUL_COMPATIBILITY_JAMO");
728
 
729
    /**
730
     * Kanbun.
731
     * '\u3190' - '\u319F'.
732
     */
733
    public static final UnicodeBlock KANBUN
734
      = new UnicodeBlock('\u3190', '\u319F',
735
                         "KANBUN");
736
 
737
    /**
738
     * Bopomofo Extended.
739
     * '\u31A0' - '\u31BF'.
740
     * @since 1.4
741
     */
742
    public static final UnicodeBlock BOPOMOFO_EXTENDED
743
      = new UnicodeBlock('\u31A0', '\u31BF',
744
                         "BOPOMOFO_EXTENDED");
745
 
746
    /**
747
     * Enclosed CJK Letters and Months.
748
     * '\u3200' - '\u32FF'.
749
     */
750
    public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
751
      = new UnicodeBlock('\u3200', '\u32FF',
752
                         "ENCLOSED_CJK_LETTERS_AND_MONTHS");
753
 
754
    /**
755
     * CJK Compatibility.
756
     * '\u3300' - '\u33FF'.
757
     */
758
    public static final UnicodeBlock CJK_COMPATIBILITY
759
      = new UnicodeBlock('\u3300', '\u33FF',
760
                         "CJK_COMPATIBILITY");
761
 
762
    /**
763
     * CJK Unified Ideographs Extension A.
764
     * '\u3400' - '\u4DB5'.
765
     * @since 1.4
766
     */
767
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
768
      = new UnicodeBlock('\u3400', '\u4DB5',
769
                         "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A");
770
 
771
    /**
772
     * CJK Unified Ideographs.
773
     * '\u4E00' - '\u9FFF'.
774
     */
775
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
776
      = new UnicodeBlock('\u4E00', '\u9FFF',
777
                         "CJK_UNIFIED_IDEOGRAPHS");
778
 
779
    /**
780
     * Yi Syllables.
781
     * '\uA000' - '\uA48F'.
782
     * @since 1.4
783
     */
784
    public static final UnicodeBlock YI_SYLLABLES
785
      = new UnicodeBlock('\uA000', '\uA48F',
786
                         "YI_SYLLABLES");
787
 
788
    /**
789
     * Yi Radicals.
790
     * '\uA490' - '\uA4CF'.
791
     * @since 1.4
792
     */
793
    public static final UnicodeBlock YI_RADICALS
794
      = new UnicodeBlock('\uA490', '\uA4CF',
795
                         "YI_RADICALS");
796
 
797
    /**
798
     * Hangul Syllables.
799
     * '\uAC00' - '\uD7A3'.
800
     */
801
    public static final UnicodeBlock HANGUL_SYLLABLES
802
      = new UnicodeBlock('\uAC00', '\uD7A3',
803
                         "HANGUL_SYLLABLES");
804
 
805
    /**
806
     * Surrogates Area.
807
     * '\uD800' - '\uDFFF'.
808
     */
809
    public static final UnicodeBlock SURROGATES_AREA
810
      = new UnicodeBlock('\uD800', '\uDFFF',
811
                         "SURROGATES_AREA");
812
 
813
    /**
814
     * Private Use Area.
815
     * '\uE000' - '\uF8FF'.
816
     */
817
    public static final UnicodeBlock PRIVATE_USE_AREA
818
      = new UnicodeBlock('\uE000', '\uF8FF',
819
                         "PRIVATE_USE_AREA");
820
 
821
    /**
822
     * CJK Compatibility Ideographs.
823
     * '\uF900' - '\uFAFF'.
824
     */
825
    public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
826
      = new UnicodeBlock('\uF900', '\uFAFF',
827
                         "CJK_COMPATIBILITY_IDEOGRAPHS");
828
 
829
    /**
830
     * Alphabetic Presentation Forms.
831
     * '\uFB00' - '\uFB4F'.
832
     */
833
    public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
834
      = new UnicodeBlock('\uFB00', '\uFB4F',
835
                         "ALPHABETIC_PRESENTATION_FORMS");
836
 
837
    /**
838
     * Arabic Presentation Forms-A.
839
     * '\uFB50' - '\uFDFF'.
840
     */
841
    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
842
      = new UnicodeBlock('\uFB50', '\uFDFF',
843
                         "ARABIC_PRESENTATION_FORMS_A");
844
 
845
    /**
846
     * Combining Half Marks.
847
     * '\uFE20' - '\uFE2F'.
848
     */
849
    public static final UnicodeBlock COMBINING_HALF_MARKS
850
      = new UnicodeBlock('\uFE20', '\uFE2F',
851
                         "COMBINING_HALF_MARKS");
852
 
853
    /**
854
     * CJK Compatibility Forms.
855
     * '\uFE30' - '\uFE4F'.
856
     */
857
    public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
858
      = new UnicodeBlock('\uFE30', '\uFE4F',
859
                         "CJK_COMPATIBILITY_FORMS");
860
 
861
    /**
862
     * Small Form Variants.
863
     * '\uFE50' - '\uFE6F'.
864
     */
865
    public static final UnicodeBlock SMALL_FORM_VARIANTS
866
      = new UnicodeBlock('\uFE50', '\uFE6F',
867
                         "SMALL_FORM_VARIANTS");
868
 
869
    /**
870
     * Arabic Presentation Forms-B.
871
     * '\uFE70' - '\uFEFE'.
872
     */
873
    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
874
      = new UnicodeBlock('\uFE70', '\uFEFE',
875
                         "ARABIC_PRESENTATION_FORMS_B");
876
 
877
    /**
878
     * Halfwidth and Fullwidth Forms.
879
     * '\uFF00' - '\uFFEF'.
880
     */
881
    public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
882
      = new UnicodeBlock('\uFF00', '\uFFEF',
883
                         "HALFWIDTH_AND_FULLWIDTH_FORMS");
884
 
885
    /**
886
     * Specials.
887
     * '\uFEFF', '\uFFF0' - '\uFFFD'.
888
     */
889
    public static final UnicodeBlock SPECIALS
890
      = new UnicodeBlock('\uFFF0', '\uFFFD',
891
                         "SPECIALS");
892
 
893
    /**
894
     * The defined subsets.
895
     */
896
    private static final UnicodeBlock sets[] = {
897
      BASIC_LATIN,
898
      LATIN_1_SUPPLEMENT,
899
      LATIN_EXTENDED_A,
900
      LATIN_EXTENDED_B,
901
      IPA_EXTENSIONS,
902
      SPACING_MODIFIER_LETTERS,
903
      COMBINING_DIACRITICAL_MARKS,
904
      GREEK,
905
      CYRILLIC,
906
      ARMENIAN,
907
      HEBREW,
908
      ARABIC,
909
      SYRIAC,
910
      THAANA,
911
      DEVANAGARI,
912
      BENGALI,
913
      GURMUKHI,
914
      GUJARATI,
915
      ORIYA,
916
      TAMIL,
917
      TELUGU,
918
      KANNADA,
919
      MALAYALAM,
920
      SINHALA,
921
      THAI,
922
      LAO,
923
      TIBETAN,
924
      MYANMAR,
925
      GEORGIAN,
926
      HANGUL_JAMO,
927
      ETHIOPIC,
928
      CHEROKEE,
929
      UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
930
      OGHAM,
931
      RUNIC,
932
      KHMER,
933
      MONGOLIAN,
934
      LATIN_EXTENDED_ADDITIONAL,
935
      GREEK_EXTENDED,
936
      GENERAL_PUNCTUATION,
937
      SUPERSCRIPTS_AND_SUBSCRIPTS,
938
      CURRENCY_SYMBOLS,
939
      COMBINING_MARKS_FOR_SYMBOLS,
940
      LETTERLIKE_SYMBOLS,
941
      NUMBER_FORMS,
942
      ARROWS,
943
      MATHEMATICAL_OPERATORS,
944
      MISCELLANEOUS_TECHNICAL,
945
      CONTROL_PICTURES,
946
      OPTICAL_CHARACTER_RECOGNITION,
947
      ENCLOSED_ALPHANUMERICS,
948
      BOX_DRAWING,
949
      BLOCK_ELEMENTS,
950
      GEOMETRIC_SHAPES,
951
      MISCELLANEOUS_SYMBOLS,
952
      DINGBATS,
953
      BRAILLE_PATTERNS,
954
      CJK_RADICALS_SUPPLEMENT,
955
      KANGXI_RADICALS,
956
      IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
957
      CJK_SYMBOLS_AND_PUNCTUATION,
958
      HIRAGANA,
959
      KATAKANA,
960
      BOPOMOFO,
961
      HANGUL_COMPATIBILITY_JAMO,
962
      KANBUN,
963
      BOPOMOFO_EXTENDED,
964
      ENCLOSED_CJK_LETTERS_AND_MONTHS,
965
      CJK_COMPATIBILITY,
966
      CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
967
      CJK_UNIFIED_IDEOGRAPHS,
968
      YI_SYLLABLES,
969
      YI_RADICALS,
970
      HANGUL_SYLLABLES,
971
      SURROGATES_AREA,
972
      PRIVATE_USE_AREA,
973
      CJK_COMPATIBILITY_IDEOGRAPHS,
974
      ALPHABETIC_PRESENTATION_FORMS,
975
      ARABIC_PRESENTATION_FORMS_A,
976
      COMBINING_HALF_MARKS,
977
      CJK_COMPATIBILITY_FORMS,
978
      SMALL_FORM_VARIANTS,
979
      ARABIC_PRESENTATION_FORMS_B,
980
      HALFWIDTH_AND_FULLWIDTH_FORMS,
981
      SPECIALS,
982
    };
983
  } // class UnicodeBlock
984
 
985
  /**
986
   * The immutable value of this Character.
987
   *
988
   * @serial the value of this Character
989
   */
990
  private final char value;
991
 
992
  /**
993
   * Compatible with JDK 1.0+.
994
   */
995
  private static final long serialVersionUID = 3786198910865385080L;
996
 
997
  /**
998
   * Smallest value allowed for radix arguments in Java. This value is 2.
999
   *
1000
   * @see #digit(char, int)
1001
   * @see #forDigit(int, int)
1002
   * @see Integer#toString(int, int)
1003
   * @see Integer#valueOf(String)
1004
   */
1005
  public static final int MIN_RADIX = 2;
1006
 
1007
  /**
1008
   * Largest value allowed for radix arguments in Java. This value is 36.
1009
   *
1010
   * @see #digit(char, int)
1011
   * @see #forDigit(int, int)
1012
   * @see Integer#toString(int, int)
1013
   * @see Integer#valueOf(String)
1014
   */
1015
  public static final int MAX_RADIX = 36;
1016
 
1017
  /**
1018
   * The minimum value the char data type can hold.
1019
   * This value is <code>'\\u0000'</code>.
1020
   */
1021
  public static final char MIN_VALUE = '\u0000';
1022
 
1023
  /**
1024
   * The maximum value the char data type can hold.
1025
   * This value is <code>'\\uFFFF'</code>.
1026
   */
1027
  public static final char MAX_VALUE = '\uFFFF';
1028
 
1029
  /**
1030
   * Class object representing the primitive char data type.
1031
   *
1032
   * @since 1.1
1033
   */
1034
  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1035
 
1036
  /**
1037
   * The number of bits needed to represent a <code>char</code>.
1038
   * @since 1.5
1039
   */
1040
  public static final int SIZE = 16;
1041
 
1042
  // This caches some Character values, and is used by boxing
1043
  // conversions via valueOf().  We must cache at least 0..127;
1044
  // this constant controls how much we actually cache.
1045
  private static final int MAX_CACHE = 127;
1046
  private static Character[] charCache = new Character[MAX_CACHE + 1];
1047
 
1048
  /**
1049
   * Lu = Letter, Uppercase (Informative).
1050
   *
1051
   * @since 1.1
1052
   */
1053
  public static final byte UPPERCASE_LETTER = 1;
1054
 
1055
  /**
1056
   * Ll = Letter, Lowercase (Informative).
1057
   *
1058
   * @since 1.1
1059
   */
1060
  public static final byte LOWERCASE_LETTER = 2;
1061
 
1062
  /**
1063
   * Lt = Letter, Titlecase (Informative).
1064
   *
1065
   * @since 1.1
1066
   */
1067
  public static final byte TITLECASE_LETTER = 3;
1068
 
1069
  /**
1070
   * Mn = Mark, Non-Spacing (Normative).
1071
   *
1072
   * @since 1.1
1073
   */
1074
  public static final byte NON_SPACING_MARK = 6;
1075
 
1076
  /**
1077
   * Mc = Mark, Spacing Combining (Normative).
1078
   *
1079
   * @since 1.1
1080
   */
1081
  public static final byte COMBINING_SPACING_MARK = 8;
1082
 
1083
  /**
1084
   * Me = Mark, Enclosing (Normative).
1085
   *
1086
   * @since 1.1
1087
   */
1088
  public static final byte ENCLOSING_MARK = 7;
1089
 
1090
  /**
1091
   * Nd = Number, Decimal Digit (Normative).
1092
   *
1093
   * @since 1.1
1094
   */
1095
  public static final byte DECIMAL_DIGIT_NUMBER = 9;
1096
 
1097
  /**
1098
   * Nl = Number, Letter (Normative).
1099
   *
1100
   * @since 1.1
1101
   */
1102
  public static final byte LETTER_NUMBER = 10;
1103
 
1104
  /**
1105
   * No = Number, Other (Normative).
1106
   *
1107
   * @since 1.1
1108
   */
1109
  public static final byte OTHER_NUMBER = 11;
1110
 
1111
  /**
1112
   * Zs = Separator, Space (Normative).
1113
   *
1114
   * @since 1.1
1115
   */
1116
  public static final byte SPACE_SEPARATOR = 12;
1117
 
1118
  /**
1119
   * Zl = Separator, Line (Normative).
1120
   *
1121
   * @since 1.1
1122
   */
1123
  public static final byte LINE_SEPARATOR = 13;
1124
 
1125
  /**
1126
   * Zp = Separator, Paragraph (Normative).
1127
   *
1128
   * @since 1.1
1129
   */
1130
  public static final byte PARAGRAPH_SEPARATOR = 14;
1131
 
1132
  /**
1133
   * Cc = Other, Control (Normative).
1134
   *
1135
   * @since 1.1
1136
   */
1137
  public static final byte CONTROL = 15;
1138
 
1139
  /**
1140
   * Cf = Other, Format (Normative).
1141
   *
1142
   * @since 1.1
1143
   */
1144
  public static final byte FORMAT = 16;
1145
 
1146
  /**
1147
   * Cs = Other, Surrogate (Normative).
1148
   *
1149
   * @since 1.1
1150
   */
1151
  public static final byte SURROGATE = 19;
1152
 
1153
  /**
1154
   * Co = Other, Private Use (Normative).
1155
   *
1156
   * @since 1.1
1157
   */
1158
  public static final byte PRIVATE_USE = 18;
1159
 
1160
  /**
1161
   * Cn = Other, Not Assigned (Normative).
1162
   *
1163
   * @since 1.1
1164
   */
1165
  public static final byte UNASSIGNED = 0;
1166
 
1167
  /**
1168
   * Lm = Letter, Modifier (Informative).
1169
   *
1170
   * @since 1.1
1171
   */
1172
  public static final byte MODIFIER_LETTER = 4;
1173
 
1174
  /**
1175
   * Lo = Letter, Other (Informative).
1176
   *
1177
   * @since 1.1
1178
   */
1179
  public static final byte OTHER_LETTER = 5;
1180
 
1181
  /**
1182
   * Pc = Punctuation, Connector (Informative).
1183
   *
1184
   * @since 1.1
1185
   */
1186
  public static final byte CONNECTOR_PUNCTUATION = 23;
1187
 
1188
  /**
1189
   * Pd = Punctuation, Dash (Informative).
1190
   *
1191
   * @since 1.1
1192
   */
1193
  public static final byte DASH_PUNCTUATION = 20;
1194
 
1195
  /**
1196
   * Ps = Punctuation, Open (Informative).
1197
   *
1198
   * @since 1.1
1199
   */
1200
  public static final byte START_PUNCTUATION = 21;
1201
 
1202
  /**
1203
   * Pe = Punctuation, Close (Informative).
1204
   *
1205
   * @since 1.1
1206
   */
1207
  public static final byte END_PUNCTUATION = 22;
1208
 
1209
  /**
1210
   * Pi = Punctuation, Initial Quote (Informative).
1211
   *
1212
   * @since 1.4
1213
   */
1214
  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1215
 
1216
  /**
1217
   * Pf = Punctuation, Final Quote (Informative).
1218
   *
1219
   * @since 1.4
1220
   */
1221
  public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1222
 
1223
  /**
1224
   * Po = Punctuation, Other (Informative).
1225
   *
1226
   * @since 1.1
1227
   */
1228
  public static final byte OTHER_PUNCTUATION = 24;
1229
 
1230
  /**
1231
   * Sm = Symbol, Math (Informative).
1232
   *
1233
   * @since 1.1
1234
   */
1235
  public static final byte MATH_SYMBOL = 25;
1236
 
1237
  /**
1238
   * Sc = Symbol, Currency (Informative).
1239
   *
1240
   * @since 1.1
1241
   */
1242
  public static final byte CURRENCY_SYMBOL = 26;
1243
 
1244
  /**
1245
   * Sk = Symbol, Modifier (Informative).
1246
   *
1247
   * @since 1.1
1248
   */
1249
  public static final byte MODIFIER_SYMBOL = 27;
1250
 
1251
  /**
1252
   * So = Symbol, Other (Informative).
1253
   *
1254
   * @since 1.1
1255
   */
1256
  public static final byte OTHER_SYMBOL = 28;
1257
 
1258
  /**
1259
   * Undefined bidirectional character type. Undefined char values have
1260
   * undefined directionality in the Unicode specification.
1261
   *
1262
   * @since 1.4
1263
   */
1264
  public static final byte DIRECTIONALITY_UNDEFINED = -1;
1265
 
1266
  /**
1267
   * Strong bidirectional character type "L".
1268
   *
1269
   * @since 1.4
1270
   */
1271
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1272
 
1273
  /**
1274
   * Strong bidirectional character type "R".
1275
   *
1276
   * @since 1.4
1277
   */
1278
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1279
 
1280
  /**
1281
   * Strong bidirectional character type "AL".
1282
   *
1283
   * @since 1.4
1284
   */
1285
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1286
 
1287
  /**
1288
   * Weak bidirectional character type "EN".
1289
   *
1290
   * @since 1.4
1291
   */
1292
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1293
 
1294
  /**
1295
   * Weak bidirectional character type "ES".
1296
   *
1297
   * @since 1.4
1298
   */
1299
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1300
 
1301
  /**
1302
   * Weak bidirectional character type "ET".
1303
   *
1304
   * @since 1.4
1305
   */
1306
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1307
 
1308
  /**
1309
   * Weak bidirectional character type "AN".
1310
   *
1311
   * @since 1.4
1312
   */
1313
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1314
 
1315
  /**
1316
   * Weak bidirectional character type "CS".
1317
   *
1318
   * @since 1.4
1319
   */
1320
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1321
 
1322
  /**
1323
   * Weak bidirectional character type "NSM".
1324
   *
1325
   * @since 1.4
1326
   */
1327
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
1328
 
1329
  /**
1330
   * Weak bidirectional character type "BN".
1331
   *
1332
   * @since 1.4
1333
   */
1334
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
1335
 
1336
  /**
1337
   * Neutral bidirectional character type "B".
1338
   *
1339
   * @since 1.4
1340
   */
1341
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
1342
 
1343
  /**
1344
   * Neutral bidirectional character type "S".
1345
   *
1346
   * @since 1.4
1347
   */
1348
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
1349
 
1350
  /**
1351
   * Neutral bidirectional character type "WS".
1352
   *
1353
   * @since 1.4
1354
   */
1355
  public static final byte DIRECTIONALITY_WHITESPACE = 12;
1356
 
1357
  /**
1358
   * Neutral bidirectional character type "ON".
1359
   *
1360
   * @since 1.4
1361
   */
1362
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
1363
 
1364
  /**
1365
   * Strong bidirectional character type "LRE".
1366
   *
1367
   * @since 1.4
1368
   */
1369
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
1370
 
1371
  /**
1372
   * Strong bidirectional character type "LRO".
1373
   *
1374
   * @since 1.4
1375
   */
1376
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
1377
 
1378
  /**
1379
   * Strong bidirectional character type "RLE".
1380
   *
1381
   * @since 1.4
1382
   */
1383
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
1384
 
1385
  /**
1386
   * Strong bidirectional character type "RLO".
1387
   *
1388
   * @since 1.4
1389
   */
1390
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
1391
 
1392
  /**
1393
   * Weak bidirectional character type "PDF".
1394
   *
1395
   * @since 1.4
1396
   */
1397
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
1398
 
1399
  /**
1400
   * Stores unicode block offset lookup table. Exploit package visibility of
1401
   * String.value to avoid copying the array.
1402
   * @see #readChar(char)
1403
   * @see CharData#BLOCKS
1404
   */
1405
  private static final char[] blocks = String.zeroBasedStringValue(CharData.BLOCKS);
1406
 
1407
  /**
1408
   * Stores unicode attribute offset lookup table. Exploit package visibility
1409
   * of String.value to avoid copying the array.
1410
   * @see CharData#DATA
1411
   */
1412
  private static final char[] data = String.zeroBasedStringValue(CharData.DATA);
1413
 
1414
  /**
1415
   * Stores unicode numeric value attribute table. Exploit package visibility
1416
   * of String.value to avoid copying the array.
1417
   * @see CharData#NUM_VALUE
1418
   */
1419
  private static final char[] numValue
1420
          = String.zeroBasedStringValue(CharData.NUM_VALUE);
1421
 
1422
  /**
1423
   * Stores unicode uppercase attribute table. Exploit package visibility
1424
   * of String.value to avoid copying the array.
1425
   * @see CharData#UPPER
1426
   */
1427
  private static final char[] upper = String.zeroBasedStringValue(CharData.UPPER);
1428
 
1429
  /**
1430
   * Stores unicode lowercase attribute table. Exploit package visibility
1431
   * of String.value to avoid copying the array.
1432
   * @see CharData#LOWER
1433
   */
1434
  private static final char[] lower = String.zeroBasedStringValue(CharData.LOWER);
1435
 
1436
  /**
1437
   * Stores unicode direction attribute table. Exploit package visibility
1438
   * of String.value to avoid copying the array.
1439
   * @see CharData#DIRECTION
1440
   */
1441
  // Package visible for use by String.
1442
  static final char[] direction = String.zeroBasedStringValue(CharData.DIRECTION);
1443
 
1444
  /**
1445
   * Stores unicode titlecase table. Exploit package visibility of
1446
   * String.value to avoid copying the array.
1447
   * @see CharData#TITLE
1448
   */
1449
  private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);
1450
 
1451
  /**
1452
   * Mask for grabbing the type out of the contents of data.
1453
   * @see CharData#DATA
1454
   */
1455
  private static final int TYPE_MASK = 0x1F;
1456
 
1457
  /**
1458
   * Mask for grabbing the non-breaking space flag out of the contents of
1459
   * data.
1460
   * @see CharData#DATA
1461
   */
1462
  private static final int NO_BREAK_MASK = 0x20;
1463
 
1464
  /**
1465
   * Mask for grabbing the mirrored directionality flag out of the contents
1466
   * of data.
1467
   * @see CharData#DATA
1468
   */
1469
  private static final int MIRROR_MASK = 0x40;
1470
 
1471
  /**
1472
   * Min value for supplementary code point.
1473
   *
1474
   * @since 1.5
1475
   */
1476
  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
1477
 
1478
  /**
1479
   * Min value for code point.
1480
   *
1481
   * @since 1.5
1482
   */
1483
  public static final int MIN_CODE_POINT = 0;
1484
 
1485
 
1486
  /**
1487
   * Max value for code point.
1488
   *
1489
   * @since 1.5
1490
   */
1491
  public static final int MAX_CODE_POINT = 0x010ffff;
1492
 
1493
 
1494
  /**
1495
   * Minimum high surrogate code in UTF-16 encoding.
1496
   *
1497
   * @since 1.5
1498
   */
1499
  public static final char MIN_HIGH_SURROGATE = '\ud800';
1500
 
1501
  /**
1502
   * Maximum high surrogate code in UTF-16 encoding.
1503
   *
1504
   * @since 1.5
1505
   */
1506
  public static final char MAX_HIGH_SURROGATE = '\udbff';
1507
 
1508
  /**
1509
   * Minimum low surrogate code in UTF-16 encoding.
1510
   *
1511
   * @since 1.5
1512
   */
1513
  public static final char MIN_LOW_SURROGATE = '\udc00';
1514
 
1515
  /**
1516
   * Maximum low surrogate code in UTF-16 encoding.
1517
   *
1518
   * @since 1.5
1519
   */
1520
  public static final char MAX_LOW_SURROGATE = '\udfff';
1521
 
1522
  /**
1523
   * Minimum surrogate code in UTF-16 encoding.
1524
   *
1525
   * @since 1.5
1526
   */
1527
  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
1528
 
1529
  /**
1530
   * Maximum surrogate code in UTF-16 encoding.
1531
   *
1532
   * @since 1.5
1533
   */
1534
  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
1535
 
1536
  /**
1537
   * Grabs an attribute offset from the Unicode attribute database. The lower
1538
   * 5 bits are the character type, the next 2 bits are flags, and the top
1539
   * 9 bits are the offset into the attribute tables.
1540
   *
1541
   * @param ch the character to look up
1542
   * @return the character's attribute offset and type
1543
   * @see #TYPE_MASK
1544
   * @see #NO_BREAK_MASK
1545
   * @see #MIRROR_MASK
1546
   * @see CharData#DATA
1547
   * @see CharData#SHIFT
1548
   */
1549
  // Package visible for use in String.
1550
  static char readChar(char ch)
1551
  {
1552
    // Perform 16-bit addition to find the correct entry in data.
1553
    return data[(char) (blocks[ch >> CharData.SHIFT] + ch)];
1554
  }
1555
 
1556
  /**
   * Creates a wrapper object around the given character.
   *
   * @param value the character to wrap
   */
  public Character(char value)
  {
    super();
    this.value = value;
  }
1565
 
1566
  /**
1567
   * Returns the character which has been wrapped by this class.
1568
   *
1569
   * @return the character wrapped
1570
   */
1571
  public char charValue()
1572
  {
1573
    return value;
1574
  }
1575
 
1576
  /**
1577
   * Returns the numerical value (unsigned) of the wrapped character.
1578
   * Range of returned values: 0x0000-0xFFFF.
1579
   *
1580
   * @return the value of the wrapped character
1581
   */
1582
  public int hashCode()
1583
  {
1584
    return value;
1585
  }
1586
 
1587
  /**
1588
   * Determines if an object is equal to this object. This is only true for
1589
   * another Character object wrapping the same value.
1590
   *
1591
   * @param o object to compare
1592
   * @return true if o is a Character with the same value
1593
   */
1594
  public boolean equals(Object o)
1595
  {
1596
    return o instanceof Character && value == ((Character) o).value;
1597
  }
1598
 
1599
  /**
1600
   * Converts the wrapped character into a String.
1601
   *
1602
   * @return a String containing one character -- the wrapped character
1603
   *         of this instance
1604
   */
1605
  public String toString()
1606
  {
1607
    // Package constructor avoids an array copy.
1608
    return new String(new char[] { value }, 0, 1, true);
1609
  }
1610
 
1611
  /**
1612
   * Returns a String of length 1 representing the specified character.
1613
   *
1614
   * @param ch the character to convert
1615
   * @return a String containing the character
1616
   * @since 1.4
1617
   */
1618
  public static String toString(char ch)
1619
  {
1620
    // Package constructor avoids an array copy.
1621
    return new String(new char[] { ch }, 0, 1, true);
1622
  }
1623
 
1624
  /**
1625
   * Determines if a character is a Unicode lowercase letter. For example,
1626
   * <code>'a'</code> is lowercase.
1627
   * <br>
1628
   * lowercase = [Ll]
1629
   *
1630
   * @param ch character to test
1631
   * @return true if ch is a Unicode lowercase letter, else false
1632
   * @see #isUpperCase(char)
1633
   * @see #isTitleCase(char)
1634
   * @see #toLowerCase(char)
1635
   * @see #getType(char)
1636
   */
1637
  public static boolean isLowerCase(char ch)
1638
  {
1639
    return getType(ch) == LOWERCASE_LETTER;
1640
  }
1641
 
1642
  /**
1643
   * Determines if a character is a Unicode uppercase letter. For example,
1644
   * <code>'A'</code> is uppercase.
1645
   * <br>
1646
   * uppercase = [Lu]
1647
   *
1648
   * @param ch character to test
1649
   * @return true if ch is a Unicode uppercase letter, else false
1650
   * @see #isLowerCase(char)
1651
   * @see #isTitleCase(char)
1652
   * @see #toUpperCase(char)
1653
   * @see #getType(char)
1654
   */
1655
  public static boolean isUpperCase(char ch)
1656
  {
1657
    return getType(ch) == UPPERCASE_LETTER;
1658
  }
1659
 
1660
  /**
1661
   * Determines if a character is a Unicode titlecase letter. For example,
1662
   * the character "Lj" (Latin capital L with small letter j) is titlecase.
1663
   * <br>
1664
   * titlecase = [Lt]
1665
   *
1666
   * @param ch character to test
1667
   * @return true if ch is a Unicode titlecase letter, else false
1668
   * @see #isLowerCase(char)
1669
   * @see #isUpperCase(char)
1670
   * @see #toTitleCase(char)
1671
   * @see #getType(char)
1672
   */
1673
  public static boolean isTitleCase(char ch)
1674
  {
1675
    return getType(ch) == TITLECASE_LETTER;
1676
  }
1677
 
1678
  /**
1679
   * Determines if a character is a Unicode decimal digit. For example,
1680
   * <code>'0'</code> is a digit.
1681
   * <br>
1682
   * Unicode decimal digit = [Nd]
1683
   *
1684
   * @param ch character to test
1685
   * @return true if ch is a Unicode decimal digit, else false
1686
   * @see #digit(char, int)
1687
   * @see #forDigit(int, int)
1688
   * @see #getType(char)
1689
   */
1690
  public static boolean isDigit(char ch)
1691
  {
1692
    return getType(ch) == DECIMAL_DIGIT_NUMBER;
1693
  }
1694
 
1695
  /**
1696
   * Determines if a character is part of the Unicode Standard. This is an
1697
   * evolving standard, but covers every character in the data file.
1698
   * <br>
1699
   * defined = not [Cn]
1700
   *
1701
   * @param ch character to test
1702
   * @return true if ch is a Unicode character, else false
1703
   * @see #isDigit(char)
1704
   * @see #isLetter(char)
1705
   * @see #isLetterOrDigit(char)
1706
   * @see #isLowerCase(char)
1707
   * @see #isTitleCase(char)
1708
   * @see #isUpperCase(char)
1709
   */
1710
  public static boolean isDefined(char ch)
1711
  {
1712
    return getType(ch) != UNASSIGNED;
1713
  }
1714
 
1715
  /**
1716
   * Determines if a character is a Unicode letter. Not all letters have case,
1717
   * so this may return true when isLowerCase and isUpperCase return false.
1718
   * <br>
1719
   * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
1720
   *
1721
   * @param ch character to test
1722
   * @return true if ch is a Unicode letter, else false
1723
   * @see #isDigit(char)
1724
   * @see #isJavaIdentifierStart(char)
1725
   * @see #isJavaLetter(char)
1726
   * @see #isJavaLetterOrDigit(char)
1727
   * @see #isLetterOrDigit(char)
1728
   * @see #isLowerCase(char)
1729
   * @see #isTitleCase(char)
1730
   * @see #isUnicodeIdentifierStart(char)
1731
   * @see #isUpperCase(char)
1732
   */
1733
  public static boolean isLetter(char ch)
1734
  {
1735
    return ((1 << getType(ch))
1736
            & ((1 << UPPERCASE_LETTER)
1737
               | (1 << LOWERCASE_LETTER)
1738
               | (1 << TITLECASE_LETTER)
1739
               | (1 << MODIFIER_LETTER)
1740
               | (1 << OTHER_LETTER))) != 0;
1741
  }
1742
 
1743
  /**
1744
   * Determines if a character is a Unicode letter or a Unicode digit. This
1745
   * is the combination of isLetter and isDigit.
1746
   * <br>
1747
   * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
1748
   *
1749
   * @param ch character to test
1750
   * @return true if ch is a Unicode letter or a Unicode digit, else false
1751
   * @see #isDigit(char)
1752
   * @see #isJavaIdentifierPart(char)
1753
   * @see #isJavaLetter(char)
1754
   * @see #isJavaLetterOrDigit(char)
1755
   * @see #isLetter(char)
1756
   * @see #isUnicodeIdentifierPart(char)
1757
   */
1758
  public static boolean isLetterOrDigit(char ch)
1759
  {
1760
    return ((1 << getType(ch))
1761
            & ((1 << UPPERCASE_LETTER)
1762
               | (1 << LOWERCASE_LETTER)
1763
               | (1 << TITLECASE_LETTER)
1764
               | (1 << MODIFIER_LETTER)
1765
               | (1 << OTHER_LETTER)
1766
               | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
1767
  }
1768
 
1769
  /**
1770
   * Determines if a character can start a Java identifier. This is the
1771
   * combination of isLetter, any character where getType returns
1772
   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1773
   * (like '_').
1774
   *
1775
   * @param ch character to test
1776
   * @return true if ch can start a Java identifier, else false
1777
   * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
1778
   * @see #isJavaLetterOrDigit(char)
1779
   * @see #isJavaIdentifierStart(char)
1780
   * @see #isJavaIdentifierPart(char)
1781
   * @see #isLetter(char)
1782
   * @see #isLetterOrDigit(char)
1783
   * @see #isUnicodeIdentifierStart(char)
1784
   */
1785
  public static boolean isJavaLetter(char ch)
1786
  {
1787
    return isJavaIdentifierStart(ch);
1788
  }
1789
 
1790
  /**
1791
   * Determines if a character can follow the first letter in
1792
   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
1793
   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
1794
   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
1795
   * or isIdentifierIgnorable.
1796
   *
1797
   * @param ch character to test
1798
   * @return true if ch can follow the first letter in a Java identifier
1799
   * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
1800
   * @see #isJavaLetter(char)
1801
   * @see #isJavaIdentifierStart(char)
1802
   * @see #isJavaIdentifierPart(char)
1803
   * @see #isLetter(char)
1804
   * @see #isLetterOrDigit(char)
1805
   * @see #isUnicodeIdentifierPart(char)
1806
   * @see #isIdentifierIgnorable(char)
1807
   */
1808
  public static boolean isJavaLetterOrDigit(char ch)
1809
  {
1810
    return isJavaIdentifierPart(ch);
1811
  }
1812
 
1813
  /**
1814
   * Determines if a character can start a Java identifier. This is the
1815
   * combination of isLetter, any character where getType returns
1816
   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
1817
   * (like '_').
1818
   * <br>
1819
   * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
1820
   *
1821
   * @param ch character to test
1822
   * @return true if ch can start a Java identifier, else false
1823
   * @see #isJavaIdentifierPart(char)
1824
   * @see #isLetter(char)
1825
   * @see #isUnicodeIdentifierStart(char)
1826
   * @since 1.1
1827
   */
1828
  public static boolean isJavaIdentifierStart(char ch)
1829
  {
1830
    return ((1 << getType(ch))
1831
            & ((1 << UPPERCASE_LETTER)
1832
               | (1 << LOWERCASE_LETTER)
1833
               | (1 << TITLECASE_LETTER)
1834
               | (1 << MODIFIER_LETTER)
1835
               | (1 << OTHER_LETTER)
1836
               | (1 << LETTER_NUMBER)
1837
               | (1 << CURRENCY_SYMBOL)
1838
               | (1 << CONNECTOR_PUNCTUATION))) != 0;
1839
  }
1840
 
1841
  /**
1842
   * Determines if a character can follow the first letter in
1843
   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
1844
   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
1845
   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
1846
   * or isIdentifierIgnorable.
1847
   * <br>
1848
   * Java identifier extender =
1849
   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
1850
   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1851
   *
1852
   * @param ch character to test
1853
   * @return true if ch can follow the first letter in a Java identifier
1854
   * @see #isIdentifierIgnorable(char)
1855
   * @see #isJavaIdentifierStart(char)
1856
   * @see #isLetterOrDigit(char)
1857
   * @see #isUnicodeIdentifierPart(char)
1858
   * @since 1.1
1859
   */
1860
  public static boolean isJavaIdentifierPart(char ch)
1861
  {
1862
    int category = getType(ch);
1863
    return ((1 << category)
1864
            & ((1 << UPPERCASE_LETTER)
1865
               | (1 << LOWERCASE_LETTER)
1866
               | (1 << TITLECASE_LETTER)
1867
               | (1 << MODIFIER_LETTER)
1868
               | (1 << OTHER_LETTER)
1869
               | (1 << NON_SPACING_MARK)
1870
               | (1 << COMBINING_SPACING_MARK)
1871
               | (1 << DECIMAL_DIGIT_NUMBER)
1872
               | (1 << LETTER_NUMBER)
1873
               | (1 << CURRENCY_SYMBOL)
1874
               | (1 << CONNECTOR_PUNCTUATION)
1875
               | (1 << FORMAT))) != 0
1876
      || (category == CONTROL && isIdentifierIgnorable(ch));
1877
  }
1878
 
1879
  /**
1880
   * Determines if a character can start a Unicode identifier.  Only
1881
   * letters can start a Unicode identifier, but this includes characters
1882
   * in LETTER_NUMBER.
1883
   * <br>
1884
   * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
1885
   *
1886
   * @param ch character to test
1887
   * @return true if ch can start a Unicode identifier, else false
1888
   * @see #isJavaIdentifierStart(char)
1889
   * @see #isLetter(char)
1890
   * @see #isUnicodeIdentifierPart(char)
1891
   * @since 1.1
1892
   */
1893
  public static boolean isUnicodeIdentifierStart(char ch)
1894
  {
1895
    return ((1 << getType(ch))
1896
            & ((1 << UPPERCASE_LETTER)
1897
               | (1 << LOWERCASE_LETTER)
1898
               | (1 << TITLECASE_LETTER)
1899
               | (1 << MODIFIER_LETTER)
1900
               | (1 << OTHER_LETTER)
1901
               | (1 << LETTER_NUMBER))) != 0;
1902
  }
1903
 
1904
  /**
1905
   * Determines if a character can follow the first letter in
1906
   * a Unicode identifier. This includes letters, connecting punctuation,
1907
   * digits, numeric letters, combining marks, non-spacing marks, and
1908
   * isIdentifierIgnorable.
1909
   * <br>
1910
   * Unicode identifier extender =
1911
   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
1912
   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
1913
   *
1914
   * @param ch character to test
1915
   * @return true if ch can follow the first letter in a Unicode identifier
1916
   * @see #isIdentifierIgnorable(char)
1917
   * @see #isJavaIdentifierPart(char)
1918
   * @see #isLetterOrDigit(char)
1919
   * @see #isUnicodeIdentifierStart(char)
1920
   * @since 1.1
1921
   */
1922
  public static boolean isUnicodeIdentifierPart(char ch)
1923
  {
1924
    int category = getType(ch);
1925
    return ((1 << category)
1926
            & ((1 << UPPERCASE_LETTER)
1927
               | (1 << LOWERCASE_LETTER)
1928
               | (1 << TITLECASE_LETTER)
1929
               | (1 << MODIFIER_LETTER)
1930
               | (1 << OTHER_LETTER)
1931
               | (1 << NON_SPACING_MARK)
1932
               | (1 << COMBINING_SPACING_MARK)
1933
               | (1 << DECIMAL_DIGIT_NUMBER)
1934
               | (1 << LETTER_NUMBER)
1935
               | (1 << CONNECTOR_PUNCTUATION)
1936
               | (1 << FORMAT))) != 0
1937
      || (category == CONTROL && isIdentifierIgnorable(ch));
1938
  }
1939
 
1940
  /**
1941
   * Determines if a character is ignorable in a Unicode identifier. This
1942
   * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
1943
   * through <code>'\u0008'</code>, <code>'\u000E'</code> through
1944
   * <code>'\u001B'</code>, and <code>'\u007F'</code> through
1945
   * <code>'\u009F'</code>), and FORMAT characters.
1946
   * <br>
1947
   * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
1948
   *    |U+007F-U+009F
1949
   *
1950
   * @param ch character to test
1951
   * @return true if ch is ignorable in a Unicode or Java identifier
1952
   * @see #isJavaIdentifierPart(char)
1953
   * @see #isUnicodeIdentifierPart(char)
1954
   * @since 1.1
1955
   */
1956
  public static boolean isIdentifierIgnorable(char ch)
1957
  {
1958
    return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F'
1959
                               || (ch <= '\u001B' && ch >= '\u000E')))
1960
      || getType(ch) == FORMAT;
1961
  }
1962
 
1963
  /**
1964
   * Converts a Unicode character into its lowercase equivalent mapping.
1965
   * If a mapping does not exist, then the character passed is returned.
1966
   * Note that isLowerCase(toLowerCase(ch)) does not always return true.
1967
   *
1968
   * @param ch character to convert to lowercase
1969
   * @return lowercase mapping of ch, or ch if lowercase mapping does
1970
   *         not exist
1971
   * @see #isLowerCase(char)
1972
   * @see #isUpperCase(char)
1973
   * @see #toTitleCase(char)
1974
   * @see #toUpperCase(char)
1975
   */
1976
  public static char toLowerCase(char ch)
1977
  {
1978
    // Signedness doesn't matter, as result is cast back to char.
1979
    return (char) (ch + lower[readChar(ch) >> 7]);
1980
  }
1981
 
1982
  /**
1983
   * Converts a Unicode character into its uppercase equivalent mapping.
1984
   * If a mapping does not exist, then the character passed is returned.
1985
   * Note that isUpperCase(toUpperCase(ch)) does not always return true.
1986
   *
1987
   * @param ch character to convert to uppercase
1988
   * @return uppercase mapping of ch, or ch if uppercase mapping does
1989
   *         not exist
1990
   * @see #isLowerCase(char)
1991
   * @see #isUpperCase(char)
1992
   * @see #toLowerCase(char)
1993
   * @see #toTitleCase(char)
1994
   */
1995
  public static char toUpperCase(char ch)
1996
  {
1997
    // Signedness doesn't matter, as result is cast back to char.
1998
    return (char) (ch + upper[readChar(ch) >> 7]);
1999
  }
2000
 
2001
  /**
2002
   * Converts a Unicode character into its titlecase equivalent mapping.
2003
   * If a mapping does not exist, then the character passed is returned.
2004
   * Note that isTitleCase(toTitleCase(ch)) does not always return true.
2005
   *
2006
   * @param ch character to convert to titlecase
2007
   * @return titlecase mapping of ch, or ch if titlecase mapping does
2008
   *         not exist
2009
   * @see #isTitleCase(char)
2010
   * @see #toLowerCase(char)
2011
   * @see #toUpperCase(char)
2012
   */
2013
  public static char toTitleCase(char ch)
2014
  {
2015
    // As title is short, it doesn't hurt to exhaustively iterate over it.
2016
    for (int i = title.length - 2; i >= 0; i -= 2)
2017
      if (title[i] == ch)
2018
        return title[i + 1];
2019
    return toUpperCase(ch);
2020
  }
2021
 
2022
  /**
2023
   * Converts a character into a digit of the specified radix. If the radix
2024
   * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
2025
   * exceeds the radix, or if ch is not a decimal digit or in the case
2026
   * insensitive set of 'a'-'z', the result is -1.
2027
   * <br>
2028
   * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
2029
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
2030
   *
2031
   * @param ch character to convert into a digit
2032
   * @param radix radix in which ch is a digit
2033
   * @return digit which ch represents in radix, or -1 if ch is not a valid digit
2034
   * @see #MIN_RADIX
2035
   * @see #MAX_RADIX
2036
   * @see #forDigit(int, int)
2037
   * @see #isDigit(char)
2038
   * @see #getNumericValue(char)
2039
   */
2040
  public static int digit(char ch, int radix)
2041
  {
2042
    if (radix < MIN_RADIX || radix > MAX_RADIX)
2043
      return -1;
2044
    char attr = readChar(ch);
2045
    if (((1 << (attr & TYPE_MASK))
2046
         & ((1 << UPPERCASE_LETTER)
2047
            | (1 << LOWERCASE_LETTER)
2048
            | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
2049
      {
2050
        // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
2051
        int digit = numValue[attr >> 7];
2052
        return (digit < radix) ? digit : -1;
2053
      }
2054
    return -1;
2055
  }
2056
 
2057
  /**
2058
   * Returns the Unicode numeric value property of a character. For example,
2059
   * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
2060
   *
2061
   * <p>This method also returns values for the letters A through Z, (not
2062
   * specified by Unicode), in these ranges: <code>'\u0041'</code>
2063
   * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
2064
   * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
2065
   * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
2066
   * <code>'\uFF5A'</code> (full width variants).
2067
   *
2068
   * <p>If the character lacks a numeric value property, -1 is returned.
2069
   * If the character has a numeric value property which is not representable
2070
   * as a nonnegative integer, such as a fraction, -2 is returned.
2071
   *
2072
   * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
2073
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
2074
   *
2075
   * @param ch character from which the numeric value property will
2076
   *        be retrieved
2077
   * @return the numeric value property of ch, or -1 if it does not exist, or
2078
   *         -2 if it is not representable as a nonnegative integer
2079
   * @see #forDigit(int, int)
2080
   * @see #digit(char, int)
2081
   * @see #isDigit(char)
2082
   * @since 1.1
2083
   */
2084
  public static int getNumericValue(char ch)
2085
  {
2086
    // Treat numValue as signed.
2087
    return (short) numValue[readChar(ch) >> 7];
2088
  }
2089
 
2090
  /**
2091
   * Determines if a character is an ISO-LATIN-1 space. This is only the five
2092
   * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
2093
   * <code>'\r'</code>, and <code>' '</code>.
2094
   * <br>
2095
   * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
2096
   *
2097
   * @param ch character to test
2098
   * @return true if ch is a space, else false
2099
   * @deprecated Replaced by {@link #isWhitespace(char)}
2100
   * @see #isSpaceChar(char)
2101
   * @see #isWhitespace(char)
2102
   */
2103
  public static boolean isSpace(char ch)
2104
  {
2105
    // Performing the subtraction up front alleviates need to compare longs.
2106
    return ch-- <= ' ' && ((1 << ch)
2107
                           & ((1 << (' ' - 1))
2108
                              | (1 << ('\t' - 1))
2109
                              | (1 << ('\n' - 1))
2110
                              | (1 << ('\r' - 1))
2111
                              | (1 << ('\f' - 1)))) != 0;
2112
  }
2113
 
2114
  /**
2115
   * Determines if a character is a Unicode space character. This includes
2116
   * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
2117
   * <br>
2118
   * Unicode space = [Zs]|[Zp]|[Zl]
2119
   *
2120
   * @param ch character to test
2121
   * @return true if ch is a Unicode space, else false
2122
   * @see #isWhitespace(char)
2123
   * @since 1.1
2124
   */
2125
  public static boolean isSpaceChar(char ch)
2126
  {
2127
    return ((1 << getType(ch))
2128
            & ((1 << SPACE_SEPARATOR)
2129
               | (1 << LINE_SEPARATOR)
2130
               | (1 << PARAGRAPH_SEPARATOR))) != 0;
2131
  }
2132
 
2133
  /**
2134
   * Determines if a character is Java whitespace. This includes Unicode
2135
   * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
2136
   * PARAGRAPH_SEPARATOR) except the non-breaking spaces
2137
   * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
2138
   * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
2139
   * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
2140
   * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
2141
   * and <code>'\u001F'</code>.
2142
   * <br>
2143
   * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
2144
   *
2145
   * @param ch character to test
2146
   * @return true if ch is Java whitespace, else false
2147
   * @see #isSpaceChar(char)
2148
   * @since 1.1
2149
   */
2150
  public static boolean isWhitespace(char ch)
2151
  {
2152
    int attr = readChar(ch);
2153
    return ((((1 << (attr & TYPE_MASK))
2154
              & ((1 << SPACE_SEPARATOR)
2155
                 | (1 << LINE_SEPARATOR)
2156
                 | (1 << PARAGRAPH_SEPARATOR))) != 0)
2157
            && (attr & NO_BREAK_MASK) == 0)
2158
      || (ch <= '\u001F' && ((1 << ch)
2159
                             & ((1 << '\t')
2160
                                | (1 << '\n')
2161
                                | (1 << '\u000B')
2162
                                | (1 << '\u000C')
2163
                                | (1 << '\r')
2164
                                | (1 << '\u001C')
2165
                                | (1 << '\u001D')
2166
                                | (1 << '\u001E')
2167
                                | (1 << '\u001F'))) != 0);
2168
  }
2169
 
2170
  /**
2171
   * Determines if a character has the ISO Control property.
2172
   * <br>
2173
   * ISO Control = [Cc]
2174
   *
2175
   * @param ch character to test
2176
   * @return true if ch is an ISO Control character, else false
2177
   * @see #isSpaceChar(char)
2178
   * @see #isWhitespace(char)
2179
   * @since 1.1
2180
   */
2181
  public static boolean isISOControl(char ch)
2182
  {
2183
    return getType(ch) == CONTROL;
2184
  }
2185
 
2186
  /**
2187
   * Returns the Unicode general category property of a character.
2188
   *
2189
   * @param ch character from which the general category property will
2190
   *        be retrieved
2191
   * @return the character category property of ch as an integer
2192
   * @see #UNASSIGNED
2193
   * @see #UPPERCASE_LETTER
2194
   * @see #LOWERCASE_LETTER
2195
   * @see #TITLECASE_LETTER
2196
   * @see #MODIFIER_LETTER
2197
   * @see #OTHER_LETTER
2198
   * @see #NON_SPACING_MARK
2199
   * @see #ENCLOSING_MARK
2200
   * @see #COMBINING_SPACING_MARK
2201
   * @see #DECIMAL_DIGIT_NUMBER
2202
   * @see #LETTER_NUMBER
2203
   * @see #OTHER_NUMBER
2204
   * @see #SPACE_SEPARATOR
2205
   * @see #LINE_SEPARATOR
2206
   * @see #PARAGRAPH_SEPARATOR
2207
   * @see #CONTROL
2208
   * @see #FORMAT
2209
   * @see #PRIVATE_USE
2210
   * @see #SURROGATE
2211
   * @see #DASH_PUNCTUATION
2212
   * @see #START_PUNCTUATION
2213
   * @see #END_PUNCTUATION
2214
   * @see #CONNECTOR_PUNCTUATION
2215
   * @see #OTHER_PUNCTUATION
2216
   * @see #MATH_SYMBOL
2217
   * @see #CURRENCY_SYMBOL
2218
   * @see #MODIFIER_SYMBOL
2219
   * @see #INITIAL_QUOTE_PUNCTUATION
2220
   * @see #FINAL_QUOTE_PUNCTUATION
2221
   * @since 1.1
2222
   */
2223
  public static int getType(char ch)
2224
  {
2225
    return readChar(ch) & TYPE_MASK;
2226
  }
2227
 
2228
  /**
2229
   * Converts a digit into a character which represents that digit
2230
   * in a specified radix. If the radix exceeds MIN_RADIX or MAX_RADIX,
2231
   * or the digit exceeds the radix, then the null character <code>'\0'</code>
2232
   * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
2233
   * <br>
2234
   * return value boundary = U+0030-U+0039|U+0061-U+007A
2235
   *
2236
   * @param digit digit to be converted into a character
2237
   * @param radix radix of digit
2238
   * @return character representing digit in radix, or '\0'
2239
   * @see #MIN_RADIX
2240
   * @see #MAX_RADIX
2241
   * @see #digit(char, int)
2242
   */
2243
  public static char forDigit(int digit, int radix)
2244
  {
2245
    if (radix < MIN_RADIX || radix > MAX_RADIX
2246
        || digit < 0 || digit >= radix)
2247
      return '\0';
2248
    return Number.digits[digit];
2249
  }
2250
 
2251
  /**
2252
   * Returns the Unicode directionality property of the character. This
2253
   * is used in the visual ordering of text.
2254
   *
2255
   * @param ch the character to look up
2256
   * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
2257
   * @see #DIRECTIONALITY_UNDEFINED
2258
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT
2259
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT
2260
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
2261
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER
2262
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
2263
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
2264
   * @see #DIRECTIONALITY_ARABIC_NUMBER
2265
   * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
2266
   * @see #DIRECTIONALITY_NONSPACING_MARK
2267
   * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
2268
   * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
2269
   * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
2270
   * @see #DIRECTIONALITY_WHITESPACE
2271
   * @see #DIRECTIONALITY_OTHER_NEUTRALS
2272
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
2273
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
2274
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
2275
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
2276
   * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
2277
   * @since 1.4
2278
   */
2279
  public static byte getDirectionality(char ch)
2280
  {
2281
    // The result will correctly be signed.
2282
    return (byte) (direction[readChar(ch) >> 7] >> 2);
2283
  }
2284
 
2285
  /**
2286
   * Determines whether the character is mirrored according to Unicode. For
2287
   * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
2288
   * left-to-right text, but ')' in right-to-left text.
2289
   *
2290
   * @param ch the character to look up
2291
   * @return true if the character is mirrored
2292
   * @since 1.4
2293
   */
2294
  public static boolean isMirrored(char ch)
2295
  {
2296
    return (readChar(ch) & MIRROR_MASK) != 0;
2297
  }
2298
 
2299
  /**
2300
   * Compares another Character to this Character, numerically.
2301
   *
2302
   * @param anotherCharacter Character to compare with this Character
2303
   * @return a negative integer if this Character is less than
2304
   *         anotherCharacter, zero if this Character is equal, and
2305
   *         a positive integer if this Character is greater
2306
   * @throws NullPointerException if anotherCharacter is null
2307
   * @since 1.2
2308
   */
2309
  public int compareTo(Character anotherCharacter)
2310
  {
2311
    return value - anotherCharacter.value;
2312
  }
2313
 
2314
  /**
2315
   * Compares an object to this Character.  Assuming the object is a
2316
   * Character object, this method performs the same comparison as
2317
   * compareTo(Character).
2318
   *
2319
   * @param o object to compare
2320
   * @return the comparison value
2321
   * @throws ClassCastException if o is not a Character object
2322
   * @throws NullPointerException if o is null
2323
   * @see #compareTo(Character)
2324
   * @since 1.2
2325
   */
2326
  public int compareTo(Object o)
2327
  {
2328
    return compareTo((Character) o);
2329
  }
2330
 
2331
  /**
2332
   * Returns a <code>Character</code> object wrapping the value.
2333
   * In contrast to the <code>Character</code> constructor, this method
2334
   * will cache some values.  It is used by boxing conversion.
2335
   *
2336
   * @param val the value to wrap
2337
   * @return the <code>Character</code>
2338
   *
2339
   * @since 1.5
2340
   */
2341
  public static Character valueOf(char val)
2342
  {
2343
    if (val > MAX_CACHE)
2344
      return new Character(val);
2345
    synchronized (charCache)
2346
      {
2347
    if (charCache[val - MIN_VALUE] == null)
2348
      charCache[val - MIN_VALUE] = new Character(val);
2349
    return charCache[val - MIN_VALUE];
2350
      }
2351
  }
2352
 
2353
  /**
2354
   * Reverse the bytes in val.
2355
   * @since 1.5
2356
   */
2357
  public static char reverseBytes(char val)
2358
  {
2359
    return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
2360
  }
2361
 
2362
  /**
2363
   * Converts a unicode code point to a UTF-16 representation of that
2364
   * code point.
2365
   *
2366
   * @param codePoint the unicode code point
2367
   *
2368
   * @return the UTF-16 representation of that code point
2369
   *
2370
   * @throws IllegalArgumentException if the code point is not a valid
2371
   *         unicode code point
2372
   *
2373
   * @since 1.5
2374
   */
2375
  public static char[] toChars(int codePoint)
2376
  {
2377
    char[] result = new char[charCount(codePoint)];
2378
    int ignore = toChars(codePoint, result, 0);
2379
    return result;
2380
  }
2381
 
2382
  /**
2383
   * Converts a unicode code point to its UTF-16 representation.
2384
   *
2385
   * @param codePoint the unicode code point
2386
   * @param dst the target char array
2387
   * @param dstIndex the start index for the target
2388
   *
2389
   * @return number of characters written to <code>dst</code>
2390
   *
2391
   * @throws IllegalArgumentException if <code>codePoint</code> is not a
2392
   *         valid unicode code point
2393
   * @throws NullPointerException if <code>dst</code> is <code>null</code>
2394
   * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
2395
   *         in <code>dst</code> or if the UTF-16 representation does not
2396
   *         fit into <code>dst</code>
2397
   *
2398
   * @since 1.5
2399
   */
2400
  public static int toChars(int codePoint, char[] dst, int dstIndex)
2401
  {
2402
    if (!isValidCodePoint(codePoint))
2403
      {
2404
        throw new IllegalArgumentException("not a valid code point: "
2405
                                           + codePoint);
2406
      }
2407
 
2408
    int result;
2409
    if (isSupplementaryCodePoint(codePoint))
2410
      {
2411
        // Write second char first to cause IndexOutOfBoundsException
2412
        // immediately.
2413
        dst[dstIndex + 1] = (char) ((codePoint & 0x3ff)
2414
                                    + (int) MIN_LOW_SURROGATE );
2415
        dst[dstIndex] = (char) ((codePoint >> 10) + (int) MIN_HIGH_SURROGATE);
2416
        result = 2;
2417
    }
2418
    else
2419
      {
2420
        dst[dstIndex] = (char) codePoint;
2421
        result = 1;
2422
      }
2423
    return result;
2424
  }
2425
 
2426
  /**
2427
   * Return number of 16-bit characters required to represent the given
2428
   * code point.
2429
   *
2430
   * @param codePoint a unicode code point
2431
   *
2432
   * @return 2 if codePoint >= 0x10000, 1 otherwise.
2433
   *
2434
   * @since 1.5
2435
   */
2436
  public static int charCount(int codePoint)
2437
  {
2438
    return
2439
      (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT)
2440
      ? 2
2441
      : 1;
2442
  }
2443
 
2444
  /**
2445
   * Determines whether the specified code point is
2446
   * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
2447
   * supplementary character range.
2448
   *
2449
   * @param codePoint a Unicode code point
2450
   *
2451
   * @return <code>true</code> if code point is in supplementary range
2452
   *
2453
   * @since 1.5
2454
   */
2455
  public static boolean isSupplementaryCodePoint(int codePoint)
2456
  {
2457
    return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
2458
      && codePoint <= MAX_CODE_POINT;
2459
  }
2460
 
2461
  /**
2462
   * Determines whether the specified code point is
2463
   * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
2464
   *
2465
   * @param codePoint a Unicode code point
2466
   *
2467
   * @return <code>true</code> if code point is valid
2468
   *
2469
   * @since 1.5
2470
   */
2471
  public static boolean isValidCodePoint(int codePoint)
2472
  {
2473
    return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
2474
  }
2475
 
2476
  /**
2477
   * Return true if the given character is a high surrogate.
2478
   * @param ch the character
2479
   * @return true if the character is a high surrogate character
2480
   *
2481
   * @since 1.5
2482
   */
2483
  public static boolean isHighSurrogate(char ch)
2484
  {
2485
    return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
2486
  }
2487
 
2488
  /**
2489
   * Return true if the given character is a low surrogate.
2490
   * @param ch the character
2491
   * @return true if the character is a low surrogate character
2492
   *
2493
   * @since 1.5
2494
   */
2495
  public static boolean isLowSurrogate(char ch)
2496
  {
2497
    return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
2498
  }
2499
 
2500
  /**
2501
   * Return true if the given characters compose a surrogate pair.
2502
   * This is true if the first character is a high surrogate and the
2503
   * second character is a low surrogate.
2504
   * @param ch1 the first character
2505
   * @param ch2 the second character
2506
   * @return true if the characters compose a surrogate pair
2507
   *
2508
   * @since 1.5
2509
   */
2510
  public static boolean isSurrogatePair(char ch1, char ch2)
2511
  {
2512
    return isHighSurrogate(ch1) && isLowSurrogate(ch2);
2513
  }
2514
 
2515
  /**
2516
   * Given a valid surrogate pair, this returns the corresponding
2517
   * code point.
2518
   * @param high the high character of the pair
2519
   * @param low the low character of the pair
2520
   * @return the corresponding code point
2521
   *
2522
   * @since 1.5
2523
   */
2524
  public static int toCodePoint(char high, char low)
2525
  {
2526
    return ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE);
2527
  }
2528
 
2529
  /**
2530
   * Get the code point at the specified index in the CharSequence.
2531
   * This is like CharSequence#charAt(int), but if the character is
2532
   * the start of a surrogate pair, and there is a following
2533
   * character, and this character completes the pair, then the
2534
   * corresponding supplementary code point is returned.  Otherwise,
2535
   * the character at the index is returned.
2536
   *
2537
   * @param sequence the CharSequence
2538
   * @param index the index of the codepoint to get, starting at 0
2539
   * @return the codepoint at the specified index
2540
   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
2541
   * @since 1.5
2542
   */
2543
  public static int codePointAt(CharSequence sequence, int index)
2544
  {
2545
    int len = sequence.length();
2546
    if (index < 0 || index >= len)
2547
      throw new IndexOutOfBoundsException();
2548
    char high = sequence.charAt(index);
2549
    if (! isHighSurrogate(high) || ++index >= len)
2550
      return high;
2551
    char low = sequence.charAt(index);
2552
    if (! isLowSurrogate(low))
2553
      return high;
2554
    return toCodePoint(high, low);
2555
  }
2556
 
2557
  /**
2558
   * Get the code point at the specified index in the char array.
2559
   * If the character is the start of a surrogate pair, and there is a
2560
   * following character, and this character completes the pair, then
2561
   * the corresponding supplementary code point is returned.
2562
   * Otherwise, the character at the index is returned.
2563
   *
2564
   * @param chars the character array in which to look
2565
   * @param index the index of the codepoint to get, starting at 0
2566
   * @return the codepoint at the specified index
2567
   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
2568
   * @since 1.5
2569
   */
2570
  public static int codePointAt(char[] chars, int index)
2571
  {
2572
    return codePointAt(chars, index, chars.length);
2573
  }
2574
 
2575
  /**
2576
   * Get the code point at the specified index in the CharSequence.
2577
   * If the character is the start of a surrogate pair, and there is a
2578
   * following character within the specified range, and this
2579
   * character completes the pair, then the corresponding
2580
   * supplementary code point is returned.  Otherwise, the character
2581
   * at the index is returned.
2582
   *
2583
   * @param chars the character array in which to look
2584
   * @param index the index of the codepoint to get, starting at 0
2585
   * @param limit the limit past which characters should not be examined
2586
   * @return the codepoint at the specified index
2587
   * @throws IndexOutOfBoundsException if index is negative or &gt;=
2588
   * limit, or if limit is negative or &gt;= the length of the array
2589
   * @since 1.5
2590
   */
2591
  public static int codePointAt(char[] chars, int index, int limit)
2592
  {
2593
    if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
2594
      throw new IndexOutOfBoundsException();
2595
    char high = chars[index];
2596
    if (! isHighSurrogate(high) || ++index >= limit)
2597
      return high;
2598
    char low = chars[index];
2599
    if (! isLowSurrogate(low))
2600
      return high;
2601
    return toCodePoint(high, low);
2602
  }
2603
 
2604
  /**
2605
   * Get the code point before the specified index.  This is like
2606
   * #codePointAt(char[], int), but checks the characters at
2607
   * <code>index-1</code> and <code>index-2</code> to see if they form
2608
   * a supplementary code point.  If they do not, the character at
2609
   * <code>index-1</code> is returned.
2610
   *
2611
   * @param chars the character array
2612
   * @param index the index just past the codepoint to get, starting at 0
2613
   * @return the codepoint at the specified index
2614
   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
2615
   * @since 1.5
2616
   */
2617
  public static int codePointBefore(char[] chars, int index)
2618
  {
2619
    return codePointBefore(chars, index, 1);
2620
  }
2621
 
2622
  /**
2623
   * Get the code point before the specified index.  This is like
2624
   * #codePointAt(char[], int), but checks the characters at
2625
   * <code>index-1</code> and <code>index-2</code> to see if they form
2626
   * a supplementary code point.  If they do not, the character at
2627
   * <code>index-1</code> is returned.  The start parameter is used to
2628
   * limit the range of the array which may be examined.
2629
   *
2630
   * @param chars the character array
2631
   * @param index the index just past the codepoint to get, starting at 0
2632
   * @param start the index before which characters should not be examined
2633
   * @return the codepoint at the specified index
2634
   * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
2635
   * the length of the array, or if limit is negative or &gt;= the
2636
   * length of the array
2637
   * @since 1.5
2638
   */
2639
  public static int codePointBefore(char[] chars, int index, int start)
2640
  {
2641
    if (index < start || index > chars.length
2642
        || start < 0 || start >= chars.length)
2643
      throw new IndexOutOfBoundsException();
2644
    --index;
2645
    char low = chars[index];
2646
    if (! isLowSurrogate(low) || --index < start)
2647
      return low;
2648
    char high = chars[index];
2649
    if (! isHighSurrogate(high))
2650
      return low;
2651
    return toCodePoint(high, low);
2652
  }
2653
 
2654
  /**
2655
   * Get the code point before the specified index.  This is like
2656
   * #codePointAt(CharSequence, int), but checks the characters at
2657
   * <code>index-1</code> and <code>index-2</code> to see if they form
2658
   * a supplementary code point.  If they do not, the character at
2659
   * <code>index-1</code> is returned.
2660
   *
2661
   * @param sequence the CharSequence
2662
   * @param index the index just past the codepoint to get, starting at 0
2663
   * @return the codepoint at the specified index
2664
   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
2665
   * @since 1.5
2666
   */
2667
  public static int codePointBefore(CharSequence sequence, int index)
2668
  {
2669
    int len = sequence.length();
2670
    if (index < 1 || index > len)
2671
      throw new IndexOutOfBoundsException();
2672
    --index;
2673
    char low = sequence.charAt(index);
2674
    if (! isLowSurrogate(low) || --index < 0)
2675
      return low;
2676
    char high = sequence.charAt(index);
2677
    if (! isHighSurrogate(high))
2678
      return low;
2679
    return toCodePoint(high, low);
2680
  }
2681
} // class Character

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.