OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libjava/] [classpath/] [java/] [lang/] [Character.java] - Blame information for rev 791

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 771 jeremybenn
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
2
   Copyright (C) 1998, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GNU Classpath.
5
 
6
GNU Classpath is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU Classpath is distributed in the hope that it will be useful, but
12
WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU Classpath; see the file COPYING.  If not, write to the
18
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301 USA.
20
 
21
Linking this library statically or dynamically with other modules is
22
making a combined work based on this library.  Thus, the terms and
23
conditions of the GNU General Public License cover the whole
24
combination.
25
 
26
As a special exception, the copyright holders of this library give you
27
permission to link this library with independent modules to produce an
28
executable, regardless of the license terms of these independent
29
modules, and to copy and distribute the resulting executable under
30
terms of your choice, provided that you also meet, for each linked
31
independent module, the terms and conditions of the license of that
32
module.  An independent module is a module which is not derived from
33
or based on this library.  If you modify this library, you may extend
34
this exception to your version of the library, but you are not
35
obligated to do so.  If you do not wish to do so, delete this
36
exception statement from your version. */
37
 
38
 
39
package java.lang;
40
 
41
import gnu.java.lang.CharData;
42
 
43
import java.io.Serializable;
44
import java.text.Collator;
45
import java.util.Locale;
46
 
47
/**
48
 * Wrapper class for the primitive char data type.  In addition, this class
49
 * allows one to retrieve property information and perform transformations
50
 * on the defined characters in the Unicode Standard, Version 4.0.0.
51
 * java.lang.Character is designed to be very dynamic, and as such, it
52
 * retrieves information on the Unicode character set from a separate
53
 * database, gnu.java.lang.CharData, which can be easily upgraded.
54
 *
55
 * <p>For predicates, boundaries are used to describe
56
 * the set of characters for which the method will return true.
57
 * This syntax uses fairly normal regular expression notation.
58
 * See 5.13 of the Unicode Standard, Version 4.0, for the
59
 * boundary specification.
60
 *
61
 * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
62
 * for more information on the Unicode Standard.
63
 *
64
 * @author Tom Tromey (tromey@cygnus.com)
65
 * @author Paul N. Fisher
66
 * @author Jochen Hoenicke
67
 * @author Eric Blake (ebb9@email.byu.edu)
68
 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
69
 * @see CharData
70
 * @since 1.0
71
 * @status partly updated to 1.5; some things still missing
72
 */
73
public final class Character implements Serializable, Comparable<Character>
74
{
75
  /**
76
   * A subset of Unicode blocks.
77
   *
78
   * @author Paul N. Fisher
79
   * @author Eric Blake (ebb9@email.byu.edu)
80
   * @since 1.2
81
   */
82
  public static class Subset
83
  {
84
    /** The name of the subset. */
85
    private final String name;
86
 
87
    /**
88
     * Construct a new subset of characters.
89
     *
90
     * @param name the name of the subset
91
     * @throws NullPointerException if name is null
92
     */
93
    protected Subset(String name)
94
    {
95
      // Note that name.toString() is name, unless name was null.
96
      this.name = name.toString();
97
    }
98
 
99
    /**
100
     * Compares two Subsets for equality. This is <code>final</code>, and
101
     * restricts the comparison on the <code>==</code> operator, so it returns
102
     * true only for the same object.
103
     *
104
     * @param o the object to compare
105
     * @return true if o is this
106
     */
107
    public final boolean equals(Object o)
108
    {
109
      return o == this;
110
    }
111
 
112
    /**
113
     * Makes the original hashCode of Object final, to be consistent with
114
     * equals.
115
     *
116
     * @return the hash code for this object
117
     */
118
    public final int hashCode()
119
    {
120
      return super.hashCode();
121
    }
122
 
123
    /**
124
     * Returns the name of the subset.
125
     *
126
     * @return the name
127
     */
128
    public final String toString()
129
    {
130
      return name;
131
    }
132
  } // class Subset
133
 
134
  /**
135
   * A family of character subsets in the Unicode specification. A character
136
   * is in at most one of these blocks.
137
   *
138
   * This nested class was generated automatically from
139
   * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts.
140
   * This Unicode definition file can be found on the
141
   * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
142
   * JDK 1.5 uses Unicode version 4.0.0.
143
   *
144
   * @author scripts/unicode-blocks.pl (written by Eric Blake)
145
   * @since 1.2
146
   */
147
  public static final class UnicodeBlock extends Subset
148
  {
149
    /** The start of the subset. */
150
    private final int start;
151
 
152
    /** The end of the subset. */
153
    private final int end;
154
 
155
    /** The canonical name of the block according to the Unicode standard. */
156
    private final String canonicalName;
157
 
158
    /** Enumeration for the <code>forName()</code> method */
159
    private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }
160
 
161
    /**
     * Creates a block that covers a fixed range of code points.
     *
     * @param start the first code point of the range
     * @param end the last code point of the range
     * @param name the block name, passed on to the Subset constructor
     * @param canonicalName the name of the block as defined in the Unicode
     *        standard.
     */
    private UnicodeBlock(int start, int end, String name, String canonicalName)
    {
      super(name);
      this.canonicalName = canonicalName;
      this.end = end;
      this.start = start;
    }
178
 
179
    /**
180
     * Returns the Unicode character block which a character belongs to.
181
     * <strong>Note</strong>: This method does not support the use of
182
     * supplementary characters.  For such support, <code>of(int)</code>
183
     * should be used instead.
184
     *
185
     * @param ch the character to look up
186
     * @return the set it belongs to, or null if it is not in one
187
     */
188
    public static UnicodeBlock of(char ch)
189
    {
190
      return of((int) ch);
191
    }
192
 
193
    /**
194
     * Returns the Unicode character block which a code point belongs to.
195
     *
196
     * @param codePoint the character to look up
197
     * @return the set it belongs to, or null if it is not in one.
198
     * @throws IllegalArgumentException if the specified code point is
199
     *         invalid.
200
     * @since 1.5
201
     */
202
    public static UnicodeBlock of(int codePoint)
203
    {
204
      if (codePoint > MAX_CODE_POINT)
205
        throw new IllegalArgumentException("The supplied integer value is " +
206
                                           "too large to be a codepoint.");
207
      // Simple binary search for the correct block.
208
      int low = 0;
209
      int hi = sets.length - 1;
210
      while (low <= hi)
211
        {
212
          int mid = (low + hi) >> 1;
213
          UnicodeBlock b = sets[mid];
214
          if (codePoint < b.start)
215
            hi = mid - 1;
216
          else if (codePoint > b.end)
217
            low = mid + 1;
218
          else
219
            return b;
220
        }
221
      return null;
222
    }
223
 
224
    /**
225
     * <p>
226
     * Returns the <code>UnicodeBlock</code> with the given name, as defined
227
     * by the Unicode standard.  The version of Unicode in use is defined by
228
     * the <code>Character</code> class, and the names are given in the
229
     * <code>Blocks-<version>.txt</code> file corresponding to that version.
230
     * The name may be specified in one of three ways:
231
     * </p>
232
     * <ol>
233
     * <li>The canonical, human-readable name used by the Unicode standard.
234
     * This is the name with all spaces and hyphens retained.  For example,
235
     * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
236
     * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
237
     * <li>The name used for the constants specified by this class, which
238
     * is the canonical name with all spaces and hyphens replaced with
239
     * underscores e.g. `BASIC_LATIN'</li>
240
     * </ol>
241
     * <p>
242
     * The names are compared case-insensitively using the case comparison
243
     * associated with the U.S. English locale.  The method recognises the
244
     * previous names used for blocks as well as the current ones.  At
245
     * present, this simply means that the deprecated `SURROGATES_AREA'
246
     * will be recognised by this method (the <code>of()</code> methods
247
     * only return one of the three new surrogate blocks).
248
     * </p>
249
     *
250
     * @param blockName the name of the block to look up.
251
     * @return the specified block.
252
     * @throws NullPointerException if the <code>blockName</code> is
253
     *         <code>null</code>.
254
     * @throws IllegalArgumentException if the name does not match any Unicode
255
     *         block.
256
     * @since 1.5
257
     */
258
    public static final UnicodeBlock forName(String blockName)
259
    {
260
      NameType type;
261
      if (blockName.indexOf(' ') != -1)
262
        type = NameType.CANONICAL;
263
      else if (blockName.indexOf('_') != -1)
264
        type = NameType.CONSTANT;
265
      else
266
        type = NameType.NO_SPACES;
267
      Collator usCollator = Collator.getInstance(Locale.US);
268
      usCollator.setStrength(Collator.PRIMARY);
269
      /* Special case for deprecated blocks not in sets */
270
      switch (type)
271
      {
272
        case CANONICAL:
273
          if (usCollator.compare(blockName, "Surrogates Area") == 0)
274
            return SURROGATES_AREA;
275
          break;
276
        case NO_SPACES:
277
          if (usCollator.compare(blockName, "SurrogatesArea") == 0)
278
            return SURROGATES_AREA;
279
          break;
280
        case CONSTANT:
281
          if (usCollator.compare(blockName, "SURROGATES_AREA") == 0)
282
            return SURROGATES_AREA;
283
          break;
284
      }
285
      /* Other cases */
286
      switch (type)
287
      {
288
        case CANONICAL:
289
          for (UnicodeBlock block : sets)
290
            if (usCollator.compare(blockName, block.canonicalName) == 0)
291
              return block;
292
          break;
293
        case NO_SPACES:
294
          for (UnicodeBlock block : sets)
295
            {
296
              String nsName = block.canonicalName.replaceAll(" ","");
297
              if (usCollator.compare(blockName, nsName) == 0)
298
                return block;
299
            }
300
          break;
301
        case CONSTANT:
302
          for (UnicodeBlock block : sets)
303
            if (usCollator.compare(blockName, block.toString()) == 0)
304
              return block;
305
          break;
306
      }
307
      throw new IllegalArgumentException("No Unicode block found for " +
308
                                         blockName + ".");
309
    }
310
 
311
    /**
312
     * Basic Latin.
313
     * 0x0000 - 0x007F.
314
     */
315
    public static final UnicodeBlock BASIC_LATIN
316
      = new UnicodeBlock(0x0000, 0x007F,
317
                         "BASIC_LATIN",
318
                         "Basic Latin");
319
 
320
    /**
321
     * Latin-1 Supplement.
322
     * 0x0080 - 0x00FF.
323
     */
324
    public static final UnicodeBlock LATIN_1_SUPPLEMENT
325
      = new UnicodeBlock(0x0080, 0x00FF,
326
                         "LATIN_1_SUPPLEMENT",
327
                         "Latin-1 Supplement");
328
 
329
    /**
330
     * Latin Extended-A.
331
     * 0x0100 - 0x017F.
332
     */
333
    public static final UnicodeBlock LATIN_EXTENDED_A
334
      = new UnicodeBlock(0x0100, 0x017F,
335
                         "LATIN_EXTENDED_A",
336
                         "Latin Extended-A");
337
 
338
    /**
339
     * Latin Extended-B.
340
     * 0x0180 - 0x024F.
341
     */
342
    public static final UnicodeBlock LATIN_EXTENDED_B
343
      = new UnicodeBlock(0x0180, 0x024F,
344
                         "LATIN_EXTENDED_B",
345
                         "Latin Extended-B");
346
 
347
    /**
348
     * IPA Extensions.
349
     * 0x0250 - 0x02AF.
350
     */
351
    public static final UnicodeBlock IPA_EXTENSIONS
352
      = new UnicodeBlock(0x0250, 0x02AF,
353
                         "IPA_EXTENSIONS",
354
                         "IPA Extensions");
355
 
356
    /**
357
     * Spacing Modifier Letters.
358
     * 0x02B0 - 0x02FF.
359
     */
360
    public static final UnicodeBlock SPACING_MODIFIER_LETTERS
361
      = new UnicodeBlock(0x02B0, 0x02FF,
362
                         "SPACING_MODIFIER_LETTERS",
363
                         "Spacing Modifier Letters");
364
 
365
    /**
366
     * Combining Diacritical Marks.
367
     * 0x0300 - 0x036F.
368
     */
369
    public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
370
      = new UnicodeBlock(0x0300, 0x036F,
371
                         "COMBINING_DIACRITICAL_MARKS",
372
                         "Combining Diacritical Marks");
373
 
374
    /**
375
     * Greek.
376
     * 0x0370 - 0x03FF.
377
     */
378
    public static final UnicodeBlock GREEK
379
      = new UnicodeBlock(0x0370, 0x03FF,
380
                         "GREEK",
381
                         "Greek");
382
 
383
    /**
384
     * Cyrillic.
385
     * 0x0400 - 0x04FF.
386
     */
387
    public static final UnicodeBlock CYRILLIC
388
      = new UnicodeBlock(0x0400, 0x04FF,
389
                         "CYRILLIC",
390
                         "Cyrillic");
391
 
392
    /**
393
     * Cyrillic Supplementary.
394
     * 0x0500 - 0x052F.
395
     * @since 1.5
396
     */
397
    public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
398
      = new UnicodeBlock(0x0500, 0x052F,
399
                         "CYRILLIC_SUPPLEMENTARY",
400
                         "Cyrillic Supplementary");
401
 
402
    /**
403
     * Armenian.
404
     * 0x0530 - 0x058F.
405
     */
406
    public static final UnicodeBlock ARMENIAN
407
      = new UnicodeBlock(0x0530, 0x058F,
408
                         "ARMENIAN",
409
                         "Armenian");
410
 
411
    /**
412
     * Hebrew.
413
     * 0x0590 - 0x05FF.
414
     */
415
    public static final UnicodeBlock HEBREW
416
      = new UnicodeBlock(0x0590, 0x05FF,
417
                         "HEBREW",
418
                         "Hebrew");
419
 
420
    /**
421
     * Arabic.
422
     * 0x0600 - 0x06FF.
423
     */
424
    public static final UnicodeBlock ARABIC
425
      = new UnicodeBlock(0x0600, 0x06FF,
426
                         "ARABIC",
427
                         "Arabic");
428
 
429
    /**
430
     * Syriac.
431
     * 0x0700 - 0x074F.
432
     * @since 1.4
433
     */
434
    public static final UnicodeBlock SYRIAC
435
      = new UnicodeBlock(0x0700, 0x074F,
436
                         "SYRIAC",
437
                         "Syriac");
438
 
439
    /**
440
     * Thaana.
441
     * 0x0780 - 0x07BF.
442
     * @since 1.4
443
     */
444
    public static final UnicodeBlock THAANA
445
      = new UnicodeBlock(0x0780, 0x07BF,
446
                         "THAANA",
447
                         "Thaana");
448
 
449
    /**
450
     * Devanagari.
451
     * 0x0900 - 0x097F.
452
     */
453
    public static final UnicodeBlock DEVANAGARI
454
      = new UnicodeBlock(0x0900, 0x097F,
455
                         "DEVANAGARI",
456
                         "Devanagari");
457
 
458
    /**
459
     * Bengali.
460
     * 0x0980 - 0x09FF.
461
     */
462
    public static final UnicodeBlock BENGALI
463
      = new UnicodeBlock(0x0980, 0x09FF,
464
                         "BENGALI",
465
                         "Bengali");
466
 
467
    /**
468
     * Gurmukhi.
469
     * 0x0A00 - 0x0A7F.
470
     */
471
    public static final UnicodeBlock GURMUKHI
472
      = new UnicodeBlock(0x0A00, 0x0A7F,
473
                         "GURMUKHI",
474
                         "Gurmukhi");
475
 
476
    /**
477
     * Gujarati.
478
     * 0x0A80 - 0x0AFF.
479
     */
480
    public static final UnicodeBlock GUJARATI
481
      = new UnicodeBlock(0x0A80, 0x0AFF,
482
                         "GUJARATI",
483
                         "Gujarati");
484
 
485
    /**
486
     * Oriya.
487
     * 0x0B00 - 0x0B7F.
488
     */
489
    public static final UnicodeBlock ORIYA
490
      = new UnicodeBlock(0x0B00, 0x0B7F,
491
                         "ORIYA",
492
                         "Oriya");
493
 
494
    /**
495
     * Tamil.
496
     * 0x0B80 - 0x0BFF.
497
     */
498
    public static final UnicodeBlock TAMIL
499
      = new UnicodeBlock(0x0B80, 0x0BFF,
500
                         "TAMIL",
501
                         "Tamil");
502
 
503
    /**
504
     * Telugu.
505
     * 0x0C00 - 0x0C7F.
506
     */
507
    public static final UnicodeBlock TELUGU
508
      = new UnicodeBlock(0x0C00, 0x0C7F,
509
                         "TELUGU",
510
                         "Telugu");
511
 
512
    /**
513
     * Kannada.
514
     * 0x0C80 - 0x0CFF.
515
     */
516
    public static final UnicodeBlock KANNADA
517
      = new UnicodeBlock(0x0C80, 0x0CFF,
518
                         "KANNADA",
519
                         "Kannada");
520
 
521
    /**
522
     * Malayalam.
523
     * 0x0D00 - 0x0D7F.
524
     */
525
    public static final UnicodeBlock MALAYALAM
526
      = new UnicodeBlock(0x0D00, 0x0D7F,
527
                         "MALAYALAM",
528
                         "Malayalam");
529
 
530
    /**
531
     * Sinhala.
532
     * 0x0D80 - 0x0DFF.
533
     * @since 1.4
534
     */
535
    public static final UnicodeBlock SINHALA
536
      = new UnicodeBlock(0x0D80, 0x0DFF,
537
                         "SINHALA",
538
                         "Sinhala");
539
 
540
    /**
541
     * Thai.
542
     * 0x0E00 - 0x0E7F.
543
     */
544
    public static final UnicodeBlock THAI
545
      = new UnicodeBlock(0x0E00, 0x0E7F,
546
                         "THAI",
547
                         "Thai");
548
 
549
    /**
550
     * Lao.
551
     * 0x0E80 - 0x0EFF.
552
     */
553
    public static final UnicodeBlock LAO
554
      = new UnicodeBlock(0x0E80, 0x0EFF,
555
                         "LAO",
556
                         "Lao");
557
 
558
    /**
559
     * Tibetan.
560
     * 0x0F00 - 0x0FFF.
561
     */
562
    public static final UnicodeBlock TIBETAN
563
      = new UnicodeBlock(0x0F00, 0x0FFF,
564
                         "TIBETAN",
565
                         "Tibetan");
566
 
567
    /**
568
     * Myanmar.
569
     * 0x1000 - 0x109F.
570
     * @since 1.4
571
     */
572
    public static final UnicodeBlock MYANMAR
573
      = new UnicodeBlock(0x1000, 0x109F,
574
                         "MYANMAR",
575
                         "Myanmar");
576
 
577
    /**
578
     * Georgian.
579
     * 0x10A0 - 0x10FF.
580
     */
581
    public static final UnicodeBlock GEORGIAN
582
      = new UnicodeBlock(0x10A0, 0x10FF,
583
                         "GEORGIAN",
584
                         "Georgian");
585
 
586
    /**
587
     * Hangul Jamo.
588
     * 0x1100 - 0x11FF.
589
     */
590
    public static final UnicodeBlock HANGUL_JAMO
591
      = new UnicodeBlock(0x1100, 0x11FF,
592
                         "HANGUL_JAMO",
593
                         "Hangul Jamo");
594
 
595
    /**
596
     * Ethiopic.
597
     * 0x1200 - 0x137F.
598
     * @since 1.4
599
     */
600
    public static final UnicodeBlock ETHIOPIC
601
      = new UnicodeBlock(0x1200, 0x137F,
602
                         "ETHIOPIC",
603
                         "Ethiopic");
604
 
605
    /**
606
     * Cherokee.
607
     * 0x13A0 - 0x13FF.
608
     * @since 1.4
609
     */
610
    public static final UnicodeBlock CHEROKEE
611
      = new UnicodeBlock(0x13A0, 0x13FF,
612
                         "CHEROKEE",
613
                         "Cherokee");
614
 
615
    /**
616
     * Unified Canadian Aboriginal Syllabics.
617
     * 0x1400 - 0x167F.
618
     * @since 1.4
619
     */
620
    public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
621
      = new UnicodeBlock(0x1400, 0x167F,
622
                         "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
623
                         "Unified Canadian Aboriginal Syllabics");
624
 
625
    /**
626
     * Ogham.
627
     * 0x1680 - 0x169F.
628
     * @since 1.4
629
     */
630
    public static final UnicodeBlock OGHAM
631
      = new UnicodeBlock(0x1680, 0x169F,
632
                         "OGHAM",
633
                         "Ogham");
634
 
635
    /**
636
     * Runic.
637
     * 0x16A0 - 0x16FF.
638
     * @since 1.4
639
     */
640
    public static final UnicodeBlock RUNIC
641
      = new UnicodeBlock(0x16A0, 0x16FF,
642
                         "RUNIC",
643
                         "Runic");
644
 
645
    /**
646
     * Tagalog.
647
     * 0x1700 - 0x171F.
648
     * @since 1.5
649
     */
650
    public static final UnicodeBlock TAGALOG
651
      = new UnicodeBlock(0x1700, 0x171F,
652
                         "TAGALOG",
653
                         "Tagalog");
654
 
655
    /**
656
     * Hanunoo.
657
     * 0x1720 - 0x173F.
658
     * @since 1.5
659
     */
660
    public static final UnicodeBlock HANUNOO
661
      = new UnicodeBlock(0x1720, 0x173F,
662
                         "HANUNOO",
663
                         "Hanunoo");
664
 
665
    /**
666
     * Buhid.
667
     * 0x1740 - 0x175F.
668
     * @since 1.5
669
     */
670
    public static final UnicodeBlock BUHID
671
      = new UnicodeBlock(0x1740, 0x175F,
672
                         "BUHID",
673
                         "Buhid");
674
 
675
    /**
676
     * Tagbanwa.
677
     * 0x1760 - 0x177F.
678
     * @since 1.5
679
     */
680
    public static final UnicodeBlock TAGBANWA
681
      = new UnicodeBlock(0x1760, 0x177F,
682
                         "TAGBANWA",
683
                         "Tagbanwa");
684
 
685
    /**
686
     * Khmer.
687
     * 0x1780 - 0x17FF.
688
     * @since 1.4
689
     */
690
    public static final UnicodeBlock KHMER
691
      = new UnicodeBlock(0x1780, 0x17FF,
692
                         "KHMER",
693
                         "Khmer");
694
 
695
    /**
696
     * Mongolian.
697
     * 0x1800 - 0x18AF.
698
     * @since 1.4
699
     */
700
    public static final UnicodeBlock MONGOLIAN
701
      = new UnicodeBlock(0x1800, 0x18AF,
702
                         "MONGOLIAN",
703
                         "Mongolian");
704
 
705
    /**
706
     * Limbu.
707
     * 0x1900 - 0x194F.
708
     * @since 1.5
709
     */
710
    public static final UnicodeBlock LIMBU
711
      = new UnicodeBlock(0x1900, 0x194F,
712
                         "LIMBU",
713
                         "Limbu");
714
 
715
    /**
716
     * Tai Le.
717
     * 0x1950 - 0x197F.
718
     * @since 1.5
719
     */
720
    public static final UnicodeBlock TAI_LE
721
      = new UnicodeBlock(0x1950, 0x197F,
722
                         "TAI_LE",
723
                         "Tai Le");
724
 
725
    /**
726
     * Khmer Symbols.
727
     * 0x19E0 - 0x19FF.
728
     * @since 1.5
729
     */
730
    public static final UnicodeBlock KHMER_SYMBOLS
731
      = new UnicodeBlock(0x19E0, 0x19FF,
732
                         "KHMER_SYMBOLS",
733
                         "Khmer Symbols");
734
 
735
    /**
736
     * Phonetic Extensions.
737
     * 0x1D00 - 0x1D7F.
738
     * @since 1.5
739
     */
740
    public static final UnicodeBlock PHONETIC_EXTENSIONS
741
      = new UnicodeBlock(0x1D00, 0x1D7F,
742
                         "PHONETIC_EXTENSIONS",
743
                         "Phonetic Extensions");
744
 
745
    /**
746
     * Latin Extended Additional.
747
     * 0x1E00 - 0x1EFF.
748
     */
749
    public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
750
      = new UnicodeBlock(0x1E00, 0x1EFF,
751
                         "LATIN_EXTENDED_ADDITIONAL",
752
                         "Latin Extended Additional");
753
 
754
    /**
755
     * Greek Extended.
756
     * 0x1F00 - 0x1FFF.
757
     */
758
    public static final UnicodeBlock GREEK_EXTENDED
759
      = new UnicodeBlock(0x1F00, 0x1FFF,
760
                         "GREEK_EXTENDED",
761
                         "Greek Extended");
762
 
763
    /**
764
     * General Punctuation.
765
     * 0x2000 - 0x206F.
766
     */
767
    public static final UnicodeBlock GENERAL_PUNCTUATION
768
      = new UnicodeBlock(0x2000, 0x206F,
769
                         "GENERAL_PUNCTUATION",
770
                         "General Punctuation");
771
 
772
    /**
773
     * Superscripts and Subscripts.
774
     * 0x2070 - 0x209F.
775
     */
776
    public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
777
      = new UnicodeBlock(0x2070, 0x209F,
778
                         "SUPERSCRIPTS_AND_SUBSCRIPTS",
779
                         "Superscripts and Subscripts");
780
 
781
    /**
782
     * Currency Symbols.
783
     * 0x20A0 - 0x20CF.
784
     */
785
    public static final UnicodeBlock CURRENCY_SYMBOLS
786
      = new UnicodeBlock(0x20A0, 0x20CF,
787
                         "CURRENCY_SYMBOLS",
788
                         "Currency Symbols");
789
 
790
    /**
791
     * Combining Marks for Symbols.
792
     * 0x20D0 - 0x20FF.
793
     */
794
    public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
795
      = new UnicodeBlock(0x20D0, 0x20FF,
796
                         "COMBINING_MARKS_FOR_SYMBOLS",
797
                         "Combining Marks for Symbols");
798
 
799
    /**
800
     * Letterlike Symbols.
801
     * 0x2100 - 0x214F.
802
     */
803
    public static final UnicodeBlock LETTERLIKE_SYMBOLS
804
      = new UnicodeBlock(0x2100, 0x214F,
805
                         "LETTERLIKE_SYMBOLS",
806
                         "Letterlike Symbols");
807
 
808
    /**
809
     * Number Forms.
810
     * 0x2150 - 0x218F.
811
     */
812
    public static final UnicodeBlock NUMBER_FORMS
813
      = new UnicodeBlock(0x2150, 0x218F,
814
                         "NUMBER_FORMS",
815
                         "Number Forms");
816
 
817
    /**
818
     * Arrows.
819
     * 0x2190 - 0x21FF.
820
     */
821
    public static final UnicodeBlock ARROWS
822
      = new UnicodeBlock(0x2190, 0x21FF,
823
                         "ARROWS",
824
                         "Arrows");
825
 
826
    /**
827
     * Mathematical Operators.
828
     * 0x2200 - 0x22FF.
829
     */
830
    public static final UnicodeBlock MATHEMATICAL_OPERATORS
831
      = new UnicodeBlock(0x2200, 0x22FF,
832
                         "MATHEMATICAL_OPERATORS",
833
                         "Mathematical Operators");
834
 
835
    /**
836
     * Miscellaneous Technical.
837
     * 0x2300 - 0x23FF.
838
     */
839
    public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
840
      = new UnicodeBlock(0x2300, 0x23FF,
841
                         "MISCELLANEOUS_TECHNICAL",
842
                         "Miscellaneous Technical");
843
 
844
    /**
845
     * Control Pictures.
846
     * 0x2400 - 0x243F.
847
     */
848
    public static final UnicodeBlock CONTROL_PICTURES
849
      = new UnicodeBlock(0x2400, 0x243F,
850
                         "CONTROL_PICTURES",
851
                         "Control Pictures");
852
 
853
    /**
854
     * Optical Character Recognition.
855
     * 0x2440 - 0x245F.
856
     */
857
    public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
858
      = new UnicodeBlock(0x2440, 0x245F,
859
                         "OPTICAL_CHARACTER_RECOGNITION",
860
                         "Optical Character Recognition");
861
 
862
    /**
863
     * Enclosed Alphanumerics.
864
     * 0x2460 - 0x24FF.
865
     */
866
    public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
867
      = new UnicodeBlock(0x2460, 0x24FF,
868
                         "ENCLOSED_ALPHANUMERICS",
869
                         "Enclosed Alphanumerics");
870
 
871
    /**
872
     * Box Drawing.
873
     * 0x2500 - 0x257F.
874
     */
875
    public static final UnicodeBlock BOX_DRAWING
876
      = new UnicodeBlock(0x2500, 0x257F,
877
                         "BOX_DRAWING",
878
                         "Box Drawing");
879
 
880
    /**
881
     * Block Elements.
882
     * 0x2580 - 0x259F.
883
     */
884
    public static final UnicodeBlock BLOCK_ELEMENTS
885
      = new UnicodeBlock(0x2580, 0x259F,
886
                         "BLOCK_ELEMENTS",
887
                         "Block Elements");
888
 
889
    /**
890
     * Geometric Shapes.
891
     * 0x25A0 - 0x25FF.
892
     */
893
    public static final UnicodeBlock GEOMETRIC_SHAPES
894
      = new UnicodeBlock(0x25A0, 0x25FF,
895
                         "GEOMETRIC_SHAPES",
896
                         "Geometric Shapes");
897
 
898
    /**
899
     * Miscellaneous Symbols.
900
     * 0x2600 - 0x26FF.
901
     */
902
    public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
903
      = new UnicodeBlock(0x2600, 0x26FF,
904
                         "MISCELLANEOUS_SYMBOLS",
905
                         "Miscellaneous Symbols");
906
 
907
    /**
908
     * Dingbats.
909
     * 0x2700 - 0x27BF.
910
     */
911
    public static final UnicodeBlock DINGBATS
912
      = new UnicodeBlock(0x2700, 0x27BF,
913
                         "DINGBATS",
914
                         "Dingbats");
915
 
916
    /**
917
     * Miscellaneous Mathematical Symbols-A.
918
     * 0x27C0 - 0x27EF.
919
     * @since 1.5
920
     */
921
    public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
922
      = new UnicodeBlock(0x27C0, 0x27EF,
923
                         "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
924
                         "Miscellaneous Mathematical Symbols-A");
925
 
926
    /**
927
     * Supplemental Arrows-A.
928
     * 0x27F0 - 0x27FF.
929
     * @since 1.5
930
     */
931
    public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
932
      = new UnicodeBlock(0x27F0, 0x27FF,
933
                         "SUPPLEMENTAL_ARROWS_A",
934
                         "Supplemental Arrows-A");
935
 
936
    /**
937
     * Braille Patterns.
938
     * 0x2800 - 0x28FF.
939
     * @since 1.4
940
     */
941
    public static final UnicodeBlock BRAILLE_PATTERNS
942
      = new UnicodeBlock(0x2800, 0x28FF,
943
                         "BRAILLE_PATTERNS",
944
                         "Braille Patterns");
945
 
946
    /**
947
     * Supplemental Arrows-B.
948
     * 0x2900 - 0x297F.
949
     * @since 1.5
950
     */
951
    public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
952
      = new UnicodeBlock(0x2900, 0x297F,
953
                         "SUPPLEMENTAL_ARROWS_B",
954
                         "Supplemental Arrows-B");
955
 
956
    /**
957
     * Miscellaneous Mathematical Symbols-B.
958
     * 0x2980 - 0x29FF.
959
     * @since 1.5
960
     */
961
    public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
962
      = new UnicodeBlock(0x2980, 0x29FF,
963
                         "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
964
                         "Miscellaneous Mathematical Symbols-B");
965
 
966
    /**
967
     * Supplemental Mathematical Operators.
968
     * 0x2A00 - 0x2AFF.
969
     * @since 1.5
970
     */
971
    public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
972
      = new UnicodeBlock(0x2A00, 0x2AFF,
973
                         "SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
974
                         "Supplemental Mathematical Operators");
975
 
976
    /**
977
     * Miscellaneous Symbols and Arrows.
978
     * 0x2B00 - 0x2BFF.
979
     * @since 1.5
980
     */
981
    public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
982
      = new UnicodeBlock(0x2B00, 0x2BFF,
983
                         "MISCELLANEOUS_SYMBOLS_AND_ARROWS",
984
                         "Miscellaneous Symbols and Arrows");
985
 
986
    /**
987
     * CJK Radicals Supplement.
988
     * 0x2E80 - 0x2EFF.
989
     * @since 1.4
990
     */
991
    public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
992
      = new UnicodeBlock(0x2E80, 0x2EFF,
993
                         "CJK_RADICALS_SUPPLEMENT",
994
                         "CJK Radicals Supplement");
995
 
996
    /**
997
     * Kangxi Radicals.
998
     * 0x2F00 - 0x2FDF.
999
     * @since 1.4
1000
     */
1001
    public static final UnicodeBlock KANGXI_RADICALS
1002
      = new UnicodeBlock(0x2F00, 0x2FDF,
1003
                         "KANGXI_RADICALS",
1004
                         "Kangxi Radicals");
1005
 
1006
    /**
1007
     * Ideographic Description Characters.
1008
     * 0x2FF0 - 0x2FFF.
1009
     * @since 1.4
1010
     */
1011
    public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1012
      = new UnicodeBlock(0x2FF0, 0x2FFF,
1013
                         "IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1014
                         "Ideographic Description Characters");
1015
 
1016
    /**
1017
     * CJK Symbols and Punctuation.
1018
     * 0x3000 - 0x303F.
1019
     */
1020
    public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1021
      = new UnicodeBlock(0x3000, 0x303F,
1022
                         "CJK_SYMBOLS_AND_PUNCTUATION",
1023
                         "CJK Symbols and Punctuation");
1024
 
1025
    /**
1026
     * Hiragana.
1027
     * 0x3040 - 0x309F.
1028
     */
1029
    public static final UnicodeBlock HIRAGANA
1030
      = new UnicodeBlock(0x3040, 0x309F,
1031
                         "HIRAGANA",
1032
                         "Hiragana");
1033
 
1034
    /**
1035
     * Katakana.
1036
     * 0x30A0 - 0x30FF.
1037
     */
1038
    public static final UnicodeBlock KATAKANA
1039
      = new UnicodeBlock(0x30A0, 0x30FF,
1040
                         "KATAKANA",
1041
                         "Katakana");
1042
 
1043
    /**
1044
     * Bopomofo.
1045
     * 0x3100 - 0x312F.
1046
     */
1047
    public static final UnicodeBlock BOPOMOFO
1048
      = new UnicodeBlock(0x3100, 0x312F,
1049
                         "BOPOMOFO",
1050
                         "Bopomofo");
1051
 
1052
    /**
1053
     * Hangul Compatibility Jamo.
1054
     * 0x3130 - 0x318F.
1055
     */
1056
    public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1057
      = new UnicodeBlock(0x3130, 0x318F,
1058
                         "HANGUL_COMPATIBILITY_JAMO",
1059
                         "Hangul Compatibility Jamo");
1060
 
1061
    /**
1062
     * Kanbun.
1063
     * 0x3190 - 0x319F.
1064
     */
1065
    public static final UnicodeBlock KANBUN
1066
      = new UnicodeBlock(0x3190, 0x319F,
1067
                         "KANBUN",
1068
                         "Kanbun");
1069
 
1070
    /**
1071
     * Bopomofo Extended.
1072
     * 0x31A0 - 0x31BF.
1073
     * @since 1.4
1074
     */
1075
    public static final UnicodeBlock BOPOMOFO_EXTENDED
1076
      = new UnicodeBlock(0x31A0, 0x31BF,
1077
                         "BOPOMOFO_EXTENDED",
1078
                         "Bopomofo Extended");
1079
 
1080
    /**
1081
     * Katakana Phonetic Extensions.
1082
     * 0x31F0 - 0x31FF.
1083
     * @since 1.5
1084
     */
1085
    public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1086
      = new UnicodeBlock(0x31F0, 0x31FF,
1087
                         "KATAKANA_PHONETIC_EXTENSIONS",
1088
                         "Katakana Phonetic Extensions");
1089
 
1090
    /**
1091
     * Enclosed CJK Letters and Months.
1092
     * 0x3200 - 0x32FF.
1093
     */
1094
    public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1095
      = new UnicodeBlock(0x3200, 0x32FF,
1096
                         "ENCLOSED_CJK_LETTERS_AND_MONTHS",
1097
                         "Enclosed CJK Letters and Months");
1098
 
1099
    /**
1100
     * CJK Compatibility.
1101
     * 0x3300 - 0x33FF.
1102
     */
1103
    public static final UnicodeBlock CJK_COMPATIBILITY
1104
      = new UnicodeBlock(0x3300, 0x33FF,
1105
                         "CJK_COMPATIBILITY",
1106
                         "CJK Compatibility");
1107
 
1108
    /**
1109
     * CJK Unified Ideographs Extension A.
1110
     * 0x3400 - 0x4DBF.
1111
     * @since 1.4
1112
     */
1113
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1114
      = new UnicodeBlock(0x3400, 0x4DBF,
1115
                         "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1116
                         "CJK Unified Ideographs Extension A");
1117
 
1118
    /**
1119
     * Yijing Hexagram Symbols.
1120
     * 0x4DC0 - 0x4DFF.
1121
     * @since 1.5
1122
     */
1123
    public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1124
      = new UnicodeBlock(0x4DC0, 0x4DFF,
1125
                         "YIJING_HEXAGRAM_SYMBOLS",
1126
                         "Yijing Hexagram Symbols");
1127
 
1128
    /**
1129
     * CJK Unified Ideographs.
1130
     * 0x4E00 - 0x9FFF.
1131
     */
1132
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1133
      = new UnicodeBlock(0x4E00, 0x9FFF,
1134
                         "CJK_UNIFIED_IDEOGRAPHS",
1135
                         "CJK Unified Ideographs");
1136
 
1137
    /**
1138
     * Yi Syllables.
1139
     * 0xA000 - 0xA48F.
1140
     * @since 1.4
1141
     */
1142
    public static final UnicodeBlock YI_SYLLABLES
1143
      = new UnicodeBlock(0xA000, 0xA48F,
1144
                         "YI_SYLLABLES",
1145
                         "Yi Syllables");
1146
 
1147
    /**
1148
     * Yi Radicals.
1149
     * 0xA490 - 0xA4CF.
1150
     * @since 1.4
1151
     */
1152
    public static final UnicodeBlock YI_RADICALS
1153
      = new UnicodeBlock(0xA490, 0xA4CF,
1154
                         "YI_RADICALS",
1155
                         "Yi Radicals");
1156
 
1157
    /**
1158
     * Hangul Syllables.
1159
     * 0xAC00 - 0xD7AF.
1160
     */
1161
    public static final UnicodeBlock HANGUL_SYLLABLES
1162
      = new UnicodeBlock(0xAC00, 0xD7AF,
1163
                         "HANGUL_SYLLABLES",
1164
                         "Hangul Syllables");
1165
 
1166
    /**
1167
     * High Surrogates.
1168
     * 0xD800 - 0xDB7F.
1169
     * @since 1.5
1170
     */
1171
    public static final UnicodeBlock HIGH_SURROGATES
1172
      = new UnicodeBlock(0xD800, 0xDB7F,
1173
                         "HIGH_SURROGATES",
1174
                         "High Surrogates");
1175
 
1176
    /**
1177
     * High Private Use Surrogates.
1178
     * 0xDB80 - 0xDBFF.
1179
     * @since 1.5
1180
     */
1181
    public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1182
      = new UnicodeBlock(0xDB80, 0xDBFF,
1183
                         "HIGH_PRIVATE_USE_SURROGATES",
1184
                         "High Private Use Surrogates");
1185
 
1186
    /**
1187
     * Low Surrogates.
1188
     * 0xDC00 - 0xDFFF.
1189
     * @since 1.5
1190
     */
1191
    public static final UnicodeBlock LOW_SURROGATES
1192
      = new UnicodeBlock(0xDC00, 0xDFFF,
1193
                         "LOW_SURROGATES",
1194
                         "Low Surrogates");
1195
 
1196
    /**
1197
     * Private Use Area.
1198
     * 0xE000 - 0xF8FF.
1199
     */
1200
    public static final UnicodeBlock PRIVATE_USE_AREA
1201
      = new UnicodeBlock(0xE000, 0xF8FF,
1202
                         "PRIVATE_USE_AREA",
1203
                         "Private Use Area");
1204
 
1205
    /**
1206
     * CJK Compatibility Ideographs.
1207
     * 0xF900 - 0xFAFF.
1208
     */
1209
    public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1210
      = new UnicodeBlock(0xF900, 0xFAFF,
1211
                         "CJK_COMPATIBILITY_IDEOGRAPHS",
1212
                         "CJK Compatibility Ideographs");
1213
 
1214
    /**
1215
     * Alphabetic Presentation Forms.
1216
     * 0xFB00 - 0xFB4F.
1217
     */
1218
    public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1219
      = new UnicodeBlock(0xFB00, 0xFB4F,
1220
                         "ALPHABETIC_PRESENTATION_FORMS",
1221
                         "Alphabetic Presentation Forms");
1222
 
1223
    /**
1224
     * Arabic Presentation Forms-A.
1225
     * 0xFB50 - 0xFDFF.
1226
     */
1227
    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1228
      = new UnicodeBlock(0xFB50, 0xFDFF,
1229
                         "ARABIC_PRESENTATION_FORMS_A",
1230
                         "Arabic Presentation Forms-A");
1231
 
1232
    /**
1233
     * Variation Selectors.
1234
     * 0xFE00 - 0xFE0F.
1235
     * @since 1.5
1236
     */
1237
    public static final UnicodeBlock VARIATION_SELECTORS
1238
      = new UnicodeBlock(0xFE00, 0xFE0F,
1239
                         "VARIATION_SELECTORS",
1240
                         "Variation Selectors");
1241
 
1242
    /**
1243
     * Combining Half Marks.
1244
     * 0xFE20 - 0xFE2F.
1245
     */
1246
    public static final UnicodeBlock COMBINING_HALF_MARKS
1247
      = new UnicodeBlock(0xFE20, 0xFE2F,
1248
                         "COMBINING_HALF_MARKS",
1249
                         "Combining Half Marks");
1250
 
1251
    /**
1252
     * CJK Compatibility Forms.
1253
     * 0xFE30 - 0xFE4F.
1254
     */
1255
    public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1256
      = new UnicodeBlock(0xFE30, 0xFE4F,
1257
                         "CJK_COMPATIBILITY_FORMS",
1258
                         "CJK Compatibility Forms");
1259
 
1260
    /**
1261
     * Small Form Variants.
1262
     * 0xFE50 - 0xFE6F.
1263
     */
1264
    public static final UnicodeBlock SMALL_FORM_VARIANTS
1265
      = new UnicodeBlock(0xFE50, 0xFE6F,
1266
                         "SMALL_FORM_VARIANTS",
1267
                         "Small Form Variants");
1268
 
1269
    /**
1270
     * Arabic Presentation Forms-B.
1271
     * 0xFE70 - 0xFEFF.
1272
     */
1273
    public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1274
      = new UnicodeBlock(0xFE70, 0xFEFF,
1275
                         "ARABIC_PRESENTATION_FORMS_B",
1276
                         "Arabic Presentation Forms-B");
1277
 
1278
    /**
1279
     * Halfwidth and Fullwidth Forms.
1280
     * 0xFF00 - 0xFFEF.
1281
     */
1282
    public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1283
      = new UnicodeBlock(0xFF00, 0xFFEF,
1284
                         "HALFWIDTH_AND_FULLWIDTH_FORMS",
1285
                         "Halfwidth and Fullwidth Forms");
1286
 
1287
    /**
1288
     * Specials.
1289
     * 0xFFF0 - 0xFFFF.
1290
     */
1291
    public static final UnicodeBlock SPECIALS
1292
      = new UnicodeBlock(0xFFF0, 0xFFFF,
1293
                         "SPECIALS",
1294
                         "Specials");
1295
 
1296
    /**
1297
     * Linear B Syllabary.
1298
     * 0x10000 - 0x1007F.
1299
     * @since 1.5
1300
     */
1301
    public static final UnicodeBlock LINEAR_B_SYLLABARY
1302
      = new UnicodeBlock(0x10000, 0x1007F,
1303
                         "LINEAR_B_SYLLABARY",
1304
                         "Linear B Syllabary");
1305
 
1306
    /**
1307
     * Linear B Ideograms.
1308
     * 0x10080 - 0x100FF.
1309
     * @since 1.5
1310
     */
1311
    public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1312
      = new UnicodeBlock(0x10080, 0x100FF,
1313
                         "LINEAR_B_IDEOGRAMS",
1314
                         "Linear B Ideograms");
1315
 
1316
    /**
1317
     * Aegean Numbers.
1318
     * 0x10100 - 0x1013F.
1319
     * @since 1.5
1320
     */
1321
    public static final UnicodeBlock AEGEAN_NUMBERS
1322
      = new UnicodeBlock(0x10100, 0x1013F,
1323
                         "AEGEAN_NUMBERS",
1324
                         "Aegean Numbers");
1325
 
1326
    /**
1327
     * Old Italic.
1328
     * 0x10300 - 0x1032F.
1329
     * @since 1.5
1330
     */
1331
    public static final UnicodeBlock OLD_ITALIC
1332
      = new UnicodeBlock(0x10300, 0x1032F,
1333
                         "OLD_ITALIC",
1334
                         "Old Italic");
1335
 
1336
    /**
1337
     * Gothic.
1338
     * 0x10330 - 0x1034F.
1339
     * @since 1.5
1340
     */
1341
    public static final UnicodeBlock GOTHIC
1342
      = new UnicodeBlock(0x10330, 0x1034F,
1343
                         "GOTHIC",
1344
                         "Gothic");
1345
 
1346
    /**
1347
     * Ugaritic.
1348
     * 0x10380 - 0x1039F.
1349
     * @since 1.5
1350
     */
1351
    public static final UnicodeBlock UGARITIC
1352
      = new UnicodeBlock(0x10380, 0x1039F,
1353
                         "UGARITIC",
1354
                         "Ugaritic");
1355
 
1356
    /**
1357
     * Deseret.
1358
     * 0x10400 - 0x1044F.
1359
     * @since 1.5
1360
     */
1361
    public static final UnicodeBlock DESERET
1362
      = new UnicodeBlock(0x10400, 0x1044F,
1363
                         "DESERET",
1364
                         "Deseret");
1365
 
1366
    /**
1367
     * Shavian.
1368
     * 0x10450 - 0x1047F.
1369
     * @since 1.5
1370
     */
1371
    public static final UnicodeBlock SHAVIAN
1372
      = new UnicodeBlock(0x10450, 0x1047F,
1373
                         "SHAVIAN",
1374
                         "Shavian");
1375
 
1376
    /**
1377
     * Osmanya.
1378
     * 0x10480 - 0x104AF.
1379
     * @since 1.5
1380
     */
1381
    public static final UnicodeBlock OSMANYA
1382
      = new UnicodeBlock(0x10480, 0x104AF,
1383
                         "OSMANYA",
1384
                         "Osmanya");
1385
 
1386
    /**
1387
     * Cypriot Syllabary.
1388
     * 0x10800 - 0x1083F.
1389
     * @since 1.5
1390
     */
1391
    public static final UnicodeBlock CYPRIOT_SYLLABARY
1392
      = new UnicodeBlock(0x10800, 0x1083F,
1393
                         "CYPRIOT_SYLLABARY",
1394
                         "Cypriot Syllabary");
1395
 
1396
    /**
1397
     * Byzantine Musical Symbols.
1398
     * 0x1D000 - 0x1D0FF.
1399
     * @since 1.5
1400
     */
1401
    public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1402
      = new UnicodeBlock(0x1D000, 0x1D0FF,
1403
                         "BYZANTINE_MUSICAL_SYMBOLS",
1404
                         "Byzantine Musical Symbols");
1405
 
1406
    /**
1407
     * Musical Symbols.
1408
     * 0x1D100 - 0x1D1FF.
1409
     * @since 1.5
1410
     */
1411
    public static final UnicodeBlock MUSICAL_SYMBOLS
1412
      = new UnicodeBlock(0x1D100, 0x1D1FF,
1413
                         "MUSICAL_SYMBOLS",
1414
                         "Musical Symbols");
1415
 
1416
    /**
1417
     * Tai Xuan Jing Symbols.
1418
     * 0x1D300 - 0x1D35F.
1419
     * @since 1.5
1420
     */
1421
    public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1422
      = new UnicodeBlock(0x1D300, 0x1D35F,
1423
                         "TAI_XUAN_JING_SYMBOLS",
1424
                         "Tai Xuan Jing Symbols");
1425
 
1426
    /**
1427
     * Mathematical Alphanumeric Symbols.
1428
     * 0x1D400 - 0x1D7FF.
1429
     * @since 1.5
1430
     */
1431
    public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1432
      = new UnicodeBlock(0x1D400, 0x1D7FF,
1433
                         "MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1434
                         "Mathematical Alphanumeric Symbols");
1435
 
1436
    /**
1437
     * CJK Unified Ideographs Extension B.
1438
     * 0x20000 - 0x2A6DF.
1439
     * @since 1.5
1440
     */
1441
    public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1442
      = new UnicodeBlock(0x20000, 0x2A6DF,
1443
                         "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1444
                         "CJK Unified Ideographs Extension B");
1445
 
1446
    /**
1447
     * CJK Compatibility Ideographs Supplement.
1448
     * 0x2F800 - 0x2FA1F.
1449
     * @since 1.5
1450
     */
1451
    public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1452
      = new UnicodeBlock(0x2F800, 0x2FA1F,
1453
                         "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1454
                         "CJK Compatibility Ideographs Supplement");
1455
 
1456
    /**
1457
     * Tags.
1458
     * 0xE0000 - 0xE007F.
1459
     * @since 1.5
1460
     */
1461
    public static final UnicodeBlock TAGS
1462
      = new UnicodeBlock(0xE0000, 0xE007F,
1463
                         "TAGS",
1464
                         "Tags");
1465
 
1466
    /**
1467
     * Variation Selectors Supplement.
1468
     * 0xE0100 - 0xE01EF.
1469
     * @since 1.5
1470
     */
1471
    public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1472
      = new UnicodeBlock(0xE0100, 0xE01EF,
1473
                         "VARIATION_SELECTORS_SUPPLEMENT",
1474
                         "Variation Selectors Supplement");
1475
 
1476
    /**
1477
     * Supplementary Private Use Area-A.
1478
     * 0xF0000 - 0xFFFFF.
1479
     * @since 1.5
1480
     */
1481
    public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1482
      = new UnicodeBlock(0xF0000, 0xFFFFF,
1483
                         "SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1484
                         "Supplementary Private Use Area-A");
1485
 
1486
    /**
1487
     * Supplementary Private Use Area-B.
1488
     * 0x100000 - 0x10FFFF.
1489
     * @since 1.5
1490
     */
1491
    public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1492
      = new UnicodeBlock(0x100000, 0x10FFFF,
1493
                         "SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1494
                         "Supplementary Private Use Area-B");
1495
 
1496
    /**
1497
     * Surrogates Area.
1498
     * 'D800' - 'DFFF'.
1499
     * @deprecated As of 1.5, the three areas,
1500
     * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1501
     * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1502
     * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1503
     * by the Unicode standard, should be used in preference to
1504
     * this.  These are also returned from calls to <code>of(int)</code>
1505
     * and <code>of(char)</code>.
1506
     */
1507
    @Deprecated
1508
    public static final UnicodeBlock SURROGATES_AREA
1509
      = new UnicodeBlock(0xD800, 0xDFFF,
1510
                         "SURROGATES_AREA",
1511
                         "Surrogates Area");
1512
 
1513
    /**
1514
     * The defined subsets.
1515
     */
1516
    private static final UnicodeBlock sets[] = {
1517
      BASIC_LATIN,
1518
      LATIN_1_SUPPLEMENT,
1519
      LATIN_EXTENDED_A,
1520
      LATIN_EXTENDED_B,
1521
      IPA_EXTENSIONS,
1522
      SPACING_MODIFIER_LETTERS,
1523
      COMBINING_DIACRITICAL_MARKS,
1524
      GREEK,
1525
      CYRILLIC,
1526
      CYRILLIC_SUPPLEMENTARY,
1527
      ARMENIAN,
1528
      HEBREW,
1529
      ARABIC,
1530
      SYRIAC,
1531
      THAANA,
1532
      DEVANAGARI,
1533
      BENGALI,
1534
      GURMUKHI,
1535
      GUJARATI,
1536
      ORIYA,
1537
      TAMIL,
1538
      TELUGU,
1539
      KANNADA,
1540
      MALAYALAM,
1541
      SINHALA,
1542
      THAI,
1543
      LAO,
1544
      TIBETAN,
1545
      MYANMAR,
1546
      GEORGIAN,
1547
      HANGUL_JAMO,
1548
      ETHIOPIC,
1549
      CHEROKEE,
1550
      UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1551
      OGHAM,
1552
      RUNIC,
1553
      TAGALOG,
1554
      HANUNOO,
1555
      BUHID,
1556
      TAGBANWA,
1557
      KHMER,
1558
      MONGOLIAN,
1559
      LIMBU,
1560
      TAI_LE,
1561
      KHMER_SYMBOLS,
1562
      PHONETIC_EXTENSIONS,
1563
      LATIN_EXTENDED_ADDITIONAL,
1564
      GREEK_EXTENDED,
1565
      GENERAL_PUNCTUATION,
1566
      SUPERSCRIPTS_AND_SUBSCRIPTS,
1567
      CURRENCY_SYMBOLS,
1568
      COMBINING_MARKS_FOR_SYMBOLS,
1569
      LETTERLIKE_SYMBOLS,
1570
      NUMBER_FORMS,
1571
      ARROWS,
1572
      MATHEMATICAL_OPERATORS,
1573
      MISCELLANEOUS_TECHNICAL,
1574
      CONTROL_PICTURES,
1575
      OPTICAL_CHARACTER_RECOGNITION,
1576
      ENCLOSED_ALPHANUMERICS,
1577
      BOX_DRAWING,
1578
      BLOCK_ELEMENTS,
1579
      GEOMETRIC_SHAPES,
1580
      MISCELLANEOUS_SYMBOLS,
1581
      DINGBATS,
1582
      MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1583
      SUPPLEMENTAL_ARROWS_A,
1584
      BRAILLE_PATTERNS,
1585
      SUPPLEMENTAL_ARROWS_B,
1586
      MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1587
      SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1588
      MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1589
      CJK_RADICALS_SUPPLEMENT,
1590
      KANGXI_RADICALS,
1591
      IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1592
      CJK_SYMBOLS_AND_PUNCTUATION,
1593
      HIRAGANA,
1594
      KATAKANA,
1595
      BOPOMOFO,
1596
      HANGUL_COMPATIBILITY_JAMO,
1597
      KANBUN,
1598
      BOPOMOFO_EXTENDED,
1599
      KATAKANA_PHONETIC_EXTENSIONS,
1600
      ENCLOSED_CJK_LETTERS_AND_MONTHS,
1601
      CJK_COMPATIBILITY,
1602
      CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1603
      YIJING_HEXAGRAM_SYMBOLS,
1604
      CJK_UNIFIED_IDEOGRAPHS,
1605
      YI_SYLLABLES,
1606
      YI_RADICALS,
1607
      HANGUL_SYLLABLES,
1608
      HIGH_SURROGATES,
1609
      HIGH_PRIVATE_USE_SURROGATES,
1610
      LOW_SURROGATES,
1611
      PRIVATE_USE_AREA,
1612
      CJK_COMPATIBILITY_IDEOGRAPHS,
1613
      ALPHABETIC_PRESENTATION_FORMS,
1614
      ARABIC_PRESENTATION_FORMS_A,
1615
      VARIATION_SELECTORS,
1616
      COMBINING_HALF_MARKS,
1617
      CJK_COMPATIBILITY_FORMS,
1618
      SMALL_FORM_VARIANTS,
1619
      ARABIC_PRESENTATION_FORMS_B,
1620
      HALFWIDTH_AND_FULLWIDTH_FORMS,
1621
      SPECIALS,
1622
      LINEAR_B_SYLLABARY,
1623
      LINEAR_B_IDEOGRAMS,
1624
      AEGEAN_NUMBERS,
1625
      OLD_ITALIC,
1626
      GOTHIC,
1627
      UGARITIC,
1628
      DESERET,
1629
      SHAVIAN,
1630
      OSMANYA,
1631
      CYPRIOT_SYLLABARY,
1632
      BYZANTINE_MUSICAL_SYMBOLS,
1633
      MUSICAL_SYMBOLS,
1634
      TAI_XUAN_JING_SYMBOLS,
1635
      MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1636
      CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1637
      CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1638
      TAGS,
1639
      VARIATION_SELECTORS_SUPPLEMENT,
1640
      SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1641
      SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1642
    };
1643
  } // class UnicodeBlock
1644
 
1645
  /**
1646
   * A class to encompass all the properties of characters in the
1647
   * private use blocks in the Unicode standard.  This class extends
1648
   * UnassignedCharacters because the return type from getType() is
1649
   * different.
1650
   * @author Anthony Balkissoon abalkiss at redhat dot com
1651
   *
1652
   */
1653
  private static class PrivateUseCharacters extends UnassignedCharacters
1654
  {
1655
    /**
1656
     * Returns the type of the character cp.
1657
     */
1658
    static int getType(int cp)
1659
    {
1660
      // The upper 2 code points in any plane are considered unassigned,
1661
      // even in the private-use planes.
1662
      if ((cp & 0xffff) >= 0xfffe)
1663
        return UnassignedCharacters.getType(cp);
1664
      return PRIVATE_USE;
1665
    }
1666
 
1667
    /**
1668
     * Returns true if the character cp is defined.
1669
     */
1670
    static boolean isDefined(int cp)
1671
    {
1672
      // The upper 2 code points in any plane are considered unassigned,
1673
      // even in the private-use planes.
1674
      if ((cp & 0xffff) >= 0xfffe)
1675
        return UnassignedCharacters.isDefined(cp);
1676
      return true;
1677
    }
1678
 
1679
    /**
1680
     * Gets the directionality for the character cp.
1681
     */
1682
    static byte getDirectionality(int cp)
1683
    {
1684
      if ((cp & 0xffff) >= 0xfffe)
1685
        return UnassignedCharacters.getDirectionality(cp);
1686
      return DIRECTIONALITY_LEFT_TO_RIGHT;
1687
    }
1688
  }
1689
 
1690
  /**
1691
   * A class to encompass all the properties of code points that are
1692
   * currently undefined in the Unicode standard.
1693
   * @author Anthony Balkissoon abalkiss at redhat dot com
1694
   *
1695
   */
1696
  private static class UnassignedCharacters
1697
  {
1698
    /**
1699
     * Returns the numeric value for the unassigned characters.
1700
     * @param cp the character
1701
     * @param radix the radix (not used)
1702
     * @return the numeric value of this character in this radix
1703
     */
1704
    static int digit(int cp, int radix)
1705
    {
1706
      return -1;
1707
    }
1708
 
1709
    /**
1710
     * Returns the Unicode directionality property for unassigned
1711
     * characters.
1712
     * @param cp the character
1713
     * @return DIRECTIONALITY_UNDEFINED
1714
     */
1715
    static byte getDirectionality(int cp)
1716
    {
1717
      return DIRECTIONALITY_UNDEFINED;
1718
    }
1719
 
1720
    /**
1721
     * Returns -1, the numeric value for unassigned Unicode characters.
1722
     * @param cp the character
1723
     * @return -1
1724
     */
1725
    static int getNumericValue(int cp)
1726
    {
1727
      return -1;
1728
    }
1729
 
1730
    /**
1731
     * Returns UNASSIGNED, the type of unassigned Unicode characters.
1732
     * @param cp the character
1733
     * @return UNASSIGNED
1734
     */
1735
    static int getType(int cp)
1736
    {
1737
      return UNASSIGNED;
1738
    }
1739
 
1740
    /**
1741
     * Returns false to indiciate that the character is not defined in the
1742
     * Unicode standard.
1743
     * @param cp the character
1744
     * @return false
1745
     */
1746
    static boolean isDefined(int cp)
1747
    {
1748
      return false;
1749
    }
1750
 
1751
    /**
1752
     * Returns false to indicate that the character is not a digit.
1753
     * @param cp the character
1754
     * @return false
1755
     */
1756
    static boolean isDigit(int cp)
1757
    {
1758
      return false;
1759
    }
1760
 
1761
    /**
1762
     * Returns false to indicate that the character cannot be ignored
1763
     * within an identifier
1764
     * @param cp the character
1765
     * @return false
1766
     */
1767
    static boolean isIdentifierIgnorable(int cp)
1768
    {
1769
      return false;
1770
    }
1771
 
1772
    /**
1773
     * Returns false to indicate that the character cannot be part of a
1774
     * Java identifier.
1775
     * @param cp the character
1776
     * @return false
1777
     */
1778
    static boolean isJavaIdentifierPart(int cp)
1779
    {
1780
      return false;
1781
    }
1782
 
1783
    /**
1784
     * Returns false to indicate that the character cannot be start a
1785
     * Java identifier.
1786
     * @param cp the character
1787
     * @return false
1788
     */
1789
    static boolean isJavaIdentiferStart(int cp)
1790
    {
1791
      return false;
1792
    }
1793
 
1794
    /**
1795
     * Returns false to indicate that the character is not a letter.
1796
     * @param cp the character
1797
     * @return false
1798
     */
1799
    static boolean isLetter(int cp)
1800
    {
1801
      return false;
1802
    }
1803
 
1804
    /**
1805
     * Returns false to indicate that the character cannot is neither a letter
1806
     * nor a digit.
1807
     * @param cp the character
1808
     * @return false
1809
     */
1810
    static boolean isLetterOrDigit(int cp)
1811
    {
1812
      return false;
1813
    }
1814
 
1815
    /**
1816
     * Returns false to indicate that the character is not a lowercase letter.
1817
     * @param cp the character
1818
     * @return false
1819
     */
1820
    static boolean isLowerCase(int cp)
1821
    {
1822
      return false;
1823
    }
1824
 
1825
    /**
1826
     * Returns false to indicate that the character cannot is not mirrored.
1827
     * @param cp the character
1828
     * @return false
1829
     */
1830
    static boolean isMirrored(int cp)
1831
    {
1832
      return false;
1833
    }
1834
 
1835
    /**
1836
     * Returns false to indicate that the character is not a space character.
1837
     * @param cp the character
1838
     * @return false
1839
     */
1840
    static boolean isSpaceChar(int cp)
1841
    {
1842
      return false;
1843
    }
1844
 
1845
    /**
1846
     * Returns false to indicate that the character it not a titlecase letter.
1847
     * @param cp the character
1848
     * @return false
1849
     */
1850
    static boolean isTitleCase(int cp)
1851
    {
1852
      return false;
1853
    }
1854
 
1855
    /**
1856
     * Returns false to indicate that the character cannot be part of a
1857
     * Unicode identifier.
1858
     * @param cp the character
1859
     * @return false
1860
     */
1861
    static boolean isUnicodeIdentifierPart(int cp)
1862
    {
1863
      return false;
1864
    }
1865
 
1866
    /**
1867
     * Returns false to indicate that the character cannot start a
1868
     * Unicode identifier.
1869
     * @param cp the character
1870
     * @return false
1871
     */
1872
    static boolean isUnicodeIdentifierStart(int cp)
1873
    {
1874
      return false;
1875
    }
1876
 
1877
    /**
1878
     * Returns false to indicate that the character is not an uppercase letter.
1879
     * @param cp the character
1880
     * @return false
1881
     */
1882
    static boolean isUpperCase(int cp)
1883
    {
1884
      return false;
1885
    }
1886
 
1887
    /**
1888
     * Returns false to indicate that the character is not a whitespace
1889
     * character.
1890
     * @param cp the character
1891
     * @return false
1892
     */
1893
    static boolean isWhiteSpace(int cp)
1894
    {
1895
      return false;
1896
    }
1897
 
1898
    /**
1899
     * Returns cp to indicate this character has no lowercase conversion.
1900
     * @param cp the character
1901
     * @return cp
1902
     */
1903
    static int toLowerCase(int cp)
1904
    {
1905
      return cp;
1906
    }
1907
 
1908
    /**
1909
     * Returns cp to indicate this character has no titlecase conversion.
1910
     * @param cp the character
1911
     * @return cp
1912
     */
1913
    static int toTitleCase(int cp)
1914
    {
1915
      return cp;
1916
    }
1917
 
1918
    /**
1919
     * Returns cp to indicate this character has no uppercase conversion.
1920
     * @param cp the character
1921
     * @return cp
1922
     */
1923
    static int toUpperCase(int cp)
1924
    {
1925
      return cp;
1926
    }
1927
  }
1928
 
1929
  /**
1930
   * The immutable value of this Character.
1931
   *
1932
   * @serial the value of this Character
1933
   */
1934
  private final char value;
1935
 
1936
  /**
1937
   * Compatible with JDK 1.0+.
1938
   */
1939
  private static final long serialVersionUID = 3786198910865385080L;
1940
 
1941
  /**
1942
   * Smallest value allowed for radix arguments in Java. This value is 2.
1943
   *
1944
   * @see #digit(char, int)
1945
   * @see #forDigit(int, int)
1946
   * @see Integer#toString(int, int)
1947
   * @see Integer#valueOf(String)
1948
   */
1949
  public static final int MIN_RADIX = 2;
1950
 
1951
  /**
1952
   * Largest value allowed for radix arguments in Java. This value is 36.
1953
   *
1954
   * @see #digit(char, int)
1955
   * @see #forDigit(int, int)
1956
   * @see Integer#toString(int, int)
1957
   * @see Integer#valueOf(String)
1958
   */
1959
  public static final int MAX_RADIX = 36;
1960
 
1961
  /**
1962
   * The minimum value the char data type can hold.
1963
   * This value is <code>'\\u0000'</code>.
1964
   */
1965
  public static final char MIN_VALUE = '\u0000';
1966
 
1967
  /**
1968
   * The maximum value the char data type can hold.
1969
   * This value is <code>'\\uFFFF'</code>.
1970
   */
1971
  public static final char MAX_VALUE = '\uFFFF';
1972
 
1973
  /**
1974
   * The minimum Unicode 4.0 code point.  This value is <code>0</code>.
1975
   * @since 1.5
1976
   */
1977
  public static final int MIN_CODE_POINT = 0;
1978
 
1979
  /**
1980
   * The maximum Unicode 4.0 code point, which is greater than the range
1981
   * of the char data type.
1982
   * This value is <code>0x10FFFF</code>.
1983
   * @since 1.5
1984
   */
1985
  public static final int MAX_CODE_POINT = 0x10FFFF;
1986
 
1987
  /**
1988
   * The minimum Unicode high surrogate code unit, or
1989
   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1990
   * This value is <code>'\\uD800'</code>.
1991
   * @since 1.5
1992
   */
1993
  public static final char MIN_HIGH_SURROGATE = '\uD800';
1994
 
1995
  /**
1996
   * The maximum Unicode high surrogate code unit, or
1997
   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1998
   * This value is <code>'\\uDBFF'</code>.
1999
   * @since 1.5
2000
   */
2001
  public static final char MAX_HIGH_SURROGATE = '\uDBFF';
2002
 
2003
  /**
2004
   * The minimum Unicode low surrogate code unit, or
2005
   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
2006
   * This value is <code>'\\uDC00'</code>.
2007
   * @since 1.5
2008
   */
2009
  public static final char MIN_LOW_SURROGATE = '\uDC00';
2010
 
2011
  /**
2012
   * The maximum Unicode low surrogate code unit, or
2013
   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
2014
   * This value is <code>'\\uDFFF'</code>.
2015
   * @since 1.5
2016
   */
2017
  public static final char MAX_LOW_SURROGATE = '\uDFFF';
2018
 
2019
  /**
2020
   * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
2021
   * This value is <code>'\\uD800'</code>.
2022
   * @since 1.5
2023
   */
2024
  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
2025
 
2026
  /**
2027
   * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
2028
   * This value is <code>'\\uDFFF'</code>.
2029
   * @since 1.5
2030
   */
2031
  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
2032
 
2033
  /**
2034
   * The lowest possible supplementary Unicode code point (the first code
2035
   * point outside the basic multilingual plane (BMP)).
2036
   * This value is <code>0x10000</code>.
2037
   */
2038
  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
2039
 
2040
  /**
2041
   * Class object representing the primitive char data type.
2042
   *
2043
   * @since 1.1
2044
   */
2045
  public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');
2046
 
2047
  /**
2048
   * The number of bits needed to represent a <code>char</code>.
2049
   * @since 1.5
2050
   */
2051
  public static final int SIZE = 16;
2052
 
2053
  // Cache of boxed Character values, used by boxing conversions via
  // valueOf().  We must cache at least 0..127; MAX_CACHE controls how
  // much we actually cache.
  private static final int MAX_CACHE = 127;
  // The array reference never changes after class initialization, so it
  // is declared final.
  private static final Character[] charCache = new Character[MAX_CACHE + 1];
  static
  {
    // Count with an int: a char counter would wrap from 0xFFFF back to 0
    // and loop forever if MAX_CACHE were ever raised to Character.MAX_VALUE.
    for (int i = 0; i <= MAX_CACHE; i++)
      charCache[i] = new Character((char) i);
  }
2063
 
2064
  /**
2065
   * Lu = Letter, Uppercase (Informative).
2066
   *
2067
   * @since 1.1
2068
   */
2069
  public static final byte UPPERCASE_LETTER = 1;
2070
 
2071
  /**
2072
   * Ll = Letter, Lowercase (Informative).
2073
   *
2074
   * @since 1.1
2075
   */
2076
  public static final byte LOWERCASE_LETTER = 2;
2077
 
2078
  /**
2079
   * Lt = Letter, Titlecase (Informative).
2080
   *
2081
   * @since 1.1
2082
   */
2083
  public static final byte TITLECASE_LETTER = 3;
2084
 
2085
  /**
2086
   * Mn = Mark, Non-Spacing (Normative).
2087
   *
2088
   * @since 1.1
2089
   */
2090
  public static final byte NON_SPACING_MARK = 6;
2091
 
2092
  /**
2093
   * Mc = Mark, Spacing Combining (Normative).
2094
   *
2095
   * @since 1.1
2096
   */
2097
  public static final byte COMBINING_SPACING_MARK = 8;
2098
 
2099
  /**
2100
   * Me = Mark, Enclosing (Normative).
2101
   *
2102
   * @since 1.1
2103
   */
2104
  public static final byte ENCLOSING_MARK = 7;
2105
 
2106
  /**
2107
   * Nd = Number, Decimal Digit (Normative).
2108
   *
2109
   * @since 1.1
2110
   */
2111
  public static final byte DECIMAL_DIGIT_NUMBER = 9;
2112
 
2113
  /**
2114
   * Nl = Number, Letter (Normative).
2115
   *
2116
   * @since 1.1
2117
   */
2118
  public static final byte LETTER_NUMBER = 10;
2119
 
2120
  /**
2121
   * No = Number, Other (Normative).
2122
   *
2123
   * @since 1.1
2124
   */
2125
  public static final byte OTHER_NUMBER = 11;
2126
 
2127
  /**
2128
   * Zs = Separator, Space (Normative).
2129
   *
2130
   * @since 1.1
2131
   */
2132
  public static final byte SPACE_SEPARATOR = 12;
2133
 
2134
  /**
2135
   * Zl = Separator, Line (Normative).
2136
   *
2137
   * @since 1.1
2138
   */
2139
  public static final byte LINE_SEPARATOR = 13;
2140
 
2141
  /**
2142
   * Zp = Separator, Paragraph (Normative).
2143
   *
2144
   * @since 1.1
2145
   */
2146
  public static final byte PARAGRAPH_SEPARATOR = 14;
2147
 
2148
  /**
2149
   * Cc = Other, Control (Normative).
2150
   *
2151
   * @since 1.1
2152
   */
2153
  public static final byte CONTROL = 15;
2154
 
2155
  /**
2156
   * Cf = Other, Format (Normative).
2157
   *
2158
   * @since 1.1
2159
   */
2160
  public static final byte FORMAT = 16;
2161
 
2162
  /**
2163
   * Cs = Other, Surrogate (Normative).
2164
   *
2165
   * @since 1.1
2166
   */
2167
  public static final byte SURROGATE = 19;
2168
 
2169
  /**
2170
   * Co = Other, Private Use (Normative).
2171
   *
2172
   * @since 1.1
2173
   */
2174
  public static final byte PRIVATE_USE = 18;
2175
 
2176
  /**
2177
   * Cn = Other, Not Assigned (Normative).
2178
   *
2179
   * @since 1.1
2180
   */
2181
  public static final byte UNASSIGNED = 0;
2182
 
2183
  /**
2184
   * Lm = Letter, Modifier (Informative).
2185
   *
2186
   * @since 1.1
2187
   */
2188
  public static final byte MODIFIER_LETTER = 4;
2189
 
2190
  /**
2191
   * Lo = Letter, Other (Informative).
2192
   *
2193
   * @since 1.1
2194
   */
2195
  public static final byte OTHER_LETTER = 5;
2196
 
2197
  /**
2198
   * Pc = Punctuation, Connector (Informative).
2199
   *
2200
   * @since 1.1
2201
   */
2202
  public static final byte CONNECTOR_PUNCTUATION = 23;
2203
 
2204
  /**
2205
   * Pd = Punctuation, Dash (Informative).
2206
   *
2207
   * @since 1.1
2208
   */
2209
  public static final byte DASH_PUNCTUATION = 20;
2210
 
2211
  /**
2212
   * Ps = Punctuation, Open (Informative).
2213
   *
2214
   * @since 1.1
2215
   */
2216
  public static final byte START_PUNCTUATION = 21;
2217
 
2218
  /**
2219
   * Pe = Punctuation, Close (Informative).
2220
   *
2221
   * @since 1.1
2222
   */
2223
  public static final byte END_PUNCTUATION = 22;
2224
 
2225
  /**
2226
   * Pi = Punctuation, Initial Quote (Informative).
2227
   *
2228
   * @since 1.4
2229
   */
2230
  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
2231
 
2232
  /**
2233
   * Pf = Punctuation, Final Quote (Informative).
2234
   *
2235
   * @since 1.4
2236
   */
2237
  public static final byte FINAL_QUOTE_PUNCTUATION = 30;
2238
 
2239
  /**
2240
   * Po = Punctuation, Other (Informative).
2241
   *
2242
   * @since 1.1
2243
   */
2244
  public static final byte OTHER_PUNCTUATION = 24;
2245
 
2246
  /**
2247
   * Sm = Symbol, Math (Informative).
2248
   *
2249
   * @since 1.1
2250
   */
2251
  public static final byte MATH_SYMBOL = 25;
2252
 
2253
  /**
2254
   * Sc = Symbol, Currency (Informative).
2255
   *
2256
   * @since 1.1
2257
   */
2258
  public static final byte CURRENCY_SYMBOL = 26;
2259
 
2260
  /**
2261
   * Sk = Symbol, Modifier (Informative).
2262
   *
2263
   * @since 1.1
2264
   */
2265
  public static final byte MODIFIER_SYMBOL = 27;
2266
 
2267
  /**
2268
   * So = Symbol, Other (Informative).
2269
   *
2270
   * @since 1.1
2271
   */
2272
  public static final byte OTHER_SYMBOL = 28;
2273
 
2274
  /**
2275
   * Undefined bidirectional character type. Undefined char values have
2276
   * undefined directionality in the Unicode specification.
2277
   *
2278
   * @since 1.4
2279
   */
2280
  public static final byte DIRECTIONALITY_UNDEFINED = -1;
2281
 
2282
  /**
2283
   * Strong bidirectional character type "L".
2284
   *
2285
   * @since 1.4
2286
   */
2287
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
2288
 
2289
  /**
2290
   * Strong bidirectional character type "R".
2291
   *
2292
   * @since 1.4
2293
   */
2294
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
2295
 
2296
  /**
2297
   * Strong bidirectional character type "AL".
2298
   *
2299
   * @since 1.4
2300
   */
2301
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
2302
 
2303
  /**
2304
   * Weak bidirectional character type "EN".
2305
   *
2306
   * @since 1.4
2307
   */
2308
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
2309
 
2310
  /**
2311
   * Weak bidirectional character type "ES".
2312
   *
2313
   * @since 1.4
2314
   */
2315
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
2316
 
2317
  /**
2318
   * Weak bidirectional character type "ET".
2319
   *
2320
   * @since 1.4
2321
   */
2322
  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
2323
 
2324
  /**
2325
   * Weak bidirectional character type "AN".
2326
   *
2327
   * @since 1.4
2328
   */
2329
  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
2330
 
2331
  /**
2332
   * Weak bidirectional character type "CS".
2333
   *
2334
   * @since 1.4
2335
   */
2336
  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
2337
 
2338
  /**
2339
   * Weak bidirectional character type "NSM".
2340
   *
2341
   * @since 1.4
2342
   */
2343
  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
2344
 
2345
  /**
2346
   * Weak bidirectional character type "BN".
2347
   *
2348
   * @since 1.4
2349
   */
2350
  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
2351
 
2352
  /**
2353
   * Neutral bidirectional character type "B".
2354
   *
2355
   * @since 1.4
2356
   */
2357
  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
2358
 
2359
  /**
2360
   * Neutral bidirectional character type "S".
2361
   *
2362
   * @since 1.4
2363
   */
2364
  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
2365
 
2366
  /**
2367
   * Neutral bidirectional character type "WS".
2368
   *
2369
   * @since 1.4
2370
   */
2371
  public static final byte DIRECTIONALITY_WHITESPACE = 12;
2372
 
2373
  /**
2374
   * Neutral bidirectional character type "ON".
2375
   *
2376
   * @since 1.4
2377
   */
2378
  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
2379
 
2380
  /**
2381
   * Strong bidirectional character type "LRE".
2382
   *
2383
   * @since 1.4
2384
   */
2385
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
2386
 
2387
  /**
2388
   * Strong bidirectional character type "LRO".
2389
   *
2390
   * @since 1.4
2391
   */
2392
  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
2393
 
2394
  /**
2395
   * Strong bidirectional character type "RLE".
2396
   *
2397
   * @since 1.4
2398
   */
2399
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
2400
 
2401
  /**
2402
   * Strong bidirectional character type "RLO".
2403
   *
2404
   * @since 1.4
2405
   */
2406
  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
2407
 
2408
  /**
2409
   * Weak bidirectional character type "PDF".
2410
   *
2411
   * @since 1.4
2412
   */
2413
  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2414
 
2415
  /**
2416
   * Stores unicode block offset lookup table. Exploit package visibility of
2417
   * String.value to avoid copying the array.
2418
   * @see #readCodePoint(int)
2419
   * @see CharData#BLOCKS
2420
   */
2421
  private static final char[][] blocks =
2422
    new char[][]{
2423
                 String.zeroBasedStringValue(CharData.BLOCKS[0]),
2424
                 String.zeroBasedStringValue(CharData.BLOCKS[1]),
2425
                 String.zeroBasedStringValue(CharData.BLOCKS[2]),
2426
                 String.zeroBasedStringValue(CharData.BLOCKS[3]),
2427
                 String.zeroBasedStringValue(CharData.BLOCKS[4]),
2428
                 String.zeroBasedStringValue(CharData.BLOCKS[5]),
2429
                 String.zeroBasedStringValue(CharData.BLOCKS[6]),
2430
                 String.zeroBasedStringValue(CharData.BLOCKS[7]),
2431
                 String.zeroBasedStringValue(CharData.BLOCKS[8]),
2432
                 String.zeroBasedStringValue(CharData.BLOCKS[9]),
2433
                 String.zeroBasedStringValue(CharData.BLOCKS[10]),
2434
                 String.zeroBasedStringValue(CharData.BLOCKS[11]),
2435
                 String.zeroBasedStringValue(CharData.BLOCKS[12]),
2436
                 String.zeroBasedStringValue(CharData.BLOCKS[13]),
2437
                 String.zeroBasedStringValue(CharData.BLOCKS[14]),
2438
                 String.zeroBasedStringValue(CharData.BLOCKS[15]),
2439
                 String.zeroBasedStringValue(CharData.BLOCKS[16])};
2440
 
2441
  /**
2442
   * Stores unicode attribute offset lookup table. Exploit package visibility
2443
   * of String.value to avoid copying the array.
2444
   * @see CharData#DATA
2445
   */
2446
  private static final char[][] data =
2447
    new char[][]{
2448
                 String.zeroBasedStringValue(CharData.DATA[0]),
2449
                 String.zeroBasedStringValue(CharData.DATA[1]),
2450
                 String.zeroBasedStringValue(CharData.DATA[2]),
2451
                 String.zeroBasedStringValue(CharData.DATA[3]),
2452
                 String.zeroBasedStringValue(CharData.DATA[4]),
2453
                 String.zeroBasedStringValue(CharData.DATA[5]),
2454
                 String.zeroBasedStringValue(CharData.DATA[6]),
2455
                 String.zeroBasedStringValue(CharData.DATA[7]),
2456
                 String.zeroBasedStringValue(CharData.DATA[8]),
2457
                 String.zeroBasedStringValue(CharData.DATA[9]),
2458
                 String.zeroBasedStringValue(CharData.DATA[10]),
2459
                 String.zeroBasedStringValue(CharData.DATA[11]),
2460
                 String.zeroBasedStringValue(CharData.DATA[12]),
2461
                 String.zeroBasedStringValue(CharData.DATA[13]),
2462
                 String.zeroBasedStringValue(CharData.DATA[14]),
2463
                 String.zeroBasedStringValue(CharData.DATA[15]),
2464
                 String.zeroBasedStringValue(CharData.DATA[16])};
2465
 
2466
  /**
2467
   * Stores unicode numeric value attribute table. Exploit package visibility
2468
   * of String.value to avoid copying the array.
2469
   * @see CharData#NUM_VALUE
2470
   */
2471
  private static final char[][] numValue =
2472
    new char[][]{
2473
                 String.zeroBasedStringValue(CharData.NUM_VALUE[0]),
2474
                 String.zeroBasedStringValue(CharData.NUM_VALUE[1]),
2475
                 String.zeroBasedStringValue(CharData.NUM_VALUE[2]),
2476
                 String.zeroBasedStringValue(CharData.NUM_VALUE[3]),
2477
                 String.zeroBasedStringValue(CharData.NUM_VALUE[4]),
2478
                 String.zeroBasedStringValue(CharData.NUM_VALUE[5]),
2479
                 String.zeroBasedStringValue(CharData.NUM_VALUE[6]),
2480
                 String.zeroBasedStringValue(CharData.NUM_VALUE[7]),
2481
                 String.zeroBasedStringValue(CharData.NUM_VALUE[8]),
2482
                 String.zeroBasedStringValue(CharData.NUM_VALUE[9]),
2483
                 String.zeroBasedStringValue(CharData.NUM_VALUE[10]),
2484
                 String.zeroBasedStringValue(CharData.NUM_VALUE[11]),
2485
                 String.zeroBasedStringValue(CharData.NUM_VALUE[12]),
2486
                 String.zeroBasedStringValue(CharData.NUM_VALUE[13]),
2487
                 String.zeroBasedStringValue(CharData.NUM_VALUE[14]),
2488
                 String.zeroBasedStringValue(CharData.NUM_VALUE[15]),
2489
                 String.zeroBasedStringValue(CharData.NUM_VALUE[16])};
2490
 
2491
  /**
2492
   * Stores unicode uppercase attribute table. Exploit package visibility
2493
   * of String.value to avoid copying the array.
2494
   * @see CharData#UPPER
2495
   */
2496
  private static final char[][] upper =
2497
    new char[][]{
2498
                 String.zeroBasedStringValue(CharData.UPPER[0]),
2499
                 String.zeroBasedStringValue(CharData.UPPER[1]),
2500
                 String.zeroBasedStringValue(CharData.UPPER[2]),
2501
                 String.zeroBasedStringValue(CharData.UPPER[3]),
2502
                 String.zeroBasedStringValue(CharData.UPPER[4]),
2503
                 String.zeroBasedStringValue(CharData.UPPER[5]),
2504
                 String.zeroBasedStringValue(CharData.UPPER[6]),
2505
                 String.zeroBasedStringValue(CharData.UPPER[7]),
2506
                 String.zeroBasedStringValue(CharData.UPPER[8]),
2507
                 String.zeroBasedStringValue(CharData.UPPER[9]),
2508
                 String.zeroBasedStringValue(CharData.UPPER[10]),
2509
                 String.zeroBasedStringValue(CharData.UPPER[11]),
2510
                 String.zeroBasedStringValue(CharData.UPPER[12]),
2511
                 String.zeroBasedStringValue(CharData.UPPER[13]),
2512
                 String.zeroBasedStringValue(CharData.UPPER[14]),
2513
                 String.zeroBasedStringValue(CharData.UPPER[15]),
2514
                 String.zeroBasedStringValue(CharData.UPPER[16])};
2515
 
2516
  /**
2517
   * Stores unicode lowercase attribute table. Exploit package visibility
2518
   * of String.value to avoid copying the array.
2519
   * @see CharData#LOWER
2520
   */
2521
  private static final char[][] lower =
2522
    new char[][]{
2523
                 String.zeroBasedStringValue(CharData.LOWER[0]),
2524
                 String.zeroBasedStringValue(CharData.LOWER[1]),
2525
                 String.zeroBasedStringValue(CharData.LOWER[2]),
2526
                 String.zeroBasedStringValue(CharData.LOWER[3]),
2527
                 String.zeroBasedStringValue(CharData.LOWER[4]),
2528
                 String.zeroBasedStringValue(CharData.LOWER[5]),
2529
                 String.zeroBasedStringValue(CharData.LOWER[6]),
2530
                 String.zeroBasedStringValue(CharData.LOWER[7]),
2531
                 String.zeroBasedStringValue(CharData.LOWER[8]),
2532
                 String.zeroBasedStringValue(CharData.LOWER[9]),
2533
                 String.zeroBasedStringValue(CharData.LOWER[10]),
2534
                 String.zeroBasedStringValue(CharData.LOWER[11]),
2535
                 String.zeroBasedStringValue(CharData.LOWER[12]),
2536
                 String.zeroBasedStringValue(CharData.LOWER[13]),
2537
                 String.zeroBasedStringValue(CharData.LOWER[14]),
2538
                 String.zeroBasedStringValue(CharData.LOWER[15]),
2539
                 String.zeroBasedStringValue(CharData.LOWER[16])};
2540
 
2541
  /**
2542
   * Stores unicode direction attribute table. Exploit package visibility
2543
   * of String.value to avoid copying the array.
2544
   * @see CharData#DIRECTION
2545
   */
2546
  // Package visible for use by String.
2547
  static final char[][] direction =
2548
    new char[][]{
2549
                 String.zeroBasedStringValue(CharData.DIRECTION[0]),
2550
                 String.zeroBasedStringValue(CharData.DIRECTION[1]),
2551
                 String.zeroBasedStringValue(CharData.DIRECTION[2]),
2552
                 String.zeroBasedStringValue(CharData.DIRECTION[3]),
2553
                 String.zeroBasedStringValue(CharData.DIRECTION[4]),
2554
                 String.zeroBasedStringValue(CharData.DIRECTION[5]),
2555
                 String.zeroBasedStringValue(CharData.DIRECTION[6]),
2556
                 String.zeroBasedStringValue(CharData.DIRECTION[7]),
2557
                 String.zeroBasedStringValue(CharData.DIRECTION[8]),
2558
                 String.zeroBasedStringValue(CharData.DIRECTION[9]),
2559
                 String.zeroBasedStringValue(CharData.DIRECTION[10]),
2560
                 String.zeroBasedStringValue(CharData.DIRECTION[11]),
2561
                 String.zeroBasedStringValue(CharData.DIRECTION[12]),
2562
                 String.zeroBasedStringValue(CharData.DIRECTION[13]),
2563
                 String.zeroBasedStringValue(CharData.DIRECTION[14]),
2564
                 String.zeroBasedStringValue(CharData.DIRECTION[15]),
2565
                 String.zeroBasedStringValue(CharData.DIRECTION[16])};
2566
 
2567
  /**
2568
   * Stores unicode titlecase table. Exploit package visibility of
2569
   * String.value to avoid copying the array.
2570
   * @see CharData#TITLE
2571
   */
2572
  private static final char[] title = String.zeroBasedStringValue(CharData.TITLE);
2573
 
2574
  /**
2575
   * Mask for grabbing the type out of the contents of data.
2576
   * @see CharData#DATA
2577
   */
2578
  private static final int TYPE_MASK = 0x1F;
2579
 
2580
  /**
2581
   * Mask for grabbing the non-breaking space flag out of the contents of
2582
   * data.
2583
   * @see CharData#DATA
2584
   */
2585
  private static final int NO_BREAK_MASK = 0x20;
2586
 
2587
  /**
2588
   * Mask for grabbing the mirrored directionality flag out of the contents
2589
   * of data.
2590
   * @see CharData#DATA
2591
   */
2592
  private static final int MIRROR_MASK = 0x40;
2593
 
2594
  /**
2595
   * Grabs an attribute offset from the Unicode attribute database. The lower
2596
   * 5 bits are the character type, the next 2 bits are flags, and the top
2597
   * 9 bits are the offset into the attribute tables.
2598
   *
2599
   * @param codePoint the character to look up
2600
   * @return the character's attribute offset and type
2601
   * @see #TYPE_MASK
2602
   * @see #NO_BREAK_MASK
2603
   * @see #MIRROR_MASK
2604
   * @see CharData#DATA
2605
   * @see CharData#SHIFT
2606
   */
2607
  // Package visible for use in String.
2608
  static char readCodePoint(int codePoint)
2609
  {
2610
    int plane = codePoint >>> 16;
2611
    char offset = (char) (codePoint & 0xffff);
2612
    return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)];
2613
  }
2614
 
2615
  /**
   * Creates a Character object holding the given char.
   *
   * @param value the character to wrap
   */
  public Character(char value)
  {
    // The wrapped char is stored once and never modified afterwards.
    this.value = value;
  }
2624
 
2625
  /**
2626
   * Returns the character which has been wrapped by this class.
2627
   *
2628
   * @return the character wrapped
2629
   */
2630
  public char charValue()
2631
  {
2632
    return value;
2633
  }
2634
 
2635
  /**
2636
   * Returns the numerical value (unsigned) of the wrapped character.
2637
   * Range of returned values: 0x0000-0xFFFF.
2638
   *
2639
   * @return the value of the wrapped character
2640
   */
2641
  public int hashCode()
2642
  {
2643
    return value;
2644
  }
2645
 
2646
  /**
2647
   * Determines if an object is equal to this object. This is only true for
2648
   * another Character object wrapping the same value.
2649
   *
2650
   * @param o object to compare
2651
   * @return true if o is a Character with the same value
2652
   */
2653
  public boolean equals(Object o)
2654
  {
2655
    return o instanceof Character && value == ((Character) o).value;
2656
  }
2657
 
2658
  /**
2659
   * Converts the wrapped character into a String.
2660
   *
2661
   * @return a String containing one character -- the wrapped character
2662
   *         of this instance
2663
   */
2664
  public String toString()
2665
  {
2666
    // Package constructor avoids an array copy.
2667
    return new String(new char[] { value }, 0, 1, true);
2668
  }
2669
 
2670
  /**
2671
   * Returns a String of length 1 representing the specified character.
2672
   *
2673
   * @param ch the character to convert
2674
   * @return a String containing the character
2675
   * @since 1.4
2676
   */
2677
  public static String toString(char ch)
2678
  {
2679
    // Package constructor avoids an array copy.
2680
    return new String(new char[] { ch }, 0, 1, true);
2681
  }
2682
 
2683
  /**
2684
   * Determines if a character is a Unicode lowercase letter. For example,
2685
   * <code>'a'</code> is lowercase.  Returns true if getType() returns
2686
   * LOWERCASE_LETTER.
2687
   * <br>
2688
   * lowercase = [Ll]
2689
   *
2690
   * @param ch character to test
2691
   * @return true if ch is a Unicode lowercase letter, else false
2692
   * @see #isUpperCase(char)
2693
   * @see #isTitleCase(char)
2694
   * @see #toLowerCase(char)
2695
   * @see #getType(char)
2696
   */
2697
  public static boolean isLowerCase(char ch)
2698
  {
2699
    return isLowerCase((int)ch);
2700
  }
2701
 
2702
  /**
2703
   * Determines if a character is a Unicode lowercase letter. For example,
2704
   * <code>'a'</code> is lowercase.  Returns true if getType() returns
2705
   * LOWERCASE_LETTER.
2706
   * <br>
2707
   * lowercase = [Ll]
2708
   *
2709
   * @param codePoint character to test
2710
   * @return true if ch is a Unicode lowercase letter, else false
2711
   * @see #isUpperCase(char)
2712
   * @see #isTitleCase(char)
2713
   * @see #toLowerCase(char)
2714
   * @see #getType(char)
2715
   *
2716
   * @since 1.5
2717
   */
2718
  public static boolean isLowerCase(int codePoint)
2719
  {
2720
    return getType(codePoint) == LOWERCASE_LETTER;
2721
  }
2722
 
2723
  /**
2724
   * Determines if a character is a Unicode uppercase letter. For example,
2725
   * <code>'A'</code> is uppercase.  Returns true if getType() returns
2726
   * UPPERCASE_LETTER.
2727
   * <br>
2728
   * uppercase = [Lu]
2729
   *
2730
   * @param ch character to test
2731
   * @return true if ch is a Unicode uppercase letter, else false
2732
   * @see #isLowerCase(char)
2733
   * @see #isTitleCase(char)
2734
   * @see #toUpperCase(char)
2735
   * @see #getType(char)
2736
   */
2737
  public static boolean isUpperCase(char ch)
2738
  {
2739
    return isUpperCase((int)ch);
2740
  }
2741
 
2742
  /**
2743
   * Determines if a character is a Unicode uppercase letter. For example,
2744
   * <code>'A'</code> is uppercase.  Returns true if getType() returns
2745
   * UPPERCASE_LETTER.
2746
   * <br>
2747
   * uppercase = [Lu]
2748
   *
2749
   * @param codePoint character to test
2750
   * @return true if ch is a Unicode uppercase letter, else false
2751
   * @see #isLowerCase(char)
2752
   * @see #isTitleCase(char)
2753
   * @see #toUpperCase(char)
2754
   * @see #getType(char)
2755
   *
2756
   * @since 1.5
2757
   */
2758
  public static boolean isUpperCase(int codePoint)
2759
  {
2760
    return getType(codePoint) == UPPERCASE_LETTER;
2761
  }
2762
 
2763
  /**
2764
   * Determines if a character is a Unicode titlecase letter. For example,
2765
   * the character "Lj" (Latin capital L with small letter j) is titlecase.
2766
   * True if getType() returns TITLECASE_LETTER.
2767
   * <br>
2768
   * titlecase = [Lt]
2769
   *
2770
   * @param ch character to test
2771
   * @return true if ch is a Unicode titlecase letter, else false
2772
   * @see #isLowerCase(char)
2773
   * @see #isUpperCase(char)
2774
   * @see #toTitleCase(char)
2775
   * @see #getType(char)
2776
   */
2777
  public static boolean isTitleCase(char ch)
2778
  {
2779
    return isTitleCase((int)ch);
2780
  }
2781
 
2782
  /**
2783
   * Determines if a character is a Unicode titlecase letter. For example,
2784
   * the character "Lj" (Latin capital L with small letter j) is titlecase.
2785
   * True if getType() returns TITLECASE_LETTER.
2786
   * <br>
2787
   * titlecase = [Lt]
2788
   *
2789
   * @param codePoint character to test
2790
   * @return true if ch is a Unicode titlecase letter, else false
2791
   * @see #isLowerCase(char)
2792
   * @see #isUpperCase(char)
2793
   * @see #toTitleCase(char)
2794
   * @see #getType(char)
2795
   *
2796
   * @since 1.5
2797
   */
2798
  public static boolean isTitleCase(int codePoint)
2799
  {
2800
    return getType(codePoint) == TITLECASE_LETTER;
2801
  }
2802
 
2803
 
2804
  /**
2805
   * Determines if a character is a Unicode decimal digit. For example,
2806
   * <code>'0'</code> is a digit.  A character is a Unicode digit if
2807
   * getType() returns DECIMAL_DIGIT_NUMBER.
2808
   * <br>
2809
   * Unicode decimal digit = [Nd]
2810
   *
2811
   * @param ch character to test
2812
   * @return true if ch is a Unicode decimal digit, else false
2813
   * @see #digit(char, int)
2814
   * @see #forDigit(int, int)
2815
   * @see #getType(char)
2816
   */
2817
  public static boolean isDigit(char ch)
2818
  {
2819
    return isDigit((int)ch);
2820
  }
2821
 
2822
  /**
2823
   * Determines if a character is a Unicode decimal digit. For example,
2824
   * <code>'0'</code> is a digit. A character is a Unicode digit if
2825
   * getType() returns DECIMAL_DIGIT_NUMBER.
2826
   * <br>
2827
   * Unicode decimal digit = [Nd]
2828
   *
2829
   * @param codePoint character to test
2830
   * @return true if ch is a Unicode decimal digit, else false
2831
   * @see #digit(char, int)
2832
   * @see #forDigit(int, int)
2833
   * @see #getType(char)
2834
   *
2835
   * @since 1.5
2836
   */
2837
 
2838
  public static boolean isDigit(int codePoint)
2839
  {
2840
    return getType(codePoint) == DECIMAL_DIGIT_NUMBER;
2841
  }
2842
 
2843
  /**
2844
   * Determines if a character is part of the Unicode Standard. This is an
2845
   * evolving standard, but covers every character in the data file.
2846
   * <br>
2847
   * defined = not [Cn]
2848
   *
2849
   * @param ch character to test
2850
   * @return true if ch is a Unicode character, else false
2851
   * @see #isDigit(char)
2852
   * @see #isLetter(char)
2853
   * @see #isLetterOrDigit(char)
2854
   * @see #isLowerCase(char)
2855
   * @see #isTitleCase(char)
2856
   * @see #isUpperCase(char)
2857
   */
2858
  public static boolean isDefined(char ch)
2859
  {
2860
    return isDefined((int)ch);
2861
  }
2862
 
2863
  /**
2864
   * Determines if a character is part of the Unicode Standard. This is an
2865
   * evolving standard, but covers every character in the data file.
2866
   * <br>
2867
   * defined = not [Cn]
2868
   *
2869
   * @param codePoint character to test
2870
   * @return true if ch is a Unicode character, else false
2871
   * @see #isDigit(char)
2872
   * @see #isLetter(char)
2873
   * @see #isLetterOrDigit(char)
2874
   * @see #isLowerCase(char)
2875
   * @see #isTitleCase(char)
2876
   * @see #isUpperCase(char)
2877
   *
2878
   * @since 1.5
2879
   */
2880
  public static boolean isDefined(int codePoint)
2881
  {
2882
    return getType(codePoint) != UNASSIGNED;
2883
  }
2884
 
2885
  /**
2886
   * Determines if a character is a Unicode letter. Not all letters have case,
2887
   * so this may return true when isLowerCase and isUpperCase return false.
2888
   * A character is a Unicode letter if getType() returns one of
2889
   * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2890
   * or OTHER_LETTER.
2891
   * <br>
2892
   * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2893
   *
2894
   * @param ch character to test
2895
   * @return true if ch is a Unicode letter, else false
2896
   * @see #isDigit(char)
2897
   * @see #isJavaIdentifierStart(char)
2898
   * @see #isJavaLetter(char)
2899
   * @see #isJavaLetterOrDigit(char)
2900
   * @see #isLetterOrDigit(char)
2901
   * @see #isLowerCase(char)
2902
   * @see #isTitleCase(char)
2903
   * @see #isUnicodeIdentifierStart(char)
2904
   * @see #isUpperCase(char)
2905
   */
2906
  public static boolean isLetter(char ch)
2907
  {
2908
    return isLetter((int)ch);
2909
  }
2910
 
2911
  /**
2912
   * Determines if a character is a Unicode letter. Not all letters have case,
2913
   * so this may return true when isLowerCase and isUpperCase return false.
2914
   * A character is a Unicode letter if getType() returns one of
2915
   * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2916
   * or OTHER_LETTER.
2917
   * <br>
2918
   * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2919
   *
2920
   * @param codePoint character to test
2921
   * @return true if ch is a Unicode letter, else false
2922
   * @see #isDigit(char)
2923
   * @see #isJavaIdentifierStart(char)
2924
   * @see #isJavaLetter(char)
2925
   * @see #isJavaLetterOrDigit(char)
2926
   * @see #isLetterOrDigit(char)
2927
   * @see #isLowerCase(char)
2928
   * @see #isTitleCase(char)
2929
   * @see #isUnicodeIdentifierStart(char)
2930
   * @see #isUpperCase(char)
2931
   *
2932
   * @since 1.5
2933
   */
2934
  public static boolean isLetter(int codePoint)
2935
  {
2936
    return ((1 << getType(codePoint))
2937
        & ((1 << UPPERCASE_LETTER)
2938
            | (1 << LOWERCASE_LETTER)
2939
            | (1 << TITLECASE_LETTER)
2940
            | (1 << MODIFIER_LETTER)
2941
            | (1 << OTHER_LETTER))) != 0;
2942
  }
2943
  /**
2944
   * Returns the index into the given CharSequence that is offset
2945
   * <code>codePointOffset</code> code points from <code>index</code>.
2946
   * @param seq the CharSequence
2947
   * @param index the start position in the CharSequence
2948
   * @param codePointOffset the number of code points offset from the start
2949
   * position
2950
   * @return the index into the CharSequence that is codePointOffset code
2951
   * points offset from index
2952
   *
2953
   * @throws NullPointerException if seq is null
2954
   * @throws IndexOutOfBoundsException if index is negative or greater than the
2955
   * length of the sequence.
2956
   * @throws IndexOutOfBoundsException if codePointOffset is positive and the
2957
   * subsequence from index to the end of seq has fewer than codePointOffset
2958
   * code points
2959
   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2960
   * subsequence from the start of seq to index has fewer than
2961
   * (-codePointOffset) code points
2962
   * @since 1.5
2963
   */
2964
  public static int offsetByCodePoints(CharSequence seq,
2965
                                       int index,
2966
                                       int codePointOffset)
2967
  {
2968
    int len = seq.length();
2969
    if (index < 0 || index > len)
2970
      throw new IndexOutOfBoundsException();
2971
 
2972
    int numToGo = codePointOffset;
2973
    int offset = index;
2974
    int adjust = 1;
2975
    if (numToGo >= 0)
2976
      {
2977
        for (; numToGo > 0; offset++)
2978
          {
2979
            numToGo--;
2980
            if (Character.isHighSurrogate(seq.charAt(offset))
2981
                && (offset + 1) < len
2982
                && Character.isLowSurrogate(seq.charAt(offset + 1)))
2983
              offset++;
2984
          }
2985
        return offset;
2986
      }
2987
    else
2988
      {
2989
        numToGo *= -1;
2990
        for (; numToGo > 0;)
2991
          {
2992
            numToGo--;
2993
            offset--;
2994
            if (Character.isLowSurrogate(seq.charAt(offset))
2995
                && (offset - 1) >= 0
2996
                && Character.isHighSurrogate(seq.charAt(offset - 1)))
2997
              offset--;
2998
          }
2999
        return offset;
3000
      }
3001
  }
3002
 
3003
  /**
3004
   * Returns the index into the given char subarray that is offset
3005
   * <code>codePointOffset</code> code points from <code>index</code>.
3006
   * @param a the char array
3007
   * @param start the start index of the subarray
3008
   * @param count the length of the subarray
3009
   * @param index the index to be offset
3010
   * @param codePointOffset the number of code points offset from <code>index
3011
   * </code>
3012
   * @return the index into the char array
3013
   *
3014
   * @throws NullPointerException if a is null
3015
   * @throws IndexOutOfBoundsException if start or count is negative or if
3016
   * start + count is greater than the length of the array
3017
   * @throws IndexOutOfBoundsException if index is less than start or larger
3018
   * than start + count
3019
   * @throws IndexOutOfBoundsException if codePointOffset is positive and the
3020
   * subarray from index to start + count - 1 has fewer than codePointOffset
3021
   * code points.
3022
   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
3023
   * subarray from start to index - 1 has fewer than (-codePointOffset) code
3024
   * points
3025
   *
3026
   * @since 1.5
3027
   */
3028
  public static int offsetByCodePoints(char[] a,
3029
                                       int start,
3030
                                       int count,
3031
                                       int index,
3032
                                       int codePointOffset)
3033
  {
3034
    int len = a.length;
3035
    int end = start + count;
3036
    if (start < 0 || count < 0 || end > len || index < start || index > end)
3037
      throw new IndexOutOfBoundsException();
3038
 
3039
    int numToGo = codePointOffset;
3040
    int offset = index;
3041
    int adjust = 1;
3042
    if (numToGo >= 0)
3043
      {
3044
        for (; numToGo > 0; offset++)
3045
          {
3046
            numToGo--;
3047
            if (Character.isHighSurrogate(a[offset])
3048
                && (offset + 1) < len
3049
                && Character.isLowSurrogate(a[offset + 1]))
3050
              offset++;
3051
          }
3052
        return offset;
3053
      }
3054
    else
3055
      {
3056
        numToGo *= -1;
3057
        for (; numToGo > 0;)
3058
          {
3059
            numToGo--;
3060
            offset--;
3061
            if (Character.isLowSurrogate(a[offset])
3062
                && (offset - 1) >= 0
3063
                && Character.isHighSurrogate(a[offset - 1]))
3064
              offset--;
3065
            if (offset < start)
3066
              throw new IndexOutOfBoundsException();
3067
          }
3068
        return offset;
3069
      }
3070
 
3071
  }
3072
 
3073
  /**
3074
   * Returns the number of Unicode code points in the specified range of the
3075
   * given CharSequence.  The first char in the range is at position
3076
   * beginIndex and the last one is at position endIndex - 1.  Paired
3077
   * surrogates (supplementary characters are represented by a pair of chars -
3078
   * one from the high surrogates and one from the low surrogates)
3079
   * count as just one code point.
3080
   * @param seq the CharSequence to inspect
3081
   * @param beginIndex the beginning of the range
3082
   * @param endIndex the end of the range
3083
   * @return the number of Unicode code points in the given range of the
3084
   * sequence
3085
   * @throws NullPointerException if seq is null
3086
   * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
3087
   * larger than the length of seq, or if beginIndex is greater than endIndex.
3088
   * @since 1.5
3089
   */
3090
  public static int codePointCount(CharSequence seq, int beginIndex,
3091
                                   int endIndex)
3092
  {
3093
    int len = seq.length();
3094
    if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
3095
      throw new IndexOutOfBoundsException();
3096
 
3097
    int count = 0;
3098
    for (int i = beginIndex; i < endIndex; i++)
3099
      {
3100
        count++;
3101
        // If there is a pairing, count it only once.
3102
        if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
3103
            && isLowSurrogate(seq.charAt(i + 1)))
3104
          i ++;
3105
      }
3106
    return count;
3107
  }
3108
 
3109
  /**
3110
   * Returns the number of Unicode code points in the specified range of the
3111
   * given char array.  The first char in the range is at position
3112
   * offset and the length of the range is count.  Paired surrogates
3113
   * (supplementary characters are represented by a pair of chars -
3114
   * one from the high surrogates and one from the low surrogates)
3115
   * count as just one code point.
3116
   * @param a the char array to inspect
3117
   * @param offset the beginning of the range
3118
   * @param count the length of the range
3119
   * @return the number of Unicode code points in the given range of the
3120
   * array
3121
   * @throws NullPointerException if a is null
3122
   * @throws IndexOutOfBoundsException if offset or count is negative or if
3123
   * offset + countendIndex is larger than the length of a.
3124
   * @since 1.5
3125
   */
3126
  public static int codePointCount(char[] a, int offset,
3127
                                   int count)
3128
  {
3129
    int len = a.length;
3130
    int end = offset + count;
3131
    if (offset < 0 || count < 0 || end > len)
3132
      throw new IndexOutOfBoundsException();
3133
 
3134
    int counter = 0;
3135
    for (int i = offset; i < end; i++)
3136
      {
3137
        counter++;
3138
        // If there is a pairing, count it only once.
3139
        if (isHighSurrogate(a[i]) && (i + 1) < end
3140
            && isLowSurrogate(a[i + 1]))
3141
          i ++;
3142
      }
3143
    return counter;
3144
  }
3145
 
3146
  /**
3147
   * Determines if a character is a Unicode letter or a Unicode digit. This
3148
   * is the combination of isLetter and isDigit.
3149
   * <br>
3150
   * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3151
   *
3152
   * @param ch character to test
3153
   * @return true if ch is a Unicode letter or a Unicode digit, else false
3154
   * @see #isDigit(char)
3155
   * @see #isJavaIdentifierPart(char)
3156
   * @see #isJavaLetter(char)
3157
   * @see #isJavaLetterOrDigit(char)
3158
   * @see #isLetter(char)
3159
   * @see #isUnicodeIdentifierPart(char)
3160
   */
3161
  public static boolean isLetterOrDigit(char ch)
3162
  {
3163
    return isLetterOrDigit((int)ch);
3164
  }
3165
 
3166
  /**
3167
   * Determines if a character is a Unicode letter or a Unicode digit. This
3168
   * is the combination of isLetter and isDigit.
3169
   * <br>
3170
   * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3171
   *
3172
   * @param codePoint character to test
3173
   * @return true if ch is a Unicode letter or a Unicode digit, else false
3174
   * @see #isDigit(char)
3175
   * @see #isJavaIdentifierPart(char)
3176
   * @see #isJavaLetter(char)
3177
   * @see #isJavaLetterOrDigit(char)
3178
   * @see #isLetter(char)
3179
   * @see #isUnicodeIdentifierPart(char)
3180
   *
3181
   * @since 1.5
3182
   */
3183
  public static boolean isLetterOrDigit(int codePoint)
3184
  {
3185
    return ((1 << getType(codePoint))
3186
        & ((1 << UPPERCASE_LETTER)
3187
           | (1 << LOWERCASE_LETTER)
3188
           | (1 << TITLECASE_LETTER)
3189
           | (1 << MODIFIER_LETTER)
3190
           | (1 << OTHER_LETTER)
3191
           | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
3192
  }
3193
 
3194
  /**
3195
   * Determines if a character can start a Java identifier. This is the
3196
   * combination of isLetter, any character where getType returns
3197
   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3198
   * (like '_').
3199
   *
3200
   * @param ch character to test
3201
   * @return true if ch can start a Java identifier, else false
3202
   * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
3203
   * @see #isJavaLetterOrDigit(char)
3204
   * @see #isJavaIdentifierStart(char)
3205
   * @see #isJavaIdentifierPart(char)
3206
   * @see #isLetter(char)
3207
   * @see #isLetterOrDigit(char)
3208
   * @see #isUnicodeIdentifierStart(char)
3209
   */
3210
  public static boolean isJavaLetter(char ch)
3211
  {
3212
    return isJavaIdentifierStart(ch);
3213
  }
3214
 
3215
  /**
3216
   * Determines if a character can follow the first letter in
3217
   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3218
   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3219
   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3220
   * or isIdentifierIgnorable.
3221
   *
3222
   * @param ch character to test
3223
   * @return true if ch can follow the first letter in a Java identifier
3224
   * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
3225
   * @see #isJavaLetter(char)
3226
   * @see #isJavaIdentifierStart(char)
3227
   * @see #isJavaIdentifierPart(char)
3228
   * @see #isLetter(char)
3229
   * @see #isLetterOrDigit(char)
3230
   * @see #isUnicodeIdentifierPart(char)
3231
   * @see #isIdentifierIgnorable(char)
3232
   */
3233
  public static boolean isJavaLetterOrDigit(char ch)
3234
  {
3235
    return isJavaIdentifierPart(ch);
3236
  }
3237
 
3238
  /**
3239
   * Determines if a character can start a Java identifier. This is the
3240
   * combination of isLetter, any character where getType returns
3241
   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3242
   * (like '_').
3243
   * <br>
3244
   * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3245
   *
3246
   * @param ch character to test
3247
   * @return true if ch can start a Java identifier, else false
3248
   * @see #isJavaIdentifierPart(char)
3249
   * @see #isLetter(char)
3250
   * @see #isUnicodeIdentifierStart(char)
3251
   * @since 1.1
3252
   */
3253
  public static boolean isJavaIdentifierStart(char ch)
3254
  {
3255
    return isJavaIdentifierStart((int)ch);
3256
  }
3257
 
3258
  /**
3259
   * Determines if a character can start a Java identifier. This is the
3260
   * combination of isLetter, any character where getType returns
3261
   * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3262
   * (like '_').
3263
   * <br>
3264
   * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3265
   *
3266
   * @param codePoint character to test
3267
   * @return true if ch can start a Java identifier, else false
3268
   * @see #isJavaIdentifierPart(char)
3269
   * @see #isLetter(char)
3270
   * @see #isUnicodeIdentifierStart(char)
3271
   * @since 1.5
3272
   */
3273
  public static boolean isJavaIdentifierStart(int codePoint)
3274
  {
3275
    return ((1 << getType(codePoint))
3276
            & ((1 << UPPERCASE_LETTER)
3277
               | (1 << LOWERCASE_LETTER)
3278
               | (1 << TITLECASE_LETTER)
3279
               | (1 << MODIFIER_LETTER)
3280
               | (1 << OTHER_LETTER)
3281
               | (1 << LETTER_NUMBER)
3282
               | (1 << CURRENCY_SYMBOL)
3283
               | (1 << CONNECTOR_PUNCTUATION))) != 0;
3284
  }
3285
 
3286
  /**
3287
   * Determines if a character can follow the first letter in
3288
   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3289
   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3290
   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3291
   * or isIdentifierIgnorable.
3292
   * <br>
3293
   * Java identifier extender =
3294
   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3295
   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3296
   *
3297
   * @param ch character to test
3298
   * @return true if ch can follow the first letter in a Java identifier
3299
   * @see #isIdentifierIgnorable(char)
3300
   * @see #isJavaIdentifierStart(char)
3301
   * @see #isLetterOrDigit(char)
3302
   * @see #isUnicodeIdentifierPart(char)
3303
   * @since 1.1
3304
   */
3305
  public static boolean isJavaIdentifierPart(char ch)
3306
  {
3307
    return isJavaIdentifierPart((int)ch);
3308
  }
3309
 
3310
  /**
3311
   * Determines if a character can follow the first letter in
3312
   * a Java identifier.  This is the combination of isJavaLetter (isLetter,
3313
   * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3314
   * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3315
   * or isIdentifierIgnorable.
3316
   * <br>
3317
   * Java identifier extender =
3318
   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3319
   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3320
   *
3321
   * @param codePoint character to test
3322
   * @return true if ch can follow the first letter in a Java identifier
3323
   * @see #isIdentifierIgnorable(char)
3324
   * @see #isJavaIdentifierStart(char)
3325
   * @see #isLetterOrDigit(char)
3326
   * @see #isUnicodeIdentifierPart(char)
3327
   * @since 1.5
3328
   */
3329
  public static boolean isJavaIdentifierPart(int codePoint)
3330
  {
3331
    int category = getType(codePoint);
3332
    return ((1 << category)
3333
            & ((1 << UPPERCASE_LETTER)
3334
               | (1 << LOWERCASE_LETTER)
3335
               | (1 << TITLECASE_LETTER)
3336
               | (1 << MODIFIER_LETTER)
3337
               | (1 << OTHER_LETTER)
3338
               | (1 << NON_SPACING_MARK)
3339
               | (1 << COMBINING_SPACING_MARK)
3340
               | (1 << DECIMAL_DIGIT_NUMBER)
3341
               | (1 << LETTER_NUMBER)
3342
               | (1 << CURRENCY_SYMBOL)
3343
               | (1 << CONNECTOR_PUNCTUATION)
3344
               | (1 << FORMAT))) != 0
3345
      || (category == CONTROL && isIdentifierIgnorable(codePoint));
3346
  }
3347
 
3348
  /**
3349
   * Determines if a character can start a Unicode identifier.  Only
3350
   * letters can start a Unicode identifier, but this includes characters
3351
   * in LETTER_NUMBER.
3352
   * <br>
3353
   * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3354
   *
3355
   * @param ch character to test
3356
   * @return true if ch can start a Unicode identifier, else false
3357
   * @see #isJavaIdentifierStart(char)
3358
   * @see #isLetter(char)
3359
   * @see #isUnicodeIdentifierPart(char)
3360
   * @since 1.1
3361
   */
3362
  public static boolean isUnicodeIdentifierStart(char ch)
3363
  {
3364
    return isUnicodeIdentifierStart((int)ch);
3365
  }
3366
 
3367
  /**
3368
   * Determines if a character can start a Unicode identifier.  Only
3369
   * letters can start a Unicode identifier, but this includes characters
3370
   * in LETTER_NUMBER.
3371
   * <br>
3372
   * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3373
   *
3374
   * @param codePoint character to test
3375
   * @return true if ch can start a Unicode identifier, else false
3376
   * @see #isJavaIdentifierStart(char)
3377
   * @see #isLetter(char)
3378
   * @see #isUnicodeIdentifierPart(char)
3379
   * @since 1.5
3380
   */
3381
  public static boolean isUnicodeIdentifierStart(int codePoint)
3382
  {
3383
    return ((1 << getType(codePoint))
3384
            & ((1 << UPPERCASE_LETTER)
3385
               | (1 << LOWERCASE_LETTER)
3386
               | (1 << TITLECASE_LETTER)
3387
               | (1 << MODIFIER_LETTER)
3388
               | (1 << OTHER_LETTER)
3389
               | (1 << LETTER_NUMBER))) != 0;
3390
  }
3391
 
3392
  /**
3393
   * Determines if a character can follow the first letter in
3394
   * a Unicode identifier. This includes letters, connecting punctuation,
3395
   * digits, numeric letters, combining marks, non-spacing marks, and
3396
   * isIdentifierIgnorable.
3397
   * <br>
3398
   * Unicode identifier extender =
3399
   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3400
   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3401
   *
3402
   * @param ch character to test
3403
   * @return true if ch can follow the first letter in a Unicode identifier
3404
   * @see #isIdentifierIgnorable(char)
3405
   * @see #isJavaIdentifierPart(char)
3406
   * @see #isLetterOrDigit(char)
3407
   * @see #isUnicodeIdentifierStart(char)
3408
   * @since 1.1
3409
   */
3410
  public static boolean isUnicodeIdentifierPart(char ch)
3411
  {
3412
    return isUnicodeIdentifierPart((int)ch);
3413
  }
3414
 
3415
  /**
3416
   * Determines if a character can follow the first letter in
3417
   * a Unicode identifier. This includes letters, connecting punctuation,
3418
   * digits, numeric letters, combining marks, non-spacing marks, and
3419
   * isIdentifierIgnorable.
3420
   * <br>
3421
   * Unicode identifier extender =
3422
   *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
3423
   *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3424
   *
3425
   * @param codePoint character to test
3426
   * @return true if ch can follow the first letter in a Unicode identifier
3427
   * @see #isIdentifierIgnorable(char)
3428
   * @see #isJavaIdentifierPart(char)
3429
   * @see #isLetterOrDigit(char)
3430
   * @see #isUnicodeIdentifierStart(char)
3431
   * @since 1.5
3432
   */
3433
  public static boolean isUnicodeIdentifierPart(int codePoint)
3434
  {
3435
    int category = getType(codePoint);
3436
    return ((1 << category)
3437
            & ((1 << UPPERCASE_LETTER)
3438
               | (1 << LOWERCASE_LETTER)
3439
               | (1 << TITLECASE_LETTER)
3440
               | (1 << MODIFIER_LETTER)
3441
               | (1 << OTHER_LETTER)
3442
               | (1 << NON_SPACING_MARK)
3443
               | (1 << COMBINING_SPACING_MARK)
3444
               | (1 << DECIMAL_DIGIT_NUMBER)
3445
               | (1 << LETTER_NUMBER)
3446
               | (1 << CONNECTOR_PUNCTUATION)
3447
               | (1 << FORMAT))) != 0
3448
      || (category == CONTROL && isIdentifierIgnorable(codePoint));
3449
  }
3450
 
3451
  /**
3452
   * Determines if a character is ignorable in a Unicode identifier. This
3453
   * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3454
   * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3455
   * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3456
   * <code>'\u009F'</code>), and FORMAT characters.
3457
   * <br>
3458
   * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3459
   *    |U+007F-U+009F
3460
   *
3461
   * @param ch character to test
3462
   * @return true if ch is ignorable in a Unicode or Java identifier
3463
   * @see #isJavaIdentifierPart(char)
3464
   * @see #isUnicodeIdentifierPart(char)
3465
   * @since 1.1
3466
   */
3467
  public static boolean isIdentifierIgnorable(char ch)
3468
  {
3469
    return isIdentifierIgnorable((int)ch);
3470
  }
3471
 
3472
  /**
3473
   * Determines if a character is ignorable in a Unicode identifier. This
3474
   * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3475
   * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3476
   * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3477
   * <code>'\u009F'</code>), and FORMAT characters.
3478
   * <br>
3479
   * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3480
   *    |U+007F-U+009F
3481
   *
3482
   * @param codePoint character to test
3483
   * @return true if ch is ignorable in a Unicode or Java identifier
3484
   * @see #isJavaIdentifierPart(char)
3485
   * @see #isUnicodeIdentifierPart(char)
3486
   * @since 1.5
3487
   */
3488
  public static boolean isIdentifierIgnorable(int codePoint)
3489
  {
3490
    if ((codePoint >= 0 && codePoint <= 0x0008)
3491
        || (codePoint >= 0x000E && codePoint <= 0x001B)
3492
        || (codePoint >= 0x007F && codePoint <= 0x009F)
3493
        || getType(codePoint) == FORMAT)
3494
      return true;
3495
    return false;
3496
  }
3497
 
3498
  /**
3499
   * Converts a Unicode character into its lowercase equivalent mapping.
3500
   * If a mapping does not exist, then the character passed is returned.
3501
   * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3502
   *
3503
   * @param ch character to convert to lowercase
3504
   * @return lowercase mapping of ch, or ch if lowercase mapping does
3505
   *         not exist
3506
   * @see #isLowerCase(char)
3507
   * @see #isUpperCase(char)
3508
   * @see #toTitleCase(char)
3509
   * @see #toUpperCase(char)
3510
   */
3511
  public static char toLowerCase(char ch)
3512
  {
3513
    return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch);
3514
  }
3515
 
3516
  /**
3517
   * Converts a Unicode character into its lowercase equivalent mapping.
3518
   * If a mapping does not exist, then the character passed is returned.
3519
   * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3520
   *
3521
   * @param codePoint character to convert to lowercase
3522
   * @return lowercase mapping of ch, or ch if lowercase mapping does
3523
   *         not exist
3524
   * @see #isLowerCase(char)
3525
   * @see #isUpperCase(char)
3526
   * @see #toTitleCase(char)
3527
   * @see #toUpperCase(char)
3528
   *
3529
   * @since 1.5
3530
   */
3531
  public static int toLowerCase(int codePoint)
3532
  {
3533
    // If the code point is unassigned or in one of the private use areas
3534
    // then we delegate the call to the appropriate private static inner class.
3535
    int plane = codePoint >>> 16;
3536
    if (plane > 2 && plane < 14)
3537
      return UnassignedCharacters.toLowerCase(codePoint);
3538
    if (plane > 14)
3539
      return PrivateUseCharacters.toLowerCase(codePoint);
3540
 
3541
    // The short value stored in lower[plane] is the signed difference between
3542
    // codePoint and its lowercase conversion.
3543
    return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3544
  }
3545
 
3546
  /**
3547
   * Converts a Unicode character into its uppercase equivalent mapping.
3548
   * If a mapping does not exist, then the character passed is returned.
3549
   * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3550
   *
3551
   * @param ch character to convert to uppercase
3552
   * @return uppercase mapping of ch, or ch if uppercase mapping does
3553
   *         not exist
3554
   * @see #isLowerCase(char)
3555
   * @see #isUpperCase(char)
3556
   * @see #toLowerCase(char)
3557
   * @see #toTitleCase(char)
3558
   */
3559
  public static char toUpperCase(char ch)
3560
  {
3561
    return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch);
3562
  }
3563
 
3564
  /**
3565
   * Converts a Unicode character into its uppercase equivalent mapping.
3566
   * If a mapping does not exist, then the character passed is returned.
3567
   * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3568
   *
3569
   * @param codePoint character to convert to uppercase
3570
   * @return uppercase mapping of ch, or ch if uppercase mapping does
3571
   *         not exist
3572
   * @see #isLowerCase(char)
3573
   * @see #isUpperCase(char)
3574
   * @see #toLowerCase(char)
3575
   * @see #toTitleCase(char)
3576
   *
3577
   * @since 1.5
3578
   */
3579
  public static int toUpperCase(int codePoint)
3580
  {
3581
    // If the code point is unassigned or in one of the private use areas
3582
    // then we delegate the call to the appropriate private static inner class.
3583
    int plane = codePoint >>> 16;
3584
    if (plane > 2 && plane < 14)
3585
      return UnassignedCharacters.toUpperCase(codePoint);
3586
    if (plane > 14)
3587
      return PrivateUseCharacters.toUpperCase(codePoint);
3588
 
3589
    // The short value stored in upper[plane] is the signed difference between
3590
    // codePoint and its uppercase conversion.
3591
    return ((short)upper[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3592
  }
3593
 
3594
  /**
3595
   * Converts a Unicode character into its titlecase equivalent mapping.
3596
   * If a mapping does not exist, then the character passed is returned.
3597
   * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3598
   *
3599
   * @param ch character to convert to titlecase
3600
   * @return titlecase mapping of ch, or ch if titlecase mapping does
3601
   *         not exist
3602
   * @see #isTitleCase(char)
3603
   * @see #toLowerCase(char)
3604
   * @see #toUpperCase(char)
3605
   */
3606
  public static char toTitleCase(char ch)
3607
  {
3608
    // As title is short, it doesn't hurt to exhaustively iterate over it.
3609
    for (int i = title.length - 2; i >= 0; i -= 2)
3610
      if (title[i] == ch)
3611
        return title[i + 1];
3612
    return toUpperCase(ch);
3613
  }
3614
 
3615
  /**
3616
   * Converts a Unicode character into its titlecase equivalent mapping.
3617
   * If a mapping does not exist, then the character passed is returned.
3618
   * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3619
   *
3620
   * @param codePoint character to convert to titlecase
3621
   * @return titlecase mapping of ch, or ch if titlecase mapping does
3622
   *         not exist
3623
   * @see #isTitleCase(char)
3624
   * @see #toLowerCase(char)
3625
   * @see #toUpperCase(char)
3626
   *
3627
   * @since 1.5
3628
   */
3629
  public static int toTitleCase(int codePoint)
3630
  {
3631
    // As of Unicode 4.0.0 no characters outside of plane 0 have
3632
    // titlecase mappings that are different from their uppercase
3633
    // mapping.
3634
    if (codePoint < 0x10000)
3635
      return (int) toTitleCase((char)codePoint);
3636
    return toUpperCase(codePoint);
3637
  }
3638
 
3639
  /**
3640
   * Converts a character into a digit of the specified radix. If the radix
3641
   * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
3642
   * exceeds the radix, or if ch is not a decimal digit or in the case
3643
   * insensitive set of 'a'-'z', the result is -1.
3644
   * <br>
3645
   * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3646
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3647
   *
3648
   * @param ch character to convert into a digit
3649
   * @param radix radix in which ch is a digit
3650
   * @return digit which ch represents in radix, or -1 not a valid digit
3651
   * @see #MIN_RADIX
3652
   * @see #MAX_RADIX
3653
   * @see #forDigit(int, int)
3654
   * @see #isDigit(char)
3655
   * @see #getNumericValue(char)
3656
   */
3657
  public static int digit(char ch, int radix)
3658
  {
3659
    if (radix < MIN_RADIX || radix > MAX_RADIX)
3660
      return -1;
3661
    char attr = readCodePoint((int)ch);
3662
    if (((1 << (attr & TYPE_MASK))
3663
         & ((1 << UPPERCASE_LETTER)
3664
            | (1 << LOWERCASE_LETTER)
3665
            | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3666
      {
3667
        // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3668
        int digit = numValue[0][attr >> 7];
3669
        return (digit < radix) ? digit : -1;
3670
      }
3671
    return -1;
3672
  }
3673
 
3674
  /**
3675
   * Converts a character into a digit of the specified radix. If the radix
3676
   * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
3677
   * exceeds the radix, or if ch is not a decimal digit or in the case
3678
   * insensitive set of 'a'-'z', the result is -1.
3679
   * <br>
3680
   * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3681
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3682
   *
3683
   * @param codePoint character to convert into a digit
3684
   * @param radix radix in which ch is a digit
3685
   * @return digit which ch represents in radix, or -1 not a valid digit
3686
   * @see #MIN_RADIX
3687
   * @see #MAX_RADIX
3688
   * @see #forDigit(int, int)
3689
   * @see #isDigit(char)
3690
   * @see #getNumericValue(char)
3691
   */
3692
  public static int digit(int codePoint, int radix)
3693
  {
3694
    if (radix < MIN_RADIX || radix > MAX_RADIX)
3695
      return -1;
3696
 
3697
    // If the code point is unassigned or in one of the private use areas
3698
    // then we delegate the call to the appropriate private static inner class.
3699
    int plane = codePoint >>> 16;
3700
    if (plane > 2 && plane < 14)
3701
      return UnassignedCharacters.digit(codePoint, radix);
3702
    if (plane > 14)
3703
      return PrivateUseCharacters.digit(codePoint, radix);
3704
    char attr = readCodePoint(codePoint);
3705
    if (((1 << (attr & TYPE_MASK))
3706
         & ((1 << UPPERCASE_LETTER)
3707
            | (1 << LOWERCASE_LETTER)
3708
            | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3709
      {
3710
        // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3711
        int digit = numValue[plane][attr >> 7];
3712
 
3713
        // If digit is less than or equal to -3 then the numerical value was
3714
        // too large to fit into numValue and is stored in CharData.LARGENUMS.
3715
        if (digit <= -3)
3716
          digit = CharData.LARGENUMS[-digit - 3];
3717
        return (digit < radix) ? digit : -1;
3718
      }
3719
    return -1;
3720
  }
3721
 
3722
  /**
3723
   * Returns the Unicode numeric value property of a character. For example,
3724
   * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3725
   *
3726
   * <p>This method also returns values for the letters A through Z, (not
3727
   * specified by Unicode), in these ranges: <code>'\u0041'</code>
3728
   * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3729
   * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3730
   * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3731
   * <code>'\uFF5A'</code> (full width variants).
3732
   *
3733
   * <p>If the character lacks a numeric value property, -1 is returned.
3734
   * If the character has a numeric value property which is not representable
3735
   * as a nonnegative integer, such as a fraction, -2 is returned.
3736
   *
3737
   * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3738
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3739
   *
3740
   * @param ch character from which the numeric value property will
3741
   *        be retrieved
3742
   * @return the numeric value property of ch, or -1 if it does not exist, or
3743
   *         -2 if it is not representable as a nonnegative integer
3744
   * @see #forDigit(int, int)
3745
   * @see #digit(char, int)
3746
   * @see #isDigit(char)
3747
   * @since 1.1
3748
   */
3749
  public static int getNumericValue(char ch)
3750
  {
3751
    // Treat numValue as signed.
3752
    return (short) numValue[0][readCodePoint((int)ch) >> 7];
3753
  }
3754
 
3755
  /**
3756
   * Returns the Unicode numeric value property of a character. For example,
3757
   * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3758
   *
3759
   * <p>This method also returns values for the letters A through Z, (not
3760
   * specified by Unicode), in these ranges: <code>'\u0041'</code>
3761
   * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3762
   * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3763
   * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3764
   * <code>'\uFF5A'</code> (full width variants).
3765
   *
3766
   * <p>If the character lacks a numeric value property, -1 is returned.
3767
   * If the character has a numeric value property which is not representable
3768
   * as a nonnegative integer, such as a fraction, -2 is returned.
3769
   *
3770
   * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3771
   *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3772
   *
3773
   * @param codePoint character from which the numeric value property will
3774
   *        be retrieved
3775
   * @return the numeric value property of ch, or -1 if it does not exist, or
3776
   *         -2 if it is not representable as a nonnegative integer
3777
   * @see #forDigit(int, int)
3778
   * @see #digit(char, int)
3779
   * @see #isDigit(char)
3780
   * @since 1.5
3781
   */
3782
  public static int getNumericValue(int codePoint)
3783
  {
3784
    // If the code point is unassigned or in one of the private use areas
3785
    // then we delegate the call to the appropriate private static inner class.
3786
    int plane = codePoint >>> 16;
3787
    if (plane > 2 && plane < 14)
3788
      return UnassignedCharacters.getNumericValue(codePoint);
3789
    if (plane > 14)
3790
      return PrivateUseCharacters.getNumericValue(codePoint);
3791
 
3792
    // If the value N found in numValue[plane] is less than or equal to -3
3793
    // then the numeric value was too big to fit into 16 bits and is
3794
    // stored in CharData.LARGENUMS at offset (-N - 3).
3795
    short num = (short)numValue[plane][readCodePoint(codePoint) >> 7];
3796
    if (num <= -3)
3797
      return CharData.LARGENUMS[-num - 3];
3798
    return num;
3799
  }
3800
 
3801
  /**
3802
   * Determines if a character is an ISO-LATIN-1 space. This is only the five
3803
   * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3804
   * <code>'\r'</code>, and <code>' '</code>.
3805
   * <br>
3806
   * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3807
   *
3808
   * @param ch character to test
3809
   * @return true if ch is a space, else false
3810
   * @deprecated Replaced by {@link #isWhitespace(char)}
3811
   * @see #isSpaceChar(char)
3812
   * @see #isWhitespace(char)
3813
   */
3814
  public static boolean isSpace(char ch)
3815
  {
3816
    // Performing the subtraction up front alleviates need to compare longs.
3817
    return ch-- <= ' ' && ((1 << ch)
3818
                           & ((1 << (' ' - 1))
3819
                              | (1 << ('\t' - 1))
3820
                              | (1 << ('\n' - 1))
3821
                              | (1 << ('\r' - 1))
3822
                              | (1 << ('\f' - 1)))) != 0;
3823
  }
3824
 
3825
  /**
3826
   * Determines if a character is a Unicode space character. This includes
3827
   * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3828
   * <br>
3829
   * Unicode space = [Zs]|[Zp]|[Zl]
3830
   *
3831
   * @param ch character to test
3832
   * @return true if ch is a Unicode space, else false
3833
   * @see #isWhitespace(char)
3834
   * @since 1.1
3835
   */
3836
  public static boolean isSpaceChar(char ch)
3837
  {
3838
    return isSpaceChar((int)ch);
3839
  }
3840
 
3841
  /**
3842
   * Determines if a character is a Unicode space character. This includes
3843
   * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3844
   * <br>
3845
   * Unicode space = [Zs]|[Zp]|[Zl]
3846
   *
3847
   * @param codePoint character to test
3848
   * @return true if ch is a Unicode space, else false
3849
   * @see #isWhitespace(char)
3850
   * @since 1.5
3851
   */
3852
  public static boolean isSpaceChar(int codePoint)
3853
  {
3854
    return ((1 << getType(codePoint))
3855
            & ((1 << SPACE_SEPARATOR)
3856
               | (1 << LINE_SEPARATOR)
3857
               | (1 << PARAGRAPH_SEPARATOR))) != 0;
3858
  }
3859
 
3860
  /**
3861
   * Determines if a character is Java whitespace. This includes Unicode
3862
   * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3863
   * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3864
   * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3865
   * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3866
   * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3867
   * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3868
   * and <code>'\u001F'</code>.
3869
   * <br>
3870
   * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3871
   *
3872
   * @param ch character to test
3873
   * @return true if ch is Java whitespace, else false
3874
   * @see #isSpaceChar(char)
3875
   * @since 1.1
3876
   */
3877
  public static boolean isWhitespace(char ch)
3878
  {
3879
    return isWhitespace((int) ch);
3880
  }
3881
 
3882
  /**
3883
   * Determines if a character is Java whitespace. This includes Unicode
3884
   * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3885
   * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3886
   * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3887
   * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3888
   * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3889
   * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3890
   * and <code>'\u001F'</code>.
3891
   * <br>
3892
   * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3893
   *
3894
   * @param codePoint character to test
3895
   * @return true if ch is Java whitespace, else false
3896
   * @see #isSpaceChar(char)
3897
   * @since 1.5
3898
   */
3899
  public static boolean isWhitespace(int codePoint)
3900
  {
3901
    int plane = codePoint >>> 16;
3902
    if (plane > 2 && plane < 14)
3903
      return UnassignedCharacters.isWhiteSpace(codePoint);
3904
    if (plane > 14)
3905
      return PrivateUseCharacters.isWhiteSpace(codePoint);
3906
 
3907
    int attr = readCodePoint(codePoint);
3908
    return ((((1 << (attr & TYPE_MASK))
3909
              & ((1 << SPACE_SEPARATOR)
3910
                 | (1 << LINE_SEPARATOR)
3911
                 | (1 << PARAGRAPH_SEPARATOR))) != 0)
3912
            && (attr & NO_BREAK_MASK) == 0)
3913
      || (codePoint <= '\u001F' && ((1 << codePoint)
3914
                             & ((1 << '\t')
3915
                                | (1 << '\n')
3916
                                | (1 << '\u000B')
3917
                                | (1 << '\u000C')
3918
                                | (1 << '\r')
3919
                                | (1 << '\u001C')
3920
                                | (1 << '\u001D')
3921
                                | (1 << '\u001E')
3922
                                | (1 << '\u001F'))) != 0);
3923
  }
3924
 
3925
  /**
3926
   * Determines if a character has the ISO Control property.
3927
   * <br>
3928
   * ISO Control = [Cc]
3929
   *
3930
   * @param ch character to test
3931
   * @return true if ch is an ISO Control character, else false
3932
   * @see #isSpaceChar(char)
3933
   * @see #isWhitespace(char)
3934
   * @since 1.1
3935
   */
3936
  public static boolean isISOControl(char ch)
3937
  {
3938
    return isISOControl((int)ch);
3939
  }
3940
 
3941
  /**
3942
   * Determines if the character is an ISO Control character.  This is true
3943
   * if the code point is in the range [0, 0x001F] or if it is in the range
3944
   * [0x007F, 0x009F].
3945
   * @param codePoint the character to check
3946
   * @return true if the character is in one of the above ranges
3947
   *
3948
   * @since 1.5
3949
   */
3950
  public static boolean isISOControl(int codePoint)
3951
  {
3952
    if ((codePoint >= 0 && codePoint <= 0x001F)
3953
        || (codePoint >= 0x007F && codePoint <= 0x009F))
3954
      return true;
3955
    return false;
3956
  }
3957
 
3958
  /**
3959
   * Returns the Unicode general category property of a character.
3960
   *
3961
   * @param ch character from which the general category property will
3962
   *        be retrieved
3963
   * @return the character category property of ch as an integer
3964
   * @see #UNASSIGNED
3965
   * @see #UPPERCASE_LETTER
3966
   * @see #LOWERCASE_LETTER
3967
   * @see #TITLECASE_LETTER
3968
   * @see #MODIFIER_LETTER
3969
   * @see #OTHER_LETTER
3970
   * @see #NON_SPACING_MARK
3971
   * @see #ENCLOSING_MARK
3972
   * @see #COMBINING_SPACING_MARK
3973
   * @see #DECIMAL_DIGIT_NUMBER
3974
   * @see #LETTER_NUMBER
3975
   * @see #OTHER_NUMBER
3976
   * @see #SPACE_SEPARATOR
3977
   * @see #LINE_SEPARATOR
3978
   * @see #PARAGRAPH_SEPARATOR
3979
   * @see #CONTROL
3980
   * @see #FORMAT
3981
   * @see #PRIVATE_USE
3982
   * @see #SURROGATE
3983
   * @see #DASH_PUNCTUATION
3984
   * @see #START_PUNCTUATION
3985
   * @see #END_PUNCTUATION
3986
   * @see #CONNECTOR_PUNCTUATION
3987
   * @see #OTHER_PUNCTUATION
3988
   * @see #MATH_SYMBOL
3989
   * @see #CURRENCY_SYMBOL
3990
   * @see #MODIFIER_SYMBOL
3991
   * @see #INITIAL_QUOTE_PUNCTUATION
3992
   * @see #FINAL_QUOTE_PUNCTUATION
3993
   * @since 1.1
3994
   */
3995
  public static int getType(char ch)
3996
  {
3997
    return getType((int)ch);
3998
  }
3999
 
4000
  /**
4001
   * Returns the Unicode general category property of a character.
4002
   *
4003
   * @param codePoint character from which the general category property will
4004
   *        be retrieved
4005
   * @return the character category property of ch as an integer
4006
   * @see #UNASSIGNED
4007
   * @see #UPPERCASE_LETTER
4008
   * @see #LOWERCASE_LETTER
4009
   * @see #TITLECASE_LETTER
4010
   * @see #MODIFIER_LETTER
4011
   * @see #OTHER_LETTER
4012
   * @see #NON_SPACING_MARK
4013
   * @see #ENCLOSING_MARK
4014
   * @see #COMBINING_SPACING_MARK
4015
   * @see #DECIMAL_DIGIT_NUMBER
4016
   * @see #LETTER_NUMBER
4017
   * @see #OTHER_NUMBER
4018
   * @see #SPACE_SEPARATOR
4019
   * @see #LINE_SEPARATOR
4020
   * @see #PARAGRAPH_SEPARATOR
4021
   * @see #CONTROL
4022
   * @see #FORMAT
4023
   * @see #PRIVATE_USE
4024
   * @see #SURROGATE
4025
   * @see #DASH_PUNCTUATION
4026
   * @see #START_PUNCTUATION
4027
   * @see #END_PUNCTUATION
4028
   * @see #CONNECTOR_PUNCTUATION
4029
   * @see #OTHER_PUNCTUATION
4030
   * @see #MATH_SYMBOL
4031
   * @see #CURRENCY_SYMBOL
4032
   * @see #MODIFIER_SYMBOL
4033
   * @see #INITIAL_QUOTE_PUNCTUATION
4034
   * @see #FINAL_QUOTE_PUNCTUATION
4035
   *
4036
   * @since 1.5
4037
   */
4038
  public static int getType(int codePoint)
4039
  {
4040
    // If the codePoint is unassigned or in one of the private use areas
4041
    // then we delegate the call to the appropriate private static inner class.
4042
    int plane = codePoint >>> 16;
4043
    if (plane > 2 && plane < 14)
4044
      return UnassignedCharacters.getType(codePoint);
4045
    if (plane > 14)
4046
      return PrivateUseCharacters.getType(codePoint);
4047
 
4048
    return readCodePoint(codePoint) & TYPE_MASK;
4049
  }
4050
 
4051
  /**
4052
   * Converts a digit into a character which represents that digit
4053
   * in a specified radix. If the radix is below MIN_RADIX or above MAX_RADIX,
4054
   * or the digit is negative or not less than the radix, then the null character <code>'\0'</code>
4055
   * is returned.  Otherwise the return value is in '0'-'9' and 'a'-'z'.
4056
   * <br>
4057
   * return value boundary = U+0030-U+0039|U+0061-U+007A
4058
   *
4059
   * @param digit digit to be converted into a character
4060
   * @param radix radix of digit
4061
   * @return character representing digit in radix, or '\0'
4062
   * @see #MIN_RADIX
4063
   * @see #MAX_RADIX
4064
   * @see #digit(char, int)
4065
   */
4066
  public static char forDigit(int digit, int radix)
4067
  {
4068
    if (radix < MIN_RADIX || radix > MAX_RADIX
4069
        || digit < 0 || digit >= radix)
4070
      return '\0';
4071
    return Number.digits[digit];
4072
  }
4073
 
4074
  /**
4075
   * Returns the Unicode directionality property of the character. This
4076
   * is used in the visual ordering of text.
4077
   *
4078
   * @param ch the character to look up
4079
   * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4080
   * @see #DIRECTIONALITY_UNDEFINED
4081
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4082
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4083
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4084
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4085
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4086
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4087
   * @see #DIRECTIONALITY_ARABIC_NUMBER
4088
   * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4089
   * @see #DIRECTIONALITY_NONSPACING_MARK
4090
   * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4091
   * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4092
   * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4093
   * @see #DIRECTIONALITY_WHITESPACE
4094
   * @see #DIRECTIONALITY_OTHER_NEUTRALS
4095
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4096
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4097
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4098
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4099
   * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4100
   * @since 1.4
4101
   */
4102
  public static byte getDirectionality(char ch)
4103
  {
4104
    // The result will correctly be signed.
4105
    return getDirectionality((int)ch);
4106
  }
4107
 
4108
 
4109
  /**
4110
   * Returns the Unicode directionality property of the character. This
4111
   * is used in the visual ordering of text.
4112
   *
4113
   * @param codePoint the character to look up
4114
   * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4115
   * @see #DIRECTIONALITY_UNDEFINED
4116
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4117
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4118
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4119
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4120
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4121
   * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4122
   * @see #DIRECTIONALITY_ARABIC_NUMBER
4123
   * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4124
   * @see #DIRECTIONALITY_NONSPACING_MARK
4125
   * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4126
   * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4127
   * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4128
   * @see #DIRECTIONALITY_WHITESPACE
4129
   * @see #DIRECTIONALITY_OTHER_NEUTRALS
4130
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4131
   * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4132
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4133
   * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4134
   * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4135
   * @since 1.5
4136
   */
4137
  public static byte getDirectionality(int codePoint)
4138
  {
4139
    // If the code point is unassigned or in one of the private use areas
4140
    // then we delegate the call to the appropriate private static inner class.
4141
    int plane = codePoint >>> 16;
4142
    if (plane > 2 && plane < 14)
4143
      return UnassignedCharacters.getDirectionality(codePoint);
4144
    if (plane > 14)
4145
      return PrivateUseCharacters.getDirectionality(codePoint);
4146
 
4147
    // The result will correctly be signed.
4148
    return (byte) (direction[plane][readCodePoint(codePoint) >> 7] >> 2);
4149
  }
4150
 
4151
  /**
4152
   * Determines whether the character is mirrored according to Unicode. For
4153
   * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4154
   * left-to-right text, but ')' in right-to-left text.
4155
   *
4156
   * @param ch the character to look up
4157
   * @return true if the character is mirrored
4158
   * @since 1.4
4159
   */
4160
  public static boolean isMirrored(char ch)
4161
  {
4162
    return (readCodePoint((int)ch) & MIRROR_MASK) != 0;
4163
  }
4164
 
4165
  /**
4166
   * Determines whether the character is mirrored according to Unicode. For
4167
   * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4168
   * left-to-right text, but ')' in right-to-left text.
4169
   *
4170
   * @param codePoint the character to look up
4171
   * @return true if the character is mirrored
4172
   * @since 1.5
4173
   */
4174
  public static boolean isMirrored(int codePoint)
4175
  {
4176
    // If the code point is unassigned or part of one of the private use areas
4177
    // then we delegate the call to the appropriate private static inner class.
4178
    int plane = codePoint >>> 16;
4179
    if (plane > 2 && plane < 14)
4180
      return UnassignedCharacters.isMirrored(codePoint);
4181
    if (plane > 14)
4182
      return PrivateUseCharacters.isMirrored(codePoint);
4183
 
4184
    return (readCodePoint(codePoint) & MIRROR_MASK) != 0;
4185
  }
4186
 
4187
  /**
4188
   * Compares another Character to this Character, numerically.
4189
   *
4190
   * @param anotherCharacter Character to compare with this Character
4191
   * @return a negative integer if this Character is less than
4192
   *         anotherCharacter, zero if this Character is equal, and
4193
   *         a positive integer if this Character is greater
4194
   * @throws NullPointerException if anotherCharacter is null
4195
   * @since 1.2
4196
   */
4197
  public int compareTo(Character anotherCharacter)
4198
  {
4199
    return value - anotherCharacter.value;
4200
  }
4201
 
4202
  /**
4203
   * Returns a <code>Character</code> object wrapping the value.
4204
   * In contrast to the <code>Character</code> constructor, this method
4205
   * will cache some values.  It is used by boxing conversion.
4206
   *
4207
   * @param val the value to wrap
4208
   * @return the <code>Character</code>
4209
   *
4210
   * @since 1.5
4211
   */
4212
  public static Character valueOf(char val)
4213
  {
4214
    if (val > MAX_CACHE)
4215
      return new Character(val);
4216
    else
4217
      return charCache[val - MIN_VALUE];
4218
  }
4219
 
4220
  /**
4221
   * Reverse the bytes in val.
4222
   * @since 1.5
4223
   */
4224
  public static char reverseBytes(char val)
4225
  {
4226
    return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
4227
  }
4228
 
4229
  /**
4230
   * Converts a unicode code point to a UTF-16 representation of that
4231
   * code point.
4232
   *
4233
   * @param codePoint the unicode code point
4234
   *
4235
   * @return the UTF-16 representation of that code point
4236
   *
4237
   * @throws IllegalArgumentException if the code point is not a valid
4238
   *         unicode code point
4239
   *
4240
   * @since 1.5
4241
   */
4242
  public static char[] toChars(int codePoint)
4243
  {
4244
    if (!isValidCodePoint(codePoint))
4245
      throw new IllegalArgumentException("Illegal Unicode code point : "
4246
                                         + codePoint);
4247
    char[] result = new char[charCount(codePoint)];
4248
    int ignore = toChars(codePoint, result, 0);
4249
    return result;
4250
  }
4251
 
4252
  /**
4253
   * Converts a unicode code point to its UTF-16 representation.
4254
   *
4255
   * @param codePoint the unicode code point
4256
   * @param dst the target char array
4257
   * @param dstIndex the start index for the target
4258
   *
4259
   * @return number of characters written to <code>dst</code>
4260
   *
4261
   * @throws IllegalArgumentException if <code>codePoint</code> is not a
4262
   *         valid unicode code point
4263
   * @throws NullPointerException if <code>dst</code> is <code>null</code>
4264
   * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
4265
   *         in <code>dst</code> or if the UTF-16 representation does not
4266
   *         fit into <code>dst</code>
4267
   *
4268
   * @since 1.5
4269
   */
4270
  public static int toChars(int codePoint, char[] dst, int dstIndex)
4271
  {
4272
    if (!isValidCodePoint(codePoint))
4273
      {
4274
        throw new IllegalArgumentException("not a valid code point: "
4275
                                           + codePoint);
4276
      }
4277
 
4278
    int result;
4279
    if (isSupplementaryCodePoint(codePoint))
4280
      {
4281
        // Write second char first to cause IndexOutOfBoundsException
4282
        // immediately.
4283
        final int cp2 = codePoint - 0x10000;
4284
        dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
4285
        dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
4286
        result = 2;
4287
      }
4288
    else
4289
      {
4290
        dst[dstIndex] = (char) codePoint;
4291
        result = 1;
4292
      }
4293
    return result;
4294
  }
4295
 
4296
  /**
4297
   * Return number of 16-bit characters required to represent the given
4298
   * code point.
4299
   *
4300
   * @param codePoint a unicode code point
4301
   *
4302
   * @return 2 if codePoint >= 0x10000, 1 otherwise.
4303
   *
4304
   * @since 1.5
4305
   */
4306
  public static int charCount(int codePoint)
4307
  {
4308
    return
4309
      (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT)
4310
      ? 2
4311
      : 1;
4312
  }
4313
 
4314
  /**
4315
   * Determines whether the specified code point is
4316
   * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
4317
   * supplementary character range.
4318
   *
4319
   * @param codePoint a Unicode code point
4320
   *
4321
   * @return <code>true</code> if code point is in supplementary range
4322
   *
4323
   * @since 1.5
4324
   */
4325
  public static boolean isSupplementaryCodePoint(int codePoint)
4326
  {
4327
    return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4328
      && codePoint <= MAX_CODE_POINT;
4329
  }
4330
 
4331
  /**
4332
   * Determines whether the specified code point is
4333
   * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
4334
   *
4335
   * @param codePoint a Unicode code point
4336
   *
4337
   * @return <code>true</code> if code point is valid
4338
   *
4339
   * @since 1.5
4340
   */
4341
  public static boolean isValidCodePoint(int codePoint)
4342
  {
4343
    return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
4344
  }
4345
 
4346
  /**
4347
   * Return true if the given character is a high surrogate.
4348
   * @param ch the character
4349
   * @return true if the character is a high surrogate character
4350
   *
4351
   * @since 1.5
4352
   */
4353
  public static boolean isHighSurrogate(char ch)
4354
  {
4355
    return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
4356
  }
4357
 
4358
  /**
4359
   * Return true if the given character is a low surrogate.
4360
   * @param ch the character
4361
   * @return true if the character is a low surrogate character
4362
   *
4363
   * @since 1.5
4364
   */
4365
  public static boolean isLowSurrogate(char ch)
4366
  {
4367
    return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
4368
  }
4369
 
4370
  /**
4371
   * Return true if the given characters compose a surrogate pair.
4372
   * This is true if the first character is a high surrogate and the
4373
   * second character is a low surrogate.
4374
   * @param ch1 the first character
4375
   * @param ch2 the second character
4376
   * @return true if the characters compose a surrogate pair
4377
   *
4378
   * @since 1.5
4379
   */
4380
  public static boolean isSurrogatePair(char ch1, char ch2)
4381
  {
4382
    return isHighSurrogate(ch1) && isLowSurrogate(ch2);
4383
  }
4384
 
4385
  /**
4386
   * Given a valid surrogate pair, this returns the corresponding
4387
   * code point.
4388
   * @param high the high character of the pair
4389
   * @param low the low character of the pair
4390
   * @return the corresponding code point
4391
   *
4392
   * @since 1.5
4393
   */
4394
  public static int toCodePoint(char high, char low)
4395
  {
4396
    return ((high - MIN_HIGH_SURROGATE) * 0x400) +
4397
      (low - MIN_LOW_SURROGATE) + 0x10000;
4398
  }
4399
 
4400
  /**
4401
   * Get the code point at the specified index in the CharSequence.
4402
   * This is like CharSequence#charAt(int), but if the character is
4403
   * the start of a surrogate pair, and there is a following
4404
   * character, and this character completes the pair, then the
4405
   * corresponding supplementary code point is returned.  Otherwise,
4406
   * the character at the index is returned.
4407
   *
4408
   * @param sequence the CharSequence
4409
   * @param index the index of the codepoint to get, starting at 0
4410
   * @return the codepoint at the specified index
4411
   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4412
   * @since 1.5
4413
   */
4414
  public static int codePointAt(CharSequence sequence, int index)
4415
  {
4416
    int len = sequence.length();
4417
    if (index < 0 || index >= len)
4418
      throw new IndexOutOfBoundsException();
4419
    char high = sequence.charAt(index);
4420
    if (! isHighSurrogate(high) || ++index >= len)
4421
      return high;
4422
    char low = sequence.charAt(index);
4423
    if (! isLowSurrogate(low))
4424
      return high;
4425
    return toCodePoint(high, low);
4426
  }
4427
 
4428
  /**
4429
   * Get the code point at the specified index in the character array.
4430
   * If the character is the start of a surrogate pair, and there is a
4431
   * following character, and this character completes the pair, then
4432
   * the corresponding supplementary code point is returned.
4433
   * Otherwise, the character at the index is returned.
4434
   *
4435
   * @param chars the character array in which to look
4436
   * @param index the index of the codepoint to get, starting at 0
4437
   * @return the codepoint at the specified index
4438
   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4439
   * @since 1.5
4440
   */
4441
  public static int codePointAt(char[] chars, int index)
4442
  {
4443
    return codePointAt(chars, index, chars.length);
4444
  }
4445
 
4446
  /**
4447
   * Get the code point at the specified index in the character array.
4448
   * If the character is the start of a surrogate pair, and there is a
4449
   * following character within the specified range, and this
4450
   * character completes the pair, then the corresponding
4451
   * supplementary code point is returned.  Otherwise, the character
4452
   * at the index is returned.
4453
   *
4454
   * @param chars the character array in which to look
4455
   * @param index the index of the codepoint to get, starting at 0
4456
   * @param limit the limit past which characters should not be examined
4457
   * @return the codepoint at the specified index
4458
   * @throws IndexOutOfBoundsException if index is negative or &gt;=
4459
   * limit, or if limit is negative or &gt; the length of the array
4460
   * @since 1.5
4461
   */
4462
  public static int codePointAt(char[] chars, int index, int limit)
4463
  {
4464
    if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
4465
      throw new IndexOutOfBoundsException();
4466
    char high = chars[index];
4467
    if (! isHighSurrogate(high) || ++index >= limit)
4468
      return high;
4469
    char low = chars[index];
4470
    if (! isLowSurrogate(low))
4471
      return high;
4472
    return toCodePoint(high, low);
4473
  }
4474
 
4475
  /**
4476
   * Get the code point before the specified index.  This is like
4477
   * #codePointAt(char[], int), but checks the characters at
4478
   * <code>index-1</code> and <code>index-2</code> to see if they form
4479
   * a supplementary code point.  If they do not, the character at
4480
   * <code>index-1</code> is returned.
4481
   *
4482
   * @param chars the character array
4483
   * @param index the index just past the codepoint to get, starting at 0
4484
   * @return the codepoint at the specified index
4485
   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; the array length
4486
   * @since 1.5
4487
   */
4488
  public static int codePointBefore(char[] chars, int index)
4489
  {
4490
    return codePointBefore(chars, index, 1);
4491
  }
4492
 
4493
  /**
4494
   * Get the code point before the specified index.  This is like
4495
   * #codePointAt(char[], int), but checks the characters at
4496
   * <code>index-1</code> and <code>index-2</code> to see if they form
4497
   * a supplementary code point.  If they do not, the character at
4498
   * <code>index-1</code> is returned.  The start parameter is used to
4499
   * limit the range of the array which may be examined.
4500
   *
4501
   * @param chars the character array
4502
   * @param index the index just past the codepoint to get, starting at 0
4503
   * @param start the index before which characters should not be examined
4504
   * @return the codepoint at the specified index
4505
   * @throws IndexOutOfBoundsException if index is &lt; start or &gt;
4506
   * the length of the array, or if start is negative or &gt;= the
4507
   * length of the array
4508
   * @since 1.5
4509
   */
4510
  public static int codePointBefore(char[] chars, int index, int start)
4511
  {
4512
    if (index < start || index > chars.length
4513
        || start < 0 || start >= chars.length)
4514
      throw new IndexOutOfBoundsException();
4515
    --index;
4516
    char low = chars[index];
4517
    if (! isLowSurrogate(low) || --index < start)
4518
      return low;
4519
    char high = chars[index];
4520
    if (! isHighSurrogate(high))
4521
      return low;
4522
    return toCodePoint(high, low);
4523
  }
4524
 
4525
  /**
4526
   * Get the code point before the specified index.  This is like
4527
   * #codePointAt(CharSequence, int), but checks the characters at
4528
   * <code>index-1</code> and <code>index-2</code> to see if they form
4529
   * a supplementary code point.  If they do not, the character at
4530
   * <code>index-1</code> is returned.
4531
   *
4532
   * @param sequence the CharSequence
4533
   * @param index the index just past the codepoint to get, starting at 0
4534
   * @return the codepoint at the specified index
4535
   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
4536
   * @since 1.5
4537
   */
4538
  public static int codePointBefore(CharSequence sequence, int index)
4539
  {
4540
    int len = sequence.length();
4541
    if (index < 1 || index > len)
4542
      throw new IndexOutOfBoundsException();
4543
    --index;
4544
    char low = sequence.charAt(index);
4545
    if (! isLowSurrogate(low) || --index < 0)
4546
      return low;
4547
    char high = sequence.charAt(index);
4548
    if (! isHighSurrogate(high))
4549
      return low;
4550
    return toCodePoint(high, low);
4551
  }
4552
} // class Character

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.