| 1 | 
         747 | 
         jeremybenn | 
         // Copyright 2011 The Go Authors. All rights reserved.
  | 
      
      
         | 2 | 
          | 
          | 
         // Use of this source code is governed by a BSD-style
  | 
      
      
         | 3 | 
          | 
          | 
         // license that can be found in the LICENSE file.
  | 
      
      
         | 4 | 
          | 
          | 
          
  | 
      
      
         | 5 | 
          | 
          | 
         package unicode
  | 
      
      
         | 6 | 
          | 
          | 
          
  | 
      
      
         | 7 | 
          | 
          | 
         // Property flags kept for each code point under U+0100, for fast lookup.
         // Every flag owns a single bit, so several of them can be OR-ed into one
         // mask and tested with a single AND.
         const (
                 pC  = 1 << 0 // a control character.
                 pP  = 1 << 1 // a punctuation character.
                 pN  = 1 << 2 // a numeral.
                 pS  = 1 << 3 // a symbolic character.
                 pZ  = 1 << 4 // a spacing character.
                 pLu = 1 << 5 // an upper-case letter.
                 pLl = 1 << 6 // a lower-case letter.
                 pp  = 1 << 7 // a printable character according to Go's definition.

                 // pg marks a graphical character according to the Unicode definition:
                 // anything printable by Go's definition, or a spacing character.
                 pg = pp | pZ
         )
  | 
      
      
         | 19 | 
          | 
          | 
          
  | 
      
      
         | 20 | 
          | 
          | 
         // GraphicRanges defines the set of graphic characters according to Unicode.
  | 
      
      
         | 21 | 
          | 
          | 
         var GraphicRanges = []*RangeTable{
  | 
      
      
         | 22 | 
          | 
          | 
                 L, M, N, P, S, Zs,
  | 
      
      
         | 23 | 
          | 
          | 
         }
  | 
      
      
         | 24 | 
          | 
          | 
          
  | 
      
      
         | 25 | 
          | 
          | 
         // PrintRanges defines the set of printable characters according to Go.
  | 
      
      
         | 26 | 
          | 
          | 
         // ASCII space, U+0020, is handled separately.
  | 
      
      
         | 27 | 
          | 
          | 
         var PrintRanges = []*RangeTable{
  | 
      
      
         | 28 | 
          | 
          | 
                 L, M, N, P, S,
  | 
      
      
         | 29 | 
          | 
          | 
         }
  | 
      
      
         | 30 | 
          | 
          | 
          
  | 
      
      
         | 31 | 
          | 
          | 
         // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
  | 
      
      
         | 32 | 
          | 
          | 
         // Such characters include letters, marks, numbers, punctuation, symbols, and
  | 
      
      
         | 33 | 
          | 
          | 
         // spaces, from categories L, M, N, P, S, Zs.
  | 
      
      
         | 34 | 
          | 
          | 
         func IsGraphic(r rune) bool {
  | 
      
      
         | 35 | 
          | 
          | 
                 // We convert to uint32 to avoid the extra test for negative,
  | 
      
      
         | 36 | 
          | 
          | 
                 // and in the index we convert to uint8 to avoid the range check.
  | 
      
      
         | 37 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 38 | 
          | 
          | 
                         return properties[uint8(r)]&pg != 0
  | 
      
      
         | 39 | 
          | 
          | 
                 }
  | 
      
      
         | 40 | 
          | 
          | 
                 return IsOneOf(GraphicRanges, r)
  | 
      
      
         | 41 | 
          | 
          | 
         }
  | 
      
      
         | 42 | 
          | 
          | 
          
  | 
      
      
         | 43 | 
          | 
          | 
         // IsPrint reports whether the rune is defined as printable by Go. Such
  | 
      
      
         | 44 | 
          | 
          | 
         // characters include letters, marks, numbers, punctuation, symbols, and the
  | 
      
      
         | 45 | 
          | 
          | 
         // ASCII space character, from categories L, M, N, P, S and the ASCII space
  | 
      
      
         | 46 | 
          | 
          | 
         // character.  This categorization is the same as IsGraphic except that the
  | 
      
      
         | 47 | 
          | 
          | 
         // only spacing character is ASCII space, U+0020.
  | 
      
      
         | 48 | 
          | 
          | 
         func IsPrint(r rune) bool {
  | 
      
      
         | 49 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 50 | 
          | 
          | 
                         return properties[uint8(r)]&pp != 0
  | 
      
      
         | 51 | 
          | 
          | 
                 }
  | 
      
      
         | 52 | 
          | 
          | 
                 return IsOneOf(PrintRanges, r)
  | 
      
      
         | 53 | 
          | 
          | 
         }
  | 
      
      
         | 54 | 
          | 
          | 
          
  | 
      
      
         | 55 | 
          | 
          | 
         // IsOneOf reports whether the rune is a member of one of the ranges.
  | 
      
      
         | 56 | 
          | 
          | 
         // The rune is known to be above Latin-1.
  | 
      
      
         | 57 | 
          | 
          | 
         func IsOneOf(set []*RangeTable, r rune) bool {
  | 
      
      
         | 58 | 
          | 
          | 
                 for _, inside := range set {
  | 
      
      
         | 59 | 
          | 
          | 
                         if Is(inside, r) {
  | 
      
      
         | 60 | 
          | 
          | 
                                 return true
  | 
      
      
         | 61 | 
          | 
          | 
                         }
  | 
      
      
         | 62 | 
          | 
          | 
                 }
  | 
      
      
         | 63 | 
          | 
          | 
                 return false
  | 
      
      
         | 64 | 
          | 
          | 
         }
  | 
      
      
         | 65 | 
          | 
          | 
          
  | 
      
      
         | 66 | 
          | 
          | 
         // IsControl reports whether the rune is a control character.
  | 
      
      
         | 67 | 
          | 
          | 
         // The C (Other) Unicode category includes more code points
  | 
      
      
         | 68 | 
          | 
          | 
         // such as surrogates; use Is(C, rune) to test for them.
  | 
      
      
         | 69 | 
          | 
          | 
         func IsControl(r rune) bool {
  | 
      
      
         | 70 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 71 | 
          | 
          | 
                         return properties[uint8(r)]&pC != 0
  | 
      
      
         | 72 | 
          | 
          | 
                 }
  | 
      
      
         | 73 | 
          | 
          | 
                 // All control characters are < Latin1Max.
  | 
      
      
         | 74 | 
          | 
          | 
                 return false
  | 
      
      
         | 75 | 
          | 
          | 
         }
  | 
      
      
         | 76 | 
          | 
          | 
          
  | 
      
      
         | 77 | 
          | 
          | 
         // IsLetter reports whether the rune is a letter (category L).
  | 
      
      
         | 78 | 
          | 
          | 
         func IsLetter(r rune) bool {
  | 
      
      
         | 79 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 80 | 
          | 
          | 
                         return properties[uint8(r)]&(pLu|pLl) != 0
  | 
      
      
         | 81 | 
          | 
          | 
                 }
  | 
      
      
         | 82 | 
          | 
          | 
                 return Is(Letter, r)
  | 
      
      
         | 83 | 
          | 
          | 
         }
  | 
      
      
         | 84 | 
          | 
          | 
          
  | 
      
      
         | 85 | 
          | 
          | 
         // IsMark reports whether the rune is a mark character (category M).
  | 
      
      
         | 86 | 
          | 
          | 
         func IsMark(r rune) bool {
  | 
      
      
         | 87 | 
          | 
          | 
                 // There are no mark characters in Latin-1.
  | 
      
      
         | 88 | 
          | 
          | 
                 return Is(Mark, r)
  | 
      
      
         | 89 | 
          | 
          | 
         }
  | 
      
      
         | 90 | 
          | 
          | 
          
  | 
      
      
         | 91 | 
          | 
          | 
         // IsNumber reports whether the rune is a number (category N).
  | 
      
      
         | 92 | 
          | 
          | 
         func IsNumber(r rune) bool {
  | 
      
      
         | 93 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 94 | 
          | 
          | 
                         return properties[uint8(r)]&pN != 0
  | 
      
      
         | 95 | 
          | 
          | 
                 }
  | 
      
      
         | 96 | 
          | 
          | 
                 return Is(Number, r)
  | 
      
      
         | 97 | 
          | 
          | 
         }
  | 
      
      
         | 98 | 
          | 
          | 
          
  | 
      
      
         | 99 | 
          | 
          | 
         // IsPunct reports whether the rune is a Unicode punctuation character
  | 
      
      
         | 100 | 
          | 
          | 
         // (category P).
  | 
      
      
         | 101 | 
          | 
          | 
         func IsPunct(r rune) bool {
  | 
      
      
         | 102 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 103 | 
          | 
          | 
                         return properties[uint8(r)]&pP != 0
  | 
      
      
         | 104 | 
          | 
          | 
                 }
  | 
      
      
         | 105 | 
          | 
          | 
                 return Is(Punct, r)
  | 
      
      
         | 106 | 
          | 
          | 
         }
  | 
      
      
         | 107 | 
          | 
          | 
          
  | 
      
      
         | 108 | 
          | 
          | 
         // IsSpace reports whether the rune is a space character as defined
  | 
      
      
         | 109 | 
          | 
          | 
         // by Unicode's White Space property; in the Latin-1 space
  | 
      
      
         | 110 | 
          | 
          | 
         // this is
  | 
      
      
         | 111 | 
          | 
          | 
         //      '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
  | 
      
      
         | 112 | 
          | 
          | 
         // Other definitions of spacing characters are set by category
  | 
      
      
         | 113 | 
          | 
          | 
         // Z and property Pattern_White_Space.
  | 
      
      
         | 114 | 
          | 
          | 
         func IsSpace(r rune) bool {
  | 
      
      
         | 115 | 
          | 
          | 
                 // This property isn't the same as Z; special-case it.
  | 
      
      
         | 116 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 117 | 
          | 
          | 
                         switch r {
  | 
      
      
         | 118 | 
          | 
          | 
                         case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
  | 
      
      
         | 119 | 
          | 
          | 
                                 return true
  | 
      
      
         | 120 | 
          | 
          | 
                         }
  | 
      
      
         | 121 | 
          | 
          | 
                         return false
  | 
      
      
         | 122 | 
          | 
          | 
                 }
  | 
      
      
         | 123 | 
          | 
          | 
                 return Is(White_Space, r)
  | 
      
      
         | 124 | 
          | 
          | 
         }
  | 
      
      
         | 125 | 
          | 
          | 
          
  | 
      
      
         | 126 | 
          | 
          | 
         // IsSymbol reports whether the rune is a symbolic character.
  | 
      
      
         | 127 | 
          | 
          | 
         func IsSymbol(r rune) bool {
  | 
      
      
         | 128 | 
          | 
          | 
                 if uint32(r) <= MaxLatin1 {
  | 
      
      
         | 129 | 
          | 
          | 
                         return properties[uint8(r)]&pS != 0
  | 
      
      
         | 130 | 
          | 
          | 
                 }
  | 
      
      
         | 131 | 
          | 
          | 
                 return Is(Symbol, r)
  | 
      
      
         | 132 | 
          | 
          | 
         }
  |