OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [ada/] [s-utf_32.ads] - Blame information for rev 281

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 281 jeremybenn
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT RUN-TIME COMPONENTS                         --
4
--                                                                          --
5
--                        S Y S T E M . U T F _ 3 2                         --
6
--                                                                          --
7
--                                 S p e c                                  --
8
--                                                                          --
9
--          Copyright (C) 2005-2009, Free Software Foundation, Inc.         --
10
--                                                                          --
11
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12
-- terms of the  GNU General Public License as published  by the Free Soft- --
13
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17
--                                                                          --
18
-- As a special exception under Section 7 of GPL version 3, you are granted --
19
-- additional permissions described in the GCC Runtime Library Exception,   --
20
-- version 3.1, as published by the Free Software Foundation.               --
21
--                                                                          --
22
-- You should have received a copy of the GNU General Public License and    --
23
-- a copy of the GCC Runtime Library Exception along with this program;     --
24
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25
-- <http://www.gnu.org/licenses/>.                                          --
26
--                                                                          --
27
-- GNAT was originally developed  by the GNAT team at  New York University. --
28
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29
--                                                                          --
30
------------------------------------------------------------------------------
31
 
32
--  This package is an internal package that provides basic character
33
--  classification capabilities needed by the compiler for handling full
34
--  32-bit wide wide characters. We avoid the use of the actual type
35
--  Wide_Wide_Character, since we want to use these routines in the compiler
36
--  itself, and we want to be able to compile the compiler with old versions
37
--  of GNAT that did not implement Wide_Wide_Character.
38
 
39
--  System.UTF_32 should not be directly used from an application program, but
40
--  an equivalent package GNAT.UTF_32 can be used directly and provides exactly
41
--  the same services. The reason this package is in System is so that it can
42
--  be with'ed by other packages in the Ada and System hierarchies.
43
 
44
pragma Compiler_Unit;
45
 
46
package System.UTF_32 is
47
 
48
   type UTF_32 is range 0 .. 16#7FFF_FFFF#;
49
   --  So far, the only defined character codes are in 0 .. 16#10_FFFF#
50
 
51
   --  The following type defines the categories from the unicode definitions.
52
   --  The one addition we make is Fe, which represents the characters FFFE
53
   --  and FFFF in any of the planes.
54
 
55
   type Category is (
56
     Cc,   --  Other, Control
57
     Cf,   --  Other, Format
58
     Cn,   --  Other, Not Assigned
59
     Co,   --  Other, Private Use
60
     Cs,   --  Other, Surrogate
61
     Ll,   --  Letter, Lowercase
62
     Lm,   --  Letter, Modifier
63
     Lo,   --  Letter, Other
64
     Lt,   --  Letter, Titlecase
65
     Lu,   --  Letter, Uppercase
66
     Mc,   --  Mark, Spacing Combining
67
     Me,   --  Mark, Enclosing
68
     Mn,   --  Mark, Nonspacing
69
     Nd,   --  Number, Decimal Digit
70
     Nl,   --  Number, Letter
71
     No,   --  Number, Other
72
     Pc,   --  Punctuation, Connector
73
     Pd,   --  Punctuation, Dash
74
     Pe,   --  Punctuation, Close
75
     Pf,   --  Punctuation, Final quote
76
     Pi,   --  Punctuation, Initial quote
77
     Po,   --  Punctuation, Other
78
     Ps,   --  Punctuation, Open
79
     Sc,   --  Symbol, Currency
80
     Sk,   --  Symbol, Modifier
81
     Sm,   --  Symbol, Math
82
     So,   --  Symbol, Other
83
     Zl,   --  Separator, Line
84
     Zp,   --  Separator, Paragraph
85
     Zs,   --  Separator, Space
86
     Fe);  --  relative position FFFE/FFFF in any plane
87
 
88
   function Get_Category (U : UTF_32) return Category;
89
   --  Given a UTF_32 code, returns the corresponding Category, or Cn if
90
   --  the code does not have an assigned unicode category.
91
 
92
   --  The following functions perform category tests corresponding to lexical
93
   --  classes defined in the Ada standard. There are two interfaces for each
94
   --  function. The second takes a Category (e.g. returned by Get_Category).
95
   --  The first takes a UTF_32 code. The form taking the UTF_32 code is
96
   --  typically more efficient than calling Get_Category, but if several
97
   --  different tests are to be performed on the same code, it is more
98
   --  efficient to use Get_Category to get the category, then test the
99
   --  resulting category.
100
 
101
   function Is_UTF_32_Letter (U : UTF_32)   return Boolean;
102
   function Is_UTF_32_Letter (C : Category) return Boolean;
103
   pragma Inline (Is_UTF_32_Letter);
104
   --  Returns true iff U is a letter that can be used to start an identifier,
105
   --  or if C is one of the corresponding categories, which are the following:
106
   --    Letter, Uppercase (Lu)
107
   --    Letter, Lowercase (Ll)
108
   --    Letter, Titlecase (Lt)
109
   --    Letter, Modifier  (Lm)
110
   --    Letter, Other     (Lo)
111
   --    Number, Letter    (Nl)
112
 
113
   function Is_UTF_32_Digit (U : UTF_32)   return Boolean;
114
   function Is_UTF_32_Digit (C : Category) return Boolean;
115
   pragma Inline (Is_UTF_32_Digit);
116
   --  Returns true iff U is a digit that can be used to extend an identifier,
117
   --  or if C is one of the corresponding categories, which are the following:
118
   --    Number, Decimal_Digit (Nd)
119
 
120
   function Is_UTF_32_Line_Terminator (U : UTF_32) return Boolean;
121
   pragma Inline (Is_UTF_32_Line_Terminator);
122
   --  Returns true iff U is an allowed line terminator for source programs,
123
   --  if U is in the category Zp (Separator, Paragraph), or Zl (Separator,
124
   --  Line), or if U is a conventional line terminator (CR, LF, VT, FF).
125
   --  There is no category version for this function, since the set of
126
   --  characters does not correspond to a set of Unicode categories.
127
 
128
   function Is_UTF_32_Mark (U : UTF_32)   return Boolean;
129
   function Is_UTF_32_Mark (C : Category) return Boolean;
130
   pragma Inline (Is_UTF_32_Mark);
131
   --  Returns true iff U is a mark character which can be used to extend an
132
   --  identifier, or if C is one of the corresponding categories, which are
133
   --  the following:
134
   --    Mark, Nonspacing (Mn)
135
   --    Mark, Spacing Combining (Mc)
136
 
137
   function Is_UTF_32_Other (U : UTF_32)   return Boolean;
138
   function Is_UTF_32_Other (C : Category) return Boolean;
139
   pragma Inline (Is_UTF_32_Other);
140
   --  Returns true iff U is an other format character, which means that it
141
   --  can be used to extend an identifier, but is ignored for the purposes of
142
   --  matching of identifiers, or if C is one of the corresponding categories,
143
   --  which are the following:
144
   --    Other, Format (Cf)
145
 
146
   function Is_UTF_32_Punctuation (U : UTF_32)   return Boolean;
147
   function Is_UTF_32_Punctuation (C : Category) return Boolean;
148
   pragma Inline (Is_UTF_32_Punctuation);
149
   --  Returns true iff U is a punctuation character that can be used to
150
   --  separate pieces of an identifier, or if C is one of the corresponding
151
   --  categories, which are the following:
152
   --    Punctuation, Connector (Pc)
153
 
154
   function Is_UTF_32_Space (U : UTF_32)   return Boolean;
155
   function Is_UTF_32_Space (C : Category) return Boolean;
156
   pragma Inline (Is_UTF_32_Space);
157
   --  Returns true iff U is considered a space to be ignored, or if C is one
158
   --  of the corresponding categories, which are the following:
159
   --    Separator, Space (Zs)
160
 
161
   function Is_UTF_32_Non_Graphic (U : UTF_32)   return Boolean;
162
   function Is_UTF_32_Non_Graphic (C : Category) return Boolean;
163
   pragma Inline (Is_UTF_32_Non_Graphic);
164
   --  Returns true iff U is considered to be a non-graphic character, or if C
165
   --  is one of the corresponding categories, which are the following:
166
   --    Other, Control (Cc)
167
   --    Other, Private Use (Co)
168
   --    Other, Surrogate (Cs)
169
   --    Separator, Line (Zl)
170
   --    Separator, Paragraph (Zp)
171
   --    FFFE or FFFF positions in any plane (Fe)
172
   --
173
   --  Note that the Ada category format effector is subsumed by the above
174
   --  list of Unicode categories.
175
   --
176
   --  Note that Other, Not Assigned (Cn) is quite deliberately not included
177
   --  in the list of categories above. This means that should any of these
178
   --  code positions be defined in future with graphic characters they will
179
   --  be allowed without a need to change implementations or the standard.
180
   --
181
   --  Note that Other, Format (Cf) is also quite deliberately not included
182
   --  in the list of categories above. This means that these characters can
183
   --  be included in character and string literals.
184
 
185
   --  The following function is used to fold to upper case, as required by
186
   --  the Ada 2005 standard rules for identifier case folding. Two
187
   --  identifiers are equivalent if they are identical after folding all
188
   --  letters to upper case using this routine.
189
 
190
   function UTF_32_To_Upper_Case (U : UTF_32) return UTF_32;
191
   pragma Inline (UTF_32_To_Upper_Case);
192
   --  If U represents a lower case letter, returns the corresponding upper
193
   --  case letter, otherwise U is returned unchanged. The folding is locale
194
   --  independent as defined by documents referenced in the note in section
195
   --  1 of ISO/IEC 10646:2003.
196
 
197
end System.UTF_32;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.