OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [ada/] [s-wchcnv.adb] - Blame information for rev 749

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 706 jeremybenn
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT RUN-TIME COMPONENTS                         --
4
--                                                                          --
5
--                       S Y S T E M . W C H _ C N V                        --
6
--                                                                          --
7
--                                 B o d y                                  --
8
--                                                                          --
9
--          Copyright (C) 1992-2009, Free Software Foundation, Inc.         --
10
--                                                                          --
11
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12
-- terms of the  GNU General Public License as published  by the Free Soft- --
13
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17
--                                                                          --
18
-- As a special exception under Section 7 of GPL version 3, you are granted --
19
-- additional permissions described in the GCC Runtime Library Exception,   --
20
-- version 3.1, as published by the Free Software Foundation.               --
21
--                                                                          --
22
-- You should have received a copy of the GNU General Public License and    --
23
-- a copy of the GCC Runtime Library Exception along with this program;     --
24
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25
-- <http://www.gnu.org/licenses/>.                                          --
26
--                                                                          --
27
-- GNAT was originally developed  by the GNAT team at  New York University. --
28
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29
--                                                                          --
30
------------------------------------------------------------------------------
31
 
32
pragma Compiler_Unit;
33
 
34
with Interfaces;     use Interfaces;
35
with System.WCh_Con; use System.WCh_Con;
36
with System.WCh_JIS; use System.WCh_JIS;
37
 
38
package body System.WCh_Cnv is
39
 
40
   -----------------------------
41
   -- Char_Sequence_To_UTF_32 --
42
   -----------------------------
43
 
44
   function Char_Sequence_To_UTF_32
45
     (C  : Character;
46
      EM : System.WCh_Con.WC_Encoding_Method) return UTF_32_Code
47
   is
48
      B1 : Unsigned_32;
49
      C1 : Character;
50
      U  : Unsigned_32;
51
      W  : Unsigned_32;
52
 
53
      procedure Get_Hex (N : Character);
54
      --  If N is a hex character, then set B1 to 16 * B1 + character N.
55
      --  Raise Constraint_Error if character N is not a hex character.
56
 
57
      procedure Get_UTF_Byte;
58
      pragma Inline (Get_UTF_Byte);
59
      --  Used to interpret a 2#10xxxxxx# continuation byte in UTF-8 mode.
60
      --  Reads a byte, and raises CE if the first two bits are not 10.
61
      --  Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits.
62
 
63
      -------------
64
      -- Get_Hex --
65
      -------------
66
 
67
      procedure Get_Hex (N : Character) is
68
         B2 : constant Unsigned_32 := Character'Pos (N);
69
      begin
70
         if B2 in Character'Pos ('0') .. Character'Pos ('9') then
71
            B1 := B1 * 16 + B2 - Character'Pos ('0');
72
         elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then
73
            B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10);
74
         elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then
75
            B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10);
76
         else
77
            raise Constraint_Error;
78
         end if;
79
      end Get_Hex;
80
 
81
      ------------------
82
      -- Get_UTF_Byte --
83
      ------------------
84
 
85
      procedure Get_UTF_Byte is
86
      begin
87
         U := Unsigned_32 (Character'Pos (In_Char));
88
 
89
         if (U and 2#11000000#) /= 2#10_000000# then
90
            raise Constraint_Error;
91
         end if;
92
 
93
         W := Shift_Left (W, 6) or (U and 2#00111111#);
94
      end Get_UTF_Byte;
95
 
96
   --  Start of processing for Char_Sequence_To_Wide
97
 
98
   begin
99
      case EM is
100
 
101
         when WCEM_Hex =>
102
            if C /= ASCII.ESC then
103
               return Character'Pos (C);
104
 
105
            else
106
               B1 := 0;
107
               Get_Hex (In_Char);
108
               Get_Hex (In_Char);
109
               Get_Hex (In_Char);
110
               Get_Hex (In_Char);
111
 
112
               return UTF_32_Code (B1);
113
            end if;
114
 
115
         when WCEM_Upper =>
116
            if C > ASCII.DEL then
117
               return 256 * Character'Pos (C) + Character'Pos (In_Char);
118
            else
119
               return Character'Pos (C);
120
            end if;
121
 
122
         when WCEM_Shift_JIS =>
123
            if C > ASCII.DEL then
124
               return Wide_Character'Pos (Shift_JIS_To_JIS (C, In_Char));
125
            else
126
               return Character'Pos (C);
127
            end if;
128
 
129
         when WCEM_EUC =>
130
            if C > ASCII.DEL then
131
               return Wide_Character'Pos (EUC_To_JIS (C, In_Char));
132
            else
133
               return Character'Pos (C);
134
            end if;
135
 
136
         when WCEM_UTF8 =>
137
 
138
            --  Note: for details of UTF8 encoding see RFC 3629
139
 
140
            U := Unsigned_32 (Character'Pos (C));
141
 
142
            --  16#00_0000#-16#00_007F#: 0xxxxxxx
143
 
144
            if (U and 2#10000000#) = 2#00000000# then
145
               return Character'Pos (C);
146
 
147
            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
148
 
149
            elsif (U and 2#11100000#) = 2#110_00000# then
150
               W := U and 2#00011111#;
151
               Get_UTF_Byte;
152
               return UTF_32_Code (W);
153
 
154
            --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
155
 
156
            elsif (U and 2#11110000#) = 2#1110_0000# then
157
               W := U and 2#00001111#;
158
               Get_UTF_Byte;
159
               Get_UTF_Byte;
160
               return UTF_32_Code (W);
161
 
162
            --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
163
 
164
            elsif (U and 2#11111000#) = 2#11110_000# then
165
               W := U and 2#00000111#;
166
 
167
               for K in 1 .. 3 loop
168
                  Get_UTF_Byte;
169
               end loop;
170
 
171
               return UTF_32_Code (W);
172
 
173
            --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
174
            --                               10xxxxxx 10xxxxxx
175
 
176
            elsif (U and 2#11111100#) = 2#111110_00# then
177
               W := U and 2#00000011#;
178
 
179
               for K in 1 .. 4 loop
180
                  Get_UTF_Byte;
181
               end loop;
182
 
183
               return UTF_32_Code (W);
184
 
185
            --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
186
            --                               10xxxxxx 10xxxxxx 10xxxxxx
187
 
188
            elsif (U and 2#11111110#) = 2#1111110_0# then
189
               W := U and 2#00000001#;
190
 
191
               for K in 1 .. 5 loop
192
                  Get_UTF_Byte;
193
               end loop;
194
 
195
               return UTF_32_Code (W);
196
 
197
            else
198
               raise Constraint_Error;
199
            end if;
200
 
201
         when WCEM_Brackets =>
202
            if C /= '[' then
203
               return Character'Pos (C);
204
            end if;
205
 
206
            if In_Char /= '"' then
207
               raise Constraint_Error;
208
            end if;
209
 
210
            B1 := 0;
211
            Get_Hex (In_Char);
212
            Get_Hex (In_Char);
213
 
214
            C1 := In_Char;
215
 
216
            if C1 /= '"' then
217
               Get_Hex (C1);
218
               Get_Hex (In_Char);
219
 
220
               C1 := In_Char;
221
 
222
               if C1 /= '"' then
223
                  Get_Hex (C1);
224
                  Get_Hex (In_Char);
225
 
226
                  C1 := In_Char;
227
 
228
                  if C1 /= '"' then
229
                     Get_Hex (C1);
230
                     Get_Hex (In_Char);
231
 
232
                     if B1 > Unsigned_32 (UTF_32_Code'Last) then
233
                        raise Constraint_Error;
234
                     end if;
235
 
236
                     if In_Char /= '"' then
237
                        raise Constraint_Error;
238
                     end if;
239
                  end if;
240
               end if;
241
            end if;
242
 
243
            if In_Char /= ']' then
244
               raise Constraint_Error;
245
            end if;
246
 
247
            return UTF_32_Code (B1);
248
 
249
      end case;
250
   end Char_Sequence_To_UTF_32;
251
 
252
   --------------------------------
253
   -- Char_Sequence_To_Wide_Char --
254
   --------------------------------
255
 
256
   function Char_Sequence_To_Wide_Char
257
     (C  : Character;
258
      EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character
259
   is
260
      function Char_Sequence_To_UTF is new Char_Sequence_To_UTF_32 (In_Char);
261
 
262
      U : constant UTF_32_Code := Char_Sequence_To_UTF (C, EM);
263
 
264
   begin
265
      if U > 16#FFFF# then
266
         raise Constraint_Error;
267
      else
268
         return Wide_Character'Val (U);
269
      end if;
270
   end Char_Sequence_To_Wide_Char;
271
 
272
   -----------------------------
273
   -- UTF_32_To_Char_Sequence --
274
   -----------------------------
275
 
276
   procedure UTF_32_To_Char_Sequence
277
     (Val : UTF_32_Code;
278
      EM  : System.WCh_Con.WC_Encoding_Method)
279
   is
280
      Hexc : constant array (UTF_32_Code range 0 .. 15) of Character :=
281
               "0123456789ABCDEF";
282
 
283
      C1, C2 : Character;
284
      U      : Unsigned_32;
285
 
286
   begin
287
      --  Raise CE for invalid UTF_32_Code
288
 
289
      if not Val'Valid then
290
         raise Constraint_Error;
291
      end if;
292
 
293
      --  Processing depends on encoding mode
294
 
295
      case EM is
296
 
297
         when WCEM_Hex =>
298
            if Val < 256 then
299
               Out_Char (Character'Val (Val));
300
            elsif Val <= 16#FFFF# then
301
               Out_Char (ASCII.ESC);
302
               Out_Char (Hexc (Val / (16**3)));
303
               Out_Char (Hexc ((Val / (16**2)) mod 16));
304
               Out_Char (Hexc ((Val / 16) mod 16));
305
               Out_Char (Hexc (Val mod 16));
306
            else
307
               raise Constraint_Error;
308
            end if;
309
 
310
         when WCEM_Upper =>
311
            if Val < 128 then
312
               Out_Char (Character'Val (Val));
313
            elsif Val < 16#8000# or else Val > 16#FFFF# then
314
               raise Constraint_Error;
315
            else
316
               Out_Char (Character'Val (Val / 256));
317
               Out_Char (Character'Val (Val mod 256));
318
            end if;
319
 
320
         when WCEM_Shift_JIS =>
321
            if Val < 128 then
322
               Out_Char (Character'Val (Val));
323
            elsif Val <= 16#FFFF# then
324
               JIS_To_Shift_JIS (Wide_Character'Val (Val), C1, C2);
325
               Out_Char (C1);
326
               Out_Char (C2);
327
            else
328
               raise Constraint_Error;
329
            end if;
330
 
331
         when WCEM_EUC =>
332
            if Val < 128 then
333
               Out_Char (Character'Val (Val));
334
            elsif Val <= 16#FFFF# then
335
               JIS_To_EUC (Wide_Character'Val (Val), C1, C2);
336
               Out_Char (C1);
337
               Out_Char (C2);
338
            else
339
               raise Constraint_Error;
340
            end if;
341
 
342
         when WCEM_UTF8 =>
343
 
344
            --  Note: for details of UTF8 encoding see RFC 3629
345
 
346
            U := Unsigned_32 (Val);
347
 
348
            --  16#00_0000#-16#00_007F#: 0xxxxxxx
349
 
350
            if U <= 16#00_007F# then
351
               Out_Char (Character'Val (U));
352
 
353
            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
354
 
355
            elsif U <= 16#00_07FF# then
356
               Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
357
               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
358
 
359
            --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
360
 
361
            elsif U <= 16#00_FFFF# then
362
               Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
363
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
364
                                                          and 2#00111111#)));
365
               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
366
 
367
            --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
368
 
369
            elsif U <= 16#10_FFFF# then
370
               Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
371
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
372
                                                          and 2#00111111#)));
373
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
374
                                                          and 2#00111111#)));
375
               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
376
 
377
            --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
378
            --                               10xxxxxx 10xxxxxx
379
 
380
            elsif U <= 16#03FF_FFFF# then
381
               Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
382
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
383
                                                          and 2#00111111#)));
384
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
385
                                                          and 2#00111111#)));
386
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
387
                                                          and 2#00111111#)));
388
               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
389
 
390
            --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
391
            --                               10xxxxxx 10xxxxxx 10xxxxxx
392
 
393
            elsif U <= 16#7FFF_FFFF# then
394
               Out_Char (Character'Val (2#11111100# or Shift_Right (U, 30)));
395
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 24)
396
                                                          and 2#00111111#)));
397
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
398
                                                          and 2#00111111#)));
399
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
400
                                                          and 2#00111111#)));
401
               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
402
                                                          and 2#00111111#)));
403
               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
404
 
405
            else
406
               raise Constraint_Error;
407
            end if;
408
 
409
         when WCEM_Brackets =>
410
 
411
            --  Values in the range 0-255 are directly output. Note that there
412
            --  is some issue with [ (16#5B#] since this will cause confusion
413
            --  if the resulting string is interpreted using brackets encoding.
414
 
415
            --  One possibility would be to always output [ as ["5B"] but in
416
            --  practice this is undesirable, since for example normal use of
417
            --  Wide_Text_IO for output (much more common than input), really
418
            --  does want to be able to say something like
419
 
420
            --     Put_Line ("Start of output [first run]");
421
 
422
            --  and have it come out as intended, rather than contaminated by
423
            --  a ["5B"] sequence in place of the left bracket.
424
 
425
            if Val < 256 then
426
               Out_Char (Character'Val (Val));
427
 
428
            --  Otherwise use brackets notation for vales greater than 255
429
 
430
            else
431
               Out_Char ('[');
432
               Out_Char ('"');
433
 
434
               if Val > 16#FFFF# then
435
                  if Val > 16#00FF_FFFF# then
436
                     Out_Char (Hexc (Val / 16 ** 7));
437
                     Out_Char (Hexc ((Val / 16 ** 6) mod 16));
438
                  end if;
439
 
440
                  Out_Char (Hexc ((Val / 16 ** 5) mod 16));
441
                  Out_Char (Hexc ((Val / 16 ** 4) mod 16));
442
               end if;
443
 
444
               Out_Char (Hexc ((Val / 16 ** 3) mod 16));
445
               Out_Char (Hexc ((Val / 16 ** 2) mod 16));
446
               Out_Char (Hexc ((Val / 16) mod 16));
447
               Out_Char (Hexc (Val mod 16));
448
 
449
               Out_Char ('"');
450
               Out_Char (']');
451
            end if;
452
      end case;
453
   end UTF_32_To_Char_Sequence;
454
 
455
   --------------------------------
456
   -- Wide_Char_To_Char_Sequence --
457
   --------------------------------
458
 
459
   procedure Wide_Char_To_Char_Sequence
460
     (WC : Wide_Character;
461
      EM : System.WCh_Con.WC_Encoding_Method)
462
   is
463
      procedure UTF_To_Char_Sequence is new UTF_32_To_Char_Sequence (Out_Char);
464
   begin
465
      UTF_To_Char_Sequence (Wide_Character'Pos (WC), EM);
466
   end Wide_Char_To_Char_Sequence;
467
 
468
end System.WCh_Cnv;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.