OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [ada/] [s-wchcnv.adb] - Blame information for rev 12

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT RUN-TIME COMPONENTS                         --
4
--                                                                          --
5
--                       S Y S T E M . W C H _ C N V                        --
6
--                                                                          --
7
--                                 B o d y                                  --
8
--                                                                          --
9
--          Copyright (C) 1992-2005, Free Software Foundation, Inc.         --
10
--                                                                          --
11
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12
-- terms of the  GNU General Public License as published  by the Free Soft- --
13
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
14
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17
-- for  more details.  You should have  received  a copy of the GNU General --
18
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
19
-- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
20
-- Boston, MA 02110-1301, USA.                                              --
21
--                                                                          --
22
-- As a special exception,  if other files  instantiate  generics from this --
23
-- unit, or you link  this unit with other files  to produce an executable, --
24
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
25
-- covered  by the  GNU  General  Public  License.  This exception does not --
26
-- however invalidate  any other reasons why  the executable file  might be --
27
-- covered by the  GNU Public License.                                      --
28
--                                                                          --
29
-- GNAT was originally developed  by the GNAT team at  New York University. --
30
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
31
--                                                                          --
32
------------------------------------------------------------------------------
33
 
34
--  This package contains generic subprograms used for converting between
35
--  sequences of Character and Wide_Character. All access to wide character
36
--  sequences is isolated in this unit.
37
 
38
with Interfaces;     use Interfaces;
39
with System.WCh_Con; use System.WCh_Con;
40
with System.WCh_JIS; use System.WCh_JIS;
41
 
42
package body System.WCh_Cnv is
43
 
44
   -----------------------------
45
   -- Char_Sequence_To_UTF_32 --
46
   -----------------------------
47
 
48
   function Char_Sequence_To_UTF_32
     (C       : Character;
      EM      : WC_Encoding_Method) return UTF_32_Code
   is
      --  Decodes the character sequence whose first character is C, using
      --  encoding method EM, and returns the resulting UTF-32 code. Any
      --  further characters belonging to the sequence are obtained one at
      --  a time by calling In_Char, so the order of those calls matters.
      --  Constraint_Error is raised when this routine detects a malformed
      --  sequence (bad hex digit, bad UTF-8 lead or continuation byte,
      --  missing quote or bracket, or a brackets value that is too large).

      B1 : Unsigned_32;
      --  Accumulates hex digits for the Hex and Brackets encodings

      C1 : Character;
      --  Character read ahead while parsing brackets notation; it is either
      --  the closing quote or the first digit of the next pair of digits.

      U  : Unsigned_32;
      --  Code of the byte currently being examined in UTF-8 mode

      W  : Unsigned_32;
      --  Accumulates the payload bits of a UTF-8 sequence

      procedure Get_Hex (N : Character);
      --  If N is a hex character, then set B1 to 16 * B1 + character N.
      --  Raise Constraint_Error if character N is not a hex character.

      procedure Get_UTF_Byte;
      pragma Inline (Get_UTF_Byte);
      --  Used to interpret a 2#10xxxxxx# continuation byte in UTF-8 mode.
      --  Reads a byte, and raises CE if the first two bits are not 10.
      --  Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits.

      -------------
      -- Get_Hex --
      -------------

      procedure Get_Hex (N : Character) is
         B2 : constant Unsigned_32 := Character'Pos (N);
      begin
         if B2 in Character'Pos ('0') .. Character'Pos ('9') then
            B1 := B1 * 16 + B2 - Character'Pos ('0');
         elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then
            B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10);
         elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then
            B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10);
         else
            raise Constraint_Error;
         end if;
      end Get_Hex;

      ------------------
      -- Get_UTF_Byte --
      ------------------

      procedure Get_UTF_Byte is
      begin
         U := Unsigned_32 (Character'Pos (In_Char));

         if (U and 2#11000000#) /= 2#10_000000# then
            raise Constraint_Error;
         end if;

         W := Shift_Left (W, 6) or (U and 2#00111111#);
      end Get_UTF_Byte;

   --  Start of processing for Char_Sequence_To_UTF_32

   begin
      case EM is

         --  Hex encoding: ESC followed by exactly four hex digits, any
         --  other first character stands for itself.

         when WCEM_Hex =>
            if C /= ASCII.ESC then
               return Character'Pos (C);

            else
               B1 := 0;

               for K in 1 .. 4 loop
                  Get_Hex (In_Char);
               end loop;

               return UTF_32_Code (B1);
            end if;

         --  Upper half encoding: a first character above DEL is the high
         --  order byte of a two byte sequence.

         when WCEM_Upper =>
            if C > ASCII.DEL then
               return 256 * Character'Pos (C) + Character'Pos (In_Char);
            else
               return Character'Pos (C);
            end if;

         --  Shift-JIS: a first character above DEL starts a two character
         --  sequence, which is handed to Shift_JIS_To_JIS for conversion.

         when WCEM_Shift_JIS =>
            if C > ASCII.DEL then
               return Wide_Character'Pos (Shift_JIS_To_JIS (C, In_Char));
            else
               return Character'Pos (C);
            end if;

         --  EUC: same two character structure, converted by EUC_To_JIS

         when WCEM_EUC =>
            if C > ASCII.DEL then
               return Wide_Character'Pos (EUC_To_JIS (C, In_Char));
            else
               return Character'Pos (C);
            end if;

         when WCEM_UTF8 =>

            --  Note: for details of UTF8 encoding see RFC 3629

            U := Unsigned_32 (Character'Pos (C));

            --  In each multi-byte case below, W is seeded with the payload
            --  bits of the lead byte and Get_UTF_Byte is called once per
            --  continuation byte to shift in six more bits.

            --  16#00_0000#-16#00_007F#: 0xxxxxxx

            if (U and 2#10000000#) = 2#00000000# then
               return Character'Pos (C);

            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx

            elsif (U and 2#11100000#) = 2#110_00000# then
               W := U and 2#00011111#;
               Get_UTF_Byte;
               return UTF_32_Code (W);

            --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx

            elsif (U and 2#11110000#) = 2#1110_0000# then
               W := U and 2#00001111#;
               Get_UTF_Byte;
               Get_UTF_Byte;
               return UTF_32_Code (W);

            --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

            elsif (U and 2#11111000#) = 2#11110_000# then
               W := U and 2#00000111#;

               for K in 1 .. 3 loop
                  Get_UTF_Byte;
               end loop;

               return UTF_32_Code (W);

            --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
            --                               10xxxxxx 10xxxxxx

            elsif (U and 2#11111100#) = 2#111110_00# then
               W := U and 2#00000011#;

               for K in 1 .. 4 loop
                  Get_UTF_Byte;
               end loop;

               return UTF_32_Code (W);

            --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
            --                               10xxxxxx 10xxxxxx 10xxxxxx

            elsif (U and 2#11111110#) = 2#1111110_0# then
               W := U and 2#00000001#;

               for K in 1 .. 5 loop
                  Get_UTF_Byte;
               end loop;

               return UTF_32_Code (W);

            --  A lead byte of the form 10xxxxxx or 1111111x is not valid

            else
               raise Constraint_Error;
            end if;

         --  Brackets encoding: ["XX"], ["XXXX"], ["XXXXXX"] or
         --  ["XXXXXXXX"] where each X is a hex digit. Any first character
         --  other than a left bracket stands for itself.

         when WCEM_Brackets =>

            if C /= '[' then
               return Character'Pos (C);
            end if;

            if In_Char /= '"' then
               raise Constraint_Error;
            end if;

            --  The first two hex digits are mandatory

            B1 := 0;
            Get_Hex (In_Char);
            Get_Hex (In_Char);

            --  After each pair of digits the next character is either the
            --  closing quote or the first digit of a further pair.

            C1 := In_Char;

            if C1 /= '"' then
               Get_Hex (C1);
               Get_Hex (In_Char);

               C1 := In_Char;

               if C1 /= '"' then
                  Get_Hex (C1);
                  Get_Hex (In_Char);

                  C1 := In_Char;

                  if C1 /= '"' then
                     Get_Hex (C1);
                     Get_Hex (In_Char);

                     --  Only the eight digit form can exceed the range of
                     --  UTF_32_Code, so the range check is needed only here.

                     if B1 > Unsigned_32 (UTF_32_Code'Last) then
                        raise Constraint_Error;
                     end if;

                     --  Eight digits is the maximum, so the closing quote
                     --  must follow (in the shorter forms it has already
                     --  been consumed into C1).

                     if In_Char /= '"' then
                        raise Constraint_Error;
                     end if;
                  end if;
               end if;
            end if;

            if In_Char /= ']' then
               raise Constraint_Error;
            end if;

            return UTF_32_Code (B1);

      end case;
   end Char_Sequence_To_UTF_32;
263
 
264
   --------------------------------
265
   -- Char_Sequence_To_Wide_Char --
266
   --------------------------------
267
 
268
   function Char_Sequence_To_Wide_Char
     (C  : Character;
      EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character
   is
      --  Decodes the sequence starting with C by means of an instance of
      --  Char_Sequence_To_UTF_32 that reads further characters from In_Char,
      --  then narrows the result to Wide_Character.

      function Decode_Sequence is new Char_Sequence_To_UTF_32 (In_Char);

      Code : constant UTF_32_Code := Decode_Sequence (C, EM);

   begin
      --  Codes up to 16#FFFF# are returned as the Wide_Character at that
      --  position

      if Code <= 16#FFFF# then
         return Wide_Character'Val (Code);
      end if;

      --  Anything larger cannot be represented in a Wide_Character

      raise Constraint_Error;
   end Char_Sequence_To_Wide_Char;
283
 
284
   -----------------------------
285
   -- UTF_32_To_Char_Sequence --
286
   -----------------------------
287
 
288
   procedure UTF_32_To_Char_Sequence
     (Val : UTF_32_Code;
      EM  : System.WCh_Con.WC_Encoding_Method)
   is
      --  Encodes the UTF-32 code Val using encoding method EM. The resulting
      --  characters are delivered one at a time, in order, through Out_Char.
      --  Constraint_Error is raised if Val cannot be represented in the
      --  requested encoding.

      Hex_Digit : constant array (UTF_32_Code range 0 .. 15) of Character :=
                    "0123456789ABCDEF";

      First_Byte, Second_Byte : Character;
      --  Results of the JIS conversion routines

      Last_Nibble : Natural;
      --  Position of the most significant hex digit output in brackets mode

      procedure Put_Hex (Nibble : Natural);
      --  Outputs the hex digit of Val found at position Nibble, where
      --  position zero is the least significant digit.

      procedure Put_UTF8
        (Code     : Unsigned_32;
         Lead     : Unsigned_32;
         Trailing : Natural);
      --  Outputs a multi-byte UTF-8 sequence for Code. The first byte is
      --  Lead or'ed with the bits of Code above the trailing bytes, and is
      --  followed by Trailing continuation bytes of the form 10xxxxxx, most
      --  significant first.

      -------------
      -- Put_Hex --
      -------------

      procedure Put_Hex (Nibble : Natural) is
      begin
         Out_Char (Hex_Digit ((Val / 16 ** Nibble) mod 16));
      end Put_Hex;

      --------------
      -- Put_UTF8 --
      --------------

      procedure Put_UTF8
        (Code     : Unsigned_32;
         Lead     : Unsigned_32;
         Trailing : Natural)
      is
      begin
         Out_Char (Character'Val (Lead or Shift_Right (Code, 6 * Trailing)));

         for K in reverse 0 .. Trailing - 1 loop
            Out_Char
              (Character'Val
                 (2#10000000# or (Shift_Right (Code, 6 * K)
                                    and 2#00111111#)));
         end loop;
      end Put_UTF8;

   --  Start of processing for UTF_32_To_Char_Sequence

   begin
      case EM is

         --  Hex encoding: codes below 256 stand for themselves, other
         --  16-bit codes are written as ESC followed by four hex digits.

         when WCEM_Hex =>
            if Val < 256 then
               Out_Char (Character'Val (Val));

            elsif Val <= 16#FFFF# then
               Out_Char (ASCII.ESC);

               for Nibble in reverse 0 .. 3 loop
                  Put_Hex (Nibble);
               end loop;

            else
               raise Constraint_Error;
            end if;

         --  Upper half encoding: only codes below 128 and codes in the
         --  range 16#8000# .. 16#FFFF# can be represented.

         when WCEM_Upper =>
            if Val < 128 then
               Out_Char (Character'Val (Val));

            elsif Val in 16#8000# .. 16#FFFF# then
               Out_Char (Character'Val (Val / 256));
               Out_Char (Character'Val (Val mod 256));

            else
               raise Constraint_Error;
            end if;

         --  Shift-JIS: 16-bit codes from 128 up are converted to a two
         --  character sequence by JIS_To_Shift_JIS.

         when WCEM_Shift_JIS =>
            if Val < 128 then
               Out_Char (Character'Val (Val));

            elsif Val <= 16#FFFF# then
               JIS_To_Shift_JIS
                 (Wide_Character'Val (Val), First_Byte, Second_Byte);
               Out_Char (First_Byte);
               Out_Char (Second_Byte);

            else
               raise Constraint_Error;
            end if;

         --  EUC: as for Shift-JIS, but converting with JIS_To_EUC

         when WCEM_EUC =>
            if Val < 128 then
               Out_Char (Character'Val (Val));

            elsif Val <= 16#FFFF# then
               JIS_To_EUC (Wide_Character'Val (Val), First_Byte, Second_Byte);
               Out_Char (First_Byte);
               Out_Char (Second_Byte);

            else
               raise Constraint_Error;
            end if;

         --  UTF-8 (see RFC 3629). The length of the sequence is selected
         --  by the magnitude of the code.

         when WCEM_UTF8 =>
            declare
               Code : constant Unsigned_32 := Unsigned_32 (Val);

            begin
               --  16#00_0000#-16#00_007F#: 0xxxxxxx

               if Code <= 16#00_007F# then
                  Out_Char (Character'Val (Code));

               --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx

               elsif Code <= 16#00_07FF# then
                  Put_UTF8 (Code, Lead => 2#11000000#, Trailing => 1);

               --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx

               elsif Code <= 16#00_FFFF# then
                  Put_UTF8 (Code, Lead => 2#11100000#, Trailing => 2);

               --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
               --                           10xxxxxx

               elsif Code <= 16#10_FFFF# then
                  Put_UTF8 (Code, Lead => 2#11110000#, Trailing => 3);

               --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
               --                               10xxxxxx 10xxxxxx

               elsif Code <= 16#03FF_FFFF# then
                  Put_UTF8 (Code, Lead => 2#11111000#, Trailing => 4);

               --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
               --                               10xxxxxx 10xxxxxx 10xxxxxx

               elsif Code <= 16#7FFF_FFFF# then
                  Put_UTF8 (Code, Lead => 2#11111100#, Trailing => 5);

               else
                  raise Constraint_Error;
               end if;
            end;

         --  Brackets encoding: codes below 256 stand for themselves, all
         --  others are written as ["hhhh"], ["hhhhhh"] or ["hhhhhhhh"].

         when WCEM_Brackets =>
            if Val < 256 then
               Out_Char (Character'Val (Val));

            else
               --  The opening bracket and quote are output before the range
               --  check, so they precede any Constraint_Error raised below.

               Out_Char ('[');
               Out_Char ('"');

               if Val > 16#7FFF_FFFF# then
                  raise Constraint_Error;
               elsif Val > 16#00FF_FFFF# then
                  Last_Nibble := 7;
               elsif Val > 16#FFFF# then
                  Last_Nibble := 5;
               else
                  Last_Nibble := 3;
               end if;

               for Nibble in reverse 0 .. Last_Nibble loop
                  Put_Hex (Nibble);
               end loop;

               Out_Char ('"');
               Out_Char (']');
            end if;
      end case;
   end UTF_32_To_Char_Sequence;
445
 
446
   --------------------------------
447
   -- Wide_Char_To_Char_Sequence --
448
   --------------------------------
449
 
450
   procedure Wide_Char_To_Char_Sequence
     (WC : Wide_Character;
      EM : System.WCh_Con.WC_Encoding_Method)
   is
      --  Encodes WC with method EM by passing its position value to an
      --  instance of UTF_32_To_Char_Sequence that writes through Out_Char.

      procedure Encode_Code is new UTF_32_To_Char_Sequence (Out_Char);

      Code : constant UTF_32_Code := Wide_Character'Pos (WC);

   begin
      Encode_Code (Code, EM);
   end Wide_Char_To_Char_Sequence;
458
 
459
end System.WCh_Cnv;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.