OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [newlib-1.17.0/] [newlib/] [libc/] [iconv/] [ces/] [utf-8.c] - Blame information for rev 148

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
/*
2
 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3
 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 */
26
#include "cesbi.h"
27
 
28
#if defined (ICONV_TO_UCS_CES_UTF_8) \
29
 || defined (ICONV_FROM_UCS_CES_UTF_8)
30
 
31
#include <_ansi.h>
32
#include <reent.h>
33
#include <sys/types.h>
34
#include "../lib/local.h"
35
#include "../lib/ucsconv.h"
36
 
37
#define UTF8_MB_CUR_MAX 6
38
 
39
/*
40
 * UTF-8 CES converter doesn't interpret BOM. Reject overlong sequences,
41
 * U+FFFF, U+FFFE codes, UTF-16 surrogate codes and all codes > 0x7FFFFFFF.
42
 */
43
 
44
#if defined (ICONV_FROM_UCS_CES_UTF_8)
45
static size_t
46
_DEFUN(convert_from_ucs, (data, in, outbuf, outbytesleft),
47
                         _VOID_PTR data         _AND
48
                         register ucs4_t in     _AND
49
                         unsigned char **outbuf _AND
50
                         size_t *outbytesleft)
51
{
52
  register unsigned char *cp;
53
  register size_t bytes;
54
 
55
  if ((in  >= 0x0000D800 && in <= 0x0000DFFF)
56
      || in > 0x7FFFFFFF || in == 0x0000FFFF || in == 0x0000FFFE)
57
    return (size_t)ICONV_CES_INVALID_CHARACTER;
58
 
59
  if (in < 0x80)
60
    bytes = 1;
61
  else if (in < 0x800)
62
    bytes = 2;
63
  else if (in < 0x10000)
64
    bytes = 3;
65
  else if (in < 0x200000)
66
    bytes = 4;
67
  else if (in < 0x4000000)
68
    bytes = 5;
69
  else
70
    bytes = 6;
71
 
72
  if (*outbytesleft < bytes)
73
    return (size_t)ICONV_CES_NOSPACE;
74
 
75
  cp = *outbuf;
76
 
77
  switch (bytes)
78
    {
79
      case 1:
80
        *cp = (unsigned char)in;
81
        break;
82
 
83
      case 2:
84
        *cp++ = (unsigned char)((in >> 6) | 0x000000C0);
85
        *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080);
86
        break;
87
 
88
      case 3:
89
        *cp++ = (unsigned char)((in >> 12) | 0x000000E0);
90
        *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080);
91
        *cp++ = (unsigned char)((in        & 0x0000003F) | 0x00000080);
92
        break;
93
 
94
      case 4:
95
        *cp++ = (unsigned char)((in >> 18)  | 0x000000F0);
96
        *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080);
97
        *cp++ = (unsigned char)(((in >> 6)  & 0x0000003F) | 0x00000080);
98
        *cp++ = (unsigned char)((in         & 0x0000003F) | 0x00000080);
99
        break;
100
 
101
      case 5:
102
        *cp++ = (unsigned char)((in >> 24)  | 0x000000F8);
103
        *cp++ = (unsigned char)(((in >> 18) & 0x0000003F) | 0x00000080);
104
        *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080);
105
        *cp++ = (unsigned char)(((in >> 6)  & 0x0000003F) | 0x00000080);
106
        *cp++ = (unsigned char)((in         & 0x0000003F) | 0x00000080);
107
        break;
108
 
109
      case 6:
110
        *cp++ = (unsigned char)((in >> 30)  | 0x000000FC);
111
        *cp++ = (unsigned char)(((in >> 24) & 0x0000003F) | 0x00000080);
112
        *cp++ = (unsigned char)(((in >> 18) & 0x0000003F) | 0x00000080);
113
        *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080);
114
        *cp++ = (unsigned char)(((in >> 6)  & 0x0000003F) | 0x00000080);
115
        *cp++ = (unsigned char)((in         & 0x0000003F) | 0x00000080);
116
        break;
117
    }
118
 
119
  *outbytesleft -= bytes;
120
  *outbuf += bytes;
121
 
122
  return bytes;
123
}
124
#endif /* ICONV_FROM_UCS_CES_UTF_8 */
125
 
126
#if defined (ICONV_TO_UCS_CES_UTF_8)
127
static ucs4_t
128
_DEFUN(convert_to_ucs, (data, inbuf, inbytesleft),
129
                       _VOID_PTR data               _AND
130
                       _CONST unsigned char **inbuf _AND
131
                       size_t *inbytesleft)
132
{
133
  register _CONST unsigned char *in = *inbuf;
134
  register size_t bytes;
135
  ucs4_t res;
136
 
137
  if (in[0] >= 0xC0)
138
    {
139
      if (in[0] < 0xE0)
140
        {
141
          if (*inbytesleft < (bytes = 2))
142
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
143
 
144
          if (   ((in[0] & ~0x1F) == 0xC0)
145
              && ((in[1] & 0xC0)  == 0x80))
146
            res = ((ucs4_t)(in[0] & 0x1F) << 6)
147
                | ((ucs4_t)(in[1] & 0x3F));
148
          else
149
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
150
 
151
          if (res < 0x00000080) /* Overlong sequence */
152
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
153
        }
154
 
155
      else if (in[0] < 0xF0)
156
        {
157
          if (*inbytesleft < (bytes = 3))
158
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
159
 
160
          if (   ((in[0] & ~0x0F) == 0xE0)
161
              && ((in[1] & 0xC0)  == 0x80)
162
              && ((in[2] & 0xC0)  == 0x80))
163
            res = ((ucs4_t)(in[0] & 0x0F) << 12)
164
                | ((ucs4_t)(in[1] & 0x3F) << 6)
165
                | ((ucs4_t)(in[2] & 0x3F));
166
          else
167
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
168
 
169
          if (res < 0x00000800) /* Overlong sequence */
170
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
171
        }
172
 
173
      else if (in[0] < 0xF8)
174
        {
175
          if (*inbytesleft < (bytes = 4))
176
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
177
 
178
          if (   ((in[0] & ~0x07) == 0xF0)
179
              && ((in[1] & 0xC0)  == 0x80)
180
              && ((in[2] & 0xC0)  == 0x80)
181
              && ((in[3] & 0xC0)  == 0x80))
182
            res = ((ucs4_t)(in[0] & 0x07) << 18)
183
                | ((ucs4_t)(in[1] & 0x3F) << 12)
184
                | ((ucs4_t)(in[2] & 0x3F) << 6)
185
                | ((ucs4_t)(in[3] & 0x3F));
186
          else
187
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
188
 
189
          if (res < 0x00010000) /* Overlong sequence */
190
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
191
        }
192
 
193
      else if (in[0] < 0xFC)
194
        {
195
          if (*inbytesleft < (bytes = 5))
196
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
197
 
198
          if (   ((in[0] & ~0x03) == 0xF8)
199
              && ((in[1] & 0xC0)  == 0x80)
200
              && ((in[2] & 0xC0)  == 0x80)
201
              && ((in[3] & 0xC0)  == 0x80)
202
              && ((in[4] & 0xC0)  == 0x80))
203
            res = ((ucs4_t)(in[0] & 0x03) << 24)
204
                | ((ucs4_t)(in[1] & 0x3F) << 18)
205
                | ((ucs4_t)(in[2] & 0x3F) << 12)
206
                | ((ucs4_t)(in[3] & 0x3F) << 6)
207
                | ((ucs4_t)(in[4] & 0x3F));
208
          else
209
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
210
 
211
          if (res < 0x00200000) /* Overlong sequence */
212
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
213
        }
214
 
215
      else if (in[0] <= 0xFD)
216
        {
217
          if (*inbytesleft < (bytes = 6))
218
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
219
 
220
          if (   ((in[0] & ~0x01) == 0xFC)
221
              && ((in[1] & 0xC0)  == 0x80)
222
              && ((in[2] & 0xC0)  == 0x80)
223
              && ((in[3] & 0xC0)  == 0x80)
224
              && ((in[4] & 0xC0)  == 0x80)
225
              && ((in[5] & 0xC0)  == 0x80))
226
              res = ((ucs4_t)(in[0] & 0x1)  << 30)
227
                  | ((ucs4_t)(in[1] & 0x3F) << 24)
228
                  | ((ucs4_t)(in[2] & 0x3F) << 18)
229
                  | ((ucs4_t)(in[3] & 0x3F) << 12)
230
                  | ((ucs4_t)(in[4] & 0x3F) << 6)
231
                  | ((ucs4_t)(in[5] & 0x3F));
232
          else
233
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
234
 
235
          if (res < 0x04000000) /* Overlong sequence */
236
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
237
        }
238
 
239
      else
240
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
241
    }
242
  else if (in[0] & 0x80)
243
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
244
  else
245
    {
246
      res = (ucs4_t)in[0];
247
      bytes = 1;
248
    }
249
 
250
  if (  (res  >= 0x0000D800 && res <= 0x0000DFFF)
251
      || res > 0x7FFFFFFF || res == 0x0000FFFF || res == 0x0000FFFE)
252
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
253
 
254
  *inbytesleft -= bytes;
255
  *inbuf += bytes;
256
 
257
  return res;
258
}
259
#endif /* ICONV_TO_UCS_CES_UTF_8 */
260
 
261
static int
262
_DEFUN(get_mb_cur_max, (data),
263
                       _VOID_PTR data)
264
{
265
  return UTF8_MB_CUR_MAX;
266
}
267
 
268
#if defined (ICONV_TO_UCS_CES_UTF_8)
269
_CONST iconv_to_ucs_ces_handlers_t
270
_iconv_to_ucs_ces_handlers_utf_8 =
271
{
272
  NULL,
273
  NULL,
274
  get_mb_cur_max,
275
  NULL,
276
  NULL,
277
  NULL,
278
  convert_to_ucs
279
};
280
#endif
281
 
282
#if defined (ICONV_FROM_UCS_CES_UTF_8)
283
_CONST iconv_from_ucs_ces_handlers_t
284
_iconv_from_ucs_ces_handlers_utf_8 =
285
{
286
  NULL,
287
  NULL,
288
  get_mb_cur_max,
289
  NULL,
290
  NULL,
291
  NULL,
292
  convert_from_ucs
293
};
294
#endif
295
 
296
#endif /* ICONV_TO_UCS_CES_UTF_8 || ICONV_FROM_UCS_CES_UTF_8 */
297
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.