OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [iconv/] [ces/] [utf-16.c] - Blame information for rev 252

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
/*
2
 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3
 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 */
26
#include "cesbi.h"
27
 
28
#if defined (ICONV_TO_UCS_CES_UTF_16) \
29
 || defined (ICONV_FROM_UCS_CES_UTF_16)
30
 
31
#include <_ansi.h>
32
#include <reent.h>
33
#include <sys/types.h>
34
#include <stdlib.h>
35
#include <string.h>
36
#include <wchar.h>
37
#include "../lib/local.h"
38
#include "../lib/ucsconv.h"
39
#include "../lib/endian.h"
40
 
41
/*
42
 * On input, the UTF-16 converter interprets a leading BOM and assumes Big
43
 * Endian byte order if no BOM is present. On output, it writes in System
44
 * Endian order and emits the corresponding BOM as the first code unit. The
45
 * UTF-16LE and UTF-16BE converters do not treat a BOM specially on input (it
 * is passed through as an ordinary character) and don't output a BOM.
46
 */
47
 
48
#define UTF16_UNDEFINED     0x00
49
#define UTF16_BIG_ENDIAN    0x01
50
#define UTF16_LITTLE_ENDIAN 0x02
51
#define UTF16_SYSTEM_ENDIAN 0x04
52
#define UTF16_BOM_WRITTEN   0x08
53
 
54
#define UTF16_BOM 0xFEFF
55
 
56
#define UTF_16   "utf_16"
57
#define UTF_16BE "utf_16be"
58
#define UTF_16LE "utf_16le"
59
 
60
static size_t
61
_DEFUN(utf_16_close, (rptr, data),
62
                     struct _reent *rptr _AND
63
                     _VOID_PTR data)
64
{
65
  _free_r(rptr, data);
66
  return 0;
67
}
68
 
69
#if defined (ICONV_FROM_UCS_CES_UTF_16)
70
static _VOID_PTR
71
_DEFUN(utf_16_init_from_ucs, (rptr, encoding),
72
                             struct _reent *rptr _AND
73
                             _CONST char *encoding)
74
{
75
  int *data;
76
 
77
  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
78
    return (_VOID_PTR)NULL;
79
 
80
  if (strcmp (encoding, UTF_16LE) == 0)
81
    *data = UTF16_LITTLE_ENDIAN;
82
  else if (strcmp (encoding, UTF_16BE) == 0)
83
    *data = UTF16_BIG_ENDIAN;
84
  else
85
    *data = UTF16_SYSTEM_ENDIAN;
86
 
87
  return (_VOID_PTR)data;
88
}
89
 
90
static size_t
91
_DEFUN(utf_16_convert_from_ucs, (data, in, outbuf, outbytesleft),
92
                                _VOID_PTR data         _AND
93
                                register ucs4_t in     _AND
94
                                unsigned char **outbuf _AND
95
                                size_t *outbytesleft)
96
{
97
  register ucs2_t *cp;
98
  register size_t bytes;
99
  register int *state;
100
 
101
  if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF)
102
      || in == 0x0000FFFF || in == 0x0000FFFE)
103
    return (size_t)ICONV_CES_INVALID_CHARACTER;
104
 
105
  state = (int *)data;
106
  bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2
107
                                          : sizeof (ucs2_t);
108
 
109
  if (in > 0x0000FFFF)
110
    bytes += sizeof (ucs2_t);
111
 
112
  if (*outbytesleft < bytes)
113
    return (size_t)ICONV_CES_NOSPACE;
114
 
115
  cp = (ucs2_t *)*outbuf;
116
 
117
  if (*state == UTF16_SYSTEM_ENDIAN)
118
    {
119
      *cp++ = UTF16_BOM;
120
      *state |= UTF16_BOM_WRITTEN;
121
    }
122
 
123
  if (in < 0x00010000)
124
    {
125
      switch (*state)
126
        {
127
          case UTF16_LITTLE_ENDIAN:
128
            *cp = ICONV_HTOLES ((ucs2_t)in);
129
            break;
130
          case UTF16_BIG_ENDIAN:
131
            *cp = ICONV_HTOBES ((ucs2_t)in);
132
            break;
133
          case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
134
            *cp = (ucs2_t)in;
135
            break;
136
        }
137
    }
138
  else
139
    {
140
      ucs2_t w1, w2;
141
 
142
      /* Process surrogate pair */
143
      in -= 0x00010000;
144
      w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800;
145
      w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00;
146
 
147
      switch (*state)
148
        {
149
          case UTF16_LITTLE_ENDIAN:
150
            *cp++ = ICONV_HTOLES (w1);
151
            *cp = ICONV_HTOLES (w2);
152
            break;
153
          case UTF16_BIG_ENDIAN:
154
            *cp++ = ICONV_HTOBES (w1);
155
            *cp = ICONV_HTOBES (w2);
156
            break;
157
          case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
158
            *cp++ = w1;
159
            *cp = w2;
160
            break;
161
        }
162
    }
163
 
164
  *outbuf += bytes;
165
  *outbytesleft -= bytes;
166
 
167
  return bytes;
168
}
169
#endif /* ICONV_FROM_UCS_CES_UTF_16 */
170
 
171
#if defined (ICONV_TO_UCS_CES_UTF_16)
172
static _VOID_PTR
173
_DEFUN(utf_16_init_to_ucs, (rptr, encoding),
174
                           struct _reent *rptr _AND
175
                           _CONST char *encoding)
176
{
177
  int *data;
178
 
179
  if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
180
    return (_VOID_PTR)NULL;
181
 
182
  if (strcmp (encoding, UTF_16BE) == 0)
183
    *data = UTF16_BIG_ENDIAN;
184
  else if (strcmp (encoding, UTF_16LE) == 0)
185
    *data = UTF16_LITTLE_ENDIAN;
186
  else
187
    *data = UTF16_UNDEFINED;
188
 
189
  return (_VOID_PTR)data;
190
}
191
 
192
static ucs4_t
193
_DEFUN(utf_16_convert_to_ucs, (data, inbuf, inbytesleft),
194
                              _VOID_PTR data               _AND
195
                              _CONST unsigned char **inbuf _AND
196
                              size_t *inbytesleft)
197
{
198
  register ucs2_t w1;
199
  register ucs2_t w2;
200
  register ucs2_t *cp;
201
  int *state;
202
  ucs4_t res;
203
  int bytes = sizeof (ucs2_t);
204
 
205
  if (*inbytesleft < bytes)
206
    return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
207
 
208
  state = (int *)data;
209
  cp = ((ucs2_t *)*inbuf);
210
 
211
  if (*state == UTF16_UNDEFINED)
212
    {
213
      if (*cp == ICONV_HTOLES(UTF16_BOM))
214
        *state = UTF16_LITTLE_ENDIAN;
215
      else
216
        *state = UTF16_BIG_ENDIAN;
217
 
218
     if (   *cp == ICONV_HTOBES (UTF16_BOM)
219
         || *cp == ICONV_HTOLES (UTF16_BOM))
220
       {
221
         if (*inbytesleft < (bytes += sizeof (ucs2_t)))
222
           return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
223
         cp += 1;
224
       }
225
    }
226
 
227
  if (*state == UTF16_LITTLE_ENDIAN)
228
    w1 = ICONV_LETOHS (*cp);
229
  else
230
    w1 = ICONV_BETOHS (*cp);
231
 
232
  if (w1  < 0xD800 || w1 > 0xDFFF)
233
    {
234
      if (w1 == 0xFFFF || w1 == 0xFFFE)
235
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
236
      res = (ucs4_t)w1;
237
    }
238
  else
239
    {
240
      /* Process surrogate pair */
241
      if (*inbytesleft < (bytes += 2))
242
        return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
243
 
244
      if (w1 > 0xDBFF)
245
        /* Broken surrogate character */
246
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
247
 
248
      cp += 1;
249
 
250
      if (*state == UTF16_LITTLE_ENDIAN)
251
        w2 = ICONV_LETOHS (*cp);
252
      else
253
        w2 = ICONV_BETOHS (*cp);
254
 
255
      if (w2 < 0xDC00 || w2 > 0xDFFF)
256
        /* Broken surrogate character */
257
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
258
 
259
      res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10);
260
      res += 0x00010000;
261
    }
262
 
263
  *inbuf += bytes;
264
  *inbytesleft -= bytes;
265
 
266
  return res;
267
}
268
#endif /* ICONV_TO_UCS_CES_UTF_16 */
269
 
270
static int
271
_DEFUN(utf_16_get_mb_cur_max, (data),
272
                              _VOID_PTR data)
273
{
274
  return 6;
275
}
276
 
277
#if defined (ICONV_TO_UCS_CES_UTF_16)
278
_CONST iconv_to_ucs_ces_handlers_t
279
_iconv_to_ucs_ces_handlers_utf_16 =
280
{
281
  utf_16_init_to_ucs,
282
  utf_16_close,
283
  utf_16_get_mb_cur_max,
284
  NULL,
285
  NULL,
286
  NULL,
287
  utf_16_convert_to_ucs
288
};
289
#endif
290
 
291
#if defined (ICONV_FROM_UCS_CES_UTF_16)
292
_CONST iconv_from_ucs_ces_handlers_t
293
_iconv_from_ucs_ces_handlers_utf_16 =
294
{
295
  utf_16_init_from_ucs,
296
  utf_16_close,
297
  utf_16_get_mb_cur_max,
298
  NULL,
299
  NULL,
300
  NULL,
301
  utf_16_convert_from_ucs
302
};
303
#endif
304
 
305
#endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */
306
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.