OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [stdlib/] [mbtowc_r.c] - Blame information for rev 167

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
#include <newlib.h>
2
#include <stdlib.h>
3
#include <locale.h>
4
#include "mbctype.h"
5
#include <wchar.h>
6
#include <string.h>
7
 
8
#ifdef _MB_CAPABLE
9
/* Input character classes used as the column index of the JIS tables.
   JIS_C_NUM is not a class; it is the number of classes. */
typedef enum
{
  ESCAPE,     /* the escape character that starts a shift sequence */
  DOLLAR,     /* '$' */
  BRACKET,    /* '(' */
  AT,         /* '@' */
  B,          /* 'B' */
  J,          /* 'J' */
  NUL,        /* '\0' */
  JIS_CHAR,   /* any other byte accepted by _isjis() */
  OTHER,      /* everything else */
  JIS_C_NUM
} JIS_CHAR_TYPE;
11
/* Decoder states used as the row index of the JIS tables.
   JIS_S_NUM is not a state; it is the number of states. */
typedef enum
{
  ASCII,      /* single-byte mode, no escape pending */
  JIS,        /* double-byte mode, expecting the first byte of a character */
  A_ESC,      /* single-byte mode, escape character seen */
  A_ESC_DL,   /* single-byte mode, escape character followed by '$' seen */
  JIS_1,      /* double-byte mode, first byte of a character seen */
  J_ESC,      /* double-byte mode, escape character seen */
  J_ESC_BR,   /* double-byte mode, escape character followed by '(' seen */
  INV,        /* invalid input */
  JIS_S_NUM
} JIS_STATE;
13
/* Actions the JIS decoder performs for a (state, input class) pair. */
typedef enum
{
  COPY_A,     /* emit a single-byte character */
  COPY_J1,    /* remember the first byte of a double-byte character */
  COPY_J2,    /* combine with the second byte and emit the character */
  MAKE_A,     /* switch back to single-byte mode */
  NOOP,       /* consume the byte, nothing else to do */
  EMPTY,      /* null character reached */
  ERROR       /* invalid sequence */
} JIS_ACTION;
14
 
15
/**************************************************************************************
16
 * state/action tables for processing JIS encoding
17
 * Where possible, switches to JIS are grouped with the following JIS characters and switches
18
 * to ASCII are grouped with preceding JIS characters.  Thus, maximum returned length
19
 * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
20
 *************************************************************************************/
21
 
22
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
23
/*              ESCAPE   DOLLAR    BRACKET   AT       B       J        NUL      JIS_CHAR  OTHER */
24
/* ASCII */   { A_ESC,   ASCII,    ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
25
/* JIS */     { J_ESC,   JIS_1,    JIS_1,    JIS_1,   JIS_1,  JIS_1,   INV,     JIS_1,    INV },
26
/* A_ESC */   { ASCII,   A_ESC_DL, ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
27
/* A_ESC_DL */{ ASCII,   ASCII,    ASCII,    JIS,     JIS,    ASCII,   ASCII,   ASCII,    ASCII },
28
/* JIS_1 */   { INV,     JIS,      JIS,      JIS,     JIS,    JIS,     INV,     JIS,      INV },
29
/* J_ESC */   { INV,     INV,      J_ESC_BR, INV,     INV,    INV,     INV,     INV,      INV },
30
/* J_ESC_BR */{ INV,     INV,      INV,      INV,     ASCII,  ASCII,   INV,     INV,      INV },
31
};
32
 
33
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
34
/*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
35
/* ASCII */   { NOOP,    COPY_A,   COPY_A,   COPY_A,  COPY_A,  COPY_A,  EMPTY,   COPY_A,  COPY_A},
36
/* JIS */     { NOOP,    COPY_J1,  COPY_J1,  COPY_J1, COPY_J1, COPY_J1, ERROR,   COPY_J1, ERROR },
37
/* A_ESC */   { COPY_A,  NOOP,     COPY_A,   COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A},
38
/* A_ESC_DL */{ COPY_A,  COPY_A,   COPY_A,   NOOP,    NOOP,    COPY_A,  COPY_A,  COPY_A,  COPY_A},
39
/* JIS_1 */   { ERROR,   COPY_J2,  COPY_J2,  COPY_J2, COPY_J2, COPY_J2, ERROR,   COPY_J2, ERROR },
40
/* J_ESC */   { ERROR,   ERROR,    NOOP,     ERROR,   ERROR,   ERROR,   ERROR,   ERROR,   ERROR },
41
/* J_ESC_BR */{ ERROR,   ERROR,    ERROR,    ERROR,   MAKE_A,  MAKE_A,  ERROR,   ERROR,   ERROR },
42
};
43
#endif /* _MB_CAPABLE */
44
 
45
/* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
46
#define __state __count
47
 
48
/* Character-set selector, defined elsewhere.  _mbtowc_r compares it against
   "C-UTF-8", "C-SJIS", "C-EUCJP" and "C-JIS" to pick a decoder; presumably
   it holds the name of the current LC_CTYPE locale (confirm at definition). */
extern char __lc_ctype[12];
49
 
50
int
51
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
52
        struct _reent *r   _AND
53
        wchar_t       *pwc _AND
54
        const char    *s   _AND
55
        size_t         n   _AND
56
        mbstate_t      *state)
57
{
58
  wchar_t dummy;
59
  unsigned char *t = (unsigned char *)s;
60
 
61
  if (pwc == NULL)
62
    pwc = &dummy;
63
 
64
  if (s != NULL && n == 0)
65
    return -2;
66
 
67
#ifdef _MB_CAPABLE
68
  if (__lc_ctype == NULL ||
69
      (strlen (__lc_ctype) <= 1))
70
    { /* fall-through */ }
71
  else if (!strcmp (__lc_ctype, "C-UTF-8"))
72
    {
73
      int ch;
74
      int i = 0;
75
 
76
      if (s == NULL)
77
        return 0; /* UTF-8 character encodings are not state-dependent */
78
 
79
      if (state->__count == 0)
80
        ch = t[i++];
81
      else
82
        {
83
          ++n;
84
          ch = state->__value.__wchb[0];
85
        }
86
 
87
      if (ch == '\0')
88
        {
89
          *pwc = 0;
90
          state->__count = 0;
91
          return 0; /* s points to the null character */
92
        }
93
 
94
      if (ch >= 0x0 && ch <= 0x7f)
95
        {
96
          /* single-byte sequence */
97
          state->__count = 0;
98
          *pwc = ch;
99
          return 1;
100
        }
101
      else if (ch >= 0xc0 && ch <= 0xdf)
102
        {
103
          /* two-byte sequence */
104
          state->__value.__wchb[0] = ch;
105
          state->__count = 1;
106
          if (n < 2)
107
            return -2;
108
          ch = t[i++];
109
          if (ch < 0x80 || ch > 0xbf)
110
            return -1;
111
          if (state->__value.__wchb[0] < 0xc2)
112
            /* overlong UTF-8 sequence */
113
            return -1;
114
          state->__count = 0;
115
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
116
            |    (wchar_t)(ch & 0x3f);
117
          return i;
118
        }
119
      else if (ch >= 0xe0 && ch <= 0xef)
120
        {
121
          /* three-byte sequence */
122
          wchar_t tmp;
123
          state->__value.__wchb[0] = ch;
124
          if (state->__count == 0)
125
            state->__count = 1;
126
          else
127
            ++n;
128
          if (n < 2)
129
            return -2;
130
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
131
          if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
132
            /* overlong UTF-8 sequence */
133
            return -1;
134
          if (ch < 0x80 || ch > 0xbf)
135
            return -1;
136
          state->__value.__wchb[1] = ch;
137
          state->__count = 2;
138
          if (n < 3)
139
            return -2;
140
          ch = t[i++];
141
          if (ch < 0x80 || ch > 0xbf)
142
            return -1;
143
          state->__count = 0;
144
          tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
145
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
146
            |     (wchar_t)(ch & 0x3f);
147
 
148
          if (tmp >= 0xd800 && tmp <= 0xdfff)
149
            return -1;
150
          *pwc = tmp;
151
          return i;
152
        }
153
      else if (ch >= 0xf0 && ch <= 0xf7)
154
        {
155
          /* four-byte sequence */
156
          if (sizeof(wchar_t) < 4)
157
            return -1; /* we can't store such a value */
158
          state->__value.__wchb[0] = ch;
159
          if (state->__count == 0)
160
            state->__count = 1;
161
          else
162
            ++n;
163
          if (n < 2)
164
            return -2;
165
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
166
          if (state->__value.__wchb[0] == 0xf0 && ch < 0x90)
167
            /* overlong UTF-8 sequence */
168
            return -1;
169
          if (ch < 0x80 || ch > 0xbf)
170
            return -1;
171
          state->__value.__wchb[1] = ch;
172
          if (state->__count == 1)
173
            state->__count = 2;
174
          else
175
            ++n;
176
          if (n < 3)
177
            return -2;
178
          ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
179
          if (ch < 0x80 || ch > 0xbf)
180
            return -1;
181
          state->__value.__wchb[2] = ch;
182
          state->__count = 3;
183
          if (n < 4)
184
            return -2;
185
          ch = t[i++];
186
          if (ch < 0x80 || ch > 0xbf)
187
            return -1;
188
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18)
189
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12)
190
            |    (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6)
191
            |    (wchar_t)(ch & 0x3f);
192
 
193
          state->__count = 0;
194
          return i;
195
        }
196
      else if (ch >= 0xf8 && ch <= 0xfb)
197
        {
198
          /* five-byte sequence */
199
          if (sizeof(wchar_t) < 4)
200
            return -1; /* we can't store such a value */
201
          state->__value.__wchb[0] = ch;
202
          if (state->__count == 0)
203
            state->__count = 1;
204
          else
205
            ++n;
206
          if (n < 2)
207
            return -2;
208
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
209
          if (state->__value.__wchb[0] == 0xf8 && ch < 0x88)
210
            /* overlong UTF-8 sequence */
211
            return -1;
212
          if (ch < 0x80 || ch > 0xbf)
213
            return -1;
214
          state->__value.__wchb[1] = ch;
215
          if (state->__count == 1)
216
            state->__count = 2;
217
          else
218
            ++n;
219
          if (n < 3)
220
            return -2;
221
          ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
222
          if (ch < 0x80 || ch > 0xbf)
223
            return -1;
224
          state->__value.__wchb[2] = ch;
225
          if (state->__count == 2)
226
            state->__count = 3;
227
          else
228
            ++n;
229
          if (n < 4)
230
            return -2;
231
          ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
232
          if (ch < 0x80 || ch > 0xbf)
233
            return -1;
234
          state->__value.__wchb[3] = ch;
235
          state->__count = 4;
236
          if (n < 5)
237
            return -2;
238
          ch = t[i++];
239
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x03) << 24)
240
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 18)
241
            |    (wchar_t)((state->__value.__wchb[2] & 0x3f) << 12)
242
            |    (wchar_t)((state->__value.__wchb[3] & 0x3f) << 6)
243
            |    (wchar_t)(ch & 0x3f);
244
 
245
          state->__count = 0;
246
          return i;
247
        }
248
      else if (ch >= 0xfc && ch <= 0xfd)
249
        {
250
          /* six-byte sequence */
251
          int ch2;
252
          if (sizeof(wchar_t) < 4)
253
            return -1; /* we can't store such a value */
254
          state->__value.__wchb[0] = ch;
255
          if (state->__count == 0)
256
            state->__count = 1;
257
          else
258
            ++n;
259
          if (n < 2)
260
            return -2;
261
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
262
          if (state->__value.__wchb[0] == 0xfc && ch < 0x84)
263
            /* overlong UTF-8 sequence */
264
            return -1;
265
          if (ch < 0x80 || ch > 0xbf)
266
            return -1;
267
          state->__value.__wchb[1] = ch;
268
          if (state->__count == 1)
269
            state->__count = 2;
270
          else
271
            ++n;
272
          if (n < 3)
273
            return -2;
274
          ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
275
          if (ch < 0x80 || ch > 0xbf)
276
            return -1;
277
          state->__value.__wchb[2] = ch;
278
          if (state->__count == 2)
279
            state->__count = 3;
280
          else
281
            ++n;
282
          if (n < 4)
283
            return -2;
284
          ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
285
          if (ch < 0x80 || ch > 0xbf)
286
            return -1;
287
          state->__value.__wchb[3] = ch;
288
          if (state->__count == 3)
289
            state->__count = 4;
290
          else
291
            ++n;
292
          if (n < 5)
293
            return -2;
294
          if (n == 5)
295
            return -1; /* at this point we can't save enough to restart */
296
          ch = t[i++];
297
          if (ch < 0x80 || ch > 0xbf)
298
            return -1;
299
          ch2 = t[i++];
300
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x01) << 30)
301
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 24)
302
            |    (wchar_t)((state->__value.__wchb[2] & 0x3f) << 18)
303
            |    (wchar_t)((state->__value.__wchb[3] & 0x3f) << 12)
304
            |    (wchar_t)((ch & 0x3f) << 6)
305
            |    (wchar_t)(ch2 & 0x3f);
306
 
307
          state->__count = 0;
308
          return i;
309
        }
310
      else
311
        return -1;
312
    }
313
  else if (!strcmp (__lc_ctype, "C-SJIS"))
314
    {
315
      int ch;
316
      int i = 0;
317
      if (s == NULL)
318
        return 0;  /* not state-dependent */
319
      ch = t[i++];
320
      if (state->__count == 0)
321
        {
322
          if (_issjis1 (ch))
323
            {
324
              state->__value.__wchb[0] = ch;
325
              state->__count = 1;
326
              if (n <= 1)
327
                return -2;
328
              ch = t[i++];
329
            }
330
        }
331
      if (state->__count == 1)
332
        {
333
          if (_issjis2 (ch))
334
            {
335
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
336
              state->__count = 0;
337
              return i;
338
            }
339
          else
340
            return -1;
341
        }
342
    }
343
  else if (!strcmp (__lc_ctype, "C-EUCJP"))
344
    {
345
      int ch;
346
      int i = 0;
347
      if (s == NULL)
348
        return 0;  /* not state-dependent */
349
      ch = t[i++];
350
      if (state->__count == 0)
351
        {
352
          if (_iseucjp (ch))
353
            {
354
              state->__value.__wchb[0] = ch;
355
              state->__count = 1;
356
              if (n <= 1)
357
                return -2;
358
              ch = t[i++];
359
            }
360
        }
361
      if (state->__count == 1)
362
        {
363
          if (_iseucjp (ch))
364
            {
365
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
366
              state->__count = 0;
367
              return i;
368
            }
369
          else
370
            return -1;
371
        }
372
    }
373
  else if (!strcmp (__lc_ctype, "C-JIS"))
374
    {
375
      JIS_STATE curr_state;
376
      JIS_ACTION action;
377
      JIS_CHAR_TYPE ch;
378
      unsigned char *ptr;
379
      unsigned int i;
380
      int curr_ch;
381
 
382
      if (s == NULL)
383
        {
384
          state->__state = ASCII;
385
          return 1;  /* state-dependent */
386
        }
387
 
388
      curr_state = state->__state;
389
      ptr = t;
390
 
391
      for (i = 0; i < n; ++i)
392
        {
393
          curr_ch = t[i];
394
          switch (curr_ch)
395
            {
396
            case ESC_CHAR:
397
              ch = ESCAPE;
398
              break;
399
            case '$':
400
              ch = DOLLAR;
401
              break;
402
            case '@':
403
              ch = AT;
404
              break;
405
            case '(':
406
              ch = BRACKET;
407
              break;
408
            case 'B':
409
              ch = B;
410
              break;
411
            case 'J':
412
              ch = J;
413
              break;
414
            case '\0':
415
              ch = NUL;
416
              break;
417
            default:
418
              if (_isjis (curr_ch))
419
                ch = JIS_CHAR;
420
              else
421
                ch = OTHER;
422
            }
423
 
424
          action = JIS_action_table[curr_state][ch];
425
          curr_state = JIS_state_table[curr_state][ch];
426
 
427
          switch (action)
428
            {
429
            case NOOP:
430
              break;
431
            case EMPTY:
432
              state->__state = ASCII;
433
              *pwc = (wchar_t)0;
434
              return 0;
435
            case COPY_A:
436
              state->__state = ASCII;
437
              *pwc = (wchar_t)*ptr;
438
              return (i + 1);
439
            case COPY_J1:
440
              state->__value.__wchb[0] = t[i];
441
              break;
442
            case COPY_J2:
443
              state->__state = JIS;
444
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
445
              return (i + 1);
446
            case MAKE_A:
447
              ptr = (char *)(t + i + 1);
448
              break;
449
            case ERROR:
450
            default:
451
              return -1;
452
            }
453
 
454
        }
455
 
456
      state->__state = curr_state;
457
      return -2;  /* n < bytes needed */
458
    }
459
#endif /* _MB_CAPABLE */               
460
 
461
  /* otherwise this must be the "C" locale or unknown locale */
462
  if (s == NULL)
463
    return 0;  /* not state-dependent */
464
 
465
  *pwc = (wchar_t)*t;
466
 
467
  if (*t == '\0')
468
    return 0;
469
 
470
  return 1;
471
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.