OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.18.0/] [newlib/] [libc/] [stdlib/] [mbtowc_r.c] - Blame information for rev 309

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 207 jeremybenn
#include <newlib.h>
2
#include <stdlib.h>
3
#include <locale.h>
4
#include "mbctype.h"
5
#include <wchar.h>
6
#include <string.h>
7
#include <errno.h>
8
#include "local.h"
9
 
10
int (*__mbtowc) (struct _reent *, wchar_t *, const char *, size_t,
11
                 const char *, mbstate_t *)
12
#ifdef __CYGWIN__
13
   = __utf8_mbtowc;
14
#else
15
   = __ascii_mbtowc;
16
#endif
17
 
18
int
19
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
20
        struct _reent *r   _AND
21
        wchar_t       *pwc _AND
22
        const char    *s   _AND
23
        size_t         n   _AND
24
        mbstate_t      *state)
25
{
26
  return __mbtowc (r, pwc, s, n, __locale_charset (), state);
27
}
28
 
29
int
30
_DEFUN (__ascii_mbtowc, (r, pwc, s, n, charset, state),
31
        struct _reent *r       _AND
32
        wchar_t       *pwc     _AND
33
        const char    *s       _AND
34
        size_t         n       _AND
35
        const char    *charset _AND
36
        mbstate_t      *state)
37
{
38
  wchar_t dummy;
39
  unsigned char *t = (unsigned char *)s;
40
 
41
  if (pwc == NULL)
42
    pwc = &dummy;
43
 
44
  if (s == NULL)
45
    return 0;
46
 
47
  if (n == 0)
48
    return -2;
49
 
50
  *pwc = (wchar_t)*t;
51
 
52
  if (*t == '\0')
53
    return 0;
54
 
55
  return 1;
56
}
57
 
58
#ifdef _MB_CAPABLE
59
/* Input byte classes; JIS_C_NUM is the number of classes and sizes the
   second dimension of the tables below.  */
typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J,
               NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;
/* Decoder states; JIS_S_NUM is the number of states and sizes the first
   dimension of the tables below.  */
typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
               INV, JIS_S_NUM } JIS_STATE;
/* What to do with the current byte.  */
typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;

/**************************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with following JIS characters and switches
 * to ASCII are grouped with preceding JIS characters.  Thus, maximum returned length
 * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
 *
 * Both tables are indexed [current state][class of the input byte].  They are
 * lookup-only data and are therefore const.  No row is written out for INV, so
 * that row is zero-initialized (i.e. ASCII in the state table and COPY_A in the
 * action table).
 *************************************************************************************/

static const JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
/*              ESCAPE   DOLLAR    BRACKET   AT       B       J        NUL      JIS_CHAR  OTHER */
/* ASCII */   { A_ESC,   ASCII,    ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
/* JIS */     { J_ESC,   JIS_1,    JIS_1,    JIS_1,   JIS_1,  JIS_1,   INV,     JIS_1,    INV },
/* A_ESC */   { ASCII,   A_ESC_DL, ASCII,    ASCII,   ASCII,  ASCII,   ASCII,   ASCII,    ASCII },
/* A_ESC_DL */{ ASCII,   ASCII,    ASCII,    JIS,     JIS,    ASCII,   ASCII,   ASCII,    ASCII },
/* JIS_1 */   { INV,     JIS,      JIS,      JIS,     JIS,    JIS,     INV,     JIS,      INV },
/* J_ESC */   { INV,     INV,      J_ESC_BR, INV,     INV,    INV,     INV,     INV,      INV },
/* J_ESC_BR */{ INV,     INV,      INV,      INV,     ASCII,  ASCII,   INV,     INV,      INV },
};

static const JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
/*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
/* ASCII */   { NOOP,    COPY_A,   COPY_A,   COPY_A,  COPY_A,  COPY_A,  EMPTY,   COPY_A,  COPY_A},
/* JIS */     { NOOP,    COPY_J1,  COPY_J1,  COPY_J1, COPY_J1, COPY_J1, ERROR,   COPY_J1, ERROR },
/* A_ESC */   { COPY_A,  NOOP,     COPY_A,   COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A},
/* A_ESC_DL */{ COPY_A,  COPY_A,   COPY_A,   NOOP,    NOOP,    COPY_A,  COPY_A,  COPY_A,  COPY_A},
/* JIS_1 */   { ERROR,   COPY_J2,  COPY_J2,  COPY_J2, COPY_J2, COPY_J2, ERROR,   COPY_J2, ERROR },
/* J_ESC */   { ERROR,   ERROR,    NOOP,     ERROR,   ERROR,   ERROR,   ERROR,   ERROR,   ERROR },
/* J_ESC_BR */{ ERROR,   ERROR,    ERROR,    ERROR,   MAKE_A,  MAKE_A,  ERROR,   ERROR,   ERROR },
};

/* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
#define __state __count
96
 
97
#ifdef _MB_EXTENDED_CHARSETS_ISO
/* Single-byte conversion with a table lookup for the upper range.

   Bytes below 0xa0 are stored through PWC unchanged.  For bytes from 0xa0
   up, __iso_8859_index () is asked for a table number using the charset
   name starting at offset 9 (presumably past an "ISO-8859-" prefix --
   confirm against the caller); if it returns a non-negative index the
   wide character is taken from __iso_8859_conv, otherwise the byte is
   stored unchanged.  STATE is not used.

   Returns  0 if S is NULL or the byte is NUL,
           -2 if N is 0,
           -1 with R->_errno = EILSEQ if the table entry is 0,
            1 otherwise.  */
int
_DEFUN (__iso_mbtowc, (r, pwc, s, n, charset, state),
        struct _reent *r       _AND
        wchar_t       *pwc     _AND
        const char    *s       _AND
        size_t         n       _AND
        const char    *charset _AND
        mbstate_t      *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;
  int table;

  if (s == NULL)
    return 0;
  if (n == 0)
    return -2;
  if (pwc == NULL)
    pwc = &discard;

  if (*src >= 0xa0 && (table = __iso_8859_index (charset + 9)) >= 0)
    {
      *pwc = __iso_8859_conv[table][*src - 0xa0];
      if (*pwc != 0)
        return 1;
      /* A zero entry marks a byte with no mapping.  */
      r->_errno = EILSEQ;
      return -1;
    }

  *pwc = (wchar_t) *src;
  return (*src == '\0') ? 0 : 1;
}
#endif /* _MB_EXTENDED_CHARSETS_ISO */
142
 
143
#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
/* Single-byte conversion with a table lookup for the upper half.

   Bytes below 0x80 are stored through PWC unchanged.  For bytes from 0x80
   up, __cp_index () is asked for a table number using the charset name
   starting at offset 2 (presumably past a "CP" prefix -- confirm against
   the caller); if it returns a non-negative index the wide character is
   taken from __cp_conv, otherwise the byte is stored unchanged.  STATE is
   not used.

   Returns  0 if S is NULL or the byte is NUL,
           -2 if N is 0,
           -1 with R->_errno = EILSEQ if the table entry is 0,
            1 otherwise.  */
int
_DEFUN (__cp_mbtowc, (r, pwc, s, n, charset, state),
        struct _reent *r       _AND
        wchar_t       *pwc     _AND
        const char    *s       _AND
        size_t         n       _AND
        const char    *charset _AND
        mbstate_t      *state)
{
  wchar_t discard;
  const unsigned char *src = (const unsigned char *) s;
  int table;

  if (s == NULL)
    return 0;
  if (n == 0)
    return -2;
  if (pwc == NULL)
    pwc = &discard;

  if (*src >= 0x80 && (table = __cp_index (charset + 2)) >= 0)
    {
      *pwc = __cp_conv[table][*src - 0x80];
      if (*pwc != 0)
        return 1;
      /* A zero entry marks a byte with no mapping.  */
      r->_errno = EILSEQ;
      return -1;
    }

  *pwc = (wchar_t) *src;
  return (*src == '\0') ? 0 : 1;
}
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
188
 
189
int
190
_DEFUN (__utf8_mbtowc, (r, pwc, s, n, charset, state),
191
        struct _reent *r       _AND
192
        wchar_t       *pwc     _AND
193
        const char    *s       _AND
194
        size_t         n       _AND
195
        const char    *charset _AND
196
        mbstate_t      *state)
197
{
198
  wchar_t dummy;
199
  unsigned char *t = (unsigned char *)s;
200
  int ch;
201
  int i = 0;
202
 
203
  if (pwc == NULL)
204
    pwc = &dummy;
205
 
206
  if (s == NULL)
207
    return 0;
208
 
209
  if (n == 0)
210
    return -2;
211
 
212
  if (state->__count == 0)
213
    ch = t[i++];
214
  else
215
    ch = state->__value.__wchb[0];
216
 
217
  if (ch == '\0')
218
    {
219
      *pwc = 0;
220
      state->__count = 0;
221
      return 0; /* s points to the null character */
222
    }
223
 
224
  if (ch <= 0x7f)
225
    {
226
      /* single-byte sequence */
227
      state->__count = 0;
228
      *pwc = ch;
229
      return 1;
230
    }
231
  if (ch >= 0xc0 && ch <= 0xdf)
232
    {
233
      /* two-byte sequence */
234
      state->__value.__wchb[0] = ch;
235
      if (state->__count == 0)
236
        state->__count = 1;
237
      else if (n < (size_t)-1)
238
        ++n;
239
      if (n < 2)
240
        return -2;
241
      ch = t[i++];
242
      if (ch < 0x80 || ch > 0xbf)
243
        {
244
          r->_errno = EILSEQ;
245
          return -1;
246
        }
247
      if (state->__value.__wchb[0] < 0xc2)
248
        {
249
          /* overlong UTF-8 sequence */
250
          r->_errno = EILSEQ;
251
          return -1;
252
        }
253
      state->__count = 0;
254
      *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
255
        |    (wchar_t)(ch & 0x3f);
256
      return i;
257
    }
258
  if (ch >= 0xe0 && ch <= 0xef)
259
    {
260
      /* three-byte sequence */
261
      wchar_t tmp;
262
      state->__value.__wchb[0] = ch;
263
      if (state->__count == 0)
264
        state->__count = 1;
265
      else if (n < (size_t)-1)
266
        ++n;
267
      if (n < 2)
268
        return -2;
269
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
270
      if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
271
        {
272
          /* overlong UTF-8 sequence */
273
          r->_errno = EILSEQ;
274
          return -1;
275
        }
276
      if (ch < 0x80 || ch > 0xbf)
277
        {
278
          r->_errno = EILSEQ;
279
          return -1;
280
        }
281
      state->__value.__wchb[1] = ch;
282
      if (state->__count == 1)
283
        state->__count = 2;
284
      else if (n < (size_t)-1)
285
        ++n;
286
      if (n < 3)
287
        return -2;
288
      ch = t[i++];
289
      if (ch < 0x80 || ch > 0xbf)
290
        {
291
          r->_errno = EILSEQ;
292
          return -1;
293
        }
294
      state->__count = 0;
295
      tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
296
        |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
297
        |     (wchar_t)(ch & 0x3f);
298
      *pwc = tmp;
299
      return i;
300
    }
301
  if (ch >= 0xf0 && ch <= 0xf4)
302
    {
303
      /* four-byte sequence */
304
      wint_t tmp;
305
      state->__value.__wchb[0] = ch;
306
      if (state->__count == 0)
307
        state->__count = 1;
308
      else if (n < (size_t)-1)
309
        ++n;
310
      if (n < 2)
311
        return -2;
312
      ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
313
      if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90)
314
          || (state->__value.__wchb[0] == 0xf4 && ch >= 0x90))
315
        {
316
          /* overlong UTF-8 sequence or result is > 0x10ffff */
317
          r->_errno = EILSEQ;
318
          return -1;
319
        }
320
      if (ch < 0x80 || ch > 0xbf)
321
        {
322
          r->_errno = EILSEQ;
323
          return -1;
324
        }
325
      state->__value.__wchb[1] = ch;
326
      if (state->__count == 1)
327
        state->__count = 2;
328
      else if (n < (size_t)-1)
329
        ++n;
330
      if (n < 3)
331
        return -2;
332
      ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
333
      if (ch < 0x80 || ch > 0xbf)
334
        {
335
          r->_errno = EILSEQ;
336
          return -1;
337
        }
338
      state->__value.__wchb[2] = ch;
339
      if (state->__count == 2)
340
        state->__count = 3;
341
      else if (n < (size_t)-1)
342
        ++n;
343
      if (state->__count == 3 && sizeof(wchar_t) == 2)
344
        {
345
          /* On systems which have wchar_t being UTF-16 values, the value
346
             doesn't fit into a single wchar_t in this case.  So what we
347
             do here is to store the state with a special value of __count
348
             and return the first half of a surrogate pair.  The first
349
             three bytes of a UTF-8 sequence are enough to generate the
350
             first half of a UTF-16 surrogate pair.  As return value we
351
             choose to return the number of bytes actually read up to
352
             here.
353
             The second half of the surrogate pair is returned in case we
354
             recognize the special __count value of four, and the next
355
             byte is actually a valid value.  See below. */
356
          tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
357
            |   (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
358
            |   (wint_t)((state->__value.__wchb[2] & 0x3f) << 6);
359
          state->__count = 4;
360
          *pwc = 0xd800 | ((tmp - 0x10000) >> 10);
361
          return i;
362
        }
363
      if (n < 4)
364
        return -2;
365
      ch = t[i++];
366
      if (ch < 0x80 || ch > 0xbf)
367
        {
368
          r->_errno = EILSEQ;
369
          return -1;
370
        }
371
      tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18)
372
        |   (wint_t)((state->__value.__wchb[1] & 0x3f) << 12)
373
        |   (wint_t)((state->__value.__wchb[2] & 0x3f) << 6)
374
        |   (wint_t)(ch & 0x3f);
375
      if (state->__count == 4 && sizeof(wchar_t) == 2)
376
        /* Create the second half of the surrogate pair for systems with
377
           wchar_t == UTF-16 . */
378
        *pwc = 0xdc00 | (tmp & 0x3ff);
379
      else
380
        *pwc = tmp;
381
      state->__count = 0;
382
      return i;
383
    }
384
 
385
  r->_errno = EILSEQ;
386
  return -1;
387
}
388
 
389
/* Cygwin defines its own doublebyte charset conversion functions
390
   because the underlying OS requires wchar_t == UTF-16. */
391
#ifndef  __CYGWIN__
392
int
393
_DEFUN (__sjis_mbtowc, (r, pwc, s, n, charset, state),
394
        struct _reent *r       _AND
395
        wchar_t       *pwc     _AND
396
        const char    *s       _AND
397
        size_t         n       _AND
398
        const char    *charset _AND
399
        mbstate_t      *state)
400
{
401
  wchar_t dummy;
402
  unsigned char *t = (unsigned char *)s;
403
  int ch;
404
  int i = 0;
405
 
406
  if (pwc == NULL)
407
    pwc = &dummy;
408
 
409
  if (s == NULL)
410
    return 0;  /* not state-dependent */
411
 
412
  if (n == 0)
413
    return -2;
414
 
415
  ch = t[i++];
416
  if (state->__count == 0)
417
    {
418
      if (_issjis1 (ch))
419
        {
420
          state->__value.__wchb[0] = ch;
421
          state->__count = 1;
422
          if (n <= 1)
423
            return -2;
424
          ch = t[i++];
425
        }
426
    }
427
  if (state->__count == 1)
428
    {
429
      if (_issjis2 (ch))
430
        {
431
          *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
432
          state->__count = 0;
433
          return i;
434
        }
435
      else
436
        {
437
          r->_errno = EILSEQ;
438
          return -1;
439
        }
440
    }
441
 
442
  *pwc = (wchar_t)*t;
443
 
444
  if (*t == '\0')
445
    return 0;
446
 
447
  return 1;
448
}
449
 
450
int
451
_DEFUN (__eucjp_mbtowc, (r, pwc, s, n, charset, state),
452
        struct _reent *r       _AND
453
        wchar_t       *pwc     _AND
454
        const char    *s       _AND
455
        size_t         n       _AND
456
        const char    *charset _AND
457
        mbstate_t      *state)
458
{
459
  wchar_t dummy;
460
  unsigned char *t = (unsigned char *)s;
461
  int ch;
462
  int i = 0;
463
 
464
  if (pwc == NULL)
465
    pwc = &dummy;
466
 
467
  if (s == NULL)
468
    return 0;
469
 
470
  if (n == 0)
471
    return -2;
472
 
473
  ch = t[i++];
474
  if (state->__count == 0)
475
    {
476
      if (_iseucjp1 (ch))
477
        {
478
          state->__value.__wchb[0] = ch;
479
          state->__count = 1;
480
          if (n <= 1)
481
            return -2;
482
          ch = t[i++];
483
        }
484
    }
485
  if (state->__count == 1)
486
    {
487
      if (_iseucjp2 (ch))
488
        {
489
          if (state->__value.__wchb[0] == 0x8f)
490
            {
491
              state->__value.__wchb[1] = ch;
492
              state->__count = 2;
493
              if (n <= i)
494
                return -2;
495
              ch = t[i++];
496
            }
497
          else
498
            {
499
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
500
              state->__count = 0;
501
              return i;
502
            }
503
        }
504
      else
505
        {
506
          r->_errno = EILSEQ;
507
          return -1;
508
        }
509
    }
510
  if (state->__count == 2)
511
    {
512
      if (_iseucjp2 (ch))
513
        {
514
          *pwc = (((wchar_t)state->__value.__wchb[1]) << 8)
515
                 + (wchar_t)(ch & 0x7f);
516
          state->__count = 0;
517
          return i;
518
        }
519
      else
520
        {
521
          r->_errno = EILSEQ;
522
          return -1;
523
        }
524
    }
525
 
526
  *pwc = (wchar_t)*t;
527
 
528
  if (*t == '\0')
529
    return 0;
530
 
531
  return 1;
532
}
533
 
534
int
535
_DEFUN (__jis_mbtowc, (r, pwc, s, n, charset, state),
536
        struct _reent *r       _AND
537
        wchar_t       *pwc     _AND
538
        const char    *s       _AND
539
        size_t         n       _AND
540
        const char    *charset _AND
541
        mbstate_t      *state)
542
{
543
  wchar_t dummy;
544
  unsigned char *t = (unsigned char *)s;
545
  JIS_STATE curr_state;
546
  JIS_ACTION action;
547
  JIS_CHAR_TYPE ch;
548
  unsigned char *ptr;
549
  unsigned int i;
550
  int curr_ch;
551
 
552
  if (pwc == NULL)
553
    pwc = &dummy;
554
 
555
  if (s == NULL)
556
    {
557
      state->__state = ASCII;
558
      return 1;  /* state-dependent */
559
    }
560
 
561
  if (n == 0)
562
    return -2;
563
 
564
  curr_state = state->__state;
565
  ptr = t;
566
 
567
  for (i = 0; i < n; ++i)
568
    {
569
      curr_ch = t[i];
570
      switch (curr_ch)
571
        {
572
        case ESC_CHAR:
573
          ch = ESCAPE;
574
          break;
575
        case '$':
576
          ch = DOLLAR;
577
          break;
578
        case '@':
579
          ch = AT;
580
          break;
581
        case '(':
582
          ch = BRACKET;
583
          break;
584
        case 'B':
585
          ch = B;
586
          break;
587
        case 'J':
588
          ch = J;
589
          break;
590
        case '\0':
591
          ch = NUL;
592
          break;
593
        default:
594
          if (_isjis (curr_ch))
595
            ch = JIS_CHAR;
596
          else
597
            ch = OTHER;
598
        }
599
 
600
      action = JIS_action_table[curr_state][ch];
601
      curr_state = JIS_state_table[curr_state][ch];
602
 
603
      switch (action)
604
        {
605
        case NOOP:
606
          break;
607
        case EMPTY:
608
          state->__state = ASCII;
609
          *pwc = (wchar_t)0;
610
          return 0;
611
        case COPY_A:
612
          state->__state = ASCII;
613
          *pwc = (wchar_t)*ptr;
614
          return (i + 1);
615
        case COPY_J1:
616
          state->__value.__wchb[0] = t[i];
617
          break;
618
        case COPY_J2:
619
          state->__state = JIS;
620
          *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
621
          return (i + 1);
622
        case MAKE_A:
623
          ptr = (unsigned char *)(t + i + 1);
624
          break;
625
        case ERROR:
626
        default:
627
          r->_errno = EILSEQ;
628
          return -1;
629
        }
630
 
631
    }
632
 
633
  state->__state = curr_state;
634
  return -2;  /* n < bytes needed */
635
}
636
#endif /* !__CYGWIN__*/
637
#endif /* _MB_CAPABLE */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.