URL
https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk
Subversion Repositories openrisc_me
[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [stdlib/] [mbtowc_r.c] - Rev 158
Compare with Previous | Blame | View Log
#include <newlib.h>
#include <stdlib.h>
#include <locale.h>
#include "mbctype.h"
#include <wchar.h>
#include <string.h>

#ifdef _MB_CAPABLE
/* Classification of one input byte while scanning JIS text; used as the
   column index of the two tables below.  */
typedef enum { ESCAPE, DOLLAR, BRACKET, AT, B, J,
               NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE;

/* States of the JIS scanner; used as the row index of the two tables
   below.  INV marks an invalid transition.  */
typedef enum { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR,
               INV, JIS_S_NUM } JIS_STATE;

/* What to do with the current byte after a transition.  */
typedef enum { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION;

/**************************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the JIS characters that follow
 * and switches to ASCII are grouped with preceding JIS characters.  Thus, maximum
 * returned length is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to
 * ASCII) = 6.
 *************************************************************************************/

static const JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
/*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
/* ASCII */   { A_ESC,   ASCII,    ASCII,    ASCII,   ASCII,   ASCII,   ASCII,   ASCII,    ASCII },
/* JIS */     { J_ESC,   JIS_1,    JIS_1,    JIS_1,   JIS_1,   JIS_1,   INV,     JIS_1,    INV },
/* A_ESC */   { ASCII,   A_ESC_DL, ASCII,    ASCII,   ASCII,   ASCII,   ASCII,   ASCII,    ASCII },
/* A_ESC_DL */{ ASCII,   ASCII,    ASCII,    JIS,     JIS,     ASCII,   ASCII,   ASCII,    ASCII },
/* JIS_1 */   { INV,     JIS,      JIS,      JIS,     JIS,     JIS,     INV,     JIS,      INV },
/* J_ESC */   { INV,     INV,      J_ESC_BR, INV,     INV,     INV,     INV,     INV,      INV },
/* J_ESC_BR */{ INV,     INV,      INV,      INV,     ASCII,   ASCII,   INV,     INV,      INV },
};

static const JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
/*              ESCAPE   DOLLAR    BRACKET   AT       B        J        NUL      JIS_CHAR  OTHER */
/* ASCII */   { NOOP,    COPY_A,   COPY_A,   COPY_A,  COPY_A,  COPY_A,  EMPTY,   COPY_A,   COPY_A },
/* JIS */     { NOOP,    COPY_J1,  COPY_J1,  COPY_J1, COPY_J1, COPY_J1, ERROR,   COPY_J1,  ERROR },
/* A_ESC */   { COPY_A,  NOOP,     COPY_A,   COPY_A,  COPY_A,  COPY_A,  COPY_A,  COPY_A,   COPY_A },
/* A_ESC_DL */{ COPY_A,  COPY_A,   COPY_A,   NOOP,    NOOP,    COPY_A,  COPY_A,  COPY_A,   COPY_A },
/* JIS_1 */   { ERROR,   COPY_J2,  COPY_J2,  COPY_J2, COPY_J2, COPY_J2, ERROR,   COPY_J2,  ERROR },
/* J_ESC */   { ERROR,   ERROR,    NOOP,     ERROR,   ERROR,   ERROR,   ERROR,   ERROR,    ERROR },
/* J_ESC_BR */{ ERROR,   ERROR,    ERROR,    ERROR,   MAKE_A,  MAKE_A,  ERROR,   ERROR,    ERROR },
};
#endif /* _MB_CAPABLE */

/* we override the mbstate_t __count field for more complex encodings and use it store a state value */
#define __state __count

extern char __lc_ctype[12];

/*
 * _mbtowc_r -- convert the multibyte character at S into a wide character.
 *
 *   r      reentrancy structure (not referenced by this implementation)
 *   pwc    where to store the wide character; may be NULL, in which case
 *          the result is written to a local dummy and discarded
 *   s      input bytes, or NULL to query state-dependence
 *   n      number of bytes available at S
 *   state  conversion state carried between calls
 *
 * The encoding is selected by the __lc_ctype string: "C-UTF-8", "C-SJIS",
 * "C-EUCJP" or "C-JIS" (only when _MB_CAPABLE is defined); anything else,
 * including a string of length <= 1, is treated as single-byte.
 *
 * Returns:
 *    -2  S is non-NULL and N is 0, or the N bytes form only the beginning
 *        of a character (the partial input is remembered in STATE)
 *    -1  the input is not a valid sequence for the selected encoding
 *     0  S is NULL and the encoding is not state-dependent, or S points
 *        at a null byte
 *     1  S is NULL and the encoding is "C-JIS" (STATE is reset to ASCII)
 *    >0  otherwise, the number of bytes of S that were consumed
 *
 * UTF-8 restart handling: the bytes of an incomplete sequence are kept in
 * state->__value.__wchb[] and state->__count holds how many are stored.
 * On re-entry each stored byte that is re-consumed bumps N by one, so the
 * "n < k" tests compare against stored + new bytes.  N must be credited
 * exactly once per stored byte; crediting more lets the decoder read past
 * the end of the caller's buffer.
 */
int
_DEFUN (_mbtowc_r, (r, pwc, s, n, state),
        struct _reent *r   _AND
        wchar_t       *pwc _AND
        const char    *s   _AND
        size_t         n   _AND
        mbstate_t      *state)
{
  wchar_t dummy;
  const unsigned char *t = (const unsigned char *)s;

  if (pwc == NULL)
    pwc = &dummy;

  if (s != NULL && n == 0)
    return -2;

#ifdef _MB_CAPABLE
  if (strlen (__lc_ctype) <= 1)
    { /* fall-through */ }
  else if (!strcmp (__lc_ctype, "C-UTF-8"))
    {
      int ch;
      int i = 0;

      if (s == NULL)
        return 0; /* UTF-8 character encodings are not state-dependent */

      /* On restart the lead byte comes from the saved state.  N is not
         adjusted here; each branch below credits saved bytes itself.  */
      if (state->__count == 0)
        ch = t[i++];
      else
        ch = state->__value.__wchb[0];

      if (ch == '\0')
        {
          *pwc = 0;
          state->__count = 0;
          return 0; /* s points to the null character */
        }

      if (ch >= 0x0 && ch <= 0x7f)
        {
          /* single-byte sequence */
          state->__count = 0;
          *pwc = ch;
          return 1;
        }
      else if (ch >= 0xc0 && ch <= 0xdf)
        {
          /* two-byte sequence */
          state->__value.__wchb[0] = ch;
          if (state->__count == 0)
            state->__count = 1;
          else if (n < (size_t)-1)
            ++n;                /* lead byte came from saved state */
          if (n < 2)
            return -2;
          ch = t[i++];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          if (state->__value.__wchb[0] < 0xc2)
            /* overlong UTF-8 sequence */
            return -1;
          state->__count = 0;
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6)
            |    (wchar_t)(ch & 0x3f);
          return i;
        }
      else if (ch >= 0xe0 && ch <= 0xef)
        {
          /* three-byte sequence */
          wchar_t tmp;
          state->__value.__wchb[0] = ch;
          if (state->__count == 0)
            state->__count = 1;
          else if (n < (size_t)-1)
            ++n;
          if (n < 2)
            return -2;
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
          if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0)
            /* overlong UTF-8 sequence */
            return -1;
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[1] = ch;
          if (state->__count == 1)
            state->__count = 2;
          else if (n < (size_t)-1)
            ++n;                /* second byte came from saved state */
          if (n < 3)
            return -2;
          ch = t[i++];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__count = 0;
          tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
            |     (wchar_t)(ch & 0x3f);

          /* UTF-16 surrogate code points are not valid scalar values */
          if (tmp >= 0xd800 && tmp <= 0xdfff)
            return -1;
          *pwc = tmp;
          return i;
        }
      else if (ch >= 0xf0 && ch <= 0xf7)
        {
          /* four-byte sequence */
          if (sizeof(wchar_t) < 4)
            return -1; /* we can't store such a value */
          state->__value.__wchb[0] = ch;
          if (state->__count == 0)
            state->__count = 1;
          else if (n < (size_t)-1)
            ++n;
          if (n < 2)
            return -2;
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
          if (state->__value.__wchb[0] == 0xf0 && ch < 0x90)
            /* overlong UTF-8 sequence */
            return -1;
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[1] = ch;
          if (state->__count == 1)
            state->__count = 2;
          else if (n < (size_t)-1)
            ++n;
          if (n < 3)
            return -2;
          ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[2] = ch;
          if (state->__count == 2)
            state->__count = 3;
          else if (n < (size_t)-1)
            ++n;                /* third byte came from saved state */
          if (n < 4)
            return -2;
          ch = t[i++];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x07) << 18)
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 12)
            |    (wchar_t)((state->__value.__wchb[2] & 0x3f) << 6)
            |    (wchar_t)(ch & 0x3f);

          state->__count = 0;
          return i;
        }
      else if (ch >= 0xf8 && ch <= 0xfb)
        {
          /* five-byte sequence */
          if (sizeof(wchar_t) < 4)
            return -1; /* we can't store such a value */
          state->__value.__wchb[0] = ch;
          if (state->__count == 0)
            state->__count = 1;
          else if (n < (size_t)-1)
            ++n;
          if (n < 2)
            return -2;
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
          if (state->__value.__wchb[0] == 0xf8 && ch < 0x88)
            /* overlong UTF-8 sequence */
            return -1;
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[1] = ch;
          if (state->__count == 1)
            state->__count = 2;
          else if (n < (size_t)-1)
            ++n;
          if (n < 3)
            return -2;
          ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[2] = ch;
          if (state->__count == 2)
            state->__count = 3;
          else if (n < (size_t)-1)
            ++n;
          if (n < 4)
            return -2;
          ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[3] = ch;
          if (state->__count == 3)
            state->__count = 4;
          else if (n < (size_t)-1)
            ++n;                /* fourth byte came from saved state */
          if (n < 5)
            return -2;
          ch = t[i++];
          /* the final byte must be a continuation byte like the others */
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x03) << 24)
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 18)
            |    (wchar_t)((state->__value.__wchb[2] & 0x3f) << 12)
            |    (wchar_t)((state->__value.__wchb[3] & 0x3f) << 6)
            |    (wchar_t)(ch & 0x3f);

          state->__count = 0;
          return i;
        }
      else if (ch >= 0xfc && ch <= 0xfd)
        {
          /* six-byte sequence */
          int ch2;
          if (sizeof(wchar_t) < 4)
            return -1; /* we can't store such a value */
          state->__value.__wchb[0] = ch;
          if (state->__count == 0)
            state->__count = 1;
          else if (n < (size_t)-1)
            ++n;
          if (n < 2)
            return -2;
          ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1];
          if (state->__value.__wchb[0] == 0xfc && ch < 0x84)
            /* overlong UTF-8 sequence */
            return -1;
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[1] = ch;
          if (state->__count == 1)
            state->__count = 2;
          else if (n < (size_t)-1)
            ++n;
          if (n < 3)
            return -2;
          ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[2] = ch;
          if (state->__count == 2)
            state->__count = 3;
          else if (n < (size_t)-1)
            ++n;
          if (n < 4)
            return -2;
          ch = (state->__count == 3) ? t[i++] : state->__value.__wchb[3];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          state->__value.__wchb[3] = ch;
          if (state->__count == 3)
            state->__count = 4;
          else if (n < (size_t)-1)
            ++n;
          if (n < 5)
            return -2;
          if (n == 5)
            return -1; /* at this point we can't save enough to restart */
          ch = t[i++];
          if (ch < 0x80 || ch > 0xbf)
            return -1;
          ch2 = t[i++];
          /* the final byte must be a continuation byte like the others */
          if (ch2 < 0x80 || ch2 > 0xbf)
            return -1;
          *pwc = (wchar_t)((state->__value.__wchb[0] & 0x01) << 30)
            |    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 24)
            |    (wchar_t)((state->__value.__wchb[2] & 0x3f) << 18)
            |    (wchar_t)((state->__value.__wchb[3] & 0x3f) << 12)
            |    (wchar_t)((ch & 0x3f) << 6)
            |    (wchar_t)(ch2 & 0x3f);

          state->__count = 0;
          return i;
        }
      else
        return -1;
    }
  else if (!strcmp (__lc_ctype, "C-SJIS"))
    {
      int ch;
      int i = 0;

      if (s == NULL)
        return 0;  /* not state-dependent */

      ch = t[i++];
      if (state->__count == 0)
        {
          if (_issjis1 (ch))
            {
              /* remember the lead byte in case the trail byte is missing */
              state->__value.__wchb[0] = ch;
              state->__count = 1;
              if (n <= 1)
                return -2;
              ch = t[i++];
            }
        }
      if (state->__count == 1)
        {
          if (_issjis2 (ch))
            {
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
              state->__count = 0;
              return i;
            }
          else
            return -1;
        }
      /* not a lead byte: handled as a single byte below */
    }
  else if (!strcmp (__lc_ctype, "C-EUCJP"))
    {
      int ch;
      int i = 0;

      if (s == NULL)
        return 0;  /* not state-dependent */

      ch = t[i++];
      if (state->__count == 0)
        {
          if (_iseucjp (ch))
            {
              /* remember the lead byte in case the trail byte is missing */
              state->__value.__wchb[0] = ch;
              state->__count = 1;
              if (n <= 1)
                return -2;
              ch = t[i++];
            }
        }
      if (state->__count == 1)
        {
          if (_iseucjp (ch))
            {
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch;
              state->__count = 0;
              return i;
            }
          else
            return -1;
        }
      /* not a lead byte: handled as a single byte below */
    }
  else if (!strcmp (__lc_ctype, "C-JIS"))
    {
      JIS_STATE curr_state;
      JIS_ACTION action;
      JIS_CHAR_TYPE ch;
      const unsigned char *ptr;
      unsigned int i;
      int curr_ch;

      if (s == NULL)
        {
          state->__state = ASCII;
          return 1;  /* state-dependent */
        }

      curr_state = state->__state;
      ptr = t;

      for (i = 0; i < n; ++i)
        {
          curr_ch = t[i];
          switch (curr_ch)
            {
            case ESC_CHAR:
              ch = ESCAPE;
              break;
            case '$':
              ch = DOLLAR;
              break;
            case '@':
              ch = AT;
              break;
            case '(':
              ch = BRACKET;
              break;
            case 'B':
              ch = B;
              break;
            case 'J':
              ch = J;
              break;
            case '\0':
              ch = NUL;
              break;
            default:
              if (_isjis (curr_ch))
                ch = JIS_CHAR;
              else
                ch = OTHER;
            }

          /* look up the action before advancing the state */
          action = JIS_action_table[curr_state][ch];
          curr_state = JIS_state_table[curr_state][ch];

          switch (action)
            {
            case NOOP:
              break;
            case EMPTY:
              state->__state = ASCII;
              *pwc = (wchar_t)0;
              return 0;
            case COPY_A:
              state->__state = ASCII;
              *pwc = (wchar_t)*ptr;
              return (i + 1);
            case COPY_J1:
              state->__value.__wchb[0] = t[i];
              break;
            case COPY_J2:
              state->__state = JIS;
              *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]);
              return (i + 1);
            case MAKE_A:
              /* the byte after the escape sequence is the one COPY_A copies */
              ptr = t + i + 1;
              break;
            case ERROR:
            default:
              return -1;
            }

        }

      state->__state = curr_state;
      return -2;  /* n < bytes needed */
    }
#endif /* _MB_CAPABLE */

  /* otherwise this must be the "C" locale or unknown locale */
  if (s == NULL)
    return 0;  /* not state-dependent */

  *pwc = (wchar_t)*t;

  if (*t == '\0')
    return 0;

  return 1;
}