1 |
148 |
jeremybenn |
/* Skeleton for a conversion module.
|
2 |
|
|
Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
|
3 |
|
|
This file is part of the GNU C Library.
|
4 |
|
|
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
|
5 |
|
|
|
6 |
|
|
The GNU C Library is free software; you can redistribute it and/or
|
7 |
|
|
modify it under the terms of the GNU Lesser General Public
|
8 |
|
|
License as published by the Free Software Foundation; either
|
9 |
|
|
version 2.1 of the License, or (at your option) any later version.
|
10 |
|
|
|
11 |
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
12 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
Lesser General Public License for more details.
|
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU Lesser General Public
|
17 |
|
|
License along with the GNU C Library; if not, write to the Free
|
18 |
|
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
19 |
|
|
02111-1307 USA. */
|
20 |
|
|
|
21 |
|
|
/* This file can be included to provide definitions of several things
|
22 |
|
|
many modules have in common. It can be customized using the following
|
23 |
|
|
macros:
|
24 |
|
|
|
25 |
|
|
DEFINE_INIT define the default initializer. This requires the
|
26 |
|
|
following symbol to be defined.
|
27 |
|
|
|
28 |
|
|
CHARSET_NAME string with official name of the coded character
|
29 |
|
|
set (in all-caps)
|
30 |
|
|
|
31 |
|
|
DEFINE_FINI define the default destructor function.
|
32 |
|
|
|
33 |
|
|
MIN_NEEDED_FROM minimal number of bytes needed for the from-charset.
|
34 |
|
|
MIN_NEEDED_TO likewise for the to-charset.
|
35 |
|
|
|
36 |
|
|
MAX_NEEDED_FROM maximal number of bytes needed for the from-charset.
|
37 |
|
|
This macro is optional, it defaults to MIN_NEEDED_FROM.
|
38 |
|
|
MAX_NEEDED_TO likewise for the to-charset.
|
39 |
|
|
|
40 |
|
|
DEFINE_DIRECTION_OBJECTS
|
41 |
|
|
two objects will be defined to be used when the
|
42 |
|
|
`gconv' function must only distinguish two
|
43 |
|
|
directions. This is implied by DEFINE_INIT.
|
44 |
|
|
If this macro is not defined the following
|
45 |
|
|
macro must be available.
|
46 |
|
|
|
47 |
|
|
FROM_DIRECTION this macro is supposed to return a value != 0
|
48 |
|
|
if we convert from the current character set,
|
49 |
|
|
otherwise it returns 0.
|
50 |
|
|
|
51 |
|
|
EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it
|
52 |
|
|
defines some code which writes out a sequence
|
53 |
|
|
of characters which bring the current state into
|
54 |
|
|
the initial state.
|
55 |
|
|
|
56 |
|
|
FROM_LOOP name of the function implementing the conversion
|
57 |
|
|
from the current characters.
|
58 |
|
|
TO_LOOP likewise for the other direction
|
59 |
|
|
|
60 |
|
|
ONE_DIRECTION optional. If defined to 1, only one conversion
|
61 |
|
|
direction is defined instead of two. In this
|
62 |
|
|
case, FROM_DIRECTION should be defined to 1, and
|
63 |
|
|
FROM_LOOP and TO_LOOP should have the same value.
|
64 |
|
|
|
65 |
|
|
SAVE_RESET_STATE in case of an error we must reset the state for
|
66 |
|
|
the rerun so this macro must be defined for
|
67 |
|
|
stateful encodings. It takes an argument which
|
68 |
|
|
is nonzero when saving.
|
69 |
|
|
|
70 |
|
|
RESET_INPUT_BUFFER If the input character sets allow this the macro
|
71 |
|
|
can be defined to reset the input buffer pointers
|
72 |
|
|
to cover only those characters up to the error.
|
73 |
|
|
|
74 |
|
|
FUNCTION_NAME if not set the conversion function is named `gconv'.
|
75 |
|
|
|
76 |
|
|
PREPARE_LOOP optional code preparing the conversion loop. Can
|
77 |
|
|
contain variable definitions.
|
78 |
|
|
END_LOOP also optional, may be used to store information
|
79 |
|
|
|
80 |
|
|
EXTRA_LOOP_ARGS optional macro specifying extra arguments passed
|
81 |
|
|
to loop function.
|
82 |
|
|
*/
|
83 |
|
|
|
84 |
|
|
#include <assert.h>
|
85 |
|
|
#include <gconv.h>
|
86 |
|
|
#include <string.h>
|
87 |
|
|
#define __need_size_t
|
88 |
|
|
#define __need_NULL
|
89 |
|
|
#include <stddef.h>
|
90 |
|
|
|
91 |
|
|
#include <wchar.h>
|
92 |
|
|
|
93 |
|
|
#ifndef STATIC_GCONV
|
94 |
|
|
# include <dlfcn.h>
|
95 |
|
|
#endif
|
96 |
|
|
|
97 |
|
|
/* Call the function FCT with the parenthesized argument list ARGS.  In
   this file the call is always a plain direct call.  Any definition a
   previously included header (e.g. <dlfcn.h>) may have supplied is dropped
   first so that the two definitions cannot clash, and the expansion is
   parenthesized so that it behaves like an ordinary call expression
   wherever it is used.  */
#undef DL_CALL_FCT
#define DL_CALL_FCT(fct, args) ((fct) args)
|
98 |
|
|
|
99 |
|
|
/* Direction markers.  When the includer requests them, either directly
   through DEFINE_DIRECTION_OBJECTS or implicitly through DEFINE_INIT, two
   dummy objects are created; only their addresses matter.  */
#if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT
static int from_object;
static int to_object;
#endif

/* FROM_DIRECTION is either supplied by the includer or derived from the
   marker objects by comparing the step's data pointer with the address of
   FROM_OBJECT.  If neither is available compilation is stopped.  */
#if !defined FROM_DIRECTION
# if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT
#  define FROM_DIRECTION (step->__data == &from_object)
# else
#  error "FROM_DIRECTION must be provided if direction objects are not used"
# endif
#endif
|
112 |
|
|
|
113 |
|
|
|
114 |
|
|
/* Largest number of bytes needed for the from-charset.  The includer may
   leave this undefined, in which case the minimum is used.  */
#if !defined MAX_NEEDED_FROM
# define MAX_NEEDED_FROM MIN_NEEDED_FROM
#endif

/* Likewise for the to-charset.  */
#if !defined MAX_NEEDED_TO
# define MAX_NEEDED_TO MIN_NEEDED_TO
#endif
|
123 |
|
|
|
124 |
|
|
|
125 |
|
|
/* Define macros which can access unaligned buffers. These macros are
|
126 |
|
|
supposed to be used only in code outside the inner loops. For the inner
|
127 |
|
|
loops we have other definitions which allow optimized access. */
|
128 |
|
|
#ifdef _STRING_ARCH_unaligned
|
129 |
|
|
/* We can handle unaligned memory access. */
|
130 |
|
|
# define get16u(addr) *((__const uint16_t *) (addr))
|
131 |
|
|
# define get32u(addr) *((__const uint32_t *) (addr))
|
132 |
|
|
|
133 |
|
|
/* We need no special support for writing values either. */
|
134 |
|
|
# define put16u(addr, val) *((uint16_t *) (addr)) = (val)
|
135 |
|
|
# define put32u(addr, val) *((uint32_t *) (addr)) = (val)
|
136 |
|
|
#else
|
137 |
|
|
/* Distinguish between big endian and little endian. */
|
138 |
|
|
# if __BYTE_ORDER == __LITTLE_ENDIAN
|
139 |
|
|
# define get16u(addr) \
|
140 |
|
|
(((__const unsigned char *) (addr))[1] << 8 \
|
141 |
|
|
| ((__const unsigned char *) (addr))[0])
|
142 |
|
|
# define get32u(addr) \
|
143 |
|
|
(((((__const unsigned char *) (addr))[3] << 8 \
|
144 |
|
|
| ((__const unsigned char *) (addr))[2]) << 8 \
|
145 |
|
|
| ((__const unsigned char *) (addr))[1]) << 8 \
|
146 |
|
|
| ((__const unsigned char *) (addr))[0])
|
147 |
|
|
|
148 |
|
|
# define put16u(addr, val) \
|
149 |
|
|
({ uint16_t __val = (val); \
|
150 |
|
|
((unsigned char *) (addr))[0] = __val; \
|
151 |
|
|
((unsigned char *) (addr))[1] = __val >> 8; \
|
152 |
|
|
(void) 0; })
|
153 |
|
|
# define put32u(addr, val) \
|
154 |
|
|
({ uint32_t __val = (val); \
|
155 |
|
|
((unsigned char *) (addr))[0] = __val; \
|
156 |
|
|
__val >>= 8; \
|
157 |
|
|
((unsigned char *) (addr))[1] = __val; \
|
158 |
|
|
__val >>= 8; \
|
159 |
|
|
((unsigned char *) (addr))[2] = __val; \
|
160 |
|
|
__val >>= 8; \
|
161 |
|
|
((unsigned char *) (addr))[3] = __val; \
|
162 |
|
|
(void) 0; })
|
163 |
|
|
# else
|
164 |
|
|
# define get16u(addr) \
|
165 |
|
|
(((__const unsigned char *) (addr))[0] << 8 \
|
166 |
|
|
| ((__const unsigned char *) (addr))[1])
|
167 |
|
|
# define get32u(addr) \
|
168 |
|
|
(((((__const unsigned char *) (addr))[0] << 8 \
|
169 |
|
|
| ((__const unsigned char *) (addr))[1]) << 8 \
|
170 |
|
|
| ((__const unsigned char *) (addr))[2]) << 8 \
|
171 |
|
|
| ((__const unsigned char *) (addr))[3])
|
172 |
|
|
|
173 |
|
|
# define put16u(addr, val) \
|
174 |
|
|
({ uint16_t __val = (val); \
|
175 |
|
|
((unsigned char *) (addr))[1] = __val; \
|
176 |
|
|
((unsigned char *) (addr))[0] = __val >> 8; \
|
177 |
|
|
(void) 0; })
|
178 |
|
|
# define put32u(addr, val) \
|
179 |
|
|
({ uint32_t __val = (val); \
|
180 |
|
|
((unsigned char *) (addr))[3] = __val; \
|
181 |
|
|
__val >>= 8; \
|
182 |
|
|
((unsigned char *) (addr))[2] = __val; \
|
183 |
|
|
__val >>= 8; \
|
184 |
|
|
((unsigned char *) (addr))[1] = __val; \
|
185 |
|
|
__val >>= 8; \
|
186 |
|
|
((unsigned char *) (addr))[0] = __val; \
|
187 |
|
|
(void) 0; })
|
188 |
|
|
# endif
|
189 |
|
|
#endif
|
190 |
|
|
|
191 |
|
|
|
192 |
|
|
/* For conversions from a fixed width character set to another fixed width
   character set we can define RESET_INPUT_BUFFER in a very fast way: the
   input pointer *INPTRP is moved back by an amount computed from the
   number of output bytes between OUTERR and OUTBUF and the ratio of the
   two character widths.  The default is only provided if the includer
   defined neither RESET_INPUT_BUFFER nor SAVE_RESET_STATE.  The macro
   expects `inptrp', `outbuf' and `outerr' to be in scope where it is
   used.  */
#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
# if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO
/* We have to use these `if's here since the compiler cannot know that
   (outbuf - outerr) is always divisible by MIN_NEEDED_TO.  The chain is
   wrapped in do { } while (0) so that the macro expands to exactly one
   statement and none of its `else' branches can pair up with an `if'
   surrounding the place of use.  */
#  define RESET_INPUT_BUFFER \
  do                                                                          \
    {                                                                         \
      if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0)                               \
        *inptrp -= (outbuf - outerr) * (MIN_NEEDED_FROM / MIN_NEEDED_TO);     \
      else if (MIN_NEEDED_TO % MIN_NEEDED_FROM == 0)                          \
        *inptrp -= (outbuf - outerr) / (MIN_NEEDED_TO / MIN_NEEDED_FROM);     \
      else                                                                    \
        *inptrp -= ((outbuf - outerr) / MIN_NEEDED_TO) * MIN_NEEDED_FROM;     \
    }                                                                         \
  while (0)
# endif
#endif
|
207 |
|
|
|
208 |
|
|
|
209 |
|
|
/* The default init function.  It compares the charset names recorded in
   STEP with CHARSET_NAME to find the direction of the conversion, makes
   the step data point to the matching direction object and records the
   minimal and maximal byte counts of both sides.  If neither name matches
   STEP is left untouched and __GCONV_NOCONV is returned; otherwise the
   result is __GCONV_OK.  */
#if DEFINE_INIT
# ifndef CHARSET_NAME
#  error "CHARSET_NAME not defined"
# endif

extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
  /* Nonzero if the source of this step is CHARSET_NAME.  */
  const int from_charset = strcmp (step->__from_name, CHARSET_NAME) == 0;

  /* A step which has CHARSET_NAME on neither side cannot be handled.  */
  if (!from_charset
      && __builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) != 0)
    return __GCONV_NOCONV;

  if (from_charset)
    {
      step->__data = &from_object;

      step->__min_needed_from = MIN_NEEDED_FROM;
      step->__max_needed_from = MAX_NEEDED_FROM;
      step->__min_needed_to = MIN_NEEDED_TO;
      step->__max_needed_to = MAX_NEEDED_TO;
    }
  else
    {
      /* Opposite direction: the roles of the two byte counts swap.  */
      step->__data = &to_object;

      step->__min_needed_from = MIN_NEEDED_TO;
      step->__max_needed_from = MAX_NEEDED_TO;
      step->__min_needed_to = MIN_NEEDED_FROM;
      step->__max_needed_to = MAX_NEEDED_FROM;
    }

  /* The step counts as stateful exactly if the includer provided a way to
     save and restore the state.  */
# ifdef SAVE_RESET_STATE
  step->__stateful = 1;
# else
  step->__stateful = 0;
# endif

  return __GCONV_OK;
}
#endif
|
251 |
|
|
|
252 |
|
|
|
253 |
|
|
/* The default destructor function does nothing at the moment and so
|
254 |
|
|
we don't define it at all. But we still provide the macro just in
|
255 |
|
|
case we need it some day. */
|
256 |
|
|
#if DEFINE_FINI
|
257 |
|
|
#endif
|
258 |
|
|
|
259 |
|
|
|
260 |
|
|
/* If no extra arguments have to be passed to the loop functions the macro
   expands to nothing.  */
#if !defined EXTRA_LOOP_ARGS
# define EXTRA_LOOP_ARGS
#endif


/* Name of the actual conversion function; `gconv' unless the includer
   selected something else.  */
#if !defined FUNCTION_NAME
# define FUNCTION_NAME gconv
#endif

/* Build the name of the function converting a single character by
   appending `_single' to the loop function name.  The indirection through
   SINGLE2 makes sure a macro given as argument is expanded before the
   suffix is pasted on.  */
#define SINGLE(loop) SINGLE2 (loop)
#define SINGLE2(loop) loop##_single
|
275 |
|
|
|
276 |
|
|
|
277 |
|
|
extern int FUNCTION_NAME (struct __gconv_step *step,
|
278 |
|
|
struct __gconv_step_data *data,
|
279 |
|
|
const unsigned char **inptrp,
|
280 |
|
|
const unsigned char *inend,
|
281 |
|
|
unsigned char **outbufstart, size_t *irreversible,
|
282 |
|
|
int do_flush, int consume_incomplete);
|
283 |
|
|
int
|
284 |
|
|
FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
|
285 |
|
|
const unsigned char **inptrp, const unsigned char *inend,
|
286 |
|
|
unsigned char **outbufstart, size_t *irreversible, int do_flush,
|
287 |
|
|
int consume_incomplete)
|
288 |
|
|
{
|
289 |
|
|
struct __gconv_step *next_step = step + 1;
|
290 |
|
|
struct __gconv_step_data *next_data = data + 1;
|
291 |
|
|
__gconv_fct fct;
|
292 |
|
|
int status;
|
293 |
|
|
|
294 |
|
|
fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct;
|
295 |
|
|
|
296 |
|
|
/* If the function is called with no input this means we have to reset
|
297 |
|
|
to the initial state. The possibly partly converted input is
|
298 |
|
|
dropped. */
|
299 |
|
|
if (__builtin_expect (do_flush, 0))
|
300 |
|
|
{
|
301 |
|
|
/* This should never happen during error handling. */
|
302 |
|
|
assert (outbufstart == NULL);
|
303 |
|
|
|
304 |
|
|
status = __GCONV_OK;
|
305 |
|
|
|
306 |
|
|
#ifdef EMIT_SHIFT_TO_INIT
|
307 |
|
|
if (do_flush == 1)
|
308 |
|
|
{
|
309 |
|
|
/* We preserve the initial values of the pointer variables. */
|
310 |
|
|
unsigned char *outbuf = data->__outbuf;
|
311 |
|
|
unsigned char *outstart = outbuf;
|
312 |
|
|
unsigned char *outend = data->__outbufend;
|
313 |
|
|
|
314 |
|
|
# ifdef PREPARE_LOOP
|
315 |
|
|
PREPARE_LOOP
|
316 |
|
|
# endif
|
317 |
|
|
|
318 |
|
|
# ifdef SAVE_RESET_STATE
|
319 |
|
|
SAVE_RESET_STATE (1);
|
320 |
|
|
# endif
|
321 |
|
|
|
322 |
|
|
/* Emit the escape sequence to reset the state. */
|
323 |
|
|
EMIT_SHIFT_TO_INIT;
|
324 |
|
|
|
325 |
|
|
/* Call the steps down the chain if there are any but only if we
|
326 |
|
|
successfully emitted the escape sequence. This should only
|
327 |
|
|
fail if the output buffer is full. If the input is invalid
|
328 |
|
|
it should be discarded since the user wants to start from a
|
329 |
|
|
clean state. */
|
330 |
|
|
if (status == __GCONV_OK)
|
331 |
|
|
{
|
332 |
|
|
if (data->__flags & __GCONV_IS_LAST)
|
333 |
|
|
/* Store information about how many bytes are available. */
|
334 |
|
|
data->__outbuf = outbuf;
|
335 |
|
|
else
|
336 |
|
|
{
|
337 |
|
|
/* Write out all output which was produced. */
|
338 |
|
|
if (outbuf > outstart)
|
339 |
|
|
{
|
340 |
|
|
const unsigned char *outerr = outstart;
|
341 |
|
|
int result;
|
342 |
|
|
|
343 |
|
|
result = DL_CALL_FCT (fct, (next_step, next_data,
|
344 |
|
|
&outerr, outbuf, NULL,
|
345 |
|
|
irreversible, 0,
|
346 |
|
|
consume_incomplete));
|
347 |
|
|
|
348 |
|
|
if (result != __GCONV_EMPTY_INPUT)
|
349 |
|
|
{
|
350 |
|
|
if (__builtin_expect (outerr != outbuf, 0))
|
351 |
|
|
{
|
352 |
|
|
/* We have a problem. Undo the conversion. */
|
353 |
|
|
outbuf = outstart;
|
354 |
|
|
|
355 |
|
|
/* Restore the state. */
|
356 |
|
|
# ifdef SAVE_RESET_STATE
|
357 |
|
|
SAVE_RESET_STATE (0);
|
358 |
|
|
# endif
|
359 |
|
|
}
|
360 |
|
|
|
361 |
|
|
/* Change the status. */
|
362 |
|
|
status = result;
|
363 |
|
|
}
|
364 |
|
|
}
|
365 |
|
|
|
366 |
|
|
if (status == __GCONV_OK)
|
367 |
|
|
/* Now flush the remaining steps. */
|
368 |
|
|
status = DL_CALL_FCT (fct, (next_step, next_data, NULL,
|
369 |
|
|
NULL, NULL, irreversible, 1,
|
370 |
|
|
consume_incomplete));
|
371 |
|
|
}
|
372 |
|
|
}
|
373 |
|
|
}
|
374 |
|
|
else
|
375 |
|
|
#endif
|
376 |
|
|
{
|
377 |
|
|
/* Clear the state object. There might be bytes in there from
|
378 |
|
|
previous calls with CONSUME_INCOMPLETE == 1. But don't emit
|
379 |
|
|
escape sequences. */
|
380 |
|
|
memset (data->__statep, '\0', sizeof (*data->__statep));
|
381 |
|
|
|
382 |
|
|
if (! (data->__flags & __GCONV_IS_LAST))
|
383 |
|
|
/* Now flush the remaining steps. */
|
384 |
|
|
status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
|
385 |
|
|
NULL, irreversible, do_flush,
|
386 |
|
|
consume_incomplete));
|
387 |
|
|
}
|
388 |
|
|
}
|
389 |
|
|
else
|
390 |
|
|
{
|
391 |
|
|
/* We preserve the initial values of the pointer variables. */
|
392 |
|
|
const unsigned char *inptr = *inptrp;
|
393 |
|
|
unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1)
|
394 |
|
|
? data->__outbuf : *outbufstart);
|
395 |
|
|
unsigned char *outend = data->__outbufend;
|
396 |
|
|
unsigned char *outstart;
|
397 |
|
|
/* This variable is used to count the number of characters we
|
398 |
|
|
actually converted. */
|
399 |
|
|
size_t lirreversible = 0;
|
400 |
|
|
size_t *lirreversiblep = irreversible ? &lirreversible : NULL;
|
401 |
|
|
#if defined _STRING_ARCH_unaligned \
|
402 |
|
|
|| MIN_NEEDED_FROM == 1 || MAX_NEEDED_FROM % MIN_NEEDED_FROM != 0 \
|
403 |
|
|
|| MIN_NEEDED_TO == 1 || MAX_NEEDED_TO % MIN_NEEDED_TO != 0
|
404 |
|
|
# define unaligned 0
|
405 |
|
|
#else
|
406 |
|
|
int unaligned;
|
407 |
|
|
# define GEN_unaligned(name) GEN_unaligned2 (name)
|
408 |
|
|
# define GEN_unaligned2(name) name##_unaligned
|
409 |
|
|
#endif
|
410 |
|
|
|
411 |
|
|
#ifdef PREPARE_LOOP
|
412 |
|
|
PREPARE_LOOP
|
413 |
|
|
#endif
|
414 |
|
|
|
415 |
|
|
#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
|
416 |
|
|
/* If the function is used to implement the mb*towc*() or wc*tomb*()
|
417 |
|
|
functions we must test whether any bytes from the last call are
|
418 |
|
|
stored in the `state' object. */
|
419 |
|
|
if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
|
420 |
|
|
|| (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
|
421 |
|
|
|| (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
|
422 |
|
|
&& consume_incomplete && (data->__statep->__count & 7) != 0)
|
423 |
|
|
{
|
424 |
|
|
/* Yep, we have some bytes left over. Process them now.
|
425 |
|
|
But this must not happen while we are called from an
|
426 |
|
|
error handler. */
|
427 |
|
|
assert (outbufstart == NULL);
|
428 |
|
|
|
429 |
|
|
# if MAX_NEEDED_FROM > 1
|
430 |
|
|
if (MAX_NEEDED_TO == 1 || FROM_DIRECTION)
|
431 |
|
|
status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf,
|
432 |
|
|
outend, lirreversiblep
|
433 |
|
|
EXTRA_LOOP_ARGS);
|
434 |
|
|
# endif
|
435 |
|
|
# if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION
|
436 |
|
|
else
|
437 |
|
|
# endif
|
438 |
|
|
# if MAX_NEEDED_TO > 1 && !ONE_DIRECTION
|
439 |
|
|
status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf,
|
440 |
|
|
outend, lirreversiblep EXTRA_LOOP_ARGS);
|
441 |
|
|
# endif
|
442 |
|
|
|
443 |
|
|
if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
|
444 |
|
|
return status;
|
445 |
|
|
}
|
446 |
|
|
#endif
|
447 |
|
|
|
448 |
|
|
#if !defined _STRING_ARCH_unaligned \
|
449 |
|
|
&& MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
|
450 |
|
|
&& MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
|
451 |
|
|
/* The following assumes that encodings, which have a variable length
|
452 |
|
|
what might unalign a buffer even though it is a aligned in the
|
453 |
|
|
beginning, either don't have the minimal number of bytes as a divisor
|
454 |
|
|
of the maximum length or have a minimum length of 1. This is true
|
455 |
|
|
for all known and supported encodings. */
|
456 |
|
|
unaligned = ((FROM_DIRECTION
|
457 |
|
|
&& ((uintptr_t) inptr % MIN_NEEDED_FROM != 0
|
458 |
|
|
|| ((data->__flags & __GCONV_IS_LAST)
|
459 |
|
|
&& (uintptr_t) outbuf % MIN_NEEDED_TO != 0)))
|
460 |
|
|
|| (!FROM_DIRECTION
|
461 |
|
|
&& (((data->__flags & __GCONV_IS_LAST)
|
462 |
|
|
&& (uintptr_t) outbuf % MIN_NEEDED_FROM != 0)
|
463 |
|
|
|| (uintptr_t) inptr % MIN_NEEDED_TO != 0)));
|
464 |
|
|
#endif
|
465 |
|
|
|
466 |
|
|
while (1)
|
467 |
|
|
{
|
468 |
|
|
struct __gconv_trans_data *trans;
|
469 |
|
|
|
470 |
|
|
/* Remember the start value for this round. */
|
471 |
|
|
inptr = *inptrp;
|
472 |
|
|
/* The outbuf buffer is empty. */
|
473 |
|
|
outstart = outbuf;
|
474 |
|
|
|
475 |
|
|
#ifdef SAVE_RESET_STATE
|
476 |
|
|
SAVE_RESET_STATE (1);
|
477 |
|
|
#endif
|
478 |
|
|
|
479 |
|
|
if (__builtin_expect (!unaligned, 1))
|
480 |
|
|
{
|
481 |
|
|
if (FROM_DIRECTION)
|
482 |
|
|
/* Run the conversion loop. */
|
483 |
|
|
status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend,
|
484 |
|
|
lirreversiblep EXTRA_LOOP_ARGS);
|
485 |
|
|
else
|
486 |
|
|
/* Run the conversion loop. */
|
487 |
|
|
status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend,
|
488 |
|
|
lirreversiblep EXTRA_LOOP_ARGS);
|
489 |
|
|
}
|
490 |
|
|
#if !defined _STRING_ARCH_unaligned \
|
491 |
|
|
&& MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
|
492 |
|
|
&& MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
|
493 |
|
|
else
|
494 |
|
|
{
|
495 |
|
|
if (FROM_DIRECTION)
|
496 |
|
|
/* Run the conversion loop. */
|
497 |
|
|
status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend,
|
498 |
|
|
&outbuf, outend,
|
499 |
|
|
lirreversiblep
|
500 |
|
|
EXTRA_LOOP_ARGS);
|
501 |
|
|
else
|
502 |
|
|
/* Run the conversion loop. */
|
503 |
|
|
status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend,
|
504 |
|
|
&outbuf, outend,
|
505 |
|
|
lirreversiblep
|
506 |
|
|
EXTRA_LOOP_ARGS);
|
507 |
|
|
}
|
508 |
|
|
#endif
|
509 |
|
|
|
510 |
|
|
/* If we were called as part of an error handling module we
|
511 |
|
|
don't do anything else here. */
|
512 |
|
|
if (__builtin_expect (outbufstart != NULL, 0))
|
513 |
|
|
{
|
514 |
|
|
*outbufstart = outbuf;
|
515 |
|
|
return status;
|
516 |
|
|
}
|
517 |
|
|
|
518 |
|
|
/* Give the transliteration module the chance to store the
|
519 |
|
|
original text and the result in case it needs a context. */
|
520 |
|
|
for (trans = data->__trans; trans != NULL; trans = trans->__next)
|
521 |
|
|
if (trans->__trans_context_fct != NULL)
|
522 |
|
|
DL_CALL_FCT (trans->__trans_context_fct,
|
523 |
|
|
(trans->__data, inptr, *inptrp, outstart, outbuf));
|
524 |
|
|
|
525 |
|
|
/* We finished one use of the loops. */
|
526 |
|
|
++data->__invocation_counter;
|
527 |
|
|
|
528 |
|
|
/* If this is the last step leave the loop, there is nothing
|
529 |
|
|
we can do. */
|
530 |
|
|
if (__builtin_expect (data->__flags & __GCONV_IS_LAST, 0))
|
531 |
|
|
{
|
532 |
|
|
/* Store information about how many bytes are available. */
|
533 |
|
|
data->__outbuf = outbuf;
|
534 |
|
|
|
535 |
|
|
/* Remember how many non-identical characters we
|
536 |
|
|
converted in a irreversible way. */
|
537 |
|
|
*irreversible += lirreversible;
|
538 |
|
|
|
539 |
|
|
break;
|
540 |
|
|
}
|
541 |
|
|
|
542 |
|
|
/* Write out all output which was produced. */
|
543 |
|
|
if (__builtin_expect (outbuf > outstart, 1))
|
544 |
|
|
{
|
545 |
|
|
const unsigned char *outerr = data->__outbuf;
|
546 |
|
|
int result;
|
547 |
|
|
|
548 |
|
|
result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
|
549 |
|
|
outbuf, NULL, irreversible, 0,
|
550 |
|
|
consume_incomplete));
|
551 |
|
|
|
552 |
|
|
if (result != __GCONV_EMPTY_INPUT)
|
553 |
|
|
{
|
554 |
|
|
if (__builtin_expect (outerr != outbuf, 0))
|
555 |
|
|
{
|
556 |
|
|
#ifdef RESET_INPUT_BUFFER
|
557 |
|
|
RESET_INPUT_BUFFER;
|
558 |
|
|
#else
|
559 |
|
|
/* We have a problem with the in on of the functions
|
560 |
|
|
below. Undo the conversion upto the error point. */
|
561 |
|
|
size_t nstatus;
|
562 |
|
|
|
563 |
|
|
/* Reload the pointers. */
|
564 |
|
|
*inptrp = inptr;
|
565 |
|
|
outbuf = outstart;
|
566 |
|
|
|
567 |
|
|
/* Restore the state. */
|
568 |
|
|
# ifdef SAVE_RESET_STATE
|
569 |
|
|
SAVE_RESET_STATE (0);
|
570 |
|
|
# endif
|
571 |
|
|
|
572 |
|
|
if (__builtin_expect (!unaligned, 1))
|
573 |
|
|
{
|
574 |
|
|
if (FROM_DIRECTION)
|
575 |
|
|
/* Run the conversion loop. */
|
576 |
|
|
nstatus = FROM_LOOP (step, data, inptrp, inend,
|
577 |
|
|
&outbuf, outerr,
|
578 |
|
|
lirreversiblep
|
579 |
|
|
EXTRA_LOOP_ARGS);
|
580 |
|
|
else
|
581 |
|
|
/* Run the conversion loop. */
|
582 |
|
|
nstatus = TO_LOOP (step, data, inptrp, inend,
|
583 |
|
|
&outbuf, outerr,
|
584 |
|
|
lirreversiblep
|
585 |
|
|
EXTRA_LOOP_ARGS);
|
586 |
|
|
}
|
587 |
|
|
# if !defined _STRING_ARCH_unaligned \
|
588 |
|
|
&& MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
|
589 |
|
|
&& MIN_NEEDED_TO != 1 && MAX_NEEDED_TO % MIN_NEEDED_TO == 0
|
590 |
|
|
else
|
591 |
|
|
{
|
592 |
|
|
if (FROM_DIRECTION)
|
593 |
|
|
/* Run the conversion loop. */
|
594 |
|
|
nstatus = GEN_unaligned (FROM_LOOP) (step, data,
|
595 |
|
|
inptrp, inend,
|
596 |
|
|
&outbuf,
|
597 |
|
|
outerr,
|
598 |
|
|
lirreversiblep
|
599 |
|
|
EXTRA_LOOP_ARGS);
|
600 |
|
|
else
|
601 |
|
|
/* Run the conversion loop. */
|
602 |
|
|
nstatus = GEN_unaligned (TO_LOOP) (step, data,
|
603 |
|
|
inptrp, inend,
|
604 |
|
|
&outbuf, outerr,
|
605 |
|
|
lirreversiblep
|
606 |
|
|
EXTRA_LOOP_ARGS);
|
607 |
|
|
}
|
608 |
|
|
# endif
|
609 |
|
|
|
610 |
|
|
/* We must run out of output buffer space in this
|
611 |
|
|
rerun. */
|
612 |
|
|
assert (outbuf == outerr);
|
613 |
|
|
assert (nstatus == __GCONV_FULL_OUTPUT);
|
614 |
|
|
|
615 |
|
|
/* If we haven't consumed a single byte decrement
|
616 |
|
|
the invocation counter. */
|
617 |
|
|
if (__builtin_expect (outbuf == outstart, 0))
|
618 |
|
|
--data->__invocation_counter;
|
619 |
|
|
#endif /* reset input buffer */
|
620 |
|
|
}
|
621 |
|
|
|
622 |
|
|
/* Change the status. */
|
623 |
|
|
status = result;
|
624 |
|
|
}
|
625 |
|
|
else
|
626 |
|
|
/* All the output is consumed, we can make another run
|
627 |
|
|
if everything was ok. */
|
628 |
|
|
if (status == __GCONV_FULL_OUTPUT)
|
629 |
|
|
{
|
630 |
|
|
status = __GCONV_OK;
|
631 |
|
|
outbuf = data->__outbuf;
|
632 |
|
|
}
|
633 |
|
|
}
|
634 |
|
|
|
635 |
|
|
if (status != __GCONV_OK)
|
636 |
|
|
break;
|
637 |
|
|
|
638 |
|
|
/* Reset the output buffer pointer for the next round. */
|
639 |
|
|
outbuf = data->__outbuf;
|
640 |
|
|
}
|
641 |
|
|
|
642 |
|
|
#ifdef END_LOOP
|
643 |
|
|
END_LOOP
|
644 |
|
|
#endif
|
645 |
|
|
|
646 |
|
|
/* If we are supposed to consume all character store now all of the
|
647 |
|
|
remaining characters in the `state' object. */
|
648 |
|
|
#if MAX_NEEDED_FROM > 1 || MAX_NEEDED_TO > 1
|
649 |
|
|
if (((MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1)
|
650 |
|
|
|| (MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
|
651 |
|
|
|| (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
|
652 |
|
|
&& __builtin_expect (consume_incomplete, 0)
|
653 |
|
|
&& status == __GCONV_INCOMPLETE_INPUT)
|
654 |
|
|
{
|
655 |
|
|
# ifdef STORE_REST
|
656 |
|
|
mbstate_t *state = data->__statep;
|
657 |
|
|
|
658 |
|
|
STORE_REST
|
659 |
|
|
# else
|
660 |
|
|
size_t cnt;
|
661 |
|
|
|
662 |
|
|
/* Make sure the remaining bytes fit into the state objects
|
663 |
|
|
buffer. */
|
664 |
|
|
assert (inend - *inptrp < 4);
|
665 |
|
|
|
666 |
|
|
for (cnt = 0; *inptrp < inend; ++cnt)
|
667 |
|
|
data->__statep->__value.__wchb[cnt] = *(*inptrp)++;
|
668 |
|
|
data->__statep->__count &= ~7;
|
669 |
|
|
data->__statep->__count |= cnt;
|
670 |
|
|
# endif
|
671 |
|
|
}
|
672 |
|
|
#endif
|
673 |
|
|
}
|
674 |
|
|
|
675 |
|
|
return status;
|
676 |
|
|
}
|
677 |
|
|
|
678 |
|
|
/* Remove all customization macros again so that a later inclusion of this
   file starts from a clean slate.  EXTRA_LOOP_ARGS belongs in this list as
   well: it is either set by the includer or defaulted above, and a stale
   definition would otherwise be picked up silently by the next
   inclusion.  */
#undef DEFINE_INIT
#undef CHARSET_NAME
#undef DEFINE_FINI
#undef MIN_NEEDED_FROM
#undef MIN_NEEDED_TO
#undef MAX_NEEDED_FROM
#undef MAX_NEEDED_TO
#undef DEFINE_DIRECTION_OBJECTS
#undef FROM_DIRECTION
#undef EMIT_SHIFT_TO_INIT
#undef FROM_LOOP
#undef TO_LOOP
#undef SAVE_RESET_STATE
#undef RESET_INPUT_BUFFER
#undef FUNCTION_NAME
#undef PREPARE_LOOP
#undef END_LOOP
#undef EXTRA_LOOP_ARGS
#undef ONE_DIRECTION
#undef STORE_REST
|