OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.18.0/] [newlib/] [libc/] [sys/] [linux/] [iconv/] [iconv_charmap.c] - Blame information for rev 262

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 207 jeremybenn
/* Convert using charmaps and possibly iconv().
2
   Copyright (C) 2001 Free Software Foundation, Inc.
3
   This file is part of the GNU C Library.
4
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
5
 
6
   The GNU C Library is free software; you can redistribute it and/or
7
   modify it under the terms of the GNU Lesser General Public
8
   License as published by the Free Software Foundation; either
9
   version 2.1 of the License, or (at your option) any later version.
10
 
11
   The GNU C Library is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
   Lesser General Public License for more details.
15
 
16
   You should have received a copy of the GNU Lesser General Public
17
   License along with the GNU C Library; if not, write to the Free
18
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19
   02111-1307 USA.  */
20
 
21
#include <assert.h>
22
#include <errno.h>
23
#include <error.h>
24
#include <fcntl.h>
25
#include <iconv.h>
26
#include <libintl.h>
27
#include <stdio.h>
28
#include <stdlib.h>
29
#include <unistd.h>
30
#include <sys/mman.h>
31
#include <sys/stat.h>
32
 
33
#include "iconv_prog.h"
34
 
35
 
36
/* Allocation helpers shared across the program; they are defined in
   another translation unit.  */
extern void *xmalloc (size_t size);
extern void *xcalloc (size_t nmemb, size_t size);
39
 
40
 
41
/* One level of the byte-indexed lookup tree used for the conversion.
   Each level has one slot per possible input byte value.  */
struct convtable
{
  /* Bit set of "terminal" slots.  The accessors is_term, set_term and
     clear_term address bit (idx % 8) of element (idx / 8).  */
  int term[256 / 8];

  /* Per-byte payload.  For a terminal slot `out' is the replacement
     byte sequence; otherwise `sub' is the next tree level, or NULL
     when no sequence continues with this byte.  */
  union
  {
    struct convtable *sub;
    struct charseq *out;
  } val[256];
};
50
 
51
 
52
static inline struct convtable *
53
allocate_table (void)
54
{
55
  return (struct convtable *) xcalloc (1, sizeof (struct convtable));
56
}
57
 
58
 
59
static inline int
60
is_term (struct convtable *tbl, unsigned int idx)
61
{
62
  return tbl->term[idx / 8] & (1 << (idx % 8));
63
}
64
 
65
 
66
static inline void
67
clear_term (struct convtable *tbl, unsigned int idx)
68
{
69
  tbl->term[idx / 8] &= ~(1 << (idx % 8));
70
}
71
 
72
 
73
static inline void
74
set_term (struct convtable *tbl, unsigned int idx)
75
{
76
  tbl->term[idx / 8] |= 1 << (idx % 8);
77
}
78
 
79
 
80
/* Builders for the conversion table, one for each combination of
   available charmaps.  */
static struct convtable *
use_from_charmap (struct charmap_t *from_charmap, const char *to_code);
static struct convtable *
use_to_charmap (const char *from_code, struct charmap_t *to_charmap);
static struct convtable *
use_both_charmaps (struct charmap_t *from_charmap,
                   struct charmap_t *to_charmap);

/* Workers that run the finished table over the input.  */
static int
process_block (struct convtable *tbl, char *addr, size_t len, FILE *output);
static int
process_fd (struct convtable *tbl, int fd, FILE *output);
static int
process_file (struct convtable *tbl, FILE *input, FILE *output);
93
 
94
 
95
int
96
charmap_conversion (const char *from_code, struct charmap_t *from_charmap,
97
                    const char *to_code, struct charmap_t *to_charmap,
98
                    int argc, int remaining, char *argv[], FILE *output)
99
{
100
  struct convtable *cvtbl;
101
  int status = EXIT_SUCCESS;
102
 
103
  /* We have three different cases to handle:
104
 
105
     - both, from_charmap and to_charmap, are available.  This means we
106
       can assume that the symbolic names match and use them to create
107
       the mapping.
108
 
109
     - only from_charmap is available.  In this case we can only hope that
110
       the symbolic names used are of the <Uxxxx> form in which case we
111
       can use a UCS4->"to_code" iconv() conversion for the second step.
112
 
113
     - only to_charmap is available.  This is similar, only that we would
114
       use iconv() for the "to_code"->UCS4 conversion.
115
 
116
       We first create a table which maps input bytes into output bytes.
117
       Once this is done we can handle all three of the cases above
118
       equally.  */
119
  if (from_charmap != NULL)
120
    {
121
      if (to_charmap == NULL)
122
        cvtbl = use_from_charmap (from_charmap, to_code);
123
      else
124
        cvtbl = use_both_charmaps (from_charmap, to_charmap);
125
    }
126
  else
127
    {
128
      assert (to_charmap != NULL);
129
      cvtbl = use_to_charmap (from_code, to_charmap);
130
    }
131
 
132
  /* If we couldn't generate a table stop now.  */
133
  if (cvtbl == NULL)
134
    return EXIT_FAILURE;
135
 
136
  /* We can now start the conversion.  */
137
  if (remaining == argc)
138
    {
139
      if (process_file (cvtbl, stdin, output) != 0)
140
        status = EXIT_FAILURE;
141
    }
142
  else
143
    do
144
      {
145
        struct stat st;
146
        char *addr;
147
        int fd;
148
 
149
        if (verbose)
150
          printf ("%s:\n", argv[remaining]);
151
        if (strcmp (argv[remaining], "-") == 0)
152
          fd = 0;
153
        else
154
          {
155
            fd = open (argv[remaining], O_RDONLY);
156
 
157
            if (fd == -1)
158
              {
159
                error (0, errno, _("cannot open input file `%s'"),
160
                       argv[remaining]);
161
                status = EXIT_FAILURE;
162
                continue;
163
              }
164
          }
165
 
166
#ifdef _POSIX_MAPPED_FILES
167
        /* We have possibilities for reading the input file.  First try
168
           to mmap() it since this will provide the fastest solution.  */
169
        if (fstat (fd, &st) == 0
170
            && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
171
                              fd, 0)) != MAP_FAILED))
172
          {
173
            /* Yes, we can use mmap().  The descriptor is not needed
174
               anymore.  */
175
            if (close (fd) != 0)
176
              error (EXIT_FAILURE, errno,
177
                     _("error while closing input `%s'"), argv[remaining]);
178
 
179
            if (process_block (cvtbl, addr, st.st_size, output) < 0)
180
              {
181
                /* Something went wrong.  */
182
                status = EXIT_FAILURE;
183
 
184
                /* We don't need the input data anymore.  */
185
                munmap ((void *) addr, st.st_size);
186
 
187
                /* We cannot go on with producing output since it might
188
                   lead to problem because the last output might leave
189
                   the output stream in an undefined state.  */
190
                break;
191
              }
192
 
193
            /* We don't need the input data anymore.  */
194
            munmap ((void *) addr, st.st_size);
195
          }
196
        else
197
#endif  /* _POSIX_MAPPED_FILES */
198
          {
199
            /* Read the file in pieces.  */
200
            if (process_fd (cvtbl, fd, output) != 0)
201
              {
202
                /* Something went wrong.  */
203
                status = EXIT_FAILURE;
204
 
205
                /* We don't need the input file anymore.  */
206
                close (fd);
207
 
208
                /* We cannot go on with producing output since it might
209
                   lead to problem because the last output might leave
210
                   the output stream in an undefined state.  */
211
                break;
212
              }
213
 
214
            /* Now close the file.  */
215
            close (fd);
216
          }
217
      }
218
    while (++remaining < argc);
219
 
220
  /* All done.  */
221
  return status;
222
}
223
 
224
 
225
static void
226
add_bytes (struct convtable *tbl, struct charseq *in, struct charseq *out)
227
{
228
  int n = 0;
229
  unsigned int byte;
230
 
231
  assert (in->nbytes > 0);
232
 
233
  byte = ((unsigned char *) in->bytes)[n];
234
  while (n + 1 < in->nbytes)
235
    {
236
      if (is_term (tbl, byte) || tbl->val[byte].sub == NULL)
237
        {
238
          /* Note that we simply ignore a definition for a byte sequence
239
             which is also the prefix for a longer one.  */
240
          clear_term (tbl, byte);
241
          tbl->val[byte].sub =
242
            (struct convtable *) xcalloc (1, sizeof (struct convtable));
243
        }
244
 
245
      tbl = tbl->val[byte].sub;
246
 
247
      byte = ((unsigned char *) in->bytes)[++n];
248
    }
249
 
250
  /* Only add the new sequence if there is none yet and the byte sequence
251
     is not part of an even longer one.  */
252
  if (! is_term (tbl, byte) && tbl->val[byte].sub == NULL)
253
    {
254
      set_term (tbl, byte);
255
      tbl->val[byte].out = out;
256
    }
257
}
258
 
259
 
260
static struct convtable *
261
use_from_charmap (struct charmap_t *from_charmap, const char *to_code)
262
{
263
  /* We iterate over all entries in the from_charmap and for those which
264
     have a known UCS4 representation we use an iconv() call to determine
265
     the mapping to the to_code charset.  */
266
  struct convtable *rettbl;
267
  iconv_t cd;
268
  void *ptr = NULL;
269
  const void *key;
270
  size_t keylen;
271
  void *data;
272
 
273
  cd = iconv_open (to_code, "WCHAR_T");
274
  if (cd == (iconv_t) -1)
275
    /* We cannot do anything.  */
276
    return NULL;
277
 
278
  rettbl = allocate_table ();
279
 
280
  while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data)
281
         >= 0)
282
    {
283
      struct charseq *in = (struct charseq *) data;
284
 
285
      if (in->ucs4 != UNINITIALIZED_CHAR_VALUE)
286
        {
287
          /* There is a chance.  Try the iconv module.  */
288
          wchar_t inbuf[1] = { in->ucs4 };
289
          unsigned char outbuf[64];
290
          char *inptr = (char *) inbuf;
291
          size_t inlen = sizeof (inbuf);
292
          char *outptr = (char *) outbuf;
293
          size_t outlen = sizeof (outbuf);
294
 
295
          (void) iconv (cd, &inptr, &inlen, &outptr, &outlen);
296
 
297
          if (outptr != (char *) outbuf)
298
            {
299
              /* We got some output.  Good, use it.  */
300
              struct charseq *newp;
301
 
302
              outlen = sizeof (outbuf) - outlen;
303
              assert ((char *) outbuf + outlen == outptr);
304
 
305
              newp = (struct charseq *) xmalloc (sizeof (struct charseq)
306
                                                 + outlen);
307
              newp->name = in->name;
308
              newp->ucs4 = in->ucs4;
309
              newp->nbytes = outlen;
310
              memcpy (newp->bytes, outbuf, outlen);
311
 
312
              add_bytes (rettbl, in, newp);
313
            }
314
 
315
          /* Clear any possible state left behind.  */
316
          (void) iconv (cd, NULL, NULL, NULL, NULL);
317
        }
318
    }
319
 
320
  iconv_close (cd);
321
 
322
  return rettbl;
323
}
324
 
325
 
326
static struct convtable *
327
use_to_charmap (const char *from_code, struct charmap_t *to_charmap)
328
{
329
  /* We iterate over all entries in the to_charmap and for those which
330
     have a known UCS4 representation we use an iconv() call to determine
331
     the mapping to the from_code charset.  */
332
  struct convtable *rettbl;
333
  iconv_t cd;
334
  void *ptr = NULL;
335
  const void *key;
336
  size_t keylen;
337
  void *data;
338
 
339
  /* Note that the conversion we use here is the reverse direction.  Without
340
     exhaustive search we cannot figure out which input yields the UCS4
341
     character we are looking for.  Therefore we determine it the other
342
     way round.  */
343
  cd = iconv_open (from_code, "WCHAR_T");
344
  if (cd == (iconv_t) -1)
345
    /* We cannot do anything.  */
346
    return NULL;
347
 
348
  rettbl = allocate_table ();
349
 
350
  while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data)
351
         >= 0)
352
    {
353
      struct charseq *out = (struct charseq *) data;
354
 
355
      if (out->ucs4 != UNINITIALIZED_CHAR_VALUE)
356
        {
357
          /* There is a chance.  Try the iconv module.  */
358
          wchar_t inbuf[1] = { out->ucs4 };
359
          unsigned char outbuf[64];
360
          char *inptr = (char *) inbuf;
361
          size_t inlen = sizeof (inbuf);
362
          char *outptr = (char *) outbuf;
363
          size_t outlen = sizeof (outbuf);
364
 
365
          (void) iconv (cd, &inptr, &inlen, &outptr, &outlen);
366
 
367
          if (outptr != (char *) outbuf)
368
            {
369
              /* We got some output.  Good, use it.  */
370
              struct charseq *newp;
371
 
372
              outlen = sizeof (outbuf) - outlen;
373
              assert ((char *) outbuf + outlen == outptr);
374
 
375
              newp = (struct charseq *) xmalloc (sizeof (struct charseq)
376
                                                 + outlen);
377
              newp->name = out->name;
378
              newp->ucs4 = out->ucs4;
379
              newp->nbytes = outlen;
380
              memcpy (newp->bytes, outbuf, outlen);
381
 
382
              add_bytes (rettbl, newp, out);
383
            }
384
 
385
          /* Clear any possible state left behind.  */
386
          (void) iconv (cd, NULL, NULL, NULL, NULL);
387
        }
388
    }
389
 
390
  iconv_close (cd);
391
 
392
  return rettbl;
393
}
394
 
395
 
396
static struct convtable *
397
use_both_charmaps (struct charmap_t *from_charmap,
398
                   struct charmap_t *to_charmap)
399
{
400
  /* In this case we iterate over all the entries in the from_charmap,
401
     determine the internal name, and find an appropriate entry in the
402
     to_charmap (if it exists).  */
403
  struct convtable *rettbl = allocate_table ();
404
  void *ptr = NULL;
405
  const void *key;
406
  size_t keylen;
407
  void *data;
408
 
409
  while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data)
410
         >= 0)
411
    {
412
      struct charseq *in = (struct charseq *) data;
413
      struct charseq *out = charmap_find_value (to_charmap, key, keylen);
414
 
415
      if (out != NULL)
416
        add_bytes (rettbl, in, out);
417
    }
418
 
419
  return rettbl;
420
}
421
 
422
 
423
static int
424
process_block (struct convtable *tbl, char *addr, size_t len, FILE *output)
425
{
426
  size_t n = 0;
427
 
428
  while (n < len)
429
    {
430
      struct convtable *cur = tbl;
431
      unsigned char *curp = (unsigned char *) addr;
432
      unsigned int byte = *curp;
433
      int cnt;
434
      struct charseq *out;
435
 
436
      while (! is_term (cur, byte))
437
        if (cur->val[byte].sub == NULL)
438
          {
439
            /* This is a invalid sequence.  Skip the first byte if we are
440
               ignoring errors.  Otherwise punt.  */
441
            if (! omit_invalid)
442
              {
443
                error (0, 0, _("illegal input sequence at position %Zd"), n);
444
                return -1;
445
              }
446
 
447
            n -= curp - (unsigned char *) addr;
448
 
449
            byte = *(curp = (unsigned char *) ++addr);
450
            if (++n >= len)
451
              /* All converted.  */
452
              return 0;
453
 
454
            cur = tbl;
455
          }
456
        else
457
          {
458
            cur = cur->val[byte].sub;
459
 
460
            if (++n >= len)
461
              {
462
                error (0, 0, _("\
463
incomplete character or shift sequence at end of buffer"));
464
                return -1;
465
              }
466
 
467
            byte = *++curp;
468
          }
469
 
470
      /* We found a final byte.  Write the output bytes.  */
471
      out = cur->val[byte].out;
472
      for (cnt = 0; cnt < out->nbytes; ++cnt)
473
        fputc_unlocked (out->bytes[cnt], output);
474
 
475
      addr = (char *) curp + 1;
476
      ++n;
477
    }
478
 
479
  return 0;
480
}
481
 
482
 
483
/* Read everything available from descriptor FD into a buffer and
   convert it with TBL, writing the result to OUTPUT.

   The buffer is static and is kept (and reused) across calls; it only
   ever grows, in steps of 32768 bytes.

   Returns the result of process_block, or -1 after reporting a read
   error or a failure to enlarge the buffer.  */
static int
process_fd (struct convtable *tbl, int fd, FILE *output)
{
  /* We have a problem with reading from a descriptor since we must not
     provide the conversion with an incomplete character or shift
     sequence at the end of the buffer.  Since we have to deal with
     arbitrary encodings we must read the whole text in a buffer and
     process it in one step.  */
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  /* Start writing at the beginning of the buffer left over from an
     earlier call (NULL with maxlen == 0 on the first call, in which
     case the loop below does not run).  */
  char *inptr = inbuf;
  size_t actlen = 0;

  /* First fill whatever buffer space already exists.  */
  while (actlen < maxlen)
    {
      ssize_t n = read (fd, inptr, maxlen - actlen);

      if (n == 0)
        /* No more text to read.  */
        break;

      if (n == -1)
        {
          /* Error while reading.  */
          error (0, errno, _("error while reading the input"));
          return -1;
        }

      inptr += n;
      actlen += n;
    }

  /* The buffer is full (or does not exist yet): keep enlarging it
     until end of input is seen.  */
  if (actlen == maxlen)
    while (1)
      {
        ssize_t n;
        char *new_inbuf;

        /* Increase the buffer.  Go through a temporary so the old
           buffer and its recorded size stay valid if this fails.  */
        new_inbuf = realloc (inbuf, maxlen + 32768);
        if (new_inbuf == NULL)
          {
            error (0, errno, _("unable to allocate buffer for input"));
            return -1;
          }
        inbuf = new_inbuf;
        maxlen += 32768;
        inptr = inbuf + actlen;

        do
          {
            n = read (fd, inptr, maxlen - actlen);

            if (n == 0)
              /* No more text to read.  */
              break;

            if (n == -1)
              {
                /* Error while reading.  */
                error (0, errno, _("error while reading the input"));
                return -1;
              }

            inptr += n;
            actlen += n;
          }
        while (actlen < maxlen);

        if (n == 0)
          /* Break again so we leave both loops.  */
          break;
      }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (tbl, inbuf, actlen, output);
}
555
 
556
 
557
/* Convert the contents of stream INPUT with TBL, writing to OUTPUT.
   The stream's underlying descriptor is read directly, bypassing
   stdio buffering.  */
static int
process_file (struct convtable *tbl, FILE *input, FILE *output)
{
  /* Going straight to the descriptor should be safe: this function is
     only used for `stdin', and nothing has been read from it yet.  */
  const int fd = fileno (input);

  return process_fd (tbl, fd, output);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.