OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [tags/] [UCLIBC_0_9_26/] [linux/] [uClibc/] [extra/] [locale/] [gen_wctype.c] - Blame information for rev 1326

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1325 phoenix
 
2
#define _GNU_SOURCE
3
#include <stdio.h>
4
#include <stdlib.h>
5
#include <string.h>
6
#include <locale.h>
7
#include <wctype.h>
8
#include <limits.h>
9
#include <stdint.h>
10
#include <wchar.h>
11
#include <ctype.h>
12
 
13
#ifndef _CTYPE_H
14
#define _CTYPE_H
15
#endif
16
#ifndef _WCTYPE_H
17
#define _WCTYPE_H
18
#endif
19
#include "../../libc/sysdeps/linux/common/bits/uClibc_ctype.h"
20
 
21
/*       0x9 : space  blank */
22
/*       0xa : space */
23
/*       0xb : space */
24
/*       0xc : space */
25
/*       0xd : space */
26
/*      0x20 : space  blank */
27
/*    0x1680 : space  blank */
28
/*    0x2000 : space  blank */
29
/*    0x2001 : space  blank */
30
/*    0x2002 : space  blank */
31
/*    0x2003 : space  blank */
32
/*    0x2004 : space  blank */
33
/*    0x2005 : space  blank */
34
/*    0x2006 : space  blank */
35
/*    0x2008 : space  blank */
36
/*    0x2009 : space  blank */
37
/*    0x200a : space  blank */
38
/*    0x200b : space  blank */
39
/*    0x2028 : space */
40
/*    0x2029 : space */
41
/*    0x3000 : space  blank */
42
 
43
/*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
44
/*  typecount[ 1] =      742  C_alpha_lower */
45
/*  typecount[ 2] =        4  C_alpha_upper_lower */
46
/*  typecount[ 3] =      731  C_alpha_upper */
47
/*  typecount[ 4] =       10  C_digit */
48
/*  typecount[ 5] =    10270  C_punct */
49
/*  typecount[ 6] =        0  C_graph */
50
/*  typecount[ 7] =        0  C_print_space_nonblank */
51
/*  typecount[ 8] =       14  C_print_space_blank */
52
/*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
53
/*  typecount[10] =        0  C_space_blank_noncntrl */
54
/*  typecount[11] =        6  C_cntrl_space_nonblank */
55
/*  typecount[12] =        1  C_cntrl_space_blank */
56
/*  typecount[13] =       60  C_cntrl_nonspace */
57
/*  typecount[14] =    96100  C_unclassified */
58
/*  typecount[15] =        0  empty_slot */
59
 
60
 
61
 
62
/* Set to #if 0 to restrict wchars to 16 bits. */
63
#if 1
64
#define RANGE 0x2ffffUL
65
#elif 0
66
#define RANGE 0x1ffffUL
67
#else
68
#define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
69
#endif
70
 
71
#if 0
72
/* Classification codes. */
73
 
74
static const char *typename[] = {
75
        "C_unclassified",
76
        "C_alpha_nonupper_nonlower",
77
        "C_alpha_lower",
78
        "C_alpha_upper_lower",
79
        "C_alpha_upper",
80
        "C_digit",
81
        "C_punct",
82
        "C_graph",
83
        "C_print_space_nonblank",
84
        "C_print_space_blank",
85
        "C_space_nonblank_noncntrl",
86
        "C_space_blank_noncntrl",
87
        "C_cntrl_space_nonblank",
88
        "C_cntrl_space_blank",
89
        "C_cntrl_nonspace",
90
        "empty_slot"
91
};
92
#endif
93
 
94
#if 0
95
/* Taking advantage of the C99 mutual-exclusion guarantees for the various
96
 * (w)ctype classes, including the descriptions of printing and control
97
 * (w)chars, we can place each in one of the following mutually-exclusive
98
 * subsets.  Since there are less than 16, we can store the data for
99
 * each (w)char in a nibble. In contrast, glibc uses an unsigned int
100
 * per (w)char, with one bit flag for each is* type.  While this allows
101
 * a simple '&' operation to determine the type vs. a range test and a
102
 * little special handling for the "blank" and "xdigit" types in my
103
 * approach, it also uses 8 times the space for the tables on the typical
104
 * 32-bit archs we support. */
105
enum {
106
        __CTYPE_unclassified = 0,
107
        __CTYPE_alpha_nonupper_nonlower,
108
        __CTYPE_alpha_lower,
109
        __CTYPE_alpha_upper_lower,
110
        __CTYPE_alpha_upper,
111
        __CTYPE_digit,
112
        __CTYPE_punct,
113
        __CTYPE_graph,
114
        __CTYPE_print_space_nonblank,
115
        __CTYPE_print_space_blank,
116
        __CTYPE_space_nonblank_noncntrl,
117
        __CTYPE_space_blank_noncntrl,
118
        __CTYPE_cntrl_space_nonblank,
119
        __CTYPE_cntrl_space_blank,
120
        __CTYPE_cntrl_nonspace,
121
};
122
#endif
123
 
124
/* Hex-digit test built from a class code D and the character value X:
 * true when D is the digit class, or when X folded with 0x20 lies
 * in 'a'..'f'. */
#define __CTYPE_isxdigit(D,X) \
	(__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))

/* Table-side counterparts of the isw*() predicates.
 *
 * NOTE: every one of these ignores its argument for classification and
 * expands to a test on an identifier literally named `d`, which must be
 * in scope at the point of use and hold the class code of the character
 * being tested.  Only mywxdigit() looks at its argument as well. */
#define mywalnum(x)	__CTYPE_isalnum(d)
#define mywalpha(x)	__CTYPE_isalpha(d)
#define mywblank(x)	__CTYPE_isblank(d)
#define mywcntrl(x)	__CTYPE_iscntrl(d)
#define mywdigit(x)	__CTYPE_isdigit(d)
#define mywgraph(x)	__CTYPE_isgraph(d)
#define mywlower(x)	__CTYPE_islower(d)
#define mywprint(x)	__CTYPE_isprint(d)
#define mywpunct(x)	__CTYPE_ispunct(d)
#define mywspace(x)	__CTYPE_isspace(d)
#define mywupper(x)	__CTYPE_isupper(d)
#define mywxdigit(x)	__CTYPE_isxdigit(d,x)
139
 
140
/* One distinct (tolower, toupper) offset pair.  Each field is the signed
 * distance from a wide character to its converted form. */
typedef struct {
	short l;	/* towlower(c) - c */
	short u;	/* towupper(c) - c */
} uldiff_entry;
144
 
145
/* Description of one compressed lookup table.  A table is made of three
 * byte arrays (ii, ti, ut) that output_table() writes back to back in
 * that order. */
typedef struct {
	/* Lengths, in bytes, of the three arrays below. */
	uint16_t ii_len;
	uint16_t ti_len;
	uint16_t ut_len;

	/* Shift counts used when splitting a character value into the
	 * ii / ti / ut indices. */
	unsigned char ii_shift;
	unsigned char ti_shift;

	/* The three levels of the table, first level to last. */
	unsigned char *ii;
	unsigned char *ti;
	unsigned char *ut;
} table_data;
157
 
158
 
159
/* Write COUNT bytes as C initializer elements, twelve per line.  A line
 * break is emitted before the first element and before every twelfth
 * one after it. */
static void emit_byte_rows(FILE *fp, const unsigned char *bytes, size_t count)
{
	size_t k;

	for (k = 0; k < count; k++) {
		if (k % 12 == 0) {
			fprintf(fp, "\n");
		}
		fprintf(fp, " %#04x,", bytes[k]);
	}
}

/* Emit one compressed table to FP as C source.
 *
 *   fp   - destination stream
 *   name - table tag spliced into every generated identifier
 *   tbl  - lengths, shifts and data of the table
 *
 * The length and shift #defines are always written.  The data array,
 * which is the ii, ti and ut bytes concatenated in that order, is
 * wrapped in "#ifdef WANT_WC<name>_data". */
void output_table(FILE *fp, const char *name, table_data *tbl)
{
	size_t total = tbl->ii_len + tbl->ti_len + tbl->ut_len;

	fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
	fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
	fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);

	fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
	fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);

	fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);

	fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, total);
	emit_byte_rows(fp, tbl->ii, tbl->ii_len);
	emit_byte_rows(fp, tbl->ti, tbl->ti_len);
	emit_byte_rows(fp, tbl->ut, tbl->ut_len);
	fprintf(fp, "\n};\n\n");

	fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
}
196
 
197
/* Print the shifts, the three array lengths and their sum for TBL on
 * stdout. */
static void dump_table_data(table_data *tbl)
{
	int ii_bytes = (int) tbl->ii_len;
	int ti_bytes = (int) tbl->ti_len;
	int ut_bytes = (int) tbl->ut_len;
	int total_bytes = ii_bytes + ti_bytes + ut_bytes;

	printf("ii_shift = %d  ti_shift = %d\n"
		   "ii_len = %d  ti_len = %d  ut_len = %d\n"
		   "total = %d\n",
		   tbl->ii_shift, tbl->ti_shift,
		   ii_bytes, ti_bytes, ut_bytes,
		   total_bytes);
}
206
 
207
/* Number of bytes nu_memcmp() compares per element.  Must be set before
 * the comparator is handed to qsort(). */
static size_t nu_val;

/* qsort() comparator for an array of pointers to byte blocks: orders two
 * elements by comparing the first nu_val bytes each one points to. */
int nu_memcmp(const void *a, const void *b)
{
	const unsigned char *lhs = *(unsigned char **) a;
	const unsigned char *rhs = *(unsigned char **) b;

	return memcmp(lhs, rhs, nu_val);
}
214
 
215
static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
216
 
217
#define MAXTO           255                     /* Restrict to minimal unsigned char max. */
218
 
219
int main(int argc, char **argv)
220
{
221
        long int u, l, tt;
222
        size_t smallest, t;
223
        unsigned int c;
224
        unsigned int d;
225
        int i, n;
226
        int ul_count = 0;
227
        uldiff_entry uldiff[MAXTO];
228
        table_data cttable;
229
        table_data ultable;
230
        table_data combtable;
231
        table_data widthtable;
232
        long int last_comb = 0;
233
 
234
        unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
235
        unsigned char ult[RANGE+1];     /* upper/lower table */
236
        unsigned char combt[(RANGE/4)+1];       /* combining */
237
        unsigned char widtht[(RANGE/4)+1];      /* width */
238
        wctrans_t totitle;
239
        wctype_t is_comb, is_comb3;
240
 
241
        long int typecount[16];
242
        const char *typename[16];
243
        static const char empty_slot[] = "empty_slot";
244
        int built = 0;
245
 
246
#define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
247
 
248
        for (i=0 ; i < 16 ; i++) {
249
                typename[i] = empty_slot;
250
        }
251
 
252
        INIT_TYPENAME(unclassified);
253
        INIT_TYPENAME(alpha_nonupper_nonlower);
254
        INIT_TYPENAME(alpha_lower);
255
        INIT_TYPENAME(alpha_upper_lower);
256
        INIT_TYPENAME(alpha_upper);
257
        INIT_TYPENAME(digit);
258
        INIT_TYPENAME(punct);
259
        INIT_TYPENAME(graph);
260
        INIT_TYPENAME(print_space_nonblank);
261
        INIT_TYPENAME(print_space_blank);
262
        INIT_TYPENAME(space_nonblank_noncntrl);
263
        INIT_TYPENAME(space_blank_noncntrl);
264
        INIT_TYPENAME(cntrl_space_nonblank);
265
        INIT_TYPENAME(cntrl_space_blank);
266
        INIT_TYPENAME(cntrl_nonspace);
267
 
268
        setvbuf(stdout, NULL, _IONBF, 0);
269
 
270
        while (--argc) {
271
                if (!setlocale(LC_CTYPE, *++argv)) {
272
                        printf("setlocale(LC_CTYPE,%s) failed!\n", *argv);
273
                        continue;
274
                }
275
 
276
                if (!(totitle = wctrans("totitle"))) {
277
                        printf("no totitle transformation.\n");
278
                }
279
                if (!(is_comb = wctype("combining"))) {
280
                        printf("no combining wctype.\n");
281
                }
282
                if (!(is_comb3 = wctype("combining_level3"))) {
283
                        printf("no combining_level3 wctype.\n");
284
                }
285
 
286
                if (!built) {
287
                built = 1;
288
                ul_count = 1;
289
                uldiff[0].u = uldiff[0].l = 0;
290
 
291
                memset(wct, 0, sizeof(wct));
292
                memset(combt, 0, sizeof(combt));
293
                memset(widtht, 0, sizeof(widtht));
294
 
295
                for (i = 0 ; i < 16 ; i++) {
296
                        typecount[i] = 0;
297
                }
298
 
299
                for (c=0 ; c <= RANGE ; c++) {
300
                        if (iswdigit(c)) {
301
                                d = __CTYPE_digit;
302
                        } else if (iswalpha(c)) {
303
                                d = __CTYPE_alpha_nonupper_nonlower;
304
                                if (iswlower(c)) {
305
                                        d = __CTYPE_alpha_lower;
306
                                        if (iswupper(c)) {
307
                                                d = __CTYPE_alpha_upper_lower;
308
                                        }
309
                                } else if (iswupper(c)) {
310
                                        d = __CTYPE_alpha_upper;
311
                                }
312
                        } else if (iswpunct(c)) {
313
                                d = __CTYPE_punct;
314
                        } else if (iswgraph(c)) {
315
                                d = __CTYPE_graph;
316
                        } else if (iswprint(c)) {
317
                                d = __CTYPE_print_space_nonblank;
318
                                if (iswblank(c)) {
319
                                        d = __CTYPE_print_space_blank;
320
                                }
321
                        } else if (iswspace(c) && !iswcntrl(c)) {
322
                                d = __CTYPE_space_nonblank_noncntrl;
323
                                if (iswblank(c)) {
324
                                        d = __CTYPE_space_blank_noncntrl;
325
                                }
326
                        } else if (iswcntrl(c)) {
327
                                d = __CTYPE_cntrl_nonspace;
328
                                if (iswspace(c)) {
329
                                        d = __CTYPE_cntrl_space_nonblank;
330
                                        if (iswblank(c)) {
331
                                                d = __CTYPE_cntrl_space_blank;
332
                                        }
333
                                }
334
                        } else {
335
                                d = __CTYPE_unclassified;
336
                        }
337
 
338
                        ++typecount[d];
339
 
340
#if 0
341
                        if (iswspace(c)) {
342
                                if (iswblank(c)) {
343
                                        printf("%#8x : space  blank\n", c);
344
                                } else {
345
                                        printf("%#8x : space\n", c);
346
                                }
347
                        }
348
#endif
349
 
350
#if 0
351
                        if (c < 256) {
352
                                unsigned int glibc;
353
 
354
                                glibc = 0;
355
                                if (isalnum(c)) ++glibc; glibc <<= 1;
356
                                if (isalpha(c)) ++glibc; glibc <<= 1;
357
                                if (isblank(c)) ++glibc; glibc <<= 1;
358
                                if (iscntrl(c)) ++glibc; glibc <<= 1;
359
                                if (isdigit(c)) ++glibc; glibc <<= 1;
360
                                if (isgraph(c)) ++glibc; glibc <<= 1;
361
                                if (islower(c)) ++glibc; glibc <<= 1;
362
                                if (isprint(c)) ++glibc; glibc <<= 1;
363
                                if (ispunct(c)) ++glibc; glibc <<= 1;
364
                                if (isspace(c)) ++glibc; glibc <<= 1;
365
                                if (isupper(c)) ++glibc; glibc <<= 1;
366
                                if (isxdigit(c)) ++glibc;
367
                                printf("%#8x : ctype %#4x\n", c, glibc);
368
                        }
369
#endif
370
 
371
#if 1
372
                        /* Paranoid checking... */
373
                        {
374
                                unsigned int glibc;
375
                                unsigned int mine;
376
 
377
                                glibc = 0;
378
                                if (iswalnum(c)) ++glibc; glibc <<= 1;
379
                                if (iswalpha(c)) ++glibc; glibc <<= 1;
380
                                if (iswblank(c)) ++glibc; glibc <<= 1;
381
                                if (iswcntrl(c)) ++glibc; glibc <<= 1;
382
                                if (iswdigit(c)) ++glibc; glibc <<= 1;
383
                                if (iswgraph(c)) ++glibc; glibc <<= 1;
384
                                if (iswlower(c)) ++glibc; glibc <<= 1;
385
                                if (iswprint(c)) ++glibc; glibc <<= 1;
386
                                if (iswpunct(c)) ++glibc; glibc <<= 1;
387
                                if (iswspace(c)) ++glibc; glibc <<= 1;
388
                                if (iswupper(c)) ++glibc; glibc <<= 1;
389
                                if (iswxdigit(c)) ++glibc;
390
 
391
                                mine = 0;
392
                                if (mywalnum(c)) ++mine; mine <<= 1;
393
                                if (mywalpha(c)) ++mine; mine <<= 1;
394
                                if (mywblank(c)) ++mine; mine <<= 1;
395
                                if (mywcntrl(c)) ++mine; mine <<= 1;
396
                                if (mywdigit(c)) ++mine; mine <<= 1;
397
                                if (mywgraph(c)) ++mine; mine <<= 1;
398
                                if (mywlower(c)) ++mine; mine <<= 1;
399
                                if (mywprint(c)) ++mine; mine <<= 1;
400
                                if (mywpunct(c)) ++mine; mine <<= 1;
401
                                if (mywspace(c)) ++mine; mine <<= 1;
402
                                if (mywupper(c)) ++mine; mine <<= 1;
403
                                if (mywxdigit(c)) ++mine;
404
 
405
                                if (glibc != mine) {
406
                                        printf("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
407
                                        return EXIT_FAILURE;
408
                                }
409
 
410
#if 0
411
                                if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
412
/*                                      if (!iswpunct(c)) { */
413
                                                printf("%#8x : %d %d %#4x\n",
414
                                                           c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
415
/*                                      } */
416
                                }
417
#endif
418
#if 0
419
                                if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
420
                                        if (!last_comb) {
421
                                                printf("%#8x - ", c);
422
                                                last_comb = c;
423
                                        } else if (last_comb + 1 < c) {
424
                                                printf("%#8x\n%#8x - ", last_comb, c);
425
                                                last_comb = c;
426
                                        } else {
427
                                                last_comb = c;
428
                                        }
429
                                }
430
#endif
431
                        }
432
#endif
433
 
434
                        combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
435
                                                   << ((c & 3) << 1));
436
/*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
437
 
438
/*                      widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
439
 
440
                        if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
441
                                d <<= 4;
442
                        }
443
                        wct[c/2] |= d;
444
 
445
                        l = towlower(c) - c;
446
                        u = towupper(c) - c;
447
                        ult[c] = 0;
448
                        if (l || u) {
449
                                if ((l != (short)l) || (u != (short)u)) {
450
                                        printf("range assumption error!  %x  %ld  %ld\n", c, l, u);
451
                                        return EXIT_FAILURE;
452
                                }
453
                                for (i=0 ; i < ul_count ; i++) {
454
                                        if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
455
                                                goto found;
456
                                        }
457
                                }
458
                                uldiff[ul_count].l = l;
459
                                uldiff[ul_count].u = u;
460
                                ++ul_count;
461
                                if (ul_count > MAXTO) {
462
                                        printf("too many touppers/tolowers!\n");
463
                                        return EXIT_FAILURE;
464
                                }
465
                        found:
466
                                ult[c] = i;
467
                        }
468
                }
469
 
470
                for (i = 0 ; i < 16 ; i++) {
471
                        printf("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
472
                }
473
 
474
                printf("optimizing is* table..\n");
475
                n = -1;
476
                smallest = SIZE_MAX;
477
                cttable.ii = NULL;
478
                for (i=0 ; i < 14 ; i++) {
479
                        t = newopt(wct, (RANGE/2)+1, i, &cttable);
480
                        if (smallest >= t) {
481
                                n = i;
482
                                smallest = t;
483
/*                      } else { */
484
/*                              break; */
485
                        }
486
                }
487
                printf("smallest = %zu\n", smallest);
488
                if (!(cttable.ii = malloc(smallest))) {
489
                        printf("couldn't allocate space!\n");
490
                        return EXIT_FAILURE;
491
                }
492
                smallest = SIZE_MAX;
493
                newopt(wct, (RANGE/2)+1, n, &cttable);
494
                ++cttable.ti_shift;             /* correct for nibble mode */
495
 
496
 
497
 
498
                printf("optimizing u/l-to table..\n");
499
                smallest = SIZE_MAX;
500
                ultable.ii = NULL;
501
                for (i=0 ; i < 14 ; i++) {
502
                        t = newopt(ult, RANGE+1, i, &ultable);
503
                        if (smallest >= t) {
504
                                n = i;
505
                                smallest = t;
506
/*                      } else { */
507
/*                              break; */
508
                        }
509
                }
510
                printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
511
                           smallest, 4 * ul_count, smallest + 4 * ul_count);
512
                printf("smallest = %zu\n", smallest);
513
                if (!(ultable.ii = malloc(smallest))) {
514
                        printf("couldn't allocate space!\n");
515
                        return EXIT_FAILURE;
516
                }
517
                smallest = SIZE_MAX;
518
                newopt(ult, RANGE+1, n, &ultable);
519
 
520
 
521
#if 0
522
                printf("optimizing comb table..\n");
523
                smallest = SIZE_MAX;
524
                combtable.ii = NULL;
525
                for (i=0 ; i < 14 ; i++) {
526
                        t = newopt(combt, sizeof(combt), i, &combtable);
527
                        if (smallest >= t) {
528
                                n = i;
529
                                smallest = t;
530
/*                      } else { */
531
/*                              break; */
532
                        }
533
                }
534
                printf("smallest = %zu\n", smallest);
535
                if (!(combtable.ii = malloc(smallest))) {
536
                        printf("couldn't allocate space!\n");
537
                        return EXIT_FAILURE;
538
                }
539
                smallest = SIZE_MAX;
540
                newopt(combt, sizeof(combt), n, &combtable);
541
                combtable.ti_shift += 4; /* correct for 4 entries per */
542
#endif
543
 
544
 
545
#if 0
546
                printf("optimizing width table..\n");
547
                smallest = SIZE_MAX;
548
                widthtable.ii = NULL;
549
                for (i=0 ; i < 14 ; i++) {
550
                        t = newopt(widtht, sizeof(widtht), i, &widthtable);
551
                        if (smallest >= t) {
552
                                n = i;
553
                                smallest = t;
554
/*                      } else { */
555
/*                              break; */
556
                        }
557
                }
558
                printf("smallest = %zu\n", smallest);
559
                if (!(widthtable.ii = malloc(smallest))) {
560
                        printf("couldn't allocate space!\n");
561
                        return EXIT_FAILURE;
562
                }
563
                smallest = SIZE_MAX;
564
                newopt(widtht, sizeof(widtht), n, &widthtable);
565
                widthtable.ti_shift += 4; /* correct for 4 entries per */
566
#endif
567
 
568
#if 0
569
                printf("optimizing comb3 table..\n");
570
                smallest = SIZE_MAX;
571
                comb3table.ii = NULL;
572
                for (i=0 ; i < 14 ; i++) {
573
                        t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
574
                        if (smallest >= t) {
575
                                n = i;
576
                                smallest = t;
577
/*                      } else { */
578
/*                              break; */
579
                        }
580
                }
581
                printf("smallest = %zu\n", smallest);
582
                if (!(comb3table.ii = malloc(smallest))) {
583
                        printf("couldn't allocate space!\n");
584
                        return EXIT_FAILURE;
585
                }
586
                smallest = SIZE_MAX;
587
                newopt(comb3t, sizeof(comb3t), n, &comb3table);
588
                comb3table.ti_shift += 8; /* correct for 4 entries per */
589
#endif
590
 
591
                dump_table_data(&cttable);
592
                dump_table_data(&ultable);
593
                dump_table_data(&combtable);
594
                }
595
 
596
                printf("verifying for %s...\n", *argv);
597
#if RANGE == 0xffffU
598
                for (c=0 ; c <= 0xffffUL ; c++)
599
#else
600
                for (c=0 ; c <= 0x10ffffUL ; c++)
601
#endif
602
                        {
603
                        unsigned int glibc;
604
                        unsigned int mine;
605
                        unsigned int upper, lower;
606
 
607
#if 0
608
#if RANGE < 0x10000UL
609
                        if (c == 0x10000UL) {
610
                                c = 0x30000UL;  /* skip 1st and 2nd sup planes */
611
                        }
612
#elif RANGE < 0x20000UL
613
                        if (c == 0x20000UL) {
614
                                c = 0x30000UL;  /* skip 2nd sup planes */
615
                        }
616
#endif
617
#endif
618
 
619
                        glibc = 0;
620
                        if (iswalnum(c)) ++glibc; glibc <<= 1;
621
                        if (iswalpha(c)) ++glibc; glibc <<= 1;
622
                        if (iswblank(c)) ++glibc; glibc <<= 1;
623
                        if (iswcntrl(c)) ++glibc; glibc <<= 1;
624
                        if (iswdigit(c)) ++glibc; glibc <<= 1;
625
                        if (iswgraph(c)) ++glibc; glibc <<= 1;
626
                        if (iswlower(c)) ++glibc; glibc <<= 1;
627
                        if (iswprint(c)) ++glibc; glibc <<= 1;
628
                        if (iswpunct(c)) ++glibc; glibc <<= 1;
629
                        if (iswspace(c)) ++glibc; glibc <<= 1;
630
                        if (iswupper(c)) ++glibc; glibc <<= 1;
631
                        if (iswxdigit(c)) ++glibc;
632
 
633
                        {
634
                                unsigned int u;
635
                                int n, sc;
636
                                int i0, i1;
637
 
638
                                u = c;
639
                                if (u <= RANGE) {
640
                                        sc = u & ((1 << cttable.ti_shift) - 1);
641
                                        u >>= cttable.ti_shift;
642
                                        n = u & ((1 << cttable.ii_shift) - 1);
643
                                        u >>= cttable.ii_shift;
644
 
645
                                        i0 = cttable.ii[u];
646
                                        i0 <<= cttable.ii_shift;
647
                                        i1 = cttable.ti[i0 + n];
648
                                        i1 <<= (cttable.ti_shift-1);
649
                                        d = cttable.ut[i1 + (sc >> 1)];
650
 
651
                                        if (sc & 1) {
652
                                                d >>= 4;
653
                                        }
654
                                        d &= 0x0f;
655
                                } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
656
                                        d = __CTYPE_punct;
657
                                } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
658
                                        if ((c & 0xffffU) <= 0xfffdU) {
659
                                                d = __CTYPE_punct;
660
                                        } else {
661
                                                d = __CTYPE_unclassified;
662
                                        }
663
                                } else {
664
                                        d = __CTYPE_unclassified;
665
                                }
666
 
667
                        mine = 0;
668
                        if (mywalnum(c)) ++mine; mine <<= 1;
669
                        if (mywalpha(c)) ++mine; mine <<= 1;
670
                        if (mywblank(c)) ++mine; mine <<= 1;
671
                        if (mywcntrl(c)) ++mine; mine <<= 1;
672
                        if (mywdigit(c)) ++mine; mine <<= 1;
673
                        if (mywgraph(c)) ++mine; mine <<= 1;
674
                        if (mywlower(c)) ++mine; mine <<= 1;
675
                        if (mywprint(c)) ++mine; mine <<= 1;
676
                        if (mywpunct(c)) ++mine; mine <<= 1;
677
                        if (mywspace(c)) ++mine; mine <<= 1;
678
                        if (mywupper(c)) ++mine; mine <<= 1;
679
                        if (mywxdigit(c)) ++mine;
680
 
681
                        if (glibc != mine) {
682
                                printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
683
                                if (c < 0x30000UL) {
684
                                        printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
685
                                }
686
                        }
687
                                upper = lower = u = c;
688
                                if (u <= RANGE) {
689
                                        sc = u & ((1 << ultable.ti_shift) - 1);
690
                                        u >>= ultable.ti_shift;
691
                                        n = u & ((1 << ultable.ii_shift) - 1);
692
                                        u >>= ultable.ii_shift;
693
 
694
                                        i0 = ultable.ii[u];
695
                                        i0 <<= ultable.ii_shift;
696
                                        i1 = ultable.ti[i0 + n];
697
                                        i1 <<= (ultable.ti_shift);
698
                                        i1 += sc;
699
                                        i0 = ultable.ut[i1];
700
                                        upper = c + uldiff[i0].u;
701
                                        lower = c + uldiff[i0].l;
702
                                }
703
 
704
                        if (towupper(c) != upper) {
705
                                printf("%#8x : towupper glibc %#4x != %#4x mine\n",
706
                                           c, towupper(c), upper);
707
                        }
708
 
709
                        if (towlower(c) != lower) {
710
                                printf("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
711
                                           c, towlower(c), lower, i0);
712
                        }
713
 
714
                        if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
715
                                printf("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
716
                                           c, tt, upper, i0);
717
                        }
718
                        }
719
 
720
 
721
                        if ((c & 0xfff) == 0xfff) printf(".");
722
                }
723
                printf("done\n");
724
        }
725
 
726
        if (1) {
727
                FILE *fp;
728
 
729
                if (!(fp = fopen("wctables.h", "w"))) {
730
                        printf("couldn't open wctables.h!\n");
731
                        return EXIT_FAILURE;
732
                }
733
 
734
                fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
735
                                (unsigned long) RANGE);
736
                output_table(fp, "ctype", &cttable);
737
                output_table(fp, "uplow", &ultable);
738
 
739
 
740
#warning fix the upper bound on the upper/lower tables... save 200 bytes or so
741
                fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
742
                fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
743
                fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
744
                           2 * (size_t) ul_count);
745
                for (i=0 ; i < ul_count ; i++) {
746
                        if (i % 4 == 0) {
747
                                fprintf(fp, "\n");
748
                        }
749
                        fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
750
                }
751
                fprintf(fp, "\n};\n\n");
752
                fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
753
 
754
 
755
/*              output_table(fp, "comb", &combtable); */
756
/*              output_table(fp, "width", &widthtable); */
757
 
758
                fclose(fp);
759
        }
760
 
761
        return EXIT_SUCCESS;
762
}
763
 
764
/*
 * Deduplicate fixed-size blocks of a byte table and report the resulting size.
 *
 * The usize bytes at ut are viewed as (usize >> shift) blocks of (1 << shift)
 * bytes each.  Pointers to the blocks are sorted, equal blocks are merged, and
 * an index array (uit) records, for every original block, the number of the
 * unique block it maps to.  The call fails if more than 255 unique blocks are
 * needed, because index entries are single bytes.
 *
 * The function runs at two depths, told apart by the static 'recurse' flag
 * (so it is not reentrant):
 *
 *  - Outer call (recurse == 0): additionally tries to compress the index
 *    array itself by calling newopt() on uit with shifts 1..13 and keeps the
 *    shift giving the smallest total.  While probing, tbl->ii is temporarily
 *    set to NULL so the nested call only measures.  If tbl->ii is non-NULL,
 *    the best nested shift is then re-run to fill tbl->ii / tbl->ti, and this
 *    level stores ti_shift, ut_len, ut and copies the unique blocks to ut.
 *
 *  - Nested call (recurse != 0): if tbl->ii is non-NULL, stores ii_shift,
 *    ii_len, the index bytes at tbl->ii, and the unique blocks at tbl->ti
 *    (placed directly after the ii bytes), together with ti_len.
 *
 * ut     table to compress
 * usize  number of bytes in ut
 * shift  log2 of the block size
 * tbl    destination descriptor; a NULL tbl, or a NULL tbl->ii, means
 *        "measure only, store nothing"
 *
 * Returns SIZE_MAX when more than 255 unique blocks would be required,
 * otherwise the smallest size found (index entries plus unique-block bytes).
 *
 * Both work arrays (ti and uit) hold RANGE+1 entries and live on the stack.
 */
size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
{
        static int recurse = 0;         /* nonzero while running the nested call */
        unsigned char *ti[RANGE+1];     /* table index: one pointer per block */
        size_t numblocks;
        size_t blocksize;
        size_t uniq;
        size_t i, j;
        size_t smallest, t;
        unsigned char *ii_save;
        size_t uniqblock[256];          /* sorted position of each unique block */
        unsigned char uit[RANGE+1];     /* unique-block number of each original block */
        int shift2;
        int cmp;

        ii_save = NULL;
        blocksize = (size_t) 1 << shift;
        numblocks = usize >> shift;

        /* init table index */
        for (i=j=0 ; i < numblocks ; i++) {
                ti[i] = ut + j;
                j += blocksize;
        }

        /* sort */
        /* nu_val is read by nu_memcmp (defined elsewhere in this file),
         * presumably as the number of bytes to compare per block. */
        nu_val = blocksize;
        qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);

        /* The first sorted block is always unique block 0.  uniqblock[0] must
         * be set explicitly: the copy loops below read it for i == 0. */
        uniq = 1;
        uniqblock[0] = 0;
        uit[(ti[0]-ut)/blocksize] = 0;
        for (i=1 ; i < numblocks ; i++) {
                cmp = memcmp(ti[i-1], ti[i], blocksize);
                if (cmp < 0) {
                        /* A new distinct block starts here. */
                        if (++uniq > 255) {
                                break;
                        }
                        uniqblock[uniq - 1] = i;
                } else if (cmp > 0) {
                        /* Sanity check: the sorted order must be non-decreasing. */
                        printf("bad sort %zu!\n", i);
                        abort();
                }
                uit[(ti[i]-ut)/blocksize] = uniq - 1;
        }

        smallest = SIZE_MAX;
        shift2 = -1;
        if (uniq <= 255) {
                /* Cost of this level alone: one index byte per block plus the
                 * unique blocks themselves. */
                smallest = numblocks + uniq * blocksize;
                if (!recurse) {
                        ++recurse;
                        for (j=1 ; j < 14 ; j++) {
                                if ((numblocks >> j) < 2) break;
                                /* Hide the output buffer so the nested call
                                 * only measures. */
                                if (tbl) {
                                        ii_save = tbl->ii;
                                        tbl->ii = NULL;
                                }
                                if ((t = newopt(uit, numblocks, (int) j, tbl)) < SIZE_MAX) {
                                        t += uniq * blocksize;
                                }
                                if (tbl) {
                                        tbl->ii = ii_save;
                                }
                                if (smallest >= t) {
                                        shift2 = (int) j;
                                        smallest = t;
                                        if (!tbl || !tbl->ii) {
                                                printf("ishift %d  tshift %d  size %zu\n",
                                                           shift2, shift, t);
                                        }
                                }
                        }
                        --recurse;
                }
        } else {
                return SIZE_MAX;
        }

        if (tbl && tbl->ii) {
                if (recurse) {
                        /* Nested call: emit the top-level index (ii) and the
                         * second-level blocks (ti) right behind it. */
                        tbl->ii_shift = shift;
                        tbl->ii_len = numblocks;
                        memcpy(tbl->ii, uit, numblocks);
                        tbl->ti = tbl->ii + tbl->ii_len;
                        tbl->ti_len = uniq * blocksize;
                        for (i=0 ; i < uniq ; i++) {
                                memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
                        }
                } else {
                        /* Outer call: rerun the best nested split for real,
                         * then append this level's unique blocks (ut).
                         * NOTE(review): shift2 is still -1 here if no nested
                         * shift was accepted above; that would be passed on as
                         * a negative shift count -- confirm this cannot occur
                         * for the tables main() feeds in. */
                        ++recurse;
                        printf("setting ishift %d  tshift %d\n",
                                                           shift2, shift);
                        newopt(uit, numblocks, shift2, tbl);
                        --recurse;
                        tbl->ti_shift = shift;
                        tbl->ut_len = uniq * blocksize;
                        tbl->ut = tbl->ti + tbl->ti_len;
                        for (i=0 ; i < uniq ; i++) {
                                memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
                        }
                }
        }
        return smallest;
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.