OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [linux/] [uClibc/] [extra/] [locale/] [gen_collate.c] - Blame information for rev 1765

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 1325 phoenix
/* TODO:
2
 *
3
 * add UNDEFINED at end if not specified
4
 * convert POSITION -> FORWARD,POSITION
5
 *
6
 *
7
 * deal with lowercase in <Uhhhh>
8
 *
9
 * what about reorders that keep the same rule?
10
 *
11
 * remove "unused" collation elements? (probably doesn't save much)
12
 *
13
 * add_rule function ... returns index into rule table after possibly adding custom-indexed rule
14
 * but don't forget about multichar weights... replace with strings of indexes
15
 *
16
 */
17
 
18
 
19
#ifndef _GNU_SOURCE
20
#define _GNU_SOURCE
21
#endif
22
 
23
#include <stddef.h>
24
#include <stdio.h>
25
#include <stdlib.h>
26
#include <string.h>
27
#include <stdint.h>
28
#include <stdarg.h>
29
#include <limits.h>
30
#include <ctype.h>
31
#include <assert.h>
32
#include <search.h>
33
 
34
typedef struct {
35
        char *name;                                     /*  */
36
 
37
        int num_weights;                        /*  */
38
 
39
        int ii_shift;                           /*  */
40
        int ti_shift;                           /*  */
41
        int ii_len;                                     /*  */
42
        int ti_len;                                     /*  */
43
        int max_weight;                         /*  */
44
        int num_col_base;                       /*  */
45
        int max_col_index;                      /*  */
46
        int undefined_idx;                      /*  */
47
        int range_low;                          /*  */
48
        int range_count;                        /* high - low */
49
        int range_base_weight;          /*  */
50
        int num_starters;                       /*  */
51
 
52
        int range_rule_offset;          /*  */
53
        int wcs2colidt_offset;          /*  */
54
        int index2weight_offset;        /*  */
55
        int index2ruleidx_offset;       /*  */
56
        int multistart_offset;          /*  */
57
 
58
} base_locale_t;
59
 
60
#define BASE_LOCALE_LEN 20
61
static base_locale_t base_locale_array[BASE_LOCALE_LEN];
62
static size_t base_locale_len;
63
 
64
typedef struct {
65
        char *name;                                     /*  */
66
 
67
        int base_idx;                           /*  */
68
 
69
        int undefined_idx;                      /*  */
70
 
71
        int overrides_offset;           /*  */
72
        int multistart_offset;          /*  */
73
} der_locale_t;
74
 
75
#define DER_LOCALE_LEN 300
76
static der_locale_t der_locale_array[DER_LOCALE_LEN];
77
static size_t der_locale_len;
78
 
79
 
80
#define OVERRIDE_LEN  50000
81
static uint16_t override_buffer[OVERRIDE_LEN];
82
static size_t override_len;
83
 
84
#define MULTISTART_LEN 10000
85
static uint16_t multistart_buffer[MULTISTART_LEN];
86
static size_t multistart_len;
87
 
88
#define WCS2COLIDT_LEN 200000
89
static uint16_t wcs2colidt_buffer[WCS2COLIDT_LEN];
90
static size_t wcs2colidt_len;
91
 
92
#define INDEX2WEIGHT_LEN 200000
93
static uint16_t index2weight_buffer[INDEX2WEIGHT_LEN];
94
static size_t index2weight_len;
95
 
96
static uint16_t index2ruleidx_buffer[INDEX2WEIGHT_LEN];
97
static size_t index2ruleidx_len;
98
 
99
#define WEIGHTSTR_LEN 10000
100
static uint16_t weightstr_buffer[WEIGHTSTR_LEN];
101
static size_t weightstr_len;
102
 
103
#define RULETABLE_LEN (1L<<16)
104
static uint16_t ruletable_buffer[RULETABLE_LEN];
105
static size_t ruletable_len;
106
 
107
 
108
#define RANGE (0x10000UL)
109
 
110
typedef uint16_t tbl_item;
111
 
112
static uint16_t u16_buf[10000];
113
static int u16_buf_len;
114
static int u16_starter;
115
 
116
typedef struct {
117
        uint16_t ii_len;
118
        uint16_t ti_len;
119
        uint16_t ut_len;
120
 
121
        unsigned char ii_shift;
122
        unsigned char ti_shift;
123
 
124
        tbl_item *ii;
125
        tbl_item *ti;
126
        tbl_item *ut;
127
} table_data;
128
 
129
 
130
static size_t newopt(tbl_item *ut, size_t usize, int shift, table_data *tbl);
131
 
132
 
133
#define MAX_COLLATION_WEIGHTS 4
134
 
135
#define MAX_FNO 1
136
#define MAX_FILES  (MAX_FNO + 1)
137
 
138
static FILE *fstack[MAX_FILES];
139
static char *fname[MAX_FILES];
140
static int lineno[MAX_FILES];
141
static int fno = -1;
142
 
143
static tbl_item wcs2index[RANGE];
144
 
145
static char linebuf[1024];
146
static char *pos;
147
static char *pos_e = NULL;
148
static char end_of_token = 0;            /* slot to save */
149
 
150
#define IN_ORDER                        0x01
151
#define IN_REORDER                      0x02
152
#define IN_REORDER_SECTIONS     0x04
153
static int order_state;
154
static int cur_num_weights;             /* number of weights in current use */
155
static char cur_rule[MAX_COLLATION_WEIGHTS];
156
 
157
static int anonsection = 0;
158
 
159
typedef struct ll_item_struct ll_item_t;
160
 
161
/*
 * Generic doubly linked list node used for section lists, item lists and
 * derived-locale lists.  Nodes are linked with insque() elsewhere in this
 * file, which is why the forward/backward pointers are the first two members.
 * Some lists are circular (a section's itm_list), others are walked until
 * ->next is NULL (section_list).
 */
struct ll_item_struct {
162
        ll_item_t *next;        /* following node */
163
        ll_item_t *prev;        /* preceding node */
164
        void *data;             /* payload; actual type is selected by data_type */
165
        int data_type;          /* DT_* flag(s) describing what data points to */
166
        int idx;                /* not referenced in this part of the file */
167
};
168
 
169
static ll_item_t *reorder_section_ptr = NULL;
170
static int superset;
171
static int superset_order_start_cnt; /* only support one order for now */
172
static int superset_in_sync;
173
static ll_item_t *comm_cur_ptr;
174
static ll_item_t *comm_prev_ptr;
175
 
176
/*
 * Per-level collation rule flags.  They are stored in the char rule arrays
 * (cur_rule, weight_t.rule, section_t.rules) and combined/tested bitwise,
 * e.g. R_FORWARD | R_POSITION.
 */
enum {
177
        R_FORWARD =             0x01,
178
        R_POSITION =    0x02,
179
        R_BACKWARD =    0x04            /* must be largest in value */
180
};
181
 
182
/*
 * One set of collation weights as parsed from a weight line.  Instances are
 * built on the stack and passed to register_weight(), whose returned pointer
 * is what items keep.
 */
typedef struct {
183
        size_t num_weights;     /* taken from cur_num_weights when the line is parsed */
184
        char rule[MAX_COLLATION_WEIGHTS];       /* copy of cur_rule; R_* flags per level */
185
        const char *colitem[MAX_COLLATION_WEIGHTS];     /* weight symbol per level; "." (STR_DITTO) when omitted */
186
} weight_t;
187
 
188
static void *root_weight = NULL;
189
size_t unique_weights = 0;
190
 
191
typedef struct {
192
        const char *symbol;
193
        weight_t *weight;
194
} weighted_item_t;
195
 
196
/*
 * A range of code points sharing one weight definition, created by
 * do_range() for the UCLIBC_RANGE keyword and stored in a DT_RANGE list node.
 */
typedef struct {
196
        const char *symbol1;    /* first ucode symbol of the range */
197
        const char *symbol2;    /* last ucode symbol of the range */
198
        int length;             /* hex value of symbol2 minus hex value of symbol1; do_range() rejects <= 0 */
199
        weight_t *weight;       /* result of register_weight() for the range's weights */
200
} range_item_t;
202
 
203
/*
 * A named group of collation items.  Created by new_section(); items are
 * appended at the tail (itm_list->prev) as weight lines are parsed.
 */
typedef struct {
203
        const char *name;       /* interned via xsymdup() in new_section() */
204
        ll_item_t *itm_list;            /* circular list of DT_WEIGHTED (weighted_item_t) and DT_RANGE (range_item_t) nodes; NULL while empty */
205
        size_t num_items;       /* incremented once per node appended to itm_list */
206
        size_t num_rules;       /* number of entries of rules[] in use */
207
        char rules[MAX_COLLATION_WEIGHTS];      /* R_* flags per weight level */
208
} section_t;
210
 
211
static section_t *cur_section = NULL;
212
 
213
typedef struct {
214
        const char *symbol;
215
        ll_item_t *node;
216
} wi_index_t;
217
 
218
typedef struct col_locale_struct col_locale_t;
219
 
220
/*
 * State collected for one collation locale (base or derived).  The void *
 * "root_" members are tree roots; root_wi_index, root_derived_wi,
 * root_wi_index_reordered and root_starter_char are counted with tnumnodes()
 * in processfile().
 */
struct  col_locale_struct {
221
        char *name;             /* locale name; key used by col_locale_cmp() */
222
        void *root_colitem;                     /* all base and derived, or just derived */
223
        void *root_element;
224
        void *root_scripts;
225
        void *root_wi_index;
226
        void *root_wi_index_reordered;
227
        ll_item_t *section_list;        /* list of DT_SECTION / DT_REORDER nodes whose data is a section_t */
228
        col_locale_t *base_locale;      /* null if this is a base */
229
        void *root_derived_wi;
230
        ll_item_t *derived_list;        /* on a base: DT_COL_LOCALE nodes, one per derived locale processed */
231
        void *root_starter_char;
232
        void *root_starter_all;
233
        ll_item_t *undefined_idx;       /* a derived locale starts with its base's value; NULL means UNDEFINED not yet defined */
234
};
235
 
236
typedef struct {
237
        const char *symbol;
238
        int idx;
239
} col_index_t;
240
 
241
static void *root_col_locale = NULL;
242
 
243
typedef struct {
244
    const char *keyword;
245
    void (*handler)(void);
246
} keyword_table_t;
247
 
248
typedef struct {
249
    const char *string;
250
    const char *element;        /* NULL if collating symbol */
251
} colitem_t;
252
 
253
static col_locale_t *cur_base = NULL;
254
static col_locale_t *cur_derived = NULL;
255
static col_locale_t *cur_col = NULL;
256
 
257
static void *root_sym = NULL;
258
static size_t num_sym = 0;
259
static size_t mem_sym = 0;
260
 
261
static void error_msg(const char *fmt, ...) __attribute__ ((noreturn, format (printf, 1, 2)));
262
static void *xmalloc(size_t n);
263
static char *xsymdup(const char *s); /* only allocate once... store in a tree */
264
static void pushfile(char *filename);
265
static void popfile(void);
266
static void processfile(void);
267
static int iscommentchar(int);
268
static void eatwhitespace(void);
269
static int next_line(void);
270
static char *next_token(void);
271
static void do_unrecognized(void);
272
static col_locale_t *new_col_locale(char *name);
273
static ll_item_t *new_ll_item(int data_type, void *data);
274
static weight_t *register_weight(weight_t *w);
275
static size_t ll_len(ll_item_t *l);
276
static size_t ll_count(ll_item_t *l, int mask);
277
static void add_wi_index(ll_item_t *l);
278
static size_t tnumnodes(const void *root);
279
static ll_item_t *find_wi_index(const char *sym, col_locale_t *cl);
280
static void mark_reordered(const char *sym);
281
static ll_item_t *find_wi_index_reordered(const char *sym);
282
static ll_item_t *next_comm_ptr(void);
283
static ll_item_t *init_comm_ptr(void);
284
static ll_item_t *find_ll_last(ll_item_t *p);
285
static void dump_weights(const char *name);
286
static void finalize_base(void);
287
static int is_ucode(const char *s);
288
static int sym_cmp(const void *n1, const void *n2);
289
static void do_starter_lists(col_locale_t *cl);
290
static void dump_base_locale(int n);
291
static void dump_der_locale(int n);
292
static void dump_collate(FILE *fp);
293
 
294
/*
 * Values for ll_item_t.data_type, identifying what ll_item_t.data points to.
 * They are distinct bits so they can be tested with '&' and counted with a
 * mask (see ll_count()).
 */
enum {
295
        DT_SECTION = 0x01,      /* data is a section_t */
296
        DT_WEIGHTED = 0x02,     /* data is a weighted_item_t */
297
        DT_REORDER = 0x04,                /* a section to support reorder_after */
298
        DT_COL_LOCALE = 0x08,   /* data is a col_locale_t */
299
        DT_RANGE = 0x10,        /* data is a range_item_t */
300
};
301
 
302
static section_t *new_section(const char *name)
303
{
304
        section_t *p;
305
        char buf[128];
306
 
307
        p = xmalloc(sizeof(section_t));
308
        if (!name) {                            /* anonymous section */
309
                name = buf;
310
                snprintf(buf, sizeof(buf), "anon%05d", anonsection);
311
                ++anonsection;
312
        } else if (*name != '<') {      /* reorder */
313
                name = buf;
314
                snprintf(buf, sizeof(buf), "%s %05d", cur_col->name, anonsection);
315
                ++anonsection;
316
        }
317
#warning devel code
318
/*      fprintf(stderr, "section %s\n", name); */
319
        p->name = xsymdup(name);
320
        p->itm_list = NULL;
321
        p->num_items = 0;
322
        p->num_rules = 0;
323
        memset(p->rules, 0, MAX_COLLATION_WEIGHTS);
324
/*      cur_num_weights = p->num_rules = 0; */
325
/*      memset(p->rules, 0, MAX_COLLATION_WEIGHTS); */
326
/*      memset(cur_rule, R_FORWARD, 4); */
327
 
328
#warning devel code
329
        if (*p->name == 'a') {
330
                cur_num_weights = p->num_rules = 4;
331
                memset(p->rules, R_FORWARD, 4);
332
                memset(cur_rule, R_FORWARD, 4);
333
                p->rules[3] |= R_POSITION;
334
                cur_rule[3] |= R_POSITION;
335
        }
336
/*      fprintf(stderr, "new section %s -- cur_num_weights = %d\n", p->name, cur_num_weights); */
337
 
338
        return p;
339
}
340
 
341
 
342
 
343
static void do_order_start(void);
344
static void do_order_end(void);
345
static void do_reorder_after(void);
346
static void do_reorder_end(void);
347
static void do_reorder_sections_after(void);
348
static void do_reorder_sections_end(void);
349
static void do_copy(void);
350
static void do_colsym(void);
351
static void do_colele(void);
352
static void do_script(void);
353
static void do_range(void);
354
 
355
static col_locale_t *new_col_locale(char *name);
356
static int colitem_cmp(const void *n1, const void *n2);
357
static int colelement_cmp(const void *n1, const void *n2);
358
static void del_colitem(colitem_t *p);
359
static colitem_t *new_colitem(char *item, char *def);
360
static void add_colitem(char *item, char *def);
361
static void add_script(const char *s);
362
static unsigned int add_rule(weighted_item_t *wi);
363
static unsigned int add_range_rule(range_item_t *ri);
364
 
365
static const keyword_table_t keyword_table[] = {
366
    { "collating-symbol", do_colsym },
367
    { "collating-element", do_colele },
368
        { "script", do_script },
369
    { "copy", do_copy },
370
    { "order_start", do_order_start },
371
    { "order_end", do_order_end },
372
    { "order-end", do_order_end },
373
    { "reorder-after", do_reorder_after },
374
    { "reorder-end", do_reorder_end },
375
    { "reorder-sections-after", do_reorder_sections_after },
376
    { "reorder-sections-end", do_reorder_sections_end },
377
        { "UCLIBC_RANGE", do_range },
378
    { NULL, do_unrecognized }
379
};
380
 
381
 
382
static void do_unrecognized(void)
383
{
384
#if 1
385
    error_msg("warning: unrecognized: %s", pos);
386
#else
387
/*     fprintf(stderr, "warning: unrecognized initial keyword \"%s\"\n", pos); */
388
        fprintf(stderr, "warning: unrecognized: %s", pos);
389
        if (end_of_token) {
390
                fprintf(stderr, "%c%s", end_of_token, pos_e+1);
391
        }
392
        fprintf(stderr, "\n");
393
#endif
394
}
395
 
396
/* typedef struct { */
397
/*      const char *symbol1; */
398
/*      const char *symbol2; */
399
/*      int length; */
400
/*      weight_t *weight; */
401
/* } range_item_t; */
402
 
403
static void do_range(void)
404
{
405
        range_item_t *ri;
406
        weight_t w;
407
        int i;
408
        char *s;
409
        char *s1;
410
        char *s2;
411
        const char **ci;
412
        ll_item_t *lli;
413
 
414
        assert(!superset);
415
        assert(order_state == IN_ORDER);
416
 
417
        s1 = next_token();
418
        if (!s1) {
419
                error_msg("missing start of range");
420
        }
421
        if (!is_ucode(s1)) {
422
                error_msg("start of range is not a ucode: %s", s1);
423
        }
424
        s1 = xsymdup(s1);
425
 
426
        s2 = next_token();
427
        if (!s2) {
428
                error_msg("missing end of range");
429
        }
430
        if (!is_ucode(s2)) {
431
                error_msg("end of range is not a ucode: %s", s2);
432
        }
433
        s2 = xsymdup(s2);
434
 
435
        ri = (range_item_t *) xmalloc(sizeof(range_item_t));
436
        ri->symbol1 = s1;
437
        ri->symbol2 = s2;
438
        ri->length = strtoul(s2+2, NULL, 16) - strtoul(s1+2, NULL, 16);
439
        if (ri->length <= 0) {
440
                error_msg("illegal range length %d", ri->length);
441
        }
442
 
443
        s = next_token();
444
        w.num_weights = cur_num_weights;
445
 
446
        for (i=0 ; i < cur_num_weights ; i++) {
447
                w.rule[i] = cur_rule[i];
448
        }
449
        ci = w.colitem + (i-1);
450
        /* now i == cur_num_weights */
451
 
452
#define STR_DITTO "."
453
 
454
        while (s && *s && i) {
455
                --i;
456
                if (*s == ';') {
457
                        ci[-i] = xsymdup(STR_DITTO);
458
                        if (*++s) {
459
                                continue;
460
                        }
461
                }
462
                if (*s) {
463
                        ci[-i] = xsymdup(s);
464
                }
465
                s = next_token();
466
                if (s) {
467
                        if (*s == ';') {
468
                                ++s;
469
                        } else if (i) {
470
                                error_msg("missing seperator");
471
                        }
472
                }
473
        }
474
        if (s) {
475
                error_msg("too many weights: %d %d |%s| %d", cur_num_weights, i, s, (int)*s);
476
        }
477
 
478
        while (i) {                                     /* missing weights are not an error */
479
                --i;
480
                ci[-i] = xsymdup(STR_DITTO);
481
        }
482
 
483
        ri->weight = register_weight(&w);
484
 
485
/*      if ((i = is_ucode(t)) != 0) { */
486
/*              assert(!t[i]); */
487
/*              add_colitem(t, NULL); */
488
/*      } */
489
 
490
        lli = new_ll_item(DT_RANGE, ri);
491
        if (!cur_section->itm_list) {
492
/*              printf("creating new item list: %s\n", wi->symbol); */
493
                cur_section->itm_list = lli;
494
                lli->prev = lli->next = lli;
495
                ++cur_section->num_items;
496
        } else {
497
                insque(lli, cur_section->itm_list->prev);
498
/*              printf("adding item to list: %d - %s\n", ll_len(cur_section->itm_list), wi->symbol); */
499
                ++cur_section->num_items;
500
        }
501
/*      add_wi_index(lli); */
502
 
503
 
504
}
505
 
506
static weighted_item_t *add_weight(char *t)
507
{
508
        weighted_item_t *wi;
509
        weight_t w;
510
        int i;
511
        char *s;
512
        const char **ci;
513
 
514
        t = xsymdup(t);
515
 
516
        s = next_token();
517
        w.num_weights = cur_num_weights;
518
 
519
        for (i=0 ; i < cur_num_weights ; i++) {
520
                w.rule[i] = cur_rule[i];
521
        }
522
        ci = w.colitem + (i-1);
523
        /* now i == cur_num_weights */
524
 
525
        while (s && *s && i) {
526
                --i;
527
                if (*s == ';') {
528
                        ci[-i] = xsymdup(STR_DITTO);
529
                        if (*++s) {
530
                                continue;
531
                        }
532
                }
533
                if (*s) {
534
                        if (!strcmp(s,t)) {
535
                                s = STR_DITTO;
536
                        }
537
                        ci[-i] = xsymdup(s);
538
                }
539
                s = next_token();
540
                if (s) {
541
                        if (*s == ';') {
542
                                ++s;
543
                        } else if (i) {
544
                                error_msg("missing seperator");
545
                        }
546
                }
547
        }
548
        if (s) {
549
                error_msg("too many weights: %d %d |%s| %d", cur_num_weights, i, s, (int)*s);
550
        }
551
 
552
        while (i) {                                     /* missing weights are not an error */
553
                --i;
554
                ci[-i] = xsymdup(STR_DITTO);
555
        }
556
 
557
        wi = xmalloc(sizeof(weighted_item_t));
558
        wi->symbol = t;
559
        wi->weight = register_weight(&w);
560
 
561
        if ((i = is_ucode(t)) != 0) {
562
                assert(!t[i]);
563
                add_colitem(t, NULL);
564
        }
565
 
566
        return wi;
567
}
568
 
569
static void add_superset_weight(char *t)
570
{
571
        ll_item_t *lli;
572
        weighted_item_t *wi;
573
 
574
        if (!comm_cur_ptr
575
                || (strcmp(t, ((weighted_item_t *)(comm_cur_ptr->data))->symbol) != 0)
576
                ) {                                             /* now out of sync */
577
                if (superset_in_sync) { /* need a new section */
578
                        superset_in_sync = 0;
579
 
580
                        cur_section = new_section("R");
581
                        cur_num_weights = cur_section->num_rules
582
                                = ((section_t *)(cur_base->section_list->data))->num_rules;
583
                        memcpy(cur_rule,
584
                                   ((section_t *)(cur_base->section_list->data))->rules,
585
                                   MAX_COLLATION_WEIGHTS);
586
                        memcpy(cur_section->rules,
587
                                   ((section_t *)(cur_base->section_list->data))->rules,
588
                                   MAX_COLLATION_WEIGHTS);
589
 
590
                        insque(new_ll_item(DT_REORDER, cur_section), find_ll_last(cur_col->section_list));
591
                        assert(comm_prev_ptr);
592
                        lli = new_ll_item(DT_REORDER, cur_section);
593
                        lli->prev = lli->next = lli;
594
                        insque(lli, comm_prev_ptr);
595
/*                      fprintf(stderr, "  subsection -----------------------\n"); */
596
                }
597
 
598
/*              fprintf(stderr, "     %s   %s\n", t, ((weighted_item_t *)(comm_cur_ptr->data))->symbol); */
599
                wi = add_weight(t);
600
                lli = new_ll_item(DT_WEIGHTED, wi);
601
                mark_reordered(wi->symbol);
602
                /*                      printf("reorder: %s\n", t); */
603
                if (!cur_section->itm_list) {
604
                        cur_section->itm_list = lli;
605
                        lli->prev = lli->next = lli;
606
                        ++cur_section->num_items;
607
                } else {
608
                        insque(lli, cur_section->itm_list->prev);
609
                        ++cur_section->num_items;
610
                }
611
                add_wi_index(lli);
612
 
613
        } else {                                        /* in sync */
614
                superset_in_sync = 1;
615
                next_comm_ptr();
616
        }
617
}
618
 
619
static void do_weight(char *t)
620
{
621
        weighted_item_t *wi;
622
        ll_item_t *lli;
623
 
624
        if (superset) {
625
                add_superset_weight(t);
626
                return;
627
        }
628
 
629
        switch(order_state) {
630
                case 0:
631
/*                      fprintf(stdout, "no-order weight: %s\n", t); */
632
/*                      break; */
633
                case IN_ORDER:
634
                        /* in a section */
635
/*                      fprintf(stdout, "weight: %s\n", t); */
636
                        wi = add_weight(t);
637
                        lli = new_ll_item(DT_WEIGHTED, wi);
638
                        if (!cur_section->itm_list) {
639
/*                              fprintf(stdout, "creating new item list: %s  %s  %p\n", wi->symbol, cur_section->name, lli); */
640
                                cur_section->itm_list = lli;
641
                                lli->prev = lli->next = lli;
642
                                ++cur_section->num_items;
643
                        } else {
644
                                insque(lli, cur_section->itm_list->prev);
645
/*                              fprintf(stdout, "adding item to list: %d - %s  %p\n", ll_len(cur_section->itm_list), wi->symbol, lli); */
646
                                ++cur_section->num_items;
647
                        }
648
                        add_wi_index(lli);
649
                        break;
650
                case IN_REORDER:
651
                        /* std rule - but in a block with an insert-after pt */
652
                        wi = add_weight(t);
653
                        lli = new_ll_item(DT_WEIGHTED, wi);
654
                        mark_reordered(wi->symbol);
655
/*                      fprintf(stdout, "reorder: %s  %s  %p\n", t, cur_section->name, lli); */
656
                        if (!cur_section->itm_list) {
657
                                cur_section->itm_list = lli;
658
                                lli->prev = lli->next = lli;
659
                                ++cur_section->num_items;
660
                        } else {
661
                                insque(lli, cur_section->itm_list->prev);
662
                                ++cur_section->num_items;
663
                        }
664
                        add_wi_index(lli);
665
                        break;
666
                case IN_REORDER_SECTIONS:
667
                        t = xsymdup(t);
668
                        if (next_token() != NULL) {
669
                                error_msg("trailing text in reorder section item: %s", pos);
670
                        }
671
                        lli = cur_col->section_list;
672
                        do {
673
                                if (lli->data_type & DT_SECTION) {
674
                                        if (!strcmp(((section_t *)(lli->data))->name, t)) {
675
                                                lli->data_type = DT_REORDER;
676
                                                lli = new_ll_item(DT_REORDER, (section_t *)(lli->data));
677
                                                insque(lli, reorder_section_ptr);
678
                                                reorder_section_ptr = lli;
679
                                                return;
680
                                        }
681
                                }
682
                                lli = lli->next;
683
                        } while (lli);
684
                        error_msg("reorder_sections_after for non-base item currently not supported: %s", t);
685
/*                      fprintf(stdout, "reorder_secitons: %s\n", t); */
686
                        break;
687
                default:
688
                        error_msg("invalid order_state %d", order_state);
689
        }
690
}
691
 
692
static int col_locale_cmp(const void *n1, const void *n2)
693
{
694
    return strcmp(((const col_locale_t *) n1)->name, ((const col_locale_t *) n2)->name);
695
}
696
 
697
static void processfile(void)
698
{
699
        char *t;
700
        const keyword_table_t *k;
701
 
702
        order_state = 0;
703
#warning devel code
704
/*      cur_num_weights = 0; */
705
/*      cur_num_weights = 4; */
706
/*      memset(cur_rule, R_FORWARD, 4); */
707
 
708
        if (cur_col != cur_base) {
709
                cur_col->base_locale = cur_base;
710
                cur_col->undefined_idx = cur_base->undefined_idx;
711
                if (!cur_base->derived_list) {
712
                        cur_base->derived_list = new_ll_item(DT_COL_LOCALE, cur_col);
713
                } else {
714
                        insque(new_ll_item(DT_COL_LOCALE, cur_col), find_ll_last(cur_base->derived_list));
715
                }
716
        }
717
 
718
        if (tfind(cur_col, &root_col_locale, col_locale_cmp)) {
719
                error_msg("attempt to read locale: %s", cur_col->name);
720
        }
721
        if (!tsearch(cur_col, &root_col_locale, col_locale_cmp)) {
722
                error_msg("OUT OF MEMORY!");
723
        }
724
 
725
        if (superset) {
726
                superset_order_start_cnt = 0;
727
                superset_in_sync = 0;
728
                init_comm_ptr();
729
        }
730
 
731
        while (next_line()) {
732
/*              printf("%5d:", lineno[fno]); */
733
/*              while ((t = next_token()) != NULL) { */
734
/*                      printf(" |%s|", t); */
735
/*              printf("\n"); */
736
/*              } */
737
                t = next_token();
738
                assert(t);
739
                assert(t == pos);
740
                if ((*t == '<') || (!strcmp(t, "UNDEFINED"))) {
741
                        do_weight(t);
742
                } else {
743
                        for (k = keyword_table ; k->keyword ; k++) {
744
                                if (!strcmp(k->keyword, t)) {
745
                                        break;
746
                                }
747
                        }
748
                        k->handler();
749
                }
750
        }
751
 
752
        if (cur_base == cur_col) {
753
                fprintf(stderr, "Base: %15s", cur_col->name);
754
        } else {
755
#if 1
756
                if (!cur_col->undefined_idx) {
757
#if 0
758
                        if (superset) {
759
                                if (superset_order_start_cnt == 1) {
760
                                        --superset_order_start_cnt;     /* ugh.. hack this */
761
                                }
762
                        }
763
#endif
764
                        /* This is an awful hack to get around the problem of unspecified UNDEFINED
765
                         * definitions in the supported locales derived from iso14651_t1. */
766
                        if (!strcmp(cur_base->name, "iso14651_t1")) {
767
                                fprintf(stderr, "Warning: adding UNDEFINED entry for %s\n", cur_col->name);
768
                                strcpy(linebuf, "script <UNDEFINED_SECTION>\n");
769
                                pos_e = NULL;
770
                                pos = linebuf;
771
                                t = next_token();
772
                                assert(t);
773
                                assert(t == pos);
774
                                do_script();
775
                                strcpy(linebuf, "order_start <UNDEFINED_SECTION>;forward;backward;forward;forward,position\n");
776
                                pos_e = NULL;
777
                                pos = linebuf;
778
                                t = next_token();
779
                                assert(t);
780
                                assert(t == pos);
781
                                do_order_start();
782
                                strcpy(linebuf, "UNDEFINED IGNORE;IGNORE;IGNORE\n");
783
                                pos_e = NULL;
784
                                pos = linebuf;
785
                                t = next_token();
786
                                assert(t);
787
                                assert(t == pos);
788
                                do_weight(t);
789
                                strcpy(linebuf, "order_end\n");
790
                                pos_e = NULL;
791
                                pos = linebuf;
792
                                t = next_token();
793
                                assert(t);
794
                                assert(t == pos);
795
                                do_order_end();
796
                        } else {
797
                                error_msg("no definition of UNDEFINED for %s", cur_col->name);
798
                        }
799
                }
800
#endif
801
 
802
                fprintf(stderr, " Der: %15s", cur_col->name);
803
        }
804
        {
805
                ll_item_t *p = cur_col->section_list;
806
 
807
                fprintf(stderr, "%6u weights", tnumnodes(cur_col->root_wi_index));
808
                if (cur_base) {
809
                        fprintf(stderr, "  %6u der %6u reor %6u starter - %u new stubs",
810
                                        tnumnodes(cur_base->root_derived_wi),
811
                                        tnumnodes(cur_base->root_wi_index_reordered),
812
                                        tnumnodes(cur_base->root_starter_char),
813
                                        ll_count(cur_col->section_list, DT_REORDER));
814
                }
815
                fprintf(stderr, "\n");
816
 
817
#if 0
818
                while (p) {
819
                        assert(((section_t *)(p->data))->num_items ==
820
                                   ll_len(((section_t *)(p->data))->itm_list));
821
 
822
 
823
                        if (!p->next &&
824
                                ((*((section_t *)(p->data))->name == 'a')
825
                                 && (((section_t *)(p->data))->num_items == 0))
826
                                ) {
827
                                break;
828
                        }
829
 
830
                        if (!(p->data_type & DT_REORDER)) {
831
                                if ((*((section_t *)(p->data))->name != 'a')
832
                                        || (((section_t *)(p->data))->num_items > 0)
833
                                        ) {
834
                                        fprintf(stderr,
835
/*                                                      "\t%-15s %zu\n", */
836
                                                        "\t%-15s %6u\n",
837
                                                        ((section_t *)(p->data))->name,
838
                                                        ((section_t *)(p->data))->num_items);
839
                                }
840
                        }
841
                        p = p->next;
842
                }
843
#endif
844
        }
845
 
846
 
847
}
848
 
849
static void print_colnode(const void *ptr, VISIT order, int level)
850
{
851
    const colitem_t *p = *(const colitem_t **) ptr;
852
 
853
    if (order == postorder || order == leaf)  {
854
        printf("collating item = \"%s\"", p->string);
855
                if (p->element) {
856
                        printf(" is %s", p->element);
857
                }
858
        printf("\n");
859
    }
860
}
861
 
862
static void print_weight_node(const void *ptr, VISIT order, int level)
863
{
864
    const weight_t *p = *(const weight_t **) ptr;
865
        int i;
866
 
867
    if (order == postorder || order == leaf)  {
868
        printf("weight: (%d)  ", p->num_weights);
869
                for (i = 0 ; i < p->num_weights ; i++) {
870
                        if (p->rule[i] & R_FORWARD) {
871
                                printf("F");
872
                        }
873
                        if (p->rule[i] & R_BACKWARD) {
874
                                printf("B");
875
                        }
876
                        if (p->rule[i] & R_POSITION) {
877
                                printf("P");
878
                        }
879
                        printf(",");
880
                }
881
                for (i = 0 ; i < p->num_weights ; i++) {
882
                        printf("   %s", p->colitem[i]);
883
                }
884
        printf("\n");
885
    }
886
}
887
 
888
 
889
/*
 * One entry of the locale dependency table: ties a locale name accepted
 * on the command line to the base collation it is built on.
 */
typedef struct {
	const char *der_name;	/* locale name; the key compared by dep_cmp() */
	int base_locale;	/* BASE_* value; used as an index into base_name[] */
} deps_t;
893
 
894
/*
 * Identifiers of the base collation files.  The values index base_name[],
 * locale_list[] and der_count[], so the order here must match the order
 * of the strings in base_name[].
 */
enum {
	BASE_iso14651_t1,
	BASE_comm,
	BASE_cs_CZ,
	BASE_ar_SA,
	BASE_th_TH,
	BASE_ja_JP,
	BASE_ko_KR,
	BASE_MAX		/* number of base locales; not a locale itself */
};
904
 
905
/*
 * File name of each base collation, indexed by the BASE_* enum value.
 * Keep the entries in the same order as the enum.
 */
static const char *base_name[] = {
	"iso14651_t1",	/* BASE_iso14651_t1 */
	"comm",		/* BASE_comm */
	"cs_CZ",	/* BASE_cs_CZ */
	"ar_SA",	/* BASE_ar_SA */
	"th_TH",	/* BASE_th_TH */
	"ja_JP",	/* BASE_ja_JP */
	"ko_KR"		/* BASE_ko_KR */
};
914
 
915
 
916
 
917
static ll_item_t *locale_list[BASE_MAX];
918
 
919
static void init_locale_list(void)
920
{
921
        int i;
922
 
923
        for (i=0 ; i < BASE_MAX ; i++) {
924
                locale_list[i] = (ll_item_t *) xmalloc(sizeof(ll_item_t));
925
                locale_list[i]->prev = locale_list[i]->next = locale_list[i];
926
                locale_list[i]->data = (void *) base_name[i];
927
        }
928
}
929
 
930
 
931
deps_t deps[] = {
932
        { "af_ZA", BASE_iso14651_t1 },
933
        { "am_ET", BASE_iso14651_t1 },
934
        { "ar_AE", BASE_iso14651_t1 },
935
        { "ar_BH", BASE_iso14651_t1 },
936
        { "ar_DZ", BASE_iso14651_t1 },
937
        { "ar_EG", BASE_iso14651_t1 },
938
        { "ar_IN", BASE_iso14651_t1 },
939
        { "ar_IQ", BASE_iso14651_t1 },
940
        { "ar_JO", BASE_iso14651_t1 },
941
        { "ar_KW", BASE_iso14651_t1 },
942
        { "ar_LB", BASE_iso14651_t1 },
943
        { "ar_LY", BASE_iso14651_t1 },
944
        { "ar_MA", BASE_iso14651_t1 },
945
        { "ar_OM", BASE_iso14651_t1 },
946
        { "ar_QA", BASE_iso14651_t1 },
947
        { "ar_SA", BASE_ar_SA },
948
        { "ar_SD", BASE_iso14651_t1 },
949
        { "ar_SY", BASE_iso14651_t1 },
950
        { "ar_TN", BASE_iso14651_t1 },
951
        { "ar_YE", BASE_iso14651_t1 },
952
        { "az_AZ", BASE_iso14651_t1 },
953
        { "be_BY", BASE_iso14651_t1 },
954
        { "bg_BG", BASE_iso14651_t1 },
955
        { "bn_BD", BASE_iso14651_t1 },
956
        { "bn_IN", BASE_iso14651_t1 },
957
        { "br_FR", BASE_iso14651_t1 },
958
        { "bs_BA", BASE_iso14651_t1 },
959
        { "ca_ES", BASE_comm },
960
        { "cs_CZ", BASE_cs_CZ },
961
        { "cy_GB", BASE_iso14651_t1 },
962
        { "da_DK", BASE_comm },
963
        { "de_AT", BASE_iso14651_t1 },
964
        { "de_BE", BASE_iso14651_t1 },
965
        { "de_CH", BASE_iso14651_t1 },
966
        { "de_DE", BASE_iso14651_t1 },
967
        { "de_LU", BASE_iso14651_t1 },
968
        { "el_GR", BASE_iso14651_t1 },
969
        { "en_AU", BASE_iso14651_t1 },
970
        { "en_BW", BASE_iso14651_t1 },
971
        { "en_CA", BASE_comm },
972
        { "en_DK", BASE_iso14651_t1 },
973
        { "en_GB", BASE_iso14651_t1 },
974
        { "en_HK", BASE_iso14651_t1 },
975
        { "en_IE", BASE_iso14651_t1 },
976
        { "en_IN", BASE_iso14651_t1 },
977
        { "en_NZ", BASE_iso14651_t1 },
978
        { "en_PH", BASE_iso14651_t1 },
979
        { "en_SG", BASE_iso14651_t1 },
980
        { "en_US", BASE_iso14651_t1 },
981
        { "en_ZA", BASE_iso14651_t1 },
982
        { "en_ZW", BASE_iso14651_t1 },
983
        { "eo_EO", BASE_iso14651_t1 },
984
        { "es_AR", BASE_comm },
985
        { "es_BO", BASE_comm },
986
        { "es_CL", BASE_comm },
987
        { "es_CO", BASE_comm },
988
        { "es_CR", BASE_comm },
989
        { "es_DO", BASE_comm },
990
        { "es_EC", BASE_comm },
991
        { "es_ES", BASE_comm },
992
        { "es_GT", BASE_comm },
993
        { "es_HN", BASE_comm },
994
        { "es_MX", BASE_comm },
995
        { "es_NI", BASE_comm },
996
        { "es_PA", BASE_comm },
997
        { "es_PE", BASE_comm },
998
        { "es_PR", BASE_comm },
999
        { "es_PY", BASE_comm },
1000
        { "es_SV", BASE_comm },
1001
        { "es_US", BASE_comm },
1002
        { "es_UY", BASE_comm },
1003
        { "es_VE", BASE_comm },
1004
        { "et_EE", BASE_comm },
1005
        { "eu_ES", BASE_iso14651_t1 },
1006
        { "fa_IR", BASE_iso14651_t1 },
1007
        { "fi_FI", BASE_comm },
1008
        { "fo_FO", BASE_comm },
1009
        { "fr_BE", BASE_iso14651_t1 },
1010
        { "fr_CA", BASE_comm },
1011
        { "fr_CH", BASE_iso14651_t1 },
1012
        { "fr_FR", BASE_iso14651_t1 },
1013
        { "fr_LU", BASE_iso14651_t1 },
1014
        { "ga_IE", BASE_iso14651_t1 },
1015
        { "gd_GB", BASE_iso14651_t1 },
1016
        { "gl_ES", BASE_comm },
1017
        { "gv_GB", BASE_iso14651_t1 },
1018
        { "he_IL", BASE_iso14651_t1 },
1019
        { "hi_IN", BASE_iso14651_t1 },
1020
        { "hr_HR", BASE_comm },
1021
        { "hu_HU", BASE_iso14651_t1 },
1022
        { "hy_AM", BASE_iso14651_t1 },
1023
        { "id_ID", BASE_iso14651_t1 },
1024
        { "is_IS", BASE_comm },
1025
        { "it_CH", BASE_iso14651_t1 },
1026
        { "it_IT", BASE_iso14651_t1 },
1027
        { "iw_IL", BASE_iso14651_t1 },
1028
        { "ja_JP", BASE_ja_JP },
1029
        { "ka_GE", BASE_iso14651_t1 },
1030
        { "kl_GL", BASE_comm },
1031
        { "ko_KR", BASE_ko_KR },
1032
        { "kw_GB", BASE_iso14651_t1 },
1033
        { "lt_LT", BASE_comm },
1034
        { "lv_LV", BASE_comm },
1035
        { "mi_NZ", BASE_iso14651_t1 },
1036
        { "mk_MK", BASE_iso14651_t1 },
1037
        { "mr_IN", BASE_iso14651_t1 },
1038
        { "ms_MY", BASE_iso14651_t1 },
1039
        { "mt_MT", BASE_iso14651_t1 },
1040
        { "nl_BE", BASE_iso14651_t1 },
1041
        { "nl_NL", BASE_iso14651_t1 },
1042
        { "nn_NO", BASE_iso14651_t1 },
1043
        { "no_NO", BASE_comm },
1044
        { "oc_FR", BASE_iso14651_t1 },
1045
        { "pl_PL", BASE_comm },
1046
        { "pt_BR", BASE_iso14651_t1 },
1047
        { "pt_PT", BASE_iso14651_t1 },
1048
        { "ro_RO", BASE_iso14651_t1 },
1049
        { "ru_RU", BASE_iso14651_t1 },
1050
        { "ru_UA", BASE_iso14651_t1 },
1051
        { "se_NO", BASE_iso14651_t1 },
1052
        { "sk_SK", BASE_cs_CZ },
1053
        { "sl_SI", BASE_comm },
1054
        { "sq_AL", BASE_iso14651_t1 },
1055
        { "sr_YU", BASE_iso14651_t1 },
1056
        { "sv_FI", BASE_comm },
1057
        { "sv_SE", BASE_iso14651_t1 },
1058
        { "ta_IN", BASE_iso14651_t1 },
1059
        { "te_IN", BASE_iso14651_t1 },
1060
        { "tg_TJ", BASE_iso14651_t1 },
1061
        { "th_TH", BASE_th_TH },
1062
        { "ti_ER", BASE_iso14651_t1 },
1063
        { "ti_ET", BASE_iso14651_t1 },
1064
        { "tl_PH", BASE_iso14651_t1 },
1065
        { "tr_TR", BASE_comm },
1066
        { "tt_RU", BASE_iso14651_t1 },
1067
        { "uk_UA", BASE_iso14651_t1 },
1068
        { "ur_PK", BASE_iso14651_t1 },
1069
        { "uz_UZ", BASE_iso14651_t1 },
1070
        { "vi_VN", BASE_iso14651_t1 },
1071
        { "wa_BE", BASE_iso14651_t1 },
1072
        { "yi_US", BASE_iso14651_t1 },
1073
        { "zh_CN", BASE_iso14651_t1 },
1074
        { "zh_HK", BASE_iso14651_t1 },
1075
        { "zh_SG", BASE_iso14651_t1 },
1076
        { "zh_TW", BASE_iso14651_t1 },
1077
};
1078
 
1079
 
1080
/* Number of requested locales that map to each base (indexed by BASE_*). */
static int der_count[BASE_MAX];

/* Argument vector that main() builds and hands to old_main(). */
static const char *new_args[500];

/* Number of entries of new_args[] currently in use. */
static int new_arg_count;
1083
 
1084
static int dep_cmp(const void *s1, const void *s2)
1085
{
1086
        return strcmp( (const char *) s1, ((const deps_t *) s2)->der_name);
1087
}
1088
 
1089
static int old_main(int argc, char **argv);
1090
 
1091
int main(int argc, char **argv)
1092
{
1093
        const deps_t *p;
1094
        ll_item_t *lli;
1095
        int i;
1096
        int total;
1097
 
1098
        if (argc < 2) {
1099
                return EXIT_FAILURE;
1100
        }
1101
 
1102
        init_locale_list();
1103
 
1104
        while (--argc) {
1105
                p = (const deps_t *) bsearch(*++argv, deps, sizeof(deps)/sizeof(deps[0]), sizeof(deps[0]), dep_cmp);
1106
                if (!p) {
1107
                        if (!strcmp("C", *argv)) {
1108
                                printf("ignoring C locale\n");
1109
                                continue;
1110
                        } else {
1111
                                printf("%s not found\n", *argv);
1112
                                return EXIT_FAILURE;
1113
                        }
1114
                }
1115
 
1116
                i = p->base_locale;
1117
                ++der_count[i];
1118
 
1119
                if (!strcmp(base_name[i], *argv)) {
1120
                        /* same name as base, so skip after count incremented */
1121
                        continue;
1122
                }
1123
 
1124
                /* add it to the list.  the main body will catch duplicates */
1125
                lli = (ll_item_t *) xmalloc(sizeof(ll_item_t));
1126
                lli->prev = lli->next = NULL;
1127
                lli->data = (void *) *argv;
1128
                insque(lli, locale_list[i]);
1129
        }
1130
 
1131
        total = 0;
1132
        for (i=0 ; i < BASE_MAX ; i++) {
1133
/*              printf("der_count[%2d] = %3d\n", i, der_count[i]); */
1134
                total += der_count[i];
1135
        }
1136
/*      printf("total = %d\n", total); */
1137
 
1138
        new_args[new_arg_count++] = "dummyprogramname";
1139
        for (i=0 ; i < BASE_MAX ; i++) {
1140
                if (!der_count[i]) {
1141
                        continue;
1142
                }
1143
                new_args[new_arg_count++] = (i == BASE_comm) ? "-c" : "-b";
1144
                lli = locale_list[i];
1145
                do {
1146
                        new_args[new_arg_count++] = (const char *) (lli->data);
1147
                        lli = lli->next;
1148
                } while (lli != locale_list[i]);
1149
                new_args[new_arg_count++] = "-f";
1150
        }
1151
 
1152
/*      for (i=0 ; i < new_arg_count ; i++) { */
1153
/*              printf("%3d: %s\n", i, new_args[i]); */
1154
/*      } */
1155
 
1156
        return old_main(new_arg_count, (char **) new_args);
1157
}
1158
 
1159
 
1160
/* usage...  prog -b basefile derived {derived} -s single {single} */
1161
 
1162
static int old_main(int argc, char **argv)
1163
{
1164
        int next_is_base = 0;
1165
        int next_is_subset = 0;
1166
 
1167
        superset = 0;
1168
 
1169
        while (--argc) {
1170
                ++argv;
1171
                if (**argv == '-') {
1172
                        if ((*argv)[1] == 'd') {
1173
                                dump_weights((*argv) + 2);
1174
                        } else if ((*argv)[1] == 'f') { /* dump all weight rules */
1175
                                finalize_base();
1176
                        } else if ((*argv)[1] == 'R') { /* dump all weight rules */
1177
                                twalk(root_weight, print_weight_node);
1178
                        } else if (((*argv)[1] == 'c') && !(*argv)[2]) { /* new common subset */
1179
                                cur_base = cur_derived = NULL;
1180
                                next_is_subset = 1;
1181
                                next_is_base = 1;
1182
                                superset = 0;
1183
                        } else if (((*argv)[1] == 'b') && !(*argv)[2]) { /* new base locale */
1184
                                cur_base = cur_derived = NULL;
1185
                                next_is_subset = 0;
1186
                                next_is_base = 1;
1187
                                superset = 0;
1188
                        } else if (((*argv)[1] == 's') && !(*argv)[2]) { /* single locales follow */
1189
                                cur_base = cur_derived = NULL;
1190
                                next_is_subset = 0;
1191
                                next_is_base = 2;
1192
                                superset = 0;
1193
                        } else {
1194
                                error_msg("unrecognized option %s", *argv);
1195
                        }
1196
                        continue;
1197
                }
1198
                /* new file */
1199
                new_col_locale(*argv);  /* automaticly sets cur_col */
1200
                if (next_is_base) {
1201
                        cur_base = cur_col;
1202
                } else {
1203
                        cur_derived = cur_col;
1204
                }
1205
                pushfile(*argv);
1206
/*              fprintf(stderr, "processing file %s\n", *argv); */
1207
                processfile();                  /* this does a popfile */
1208
 
1209
/*              twalk(cur_col->root_colitem, print_colnode); */
1210
 
1211
                if (next_is_base == 1) {
1212
                        next_is_base = 0;
1213
                }
1214
                if (next_is_subset) {
1215
                        next_is_subset = 0;
1216
                        superset = 1;
1217
                }
1218
        }
1219
 
1220
        fprintf(stderr, "success!\n");
1221
        fprintf(stderr,
1222
/*                      "num_sym=%zu mem_sym=%zu  unique_weights=%zu\n", */
1223
                        "num_sym=%u mem_sym=%u  unique_weights=%u\n",
1224
                        num_sym, mem_sym, unique_weights);
1225
/*      twalk(root_weight, print_weight_node); */
1226
 
1227
        fprintf(stderr, "num base locales = %d    num derived locales = %d\n",
1228
                        base_locale_len, der_locale_len);
1229
 
1230
        fprintf(stderr,
1231
                        "override_len = %d      multistart_len = %d    weightstr_len = %d\n"
1232
                        "wcs2colidt_len = %d    index2weight_len = %d  index2ruleidx_len = %d\n"
1233
                        "ruletable_len = %d\n"
1234
                        "total size is %d bytes or %d kB\n",
1235
                        override_len, multistart_len, weightstr_len,
1236
                        wcs2colidt_len, index2weight_len, index2ruleidx_len,
1237
                        ruletable_len,
1238
#warning mult by 2 for rule indecies
1239
                        (override_len + multistart_len + weightstr_len
1240
                         + wcs2colidt_len + index2weight_len + index2ruleidx_len + ruletable_len) * 2,
1241
                        (override_len + multistart_len + weightstr_len
1242
                         + wcs2colidt_len + index2weight_len + index2ruleidx_len + ruletable_len + 511) / 512);
1243
 
1244
#if 0
1245
        {
1246
                int i;
1247
 
1248
                for (i=0 ; i < base_locale_len ; i++) {
1249
                        dump_base_locale(i);
1250
                }
1251
                for (i=0 ; i < der_locale_len ; i++) {
1252
                        dump_der_locale(i);
1253
                }
1254
        }
1255
#endif
1256
 
1257
        {
1258
                FILE *fp = fopen("locale_collate.h", "w");
1259
 
1260
                if (!fp) {
1261
                        error_msg("couldn't open output file!");
1262
                }
1263
                dump_collate(fp);
1264
                if (ferror(fp) || fclose(fp)) {
1265
                        error_msg("write error or close error for output file!\n");
1266
                }
1267
        }
1268
 
1269
    return EXIT_SUCCESS;
1270
}
1271
 
1272
static void error_msg(const char *fmt, ...)
1273
{
1274
        va_list arg;
1275
 
1276
        fprintf(stderr, "Error: ");
1277
        if (fno >= 0) {
1278
            fprintf(stderr, "file %s (%d): ", fname[fno], lineno[fno]);
1279
        }
1280
        va_start(arg, fmt);
1281
        vfprintf(stderr, fmt, arg);
1282
        va_end(arg);
1283
        fprintf(stderr, "\n");
1284
 
1285
        exit(EXIT_FAILURE);
1286
}
1287
 
1288
static void pushfile(char *filename)
1289
{
1290
        static char fbuf[PATH_MAX];
1291
 
1292
        snprintf(fbuf, PATH_MAX, "collation/%s", filename);
1293
 
1294
        if (fno >= MAX_FNO) {
1295
                error_msg("file stack size exceeded");
1296
        }
1297
 
1298
        if (!(fstack[++fno] = fopen(fbuf, "r"))) {
1299
                --fno;                                  /* oops */
1300
                error_msg("cannot open file %s", fbuf);
1301
        }
1302
 
1303
        fname[fno] = xsymdup(filename);
1304
        lineno[fno] = 0;
1305
}
1306
 
1307
static void popfile(void)
1308
{
1309
        if (fno < 0) {
1310
                error_msg("pop on empty file stack");
1311
        }
1312
 
1313
/*      free(fname[fno]); */
1314
        fclose(fstack[fno]);
1315
        --fno;
1316
}
1317
 
1318
static void eatwhitespace(void)
1319
{
1320
        while (isspace(*pos)) {
1321
                ++pos;
1322
        }
1323
}
1324
 
1325
/* Returns 1 if 'c' starts a comment ('#' or '%'), 0 otherwise. */
static int iscommentchar(int c)
{
	switch (c) {
	case '#':
	case '%':
		return 1;
	default:
		return 0;
	}
}
1329
 
1330
static int next_line(void)
1331
{
1332
        size_t n;
1333
        char *s = linebuf;
1334
 
1335
        assert(fno >= 0);
1336
 
1337
        pos_e = NULL;
1338
        do {
1339
                if (fgets(s, sizeof(linebuf), fstack[fno]) != NULL) {
1340
                        ++lineno[fno];
1341
                        n = strlen(linebuf);
1342
                        if ((n == sizeof(linebuf) - 1) && (linebuf[n-1] != '\n')) {
1343
                                /* Either line is too long or last line is very long with
1344
                                 * no trailing newline.  But we'll always treat it as an
1345
                                 * errro. */
1346
                                error_msg("line too long?");
1347
                        }
1348
 
1349
                        --n;
1350
                        /* Be careful... last line doesn't need a newline. */
1351
                        if (linebuf[n] == '\n') {
1352
                                linebuf[n--] = 0;        /* trim trailing newline */
1353
                        }
1354
 
1355
                        pos = linebuf;
1356
                        eatwhitespace();
1357
                        if (*pos && !iscommentchar(*pos)) { /* not empty or comment line */
1358
                                return 1;               /* got a line */
1359
                        }
1360
                } else {                                /* eof */
1361
                        popfile();
1362
                }
1363
        } while (fno >= 0);
1364
 
1365
        return 0;
1366
}
1367
 
1368
static char *next_token(void)
1369
{
1370
        char *p;
1371
 
1372
#if 0
1373
        if (pos_e == NULL) {
1374
                return NULL
1375
                pos = pos_e;
1376
                *pos = end_of_token;
1377
                end_of_token = 0;
1378
        }
1379
#else
1380
        if (pos_e != NULL) {
1381
                pos = pos_e;
1382
                *pos = end_of_token;
1383
                end_of_token = 0;
1384
        }
1385
#endif
1386
        eatwhitespace();
1387
        p = pos;
1388
 
1389
        if (!*p || iscommentchar(*p)) { /* end of line or start of comment */
1390
                pos = pos_e = NULL;
1391
                *p = 0;                                  /* treat comment as end of line */
1392
/*              fprintf(stdout, "returning NUL token |%s|\n", pos); */
1393
                return NULL;
1394
#if 1
1395
        } else if (*p == '<') {  /* collating symbol, element, or value */
1396
                while (*++p) {
1397
                        if ((*p == '/') && p[1]) {
1398
                                ++p;
1399
                                continue;
1400
                        }
1401
                        if (*p == '>') {
1402
                                pos_e = ++p;
1403
                                end_of_token = *p;
1404
                                *p = 0;
1405
/*                              fprintf(stdout, "returning col token |%s|\n", pos); */
1406
                                return pos;
1407
                        }
1408
                }
1409
        } else if (*p == '"') {         /* collating element value? */
1410
                while (*++p) {
1411
                        if (*p == '"') {        /* found the end of the quoted string */
1412
                                pos_e = ++p;
1413
                                end_of_token = *p;
1414
                                *p = 0;
1415
/*                              fprintf(stdout, "returning quote token |%s|\n", pos); */
1416
                                return pos;
1417
                        }
1418
                }
1419
#endif
1420
        } else {                                        /* some kind of keyword */
1421
                while (*++p) {
1422
                        if (isspace(*p) || (*p == ';')) {
1423
                                break;
1424
                        }
1425
                }
1426
                pos_e = p;
1427
                end_of_token = *p;
1428
                *p = 0;
1429
/*              fprintf(stdout, "returning key token |%s|\n", pos); */
1430
                return pos;
1431
        }
1432
 
1433
        error_msg("illegal token |%s|", pos);
1434
}
1435
 
1436
/*
 * malloc() wrapper that never returns NULL: a failed allocation is
 * reported through error_msg(), which terminates the program.
 */
static void *xmalloc(size_t n)
{
	void *mem = malloc(n);

	if (mem == NULL) {
		error_msg("OUT OF MEMORY");
	}
	return mem;
}
1445
 
1446
static void do_copy(void)
1447
{
1448
        char *s;
1449
        char *e;
1450
 
1451
        if ((s = next_token()) != NULL) {
1452
                e = strchr(s + 1, '"');
1453
                if ((*s == '"') && e && (*e == '"') && !e[1]) {
1454
                        if (next_token() != NULL) {
1455
                                error_msg("illegal trailing text: %s", pos);
1456
                        }
1457
                        *e = 0;
1458
                        ++s;
1459
                        if (cur_base && !strcmp(cur_base->name,s)) {
1460
/*                              fprintf(stderr, "skipping copy of base file %s\n", s); */
1461
#warning need to update last in order and position or check
1462
                                return;
1463
                        }
1464
/*                      fprintf(stderr, "full copy of %s\n", s); */
1465
                        pushfile(s);
1466
                        return;
1467
                }
1468
        }
1469
        error_msg("illegal or missing arg for copy: %s", s);
1470
}
1471
 
1472
static void do_colsym(void)
1473
{
1474
        char *s;
1475
        char *e;
1476
 
1477
        if ((s = next_token()) != NULL) {
1478
                e = strrchr(s,'>');
1479
                if ((*s == '<') && e && (*e == '>') && !e[1]) {
1480
                        if (next_token() != NULL) {
1481
                                error_msg("illegal trailing text: %s", pos);
1482
                        }
1483
                        e[1] = 0; /* cleanup in case next_token stored something */
1484
                        add_colitem(s,NULL);
1485
                        return;
1486
                }
1487
        }
1488
        error_msg("illegal or missing arg for collating-symbol: %s", s);
1489
}
1490
 
1491
static void do_colele(void)
1492
{
1493
        char *s;
1494
        char *e;
1495
        char *s1;
1496
        char *e1;
1497
        int n;
1498
 
1499
        if ((s = next_token()) != NULL) {
1500
                e = strrchr(s,'>');
1501
                if ((*s == '<') && e && (*e == '>') && !e[1]) {
1502
                        if (((s1 = next_token()) == NULL)
1503
                                || (strcmp(s1,"from") != 0)
1504
                                || ((s1 = next_token()) == NULL)
1505
                                || (*s1 != '\"')
1506
                                ) {
1507
                                error_msg("illegal format for collating-element spec");
1508
                        }
1509
                        e1 = strchr(s1 + 1, '"');
1510
                        if ((*s1 != '"') || !e1 || (*e1 != '"') || (e1[1] != 0)) {
1511
                                error_msg("illegal definition for collating-element: %s", s1);
1512
                        }
1513
                        if (next_token() != NULL) {
1514
                                error_msg("illegal trailing text: %s", pos);
1515
                        }
1516
                        e[1] = 0; /* cleanup in case next_token stored something */
1517
                        e1[1] = 0;
1518
                        add_colitem(s,s1);
1519
                        ++s1;
1520
                        if (!(n = is_ucode(s1))) {
1521
                                error_msg("starting char must be a <U####> code: %s", s1);
1522
                        }
1523
                        assert(s1[n] == '<');
1524
                        s1[n] = 0;
1525
                        s = xsymdup(s1);
1526
                        if (!(tsearch(s, &cur_base->root_starter_char, sym_cmp))) {
1527
                                error_msg("OUT OF MEMORY");
1528
                        }
1529
 
1530
                        return;
1531
                }
1532
        }
1533
        error_msg("illegal or missing arg for collating-element: %s", s);
1534
}
1535
 
1536
/*
 * Look up a section by name in a locale's section list.
 *
 * Returns the list item whose section has the given name, or NULL when
 * 'loc' is NULL or no section matches.  Every item on the list is
 * asserted to be of type DT_SECTION or DT_REORDER.
 */
static ll_item_t *find_section_list_item(const char *name, col_locale_t *loc)
{
	ll_item_t *item;

	if (!loc) {
		return NULL;
	}

	for (item = loc->section_list; item != NULL; item = item->next) {
#warning devel code
		assert((item->data_type == DT_SECTION) || (item->data_type == DT_REORDER));
		if (strcmp(name, ((section_t *)(item->data))->name) == 0) {
			return item;
		}
	}
	return NULL;
}
1559
 
1560
/*
 * Return the last item of a NULL-terminated list, i.e. the first item
 * reached from 'p' whose 'next' is NULL.  'p' must not be NULL.
 */
static ll_item_t *find_ll_last(ll_item_t *p)
{
	ll_item_t *last;

	assert(p);

	for (last = p; last->next != NULL; last = last->next) {
		/* just walk */
	}
	return last;
}
1569
 
1570
static void do_script(void)
1571
{
1572
        char *s;
1573
        char *e;
1574
 
1575
        if ((s = next_token()) != NULL) {
1576
                e = strrchr(s,'>');
1577
                if ((*s == '<') && e && (*e == '>') && !e[1]) {
1578
                        if (next_token() != NULL) {
1579
                                error_msg("illegal trailing text: %s", pos);
1580
                        }
1581
                        e[1] = 0; /* cleanup in case next_token stored something */
1582
                        add_script(s);
1583
                        return;
1584
                }
1585
        }
1586
        error_msg("illegal or missing arg for script: %s", s);
1587
}
1588
 
1589
static col_locale_t *new_col_locale(char *name)
1590
{
1591
        ll_item_t *lli;
1592
        ll_item_t *lli2;
1593
 
1594
        cur_col = (col_locale_t *) xmalloc(sizeof(col_locale_t));
1595
        cur_col->name = name;
1596
        cur_col->root_colitem = NULL;
1597
        cur_col->root_element = NULL;
1598
        cur_col->root_scripts = NULL;
1599
        cur_col->base_locale = NULL;
1600
        if (!superset) {
1601
                /* start with an anonymous section */
1602
                cur_section = new_section(NULL);
1603
                cur_col->section_list = new_ll_item(DT_SECTION, cur_section);
1604
        } else {
1605
                /* start with a reorder section */
1606
                cur_section = new_section("R");
1607
                cur_num_weights = cur_section->num_rules
1608
                        = ((section_t *)(cur_base->section_list->data))->num_rules;
1609
                memcpy(cur_rule,
1610
                           ((section_t *)(cur_base->section_list->data))->rules,
1611
                           MAX_COLLATION_WEIGHTS);
1612
                memcpy(cur_section->rules,
1613
                           ((section_t *)(cur_base->section_list->data))->rules,
1614
                           MAX_COLLATION_WEIGHTS);
1615
                cur_col->section_list = new_ll_item(DT_REORDER, cur_section);
1616
                assert(cur_base->section_list->next == NULL); /* currently only one section allowed */
1617
                lli = ((section_t *)(cur_base->section_list->data))->itm_list;
1618
                assert(lli);
1619
                lli2 = new_ll_item(DT_REORDER, cur_section);
1620
                lli2->prev = lli2->next = lli2;
1621
                insque(lli2, lli->prev);
1622
                ((section_t *)(cur_base->section_list->data))->itm_list = lli2;
1623
        }
1624
/*      cur_col->section_list = NULL; */
1625
/*      add_script(((section_t *)(cur_col->section_list->data))->name); */
1626
        cur_col->root_wi_index = NULL;
1627
        cur_col->root_wi_index_reordered = NULL;
1628
        cur_col->root_derived_wi = NULL;
1629
        cur_col->derived_list = NULL;
1630
        cur_col->root_starter_char = NULL;
1631
        cur_col->root_starter_all = NULL;
1632
        cur_col->undefined_idx = NULL;
1633
        return cur_col;
1634
}
1635
 
1636
static int colitem_cmp(const void *n1, const void *n2)
1637
{
1638
    return strcmp(((colitem_t *)n1)->string, ((colitem_t *)n2)->string);
1639
}
1640
 
1641
static int colelement_cmp(const void *n1, const void *n2)
1642
{
1643
    int r;
1644
 
1645
    r = strcmp(((colitem_t *)n1)->string, ((colitem_t *)n2)->string);
1646
    if (!r) {
1647
                if (((colitem_t *)n1)->element && ((colitem_t *)n2)->element) {
1648
                        r = strcmp(((colitem_t *)n1)->element, ((colitem_t *)n2)->element);
1649
                } else if (((colitem_t *)n1)->element == ((colitem_t *)n2)->element) {
1650
                        r = 0;                           /* both null */
1651
                } else {
1652
                        r = (((colitem_t *)n1)->element == NULL) ? -1 : 1;
1653
                }
1654
    }
1655
    return r;
1656
}
1657
 
1658
/*
 * Release a colitem_t node.  Only the node itself is freed; the 'string'
 * and 'element' it points to are deliberately left alone.
 */
static void del_colitem(colitem_t *p)
{
	free(p);
}
1664
 
1665
static colitem_t *new_colitem(char *item, char *def)
1666
{
1667
        colitem_t *p;
1668
 
1669
        p = xmalloc(sizeof(colitem_t));
1670
        p->string = xsymdup(item);
1671
        p->element = (!def) ? def : xsymdup(def);
1672
 
1673
        return p;
1674
}
1675
 
1676
static void add_colitem(char *item, char *def)
1677
{
1678
        colitem_t *p;
1679
 
1680
#if 0
1681
        printf("adding collation item %s", item);
1682
        if (def) {
1683
                printf(" with definition %s", def);
1684
        }
1685
        printf("\n");
1686
#endif
1687
 
1688
        p = new_colitem(item, def);
1689
 
1690
#warning devel code
1691
        if (superset) {
1692
                if (tfind(p, &cur_base->root_colitem, colitem_cmp)) {
1693
/*                      fprintf(stderr, "skipping superset duplicate collating item \"%s\"\n", p->string); */
1694
                        del_colitem(p);
1695
                        return;
1696
/*              } else { */
1697
/*                      fprintf(stderr, "superset: new collating item \"%s\" = %s\n", p->string, p->element); */
1698
                }
1699
        }
1700
 
1701
        if (cur_col == cur_derived) {
1702
                if (!tfind(p, &cur_base->root_colitem, colitem_cmp)) {
1703
                        /* not in current but could be in base */
1704
                        if (!tsearch(p, &cur_base->root_colitem, colitem_cmp)) {
1705
                                error_msg("OUT OF MEMORY!");
1706
                        }
1707
                } else if (!tfind(p,  &cur_base->root_colitem, colelement_cmp)) {
1708
                        error_msg("collating element/symbol mismatch: item=%s def=%s", item, def);
1709
                }
1710
        }
1711
 
1712
 
1713
        if (!tfind(p, &cur_col->root_colitem, colitem_cmp)) {
1714
                /* not in current but could be in base */
1715
                if (!tsearch(p, &cur_col->root_colitem, colitem_cmp)) {
1716
                        error_msg("OUT OF MEMORY!");
1717
                }
1718
        } else if (!tfind(p,  &cur_col->root_colitem, colelement_cmp)) {
1719
                error_msg("collating element/symbol mismatch");
1720
        } else {                                        /* already there */
1721
                fprintf(stderr, "duplicate collating item \"%s\"\n", p->string);
1722
                del_colitem(p);
1723
        }
1724
}
1725
 
1726
/* add a script (section) to the current locale */
1727
static void add_script(const char *s)
1728
{
1729
        ll_item_t *l;
1730
 
1731
        /* make sure it isn't in base if working with derived */
1732
        if (cur_base != cur_col) {
1733
                if (find_section_list_item(s, cur_base)) {
1734
                        error_msg("attempt to add script %s for derived when already in base", s);
1735
                }
1736
        }
1737
 
1738
        if (find_section_list_item(s, cur_col)) {
1739
                error_msg("attempt to readd script %s", s);
1740
        }
1741
 
1742
        l = find_ll_last(cur_col->section_list);
1743
        insque(new_ll_item(DT_SECTION, new_section(s)), l);
1744
}
1745
 
1746
/* Sort-rule keywords accepted by order_start. */
static const char str_forward[] =  "forward";
static const char str_backward[] = "backward";
static const char str_position[] = "position";
1749
 
1750
static void do_order_start(void)
1751
{
1752
        const char *s;
1753
        char *e;
1754
        ll_item_t *l;
1755
        section_t *sect;
1756
        int rule;
1757
 
1758
        if (order_state & ~IN_ORDER) {
1759
                error_msg("order_start following reorder{_sections}_after");
1760
        }
1761
        order_state |= IN_ORDER;
1762
 
1763
        if (superset) {
1764
                if (++superset_order_start_cnt > 1) {
1765
                        error_msg("currently only a common order_start is supported in superset");
1766
                }
1767
                return;
1768
        }
1769
 
1770
        if (!(s = next_token())) {
1771
                s = str_forward;                /* if no args */
1772
        }
1773
 
1774
        if (*s == '<') {                /* section (script) */
1775
                e = strrchr(s,'>');
1776
                if ((*s == '<') && e && (*e == '>') && !e[1]) {
1777
                        e[1] = 0; /* cleanup in case next_token stored something */
1778
 
1779
                        if (!(l = find_section_list_item(s, cur_col))) {
1780
                                error_msg("ref of undefined sections: %s", s);
1781
                        }
1782
                        sect = (section_t *)(l->data);
1783
                        if (sect->num_rules) {
1784
                                error_msg("sections already defined: %s", s);
1785
                        }
1786
                } else {
1787
                        error_msg("illegal section ref: %s", s);
1788
                }
1789
 
1790
                if (!(s = next_token())) {
1791
                        s = str_forward;                /* if no args */
1792
                } else if (*s != ';') {
1793
                        error_msg("missing seperator!");
1794
                }
1795
        } else {                                        /* need an anonymous section */
1796
                if ((*cur_section->name != '<') && (cur_section->num_items == 0)) { /* already in an empty anonymous section */
1797
                        sect = cur_section;
1798
/*                      fprintf(stdout, "using empty anon section %s\n", sect->name); */
1799
                } else {
1800
                        sect = new_section(NULL);
1801
                        l = find_ll_last(cur_col->section_list);
1802
                        insque(new_ll_item(DT_SECTION, sect), l);
1803
/*                      fprintf(stdout, "adding order section after section %s\n", ((section_t *)(l->data))->name); */
1804
/*                      fprintf(stdout, "    last section is %s\n", ((section_t *)(l->next->data))->name); */
1805
                }
1806
                sect->num_rules = 0;     /* setting this below so nix default */
1807
        }
1808
        cur_section = sect;
1809
/*      fprintf(stdout, "cur_section now %s\n", cur_section->name); */
1810
 
1811
#warning need to add section to weight list?
1812
 
1813
        /* now do rules */
1814
        do {
1815
                rule = 0;
1816
                if (*s == ';') {
1817
                        ++s;
1818
                }
1819
                while (*s) {
1820
                        if (!strncmp(str_forward, s, 7)) {
1821
                                rule |= R_FORWARD;
1822
                                s += 7;
1823
                        } else if (!strncmp(str_backward, s, 8)) {
1824
                                rule |= R_BACKWARD;
1825
                                s += 8;
1826
                        } else if (!strncmp(str_position, s, 8)) {
1827
                                rule |= R_POSITION;
1828
                                s += 8;
1829
                        }
1830
 
1831
                        if (*s == ',') {
1832
                                ++s;
1833
                                continue;
1834
                        }
1835
 
1836
                        if (!*s || (*s == ';')) {
1837
                                if (sect->num_rules >= MAX_COLLATION_WEIGHTS) {
1838
                                        error_msg("more than %d weight rules!", MAX_COLLATION_WEIGHTS);
1839
                                }
1840
                                if (!rule) {
1841
                                        error_msg("missing weight rule!");
1842
                                }
1843
                                if ((rule & (R_FORWARD|R_BACKWARD|R_POSITION)) > R_BACKWARD) {
1844
                                        error_msg("backward paired with  forward and/or position!");
1845
                                }
1846
 
1847
                                sect->rules[sect->num_rules++] = rule;
1848
                                rule = 0;
1849
                                continue;
1850
                        }
1851
 
1852
                        error_msg("illegal weight rule: %s", s);
1853
                }
1854
        } while ((s = next_token()) != NULL);
1855
 
1856
        cur_section = sect;
1857
 
1858
/*      fprintf(stderr, "setting cur_num_weights to %d for %s\n", sect->num_rules, sect->name); */
1859
        cur_num_weights = sect->num_rules;
1860
        memcpy(cur_rule, sect->rules, MAX_COLLATION_WEIGHTS);
1861
}
1862
 
1863
static void do_order_end(void)
1864
{
1865
        if (!(order_state & IN_ORDER)) {
1866
                error_msg("order_end with no matching order_start");
1867
        }
1868
        order_state &= ~IN_ORDER;
1869
 
1870
        cur_section = new_section(NULL);
1871
}
1872
 
1873
static void do_reorder_after(void)
1874
{
1875
        char *t;
1876
        ll_item_t *lli;
1877
        const weight_t *w;
1878
        int save_cur_num_weights;
1879
        char save_cur_rule[MAX_COLLATION_WEIGHTS];
1880
 
1881
 
1882
        if (order_state & ~IN_REORDER) {
1883
                error_msg("reorder_after following order_start or reorder_sections_after");
1884
        }
1885
        order_state |= IN_REORDER;
1886
 
1887
        if (superset) {
1888
                error_msg("currently reorder_after is not supported in supersets");
1889
        }
1890
 
1891
#warning have to use rule for current section!!!
1892
 
1893
        if (!(t = next_token())) {
1894
                error_msg("missing arg for reorder_after");
1895
        }
1896
 
1897
        t = xsymdup(t);
1898
 
1899
        if (next_token() != NULL) {
1900
                error_msg("trailing text reorder_after: %s", pos);
1901
        }
1902
 
1903
        if (cur_col == cur_base) {
1904
                error_msg("sorry.. reorder_after in base locale is not currently supported");
1905
        }
1906
 
1907
        if (!(lli = find_wi_index(t, cur_base))) {
1908
                error_msg("reorder_after for non-base item currently not supported: %s", t);
1909
        }
1910
 
1911
        w = ((weighted_item_t *)(lli->data))->weight;
1912
 
1913
 
1914
        save_cur_num_weights = cur_num_weights;
1915
        memcpy(save_cur_rule, cur_rule, MAX_COLLATION_WEIGHTS);
1916
 
1917
        cur_section = new_section("R");
1918
        insque(new_ll_item(DT_REORDER, cur_section), lli);
1919
 
1920
#if 0
1921
 
1922
        {
1923
                ll_item_t *l1;
1924
                ll_item_t *l2;
1925
                ll_item_t *l3;
1926
                l1 = new_ll_item(DT_REORDER, cur_section);
1927
                l2 = find_ll_last(cur_col->section_list);
1928
                insque(l1, l2);
1929
                l3 = find_ll_last(cur_col->section_list);
1930
 
1931
                fprintf(stderr, "reorder_after %p %p %p %s\n", l1, l2, l3, cur_section->name);
1932
        }
1933
#else
1934
        insque(new_ll_item(DT_REORDER, cur_section), find_ll_last(cur_col->section_list));
1935
#endif
1936
 
1937
        cur_num_weights = cur_section->num_rules = save_cur_num_weights;
1938
        memcpy(cur_rule, save_cur_rule, MAX_COLLATION_WEIGHTS);
1939
        memcpy(cur_section->rules, save_cur_rule, MAX_COLLATION_WEIGHTS);
1940
 
1941
 
1942
#warning devel code
1943
/*      fprintf(stderr, "reorder -- %s %d\n", ((weighted_item_t *)(lli->data))->symbol, w->num_weights); */
1944
 
1945
#warning hack to get around hu_HU reorder-after problem
1946
/*      if (!w->num_weights) { */
1947
 
1948
/*      } else { */
1949
/*              cur_num_weights = w->num_weights; */
1950
/*              memcpy(cur_rule, w->rule, MAX_COLLATION_WEIGHTS); */
1951
/*      }        */
1952
 
1953
/*      fprintf(stderr, "reorder_after succeeded for %s\n", t); */
1954
}
1955
 
1956
static void do_reorder_end(void)
1957
{
1958
        if (!(order_state & IN_REORDER)) {
1959
                error_msg("reorder_end with no matching reorder_after");
1960
        }
1961
        order_state &= ~IN_REORDER;
1962
}
1963
 
1964
static void do_reorder_sections_after(void)
1965
{
1966
        const char *t;
1967
        ll_item_t *lli;
1968
 
1969
        if (order_state & ~IN_REORDER_SECTIONS) {
1970
                error_msg("reorder_sections_after following order_start or reorder_after");
1971
        }
1972
        order_state |= IN_REORDER_SECTIONS;
1973
 
1974
        if (superset) {
1975
                error_msg("currently reorder_sections_after is not supported in supersets");
1976
        }
1977
 
1978
        if (!(t = next_token())) {
1979
                error_msg("missing arg for reorder_sections_after");
1980
        }
1981
 
1982
        t = xsymdup(t);
1983
 
1984
        if (next_token() != NULL) {
1985
                error_msg("trailing text reorder_sections_after: %s", pos);
1986
        }
1987
 
1988
        if (cur_col == cur_base) {
1989
                error_msg("sorry.. reorder_sections_after in base locale is not currently supported");
1990
        }
1991
 
1992
        lli = cur_base->section_list;
1993
        do {
1994
/*              fprintf(stderr, "hmm -- |%s|%d|\n", ((section_t *)(lli->data))->name, lli->data_type); */
1995
                if (lli->data_type & DT_SECTION) {
1996
/*                      fprintf(stderr, "checking |%s|%s|\n", ((section_t *)(lli->data))->name, t); */
1997
                        if (!strcmp(((section_t *)(lli->data))->name, t)) {
1998
                                reorder_section_ptr = lli;
1999
                                return;
2000
                        }
2001
                }
2002
                lli = lli->next;
2003
        } while (lli);
2004
 
2005
        error_msg("reorder_sections_after for non-base item currently not supported: %s", t);
2006
}
2007
 
2008
static void do_reorder_sections_end(void)
2009
{
2010
        if (!(order_state & IN_REORDER_SECTIONS)) {
2011
                error_msg("reorder_sections_end with no matching reorder_sections_after");
2012
        }
2013
        order_state &= ~IN_REORDER_SECTIONS;
2014
 
2015
        reorder_section_ptr = NULL;
2016
}
2017
 
2018
static ll_item_t *new_ll_item(int data_type, void *data)
2019
{
2020
        ll_item_t *p;
2021
 
2022
        p = xmalloc(sizeof(ll_item_t));
2023
        p->next = p->prev = NULL;
2024
        p->data_type = data_type;
2025
        p->data = data;
2026
        p->idx = INT_MIN;
2027
 
2028
        return p;
2029
}
2030
 
2031
/*
 * Comparator for the symbol tree: both arguments are NUL-terminated
 * strings and are ordered with strcmp().
 */
static int sym_cmp(const void *n1, const void *n2)
{
        const char *a = (const char *) n1;
        const char *b = (const char *) n2;

/*      fprintf(stderr, "sym_cmp: |%s| |%s|\n", a, b); */
        return strcmp(a, b);
}
2036
 
2037
static char *xsymdup(const char *s)
2038
{
2039
        void *p;
2040
 
2041
        if (!(p = tfind(s, &root_sym, sym_cmp))) { /* not a currently known symbol */
2042
                if (!(s = strdup(s)) || !(p = tsearch(s, &root_sym, sym_cmp))) {
2043
                        error_msg("OUT OF MEMORY!");
2044
                }
2045
                ++num_sym;
2046
                mem_sym += strlen(s) + 1;
2047
/*              fprintf(stderr, "xsymdup: alloc |%s| %p |%s| %p\n", *(char **)p, p, s, s); */
2048
/*      } else { */
2049
/*              fprintf(stderr, "xsymdup: found |%s| %p\n", *(char **)p, p); */
2050
        }
2051
        return *(char **) p;
2052
}
2053
 
2054
static int weight_cmp(const void *n1, const void *n2)
2055
{
2056
        const weight_t *w1 = (const weight_t *) n1;
2057
        const weight_t *w2 = (const weight_t *) n2;
2058
        int i, r;
2059
 
2060
        if (w1->num_weights != w2->num_weights) {
2061
                return w1->num_weights - w2->num_weights;
2062
        }
2063
 
2064
        for (i=0 ; i < w1->num_weights ; i++) {
2065
                if (w1->rule[i] != w2->rule[i]) {
2066
                        return w1->rule[i] - w2->rule[i];
2067
                }
2068
                if ((r = strcmp(w1->colitem[i], w2->colitem[i])) != 0) {
2069
                        return r;
2070
                }
2071
        }
2072
        return 0;
2073
}
2074
 
2075
/*
 * Return the canonical stored copy of the weight `*w`.
 *
 * If an equal weight (per weight_cmp) is already in root_weight, that one
 * is returned.  Otherwise a heap copy of `*w` is inserted, unique_weights
 * is incremented and the copy is returned.  The caller's `*w` is never
 * stored itself.
 */
static weight_t *register_weight(weight_t *w)
{
        void *node;
        weight_t *copy;

        node = tfind(w, &root_weight, weight_cmp);
        if (!node) {                    /* new weight */
                copy = xmalloc(sizeof(weight_t));
                memcpy(copy, w, sizeof(weight_t));
                if (!(node = tsearch(copy, &root_weight, weight_cmp))) {
                        error_msg("OUT OF MEMORY!");
                }
                ++unique_weights;
/*      } else { */
/*              fprintf(stderr, "rw: found\n"); */
        }
        return *(weight_t **) node;
}
2091
 
2092
/*
 * Count the items of the list starting at `l`.
 *
 * Works for NULL-terminated lists as well as circular ones (counting
 * stops when the walk returns to `l`).  Returns 0 for a NULL list.
 */
static size_t ll_len(ll_item_t *l)
{
        size_t n = 0;
        ll_item_t *p;

        if (!l) {
                return 0;
        }

        p = l;
        do {
                ++n;
                p = p->next;
        } while (p && (p != l));        /* p == l: circular list wrapped */

        return n;
}
2106
 
2107
/*
 * Count the items of the list starting at `l` whose data_type has at
 * least one bit in common with `mask`.
 *
 * Works for NULL-terminated lists as well as circular ones (the walk
 * stops when it returns to `l`).  Returns 0 for a NULL list.
 */
static size_t ll_count(ll_item_t *l, int mask)
{
        size_t n = 0;
        ll_item_t *p;

        if (!l) {
                return 0;
        }

        p = l;
        do {
                if (p->data_type & mask) {
                        ++n;
                }
                p = p->next;
        } while (p && (p != l));        /* p == l: circular list wrapped */

        return n;
}
2123
 
2124
 
2125
static int wi_index_cmp(const void *n1, const void *n2)
2126
{
2127
        const char *s1 = ((weighted_item_t *)(((ll_item_t *) n1)->data))->symbol;
2128
        const char *s2 = ((weighted_item_t *)(((ll_item_t *) n2)->data))->symbol;
2129
 
2130
    return strcmp(s1, s2);
2131
}
2132
 
2133
/*
 * Index the weighted list item `l` by symbol.
 *
 *  - The item for the symbol "UNDEFINED" is remembered in
 *    cur_col->undefined_idx.
 *  - The item is entered into cur_col->root_wi_index unless an item with
 *    the same symbol is already there.
 *  - When working on a derived locale, a symbol that is not part of the
 *    base index is additionally recorded in cur_base->root_derived_wi.
 *
 * `l` must be a DT_WEIGHTED item (asserted).
 */
static void add_wi_index(ll_item_t *l)
{
        assert(l->data_type == DT_WEIGHTED);

        if (!strcmp(((weighted_item_t *)(l->data))->symbol, "UNDEFINED")) {
                cur_col->undefined_idx = l;
        }

        if (!tfind(l, &cur_col->root_wi_index, wi_index_cmp)) { /* new wi_index */
                if (!tsearch(l, &cur_col->root_wi_index, wi_index_cmp)) {
                        error_msg("OUT OF MEMORY!");
                }
        }

        if (cur_base == cur_col) {
                return;
        }

        if (tfind(l, &cur_base->root_wi_index, wi_index_cmp)) {
                return;                         /* a base val */
        }

/*      printf("derived: %s\n", ((weighted_item_t *)(l->data))->symbol); */
        if (!tfind(l, &cur_base->root_derived_wi, wi_index_cmp)) { /* new derived */
                if (!tsearch(l, &cur_base->root_derived_wi, wi_index_cmp)) {
                        error_msg("OUT OF MEMORY!");
                }
        }
}
2158
 
2159
/*
 * Next index to hand out in add_final_col_index(); it is stored as the
 * hash-table data of each newly entered key and then incremented.
 */
static int final_index;
2160
 
2161
 
2162
/*
 * Test whether `s` starts with a code point symbol of the exact form
 * "<Uhhhh>" (capital 'U' followed by four hexadecimal digits).
 *
 * Returns the length of that prefix (7) on a match, 0 otherwise.  Anything
 * following the closing '>' is not examined.  The checks short-circuit, so
 * a string shorter than 7 characters is never read past its terminator.
 */
static int is_ucode(const char *s)
{
        int i;

        if ((s[0] != '<') || (s[1] != 'U')) {
                return 0;
        }

        for (i = 2; i < 6; i++) {
                /* <ctype.h> functions need a value in unsigned char range */
                if (!isxdigit((unsigned char) s[i])) {
                        return 0;
                }
        }

        return (s[6] == '>') ? 7 : 0;
}
2177
 
2178
static void add_final_col_index(const char *s)
2179
{
2180
        ENTRY e;
2181
 
2182
        e.key = (char *) s;
2183
        e.data = (void *)(final_index);
2184
        if (!hsearch(e, FIND)) {        /* not in the table */
2185
                if (!hsearch(e, ENTER)) {
2186
                        error_msg("OUT OF MEMORY! (hsearch)");
2187
                }
2188
#if 0
2189
                {
2190
                        int n;
2191
                        void *v;
2192
                        colitem_t ci;
2193
                        colitem_t *p;
2194
                        const char *t;
2195
 
2196
                        if (!strcmp(s, "UNDEFINED")) {
2197
                                printf("%6d: %s\n", final_index, s);
2198
                        } else {
2199
                                assert(*s == '<');
2200
                                if ((n = is_ucode(s)) != 0) {
2201
                                        assert(!s[n]);
2202
                                        printf("%6d: %s\n", final_index, s);
2203
                                } else {
2204
                                        ci.string = (char *) s;
2205
                                        ci.element = NULL; /* don't care */
2206
                                        v = tfind(&ci, &cur_base->root_colitem, colitem_cmp);
2207
                                        if (!v) {
2208
                                                fprintf(stderr, "%s  NOT DEFINED!!!\n", s);
2209
                                        } else {
2210
                                                p = *((colitem_t **) v);
2211
                                                if (p->element != NULL) {
2212
                                                        t = p->element;
2213
                                                        assert(*t == '"');
2214
                                                        ++t;
2215
                                                        n = is_ucode(t);
2216
                                                        assert(n);
2217
                                                        printf("%6d: %.*s | ", final_index, n, t);
2218
                                                        do {
2219
                                                                t += n;
2220
                                                                assert(*t);
2221
                                                                if (*t == '"') {
2222
                                                                        assert(!t[1]);
2223
                                                                        break;
2224
                                                                }
2225
                                                                n = is_ucode(t);
2226
                                                                assert(n);
2227
                                                                printf("%.*s", n, t);
2228
                                                        } while (1);
2229
                                                        printf("   collating-element %s\n", s);
2230
                                                } else {
2231
                                                        printf("%6d: %s  (collating-symbol)\n", final_index, s);
2232
                                                }
2233
                                        }
2234
                                }
2235
                        }
2236
                }
2237
#endif
2238
                ++final_index;
2239
        }
2240
 
2241
}
2242
 
2243
/*
 * Look up the final index stored for the key `s` in the hsearch() table.
 *
 * Returns the stored index, or 0 when the key is not in the table.
 */
static int final_index_val0(const char *s)
{
        ENTRY *p;
        ENTRY e;

        e.key = (char *) s;
        e.data = NULL;                  /* unused for FIND, but keep e fully defined */

        if (!(p = hsearch(e, FIND))) {  /* not in the table */
                return 0;
        }

        /* the index was stored in the pointer-sized data slot */
        return (int)(intptr_t)(p->data);
}
2255
 
2256
/*
 * Look up the final index stored for the key `s` in the hsearch() table.
 *
 * Unlike final_index_val0(), a missing key is reported through
 * error_msg().  NOTE(review): the code after the lookup assumes
 * error_msg() does not return -- confirm.
 */
static int final_index_val(const char *s)
{
        ENTRY *p;
        ENTRY e;

        e.key = (char *) s;
        e.data = NULL;                  /* unused for FIND, but keep e fully defined */

        if (!(p = hsearch(e, FIND))) {  /* not in the table */
                error_msg("can't find final index: %s", s);
        }

        /* the index was stored in the pointer-sized data slot */
        return (int)(intptr_t)(p->data);
}
2268
 
2269
/* Node counter shared between tnumnodes() and its twalk() callback. */
static size_t num_tree_nodes;

/*
 * twalk() callback: count every node exactly once.  Internal nodes are
 * counted on their postorder visit, leaves on their single leaf visit.
 */
static void count_nodes(const void *ptr, VISIT order, int level)
{
        (void) ptr;
        (void) level;

        if ((order == postorder) || (order == leaf)) {
                ++num_tree_nodes;
        }
}

/*
 * Return the number of nodes in the tsearch() tree rooted at `root`.
 * Not reentrant: the count is accumulated in a file-scope counter.
 */
static size_t tnumnodes(const void *root)
{
        num_tree_nodes = 0;

        twalk(root, count_nodes);

        return num_tree_nodes;
}
2287
 
2288
/*
 * Find the weighted list item for symbol `sym` in the index of locale
 * `cl` (cl->root_wi_index).
 *
 * Returns the stored ll_item_t, or NULL when the symbol is not indexed.
 * A temporary key item is built on the stack; only the fields set here
 * are initialised, and wi_index_cmp reads nothing beyond the symbol.
 */
static ll_item_t *find_wi_index(const char *sym, col_locale_t *cl)
{
        weighted_item_t key_item;
        ll_item_t key;
        void *node;

        key_item.symbol = sym;
        key.data = &key_item;
        key.data_type = DT_WEIGHTED;

        node = tfind(&key, &cl->root_wi_index, wi_index_cmp);
        if (!node) {
                return NULL;
        }

        return *(ll_item_t **) node;
}
2306
 
2307
static void mark_reordered(const char *sym)
2308
{
2309
        ll_item_t *lli;
2310
 
2311
        lli = find_wi_index(sym, cur_base);
2312
 
2313
        if (lli) {
2314
                if (!tsearch(lli, &cur_base->root_wi_index_reordered, wi_index_cmp)) {
2315
                        error_msg("OUT OF MEMORY!");
2316
                }
2317
        }
2318
}
2319
 
2320
static ll_item_t *find_wi_index_reordered(const char *sym)
2321
{
2322
        weighted_item_t w;
2323
        ll_item_t l;
2324
        void *p;
2325
 
2326
        w.symbol = sym;
2327
        l.data = &w;
2328
        l.data_type = DT_WEIGHTED;
2329
 
2330
        p = tfind(&l, &cur_base->root_wi_index_reordered, wi_index_cmp);
2331
 
2332
        if (p) {
2333
                p = *(ll_item_t **)p;
2334
        }
2335
 
2336
        return (ll_item_t *) p;
2337
}
2338
 
2339
static ll_item_t *init_comm_ptr(void)
2340
{
2341
        assert(cur_base);
2342
        assert(cur_base->section_list);
2343
        /* at the moment, only support one section in comm */
2344
        assert(cur_base->section_list->next == NULL);
2345
 
2346
        comm_cur_ptr = ((section_t *)(cur_base->section_list->data))->itm_list;
2347
 
2348
        while (comm_cur_ptr && (comm_cur_ptr->data_type & DT_REORDER)) {
2349
                comm_cur_ptr = comm_cur_ptr->next;
2350
        }
2351
 
2352
#warning devel code
2353
/*      { */
2354
/*              ll_item_t *p = comm_cur_ptr; */
2355
/*              fprintf(stderr, "init_comm_ptr\n"); */
2356
 
2357
/*              while (p != comm_cur_ptr) { */
2358
/*                      if (p->data_type & DT_WEIGHTED) { */
2359
/*                              fprintf(stderr, "%s", ((weighted_item_t *)p)->symbol); */
2360
/*                      } */
2361
/*                      p = p->next; */
2362
/*              } */
2363
/*      } */
2364
 
2365
        assert(comm_cur_ptr);
2366
 
2367
/*      fprintf(stderr, "init_comm_ptr -- %s %p %p %p %d\n", */
2368
/*                      ((weighted_item_t *)(comm_cur_ptr->data))->symbol, */
2369
/*                      comm_cur_ptr, comm_cur_ptr->prev, comm_cur_ptr->next, */
2370
/*                      ll_len(comm_cur_ptr)); */
2371
 
2372
        comm_prev_ptr = NULL;
2373
        return comm_cur_ptr;
2374
}
2375
 
2376
static ll_item_t *next_comm_ptr(void)
2377
{
2378
        /* at the moment, only support one section in comm */
2379
        assert(cur_base->section_list->next == NULL);
2380
 
2381
        comm_prev_ptr = comm_cur_ptr;
2382
 
2383
    while (comm_cur_ptr && ((comm_cur_ptr = comm_cur_ptr->next) != NULL)) {
2384
                if (!(comm_cur_ptr->data_type & DT_REORDER)) {
2385
                        break;
2386
                }
2387
        }
2388
 
2389
        return comm_cur_ptr;
2390
}
2391
 
2392
static int dump_count;
2393
 
2394
#if 0
2395
static void dump_section(section_t *s, int mask, col_locale_t *der)
2396
{
2397
        ll_item_t *lli;
2398
        ll_item_t *lli0;
2399
        weighted_item_t *w;
2400
        weight_t *p;
2401
        int i;
2402
 
2403
        lli0 = lli = s->itm_list;
2404
 
2405
        if (!lli0) {
2406
                return;
2407
        }
2408
 
2409
        do {
2410
                if (!(lli->data_type & mask)) {
2411
                        lli = lli->next;
2412
                        continue;
2413
                }
2414
                if (lli->data_type & DT_WEIGHTED) {
2415
                        ++dump_count;
2416
                        w = (weighted_item_t *)(lli->data);
2417
                        p = w->weight;
2418
                        printf("%6d: %s (%d) ", dump_count, w->symbol, p->num_weights);
2419
                        for (i = 0 ; i < p->num_weights ; i++) {
2420
                                if (p->rule[i] & R_FORWARD) {
2421
                                        printf("F");
2422
                                }
2423
                                if (p->rule[i] & R_BACKWARD) {
2424
                                        printf("B");
2425
                                }
2426
                                if (p->rule[i] & R_POSITION) {
2427
                                        printf("P");
2428
                                }
2429
                                printf(",");
2430
                        }
2431
                        for (i = 0 ; i < p->num_weights ; i++) {
2432
                                printf("   %s", p->colitem[i]);
2433
                        }
2434
                        printf("\n");
2435
                } else if (lli->data_type & (DT_SECTION|DT_REORDER)) {
2436
 
2437
                        if (lli->data_type == DT_REORDER) {
2438
                                assert(der);
2439
                                if (strncmp(((section_t *)(lli->data))->name, der->name, strlen(der->name))) {
2440
                                        lli = lli->next;
2441
                                        continue;
2442
                                }
2443
                        }
2444
 
2445
                        if (lli->data_type & DT_SECTION) {
2446
                                printf("SECTION -----------------\n");
2447
                        } else {
2448
                                printf("REORDER -----------------\n");
2449
                        }
2450
 
2451
                        dump_section((section_t *)(lli->data), mask, der);
2452
                        printf("DONE --------------------\n");
2453
                }
2454
                lli = lli->next;
2455
        } while (lli != lli0);
2456
}
2457
#else
2458
static int in_reorder_section = 0;
2459
 
2460
static void dump_section(section_t *s, int mask, col_locale_t *der)
2461
{
2462
        ll_item_t *lli;
2463
        ll_item_t *lli0;
2464
        weighted_item_t *w;
2465
        weight_t *p;
2466
        int i;
2467
 
2468
        lli0 = lli = s->itm_list;
2469
 
2470
        if (!lli0) {
2471
                return;
2472
        }
2473
 
2474
        do {
2475
                if (!(lli->data_type & mask)) {
2476
                        lli = lli->next;
2477
                        continue;
2478
                }
2479
                if (lli->data_type & DT_WEIGHTED) {
2480
                        ++dump_count;
2481
                        w = (weighted_item_t *)(lli->data);
2482
                        p = w->weight;
2483
#if 1
2484
                        if (in_reorder_section) {
2485
                                printf(" %p", w);
2486
                        }
2487
#else
2488
                        printf("%6d: %s (%d) ", dump_count, w->symbol, p->num_weights);
2489
                        for (i = 0 ; i < p->num_weights ; i++) {
2490
                                if (p->rule[i] & R_FORWARD) {
2491
                                        printf("F");
2492
                                }
2493
                                if (p->rule[i] & R_BACKWARD) {
2494
                                        printf("B");
2495
                                }
2496
                                if (p->rule[i] & R_POSITION) {
2497
                                        printf("P");
2498
                                }
2499
                                printf(",");
2500
                        }
2501
                        for (i = 0 ; i < p->num_weights ; i++) {
2502
                                printf("   %s", p->colitem[i]);
2503
                        }
2504
                        printf("\n");
2505
#endif
2506
                } else if (lli->data_type & (DT_SECTION|DT_REORDER)) {
2507
 
2508
                        if (lli->data_type == DT_REORDER) {
2509
                                assert(der);
2510
                                if (strncmp(((section_t *)(lli->data))->name, der->name, strlen(der->name))) {
2511
                                        lli = lli->next;
2512
                                        continue;
2513
                                }
2514
                        }
2515
 
2516
                        if (lli->data_type & DT_SECTION) {
2517
/*                              printf("SECTION -----------------\n"); */
2518
                                assert(0);
2519
                        } else {
2520
/*                              printf("REORDER -----------------\n"); */
2521
                                in_reorder_section = 1;
2522
                        }
2523
 
2524
                        dump_section((section_t *)(lli->data), mask, der);
2525
/*                      printf("DONE --------------------\n"); */
2526
                        printf("\n");
2527
                        in_reorder_section = 0;
2528
                }
2529
                lli = lli->next;
2530
        } while (lli != lli0);
2531
}
2532
#endif
2533
 
2534
static void dump_weights(const char *name)
2535
{
2536
        ll_item_t *lli;
2537
        col_locale_t *base;
2538
        col_locale_t *der;
2539
        col_locale_t cl;
2540
        void *p;
2541
 
2542
        assert(name);
2543
 
2544
        if (!*name) {                           /* use last */
2545
                base = cur_base;
2546
                der = cur_derived;
2547
        } else {
2548
                cl.name = (char *) name;
2549
                if (!(p = tfind(&cl, &root_col_locale, col_locale_cmp))) {
2550
                        error_msg("unknown locale: %s", name);
2551
                }
2552
                base = *((col_locale_t **) p);
2553
                der = NULL;
2554
                if (base->base_locale) { /* oops... really derived */
2555
                        der = base;
2556
                        base = der->base_locale;
2557
                }
2558
        }
2559
 
2560
        dump_count = 0;
2561
 
2562
        if (base) {
2563
/*              printf("BASE - %s\n", base->name); */
2564
                for (lli = base->section_list ; lli ; lli = lli->next) {
2565
/*                      printf("SECTION %s\n", ((section_t *)(lli->data))->name); */
2566
                        dump_section((section_t *)(lli->data), ~0, der);
2567
                }
2568
        }
2569
 
2570
        assert(der != base);
2571
 
2572
        if (der) {
2573
/*              printf("DERIVED - %s\n", der->name); */
2574
                for (lli = der->section_list ; lli ; lli = lli->next) {
2575
                        if (lli->data_type == DT_SECTION) {
2576
                                dump_section((section_t *)(lli->data), DT_WEIGHTED, der);
2577
                        }
2578
                }
2579
        }
2580
/*      printf("DONE\n"); */
2581
}
2582
 
2583
/*
 * Tree visitor whose signature matches the action callback of twalk()
 * from <search.h>.  `ptr` is read as a pointer to a string pointer, and
 * that string is written to stderr, indented, one per line.
 *
 * Only postorder and leaf visits print, so each node is reported once;
 * `level` is not used.
 */
static void print_starter_node(const void *ptr, VISIT order, int level)
{
        const char *symbol;

        (void) level;

        if (order != postorder && order != leaf) {
                return;
        }

        symbol = *(const char **) ptr;
        fprintf(stderr, "   %s\n", symbol);
}
2589
 
2590
static void finalize_base(void)
2591
{
2592
        ll_item_t *s;
2593
        ll_item_t *h;
2594
        ll_item_t *lli;
2595
        ll_item_t *h2;
2596
        ll_item_t *l2;
2597
        ll_item_t *cli;
2598
        ll_item_t *rli = NULL;
2599
        weighted_item_t *w;
2600
        weight_t *p;
2601
        int i, n, mr, r, mi;
2602
        col_locale_t *cl;
2603
        void *mm;
2604
 
2605
        int num_invariant = 0;
2606
        int num_varying = 0;
2607
        int max_weight;
2608
        int index2weight_len_inc = 1;
2609
 
2610
        assert(cur_base);
2611
        assert(base_locale_len+1 < BASE_LOCALE_LEN);
2612
 
2613
        base_locale_array[base_locale_len].name = cur_base->name;
2614
        base_locale_array[base_locale_len].num_weights = 1;
2615
        base_locale_array[base_locale_len].index2weight_offset = index2weight_len;
2616
        base_locale_array[base_locale_len].index2ruleidx_offset = index2ruleidx_len;
2617
        if (!strcmp(cur_base->name,"ja_JP") || !strcmp(cur_base->name,"ko_KR")) {
2618
#warning fix the index2weight check!!
2619
                index2weight_len_inc = 0;
2620
        }
2621
/*      printf("%s -- index2weight_len = %d\n", cur_base->name, index2weight_len); */
2622
 
2623
        if (!hcreate(30000)) {
2624
                error_msg("OUT OF MEMORY!");
2625
        }
2626
 
2627
        /* first pass ... set the fixed indexes */
2628
        final_index = i = 1;
2629
        mr = 0;
2630
        for (s = cur_base->section_list ; s ; s = s->next) {
2631
#if 1
2632
                if (s->data_type & DT_REORDER) { /* a reordered section */
2633
                        fprintf(stderr, "pass1: reordered section %s - xxx\n", ((section_t *)(s->data))->name);
2634
                        lli = ((section_t *)(s->data))->itm_list;
2635
                        r = 0;
2636
                        if (lli) {
2637
/*                              r = ll_len( ((section_t *)(lli->data))->itm_list ); */
2638
                                r = ll_len(lli) + 1;
2639
                        }
2640
                        if (r > mr) {
2641
                                mr = r;
2642
                        }
2643
                        fprintf(stderr, "pass1: reordered section %s - %d\n", ((section_t *)(s->data))->name, r);
2644
                        continue;
2645
                }
2646
#endif
2647
                h = lli = ((section_t *)(s->data))->itm_list;
2648
                if (!lli) {
2649
                        continue;
2650
                }
2651
                do {
2652
                        if (lli->data_type & DT_RANGE) {
2653
                                i += mr;
2654
                                mr = 0;
2655
#warning check ko_kR and 9
2656
/*                              ++i; */
2657
                                lli->idx = i;
2658
                                assert(!rli);
2659
                                rli = lli;
2660
                                fprintf(stderr, "range pre = %d  after = ", i);
2661
                                i += ((range_item_t *)(lli->data))->length + 1;
2662
#warning check ko_kR and 9
2663
/*                              ++i; */
2664
                                fprintf(stderr, "%d\n", i);
2665
                                if (!index2weight_len_inc) { /* ko_KR hack */
2666
                                        final_index += ((range_item_t *)(lli->data))->length + 1;
2667
                                }
2668
/*                              add_final_col_index("RANGE"); */
2669
                        } else if (lli->data_type & DT_WEIGHTED) {
2670
                                i += mr;
2671
                                mr = 0;
2672
                                w = (weighted_item_t *)(lli->data);
2673
                                if (find_wi_index_reordered(w->symbol)) { /* reordered symbol so skip on first pass */
2674
                                        ++num_varying;
2675
                                        ++i;
2676
                                        continue;
2677
                                }
2678
                                ++num_invariant;
2679
                                index2weight_buffer[index2weight_len] = lli->idx = i++;
2680
                                index2weight_len += index2weight_len_inc;
2681
                                add_final_col_index(w->symbol);
2682
 
2683
                        } else {
2684
                                assert(lli->data_type & DT_REORDER);
2685
                                r = ll_len( ((section_t *)(lli->data))->itm_list );
2686
#warning check ko_kR and 9
2687
                                if (r > mr) {
2688
                                        mr = r;
2689
                                }
2690
/*                              r = 0; */
2691
                        }
2692
                } while ((lli = lli->next) != h);
2693
        }
2694
 
2695
        /* second pass ... set the reordered indexes */
2696
        mi = i + mr;
2697
        mr = i = 0;
2698
        for (s = cur_base->section_list ; s ; s = s->next) {
2699
                h = lli = ((section_t *)(s->data))->itm_list;
2700
                if (!lli) {
2701
                        continue;
2702
                }
2703
                do {
2704
                        if (lli->data_type & DT_RANGE) {
2705
                                i += mr;
2706
                                mr = 0;
2707
                                i = lli->idx + ((range_item_t *)(lli->data))->length + 1;
2708
#warning check
2709
                        } else if ((lli->data_type & DT_WEIGHTED) && !(s->data_type & DT_REORDER)) {
2710
                                i += mr;
2711
                                mr = 0;
2712
                                w = (weighted_item_t *)(lli->data);
2713
                                if (find_wi_index_reordered(w->symbol) /* reordered symbol skipped on first pass */
2714
#if 0
2715
                                        || (s->data_type & DT_REORDER) /* or in a reordered section */
2716
#endif
2717
                                        ) {
2718
                                        assert(!(s->data_type & DT_REORDER));
2719
                                        index2weight_buffer[index2weight_len] = lli->idx = ++i;
2720
                                        index2weight_len += index2weight_len_inc;
2721
                                        add_final_col_index(w->symbol);
2722
 
2723
/*                                      fprintf(stdout, "%11s: r %6d %6d %s\n", */
2724
/*                                                      cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */
2725
 
2726
                                        continue;
2727
                                }
2728
                                i = lli->idx;
2729
 
2730
/*                              fprintf(stdout, "%11s: w %6d %6d %s\n", */
2731
/*                                              cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */
2732
 
2733
                        } else {
2734
/*                              fprintf(stderr, "section: %s  %d  %d\n", ((section_t *)(s->data))->name, */
2735
/*                                              s->data_type, lli->data_type); */
2736
/*                                      assert(!(s->data_type & DT_REORDER)); */
2737
/*                              assert(lli->data_type & DT_REORDER); */
2738
#if 1
2739
                                if (s->data_type & DT_REORDER) {
2740
                                        h2 = l2 = lli;
2741
                                        if (!h2) {
2742
                                                continue;
2743
                                        }
2744
                                } else {
2745
                                        assert(s->data_type & DT_SECTION);
2746
                                        h2 = l2 = ((section_t *)(lli->data))->itm_list;
2747
                                        if (!h2) {
2748
                                                continue;
2749
                                        }
2750
                                }
2751
 
2752
 
2753
#else
2754
                                h2 = l2 = ((section_t *)(lli->data))->itm_list;
2755
                                if (!h2) {
2756
                                        continue;
2757
                                }
2758
#endif
2759
                                r = 0;
2760
                                do {
2761
                                        assert(l2->data_type & DT_WEIGHTED);
2762
                                        ++r;
2763
                                        l2->idx = i + r;
2764
 
2765
/*                                      fprintf(stdout, "%s: R %6d        %s\n", */
2766
/*                                                      ((section_t *)(lli->data))->name, l2->idx, ((weighted_item_t *)(l2->data))->symbol); */
2767
 
2768
                                } while ((l2 = l2->next) != h2);
2769
                                if (r > mr) {
2770
                                        mr = r;
2771
                                }
2772
                        }
2773
                } while ((lli = lli->next) != h);
2774
        }
2775
 
2776
        /* finally, walk through all derived locales and set non-reordered section items */
2777
        mr = mi;
2778
        for (cli = cur_base->derived_list ; cli ; cli = cli->next) {
2779
                cl = (col_locale_t *)(cli->data);
2780
/*              fprintf(stderr, "pass3: %d  %s\n", cli->data_type, cl->name); */
2781
 
2782
/*              fprintf(stdout, "pass3: %d  %s\n", cli->data_type, cl->name); */
2783
 
2784
                assert(cli->data_type == DT_COL_LOCALE);
2785
 
2786
                i = mi;
2787
                for (s = cl->section_list ; s ; s = s->next) {
2788
/*                      if (s->data_type & DT_REORDER) { */
2789
/*                              continue; */
2790
/*                      } */
2791
                        h = lli = ((section_t *)(s->data))->itm_list;
2792
                        if (!lli) {
2793
                                continue;
2794
                        }
2795
                        do {
2796
                                assert(!(lli->data_type & DT_RANGE));
2797
                                if (lli->data_type & DT_WEIGHTED) {
2798
/*                                      fprintf(stderr, "     %d %d %s\n", lli->data_type, lli->idx, ((weighted_item_t *)(lli->data))->symbol); */
2799
                                        add_final_col_index(((weighted_item_t *)(lli->data))->symbol);
2800
                                        if (s->data_type & DT_REORDER) {
2801
                                                continue;
2802
                                        }
2803
                                        assert(lli->idx == INT_MIN);
2804
                                        lli->idx = ++i;
2805
 
2806
/*                                      fprintf(stdout, "%11s: S %6d %6d %s\n", */
2807
/*                                                      cl->name, lli->idx, */
2808
/*                                                      final_index_val(((weighted_item_t *)(lli->data))->symbol), */
2809
/*                                                      ((weighted_item_t *)(lli->data))->symbol); */
2810
 
2811
                                } else {
2812
                                        assert(0);
2813
                                        assert(lli->data_type & DT_SECTION);
2814
 
2815
                                        h2 = l2 = ((section_t *)(lli->data))->itm_list;
2816
                                        if (!h2) {
2817
                                                continue;
2818
                                        }
2819
                                        do {
2820
                                                assert(l2->data_type & DT_WEIGHTED);
2821
                                                assert(l2->idx == INT_MIN);
2822
                                                l2->idx = ++i;
2823
                                                add_final_col_index(((weighted_item_t *)(l2->data))->symbol);
2824
                                        } while ((l2 = l2->next) != h2);
2825
                                }
2826
                        } while ((lli = lli->next) != h);
2827
                }
2828
                if (i > mr) {
2829
                        mr = i;
2830
                }
2831
        }
2832
        max_weight = mr;
2833
 
2834
        assert(num_varying == tnumnodes(cur_base->root_wi_index_reordered));
2835
 
2836
        /* we can now initialize the wcs2index array */
2837
        {
2838
                ENTRY *p;
2839
                ENTRY e;
2840
                char buf[8];
2841
                static const char xd[] = "0123456789ABCDEF";
2842
                int starter_index = final_index;
2843
                int wcs2index_count = 0;
2844
 
2845
                strcpy(buf, "<U....>");
2846
                memset(wcs2index, 0, sizeof(wcs2index));
2847
                e.key = (char *) buf;
2848
                for (i=1 ; i <= 0xffff ; i++) {
2849
                        buf[5] = xd[ i & 0xf ];
2850
                        buf[4] = xd[ (i >> 4) & 0xf ];
2851
                        buf[3] = xd[ (i >> 8) & 0xf ];
2852
                        buf[2] = xd[ (i >> 12) & 0xf ];
2853
 
2854
                        if ((p = hsearch(e, FIND)) != NULL) {
2855
                                ++wcs2index_count;
2856
                                if ((tfind(buf, &cur_base->root_starter_char, sym_cmp)) != NULL) {
2857
                                        wcs2index[i] = ++starter_index;
2858
/*                                      fprintf(stderr, "wcs2index[ %#06x ] = %d  (starter)\n", i, wcs2index[i]); */
2859
                                } else {
2860
                                        wcs2index[i] = (int)(p->data);
2861
/*                                      fprintf(stderr, "wcs2index[ %#06x ] = %d\n", i, wcs2index[i]); */
2862
                                }
2863
                        } else {
2864
                                if ((tfind(buf, &cur_base->root_starter_char, sym_cmp)) != NULL) {
2865
                                        error_msg("marked starter but not in hash: %s", buf);
2866
                                }
2867
                        }
2868
                }
2869
 
2870
 
2871
        /* ---------------------------------------------------------------------- */
2872
                {
2873
                        int i, n;
2874
                        table_data table;
2875
                        size_t t, smallest;
2876
 
2877
                        n = 0;
2878
                        smallest = SIZE_MAX;
2879
                        table.ii = NULL;
2880
                        for (i=0 ; i < 14 ; i++) {
2881
                                if ((RANGE >> i) < 4) {
2882
                                        break;
2883
                                }
2884
                                t = newopt(wcs2index, RANGE, i, &table);
2885
                                if (smallest >= t) {
2886
                                        n = i;
2887
                                        smallest = t;
2888
                                        /*                      } else { */
2889
                                        /*                              break; */
2890
                                }
2891
                        }
2892
 
2893
 
2894
/*                      printf("smallest = %u  for range %#x (%u)\n", smallest, RANGE, RANGE); */
2895
                        assert(smallest != SIZE_MAX);
2896
                        if (smallest + wcs2colidt_len >= WCS2COLIDT_LEN) {
2897
                                error_msg("WCS2COLIDT_LEN too small");
2898
                        }
2899
                        base_locale_array[base_locale_len].wcs2colidt_offset = wcs2colidt_len;
2900
                        table.ii = wcs2colidt_buffer + wcs2colidt_len;
2901
                        t = smallest;
2902
                        smallest = SIZE_MAX;
2903
                        smallest = newopt(wcs2index, RANGE, n, &table);
2904
                        assert(t == smallest);
2905
                        wcs2colidt_len += smallest;
2906
/*                      fprintf(stderr, "smallest = %d   wcs2colidt_len = %d\n", smallest, wcs2colidt_len); */
2907
 
2908
#if 0
2909
                        {
2910
                                unsigned int sc, n, i0, i1;
2911
                                unsigned int u = 0xe40;
2912
                                table_data *tbl = &table;
2913
 
2914
#define __LOCALE_DATA_WCctype_TI_MASK ((1 << tbl->ti_shift)-1)
2915
#define __LOCALE_DATA_WCctype_TI_SHIFT (tbl->ti_shift)
2916
#define __LOCALE_DATA_WCctype_TI_LEN (tbl->ti_len)
2917
#define __LOCALE_DATA_WCctype_II_MASK ((1 << tbl->ii_shift)-1)
2918
#define __LOCALE_DATA_WCctype_II_SHIFT (tbl->ii_shift)
2919
#define __LOCALE_DATA_WCctype_II_LEN (tbl->ii_len)
2920
 
2921
                                sc = u & __LOCALE_DATA_WCctype_TI_MASK;
2922
                                u >>= __LOCALE_DATA_WCctype_TI_SHIFT;
2923
                                n = u & __LOCALE_DATA_WCctype_II_MASK;
2924
                                u >>= __LOCALE_DATA_WCctype_II_SHIFT;
2925
 
2926
                                i0 = tbl->ii[u];
2927
                                fprintf(stderr, "i0 = %d\n", i0);
2928
                                i0 <<= __LOCALE_DATA_WCctype_II_SHIFT;
2929
                                i1 = tbl->ii[__LOCALE_DATA_WCctype_II_LEN + i0 + n];
2930
                                /*      i1 = tbl->ti[i0 + n]; */
2931
                                fprintf(stderr, "i1 = %d\n", i1);
2932
                                i1 <<= __LOCALE_DATA_WCctype_TI_SHIFT;
2933
                                /*      return *(uint16_t *)(&(tbl->ii[__LOCALE_DATA_WCctype_II_LEN + __LOCALE_DATA_WCctype_TI_LEN + i1 + sc])); */
2934
                                fprintf(stderr, "i2 = %d\n", __LOCALE_DATA_WCctype_II_LEN + __LOCALE_DATA_WCctype_TI_LEN + i1 + sc);
2935
                                fprintf(stderr, "val = %d\n",  tbl->ii[__LOCALE_DATA_WCctype_II_LEN + __LOCALE_DATA_WCctype_TI_LEN + i1 + sc]);
2936
                                /*      return tbl->ut[i1 + sc]; */
2937
 
2938
 
2939
                        }
2940
#endif
2941
                        base_locale_array[base_locale_len].ii_shift = table.ii_shift;
2942
                        base_locale_array[base_locale_len].ti_shift = table.ti_shift;
2943
                        base_locale_array[base_locale_len].ii_len = table.ii_len;
2944
                        base_locale_array[base_locale_len].ti_len = table.ti_len;
2945
                }
2946
        /* ---------------------------------------------------------------------- */
2947
 
2948
                base_locale_array[base_locale_len].num_col_base = num_invariant + num_varying;
2949
                base_locale_array[base_locale_len].max_col_index = final_index;
2950
                base_locale_array[base_locale_len].max_weight = max_weight;
2951
 
2952
                fprintf(stderr, "%s: %6u invariant  %6u varying  %6u derived  %6u total  %6u max weight  %6u wcs2\n",
2953
                                cur_base->name, num_invariant, num_varying,
2954
                                tnumnodes(cur_base->root_derived_wi), final_index, max_weight,
2955
                                wcs2index_count);
2956
 
2957
        }
2958
 
2959
#if 1
2960
        /* ok, now we need to dump out the base and derived tables... */
2961
        /* don't forget to break up collating elements!!! */
2962
 
2963
/*      fprintf(stdout, "**************************************************\n"); */
2964
        /* first pass ... set the invariants */
2965
        for (s = cur_base->section_list ; s ; s = s->next) {
2966
#if 1
2967
                if (s->data_type & DT_REORDER) {
2968
                        fprintf(stderr, "1: skipping reordered section %s\n", ((section_t *)(s->data))->name);
2969
                        continue;
2970
                }
2971
#endif
2972
                h = lli = ((section_t *)(s->data))->itm_list;
2973
                if (!lli) {
2974
                        continue;
2975
                }
2976
                do {
2977
                        if (lli->data_type & DT_WEIGHTED) {
2978
                                w = (weighted_item_t *)(lli->data);
2979
                                if (find_wi_index_reordered(w->symbol)) { /* reordered symbol so skip on first pass */
2980
                                        continue;
2981
                                }
2982
                                if (index2weight_len_inc) {
2983
                                        index2ruleidx_buffer[index2ruleidx_len++] =
2984
                                                add_rule((weighted_item_t *)(lli->data));
2985
                                }
2986
/*                              fprintf(stdout, "%11s: w %6d %6d %s\n", */
2987
/*                                              cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */
2988
                        }
2989
                } while ((lli = lli->next) != h);
2990
        }
2991
 
2992
        /* second pass ... set varying */
2993
        for (s = cur_base->section_list ; s ; s = s->next) {
2994
#if 1
2995
                if (s->data_type & DT_REORDER) {
2996
                        fprintf(stderr, "2: skipping reordered section %s\n", ((section_t *)(s->data))->name);
2997
                        continue;
2998
                }
2999
#endif
3000
                h = lli = ((section_t *)(s->data))->itm_list;
3001
                if (!lli) {
3002
                        continue;
3003
                }
3004
                do {
3005
                        if (lli->data_type & DT_WEIGHTED) {
3006
                                w = (weighted_item_t *)(lli->data);
3007
                                if (find_wi_index_reordered(w->symbol)) { /* reordered symbol so skip on first pass */
3008
                                        if (index2weight_len_inc) {
3009
                                                index2ruleidx_buffer[index2ruleidx_len++] =
3010
                                                        add_rule((weighted_item_t *)(lli->data));
3011
                                        }
3012
/*                                      fprintf(stdout, "%11s: r %6d %6d %s\n", */
3013
/*                                                      cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */
3014
                                        continue;
3015
                                }
3016
                        }
3017
                } while ((lli = lli->next) != h);
3018
        }
3019
 
3020
        do_starter_lists(cur_base);
3021
 
3022
 
3023
/*      fprintf(stderr,"updated final_index = %d\n", final_index); */
3024
 
3025
        if (rli) {
3026
                base_locale_array[base_locale_len].range_low
3027
                        = strtoul(((range_item_t *)(rli->data))->symbol1 + 2, NULL, 16);
3028
                base_locale_array[base_locale_len].range_count
3029
                        = ((range_item_t *)(rli->data))->length;
3030
                base_locale_array[base_locale_len].range_base_weight = rli->idx;
3031
                base_locale_array[base_locale_len].range_rule_offset = add_range_rule((range_item_t *)(rli->data));
3032
/*              fprintf(stdout, "%11s:   %6d %6d %s %s (%d)\n", */
3033
/*                              "RANGE", rli->idx, -1, */
3034
/*                              ((range_item_t *)(rli->data))->symbol1, */
3035
/*                              ((range_item_t *)(rli->data))->symbol2, */
3036
/*                              ((range_item_t *)(rli->data))->length); */
3037
        }
3038
 
3039
/*      fprintf(stdout,"\nDerived\n\n"); */
3040
 
3041
        /* first, if base name is of the form ll_CC, add a derived locale for it */
3042
        if ((strlen(cur_base->name) == 5)
3043
                && islower(cur_base->name[0])
3044
                && islower(cur_base->name[1])
3045
                && (cur_base->name[2] == '_')
3046
                && isupper(cur_base->name[3])
3047
                && isupper(cur_base->name[4])
3048
                ) {
3049
 
3050
                fprintf(stderr, "adding special derived for %s\n", cur_base->name);
3051
/*      fprintf(stderr,"updated final_index = %d\n", final_index); */
3052
 
3053
 
3054
                assert(der_locale_len+1 < DER_LOCALE_LEN);
3055
 
3056
                der_locale_array[der_locale_len].name = cur_base->name;
3057
                der_locale_array[der_locale_len].base_idx = base_locale_len;
3058
 
3059
                u16_buf[0] = 1;
3060
                u16_buf[1] = 0;
3061
                u16_buf_len = 2;
3062
 
3063
                mm = NULL;
3064
                if ((u16_buf_len > override_len) ||
3065
                        !(mm = memmem(override_buffer, override_len*sizeof(override_buffer[0]),
3066
                                                  u16_buf, u16_buf_len*sizeof(u16_buf[0])))
3067
                        ) {
3068
                        assert(override_len + u16_buf_len < OVERRIDE_LEN);
3069
                        memcpy(override_buffer + override_len, u16_buf, u16_buf_len*sizeof(u16_buf[0]));
3070
                        der_locale_array[der_locale_len].overrides_offset = override_len;
3071
                        override_len += u16_buf_len;
3072
/*                      printf("%s: override_len = %d   u16_buf_len = %d\n", cl->name, override_len, u16_buf_len); */
3073
                } else if (!(u16_buf_len > override_len)) {
3074
                        assert(mm);
3075
                        der_locale_array[der_locale_len].overrides_offset = ((uint16_t *)(mm)) - override_buffer;
3076
/*                      printf("%s: memmem found a match with u16_buf_len = %d\n", cl->name, u16_buf_len); */
3077
                }
3078
                der_locale_array[der_locale_len].multistart_offset
3079
                        = base_locale_array[base_locale_len].multistart_offset;
3080
                der_locale_array[der_locale_len].undefined_idx = final_index_val0("UNDEFINED");
3081
 
3082
                if (!der_locale_array[der_locale_len].undefined_idx) {
3083
                        error_msg("no UNDEFINED definition for %s", cur_base->name);
3084
                }
3085
 
3086
                ++der_locale_len;
3087
        } else {
3088
                fprintf(stderr, "NOT adding special derived for %s\n", cur_base->name);
3089
        }
3090
 
3091
        /* now all the derived... */
3092
        for (cli = cur_base->derived_list ; cli ; cli = cli->next) {
3093
                cl = (col_locale_t *)(cli->data);
3094
                assert(cli->data_type == DT_COL_LOCALE);
3095
 
3096
                assert(der_locale_len+1 < DER_LOCALE_LEN);
3097
 
3098
                der_locale_array[der_locale_len].name = cl->name;
3099
                der_locale_array[der_locale_len].base_idx = base_locale_len;
3100
 
3101
                u16_buf_len = 0;
3102
 
3103
                for (i = 0 ; i < 2 ; i++) {
3104
                        if (i) {
3105
/*                              fprintf(stdout, "   section --- (singles)\n"); */
3106
                                u16_buf[u16_buf_len++] = 1;     /* single */
3107
                        }
3108
                        /* we do this in two passes... first all sequences, then all single reorders */
3109
                        for (s = cl->section_list ; s ; s = s->next) {
3110
/*                              fprintf(stderr, "doing section %s\n", ((section_t *)(s->data))->name); */
3111
                                h = lli = ((section_t *)(s->data))->itm_list;
3112
                                if (!lli) {
3113
/*                                      fprintf(stdout, "EMPTY ITEM LIST IN SECTION %s\n", ((section_t *)(s->data))->name ); */
3114
                                        continue;
3115
                                }
3116
                                assert(u16_buf_len +4 < sizeof(u16_buf)/sizeof(u16_buf[0]));
3117
                                if ((!i && (ll_len(h) > 1) ) || (ll_len(h) == i)) {
3118
                                        if (!i) {
3119
/*                                              fprintf(stdout, "   section ----------------- %d %d\n", i, ll_len(h)); */
3120
                                                u16_buf[u16_buf_len++] = ll_len(h);     /* multi */
3121
                                                assert(lli->data_type & DT_WEIGHTED);
3122
#if 0
3123
                                                u16_buf[u16_buf_len++] = final_index_val(((weighted_item_t *)(lli->data))->symbol);     /* start index */
3124
#endif
3125
                                                u16_buf[u16_buf_len++] = lli->idx; /* start weight */
3126
                                        }
3127
                                        do {
3128
                                                assert(lli->data_type & DT_WEIGHTED);
3129
                                                if (lli->data_type & DT_WEIGHTED) {
3130
/*                                                      fprintf(stdout, "%11s: S %6d %6d %s\n", */
3131
/*                                                                      cl->name, lli->idx, */
3132
/*                                                                      final_index_val(((weighted_item_t *)(lli->data))->symbol), */
3133
/*                                                                      ((weighted_item_t *)(lli->data))->symbol); */
3134
#if 0
3135
                                                        if (i) {
3136
                                                                assert(u16_buf_len +4 < sizeof(u16_buf)/sizeof(u16_buf[0]));
3137
                                                                u16_buf[u16_buf_len++] = final_index_val(((weighted_item_t *)(lli->data))->symbol);
3138
                                                                assert(u16_buf[u16_buf_len-1]);
3139
                                                                u16_buf[u16_buf_len++] = lli->idx; /* weight */
3140
                                                        }
3141
#else
3142
                                                        assert(u16_buf_len +4 < sizeof(u16_buf)/sizeof(u16_buf[0]));
3143
                                                        u16_buf[u16_buf_len++] = final_index_val(((weighted_item_t *)(lli->data))->symbol);
3144
                                                        assert(u16_buf[u16_buf_len-1]);
3145
                                                        if (i) {
3146
                                                                u16_buf[u16_buf_len++] = lli->idx; /* weight */
3147
                                                        }
3148
#endif
3149
                                                        u16_buf[u16_buf_len++] = add_rule((weighted_item_t *)(lli->data));
3150
 
3151
                                                }
3152
                                        } while ((lli = lli->next) != h);
3153
                                }
3154
                        }
3155
                }
3156
                u16_buf[u16_buf_len++] = 0;
3157
 
3158
                mm = NULL;
3159
                if ((u16_buf_len > override_len) ||
3160
                        !(mm = memmem(override_buffer, override_len*sizeof(override_buffer[0]),
3161
                                                  u16_buf, u16_buf_len*sizeof(u16_buf[0])))
3162
                        ) {
3163
                        assert(override_len + u16_buf_len < OVERRIDE_LEN);
3164
                        memcpy(override_buffer + override_len, u16_buf, u16_buf_len*sizeof(u16_buf[0]));
3165
                        der_locale_array[der_locale_len].overrides_offset = override_len;
3166
                        override_len += u16_buf_len;
3167
/*                      printf("%s: override_len = %d   u16_buf_len = %d\n", cl->name, override_len, u16_buf_len); */
3168
                } else if (!(u16_buf_len > override_len)) {
3169
                        assert(mm);
3170
                        der_locale_array[der_locale_len].overrides_offset = ((uint16_t *)(mm)) - override_buffer;
3171
/*                      printf("%s: memmem found a match with u16_buf_len = %d\n", cl->name, u16_buf_len); */
3172
                }
3173
 
3174
                do_starter_lists(cl);
3175
 
3176
                der_locale_array[der_locale_len].undefined_idx = final_index_val0("UNDEFINED");
3177
#if 0
3178
                assert(der_locale_array[der_locale_len].undefined_idx);
3179
                if (!der_locale_array[der_locale_len].undefined_idx) {
3180
                        der_locale_array[der_locale_len].undefined_idx = base_locale_array[base_locale_len].undefined_idx;
3181
                }
3182
#endif
3183
 
3184
                if (!der_locale_array[der_locale_len].undefined_idx) {
3185
                        error_msg("no UNDEFINED definition for %s", cl->name);
3186
                }
3187
 
3188
                ++der_locale_len;
3189
        }
3190
 
3191
#endif
3192
 
3193
#warning handle UNDEFINED idx specially?  what if in only some of derived?
3194
/*      base_locale_array[base_locale_len].undefined_idx = final_index_val0("UNDEFINED"); */
3195
        base_locale_array[base_locale_len].undefined_idx = 0;
3196
 
3197
 
3198
        hdestroy();
3199
 
3200
        ++base_locale_len;
3201
 
3202
/*      if (tnumnodes(cur_base->root_starter_char)) { */
3203
/*              fprintf(stderr, "starter nodes\n"); */
3204
/*              twalk(cur_base->root_starter_char, print_starter_node); */
3205
/*      } */
3206
}
3207
 
3208
static int starter_all_cmp(const void *n1, const void *n2)
3209
{
3210
        const char *s1 = ((weighted_item_t *) n1)->symbol;
3211
        const char *s2 = ((weighted_item_t *) n2)->symbol;
3212
        colitem_t x;
3213
        colitem_t *p;
3214
        int n;
3215
 
3216
        /* sort by 1st char ... then inverse for string */
3217
 
3218
        x.element = NULL;
3219
        if (!is_ucode(s1)) {
3220
                x.string = s1;
3221
                p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
3222
                s1 = (*((colitem_t **) p))->element + 1;
3223
        }
3224
        if (!is_ucode(s2)) {
3225
                x.string = s2;
3226
                p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
3227
                s2 = (*((colitem_t **) p))->element + 1;
3228
        }
3229
 
3230
        /* <U####>< */
3231
        /* 01234567 */
3232
 
3233
        assert(is_ucode(s1));
3234
        assert(is_ucode(s2));
3235
 
3236
        n = strncmp(s1+2, s2+2, 4);
3237
        if (n) {
3238
                return n;
3239
        }
3240
 
3241
        s1 += 7;
3242
        s2 += 7;
3243
 
3244
        return strcmp(s2, s1);
3245
}
3246
 
3247
static void print_starter_all_node(const void *ptr, VISIT order, int level)
3248
{
3249
    const weighted_item_t *w = *(const weighted_item_t **) ptr;
3250
        colitem_t *ci;
3251
        void *p;
3252
        int n;
3253
        colitem_t x;
3254
 
3255
    if (order == postorder || order == leaf)  {
3256
#if 0
3257
                if ((n = is_ucode(w->symbol)) != 0) {
3258
                        printf(" %s\n", w->symbol);
3259
                } else {
3260
                        x.string = w->symbol;
3261
                        x.element = NULL;
3262
                        p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
3263
                        assert(p);
3264
                        ci = *((colitem_t **) p);
3265
                        printf("%s = %s\n", ci->element, w->symbol);
3266
                }
3267
#else
3268
                printf("%s|", w->symbol);
3269
/*              if ((n = is_ucode(w->symbol)) != 0) { */
3270
/*                      printf("\n"); */
3271
/*              } */
3272
#endif
3273
        }
3274
}
3275
 
3276
static void process_starter_node(const void *ptr, VISIT order, int level)
3277
{
3278
    const weighted_item_t *w = *(const weighted_item_t **) ptr;
3279
        colitem_t *ci;
3280
        void *p;
3281
        int n;
3282
        colitem_t x;
3283
        const char *s;
3284
        char buf[32];
3285
 
3286
        /* store index of collation item followed by (unprefixed) nul-terminated string */
3287
    if (order == postorder || order == leaf)  {
3288
                if ((n = is_ucode(w->symbol)) != 0) {
3289
                        u16_buf[u16_buf_len++] = final_index_val(w->symbol);
3290
                        assert(u16_buf[u16_buf_len-1]);
3291
                        u16_buf[u16_buf_len++] = 0;
3292
                        if (++u16_starter < base_locale_array[base_locale_len].num_starters) {
3293
                                u16_buf[u16_starter] = u16_buf_len;
3294
                        }
3295
/*                      fprintf(stderr, "ucode - %d %d\n", u16_buf[u16_starter-1], u16_buf_len); */
3296
                } else {
3297
                        x.string = w->symbol;
3298
                        x.element = NULL;
3299
                        p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
3300
                        assert(p);
3301
                        ci = *((colitem_t **) p);
3302
                        s = ci->element;
3303
                        u16_buf[u16_buf_len++] = final_index_val(w->symbol);
3304
                        assert(u16_buf[u16_buf_len-1]);
3305
                        assert(*s == '"');
3306
                        n = is_ucode(++s);
3307
/*                      fprintf(stderr, "s is |%s| with len %d (%d)\n", s, strlen(s), n); */
3308
                        assert(n);
3309
                        s += n;
3310
                        while (*s != '"') {
3311
                                n = is_ucode(s);
3312
                                assert(n);
3313
                                strncpy(buf, s, n+1);
3314
                                buf[n] = 0;
3315
/*                              fprintf(stderr, "buf is |%s| with len %d (%d)\n", buf, strlen(buf), n); */
3316
                                u16_buf[u16_buf_len++] = final_index_val(buf);
3317
                                assert(u16_buf[u16_buf_len-1]);
3318
                                s += n;
3319
                        }
3320
                        u16_buf[u16_buf_len++] = 0;
3321
                }
3322
        }
3323
}
3324
 
3325
static void **p_cl_root_starter_all;
3326
 
3327
static void complete_starter_node(const void *ptr, VISIT order, int level)
3328
{
3329
        weighted_item_t w;
3330
        weighted_item_t *p;
3331
 
3332
    if (order == postorder || order == leaf)  {
3333
                w.symbol = *(const char **) ptr;
3334
                w.weight = NULL;
3335
                if (!tfind(&w, p_cl_root_starter_all, starter_all_cmp)) {
3336
                        p = xmalloc(sizeof(weighted_item_t));
3337
                        p->symbol = w.symbol;
3338
                        p->weight = NULL;
3339
/*                      fprintf(stderr, "complete_starter_node: %s\n", *(const char **) ptr); */
3340
                        if (!tsearch(p, p_cl_root_starter_all, starter_all_cmp)) {
3341
                                error_msg("OUT OF MEMORY");
3342
                        }
3343
                }
3344
    }
3345
}
3346
 
3347
/*
 * Scan every section of locale src and insert into cl->root_starter_all each
 * weighted item whose first character is present in
 * cur_base->root_starter_char.
 *
 * The first character is the symbol itself when is_ucode() accepts it,
 * otherwise the first <Uhhhh> of the item's element string.  Items that
 * cannot be resolved, or that are plain collating symbols without an element
 * string, are skipped.
 */
static void collect_starter_items(col_locale_t *src, col_locale_t *cl)
{
	ll_item_t *s;
	ll_item_t *h;
	ll_item_t *lli;
	colitem_t *ci;
	weighted_item_t *w;
	void *p;
	char buf[32];
	int n;
	colitem_t x;

	for (s = src->section_list ; s ; s = s->next) {
		h = lli = ((section_t *)(s->data))->itm_list;
		if (!lli) {
			continue;
		}
		/* The item list is circular; "continue" advances to the next item. */
		do {
			if (!(lli->data_type & DT_WEIGHTED)) {
				continue;
			}
			w = (weighted_item_t *)(lli->data);
			if (is_ucode(w->symbol) != 0) {
				assert(strlen(w->symbol) < sizeof(buf));
				strcpy(buf, w->symbol);
			} else {
				x.string = w->symbol;
				x.element = NULL;
				p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
				if (!p) {
					continue;
				}
				ci = *((colitem_t **) p);
				if (!ci->element) {	/* just a collating symbol */
					continue;
				}
				assert(ci->element[0] == '"');
				n = is_ucode(ci->element + 1);
				assert(n);
				/*
				 * Copy exactly the first <Uhhhh> and terminate it.  The
				 * lookup below treats buf as a string, so it must never
				 * see bytes left over from an earlier iteration.
				 */
				assert((size_t) n < sizeof(buf));
				memcpy(buf, ci->element + 1, (size_t) n);
				buf[n] = 0;
			}
			if (tfind(buf, &cur_base->root_starter_char, sym_cmp) != NULL) {
				if (!tsearch(w, &cl->root_starter_all, starter_all_cmp)) {
					error_msg("OUT OF MEMORY");
				}
			}
		} while ((lli = lli->next) != h);
	}
}

/*
 * Build the starter table for locale cl and record where it lives in
 * multistart_buffer.
 *
 * Steps:
 *  1. Collect starter items from cur_base and, when cl is a different
 *     locale, from cl as well, into cl->root_starter_all.
 *  2. Add an entry for every starter character that has none yet
 *     (complete_starter_node).
 *  3. When cl is the base locale, record the number of starter characters
 *     in base_locale_array[base_locale_len].num_starters.
 *  4. Serialize the tree into u16_buf (process_starter_node); the first
 *     num_starters slots are reserved up front.
 *  5. Reuse an identical run already in multistart_buffer if memmem() finds
 *     one, otherwise append; store the resulting offset in the base or
 *     derived locale entry.
 */
static void do_starter_lists(col_locale_t *cl)
{
	void *mm;
	int offset;

/*	printf("STARTERS %s --------------------\n", cl->name); */

	/* Base locale first, then the derived locale (if it is a different one). */
	collect_starter_items(cur_base, cl);
	if (cl != cur_base) {
		collect_starter_items(cl, cl);
	}

	p_cl_root_starter_all = &cl->root_starter_all;
	twalk(cur_base->root_starter_char, complete_starter_node);

	if (cl == cur_base) {
		base_locale_array[base_locale_len].num_starters = tnumnodes(cur_base->root_starter_char);
	}

#if 0
	printf("\nNow walking tree...\n\n");
	twalk(cl->root_starter_all, print_starter_all_node);
	printf("\n\n");

#endif
	u16_starter = 0;
	u16_buf[0] = u16_buf_len = base_locale_array[base_locale_len].num_starters;
	twalk(cl->root_starter_all, process_starter_node);
/*	fprintf(stderr, "s=%d n=%d\n", u16_starter,  base_locale_array[base_locale_len].num_starters); */
	assert(u16_starter == base_locale_array[base_locale_len].num_starters);

#if 0
	{ int i;
	for (i=0 ; i < u16_buf_len ; i++) {
		fprintf(stderr, "starter %2d: %d - %#06x\n", i, u16_buf[i], u16_buf[i]);
	}}
#endif

	if (!u16_buf_len) {
		assert(!base_locale_array[base_locale_len].num_starters);
		return;
	}

	/* Only search when the new run can possibly fit inside the existing data. */
	mm = NULL;
	if (!(u16_buf_len > multistart_len)) {
		mm = memmem(multistart_buffer, multistart_len*sizeof(multistart_buffer[0]),
					u16_buf, u16_buf_len*sizeof(u16_buf[0]));
	}

	if (mm) {
		offset = ((uint16_t *)(mm)) - multistart_buffer;
	} else {
		assert(multistart_len + u16_buf_len < MULTISTART_LEN);
		memcpy(multistart_buffer + multistart_len, u16_buf, u16_buf_len*sizeof(u16_buf[0]));
		offset = multistart_len;
		multistart_len += u16_buf_len;
	}

	if (cl == cur_base) {
		base_locale_array[base_locale_len].multistart_offset = offset;
	} else {
		der_locale_array[der_locale_len].multistart_offset = offset;
	}

/*	printf("STARTERS %s DONE ---------------\n", cl->name); */
}
3477
 
3478
 
3479
/* For sorting the blocks of unsigned chars. */
3480
static size_t nu_val;
3481
 
3482
int nu_memcmp(const void *a, const void *b)
3483
{
3484
        return memcmp(*(unsigned char**)a, *(unsigned char**)b, nu_val * sizeof(tbl_item));
3485
}
3486
 
3487
 
3488
/*
 * Evaluate (and optionally build) a block-compressed lookup table.
 *
 * The usize items at ut are split into blocks of (1 << shift) items.  The
 * blocks are sorted, identical blocks are merged, and uit[] receives, for
 * each original block, the number of the unique block that replaces it.
 *
 * On the outermost call (recurse == 0) the function additionally tries to
 * compress uit[] the same way with second-level shifts 1..13 and keeps the
 * best one.
 *
 * Returns the smallest table size found (in items), or SIZE_MAX if shift
 * is above 15 or there are more unique blocks than the
 * 1 << (8*sizeof(tbl_item) - 1) limit.
 *
 * If tbl is non-NULL and tbl->ii is non-NULL, the resulting tables are also
 * written out through tbl: the nested call fills ii/ti (plus ii_shift,
 * ii_len, ti_len) and the outer call fills ut (plus ti_shift, ut_len)
 * directly behind them.  With tbl NULL or tbl->ii NULL only the size is
 * computed.
 */
size_t newopt(tbl_item *ut, size_t usize, int shift, table_data *tbl)
{
	static int recurse = 0;
	tbl_item *ti[RANGE];	/* table index */
	size_t numblocks;
	size_t blocksize;
	size_t uniq;
	size_t i, j;
	size_t smallest, t;
	tbl_item *ii_save;
	int uniqblock[1 << (8*sizeof(tbl_item) - 1)];
	tbl_item uit[RANGE];
	int shift2;

	if (shift > 15) {
		return SIZE_MAX;
	}

	ii_save = NULL;
	blocksize = 1 << shift;
	numblocks = usize >> shift;

	/* ti[] and uit[] hold one entry per block, and ti[0] is used unconditionally. */
	assert(numblocks >= 1 && numblocks <= sizeof(ti)/sizeof(ti[0]));

	/* init table index */
	for (i=j=0 ; i < numblocks ; i++) {
		ti[i] = ut + j;
		j += blocksize;
	}

	/* sort */
	nu_val = blocksize;
	qsort(ti, numblocks, sizeof(ti[0]), nu_memcmp);

	/*
	 * Count unique blocks.  uniqblock[k] is the position in the sorted
	 * ti[] of the first block with unique number k; number 0 is always
	 * the first sorted block.
	 */
	uniq = 1;
	uniqblock[0] = 0;
	uit[(ti[0]-ut)/blocksize] = 0;
	for (i=1 ; i < numblocks ; i++) {
		if (memcmp(ti[i-1], ti[i], blocksize*sizeof(tbl_item)) < 0) {
			if (++uniq > (1 << (8*sizeof(tbl_item) - 1))) {
				break;
			}
			uniqblock[uniq - 1] = i;
		} else if (memcmp(ti[i-1], ti[i], blocksize*sizeof(tbl_item)) > 0) {
			/* Sanity check on the qsort() result. */
			printf("bad sort %i!\n", (int) i);
			abort();
		}
		uit[(ti[i]-ut)/blocksize] = uniq - 1;
	}

	if (uniq > (1 << (8*sizeof(tbl_item) - 1))) {
		return SIZE_MAX;
	}

	smallest = numblocks + uniq * blocksize;
	shift2 = -1;
	if (!recurse) {
		++recurse;
		for (j=1 ; j < 14 ; j++) {
			if ((numblocks >> j) < 2) break;
			/* Hide tbl->ii so the nested call only measures. */
			if (tbl) {
				ii_save = tbl->ii;
				tbl->ii = NULL;
			}
			if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
				t += uniq * blocksize;
			}
			if (tbl) {
				tbl->ii = ii_save;
			}
			if (smallest >= t) {
				shift2 = j;
				smallest = t;
			}
		}
		--recurse;
	}

	if (tbl && tbl->ii) {
		if (recurse) {
			/* Nested call: store the index-of-index and the table index. */
			tbl->ii_shift = shift;
			tbl->ii_len = numblocks;
			memcpy(tbl->ii, uit, numblocks*sizeof(tbl_item));
			tbl->ti = tbl->ii + tbl->ii_len;
			tbl->ti_len = uniq * blocksize;
			for (i=0 ; i < uniq ; i++) {
				memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize*sizeof(tbl_item));
			}
		} else {
			/* Outer call: rebuild ii/ti with the best shift, then append ut. */
			assert(shift2 >= 0);
			++recurse;
			newopt(uit, numblocks, shift2, tbl);
			--recurse;
			tbl->ti_shift = shift;
			tbl->ut_len = uniq * blocksize;
			tbl->ut = tbl->ti + tbl->ti_len;
			for (i=0 ; i < uniq ; i++) {
				memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize*sizeof(tbl_item));
			}
		}
	}
	return smallest;
}
3598
 
3599
/*
 * Value OR-ed into a 16-bit rule entry for each rule code (the array
 * index).  The valid values occupy bits 14-15; -1 marks a rule code that
 * must not occur (callers assert the looked-up value is >= 0).
 */
static const int rule2val[8] = {
	/* 0 */ -1,
	/* 1 */ 1 << 14,	/* forward */
	/* 2 */ 2 << 14,	/* position */
	/* 3 */ 3 << 14,	/* forward,position */
	/* 4 */ 0,		/* backward */
	/* 5 */ -1,
	/* 6 */ -1,
	/* 7 */ -1,
};
3609
 
3610
 
3611
/*
 * Look up the final index of collation item s, with fallbacks for names
 * that final_index_val0() does not resolve (it returns 0 for those):
 *
 *   "IGNORE"                      -> 0
 *   ".." or sym equal to "RANGE"  -> 0x3fff (if sym starts with '.',
 *                                    final_index_val(sym) is called first)
 *   "."                           -> 0x3ffe
 *   anything else                 -> error_msg() is called
 *
 * sym is the symbol whose weight is being processed.
 */
static int final_index_val_x(const char *s, const char *sym)
{
	int idx = final_index_val0(s);

	if (idx) {
		return idx;
	}

	if (strcmp(s, "IGNORE") == 0) {
		return 0;
	}

	if (strcmp(s, "..") == 0 || strcmp(sym, "RANGE") == 0) {
		if (sym[0] == '.') {
			final_index_val(sym); /* make sure it's known */
		}
		return 0x3fff;
	}

	if (strcmp(s, ".") == 0) {
		return 0x3ffe;
	}

	error_msg("can't find final index: %s", s);
	return idx;
}
3631
 
3632
/* store rule2val in 2 high bits and collation index in lower.
3633
 * for sort strings, store (offset from base) + max colindex as index.
3634
 */
3635
static unsigned int add_rule(weighted_item_t *wi)
3636
{
3637
        weight_t *w = wi->weight;
3638
        int i, j, r, n;
3639
        uint16_t rbuf[MAX_COLLATION_WEIGHTS];
3640
        uint16_t ws_buf[32];
3641
        void *mm;
3642
        char buf[32];
3643
        const char *s;
3644
        const char *e;
3645
 
3646
        for (i=0 ; i < MAX_COLLATION_WEIGHTS ; i++) {
3647
                rbuf[i] = rule2val[R_FORWARD]; /* set a default to forward-ignore */
3648
        }
3649
 
3650
        if (base_locale_array[base_locale_len].num_weights < w->num_weights) {
3651
                base_locale_array[base_locale_len].num_weights = w->num_weights;
3652
        }
3653
 
3654
        for (i=0 ; i < w->num_weights ; i++) {
3655
                assert(rule2val[(int)(w->rule[i])] >= 0);
3656
                assert(w->colitem[i] && *w->colitem[i]);
3657
                if (*w->colitem[i] == '"') { /* string... */
3658
                        s = w->colitem[i] + 1;
3659
                        assert(*s == '<');
3660
                        n = 0;
3661
                        do {
3662
                                e = s;
3663
                                do {
3664
                                        if (*e == '/') {
3665
                                                e += 2;
3666
                                                continue;
3667
                                        }
3668
                                } while (*e++ != '>');
3669
                                assert(((size_t)(e-s) < sizeof(buf)));
3670
                                memcpy(buf, s, (size_t)(e-s));
3671
                                buf[(size_t)(e-s)] = 0;
3672
 
3673
                                r = final_index_val_x(buf, wi->symbol);
3674
                                assert(n + 1 < sizeof(ws_buf)/sizeof(ws_buf[0]));
3675
                                ws_buf[n++] = r | rule2val[(int)(w->rule[i])];
3676
 
3677
                                s = e;
3678
                        } while (*s != '"');
3679
                        ws_buf[n++] = 0; /* terminator */
3680
 
3681
                        mm = memmem(weightstr_buffer, weightstr_len*sizeof(weightstr_buffer[0]),
3682
                                                ws_buf, n*sizeof(ws_buf[0]));
3683
 
3684
                        if (!mm) {
3685
                                assert(weightstr_len + n < WEIGHTSTR_LEN);
3686
                                memcpy(weightstr_buffer + weightstr_len, ws_buf, n*sizeof(ws_buf[0]));
3687
                                mm = weightstr_buffer + weightstr_len;
3688
                                weightstr_len += n;
3689
                        }
3690
                        r = (((uint16_t *)(mm)) - weightstr_buffer)
3691
                                + base_locale_array[base_locale_len].max_col_index + 2;
3692
                        assert(r < (1 << 14));
3693
                        rbuf[i] = r | rule2val[(int)(w->rule[i])];
3694
                } else {                                /* item */
3695
                        r = final_index_val_x(w->colitem[i], wi->symbol);
3696
                        rbuf[i] = r | rule2val[(int)(w->rule[i])];
3697
                }
3698
        }
3699
 
3700
        for (i=0 ; i < ruletable_len ; i += MAX_COLLATION_WEIGHTS) {
3701
                if (!memcmp(ruletable_buffer + i, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]))) {
3702
                        return i/MAX_COLLATION_WEIGHTS;
3703
                }
3704
        }
3705
 
3706
        memcpy(ruletable_buffer + ruletable_len, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]));
3707
        ruletable_len += MAX_COLLATION_WEIGHTS;
3708
 
3709
        return  (ruletable_len / MAX_COLLATION_WEIGHTS)-1;
3710
}
3711
 
3712
static unsigned int add_range_rule(range_item_t *ri)
3713
{
3714
        weight_t *w = ri->weight;
3715
        int i, j, r, n;
3716
        uint16_t rbuf[MAX_COLLATION_WEIGHTS];
3717
        uint16_t ws_buf[32];
3718
        void *mm;
3719
        char buf[32];
3720
        const char *s;
3721
        const char *e;
3722
 
3723
        for (i=0 ; i < MAX_COLLATION_WEIGHTS ; i++) {
3724
                rbuf[i] = rule2val[R_FORWARD]; /* set a default to forward-ignore */
3725
        }
3726
 
3727
        if (base_locale_array[base_locale_len].num_weights < w->num_weights) {
3728
                base_locale_array[base_locale_len].num_weights = w->num_weights;
3729
        }
3730
 
3731
        for (i=0 ; i < w->num_weights ; i++) {
3732
                assert(rule2val[(int)(w->rule[i])] >= 0);
3733
                assert(w->colitem[i] && *w->colitem[i]);
3734
                if (*w->colitem[i] == '"') { /* string... */
3735
                        s = w->colitem[i] + 1;
3736
                        assert(*s == '<');
3737
                        n = 0;
3738
                        do {
3739
                                e = s;
3740
                                do {
3741
                                        if (*e == '/') {
3742
                                                e += 2;
3743
                                                continue;
3744
                                        }
3745
                                } while (*e++ != '>');
3746
                                assert(((size_t)(e-s) < sizeof(buf)));
3747
                                memcpy(buf, s, (size_t)(e-s));
3748
                                buf[(size_t)(e-s)] = 0;
3749
 
3750
                                r = final_index_val_x(buf, "RANGE");
3751
                                assert(n + 1 < sizeof(ws_buf)/sizeof(ws_buf[0]));
3752
                                ws_buf[n++] = r | rule2val[(int)(w->rule[i])];
3753
 
3754
                                s = e;
3755
                        } while (*s != '"');
3756
                        ws_buf[n++] = 0; /* terminator */
3757
 
3758
                        mm = memmem(weightstr_buffer, weightstr_len*sizeof(weightstr_buffer[0]),
3759
                                                ws_buf, n*sizeof(ws_buf[0]));
3760
 
3761
                        if (!mm) {
3762
                                assert(weightstr_len + n < WEIGHTSTR_LEN);
3763
                                memcpy(weightstr_buffer + weightstr_len, ws_buf, n*sizeof(ws_buf[0]));
3764
                                mm = weightstr_buffer + weightstr_len;
3765
                                weightstr_len += n;
3766
                        }
3767
                        r = (((uint16_t *)(mm)) - weightstr_buffer)
3768
                                + base_locale_array[base_locale_len].max_col_index + 2;
3769
                        assert(r < (1 << 14));
3770
                        rbuf[i] = r | rule2val[(int)(w->rule[i])];
3771
                } else {                                /* item */
3772
                        r = final_index_val_x(w->colitem[i], "RANGE");
3773
                        rbuf[i] = r | rule2val[(int)(w->rule[i])];
3774
                }
3775
        }
3776
 
3777
        for (i=0 ; i < ruletable_len ; i += MAX_COLLATION_WEIGHTS) {
3778
                if (!memcmp(ruletable_buffer + i, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]))) {
3779
                        return i/MAX_COLLATION_WEIGHTS;
3780
                }
3781
        }
3782
 
3783
        memcpy(ruletable_buffer + ruletable_len, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]));
3784
        ruletable_len += MAX_COLLATION_WEIGHTS;
3785
 
3786
        return  (ruletable_len / MAX_COLLATION_WEIGHTS)-1;
3787
}
3788
 
3789
#define DUMPn(X) fprintf(stderr, "%10d-%-.20s", base_locale_array[n]. X, #X);
3790
 
3791
static void dump_base_locale(int n)
3792
{
3793
        assert(n < base_locale_len);
3794
 
3795
        fprintf(stderr, "Base Locale: %s\n", base_locale_array[n].name);
3796
 
3797
        DUMPn(num_weights);
3798
 
3799
        DUMPn(ii_shift);
3800
        DUMPn(ti_shift);
3801
        DUMPn(ii_len);
3802
        DUMPn(ti_len);
3803
        DUMPn(max_weight);
3804
        fprintf(stderr, "\n");
3805
        DUMPn(num_col_base);
3806
        DUMPn(max_col_index);
3807
        DUMPn(undefined_idx);
3808
        DUMPn(range_low);
3809
        DUMPn(range_count);
3810
        fprintf(stderr, "\n");
3811
        DUMPn(range_base_weight);
3812
        DUMPn(num_starters);
3813
 
3814
        fprintf(stderr, "\n");
3815
        DUMPn(range_rule_offset);
3816
        DUMPn(wcs2colidt_offset);
3817
        DUMPn(index2weight_offset);
3818
        fprintf(stderr, "\n");
3819
        DUMPn(index2ruleidx_offset);
3820
        DUMPn(multistart_offset);
3821
        fprintf(stderr, "\n");
3822
}
3823
 
3824
#undef DUMPn
3825
#define DUMPn(X) fprintf(stderr, "%10d-%s", der_locale_array[n]. X, #X);
3826
 
3827
static void dump_der_locale(int n)
3828
{
3829
        assert(n < der_locale_len);
3830
 
3831
        fprintf(stderr, "Derived Locale: %s (%.12s)",
3832
                        der_locale_array[n].name,
3833
                        base_locale_array[der_locale_array[n].base_idx].name);
3834
 
3835
 
3836
        DUMPn(base_idx);
3837
 
3838
        DUMPn(undefined_idx);
3839
 
3840
        DUMPn(overrides_offset);
3841
        DUMPn(multistart_offset);
3842
 
3843
        fprintf(stderr, "\n");
3844
}
3845
 
3846
 
3847
/*
 * Running position within the emitted collate table, counted in uint16_t
 * elements.  It is printed in the generated comments and advanced by every
 * emitter that writes table entries.
 */
static unsigned long collate_pos;

/*
 * Write `len` elements of `u` to `fp` as a C initializer fragment.
 *
 * Output starts with a comment line giving the current collate_pos and
 * `name`, then the values in "%#06x" form, eight per line, each line
 * introduced by a newline and a tab.  A final newline closes the fragment.
 * collate_pos is advanced by `len` afterwards.
 */
static void dump_u16_array(FILE *fp, uint16_t *u, int len, const char *name)
{
        int idx = 0;

        fprintf(fp, "\t/* %8lu %s */\n", collate_pos, name);

        while (idx < len) {
                /* Start a fresh output line before every group of eight. */
                if ((idx % 8) == 0) {
                        fputs("\n\t", fp);
                }
                fprintf(fp, "  %#06x,", (unsigned int) u[idx]);
                ++idx;
        }

        fputc('\n', fp);
        collate_pos += len;
}
3863
 
3864
#define OUT_U16C(X,N) fprintf(fp,"\t%10d, /* %8lu %s */\n", X, collate_pos++, N); 
3865
 
3866
static void dump_collate(FILE *fp)
3867
{
3868
        int n;
3869
 
3870
        fprintf(fp, "const uint16_t __locale_collate_tbl[] = {\n");
3871
 
3872
        OUT_U16C(base_locale_len, "numbef of base locales");
3873
        OUT_U16C(der_locale_len, "number of derived locales");
3874
        OUT_U16C(MAX_COLLATION_WEIGHTS, "max collation weights");
3875
        OUT_U16C(index2weight_len, "number of index2{weight|ruleidx} elements");
3876
        OUT_U16C(weightstr_len, "number of weightstr elements");
3877
        OUT_U16C(multistart_len, "number of multistart elements");
3878
        OUT_U16C(override_len, "number of override elements");
3879
        OUT_U16C(ruletable_len, "number of ruletable elements");
3880
 
3881
#undef DUMPn
3882
#define DUMPn(X) fprintf(fp, "\t%10d, /* %8lu %s */\n", base_locale_array[n]. X, collate_pos++, #X);
3883
        for (n=0 ; n < base_locale_len ; n++) {
3884
                unsigned wcs2colidt_offset_low = base_locale_array[n].wcs2colidt_offset & 0xffffU;
3885
                unsigned wcs2colidt_offset_hi = base_locale_array[n].wcs2colidt_offset >> 16;
3886
                fprintf(fp, "\t/* Base Locale %2d: %s */\n", n, base_locale_array[n].name);
3887
                DUMPn(num_weights);
3888
                DUMPn(num_starters);
3889
                DUMPn(ii_shift);
3890
                DUMPn(ti_shift);
3891
                DUMPn(ii_len);
3892
                DUMPn(ti_len);
3893
                DUMPn(max_weight);
3894
                DUMPn(num_col_base);
3895
                DUMPn(max_col_index);
3896
                DUMPn(undefined_idx);
3897
                DUMPn(range_low);
3898
                DUMPn(range_count);
3899
                DUMPn(range_base_weight);
3900
                DUMPn(range_rule_offset);
3901
                DUMPn(index2weight_offset);
3902
                DUMPn(index2ruleidx_offset);
3903
                DUMPn(multistart_offset);
3904
#undef DUMPn
3905
#define DUMPn(X) fprintf(fp, "\t%10d, /* %8lu %s */\n", X, collate_pos++, #X);
3906
                DUMPn(wcs2colidt_offset_low);
3907
                DUMPn(wcs2colidt_offset_hi);
3908
        }
3909
#undef DUMPn            
3910
 
3911
 
3912
        fprintf(fp, "#define COL_IDX_C     %5d\n", 0);
3913
#define DUMPn(X) fprintf(fp, "\t%10d, /* %8lu %s */\n", der_locale_array[n]. X, collate_pos++, #X);
3914
        for (n=0 ; n < der_locale_len ; n++) {
3915
                fprintf(fp, "#define COL_IDX_%s %5d\n", der_locale_array[n].name, n+1);
3916
                fprintf(fp, "\t/* Derived Locale %4d: %s (%.12s) */\n",
3917
                                n, der_locale_array[n].name,
3918
                                base_locale_array[der_locale_array[n].base_idx].name);
3919
                DUMPn(base_idx);
3920
                DUMPn(undefined_idx);
3921
                DUMPn(overrides_offset);
3922
                DUMPn(multistart_offset);
3923
        }
3924
#undef DUMPn
3925
 
3926
        fprintf(fp, "\n");
3927
 
3928
        dump_u16_array(fp, index2weight_buffer, index2weight_len, "index2weight");
3929
        dump_u16_array(fp, index2ruleidx_buffer, index2ruleidx_len, "index2ruleidx");
3930
        dump_u16_array(fp, multistart_buffer, multistart_len, "multistart");
3931
        dump_u16_array(fp, override_buffer, override_len, "override");
3932
        dump_u16_array(fp, ruletable_buffer, ruletable_len, "ruletable");
3933
        dump_u16_array(fp, weightstr_buffer, weightstr_len, "weightstr");
3934
        dump_u16_array(fp, wcs2colidt_buffer, wcs2colidt_len, "wcs2colidt");
3935
 
3936
 
3937
        fprintf(fp,"}; /* %8lu */\n", collate_pos);
3938
 
3939
        fprintf(fp,"#define __lc_collate_data_LEN  %d\n\n", collate_pos);
3940
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.