OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [norm/] [maketables.go] - Blame information for rev 858

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
// Normalization table generator.
6
// Data read from the web.
7
 
8
package main
9
 
10
import (
11
        "bufio"
12
        "bytes"
13
        "flag"
14
        "fmt"
15
        "io"
16
        "log"
17
        "net/http"
18
        "os"
19
        "regexp"
20
        "strconv"
21
        "strings"
22
)
23
 
24
func main() {
25
        flag.Parse()
26
        loadUnicodeData()
27
        loadCompositionExclusions()
28
        completeCharFields(FCanonical)
29
        completeCharFields(FCompatibility)
30
        verifyComputed()
31
        printChars()
32
        makeTables()
33
        testDerived()
34
}
35
 
36
// Command-line flags and the logger used for diagnostics.
var (
	// url is the directory the Unicode data files are fetched from.
	url = flag.String("url",
		"http://www.unicode.org/Public/6.0.0/ucd/",
		"URL of Unicode database directory")

	// tablelist selects which tables makeTables generates.
	tablelist = flag.String("tables",
		"all",
		"comma-separated list of which tables to generate; "+
			"can be 'decomp', 'recomp', 'info' and 'all'")

	// test enables the comparison done by testDerived.
	test = flag.Bool("test",
		false,
		"test existing tables; can be used to compare web data with package data")

	// verbose makes printChars dump every parsed character.
	verbose = flag.Bool("verbose",
		false,
		"write data to stdout as it is parsed")

	// localFiles makes openReader read from the current directory.
	localFiles = flag.Bool("local",
		false,
		"data files have been copied to the current directory; for debugging only")

	// logger writes to stderr, prefixing each message with file:line.
	logger = log.New(os.Stderr, "", log.Lshortfile)
)
54
 
55
// Field indices of a semicolon-separated UnicodeData.txt record.
// Records look like:
//	0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
//	007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A
// See http://unicode.org/reports/tr44/ for full explanation.
const (
	FCodePoint = iota
	FName
	FGeneralCategory
	FCanonicalCombiningClass
	FBidiClass
	FDecompMapping
	FDecimalValue
	FDigitValue
	FNumericValue
	FBidiMirrored
	FUnicode1Name
	FISOComment
	FSimpleUppercaseMapping
	FSimpleLowercaseMapping
	FSimpleTitlecaseMapping
	NumField // number of fields in a record
)

// MaxChar is the highest code point; anything above this shouldn't exist.
const MaxChar = 0x10FFFF
80
 
81
// QCResult is the Quick Check property of a rune for a given normal form.
// Quick Check properties allow us to quickly determine whether a rune may
// occur in a normal form: a rune may be guaranteed to occur verbatim
// (QC=Yes), may or may not combine with another rune (QC=Maybe), or may
// not occur (QC=No).
type QCResult int

const (
	QCUnknown QCResult = iota // zero value: not computed yet
	QCYes
	QCNo
	QCMaybe
)

// String returns "Yes", "No" or "Maybe" for the corresponding results and
// "***UNKNOWN***" for every other value.
func (r QCResult) String() string {
	names := [...]string{QCYes: "Yes", QCNo: "No", QCMaybe: "Maybe"}
	if r > QCUnknown && int(r) < len(names) {
		return names[r]
	}
	return "***UNKNOWN***"
}
106
 
107
// Form types; used as index into Char.forms.
const (
	// FCanonical covers NFC and NFD.
	FCanonical = iota
	// FCompatibility covers NFKC and NFKD.
	FCompatibility
	// FNumberOfFormTypes is the number of form types.
	FNumberOfFormTypes
)

// Modes; used as index into FormInfo.quickCheck and FormInfo.verified.
const (
	// MComposed covers NFC and NFKC.
	MComposed = iota
	// MDecomposed covers NFD and NFKD.
	MDecomposed
	// MNumberOfModes is the number of modes.
	MNumberOfModes
)
118
 
119
// This contains only the properties we're interested in.
120
type Char struct {
121
        name          string
122
        codePoint     rune  // if zero, this index is not a valid code point.
123
        ccc           uint8 // canonical combining class
124
        excludeInComp bool  // from CompositionExclusions.txt
125
        compatDecomp  bool  // it has a compatibility expansion
126
 
127
        forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
128
 
129
        state State
130
}
131
 
132
var chars = make([]Char, MaxChar+1)
133
 
134
func (c Char) String() string {
135
        buf := new(bytes.Buffer)
136
 
137
        fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
138
        fmt.Fprintf(buf, "  ccc: %v\n", c.ccc)
139
        fmt.Fprintf(buf, "  excludeInComp: %v\n", c.excludeInComp)
140
        fmt.Fprintf(buf, "  compatDecomp: %v\n", c.compatDecomp)
141
        fmt.Fprintf(buf, "  state: %v\n", c.state)
142
        fmt.Fprintf(buf, "  NFC:\n")
143
        fmt.Fprint(buf, c.forms[FCanonical])
144
        fmt.Fprintf(buf, "  NFKC:\n")
145
        fmt.Fprint(buf, c.forms[FCompatibility])
146
 
147
        return buf.String()
148
}
149
 
150
// State records how a Char entry relates to the ranges in UnicodeData.txt.
// In UnicodeData.txt, some ranges are marked like this:
//	3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
//	4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
// parseCharacter keeps a state variable indicating the weirdness.
type State int

const (
	// SNormal is known to be zero for the type.
	SNormal State = iota
	SFirst
	SLast
	SMissing
)

// lastChar is the code point of the most recently parsed record; it is
// updated by parseCharacter.
var lastChar rune
164
 
165
// isValid reports whether c has a non-zero code point and is not marked
// SMissing.
func (c Char) isValid() bool {
	if c.codePoint == 0 {
		return false
	}
	return c.state != SMissing
}
168
 
169
type FormInfo struct {
170
        quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
171
        verified   [MNumberOfModes]bool     // index: MComposed or MDecomposed
172
 
173
        combinesForward  bool // May combine with rune on the right
174
        combinesBackward bool // May combine with rune on the left
175
        isOneWay         bool // Never appears in result
176
        inDecomp         bool // Some decompositions result in this char.
177
        decomp           Decomposition
178
        expandedDecomp   Decomposition
179
}
180
 
181
func (f FormInfo) String() string {
182
        buf := bytes.NewBuffer(make([]byte, 0))
183
 
184
        fmt.Fprintf(buf, "    quickCheck[C]: %v\n", f.quickCheck[MComposed])
185
        fmt.Fprintf(buf, "    quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
186
        fmt.Fprintf(buf, "    cmbForward: %v\n", f.combinesForward)
187
        fmt.Fprintf(buf, "    cmbBackward: %v\n", f.combinesBackward)
188
        fmt.Fprintf(buf, "    isOneWay: %v\n", f.isOneWay)
189
        fmt.Fprintf(buf, "    inDecomp: %v\n", f.inDecomp)
190
        fmt.Fprintf(buf, "    decomposition: %v\n", f.decomp)
191
        fmt.Fprintf(buf, "    expandedDecomp: %v\n", f.expandedDecomp)
192
 
193
        return buf.String()
194
}
195
 
196
// Decomposition is a sequence of runes a character decomposes into.
type Decomposition []rune

// String formats d as a bracketed, space-separated list of code points in
// upper-case hexadecimal with at least four digits each, e.g. "[0041 0301]".
func (d Decomposition) String() string {
	// Format the underlying []rune rather than d itself: fmt invokes the
	// String method of a Stringer operand for the %X verb, so passing d
	// directly would call this method again and recurse without end.
	return fmt.Sprintf("%.4X", []rune(d))
}
201
 
202
func openReader(file string) (input io.ReadCloser) {
203
        if *localFiles {
204
                f, err := os.Open(file)
205
                if err != nil {
206
                        logger.Fatal(err)
207
                }
208
                input = f
209
        } else {
210
                path := *url + file
211
                resp, err := http.Get(path)
212
                if err != nil {
213
                        logger.Fatal(err)
214
                }
215
                if resp.StatusCode != 200 {
216
                        logger.Fatal("bad GET status for "+file, resp.Status)
217
                }
218
                input = resp.Body
219
        }
220
        return
221
}
222
 
223
// parseDecomposition parses s, a space-separated list of hexadecimal code
// points, into a slice of runes. If skipfirst is set, the first
// space-separated element of s is ignored (it is used by parseCharacter to
// skip a leading non-numeric tag).
//
// An empty s is a valid, empty decomposition and yields (nil, nil). If any
// element fails to parse, the result is nil together with the error; no
// partially filled slice is returned.
func parseDecomposition(s string, skipfirst bool) (a []rune, e error) {
	if s == "" {
		return nil, nil
	}
	decomp := strings.Split(s, " ")
	if skipfirst {
		// Split always returns at least one element for a non-empty s.
		decomp = decomp[1:]
	}
	for _, d := range decomp {
		point, err := strconv.ParseUint(d, 16, 64)
		if err != nil {
			return nil, err
		}
		a = append(a, rune(point))
	}
	return a, nil
}
237
 
238
func parseCharacter(line string) {
239
        field := strings.Split(line, ";")
240
        if len(field) != NumField {
241
                logger.Fatalf("%5s: %d fields (expected %d)\n", line, len(field), NumField)
242
        }
243
        x, err := strconv.ParseUint(field[FCodePoint], 16, 64)
244
        point := int(x)
245
        if err != nil {
246
                logger.Fatalf("%.5s...: %s", line, err)
247
        }
248
        if point == 0 {
249
                return // not interesting and we use 0 as unset
250
        }
251
        if point > MaxChar {
252
                logger.Fatalf("%5s: Rune %X > MaxChar (%X)", line, point, MaxChar)
253
                return
254
        }
255
        state := SNormal
256
        switch {
257
        case strings.Index(field[FName], ", First>") > 0:
258
                state = SFirst
259
        case strings.Index(field[FName], ", Last>") > 0:
260
                state = SLast
261
        }
262
        firstChar := lastChar + 1
263
        lastChar = rune(point)
264
        if state != SLast {
265
                firstChar = lastChar
266
        }
267
        x, err = strconv.ParseUint(field[FCanonicalCombiningClass], 10, 64)
268
        if err != nil {
269
                logger.Fatalf("%U: bad ccc field: %s", int(x), err)
270
        }
271
        ccc := uint8(x)
272
        decmap := field[FDecompMapping]
273
        exp, e := parseDecomposition(decmap, false)
274
        isCompat := false
275
        if e != nil {
276
                if len(decmap) > 0 {
277
                        exp, e = parseDecomposition(decmap, true)
278
                        if e != nil {
279
                                logger.Fatalf(`%U: bad decomp |%v|: "%s"`, int(x), decmap, e)
280
                        }
281
                        isCompat = true
282
                }
283
        }
284
        for i := firstChar; i <= lastChar; i++ {
285
                char := &chars[i]
286
                char.name = field[FName]
287
                char.codePoint = i
288
                char.forms[FCompatibility].decomp = exp
289
                if !isCompat {
290
                        char.forms[FCanonical].decomp = exp
291
                } else {
292
                        char.compatDecomp = true
293
                }
294
                if len(decmap) > 0 {
295
                        char.forms[FCompatibility].decomp = exp
296
                }
297
                char.ccc = ccc
298
                char.state = SMissing
299
                if i == lastChar {
300
                        char.state = state
301
                }
302
        }
303
        return
304
}
305
 
306
func loadUnicodeData() {
307
        f := openReader("UnicodeData.txt")
308
        defer f.Close()
309
        input := bufio.NewReader(f)
310
        for {
311
                line, err := input.ReadString('\n')
312
                if err != nil {
313
                        if err == io.EOF {
314
                                break
315
                        }
316
                        logger.Fatal(err)
317
                }
318
                parseCharacter(line[0 : len(line)-1])
319
        }
320
}
321
 
322
var singlePointRe = regexp.MustCompile(`^([0-9A-F]+) *$`)
323
 
324
// CompositionExclusions.txt has form:
325
// 0958    # ...
326
// See http://unicode.org/reports/tr44/ for full explanation
327
func parseExclusion(line string) int {
328
        comment := strings.Index(line, "#")
329
        if comment >= 0 {
330
                line = line[0:comment]
331
        }
332
        if len(line) == 0 {
333
                return 0
334
        }
335
        matches := singlePointRe.FindStringSubmatch(line)
336
        if len(matches) != 2 {
337
                logger.Fatalf("%s: %d matches (expected 1)\n", line, len(matches))
338
        }
339
        point, err := strconv.ParseUint(matches[1], 16, 64)
340
        if err != nil {
341
                logger.Fatalf("%.5s...: %s", line, err)
342
        }
343
        return int(point)
344
}
345
 
346
func loadCompositionExclusions() {
347
        f := openReader("CompositionExclusions.txt")
348
        defer f.Close()
349
        input := bufio.NewReader(f)
350
        for {
351
                line, err := input.ReadString('\n')
352
                if err != nil {
353
                        if err == io.EOF {
354
                                break
355
                        }
356
                        logger.Fatal(err)
357
                }
358
                point := parseExclusion(line[0 : len(line)-1])
359
                if point == 0 {
360
                        continue
361
                }
362
                c := &chars[point]
363
                if c.excludeInComp {
364
                        logger.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
365
                }
366
                c.excludeInComp = true
367
        }
368
}
369
 
370
// hasCompatDecomp returns true if any of the recursive
371
// decompositions contains a compatibility expansion.
372
// In this case, the character may not occur in NFK*.
373
func hasCompatDecomp(r rune) bool {
374
        c := &chars[r]
375
        if c.compatDecomp {
376
                return true
377
        }
378
        for _, d := range c.forms[FCompatibility].decomp {
379
                if hasCompatDecomp(d) {
380
                        return true
381
                }
382
        }
383
        return false
384
}
385
 
386
// Hangul related constants. The *End values are used as exclusive upper
// bounds by the code in this file.
const (
	// Precomposed Hangul syllables.
	HangulBase = 0xAC00
	HangulEnd  = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)

	// Jamo ranges compared against in completeCharFields.
	JamoLBase = 0x1100
	JamoLEnd  = 0x1113
	JamoVBase = 0x1161
	JamoVEnd  = 0x1176
	JamoTBase = 0x11A8
	JamoTEnd  = 0x11C3
)
398
 
399
func isHangul(r rune) bool {
400
        return HangulBase <= r && r < HangulEnd
401
}
402
 
403
func ccc(r rune) uint8 {
404
        return chars[r].ccc
405
}
406
 
407
// Insert a rune in a buffer, ordered by Canonical Combining Class.
408
func insertOrdered(b Decomposition, r rune) Decomposition {
409
        n := len(b)
410
        b = append(b, 0)
411
        cc := ccc(r)
412
        if cc > 0 {
413
                // Use bubble sort.
414
                for ; n > 0; n-- {
415
                        if ccc(b[n-1]) <= cc {
416
                                break
417
                        }
418
                        b[n] = b[n-1]
419
                }
420
        }
421
        b[n] = r
422
        return b
423
}
424
 
425
// Recursively decompose.
426
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
427
        if isHangul(r) {
428
                return d
429
        }
430
        dcomp := chars[r].forms[form].decomp
431
        if len(dcomp) == 0 {
432
                return insertOrdered(d, r)
433
        }
434
        for _, c := range dcomp {
435
                d = decomposeRecursive(form, c, d)
436
        }
437
        return d
438
}
439
 
440
func completeCharFields(form int) {
441
        // Phase 0: pre-expand decomposition.
442
        for i := range chars {
443
                f := &chars[i].forms[form]
444
                if len(f.decomp) == 0 {
445
                        continue
446
                }
447
                exp := make(Decomposition, 0)
448
                for _, c := range f.decomp {
449
                        exp = decomposeRecursive(form, c, exp)
450
                }
451
                f.expandedDecomp = exp
452
        }
453
 
454
        // Phase 1: composition exclusion, mark decomposition.
455
        for i := range chars {
456
                c := &chars[i]
457
                f := &c.forms[form]
458
 
459
                // Marks script-specific exclusions and version restricted.
460
                f.isOneWay = c.excludeInComp
461
 
462
                // Singletons
463
                f.isOneWay = f.isOneWay || len(f.decomp) == 1
464
 
465
                // Non-starter decompositions
466
                if len(f.decomp) > 1 {
467
                        chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
468
                        f.isOneWay = f.isOneWay || chk
469
                }
470
 
471
                // Runes that decompose into more than two runes.
472
                f.isOneWay = f.isOneWay || len(f.decomp) > 2
473
 
474
                if form == FCompatibility {
475
                        f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
476
                }
477
 
478
                for _, r := range f.decomp {
479
                        chars[r].forms[form].inDecomp = true
480
                }
481
        }
482
 
483
        // Phase 2: forward and backward combining.
484
        for i := range chars {
485
                c := &chars[i]
486
                f := &c.forms[form]
487
 
488
                if !f.isOneWay && len(f.decomp) == 2 {
489
                        f0 := &chars[f.decomp[0]].forms[form]
490
                        f1 := &chars[f.decomp[1]].forms[form]
491
                        if !f0.isOneWay {
492
                                f0.combinesForward = true
493
                        }
494
                        if !f1.isOneWay {
495
                                f1.combinesBackward = true
496
                        }
497
                }
498
        }
499
 
500
        // Phase 3: quick check values.
501
        for i := range chars {
502
                c := &chars[i]
503
                f := &c.forms[form]
504
 
505
                switch {
506
                case len(f.decomp) > 0:
507
                        f.quickCheck[MDecomposed] = QCNo
508
                case isHangul(rune(i)):
509
                        f.quickCheck[MDecomposed] = QCNo
510
                default:
511
                        f.quickCheck[MDecomposed] = QCYes
512
                }
513
                switch {
514
                case f.isOneWay:
515
                        f.quickCheck[MComposed] = QCNo
516
                case (i & 0xffff00) == JamoLBase:
517
                        f.quickCheck[MComposed] = QCYes
518
                        if JamoLBase <= i && i < JamoLEnd {
519
                                f.combinesForward = true
520
                        }
521
                        if JamoVBase <= i && i < JamoVEnd {
522
                                f.quickCheck[MComposed] = QCMaybe
523
                                f.combinesBackward = true
524
                                f.combinesForward = true
525
                        }
526
                        if JamoTBase <= i && i < JamoTEnd {
527
                                f.quickCheck[MComposed] = QCMaybe
528
                                f.combinesBackward = true
529
                        }
530
                case !f.combinesBackward:
531
                        f.quickCheck[MComposed] = QCYes
532
                default:
533
                        f.quickCheck[MComposed] = QCMaybe
534
                }
535
        }
536
}
537
 
538
// printBytes writes b to standard output as the Go source of a byte array
// variable called name, preceded by a comment stating its size. A comment
// with the byte offsets is emitted before every 64th byte and a line break
// before every other 8th byte.
func printBytes(b []byte, name string) {
	fmt.Printf("// %s: %d bytes\n", name, len(b))
	fmt.Printf("var %s = [...]byte {", name)
	for offset, value := range b {
		if offset%64 == 0 {
			fmt.Printf("\n// Bytes %x - %x\n", offset, offset+63)
		} else if offset%8 == 0 {
			fmt.Printf("\n")
		}
		fmt.Printf("0x%.2X, ", value)
	}
	fmt.Print("\n}\n\n")
}
552
 
553
// See forminfo.go for format.
554
func makeEntry(f *FormInfo) uint16 {
555
        e := uint16(0)
556
        if f.combinesForward {
557
                e |= 0x8
558
        }
559
        if f.quickCheck[MDecomposed] == QCNo {
560
                e |= 0x1
561
        }
562
        switch f.quickCheck[MComposed] {
563
        case QCYes:
564
        case QCNo:
565
                e |= 0x4
566
        case QCMaybe:
567
                e |= 0x6
568
        default:
569
                log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
570
        }
571
        return e
572
}
573
 
574
// Bits
575
// 0..8:   CCC
576
// 9..12:  NF(C|D) qc bits.
577
// 13..16: NFK(C|D) qc bits.
578
func makeCharInfo(c Char) uint16 {
579
        e := makeEntry(&c.forms[FCompatibility])
580
        e = e<<4 | makeEntry(&c.forms[FCanonical])
581
        e = e<<8 | uint16(c.ccc)
582
        return e
583
}
584
 
585
func printCharInfoTables() int {
586
        // Quick Check + CCC trie.
587
        t := newNode()
588
        for i, char := range chars {
589
                v := makeCharInfo(char)
590
                if v != 0 {
591
                        t.insert(rune(i), v)
592
                }
593
        }
594
        return t.printTables("charInfo")
595
}
596
 
597
func printDecompositionTables() int {
598
        decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
599
        size := 0
600
 
601
        // Map decompositions
602
        positionMap := make(map[string]uint16)
603
 
604
        // Store the uniqued decompositions in a byte buffer,
605
        // preceded by their byte length.
606
        for _, c := range chars {
607
                for f := 0; f < 2; f++ {
608
                        d := c.forms[f].expandedDecomp
609
                        s := string([]rune(d))
610
                        if _, ok := positionMap[s]; !ok {
611
                                p := decompositions.Len()
612
                                decompositions.WriteByte(uint8(len(s)))
613
                                decompositions.WriteString(s)
614
                                positionMap[s] = uint16(p)
615
                        }
616
                }
617
        }
618
        b := decompositions.Bytes()
619
        printBytes(b, "decomps")
620
        size += len(b)
621
 
622
        nfcT := newNode()
623
        nfkcT := newNode()
624
        for i, c := range chars {
625
                d := c.forms[FCanonical].expandedDecomp
626
                if len(d) != 0 {
627
                        nfcT.insert(rune(i), positionMap[string([]rune(d))])
628
                        if ccc(c.codePoint) != ccc(d[0]) {
629
                                // We assume the lead ccc of a decomposition is !=0 in this case.
630
                                if ccc(d[0]) == 0 {
631
                                        logger.Fatal("Expected differing CCC to be non-zero.")
632
                                }
633
                        }
634
                }
635
                d = c.forms[FCompatibility].expandedDecomp
636
                if len(d) != 0 {
637
                        nfkcT.insert(rune(i), positionMap[string([]rune(d))])
638
                        if ccc(c.codePoint) != ccc(d[0]) {
639
                                // We assume the lead ccc of a decomposition is !=0 in this case.
640
                                if ccc(d[0]) == 0 {
641
                                        logger.Fatal("Expected differing CCC to be non-zero.")
642
                                }
643
                        }
644
                }
645
        }
646
        size += nfcT.printTables("nfcDecomp")
647
        size += nfkcT.printTables("nfkcDecomp")
648
        return size
649
}
650
 
651
// contains reports whether s is an element of sa.
func contains(sa []string, s string) bool {
	for i := range sa {
		if sa[i] == s {
			return true
		}
	}
	return false
}
659
 
660
// Extract the version number from the URL.
661
func version() string {
662
        // From http://www.unicode.org/standard/versions/#Version_Numbering:
663
        // for the later Unicode versions, data files are located in
664
        // versioned directories.
665
        fields := strings.Split(*url, "/")
666
        for _, f := range fields {
667
                if match, _ := regexp.MatchString(`[0-9]\.[0-9]\.[0-9]`, f); match {
668
                        return f
669
                }
670
        }
671
        logger.Fatal("unknown version")
672
        return "Unknown"
673
}
674
 
675
// fileHeader is the format of the header of the generated file; its two
// verbs receive the values of the -tables and -url flags.
const fileHeader = `// Generated by running
//      maketables --tables=%s --url=%s
// DO NOT EDIT

package norm

`

// makeTables prints the generated Go source to standard output: the file
// header, the Version constant and each table selected by the -tables
// flag, followed by a comment with the total table size. Nothing is
// printed if the flag is empty.
func makeTables() {
	if *tablelist == "" {
		return
	}
	var list []string
	if *tablelist == "all" {
		list = []string{"decomp", "recomp", "info"}
	} else {
		list = strings.Split(*tablelist, ",")
	}

	fmt.Printf(fileHeader, *tablelist, *url)

	fmt.Println("// Version is the Unicode edition from which the tables are derived.")
	fmt.Printf("const Version = %q\n\n", version())

	size := 0
	if contains(list, "decomp") {
		size += printDecompositionTables()
	}

	if contains(list, "recomp") {
		// Note that we use 32 bit keys, instead of 64 bit.
		// This clips the bits of three entries, but we know
		// this won't cause a collision. The compiler will catch
		// any changes made to UnicodeData.txt that introduces
		// a collision.
		// Note that the recomposition map for NFC and NFKC
		// are identical.

		// Collect the runes that have a canonical decomposition and are
		// not one-way; these are the entries of the recomposition map.
		var composed []int
		for i := range chars {
			f := &chars[i].forms[FCanonical]
			if !f.isOneWay && len(f.decomp) > 0 {
				composed = append(composed, i)
			}
		}

		sz := len(composed) * 8
		size += sz
		fmt.Printf("// recompMap: %d bytes (entries only)\n", sz)
		fmt.Println("var recompMap = map[uint32]rune{")
		for _, i := range composed {
			d := chars[i].forms[FCanonical].decomp
			hi := uint32(uint16(d[0])) << 16
			lo := uint32(uint16(d[1]))
			fmt.Printf("0x%.8X: 0x%.4X,\n", hi+lo, i)
		}
		fmt.Printf("}\n\n")
	}

	if contains(list, "info") {
		size += printCharInfoTables()
	}
	fmt.Printf("// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
}
738
 
739
func printChars() {
740
        if *verbose {
741
                for _, c := range chars {
742
                        if !c.isValid() || c.state == SMissing {
743
                                continue
744
                        }
745
                        fmt.Println(c)
746
                }
747
        }
748
}
749
 
750
// verifyComputed does various consistency tests.
751
func verifyComputed() {
752
        for i, c := range chars {
753
                for _, f := range c.forms {
754
                        isNo := (f.quickCheck[MDecomposed] == QCNo)
755
                        if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
756
                                log.Fatalf("%U: NF*D must be no if rune decomposes", i)
757
                        }
758
 
759
                        isMaybe := f.quickCheck[MComposed] == QCMaybe
760
                        if f.combinesBackward != isMaybe {
761
                                log.Fatalf("%U: NF*C must be maybe if combinesBackward", i)
762
                        }
763
                }
764
        }
765
}
766
 
767
var qcRe = regexp.MustCompile(`([0-9A-F\.]+) *; (NF.*_QC); ([YNM]) #.*`)
768
 
769
// Use values in DerivedNormalizationProps.txt to compare against the
770
// values we computed.
771
// DerivedNormalizationProps.txt has form:
772
// 00C0..00C5    ; NFD_QC; N # ...
773
// 0374          ; NFD_QC; N # ...
774
// See http://unicode.org/reports/tr44/ for full explanation
775
func testDerived() {
776
        if !*test {
777
                return
778
        }
779
        f := openReader("DerivedNormalizationProps.txt")
780
        defer f.Close()
781
        input := bufio.NewReader(f)
782
        for {
783
                line, err := input.ReadString('\n')
784
                if err != nil {
785
                        if err == io.EOF {
786
                                break
787
                        }
788
                        logger.Fatal(err)
789
                }
790
                qc := qcRe.FindStringSubmatch(line)
791
                if qc == nil {
792
                        continue
793
                }
794
                rng := strings.Split(qc[1], "..")
795
                i, err := strconv.ParseUint(rng[0], 16, 64)
796
                if err != nil {
797
                        log.Fatal(err)
798
                }
799
                j := i
800
                if len(rng) > 1 {
801
                        j, err = strconv.ParseUint(rng[1], 16, 64)
802
                        if err != nil {
803
                                log.Fatal(err)
804
                        }
805
                }
806
                var ftype, mode int
807
                qt := strings.TrimSpace(qc[2])
808
                switch qt {
809
                case "NFC_QC":
810
                        ftype, mode = FCanonical, MComposed
811
                case "NFD_QC":
812
                        ftype, mode = FCanonical, MDecomposed
813
                case "NFKC_QC":
814
                        ftype, mode = FCompatibility, MComposed
815
                case "NFKD_QC":
816
                        ftype, mode = FCompatibility, MDecomposed
817
                default:
818
                        log.Fatalf(`Unexpected quick check type "%s"`, qt)
819
                }
820
                var qr QCResult
821
                switch qc[3] {
822
                case "Y":
823
                        qr = QCYes
824
                case "N":
825
                        qr = QCNo
826
                case "M":
827
                        qr = QCMaybe
828
                default:
829
                        log.Fatalf(`Unexpected quick check value "%s"`, qc[3])
830
                }
831
                var lastFailed bool
832
                // Verify current
833
                for ; i <= j; i++ {
834
                        c := &chars[int(i)]
835
                        c.forms[ftype].verified[mode] = true
836
                        curqr := c.forms[ftype].quickCheck[mode]
837
                        if curqr != qr {
838
                                if !lastFailed {
839
                                        logger.Printf("%s: %.4X..%.4X -- %s\n",
840
                                                qt, int(i), int(j), line[0:50])
841
                                }
842
                                logger.Printf("%U: FAILED %s (was %v need %v)\n",
843
                                        int(i), qt, curqr, qr)
844
                                lastFailed = true
845
                        }
846
                }
847
        }
848
        // Any unspecified value must be QCYes. Verify this.
849
        for i, c := range chars {
850
                for j, fd := range c.forms {
851
                        for k, qr := range fd.quickCheck {
852
                                if !fd.verified[k] && qr != QCYes {
853
                                        m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
854
                                        logger.Printf(m, i, j, k, qr, c.name)
855
                                }
856
                        }
857
                }
858
        }
859
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.