OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [norm/] [triegen.go] - Blame information for rev 791

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
// Trie table generator.
6
// Used by make*tables tools to generate a go file with trie data structures
7
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
8
// sequence are used to lookup offsets in the index table to be used for the
9
// next byte. The last byte is used to index into a table with 16-bit values.
10
 
11
package main
12
 
13
import (
14
        "fmt"
15
        "hash/crc32"
16
        "log"
17
        "unicode/utf8"
18
)
19
 
20
const blockSize = 64
21
const maxSparseEntries = 16
22
 
23
// Intermediate trie structure
24
type trieNode struct {
25
        table [256]*trieNode
26
        value int
27
        b     byte
28
        leaf  bool
29
}
30
 
31
func newNode() *trieNode {
32
        return new(trieNode)
33
}
34
 
35
func (n trieNode) String() string {
36
        s := fmt.Sprint("trieNode{table: { non-nil at index: ")
37
        for i, v := range n.table {
38
                if v != nil {
39
                        s += fmt.Sprintf("%d, ", i)
40
                }
41
        }
42
        s += fmt.Sprintf("}, value:%#x, b:%#x leaf:%v}", n.value, n.b, n.leaf)
43
        return s
44
}
45
 
46
func (n trieNode) isInternal() bool {
47
        internal := true
48
        for i := 0; i < 256; i++ {
49
                if nn := n.table[i]; nn != nil {
50
                        if !internal && !nn.leaf {
51
                                log.Fatalf("triegen: isInternal: node contains both leaf and non-leaf children (%v)", n)
52
                        }
53
                        internal = internal && !nn.leaf
54
                }
55
        }
56
        return internal
57
}
58
 
59
func (n trieNode) mostFrequentStride() int {
60
        counts := make(map[int]int)
61
        v := 0
62
        for _, t := range n.table[0x80 : 0x80+blockSize] {
63
                if t != nil {
64
                        if stride := t.value - v; v != 0 && stride >= 0 {
65
                                counts[stride]++
66
                        }
67
                        v = t.value
68
                } else {
69
                        v = 0
70
                }
71
        }
72
        var maxs, maxc int
73
        for stride, cnt := range counts {
74
                if cnt > maxc || (cnt == maxc && stride < maxs) {
75
                        maxs, maxc = stride, cnt
76
                }
77
        }
78
        return maxs
79
}
80
 
81
func (n trieNode) countSparseEntries() int {
82
        stride := n.mostFrequentStride()
83
        var count, v int
84
        for _, t := range n.table[0x80 : 0x80+blockSize] {
85
                tv := 0
86
                if t != nil {
87
                        tv = t.value
88
                }
89
                if tv-v != stride {
90
                        if tv != 0 {
91
                                count++
92
                        }
93
                }
94
                v = tv
95
        }
96
        return count
97
}
98
 
99
func (n *trieNode) insert(r rune, value uint16) {
100
        var p [utf8.UTFMax]byte
101
        sz := utf8.EncodeRune(p[:], r)
102
 
103
        for i := 0; i < sz; i++ {
104
                if n.leaf {
105
                        log.Fatalf("triegen: insert: node (%#v) should not be a leaf", n)
106
                }
107
                nn := n.table[p[i]]
108
                if nn == nil {
109
                        nn = newNode()
110
                        nn.b = p[i]
111
                        n.table[p[i]] = nn
112
                }
113
                n = nn
114
        }
115
        n.value = int(value)
116
        n.leaf = true
117
}
118
 
119
type nodeIndex struct {
120
        lookupBlocks []*trieNode
121
        valueBlocks  []*trieNode
122
        sparseBlocks []*trieNode
123
        sparseOffset []uint16
124
        sparseCount  int
125
 
126
        lookupBlockIdx map[uint32]int
127
        valueBlockIdx  map[uint32]int
128
}
129
 
130
func newIndex() *nodeIndex {
131
        index := &nodeIndex{}
132
        index.lookupBlocks = make([]*trieNode, 0)
133
        index.valueBlocks = make([]*trieNode, 0)
134
        index.sparseBlocks = make([]*trieNode, 0)
135
        index.sparseOffset = make([]uint16, 1)
136
        index.lookupBlockIdx = make(map[uint32]int)
137
        index.valueBlockIdx = make(map[uint32]int)
138
        return index
139
}
140
 
141
func computeOffsets(index *nodeIndex, n *trieNode) int {
142
        if n.leaf {
143
                return n.value
144
        }
145
        hasher := crc32.New(crc32.MakeTable(crc32.IEEE))
146
        // We only index continuation bytes.
147
        for i := 0; i < blockSize; i++ {
148
                v := 0
149
                if nn := n.table[0x80+i]; nn != nil {
150
                        v = computeOffsets(index, nn)
151
                }
152
                hasher.Write([]byte{uint8(v >> 8), uint8(v)})
153
        }
154
        h := hasher.Sum32()
155
        if n.isInternal() {
156
                v, ok := index.lookupBlockIdx[h]
157
                if !ok {
158
                        v = len(index.lookupBlocks)
159
                        index.lookupBlocks = append(index.lookupBlocks, n)
160
                        index.lookupBlockIdx[h] = v
161
                }
162
                n.value = v
163
        } else {
164
                v, ok := index.valueBlockIdx[h]
165
                if !ok {
166
                        if c := n.countSparseEntries(); c > maxSparseEntries {
167
                                v = len(index.valueBlocks)
168
                                index.valueBlocks = append(index.valueBlocks, n)
169
                                index.valueBlockIdx[h] = v
170
                        } else {
171
                                v = -len(index.sparseOffset)
172
                                index.sparseBlocks = append(index.sparseBlocks, n)
173
                                index.sparseOffset = append(index.sparseOffset, uint16(index.sparseCount))
174
                                index.sparseCount += c + 1
175
                                index.valueBlockIdx[h] = v
176
                        }
177
                }
178
                n.value = v
179
        }
180
        return n.value
181
}
182
 
183
func printValueBlock(nr int, n *trieNode, offset int) {
184
        boff := nr * blockSize
185
        fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
186
        var printnewline bool
187
        for i := 0; i < blockSize; i++ {
188
                if i%6 == 0 {
189
                        printnewline = true
190
                }
191
                v := 0
192
                if nn := n.table[i+offset]; nn != nil {
193
                        v = nn.value
194
                }
195
                if v != 0 {
196
                        if printnewline {
197
                                fmt.Printf("\n")
198
                                printnewline = false
199
                        }
200
                        fmt.Printf("%#04x:%#04x, ", boff+i, v)
201
                }
202
        }
203
}
204
 
205
func printSparseBlock(nr int, n *trieNode) {
206
        boff := -n.value
207
        fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
208
        v := 0
209
        //stride := f(n)
210
        stride := n.mostFrequentStride()
211
        c := n.countSparseEntries()
212
        fmt.Printf("\n{value:%#04x,lo:%#02x},", stride, uint8(c))
213
        for i, nn := range n.table[0x80 : 0x80+blockSize] {
214
                nv := 0
215
                if nn != nil {
216
                        nv = nn.value
217
                }
218
                if nv-v != stride {
219
                        if v != 0 {
220
                                fmt.Printf(",hi:%#02x},", 0x80+i-1)
221
                        }
222
                        if nv != 0 {
223
                                fmt.Printf("\n{value:%#04x,lo:%#02x", nv, nn.b)
224
                        }
225
                }
226
                v = nv
227
        }
228
        if v != 0 {
229
                fmt.Printf(",hi:%#02x},", 0x80+blockSize-1)
230
        }
231
}
232
 
233
func printLookupBlock(nr int, n *trieNode, offset, cutoff int) {
234
        boff := nr * blockSize
235
        fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
236
        var printnewline bool
237
        for i := 0; i < blockSize; i++ {
238
                if i%8 == 0 {
239
                        printnewline = true
240
                }
241
                v := 0
242
                if nn := n.table[i+offset]; nn != nil {
243
                        v = nn.value
244
                }
245
                if v != 0 {
246
                        if v < 0 {
247
                                v = -v - 1 + cutoff
248
                        }
249
                        if printnewline {
250
                                fmt.Printf("\n")
251
                                printnewline = false
252
                        }
253
                        fmt.Printf("%#03x:%#02x, ", boff+i, v)
254
                }
255
        }
256
}
257
 
258
// printTables returns the size in bytes of the generated tables.
259
func (t *trieNode) printTables(name string) int {
260
        index := newIndex()
261
        // Values for 7-bit ASCII are stored in first two block, followed by nil block.
262
        index.valueBlocks = append(index.valueBlocks, nil, nil, nil)
263
        // First byte of multi-byte UTF-8 codepoints are indexed in 4th block.
264
        index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil, nil)
265
        // Index starter bytes of multi-byte UTF-8.
266
        for i := 0xC0; i < 0x100; i++ {
267
                if t.table[i] != nil {
268
                        computeOffsets(index, t.table[i])
269
                }
270
        }
271
 
272
        nv := len(index.valueBlocks) * blockSize
273
        fmt.Printf("// %sValues: %d entries, %d bytes\n", name, nv, nv*2)
274
        fmt.Printf("// Block 2 is the null block.\n")
275
        fmt.Printf("var %sValues = [%d]uint16 {", name, nv)
276
        printValueBlock(0, t, 0)
277
        printValueBlock(1, t, 64)
278
        printValueBlock(2, newNode(), 0)
279
        for i := 3; i < len(index.valueBlocks); i++ {
280
                printValueBlock(i, index.valueBlocks[i], 0x80)
281
        }
282
        fmt.Print("\n}\n\n")
283
 
284
        ls := len(index.sparseBlocks)
285
        fmt.Printf("// %sSparseOffset: %d entries, %d bytes\n", name, ls, ls*2)
286
        fmt.Printf("var %sSparseOffset = %#v\n\n", name, index.sparseOffset[1:])
287
 
288
        ns := index.sparseCount
289
        fmt.Printf("// %sSparseValues: %d entries, %d bytes\n", name, ns, ns*4)
290
        fmt.Printf("var %sSparseValues = [%d]valueRange {", name, ns)
291
        for i, n := range index.sparseBlocks {
292
                printSparseBlock(i, n)
293
        }
294
        fmt.Print("\n}\n\n")
295
 
296
        cutoff := len(index.valueBlocks)
297
        ni := len(index.lookupBlocks) * blockSize
298
        fmt.Printf("// %sLookup: %d bytes\n", name, ni)
299
        fmt.Printf("// Block 0 is the null block.\n")
300
        fmt.Printf("var %sLookup = [%d]uint8 {", name, ni)
301
        printLookupBlock(0, newNode(), 0, cutoff)
302
        printLookupBlock(1, newNode(), 0, cutoff)
303
        printLookupBlock(2, newNode(), 0, cutoff)
304
        printLookupBlock(3, t, 0xC0, cutoff)
305
        for i := 4; i < len(index.lookupBlocks); i++ {
306
                printLookupBlock(i, index.lookupBlocks[i], 0x80, cutoff)
307
        }
308
        fmt.Print("\n}\n\n")
309
        fmt.Printf("var %sTrie = trie{ %sLookup[:], %sValues[:], %sSparseValues[:], %sSparseOffset[:], %d}\n\n",
310
                name, name, name, name, name, cutoff)
311
        return nv*2 + ns*4 + ni + ls*2
312
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.