OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [exp/] [norm/] [normalize_test.go] - Blame information for rev 791

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
package norm
6
 
7
import (
8
        "strings"
9
        "testing"
10
)
11
 
12
// PositionTest is one table entry for the position-based tests run by
// runPosTests: an input string, the value the function under test is
// expected to return, and the expected reorder buffer contents afterwards.
type PositionTest struct {
13
        input  string // text handed to the function under test
14
        pos    int    // value the function under test is expected to return
15
        buffer string // expected contents of reorderBuffer, if applicable
16
}
17
 
18
// positionFunc is the signature of the functions exercised by runPosTests.
// It receives the reorder buffer prepared by runPosTests and the test input,
// and returns the int that is compared against PositionTest.pos.
type positionFunc func(rb *reorderBuffer, s string) int
19
 
20
// runPosTests runs fn against every entry of tests using a single
// reorderBuffer initialized for form f.
//
// For each entry it checks that the value returned by fn equals test.pos and
// that the reorder buffer afterwards holds exactly the runes of test.buffer.
// Failures are reported through t, prefixed with name and the index of the
// failing entry, so one run reports every failing entry.
func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []PositionTest) {
	rb := reorderBuffer{}
	rb.init(f, nil)
	for i, test := range tests {
		// Reset the shared buffer and point it at this entry's input.
		rb.reset()
		rb.src = inputString(test.input)
		rb.nsrc = len(test.input)
		pos := fn(&rb, test.input)
		if pos != test.pos {
			t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
		}
		runes := []rune(test.buffer)
		if rb.nrune != len(runes) {
			t.Errorf("%s:%d: reorder buffer length is %d; want %d", name, i, rb.nrune, len(runes))
			// The rune-by-rune comparison is skipped when the counts differ.
			continue
		}
		for j, want := range runes {
			found := rune(rb.runeAt(j))
			if found != want {
				t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
			}
		}
	}
}
44
 
45
var decomposeSegmentTests = []PositionTest{
46
        // illegal runes
47
        {"\xC0", 0, ""},
48
        {"\u00E0\x80", 2, "\u0061\u0300"},
49
        // starter
50
        {"a", 1, "a"},
51
        {"ab", 1, "a"},
52
        // starter + composing
53
        {"a\u0300", 3, "a\u0300"},
54
        {"a\u0300b", 3, "a\u0300"},
55
        // with decomposition
56
        {"\u00C0", 2, "A\u0300"},
57
        {"\u00C0b", 2, "A\u0300"},
58
        // long
59
        {strings.Repeat("\u0300", 31), 62, strings.Repeat("\u0300", 31)},
60
        // ends with incomplete UTF-8 encoding
61
        {"\xCC", 0, ""},
62
        {"\u0300\xCC", 2, "\u0300"},
63
}
64
 
65
// decomposeSegmentF is the positionFunc for decomposeSegment: it points rb
// at s and returns the result of decomposeSegment starting at offset 0.
func decomposeSegmentF(rb *reorderBuffer, s string) int {
	rb.src, rb.nsrc = inputString(s), len(s)
	const start = 0
	return decomposeSegment(rb, start)
}
70
 
71
func TestDecomposeSegment(t *testing.T) {
72
        runPosTests(t, "TestDecomposeSegment", NFC, decomposeSegmentF, decomposeSegmentTests)
73
}
74
 
75
var firstBoundaryTests = []PositionTest{
76
        // no boundary
77
        {"", -1, ""},
78
        {"\u0300", -1, ""},
79
        {"\x80\x80", -1, ""},
80
        // illegal runes
81
        {"\xff", 0, ""},
82
        {"\u0300\xff", 2, ""},
83
        {"\u0300\xc0\x80\x80", 2, ""},
84
        // boundaries
85
        {"a", 0, ""},
86
        {"\u0300a", 2, ""},
87
        // Hangul
88
        {"\u1103\u1161", 0, ""},
89
        {"\u110B\u1173\u11B7", 0, ""},
90
        {"\u1161\u110B\u1173\u11B7", 3, ""},
91
        {"\u1173\u11B7\u1103\u1161", 6, ""},
92
        // too many combining characters.
93
        {strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
94
        {strings.Repeat("\u0300", maxCombiningChars), 60, ""},
95
        {strings.Repeat("\u0300", maxCombiningChars+1), 60, ""},
96
}
97
 
98
// firstBoundaryF is the positionFunc for the byte-slice variant: it returns
// FirstBoundary of rb's form applied to the bytes of s.
func firstBoundaryF(rb *reorderBuffer, s string) int {
	data := []byte(s)
	return rb.f.form.FirstBoundary(data)
}
101
 
102
// firstBoundaryStringF is the positionFunc for the string variant: it
// returns FirstBoundaryInString of rb's form applied to s.
func firstBoundaryStringF(rb *reorderBuffer, s string) int {
	boundary := rb.f.form.FirstBoundaryInString(s)
	return boundary
}
105
 
106
func TestFirstBoundary(t *testing.T) {
107
        runPosTests(t, "TestFirstBoundary", NFC, firstBoundaryF, firstBoundaryTests)
108
        runPosTests(t, "TestFirstBoundaryInString", NFC, firstBoundaryStringF, firstBoundaryTests)
109
}
110
 
111
var decomposeToLastTests = []PositionTest{
112
        // ends with inert character
113
        {"Hello!", 6, ""},
114
        {"\u0632", 2, ""},
115
        {"a\u0301\u0635", 5, ""},
116
        // ends with non-inert starter
117
        {"a", 0, "a"},
118
        {"a\u0301a", 3, "a"},
119
        {"a\u0301\u03B9", 3, "\u03B9"},
120
        {"a\u0327", 0, "a\u0327"},
121
        // illegal runes
122
        {"\xFF", 1, ""},
123
        {"aa\xFF", 3, ""},
124
        {"\xC0\x80\x80", 3, ""},
125
        {"\xCC\x80\x80", 3, ""},
126
        // ends with incomplete UTF-8 encoding
127
        {"a\xCC", 2, ""},
128
        // ends with combining characters
129
        {"\u0300\u0301", 0, "\u0300\u0301"},
130
        {"a\u0300\u0301", 0, "a\u0300\u0301"},
131
        {"a\u0301\u0308", 0, "a\u0301\u0308"},
132
        {"a\u0308\u0301", 0, "a\u0308\u0301"},
133
        {"aaaa\u0300\u0301", 3, "a\u0300\u0301"},
134
        {"\u0300a\u0300\u0301", 2, "a\u0300\u0301"},
135
        {"\u00C0", 0, "A\u0300"},
136
        {"a\u00C0", 1, "A\u0300"},
137
        // decomposing
138
        {"a\u0300\uFDC0", 3, "\u0645\u062C\u064A"},
139
        {"\uFDC0" + strings.Repeat("\u0300", 26), 0, "\u0645\u062C\u064A" + strings.Repeat("\u0300", 26)},
140
        // Hangul
141
        {"a\u1103", 1, "\u1103"},
142
        {"a\u110B", 1, "\u110B"},
143
        {"a\u110B\u1173", 1, "\u110B\u1173"},
144
        // See comment in composition.go:compBoundaryAfter.
145
        {"a\u110B\u1173\u11B7", 1, "\u110B\u1173\u11B7"},
146
        {"a\uC73C", 1, "\u110B\u1173"},
147
        {"다음", 3, "\u110B\u1173\u11B7"},
148
        {"다", 0, "\u1103\u1161"},
149
        {"\u1103\u1161\u110B\u1173\u11B7", 6, "\u110B\u1173\u11B7"},
150
        {"\u110B\u1173\u11B7\u1103\u1161", 9, "\u1103\u1161"},
151
        {"다음음", 6, "\u110B\u1173\u11B7"},
152
        {"음다다", 6, "\u1103\u1161"},
153
        // buffer overflow
154
        {"a" + strings.Repeat("\u0300", 30), 3, strings.Repeat("\u0300", 29)},
155
        {"\uFDFA" + strings.Repeat("\u0300", 14), 3, strings.Repeat("\u0300", 14)},
156
        // weird UTF-8
157
        {"a\u0300\u11B7", 0, "a\u0300\u11B7"},
158
}
159
 
160
// decomposeToLast is the positionFunc for decomposeToLastBoundary: it
// reports the length of the byte slice that function returns for s.
func decomposeToLast(rb *reorderBuffer, s string) int {
	return len(decomposeToLastBoundary(rb, []byte(s)))
}
164
 
165
func TestDecomposeToLastBoundary(t *testing.T) {
166
        runPosTests(t, "TestDecomposeToLastBoundary", NFKC, decomposeToLast, decomposeToLastTests)
167
}
168
 
169
var lastBoundaryTests = []PositionTest{
170
        // ends with inert character
171
        {"Hello!", 6, ""},
172
        {"\u0632", 2, ""},
173
        // ends with non-inert starter
174
        {"a", 0, ""},
175
        // illegal runes
176
        {"\xff", 1, ""},
177
        {"aa\xff", 3, ""},
178
        {"a\xff\u0300", 1, ""},
179
        {"\xc0\x80\x80", 3, ""},
180
        {"\xc0\x80\x80\u0300", 3, ""},
181
        // ends with incomplete UTF-8 encoding
182
        {"\xCC", -1, ""},
183
        {"\xE0\x80", -1, ""},
184
        {"\xF0\x80\x80", -1, ""},
185
        {"a\xCC", 0, ""},
186
        {"\x80\xCC", 1, ""},
187
        {"\xCC\xCC", 1, ""},
188
        // ends with combining characters
189
        {"a\u0300\u0301", 0, ""},
190
        {"aaaa\u0300\u0301", 3, ""},
191
        {"\u0300a\u0300\u0301", 2, ""},
192
        {"\u00C0", 0, ""},
193
        {"a\u00C0", 1, ""},
194
        // decomposition may recombine
195
        {"\u0226", 0, ""},
196
        // no boundary
197
        {"", -1, ""},
198
        {"\u0300\u0301", -1, ""},
199
        {"\u0300", -1, ""},
200
        {"\x80\x80", -1, ""},
201
        {"\x80\x80\u0301", -1, ""},
202
        // Hangul
203
        {"다음", 3, ""},
204
        {"다", 0, ""},
205
        {"\u1103\u1161\u110B\u1173\u11B7", 6, ""},
206
        {"\u110B\u1173\u11B7\u1103\u1161", 9, ""},
207
        // too many combining characters.
208
        {strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
209
        {strings.Repeat("\u0300", maxCombiningChars), 60, ""},
210
        {strings.Repeat("\u0300", maxCombiningChars+1), 62, ""},
211
}
212
 
213
// lastBoundaryF is the positionFunc for LastBoundary: it returns
// LastBoundary of rb's form applied to the bytes of s.
func lastBoundaryF(rb *reorderBuffer, s string) int {
	data := []byte(s)
	return rb.f.form.LastBoundary(data)
}
216
 
217
func TestLastBoundary(t *testing.T) {
218
        runPosTests(t, "TestLastBoundary", NFC, lastBoundaryF, lastBoundaryTests)
219
}
220
 
221
var quickSpanTests = []PositionTest{
222
        {"", 0, ""},
223
        // starters
224
        {"a", 1, ""},
225
        {"abc", 3, ""},
226
        {"\u043Eb", 3, ""},
227
        // incomplete last rune.
228
        {"\xCC", 1, ""},
229
        {"a\xCC", 2, ""},
230
        // incorrectly ordered combining characters
231
        {"\u0300\u0316", 0, ""},
232
        {"\u0300\u0316cd", 0, ""},
233
        // have a maximum number of combining characters.
234
        {strings.Repeat("\u035D", 30) + "\u035B", 62, ""},
235
        {"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""},
236
        {"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
237
        {"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
238
}
239
 
240
var quickSpanNFDTests = []PositionTest{
241
        // needs decomposing
242
        {"\u00C0", 0, ""},
243
        {"abc\u00C0", 3, ""},
244
        // correctly ordered combining characters
245
        {"\u0300", 2, ""},
246
        {"ab\u0300", 4, ""},
247
        {"ab\u0300cd", 6, ""},
248
        {"\u0300cd", 4, ""},
249
        {"\u0316\u0300", 4, ""},
250
        {"ab\u0316\u0300", 6, ""},
251
        {"ab\u0316\u0300cd", 8, ""},
252
        {"ab\u0316\u0300\u00C0", 6, ""},
253
        {"\u0316\u0300cd", 6, ""},
254
        {"\u043E\u0308b", 5, ""},
255
        // incorrectly ordered combining characters
256
        {"ab\u0300\u0316", 1, ""}, // TODO: we could skip 'b' as well.
257
        {"ab\u0300\u0316cd", 1, ""},
258
        // Hangul
259
        {"같은", 0, ""},
260
}
261
 
262
var quickSpanNFCTests = []PositionTest{
263
        // okay composed
264
        {"\u00C0", 2, ""},
265
        {"abc\u00C0", 5, ""},
266
        // correctly ordered combining characters
267
        {"ab\u0300", 1, ""},
268
        {"ab\u0300cd", 1, ""},
269
        {"ab\u0316\u0300", 1, ""},
270
        {"ab\u0316\u0300cd", 1, ""},
271
        {"\u00C0\u035D", 4, ""},
272
        // we do not special case leading combining characters
273
        {"\u0300cd", 0, ""},
274
        {"\u0300", 0, ""},
275
        {"\u0316\u0300", 0, ""},
276
        {"\u0316\u0300cd", 0, ""},
277
        // incorrectly ordered combining characters
278
        {"ab\u0300\u0316", 1, ""},
279
        {"ab\u0300\u0316cd", 1, ""},
280
        // Hangul
281
        {"같은", 6, ""},
282
}
283
 
284
// doQuickSpan is the positionFunc for the byte-slice variant: it returns
// QuickSpan of rb's form applied to the bytes of s.
func doQuickSpan(rb *reorderBuffer, s string) int {
	data := []byte(s)
	return rb.f.form.QuickSpan(data)
}
287
 
288
// doQuickSpanString is the positionFunc for the string variant: it returns
// QuickSpanString of rb's form applied to s.
func doQuickSpanString(rb *reorderBuffer, s string) int {
	span := rb.f.form.QuickSpanString(s)
	return span
}
291
 
292
func TestQuickSpan(t *testing.T) {
293
        runPosTests(t, "TestQuickSpanNFD1", NFD, doQuickSpan, quickSpanTests)
294
        runPosTests(t, "TestQuickSpanNFD2", NFD, doQuickSpan, quickSpanNFDTests)
295
        runPosTests(t, "TestQuickSpanNFC1", NFC, doQuickSpan, quickSpanTests)
296
        runPosTests(t, "TestQuickSpanNFC2", NFC, doQuickSpan, quickSpanNFCTests)
297
 
298
        runPosTests(t, "TestQuickSpanStringNFD1", NFD, doQuickSpanString, quickSpanTests)
299
        runPosTests(t, "TestQuickSpanStringNFD2", NFD, doQuickSpanString, quickSpanNFDTests)
300
        runPosTests(t, "TestQuickSpanStringNFC1", NFC, doQuickSpanString, quickSpanTests)
301
        runPosTests(t, "TestQuickSpanStringNFC2", NFC, doQuickSpanString, quickSpanNFCTests)
302
}
303
 
304
var isNormalTests = []PositionTest{
305
        {"", 1, ""},
306
        // illegal runes
307
        {"\xff", 1, ""},
308
        // starters
309
        {"a", 1, ""},
310
        {"abc", 1, ""},
311
        {"\u043Eb", 1, ""},
312
        // incorrectly ordered combining characters
313
        {"\u0300\u0316", 0, ""},
314
        {"ab\u0300\u0316", 0, ""},
315
        {"ab\u0300\u0316cd", 0, ""},
316
        {"\u0300\u0316cd", 0, ""},
317
}
318
var isNormalNFDTests = []PositionTest{
319
        // needs decomposing
320
        {"\u00C0", 0, ""},
321
        {"abc\u00C0", 0, ""},
322
        // correctly ordered combining characters
323
        {"\u0300", 1, ""},
324
        {"ab\u0300", 1, ""},
325
        {"ab\u0300cd", 1, ""},
326
        {"\u0300cd", 1, ""},
327
        {"\u0316\u0300", 1, ""},
328
        {"ab\u0316\u0300", 1, ""},
329
        {"ab\u0316\u0300cd", 1, ""},
330
        {"\u0316\u0300cd", 1, ""},
331
        {"\u043E\u0308b", 1, ""},
332
        // Hangul
333
        {"같은", 0, ""},
334
}
335
var isNormalNFCTests = []PositionTest{
336
        // okay composed
337
        {"\u00C0", 1, ""},
338
        {"abc\u00C0", 1, ""},
339
        // need composing (the marks are already in canonical order)
340
        {"a\u0300", 0, ""},
341
        {"a\u0300cd", 0, ""},
342
        {"a\u0316\u0300", 0, ""},
343
        {"a\u0316\u0300cd", 0, ""},
344
        // correctly ordered combining characters
345
        {"ab\u0300", 1, ""},
346
        {"ab\u0300cd", 1, ""},
347
        {"ab\u0316\u0300", 1, ""},
348
        {"ab\u0316\u0300cd", 1, ""},
349
        {"\u00C0\u035D", 1, ""},
350
        {"\u0300", 1, ""},
351
        {"\u0316\u0300cd", 1, ""},
352
        // Hangul
353
        {"같은", 1, ""},
354
}
355
 
356
// isNormalF is the positionFunc for IsNormal: it maps the boolean result of
// IsNormal for rb's form on the bytes of s to 1 (true) or 0 (false), so that
// it can be compared against PositionTest.pos.
func isNormalF(rb *reorderBuffer, s string) int {
	normal := rb.f.form.IsNormal([]byte(s))
	if !normal {
		return 0
	}
	return 1
}
362
 
363
func TestIsNormal(t *testing.T) {
364
        runPosTests(t, "TestIsNormalNFD1", NFD, isNormalF, isNormalTests)
365
        runPosTests(t, "TestIsNormalNFD2", NFD, isNormalF, isNormalNFDTests)
366
        runPosTests(t, "TestIsNormalNFC1", NFC, isNormalF, isNormalTests)
367
        runPosTests(t, "TestIsNormalNFC2", NFC, isNormalF, isNormalNFCTests)
368
}
369
 
370
// AppendTest is one table entry for runAppendTests: left is the initial
// content of the output buffer, right is the text handed to the function
// under test, and out is the expected resulting content.
type AppendTest struct {
371
        left  string // initial output buffer content
372
        right string // text passed to the function under test
373
        out   string // expected content of the returned buffer
374
}
375
 
376
// appendFunc is the signature of the functions exercised by runAppendTests.
// It receives the form, the initial output bytes and the text to process, and
// returns the bytes that are compared against AppendTest.out.
type appendFunc func(f Form, out []byte, s string) []byte
377
 
378
// runAppendTests calls fn for every entry of tests with form f, using the
// entry's left string as the initial output and its right string as input,
// and compares the result with the entry's out string.
//
// A byte-length mismatch is always reported. If the strings differ, the first
// differing rune within the common rune prefix is located and a window
// starting up to three runes before it, at most seven runes long, is reported
// rune by rune.
func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []AppendTest) {
	for idx, tc := range tests {
		got := string(fn(f, []byte(tc.left), tc.right))
		if len(got) != len(tc.out) {
			t.Errorf("%s:%d: length is %d; want %d", name, idx, len(got), len(tc.out))
		}
		if got == tc.out {
			continue
		}

		gotRunes, wantRunes := []rune(got), []rune(tc.out)
		// Only positions present in both rune slices can be compared.
		limit := len(gotRunes)
		if len(wantRunes) < limit {
			limit = len(wantRunes)
		}

		// Find the first rune that differs.
		first := 0
		for first < limit && gotRunes[first] == wantRunes[first] {
			first++
		}
		if first == limit {
			// No differing rune within the common prefix; nothing to show.
			continue
		}

		// Show some context: from three runes before the difference,
		// seven runes in total, clipped to the common length.
		start := first - 3
		if start < 0 {
			start = 0
		}
		end := start + 7
		if end > limit {
			end = limit
		}
		for k := start; k < end; k++ {
			t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, idx, k, gotRunes[k], wantRunes[k])
		}
	}
}
405
 
406
var appendTests = []AppendTest{
407
        // empty buffers
408
        {"", "", ""},
409
        {"a", "", "a"},
410
        {"", "a", "a"},
411
        {"", "\u0041\u0307\u0304", "\u01E0"},
412
        // segment split across buffers
413
        {"", "a\u0300b", "\u00E0b"},
414
        {"a", "\u0300b", "\u00E0b"},
415
        {"a", "\u0300\u0316", "\u00E0\u0316"},
416
        {"a", "\u0316\u0300", "\u00E0\u0316"},
417
        {"a", "\u0300a\u0300", "\u00E0\u00E0"},
418
        {"a", "\u0300a\u0300a\u0300", "\u00E0\u00E0\u00E0"},
419
        {"a", "\u0300aaa\u0300aaa\u0300", "\u00E0aa\u00E0aa\u00E0"},
420
        {"a\u0300", "\u0327", "\u00E0\u0327"},
421
        {"a\u0327", "\u0300", "\u00E0\u0327"},
422
        {"a\u0316", "\u0300", "\u00E0\u0316"},
423
        {"\u0041\u0307", "\u0304", "\u01E0"},
424
        // Hangul
425
        {"", "\u110B\u1173", "\uC73C"},
426
        {"", "\u1103\u1161", "\uB2E4"},
427
        {"", "\u110B\u1173\u11B7", "\uC74C"},
428
        {"", "\u320E", "\x28\uAC00\x29"},
429
        {"", "\x28\u1100\u1161\x29", "\x28\uAC00\x29"},
430
        {"\u1103", "\u1161", "\uB2E4"},
431
        {"\u110B", "\u1173\u11B7", "\uC74C"},
432
        {"\u110B\u1173", "\u11B7", "\uC74C"},
433
        {"\uC73C", "\u11B7", "\uC74C"},
434
        // UTF-8 encoding split across buffers
435
        {"a\xCC", "\x80", "\u00E0"},
436
        {"a\xCC", "\x80b", "\u00E0b"},
437
        {"a\xCC", "\x80a\u0300", "\u00E0\u00E0"},
438
        {"a\xCC", "\x80\x80", "\u00E0\x80"},
439
        {"a\xCC", "\x80\xCC", "\u00E0\xCC"},
440
        {"a\u0316\xCC", "\x80a\u0316\u0300", "\u00E0\u0316\u00E0\u0316"},
441
        // ending in incomplete UTF-8 encoding
442
        {"", "\xCC", "\xCC"},
443
        {"a", "\xCC", "a\xCC"},
444
        {"a", "b\xCC", "ab\xCC"},
445
        {"\u0226", "\xCC", "\u0226\xCC"},
446
        // illegal runes
447
        {"", "\x80", "\x80"},
448
        {"", "\x80\x80\x80", "\x80\x80\x80"},
449
        {"", "\xCC\x80\x80\x80", "\xCC\x80\x80\x80"},
450
        {"", "a\x80", "a\x80"},
451
        {"", "a\x80\x80\x80", "a\x80\x80\x80"},
452
        {"", "a\x80\x80\x80\x80\x80\x80", "a\x80\x80\x80\x80\x80\x80"},
453
        {"a", "\x80\x80\x80", "a\x80\x80\x80"},
454
        // overflow
455
        {"", strings.Repeat("\x80", 33), strings.Repeat("\x80", 33)},
456
        {strings.Repeat("\x80", 33), "", strings.Repeat("\x80", 33)},
457
        {strings.Repeat("\x80", 33), strings.Repeat("\x80", 33), strings.Repeat("\x80", 66)},
458
        // overflow of combining characters
459
        {strings.Repeat("\u0300", 33), "", strings.Repeat("\u0300", 33)},
460
        // weird UTF-8
461
        {"\u00E0\xE1", "\x86", "\u00E0\xE1\x86"},
462
        {"a\u0300\u11B7", "\u0300", "\u00E0\u11B7\u0300"},
463
        {"a\u0300\u11B7\u0300", "\u0300", "\u00E0\u11B7\u0300\u0300"},
464
        {"\u0300", "\xF8\x80\x80\x80\x80\u0300", "\u0300\xF8\x80\x80\x80\x80\u0300"},
465
        {"\u0300", "\xFC\x80\x80\x80\x80\x80\u0300", "\u0300\xFC\x80\x80\x80\x80\x80\u0300"},
466
        {"\xF8\x80\x80\x80\x80\u0300", "\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
467
        {"\xFC\x80\x80\x80\x80\x80\u0300", "\u0300", "\xFC\x80\x80\x80\x80\x80\u0300\u0300"},
468
        {"\xF8\x80\x80\x80", "\x80\u0300\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
469
}
470
 
471
// appendF is the appendFunc for Form.Append: it appends the bytes of s to
// out using form f.
func appendF(f Form, out []byte, s string) []byte {
	src := []byte(s)
	return f.Append(out, src...)
}
474
 
475
// appendStringF is the appendFunc for Form.AppendString: it appends s to out
// using form f.
func appendStringF(f Form, out []byte, s string) []byte {
	result := f.AppendString(out, s)
	return result
}
478
 
479
// bytesF is the appendFunc for Form.Bytes: it concatenates out and s into a
// fresh byte slice and returns f.Bytes of the whole.
func bytesF(f Form, out []byte, s string) []byte {
	joined := append(append([]byte{}, out...), s...)
	return f.Bytes(joined)
}
485
 
486
// stringF is the appendFunc for Form.String: it concatenates out and s as a
// string and returns the bytes of f.String of the whole.
func stringF(f Form, out []byte, s string) []byte {
	return []byte(f.String(string(out) + s))
}
490
 
491
func TestAppend(t *testing.T) {
492
        runAppendTests(t, "TestAppend", NFKC, appendF, appendTests)
493
        runAppendTests(t, "TestAppendString", NFKC, appendStringF, appendTests)
494
        runAppendTests(t, "TestBytes", NFKC, bytesF, appendTests)
495
        runAppendTests(t, "TestString", NFKC, stringF, appendTests)
496
}
497
 
498
// doFormBenchmark measures f.Append on the text s after s has been converted
// with inf.Bytes. The conversion and the buffer allocation happen while the
// timer is stopped; the reported byte count is the length of the converted
// input.
func doFormBenchmark(b *testing.B, inf, f Form, s string) {
	b.StopTimer()
	input := inf.Bytes([]byte(s))
	scratch := make([]byte, 2*len(input))
	b.SetBytes(int64(len(input)))
	b.StartTimer()
	for n := 0; n < b.N; n++ {
		// Append into the emptied buffer and keep whatever Append returns,
		// truncated to zero length, for the next iteration.
		scratch = f.Append(scratch[:0], input...)[:0]
	}
}
509
 
510
// ascii is the input of the BenchmarkNormalizeAscii* benchmarks: one
// ASCII-only sentence repeated 500 times.
var ascii = strings.Repeat("There is nothing to change here! ", 500)
511
 
512
// BenchmarkNormalizeAsciiNFC runs doFormBenchmark on the ascii text, prepared
// with NFC and appended with NFC.
func BenchmarkNormalizeAsciiNFC(b *testing.B) {
513
        doFormBenchmark(b, NFC, NFC, ascii)
514
}
515
// BenchmarkNormalizeAsciiNFD runs doFormBenchmark on the ascii text, prepared
// with NFC and appended with NFD.
func BenchmarkNormalizeAsciiNFD(b *testing.B) {
516
        doFormBenchmark(b, NFC, NFD, ascii)
517
}
518
// BenchmarkNormalizeAsciiNFKC runs doFormBenchmark on the ascii text,
// prepared with NFC and appended with NFKC.
func BenchmarkNormalizeAsciiNFKC(b *testing.B) {
519
        doFormBenchmark(b, NFC, NFKC, ascii)
520
}
521
// BenchmarkNormalizeAsciiNFKD runs doFormBenchmark on the ascii text,
// prepared with NFC and appended with NFKD.
func BenchmarkNormalizeAsciiNFKD(b *testing.B) {
522
        doFormBenchmark(b, NFC, NFKD, ascii)
523
}
524
 
525
// BenchmarkNormalizeNFC2NFC runs doFormBenchmark on txt_all, prepared with
// NFC and appended with NFC.
func BenchmarkNormalizeNFC2NFC(b *testing.B) {
526
        doFormBenchmark(b, NFC, NFC, txt_all)
527
}
528
// BenchmarkNormalizeNFC2NFD runs doFormBenchmark on txt_all, prepared with
// NFC and appended with NFD.
func BenchmarkNormalizeNFC2NFD(b *testing.B) {
529
        doFormBenchmark(b, NFC, NFD, txt_all)
530
}
531
// BenchmarkNormalizeNFD2NFC runs doFormBenchmark on txt_all, prepared with
// NFD and appended with NFC.
func BenchmarkNormalizeNFD2NFC(b *testing.B) {
532
        doFormBenchmark(b, NFD, NFC, txt_all)
533
}
534
// BenchmarkNormalizeNFD2NFD runs doFormBenchmark on txt_all, prepared with
// NFD and appended with NFD.
func BenchmarkNormalizeNFD2NFD(b *testing.B) {
535
        doFormBenchmark(b, NFD, NFD, txt_all)
536
}
537
 
538
// Hangul is often special-cased, so we test it separately.
539
// BenchmarkNormalizeHangulNFC2NFC runs doFormBenchmark on the Korean sample
// txt_kr, prepared with NFC and appended with NFC.
func BenchmarkNormalizeHangulNFC2NFC(b *testing.B) {
540
        doFormBenchmark(b, NFC, NFC, txt_kr)
541
}
542
// BenchmarkNormalizeHangulNFC2NFD runs doFormBenchmark on the Korean sample
// txt_kr, prepared with NFC and appended with NFD.
func BenchmarkNormalizeHangulNFC2NFD(b *testing.B) {
543
        doFormBenchmark(b, NFC, NFD, txt_kr)
544
}
545
// BenchmarkNormalizeHangulNFD2NFC runs doFormBenchmark on the Korean sample
// txt_kr, prepared with NFD and appended with NFC.
func BenchmarkNormalizeHangulNFD2NFC(b *testing.B) {
546
        doFormBenchmark(b, NFD, NFC, txt_kr)
547
}
548
// BenchmarkNormalizeHangulNFD2NFD runs doFormBenchmark on the Korean sample
// txt_kr, prepared with NFD and appended with NFD.
func BenchmarkNormalizeHangulNFD2NFD(b *testing.B) {
549
        doFormBenchmark(b, NFD, NFD, txt_kr)
550
}
551
 
552
func doTextBenchmark(b *testing.B, s string) {
553
        b.StopTimer()
554
        b.SetBytes(int64(len(s)) * 4)
555
        in := []byte(s)
556
        var buf = make([]byte, 0, 2*len(in))
557
        b.StartTimer()
558
        for i := 0; i < b.N; i++ {
559
                NFC.Append(buf, in...)
560
                NFD.Append(buf, in...)
561
                NFKC.Append(buf, in...)
562
                NFKD.Append(buf, in...)
563
        }
564
}
565
 
566
// BenchmarkCanonicalOrdering runs doTextBenchmark on txt_canon.
func BenchmarkCanonicalOrdering(b *testing.B) {
567
        doTextBenchmark(b, txt_canon)
568
}
569
// BenchmarkExtendedLatin runs doTextBenchmark on the Vietnamese sample
// txt_vn.
func BenchmarkExtendedLatin(b *testing.B) {
570
        doTextBenchmark(b, txt_vn)
571
}
572
// BenchmarkMiscTwoByteUtf8 runs doTextBenchmark on twoByteUtf8, the
// concatenation of txt_ru, txt_gr, txt_ar and txt_il.
func BenchmarkMiscTwoByteUtf8(b *testing.B) {
573
        doTextBenchmark(b, twoByteUtf8)
574
}
575
// BenchmarkMiscThreeByteUtf8 runs doTextBenchmark on threeByteUtf8, which is
// the Thai sample txt_th.
func BenchmarkMiscThreeByteUtf8(b *testing.B) {
576
        doTextBenchmark(b, threeByteUtf8)
577
}
578
// BenchmarkHangul runs doTextBenchmark on the Korean sample txt_kr.
func BenchmarkHangul(b *testing.B) {
579
        doTextBenchmark(b, txt_kr)
580
}
581
// BenchmarkJapanese runs doTextBenchmark on the Japanese sample txt_jp.
func BenchmarkJapanese(b *testing.B) {
582
        doTextBenchmark(b, txt_jp)
583
}
584
// BenchmarkChinese runs doTextBenchmark on the Chinese sample txt_cn.
func BenchmarkChinese(b *testing.B) {
585
        doTextBenchmark(b, txt_cn)
586
}
587
 
588
// txt_canon holds sequences sampled from the Canonical ordering tests
// (Part 2) of http://unicode.org/Public/UNIDATA/NormalizationTest.txt.
//
// The text is built from interpreted (double-quoted) string literals so that
// every \uXXXX escape denotes the actual code point. Inside a raw
// (back-quoted) literal the escapes are not processed, which would leave the
// constant as plain ASCII text made of backslashes and hex digits instead of
// combining character sequences.
const txt_canon = "\u0061\u0315\u0300\u05AE\u0300\u0062 \u0061\u0300\u0315\u0300\u05AE\u0062\n" +
	"\u0061\u0302\u0315\u0300\u05AE\u0062 \u0061\u0307\u0315\u0300\u05AE\u0062\n" +
	"\u0061\u0315\u0300\u05AE\u030A\u0062 \u0061\u059A\u0316\u302A\u031C\u0062\n" +
	"\u0061\u032E\u059A\u0316\u302A\u0062 \u0061\u0338\u093C\u0334\u0062\n" +
	"\u0061\u059A\u0316\u302A\u0339       \u0061\u0341\u0315\u0300\u05AE\u0062\n" +
	"\u0061\u0348\u059A\u0316\u302A\u0062 \u0061\u0361\u0345\u035D\u035C\u0062\n" +
	"\u0061\u0366\u0315\u0300\u05AE\u0062 \u0061\u0315\u0300\u05AE\u0486\u0062\n" +
	"\u0061\u05A4\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0613\u0062\n" +
	"\u0061\u0315\u0300\u05AE\u0615\u0062 \u0061\u0617\u0315\u0300\u05AE\u0062\n" +
	"\u0061\u0619\u0618\u064D\u064E\u0062 \u0061\u0315\u0300\u05AE\u0654\u0062\n" +
	"\u0061\u0315\u0300\u05AE\u06DC\u0062 \u0061\u0733\u0315\u0300\u05AE\u0062\n" +
	"\u0061\u0744\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0745\u0062\n" +
	"\u0061\u09CD\u05B0\u094D\u3099\u0062 \u0061\u0E38\u0E48\u0E38\u0C56\u0062\n" +
	"\u0061\u0EB8\u0E48\u0E38\u0E49\u0062 \u0061\u0F72\u0F71\u0EC8\u0F71\u0062\n" +
	"\u0061\u1039\u05B0\u094D\u3099\u0062 \u0061\u05B0\u094D\u3099\u1A60\u0062\n" +
	"\u0061\u3099\u093C\u0334\u1BE6\u0062 \u0061\u3099\u093C\u0334\u1C37\u0062\n" +
	"\u0061\u1CD9\u059A\u0316\u302A\u0062 \u0061\u2DED\u0315\u0300\u05AE\u0062\n" +
	"\u0061\u2DEF\u0315\u0300\u05AE\u0062 \u0061\u302D\u302E\u059A\u0316\u0062"
608
 
609
// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
610
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.
611
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
612
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
613
một giấy phép khác có các điều khoản tương tự như giấy phép này
614
cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào
615
trên đây cũng có thể được miễn bỏ nếu bạn được sự cho phép của
616
người sở hữu bản quyền. Phạm vi công chúng — Khi tác phẩm hoặc
617
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
618
chúng theo quy định của pháp luật thì tình trạng của nó không
619
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
620
 
621
// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
622
const txt_ru = `При обязательном соблюдении следующих условий:
623
Attribution — Вы должны атрибутировать произведение (указывать
624
автора и источник) в порядке, предусмотренном автором или
625
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
626
что они поддерживают вас или использование вами данного произведения).
627
Υπό τις ακόλουθες προϋποθέσεις:`
628
 
629
// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
630
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με τον
631
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
632
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
633
τη χρήση του έργου από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε,
634
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
635
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
636
παρόμοια άδεια.`
637
 
638
// Taken from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
639
const txt_ar = `بموجب الشروط التالية نسب المصنف — يجب عليك أن
640
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
641
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
642
المشاركة على قدم المساواة — إذا كنت يعدل ، والتغيير ، أو الاستفادة
643
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
644
لهذا الترخيص.`
645
 
646
// Taken from http://creativecommons.org/licenses/by-sa/1.0/il/
647
const txt_il = `בכפוף לתנאים הבאים: ייחוס — עליך לייחס את היצירה (לתת קרדיט) באופן
648
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
649
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה — אם תחליט/י לשנות,
650
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
651
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`
652
 
653
const twoByteUtf8 = txt_ru + txt_gr + txt_ar + txt_il
654
 
655
// Taken from http://creativecommons.org/licenses/by-sa/2.0/kr/
656
const txt_kr = `다음과 같은 조건을 따라야 합니다: 저작자표시
657
(Attribution) — 저작자나 이용허락자가 정한 방법으로 저작물의
658
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
659
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
660
동일조건변경허락 — 이 저작물을 이용하여 만든 이차적 저작물에는 본
661
라이선스와 동일한 라이선스를 적용해야 합니다.`
662
 
663
// Taken from http://creativecommons.org/licenses/by-sa/3.0/th/
664
const txt_th = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา — คุณต้องแสดงที่
665
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
666
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
667
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน — หากคุณดัดแปลง เปลี่ยนรูป หรื
668
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
669
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`
670
 
671
const threeByteUtf8 = txt_th
672
 
673
// Taken from http://creativecommons.org/licenses/by-sa/2.0/jp/
674
const txt_jp = `あなたの従うべき条件は以下の通りです。
675
表示 — あなたは原著作者のクレジットを表示しなければなりません。
676
継承 — もしあなたがこの作品を改変、変形または加工した場合、
677
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
678
頒布することができます。`
679
 
680
// http://creativecommons.org/licenses/by-sa/2.5/cn/
681
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
682
广播或通过信息网络传播本作品 创作演绎作品
683
对本作品进行商业性使用 惟须遵守下列条件:
684
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
685
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
686
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`
687
 
688
const txt_cjk = txt_cn + txt_jp + txt_kr
689
const txt_all = txt_vn + twoByteUtf8 + threeByteUtf8 + txt_cjk

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.