URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [index/] [suffixarray/] [suffixarray_test.go] - Blame information for rev 747

Details | Compare with Previous | View Log


// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
 
package suffixarray
 
import (
        "bytes"
        "math/rand"
        "regexp"
        "sort"
        "strings"
        "testing"
)
 
type testCase struct {
        name     string   // name of test case
        source   string   // source to index
        patterns []string // patterns to lookup
}
 
var testCases = []testCase{
        {
                "empty string",
                "",
                []string{
                        "",
                        "foo",
                        "(foo)",
                        ".*",
                        "a*",
                },
        },
 
        {
                "all a's",
                "aaaaaaaaaa", // 10 a's
                []string{
                        "",
                        "a",
                        "aa",
                        "aaa",
                        "aaaa",
                        "aaaaa",
                        "aaaaaa",
                        "aaaaaaa",
                        "aaaaaaaa",
                        "aaaaaaaaa",
                        "aaaaaaaaaa",
                        "aaaaaaaaaaa", // 11 a's
                        ".",
                        ".*",
                        "a+",
                        "aa+",
                        "aaaa[b]?",
                        "aaa*",
                },
        },
 
        {
                "abc",
                "abc",
                []string{
                        "a",
                        "b",
                        "c",
                        "ab",
                        "bc",
                        "abc",
                        "a.c",
                        "a(b|c)",
                        "abc?",
                },
        },
 
        {
                "barbara*3",
                "barbarabarbarabarbara",
                []string{
                        "a",
                        "bar",
                        "rab",
                        "arab",
                        "barbar",
                        "bara?bar",
                },
        },
 
        {
                "typing drill",
                "Now is the time for all good men to come to the aid of their country.",
                []string{
                        "Now",
                        "the time",
                        "to come the aid",
                        "is the time for all good men to come to the aid of their",
                        "to (come|the)?",
                },
        },
 
        {
                "godoc simulation",
                "package main\n\nimport(\n    \"rand\"\n    ",
                []string{},
        },
}
 
// find all occurrences of s in source; report at most n occurrences
func find(src, s string, n int) []int {
        var res []int
        if s != "" && n != 0 {
                // find at most n occurrences of s in src
                for i := -1; n < 0 || len(res) < n; {
                        j := strings.Index(src[i+1:], s)
                        if j < 0 {
                                break
                        }
                        i += j + 1
                        res = append(res, i)
                }
        }
        return res
}
 
func testLookup(t *testing.T, tc *testCase, x *Index, s string, n int) {
        res := x.Lookup([]byte(s), n)
        exp := find(tc.source, s, n)
 
        // check that the lengths match
        if len(res) != len(exp) {
                t.Errorf("test %q, lookup %q (n = %d): expected %d results; got %d", tc.name, s, n, len(exp), len(res))
        }
 
        // if n >= 0 the number of results is limited --- unless n >= all results,
        // we may obtain different positions from the Index and from find (because
        // Index may not find the results in the same order as find) => in general
        // we cannot simply check that the res and exp lists are equal
 
        // check that each result is in fact a correct match and there are no duplicates
        sort.Ints(res)
        for i, r := range res {
                if r < 0 || len(tc.source) <= r {
                        t.Errorf("test %q, lookup %q, result %d (n = %d): index %d out of range [0, %d[", tc.name, s, i, n, r, len(tc.source))
                } else if !strings.HasPrefix(tc.source[r:], s) {
                        t.Errorf("test %q, lookup %q, result %d (n = %d): index %d not a match", tc.name, s, i, n, r)
                }
                if i > 0 && res[i-1] == r {
                        t.Errorf("test %q, lookup %q, result %d (n = %d): found duplicate index %d", tc.name, s, i, n, r)
                }
        }
 
        if n < 0 {
                // all results computed - sorted res and exp must be equal
                for i, r := range res {
                        e := exp[i]
                        if r != e {
                                t.Errorf("test %q, lookup %q, result %d: expected index %d; got %d", tc.name, s, i, e, r)
                        }
                }
        }
}
 
func testFindAllIndex(t *testing.T, tc *testCase, x *Index, rx *regexp.Regexp, n int) {
        res := x.FindAllIndex(rx, n)
        exp := rx.FindAllStringIndex(tc.source, n)
 
        // check that the lengths match
        if len(res) != len(exp) {
                t.Errorf("test %q, FindAllIndex %q (n = %d): expected %d results; got %d", tc.name, rx, n, len(exp), len(res))
        }
 
        // if n >= 0 the number of results is limited --- unless n >= all results,
        // we may obtain different positions from the Index and from regexp (because
        // Index may not find the results in the same order as regexp) => in general
        // we cannot simply check that the res and exp lists are equal
 
        // check that each result is in fact a correct match and the result is sorted
        for i, r := range res {
                if r[0] < 0 || r[0] > r[1] || len(tc.source) < r[1] {
                        t.Errorf("test %q, FindAllIndex %q, result %d (n == %d): illegal match [%d, %d]", tc.name, rx, i, n, r[0], r[1])
                } else if !rx.MatchString(tc.source[r[0]:r[1]]) {
                        t.Errorf("test %q, FindAllIndex %q, result %d (n = %d): [%d, %d] not a match", tc.name, rx, i, n, r[0], r[1])
                }
        }
 
        if n < 0 {
                // all results computed - sorted res and exp must be equal
                for i, r := range res {
                        e := exp[i]
                        if r[0] != e[0] || r[1] != e[1] {
                                t.Errorf("test %q, FindAllIndex %q, result %d: expected match [%d, %d]; got [%d, %d]",
                                        tc.name, rx, i, e[0], e[1], r[0], r[1])
                        }
                }
        }
}
 
func testLookups(t *testing.T, tc *testCase, x *Index, n int) {
        for _, pat := range tc.patterns {
                testLookup(t, tc, x, pat, n)
                if rx, err := regexp.Compile(pat); err == nil {
                        testFindAllIndex(t, tc, x, rx, n)
                }
        }
}
 
// index is used to hide the sort.Interface
type index Index
 
func (x *index) Len() int           { return len(x.sa) }
func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
func (x *index) Swap(i, j int)      { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
func (a *index) at(i int) []byte    { return a.data[a.sa[i]:] }
 
func testConstruction(t *testing.T, tc *testCase, x *Index) {
        if !sort.IsSorted((*index)(x)) {
                t.Errorf("failed testConstruction %s", tc.name)
        }
}
 
func equal(x, y *Index) bool {
        if !bytes.Equal(x.data, y.data) {
                return false
        }
        for i, j := range x.sa {
                if j != y.sa[i] {
                        return false
                }
        }
        return true
}
 
// returns the serialized index size
func testSaveRestore(t *testing.T, tc *testCase, x *Index) int {
        var buf bytes.Buffer
        if err := x.Write(&buf); err != nil {
                t.Errorf("failed writing index %s (%s)", tc.name, err)
        }
        size := buf.Len()
        var y Index
        if err := y.Read(&buf); err != nil {
                t.Errorf("failed reading index %s (%s)", tc.name, err)
        }
        if !equal(x, &y) {
                t.Errorf("restored index doesn't match saved index %s", tc.name)
        }
        return size
}
 
func TestIndex(t *testing.T) {
        for _, tc := range testCases {
                x := New([]byte(tc.source))
                testConstruction(t, &tc, x)
                testSaveRestore(t, &tc, x)
                testLookups(t, &tc, x, 0)
                testLookups(t, &tc, x, 1)
                testLookups(t, &tc, x, 10)
                testLookups(t, &tc, x, 2e9)
                testLookups(t, &tc, x, -1)
        }
}
 
// Of all possible inputs, the random bytes have the least amount of substring
// repetition, and the repeated bytes have the most. For most algorithms,
// the running time of every input will be between these two.
func benchmarkNew(b *testing.B, random bool) {
        b.StopTimer()
        data := make([]byte, 1e6)
        if random {
                for i := range data {
                        data[i] = byte(rand.Intn(256))
                }
        }
        b.StartTimer()
        for i := 0; i < b.N; i++ {
                New(data)
        }
}
 
func BenchmarkNewIndexRandom(b *testing.B) {
        benchmarkNew(b, true)
}
func BenchmarkNewIndexRepeat(b *testing.B) {
        benchmarkNew(b, false)
}
 
func BenchmarkSaveRestore(b *testing.B) {
        b.StopTimer()
        r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence
        data := make([]byte, 10<<20)            // 10MB of data to index
        for i := range data {
                data[i] = byte(r.Intn(256))
        }
        x := New(data)
        size := testSaveRestore(nil, nil, x)       // verify correctness
        buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
        b.SetBytes(int64(size))
        b.StartTimer()
        for i := 0; i < b.N; i++ {
                x.Write(buf)
                var y Index
                y.Read(buf)
        }
}

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [index/] [suffixarray/] [suffixarray_test.go] - Blame information for rev 747

Line No.	Rev	Author	Line
1	747	jeremybenn	`// Copyright 2010 The Go Authors. All rights reserved.`
2			`// Use of this source code is governed by a BSD-style`
3			`// license that can be found in the LICENSE file.`
4
5			`package suffixarray`
6
7			`import (`
8			`"bytes"`
9			`"math/rand"`
10			`"regexp"`
11			`"sort"`
12			`"strings"`
13			`"testing"`
14			`)`
15
16			`type testCase struct {`
17			`name string // name of test case`
18			`source string // source to index`
19			`patterns []string // patterns to lookup`
20			`}`
21
22			`var testCases = []testCase{`
23			`{`
24			`"empty string",`
25			`"",`
26			`[]string{`
27			`"",`
28			`"foo",`
29			`"(foo)",`
30			`".*",`
31			`"a*",`
32			`},`
33			`},`
34
35			`{`
36			`"all a's",`
37			`"aaaaaaaaaa", // 10 a's`
38			`[]string{`
39			`"",`
40			`"a",`
41			`"aa",`
42			`"aaa",`
43			`"aaaa",`
44			`"aaaaa",`
45			`"aaaaaa",`
46			`"aaaaaaa",`
47			`"aaaaaaaa",`
48			`"aaaaaaaaa",`
49			`"aaaaaaaaaa",`
50			`"aaaaaaaaaaa", // 11 a's`
51			`".",`
52			`".*",`
53			`"a+",`
54			`"aa+",`
55			`"aaaa[b]?",`
56			`"aaa*",`
57			`},`
58			`},`
59
60			`{`
61			`"abc",`
62			`"abc",`
63			`[]string{`
64			`"a",`
65			`"b",`
66			`"c",`
67			`"ab",`
68			`"bc",`
69			`"abc",`
70			`"a.c",`
71			`"a(b\|c)",`
72			`"abc?",`
73			`},`
74			`},`
75
76			`{`
77			`"barbara*3",`
78			`"barbarabarbarabarbara",`
79			`[]string{`
80			`"a",`
81			`"bar",`
82			`"rab",`
83			`"arab",`
84			`"barbar",`
85			`"bara?bar",`
86			`},`
87			`},`
88
89			`{`
90			`"typing drill",`
91			`"Now is the time for all good men to come to the aid of their country.",`
92			`[]string{`
93			`"Now",`
94			`"the time",`
95			`"to come the aid",`
96			`"is the time for all good men to come to the aid of their",`
97			`"to (come\|the)?",`
98			`},`
99			`},`
100
101			`{`
102			`"godoc simulation",`
103			`"package main\n\nimport(\n \"rand\"\n ",`
104			`[]string{},`
105			`},`
106			`}`
107
108			`// find all occurrences of s in source; report at most n occurrences`
109			`func find(src, s string, n int) []int {`
110			`var res []int`
111			`if s != "" && n != 0 {`
112			`// find at most n occurrences of s in src`
113			`for i := -1; n < 0 \|\| len(res) < n; {`
114			`j := strings.Index(src[i+1:], s)`
115			`if j < 0 {`
116			`break`
117			`}`
118			`i += j + 1`
119			`res = append(res, i)`
120			`}`
121			`}`
122			`return res`
123			`}`
124
125			`func testLookup(t testing.T, tc testCase, x *Index, s string, n int) {`
126			`res := x.Lookup([]byte(s), n)`
127			`exp := find(tc.source, s, n)`
128
129			`// check that the lengths match`
130			`if len(res) != len(exp) {`
131			`t.Errorf("test %q, lookup %q (n = %d): expected %d results; got %d", tc.name, s, n, len(exp), len(res))`
132			`}`
133
134			`// if n >= 0 the number of results is limited --- unless n >= all results,`
135			`// we may obtain different positions from the Index and from find (because`
136			`// Index may not find the results in the same order as find) => in general`
137			`// we cannot simply check that the res and exp lists are equal`
138
139			`// check that each result is in fact a correct match and there are no duplicates`
140			`sort.Ints(res)`
141			`for i, r := range res {`
142			`if r < 0 \|\| len(tc.source) <= r {`
143			`t.Errorf("test %q, lookup %q, result %d (n = %d): index %d out of range [0, %d[", tc.name, s, i, n, r, len(tc.source))`
144			`} else if !strings.HasPrefix(tc.source[r:], s) {`
145			`t.Errorf("test %q, lookup %q, result %d (n = %d): index %d not a match", tc.name, s, i, n, r)`
146			`}`
147			`if i > 0 && res[i-1] == r {`
148			`t.Errorf("test %q, lookup %q, result %d (n = %d): found duplicate index %d", tc.name, s, i, n, r)`
149			`}`
150			`}`
151
152			`if n < 0 {`
153			`// all results computed - sorted res and exp must be equal`
154			`for i, r := range res {`
155			`e := exp[i]`
156			`if r != e {`
157			`t.Errorf("test %q, lookup %q, result %d: expected index %d; got %d", tc.name, s, i, e, r)`
158			`}`
159			`}`
160			`}`
161			`}`
162
163			`func testFindAllIndex(t testing.T, tc testCase, x Index, rx regexp.Regexp, n int) {`
164			`res := x.FindAllIndex(rx, n)`
165			`exp := rx.FindAllStringIndex(tc.source, n)`
166
167			`// check that the lengths match`
168			`if len(res) != len(exp) {`
169			`t.Errorf("test %q, FindAllIndex %q (n = %d): expected %d results; got %d", tc.name, rx, n, len(exp), len(res))`
170			`}`
171
172			`// if n >= 0 the number of results is limited --- unless n >= all results,`
173			`// we may obtain different positions from the Index and from regexp (because`
174			`// Index may not find the results in the same order as regexp) => in general`
175			`// we cannot simply check that the res and exp lists are equal`
176
177			`// check that each result is in fact a correct match and the result is sorted`
178			`for i, r := range res {`
179			`if r[0] < 0 \|\| r[0] > r[1] \|\| len(tc.source) < r[1] {`
180			`t.Errorf("test %q, FindAllIndex %q, result %d (n == %d): illegal match [%d, %d]", tc.name, rx, i, n, r[0], r[1])`
181			`} else if !rx.MatchString(tc.source[r[0]:r[1]]) {`
182			`t.Errorf("test %q, FindAllIndex %q, result %d (n = %d): [%d, %d] not a match", tc.name, rx, i, n, r[0], r[1])`
183			`}`
184			`}`
185
186			`if n < 0 {`
187			`// all results computed - sorted res and exp must be equal`
188			`for i, r := range res {`
189			`e := exp[i]`
190			`if r[0] != e[0] \|\| r[1] != e[1] {`
191			`t.Errorf("test %q, FindAllIndex %q, result %d: expected match [%d, %d]; got [%d, %d]",`
192			`tc.name, rx, i, e[0], e[1], r[0], r[1])`
193			`}`
194			`}`
195			`}`
196			`}`
197
198			`func testLookups(t testing.T, tc testCase, x *Index, n int) {`
199			`for _, pat := range tc.patterns {`
200			`testLookup(t, tc, x, pat, n)`
201			`if rx, err := regexp.Compile(pat); err == nil {`
202			`testFindAllIndex(t, tc, x, rx, n)`
203			`}`
204			`}`
205			`}`
206
207			`// index is used to hide the sort.Interface`
208			`type index Index`
209
210			`func (x *index) Len() int { return len(x.sa) }`
211			`func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }`
212			`func (x *index) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }`
213			`func (a *index) at(i int) []byte { return a.data[a.sa[i]:] }`
214
215			`func testConstruction(t testing.T, tc testCase, x *Index) {`
216			`if !sort.IsSorted((*index)(x)) {`
217			`t.Errorf("failed testConstruction %s", tc.name)`
218			`}`
219			`}`
220
221			`func equal(x, y *Index) bool {`
222			`if !bytes.Equal(x.data, y.data) {`
223			`return false`
224			`}`
225			`for i, j := range x.sa {`
226			`if j != y.sa[i] {`
227			`return false`
228			`}`
229			`}`
230			`return true`
231			`}`
232
233			`// returns the serialized index size`
234			`func testSaveRestore(t testing.T, tc testCase, x *Index) int {`
235			`var buf bytes.Buffer`
236			`if err := x.Write(&buf); err != nil {`
237			`t.Errorf("failed writing index %s (%s)", tc.name, err)`
238			`}`
239			`size := buf.Len()`
240			`var y Index`
241			`if err := y.Read(&buf); err != nil {`
242			`t.Errorf("failed reading index %s (%s)", tc.name, err)`
243			`}`
244			`if !equal(x, &y) {`
245			`t.Errorf("restored index doesn't match saved index %s", tc.name)`
246			`}`
247			`return size`
248			`}`
249
250			`func TestIndex(t *testing.T) {`
251			`for _, tc := range testCases {`
252			`x := New([]byte(tc.source))`
253			`testConstruction(t, &tc, x)`
254			`testSaveRestore(t, &tc, x)`
255			`testLookups(t, &tc, x, 0)`
256			`testLookups(t, &tc, x, 1)`
257			`testLookups(t, &tc, x, 10)`
258			`testLookups(t, &tc, x, 2e9)`
259			`testLookups(t, &tc, x, -1)`
260			`}`
261			`}`
262
263			`// Of all possible inputs, the random bytes have the least amount of substring`
264			`// repetition, and the repeated bytes have the most. For most algorithms,`
265			`// the running time of every input will be between these two.`
266			`func benchmarkNew(b *testing.B, random bool) {`
267			`b.StopTimer()`
268			`data := make([]byte, 1e6)`
269			`if random {`
270			`for i := range data {`
271			`data[i] = byte(rand.Intn(256))`
272			`}`
273			`}`
274			`b.StartTimer()`
275			`for i := 0; i < b.N; i++ {`
276			`New(data)`
277			`}`
278			`}`
279
280			`func BenchmarkNewIndexRandom(b *testing.B) {`
281			`benchmarkNew(b, true)`
282			`}`
283			`func BenchmarkNewIndexRepeat(b *testing.B) {`
284			`benchmarkNew(b, false)`
285			`}`
286
287			`func BenchmarkSaveRestore(b *testing.B) {`
288			`b.StopTimer()`
289			`r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence`
290			`data := make([]byte, 10<<20) // 10MB of data to index`
291			`for i := range data {`
292			`data[i] = byte(r.Intn(256))`
293			`}`
294			`x := New(data)`
295			`size := testSaveRestore(nil, nil, x) // verify correctness`
296			`buf := bytes.NewBuffer(make([]byte, size)) // avoid growing`
297			`b.SetBytes(int64(size))`
298			`b.StartTimer()`
299			`for i := 0; i < b.N; i++ {`
300			`x.Write(buf)`
301			`var y Index`
302			`y.Read(buf)`
303			`}`
304			`}`