// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
4 |
|
|
|
5 |
|
|
package regexp
|
6 |
|
|
|
7 |
|
|
import (
|
8 |
|
|
"bufio"
|
9 |
|
|
"compress/bzip2"
|
10 |
|
|
"fmt"
|
11 |
|
|
"io"
|
12 |
|
|
"math/rand"
|
13 |
|
|
"os"
|
14 |
|
|
"path/filepath"
|
15 |
|
|
"regexp/syntax"
|
16 |
|
|
"strconv"
|
17 |
|
|
"strings"
|
18 |
|
|
"testing"
|
19 |
|
|
"unicode/utf8"
|
20 |
|
|
)
|
21 |
|
|
|
22 |
|
|
// TestRE2 tests this package's regexp API against test cases
|
23 |
|
|
// considered during RE2's exhaustive tests, which run all possible
|
24 |
|
|
// regexps over a given set of atoms and operators, up to a given
|
25 |
|
|
// complexity, over all possible strings over a given alphabet,
|
26 |
|
|
// up to a given size. Rather than try to link with RE2, we read a
|
27 |
|
|
// log file containing the test cases and the expected matches.
|
28 |
|
|
// The log file, re2.txt, is generated by running 'make exhaustive-log'
|
29 |
|
|
// in the open source RE2 distribution. http://code.google.com/p/re2/
|
30 |
|
|
//
|
31 |
|
|
// The test file format is a sequence of stanzas like:
|
32 |
|
|
//
|
33 |
|
|
// strings
|
34 |
|
|
// "abc"
|
35 |
|
|
// "123x"
|
36 |
|
|
// regexps
|
37 |
|
|
// "[a-z]+"
|
38 |
|
|
// 0-3;0-3
|
39 |
|
|
// -;-
|
40 |
|
|
// "([0-9])([0-9])([0-9])"
|
41 |
|
|
// -;-
|
42 |
|
|
// -;0-3 0-1 1-2 2-3
|
43 |
|
|
//
|
44 |
|
|
// The stanza begins by defining a set of strings, quoted
|
45 |
|
|
// using Go double-quote syntax, one per line. Then the
|
46 |
|
|
// regexps section gives a sequence of regexps to run on
|
47 |
|
|
// the strings. In the block that follows a regexp, each line
|
48 |
|
|
// gives the semicolon-separated match results of running
|
49 |
|
|
// the regexp on the corresponding string.
|
50 |
|
|
// Each match result is either a single -, meaning no match, or a
|
51 |
|
|
// space-separated sequence of pairs giving the match and
|
52 |
|
|
// submatch indices. An unmatched subexpression formats
|
53 |
|
|
// its pair as a single - (not illustrated above). For now
|
54 |
|
|
// each regexp run produces two match results, one for a
|
55 |
|
|
// ``full match'' that restricts the regexp to matching the entire
|
56 |
|
|
// string or nothing, and one for a ``partial match'' that gives
|
57 |
|
|
// the leftmost first match found in the string.
|
58 |
|
|
//
|
59 |
|
|
// Lines beginning with # are comments. Lines beginning with
|
60 |
|
|
// a capital letter are test names printed during RE2's test suite
|
61 |
|
|
// and are echoed into t but otherwise ignored.
|
62 |
|
|
//
|
63 |
|
|
// At time of writing, re2.txt is 32 MB but compresses to 760 kB,
|
64 |
|
|
// so we store re2.txt.gz in the repository and decompress it on the fly.
|
65 |
|
|
//
|
66 |
|
|
func TestRE2Search(t *testing.T) {
|
67 |
|
|
testRE2(t, "testdata/re2-search.txt")
|
68 |
|
|
}
|
69 |
|
|
|
70 |
|
|
func TestRE2Exhaustive(t *testing.T) {
|
71 |
|
|
if testing.Short() {
|
72 |
|
|
t.Log("skipping TestRE2Exhaustive during short test")
|
73 |
|
|
return
|
74 |
|
|
}
|
75 |
|
|
testRE2(t, "testdata/re2-exhaustive.txt.bz2")
|
76 |
|
|
}
|
77 |
|
|
|
78 |
|
|
func testRE2(t *testing.T, file string) {
|
79 |
|
|
f, err := os.Open(file)
|
80 |
|
|
if err != nil {
|
81 |
|
|
t.Fatal(err)
|
82 |
|
|
}
|
83 |
|
|
defer f.Close()
|
84 |
|
|
var txt io.Reader
|
85 |
|
|
if strings.HasSuffix(file, ".bz2") {
|
86 |
|
|
z := bzip2.NewReader(f)
|
87 |
|
|
txt = z
|
88 |
|
|
file = file[:len(file)-len(".bz2")] // for error messages
|
89 |
|
|
} else {
|
90 |
|
|
txt = f
|
91 |
|
|
}
|
92 |
|
|
lineno := 0
|
93 |
|
|
r := bufio.NewReader(txt)
|
94 |
|
|
var (
|
95 |
|
|
str []string
|
96 |
|
|
input []string
|
97 |
|
|
inStrings bool
|
98 |
|
|
re *Regexp
|
99 |
|
|
refull *Regexp
|
100 |
|
|
nfail int
|
101 |
|
|
ncase int
|
102 |
|
|
)
|
103 |
|
|
for {
|
104 |
|
|
line, err := r.ReadString('\n')
|
105 |
|
|
if err != nil {
|
106 |
|
|
if err == io.EOF {
|
107 |
|
|
break
|
108 |
|
|
}
|
109 |
|
|
t.Fatalf("%s:%d: %v", file, lineno, err)
|
110 |
|
|
}
|
111 |
|
|
line = line[:len(line)-1] // chop \n
|
112 |
|
|
lineno++
|
113 |
|
|
switch {
|
114 |
|
|
case line == "":
|
115 |
|
|
t.Fatalf("%s:%d: unexpected blank line", file, lineno)
|
116 |
|
|
case line[0] == '#':
|
117 |
|
|
continue
|
118 |
|
|
case 'A' <= line[0] && line[0] <= 'Z':
|
119 |
|
|
// Test name.
|
120 |
|
|
t.Logf("%s\n", line)
|
121 |
|
|
continue
|
122 |
|
|
case line == "strings":
|
123 |
|
|
str = str[:0]
|
124 |
|
|
inStrings = true
|
125 |
|
|
case line == "regexps":
|
126 |
|
|
inStrings = false
|
127 |
|
|
case line[0] == '"':
|
128 |
|
|
q, err := strconv.Unquote(line)
|
129 |
|
|
if err != nil {
|
130 |
|
|
// Fatal because we'll get out of sync.
|
131 |
|
|
t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
|
132 |
|
|
}
|
133 |
|
|
if inStrings {
|
134 |
|
|
str = append(str, q)
|
135 |
|
|
continue
|
136 |
|
|
}
|
137 |
|
|
// Is a regexp.
|
138 |
|
|
if len(input) != 0 {
|
139 |
|
|
t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
|
140 |
|
|
}
|
141 |
|
|
re, err = tryCompile(q)
|
142 |
|
|
if err != nil {
|
143 |
|
|
if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
|
144 |
|
|
// We don't and likely never will support \C; keep going.
|
145 |
|
|
continue
|
146 |
|
|
}
|
147 |
|
|
t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
|
148 |
|
|
if nfail++; nfail >= 100 {
|
149 |
|
|
t.Fatalf("stopping after %d errors", nfail)
|
150 |
|
|
}
|
151 |
|
|
continue
|
152 |
|
|
}
|
153 |
|
|
full := `\A(?:` + q + `)\z`
|
154 |
|
|
refull, err = tryCompile(full)
|
155 |
|
|
if err != nil {
|
156 |
|
|
// Fatal because q worked, so this should always work.
|
157 |
|
|
t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
|
158 |
|
|
}
|
159 |
|
|
input = str
|
160 |
|
|
case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
|
161 |
|
|
// A sequence of match results.
|
162 |
|
|
ncase++
|
163 |
|
|
if re == nil {
|
164 |
|
|
// Failed to compile: skip results.
|
165 |
|
|
continue
|
166 |
|
|
}
|
167 |
|
|
if len(input) == 0 {
|
168 |
|
|
t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
|
169 |
|
|
}
|
170 |
|
|
var text string
|
171 |
|
|
text, input = input[0], input[1:]
|
172 |
|
|
if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
|
173 |
|
|
// RE2's \B considers every byte position,
|
174 |
|
|
// so it sees 'not word boundary' in the
|
175 |
|
|
// middle of UTF-8 sequences. This package
|
176 |
|
|
// only considers the positions between runes,
|
177 |
|
|
// so it disagrees. Skip those cases.
|
178 |
|
|
continue
|
179 |
|
|
}
|
180 |
|
|
res := strings.Split(line, ";")
|
181 |
|
|
if len(res) != len(run) {
|
182 |
|
|
t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
|
183 |
|
|
}
|
184 |
|
|
for i := range res {
|
185 |
|
|
have, suffix := run[i](re, refull, text)
|
186 |
|
|
want := parseResult(t, file, lineno, res[i])
|
187 |
|
|
if !same(have, want) {
|
188 |
|
|
t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
|
189 |
|
|
if nfail++; nfail >= 100 {
|
190 |
|
|
t.Fatalf("stopping after %d errors", nfail)
|
191 |
|
|
}
|
192 |
|
|
continue
|
193 |
|
|
}
|
194 |
|
|
b, suffix := match[i](re, refull, text)
|
195 |
|
|
if b != (want != nil) {
|
196 |
|
|
t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
|
197 |
|
|
if nfail++; nfail >= 100 {
|
198 |
|
|
t.Fatalf("stopping after %d errors", nfail)
|
199 |
|
|
}
|
200 |
|
|
continue
|
201 |
|
|
}
|
202 |
|
|
}
|
203 |
|
|
|
204 |
|
|
default:
|
205 |
|
|
t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
|
206 |
|
|
}
|
207 |
|
|
}
|
208 |
|
|
if len(input) != 0 {
|
209 |
|
|
t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
|
210 |
|
|
}
|
211 |
|
|
t.Logf("%d cases tested", ncase)
|
212 |
|
|
}
|
213 |
|
|
|
214 |
|
|
// run lists the submatch-index searches that testRE2 applies to every test
// string. testRE2 requires each result line of the log to hold exactly
// len(run) results and compares result i with the output of run[i], so the
// order here must follow the order of results in the log. Each function
// receives the regexp, its whole-string form and the text, and returns the
// indices found plus a suffix that labels the search in failure messages.
var run = []func(*Regexp, *Regexp, string) ([]int, string){
	runFull,
	runPartial,
	runFullLongest,
	runPartialLongest,
}
|
220 |
|
|
|
221 |
|
|
func runFull(re, refull *Regexp, text string) ([]int, string) {
|
222 |
|
|
refull.longest = false
|
223 |
|
|
return refull.FindStringSubmatchIndex(text), "[full]"
|
224 |
|
|
}
|
225 |
|
|
|
226 |
|
|
func runPartial(re, refull *Regexp, text string) ([]int, string) {
|
227 |
|
|
re.longest = false
|
228 |
|
|
return re.FindStringSubmatchIndex(text), ""
|
229 |
|
|
}
|
230 |
|
|
|
231 |
|
|
func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
|
232 |
|
|
refull.longest = true
|
233 |
|
|
return refull.FindStringSubmatchIndex(text), "[full,longest]"
|
234 |
|
|
}
|
235 |
|
|
|
236 |
|
|
func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
|
237 |
|
|
re.longest = true
|
238 |
|
|
return re.FindStringSubmatchIndex(text), "[longest]"
|
239 |
|
|
}
|
240 |
|
|
|
241 |
|
|
// match lists the boolean counterparts of the searches in run, in the same
// order: testRE2 calls match[i] right after run[i] has agreed with the
// expected result and checks that the reported match status is consistent
// with it. Each function returns the match status plus a suffix that labels
// the search in failure messages.
var match = []func(*Regexp, *Regexp, string) (bool, string){
	matchFull,
	matchPartial,
	matchFullLongest,
	matchPartialLongest,
}
|
247 |
|
|
|
248 |
|
|
func matchFull(re, refull *Regexp, text string) (bool, string) {
|
249 |
|
|
refull.longest = false
|
250 |
|
|
return refull.MatchString(text), "[full]"
|
251 |
|
|
}
|
252 |
|
|
|
253 |
|
|
func matchPartial(re, refull *Regexp, text string) (bool, string) {
|
254 |
|
|
re.longest = false
|
255 |
|
|
return re.MatchString(text), ""
|
256 |
|
|
}
|
257 |
|
|
|
258 |
|
|
func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
|
259 |
|
|
refull.longest = true
|
260 |
|
|
return refull.MatchString(text), "[full,longest]"
|
261 |
|
|
}
|
262 |
|
|
|
263 |
|
|
func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
|
264 |
|
|
re.longest = true
|
265 |
|
|
return re.MatchString(text), "[longest]"
|
266 |
|
|
}
|
267 |
|
|
|
268 |
|
|
// isSingleBytes reports whether every character of s is encoded as a
// single byte, that is, whether s contains no byte at or above
// utf8.RuneSelf. The empty string qualifies.
func isSingleBytes(s string) bool {
	// Any byte >= RuneSelf is part of a multi-byte sequence or is invalid
	// UTF-8; either way the string is not made of single-byte runes.
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
|
276 |
|
|
|
277 |
|
|
func tryCompile(s string) (re *Regexp, err error) {
|
278 |
|
|
// Protect against panic during Compile.
|
279 |
|
|
defer func() {
|
280 |
|
|
if r := recover(); r != nil {
|
281 |
|
|
err = fmt.Errorf("panic: %v", r)
|
282 |
|
|
}
|
283 |
|
|
}()
|
284 |
|
|
return Compile(s)
|
285 |
|
|
}
|
286 |
|
|
|
287 |
|
|
// parseResult converts one match result from an RE2 log line into the
// index slice produced by the FindSubmatchIndex family.
//
// A result that is exactly "-" means no match and yields nil. Otherwise
// res is a list of pairs separated by single spaces: "lo-hi" contributes
// lo, hi and a lone "-" (unmatched subexpression) contributes -1, -1.
// A pair that is malformed, or has lo > hi, stops the test with t.Fatalf;
// file and lineno are used only to locate the problem in that message.
func parseResult(t *testing.T, file string, lineno int, res string) []int {
	// A single - indicates no match.
	if res == "-" {
		return nil
	}
	// Otherwise, a space-separated list of pairs.
	pairs := strings.Split(res, " ")
	out := make([]int, 0, 2*len(pairs))
	for _, pair := range pairs {
		// - means no submatch.
		if pair == "-" {
			out = append(out, -1, -1)
			continue
		}
		k := strings.Index(pair, "-")
		if k < 0 {
			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
		}
		lo, err1 := strconv.Atoi(pair[:k])
		hi, err2 := strconv.Atoi(pair[k+1:])
		if err1 != nil || err2 != nil || lo > hi {
			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
		}
		out = append(out, lo, hi)
	}
	return out
}
|
328 |
|
|
|
329 |
|
|
// same reports whether x and y have the same length and hold equal
// elements at every index. A nil slice and an empty slice compare equal.
func same(x, y []int) bool {
	n := len(x)
	if n != len(y) {
		return false
	}
	for i := 0; i < n; i++ {
		if x[i] != y[i] {
			return false
		}
	}
	return true
}
|
340 |
|
|
|
341 |
|
|
// TestFowler runs this package's regexp API against the
|
342 |
|
|
// POSIX regular expression tests collected by Glenn Fowler
|
343 |
|
|
// at http://www2.research.att.com/~gsf/testregex/.
|
344 |
|
|
func TestFowler(t *testing.T) {
|
345 |
|
|
files, err := filepath.Glob("testdata/*.dat")
|
346 |
|
|
if err != nil {
|
347 |
|
|
t.Fatal(err)
|
348 |
|
|
}
|
349 |
|
|
for _, file := range files {
|
350 |
|
|
t.Log(file)
|
351 |
|
|
testFowler(t, file)
|
352 |
|
|
}
|
353 |
|
|
}
|
354 |
|
|
|
355 |
|
|
// notab matches a maximal run of characters other than tab. testFowler
// uses it to split a specification line into its tab-separated fields.
var notab = MustCompilePOSIX(`[^\t]+`)
|
356 |
|
|
|
357 |
|
|
func testFowler(t *testing.T, file string) {
|
358 |
|
|
f, err := os.Open(file)
|
359 |
|
|
if err != nil {
|
360 |
|
|
t.Error(err)
|
361 |
|
|
return
|
362 |
|
|
}
|
363 |
|
|
defer f.Close()
|
364 |
|
|
b := bufio.NewReader(f)
|
365 |
|
|
lineno := 0
|
366 |
|
|
lastRegexp := ""
|
367 |
|
|
Reading:
|
368 |
|
|
for {
|
369 |
|
|
lineno++
|
370 |
|
|
line, err := b.ReadString('\n')
|
371 |
|
|
if err != nil {
|
372 |
|
|
if err != io.EOF {
|
373 |
|
|
t.Errorf("%s:%d: %v", file, lineno, err)
|
374 |
|
|
}
|
375 |
|
|
break Reading
|
376 |
|
|
}
|
377 |
|
|
|
378 |
|
|
// http://www2.research.att.com/~gsf/man/man1/testregex.html
|
379 |
|
|
//
|
380 |
|
|
// INPUT FORMAT
|
381 |
|
|
// Input lines may be blank, a comment beginning with #, or a test
|
382 |
|
|
// specification. A specification is five fields separated by one
|
383 |
|
|
// or more tabs. NULL denotes the empty string and NIL denotes the
|
384 |
|
|
// 0 pointer.
|
385 |
|
|
if line[0] == '#' || line[0] == '\n' {
|
386 |
|
|
continue Reading
|
387 |
|
|
}
|
388 |
|
|
line = line[:len(line)-1]
|
389 |
|
|
field := notab.FindAllString(line, -1)
|
390 |
|
|
for i, f := range field {
|
391 |
|
|
if f == "NULL" {
|
392 |
|
|
field[i] = ""
|
393 |
|
|
}
|
394 |
|
|
if f == "NIL" {
|
395 |
|
|
t.Logf("%s:%d: skip: %s", file, lineno, line)
|
396 |
|
|
continue Reading
|
397 |
|
|
}
|
398 |
|
|
}
|
399 |
|
|
if len(field) == 0 {
|
400 |
|
|
continue Reading
|
401 |
|
|
}
|
402 |
|
|
|
403 |
|
|
// Field 1: the regex(3) flags to apply, one character per REG_feature
|
404 |
|
|
// flag. The test is skipped if REG_feature is not supported by the
|
405 |
|
|
// implementation. If the first character is not [BEASKLP] then the
|
406 |
|
|
// specification is a global control line. One or more of [BEASKLP] may be
|
407 |
|
|
// specified; the test will be repeated for each mode.
|
408 |
|
|
//
|
409 |
|
|
// B basic BRE (grep, ed, sed)
|
410 |
|
|
// E REG_EXTENDED ERE (egrep)
|
411 |
|
|
// A REG_AUGMENTED ARE (egrep with negation)
|
412 |
|
|
// S REG_SHELL SRE (sh glob)
|
413 |
|
|
// K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
|
414 |
|
|
// L REG_LITERAL LRE (fgrep)
|
415 |
|
|
//
|
416 |
|
|
// a REG_LEFT|REG_RIGHT implicit ^...$
|
417 |
|
|
// b REG_NOTBOL lhs does not match ^
|
418 |
|
|
// c REG_COMMENT ignore space and #...\n
|
419 |
|
|
// d REG_SHELL_DOT explicit leading . match
|
420 |
|
|
// e REG_NOTEOL rhs does not match $
|
421 |
|
|
// f REG_MULTIPLE multiple \n separated patterns
|
422 |
|
|
// g FNM_LEADING_DIR testfnmatch only -- match until /
|
423 |
|
|
// h REG_MULTIREF multiple digit backref
|
424 |
|
|
// i REG_ICASE ignore case
|
425 |
|
|
// j REG_SPAN . matches \n
|
426 |
|
|
// k REG_ESCAPE \ to ecape [...] delimiter
|
427 |
|
|
// l REG_LEFT implicit ^...
|
428 |
|
|
// m REG_MINIMAL minimal match
|
429 |
|
|
// n REG_NEWLINE explicit \n match
|
430 |
|
|
// o REG_ENCLOSED (|&) magic inside [@|&](...)
|
431 |
|
|
// p REG_SHELL_PATH explicit / match
|
432 |
|
|
// q REG_DELIMITED delimited pattern
|
433 |
|
|
// r REG_RIGHT implicit ...$
|
434 |
|
|
// s REG_SHELL_ESCAPED \ not special
|
435 |
|
|
// t REG_MUSTDELIM all delimiters must be specified
|
436 |
|
|
// u standard unspecified behavior -- errors not counted
|
437 |
|
|
// v REG_CLASS_ESCAPE \ special inside [...]
|
438 |
|
|
// w REG_NOSUB no subexpression match array
|
439 |
|
|
// x REG_LENIENT let some errors slide
|
440 |
|
|
// y REG_LEFT regexec() implicit ^...
|
441 |
|
|
// z REG_NULL NULL subexpressions ok
|
442 |
|
|
// $ expand C \c escapes in fields 2 and 3
|
443 |
|
|
// / field 2 is a regsubcomp() expression
|
444 |
|
|
// = field 3 is a regdecomp() expression
|
445 |
|
|
//
|
446 |
|
|
// Field 1 control lines:
|
447 |
|
|
//
|
448 |
|
|
// C set LC_COLLATE and LC_CTYPE to locale in field 2
|
449 |
|
|
//
|
450 |
|
|
// ?test ... output field 5 if passed and != EXPECTED, silent otherwise
|
451 |
|
|
// &test ... output field 5 if current and previous passed
|
452 |
|
|
// |test ... output field 5 if current passed and previous failed
|
453 |
|
|
// ; ... output field 2 if previous failed
|
454 |
|
|
// {test ... skip if failed until }
|
455 |
|
|
// } end of skip
|
456 |
|
|
//
|
457 |
|
|
// : comment comment copied as output NOTE
|
458 |
|
|
// :comment:test :comment: ignored
|
459 |
|
|
// N[OTE] comment comment copied as output NOTE
|
460 |
|
|
// T[EST] comment comment
|
461 |
|
|
//
|
462 |
|
|
// number use number for nmatch (20 by default)
|
463 |
|
|
flag := field[0]
|
464 |
|
|
switch flag[0] {
|
465 |
|
|
case '?', '&', '|', ';', '{', '}':
|
466 |
|
|
// Ignore all the control operators.
|
467 |
|
|
// Just run everything.
|
468 |
|
|
flag = flag[1:]
|
469 |
|
|
if flag == "" {
|
470 |
|
|
continue Reading
|
471 |
|
|
}
|
472 |
|
|
case ':':
|
473 |
|
|
i := strings.Index(flag[1:], ":")
|
474 |
|
|
if i < 0 {
|
475 |
|
|
t.Logf("skip: %s", line)
|
476 |
|
|
continue Reading
|
477 |
|
|
}
|
478 |
|
|
flag = flag[1+i+1:]
|
479 |
|
|
case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
480 |
|
|
t.Logf("skip: %s", line)
|
481 |
|
|
continue Reading
|
482 |
|
|
}
|
483 |
|
|
|
484 |
|
|
// Can check field count now that we've handled the myriad comment formats.
|
485 |
|
|
if len(field) < 4 {
|
486 |
|
|
t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
|
487 |
|
|
continue Reading
|
488 |
|
|
}
|
489 |
|
|
|
490 |
|
|
// Expand C escapes (a.k.a. Go escapes).
|
491 |
|
|
if strings.Contains(flag, "$") {
|
492 |
|
|
f := `"` + field[1] + `"`
|
493 |
|
|
if field[1], err = strconv.Unquote(f); err != nil {
|
494 |
|
|
t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
|
495 |
|
|
}
|
496 |
|
|
f = `"` + field[2] + `"`
|
497 |
|
|
if field[2], err = strconv.Unquote(f); err != nil {
|
498 |
|
|
t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
|
499 |
|
|
}
|
500 |
|
|
}
|
501 |
|
|
|
502 |
|
|
// Field 2: the regular expression pattern; SAME uses the pattern from
|
503 |
|
|
// the previous specification.
|
504 |
|
|
//
|
505 |
|
|
if field[1] == "SAME" {
|
506 |
|
|
field[1] = lastRegexp
|
507 |
|
|
}
|
508 |
|
|
lastRegexp = field[1]
|
509 |
|
|
|
510 |
|
|
// Field 3: the string to match.
|
511 |
|
|
text := field[2]
|
512 |
|
|
|
513 |
|
|
// Field 4: the test outcome...
|
514 |
|
|
ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
|
515 |
|
|
if !ok {
|
516 |
|
|
t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
|
517 |
|
|
continue Reading
|
518 |
|
|
}
|
519 |
|
|
|
520 |
|
|
// Field 5: optional comment appended to the report.
|
521 |
|
|
|
522 |
|
|
Testing:
|
523 |
|
|
// Run test once for each specified capital letter mode that we support.
|
524 |
|
|
for _, c := range flag {
|
525 |
|
|
pattern := field[1]
|
526 |
|
|
syn := syntax.POSIX | syntax.ClassNL
|
527 |
|
|
switch c {
|
528 |
|
|
default:
|
529 |
|
|
continue Testing
|
530 |
|
|
case 'E':
|
531 |
|
|
// extended regexp (what we support)
|
532 |
|
|
case 'L':
|
533 |
|
|
// literal
|
534 |
|
|
pattern = QuoteMeta(pattern)
|
535 |
|
|
}
|
536 |
|
|
|
537 |
|
|
for _, c := range flag {
|
538 |
|
|
switch c {
|
539 |
|
|
case 'i':
|
540 |
|
|
syn |= syntax.FoldCase
|
541 |
|
|
}
|
542 |
|
|
}
|
543 |
|
|
|
544 |
|
|
re, err := compile(pattern, syn, true)
|
545 |
|
|
if err != nil {
|
546 |
|
|
if shouldCompile {
|
547 |
|
|
t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
|
548 |
|
|
}
|
549 |
|
|
continue Testing
|
550 |
|
|
}
|
551 |
|
|
if !shouldCompile {
|
552 |
|
|
t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
|
553 |
|
|
continue Testing
|
554 |
|
|
}
|
555 |
|
|
match := re.MatchString(text)
|
556 |
|
|
if match != shouldMatch {
|
557 |
|
|
t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
|
558 |
|
|
continue Testing
|
559 |
|
|
}
|
560 |
|
|
have := re.FindStringSubmatchIndex(text)
|
561 |
|
|
if (len(have) > 0) != match {
|
562 |
|
|
t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
|
563 |
|
|
continue Testing
|
564 |
|
|
}
|
565 |
|
|
if len(have) > len(pos) {
|
566 |
|
|
have = have[:len(pos)]
|
567 |
|
|
}
|
568 |
|
|
if !same(have, pos) {
|
569 |
|
|
t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
|
570 |
|
|
}
|
571 |
|
|
}
|
572 |
|
|
}
|
573 |
|
|
}
|
574 |
|
|
|
575 |
|
|
// parseFowlerResult decodes field 4 of a testregex specification, the
// expected test outcome.
//
// From the testregex manual: the outcome is either one of the posix error
// codes (with REG_ omitted) or the match array, a list of (m,n) entries
// with m and n being first and last+1 positions in the field 3 string, or
// NULL if REG_NOSUB is in effect and success is expected. BADPAT is
// acceptable in place of any regcomp(3) error code. The match[] array is
// initialized to (-2,-2) before each test. All array elements from 0 to
// nmatch-1 must be specified in the outcome. Unspecified endpoints
// (offset -1) are denoted by ?. Unset endpoints (offset -2) are denoted
// by X. {x}(o:n) denotes a matched (?{...}) expression, where x is the
// text enclosed by {...}, o is the expression ordinal counting from 1, and
// n is the length of the unmatched portion of the subject string. If x
// starts with a number then that is the return value of re_execf(),
// otherwise 0 is returned.
//
// Of these forms this function understands:
//
//   - "" (a match with no position information): compiled and matched;
//   - "NOMATCH": compiled, not matched;
//   - any other string starting with a capital letter: treated as a
//     compile error, so compiled is false;
//   - a list of "(m,n)" entries, where each endpoint is a decimal number
//     or "?" (stored as -1): compiled and matched, with pos holding the
//     endpoints in order.
//
// ok is false when s fits none of these forms; in that case compiled is
// true, matched is false and pos is nil.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	switch {
	case s == "":
		// Match with no position information.
		return true, true, true, nil
	case s == "NOMATCH":
		// Match failure.
		return true, true, false, nil
	case 'A' <= s[0] && s[0] <= 'Z':
		// All the other error codes are compile errors.
		return true, false, false, nil
	}

	var x []int
	for s != "" {
		// An even count means the next endpoint opens an entry, "(m,";
		// an odd count means it closes one, "n)".
		var end byte = ')'
		if len(x)%2 == 0 {
			if s[0] != '(' {
				return false, true, false, nil
			}
			s = s[1:]
			end = ','
		}
		i := 0
		for i < len(s) && s[i] != end {
			i++
		}
		// Reject an empty endpoint or a missing terminator.
		if i == 0 || i == len(s) {
			return false, true, false, nil
		}
		v := -1 // value recorded for "?"
		if s[:i] != "?" {
			n, err := strconv.Atoi(s[:i])
			if err != nil {
				return false, true, false, nil
			}
			v = n
		}
		x = append(x, v)
		s = s[i+1:]
	}
	if len(x)%2 != 0 {
		return false, true, false, nil
	}
	return true, true, true, x
}
|
651 |
|
|
|
652 |
|
|
// text caches the most recently generated benchmark input so that
// makeText can hand out prefixes of it instead of regenerating.
var text []byte

// makeText returns n bytes of pseudo-random text for the benchmarks.
//
// If the cached text already holds at least n bytes, its first n bytes
// are returned; otherwise the cache is replaced by a fresh n-byte slice.
// Each generated byte is a newline with probability 1/30 and otherwise a
// byte drawn from 0x20 through 0x7E. The result shares storage with the
// cache.
func makeText(n int) []byte {
	if n <= len(text) {
		return text[:n]
	}
	text = make([]byte, n)
	for i := 0; i < n; i++ {
		switch rand.Intn(30) {
		case 0:
			text[i] = '\n'
		default:
			text[i] = byte(rand.Intn(0x7E+1-0x20) + 0x20)
		}
	}
	return text
}
|
668 |
|
|
|
669 |
|
|
func benchmark(b *testing.B, re string, n int) {
|
670 |
|
|
r := MustCompile(re)
|
671 |
|
|
t := makeText(n)
|
672 |
|
|
b.ResetTimer()
|
673 |
|
|
b.SetBytes(int64(n))
|
674 |
|
|
for i := 0; i < b.N; i++ {
|
675 |
|
|
if r.Match(t) {
|
676 |
|
|
b.Fatal("match!")
|
677 |
|
|
}
|
678 |
|
|
}
|
679 |
|
|
}
|
680 |
|
|
|
681 |
|
|
// Patterns used by the BenchmarkMatch* functions. Every pattern ends in
// the alphabet followed by $, and benchmark fails if a pattern matches its
// input, so these are presumably meant to measure scanning a text in which
// no match is found. The patterns differ in what precedes the alphabet: a
// plain literal (easy0), a literal interleaved with two-letter classes
// (easy1), a leading three-letter class (medium), a leading [ -~]* (hard),
// and the hard form with every element captured (parens).
const (
	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
)
|
689 |
|
|
|
690 |
|
|
func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) }
|
691 |
|
|
func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) }
|
692 |
|
|
func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) }
|
693 |
|
|
func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) }
|
694 |
|
|
func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) }
|
695 |
|
|
func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) }
|
696 |
|
|
func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) }
|
697 |
|
|
func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) }
|
698 |
|
|
func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) }
|
699 |
|
|
func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) }
|
700 |
|
|
func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 1<<0) }
|
701 |
|
|
func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) }
|
702 |
|
|
func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
|
703 |
|
|
func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) }
|
704 |
|
|
func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
|
705 |
|
|
func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) }
|
706 |
|
|
func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) }
|
707 |
|
|
func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) }
|
708 |
|
|
func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) }
|
709 |
|
|
func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) }
|