OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [regexp/] [syntax/] [simplify_test.go] - Rev 868

Go to most recent revision | Compare with Previous | Blame | View Log

// Copyright 2011 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax_test

import . "regexp/syntax"
import "testing"

// simplifyTests is the table driving TestSimplify.  Each entry pairs a
// source pattern with the text expected from printing its simplified
// syntax tree.
var simplifyTests = []struct {
        Regexp string // pattern handed to Parse
        Simple string // expected String() of the simplified expression
}{
        // Basic constructs that need little or no rewriting.
        {Regexp: `a`, Simple: `a`},
        {Regexp: `ab`, Simple: `ab`},
        {Regexp: `a|b`, Simple: `[a-b]`},
        {Regexp: `ab|cd`, Simple: `ab|cd`},
        {Regexp: `(ab)*`, Simple: `(ab)*`},
        {Regexp: `(ab)+`, Simple: `(ab)+`},
        {Regexp: `(ab)?`, Simple: `(ab)?`},
        {Regexp: `.`, Simple: `(?s:.)`},
        {Regexp: `^`, Simple: `^`},
        {Regexp: `$`, Simple: `$`},
        {Regexp: `[ac]`, Simple: `[ac]`},
        {Regexp: `[^ac]`, Simple: `[^ac]`},

        // POSIX bracket classes expand to explicit ranges.
        {Regexp: `[[:alnum:]]`, Simple: `[0-9A-Za-z]`},
        {Regexp: `[[:alpha:]]`, Simple: `[A-Za-z]`},
        {Regexp: `[[:blank:]]`, Simple: `[\t ]`},
        {Regexp: `[[:cntrl:]]`, Simple: `[\x00-\x1f\x7f]`},
        {Regexp: `[[:digit:]]`, Simple: `[0-9]`},
        {Regexp: `[[:graph:]]`, Simple: `[!-~]`},
        {Regexp: `[[:lower:]]`, Simple: `[a-z]`},
        {Regexp: `[[:print:]]`, Simple: `[ -~]`},
        {Regexp: `[[:punct:]]`, Simple: "[!-/:-@\\[-`\\{-~]"},
        {Regexp: `[[:space:]]`, Simple: `[\t-\r ]`},
        {Regexp: `[[:upper:]]`, Simple: `[A-Z]`},
        {Regexp: `[[:xdigit:]]`, Simple: `[0-9A-Fa-f]`},

        // Perl escape classes, bare and inside brackets.
        {Regexp: `\d`, Simple: `[0-9]`},
        {Regexp: `\s`, Simple: `[\t-\n\f-\r ]`},
        {Regexp: `\w`, Simple: `[0-9A-Z_a-z]`},
        {Regexp: `\D`, Simple: `[^0-9]`},
        {Regexp: `\S`, Simple: `[^\t-\n\f-\r ]`},
        {Regexp: `\W`, Simple: `[^0-9A-Z_a-z]`},
        {Regexp: `[\d]`, Simple: `[0-9]`},
        {Regexp: `[\s]`, Simple: `[\t-\n\f-\r ]`},
        {Regexp: `[\w]`, Simple: `[0-9A-Z_a-z]`},
        {Regexp: `[\D]`, Simple: `[^0-9]`},
        {Regexp: `[\S]`, Simple: `[^\t-\n\f-\r ]`},
        {Regexp: `[\W]`, Simple: `[^0-9A-Z_a-z]`},

        // POSIX counted repetitions.
        {Regexp: `a{1}`, Simple: `a`},
        {Regexp: `a{2}`, Simple: `aa`},
        {Regexp: `a{5}`, Simple: `aaaaa`},
        {Regexp: `a{0,1}`, Simple: `a?`},
        // Simplify groups with (?:) rather than () so that it does not
        // introduce additional captured subexpressions, which makes the
        // expected output below hard to read.  Each entry is preceded
        // by an equivalent form written with fewer parens.
        //
        // (aa?)?
        {Regexp: `(a){0,2}`, Simple: `(?:(a)(a)?)?`},
        // (a(a(aa?)?)?)?
        {Regexp: `(a){0,4}`, Simple: `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},
        // aa(a(a(aa?)?)?)?
        {Regexp: `(a){2,6}`, Simple: `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},
        // (aa?)?
        {Regexp: `a{0,2}`, Simple: `(?:aa?)?`},
        // (a(a(aa?)?)?)?
        {Regexp: `a{0,4}`, Simple: `(?:a(?:a(?:aa?)?)?)?`},
        // aa(a(a(aa?)?)?)?
        {Regexp: `a{2,6}`, Simple: `aa(?:a(?:a(?:aa?)?)?)?`},
        {Regexp: `a{0,}`, Simple: `a*`},
        {Regexp: `a{1,}`, Simple: `a+`},
        {Regexp: `a{2,}`, Simple: `aa+`},
        {Regexp: `a{5,}`, Simple: `aaaaa+`},

        // Operators must simplify their operands as well.
        {Regexp: `(?:a{1,}){1,}`, Simple: `a+`},
        {Regexp: `(a{1,}b{1,})`, Simple: `(a+b+)`},
        {Regexp: `a{1,}|b{1,}`, Simple: `a+|b+`},
        {Regexp: `(?:a{1,})*`, Simple: `(?:a+)*`},
        {Regexp: `(?:a{1,})+`, Simple: `a+`},
        {Regexp: `(?:a{1,})?`, Simple: `(?:a+)?`},
        {Regexp: ``, Simple: `(?:)`},
        {Regexp: `a{0}`, Simple: `(?:)`},

        // Merging of adjacent and overlapping character class ranges.
        {Regexp: `[ab]`, Simple: `[a-b]`},
        {Regexp: `[a-za-za-z]`, Simple: `[a-z]`},
        {Regexp: `[A-Za-zA-Za-z]`, Simple: `[A-Za-z]`},
        {Regexp: `[ABCDEFGH]`, Simple: `[A-H]`},
        {Regexp: `[AB-CD-EF-GH]`, Simple: `[A-H]`},
        {Regexp: `[W-ZP-XE-R]`, Simple: `[E-Z]`},
        {Regexp: `[a-ee-gg-m]`, Simple: `[a-m]`},
        {Regexp: `[a-ea-ha-m]`, Simple: `[a-m]`},
        {Regexp: `[a-ma-ha-e]`, Simple: `[a-m]`},
        {Regexp: `[a-zA-Z0-9 -~]`, Simple: `[ -~]`},

        // A class that matches nothing.
        {Regexp: `[^[:cntrl:][:^cntrl:]]`, Simple: `[^\x00-\x{10FFFF}]`},

        // A class that matches everything.
        {Regexp: `[[:cntrl:][:^cntrl:]]`, Simple: `(?s:.)`},

        // Case-insensitive matching and Unicode folding.
        {Regexp: `(?i)A`, Simple: `(?i:A)`},
        {Regexp: `(?i)a`, Simple: `(?i:A)`},
        {Regexp: `(?i)[A]`, Simple: `(?i:A)`},
        {Regexp: `(?i)[a]`, Simple: `(?i:A)`},
        {Regexp: `(?i)K`, Simple: `(?i:K)`},
        {Regexp: `(?i)k`, Simple: `(?i:K)`},
        {Regexp: `(?i)\x{212a}`, Simple: "(?i:K)"},
        {Regexp: `(?i)[K]`, Simple: "[Kk\u212A]"},
        {Regexp: `(?i)[k]`, Simple: "[Kk\u212A]"},
        {Regexp: `(?i)[\x{212a}]`, Simple: "[Kk\u212A]"},
        {Regexp: `(?i)[a-z]`, Simple: "[A-Za-z\u017F\u212A]"},
        {Regexp: `(?i)[\x00-\x{FFFD}]`, Simple: "[\\x00-\uFFFD]"},
        {Regexp: `(?i)[\x00-\x{10FFFF}]`, Simple: `(?s:.)`},

        // The empty string used as a regular expression.
        // An empty expression inside parens has to be kept so that
        // submatches keep working, which leaves little to simplify in
        // these cases.  Where an empty expression is not wrapped in
        // parens, String writes an explicit (?:) so that other parsers
        // can spot it more easily.
        {Regexp: `(a|b|)`, Simple: `([a-b]|(?:))`},
        {Regexp: `(|)`, Simple: `()`},
        {Regexp: `a()`, Simple: `a()`},
        {Regexp: `(()|())`, Simple: `(()|())`},
        {Regexp: `(a|)`, Simple: `(a|(?:))`},
        {Regexp: `ab()cd()`, Simple: `ab()cd()`},
        {Regexp: `()`, Simple: `()`},
        {Regexp: `()*`, Simple: `()*`},
        {Regexp: `()+`, Simple: `()+`},
        {Regexp: `()?`, Simple: `()?`},
        {Regexp: `(){0}`, Simple: `(?:)`},
        {Regexp: `(){1}`, Simple: `()`},
        {Regexp: `(){1,}`, Simple: `()+`},
        {Regexp: `(){0,2}`, Simple: `(?:()()?)?`},
}

// TestSimplify walks simplifyTests, parsing each pattern, simplifying
// the parsed expression and comparing its printed form with the
// expected text.  A pattern that fails to parse is reported and skipped.
func TestSimplify(t *testing.T) {
        // Every entry is parsed with the same flags: Perl with OneLine
        // cleared, combined with MatchNL.
        const flags = MatchNL | Perl&^OneLine

        for i := range simplifyTests {
                tc := &simplifyTests[i]

                parsed, err := Parse(tc.Regexp, flags)
                if err != nil {
                        t.Errorf("Parse(%#q) = error %v", tc.Regexp, err)
                        continue
                }

                if got := parsed.Simplify().String(); got != tc.Simple {
                        t.Errorf("Simplify(%#q) = %#q, want %#q", tc.Regexp, got, tc.Simple)
                }
        }
}

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.