URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [regexp/] [syntax/] [regexp.go] - Rev 747
Compare with Previous | Blame | View Log
// Copyright 2011 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// Package syntax parses regular expressions into syntax trees.// WORK IN PROGRESS.package syntax// Note to implementers:// In this package, re is always a *Regexp and r is always a rune.import ("bytes""strconv""strings""unicode")// A Regexp is a node in a regular expression syntax tree.type Regexp struct {Op Op // operatorFlags FlagsSub []*Regexp // subexpressions, if anySub0 [1]*Regexp // storage for short SubRune []rune // matched runes, for OpLiteral, OpCharClassRune0 [2]rune // storage for short RuneMin, Max int // min, max for OpRepeatCap int // capturing index, for OpCaptureName string // capturing name, for OpCapture}// An Op is a single regular expression operator.type Op uint8// Operators are listed in precedence order, tightest binding to weakest.// Character class operators are listed simplest to most complex// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).const (OpNoMatch Op = 1 + iota // matches no stringsOpEmptyMatch // matches empty stringOpLiteral // matches Runes sequenceOpCharClass // matches Runes interpreted as range pair listOpAnyCharNotNL // matches any characterOpAnyChar // matches any characterOpBeginLine // matches empty string at beginning of lineOpEndLine // matches empty string at end of lineOpBeginText // matches empty string at beginning of textOpEndText // matches empty string at end of textOpWordBoundary // matches word boundary `\b`OpNoWordBoundary // matches word non-boundary `\B`OpCapture // capturing subexpression with index Cap, optional name NameOpStar // matches Sub[0] zero or more timesOpPlus // matches Sub[0] one or more timesOpQuest // matches Sub[0] zero or one timesOpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)OpConcat // matches concatenation of SubsOpAlternate // matches alternation of Subs)const opPseudo Op = 128 // where pseudo-ops start// Equal returns true if x and y have identical structure.func (x *Regexp) Equal(y *Regexp) bool {if x == nil || y == nil {return x == y}if x.Op != y.Op {return false}switch x.Op {case OpEndText:// The parse flags remember whether this is \z or \Z.if x.Flags&WasDollar != y.Flags&WasDollar {return false}case OpLiteral, OpCharClass:if len(x.Rune) != len(y.Rune) {return false}for i, r := range x.Rune {if r != y.Rune[i] {return false}}case OpAlternate, OpConcat:if len(x.Sub) != len(y.Sub) {return false}for i, sub := range x.Sub {if !sub.Equal(y.Sub[i]) {return false}}case OpStar, OpPlus, OpQuest:if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {return false}case OpRepeat:if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {return false}case OpCapture:if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {return false}}return true}// writeRegexp writes the Perl syntax for the regular expression re to b.func writeRegexp(b *bytes.Buffer, re *Regexp) {switch re.Op {default:b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")case OpNoMatch:b.WriteString(`[^\x00-\x{10FFFF}]`)case OpEmptyMatch:b.WriteString(`(?:)`)case OpLiteral:if re.Flags&FoldCase != 0 {b.WriteString(`(?i:`)}for _, r := range re.Rune {escape(b, r, false)}if re.Flags&FoldCase != 0 {b.WriteString(`)`)}case OpCharClass:if len(re.Rune)%2 != 0 {b.WriteString(`[invalid char class]`)break}b.WriteRune('[')if len(re.Rune) == 0 {b.WriteString(`^\x00-\x{10FFFF}`)} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {// Contains 0 and MaxRune. Probably a negated class.// Print the gaps.b.WriteRune('^')for i := 1; i < len(re.Rune)-1; i += 2 {lo, hi := re.Rune[i]+1, re.Rune[i+1]-1escape(b, lo, lo == '-')if lo != hi {b.WriteRune('-')escape(b, hi, hi == '-')}}} else {for i := 0; i < len(re.Rune); i += 2 {lo, hi := re.Rune[i], re.Rune[i+1]escape(b, lo, lo == '-')if lo != hi {b.WriteRune('-')escape(b, hi, hi == '-')}}}b.WriteRune(']')case OpAnyCharNotNL:b.WriteString(`(?-s:.)`)case OpAnyChar:b.WriteString(`(?s:.)`)case OpBeginLine:b.WriteRune('^')case OpEndLine:b.WriteRune('$')case OpBeginText:b.WriteString(`\A`)case OpEndText:if re.Flags&WasDollar != 0 {b.WriteString(`(?-m:$)`)} else {b.WriteString(`\z`)}case OpWordBoundary:b.WriteString(`\b`)case OpNoWordBoundary:b.WriteString(`\B`)case OpCapture:if re.Name != "" {b.WriteString(`(?P<`)b.WriteString(re.Name)b.WriteRune('>')} else {b.WriteRune('(')}if re.Sub[0].Op != OpEmptyMatch {writeRegexp(b, re.Sub[0])}b.WriteRune(')')case OpStar, OpPlus, OpQuest, OpRepeat:if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {b.WriteString(`(?:`)writeRegexp(b, sub)b.WriteString(`)`)} else {writeRegexp(b, sub)}switch re.Op {case OpStar:b.WriteRune('*')case OpPlus:b.WriteRune('+')case OpQuest:b.WriteRune('?')case OpRepeat:b.WriteRune('{')b.WriteString(strconv.Itoa(re.Min))if re.Max != re.Min {b.WriteRune(',')if re.Max >= 0 {b.WriteString(strconv.Itoa(re.Max))}}b.WriteRune('}')}if re.Flags&NonGreedy != 0 {b.WriteRune('?')}case OpConcat:for _, sub := range re.Sub {if sub.Op == OpAlternate {b.WriteString(`(?:`)writeRegexp(b, sub)b.WriteString(`)`)} else {writeRegexp(b, sub)}}case OpAlternate:for i, sub := range re.Sub {if i > 0 {b.WriteRune('|')}writeRegexp(b, sub)}}}func (re *Regexp) String() string {var b bytes.BufferwriteRegexp(&b, re)return b.String()}const meta = `\.+*?()|[]{}^$`func escape(b *bytes.Buffer, r rune, force bool) {if unicode.IsPrint(r) {if strings.IndexRune(meta, r) >= 0 || force {b.WriteRune('\\')}b.WriteRune(r)return}switch r {case '\a':b.WriteString(`\a`)case '\f':b.WriteString(`\f`)case '\n':b.WriteString(`\n`)case '\r':b.WriteString(`\r`)case '\t':b.WriteString(`\t`)case '\v':b.WriteString(`\v`)default:if r < 0x100 {b.WriteString(`\x`)s := strconv.FormatInt(int64(r), 16)if len(s) == 1 {b.WriteRune('0')}b.WriteString(s)break}b.WriteString(`\x{`)b.WriteString(strconv.FormatInt(int64(r), 16))b.WriteString(`}`)}}// MaxCap walks the regexp to find the maximum capture index.func (re *Regexp) MaxCap() int {m := 0if re.Op == OpCapture {m = re.Cap}for _, sub := range re.Sub {if n := sub.MaxCap(); m < n {m = n}}return m}// CapNames walks the regexp to find the names of capturing groups.func (re *Regexp) CapNames() []string {names := make([]string, re.MaxCap()+1)re.capNames(names)return names}func (re *Regexp) capNames(names []string) {if re.Op == OpCapture {names[re.Cap] = re.Name}for _, sub := range re.Sub {sub.capNames(names)}}
