URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [regexp/] [syntax/] [prog.go] - Rev 747
Compare with Previous | Blame | View Log
// Copyright 2011 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package syntaximport ("bytes""strconv""unicode")// Compiled program.// May not belong in this package, but convenient for now.// A Prog is a compiled regular expression program.type Prog struct {Inst []InstStart int // index of start instructionNumCap int // number of InstCapture insts in re}// An InstOp is an instruction opcode.type InstOp uint8const (InstAlt InstOp = iotaInstAltMatchInstCaptureInstEmptyWidthInstMatchInstFailInstNopInstRuneInstRune1InstRuneAnyInstRuneAnyNotNL)// An EmptyOp specifies a kind or mixture of zero-width assertions.type EmptyOp uint8const (EmptyBeginLine EmptyOp = 1 << iotaEmptyEndLineEmptyBeginTextEmptyEndTextEmptyWordBoundaryEmptyNoWordBoundary)// EmptyOpContext returns the zero-width assertions// satisfied at the position between the runes r1 and r2.// Passing r1 == -1 indicates that the position is// at the beginning of the text.// Passing r2 == -1 indicates that the position is// at the end of the text.func EmptyOpContext(r1, r2 rune) EmptyOp {var op EmptyOpif r1 < 0 {op |= EmptyBeginText | EmptyBeginLine}if r1 == '\n' {op |= EmptyBeginLine}if r2 < 0 {op |= EmptyEndText | EmptyEndLine}if r2 == '\n' {op |= EmptyEndLine}if IsWordChar(r1) != IsWordChar(r2) {op |= EmptyWordBoundary} else {op |= EmptyNoWordBoundary}return op}// IsWordChar reports whether r is consider a ``word character''// during the evaluation of the \b and \B zero-width assertions.// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].func IsWordChar(r rune) bool {return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'}// An Inst is a single instruction in a regular expression program.type Inst struct {Op InstOpOut uint32 // all but InstMatch, InstFailArg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidthRune []rune}func (p *Prog) String() string {var b bytes.BufferdumpProg(&b, p)return b.String()}// skipNop follows any no-op or capturing instructions// and returns the resulting pc.func (p *Prog) skipNop(pc uint32) *Inst {i := &p.Inst[pc]for i.Op == InstNop || i.Op == InstCapture {pc = i.Outi = &p.Inst[pc]}return i}// op returns i.Op but merges all the Rune special cases into InstRunefunc (i *Inst) op() InstOp {op := i.Opswitch op {case InstRune1, InstRuneAny, InstRuneAnyNotNL:op = InstRune}return op}// Prefix returns a literal string that all matches for the// regexp must start with. Complete is true if the prefix// is the entire match.func (p *Prog) Prefix() (prefix string, complete bool) {i := p.skipNop(uint32(p.Start))// Avoid allocation of buffer if prefix is empty.if i.op() != InstRune || len(i.Rune) != 1 {return "", i.Op == InstMatch}// Have prefix; gather characters.var buf bytes.Bufferfor i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {buf.WriteRune(i.Rune[0])i = p.skipNop(i.Out)}return buf.String(), i.Op == InstMatch}// StartCond returns the leading empty-width conditions that must// be true in any match. It returns ^EmptyOp(0) if no matches are possible.func (p *Prog) StartCond() EmptyOp {var flag EmptyOppc := uint32(p.Start)i := &p.Inst[pc]Loop:for {switch i.Op {case InstEmptyWidth:flag |= EmptyOp(i.Arg)case InstFail:return ^EmptyOp(0)case InstCapture, InstNop:// skipdefault:break Loop}pc = i.Outi = &p.Inst[pc]}return flag}// MatchRune returns true if the instruction matches (and consumes) r.// It should only be called when i.Op == InstRune.func (i *Inst) MatchRune(r rune) bool {rune := i.Rune// Special case: single-rune slice is from literal string, not char class.if len(rune) == 1 {r0 := rune[0]if r == r0 {return true}if Flags(i.Arg)&FoldCase != 0 {for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {if r == r1 {return true}}}return false}// Peek at the first few pairs.// Should handle ASCII well.for j := 0; j < len(rune) && j <= 8; j += 2 {if r < rune[j] {return false}if r <= rune[j+1] {return true}}// Otherwise binary search.lo := 0hi := len(rune) / 2for lo < hi {m := lo + (hi-lo)/2if c := rune[2*m]; c <= r {if r <= rune[2*m+1] {return true}lo = m + 1} else {hi = m}}return false}// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.// Since we act on runes, it would be easy to support Unicode here.func wordRune(r rune) bool {return r == '_' ||('A' <= r && r <= 'Z') ||('a' <= r && r <= 'z') ||('0' <= r && r <= '9')}// MatchEmptyWidth returns true if the instruction matches// an empty string between the runes before and after.// It should only be called when i.Op == InstEmptyWidth.func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {switch EmptyOp(i.Arg) {case EmptyBeginLine:return before == '\n' || before == -1case EmptyEndLine:return after == '\n' || after == -1case EmptyBeginText:return before == -1case EmptyEndText:return after == -1case EmptyWordBoundary:return wordRune(before) != wordRune(after)case EmptyNoWordBoundary:return wordRune(before) == wordRune(after)}panic("unknown empty width arg")}func (i *Inst) String() string {var b bytes.BufferdumpInst(&b, i)return b.String()}func bw(b *bytes.Buffer, args ...string) {for _, s := range args {b.WriteString(s)}}func dumpProg(b *bytes.Buffer, p *Prog) {for j := range p.Inst {i := &p.Inst[j]pc := strconv.Itoa(j)if len(pc) < 3 {b.WriteString(" "[len(pc):])}if j == p.Start {pc += "*"}bw(b, pc, "\t")dumpInst(b, i)bw(b, "\n")}}func u32(i uint32) string {return strconv.FormatUint(uint64(i), 10)}func dumpInst(b *bytes.Buffer, i *Inst) {switch i.Op {case InstAlt:bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))case InstAltMatch:bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))case InstCapture:bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))case InstEmptyWidth:bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))case InstMatch:bw(b, "match")case InstFail:bw(b, "fail")case InstNop:bw(b, "nop -> ", u32(i.Out))case InstRune:if i.Rune == nil {// shouldn't happenbw(b, "rune <nil>")}bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))if Flags(i.Arg)&FoldCase != 0 {bw(b, "/i")}bw(b, " -> ", u32(i.Out))case InstRune1:bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))case InstRuneAny:bw(b, "any -> ", u32(i.Out))case InstRuneAnyNotNL:bw(b, "anynotnl -> ", u32(i.Out))}}
