OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [regexp/] [syntax/] [prog.go] - Blame information for rev 749

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
package syntax
6
 
7
import (
8
        "bytes"
9
        "strconv"
10
        "unicode"
11
)
12
 
13
// Compiled program.
14
// May not belong in this package, but convenient for now.
15
 
16
// A Prog is a compiled regular expression program.
17
type Prog struct {
18
        Inst   []Inst
19
        Start  int // index of start instruction
20
        NumCap int // number of InstCapture insts in re
21
}
22
 
23
// An InstOp is an instruction opcode.
24
type InstOp uint8
25
 
26
const (
27
        InstAlt InstOp = iota
28
        InstAltMatch
29
        InstCapture
30
        InstEmptyWidth
31
        InstMatch
32
        InstFail
33
        InstNop
34
        InstRune
35
        InstRune1
36
        InstRuneAny
37
        InstRuneAnyNotNL
38
)
39
 
40
// An EmptyOp specifies a kind or mixture of zero-width assertions.
41
type EmptyOp uint8
42
 
43
const (
44
        EmptyBeginLine EmptyOp = 1 << iota
45
        EmptyEndLine
46
        EmptyBeginText
47
        EmptyEndText
48
        EmptyWordBoundary
49
        EmptyNoWordBoundary
50
)
51
 
52
// EmptyOpContext returns the zero-width assertions
53
// satisfied at the position between the runes r1 and r2.
54
// Passing r1 == -1 indicates that the position is
55
// at the beginning of the text.
56
// Passing r2 == -1 indicates that the position is
57
// at the end of the text.
58
func EmptyOpContext(r1, r2 rune) EmptyOp {
59
        var op EmptyOp
60
        if r1 < 0 {
61
                op |= EmptyBeginText | EmptyBeginLine
62
        }
63
        if r1 == '\n' {
64
                op |= EmptyBeginLine
65
        }
66
        if r2 < 0 {
67
                op |= EmptyEndText | EmptyEndLine
68
        }
69
        if r2 == '\n' {
70
                op |= EmptyEndLine
71
        }
72
        if IsWordChar(r1) != IsWordChar(r2) {
73
                op |= EmptyWordBoundary
74
        } else {
75
                op |= EmptyNoWordBoundary
76
        }
77
        return op
78
}
79
 
80
// IsWordChar reports whether r is consider a ``word character''
81
// during the evaluation of the \b and \B zero-width assertions.
82
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
83
func IsWordChar(r rune) bool {
84
        return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
85
}
86
 
87
// An Inst is a single instruction in a regular expression program.
88
type Inst struct {
89
        Op   InstOp
90
        Out  uint32 // all but InstMatch, InstFail
91
        Arg  uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
92
        Rune []rune
93
}
94
 
95
func (p *Prog) String() string {
96
        var b bytes.Buffer
97
        dumpProg(&b, p)
98
        return b.String()
99
}
100
 
101
// skipNop follows any no-op or capturing instructions
102
// and returns the resulting pc.
103
func (p *Prog) skipNop(pc uint32) *Inst {
104
        i := &p.Inst[pc]
105
        for i.Op == InstNop || i.Op == InstCapture {
106
                pc = i.Out
107
                i = &p.Inst[pc]
108
        }
109
        return i
110
}
111
 
112
// op returns i.Op but merges all the Rune special cases into InstRune
113
func (i *Inst) op() InstOp {
114
        op := i.Op
115
        switch op {
116
        case InstRune1, InstRuneAny, InstRuneAnyNotNL:
117
                op = InstRune
118
        }
119
        return op
120
}
121
 
122
// Prefix returns a literal string that all matches for the
123
// regexp must start with.  Complete is true if the prefix
124
// is the entire match.
125
func (p *Prog) Prefix() (prefix string, complete bool) {
126
        i := p.skipNop(uint32(p.Start))
127
 
128
        // Avoid allocation of buffer if prefix is empty.
129
        if i.op() != InstRune || len(i.Rune) != 1 {
130
                return "", i.Op == InstMatch
131
        }
132
 
133
        // Have prefix; gather characters.
134
        var buf bytes.Buffer
135
        for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
136
                buf.WriteRune(i.Rune[0])
137
                i = p.skipNop(i.Out)
138
        }
139
        return buf.String(), i.Op == InstMatch
140
}
141
 
142
// StartCond returns the leading empty-width conditions that must
143
// be true in any match.  It returns ^EmptyOp(0) if no matches are possible.
144
func (p *Prog) StartCond() EmptyOp {
145
        var flag EmptyOp
146
        pc := uint32(p.Start)
147
        i := &p.Inst[pc]
148
Loop:
149
        for {
150
                switch i.Op {
151
                case InstEmptyWidth:
152
                        flag |= EmptyOp(i.Arg)
153
                case InstFail:
154
                        return ^EmptyOp(0)
155
                case InstCapture, InstNop:
156
                        // skip
157
                default:
158
                        break Loop
159
                }
160
                pc = i.Out
161
                i = &p.Inst[pc]
162
        }
163
        return flag
164
}
165
 
166
// MatchRune returns true if the instruction matches (and consumes) r.
167
// It should only be called when i.Op == InstRune.
168
func (i *Inst) MatchRune(r rune) bool {
169
        rune := i.Rune
170
 
171
        // Special case: single-rune slice is from literal string, not char class.
172
        if len(rune) == 1 {
173
                r0 := rune[0]
174
                if r == r0 {
175
                        return true
176
                }
177
                if Flags(i.Arg)&FoldCase != 0 {
178
                        for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
179
                                if r == r1 {
180
                                        return true
181
                                }
182
                        }
183
                }
184
                return false
185
        }
186
 
187
        // Peek at the first few pairs.
188
        // Should handle ASCII well.
189
        for j := 0; j < len(rune) && j <= 8; j += 2 {
190
                if r < rune[j] {
191
                        return false
192
                }
193
                if r <= rune[j+1] {
194
                        return true
195
                }
196
        }
197
 
198
        // Otherwise binary search.
199
        lo := 0
200
        hi := len(rune) / 2
201
        for lo < hi {
202
                m := lo + (hi-lo)/2
203
                if c := rune[2*m]; c <= r {
204
                        if r <= rune[2*m+1] {
205
                                return true
206
                        }
207
                        lo = m + 1
208
                } else {
209
                        hi = m
210
                }
211
        }
212
        return false
213
}
214
 
215
// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
216
// Since we act on runes, it would be easy to support Unicode here.
217
func wordRune(r rune) bool {
218
        return r == '_' ||
219
                ('A' <= r && r <= 'Z') ||
220
                ('a' <= r && r <= 'z') ||
221
                ('0' <= r && r <= '9')
222
}
223
 
224
// MatchEmptyWidth returns true if the instruction matches
225
// an empty string between the runes before and after.
226
// It should only be called when i.Op == InstEmptyWidth.
227
func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
228
        switch EmptyOp(i.Arg) {
229
        case EmptyBeginLine:
230
                return before == '\n' || before == -1
231
        case EmptyEndLine:
232
                return after == '\n' || after == -1
233
        case EmptyBeginText:
234
                return before == -1
235
        case EmptyEndText:
236
                return after == -1
237
        case EmptyWordBoundary:
238
                return wordRune(before) != wordRune(after)
239
        case EmptyNoWordBoundary:
240
                return wordRune(before) == wordRune(after)
241
        }
242
        panic("unknown empty width arg")
243
}
244
 
245
func (i *Inst) String() string {
246
        var b bytes.Buffer
247
        dumpInst(&b, i)
248
        return b.String()
249
}
250
 
251
func bw(b *bytes.Buffer, args ...string) {
252
        for _, s := range args {
253
                b.WriteString(s)
254
        }
255
}
256
 
257
func dumpProg(b *bytes.Buffer, p *Prog) {
258
        for j := range p.Inst {
259
                i := &p.Inst[j]
260
                pc := strconv.Itoa(j)
261
                if len(pc) < 3 {
262
                        b.WriteString("   "[len(pc):])
263
                }
264
                if j == p.Start {
265
                        pc += "*"
266
                }
267
                bw(b, pc, "\t")
268
                dumpInst(b, i)
269
                bw(b, "\n")
270
        }
271
}
272
 
273
func u32(i uint32) string {
274
        return strconv.FormatUint(uint64(i), 10)
275
}
276
 
277
func dumpInst(b *bytes.Buffer, i *Inst) {
278
        switch i.Op {
279
        case InstAlt:
280
                bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
281
        case InstAltMatch:
282
                bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
283
        case InstCapture:
284
                bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
285
        case InstEmptyWidth:
286
                bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
287
        case InstMatch:
288
                bw(b, "match")
289
        case InstFail:
290
                bw(b, "fail")
291
        case InstNop:
292
                bw(b, "nop -> ", u32(i.Out))
293
        case InstRune:
294
                if i.Rune == nil {
295
                        // shouldn't happen
296
                        bw(b, "rune ")
297
                }
298
                bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
299
                if Flags(i.Arg)&FoldCase != 0 {
300
                        bw(b, "/i")
301
                }
302
                bw(b, " -> ", u32(i.Out))
303
        case InstRune1:
304
                bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
305
        case InstRuneAny:
306
                bw(b, "any -> ", u32(i.Out))
307
        case InstRuneAnyNotNL:
308
                bw(b, "anynotnl -> ", u32(i.Out))
309
        }
310
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.