OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgo/] [go/] [regexp/] [syntax/] [regexp.go] - Blame information for rev 747

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 747 jeremybenn
// Copyright 2011 The Go Authors.  All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
 
5
// Package syntax parses regular expressions into syntax trees.
6
// WORK IN PROGRESS.
7
package syntax
8
 
9
// Note to implementers:
10
// In this package, re is always a *Regexp and r is always a rune.
11
 
12
import (
13
        "bytes"
14
        "strconv"
15
        "strings"
16
        "unicode"
17
)
18
 
19
// A Regexp is a node in a regular expression syntax tree.
20
type Regexp struct {
21
        Op       Op // operator
22
        Flags    Flags
23
        Sub      []*Regexp  // subexpressions, if any
24
        Sub0     [1]*Regexp // storage for short Sub
25
        Rune     []rune     // matched runes, for OpLiteral, OpCharClass
26
        Rune0    [2]rune    // storage for short Rune
27
        Min, Max int        // min, max for OpRepeat
28
        Cap      int        // capturing index, for OpCapture
29
        Name     string     // capturing name, for OpCapture
30
}
31
 
32
// An Op is a single regular expression operator.
33
type Op uint8
34
 
35
// Operators are listed in precedence order, tightest binding to weakest.
36
// Character class operators are listed simplest to most complex
37
// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
38
 
39
const (
40
        OpNoMatch        Op = 1 + iota // matches no strings
41
        OpEmptyMatch                   // matches empty string
42
        OpLiteral                      // matches Runes sequence
43
        OpCharClass                    // matches Runes interpreted as range pair list
44
        OpAnyCharNotNL                 // matches any character
45
        OpAnyChar                      // matches any character
46
        OpBeginLine                    // matches empty string at beginning of line
47
        OpEndLine                      // matches empty string at end of line
48
        OpBeginText                    // matches empty string at beginning of text
49
        OpEndText                      // matches empty string at end of text
50
        OpWordBoundary                 // matches word boundary `\b`
51
        OpNoWordBoundary               // matches word non-boundary `\B`
52
        OpCapture                      // capturing subexpression with index Cap, optional name Name
53
        OpStar                         // matches Sub[0] zero or more times
54
        OpPlus                         // matches Sub[0] one or more times
55
        OpQuest                        // matches Sub[0] zero or one times
56
        OpRepeat                       // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
57
        OpConcat                       // matches concatenation of Subs
58
        OpAlternate                    // matches alternation of Subs
59
)
60
 
61
const opPseudo Op = 128 // where pseudo-ops start
62
 
63
// Equal returns true if x and y have identical structure.
64
func (x *Regexp) Equal(y *Regexp) bool {
65
        if x == nil || y == nil {
66
                return x == y
67
        }
68
        if x.Op != y.Op {
69
                return false
70
        }
71
        switch x.Op {
72
        case OpEndText:
73
                // The parse flags remember whether this is \z or \Z.
74
                if x.Flags&WasDollar != y.Flags&WasDollar {
75
                        return false
76
                }
77
 
78
        case OpLiteral, OpCharClass:
79
                if len(x.Rune) != len(y.Rune) {
80
                        return false
81
                }
82
                for i, r := range x.Rune {
83
                        if r != y.Rune[i] {
84
                                return false
85
                        }
86
                }
87
 
88
        case OpAlternate, OpConcat:
89
                if len(x.Sub) != len(y.Sub) {
90
                        return false
91
                }
92
                for i, sub := range x.Sub {
93
                        if !sub.Equal(y.Sub[i]) {
94
                                return false
95
                        }
96
                }
97
 
98
        case OpStar, OpPlus, OpQuest:
99
                if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
100
                        return false
101
                }
102
 
103
        case OpRepeat:
104
                if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
105
                        return false
106
                }
107
 
108
        case OpCapture:
109
                if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
110
                        return false
111
                }
112
        }
113
        return true
114
}
115
 
116
// writeRegexp writes the Perl syntax for the regular expression re to b.
117
func writeRegexp(b *bytes.Buffer, re *Regexp) {
118
        switch re.Op {
119
        default:
120
                b.WriteString("")
121
        case OpNoMatch:
122
                b.WriteString(`[^\x00-\x{10FFFF}]`)
123
        case OpEmptyMatch:
124
                b.WriteString(`(?:)`)
125
        case OpLiteral:
126
                if re.Flags&FoldCase != 0 {
127
                        b.WriteString(`(?i:`)
128
                }
129
                for _, r := range re.Rune {
130
                        escape(b, r, false)
131
                }
132
                if re.Flags&FoldCase != 0 {
133
                        b.WriteString(`)`)
134
                }
135
        case OpCharClass:
136
                if len(re.Rune)%2 != 0 {
137
                        b.WriteString(`[invalid char class]`)
138
                        break
139
                }
140
                b.WriteRune('[')
141
                if len(re.Rune) == 0 {
142
                        b.WriteString(`^\x00-\x{10FFFF}`)
143
                } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
144
                        // Contains 0 and MaxRune.  Probably a negated class.
145
                        // Print the gaps.
146
                        b.WriteRune('^')
147
                        for i := 1; i < len(re.Rune)-1; i += 2 {
148
                                lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
149
                                escape(b, lo, lo == '-')
150
                                if lo != hi {
151
                                        b.WriteRune('-')
152
                                        escape(b, hi, hi == '-')
153
                                }
154
                        }
155
                } else {
156
                        for i := 0; i < len(re.Rune); i += 2 {
157
                                lo, hi := re.Rune[i], re.Rune[i+1]
158
                                escape(b, lo, lo == '-')
159
                                if lo != hi {
160
                                        b.WriteRune('-')
161
                                        escape(b, hi, hi == '-')
162
                                }
163
                        }
164
                }
165
                b.WriteRune(']')
166
        case OpAnyCharNotNL:
167
                b.WriteString(`(?-s:.)`)
168
        case OpAnyChar:
169
                b.WriteString(`(?s:.)`)
170
        case OpBeginLine:
171
                b.WriteRune('^')
172
        case OpEndLine:
173
                b.WriteRune('$')
174
        case OpBeginText:
175
                b.WriteString(`\A`)
176
        case OpEndText:
177
                if re.Flags&WasDollar != 0 {
178
                        b.WriteString(`(?-m:$)`)
179
                } else {
180
                        b.WriteString(`\z`)
181
                }
182
        case OpWordBoundary:
183
                b.WriteString(`\b`)
184
        case OpNoWordBoundary:
185
                b.WriteString(`\B`)
186
        case OpCapture:
187
                if re.Name != "" {
188
                        b.WriteString(`(?P<`)
189
                        b.WriteString(re.Name)
190
                        b.WriteRune('>')
191
                } else {
192
                        b.WriteRune('(')
193
                }
194
                if re.Sub[0].Op != OpEmptyMatch {
195
                        writeRegexp(b, re.Sub[0])
196
                }
197
                b.WriteRune(')')
198
        case OpStar, OpPlus, OpQuest, OpRepeat:
199
                if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
200
                        b.WriteString(`(?:`)
201
                        writeRegexp(b, sub)
202
                        b.WriteString(`)`)
203
                } else {
204
                        writeRegexp(b, sub)
205
                }
206
                switch re.Op {
207
                case OpStar:
208
                        b.WriteRune('*')
209
                case OpPlus:
210
                        b.WriteRune('+')
211
                case OpQuest:
212
                        b.WriteRune('?')
213
                case OpRepeat:
214
                        b.WriteRune('{')
215
                        b.WriteString(strconv.Itoa(re.Min))
216
                        if re.Max != re.Min {
217
                                b.WriteRune(',')
218
                                if re.Max >= 0 {
219
                                        b.WriteString(strconv.Itoa(re.Max))
220
                                }
221
                        }
222
                        b.WriteRune('}')
223
                }
224
                if re.Flags&NonGreedy != 0 {
225
                        b.WriteRune('?')
226
                }
227
        case OpConcat:
228
                for _, sub := range re.Sub {
229
                        if sub.Op == OpAlternate {
230
                                b.WriteString(`(?:`)
231
                                writeRegexp(b, sub)
232
                                b.WriteString(`)`)
233
                        } else {
234
                                writeRegexp(b, sub)
235
                        }
236
                }
237
        case OpAlternate:
238
                for i, sub := range re.Sub {
239
                        if i > 0 {
240
                                b.WriteRune('|')
241
                        }
242
                        writeRegexp(b, sub)
243
                }
244
        }
245
}
246
 
247
func (re *Regexp) String() string {
248
        var b bytes.Buffer
249
        writeRegexp(&b, re)
250
        return b.String()
251
}
252
 
253
const meta = `\.+*?()|[]{}^$`
254
 
255
func escape(b *bytes.Buffer, r rune, force bool) {
256
        if unicode.IsPrint(r) {
257
                if strings.IndexRune(meta, r) >= 0 || force {
258
                        b.WriteRune('\\')
259
                }
260
                b.WriteRune(r)
261
                return
262
        }
263
 
264
        switch r {
265
        case '\a':
266
                b.WriteString(`\a`)
267
        case '\f':
268
                b.WriteString(`\f`)
269
        case '\n':
270
                b.WriteString(`\n`)
271
        case '\r':
272
                b.WriteString(`\r`)
273
        case '\t':
274
                b.WriteString(`\t`)
275
        case '\v':
276
                b.WriteString(`\v`)
277
        default:
278
                if r < 0x100 {
279
                        b.WriteString(`\x`)
280
                        s := strconv.FormatInt(int64(r), 16)
281
                        if len(s) == 1 {
282
                                b.WriteRune('0')
283
                        }
284
                        b.WriteString(s)
285
                        break
286
                }
287
                b.WriteString(`\x{`)
288
                b.WriteString(strconv.FormatInt(int64(r), 16))
289
                b.WriteString(`}`)
290
        }
291
}
292
 
293
// MaxCap walks the regexp to find the maximum capture index.
294
func (re *Regexp) MaxCap() int {
295
        m := 0
296
        if re.Op == OpCapture {
297
                m = re.Cap
298
        }
299
        for _, sub := range re.Sub {
300
                if n := sub.MaxCap(); m < n {
301
                        m = n
302
                }
303
        }
304
        return m
305
}
306
 
307
// CapNames walks the regexp to find the names of capturing groups.
308
func (re *Regexp) CapNames() []string {
309
        names := make([]string, re.MaxCap()+1)
310
        re.capNames(names)
311
        return names
312
}
313
 
314
func (re *Regexp) capNames(names []string) {
315
        if re.Op == OpCapture {
316
                names[re.Cap] = re.Name
317
        }
318
        for _, sub := range re.Sub {
319
                sub.capNames(names)
320
        }
321
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.