1 |
14 |
jlechner |
/* gnu/regexp/RETokenOneOf.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
|
37 |
|
|
|
38 |
|
|
package gnu.regexp;
|
39 |
|
|
import java.util.Vector;
|
40 |
|
|
import java.util.Stack;
|
41 |
|
|
|
42 |
|
|
final class RETokenOneOf extends REToken {
|
43 |
|
|
private Vector options;
|
44 |
|
|
private boolean negative;
|
45 |
|
|
|
46 |
|
|
private Vector addition;
|
47 |
|
|
// This Vector addition is used to store nested character classes.
|
48 |
|
|
// For example, if the original expression is
|
49 |
|
|
// [2-7a-c[f-k][m-z]&&[^p-v][st]]
|
50 |
|
|
// the basic part /2-7a-c/ is stored in the Vector options, and
|
51 |
|
|
// the additional part /[f-k][m-z]&&[^p-v][st]/ is stored in the
|
52 |
|
|
// Vector addition in the following order (Reverse Polish Notation):
|
53 |
|
|
// -- The matching result of the basic part is assumed here.
|
54 |
|
|
// [f-k] -- REToken
|
55 |
|
|
// "|" -- or
|
56 |
|
|
// [m-z] -- REToken
|
57 |
|
|
// "|" -- or
|
58 |
|
|
// false
|
59 |
|
|
// [^p-v] -- REToken
|
60 |
|
|
// "|" -- or
|
61 |
|
|
// [st] -- REToken
|
62 |
|
|
// "|" -- or
|
63 |
|
|
// "&" -- and
|
64 |
|
|
//
|
65 |
|
|
// As it is clear from the explanation above, the Vector addition is
|
66 |
|
|
// effective only when this REToken originates from a character class
|
67 |
|
|
// expression.
|
68 |
|
|
|
69 |
|
|
// This constructor is used for convenience when we know the set beforehand,
|
70 |
|
|
// e.g. \d --> new RETokenOneOf("0123456789",false, ..)
|
71 |
|
|
// \D --> new RETokenOneOf("0123456789",true, ..)
|
72 |
|
|
|
73 |
|
|
RETokenOneOf(int subIndex, String optionsStr, boolean negative, boolean insens) {
|
74 |
|
|
super(subIndex);
|
75 |
|
|
options = new Vector();
|
76 |
|
|
this.negative = negative;
|
77 |
|
|
for (int i = 0; i < optionsStr.length(); i++)
|
78 |
|
|
options.addElement(new RETokenChar(subIndex,optionsStr.charAt(i),insens));
|
79 |
|
|
}
|
80 |
|
|
|
81 |
|
|
RETokenOneOf(int subIndex, Vector options, boolean negative) {
|
82 |
|
|
super(subIndex);
|
83 |
|
|
this.options = options;
|
84 |
|
|
this.negative = negative;
|
85 |
|
|
}
|
86 |
|
|
|
87 |
|
|
RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) {
|
88 |
|
|
super(subIndex);
|
89 |
|
|
this.options = options;
|
90 |
|
|
this.addition = addition;
|
91 |
|
|
this.negative = negative;
|
92 |
|
|
}
|
93 |
|
|
|
94 |
|
|
int getMinimumLength() {
|
95 |
|
|
// (negative || addition != null) occurs when this token originates from
|
96 |
|
|
// character class expression.
|
97 |
|
|
if (negative || addition != null) return 1;
|
98 |
|
|
int min = Integer.MAX_VALUE;
|
99 |
|
|
int x;
|
100 |
|
|
for (int i=0; i < options.size(); i++) {
|
101 |
|
|
if ((x = ((REToken) options.elementAt(i)).getMinimumLength()) < min)
|
102 |
|
|
min = x;
|
103 |
|
|
}
|
104 |
|
|
return min;
|
105 |
|
|
}
|
106 |
|
|
|
107 |
|
|
int getMaximumLength() {
|
108 |
|
|
// (negative || addition != null) occurs when this token originates from
|
109 |
|
|
// character class expression.
|
110 |
|
|
if (negative || addition != null) return 1;
|
111 |
|
|
int max = 0;
|
112 |
|
|
int x;
|
113 |
|
|
for (int i=0; i < options.size(); i++) {
|
114 |
|
|
if ((x = ((REToken) options.elementAt(i)).getMaximumLength()) > max)
|
115 |
|
|
max = x;
|
116 |
|
|
}
|
117 |
|
|
return max;
|
118 |
|
|
}
|
119 |
|
|
|
120 |
|
|
boolean match(CharIndexed input, REMatch mymatch) {
|
121 |
|
|
REMatch tryMatch;
|
122 |
|
|
boolean tryOnly;
|
123 |
|
|
if (addition == null) {
|
124 |
|
|
tryMatch = mymatch;
|
125 |
|
|
tryOnly = false;
|
126 |
|
|
}
|
127 |
|
|
else {
|
128 |
|
|
tryMatch = (REMatch) mymatch.clone();
|
129 |
|
|
tryOnly = true;
|
130 |
|
|
}
|
131 |
|
|
boolean b = negative ?
|
132 |
|
|
matchN(input, tryMatch, tryOnly) :
|
133 |
|
|
matchP(input, tryMatch, tryOnly);
|
134 |
|
|
if (addition == null) return b;
|
135 |
|
|
|
136 |
|
|
Stack stack = new Stack();
|
137 |
|
|
stack.push(new Boolean(b));
|
138 |
|
|
for (int i=0; i < addition.size(); i++) {
|
139 |
|
|
Object obj = addition.elementAt(i);
|
140 |
|
|
if (obj instanceof REToken) {
|
141 |
|
|
b = ((REToken)obj).match(input, (REMatch)mymatch.clone());
|
142 |
|
|
stack.push(new Boolean(b));
|
143 |
|
|
}
|
144 |
|
|
else if (obj instanceof Boolean) {
|
145 |
|
|
stack.push(obj);
|
146 |
|
|
}
|
147 |
|
|
else if (obj.equals("|")) {
|
148 |
|
|
b = ((Boolean)stack.pop()).booleanValue();
|
149 |
|
|
b = ((Boolean)stack.pop()).booleanValue() || b;
|
150 |
|
|
stack.push(new Boolean(b));
|
151 |
|
|
}
|
152 |
|
|
else if (obj.equals("&")) {
|
153 |
|
|
b = ((Boolean)stack.pop()).booleanValue();
|
154 |
|
|
b = ((Boolean)stack.pop()).booleanValue() && b;
|
155 |
|
|
stack.push(new Boolean(b));
|
156 |
|
|
}
|
157 |
|
|
else {
|
158 |
|
|
throw new RuntimeException("Invalid object found");
|
159 |
|
|
}
|
160 |
|
|
}
|
161 |
|
|
b = ((Boolean)stack.pop()).booleanValue();
|
162 |
|
|
if (b) {
|
163 |
|
|
++mymatch.index;
|
164 |
|
|
return next(input, mymatch);
|
165 |
|
|
}
|
166 |
|
|
return false;
|
167 |
|
|
}
|
168 |
|
|
|
169 |
|
|
private boolean matchN(CharIndexed input, REMatch mymatch, boolean tryOnly) {
|
170 |
|
|
if (input.charAt(mymatch.index) == CharIndexed.OUT_OF_BOUNDS)
|
171 |
|
|
return false;
|
172 |
|
|
|
173 |
|
|
REMatch newMatch = null;
|
174 |
|
|
REMatch last = null;
|
175 |
|
|
REToken tk;
|
176 |
|
|
for (int i=0; i < options.size(); i++) {
|
177 |
|
|
tk = (REToken) options.elementAt(i);
|
178 |
|
|
REMatch tryMatch = (REMatch) mymatch.clone();
|
179 |
|
|
if (tk.match(input, tryMatch)) { // match was successful
|
180 |
|
|
return false;
|
181 |
|
|
} // is a match
|
182 |
|
|
} // try next option
|
183 |
|
|
|
184 |
|
|
if (tryOnly) return true;
|
185 |
|
|
++mymatch.index;
|
186 |
|
|
return next(input, mymatch);
|
187 |
|
|
}
|
188 |
|
|
|
189 |
|
|
private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) {
|
190 |
|
|
REMatch.REMatchList newMatch = new REMatch.REMatchList();
|
191 |
|
|
REToken tk;
|
192 |
|
|
for (int i=0; i < options.size(); i++) {
|
193 |
|
|
// In order that the backtracking can work,
|
194 |
|
|
// each option must be chained to the next token.
|
195 |
|
|
// But the chain method has some side effect, so
|
196 |
|
|
// we use clones.
|
197 |
|
|
tk = (REToken)((REToken) options.elementAt(i)).clone();
|
198 |
|
|
if (! tryOnly) {
|
199 |
|
|
tk.chain(this.next);
|
200 |
|
|
tk.setUncle(this.uncle);
|
201 |
|
|
tk.subIndex = this.subIndex;
|
202 |
|
|
}
|
203 |
|
|
REMatch tryMatch = (REMatch) mymatch.clone();
|
204 |
|
|
if (tk.match(input, tryMatch)) { // match was successful
|
205 |
|
|
if (tryOnly) return true;
|
206 |
|
|
newMatch.addTail(tryMatch);
|
207 |
|
|
} // is a match
|
208 |
|
|
} // try next option
|
209 |
|
|
if (tryOnly) return false;
|
210 |
|
|
|
211 |
|
|
if (newMatch.head != null) {
|
212 |
|
|
// set contents of mymatch equal to newMatch
|
213 |
|
|
|
214 |
|
|
// try each one that matched
|
215 |
|
|
mymatch.assignFrom(newMatch.head);
|
216 |
|
|
return true;
|
217 |
|
|
} else {
|
218 |
|
|
return false;
|
219 |
|
|
}
|
220 |
|
|
}
|
221 |
|
|
|
222 |
|
|
void dump(StringBuffer os) {
|
223 |
|
|
os.append(negative ? "[^" : "(?:");
|
224 |
|
|
for (int i = 0; i < options.size(); i++) {
|
225 |
|
|
if (!negative && (i > 0)) os.append('|');
|
226 |
|
|
((REToken) options.elementAt(i)).dumpAll(os);
|
227 |
|
|
}
|
228 |
|
|
os.append(negative ? ']' : ')');
|
229 |
|
|
}
|
230 |
|
|
}
|